Default Pipelines
You may optionally consider using one of a number of fully pre-defined pipelines. These can be accessed through BPt.default.pipelines.
We can see a list of all available pipelines as follows:
In [1]: import BPt as bp
In [2]: from BPt.default.pipelines import pipelines_keys
In [3]: pipelines_keys
Out[3]:
['elastic_pipe',
'ridge_pipe',
'rf_pipe',
'gb_pipe',
'svm_pipe',
'svm_fs_pipe',
'stacking_pipe',
'voting_pipe',
'compare_pipe']
These represent options which we can import, for example:
In [4]: from BPt.default.pipelines import elastic_pipe
In [5]: elastic_pipe
Out[5]:
Pipeline(steps=[Imputer(obj='mean', scope='float'),
Imputer(obj='median', scope='category'), Scaler(obj='robust'),
Transformer(obj='one hot encoder', scope='category'),
Model(obj='elastic',
param_search=ParamSearch(cv=CV(cv_strategy=CVStrategy(),
splits=5),
n_iter=60,
search_type='HammersleySearch'),
params=1)])
We can go through and print each pipeline:
In [6]: for pipeline in pipelines_keys:
...: print(pipeline)
...: eval(f'print(bp.default.pipelines.{pipeline})')
...:
elastic_pipe
Pipeline(steps=[Imputer(obj='mean', scope='float'),
Imputer(obj='median', scope='category'), Scaler(obj='robust'),
Transformer(obj='one hot encoder', scope='category'),
Model(obj='elastic',
param_search=ParamSearch(cv=CV(cv_strategy=CVStrategy(),
splits=5),
n_iter=60,
search_type='HammersleySearch'),
params=1)])
ridge_pipe
Pipeline(steps=[Imputer(obj='mean', scope='float'),
Imputer(obj='median', scope='category'), Scaler(obj='robust'),
Transformer(obj='one hot encoder', scope='category'),
Model(obj='ridge',
param_search=ParamSearch(cv=CV(cv_strategy=CVStrategy(),
splits=5),
n_iter=60,
search_type='HammersleySearch'),
params=1)])
rf_pipe
Pipeline(steps=[Imputer(obj='mean', scope='float'),
Imputer(obj='median', scope='category'), Scaler(obj='robust'),
Transformer(obj='one hot encoder', scope='category'),
Model(obj='rf',
param_search=ParamSearch(cv=CV(cv_strategy=CVStrategy(),
splits=5),
n_iter=60,
search_type='HammersleySearch'),
params=1)])
gb_pipe
Pipeline(steps=[Imputer(obj='mean', scope='float'),
Imputer(obj='median', scope='category'), Scaler(obj='robust'),
Transformer(obj='one hot encoder', scope='category'),
Model(obj='hgb',
param_search=ParamSearch(cv=CV(cv_strategy=CVStrategy(),
splits=5),
n_iter=60,
search_type='HammersleySearch'),
params=1)])
svm_pipe
Pipeline(steps=[Imputer(obj='mean', scope='float'),
Imputer(obj='median', scope='category'), Scaler(obj='robust'),
Transformer(obj='one hot encoder', scope='category'),
Model(obj='svm',
param_search=ParamSearch(cv=CV(cv_strategy=CVStrategy(),
splits=5),
n_iter=60,
search_type='HammersleySearch'),
params=1)])
svm_fs_pipe
Pipeline(steps=[Imputer(obj='mean', scope='float'),
Imputer(obj='median', scope='category'), Scaler(obj='robust'),
Transformer(obj='one hot encoder', scope='category'),
Model(obj=Pipeline(param_search=ParamSearch(cv=CV(cv_strategy=CVStrategy(),
splits=5),
n_iter=60,
search_type='HammersleySearch'),
steps=[FeatSelector(obj='univariate '
'selection',
params=2),
Model(obj='svm', params=1)]))])
stacking_pipe
Pipeline(steps=[Imputer(obj='mean', scope='float'),
Imputer(obj='median', scope='category'), Scaler(obj='robust'),
Transformer(obj='one hot encoder', scope='category'),
Ensemble(base_model=Model(obj='ridge',
param_search=ParamSearch(cv=CV(cv_strategy=CVStrategy(),
splits=5),
n_iter=60,
search_type='HammersleySearch'),
params=1),
models=[Model(obj='elastic',
param_...
Model(obj=Pipeline(param_search=ParamSearch(cv=CV(cv_strategy=CVStrategy(),
splits=5),
n_iter=60,
search_type='HammersleySearch'),
steps=[FeatSelector(obj='univariate '
'selection',
params=2),
Model(obj='svm',
params=1)])),
Model(obj='hgb',
param_search=ParamSearch(cv=CV(cv_strategy=CVStrategy(),
splits=5),
n_iter=60,
search_type='HammersleySearch'),
params=1)],
n_jobs_type='models', obj='stacking')])
voting_pipe
Pipeline(steps=[Imputer(obj='mean', scope='float'),
Imputer(obj='median', scope='category'), Scaler(obj='robust'),
Transformer(obj='one hot encoder', scope='category'),
Ensemble(models=[Model(obj='elastic',
param_search=ParamSearch(cv=CV(cv_strategy=CVStrategy(),
splits=5),
n_iter=60,
search_type='HammersleySearch'),
params=1),
Model(obj='ridge',
param_search=Para...
Model(obj=Pipeline(param_search=ParamSearch(cv=CV(cv_strategy=CVStrategy(),
splits=5),
n_iter=60,
search_type='HammersleySearch'),
steps=[FeatSelector(obj='univariate '
'selection',
params=2),
Model(obj='svm',
params=1)])),
Model(obj='hgb',
param_search=ParamSearch(cv=CV(cv_strategy=CVStrategy(),
splits=5),
n_iter=60,
search_type='HammersleySearch'),
params=1)],
n_jobs_type='models', obj='voting')])
compare_pipe
Compare([elastic, ridge, svm_fs, gb])
Note also that the individual pieces which make up the default pipelines can be accessed as well.
In [7]: from BPt.default.pipelines import pieces_keys
In [8]: pieces_keys
Out[8]:
['m_imputer',
'c_imputer',
'r_scaler',
'ohe',
'random_search',
'hs_search',
'elastic_search',
'rf_search',
'gb_search',
'svm_search',
'u_feat',
'svm',
'svm_search_pipe',
'svm_fs_search',
'ridge_search',
'stacking',
'voting']
# Look at some of the individual pieces
In [9]: bp.default.pipelines.u_feat
Out[9]: FeatSelector(obj='univariate selection', params=2)
In [10]: bp.default.pipelines.svm
Out[10]: Model(obj='svm', params=1)