Add scikit-learn Model Definitions to Database
rpwf_add_py_model.Rd
This function adds or updates a scikit-learn model in the database. Provide
the module (e.g., "xgboost"), the base learner (e.g., "XGBClassifier"),
the corresponding {parsnip} engine (e.g., "xgboost"), the equivalent hyper
parameter names (e.g., "mtry" in {parsnip} is "colsample_bytree"), and the
model mode (e.g., "classification").
Usage
rpwf_add_py_model(
db_con,
py_module,
py_base_learner,
r_engine,
hyper_par_rename,
model_mode
)
Arguments
- db_con
an rpwf_connect_db() object.
- py_module
the module in scikit-learn, e.g., "sklearn.ensemble".
- py_base_learner
the base learner in scikit-learn, e.g., "RandomForestClassifier".
- r_engine
the engine in parsnip, e.g., "ranger" or "rpart".
- hyper_par_rename
a named list of equivalent hyper parameters, e.g.,
list(cost_complexity = "ccp_alpha").
- model_mode
"classification" or "regression".
Examples
# Generate dummy database
board <- pins::board_temp()
tmp_dir <- tempdir()
db_con <- rpwf_connect_db(paste(tmp_dir, "db.SQLite", sep = "/"), board)
DBI::dbListTables(db_con$con)
#> [1] "df_tbl" "model_type_tbl" "r_grid_tbl" "wflow_result_tbl"
#> [5] "wflow_tbl"
DBI::dbGetQuery(db_con$con, "SELECT * FROM model_type_tbl") # before adding
#> model_type_id py_module py_base_learner r_engine
#> 1 1 sklearn.linear_model LogisticRegression glmnet
#> 2 2 sklearn.linear_model ElasticNet glmnet
#> 3 3 sklearn.svm SVC kernlab
#> 4 4 sklearn.svm SVR kernlab
#> 5 5 xgboost XGBClassifier xgboost
#> hyper_par_rename
#> 1 {"penalty":["C"],"mixture":["l1_ratio"]}
#> 2 {"penalty":["alpha"],"mixture":["l1_ratio"]}
#> 3 {"cost":["C"],"degree":["degree"],"scale_factor":["gamma"],"rbf_sigma":["gamma"],"kernel_offset":["coef0"]}
#> 4 {"cost":["C"],"degree":["degree"],"scale_factor":["gamma"],"rbf_sigma":["gamma"],"kernel_offset":["coef0"],"margin":["epsilon"]}
#> 5 {"mtry":["colsample_bytree"],"trees":["n_estimators"],"min_n":["min_child_weight"],"tree_depth":["max_depth"],"learn_rate":["learning_rate"],"loss_reduction":["gamma"],"sample_size":["subsample"]}
#> model_mode
#> 1 classification
#> 2 regression
#> 3 classification
#> 4 regression
#> 5 classification
rpwf_add_py_model(
db_con,
"sklearn.ensemble",
"RandomForestClassifier",
"rpart",
list(
cost_complexity = "ccp_alpha",
tree_depth = "max_depth",
min_n = "min_samples_split"
),
"classification"
)
#> [1] 1
DBI::dbGetQuery(db_con$con, "SELECT * FROM model_type_tbl") # after adding
#> model_type_id py_module py_base_learner r_engine
#> 1 1 sklearn.linear_model LogisticRegression glmnet
#> 2 2 sklearn.linear_model ElasticNet glmnet
#> 3 3 sklearn.svm SVC kernlab
#> 4 4 sklearn.svm SVR kernlab
#> 5 5 xgboost XGBClassifier xgboost
#> 6 6 sklearn.ensemble RandomForestClassifier rpart
#> hyper_par_rename
#> 1 {"penalty":["C"],"mixture":["l1_ratio"]}
#> 2 {"penalty":["alpha"],"mixture":["l1_ratio"]}
#> 3 {"cost":["C"],"degree":["degree"],"scale_factor":["gamma"],"rbf_sigma":["gamma"],"kernel_offset":["coef0"]}
#> 4 {"cost":["C"],"degree":["degree"],"scale_factor":["gamma"],"rbf_sigma":["gamma"],"kernel_offset":["coef0"],"margin":["epsilon"]}
#> 5 {"mtry":["colsample_bytree"],"trees":["n_estimators"],"min_n":["min_child_weight"],"tree_depth":["max_depth"],"learn_rate":["learning_rate"],"loss_reduction":["gamma"],"sample_size":["subsample"]}
#> 6 {"cost_complexity":["ccp_alpha"],"tree_depth":["max_depth"],"min_n":["min_samples_split"]}
#> model_mode
#> 1 classification
#> 2 regression
#> 3 classification
#> 4 regression
#> 5 classification
#> 6 classification