Skip to contents

This function adds or updates a scikit-learn model in the database. Provide the Python module (e.g., "xgboost"), the base learner within that module (e.g., "XGBClassifier"), the corresponding {parsnip} engine (e.g., "xgboost"), the equivalent hyperparameter names (e.g., "mtry" in {parsnip} corresponds to "colsample_bytree" in xgboost), and the model mode (e.g., "classification").

Usage

rpwf_add_py_model(
  db_con,
  py_module,
  py_base_learner,
  r_engine,
  hyper_par_rename,
  model_mode
)

Arguments

db_con

an rpwf_connect_db() object.

py_module

the module in scikit-learn, i.e., "sklearn.ensemble".

py_base_learner

the base learner in scikit-learn, i.e., "RandomForestClassifier".

r_engine

the engine in parsnip, i.e., "ranger" or "rpart".

hyper_par_rename

a named list of equivalent hyperparameters, i.e., list(cost_complexity = "ccp_alpha").

model_mode

"classification" or "regression".

Examples

# Generate a dummy database
board <- pins::board_temp()
tmp_dir <- tempdir()
db_con <- rpwf_connect_db(paste(tmp_dir, "db.SQLite", sep = "/"), board)
DBI::dbListTables(db_con$con)
#> [1] "df_tbl"           "model_type_tbl"   "r_grid_tbl"       "wflow_result_tbl"
#> [5] "wflow_tbl"       
DBI::dbGetQuery(db_con$con, "SELECT * FROM model_type_tbl") # before adding
#>   model_type_id            py_module    py_base_learner r_engine
#> 1             1 sklearn.linear_model LogisticRegression   glmnet
#> 2             2 sklearn.linear_model         ElasticNet   glmnet
#> 3             3          sklearn.svm                SVC  kernlab
#> 4             4          sklearn.svm                SVR  kernlab
#> 5             5              xgboost      XGBClassifier  xgboost
#>                                                                                                                                                                                       hyper_par_rename
#> 1                                                                                                                                                             {"penalty":["C"],"mixture":["l1_ratio"]}
#> 2                                                                                                                                                         {"penalty":["alpha"],"mixture":["l1_ratio"]}
#> 3                                                                                          {"cost":["C"],"degree":["degree"],"scale_factor":["gamma"],"rbf_sigma":["gamma"],"kernel_offset":["coef0"]}
#> 4                                                                     {"cost":["C"],"degree":["degree"],"scale_factor":["gamma"],"rbf_sigma":["gamma"],"kernel_offset":["coef0"],"margin":["epsilon"]}
#> 5 {"mtry":["colsample_bytree"],"trees":["n_estimators"],"min_n":["min_child_weight"],"tree_depth":["max_depth"],"learn_rate":["learning_rate"],"loss_reduction":["gamma"],"sample_size":["subsample"]}
#>       model_mode
#> 1 classification
#> 2     regression
#> 3 classification
#> 4     regression
#> 5 classification
rpwf_add_py_model(
  db_con,
  "sklearn.ensemble",
  "RandomForestClassifier",
  "rpart",
  list(
    cost_complexity = "ccp_alpha",
    tree_depth = "max_depth",
    min_n = "min_samples_split"
  ),
  "classification"
)
#> [1] 1
DBI::dbGetQuery(db_con$con, "SELECT * FROM model_type_tbl") # after adding
#>   model_type_id            py_module        py_base_learner r_engine
#> 1             1 sklearn.linear_model     LogisticRegression   glmnet
#> 2             2 sklearn.linear_model             ElasticNet   glmnet
#> 3             3          sklearn.svm                    SVC  kernlab
#> 4             4          sklearn.svm                    SVR  kernlab
#> 5             5              xgboost          XGBClassifier  xgboost
#> 6             6     sklearn.ensemble RandomForestClassifier    rpart
#>                                                                                                                                                                                       hyper_par_rename
#> 1                                                                                                                                                             {"penalty":["C"],"mixture":["l1_ratio"]}
#> 2                                                                                                                                                         {"penalty":["alpha"],"mixture":["l1_ratio"]}
#> 3                                                                                          {"cost":["C"],"degree":["degree"],"scale_factor":["gamma"],"rbf_sigma":["gamma"],"kernel_offset":["coef0"]}
#> 4                                                                     {"cost":["C"],"degree":["degree"],"scale_factor":["gamma"],"rbf_sigma":["gamma"],"kernel_offset":["coef0"],"margin":["epsilon"]}
#> 5 {"mtry":["colsample_bytree"],"trees":["n_estimators"],"min_n":["min_child_weight"],"tree_depth":["max_depth"],"learn_rate":["learning_rate"],"loss_reduction":["gamma"],"sample_size":["subsample"]}
#> 6                                                                                                           {"cost_complexity":["ccp_alpha"],"tree_depth":["max_depth"],"min_n":["min_samples_split"]}
#>       model_mode
#> 1 classification
#> 2     regression
#> 3 classification
#> 4     regression
#> 5 classification
#> 6 classification