下面总结了一些常用的模型调用方法,但是具体的超参数未列举出来,具体参数还是要查询API。
方法中都有Classifier(分类)和Regression(回归)
常用工具:
# Shared utilities: train/test splitting and evaluation metrics.
from sklearn.model_selection import train_test_split
from sklearn import metrics
import warnings

# Silence library warnings so the demo output stays readable.
warnings.filterwarnings("ignore")
模型评估
# Evaluate predictions: fraction of test labels predicted correctly.
acc = metrics.accuracy_score(y_test, pre)
print(acc)
KNN
# K-Nearest Neighbors classifier with default hyperparameters.
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier()
model.fit(train_x, train_y)
朴素贝叶斯
# Multinomial Naive Bayes; alpha is the additive smoothing parameter.
from sklearn.naive_bayes import MultinomialNB

model = MultinomialNB(alpha=0.01)
model.fit(train_x, train_y)
逻辑回归(注意:下面代码使用的是 LogisticRegression 分类器,并非线性回归 LinearRegression)
# Logistic regression (a linear classifier) with L2 regularization.
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(penalty='l2')
model.fit(train_x, train_y)
GBDT
# Gradient Boosted Decision Trees with 200 boosting stages.
from sklearn.ensemble import GradientBoostingClassifier

model = GradientBoostingClassifier(n_estimators=200)
model.fit(train_x, train_y)
随机森林
# Random Forest ensemble of 8 trees.
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(n_estimators=8)
model.fit(train_x, train_y)
支持向量机
# Support Vector Machine with an RBF kernel; probability=True enables
# predict_proba (at extra training cost).
from sklearn.svm import SVC

model = SVC(kernel='rbf', probability=True)
model.fit(train_x, train_y)
XGBOOST
import xgboost as xgb from xgboost.sklearn import XGBClassifier
1、xgb模型训练
# Train an XGBoost binary classifier with hand-tuned hyperparameters,
# then report accuracy on the held-out test set.
xgb1 = XGBClassifier(
    learning_rate=0.05,
    n_estimators=2800,
    max_depth=5,
    min_child_weight=1,
    gamma=0.21,
    subsample=0.8,
    colsample_bytree=0.75,
    objective='binary:logistic',
    nthread=4,
    scale_pos_weight=1,
    seed=27,
)
print("fiting")
xgb1.fit(X_train, y_train)
pre = xgb1.predict(X_test)
print(pre)
print(y_test)
acc = metrics.accuracy_score(y_test, pre)
print(acc)
2、xgb超参搜索
# Stage-wise hyperparameter grids for tuning XGBoost (coarse to fine):
# tree structure (depth / min_child_weight) first, then gamma, then the
# sampling ratios, then L1 regularization, then seed and learning rate.
# Swap the `param_grid=` argument below to run a different stage.
#
# Fix: GridSearchCV was used here without being imported in this snippet.
from sklearn.model_selection import GridSearchCV

param_test1 = {'max_depth': range(3, 7, 2), 'min_child_weight': range(1, 6, 2)}
param_test2 = {'max_depth': [4, 5, 6], 'min_child_weight': [1, 2, 3]}
param_test2b = {'min_child_weight': [6, 8, 10, 12]}
# [0.0, 0.1, 0.2, 0.3, 0.4]
param_test3 = {'gamma': [i / 10.0 for i in range(0, 5)]}
param_test3b = {'gamma': [0.17, 0.18, 0.19, 0.20, 0.21, 0.22, 0.23, 0.24, 0.25]}
param_test4 = {'subsample': [i / 10.0 for i in range(6, 10)],
               'colsample_bytree': [i / 10.0 for i in range(6, 10)]}
param_test5 = {'subsample': [i / 100.0 for i in range(75, 90, 5)],
               'colsample_bytree': [i / 100.0 for i in range(75, 90, 5)]}
param_test6 = {'reg_alpha': [1e-5, 1e-2, 0.1, 1, 100]}
param_test7 = {'reg_alpha': [0, 0.001, 0.005, 0.01, 0.05]}
param_test8 = {'seed': [24, 25, 26, 27, 28]}
param_test9 = {'learning_rate': [0.04, 0.05, 0.06]}

# 5-fold CV search over the learning rate, with all other hyperparameters
# held fixed at their previously tuned values.
gsearch1 = GridSearchCV(
    estimator=XGBClassifier(
        learning_rate=0.06,
        n_estimators=2500,
        max_depth=6,
        min_child_weight=1,
        gamma=0.2,
        subsample=0.8,
        colsample_bytree=0.75,
        objective='binary:logistic',
        nthread=2,
        scale_pos_weight=1,
        seed=25,
    ),
    param_grid=param_test9,
    cv=5,
    verbose=5,
)
gsearch1.fit(X_train, y_train)
print(gsearch1.best_params_, gsearch1.best_score_)
网格超参数搜索
# Grid-search the SVC hyperparameters (C, gamma) with cross-validation,
# print every parameter of the best estimator, then refit a fresh model
# using the best C / gamma found.
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

model = SVC(kernel='rbf', probability=True)
param_grid = {'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
              'gamma': [0.001, 0.0001]}
grid_search = GridSearchCV(model, param_grid, n_jobs=1, verbose=1)
grid_search.fit(train_x, train_y)

best_parameters = grid_search.best_estimator_.get_params()
for para, val in list(best_parameters.items()):
    print(para, val)

model = SVC(kernel='rbf', C=best_parameters['C'],
            gamma=best_parameters['gamma'], probability=True)
model.fit(train_x, train_y)