Random Forests and Ensemble Learning
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
from sklearn.model_selection import train_test_split
1. Generating the data
Generate 12,000 samples and split them into training and test sets 4:1 (test_size=0.2).
# generate 12000 samples with 10 features and 2 classes
from sklearn.datasets import make_hastie_10_2
data,target=make_hastie_10_2()
data.shape,target.shape
((12000, 10), (12000,))
len(target)
12000
target[target==-1]=0
target
array([1., 0., 0., ..., 0., 1., 0.])
data
array([[ 0.78630486, -1.11400257, 0.70382256, ..., 0.82847917,
-1.37139053, -2.19668365],
[-1.03368454, 1.38197532, 0.14687589, ..., -0.1266635 ,
-0.33387158, -0.26654399],
[-0.19796369, 0.3510344 , -0.36834309, ..., -0.06335681,
1.28379355, 0.78997227],
...,
[ 1.28611027, -0.12302968, -1.69093227, ..., 1.03900117,
-0.00831804, 0.07232734],
[-0.92488308, -0.40587571, 0.27916008, ..., 1.05126813,
0.45418349, 1.15997838],
[ 0.34342477, 0.16226858, -0.76778841, ..., 0.6696298 ,
0.28746692, -0.92645814]])
data[:2]
array([[ 0.78630486, -1.11400257, 0.70382256, -1.11910071, 0.13506472,
-0.75552071, 0.40645088, 0.82847917, -1.37139053, -2.19668365],
[-1.03368454, 1.38197532, 0.14687589, 0.1376996 , -0.5647537 ,
2.01163563, 0.38650119, -0.1266635 , -0.33387158, -0.26654399]])
# split into training and test sets
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(data,target,test_size=0.2,random_state=0)
X_train.shape,X_test.shape
((9600, 10), (2400, 10))
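Note that make_hastie_10_2 draws fresh Gaussian samples on every call, so the exact arrays above will differ from run to run. A reproducible variant is a one-line change; a minimal sketch (the random_state values are arbitrary choices, not from the original run):

from sklearn.datasets import make_hastie_10_2
from sklearn.model_selection import train_test_split

# fixing random_state makes both the generated data and the 4:1 split reproducible
data, target = make_hastie_10_2(n_samples=12000, random_state=0)
target[target == -1] = 0  # relabel {-1, +1} -> {0, 1}
X_train, X_test, y_train, y_test = train_test_split(
    data, target, test_size=0.2, random_state=0)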
2. Model comparison
Compare seven classifiers, essentially with default parameters (only KNN is given n_neighbors=3).
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.model_selection import cross_val_score
import time
clf0 = KNeighborsClassifier(n_neighbors=3)
clf1 = LogisticRegression()
clf2 = RandomForestClassifier()
clf3 = AdaBoostClassifier()
clf4 = GradientBoostingClassifier()
clf5 = XGBClassifier()
clf6 = LGBMClassifier()
for clf, label in zip([clf0, clf1, clf2, clf3, clf4, clf5, clf6], ["KNN", 'Logistic Regression', 'Random Forest', 'AdaBoost', 'GBDT', 'XGBoost', 'LightGBM']):
    start = time.time()
    scores = cross_val_score(clf, X_train, y_train, scoring='accuracy', cv=5)
    end = time.time()
    running_time = end - start
    print("%s Accuracy: %0.8f (+/- %0.2f), %0.2f s" % (label, scores.mean(), scores.std(), running_time))
KNN Accuracy: 0.72895833 (+/- 0.01), 0.93 s
Logistic Regression Accuracy: 0.51510417 (+/- 0.01), 0.12 s
Random Forest Accuracy: 0.88510417 (+/- 0.01), 11.39 s
AdaBoost Accuracy: 0.87906250 (+/- 0.00), 2.36 s
GBDT Accuracy: 0.91541667 (+/- 0.01), 9.03 s
XGBoost Accuracy: 0.92989583 (+/- 0.01), 2.66 s
[LightGBM] [Info] Number of positive: 3812, number of negative: 3868
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000728 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 7680, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.496354 -> initscore=-0.014584
[LightGBM] [Info] Start training from score -0.014584
(a near-identical block, differing only in the measured overhead, is printed for each of the remaining 4 CV folds)
LightGBM Accuracy: 0.93510417 (+/- 0.00), 0.71 s
Comparing the seven models, logistic regression is by far the fastest but also the least accurate.
LightGBM is both fast and the most accurate here, which is why it has become a default choice for structured (tabular) data.
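For a cleaner side-by-side view, the same loop can collect the scores into a table; a small sketch (the rows/DataFrame layout is our own addition, not from the original run):

import time
import pandas as pd
from sklearn.model_selection import cross_val_score

# rerun the CV loop and tabulate mean accuracy, std and runtime per model
rows = []
for clf, label in zip([clf0, clf1, clf2, clf3, clf4, clf5, clf6],
                      ["KNN", "Logistic Regression", "Random Forest",
                       "AdaBoost", "GBDT", "XGBoost", "LightGBM"]):
    start = time.time()
    scores = cross_val_score(clf, X_train, y_train, scoring="accuracy", cv=5)
    rows.append({"model": label, "mean_acc": scores.mean(),
                 "std": scores.std(), "time_s": time.time() - start})
print(pd.DataFrame(rows).sort_values("mean_acc", ascending=False))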
3. Using XGBoost
3.1 The native XGBoost API
import xgboost as xgb
# record the running time
import time
start_time = time.time()
# wrap the arrays in XGBoost's DMatrix format
xgb_train = xgb.DMatrix(X_train, label=y_train)
xgb_test = xgb.DMatrix(X_test, label=y_test)
## parameters
params = {
    'booster': 'gbtree',
    # 'silent': 1,      # 1 suppresses the per-round messages; deprecated in recent versions
    # 'nthread': 7,     # number of CPU threads; defaults to the maximum available
    'eta': 0.007,       # shrinkage, analogous to a learning rate
    'min_child_weight': 3,
    # min_child_weight (default 1) is the minimum sum of instance Hessians h in a leaf.
    # For 0-1 classification with imbalanced classes, if h is around 0.01,
    # min_child_weight=1 means a leaf must contain at least ~100 samples.
    # It strongly affects results: the smaller the value, the easier it is to overfit.
    'max_depth': 6,     # tree depth; deeper trees overfit more easily
    'gamma': 0.1,       # minimum loss reduction required to split a leaf further; larger is more conservative (typically 0.1-0.2)
    'subsample': 0.7,   # row subsampling of the training data
    'colsample_bytree': 0.7,  # column subsampling when building each tree
    'lambda': 2,        # L2 regularization on leaf weights; larger values make overfitting less likely
    # 'alpha': 0,       # L1 regularization term
    # 'scale_pos_weight': 1,  # values > 0 help convergence when classes are imbalanced
    # 'objective': 'multi:softmax',  # for multi-class problems
    # 'num_class': 10,  # number of classes, used together with multi:softmax
    'seed': 1000,       # random seed
    # 'eval_metric': 'auc'
    # note: no 'objective' is set, so XGBoost falls back to its default regression
    # objective, which is why the log below reports train/val RMSE
}
plst = list(params.items())  # xgb.train also accepts the params dict directly
num_rounds = 500  # number of boosting rounds
watchlist = [(xgb_train, 'train'), (xgb_test, 'val')]
# train the model (and optionally save it)
# early_stopping_rounds: with a large round count, training stops early if the
# validation metric has not improved for the given number of rounds
model = xgb.train(
    plst,
    xgb_train,
    num_rounds,
    watchlist,
    early_stopping_rounds=100,
)
# model.save_model('./model/xgb.model')  # persist the trained model
print("best_ntree_limit", model.best_ntree_limit)  # removed in XGBoost 2.0; see the sketch below
y_pred = model.predict(xgb_test, ntree_limit=model.best_ntree_limit)
# misclassification rate: threshold the continuous (regression) predictions at 0.5
print('error=%f' % (sum(1 for i in range(len(y_pred)) if int(y_pred[i] > 0.5) != y_test[i]) / float(len(y_pred))))
# report the running time
cost_time = time.time() - start_time
print("xgboost success!", '\n', "cost time:", cost_time, "(s)......")
[0] train-rmse:0.49936 val-rmse:0.49942
[1] train-rmse:0.49870 val-rmse:0.49887
[2] train-rmse:0.49809 val-rmse:0.49834
[3] train-rmse:0.49743 val-rmse:0.49781
[4] train-rmse:0.49679 val-rmse:0.49734
[5] train-rmse:0.49618 val-rmse:0.49676
... (rounds 6-495 elided; train and val RMSE decrease steadily throughout)
[496] train-rmse:0.31309 val-rmse:0.35105
[497] train-rmse:0.31291 val-rmse:0.35090
[498] train-rmse:0.31271 val-rmse:0.35077
[499] train-rmse:0.31250 val-rmse:0.35059
best_ntree_limit 500
error=0.098750
xgboost success!
cost time: 8.833775758743286 (s)......
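The commented-out save_model call above can be paired with a reload later. Note that best_ntree_limit and the ntree_limit predict argument were removed in XGBoost 2.0; best_iteration and iteration_range replace them. A minimal sketch under that assumption (the file path is illustrative):

# persist the trained booster and reload it without retraining (path is illustrative)
model.save_model("xgb.model")
loaded = xgb.Booster(model_file="xgb.model")
# XGBoost >= 1.4: predict with the trees up to the best round found by early stopping
y_pred = loaded.predict(xgb_test, iteration_range=(0, model.best_iteration + 1))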
3.2 The scikit-learn interface
Parameter names that change between the native API and the wrapper:
eta -> learning_rate
lambda -> reg_lambda
alpha -> reg_alpha
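In code, the same configuration reads almost identically in both APIs; a small sketch of the renaming (values borrowed from section 3.1, not retuned):

from xgboost import XGBClassifier

# native API spelling                       # sklearn-wrapper spelling
native_params = {'eta': 0.007,              # learning_rate=0.007
                 'lambda': 2,               # reg_lambda=2
                 'alpha': 0,                # reg_alpha=0
                 'max_depth': 6,
                 'subsample': 0.7}
clf = XGBClassifier(learning_rate=0.007, reg_lambda=2, reg_alpha=0,
                    max_depth=6, subsample=0.7)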
from sklearn.model_selection import train_test_split
from sklearn import metrics
from xgboost import XGBClassifier
clf = XGBClassifier(
    # silent=0,        # removed in recent versions; controlled message verbosity
    # nthread=4,       # number of CPU threads; defaults to the maximum available
    learning_rate=0.3, # shrinkage, analogous to a learning rate
    min_child_weight=1,
    # min_child_weight (default 1) is the minimum sum of instance Hessians h in a leaf.
    # For 0-1 classification with imbalanced classes, if h is around 0.01,
    # min_child_weight=1 means a leaf must contain at least ~100 samples.
    # It strongly affects results: the smaller the value, the easier it is to overfit.
    max_depth=6,       # tree depth; deeper trees overfit more easily
    gamma=0,           # minimum loss reduction required to split a leaf further; larger is more conservative
    subsample=1,       # row subsampling ratio of the training instances
    max_delta_step=0,  # maximum step size allowed for each tree's weight estimates
    colsample_bytree=1,  # column subsampling when building each tree
    reg_lambda=1,      # L2 regularization on leaf weights; larger values make overfitting less likely
    # reg_alpha=0,     # L1 regularization term
    # scale_pos_weight=1,  # values > 0 help convergence when classes are imbalanced
    # objective='multi:softmax',  # learning task and objective, e.g. for multi-class problems
    # num_class=10,    # number of classes, used together with multi:softmax
    n_estimators=100,  # number of trees
    seed=1000          # random seed (random_state in newer versions)
    # eval_metric='auc'
)
clf.fit(X_train, y_train)
y_true, y_pred = y_test, clf.predict(X_test)
print("Accuracy : %.4g" % metrics.accuracy_score(y_true, y_pred))
Accuracy : 0.9263
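Early stopping is also available through the wrapper by passing a validation set to fit. Where early_stopping_rounds lives depends on the XGBoost version (a fit() argument in older releases, a constructor argument from 1.6 on), so the following is a sketch for a recent release:

# early stopping via the sklearn wrapper (XGBoost >= 1.6 style)
from xgboost import XGBClassifier

clf = XGBClassifier(n_estimators=500, learning_rate=0.3,
                    early_stopping_rounds=50, eval_metric='logloss')
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)
print(clf.best_iteration)  # index of the best boosting round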
4. Using LightGBM
4.1 The native API
import lightgbm as lgb
from sklearn.metrics import mean_squared_error
# load your own data here if needed
# print('Load data...')
# df_train = pd.read_csv('../regression/regression.train', header=None, sep='\t')
# df_test = pd.read_csv('../regression/regression.test', header=None, sep='\t')
#
# y_train = df_train[0].values
# y_test = df_test[0].values
# X_train = df_train.drop(0, axis=1).values
# X_test = df_test.drop(0, axis=1).values
# wrap the arrays in LightGBM's Dataset format
lgb_train = lgb.Dataset(X_train, y_train)  # saving the data in LightGBM's binary format makes later loading faster
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)  # validation data
# put the parameters in a dict
params = {
    'task': 'train',
    'boosting_type': 'gbdt',    # type of boosting
    'objective': 'regression',  # objective; a regression objective, so predictions are thresholded at 0.5 below
    'metric': {'l2', 'auc'},    # evaluation metrics
    'num_leaves': 31,           # maximum number of leaves per tree
    'learning_rate': 0.05,      # learning rate
    'feature_fraction': 0.9,    # fraction of features sampled when building each tree
    'bagging_fraction': 0.8,    # fraction of rows sampled per bagging round
    'bagging_freq': 5,          # k means bagging is performed every k iterations
    'verbose': 1                # <0: fatal only, =0: errors and warnings, >0: info
}
print('Start training...')
# train
gbm = lgb.train(params, lgb_train, num_boost_round=500, valid_sets=lgb_eval)  # training takes a parameter dict and a Dataset
print('Save model...')
gbm.save_model('model.txt')  # save the trained model to a file
print('Start predicting...')
# predict on the test set
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)  # if early stopping was enabled, best_iteration gives the best round
# evaluate: misclassification rate with a 0.5 threshold
print('error=%f' % (sum(1 for i in range(len(y_pred)) if int(y_pred[i] > 0.5) != y_test[i]) / float(len(y_pred))))
Start training...
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000427 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 9600, number of used features: 10
[LightGBM] [Info] Start training from score 0.496354
Save model...
Start predicting...
error=0.096250
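The booster saved to model.txt above can be reloaded later without retraining; a minimal sketch:

import lightgbm as lgb

# reload the saved model from the text file and evaluate it the same way
bst = lgb.Booster(model_file='model.txt')
y_pred = bst.predict(X_test)
print('error=%f' % (sum(1 for i in range(len(y_pred)) if int(y_pred[i] > 0.5) != y_test[i]) / float(len(y_pred))))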
4.2 The scikit-learn interface
from sklearn import metrics
from lightgbm import LGBMClassifier
clf = LGBMClassifier(
    boosting_type='gbdt',    # boosting type: gbdt, dart, goss, rf
    num_leaves=31,           # maximum number of leaves; compare xgboost's 2^(max_depth)
    max_depth=-1,            # maximum tree depth (-1 means no limit)
    learning_rate=0.1,       # learning rate
    n_estimators=100,        # number of trees, i.e. boosting rounds
    subsample_for_bin=200000,
    objective=None,
    class_weight=None,
    min_split_gain=0.0,      # minimum gain required to split a node
    min_child_weight=0.001,  # minimum sum of Hessians in a child node
    min_child_samples=20,
    subsample=1.0,           # row sampling ratio of the training data
    subsample_freq=0,        # frequency of row subsampling
    colsample_bytree=1.0,    # column sampling ratio per tree
    reg_alpha=0.0,           # L1 regularization coefficient
    reg_lambda=0.0,          # L2 regularization coefficient
    random_state=None,
    n_jobs=-1,
    silent=True,             # removed in recent LightGBM versions, hence the 'Unknown parameter' warnings below
)
clf.fit(X_train, y_train, eval_metric='auc')
# the second fit below retrains from scratch, discarding the eval metric above
clf.fit(X_train, y_train)
y_true, y_pred = y_test, clf.predict(X_test)
print("Accuracy : %.4g" % metrics.accuracy_score(y_true, y_pred))
[LightGBM] [Warning] Unknown parameter: silent
[LightGBM] [Warning] Unknown parameter: silent
[LightGBM] [Info] Number of positive: 4765, number of negative: 4835
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000432 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 9600, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.496354 -> initscore=-0.014584
[LightGBM] [Info] Start training from score -0.014584
[LightGBM] [Warning] Unknown parameter: silent
[LightGBM] [Warning] Unknown parameter: silent
[LightGBM] [Info] Number of positive: 4765, number of negative: 4835
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000274 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 9600, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.496354 -> initscore=-0.014584
[LightGBM] [Info] Start training from score -0.014584
[LightGBM] [Warning] Unknown parameter: silent
Accuracy : 0.9233
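Recent LightGBM versions drive early stopping through callbacks rather than fit arguments; a sketch, assuming a held-out validation set:

import lightgbm as lgb
from lightgbm import LGBMClassifier

# early stopping via callbacks (LightGBM >= 3.3 style)
clf = LGBMClassifier(n_estimators=500, learning_rate=0.1)
clf.fit(X_train, y_train,
        eval_set=[(X_test, y_test)], eval_metric='auc',
        callbacks=[lgb.early_stopping(stopping_rounds=50),
                   lgb.log_evaluation(period=0)])  # period=0 silences per-round logs
print(clf.best_iteration_)  # best round found on the validation set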
Experiment 1: On the iris dataset, compare the performance of decision trees, random forest, AdaBoost, GBDT, XGBoost, and LightGBM, and analyze the differences
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import time
iris=load_iris()
X=iris["data"]
y=iris["target"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
clf1 = DecisionTreeClassifier(criterion="entropy")  # decision tree
clf2 = RandomForestClassifier()                     # random forest
clf3 = AdaBoostClassifier()                         # AdaBoost
clf4 = GradientBoostingClassifier()                 # GBDT
clf5 = XGBClassifier()                              # XGBoost
clf6 = LGBMClassifier()                             # LightGBM
for clf, label in zip([clf1, clf2, clf3, clf4, clf5, clf6], ['DecisionTree', 'Random Forest', 'AdaBoost', 'GBDT', 'XGBoost', 'LightGBM']):
    start = time.time()
    scores = cross_val_score(clf, X_train, y_train, scoring='accuracy', cv=5)
    end = time.time()
    running_time = end - start
    print("%s Accuracy: %0.8f (+/- %0.2f), %0.2f s" % (label, scores.mean(), scores.std(), running_time))
DecisionTree Accuracy: 0.80208333 (+/- 0.01), 0.80 s
Random Forest Accuracy: 0.88375000 (+/- 0.01), 10.57 s
AdaBoost Accuracy: 0.87906250 (+/- 0.00), 2.26 s
GBDT Accuracy: 0.91541667 (+/- 0.01), 8.60 s
XGBoost Accuracy: 0.92989583 (+/- 0.01), 3.38 s
[LightGBM] [Info] Number of positive: 3812, number of negative: 3868
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000291 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 7680, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.496354 -> initscore=-0.014584
[LightGBM] [Info] Start training from score -0.014584
(a near-identical block, differing only in the measured overhead, is printed for each of the remaining 4 CV folds)
LightGBM Accuracy: 0.93510417 (+/- 0.00), 0.61 s