1. 安装 xgboost:pip3 install xgboost
2.xgboost demo
agaricus.txt.train 和 agaricus.txt.test 是蘑菇数据集,对应一个二分类问题
数据集的详细说明见:https://archive.ics.uci.edu/ml/datasets/Mushroom
每一行数据有 22 个属性
# coding:utf-8
import xgboost as xgb
# 计算分类正确率
from sklearn.metrics import accuracy_score
# Read in the data. The agaricus files ship in the demo directory of the
# xgboost installation; they have been copied into the data directory used by
# this code.
my_workpath = '../data/'
# NOTE(review): newer XGBoost releases may require an explicit
# '?format=libsvm' suffix when loading text files -- confirm against the
# installed version before changing these paths.
dtrain = xgb.DMatrix(my_workpath + 'agaricus.txt.train')
dtest = xgb.DMatrix(my_workpath + 'agaricus.txt.test')
# Specify the booster parameters via a dict.
# The old 'silent' flag is deprecated in XGBoost in favour of 'verbosity'
# (0 = silent, 1 = warning, 2 = info, 3 = debug). 'silent': 0 meant "do not
# suppress messages", which corresponds to the default level 1; raise it to 2
# to see per-round training messages.
param = {
    'max_depth': 2,
    'eta': 1,
    'verbosity': 1,
    'objective': 'binary:logistic',
}
print(param)
# Number of boosting iterations.
num_round = 2
import time
# time.clock() was removed in Python 3.8; time.perf_counter() is the
# documented replacement for measuring elapsed time.
starttime = time.perf_counter()
bst = xgb.train(param, dtrain, num_round)  # dtrain is the training data set
endtime = time.perf_counter()
# Elapsed training time in seconds.
print(endtime - starttime)
# Evaluate the trained booster on the training set itself.
# bst.predict yields one score per row (presumably probabilities, given the
# 'binary:logistic' objective -- confirm against the XGBoost docs).
train_preds = bst.predict(dtrain)
print("train_preds", train_preds)
# Round every score to the nearest integer to obtain hard class labels.
train_predictions = list(map(round, train_preds))
print("train_predictions", train_predictions)
# Labels stored in the training DMatrix.
y_train = dtrain.get_label()
print("y_train", y_train)
train_accuracy = accuracy_score(y_true=y_train, y_pred=train_predictions)
# Report the accuracy as a percentage with two decimals.
print("Train Accuary: %.2f%%" % (100.0 * train_accuracy,))
# Score the held-out test set and report its accuracy.
# Labels stored in the test DMatrix.
y_test = dtest.get_label()
# One score per test row from the trained booster.
preds = bst.predict(dtest)
# Round every score to the nearest integer to obtain hard class labels.
predictions = list(map(round, preds))
test_accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
# Report the accuracy as a percentage with two decimals.
print("Test Accuracy: %.2f%%" % (100.0 * test_accuracy,))