xgboost 入门

1. 安装 xgboost:`pip3 install xgboost`

2.xgboost demo

agaricus.txt.train 和 agaricus.txt.test:蘑菇数据集(二分类问题)

数据集的详细说明见:https://archive.ics.uci.edu/ml/datasets/Mushroom

每一行数据有22个属性

# coding:utf-8
# Minimal xgboost walkthrough: train a tiny boosted-tree binary classifier on
# the agaricus (mushroom) data set and report train/test accuracy.
import time

import xgboost as xgb

# accuracy_score is used to compute the classification accuracy
from sklearn.metrics import accuracy_score

# Read in the data. The files live in the demo directory of the xgboost
# installation; they have been copied into the data directory next to this code.
# NOTE(review): newer xgboost releases may require the text format to be named
# in the path (e.g. 'agaricus.txt.train?format=libsvm') -- confirm against the
# installed version.
my_workpath = '../data/'
dtrain = xgb.DMatrix(my_workpath + 'agaricus.txt.train')
dtest = xgb.DMatrix(my_workpath + 'agaricus.txt.test')


# Specify parameters via map.
# 'verbosity' replaces the deprecated 'silent' flag; 1 (warnings) is the
# default level, matching the old default of silent=0.
param = {'max_depth': 2, 'eta': 1, 'verbosity': 1, 'objective': 'binary:logistic'}
print(param)

# Number of boosting iterations
num_round = 2

# time.clock() was removed in Python 3.8; perf_counter() is the supported
# high-resolution timer for measuring elapsed time.
starttime = time.perf_counter()

bst = xgb.train(param, dtrain, num_round)  # dtrain is the training data set

endtime = time.perf_counter()
print(endtime - starttime)

# Score the training set; the raw predictions are printed before rounding
train_preds = bst.predict(dtrain)
print("train_preds", train_preds)

# Round each prediction to the nearest integer to obtain a class label
train_predictions = [round(value) for value in train_preds]
print("train_predictions", train_predictions)

y_train = dtrain.get_label()
print("y_train", y_train)

train_accuracy = accuracy_score(y_train, train_predictions)
print("Train Accuracy: %.2f%%" % (train_accuracy * 100.0))

# Make predictions on the held-out test set and score them the same way
preds = bst.predict(dtest)
predictions = [round(value) for value in preds]

y_test = dtest.get_label()

test_accuracy = accuracy_score(y_test, predictions)
print("Test Accuracy: %.2f%%" % (test_accuracy * 100.0))



猜你喜欢

转载自blog.csdn.net/u011243684/article/details/84563878