sklearn生成决策树及其评估

以下是以鸢尾花(iris)数据集进行建模生成的决策树。代码如下:

import os

import matplotlib.pyplot as plt
import numpy as np
import pydot
from sklearn import datasets
from sklearn import tree
# NOTE: sklearn.externals.six was removed in scikit-learn 0.23; on newer
# releases `from io import StringIO` is the drop-in replacement.
from sklearn.externals.six import StringIO
from sklearn.model_selection import train_test_split,cross_val_score

# Fit decision trees of depth 1..5 on the iris data set, export each fitted
# tree to a PDF, and plot the mean 5-fold cross-validated accuracy per depth.

# Single location for every file this script writes (tree PDFs and the plot).
output_dir = 'C:/Users/pc/Desktop/matplotlib'
# Create the directory up front so the PDF/PNG writes below do not fail
# merely because the folder is missing.
os.makedirs(output_dir, exist_ok=True)

data = datasets.load_iris()
X = data.data
y = data.target

# Hold out 30% of the samples; the held-out part is only used for the printed
# comparison of true labels against predicted labels inside the loop.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

k_con = []      # tree depths that were evaluated (x axis of the plot)
score_con = []  # mean cross-validated accuracy for each depth (y axis)
for k in range(1, 6):
    clf = tree.DecisionTreeClassifier(max_depth=k)
    clf.fit(X_train, y_train)

    # Print true labels and predictions one after the other for comparison.
    print('--------------------')
    print(y_test)
    print('--------------------')
    print(clf.predict(X_test))

    # The accuracy estimate is cross-validated on the full data set (X, y),
    # not on the train/test split used for the printout above.
    score = cross_val_score(clf, X, y, cv=5, scoring='accuracy')
    mean_score = score.mean()
    print(mean_score)
    k_con.append(k)
    score_con.append(mean_score)

    # With out_file=None export_graphviz returns the DOT source as a string,
    # so no intermediate StringIO buffer is required.
    dot_data = tree.export_graphviz(
        clf, out_file=None, max_depth=None, label='all',
        filled=True, leaves_parallel=True, impurity=True,
        node_ids=True, proportion=False, rotate=False,
        rounded=True, special_characters=False, precision=3)
    # graph_from_dot_data yields a list of graphs; the tree is the first one.
    p = pydot.graph_from_dot_data(dot_data)
    strr = output_dir + '/xx' + str(k) + '.pdf'
    p[0].write_pdf(strr)

# Accuracy-versus-depth curve.
plt.plot(k_con, score_con)
plt.xlabel('K')
plt.ylabel('accuracy')
stt = output_dir + "/xk值分析图.png"
plt.savefig(stt)

输出如下:

D:\anaconda\python.exe C:/Users/pc/untitled/decesiontree.py
--------------------
[1 0 2 2 0 2 1 2 2 2 2 1 0 0 0 0 2 1 0 0 2 0 2 0 1 0 2 2 0 2 2 0 0 1 0 1 0
 0 2 1 2 0 2 2 0]
--------------------
[1 0 1 1 0 1 1 1 1 1 1 1 0 0 0 0 1 1 0 0 1 0 1 0 1 0 1 1 0 1 1 0 0 1 0 1 0
 0 1 1 1 0 1 1 0]
0.6666666666666666
--------------------
[1 0 2 2 0 2 1 2 2 2 2 1 0 0 0 0 2 1 0 0 2 0 2 0 1 0 2 2 0 2 2 0 0 1 0 1 0
 0 2 1 2 0 2 2 0]
--------------------
[1 0 2 2 0 2 1 2 2 2 2 1 0 0 0 0 2 1 0 0 2 0 2 0 1 0 2 2 0 2 2 0 0 1 0 1 0
 0 2 1 2 0 2 1 0]
0.9333333333333332
--------------------
[1 0 2 2 0 2 1 2 2 2 2 1 0 0 0 0 2 1 0 0 2 0 2 0 1 0 2 2 0 2 2 0 0 1 0 1 0
 0 2 1 2 0 2 2 0]
--------------------
[1 0 2 2 0 2 1 2 2 2 2 1 0 0 0 0 2 1 0 0 2 0 2 0 1 0 2 2 0 2 2 0 0 1 0 1 0
 0 2 1 2 0 2 1 0]
0.9733333333333334
--------------------
[1 0 2 2 0 2 1 2 2 2 2 1 0 0 0 0 2 1 0 0 2 0 2 0 1 0 2 2 0 2 2 0 0 1 0 1 0
 0 2 1 2 0 2 2 0]
--------------------
[1 0 2 2 0 2 1 2 2 2 2 1 0 0 0 0 2 1 0 0 2 0 2 0 1 0 2 2 0 2 2 0 0 1 0 1 0
 0 2 1 2 0 2 1 0]
0.9666666666666668
--------------------
[1 0 2 2 0 2 1 2 2 2 2 1 0 0 0 0 2 1 0 0 2 0 2 0 1 0 2 2 0 2 2 0 0 1 0 1 0
 0 2 1 2 0 2 2 0]
--------------------
[1 0 2 2 0 2 1 2 2 2 2 1 0 0 0 0 2 1 0 0 2 0 2 0 1 0 2 2 0 2 2 0 0 1 0 1 0
 0 2 1 2 0 2 1 0]
0.9533333333333334

Process finished with exit code 0

生成的决策树及 K-accuracy 曲线如下:
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
K为决策树的最大深度(max_depth)。可以看到,5折交叉验证的平均准确率在K=3时最高(约0.973);当K大于3时,准确率开始下降(K=4约为0.967,K=5约为0.953)。

猜你喜欢

转载自blog.csdn.net/qq_38226778/article/details/85059429