# sklearn的快速使用之六（决策树分类） -- Quick start with sklearn, part 6: decision-tree classification

# Echo the module docstring; this prints None when the module has no docstring.
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

# Parameters
n_classes = 3
plot_colors = "ryb"  # one single-letter matplotlib colour per class
plot_step = 0.02  # spacing of the mesh on which each tree is evaluated

# Load data
iris = load_iris()
y = iris.target  # the labels are the same for every feature pair

print(iris.data)
print(iris.data[:, [0, 1]])

# Train one decision tree per pair of feature columns and draw its decision
# surface in its own panel.
# NOTE(review): the original indentation was lost; the loop structure below is
# reconstructed from the statements themselves -- confirm against the author's
# intent.
for pairidx, pair in enumerate([[0, 1], [0, 2], [0, 3],
                                [1, 2], [1, 3], [2, 3]]):
    # Keep only the two selected feature columns for training.
    X = iris.data[:, pair]

    # Train
    clf = DecisionTreeClassifier().fit(X, y)

    # The panels are arranged in 2 rows and 3 columns.
    plt.subplot(2, 3, pairidx + 1)

    # Range of the first selected feature, widened by 1 on each side.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    print(x_min, x_max)
    # Range of the second selected feature, widened by 1 on each side.
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    print(y_min, y_max)

    # Build the evaluation mesh: xx repeats the stepped first-feature values
    # along every row, yy repeats the stepped second-feature values along
    # every column.
    xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                         np.arange(y_min, y_max, plot_step))
    print(xx)
    print(yy)

    # Explicit paddings are passed because, per the original author's note,
    # a bare plt.tight_layout() did not control the spacing between panels
    # well enough.
    plt.tight_layout(h_pad=0.5, w_pad=0.5, pad=2.5)
    print("---------------")
    print(xx.ravel())
    print("---------------")
    print(yy.ravel())

    # Flatten both mesh matrices to 1-D and join them column-wise with np.c_
    # so that every mesh point becomes one two-feature sample for predict().
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    print(Z)

    # Back to the mesh shape so the predictions line up with xx and yy.
    Z = Z.reshape(xx.shape)
    print(Z)

    # Filled contours of the predicted class form the decision surface.
    cs = plt.contourf(xx, yy, Z, cmap=plt.cm.RdYlBu)

    # Axis labels are the names of the two selected features.
    plt.xlabel(iris.feature_names[pair[0]])
    plt.ylabel(iris.feature_names[pair[1]])

    # Plot the training points
    for i, color in zip(range(n_classes), plot_colors):
        # A boolean mask selects the samples of class i and yields 1-D
        # coordinate arrays.
        mask = y == i
        # No cmap here: each class is drawn in one literal colour, so there
        # is no data to colour-map.
        plt.scatter(X[mask, 0], X[mask, 1], c=color,
                    label=iris.target_names[i],
                    edgecolor='black', s=15)

# Figure-level finishing touches, executed once after all panels are drawn.
plt.suptitle("Decision surface of a decision tree using paired features")
plt.legend(loc='lower right', borderpad=0, handletextpad=0)
plt.axis("tight")
plt.show()

# sklearn的快速使用之六（决策树分类）

# 猜你喜欢