Machine learning combat-KNN algorithm-20

Machine learning combat-KNN algorithm-iris classification

# 导入算法包以及数据集
from sklearn import neighbors
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import random
# 载入数据
iris = datasets.load_iris()
print(iris)

Insert picture description here

# 打乱数据切分数据集
# x_train,x_test,y_train,y_test = train_test_split(iris.data, iris.target, test_size=0.2) #分割数据0.2为测试数据,0.8为训练数据

#打乱数据
data_size = iris.data.shape[0]
index = [i for i in range(data_size)] 
random.shuffle(index)  
iris.data = iris.data[index]
iris.target = iris.target[index]

#切分数据集
test_size = 40
x_train = iris.data[test_size:]
x_test =  iris.data[:test_size]
y_train = iris.target[test_size:]
y_test = iris.target[:test_size]

# 构建模型
model = neighbors.KNeighborsClassifier(n_neighbors=3)
model.fit(x_train, y_train)
prediction = model.predict(x_test)

print(classification_report(y_test, prediction))

Insert picture description here

Machine Learning Actual Combat-KNN Algorithm-Fruit Classification

Insert picture description here

from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

Insert picture description here

data = pd.read_csv('fruit_data.csv')
data

Insert picture description here

labelencoder = LabelEncoder()
data.iloc[:,0] = labelencoder.fit_transform(data.iloc[:,0])
data

Insert picture description here

labelencoder.classes_

Insert picture description here

from sklearn.model_selection import train_test_split
# 切分数据集,stratify=y表示切分后训练集和测试集中的数据类型的比例跟切分前y中的比例一致
# 比如切分前y中0和1的比例为1:2,切分后y_train和y_test中0和1的比例也都是1:2
# 设置random_state,使用同样的随机方式来切分数据
x_train,x_test,y_train,y_test = train_test_split(data.iloc[:,1:], data.iloc[:,0], test_size=0.3, stratify=data.iloc[:,0], random_state=20)
# 保存不同k值测试集准确率
test_scores = []
# 保存不同k值训练集准确率
train_scores = []

# 设置30个k值
k = 30
for i in range(1,k):
    knn = KNeighborsClassifier(i)
    knn.fit(x_train,y_train)
    # 保存测试集准确率
    test_scores.append(knn.score(x_test,y_test))
    # 保存训练集准确率
    train_scores.append(knn.score(x_train,y_train))
plt.title('k-NN Varying number of neighbors')
plt.plot(range(1,k),test_scores,label="Test")
plt.plot(range(1,k),train_scores,label="Train")
plt.legend()
plt.xticks(range(1,k))
plt.xlabel('k')
plt.ylabel('accuracy')
plt.show()

Insert picture description here

# 选择一个最好的k值作为模型参数
k = np.argmax(test_scores)+1
knn = KNeighborsClassifier(k)
knn.fit(x_train,y_train)
print(k)
print(knn.score(x_test,y_test))

Insert picture description here

Guess you like

Origin blog.csdn.net/qq_37978800/article/details/113865291