前言
本节是对前面所学知识的运用
主要是用kNN分类器训练MNIST数据集
并用PCA降维来加速训练、观察精度变化
较为简单
实现
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier

"""Experiment on the MNIST dataset: a plain kNN baseline vs. kNN after PCA.

Fetches MNIST from OpenML, uses the canonical 60k/10k train/test split,
scores a default KNeighborsClassifier on raw pixels, then repeats after
reducing dimensionality with PCA keeping 90% of the explained variance.
"""

# --- Data ---
# as_frame=False guarantees ndarray output regardless of scikit-learn
# version (newer versions default to returning a pandas DataFrame).
mnist = fetch_openml('mnist_784', as_frame=False)
X, y = mnist['data'], mnist['target']
# Canonical MNIST split: first 60000 samples are train, last 10000 are test.
# Labels arrive as strings ('0'..'9'); cast to float to match the original
# script's numeric-label behavior.
X_train = np.array(X[:60000], dtype=float)
y_train = np.array(y[:60000], dtype=float)
X_test = np.array(X[60000:], dtype=float)
y_test = np.array(y[60000:], dtype=float)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

# --- kNN baseline on raw 784-dim pixels ---
knn_clf = KNeighborsClassifier()
knn_clf.fit(X_train, y_train)
print(knn_clf.score(X_test, y_test))  # very slow: brute-ish search in 784 dims

# --- PCA + kNN ---
pca = PCA(0.90)  # keep enough components to explain 90% of the variance
pca.fit(X_train)
X_train_reduction = pca.transform(X_train)
X_test_reduction = pca.transform(X_test)
print(X_train_reduction.shape)
knn_clf = KNeighborsClassifier()
knn_clf.fit(X_train_reduction, y_train)
print(knn_clf.score(X_test_reduction, y_test))
# Faster AND slightly more accurate: PCA acts as a denoiser here.
结语
最终精度
kNN在0.97左右
PCA后在0.98左右
应该是降噪带来的效果
简单运用了下前面所学内容