Chapter 7: PCA and Gradient Ascent

Principal Component Analysis (PCA): its main use is dimensionality reduction.

(The figures from the original post are not reproduced here. They compare two ways of reducing 2-D samples to 1-D, keeping only feature 1 or only feature 2; the right-hand option looks better because the projected points stay more spread out and easier to tell apart.)

A third way to reduce the dimensionality: instead of projecting onto one of the coordinate axes, find a line (an axis) such that, after all samples are projected onto it, they are spread out even more than on either original axis.

Question: how do we measure how "spread out" the projected samples are? With the variance.

Variance: a measure of how spread out the samples are as a whole, Var(x) = (1/m) * Σ (x_i − x̄)². The larger the variance, the more spread out the samples; the smaller, the more tightly packed they are.
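A quick NumPy check of that formula (this snippet is mine, not from the original post):

import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
manual_var = np.sum((x - np.mean(x)) ** 2) / len(x)  # (1/m) * sum((x_i - mean)^2)
print(manual_var)   # 2.0
print(np.var(x))    # np.var computes the same population variance: 2.0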

Step 1: demean the samples, i.e. subtract each feature's mean from every sample so that every feature has mean 0. Demeaning only shifts the point cloud; it does not change how the samples are distributed relative to each other, but it removes the x̄ term from the variance formula.

Summary: we want a unit direction vector w such that, after projecting all (demeaned) samples onto w, the variance of the projections, Var(X_project) = (1/m) * Σ ||X_project_i − X̄_project||², is as large as possible.

Question: how do we make it as large as possible?

After the transformation (demeaning), X̄_project = 0 and the projection of sample X_i onto w has length X_i · w, so the quantity to maximize becomes
f(w) = Var(X_project) = (1/m) * Σ (X_i · w)².

The final question: this is now an optimization problem, find the w that maximizes f(w), and it can be solved with gradient ascent.
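The original notes stop at the formulas, so here is a minimal NumPy sketch of the demean step and the objective f(w) described above (the names demean and f are mine):

import numpy as np

def demean(X):
    # shift each feature so that its mean becomes 0
    return X - np.mean(X, axis=0)

def f(w, X):
    # variance of the samples projected onto the unit direction w:
    # f(w) = (1/m) * sum_i (X_i . w)^2
    return np.sum(X.dot(w) ** 2) / len(X)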

Note the difference from linear regression: in linear regression the vertical axis is the label y and we minimize the vertical distance from each point to the fitted line, whereas in PCA every axis is a feature and the points are projected perpendicularly onto the direction we are looking for.

Using gradient ascent to solve the PCA problem: maximize f(w) = (1/m) * Σ (X_i · w)² by repeatedly moving w in the direction of the gradient ∇f(w) = (2/m) * Xᵀ(Xw), keeping w a unit vector after every step.
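Continuing the sketch above (it reuses numpy, demean and f from the previous snippet; first_component, eta and epsilon are my own names, not from the original post), a minimal gradient-ascent loop for the first principal component:

def df(w, X):
    # gradient of f: (2/m) * X^T (X w)
    return X.T.dot(X.dot(w)) * 2.0 / len(X)

def direction(w):
    # normalize w to a unit vector; only its direction matters
    return w / np.linalg.norm(w)

def first_component(X, initial_w, eta=0.01, n_iters=10000, epsilon=1e-8):
    # gradient ascent: repeatedly move w along the gradient of f and re-normalize;
    # initial_w must not be the zero vector
    w = direction(initial_w)
    for _ in range(n_iters):
        last_w = w
        w = direction(w + eta * df(w, X))
        if abs(f(w, X) - f(last_w, X)) < epsilon:  # stop when f barely changes
            break
    return w

# toy usage: 100 points scattered roughly along a line with slope 0.75
np.random.seed(666)
X_demo = np.empty((100, 2))
X_demo[:, 0] = np.random.uniform(0.0, 100.0, size=100)
X_demo[:, 1] = 0.75 * X_demo[:, 0] + 3.0 + np.random.normal(0.0, 10.0, size=100)
w = first_component(demean(X_demo), np.random.random(X_demo.shape[1]))
print(w)  # unit vector pointing along the direction of largest variance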


PCA in scikit-learn:

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

digits = datasets.load_digits()  # handwritten digits dataset
X = digits.data
y = digits.target

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)
# Use k-nearest neighbors on the raw 64-dimensional data
from sklearn.neighbors import KNeighborsClassifier
knn_clf = KNeighborsClassifier()
knn_clf.fit(X_train, y_train)
a1 = knn_clf.score(X_test, y_test)
# print(a1)
# Use PCA to reduce the data to 2 dimensions, then train kNN on the reduced data
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
pca.fit(X_train)  # fit PCA on the training set only
X_train_reduction = pca.transform(X_train)
X_test_reduction = pca.transform(X_test)  # transform the test set with the same fitted PCA
knn_clf = KNeighborsClassifier()
knn_clf.fit(X_train_reduction, y_train)
a2 = knn_clf.score(X_test_reduction, y_test)
# print(a2)

# print(pca.explained_variance_ratio_)
# Fit PCA with all the components to see how much variance each one explains
pca = PCA(n_components=X_train.shape[1])
pca.fit(X_train)
# print(pca.explained_variance_ratio_)

# Cumulative explained variance ratio versus the number of components kept
plt.plot([i for i in range(X_train.shape[1])],
         [np.sum(pca.explained_variance_ratio_[:i + 1]) for i in range(X_train.shape[1])])
# plt.show()

pca1 = PCA(0.95)  # keep enough components to explain at least 95% of the variance
pca1.fit(X_train)
print(pca1.n_components_)  # number of components chosen for the 95% threshold

# Reduce to the dimensionality that keeps 95% of the variance, then train kNN again
pca = PCA(0.95)
pca.fit(X_train)
X_train_reduction = pca.transform(X_train)
X_test_reduction = pca.transform(X_test)
knn_clf = KNeighborsClassifier()
knn_clf.fit(X_train_reduction, y_train)
a3 = knn_clf.score(X_test_reduction, y_test)
print(a3)

# Reduce the whole dataset to 2 dimensions and visualize the ten digit classes
pca = PCA(n_components=2)
pca.fit(X)
X_reduction = pca.transform(X)
for i in range(10):
    plt.scatter(X_reduction[y == i, 0], X_reduction[y == i, 1], alpha=0.8)
plt.show()


Reposted from blog.csdn.net/weixin_42224488/article/details/109667335