Calling the PCA algorithm from sklearn

Calling the PCA algorithm from sklearn

PCA (Principal Component Analysis) is a dimensionality-reduction method: it reduces the dimension of the data to make training more computationally efficient while retaining the most important information in the data. The following code shows how to call sklearn's PCA implementation:

# sklearn PCA demo on custom 2-D data: generate correlated points, fit PCA,
# reduce to 1 component, then inverse-transform back and plot both clouds.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA  # import the PCA algorithm from sklearn

# Build a 100x2 dataset where column 1 is a noisy linear function of column 0,
# so there is one dominant direction of variance for PCA to find.
X = np.empty((100, 2))
X[:, 0] = np.random.uniform(0.0, 100.0, size=100)
X[:, 1] = 0.75 * X[:, 0] + 3.0 * np.random.normal(0, 3, size=100)
plt.figure()
plt.scatter(X[:, 0], X[:, 1])
plt.show()

pca = PCA(n_components=1)         # n_components: desired number of principal components
pca.fit(X)                        # compute the principal components from the data
print(pca.components_)            # unit direction vector(s) of the principal component(s)
x_reduction = pca.transform(X)    # project the data down to 1 dimension
x_restore = pca.inverse_transform(x_reduction)  # map the reduced data back to 2-D space

# Original points in green, restored (projected) points in red.
plt.figure()
plt.scatter(X[:, 0], X[:, 1], color="g")
plt.scatter(x_restore[:, 0], x_restore[:, 1], color="r")
plt.show()

# Apply PCA to the handwritten-digits dataset.
# 1-1 import libraries, split the data, and train a baseline KNN classifier
# on the raw 64-dimensional features for later comparison.
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

d = datasets.load_digits()
X = d.data        # (n_samples, 64) pixel features
Y = d.target      # digit labels 0-9
x_train, x_test, y_train, y_test = train_test_split(X, Y, random_state=666)
print(x_train.shape)

knn = KNeighborsClassifier()
knn.fit(x_train, y_train)
print(knn.score(x_test, y_test))  # baseline accuracy on raw features

# 1-2 reduce the raw 64-dimensional data down to 2 dimensions.
# NOTE: PCA is fit on the training data only, then the same transform is
# applied to both train and test sets to avoid information leakage.
pca1 = PCA(n_components=2)
pca1.fit(x_train)
x_train_re = pca1.transform(x_train)  # training data reduced to 2-D
x_test_re = pca1.transform(x_test)    # test data reduced with the same components
knn1 = KNeighborsClassifier()
knn1.fit(x_train_re, y_train)         # train and score KNN on the 2-D data
print(knn1.score(x_test_re, y_test))
print(pca1.explained_variance_ratio_)  # variance captured by each of the 2 components

# 1-3-1 run PCA with all 64 components (dimension unchanged) so that
# explained_variance_ratio_ reports the variance share of every component.
pca2 = PCA(n_components=64)
pca2.fit(x_train)
x_train_re = pca2.transform(x_train)
x_test_re = pca2.transform(x_test)
knn1 = KNeighborsClassifier()
knn1.fit(x_train_re, y_train)
print(knn1.score(x_test_re, y_test))
print(pca2.explained_variance_ratio_)  # proportion of total variance per principal component

# 1-3-2 plot the cumulative explained variance of the first n components,
# showing how much of the data the leading components can represent.
plt.figure()
plt.plot([i for i in range(X.shape[1])],
         [np.sum(pca2.explained_variance_ratio_[:i + 1]) for i in range(X.shape[1])])
plt.show()
# 1-4 PCA(ratio): passing a float in (0, 1) keeps the smallest number of
# components whose cumulative explained variance reaches that ratio. Training
# and classification typically become 5-10x faster with almost the same
# accuracy — trading a little accuracy for computational efficiency.
pca3 = PCA(0.95)
pca3.fit(x_train)
print(pca3.n_components_)  # number of dimensions actually kept
x_train_re1 = pca3.transform(x_train)
x_test_re1 = pca3.transform(x_test)
knn2 = KNeighborsClassifier()
knn2.fit(x_train_re1, y_train)
print(knn2.score(x_test_re1, y_test))
# 1-5 reduce the full 64-dimensional dataset to 2 dimensions and scatter-plot
# each digit class in its own color, to visually inspect class separation.
pca1 = PCA(n_components=2)
pca1.fit(X)
x_re = pca1.transform(X)
plt.figure()
for i in range(10):
    # boolean mask selects the rows belonging to digit class i
    plt.scatter(x_re[Y == i, 0], x_re[Y == i, 1])
plt.show()

The output is as follows:

 

 

Guess you like

Origin www.cnblogs.com/Yanjy-OnlyOne/p/11325330.html