Machine learning--4

1. Apply the K-means algorithm to image compression
from sklearn.datasets import load_sample_image
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import numpy as np
import sys

# Read and display the original picture
ch = load_sample_image("china.jpg")
plt.imshow(ch)
plt.show()

# Observe the picture
print('size:', ch.size)
print('pixel distribution before reducing resolution:', ch.shape)
print('memory:', sys.getsizeof(ch))

# Reduce the image resolution by keeping every third pixel
img = ch[::3, ::3]
plt.imshow(img)
print('pixel distribution after reducing resolution:', img.shape)
plt.show()

# Flatten the pixels into a two-dimensional (n_pixels, 3) array
x = img.reshape(-1, 3)

# Model training
model = KMeans(n_clusters=64).fit(x)

# Predicted cluster label for every pixel, and the cluster center colors
pre = model.predict(x)
print('pixel cluster labels:', pre)
center = model.cluster_centers_
print('cluster center colors:', center)

# Rebuild the compressed picture from the 64 center colors
new_img = center[pre].reshape(img.shape)
plt.imshow(new_img.astype(np.uint8))
plt.show()

# Compare the picture before and after compression
print('file size before compression:', ch.size)
print('file size after compression:', new_img.size)
print('memory size before compression:', sys.getsizeof(ch))
print('memory size after compression:', sys.getsizeof(new_img))
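The two size prints above barely change, because new_img is still a full RGB array; the real saving of K-means compression is that the picture can be stored as a 64-color palette plus one small label per pixel. A minimal sketch of that estimate, assuming img, pre, and center from the code above:

# Hedged sketch: estimate the storage of the compressed representation
# (a 64-color palette plus one uint8 label per pixel) versus the raw image.
# Uses img, pre, and center from the code above.
raw_bytes = img.nbytes                          # full RGB image, 1 byte per channel
palette_bytes = center.astype(np.uint8).nbytes  # 64 colors x 3 channels
label_bytes = pre.astype(np.uint8).nbytes       # one label per pixel (64 <= 256)
print('raw image bytes:', raw_bytes)
print('compressed bytes (palette + labels):', palette_bytes + label_bytes)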

Before compression: (screenshot of the original china.jpg)

After compression: (screenshot of the 64-color image)

2. Observe problems in study and daily life that can be solved with K-means.

Using NBA players' shooting percentages, we cluster the players by their average scoring ability. In the NBA, excellent scorers keep the whole team in rhythm, and a faster scoring rhythm means a better chance of winning.
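The code below reads the player statistics from an Excel file named nba.xlsx with columns 'Player', 'shooting', 'free throw', 'three points', and 'score' (about 200 rows). If you do not have that file, here is a minimal sketch of a synthetic stand-in (the column names come from the code; the value ranges are made-up placeholders, only for making the code runnable):

# Hedged sketch: build a small synthetic nba.xlsx so the clustering code below
# can run without the real data set (writing .xlsx requires openpyxl).
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
pd.DataFrame({
    'Player': ['player_%d' % i for i in range(200)],
    'shooting': rng.uniform(0.35, 0.60, 200),
    'free throw': rng.uniform(0.60, 0.95, 200),
    'three points': rng.uniform(0.25, 0.45, 200),
    'score': rng.uniform(5, 30, 200),
}).to_excel('nba.xlsx', index=False)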

# Cluster NBA players' shooting, free-throw, and three-point percentages
# together with their points to group them by average scoring ability
import pandas as pd
import numpy as np
from numpy import *
import matplotlib.pyplot as plt

# Import the NBA player data
data = pd.read_excel("nba.xlsx")
name = data['Player'].values
data0 = data['shooting'].values
data1 = data['free throw'].values
data2 = data['three points'].values
data3 = data['score'].values
datax = np.zeros([200, 4])
datax[:, 0] = data0
datax[:, 1] = data1
datax[:, 2] = data2
datax[:, 3] = data3
print(datax.shape)

# Assume the number of clusters is 4; the value of k can be changed
k = 4

# Create a multi-dimensional array to hold each player's distances and score level
dist = np.zeros([data.shape[0], datax.shape[1] + 1])

# Initialize the cluster centers with the first k samples
def initcent(x, k):
    center = x[:k, :]
    return center

# Compute the Euclidean distance from one sample to every cluster center;
# the distances are stored in the dist multi-dimensional array
def nearest(x, center):
    a = []
    for j in range(k):
        a.append(np.sqrt(sum((x - center[j, :]) ** 2)))
    return a

# Assign each sample to its nearest center (stored in the last column of dist)
def xclassify(x, dist, center):
    for i in range(x.shape[0]):
        dist[i, :k] = nearest(x[i, :], center)
        dist[i, k] = np.argmin(dist[i, :k])
    return dist

# Update the cluster centers: each new center is the mean of the samples assigned to it
def kcmean(x, dist, center, k):
    centerNew = np.zeros([k, center.shape[1]])
    for c in range(k):
        p = []
        q = np.where(dist[:, k] == c)
        for i in range(datax.shape[1]):
            n = mean(x[q][:, i].reshape(-1, 1))
            p.append(n)
        if all(centerNew[c] != p):
            centerNew[c] = p
    return np.array(centerNew)

# Iterate the cluster centers until they stop changing
center = initcent(datax, k)
while True:
    xclas = xclassify(datax, dist, center)
    centerNew = kcmean(datax, xclas, center, k)
    if all(center == centerNew):
        break
    else:
        center = centerNew

# Print the first 10 players
for i in range(10):
    print('Player:', name[i], ', average scoring ability level:', dist[i, k])

# Plot the clusters to see what each level means
plt.scatter(dist[:, k], datax[:, 3], c=array(xclas[:, k]), s=50, cmap='rainbow', marker='p', alpha=0.5)
plt.show()
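As a cross-check (not part of the original code), the same grouping can be produced with sklearn's KMeans on datax; the cluster numbers may differ, but the groups should be similar:

# Hedged sketch: compare the hand-written loop with sklearn's KMeans.
# Uses datax and k from the code above; labels may be numbered differently.
from sklearn.cluster import KMeans
sk_labels = KMeans(n_clusters=k).fit_predict(datax)
print('sklearn labels for the first 10 players:', sk_labels[:10])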

 

# With k = 4 the levels run from 0 to 3: level 1 means very strong scoring ability, 3 is relatively strong, 2 is fairly good, and 0 is average
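Because K-means numbers its clusters arbitrarily, the mapping from level number to strength can change between runs. A minimal sketch (using datax, dist, name, and k from the code above) of ranking the clusters by their mean score so that 0 always means the strongest group:

# Hedged sketch: order the clusters by their average 'score' column so the
# level numbers read consistently (0 = strongest). Uses datax, dist, name, k
# from the code above.
labels = dist[:, k].astype(int)
cluster_score = np.array([datax[labels == c, 3].mean() for c in range(k)])
order = np.argsort(-cluster_score)               # cluster ids, strongest first
rank = {int(c): r for r, c in enumerate(order)}  # cluster id -> rank
for i in range(10):
    print('Player:', name[i], ', rank (0 = strongest):', rank[labels[i]])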

 
