Machine Learning Assignment 2 - Iris


1. Loading the Iris dataset

The dataset is a dictionary of the form:

{
    "data": ...,
    "target": ...,
    "target_names": ...,
    "DESCR": ...
}

where the data field is a numpy array holding the four feature values of each sample, target is the integer class label (the flower species) of each sample, target_names gives the species names corresponding to those labels, and DESCR is a textual description of the dataset.
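
Assuming the dictionary comes from sklearn.datasets.load_iris (which returns exactly these fields), it can be loaded and inspected like this:

from sklearn.datasets import load_iris

iris = load_iris()
print(iris["data"].shape)    # (150, 4): sepal length/width and petal length/width in cm
print(iris["target"][:5])    # integer class labels, e.g. [0 0 0 0 0]
print(iris["target_names"])  # ['setosa' 'versicolor' 'virginica']
print(iris["DESCR"][:100])   # beginning of the dataset description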

2. Visualizing the dataset

2.1 Visualization results

[Figure: 3D scatter plots of the three Iris species over four combinations of the sepal/petal features]

As the figure shows, versicolor and virginica are difficult to tell apart, while setosa is clearly separated from the other two species.

2.2 Implementation

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
# plt.rcParams['font.sans-serif']=['SimHei'] # uncomment to display Chinese (CJK) labels correctly
plt.rcParams['savefig.dpi'] = 300  # saved-figure DPI
plt.rcParams['figure.dpi'] = 300   # figure resolution (DPI)

def data_visualization_3D(df_Iris, tar):
	# Plot four 3D scatter views of the data, each using a different triple of the four features.
	fig = plt.figure(figsize=(10, 10))
	xx = [[0, 1, 2], [1, 2, 3], [0, 1, 3], [0, 2, 3]]
	yy = [["sepal_length (cm)", "sepal_width (cm)", "petal_length (cm)"],
		  ["sepal_width (cm)", "petal_length (cm)", "petal_width (cm)"],
		  ["sepal_length (cm)", "sepal_width (cm)", "petal_width (cm)"],
		  ["sepal_length (cm)", "petal_length (cm)", "petal_width (cm)"]]
	for i in range(4):
		ax=fig.add_subplot(221+i,projection="3d")
		ax.scatter(df_Iris[tar==0,xx[i][0]],df_Iris[tar==0,xx[i][1]],df_Iris[tar==0,xx[i][2]],c="r",marker="o",label="setosa")
		ax.scatter(df_Iris[tar == 1, xx[i][0]], df_Iris[tar == 1, xx[i][1]], df_Iris[tar == 1, xx[i][2]], c="b",
				   marker="x", label="versicolor")
		ax.scatter(df_Iris[tar == 2, xx[i][0]], df_Iris[tar == 2, xx[i][1]], df_Iris[tar == 2, xx[i][2]], c="g",
				   marker="^", label="virginica")
		ax.set_xlabel(yy[i][0])
		ax.set_ylabel(yy[i][1])
		ax.set_zlabel(yy[i][2])
		plt.legend(loc=0)
	plt.show()

    
  
data,tar=readData()
data_visualization_3D(data,tar)
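
readData() is defined elsewhere in the author's project and is not shown in this post; a minimal stand-in, assuming the features and integer labels are taken from sklearn.datasets.load_iris, could look like this:

from sklearn.datasets import load_iris

def readData():
	# Hypothetical stand-in for the project's readData():
	# returns the 150x4 feature matrix and the integer class labels (0, 1, 2).
	iris = load_iris()
	return iris["data"], iris["target"]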

3. MED (minimum Euclidean distance) linear classification

3.1 Classification results

[Figure: MED classification of the test samples (setosa vs. versicolor), with the linear decision plane drawn in each 3D view]

3.2 Quantitative metrics

Accuracy: 1.0
Recall: 1.0
specificity: 1.0
F1_Score 1.0
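
For reference, the MED (minimum Euclidean distance) classifier assigns a sample to the class whose prototype (class mean) is nearest; for two classes this reduces to the linear decision function computed in classifier_MED below, and the metrics above are the usual confusion-matrix ratios:

\[
g(\mathbf{x}) = (\mathbf{z}_2-\mathbf{z}_1)^{\top}\left(\mathbf{x}-\frac{\mathbf{z}_1+\mathbf{z}_2}{2}\right),
\qquad
\hat{y}=\begin{cases}\omega_2\ (\text{positive class}) & g(\mathbf{x})\ge 0\\ \omega_1\ (\text{negative class}) & g(\mathbf{x})<0\end{cases}
\]

\[
\text{Accuracy}=\frac{TP+TN}{TP+TN+FP+FN},\quad
\text{Recall}=\frac{TP}{TP+FN},\quad
\text{Specificity}=\frac{TN}{TN+FP},\quad
F_1=\frac{2\cdot\text{Precision}\cdot\text{Recall}}{\text{Precision}+\text{Recall}}
\]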

3.3 Core code

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np

def get_Iris_linear(data, tar, flag):
	# Drop the class labelled `flag`, keeping the two remaining (linearly separable) classes.
	linear_data=[data[i] for i in range(data.shape[0]) if tar[i]!=flag]
	linear_tar=[tar[i] for i in range(data.shape[0]) if tar[i]!=flag]
	return np.asarray(linear_data,dtype="float64"),np.asarray(linear_tar,dtype="float64")

def hold_out_partition(testRate, trainRate, data, tar):  # hold-out split; trainRate is unused, the non-test samples form the training set
	import random
	import numpy as np
	testSet = []
	testTar = []
	trainSet = []
	trainTar = []
	listNum = []
	for i in range(data.shape[0]):
		if tar[i] == 1:
			listNum.append(i)
	for i in random.sample(listNum, int(testRate * 50)):
		testSet.append(data[i])
		testTar.append(tar[i])
		listNum.remove(i)

	for i in listNum:
		trainSet.append(data[i])
		trainTar.append(tar[i])
	listNum = []
	for i in range(data.shape[0]):
		if tar[i] != 1:
			listNum.append(i)
	for i in random.sample(listNum, int(testRate * 50)):
		testSet.append(data[i])
		testTar.append(tar[i])
		listNum.remove(i)
	for i in listNum:
		trainSet.append(data[i])
		trainTar.append(tar[i])
	return np.asarray(testSet, dtype="float64"), np.asarray(testTar, dtype="float64"), np.asarray(trainSet,dtype="float64"), np.asarray(trainTar, dtype="float64")

cmap={
	0:"r",
	1:"b",
	2:"g"
}
shapeMap={
	0:"o",
	1:"x",
	2:"^"
}
map={
	"Iris-setosa":0,
	"Iris-versicolor":1,
	"Iris-virginica":2,
	0:"setosa",
	1:"versicolor",
	2:"virginica"

}
def classifier_MED(data, tar, posC, negC):  # MED (minimum Euclidean distance) classifier
	testSet, testTar, trainSet, trainTar = hold_out_partition(0.3, 0.7, data, tar)
	C1,C2=[],[]
	N1,N2=0,0
	for i in range(trainSet.shape[0]):
		if trainTar[i]==negC:
			N1+=1
			C1.append(trainSet[i])
		elif trainTar[i]==posC:
			N2+=1
			C2.append(trainSet[i])
	C1,C2=np.asarray(C1),np.asarray(C2)
	z1, z2 = C1.sum(axis=0) / N1, C2.sum(axis=0) / N2  # class prototypes (mean vectors)
	testRes=[]
	for x in testSet:
		res = np.dot((z2 - z1).transpose(), (x - (z1 + z2) / 2))  # MED decision value g(x)
		testRes.append(res)
	testTar=testTar.astype("int16")
	TP,FP,TN,FN=0,0,0,0
	for i in range (len(testRes)):
		# class C2 (label posC) is the positive class, class C1 (label negC) is the negative class
		if testTar[i]==posC and testRes[i]>=0:
			TP+=1
		elif testTar[i]==posC and testRes[i]<0:
			FN+=1
		elif testTar[i]==negC and testRes[i]<0:
			TN+=1
		elif testTar[i] == negC and testRes[i]>=0:
			FP+=1
	testRes=np.array(testRes)
	accuracy=float((TP+TN)/(TP+TN+FP+FN))
	recall=float(TP/(TP+FN))
	precision=float(TP/(TP+FP))
	specificity=float(TN/(TN+FP))
	F1_Score=float((2*recall*precision)/(recall+precision))
	print("Accuracy:",accuracy)
	print("Recall:",recall)
	print("specificity:",specificity)
	print("F1_Score",F1_Score)


	# plotting
	fig = plt.figure(figsize=(10, 10))
	xx = [[0, 1, 2], [1, 2, 3], [0, 1, 3], [0, 2, 3]]
	yy = [["sepal_length (cm)", "sepal_width (cm)", "petal_length (cm)"],
		  ["sepal_width (cm)", "petal_length (cm)", "petal_width (cm)"],
		  ["sepal_length (cm)", "sepal_width (cm)", "petal_width (cm)"],
		  ["sepal_length (cm)", "petal_length (cm)", "petal_width (cm)"]]
	for i in range(4):
		ax = fig.add_subplot(221 + i, projection="3d")
		X, Y = np.meshgrid(np.arange(testSet.min(axis=0)[xx[i][0]],testSet.max(axis=0)[xx[i][0]],1), np.arange(testSet.min(axis=0)[xx[i][1]],testSet.max(axis=0)[xx[i][1]],1))
		u1=np.array([z1[xx[i][0]],z1[xx[i][1]],z1[xx[i][2]]])
		u2=np.array([z2[xx[i][0]],z2[xx[i][1]],z2[xx[i][2]]])
		u=(u2-u1).transpose()
		Z = (np.dot(u, (u1 + u2) / 2) - u[0] * X - u[1] * Y) / u[2]  # decision plane g(x) = 0 restricted to the three plotted features
		ax.scatter(testSet[testRes>=0,xx[i][0]], testSet[testRes>=0,xx[i][1]],testSet[testRes>=0,xx[i][2]], c=cmap[posC], marker=shapeMap[posC], label=map[posC])
		ax.scatter(testSet[testRes<0,xx[i][0]],testSet[testRes<0,xx[i][1]], testSet[testRes<0,xx[i][2]],c=cmap[negC], marker=shapeMap[negC],label=map[negC])
		ax.set_xlabel(yy[i][0])
		ax.set_ylabel(yy[i][1])
		ax.set_zlabel(yy[i][2])
		ax.plot_surface(X, Y, Z, alpha=0.4)  # draw the decision plane
		ax.legend(loc=0)
	plt.show()
	
linear_data, linear_tar = get_Iris_linear(data, tar, 2)  # drop virginica (label 2); setosa vs. versicolor is linearly separable
classifier_MED(linear_data, linear_tar, 0, 1)  # positive class: versicolor (1), negative class: setosa (0)

4. Whitening the dataset

4.1 Results

[Figure: 3D scatter plots of the whitened Iris data]

After whitening, the classes become easier to separate along certain dimensions.
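
Concretely, whitening diagonalizes and rescales the feature covariance: if \(\Sigma = \Phi\Lambda\Phi^{\top}\) is the eigendecomposition of the covariance matrix, the whitening matrix is \(W = \Lambda^{-1/2}\Phi^{\top}\), and the whitened data has identity covariance:

\[
W = \Lambda^{-1/2}\Phi^{\top},\qquad
\operatorname{cov}(W\mathbf{x}) = W\,\Sigma\,W^{\top}
= \Lambda^{-1/2}\Phi^{\top}\,\Phi\Lambda\Phi^{\top}\,\Phi\Lambda^{-1/2} = I
\]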

4.2 Core code

def witening(data):
	# Whitening: rotate the data onto the eigenvectors of its covariance matrix
	# and rescale each direction by 1/sqrt(eigenvalue).
	Ex = np.cov(data, rowvar=False)  # Ex: covariance matrix of the original features
	print(Ex.shape)
	a, b = np.linalg.eig(Ex)  # eigenvalues and eigenvectors of Ex
	a, b = np.real(a), np.real(b)
	# Normalize the eigenvectors to unit length (np.linalg.eig already returns
	# unit-norm columns, so this is only a safeguard).
	modulus = np.sqrt((b ** 2).sum(axis=0))
	b = b / modulus
	# Diagonal scaling matrix Lambda^(-1/2)
	A = np.diag(a ** (-0.5))
	W = np.dot(A, b.transpose())  # whitening matrix W = Lambda^(-1/2) * Phi^T
	# Guard against NaNs caused by (near-)zero eigenvalues
	W[np.isnan(W)] = 0
	X = np.dot(W, np.dot(Ex, W.transpose()))  # should be close to the identity matrix
	print(W)
	return np.dot(data, W.transpose())  # whitened data, one sample per row
	
white_data = witening(data)
data_visualization_3D(white_data, tar)
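
Continuing from the snippet above, a quick sanity check (assuming data is the 150x4 Iris feature array returned by readData) is that the covariance of the whitened data is close to the 4x4 identity matrix:

print(np.round(np.cov(white_data, rowvar=False), 3))  # expected to be approximately the 4x4 identity matrix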

5. MED nonlinear classification

5.1 Results

[Figure: MED classification of the test samples (versicolor vs. virginica), with the decision plane drawn in each 3D view]

5.2 Quantitative metrics

Accuracy: 0.9
Recall: 0.8666666666666667
specificity: 0.9333333333333333
F1_Score 0.896551724137931

5.3 Core code

def get_Iris_noLinear(data, tar, flag):
	# Drop the class labelled `flag`; the two remaining classes (versicolor and virginica) are not linearly separable.
	linear_data = [data[i] for i in range(data.shape[0]) if tar[i] != flag]
	linear_tar = [tar[i] for i in range(data.shape[0]) if tar[i] != flag]
	return np.asarray(linear_data, dtype="float64"), np.asarray(linear_tar, dtype="float64")

noLinear_data, noLinear_tar = get_Iris_noLinear(data, tar, 0)  # drop setosa (label 0)
classifier_MED(noLinear_data, noLinear_tar, 1, 2)  # positive class: versicolor (1), negative class: virginica (2)

6. Multi-class Bayesian classifier

6.1 Data Visualization

[Figure: 2D decision regions of the Bayesian classifier over pairs of features, with test samples colored by predicted class]

Accuracy: 0.9933333333333334
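
The classifier below estimates a mean vector and a full covariance matrix for each class from the training folds and, since the three Iris classes have equal priors, predicts the class with the largest Gaussian likelihood:

\[
\hat{\omega}(\mathbf{x}) = \arg\max_{i}\; p(\mathbf{x}\mid\omega_i)\,P(\omega_i)
= \arg\max_{i}\; \mathcal{N}\!\left(\mathbf{x};\,\boldsymbol{\mu}_i,\,\Sigma_i\right),
\qquad P(\omega_i)=\tfrac{1}{3}
\]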

6.2 Core code

# K-fold split: assumes the 150 samples are ordered by class (50 per class) and k = 5,
# so each fold contains 10 samples from each of the three classes.
def K_Folds_Cross_Validation(data, tar, k):
	import random
	import numpy as np
	Set = []
	Tar = []
	for i in range(k):
		tempSet=[]
		tempTar=[]
		tempSet.extend(data[i*10:(i+1)*10])
		tempTar.extend(tar[i*10:(i+1)*10])
		tempSet.extend(data[(i+5) * 10:(i + 6) * 10])
		tempTar.extend(tar[(i+5) * 10:(i + 6) * 10])
		tempSet.extend(data[(i+10) * 10:(i + 11) * 10])
		tempTar.extend(tar[(i+10) * 10:(i + 11) * 10])
		Set.append(tempSet)
		Tar.append(tempTar)
	return np.asarray(Set),np.asarray(Tar)

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
from scipy import stats
import visualization,partition
# plt.rcParams['font.sans-serif']=['SimHei'] # uncomment to display Chinese (CJK) labels correctly
plt.rcParams['savefig.dpi'] = 300  # saved-figure DPI
plt.rcParams['figure.dpi'] = 300   # figure resolution (DPI)
map={
	"Iris-setosa":0,
	"Iris-versicolor":1,
	"Iris-virginica":2

}
# Bayesian classifier
class BayesParameter():  # stores the estimated parameters (mean, covariance) of one class

	def __init__(self,mean,cov,category):
		self.mean=mean
		self.cov=cov
		self.category=category
class BayesClassifier():  # Bayesian classifier with a Gaussian density estimate per class

	def __init__(self):
		self.parameters=[]

	def train(self, X_data, Y_data):

		for category in set(Y_data):  # iterate over the classes
			selected = Y_data == category  # boolean mask selecting the samples of this class
			X_newData = X_data[selected]
			mean = np.mean(X_newData, axis=0)  # class mean vector
			cov = np.cov(X_newData.transpose())  # class covariance; equivalently np.cov(X_newData, rowvar=False)
			self.parameters.append(BayesParameter(mean, cov, category))

	def predit(self, data):
		# Return the class with the largest Gaussian likelihood (equal class priors assumed).
		res = -1
		probability = 0
		for parameter in self.parameters:
			p = stats.multivariate_normal.pdf(data, mean=parameter.mean, cov=parameter.cov)
			if p > probability:
				res = parameter.category
				probability = p
		return res

    
if __name__=="__main__":
    set,tar=partition.K_Folds_Cross_Validation(data,tar,5)
	accuracy=0
	print(tar[0].shape)
	for i in range(5): #第i个子集作为测试集
		x,y=0,0
		X_data,Y_data=None,None
		for j in range(5):
			if i!=j:
				if x*y==0:
					X_data=set[i]
					Y_data=tar[i]
				else:
					X_data=np.concatenate((X_data,set[i]),axis=0)
					Y_data = np.concatenate((Y_data, tar[i]), axis=0)
					x+=1
					y+=1
		bc=BayesClassifier()
		bc.train(X_data,Y_data)
		y_predict=[bc.predit(x) for x in set[i]]
		tempAccuracy=np.sum(y_predict==tar[i])/tar[i].shape[0]
		accuracy+=tempAccuracy
	accuracy=accuracy/5
	print(accuracy)
# On the relationship between the covariance matrix and the Gaussian distribution:
# https://blog.csdn.net/weixin_37895339/article/details/80351541
def data_visualization_2D_Bayes(data,tar):
	testSet, testTar, trainSet, trainTar = partition.hold_out_partition(0.3, 0.7, data, tar)
	bc = BayesClassifier()
	bc.train(trainSet, trainTar)
	testPredict = np.array([bc.predit(x) for x in testSet],dtype="int")

	import math
	# 画图部分
	fig = plt.figure(figsize=(10, 10))
	xx = [[0, 1], [1, 2], [2, 3], [0,2],[0,3],[1,3]]
	yy = [["sepal_length (cm)", "sepal_width (cm)"],
		  ["sepal_width (cm)", "petal_length (cm)"],
		  ["petal_length (cm)", "petal_width (cm)"],
		  ["sepal_length (cm)", "petal_length (cm)"],
		  ["sepal_length (cm)", "petal_width (cm)"],
		  ["sepal_width (cm)", "petal_width (cm)"]]
	for i in range(6):
		ax = fig.add_subplot(321 + i)
		x_max,x_min=testSet.max(axis=0)[xx[i][0]]+0.5,testSet.min(axis=0)[xx[i][0]]-0.5
		y_max,y_min=testSet.max(axis=0)[xx[i][1]]+0.5,testSet.min(axis=0)[xx[i][1]]-0.5
		xlist = np.linspace(x_min, x_max, 100)  # Create 1-D arrays for x,y dimensions
		ylist = np.linspace(y_min, y_max, 100)
		XX, YY = np.meshgrid(xlist, ylist)
		bc = BayesClassifier()
		bc.train(trainSet[:, xx[i]],trainTar)
		xys = [np.array([px, py]).reshape(1, -1) for px, py in zip(np.ravel(XX), np.ravel(YY))]  # grid points as 1x2 samples
		zz = np.array([bc.predit(x) for x in xys])
		Z = zz.reshape(XX.shape)
		plt.contourf(XX, YY, Z, 2, alpha=.1, colors=('blue', 'red', 'green'))
		ax.scatter(testSet[testPredict == 0, xx[i][0]], testSet[testPredict == 0, xx[i][1]],
				  c='r', marker='o',
				   label="setosa")
		ax.scatter(testSet[testPredict==1, xx[i][0]], testSet[testPredict==1, xx[i][1]], c='b', marker='x',
				   label="versicolor")
		ax.scatter(testSet[testPredict==2, xx[i][0]], testSet[testPredict==2, xx[i][1]], c='g', marker='^',
				   label="virginica")
		ax.set_xlabel(yy[i][0])
		ax.set_ylabel(yy[i][1])
		ax.legend(loc=0)
	plt.show()
data,tar=readData()
data_visualization_2D_Bayes(data,tar)


Origin www.cnblogs.com/JustNo/p/12640959.html