机器学习(周志华) 西瓜书 第三章课后习题3.4—— Python实现

机器学习(周志华) 西瓜书 第三章课后习题3.4—— Python实现

个人原创,禁止转载——Zetrue_Li

数据获取

UCI数据官网:http://archive.ics.uci.edu/ml/index.php

选取最受欢迎的Iris数据集:下载链接 http://archive.ics.uci.edu/ml/machine-learning-databases/iris/

Python代码

本题调用了3.3题的程序(需保存为 Chap3_3.py,并与本文件放在同一目录下,以便 import Chap3_3),传送门:https://blog.csdn.net/weixin_37922777/article/details/88625728

# -*- coding: utf-8 -*-

# 调用3.3题的python实现程序
import Chap3_3
import numpy as np
import pandas as pd

def loadData(filename):
    """Read the header-less Iris CSV file and add a constant column.

    Args:
        filename: Path of the CSV file; each row holds four measurements
            followed by the class label.

    Returns:
        A DataFrame with the columns ``sepal length``, ``sepal width``,
        ``petal length``, ``petal width`` and ``class``, plus a trailing
        column ``b`` that is 1 in every row.
    """
    columns = ['sepal length', 'sepal width', 'petal length', 'petal width', 'class']
    # The file has no header row, so the column names are supplied here;
    # the all-ones column ``b`` is appended as the last column.
    return pd.read_csv(filename, names=columns).assign(b=1)

def processData(dataSet, n=10):

	values = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
	classifications = []
	gaps = []
	for value in values:
		temp = dataSet.loc[dataSet['class']==value]
		classifications.append(temp)

		gap = temp.shape[0]//n
		gaps.append(gap)

	D = [None for _ in range(n)]
	for a in range(n):
		for gap, classification in zip(gaps, classifications):
			begin = a * gap
			#print(classification[begin:begin+gap])
			if type(D[a]).__name__ == 'NoneType':
				D[a] = classification[begin:begin+gap]
			else:
				D[a] = D[a].append(classification[begin:begin+gap])
			#print(type(D[a]))
			#print(classification[begin:begin+gap])

	return D

def judge_function(D_train, D_test):
    """Train on ``D_train`` and count the correct predictions on ``D_test``.

    The three-class problem is reduced to a binary one: ``Iris-setosa`` is
    labelled 1 and every other class is labelled 0.

    Args:
        D_train: DataFrame with the four measurement columns, the constant
            column ``b`` and the ``class`` column; passed to ``Chap3_3.run``.
        D_test: DataFrame with the same columns, used for evaluation.

    Returns:
        The number of rows of ``D_test`` whose thresholded prediction equals
        the true binary label (a count, not a ratio).
    """
    features = ['sepal length', 'sepal width', 'petal length', 'petal width', 'b']

    def to_arrays(frame):
        # np.array copies, so the caller's DataFrame is never shared with
        # whatever Chap3_3 does to its inputs.
        x = np.array(frame[features])
        # An explicit comparison yields integer labels of shape (rows, 1)
        # without depending on the dtype downcasting of DataFrame.replace,
        # which pandas has deprecated.
        y = np.array((frame[['class']] == 'Iris-setosa').astype(int))
        return x, y

    x_train, y_train = to_arrays(D_train)
    # NOTE(review): Chap3_3.run presumably returns the fitted parameter
    # vector of the exercise-3.3 model - confirm against that module.
    beta = Chap3_3.run(x_train, y_train)

    x_test, y_test = to_arrays(D_test)

    accuracy = 0
    for xi, yi in zip(x_test, y_test):
        # NOTE(review): p1 is presumably the predicted probability of the
        # positive class - confirm against Chap3_3.p1_function.
        p1 = Chap3_3.p1_function(xi, beta)

        judge = 0 if p1 < 0.5 else 1
        accuracy += int(judge == yi[0])

    return accuracy

def cross_validation(dataSet):
    """Estimate the classification error rate with 10-fold cross validation.

    The folds come from ``processData``. Each fold is used once as the test
    set while the other nine are concatenated into the training set.

    Args:
        dataSet: DataFrame as produced by ``loadData``.

    Returns:
        The fraction of tested rows that were misclassified, as a float.

    Raises:
        ValueError: If the folds contain no rows at all.
    """
    # 10-fold cross validation
    n = 10
    # Pre-process the data into folds
    D = processData(dataSet, n)

    # Divide by the rows that are actually tested rather than by
    # dataSet.shape[0], so rows that did not end up in any fold cannot
    # distort the error rate.
    total = sum(len(fold) for fold in D)
    if total == 0:
        raise ValueError('cross_validation needs at least one sample in the folds')

    correct = 0
    for a in range(n):
        D_test = D[a]
        if len(D_test) == 0:
            # Nothing to evaluate in this fold, so training would be wasted.
            continue
        # DataFrame.append was removed in pandas 2.0; pd.concat replaces it.
        D_train = pd.concat([D[b] for b in range(n) if b != a])
        correct += judge_function(D_train, D_test)

    error = 1 - correct / total
    return error

def leave_one_out(dataSet):
    """Estimate the classification error rate with leave-one-out validation.

    Every row is used once as a single-row test set while all remaining rows
    form the training set.

    Args:
        dataSet: DataFrame as produced by ``loadData``.

    Returns:
        The fraction of rows that were misclassified, as a float.

    Raises:
        ValueError: If ``dataSet`` has no rows.
    """
    k = dataSet.shape[0]
    if k == 0:
        raise ValueError('leave_one_out needs at least one sample')

    correct = 0
    for a in range(k):
        # DataFrame.append was removed in pandas 2.0; pd.concat replaces it.
        # iloc keeps the slicing positional regardless of the index labels.
        D_train = pd.concat([dataSet.iloc[:a], dataSet.iloc[a + 1:]])
        D_test = dataSet.iloc[a:a + 1]

        correct += judge_function(D_train, D_test)

    error = 1 - correct / k
    return error

if __name__ == "__main__":
    # Load the Iris data once and reuse it for both validation schemes.
    dataSet = loadData('UCI/iris/iris.data')

    # Run 10-fold cross validation first, then leave-one-out, and print each
    # error rate as a percentage.
    schemes = (
        ('Cross validation:', cross_validation),
        ('Leave one out:', leave_one_out),
    )
    for label, evaluate in schemes:
        print(label, evaluate(dataSet) * 100, '%')

实现结果

猜你喜欢

转载自blog.csdn.net/weixin_37922777/article/details/88625974