这篇文章主要讲解了如何利用SVM进行分类,并在输出分类精度的同时输出混淆矩阵。
注意:x_train、y_train、x_test、y_test需要替换为自己的数据集,其中x为多维矩阵,y为类别列表。
# coding=utf-8
import numpy as np
from PIL import Image
from sklearn import svm
# 使用交叉验证的方法,把数据集分为训练集和测试集
from sklearn.cross_validation import train_test_split
from merge import x_train,y_train,x_test,y_test,X_test
def test_LinearSVC(train, label, testdata):
    """Fit a linear SVM on the training data and predict classes for testdata.

    Args:
        train: Training feature matrix, passed unchanged to ``LinearSVC.fit``.
            (Presumably shaped (n_samples, n_features) -- confirm against the
            data source.)
        label: Training class labels. May be a NumPy array or any plain
            sequence; every value is cast to ``int`` before fitting.
        testdata: Feature matrix to classify, passed unchanged to
            ``LinearSVC.predict``.

    Returns:
        The value returned by ``LinearSVC.predict(testdata)``.
    """
    # Choose the model.
    cls = svm.LinearSVC()
    # np.asarray lets callers pass a plain list of labels; a bare
    # ``label.astype`` only works when label is already an ndarray.
    cls.fit(train, np.asarray(label).astype('int'))
    # Predict the class of every test sample.
    results = cls.predict(testdata)
    return results
#输出混淆矩阵
from sklearn.metrics import confusion_matrix
def my_confusion_matrix(y_true, y_pred):
    """Print the confusion matrix of y_pred against y_true.

    Rows correspond to true labels and columns to predicted labels. The
    label set is taken from ``y_true`` only and is sorted so the layout is
    stable between runs.

    Args:
        y_true: Iterable of ground-truth class labels (must be hashable and
            mutually orderable).
        y_pred: Iterable of predicted class labels, aligned with ``y_true``.

    Returns:
        None. The matrix is written to standard output.
    """
    # Sort for a deterministic row/column order; a bare set has none.
    labels = sorted(set(y_true))
    conf_mat = confusion_matrix(list(y_true), list(y_pred), labels=labels)
    print("confusion_matrix(left labels: y_true, up labels: y_pred):")
    # Header row: one column per label.
    print("labels", " ", end='')
    for label in labels:
        print(label, " ", end='')
    print('\n')
    # One row per true label, headed by the label itself (not its index).
    for label, row in zip(labels, conf_mat):
        print(label, " ", end='')
        for count in row:
            print(count, " ", end='')
        print('\n')
if __name__ == "__main__":
    # Predicted class for every test sample.
    y_pred = test_LinearSVC(x_train, y_train, x_test)
    # The classifier is fitted on integer-cast labels, so cast the ground
    # truth the same way; comparing raw y_test values (e.g. strings) against
    # integer predictions would never match.
    label_list = [int(i) for i in y_test]
    # Accuracy on the test set.
    correct = sum(1 for truth, pred in zip(label_list, y_pred) if truth == pred)
    print('acc rate is %f' % (correct / len(label_list)))
    # Print the confusion matrix using the same integer ground truth.
    my_confusion_matrix(label_list, y_pred)