Python(线性可分SVM)

线性可分|线性近似可分(SVM)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report

import warnings
warnings.filterwarnings('ignore')

# Read the headerless CSV into a plain array; every column is addressed by
# position from here on.
data = pd.read_csv('data.csv', encoding='utf8', header=None)
arr = data.values

# Replace column 1 with integer codes and print the "code --> original value"
# mapping so the encoding can be read back later.
encoder = preprocessing.LabelEncoder()
labels = encoder.fit_transform(arr[:, 1])
for code, original in enumerate(encoder.classes_):
    print(code, '-->', original)
arr[:, 1] = labels

# Every column but the last is a feature; the last column is the target.
# NOTE(review): the hand-written trainer later in this script multiplies the
# decision value by the target, so it presumably expects targets in {-1, +1}
# - confirm against data.csv.
x = arr[:, :-1]
y = arr[:, -1]

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0
)

# Settings and initial state of the hand-written linear SVM trainer.
C = 1            # scales the hinge-loss part of every update step
n, m = x_train.shape[0], x_train.shape[1]  # number of rows, number of columns
w = np.zeros(m)  # weight vector, one entry per feature column
b = 0            # bias term
lr = 0.01        # step size
maxgen = 1000    # upper bound on the number of update steps

# Each step looks for the training sample with the largest hinge violation
# 1 - y_i * (w . x_i + b) and updates on that sample alone: w is shrunk by the
# factor (1 - lr) and moved along y_i * x_i, b is moved along y_i.
# NOTE(review): this update presumably expects targets in {-1, +1} - confirm
# that y_train is encoded that way.
for t in range(maxgen):
    e = 1 - (np.dot(x_train, w) + b) * y_train
    # A single vectorised argmax provides both the stopping test and the
    # sample choice; the builtin max() would walk the array element by element
    # in Python and then require a second pass to find the position.
    index = np.argmax(e)
    err = e[index]
    if err > 0:
        # x_train[index] is already a 1-D row, so no transpose is needed.
        w = (1 - lr) * w + C * lr * y_train[index] * x_train[index]
        b = b + C * lr * y_train[index]
    else:
        # No sample violates the margin any more: stop early.
        break

# Score the hand-written model on both splits. A prediction is the sign of the
# decision value w . x + b; a row counts as mis-tagged when prediction * target
# is not strictly positive, which also covers a decision value of exactly 0.
predict_train = np.sign(np.dot(x_train, w) + b)
mistag = np.flatnonzero(predict_train * y_train <= 0)
train_acc = round((y_train.size - mistag.size) / y_train.size, 3)

predict_test = np.sign(np.dot(x_test, w) + b)
mistag = np.flatnonzero(predict_test * y_test <= 0)
test_acc = round((y_test.size - mistag.size) / y_test.size, 3)

# Per-class reports first (train, then test), followed by the two accuracies
# rounded to three decimals.
print(
    'Train acc:\n',
    classification_report(y_train.astype(int), predict_train.astype(int)),
)
print(
    'Test acc:\n',
    classification_report(y_test.astype(int), predict_test.astype(int)),
)
print('Train acc = ', train_acc, ' Test acc = ', test_acc)

# Reference run: fit scikit-learn's linear-kernel SVC on the same split and
# print the same accuracy / per-class report pair for comparison.
print('------sklearn-SVC------')
clf = SVC(C=1.0, kernel='linear')
clf.fit(x_train, y_train.astype(int))

predict_train = clf.predict(x_train)
predict_test = clf.predict(x_test)

# Accuracy is the share of rows whose prediction equals the target.
# The comparison uses the targets as stored, while fit() received them cast
# to int.
train_acc = np.count_nonzero(predict_train == y_train) / len(y_train)
test_acc = np.count_nonzero(predict_test == y_test) / len(y_test)

print(
    'Train acc:',
    train_acc,
    '\n',
    classification_report(y_train.astype(int), predict_train),
)
print(
    'Test acc:',
    test_acc,
    '\n',
    classification_report(y_test.astype(int), predict_test),
)

Python(线性可分SVM)

线性可分|线性近似可分(SVM)

猜你喜欢