白板推导系列Pytorch-支持向量机(SVM)
支持向量机的代码实现主要是SMO算法的实现,我参考了下面这篇博客
https://www.cnblogs.com/lsm-boke/p/12317200.html
该博客使用numpy实现了SVM。我对numpy版本做了一些修改,并用pytorch的API重写了一个版本;两个版本除了调用的函数名不同之外基本一致,只是numpy版本的运行速度比pytorch版本快很多。另外我还调用了sklearn中的SVM作为对比,它的速度远超这两个实现。
pytorch版本
导入所需的包
import torch
生成数据集
def create_dataset(n_samples=1000):
    """Build a two-class, two-feature toy dataset of torch tensors.

    The first ``n_samples // 2`` rows are drawn from a normal distribution
    with mean 2 and standard deviation 1 and are labelled -1; the remaining
    rows are drawn with mean -2 and are labelled +1.

    Returns:
        A ``(x, y)`` pair: ``x`` has shape ``(n_samples, 2)`` and ``y`` has
        shape ``(n_samples,)``, both float32.
    """
    n_neg = n_samples // 2
    n_pos = n_samples - n_neg

    # Draw the -1 class first, then the +1 class.
    neg_points = torch.normal(2, 1, size=(n_neg, 2), dtype=torch.float32)
    neg_labels = -torch.ones(n_neg, dtype=torch.float32)
    pos_points = torch.normal(-2, 1, size=(n_pos, 2), dtype=torch.float32)
    pos_labels = torch.ones(n_pos, dtype=torch.float32)

    # Stack the two classes: all -1 rows come before all +1 rows.
    points = torch.cat((neg_points, pos_points), 0)
    targets = torch.cat((neg_labels, pos_labels), 0)
    return points, targets
定义SVM模型
class svm:
    """Soft-margin SVM trained with SMO, implemented on torch tensors.

    The update formulas assume labels of -1 and +1. ``C`` is fixed to 1.0 in
    ``init_args``. ``self.E`` caches the error ``g(x_i) - y_i`` of every
    training sample and is kept up to date after each multiplier update.
    """

    # Smallest change of a multiplier that still counts as progress.
    _MIN_STEP = 1e-6

    def __init__(self, toler=0.001, max_iter=100, kernel='linear'):
        """Store the hyper-parameters.

        Args:
            toler: Tolerance used when checking the KKT conditions.
            max_iter: Maximum number of pair updates performed by ``fit``.
            kernel: ``'linear'`` or ``'poly'``.
        """
        self.toler = toler
        self.max_iter = max_iter
        self._kernel = kernel

    def init_args(self, features, labels):
        """Reset the model state for a new training set.

        Args:
            features: Tensor of shape ``(m, n)``.
            labels: Tensor of ``m`` labels.
        """
        self.m, self.n = features.shape
        self.X = features
        self.Y = labels
        self.b = 0.0
        # Penalty parameter for margin violations.
        self.C = 1.0
        # Start from zeros: this satisfies sum(alpha_i * y_i) = 0, which every
        # SMO pair update preserves. Starting from ones breaks that constraint
        # whenever the classes are unbalanced, and it can never be restored.
        self.alpha = torch.zeros(self.m)
        # Error cache E_i = g(x_i) - y_i.
        self.E = torch.tensor(
            [float(self._e(i)) for i in range(self.m)], dtype=torch.float
        )

    def _kkt(self, i):
        """Return True when sample ``i`` satisfies KKT within ``self.toler``."""
        y_g = float(self._g(self.X[i]) * self.Y[i])
        alpha_i = float(self.alpha[i])
        if alpha_i <= 0:
            return y_g >= 1 - self.toler
        if alpha_i >= self.C:
            return y_g <= 1 + self.toler
        # Exact float equality is practically never true, hence the tolerance.
        return abs(y_g - 1) <= self.toler

    def _g(self, xi):
        """Decision value g(xi) = sum_j alpha_j * y_j * K(x_j, xi) + b."""
        return (self.alpha * self.Y * self.kernel(self.X, xi)).sum() + self.b

    def kernel(self, X_data, x2, gamma=1, r=0, d=3):
        """Evaluate the kernel between ``X_data`` and the vector ``x2``.

        ``X_data`` may be a single vector (a 0-dim tensor is returned) or a
        matrix of row vectors (one value per row is returned). ``gamma``,
        ``r`` and ``d`` are only used by the polynomial kernel. A kernel name
        other than ``'linear'`` or ``'poly'`` yields zeros.
        """
        inner = (X_data * x2).sum(dim=-1)
        if self._kernel == 'linear':
            return inner
        if self._kernel == 'poly':
            return (gamma * inner + r) ** d
        return torch.zeros_like(inner)

    def _e(self, i):
        """Error of sample ``i``: prediction g(x_i) minus label y_i."""
        return self._g(self.X[i]) - self.Y[i]

    def _init_alpha(self):
        """Pick the next pair of multipliers to optimise.

        Samples with ``0 < alpha < C`` are checked first, then the rest. For
        the first KKT violator ``i`` that can be improved, the partner ``j``
        is the candidate with the largest ``|E_i - E_j|`` whose joint update
        actually moves the multipliers.

        Returns:
            ``(i, j)``, or ``(-1, -1)`` when no violating pair can be moved.
        """
        alpha = self.alpha.tolist()
        interior = [i for i, a in enumerate(alpha) if 0 < a < self.C]
        at_bound = [i for i, a in enumerate(alpha) if not 0 < a < self.C]
        for i in interior + at_bound:
            if self._kkt(i):
                continue
            gaps = (self.E - self.E[i]).abs()
            for j in torch.argsort(gaps, descending=True).tolist():
                if self._step(i, j) is not None:
                    return i, j
        return -1, -1

    @staticmethod
    def _compare(_alpha, L, H):
        """Clip ``_alpha`` to the interval ``[L, H]``."""
        if _alpha > H:
            return H
        if _alpha < L:
            return L
        return _alpha

    def _step(self, i1, i2):
        """Compute the joint update of ``alpha[i1]`` and ``alpha[i2]``.

        Returns:
            ``(alpha1_new, alpha2_new, b_new)``, or ``None`` when the pair
            cannot make progress.
        """
        if i1 == i2:
            return None
        a1, a2 = float(self.alpha[i1]), float(self.alpha[i2])
        y1, y2 = float(self.Y[i1]), float(self.Y[i2])
        e1, e2 = float(self.E[i1]), float(self.E[i2])

        # Feasible interval of alpha2 on the constraint line.
        if y1 == y2:
            low = max(0.0, a1 + a2 - self.C)
            high = min(self.C, a1 + a2)
        else:
            low = max(0.0, a2 - a1)
            high = min(self.C, self.C + a2 - a1)
        if low >= high:
            return None

        x1, x2 = self.X[i1], self.X[i2]
        k11 = float(self.kernel(x1, x1))
        k22 = float(self.kernel(x2, x2))
        k12 = float(self.kernel(x1, x2))
        # eta = K11 + K22 - 2*K12 is the curvature of the objective along the
        # constraint line. The closed-form minimiser divides by eta, so it is
        # only valid for eta > 0 (eta == 0 happens e.g. for identical points).
        eta = k11 + k22 - 2 * k12
        if eta <= 0:
            return None

        # alpha2_new = alpha2 + y2 * (E1 - E2) / eta, then clip to [low, high].
        a2_new = self._compare(a2 + y2 * (e1 - e2) / eta, low, high)
        if abs(a2_new - a2) < self._MIN_STEP:
            return None
        # Clip alpha1 as well to absorb floating-point round-off.
        a1_new = self._compare(a1 + y1 * y2 * (a2 - a2_new), 0.0, self.C)

        b1_new = -e1 - y1 * k11 * (a1_new - a1) - y2 * k12 * (a2_new - a2) + self.b
        b2_new = -e2 - y1 * k12 * (a1_new - a1) - y2 * k22 * (a2_new - a2) + self.b
        if 0 < a1_new < self.C:
            b_new = b1_new
        elif 0 < a2_new < self.C:
            b_new = b2_new
        else:
            # Both multipliers sit on a bound: take the midpoint.
            b_new = (b1_new + b2_new) / 2
        return a1_new, a2_new, b_new

    def fit(self, features, labels):
        """Train on ``features`` / ``labels`` for at most ``max_iter`` updates.

        Training stops early when ``_init_alpha`` finds no pair to move.
        """
        self.init_args(features, labels)
        for _ in range(self.max_iter):
            i1, i2 = self._init_alpha()
            if i1 == -1 and i2 == -1:
                # No KKT violator that can still be improved.
                break
            step = self._step(i1, i2)
            if step is None:
                continue
            alpha1_new, alpha2_new, b_new = step

            # Changing two multipliers and b shifts g(x_k) for every sample,
            # so refresh the whole error cache, not just the two entries.
            delta1 = float(self.Y[i1]) * (alpha1_new - float(self.alpha[i1]))
            delta2 = float(self.Y[i2]) * (alpha2_new - float(self.alpha[i2]))
            self.E = (
                self.E
                + delta1 * self.kernel(self.X, self.X[i1])
                + delta2 * self.kernel(self.X, self.X[i2])
                + (b_new - self.b)
            )
            self.alpha[i1] = alpha1_new
            self.alpha[i2] = alpha2_new
            self.b = b_new

    def predict(self, X_data):
        """Return a float tensor of +1 / -1 labels, one per row of ``X_data``.

        The label is +1 when the decision value (including the bias ``b``) is
        positive and -1 otherwise.
        """
        y_pred = [1.0 if float(self._g(data)) > 0 else -1.0 for data in X_data]
        return torch.tensor(y_pred, dtype=torch.float)

    def score(self, X_data, y_data):
        """Return the fraction of rows in ``X_data`` predicted as ``y_data``."""
        y_pred = self.predict(X_data)
        return (y_pred == y_data).sum() / len(y_data)
测试
# Train and evaluate the torch implementation on the toy dataset.
X, y = create_dataset(1000)
# Shuffle before splitting: create_dataset returns all -1 samples first, so an
# unshuffled 800/200 split would leave only +1 samples in the test set.
order = torch.randperm(len(X))
X, y = X[order], y[order]
model = svm(max_iter=100, kernel='linear')
model.fit(X[:800], y[:800])
# Print the accuracy so it is also visible when run as a plain script.
print(model.score(X[800:], y[800:]))
运行时间大概是11s左右,准确率1.0
numpy版本
# encoding=utf8
import numpy as np
class svm:
    """Soft-margin SVM trained with SMO, implemented with numpy.

    The update formulas assume labels of -1 and +1. ``C`` is fixed to 1.0 in
    ``init_args``. ``self.E`` caches the error ``g(x_i) - y_i`` of every
    training sample and is kept up to date after each multiplier update.
    """

    # Smallest change of a multiplier that still counts as progress.
    _MIN_STEP = 1e-6

    def __init__(self, max_iter=100, kernel='linear', toler=0.001):
        """Store the hyper-parameters.

        Args:
            max_iter (int): Maximum number of pair updates performed by fit.
            kernel (str): 'linear' for the linear kernel, 'poly' for the
                polynomial kernel.
            toler (float): Tolerance used when checking the KKT conditions.
        """
        self.max_iter = max_iter
        self._kernel = kernel
        self.toler = toler

    def init_args(self, features, labels):
        """Reset the model state for a new training set.

        Args:
            features (ndarray): Array of shape (m, n).
            labels (ndarray): Array of m labels.
        """
        self.m, self.n = features.shape
        self.X = features
        self.Y = labels
        self.b = 0.0
        # Penalty parameter for margin violations.
        self.C = 1.0
        # Start from zeros: this satisfies sum(alpha_i * y_i) = 0, which every
        # SMO pair update preserves. Starting from ones breaks that constraint
        # whenever the classes are unbalanced, and it can never be restored.
        self.alpha = np.zeros(self.m)
        # Error cache E_i = g(x_i) - y_i.
        self.E = np.array([self._E(i) for i in range(self.m)], dtype=float)

    def _KKT(self, i):
        """Return True when sample i satisfies KKT within self.toler."""
        y_g = float(self._g(i) * self.Y[i])
        alpha_i = float(self.alpha[i])
        if alpha_i <= 0:
            return y_g >= 1 - self.toler
        if alpha_i >= self.C:
            return y_g <= 1 + self.toler
        # Exact float equality is practically never true, hence the tolerance.
        return abs(y_g - 1) <= self.toler

    def _g(self, i):
        """Decision value g(x_i) = sum_j alpha_j * y_j * K(x_j, x_i) + b."""
        return np.dot(self.alpha * self.Y, self.kernel(self.X, self.X[i])) + self.b

    def kernel(self, x1, x2):
        """Evaluate the kernel between x1 and the vector x2.

        x1 may be a single vector (a scalar is returned) or a matrix of row
        vectors (one value per row is returned). The polynomial kernel is
        (x1 . x2 + 1) ** 2. Any other kernel name yields zeros.
        """
        inner = np.dot(x1, x2)
        if self._kernel == 'linear':
            return inner
        if self._kernel == 'poly':
            return (inner + 1) ** 2
        return np.zeros(np.shape(inner))

    def _E(self, i):
        """Error of sample i: prediction g(x_i) minus label y_i."""
        return self._g(i) - self.Y[i]

    def _init_alpha(self):
        """Pick the next pair of multipliers to optimise.

        Samples with 0 < alpha < C are checked first, then the rest. For the
        first KKT violator i that can be improved, the partner j is the
        candidate with the largest |E_i - E_j| whose joint update actually
        moves the multipliers.

        Returns:
            (i, j), or (-1, -1) when no violating pair can be moved.
        """
        alpha = self.alpha.tolist()
        interior = [i for i, a in enumerate(alpha) if 0 < a < self.C]
        at_bound = [i for i, a in enumerate(alpha) if not 0 < a < self.C]
        for i in interior + at_bound:
            if self._KKT(i):
                continue
            gaps = np.abs(self.E - self.E[i])
            # Negate so that argsort yields the largest gap first.
            for j in np.argsort(-gaps).tolist():
                if self._step(i, j) is not None:
                    return i, j
        return -1, -1

    def _compare(self, _alpha, L, H):
        """Clip _alpha to the interval [L, H]."""
        if _alpha > H:
            return H
        if _alpha < L:
            return L
        return _alpha

    def _step(self, i1, i2):
        """Compute the joint update of alpha[i1] and alpha[i2].

        Returns:
            (alpha1_new, alpha2_new, b_new), or None when the pair cannot
            make progress.
        """
        if i1 == i2:
            return None
        a1, a2 = float(self.alpha[i1]), float(self.alpha[i2])
        y1, y2 = float(self.Y[i1]), float(self.Y[i2])
        e1, e2 = float(self.E[i1]), float(self.E[i2])

        # Feasible interval of alpha2 on the constraint line.
        if y1 == y2:
            low = max(0.0, a1 + a2 - self.C)
            high = min(self.C, a1 + a2)
        else:
            low = max(0.0, a2 - a1)
            high = min(self.C, self.C + a2 - a1)
        if low >= high:
            return None

        x1, x2 = self.X[i1], self.X[i2]
        k11 = float(self.kernel(x1, x1))
        k22 = float(self.kernel(x2, x2))
        k12 = float(self.kernel(x1, x2))
        # eta = K11 + K22 - 2*K12 is the curvature of the objective along the
        # constraint line. The closed-form minimiser divides by eta, so it is
        # only valid for eta > 0 (eta == 0 happens e.g. for identical points).
        eta = k11 + k22 - 2 * k12
        if eta <= 0:
            return None

        # alpha2_new = alpha2 + y2 * (E1 - E2) / eta, then clip to [low, high].
        a2_new = self._compare(a2 + y2 * (e1 - e2) / eta, low, high)
        if abs(a2_new - a2) < self._MIN_STEP:
            return None
        # Clip alpha1 as well to absorb floating-point round-off.
        a1_new = self._compare(a1 + y1 * y2 * (a2 - a2_new), 0.0, self.C)

        b1_new = -e1 - y1 * k11 * (a1_new - a1) - y2 * k12 * (a2_new - a2) + self.b
        b2_new = -e2 - y1 * k12 * (a1_new - a1) - y2 * k22 * (a2_new - a2) + self.b
        if 0 < a1_new < self.C:
            b_new = b1_new
        elif 0 < a2_new < self.C:
            b_new = b2_new
        else:
            # Both multipliers sit on a bound: take the midpoint.
            b_new = (b1_new + b2_new) / 2
        return a1_new, a2_new, b_new

    def fit(self, features, labels):
        """Train for at most max_iter pair updates.

        Training stops early when _init_alpha finds no pair to move.

        Args:
            features (ndarray): Training features.
            labels (ndarray): Training labels.
        """
        self.init_args(features, labels)
        for _ in range(self.max_iter):
            i1, i2 = self._init_alpha()
            if i1 == -1 and i2 == -1:
                # No KKT violator that can still be improved.
                break
            step = self._step(i1, i2)
            if step is None:
                continue
            alpha1_new, alpha2_new, b_new = step

            # Changing two multipliers and b shifts g(x_k) for every sample,
            # so refresh the whole error cache, not just the two entries.
            delta1 = float(self.Y[i1]) * (alpha1_new - float(self.alpha[i1]))
            delta2 = float(self.Y[i2]) * (alpha2_new - float(self.alpha[i2]))
            self.E = (
                self.E
                + delta1 * self.kernel(self.X, self.X[i1])
                + delta2 * self.kernel(self.X, self.X[i2])
                + (b_new - self.b)
            )
            self.alpha[i1] = alpha1_new
            self.alpha[i2] = alpha2_new
            self.b = b_new

    def predict(self, X_data):
        """Predict a label for every row of X_data.

        Returns:
            ndarray with +1 where the decision value is positive and -1
            otherwise.
        """
        weights = self.alpha * self.Y
        y_pred = []
        for data in X_data:
            r = np.dot(weights, self.kernel(self.X, data)) + self.b
            y_pred.append(1 if r > 0 else -1)
        return np.array(y_pred)

    def score(self, X_data, y_data):
        """Return the fraction of rows in X_data predicted as y_data."""
        y_pred = self.predict(X_data)
        return np.sum(y_pred == y_data) / len(y_data)
def create_dataset(n_samples=1000):
    """Build a two-class, two-feature toy dataset of numpy arrays.

    The first ``n_samples // 2`` rows are drawn from a normal distribution
    with mean 2 and standard deviation 1 and are labelled -1; the remaining
    rows are drawn with mean -2 and are labelled +1.

    Returns:
        A ``(x, y)`` pair: ``x`` has shape ``(n_samples, 2)`` and ``y`` has
        shape ``(n_samples,)``.
    """
    n_neg = n_samples // 2
    n_pos = n_samples - n_neg

    # Draw the -1 class first, then the +1 class.
    neg_points = np.random.normal(2, 1, size=(n_neg, 2))
    neg_labels = -np.ones(n_neg)
    pos_points = np.random.normal(-2, 1, size=(n_pos, 2))
    pos_labels = np.ones(n_pos)

    # Stack the two classes: all -1 rows come before all +1 rows.
    points = np.vstack((neg_points, pos_points))
    targets = np.hstack((neg_labels, pos_labels))
    return points, targets
# Train and evaluate the numpy implementation on the toy dataset.
X, y = create_dataset(1000)
# Shuffle before splitting: create_dataset returns all -1 samples first, so an
# unshuffled 800/200 split would leave only +1 samples in the test set.
order = np.random.permutation(len(X))
X, y = X[order], y[order]
nb = svm()
nb.fit(X[:800], y[:800])
print(nb.score(X[800:], y[800:]))
运行时间3s左右,准确率1.0
sklearn
from sklearn.svm import SVC
# Reference run: scikit-learn's SVC with a linear kernel on the same X / y.
model = SVC(kernel='linear')
model.fit(X[:800], y[:800])
# Print the accuracy so it is also visible when run as a plain script.
print(model.score(X[800:], y[800:]))
运行时间0.5s,准确率1.0
所以我很好奇sklearn中的svm是怎么实现的,之后了解清楚了会再作补充。