这段代码是在理解了 SVM 方法及 SMO 算法原理的基础上编写的。不过由于编程水平有限,加上对 SVM 和 SMO 的理解还不够透彻,我最初自己写的版本只有现在这一版代码的 50% 左右,因此最终还是改成了书上的版本。
import random
from numpy import *
def loadDataSet(filename):
    """Read a tab-separated data file into feature rows and labels.

    Each non-blank line must hold at least three tab-separated numeric
    fields: the first two become one ``[x0, x1]`` feature row, the third
    becomes the class label.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple: a list of two-element float lists
        and a list of float labels, in file order.

    Raises:
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a non-blank line has fewer than three fields.
    """
    dataMat = []
    labelMat = []
    # The context manager closes the file even when parsing raises.
    with open(filename) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            fields = stripped.split('\t')
            dataMat.append([float(fields[0]), float(fields[1])])
            labelMat.append(float(fields[2]))
    return dataMat, labelMat
def selectJRand(i, m):
    """Pick a random index different from ``i``.

    Candidates are drawn with ``int(random.uniform(0, m))`` and redrawn
    until the result differs from ``i``.

    Args:
        i: Index that must not be returned; expected to lie in ``range(m)``.
        m: Number of available indices.

    Returns:
        An integer ``j`` with ``j != i``.

    Raises:
        ValueError: If ``m < 2``, because no second index exists and the
            redraw loop could never terminate.
    """
    if m < 2:
        raise ValueError("selectJRand needs m >= 2 to pick an index different from i")
    j = i
    while j == i:
        j = int(random.uniform(0, m))
    return j
def clipAlpha(aj, H, L):
    """Clamp ``aj`` to the interval bounded by ``L`` (low) and ``H`` (high).

    The upper bound is applied first, then the lower bound, so a value
    outside the range is moved onto the nearest boundary.

    Args:
        aj: Value to clip.
        H: Upper bound.
        L: Lower bound.

    Returns:
        ``aj`` if it already lies within the bounds, otherwise the bound
        it violated.
    """
    capped = H if aj > H else aj
    return L if capped < L else capped
def _smoError(dataMatrix, labelMat, alphas, b, k):
    """Return the prediction error E_k = f(x_k) - y_k for sample ``k``.

    f(x_k) = sum_t alpha_t * y_t * <x_t, x_k> + b, evaluated with the
    current ``alphas`` and ``b``.
    """
    fXk = (multiply(alphas, labelMat).T * (dataMatrix * dataMatrix[k, :].T)).item() + b
    return fXk - labelMat[k, 0]


def _alphaBounds(alphaI, alphaJ, sameLabel, C):
    """Return the interval (L, H) the updated alpha_j must be clipped to."""
    if sameLabel:
        low = alphaJ + alphaI - C
        high = alphaJ + alphaI
    else:
        low = alphaJ - alphaI
        high = C + alphaJ - alphaI
    # Explicit comparisons rather than max()/min(): the module-level
    # `from numpy import *` may rebind those names on some NumPy versions.
    if low < 0:
        low = 0
    if high > C:
        high = C
    return low, high


def smoSimple(dataMatIn, classLabels, C, toler, maxIter):
    """Train a linear SVM with the simplified SMO algorithm.

    For every sample i that violates the KKT conditions by more than
    ``toler``, a second sample j is chosen at random and the pair
    (alpha_i, alpha_j) is optimised jointly; ``b`` is then re-estimated.

    Args:
        dataMatIn: Sequence of feature rows, one per sample.
        classLabels: Sequence of class labels, one per sample.
        C: Penalty coefficient; every alpha is kept within [0, C].
        toler: Absolute tolerance on ``y_i * E_i`` used in the KKT check.
        maxIter: Number of consecutive full passes over the data without
            any alpha pair changing after which training stops (the pass
            counter is reset to 0 whenever a pass changes a pair).

    Returns:
        A ``(b, alphas)`` tuple. ``alphas`` is an (m, 1) matrix of
        multipliers. ``b`` is the bias: the integer 0 if no pair was ever
        updated, otherwise a 1x1 matrix.
    """
    # asmatrix is the function `mat` aliased; `mat` is gone in NumPy 2.x.
    dataMatrix = asmatrix(dataMatIn)
    labelMat = asmatrix(classLabels).transpose()
    b = 0
    m = shape(dataMatrix)[0]
    alphas = asmatrix(zeros((m, 1)))
    iteration = 0
    while iteration < maxIter:
        alphaPairsChanged = 0
        for i in range(m):
            yi = labelMat[i, 0]
            Ei = _smoError(dataMatrix, labelMat, alphas, b, i)
            # Only optimise samples whose error is too large AND whose
            # alpha can still move in the direction that reduces it.
            violatesKKT = ((yi * Ei < -toler and alphas[i, 0] < C)
                           or (yi * Ei > toler and alphas[i, 0] > 0))
            if not violatesKKT:
                continue

            j = selectJRand(i, m)
            yj = labelMat[j, 0]
            Ej = _smoError(dataMatrix, labelMat, alphas, b, j)
            # Scalar indexing yields copies, so later in-place updates of
            # alphas do not alter these saved values.
            alphaIold = alphas[i, 0]
            alphaJold = alphas[j, 0]

            # The bounds formula differs for equal and opposite labels.
            L, H = _alphaBounds(alphaIold, alphaJold, yi == yj, C)
            if L == H:
                print("L = H ")
                continue

            xi = dataMatrix[i, :]
            xj = dataMatrix[j, :]
            Kii = xi * xi.T
            Kij = xi * xj.T
            Kjj = xj * xj.T
            # eta = 2*K_ij - K_ii - K_jj; the update below divides by it
            # and is only applied when it is strictly negative.
            eta = 2.0 * Kij - Kii - Kjj
            if eta >= 0:
                print("eta >= 0")
                continue

            alphas[j] -= yj * (Ei - Ej) / eta
            alphas[j] = clipAlpha(alphas[j, 0], H, L)
            if abs(alphas[j, 0] - alphaJold) < 0.0001:
                # alpha_j barely moved: treat this pair as unproductive.
                print("j not moving enough")
                continue

            # Move alpha_i by the same amount in the opposite direction
            # (scaled by the labels).
            alphas[i] += yj * yi * (alphaJold - alphas[j, 0])

            deltaI = alphas[i, 0] - alphaIold
            deltaJ = alphas[j, 0] - alphaJold
            # b1 is derived from sample i's error, b2 from sample j's.
            b1 = b - Ei - yi * deltaI * Kii - yj * deltaJ * Kij
            b2 = b - Ej - yi * deltaI * Kij - yj * deltaJ * Kjj
            if 0 < alphas[i, 0] < C:
                b = b1
            elif 0 < alphas[j, 0] < C:
                b = b2
            else:
                # Neither alpha is strictly inside (0, C): take the midpoint.
                b = (b1 + b2) / 2.0

            alphaPairsChanged += 1
            print("iter:%d i:%d,paris changed %d" % (iteration, i, alphaPairsChanged))

        if alphaPairsChanged == 0:
            iteration += 1
        else:
            iteration = 0
        print("iteration number:%d" % iteration)
    return b, alphas
运行结果如下:
…
结果为: