Machine Learning: Support Vector Machines (SVM)

Soft Margin and Regularization in SVM
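In scikit-learn's SVM implementations the hyperparameter C controls how soft the margin is: a very large C penalizes margin violations heavily and approaches a hard margin, while a small C tolerates more violations, i.e. regularizes more strongly.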

"""scikit-learn中的SVM"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

iris=datasets.load_iris()
X=iris.data
y=iris.target

X=X[y<2,:2]  # keep only two classes and the first two features for 2-D visualization
y=y[y<2]

plt.scatter(X[y==0,0],X[y==0,1],color='red')
plt.scatter(X[y==1,0],X[y==1,1],color='blue')
plt.show()

"""使用SVM之前一定要先进行数据归一化"""
standardScaler=StandardScaler()
standardScaler.fit(X)
x_standard=standardScaler.transform(X)

svc=LinearSVC(C=1e9)  # a very large C, effectively a hard margin
svc.fit(x_standard,y)

"""绘制函数"""
def plot_decision_boundary(model,axis):
    x0,x1 = np.meshgrid(
        np.linspace(axis[0],axis[1],int((axis[1]-axis[0])*100)),
        np.linspace(axis[2],axis[3],int((axis[3]-axis[2])*100))
    )
    X_new = np.c_[x0.ravel(),x1.ravel()]
    y_predict = model.predict(X_new)
    zz = y_predict.reshape(x0.shape)
    from matplotlib.colors import ListedColormap
    custom_cmap = ListedColormap(['#EF9A9A','#FFF59D','#90CAF9'])
    plt.contourf(x0,x1,zz,cmap=custom_cmap)
plot_decision_boundary(svc,axis=[-3,3,-3,3])
plt.scatter(x_standard[y==0,0],x_standard[y==0,1])
plt.scatter(x_standard[y==1,0],x_standard[y==1,1])
plt.show()

"""绘制上下两个支撑向量的图像"""
def plot_svc_decision_boundary(model,axis):
    x0,x1 = np.meshgrid(
        np.linspace(axis[0],axis[1],int((axis[1]-axis[0])*100)),
        np.linspace(axis[2],axis[3],int((axis[3]-axis[2])*100))
    )
    X_new = np.c_[x0.ravel(),x1.ravel()]
    y_predict = model.predict(X_new)
    zz = y_predict.reshape(x0.shape)
    from matplotlib.colors import ListedColormap
    custom_cmap = ListedColormap(['#EF9A9A','#FFF59D','#90CAF9'])
    plt.contourf(x0,x1,zz,cmap=custom_cmap)

    w=model.coef_[0]
    b=model.intercept_[0]

    # decision boundary: w0*x0 + w1*x1 + b = 0
    # -> x1 = -w0/w1 * x0 - b/w1
    # margin lines satisfy w0*x0 + w1*x1 + b = +/-1, i.e. the boundary shifted by +/-1/w1
    plot_x=np.linspace(axis[0],axis[1],200)
    up_y=-w[0]/w[1]*plot_x-b/w[1]+1/w[1]
    down_y=-w[0]/w[1]*plot_x-b/w[1]-1/w[1]

    up_index=(up_y>=axis[2])&(up_y<=axis[3])
    down_index=(down_y>=axis[2])&(down_y<=axis[3])
    plt.plot(plot_x[up_index],up_y[up_index],color='black')
    plt.plot(plot_x[down_index],down_y[down_index],color='black')

plot_svc_decision_boundary(svc,axis=[-3,3,-3,3])
plt.scatter(x_standard[y==0,0],x_standard[y==0,1])
plt.scatter(x_standard[y==1,0],x_standard[y==1,1])
plt.show()
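
To see the soft margin in action, refit with a much smaller C (C=0.01 here is an illustrative value, not from the original run); the margin widens and tolerates points inside or beyond it:

"""A much smaller C relaxes the margin (illustrative value)"""
svc2=LinearSVC(C=0.01)
svc2.fit(x_standard,y)
plot_svc_decision_boundary(svc2,axis=[-3,3,-3,3])
plt.scatter(x_standard[y==0,0],x_standard[y==0,1])
plt.scatter(x_standard[y==1,0],x_standard[y==1,1])
plt.show()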

Result: (scatter and decision-boundary plots omitted)

Kernel Functions in SVM

"""SVM中使用多项式特征"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

X,y=datasets.make_moons(noise=0.15,random_state=666)
plt.scatter(X[y==0,0],X[y==0,1])
plt.scatter(X[y==1,0],X[y==1,1])
plt.show()

"""使用多项式特征的SVM"""
from sklearn.preprocessing import PolynomialFeatures,StandardScaler
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline

def PolynomialSVC(degree,C=1.0):
    return Pipeline([
        ('Poly',PolynomialFeatures(degree=degree)),
        ('std_scaler',StandardScaler()),
        ('linearSVC',LinearSVC(C=C))  # pass C through; otherwise the argument is silently ignored
    ])
poly_svc=PolynomialSVC(degree=3)
poly_svc.fit(X,y)

def plot_decision_boundary(model,axis):
    x0,x1 = np.meshgrid(
        np.linspace(axis[0],axis[1],int((axis[1]-axis[0])*100)),
        np.linspace(axis[2],axis[3],int((axis[3]-axis[2])*100))
    )
    X_new = np.c_[x0.ravel(),x1.ravel()]
    y_predict = model.predict(X_new)
    zz = y_predict.reshape(x0.shape)
    from matplotlib.colors import ListedColormap
    custom_cmap = ListedColormap(['#EF9A9A','#FFF59D','#90CAF9'])
    plt.contourf(x0,x1,zz,cmap=custom_cmap)
plot_decision_boundary(poly_svc,axis=[-1.5,2.5,-1.0,1.5])
plt.scatter(X[y==0,0],X[y==0,1])
plt.scatter(X[y==1,0],X[y==1,1])
plt.show()
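
For intuition about what PolynomialFeatures does here, degree=3 on the two moon features generates every monomial up to degree 3. A quick sanity check (a sketch; the shape comment assumes make_moons' default of 100 samples):

"""Degree-3 expansion of 2 features produces 10 monomial terms"""
poly=PolynomialFeatures(degree=3)
X_poly=poly.fit_transform(X)
print(X_poly.shape)  # (100, 10): 1, x0, x1, x0^2, x0*x1, x1^2, x0^3, x0^2*x1, x0*x1^2, x1^3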

"""使用多项式核函数的SVM"""
from sklearn.svm import SVC

def PolynomialKernelSVC(degree,C=1.0):
    return Pipeline([
        ('std_scaler',StandardScaler()),
        ('KernelSVC',SVC(kernel='poly',degree=degree,C=C))
    ])
poly_kernel_svc=PolynomialKernelSVC(degree=3)
poly_kernel_svc.fit(X,y)
plot_decision_boundary(poly_kernel_svc,axis=[-1.5,2.5,-1.0,1.5])
plt.scatter(X[y==0,0],X[y==0,1])
plt.scatter(X[y==1,0],X[y==1,1])
plt.show()

Result: (decision-boundary plots for the polynomial-feature SVM and the polynomial-kernel SVM omitted)

The Essence of Kernel Functions

The essence of kernel functions can be summarized in three points:

1) In practice we frequently run into linearly non-separable data. The standard remedy is to map the sample features into a higher-dimensional space, turning the problem into a linearly separable one.

2) Mapping features into a high-dimensional space, however, can make the dimensionality prohibitively large.

3) Kernel functions address this potential curse of dimensionality. A kernel still corresponds to a low-to-high-dimensional feature map, but it avoids the expensive computation in the high-dimensional space: all the arithmetic is carried out in the low dimension, while the classification effect is realized, in substance, in the high dimension.

Of course, SVM can also handle linearly separable problems; in that case the linear kernel is used.

The commonly used kernels are the linear kernel, the polynomial kernel, the RBF (Gaussian) kernel, and the sigmoid kernel, sketched below.
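
As a concrete illustration (a minimal sketch, not part of the original post), each of these kernels can be written directly in NumPy; gamma, r (scikit-learn's coef0) and degree follow scikit-learn's parameter conventions:

"""The four common kernel functions computed directly (illustrative sketch)"""
import numpy as np

def linear_kernel(x,z):
    return x @ z

def poly_kernel(x,z,degree=3,gamma=1.0,r=0.0):
    return (gamma*(x @ z)+r)**degree

def rbf_kernel(x,z,gamma=1.0):
    return np.exp(-gamma*np.sum((x-z)**2))

def sigmoid_kernel(x,z,gamma=1.0,r=0.0):
    return np.tanh(gamma*(x @ z)+r)

x,z=np.array([1.0,2.0]),np.array([3.0,4.0])
print(linear_kernel(x,z),poly_kernel(x,z,degree=2),rbf_kernel(x,z))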

 

"""高斯核函数"""
import numpy as np
import matplotlib.pyplot as plt

X=np.arange(-4,5,1)
y=np.array((X>=-2)&(X<=2),dtype='int')

plt.scatter(X[y==0],[0]*len(X[y==0]))
plt.scatter(X[y==1],[0]*len(X[y==1]))
plt.show()

def gaussian(x,l):
    gamma=1.0
    return np.exp(-gamma*(x-l)**2)  # Gaussian feature centered at landmark l

l1,l2=-1,1  # two fixed landmarks
X_new=np.empty((len(X),2))

for i,data in enumerate(X):
    X_new[i,0]=gaussian(data,l1)
    X_new[i,1]=gaussian(data,l2)

plt.scatter(X_new[y==0,0],X_new[y==0,1])
plt.scatter(X_new[y==1,0],X_new[y==1,1])
plt.show()

Result: (plots of the 1-D data and its 2-D Gaussian-feature mapping omitted)

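Here l1 and l2 are two fixed landmarks chosen for illustration. In the actual RBF kernel every training sample serves as a landmark, so a dataset of m samples is implicitly mapped into an m-dimensional space; as the second plot shows, the originally inseparable 1-D points become linearly separable after the mapping.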
"""scikit-learn中使用RBF核"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
X,y=datasets.make_moons(noise=0.15,random_state=666)
plt.scatter(X[y==0,0],X[y==0,1])
plt.scatter(X[y==1,0],X[y==1,1])
plt.show()

def RBFKernelSVC(gamma=1.0):
    return Pipeline([
        ('std_scaler',StandardScaler()),
        ('svc',SVC(kernel='rbf',gamma=gamma))
    ])

svc=RBFKernelSVC(gamma=1.0)
svc.fit(X,y)

def plot_decision_boundary(model,axis):
    x0,x1 = np.meshgrid(
        np.linspace(axis[0],axis[1],int((axis[1]-axis[0])*100)),
        np.linspace(axis[2],axis[3],int((axis[3]-axis[2])*100))
    )
    X_new = np.c_[x0.ravel(),x1.ravel()]
    y_predict = model.predict(X_new)
    zz = y_predict.reshape(x0.shape)
    from matplotlib.colors import ListedColormap
    custom_cmap = ListedColormap(['#EF9A9A','#FFF59D','#90CAF9'])
    plt.contourf(x0,x1,zz,cmap=custom_cmap)
plot_decision_boundary(svc,axis=[-1.5,2.5,-1.0,1.5])
plt.scatter(X[y==0,0],X[y==0,1])
plt.scatter(X[y==1,0],X[y==1,1])
plt.show()

Result: (decision-boundary plot omitted)

Changing the value of gamma changes the decision boundary: a large gamma overfits (tight islands around individual samples), while a small gamma underfits (a nearly linear boundary).
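
A minimal sketch of that sweep (the gamma values are illustrative, not from the original post):

"""Sweep gamma to see underfitting and overfitting (illustrative values)"""
for g in [0.1,1.0,100.0]:
    svc_g=RBFKernelSVC(gamma=g)
    svc_g.fit(X,y)
    plot_decision_boundary(svc_g,axis=[-1.5,2.5,-1.0,1.5])
    plt.scatter(X[y==0,0],X[y==0,1])
    plt.scatter(X[y==1,0],X[y==1,1])
    plt.title('gamma = %s' % g)
    plt.show()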

Solving Regression Problems with SVM

"""SVM思想解决回归问题"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split

boston=datasets.load_boston()  # note: load_boston was removed in scikit-learn 1.2; this snippet assumes an older version
X=boston.data
y=boston.target

X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=666)

from sklearn.svm import LinearSVR
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

def StandardLinearSVR(epsilon=0.1):
    return Pipeline([
        ('std_scaler',StandardScaler()),
        ('linearSVR',LinearSVR(epsilon=epsilon))  # epsilon sets the width of the tolerance tube around the fit
    ])
svr=StandardLinearSVR()
svr.fit(X_train,y_train)
print(svr.score(X_test,y_test))
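
The pipeline above is linear. A natural follow-up (a sketch with illustrative hyperparameters, not part of the original post) is the kernelized SVR, which also exposes C and gamma for tuning:

"""Kernelized SVR (illustrative hyperparameters)"""
def StandardKernelSVR(epsilon=0.1,gamma=1.0):
    return Pipeline([
        ('std_scaler',StandardScaler()),
        ('kernelSVR',SVR(kernel='rbf',epsilon=epsilon,gamma=gamma))
    ])

ksvr=StandardKernelSVR()
ksvr.fit(X_train,y_train)
print(ksvr.score(X_test,y_test))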


Reprinted from blog.csdn.net/qq_35654080/article/details/81591082