Implementing the Perceptron Learning Algorithm

1. Gradient Descent for a Single-Variable Function

Suppose we have a single-variable function f(\theta), whose derivative is f'(\theta). We initialize the iteration at a starting point \theta_{0} and choose a learning rate \eta. The gradient descent update rule is

\theta_{k+1} = \theta_{k} - \eta f'(\theta_{k})

Starting from \theta_{0}, we now carry out the iterative gradient descent computation:

After four such iterations, that is, four steps, we have essentially reached the lowest point of the function, the bottom of the valley.
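
As a minimal runnable sketch of this process (the concrete choices f(\theta) = \theta^2, \theta_{0} = 1 and \eta = 0.4 are assumptions made here for illustration, not taken from the original example):

def gradient_descent(df, theta0, eta, steps):
    # Repeatedly apply theta <- theta - eta * f'(theta)
    theta = theta0
    history = [theta]
    for _ in range(steps):
        theta = theta - eta * df(theta)
        history.append(theta)
    return history

# Assumed example: f(theta) = theta**2, so f'(theta) = 2*theta
print(gradient_descent(lambda t: 2 * t, theta0=1.0, eta=0.4, steps=4))
# [1.0, 0.2, 0.04, 0.008, 0.0016] -- four steps land essentially at the minimum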

2. Python Implementation of the Primal Form of the Perceptron Learning Algorithm

Stochastic gradient descent: scan the training samples one by one and update the parameters as soon as a misclassified sample is found.

import numpy as np

# Training data: two positive samples and one negative sample
x_train = np.array([[3, 3], [4, 3], [1, 1]])
y_train = np.array([1, 1, -1])

alpha = 1                   # learning rate
omega = np.array([0, 0])    # weight vector
b = 0                       # bias
i = 0
while i < len(x_train):
    # sample i is misclassified when y_i * (omega . x_i + b) <= 0
    if (x_train[i].dot(omega) + b) * y_train[i] <= 0:
        omega = omega + alpha * y_train[i] * x_train[i]
        b = b + alpha * y_train[i]
        i = 0               # restart the scan after every update
    else:
        i = i + 1
    print(i, omega, b)
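
With this data and scan order, the loop should terminate with omega = [1, 1] and b = -3, i.e. the separating line x1 + x2 - 3 = 0.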

Batch gradient descent: each iteration sums the gradient contributions of all currently misclassified samples before updating (SGD2 below), rather than updating on one sample at a time (SGD).

import numpy as np
import matplotlib.pyplot as plt
import math

def prodata(omega1=2, omega2=-3, b=5, seed=1, size=30):
    # Generate labeled data around the line omega1*x1 + omega2*x2 + b = 0
    np.random.seed(seed)
    omega = np.array([omega1, omega2])
    x1 = np.arange(0, size)
    t = np.random.normal(loc=0, scale=5, size=size)    # offset from the line along x2
    x2 = t - (b + omega[0] * x1) / omega[1] * 1.0
    y_train = []
    x_train = np.array(list(zip(x1, x2)))              # list() needed in Python 3
    for x in t:
        if x >= 0:
            y_train.append(1)
        else:
            y_train.append(-1)
    y_train = np.array(y_train)
    return x_train, y_train

def SGD(x_train,y_train):
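    # Primal-form perceptron, same update rule as the standalone loop above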
    alpha=0.01
    omega=np.array([0,0])
    b,i=0,0
    while i < len(x_train):
        if (x_train[i].dot(omega)+b)*y_train[i] <=0:
            omega=omega+alpha*y_train[i]*x_train[i]
            b=b+alpha*y_train[i]
            i=0
        else:
            i=i+1
    return omega,b

def SGD2(x_train, y_train, epsilon=1, alpha=0.01):
    # Batch gradient descent on the perceptron loss
    # L(omega, b) = -sum_{misclassified i} y_i * (omega . x_i + b)
    omega, b = np.array([0, 0]), 0

    def get_sample(omega, b, x_train, y_train):
        # Collect all currently misclassified samples
        x, y = [], []
        for i in range(0, len(x_train)):
            if (x_train[i].dot(omega) + b) * y_train[i] <= 0:
                x.append(x_train[i])
                y.append(y_train[i])
        return np.array(x), np.array(y)

    def gradient_sum(x, y):
        # Negative gradient of the loss: (sum_i y_i*x_i, sum_i y_i)
        return (y.reshape(-1, 1) * x).sum(0), y.sum()

    def target(omega, b, x_train, y_train):
        # Current loss value together with the misclassified samples
        x, y = get_sample(omega, b, x_train, y_train)
        if len(x) != 0:
            return -(y * ((omega * x).sum(1) + b)).sum(), x, y
        else:
            return 0, x, y

    now, x, y = target(omega, b, x_train, y_train)
    old = now + epsilon + 1            # force at least one iteration
    while abs(old - now) > epsilon and len(x) != 0:
        omega = omega + alpha * gradient_sum(x, y)[0]
        b = b + alpha * gradient_sum(x, y)[1]
        old = now
        now, x, y = target(omega, b, x_train, y_train)
        print("omega,b,now", omega, b, now)

    return omega, b, old - now

if __name__ == '__main__':
    # Generate synthetic training data
    omega1, omega2, b = 2, -3, 5
    size = 30
    x_train, y_train = prodata(omega1, omega2, b, 1, size)

    # Estimate the parameters by gradient descent
    omega_estimate, b_estimate, error = SGD2(x_train, y_train)
    # omega_estimate, b_estimate = SGD(x_train, y_train)

    # Print the results
    print("actual is :", omega1, omega2, b)
    n = math.sqrt((omega1 ** 2 + omega2 ** 2 + b ** 2) * 1.0)
    print("norm is ", omega1 * 1.0 / n, omega2 * 1.0 / n, b * 1.0 / n)
    print("estimate is :", omega_estimate[0], omega_estimate[1], b_estimate)
    print("error:", error)


    # Plot the data points and the two separating lines
    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    plt.xlabel('X1')
    plt.ylabel('X2')

    x1 = np.arange(0, 35, 34)          # two endpoints, enough to draw a straight line
    omega = np.array([omega1, omega2])
    x2 = -(b + omega1 * x1) / omega2 * 1.0
    ax1.plot(x1, x2, c="black")        # true line, black
    x2 = -(b_estimate + omega_estimate[0] * x1) / omega_estimate[1] * 1.0
    ax1.plot(x1, x2, c="red")          # fitted line, red
    for i in range(0, len(x_train)):
        if y_train[i] > 0:
            ax1.scatter(x_train[i, 0], x_train[i, 1], c="r", marker='o')
        else:
            ax1.scatter(x_train[i, 0], x_train[i, 1], c="b", marker='^')
    plt.show()

3. The Dual Form of the Perceptron Learning Algorithm

\alpha_{i} \leftarrow \alpha_{i} + \eta = n_{i}\eta + \eta = (n_{i}+1)\eta

There are two ways to read this update, as \alpha_{i}+\eta and as (n_{i}+1)\eta. The former adds one more step of length \eta to the accumulated step length whenever sample i is picked as a misclassified sample again (each misclassified sample contributes one step \eta). The latter updates the count of how many times the i-th sample has been used: since the step added per misclassification is the fixed \eta, it is enough to increment the count n_{i} that records how often the sample has triggered an update.

For further discussion of this point, see https://blog.csdn.net/qq_26826585/article/details/87967520
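
In the dual form the weight vector is represented as \omega = \sum_{j}\alpha_{j}y_{j}x_{j}, so the misclassification test for sample i becomes y_{i}(\sum_{j}\alpha_{j}y_{j}(x_{j}\cdot x_{i})+b)\leq 0, which depends on the training data only through the inner products x_{j}\cdot x_{i}. Precomputing these inner products as a Gram matrix is exactly what SGD4 below does.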

import numpy as np

def SGD4(x_train,y_train):
    N=len(x_train)
    alpha=np.zeros((N,))
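    # alpha[i] accumulates n_i * eta, the total step length contributed by sample i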
    def Gram(x_train):
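        # Gram matrix of pairwise inner products: gram[i, j] = x_i . x_j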
        gram=np.zeros((len(x_train),len(x_train)))
        for i in range(0,len(x_train)):
            for j in range(0,len(x_train)):
                gram[i,j]=x_train[i].dot(x_train[j])
        return gram

    b, i, theta = 0, 0, 1        # theta is the learning rate (eta in the text)
    gram = Gram(x_train)
    print(gram)
    while i < len(x_train):
        print(i)
        # Dual-form misclassification test: y_i * (sum_j alpha_j*y_j*(x_j . x_i) + b) <= 0
        if ((alpha * y_train).dot(gram[:, i]) + b) * y_train[i] <= 0:
            alpha[i] = alpha[i] + theta
            b = b + theta * y_train[i]
            i = 0
        else:
            i = i + 1
        print(alpha, b)
    return alpha, b

if __name__ == '__main__':
    x_train = np.array([[3, 3], [4, 3], [1, 1]])
    y_train = np.array([1, 1, -1])
    print(SGD4(x_train, y_train))
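
The dual form returns the coefficients alpha rather than the weight vector itself. Below is a small sketch of how omega could be recovered from them (the helper name recover_weights is introduced here for illustration and is not part of the original post):

import numpy as np

def recover_weights(alpha, y_train, x_train):
    # omega = sum_j alpha_j * y_j * x_j
    return ((alpha * y_train).reshape(-1, 1) * x_train).sum(axis=0)

# For the example data above this should reproduce the primal result,
# e.g. omega = [1, 1] if alpha converges to [2, 0, 5] with b = -3.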

Reposted from blog.csdn.net/asdfsadfasdfsa/article/details/89351484