Python 机器学习常用算法

机器学习算法-感知机

数据 CSV(即下文代码读取的 test.csv,GBK 编码):

       颜色     大小     测试人员     测试动作         结果
0     黄色      小           成人          用手打        不爆炸
1     黄色      小           成人          用脚踩           爆炸
2     黄色      小           小孩          用手打        不爆炸
3     黄色      小           小孩          用脚踩           爆炸
4     黄色      小           小孩          用脚踩           爆炸
5     黄色      小           小孩          用脚踩           爆炸
6     黄色      大           成人          用手打           爆炸
7     黄色      大           成人          用脚踩           爆炸
8     黄色      大           小孩          用手打        不爆炸
9     紫色      小           成人          用手打        不爆炸
10   紫色      小           小孩          用手打        不爆炸
11   紫色      大           小孩          用手打        不爆炸


感知机学习算法的原始形式

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.linear_model import Perceptron

from sklearn.metrics import accuracy_score

# Load the sample table from test.csv (the file is read as GBK-encoded text).
dataset = pd.read_csv('test.csv',encoding='gbk')
# Every column except the last one is a feature column.
featNames = dataset.columns[:-1]
# Distinct values that occur in the last (label) column.
labelNames = np.unique(dataset.values[:,-1])

# Split the raw values into the feature matrix and the label vector.
feat,label = dataset.values[:,0:-1],dataset.values[:,-1]

def codeXByLabelEncoder(feat):
    """Return a copy of ``feat`` whose columns are replaced by integer codes.

    Each column is encoded on its own by re-fitting a single scikit-learn
    ``LabelEncoder`` on that column. ``feat`` must be 2-D (``feat.shape[1]``
    is read) and is left unmodified; the result is written into a copy.
    """
    encoded = feat.copy()
    labeler = preprocessing.LabelEncoder()
    columnCount = feat.shape[1]
    for col in range(columnCount):
        column = encoded[:, col]
        # fit_transform re-learns the categories of this column every time.
        encoded[:, col] = labeler.fit_transform(column)
    return encoded
# Integer-encode every feature column of the raw table.
x = codeXByLabelEncoder(feat)

def codeYByLabelEncoder(label):
    """Map the textual outcome labels to perceptron targets.

    '不爆炸' is replaced by -1 and '爆炸' by 1; any other value is left
    unchanged. The replacement happens on a copy, so ``label`` itself is
    not modified.
    """
    encoded = label.copy()
    # Same replacement order as a pair of sequential masked assignments.
    for text, target in (('不爆炸', -1), ('爆炸', 1)):
        encoded[encoded == text] = target
    return encoded
# Convert the text labels into -1 / 1 targets.
y = codeYByLabelEncoder(label)

def trainModelByOrigin(x,y,lr=0.01,iter_time=1000):
    """Train a perceptron with the primal (original-form) update rule.

    Starting from ``w = 0`` and ``b = 0``, every step computes
    ``y_i * (w . x_i + b)`` for all samples, picks one sample whose value is
    ``<= 0`` uniformly at random (global NumPy random state) and applies
    ``w += lr * y_i * x_i`` and ``b += lr * y_i``. Training stops as soon as
    no sample is misclassified, or after ``iter_time`` steps.

    Args:
        x: 2-D array-like of numeric feature codes, one row per sample.
            Object-dtype arrays holding numbers are accepted.
        y: 1-D array-like of targets (-1 or 1), aligned with the rows of x.
        lr: Learning rate (step size of each update).
        iter_time: Maximum number of update steps.

    Returns:
        Tuple ``(w, b)``: float weight vector of length ``x.shape[1]`` and
        the bias.
    """
    # Cast once so the arithmetic below runs on float arrays instead of
    # element-wise Python objects (and w comes back with a float dtype).
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    w = np.zeros(x.shape[1])
    b = 0.0

    for _ in range(iter_time):
        margins = y * (np.dot(x, w) + b)
        # A margin of exactly 0 counts as a mistake, too.
        wrongIndex = np.flatnonzero(margins <= 0)
        if wrongIndex.size == 0:
            break
        picked = np.random.choice(wrongIndex)
        w = w + lr * y[picked] * x[picked]
        b = b + lr * y[picked]

    return w, b

# Train the primal-form perceptron on the encoded data.
w,b = trainModelByOrigin(x,y)
# Make sure the weight vector has a float dtype before it is used for prediction.
w = w.astype(float)

def predictByOrigin(w,b,testset):
    """Print per-sample predictions and the overall accuracy on ``testset``.

    Every row of ``testset`` holds the feature values followed by the true
    label in the last column. The prediction for a row is
    ``sign(features . w + b)``. Nothing is returned; results are printed.
    """
    total = testset.shape[0]
    hits = 0
    for row in testset:
        features, actual = row[0:-1], row[-1]
        predLabel = np.sign(np.dot(features, w) + b)
        print('pred:{} actual:{}'.format(int(predLabel), actual))
        # A sign of 0 matches neither label and therefore counts as a miss.
        hits += int(predLabel == actual)

    print('accuracy: ', hits/total)

# Training set in "features + label" layout: y is appended to x as the last column.
trainset = np.hstack((x,y.reshape(len(y),1)));
# Hand-written evaluation samples in the same layout: the last column is the
# label, the preceding columns are presumably already-encoded feature codes.
testset = np.array([
        [1,1,0,1,1],
        [0,0,1,1,1],
        [1,0,0,0,1],
        [0,1,0,1,-1],
        [1,1,1,1,1]
        ]) 
# Report predictions and accuracy on the training data, then on the test samples.
print('------训练集------')
predictByOrigin(w,b,trainset)
print('------测试集------')
predictByOrigin(w,b,testset)

def predictFromModel(x,y,testset):
    """Fit scikit-learn's ``Perceptron`` and print its accuracy and parameters.

    ``x`` and ``y`` are cast to int before fitting. ``testset`` carries the
    features in all columns but the last and the true label in the last
    column. Prints the accuracy on the training data and on ``testset``,
    followed by the first row of ``coef_`` and the first ``intercept_``.
    """
    model = Perceptron(max_iter=1000, eta0=0.01, random_state=0)
    features = x.astype(int)
    targets = y.astype(int)
    model.fit(features, targets)
    trainPred = model.predict(features)

    print('----------perceptron----------')
    trainAccuracy = accuracy_score(targets, trainPred)
    print('accuracy in trainset: ', trainAccuracy)
    testPred = model.predict(testset[:, 0:-1])
    print('accuracy in testset: ', accuracy_score(testset[:, -1], testPred))
    print('w: ', model.coef_[0])
    print('b: ', model.intercept_[0])
     

# Run scikit-learn's Perceptron on the same data for comparison.
predictFromModel(x,y,testset)

感知机学习算法的对偶形式

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.linear_model import Perceptron

# Load the sample table from test.csv (the file is read as GBK-encoded text).
dataset = pd.read_csv('test.csv',encoding='gbk')
# Every column except the last one is a feature column.
featNames = dataset.columns[:-1]
# Distinct values that occur in the last (label) column.
labelNames = np.unique(dataset.values[:,-1])
# Split the raw values into the feature matrix and the label vector.
feat,label = dataset.values[:,0:-1],dataset.values[:,-1]

def codeXByLabelEncoder(feat):
    """Encode each column of ``feat`` as integer codes and return the result.

    A single scikit-learn ``LabelEncoder`` is re-fitted on every column in
    turn. The encoding is written into a copy, so the 2-D input array
    ``feat`` stays untouched.
    """
    result = feat.copy()
    enc = preprocessing.LabelEncoder()
    for colIndex in range(feat.shape[1]):
        # Fit on this column only, then overwrite it with its codes.
        codes = enc.fit_transform(result[:, colIndex])
        result[:, colIndex] = codes
    return result
# Integer-encode every feature column of the raw table.
x = codeXByLabelEncoder(feat)

def codeYByLabelEncoder(label):
    """Turn the text labels into the numeric targets used by the perceptron.

    Entries equal to '不爆炸' become -1, entries equal to '爆炸' become 1,
    everything else is kept. ``label`` is copied first and never modified.
    """
    replacements = (('不爆炸', -1), ('爆炸', 1))
    converted = label.copy()
    for original, numeric in replacements:
        mask = converted == original
        converted[mask] = numeric
    return converted
# Convert the text labels into -1 / 1 targets.
y = codeYByLabelEncoder(label)

def trainModelByDual(x,y,lr=0.01,iter_times=1000):
    """Train a perceptron with the dual-form update rule.

    One coefficient ``alpha_i`` is kept per training sample. Every step
    evaluates ``y_j * sum_i(alpha_i * y_i * (x_i . x_j + 1))`` for all
    samples, picks one sample whose value is ``<= 0`` uniformly at random
    (global NumPy random state) and increases its ``alpha`` by ``lr``.
    Training stops as soon as no sample is misclassified, or after
    ``iter_times`` steps. The primal parameters are then recovered as
    ``w = sum_i(alpha_i * y_i * x_i)`` and ``b = sum_i(alpha_i * y_i)``.

    Args:
        x: 2-D array-like of numeric feature codes, one row per sample.
            Object-dtype arrays holding numbers are accepted.
        y: 1-D array-like of targets (-1 or 1), aligned with the rows of x.
        lr: Learning rate (increment applied to the chosen alpha).
        iter_times: Maximum number of update steps.

    Returns:
        Tuple ``(w, b)``: float weight vector of length ``x.shape[1]`` and
        the bias.
    """
    # Cast once so all products below are plain float array operations.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    alpha = np.zeros(x.shape[0])
    # Gram matrix of all pairwise inner products, computed in one call.
    # The +1 folds the bias term into the kernel; it is loop-invariant,
    # so it is added once here.
    gram = np.dot(x, x.T) + 1

    for _ in range(iter_times):
        margins = y * np.dot(alpha * y, gram)
        # A margin of exactly 0 counts as a mistake, too.
        wrongIndex = np.flatnonzero(margins <= 0)
        if wrongIndex.size == 0:
            break
        picked = np.random.choice(wrongIndex)
        alpha[picked] += lr

    w = np.dot(alpha * y, x)
    b = np.dot(alpha, y)
    return w, b
          
# Train the dual-form perceptron on the encoded data.
w,b = trainModelByDual(x,y)
# Make sure the weight vector has a float dtype before it is used for prediction.
w = w.astype(float)

def predictByDual(w,b,testset):
    """Print the prediction for every row of ``testset`` and the accuracy.

    ``testset`` is a 2-D array whose last column is the true label and whose
    remaining columns are the features. Each row is classified as
    ``sign(features . w + b)``. The function only prints; it returns nothing.
    """
    sampleCount = testset.shape[0]
    featureRows = testset[:, 0:-1]
    actualLabels = testset[:, -1]

    correct = 0
    for features, actual in zip(featureRows, actualLabels):
        predLabel = np.sign(np.dot(features, w) + b)
        print('pred:{} actual:{}'.format(int(predLabel), actual))
        if predLabel == actual:
            correct = correct + 1
    print('accuracy: ', correct/sampleCount)

# Training set in "features + label" layout: y is appended to x as the last column.
trainset = np.hstack((x,y.reshape(len(y),1)));
# Hand-written evaluation samples in the same layout: the last column is the
# label, the preceding columns are presumably already-encoded feature codes.
testset = np.array([
        [1,1,0,1,1],
        [0,0,1,1,1],
        [1,0,0,0,1],
        [0,1,0,1,-1],
        [1,1,1,1,1]
        ]) 
# Report predictions and accuracy on the training data, then on the test samples.
print('------训练集------')
predictByDual(w,b,trainset)
print('------测试集------')
predictByDual(w,b,testset)    

def predictFromModel(x,y,testset):
    """Fit scikit-learn's ``Perceptron`` and print its accuracy and parameters.

    ``x`` and ``y`` are cast to int before fitting. ``testset`` carries the
    features in all columns but the last and the true label in the last
    column. Prints the accuracy on the training data and on ``testset``,
    followed by the first row of ``coef_`` and the first ``intercept_``.

    Args:
        x: 2-D array of encoded training features.
        y: 1-D array of training targets.
        testset: 2-D array of evaluation samples (features + label column).
    """
    # This script's import section does not bring accuracy_score into scope,
    # so import it here to keep the function usable on its own.
    from sklearn.metrics import accuracy_score

    clf = Perceptron(max_iter=1000,eta0=0.01,random_state=0)
    x,y = x.astype(int),y.astype(int)
    clf.fit(x,y)
    train = clf.predict(x)
    print('----------perceptron----------')
    print('accuracy in trainset: ',accuracy_score(y,train))
    print('accuracy in testset: ',accuracy_score(testset[:,-1],clf.predict(testset[:,0:-1])))
    print('w: ',clf.coef_[0])
    print('b: ',clf.intercept_[0])

# Run scikit-learn's Perceptron on the same data for comparison.
predictFromModel(x,y,testset)













猜你喜欢

转载自blog.csdn.net/qq_42394743/article/details/80632200