机器学习--LR算法实现

numpy库下的实践:

import numpy as np

import matplotlib.pyplot as plt

# Load the data set.

def loadDataSet():
    """Parse 'testSet.txt' into (dataMat, labelMat).

    Each line holds two floats and an integer label, whitespace separated.
    A constant 1.0 is prepended to every feature row so the first weight
    acts as the bias/intercept term.
    """
    dataMat = []
    labelMat = []
    # 'with' guarantees the handle is closed (the original leaked it).
    with open('testSet.txt') as fr:
        for line in fr:
            lineArr = line.strip().split()  # strip() drops leading/trailing whitespace
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])  # feature row
            labelMat.append(int(lineArr[2]))  # label
    return dataMat, labelMat

def sigmoid(inx):
    """Logistic function 1 / (1 + e^-x); works on scalars and numpy arrays."""
    # 1.0 forces float division (the Python 2 vs 3 caveat from the original).
    exp_neg = np.exp(-inx)
    return 1.0 / (1 + exp_neg)

# Optimize the weights with full-batch gradient descent.

def gradAscent(dataMatIn, classLabels):
    """Fit logistic-regression weights by full-batch gradient descent.

    dataMatIn   : list of feature rows (bias term included as column 0)
    classLabels : list of 0/1 labels, one per row
    Returns an (n_features, 1) numpy matrix of fitted weights.
    """
    X = np.mat(dataMatIn)
    y = np.mat(classLabels).transpose()
    n_samples, n_features = np.shape(X)
    step = 0.001            # learning rate
    n_iter = 600            # number of full-batch iterations
    w = np.ones((n_features, 1))  # start from all-ones weights
    for _ in range(n_iter):
        prediction = sigmoid(X * w)
        residual = prediction - y
        # Step against the gradient of the log-loss.
        w = w - step * X.transpose() * residual
    return w

# Driver: load the data, fit weights by batch gradient descent, print them.

dataMat,labelMat=loadDataSet()

weights=gradAscent(dataMat,labelMat)

print(weights)

# Plot the samples and the fitted decision boundary.

def plotBestFit(dataMat, labelMat, weights):
    """Scatter the two classes and draw the fitted decision boundary.

    weights is a flat array-like [w0, w1, w2]; the boundary is the line
    w0 + w1*x1 + w2*x2 = 0, i.e. x2 = (-w0 - w1*x1) / w2.
    """
    points = np.array(dataMat)
    num_samples = np.shape(points)[0]
    pos_x, pos_y = [], []   # coordinates of label-1 samples
    neg_x, neg_y = [], []   # coordinates of the rest
    for idx in range(num_samples):
        if labelMat[idx] == 1:
            pos_x.append(points[idx, 1])
            pos_y.append(points[idx, 2])
        else:
            neg_x.append(points[idx, 1])
            neg_y.append(points[idx, 2])
    figure = plt.figure()
    axes = figure.add_subplot(111)
    axes.scatter(pos_x, pos_y, s=30, c='red')
    axes.scatter(neg_x, neg_y, s=30, c='green')
    boundary_x = np.arange(-3.0, 3.0, 0.1)
    # Derived from w0 + w1*x1 + w2*x2 = 0.
    boundary_y = (-weights[0] - weights[1] * boundary_x) / weights[2]
    axes.plot(boundary_x, boundary_y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()

# Driver: draw the fitted decision boundary.

plotBestFit(dataMat,labelMat,weights.getA()) #getA() converts the numpy matrix into a plain ndarray

##################随机梯度法##################

import numpy as np

import matplotlib.pyplot as plt

# Load the data set.

def loadDataSet():
    """Read 'testSet.txt' and return (features, labels).

    Each line: "<x1> <x2> <label>". Features get a leading 1.0 bias term.
    """
    samples = []
    labels = []
    # Context manager closes the file even on error (original left it open).
    with open('testSet.txt') as handle:
        for row in handle:
            parts = row.strip().split()  # strip() removes surrounding whitespace/newline
            samples.append([1.0, float(parts[0]), float(parts[1])])
            labels.append(int(parts[2]))
    return samples, labels

def sigmoid(inx):
    """Elementwise logistic function: 1 / (1 + exp(-inx))."""
    # Using float literals keeps the division floating-point on any Python
    # version (the Python 2 vs 3 caveat from the original).
    denom = 1 + np.exp(-inx)
    return 1.0 / denom

# Optimize the weights with full-batch gradient descent.

def gradAscent(dataMatIn, classLabels):
    """Batch gradient-descent fit of logistic-regression weights.

    Returns an (n_features, 1) numpy matrix after 600 fixed-step updates.
    """
    features = np.mat(dataMatIn)
    targets = np.mat(classLabels).transpose()
    _, width = np.shape(features)
    learning_rate = 0.001   # step size
    iterations = 600        # fixed iteration budget
    coeffs = np.ones((width, 1))
    iteration = 0
    while iteration < iterations:
        estimate = sigmoid(features * coeffs)
        delta = estimate - targets
        # Weight update: move against the log-loss gradient.
        coeffs = coeffs - learning_rate * features.transpose() * delta
        iteration += 1
    return coeffs

# Stochastic gradient descent.

def stocGradAscent(dataMatIn, classLabels):
    """Fit logistic-regression weights by stochastic gradient descent.

    dataMatIn   : numpy array of feature rows (bias column included)
    classLabels : sequence of 0/1 labels, one per row
    Returns a flat numpy array of n_features weights.

    Bug fix: the original referenced the module-level global ``labelMat``
    instead of the ``classLabels`` parameter, so it only ran when such a
    global happened to exist — and could silently train on the wrong labels.
    """
    m, n = np.shape(dataMatIn)
    alpha = 0.01        # step size
    maxCycles = 200     # passes over the data
    weights = np.ones(n)
    for j in range(maxCycles):
        for i in range(m):
            # Sigmoid of the single-sample score (inlined logistic function).
            h = 1.0 / (1 + np.exp(-sum(dataMatIn[i] * weights)))
            error = h - classLabels[i]   # fix: use the parameter, not a global
            weights = weights - alpha * error * dataMatIn[i]  # weight update
    return weights

# Driver: load the data, fit weights by stochastic gradient descent, print them.

dataMat,labelMat=loadDataSet()

weights=stocGradAscent(np.array(dataMat),labelMat)

print(weights)

# Plot the samples and the fitted decision boundary.

def plotBestFit(dataMat, labelMat, weights):
    """Plot both classes and the line where the model score w·x is zero."""
    arr = np.array(dataMat)
    class1 = ([], [])   # x, y coordinates of label-1 samples
    class0 = ([], [])   # x, y coordinates of the rest
    for row, label in zip(arr, labelMat):
        bucket = class1 if label == 1 else class0
        bucket[0].append(row[1])
        bucket[1].append(row[2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(class1[0], class1[1], s=30, c='red')
    ax.scatter(class0[0], class0[1], s=30, c='green')
    xs = np.arange(-3.0, 3.0, 0.1)
    # Boundary: w0 + w1*x1 + w2*x2 = 0  =>  x2 = (-w0 - w1*x1) / w2
    ys = (-weights[0] - weights[1] * xs) / weights[2]
    ax.plot(xs, ys)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()

# Driver: plot the boundary found by stochastic gradient descent
# (weights is already a plain ndarray here, so no getA() conversion is needed).

plotBestFit(dataMat,labelMat,weights)

sklearn库下的实践:

import numpy as np

from sklearn.model_selection import train_test_split

# Load the data set.

def loadDataSet():
    """Load 'testSet.txt': two float features plus an int label per line.

    A leading 1.0 is added to each feature row as the bias term.
    Returns (dataMat, labelMat).
    """
    dataMat, labelMat = [], []
    # 'with' ensures the file is closed (the original never closed it).
    with open('testSet.txt') as fr:
        for line in fr:
            lineArr = line.strip().split()  # split on whitespace after stripping
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])  # feature row
            labelMat.append(int(lineArr[2]))  # label
    return dataMat, labelMat

# Build training and test sets (70/30 split; fixed seed for reproducibility).

dataMat,labelMat=loadDataSet()

X_train, X_test, y_train, y_test = train_test_split(dataMat, labelMat, test_size=0.3, random_state=0)

# Preprocessing: standardize features using statistics from the training set only.

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

sc.fit(X_train)

X_train_std = sc.transform(X_train)

X_test_std = sc.transform(X_test)

# Train the model (large C means very weak L2 regularization).

from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(C=10000.0, random_state=0)

lr.fit(X_train_std, y_train)

#预测

import matplotlib.pyplot as plt

def plotPredict(X_test_std, y_test, model):
    """Scatter true classes (red/green) and overlay prediction outcomes.

    Yellow triangles mark correctly classified test points, yellow crosses
    the misclassified ones. Also prints the true and predicted labels.
    """
    predicted = list(model.predict(X_test_std))
    print(y_test)
    print(predicted)
    total = np.shape(X_test_std)[0]  # number of test samples
    ones_x, ones_y = [], []      # true label == 1
    zeros_x, zeros_y = [], []    # true label != 1
    hit_x, hit_y = [], []        # prediction matches the true label
    miss_x, miss_y = [], []      # prediction disagrees
    for i in range(total):
        if y_test[i] == 1:
            ones_x.append(X_test_std[i, 1])
            ones_y.append(X_test_std[i, 2])
        else:
            zeros_x.append(X_test_std[i, 1])
            zeros_y.append(X_test_std[i, 2])
    for i in range(total):
        if predicted[i] == y_test[i]:
            hit_x.append(X_test_std[i, 1])
            hit_y.append(X_test_std[i, 2])
        else:
            miss_x.append(X_test_std[i, 1])
            miss_y.append(X_test_std[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(ones_x, ones_y, s=30, c='red')
    ax.scatter(zeros_x, zeros_y, s=30, c='green')
    # NOTE: the correct-prediction layer uses the default marker size,
    # exactly as in the original code.
    ax.scatter(hit_x, hit_y, c='yellow', marker='v')
    ax.scatter(miss_x, miss_y, s=30, c='yellow', marker='x')
    plt.show()

# Driver: visualize prediction results on the standardized test set.
plotPredict(X_test_std,y_test,lr)

猜你喜欢

转载自blog.csdn.net/uncledrew2017/article/details/82750357