1、假设线性函数(以第 1 个样本的两个特征 x11、x12 为例)是:
f(x) = θ0 + θ1*x11 + θ2*x12
激活函数:将数值映射为 (0, 1) 区间内的概率值。
这里只能用 sigmoid 函数(relu 的输出不在 (0,1) 内,不能作为概率):
g(z) = 1/(1+e^(-z))  # e ≈ 2.718
z = f(x)
# 逻辑回归问题的假设函数:
h(x) = 1/(1+e^(-(θ0+θ1*x11 + θ2*x12)))
h(x) ∈ (0, 1),通常以 0.5 为分界线;这里以 0.7 为例:h(x) >= 0.7 判为 1,小于 0.7 判为 0。
决策边界满足 h(x) = 0.7,即:
1/(1+e^(-(θ0+θ1*x11 + θ2*x12))) = 0.7
2、逻辑回归问题的成本函数(交叉熵),假设有 m 个样本:
J(θ) = (1/m) * Σ_{i=1..m} [ -y(i)*log(h(x(i))) - (1-y(i))*log(1 - h(x(i))) ]
其中 h(x(i)) = 1/(1+e^(-(θ0+θ1*xi1 + θ2*xi2)))
3、基于成本函数,求其极小值,需要对各个 θj 求偏导。
记 h = g(z),z = θ0 + θ1*xi1 + θ2*xi2,利用 sigmoid 的导数性质:
g'(z) = g(z)*(1 - g(z))
对单个样本,链式求导:
∂/∂θj [ -y*log(h) - (1-y)*log(1-h) ]
  = -y*(1/h)*h*(1-h)*xj - (1-y)*(1/(1-h))*(-1)*h*(1-h)*xj
  = -y*(1-h)*xj + (1-y)*h*xj
  = (h - y)*xj
于是对 m 个样本求和(截距项对应 xj ≡ 1):
∂J/∂θ0 = (1/m) * Σ_{i=1..m} (h(x(i)) - y(i))
∂J/∂θ1 = (1/m) * Σ_{i=1..m} (h(x(i)) - y(i)) * xi1
∂J/∂θ2 = (1/m) * Σ_{i=1..m} (h(x(i)) - y(i)) * xi2
以上存在规律:
在 X 扩充全为 1 的截距列之后,梯度可以写成矩阵形式:
J'(θ) = X.T.dot(h(X.dot(θ)) - y) / m
算法实现:
# 手写实现逻辑回归算法
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# 1. Math helpers: define the hypothesis building blocks.
def sigmoid(x):
    """Map a real value (or ndarray, elementwise) to (0, 1) via 1/(1+e^-x)."""
    # The unreachable `pass` after `return` in the original was removed.
    return 1. / (1 + np.exp(-x))
# Hypothesis function: X is an (m, n) matrix, W an (n,) weight vector.
def hyFunction(X, W):
    """Return the predicted probabilities sigmoid(X @ W) as a 1-D array."""
    # sigmoid is already vectorized, so apply it to the whole product at
    # once instead of the original per-element list comprehension.
    return sigmoid(X.dot(W))
# 2. Cost function: cross-entropy (from information theory) plus L2 penalty.
def costFunction(X, W, y, λ):
    """Mean cross-entropy of sigmoid(X @ W) against labels y, plus an
    L2 penalty λ * sum(W**2), averaged over the m samples.
    """
    px = sigmoid(X.dot(W))  # vectorized instead of a per-row comprehension
    # Guard against log(0): keep the original "0 when the argument <= 0"
    # convention by evaluating the log only where it is defined.
    a = np.log(px, out=np.zeros_like(px), where=px > 0)
    b = np.log(1 - px, out=np.zeros_like(px), where=(1 - px) > 0)
    # NOTE(review): the penalty also covers the intercept W[0]; conventional
    # L2 regularization skips the bias — kept as-is to preserve behavior.
    return (np.sum((-y * (a) - (1 - y) * (b))) + λ*np.sum(W**2)) / len(X)
# 3. Gradient of the cost: X sample matrix, W weights, y actual labels.
def gradientFunction(X, W, y, λ):
    """Return the gradient of J w.r.t. every weight at once:
    (X^T (h(XW) - y) + λ W) / m.
    """
    # Evaluate the hypothesis once; the original recomputed it three
    # times and left two unused locals (`d`, `f`) behind.
    err = hyFunction(X, W) - y
    return (X.T.dot(err) + λ*W) / len(X)
# 4. Gradient descent: X samples, w initial weights, costFunc cost function,
#    gFunc gradient function, lamb learning rate, tolance convergence
#    threshold, times iteration cap, λ regularization strength.
def gradientDescent(X, w, y, costFunc, gFunc, lamb=0.005, tolance=1.0e-5, times=50000, λ=0.001):
    """Minimize costFunc by gradient descent and return the fitted weights.

    Stops when one step changes the cost by no more than `tolance`, or
    after the iteration cap is exceeded (to avoid non-convergent loops).
    """
    result = costFunc(X, w, y, λ)  # cost at the current point
    t = 0
    while True:
        # Take one step along the negative gradient.
        g = gFunc(X, w, y, λ)
        newW = w - lamb * g
        newResult = costFunc(X, newW, y, λ)
        # Converged: return the pre-step point, exactly as the original
        # duplicated pre-loop/while structure did.
        if np.abs(result - newResult) <= tolance:
            return w
        w = newW
        result = newResult
        t += 1
        if t > times:  # safety cap when the descent fails to converge
            return w
# 5. Load the training data and verify the algorithm end-to-end.
# (The original comments said "regularization"; what happens below is
# feature standardization: zero mean, unit variance.)
# NOTE(review): the file handle passed to loadtxt is never closed —
# consider passing the path directly or using a `with` block.
X = np.loadtxt(open('exam_score.csv', 'r'), delimiter=",",skiprows=1)
y = X[:,2]          # third column holds the 0/1 labels
X = X[:, [0, 1]]    # first two columns are the score features
XXX = X             # keep the raw (unstandardized) features around
# Standardization is required so gradient descent behaves well.
X = (X - X.mean(axis=0)) / X.std(axis=0)
row = X.shape[0]
one = np.ones(row)
one = one[:,np.newaxis]
# X = np.hstack((X, one))
X= np.c_[one, X]    # prepend the all-ones intercept column
dataFrame = pd.DataFrame(X)
dataFrame.to_csv('data.csv')
# Initial guess for the coefficients.
w = gradientDescent(X, np.array([0.5, 0.5, 0.5]), y, costFunction, gradientFunction)
print(w)
# Validate the fitted model on a few hand-made samples.
XX = np.array([[20,20],
               [88, 90],
               [90,90],
               [11, 12],
               [50,50]])
# BUG FIX: the validation data must be standardized with the TRAINING
# set's mean/std (raw training features kept in XXX), not with its own
# per-batch statistics — the model was fitted on the training scale, so
# using the validation set's own mean/std feeds it wrongly scaled inputs.
XX = (XX - XXX.mean(axis=0)) / XXX.std(axis=0)
row = XX.shape[0]
one = np.ones(row)
one = one[:,np.newaxis]
XX= np.c_[one, XX]
# Predict class probabilities for the validation samples.
print(hyFunction(XX, w))
# 6. For simple 2-D data the result can be inspected graphically: this is
#    the decision boundary where hyFunction(XX, w) = 0.7.
def splitLineFunc(X11):
    """Return the x2 coordinate on the p=0.7 decision line for x1 = X11.

    Derived from sigmoid(z) = 0.7 => z = -ln(3/7), with fitted weights
    hard-coded (roughly θ0≈0.57, θ1≈1.98, θ2≈1.8) — TODO: compute the
    line from the trained `w` instead of baking the numbers in.
    The unreachable `pass` after `return` was removed.
    """
    return -np.log(3 / 7) / 1.8 - 1.1 * X11 - 0.57 / 1.8
# Plot the samples and the hand-derived decision line.
Y = splitLineFunc(X[:,1])  # boundary x2 value for each sample's x1
Z = y
Z = np.where(Z==1, 10, 0)  # map the 0/1 labels to two distinct color codes
plt.scatter(X[:, 1], X[:, 2], c= Z)  # samples, colored by class
plt.plot(X[:, 1], Y)  # the p=0.7 decision boundary
plt.show()