代码
import math

import matplotlib.pyplot as plt
import numpy as np
import scipy.optimize as op
from scipy import optimize
from scipy.io import loadmat
from scipy.special import expit
def displayData(X, *example_width):
    """Plot the rows of ``X`` as a grid of grayscale images.

    Parameters
    ----------
    X : array-like, shape (m, n)
        One flattened image per row.
    *example_width : int, optional
        Width in pixels of a single image. When omitted it defaults to
        ``round(sqrt(n))``; the image height is then ``n // width``.

    Raises
    ------
    ValueError
        If ``n`` is not a whole multiple of the image width.
    """
    X = np.asarray(X)
    m, n = X.shape
    if m == 0:
        # Nothing to draw; also avoids dividing by a zero row count below.
        return

    if example_width:
        width = int(example_width[0])
    else:
        width = int(round(np.sqrt(n)))
    height, remainder = divmod(n, width)
    if remainder:
        raise ValueError(
            'cannot split %d pixels into rows of width %d' % (n, width))

    # Near-square grid large enough to hold all m images.
    rows = math.floor(np.sqrt(m))
    cols = math.ceil(m / rows)

    # squeeze=False keeps ax_array 2-D even for a single row or column.
    _fig, ax_array = plt.subplots(
        nrows=rows, ncols=cols, sharey=True, sharex=True, figsize=(8, 8),
        squeeze=False)

    # Walking the axes in row-major order is equivalent to indexing image
    # number ``row * cols + column``.
    # NOTE(review): pixels are reshaped in NumPy's row-major order, as in the
    # original code; data exported from MATLAB may appear transposed.
    for index, ax in enumerate(ax_array.flat):
        if index < m:
            ax.matshow(X[index].reshape((height, width)), cmap='gray_r')
        else:
            # The grid may have more cells than there are images.
            ax.axis('off')
        ax.set_xticks([])
        ax.set_yticks([])
    plt.show()
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s). Plain Python sequences are accepted as well as
        NumPy arrays.

    Returns
    -------
    Scalar or ndarray with the same shape as ``z``, with values in [0, 1].
    """
    # Delegate to SciPy's vectorized logistic function instead of
    # hand-rolling the exp arithmetic.
    return expit(z)
def lrCostFunction_J(theta_t, X_t, y_t, lambda_t):
    """Regularized logistic-regression cost.

    Parameters
    ----------
    theta_t : array-like, shape (n,) or (n, 1)
        Model parameters; ``theta_t[0]`` is the bias and is not regularized.
    X_t : ndarray, shape (m, n)
        Design matrix, including the leading column of ones.
    y_t : array-like, shape (m,) or (m, 1)
        Binary labels (0 or 1).
    lambda_t : float
        Regularization strength.

    Returns
    -------
    float
        The scalar cost J(theta).
    """
    # Flatten both vectors so the arithmetic below is strictly element-wise.
    # A (m,) hypothesis combined with a (m, 1) label column would otherwise
    # broadcast to an (m, m) matrix and silently produce a wrong cost.
    theta = np.asarray(theta_t, dtype=float).ravel()
    y = np.asarray(y_t, dtype=float).ravel()
    m = y.shape[0]

    z = np.asarray(X_t).dot(theta)
    # -log(sigmoid(z)) == logaddexp(0, -z) and -log(1 - sigmoid(z)) ==
    # logaddexp(0, z); this form stays finite when the sigmoid saturates.
    data_cost = np.sum(
        y * np.logaddexp(0, -z) + (1 - y) * np.logaddexp(0, z)) / m
    # The bias term theta[0] is excluded from the penalty.
    reg_cost = lambda_t / (2 * m) * np.sum(theta[1:] ** 2)
    return data_cost + reg_cost
def lrCostFunction_grad(theta_t, X_t, y_t, lambda_t):
    """Gradient of the regularized logistic-regression cost.

    Parameters
    ----------
    theta_t : array-like, shape (n,) or (n, 1)
        Model parameters; ``theta_t[0]`` is the bias and is not regularized.
    X_t : ndarray, shape (m, n)
        Design matrix, including the leading column of ones.
    y_t : array-like, shape (m,) or (m, 1)
        Binary labels (0 or 1).
    lambda_t : float
        Regularization strength.

    Returns
    -------
    ndarray, shape (n,)
        Flat gradient vector, the layout ``scipy.optimize.minimize`` expects
        from ``jac``.
    """
    # Work with flat vectors throughout: the optimizer passes theta as (n,),
    # and mixing (m, 1) with (m,) operands would broadcast to (m, m).
    theta = np.asarray(theta_t, dtype=float).ravel()
    y = np.asarray(y_t, dtype=float).ravel()
    m = y.shape[0]

    residual = expit(X_t.dot(theta)) - y

    # Copy before zeroing the bias so the caller's theta is left untouched.
    penalized = theta.copy()
    penalized[0] = 0

    return (X_t.T.dot(residual) + lambda_t * penalized) / m
def oneVsAll(X, y, num_labels, Mylambda):
    """Train one regularized logistic-regression classifier per class.

    Parameters
    ----------
    X : ndarray, shape (m, n)
        Training examples, without the bias column.
    y : array-like, shape (m,) or (m, 1)
        Class labels, compared against the integers ``1..num_labels``.
    num_labels : int
        Number of classes.
    Mylambda : float
        Regularization strength passed to the cost and gradient functions.

    Returns
    -------
    ndarray, shape (num_labels, n + 1)
        Row ``i - 1`` holds the parameters learned for class ``i``.
    """
    all_theta = np.zeros((num_labels, X.shape[1] + 1))
    # Prepend the bias column once; it is shared by every classifier.
    design = np.column_stack((np.ones((X.shape[0], 1)), X))
    labels = np.asarray(y).ravel()

    for i in range(1, num_labels + 1):
        print('Learning class:', i)
        initial_theta = np.zeros(design.shape[1])
        # One-vs-rest target: 1 for the current class, 0 for all others.
        # Kept as a column vector, the layout the gradient function is
        # called with.
        y_i = (labels == i).astype(int).reshape(-1, 1)
        ret = op.minimize(fun=lrCostFunction_J, x0=initial_theta,
                          args=(design, y_i, Mylambda), method='TNC',
                          jac=lrCostFunction_grad, options={'disp': False})
        all_theta[i - 1, :] = ret.x
    return all_theta
def predictOneVsAll(all_theta, X):
    """Predict a class label for each row of ``X``.

    Parameters
    ----------
    all_theta : ndarray, shape (num_labels, n + 1)
        One row of parameters per class, bias first.
    X : ndarray, shape (m, n)
        Examples to classify, without the bias column.

    Returns
    -------
    ndarray, shape (m,)
        Predicted labels in ``1..num_labels``.
    """
    X = np.asarray(X)
    design = np.column_stack((np.ones((X.shape[0], 1)), X))
    # The sigmoid is monotonic, so the class with the largest linear score
    # is also the class with the largest probability. Ranking the raw scores
    # avoids ties that appear once several probabilities round to exactly
    # 1.0 (or 0.0) in floating point.
    scores = design.dot(np.asarray(all_theta).T)  # one column per class
    # Labels are 1-based, argmax indices are 0-based.
    return np.argmax(scores, axis=1) + 1
if __name__ == '__main__':
    # Setup the parameters you will use for this part of the exercise
    input_layer_size = 400  # 20x20 input image of digit
    num_labels = 10  # 10 labels, from 1 to 10

    # =========== Part 1: Loading and Visualizing Data =============
    print('Loading and visualizing Data ...')
    file = 'ex3data1'
    data = loadmat(file)  # loadmat returns a dict keyed by variable name
    X = data['X']
    y = data['y']
    # print(data.keys())  # lists every key stored in the .mat file
    m, n = X.shape

    # Randomly select up to 100 distinct data points to display. A
    # permutation can pick row 0 and never repeats a row, unlike
    # randint(1, m, size=100).
    rand_indices = np.random.permutation(m)[:100]
    sel = X[rand_indices, :]
    displayData(sel)
    print('='*40)

    # ============ Part 2a: Vectorize Logistic Regression ============
    print('Testing lrCostFunction() with regularization')
    theta_t = np.array([-2, -1, 1, 2])
    theta_t = theta_t.reshape((theta_t.shape[0], 1))
    X_t = np.column_stack(
        (np.ones((5, 1)), (np.array([range(1, 16)])/10).reshape(3, 5).T))
    y_t = np.array([1, 0, 1, 0, 1]).reshape(5, 1)
    lambda_t = 3
    J = lrCostFunction_J(theta_t, X_t, y_t, lambda_t)
    grad = lrCostFunction_grad(theta_t, X_t, y_t, lambda_t)
    print('Cost:', J)
    print('Expected cost: 2.534819')
    print('Gradients:\n', grad)
    print('Expected gradients :\n[0.146561 -0.548558 0.724722 1.398003]')
    print('='*40)

    # ============ Part 2b: One-vs-All Training ============
    print('Training One-vs-All Logistic Regression...')
    # Train with SciPy's optimizer.
    Mylambda = 0.1
    all_theta = oneVsAll(X, y, num_labels, Mylambda)
    # Optionally cache the trained parameters so later parts can be debugged
    # without retraining:
    # np.save('all_theta1', all_theta)
    # Load parameters trained with MATLAB's fmincg (reported accuracy 0.949):
    # para = loadmat('all_theta_mat')
    # all_theta = para['all_theta']
    # Load parameters trained in Python (reported accuracy 0.7974):
    # all_theta = np.load('all_theta1.npy')
    print('='*40)

    # ================ Part 3: Predict for One-Vs-All ================
    pred = predictOneVsAll(all_theta, X)
    # y is stored as a column; flatten it so the comparison is element-wise.
    accuracy = np.mean(pred == y.flatten())
    print('Training set Accuracy:', accuracy)
    print('='*40)
运行结果
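从结果可以看到,准确度只有0.7974,这与matlab上运行的0.949相差很大。本人认为原因是对python上可用的优化算法不太熟练,目前只尝试过用TNC优化方法进行优化,还找不到类似于matlab的fmincg的优化方法,日后找到更好的优化方法会继续更新。如果有读者找到了欢迎跟博主共享。(注:更可能的原因在代价函数本身:op.minimize传入的theta_t的shape为(401, ),于是lrCostFunction_J中h的shape为(5000, ),而y_t的shape为(5000, 1),两者相乘时会被广播成5000x5000的矩阵,算出的代价值是错误的,并且与lrCostFunction_grad返回的梯度不一致。)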
踩到的坑
1、在定义损失函数:lrCostFunction时,theta[0]不应参与正则化。在处理时应注意:
theta = theta_t.copy(),此处如果不加copy(),结果就是将theta指向theta_t的内存单元,即共享同一个内存单元,如果修改theta:
theta[0] = 0 ,则theta_t[0]也会变成0。
2、在运行oneVsAll函数时,报错:“operands could not be broadcast together with shapes (401,5000) (401,) ”
报错位置:lrCostFunction_grad( )里的grad=...那行
报错原因:h.shape=(5000, )
解决办法:将h reshape为5000x1的
3、报错:“invalid gradient vector from minimized function”
报错位置:op.minimize()函数所在行
报错原因:在运行oneVsAll( ) 函数时,传入到 lrCostFunction( )的 theta_t 的shape发生变化,变为(401, ),应为(401, 1)
解决办法:将theta_t reshape为(401, 1) :theta_t = theta_t.reshape(theta_t.shape[0], 1)
应加深对numpy中参数广播的理解。
4、在执行优化op.minimize()时,由于是分别训练10个分类器的参数theta,在每次训练时忘记处理标签,即未把需要训练的分类器的标签置为1,其余类别置为0
导致训练出来的参数全为0。
method='TNC' 指的是使用截断牛顿法(truncated Newton)进行优化
https://docs.scipy.org/doc/scipy/reference/optimize.minimize-tnc.html#optimize-minimize-tnc