《机器学习》编程作业的Python实现【ex3.py】

代码

import numpy as np
from scipy.io import loadmat
import matplotlib.pyplot as plt
import scipy.optimize as op
import math
from scipy import optimize


def displayData(X, *example_width):
    """Show every row of ``X`` as a grayscale image in a grid of subplots.

    Args:
        X: 2-D array with one flattened image per row.
        *example_width: optional single positional value giving the image
            width in pixels. When omitted, ``round(sqrt(X.shape[1]))`` is
            used (20 for the 400-pixel digits of this exercise).

    The grid has ``floor(sqrt(m))`` rows and enough columns to hold all
    ``m`` images; cells without an image are left blank.
    """
    if example_width == ():
        width = int(round(np.sqrt(X.shape[1])))
    else:
        # *example_width collects positionals into a tuple; use the first.
        width = int(example_width[0])

    # gray image
    m, n = X.shape
    height = n // width
    rows = math.floor(np.sqrt(m))
    cols = math.ceil(m / rows)
    # squeeze=False keeps ax_array 2-D even for a single row or column.
    fig, ax_array = plt.subplots(
        nrows=rows, ncols=cols, sharey=True, sharex=True, figsize=(8, 8),
        squeeze=False)

    for row in range(rows):
        for column in range(cols):
            # Row-major position in the grid: each grid row holds `cols` images.
            index = cols * row + column
            axis = ax_array[row, column]
            if index < m:
                axis.matshow(
                    X[index].reshape((height, width)), cmap='gray_r')
            else:
                # More cells than images: leave the extra cell empty.
                axis.axis('off')
    plt.xticks([])
    plt.yticks([])
    plt.show()


def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)).

    Works elementwise on NumPy arrays and also accepts plain scalars.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator


def lrCostFunction_J(theta_t, X_t, y_t, lambda_t):
    """Return the regularized logistic-regression cost as a scalar.

    Args:
        theta_t: parameter vector, shape (n,) or (n, 1).
        X_t: design matrix of shape (m, n) whose first column is the bias.
        y_t: 0/1 targets, shape (m,) or (m, 1).
        lambda_t: regularization strength; theta_t[0] is not regularized.

    Returns:
        The scalar cost J(theta).
    """
    # Flatten both vectors. A 1-D theta (what scipy.optimize passes) combined
    # with a column y would otherwise broadcast y * log(h) to an (m, m)
    # matrix and produce a cost that is inconsistent with the gradient.
    theta = np.asarray(theta_t, dtype=float).ravel()
    y = np.asarray(y_t, dtype=float).ravel()
    m = y.shape[0]

    z = X_t.dot(theta)
    # -log(sigmoid(z)) == logaddexp(0, -z) and
    # -log(1 - sigmoid(z)) == logaddexp(0, z); this form stays finite when
    # the sigmoid saturates to exactly 0 or 1.
    data_cost = np.sum(
        y * np.logaddexp(0, -z) + (1 - y) * np.logaddexp(0, z)) / m
    # The bias term theta[0] is excluded from the penalty.
    reg_cost = lambda_t / (2 * m) * np.sum(theta[1:] ** 2)
    return data_cost + reg_cost


def lrCostFunction_grad(theta_t, X_t, y_t, lambda_t):
    """Return the gradient of the regularized logistic-regression cost.

    Args:
        theta_t: parameter vector, shape (n,) or (n, 1).
        X_t: design matrix of shape (m, n) whose first column is the bias.
        y_t: 0/1 targets, shape (m,) or (m, 1).
        lambda_t: regularization strength; theta_t[0] is not regularized.

    Returns:
        1-D array of length n, the form scipy.optimize expects from ``jac``.
    """
    # Work with flat vectors throughout so that neither a 1-D nor a column
    # theta/y can trigger an accidental (m, m) broadcast in ``h - y``.
    theta = np.asarray(theta_t, dtype=float).ravel()
    y = np.asarray(y_t, dtype=float).ravel()
    m = y.shape[0]

    h = sigmoid(X_t.dot(theta))

    # Copy before zeroing the bias entry so the caller's theta is untouched.
    penalized = theta.copy()
    penalized[0] = 0

    grad = X_t.T.dot(h - y) / m + lambda_t / m * penalized
    return grad


def oneVsAll(X, y, num_labels, Mylambda):
    """Train one regularized logistic-regression classifier per label.

    Args:
        X: feature matrix of shape (m, n), without a bias column.
        y: labels in 1..num_labels, shape (m,) or (m, 1).
        num_labels: number of classes.
        Mylambda: regularization strength passed to the cost and gradient.

    Returns:
        Array of shape (num_labels, n + 1); row ``i - 1`` holds the
        parameters of the classifier for label ``i``.
    """
    n = X.shape[1]
    all_theta = np.zeros((num_labels, n + 1))
    # Prepend the bias column of ones.
    X = np.column_stack((np.ones((X.shape[0], 1)), X))
    labels = np.asarray(y).reshape(-1, 1)
    for i in range(1, num_labels + 1):
        print('Learning class:', i)
        theta = np.zeros(X.shape[1])
        # Binary target for this classifier: 1 where the label is i, else 0.
        # Kept as an (m, 1) column, the shape the gradient function expects.
        y_i = (labels == i).astype(int)
        ret = op.minimize(fun=lrCostFunction_J, x0=theta,
                          args=(X, y_i, Mylambda), method='TNC',
                          jac=lrCostFunction_grad, options={'disp': False})
        all_theta[i - 1, :] = ret.x
    return all_theta


def predictOneVsAll(all_theta, X):
    """Predict a label in 1..num_labels for every row of ``X``.

    Args:
        all_theta: array of shape (num_labels, n + 1), one classifier per row.
        X: feature matrix of shape (m, n), without a bias column.

    Returns:
        1-D integer array of length m with the 1-based predicted labels.
    """
    X = np.column_stack((np.ones((X.shape[0], 1)), X))
    # The sigmoid is strictly increasing, so the class with the largest
    # linear score is also the one with the largest probability. Comparing
    # raw scores avoids ties when several sigmoids saturate to exactly 1.0.
    scores = X.dot(all_theta.T)  # (m, num_labels)
    prediction = np.argmax(scores, axis=1) + 1

    return prediction


if __name__ == '__main__':
    # Setup the parameters you will use for this part of the exercise
    input_layer_size = 400  # 20x20 input image of digit
    num_labels = 10  # 10 labels, from 1 to 10

    # =========== Part 1: Loading and Visualizing Data =============
    print('Loading and visualizing Data ...')
    file = 'ex3data1'
    data = loadmat(file)  # loadmat returns a dict keyed by variable name
    X = data['X']
    y = data['y']
    # print(data.keys())  # lists every key stored in data
    m, n = X.shape
    # Randomly select 100 distinct data points to display. A permutation
    # slice can pick row 0 and never repeats a row, unlike randint(1, m).
    rand_indices = np.random.permutation(m)[:100]
    sel = X[rand_indices, :]
    displayData(sel)
    print('='*40)

    # ============ Part 2a: Vectorize Logistic Regression ============
    print('Testing lrCostFunction() with regularization')
    theta_t = np.array([-2, -1, 1, 2])
    theta_t = theta_t.reshape((theta_t.shape[0], 1))
    X_t = np.column_stack(
        (np.ones((5, 1)), (np.array([range(1, 16)])/10).reshape(3, 5).T))
    y_t = np.array([1, 0, 1, 0, 1]).reshape(5, 1)
    lambda_t = 3
    J = lrCostFunction_J(theta_t, X_t, y_t, lambda_t)
    grad = lrCostFunction_grad(theta_t, X_t, y_t, lambda_t)
    print('Cost:', J)
    print('Expected cost: 2.534819')
    print('Gradients:\n', grad)
    print('Expected gradients :\n[0.146561   -0.548558   0.724722   1.398003]')
    print('='*40)

    # ============ Part 2b: One-vs-All Training ============
    print('Training One-vs-All Logistic Regression...')
    # Train with SciPy's optimizer.
    Mylambda = 0.1
    all_theta = oneVsAll(X, y, num_labels, Mylambda)

    # Save the trained parameters so later steps can be debugged without
    # retraining every time.
    # np.save('all_theta1',all_theta)

    # Load parameters trained with MATLAB's fmincg (the author reports an
    # accuracy of 0.949 with these).
    # para = loadmat('all_theta_mat')
    # all_theta = para['all_theta']

    # Load parameters trained in Python (the author reports an accuracy of
    # 0.7974 with these).
    # all_theta = np.load('all_theta1.npy')
    print('='*40)

    # ================ Part 3: Predict for One-Vs-All ================
    pred = predictOneVsAll(all_theta, X)
    accuracy = np.mean(pred == y.flatten())
    print('Training set Accuracy:', accuracy)
    print('='*40)

运行结果

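从结果可以看到，准确度只有0.7974，这与matlab上运行的0.949差太多了，其原因是本人对于python上可用的优化算法不太熟练，目前只尝试过用TNC优化方法进行优化，还找不到类似于matlab的fmincg优化方法，日后找到更好的优化方法会继续更新。如果有读者找到了欢迎跟博主共享。（补充说明：得到上述结果的lrCostFunction_J在接收op.minimize传入的一维theta_t（shape为(401,)）时，h的shape为(5000,)，而y_t的shape为(5000,1)，于是y_t*np.log(h)会被广播成(5000,5000)的矩阵，算出的代价与lrCostFunction_grad返回的梯度不一致；这很可能才是准确度偏低的主要原因，而不是TNC方法本身。）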

踩到的坑

1、在定义损失函数及其梯度（lrCostFunction_J 和 lrCostFunction_grad）时，theta[0]不应参与正则化。在处理时应注意：
theta = theta_t.copy()，此处如果不加copy()，结果就是将theta指向theta_t的内存单元，即共享同一个内存单元，如果修改theta：
theta[0] = 0 ，则theta_t[0]也会变成0。

2、在运行oneVsAll函数时，报错：“operands could not be broadcast together with shapes (401,5000) (401,) ”
       报错位置：lrCostFunction_grad( )里的grad=...那行
       报错原因：h.shape=(5000, )
       解决办法：将h reshape为5000x1的

3、报错：“invalid gradient vector from minimized function”
       报错位置：op.minimize()函数所在行
       报错原因：在运行oneVsAll( ) 函数时，op.minimize() 传给 lrCostFunction_grad( ) 的 theta_t 是一维数组，shape为(401, )，而函数内部的计算要求它是(401, 1)
       解决办法：将theta_t reshape为(401, 1) ：theta_t = theta_t.reshape(theta_t.shape[0], 1)

应加深对numpy中参数广播的理解。

4、在执行优化op.minimize()时，由于是分别训练10个分类器的参数theta，在每次训练时忘记处理标签，即未把需要训练的分类器的标签置为1，其余类别置为0
       导致训练出来的参数全为0。
       method='TNC' 指的是用截断牛顿法(truncated Newton)进行优化
       https://docs.scipy.org/doc/scipy/reference/optimize.minimize-tnc.html#optimize-minimize-tnc