Python Implementation of the Machine Learning Programming Assignments [ex2.py]

Code:

# Machine Learning Online Class - Exercise 2: Logistic Regression
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as op


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def plotData(X, y):
    index0 = list()
    index1 = list()
    j = 0
    for i in y:
        if i == 0:
            index0.append(j)
        else:
            index1.append(j)
        j = j + 1
    plt.scatter(X[index0, 0], X[index0, 1], marker='o')
    plt.scatter(X[index1, 0], X[index1, 1], marker='+')
    plt.xlabel('Exam 1 score')
    plt.ylabel('Exam 2 score')
    plt.legend(['Not admitted', 'Admitted'], loc='upper right')  # labels follow plot order: 'o' (y=0) first, '+' (y=1) second
    #plt.show()


def costFunction(initial_theta, X, y, myLambda):
    m = y.shape[0]
    #grad = np.zeros((initial_theta.shape))

    J = np.sum(np.dot((-1*y).T, np.log(sigmoid(np.dot(X, initial_theta))))
               - np.dot((1-y).T, np.log(1 - sigmoid(np.dot(X, initial_theta)))))/m
    #grad = np.dot(X.T, sigmoid(np.dot(X, initial_theta)) - y)/m
    return J  # , grad


def gradient(initial_theta, X, y, myLambda):
    m, n = np.shape(X)
    initial_theta = initial_theta.reshape((n, 1))
    # print(initial_theta.shape)
    #grad = np.zeros((initial_theta.shape))
    grad = np.dot(X.T, sigmoid(np.dot(X, initial_theta)) - y)/m
    #grad = ((X.T).dot(sigmoid(np.dot(X, initial_theta)) - y)) / m
    return grad.flatten()


def plotDecisionBoundary(theta, X, y):
    plotData(X[:, 1:], y)  # draws the scatter plot; plotData does not return a figure handle
    m, n = X.shape
    # Only need 2 points to define a line, so choose two endpoints
    if n <= 3:
        point1 = np.min(X[:, 1])
        point2 = np.max(X[:, 1])
        point = np.array([point1, point2])
        plot_y = -1*(theta[0] + theta[1]*point)/theta[2]
        plt.plot(point, plot_y, '-')
        plt.legend(['Not admitted', 'Admitted', 'Boundary'], loc='lower left')
    plt.show()
    return 0


def predict(theta, X):
    m, n = X.shape
    p = np.zeros((m, 1))
    k = np.where(sigmoid(X.dot(theta)) >= 0.5)
    p[k] = 1
    return p

if __name__ == '__main__':
# Load Data
    data = np.loadtxt('ex2data1.txt', delimiter=',')
    X = data[:, 0:2]
    y = data[:, 2].astype(int)
    y = np.reshape(y, (y.shape[0], 1))  # reshape y into an m x 1 column vector
# ==================== Part 1: Plotting ====================
    print('Plotting data with + indicating (y = 1) examples '
          'and o indicating (y = 0) examples')
    plotData(X, y)
    print("="*40)
# ============ Part 2: Compute Cost and Gradient ============
# Setup the data matrix appropriately, and add ones for the intercept term
    (m, n) = X.shape
    X = np.column_stack((np.ones((m, 1)), X))
    (m, n) = X.shape
    initial_theta = np.zeros((n, 1))  # Initialize fitting parameters
    myLambda = 0
# Compute and display initial cost and gradient
    cost = costFunction(initial_theta, X, y, myLambda)
    grad = gradient(initial_theta, X, y, myLambda)
    print('Cost at initial theta (zeros):', cost)
    print('Expected cost (approx): 0.693\n')
    print('Gradient at initial theta (zeros): ')
    print(grad)
    print('Expected gradients (approx):\n -0.1000\n -12.0092\n -11.2628\n')

# Compute and display cost and gradient with non-zero theta
    test_theta = np.array([[-24], [0.2], [0.2]])
    cost = costFunction(test_theta, X, y, myLambda)
    grad = gradient(test_theta, X, y, myLambda)
    print('Cost at test theta:', cost)
    print('Expected cost (approx): 0.218\n')
    print('Gradient at test theta: ')
    print(grad)
    print('Expected gradients (approx):\n 0.043\n 2.566\n 2.647\n')
    print("="*40)

# ============= Part 3: Optimizing using fminunc  =============
#  In this exercise, you will use a built-in function (MATLAB's fminunc; here
#  scipy.optimize.minimize) to find the optimal parameters theta.
    myLambda = 1  # note: costFunction and gradient above ignore myLambda (no regularization in ex2.py)
    Result = op.minimize(fun=costFunction, x0=initial_theta.flatten(),
                         args=(X, y, myLambda), method='TNC', jac=gradient)
# Alternative optimizers (not used):
# Result = op.fmin_tnc(func=costFunction, x0=initial_theta, fprime=gradient, args=(X, y, myLambda))
# Result = op.fmin_bfgs(f=costFunction, x0=initial_theta, fprime=gradient, args=(X, y, myLambda), gtol=1e-5, disp=1)
    theta = Result.x
    cost = Result.fun
    print('Cost at theta found by fminunc:', cost)
    print('Expected cost (approx): 0.203\n')
    print('theta:', theta)
    print('Expected theta (approx): [-25.161  0.206  0.201]\n')
    plotDecisionBoundary(theta, X, y)
    print('='*40)
#  ============== Part 4: Predict and Accuracies ==============
    sample = np.array([1, 45, 85])
    prob = sigmoid(np.dot(sample, theta))
    print('For a student with scores 45 and 85, we predict an admission '
          'probability of', prob)
    print('Expected value: 0.775 +/- 0.002\n\n')
# compute accuracy on our training set
    p = predict(theta, X)
    accuracy = np.mean(np.double(p == y)) * 100
    print('Train Accuracy:', accuracy)
    print('Expected accuracy (approx): 89.0')
    print('='*40)

Output

Pitfalls encountered:

1. I often mix up np.ones() and np.zeros() through careless typing.
2. Mishandling theta in the regularization term: I confidently zeroed out the entire first column of theta, and then indexed the wrong entries when stacking the vectors back together (curiously, the result for Lambda = 1 still came out correct).
   The correct approach is to set only the first element of theta to 0 (theta is an (n x 1) column vector), so that the intercept term is never penalized.
   Cause: an incomplete understanding of regularization. See the first sketch after this list.

3. The shape of an array can change by the time it reaches your function: scipy's optimizers flatten the parameter vector before calling the objective and gradient callbacks. The pitfall here:
   initial_theta was created as (28, 1), but inside def costFunctionReg_grad() it had become (28,);
   without a reshape, the code fails with "ValueError: operands could not be broadcast together with shapes (28,118) (28,)".
   I run into this one all the time! See the second sketch after this list.
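
For pitfall 2, here is a minimal sketch of a regularized cost/gradient that zeroes only the intercept entry of theta before adding the penalty. It follows the same column-vector conventions as ex2.py, but the function name costFunctionReg and the helper theta_reg are illustrative, not part of the script above:

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def costFunctionReg(theta, X, y, myLambda):
    m = y.shape[0]
    theta = theta.reshape((-1, 1))      # guard against a flattened theta (see pitfall 3)
    h = sigmoid(X.dot(theta))
    J = (-y.T.dot(np.log(h)) - (1 - y).T.dot(np.log(1 - h))) / m
    theta_reg = theta.copy()
    theta_reg[0] = 0                    # zero only the first ELEMENT, not the first column:
                                        # the intercept term is never penalized
    J = J + myLambda / (2 * m) * np.sum(theta_reg ** 2)
    grad = (X.T.dot(h - y) + myLambda * theta_reg) / m
    return J.item(), grad.flatten()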
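
For pitfall 3, the shape change comes from the optimizer itself: scipy.optimize hands the objective a 1-D theta no matter how x0 was created, so reshaping at the top of the callback restores the column-vector math. Below is a small self-contained demonstration; the toy data X_demo/y_demo and the function name objective are made up for illustration only:

import numpy as np
import scipy.optimize as op

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def objective(theta, X, y):
    # theta arrives here with shape (n,), even if it was created as (n, 1);
    # reshape back to (n, 1) so the math against y (m, 1) broadcasts correctly
    theta = theta.reshape((-1, 1))
    h = np.clip(sigmoid(X.dot(theta)), 1e-12, 1 - 1e-12)
    return float(np.mean(-y * np.log(h) - (1 - y) * np.log(1 - h)))

X_demo = np.column_stack((np.ones(5), np.arange(5), np.arange(5)[::-1]))  # intercept + 2 toy features
y_demo = np.array([[0], [1], [0], [1], [1]])                              # (5, 1) column vector
initial_theta = np.zeros((3, 1))                                          # created as (n, 1) ...
Result = op.minimize(fun=objective, x0=initial_theta.flatten(),           # ... but passed to the optimizer as (n,)
                     args=(X_demo, y_demo), method='TNC')
print(Result.x.shape)   # (3,): the optimizer always works with a flattened parameter vector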


Reposted from blog.csdn.net/LCCFlccf/article/details/84935572