Multinomial Event Model and Neural Networks - Machine Learning Open Course, Lecture 6

This note corresponds to the sixth video of the open course. In the original lecture, Ng covers the multinomial event model, neural networks, and some basic concepts of support vector machines (SVMs). Because the video is quite old, Ng's treatment of neural networks there is not detailed, so I draw on Ng's newer lectures to focus on the neural network model, and defer SVMs to the next note, where they are developed as a complete algorithm.
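The original post embedded the lecture notes as images, which did not survive here. As a brief stand-in for the first topic, this is the multinomial event model in its standard CS229 form (a reconstruction from the standard notes, not the original screenshots). A document is represented as a sequence of word tokens $x_1, \dots, x_n$, each taking a value in the vocabulary $\{1, \dots, |V|\}$, and the joint likelihood of a document and its label is

$$p(x, y) = p(y) \prod_{j=1}^{n} p(x_j \mid y).$$

With Laplace smoothing, the maximum-likelihood estimate of each word probability is

$$\phi_{k \mid y=1} = \frac{\sum_{i=1}^{m} \sum_{j=1}^{n_i} 1\{x_j^{(i)} = k \wedge y^{(i)} = 1\} + 1}{\sum_{i=1}^{m} 1\{y^{(i)} = 1\}\, n_i + |V|}.$$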
Python implementation:
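The listing below builds a 2-5-3-1 fully connected network with sigmoid activations and trains it with batch gradient descent. For reference, these are the equations the code implements ($g$ is the sigmoid, each activation gets a bias column of ones prepended, and the $[:,\,1:]$ slice drops the bias column when propagating errors backwards):

$$z^{(l+1)} = a^{(l)} W_l^T, \qquad a^{(l+1)} = g(z^{(l+1)})$$

$$\delta^{(4)} = a^{(4)} - y, \qquad \delta^{(l)} = \big(\delta^{(l+1)} W_l\big)_{[:,\,1:]} \odot g'(z^{(l)}), \qquad \frac{\partial J}{\partial W_l} = \frac{1}{m}\,(\delta^{(l+1)})^T a^{(l)}, \qquad W_l \leftarrow W_l - \alpha\,\frac{\partial J}{\partial W_l}$$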

import numpy as np
import xlrd
import matplotlib.pyplot as plt

#  Network architecture: 2-5-3-1 (input - hidden1 - hidden2 - output)
input_dim = 2  # input layer dimensionality
output_dim = 1  # output layer dimensionality
lr = 0.5  # learning rate for gradient descent

# Load the data; returns the feature set and the label set as numpy arrays
def load_data(filename):
    workbook = xlrd.open_workbook(filename)
    boyinfo = workbook.sheet_by_index(0)
    col_num = boyinfo.ncols
    row_num = boyinfo.nrows
    # Feature set and label set
    dataset = []
    labelset = []
    print(col_num, row_num)
    for i in range(1, row_num):
        row = boyinfo.row_values(i)[0:]
        dataset.append([row[0], row[1]])
        labelset.append(row[2])
    return np.array(dataset), np.array(labelset)

# Mean-normalize the data: subtract the column mean, divide by the column range
def normalization(X):
    Xmin = np.min(X, axis=0)
    Xmax = np.max(X, axis=0)
    Xmu = np.mean(X, axis=0)
    X_norm = (X-Xmu)/(Xmax-Xmin)
    return X_norm

# Randomly initialize the weights of a layer with L_in inputs and L_out outputs
def randInitializeWeights(L_in, L_out):
    epsilon = 0.12
    # np.random.rand draws samples uniformly from [0, 1); scale and shift them
    # so the weights fall in [-epsilon, epsilon)
    W = np.random.rand(L_out, 1+L_in)*(2*epsilon)-epsilon
    return W

# Sigmoid activation: squashes its input into the interval (0, 1)
def sigmoid(z):
    g = 1 / (1 + np.exp(-z))  # g(z) = 1 / (1 + e^(-z))
    return g  # return the activation

# Gradient of the sigmoid function: g'(z) = g(z) * (1 - g(z))
def sigmoidGradient(z):
    g = np.array(1.0 / (1.0 + np.exp(-z)))  # the sigmoid itself
    g = g * (1 - g)  # elementwise derivative
    return np.mat(g)  # return a matrix so np.multiply works with the deltas

# Cross-entropy loss over the training set
def calculate_loss(model, x_m, y):
    m = y.shape[0]
    W1, W2, W3 = model["W1"], model["W2"], model["W3"]
    # Forward pass through the 2-5-3-1 network (x_m already carries the bias column)
    a1 = np.mat(x_m)
    z2 = a1 * W1.T
    a2 = sigmoid(z2)
    a2 = np.column_stack((np.ones((m, 1)), a2))
    z3 = a2 * W2.T
    a3 = sigmoid(z3)
    a3 = np.column_stack((np.ones((m, 1)), a3))
    z4 = a3 * W3.T
    a4 = sigmoid(z4)
    one = np.multiply(y, np.log(a4))
    two = np.multiply((1-y), np.log(1-a4))
    media_para = (one+two).sum()
    data_loss = -media_para/m
    return data_loss

# Threshold: predictions above 0.5 are labeled positive (1.0), the rest negative (0.0)
def compare(X):
    num = X.shape[0]
    result = []
    for i in range(num):
        if X[i, 0]>0.5:
            result.append(1.0)
        else:
            result.append(0.0)
    return result

# Forward pass followed by thresholding; returns class predictions
# (despite its name, this function computes predictions, not a precision score)
def precision(model, x_m):
    m = x_m.shape[0]
    W1, W2, W3 = model["W1"], model["W2"], model["W3"]
    a1 = np.mat(x_m)
    z2 = a1 * W1.T
    a2 = sigmoid(z2)
    a2 = np.column_stack((np.ones((m, 1)), a2))
    z3 = a2 * W2.T
    a3 = sigmoid(z3)
    a3 = np.column_stack((np.ones((m, 1)), a3))
    z4 = a3 * W3.T
    a4 = sigmoid(z4)
    result = compare(a4)
    return result

# Build the neural network model.
# build_model takes the feature matrix X, the label vector y, the sizes of the
# two hidden layers, the iteration count, and a flag for printing the loss.
# It runs forward and backward propagation to train the weights W1, W2, W3.
def build_model(X, y, hidden1_dim,hidden2_dim, iterNum=2000, print_loss=False):
    # Number of training examples
    m = X.shape[0]
    # Initialize all weights in the network
    W1 = randInitializeWeights(input_dim, hidden1_dim)
    W2 = randInitializeWeights(hidden1_dim, hidden2_dim)
    W3 = randInitializeWeights(hidden2_dim, output_dim)
    # Store the model as a dict of weight matrices
    model = {}
    # Batch gradient descent
    logName = "logText.txt"  # log file name
    logFile = open(logName, "w")  # open the log file for writing
    for t in range(iterNum):
        # Forward propagation: prepend the bias column, then apply each layer
        x_m = np.column_stack((np.ones((m, 1)), X))
        a1 = np.mat(x_m)
        z2 = a1*W1.T
        a2 = sigmoid(z2)
        a2 = np.column_stack((np.ones((m, 1)), a2))
        z3 = a2*W2.T
        a3 = sigmoid(z3)
        a3 = np.column_stack((np.ones((m, 1)), a3))
        z4 = a3*W3.T
        a4 = sigmoid(z4)
        # Backpropagation
        y_m = np.reshape(y, [-1, 1])
        # Output-layer error
        delta4 = a4-y_m
        # Hidden-layer errors: propagate back through the weights (dropping the
        # bias column) and scale by the sigmoid gradient
        delta3 = np.multiply((delta4*W3)[:, 1:], sigmoidGradient(z3))
        delta2 = np.multiply((delta3*W2)[:, 1:], sigmoidGradient(z2))

        # Gradients of the loss with respect to each weight matrix
        DW3 = (delta4.T * a3)/m
        DW2 = (delta3.T * a2)/m
        DW1 = (delta2.T * a1)/m
        # Update the parameters
        W1 -= lr * DW1
        W2 -= lr * DW2
        W3 -= lr * DW3

        model = {"W1":W1, "W2":W2, "W3":W3}
        if print_loss and t % 1000 == 0:
            print("Loss after iteration %i: %f" % (t, calculate_loss(model, x_m, y_m)))
            logFile.write("Loss after iteration %i: %f" % (t, calculate_loss(model, x_m, y_m)))
            logFile.write("\n")
            result = precision(model, x_m)
            print("Training Set Accuracy: {:f}".format((np.mean(result == y) * 100)))
            logFile.write("Training Set Accuracy: {:f}".format((np.mean(result == y) * 100)))
            logFile.write("\n")
    logFile.close()
    return model

# Predict: run a forward pass with the trained weights and return the raw
# sigmoid outputs for a feature matrix X
def predict(X, model):
    m = X.shape[0]
    W1, W2, W3 = model["W1"], model["W2"], model["W3"]
    x_m = np.column_stack((np.ones((m, 1)), X))
    a1 = np.mat(x_m)
    z2 = a1 * W1.T
    a2 = sigmoid(z2)
    a2 = np.column_stack((np.ones((m, 1)), a2))
    z3 = a2 * W2.T
    a3 = sigmoid(z3)
    a3 = np.column_stack((np.ones((m, 1)), a3))
    z4 = a3 * W3.T
    a4 = sigmoid(z4)
    return a4

# plotData plots the samples in X colored by their label y and returns the two
# plot handles p1, p2 for the legend
def plotData(X, y):
    # Indices of the positive (y == 1) and negative (y == 0) examples
    pos = np.where(y==1)
    neg = np.where(y==0)
    p1 = plt.plot(X[pos, 0], X[pos, 1], linestyle='none', marker='s', markersize=7, color='red')[0]
    p2 = plt.plot(X[neg, 0], X[neg, 1], linestyle='none', marker='o', markersize=7, color='green')[0]
    return p1,p2

# Plot the decision boundary learned by the network
def plot_decision_boundary(predict_func,X_norm,y):
    x_min, x_max = X_norm[:, 0].min()-0.5, X_norm[:, 0].max()+0.5
    y_min, y_max = X_norm[:, 1].min()-0.5, X_norm[:, 1].max()+0.5
    # Grid step size
    stride = 0.01
    # Build 1-D coordinate arrays with np.arange, then expand them into xx, yy grids with np.meshgrid
    x_med = np.arange(x_min, x_max, stride)
    y_med = np.arange(y_min, y_max, stride)
    xx, yy = np.meshgrid(x_med, y_med)
    # .ravel() flattens xx and yy into 1-D vectors; np.c_ stacks them as columns,
    # giving one (x, y) row per grid point to feed into predict_func
    mat = np.c_[xx.ravel(), yy.ravel()]
    Z = predict_func(mat)
    Z = (Z.T).reshape(xx.shape)
    p1, p2 = plotData(X_norm, y)
    # Z holds sigmoid outputs, so the decision boundary is the 0.5 level set
    p3 = plt.contour(xx, yy, Z, levels=[0.5], linewidths=2)
    plt.xlabel("tall")  # 横坐标的标签为tall
    plt.ylabel("salary")  # 纵坐标的标签为salary
    plt.legend((p1, p2, p3), ('y = I like you', "y = I don't like you", 'Decision Boundary'), numpoints=1,
               handlelength=0)
    plt.title("ANN")

# Visualize the decision boundary of a trained model and save the figure
def visualize(X_norm,y,model):
    plot_decision_boundary(lambda x:predict(x, model), X_norm, y)
    plt.savefig("result.png")
    plt.show()

if __name__ == "__main__":
    dataset, labelset = load_data("data.xls")
    print("输入矩阵维度:",dataset.shape)
    print("标签维度:",labelset.shape)
    # 原始数据进行归一化处理
    X_norm = normalization(dataset)
    # 训练模型
    model = build_model(X_norm, labelset, 5, 3, iterNum=20000, print_loss=True)
    # 可视化
    visualize(X_norm, labelset, model)

Running the code:

[Figure: the training data with the learned decision boundary, saved by the script as result.png]

As the figure shows, the neural network has learned a nonlinear classifier that correctly separates the samples.
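As a minimal usage sketch, a new sample could be scored once the script above has run (so dataset and model are in scope). The sample values are made up for illustration, and the normalization statistics are recomputed from the training data because the normalization function does not save them:

new_x = np.array([[1.75, 8000.0]])  # hypothetical (tall, salary) sample
# Re-derive the training set's normalization statistics
Xmin, Xmax, Xmu = np.min(dataset, axis=0), np.max(dataset, axis=0), np.mean(dataset, axis=0)
new_x_norm = (new_x - Xmu) / (Xmax - Xmin)
# predict returns the raw sigmoid output in (0, 1)
prob = predict(new_x_norm, model)
print("P(y = 1) =", float(prob[0, 0]), "-> class", 1 if prob[0, 0] > 0.5 else 0)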

Reposted from blog.csdn.net/h__ang/article/details/84113072