Multinomial Event Model and Neural Networks - Machine Learning Open Course, Lecture 6

This note corresponds to the sixth video of the open course. In the original lecture, Ng covers the multinomial event model, neural networks, and some basic concepts of support vector machines (SVMs). Because the video is quite old, Ng's treatment of neural networks there is not detailed, so I draw on Ng's newer lectures to focus on the neural network model, and defer SVMs to the next note, where they are developed as a complete algorithm.
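The original post embedded the lecture notes as images, which did not survive here. As a brief stand-in for the first topic, this is the multinomial event model in its standard CS229 form (a reconstruction from the standard notes, not the original screenshots). A document is represented as a sequence of word tokens $x_1, \dots, x_n$, each taking a value in the vocabulary $\{1, \dots, |V|\}$, and the joint likelihood of a document and its label is

$$p(x, y) = p(y) \prod_{j=1}^{n} p(x_j \mid y).$$

With Laplace smoothing, the maximum-likelihood estimate of each word probability is

$$\phi_{k \mid y=1} = \frac{\sum_{i=1}^{m} \sum_{j=1}^{n_i} 1\{x_j^{(i)} = k \wedge y^{(i)} = 1\} + 1}{\sum_{i=1}^{m} 1\{y^{(i)} = 1\}\, n_i + |V|}.$$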
Python implementation:
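The listing below builds a 2-5-3-1 fully connected network with sigmoid activations and trains it with batch gradient descent. For reference, these are the equations the code implements ($g$ is the sigmoid, each activation gets a bias column of ones prepended, and the $[:,\,1:]$ slice drops the bias column when propagating errors backwards):

$$z^{(l+1)} = a^{(l)} W_l^T, \qquad a^{(l+1)} = g(z^{(l+1)})$$

$$\delta^{(4)} = a^{(4)} - y, \qquad \delta^{(l)} = \big(\delta^{(l+1)} W_l\big)_{[:,\,1:]} \odot g'(z^{(l)}), \qquad \frac{\partial J}{\partial W_l} = \frac{1}{m}\,(\delta^{(l+1)})^T a^{(l)}, \qquad W_l \leftarrow W_l - \alpha\,\frac{\partial J}{\partial W_l}$$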

import numpy as np
import xlrd
import matplotlib.pyplot as plt

#  Network architecture: 2-5-3-1 (input - hidden1 - hidden2 - output)
input_dim = 2  # input layer dimensionality
output_dim = 1  # output layer dimensionality
lr = 0.5  # learning rate for gradient descent

# Load the data; returns the feature set and the label set as numpy arrays
def load_data(filename):
    workbook = xlrd.open_workbook(filename)
    boyinfo = workbook.sheet_by_index(0)
    col_num = boyinfo.ncols
    row_num = boyinfo.nrows
    # Feature set and label set
    dataset = []
    labelset = []
    print(col_num, row_num)
    for i in range(1, row_num):
        row = boyinfo.row_values(i)[0:]
        dataset.append([row[0], row[1]])
        labelset.append(row[2])
    return np.array(dataset), np.array(labelset)

# Mean-normalize the data: subtract the column mean, divide by the column range
def normalization(X):
    Xmin = np.min(X, axis=0)
    Xmax = np.max(X, axis=0)
    Xmu = np.mean(X, axis=0)
    X_norm = (X-Xmu)/(Xmax-Xmin)
    return X_norm

# Randomly initialize the weights of a layer with L_in inputs and L_out outputs
def randInitializeWeights(L_in, L_out):
    epsilon = 0.12
    # np.random.rand draws samples uniformly from [0, 1); scale and shift them
    # so the weights fall in [-epsilon, epsilon)
    W = np.random.rand(L_out, 1+L_in)*(2*epsilon)-epsilon
    return W

# Sigmoid activation: squashes its input into the interval (0, 1)
def sigmoid(z):
    g = 1 / (1 + np.exp(-z))  # g(z) = 1 / (1 + e^(-z))
    return g  # return the activation

# Gradient of the sigmoid function: g'(z) = g(z) * (1 - g(z))
def sigmoidGradient(z):
    g = np.array(1.0 / (1.0 + np.exp(-z)))  # the sigmoid itself
    g = g * (1 - g)  # elementwise derivative
    return np.mat(g)  # return a matrix so np.multiply works with the deltas

# Cross-entropy loss over the training set
def calculate_loss(model, x_m, y):
    m = y.shape[0]
    W1, W2, W3 = model["W1"], model["W2"], model["W3"]
    # Forward pass through the 2-5-3-1 network (x_m already carries the bias column)
    a1 = np.mat(x_m)
    z2 = a1 * W1.T
    a2 = sigmoid(z2)
    a2 = np.column_stack((np.ones((m, 1)), a2))
    z3 = a2 * W2.T
    a3 = sigmoid(z3)
    a3 = np.column_stack((np.ones((m, 1)), a3))
    z4 = a3 * W3.T
    a4 = sigmoid(z4)
    one = np.multiply(y, np.log(a4))
    two = np.multiply((1-y), np.log(1-a4))
    media_para = (one+two).sum()
    data_loss = -media_para/m
    return data_loss

# Threshold: predictions above 0.5 are labeled positive (1.0), the rest negative (0.0)
def compare(X):
    num = X.shape[0]
    result = []
    for i in range(num):
        if X[i, 0]>0.5:
            result.append(1.0)
        else:
            result.append(0.0)
    return result

# Forward pass followed by thresholding; returns class predictions
# (despite its name, this function computes predictions, not a precision score)
def precision(model, x_m):
    m = x_m.shape[0]
    W1, W2, W3 = model["W1"], model["W2"], model["W3"]
    a1 = np.mat(x_m)
    z2 = a1 * W1.T
    a2 = sigmoid(z2)
    a2 = np.column_stack((np.ones((m, 1)), a2))
    z3 = a2 * W2.T
    a3 = sigmoid(z3)
    a3 = np.column_stack((np.ones((m, 1)), a3))
    z4 = a3 * W3.T
    a4 = sigmoid(z4)
    result = compare(a4)
    return result

# Build the neural network model.
# build_model takes the feature matrix X, the label vector y, the sizes of the
# two hidden layers, the iteration count, and a flag for printing the loss.
# It runs forward and backward propagation to train the weights W1, W2, W3.
def build_model(X, y, hidden1_dim,hidden2_dim, iterNum=2000, print_loss=False):
    # Number of training examples
    m = X.shape[0]
    # Initialize all weights in the network
    W1 = randInitializeWeights(input_dim, hidden1_dim)
    W2 = randInitializeWeights(hidden1_dim, hidden2_dim)
    W3 = randInitializeWeights(hidden2_dim, output_dim)
    # Store the model as a dict of weight matrices
    model = {}
    # Batch gradient descent
    logName = "logText.txt"  # log file name
    logFile = open(logName, "w")  # open the log file for writing
    for t in range(iterNum):
        # Forward propagation: prepend the bias column, then apply each layer
        x_m = np.column_stack((np.ones((m, 1)), X))
        a1 = np.mat(x_m)
        z2 = a1*W1.T
        a2 = sigmoid(z2)
        a2 = np.column_stack((np.ones((m, 1)), a2))
        z3 = a2*W2.T
        a3 = sigmoid(z3)
        a3 = np.column_stack((np.ones((m, 1)), a3))
        z4 = a3*W3.T
        a4 = sigmoid(z4)
        # Backpropagation
        y_m = np.reshape(y, [-1, 1])
        # Output-layer error
        delta4 = a4-y_m
        # Hidden-layer errors: propagate back through the weights (dropping the
        # bias column) and scale by the sigmoid gradient
        delta3 = np.multiply((delta4*W3)[:, 1:], sigmoidGradient(z3))
        delta2 = np.multiply((delta3*W2)[:, 1:], sigmoidGradient(z2))

        # Gradients of the loss with respect to each weight matrix
        DW3 = (delta4.T * a3)/m
        DW2 = (delta3.T * a2)/m
        DW1 = (delta2.T * a1)/m
        # Update the parameters
        W1 -= lr * DW1
        W2 -= lr * DW2
        W3 -= lr * DW3

        model = {"W1":W1, "W2":W2, "W3":W3}
        if print_loss and t % 1000 == 0:
            print("Loss after iteration %i: %f" % (t, calculate_loss(model, x_m, y_m)))
            logFile.write("Loss after iteration %i: %f" % (t, calculate_loss(model, x_m, y_m)))
            logFile.write("\n")
            result = precision(model, x_m)
            print("Training Set Accuracy: {:f}".format((np.mean(result == y) * 100)))
            logFile.write("Training Set Accuracy: {:f}".format((np.mean(result == y) * 100)))
            logFile.write("\n")
    logFile.close()
    return model

# Predict: run a forward pass with the trained weights and return the raw
# sigmoid outputs for a feature matrix X
def predict(X, model):
    m = X.shape[0]
    W1, W2, W3 = model["W1"], model["W2"], model["W3"]
    x_m = np.column_stack((np.ones((m, 1)), X))
    a1 = np.mat(x_m)
    z2 = a1 * W1.T
    a2 = sigmoid(z2)
    a2 = np.column_stack((np.ones((m, 1)), a2))
    z3 = a2 * W2.T
    a3 = sigmoid(z3)
    a3 = np.column_stack((np.ones((m, 1)), a3))
    z4 = a3 * W3.T
    a4 = sigmoid(z4)
    return a4

# plotData plots the samples in X colored by their label y and returns the two
# plot handles p1, p2 for the legend
def plotData(X, y):
    # Indices of the positive (y == 1) and negative (y == 0) examples
    pos = np.where(y==1)
    neg = np.where(y==0)
    p1 = plt.plot(X[pos, 0], X[pos, 1], linestyle='none', marker='s', markersize=7, color='red')[0]
    p2 = plt.plot(X[neg, 0], X[neg, 1], linestyle='none', marker='o', markersize=7, color='green')[0]
    return p1,p2

# Plot the decision boundary learned by the network
def plot_decision_boundary(predict_func,X_norm,y):
    x_min, x_max = X_norm[:, 0].min()-0.5, X_norm[:, 0].max()+0.5
    y_min, y_max = X_norm[:, 1].min()-0.5, X_norm[:, 1].max()+0.5
    # Grid step size
    stride = 0.01
    # Build 1-D coordinate arrays with np.arange, then expand them into xx, yy grids with np.meshgrid
    x_med = np.arange(x_min, x_max, stride)
    y_med = np.arange(y_min, y_max, stride)
    xx, yy = np.meshgrid(x_med, y_med)
    # .ravel() flattens xx and yy into 1-D vectors; np.c_ stacks them as columns,
    # giving one (x, y) row per grid point to feed into predict_func
    mat = np.c_[xx.ravel(), yy.ravel()]
    Z = predict_func(mat)
    Z = (Z.T).reshape(xx.shape)
    p1, p2 = plotData(X_norm, y)
    # Z holds sigmoid outputs, so the decision boundary is the 0.5 level set
    p3 = plt.contour(xx, yy, Z, levels=[0.5], linewidths=2)
    plt.xlabel("tall")  # 横坐标的标签为tall
    plt.ylabel("salary")  # 纵坐标的标签为salary
    plt.legend((p1, p2, p3), ('y = I like you', "y = I don't like you", 'Decision Boundary'), numpoints=1,
               handlelength=0)
    plt.title("ANN")

# Visualize the decision boundary of a trained model and save the figure
def visualize(X_norm,y,model):
    plot_decision_boundary(lambda x:predict(x, model), X_norm, y)
    plt.savefig("result.png")
    plt.show()

if __name__ == "__main__":
    dataset, labelset = load_data("data.xls")
    print("输入矩阵维度:",dataset.shape)
    print("标签维度:",labelset.shape)
    # 原始数据进行归一化处理
    X_norm = normalization(dataset)
    # 训练模型
    model = build_model(X_norm, labelset, 5, 3, iterNum=20000, print_loss=True)
    # 可视化
    visualize(X_norm, labelset, model)

Running the code:

[Figure: the training data with the learned decision boundary, saved by the script as result.png]

As the figure shows, the neural network has learned a nonlinear classifier that correctly separates the samples.
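As a minimal usage sketch, a new sample could be scored once the script above has run (so dataset and model are in scope). The sample values are made up for illustration, and the normalization statistics are recomputed from the training data because the normalization function does not save them:

new_x = np.array([[1.75, 8000.0]])  # hypothetical (tall, salary) sample
# Re-derive the training set's normalization statistics
Xmin, Xmax, Xmu = np.min(dataset, axis=0), np.max(dataset, axis=0), np.mean(dataset, axis=0)
new_x_norm = (new_x - Xmu) / (Xmax - Xmin)
# predict returns the raw sigmoid output in (0, 1)
prob = predict(new_x_norm, model)
print("P(y = 1) =", float(prob[0, 0]), "-> class", 1 if prob[0, 0] > 0.5 else 0)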

Reposted from blog.csdn.net/h__ang/article/details/84113072