1.1.2 Building basic functions with numpy

1.1.2.2 numpy.exp, sigmoid, sigmoid gradient

import numpy as np

def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)) of x.

    Argument:
    x -- a scalar or numpy array; np.exp is applied to -x element-wise

    Returns:
    The sigmoid of x (element-wise for array input).
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

# With s = sigmoid(x), the derivative is s' = s * (1 - s).
def sigmoid_derivative(x):
    """Return the gradient of the sigmoid function evaluated at x.

    Argument:
    x -- a scalar or numpy array

    Returns:
    s * (1 - s), where s = 1 / (1 + exp(-x)), computed element-wise.
    """
    activation = 1 / (1 + np.exp(-x))
    return activation * (1 - activation)

# Plot the sigmoid (top panel) and its derivative (bottom panel) for x in
# [-5, 5). pyplot is imported here because nothing above this snippet
# imports it.
import matplotlib.pyplot as plt

plt.figure(1)  # figure number 1
x = np.arange(-5, 5, 0.1)
y = sigmoid(x)
plt.subplot(211)  # 2-row x 1-column grid, select the first row
plt.plot(x, y)

y = sigmoid_derivative(x)
plt.subplot(212)  # same grid, select the second row
plt.plot(x, y)
plt.show()

1.1.2.3 numpy.reshape(), numpy.shape

def image2vector(image):
    """Flatten a 3-D image array into a single column vector.

    Argument:
    image -- a numpy array of shape (length, height, depth)

    Returns:
    v -- a vector of shape (length*height*depth, 1)
    """
    length, height, depth = image.shape[0], image.shape[1], image.shape[2]
    return image.reshape(length * height * depth, 1)

1.1.2.4 Normalizing rows

np.linalg.norm 沿指定的 axis 计算矩阵 x 的向量范数（默认是 L2 范数，即各元素平方和的平方根），而不是向量内积。

def normalizeRows(x):
    """
    Scale every row of the matrix x so that it has unit L2 length.

    Argument:
    x -- A numpy matrix of shape (n, m)

    Returns:
    A new array holding x divided row-wise by its row norms; the input
    array itself is not modified.
    """
    # linalg = linear + algebra. keepdims=True keeps the norms as an (n, 1)
    # column so the division below broadcasts across each row.
    row_norms = np.linalg.norm(x, ord=2, axis=1, keepdims=True)
    return x / row_norms

# Example: normalize the rows of a small 2x3 matrix and print the result.
x = np.array([[0, 3, 4], [1, 6, 4]])
normalized = normalizeRows(x)
print("normalizeRows(x) = " + str(normalized))

1.1.2.5 Broadcasting and the softmax function

def softmax(x):
    """Compute the softmax of each row of x.

    Argument:
    x -- a numpy array of shape (n, m)

    Returns:
    s -- an array of shape (n, m) whose rows each sum to 1
    """
    x = np.asarray(x, dtype=float)
    # Subtracting the row maximum leaves the result mathematically unchanged
    # but keeps np.exp from overflowing to inf (and the ratio from becoming
    # NaN) when the inputs are large.
    shifted = x - np.max(x, axis=1, keepdims=True)
    x_exp = np.exp(shifted)
    # keepdims=True keeps the sums as an (n, 1) column so that the division
    # broadcasts over every row.
    row_sums = np.sum(x_exp, axis=1, keepdims=True)
    return x_exp / row_sums

来，敲黑板：

1.np.exp(x)对任何np.array的x都可以使用并且是对每个元素进行的求指数

2.sigmoid函数以及其导数

3.image2vector在深度学习中很常用

4.np.reshape应用很广泛。保持矩阵/向量的维度会消除大量的BUG。

5.numpy有很多高效的内建函数。

6.广播非常非常有用

1.1.3 Vectorization

import time

# Loop-based ("classic") implementations of common vector operations.
# The explicit index loops are deliberate: they are the baseline that the
# vectorized numpy versions are compared against.
x1 = [9, 2, 5, 0, 0, 7, 5, 0, 0, 0, 9, 2, 5, 0, 0]
x2 = [9, 2, 2, 9, 0, 9, 2, 5, 0, 0, 9, 2, 5, 0, 0]

### CLASSIC DOT PRODUCT OF VECTORS IMPLEMENTATION ###
### Dot (inner) product: a . b = a^T * b = a1*b1 + a2*b2 + ... + an*bn

tic = time.process_time()
dot = 0
for i in range(len(x1)):
    dot += x1[i]*x2[i]
toc = time.process_time()
print("dot = " + str(dot) + "\n ----- Computation time = " +
        str(1000*(toc - tic)) + "ms")

### CLASSIC OUTER PRODUCT IMPLEMENTATION ###
### Outer product: a * b^T, a len(a) x len(b) matrix (not the cross product)

tic = time.process_time()
# Start from a len(x1) x len(x2) matrix of zeros and fill in every entry.
outer = np.zeros((len(x1), len(x2)))
for i in range(len(x1)):
    for j in range(len(x2)):
        outer[i, j] = x1[i] * x2[j]
toc = time.process_time()
print("outer = " + str(outer) + "\n ----- Computation time = " +
        str(1000*(toc - tic)) + "ms")

### CLASSIC ELEMENTWISE IMPLEMENTATION ###
### Multiply the vectors element by element

tic = time.process_time()
mul = np.zeros(len(x1))
for i in range(len(x1)):
    mul[i] = x1[i] * x2[i]
toc = time.process_time()
print("elementwise multiplication = " + str(mul) +
        "\n ----- Computation time = " + str(1000*(toc - tic)) + "ms")

### CLASSIC GENERAL DOT PRODUCT IMPLEMENTATION ###
### Matrix-vector product: each row of W is dotted with x1

W = np.random.rand(3, len(x1))  # Random 3*len(x1) numpy array
tic = time.process_time()
gdot = np.zeros(W.shape[0])
for i in range(W.shape[0]):
    for j in range(len(x1)):  # multiply row i of W with x1
        gdot[i] += W[i, j]*x1[j]
toc = time.process_time()
print("gdot = " + str(gdot) + "\n ----- Computation time = " +
        str(1000*(toc - tic)) + "ms")

输出：

dot = 278
 ----- Computation time = 0.00854900000035741ms
outer = [[81. 18. 18. 81.  0. 81. 18. 45.  0.  0. 81. 18. 45.  0.  0.]
 [18.  4.  4. 18.  0. 18.  4. 10.  0.  0. 18.  4. 10.  0.  0.]
 [45. 10. 10. 45.  0. 45. 10. 25.  0.  0. 45. 10. 25.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [63. 14. 14. 63.  0. 63. 14. 35.  0.  0. 63. 14. 35.  0.  0.]
 [45. 10. 10. 45.  0. 45. 10. 25.  0.  0. 45. 10. 25.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [81. 18. 18. 81.  0. 81. 18. 45.  0.  0. 81. 18. 45.  0.  0.]
 [18.  4.  4. 18.  0. 18.  4. 10.  0.  0. 18.  4. 10.  0.  0.]
 [45. 10. 10. 45.  0. 45. 10. 25.  0.  0. 45. 10. 25.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]]
 ----- Computation time = 0.12781600000000282ms
elementwise multiplication = [81.  4. 10.  0.  0. 63. 10.  0.  0.  0. 81.  4. 25.  0.  0.]
 ----- Computation time = 0.018939999999911805ms
gdot = [21.88386459 17.22658932 13.05841111]
 ----- Computation time = 0.07001299999975785ms

numpy实现

import time
import numpy as np

# Vectorized numpy counterparts of the loop-based vector operations.
# Each section times a single numpy call with time.process_time().
x1 = [9, 2, 5, 0, 0, 7, 5, 0, 0, 0, 9, 2, 5, 0, 0]
x2 = [9, 2, 2, 9, 0, 9, 2, 5, 0, 0, 9, 2, 5, 0, 0]

### VECTORIZED DOT PRODUCT OF VECTORS ###
tic = time.process_time()
dot = np.dot(x1, x2)
toc = time.process_time()
elapsed_ms = 1000*(toc - tic)
print("dot = " + str(dot) + "\n ----- Computation time = " + str(elapsed_ms) + "ms")

### VECTORIZED OUTER PRODUCT ###
tic = time.process_time()
outer = np.outer(x1, x2)
toc = time.process_time()
elapsed_ms = 1000*(toc - tic)
print("outer = " + str(outer) + "\n ----- Computation time = " + str(elapsed_ms) + "ms")

### VECTORIZED ELEMENTWISE MULTIPLICATION ###
tic = time.process_time()
mul = np.multiply(x1, x2)
toc = time.process_time()
elapsed_ms = 1000*(toc - tic)
print("elementwise multiplication = " + str(mul) + "\n ----- Computation time = " + str(elapsed_ms) + "ms")

### VECTORIZED GENERAL DOT PRODUCT ###
W = np.random.rand(3, len(x1))  # random 3 x len(x1) matrix
tic = time.process_time()
gdot = np.dot(W, x1)
toc = time.process_time()
elapsed_ms = 1000*(toc - tic)
print("gdot = " + str(gdot) + "\n ----- Computation time = " + str(elapsed_ms) + "ms")

输出：

dot = 278
 ----- Computation time = 0.17038700000027163ms
outer = [[81 18 18 81  0 81 18 45  0  0 81 18 45  0  0]
 [18  4  4 18  0 18  4 10  0  0 18  4 10  0  0]
 [45 10 10 45  0 45 10 25  0  0 45 10 25  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [63 14 14 63  0 63 14 35  0  0 63 14 35  0  0]
 [45 10 10 45  0 45 10 25  0  0 45 10 25  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [81 18 18 81  0 81 18 45  0  0 81 18 45  0  0]
 [18  4  4 18  0 18  4 10  0  0 18  4 10  0  0]
 [45 10 10 45  0 45 10 25  0  0 45 10 25  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]]
 ----- Computation time = 0.1971060000003355ms
elementwise multiplication = [81  4 10  0  0 63 10  0  0  0 81  4 25  0  0]
 ----- Computation time = 0.06556499999987864ms
gdot = [19.3061823  18.29576413 24.1581206 ]
 ----- Computation time = 0.06616899999967174ms

As you may have noticed, the vectorized implementation is much cleaner and more efficient. For bigger vectors/matrices, the differences in running time become even bigger.

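这里意思是numpy的向量化实现更加简洁和高效，对于更庞大的向量和矩阵，运行效率会相差更多。然鹅我运行出来明明是numpy运行时间更多一丢丢。。。这其实不一定是环境的问题：这里的向量只有15个元素，而且x1、x2是Python列表，numpy每次调用都要先把列表转换成数组，这部分固定开销比15次乘加本身还大。把向量换成很大的np.array（比如一百万个元素）再计时，向量化的优势就会非常明显。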

那么先不管了，接着刚下面的。。。

1.1.3.1 Implement the L1 and L2 loss functions

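那么L1就是个这：

$$L_1(\hat{y}, y) = \sum_{i=1}^{m} \left| y^{(i)} - \hat{y}^{(i)} \right|$$

L2就是个这：

$$L_2(\hat{y}, y) = \sum_{i=1}^{m} \left( y^{(i)} - \hat{y}^{(i)} \right)^2$$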

这里都是范数的概念。当L1、L2作为正则项时，L1假设模型的参数取值满足拉普拉斯分布，L2假设模型参数满足高斯分布。所谓的正则化其实就是加上对参数的约束，使得模型更不容易overfit。但是如果要说是不是加了约束就一定会好，这个没有人能回答，只能说，加约束的情况下，理论上应该可以获得泛化能力更强的结果。

贴代码：

import numpy as np

# GRADED FUNCTION: L1
def L1(yhat, y):
    """
    Compute the L1 loss: the sum of absolute differences between y and yhat.

    Arguments:
    yhat -- vector of size m (predicted labels)
    y -- vector of size m (true labels)

    Returns:
    loss -- the value of the L1 loss function, as a scalar
    """
    diff = np.asarray(y) - np.asarray(yhat)
    # np.sum reduces over every axis, so row vectors (1, m) and column
    # vectors (m, 1) also produce a scalar; the builtin sum() would only
    # reduce the first axis and return an array for those shapes.
    loss = np.sum(np.abs(diff))

    return loss

# GRADED FUNCTION: L2
def L2(yhat, y):
    """
    Compute the L2 loss: the sum of squared differences between y and yhat.

    Arguments:
    yhat -- vector of size m (predicted labels)
    y -- vector of size m (true labels)

    Returns:
    loss -- the value of the L2 loss function, as a scalar
    """
    # Compute the difference once and flatten it, so that row vectors (1, m)
    # and column vectors (m, 1) work as well as plain 1-D vectors; np.dot on
    # two (1, m) arrays would raise a shape-mismatch error.
    diff = (np.asarray(y) - np.asarray(yhat)).ravel()
    loss = np.dot(diff, diff)

    return loss

# Example: evaluate both losses on five predictions and their true labels.
yhat = np.array([.9, 0.2, 0.1, .4, .9])
y = np.array([1, 0, 0, 1, 1])
l1_value = L1(yhat, y)
l2_value = L2(yhat, y)
print("L1 = " + str(l1_value))
print("L2 = " + str(l2_value))

来，敲黑板：

1.向量化在深度学习中灰常重要，TA使计算更加高效和明了。

2.回顾了L1和L2 LOSS。

3.熟悉了numpy的np.sum, np.dot, np.multiply, np.maximum等等。

接着刚1.2

1.2 Logistic Regression with a Neural Network mindset

那么在这一节我们要开始第一个深度学习的练习了。在这里将会build你的第一个图像识别算法----猫子分类器，70%的acc哦~

那么在这里完成作业以后你将会：

用logistic回归的方式构建神经网络。

学习如何最小化代价函数。

明白如何对代价函数求导来更新参数。

Instructions:

不要在代码中使用循环(for/while)，除非instructions明确的让你这么做。

你将会学到：

构建一个一般的学习算法，包括：

　　-初始化参数

　　-计算代价函数及其梯度

　　-使用一个优化算法（梯度下降）

在main函数里面正确的使用以上三个函数。

1.2.1 Packages

先来介绍几个包：

numpy: python里面的一个科学计算基础包

h5py: 和存储为H5文件的数据集做交互的通用包

matplotlib: python里面一个很屌的绘图库

PIL: 在这里用来对你自己的图片在最后进行测试（其实就是个图像库）

1.2.2 Overview of the Problem set

问题表述：给定一个数据集"data.h5", 其中包括：

*标有cat(y=1)或non-cat(y=0)的训练集

*标有cat或non-cat的测试集

*每张图片为(num_px, num_px, 3)的shape，其中3代表3通道(RGB)，图片是方形，高num_px宽num_px

贴一波代码：

import numpy as np
from matplotlib import pyplot as plt
import h5py
import scipy
from PIL import Image
from scipy import ndimage
from lr_utils import load_dataset

#matplotlib inline

# Loading the data (cat/non-cat) through the project helper
# lr_utils.load_dataset.
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes =\
    load_dataset()

# Show datasets' shapes: axis 0 of the image arrays is the example count,
# axis 1 is used as the image side length (images are assumed square).
m_train = train_set_x_orig.shape[0]
m_test = test_set_x_orig.shape[0]
num_px = train_set_x_orig.shape[1]

print("Number of training examples: m_train = " + str(m_train))
print("Number of testing examples: m_test = " + str(m_test))
print("Height/Width of each image: num_px = " + str(num_px))
print("Each image's size is: (" + str(num_px) + ", " + str(num_px) + ", 3)")
print("train_set_x shape: " + str(train_set_x_orig.shape))
print("train_set_y shape: " + str(train_set_y.shape))
print("test_set_x shape: " + str(test_set_x_orig.shape))
print("test_set_y shape: " + str(test_set_y.shape))

# Flatten every image and transpose, so that each COLUMN is one example:
# (m, num_px, num_px, 3) -> (m, num_px*num_px*3) -> (num_px*num_px*3, m).
train_set_x_flatten = \
        train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_set_x_flatten = \
        test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T

print("train_set_x_flatten shape: " + str(train_set_x_flatten.shape))
print("test_set_x_flatten shape: " + str(test_set_x_flatten.shape))
print("sanity check after reshaping: " + str(train_set_x_flatten[0:5, 0]))

# Visualize an example of a picture together with its label.
index = 25
plt.imshow(train_set_x_orig[index])
# classes holds byte strings, hence the decode; the stray trailing quote in
# the original message has been removed.
print("y = " + str(train_set_y[:, index]) + ", it's a '" +
        classes[np.squeeze(train_set_y[:, index])].decode("utf-8") +
        "' picture.")
plt.show()

敲黑板：

一般对一个新数据集进行预处理的步骤为：

* 找出问题的dimensions和shapes(m_train, m_test, num_px, ...)

* 将数据集reshape使每个样本都成为一个向量，大小为(num_px * num_px *3, 1)

* 标准化数据

1.2.3 General Architecture of the learning algorithm

下图解释了为什么 Logistics回归是一个非常简单的神经网络：

该算法的数学表达：

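对于每一个样本x⁽ⁱ⁾:

$$z^{(i)} = w^T x^{(i)} + b$$

$$\hat{y}^{(i)} = a^{(i)} = \mathrm{sigmoid}(z^{(i)})$$

$$\mathcal{L}(a^{(i)}, y^{(i)}) = -y^{(i)} \log a^{(i)} - (1 - y^{(i)}) \log(1 - a^{(i)})$$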

这里的L其实是，每一个样本看做一次伯努利实验，那么分布就是0-1分布，那么对这个0-1分布求极大似然估计就是...贴一波《统计学习方法》的推导：

吴恩达在这里的L函数其实是统计学习方法中的对数似然函数取负的其中一项。

那吴恩达在这里给出的代价函数为：

$$J = \frac{1}{m} \sum_{i=1}^{m} \mathcal{L}(a^{(i)}, y^{(i)})$$

那么可以看到他这个代价函数J其实就是对《统计学习方法》中给出的对数似然函数取负并归一化了一下（除以m）。

那么接着往下刚。。。

Key steps:

* 初始化模型参数

* 通过最小化代价函数学习模型参数

* 使用学习到的参数来做预测（在测试集上）

* 分析结果并得出结论

1.2.4 Building the parts of our algorithm

构建一个神经网络的主要步骤有：

* 定义模型架构（如输入features的个数）

* 初始化模型参数

* 循环：

　　- 计算当前的损失（前向传播）

　　- 计算当前的梯度（反向传播）

　　- 更新参数（梯度下降）

通常将1-3步分别实现并集成在一个model()函数里。

1.2.4.1 Helper functions

一：前提准备

1：首先数据是以.h5的形式进行存储，熟悉h5的操作

# Inspect the HDF5 test file: list its keys and dump each dataset.
# The with-block closes the file handle once the inspection is done.
with h5py.File('./test_catvnoncat.h5', 'r') as f:
    print(list(f.keys()))  # every top-level key stored in the file

    # Each label now names the dataset that is actually printed.
    print('first, we get values of list_classes:', f['list_classes'][:])
    print('****************************************************\n')
    print('then, we get values of test_set_x:', f['test_set_x'][:])
    print('****************************************************\n')
    print('then, we get values of test_set_y:', f['test_set_y'][:])

    print(f['test_set_x'][:].shape)
    print(f['test_set_y'][:].shape)

2：从数据集中挑选一张图片进行展示

# Show one picture from the training set.
# The data lives in an HDF5 file; np.array copies it into memory, so the
# file can be closed as soon as the with-block ends.
with h5py.File('./train_catvnoncat.h5', "r") as train_dataset:
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # your train set features
index = 24
plt.imshow(train_set_x_orig[index])
plt.show()
print("this is picture")

3：查看数据集的维度等信息

# Load features and labels of both HDF5 files into memory and report their
# dimensions. The with-blocks close each file once its arrays are copied.
with h5py.File('./train_catvnoncat.h5', "r") as train_dataset:
    train_set_x = np.array(train_dataset["train_set_x"][:])  # your train set features
    train_set_y = np.array(train_dataset["train_set_y"][:])  # your train set labels

with h5py.File('./test_catvnoncat.h5', "r") as test_dataset:
    test_set_x = np.array(test_dataset["test_set_x"][:])  # your test set features
    test_set_y = np.array(test_dataset["test_set_y"][:])  # your test set labels

# shape lists every dimension; shape[i] is the size of dimension i.
m_train = train_set_x.shape[0]  # number of pictures in the training set
m_test = test_set_x.shape[0]  # number of pictures in the test set
num_px = train_set_x.shape[1]  # picture side length (expected 64 -- TODO confirm)

# Print what has just been loaded.
print ("训练集的数量: m_train = " + str(m_train))
print ("测试集的数量 : m_test = " + str(m_test))
print ("每张图片的宽/高 : num_px = " + str(num_px))
print ("每张图片的大小 : (" + str(num_px) + ", " + str(num_px) + ", 3)")
print ("训练集_图片的维数 : " + str(train_set_x.shape))  # expected (209, 64, 64, 3)
print ("训练集_标签的维数 : " + str(train_set_y.shape))  # expected (209,)
print ("测试集_图片的维数: " + str(test_set_x.shape))  # expected (50, 64, 64, 3)
print ("测试集_标签的维数: " + str(test_set_y.shape))  # expected (50,)

4：将数据进行降维处理并转置，之后进行标准化（除以255）

# Flatten the training images to one row per example, then transpose so that
# each column is one example; reshape the labels into a single row.
train_set_x_flatten = np.reshape(train_set_x, (train_set_x.shape[0], -1)).T
train_set_y_flatten = np.reshape(train_set_y, (1, train_set_y.shape[0]))
# Same treatment for the test set.
test_set_x_flatten = np.reshape(test_set_x, (test_set_x.shape[0], -1)).T
test_set_y_flatten = np.reshape(test_set_y, (1, test_set_y.shape[0]))
print ("训练集降维最后的维度： " + str(train_set_x_flatten.shape))
print ("训练集_标签的维数 : " + str(train_set_y_flatten.shape))
print ("测试集降维之后的维度: " + str(test_set_x_flatten.shape))
print ("测试集_标签的维数 : " + str(test_set_y_flatten.shape))
# Standardize the data by dividing every value by 255; from here on
# train_set_x / test_set_x refer to the flattened, scaled arrays.
train_set_x = train_set_x_flatten / 255
test_set_x = test_set_x_flatten / 255

二：训练过程

顾名思义，训练神经网络即是通过训练集得到最能够使得代价函数降到最小的W与B的集合
神经网络的训练分为：
propagate：前向传播计算代价函数，并通过反向传播计算梯度
optimize：利用梯度下降迭代更新参数w和b，并记录代价
predict：预测函数，利用训练好的模型进行预测

def propagate(w, b, X, Y):
    """
    Run one forward and one backward pass of logistic regression.

    Arguments:
        w  - weights, expected shape (num_px * num_px * 3, 1)
        b  - bias, a scalar
        X  - data, expected shape (num_px * num_px * 3, number of examples)
        Y  - true labels (0 = non-cat, 1 = cat), expected shape
             (1, number of examples)

    Returns:
        grads - dictionary with "dw" (gradient of the cost with respect to
                w, same shape as w) and "db" (gradient with respect to b)
        cost  - negative log-likelihood cost, averaged over the examples
    """
    m = X.shape[1]  # number of examples = number of columns of X

    # Forward pass: activations, then the cross-entropy cost.
    # '*' multiplies element-wise; np.dot is the matrix product.
    A = sigmoid(np.dot(w.T, X) + b)
    positive_term = Y * np.log(A)
    negative_term = (1 - Y) * (np.log(1 - A))
    cost = (- 1 / m) * np.sum(positive_term + negative_term)

    # Backward pass: gradients of the cost with respect to w and b.
    residual = A - Y
    dw = (1 / m) * np.dot(X, residual.T)
    db = (1 / m) * np.sum(residual)

    # Sanity checks on the gradient shape and dtype.
    assert(dw.shape == w.shape)
    assert(db.dtype == float)

    return ({"dw": dw, "db": db}, cost)

def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost=False):
    """
    Optimize w and b by running gradient descent.

    Arguments:
        w  - weights, expected shape (num_px * num_px * 3, 1)
        b  - bias, a scalar
        X  - data, expected shape (num_px * num_px * 3, number of examples)
        Y  - true labels (0 = non-cat, 1 = cat), expected shape
             (1, number of examples)
        num_iterations  - number of gradient descent steps
        learning_rate  - step size of the gradient descent update rule
        print_cost  - if True, print the cost every 100 iterations

    Returns:
        params  - dictionary with the final weights "w" and bias "b"
        grads  - dictionary with the gradients "dw" and "db" from the last
                 iteration; both are None when num_iterations is 0
        costs  - list of the costs recorded every 100 iterations, used to
                 plot the learning curve

    Each iteration does two things:
        1) compute the cost and gradients for the current parameters with
           propagate();
        2) update w and b with the gradient descent rule.
    """
    costs = []
    # Pre-bind the gradients so the return statement below does not hit an
    # unbound local when the loop body never runs (num_iterations == 0).
    dw = None
    db = None

    for i in range(num_iterations):
        grads, cost = propagate(w, b, X, Y)

        dw = grads["dw"]
        db = grads["db"]

        w = w - learning_rate * dw
        b = b - learning_rate * db

        # Record (and optionally print) the cost every 100 iterations.
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("迭代的次数: %i ， 误差值： %f" % (i,cost))

    params = {"w": w, "b": b}
    grads = {"dw": dw, "db": db}
    return (params, grads, costs)

def predict(w, b, X):
    """
    Predict a 0/1 label for every column of X using the learned logistic
    regression parameters (w, b).

    Arguments:
        w  - weights, reshaped below to (X.shape[0], 1)
        b  - bias, a scalar
        X  - data, expected shape (num_px * num_px * 3, number of examples)

    Returns:
        Y_prediction  - numpy array of shape (1, number of examples) with
                        the predictions (0.0 or 1.0) for the examples in X
    """
    m = X.shape[1]  # number of examples = number of columns of X
    weights = w.reshape(X.shape[0], 1)

    # Probability that a cat is present in each picture.
    A = sigmoid(np.dot(weights.T, X) + b)

    # Threshold the first row of probabilities: strictly above 0.5 becomes
    # 1, everything else becomes 0.
    Y_prediction = np.where(A[:1] > 0.5, 1.0, 0.0)

    assert(Y_prediction.shape == (1, m))
    return Y_prediction

三：正式运行

def model(X_train, Y_train, X_test, Y_test, num_iterations=2000, learning_rate=0.5, print_cost=False):
    """
    Build the logistic regression model from the functions implemented
    earlier: initialize, optimize, predict and report accuracy.

    Arguments:
        X_train  - numpy array, training set, expected shape
                   (num_px * num_px * 3, m_train)
        Y_train  - numpy array, training labels, expected shape (1, m_train)
        X_test   - numpy array, test set, expected shape
                   (num_px * num_px * 3, m_test)
        Y_test   - numpy array, test labels, expected shape (1, m_test)
        num_iterations  - hyperparameter: number of optimization iterations
        learning_rate  - hyperparameter: learning rate used by optimize()
        print_cost  - set to True to print the cost every 100 iterations

    Returns:
        d  - dictionary with information about the model: recorded costs,
             predictions on both sets, learned w and b, and hyperparameters
    """
    # Start from all-zero parameters: one weight per input feature (row of
    # X_train) and a scalar bias. This is done inline because no
    # initialize_with_zeros helper is defined in the visible code.
    w = np.zeros((X_train.shape[0], 1))
    b = 0.0

    parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost)

    # Retrieve the learned parameters w and b from the dictionary.
    w, b = parameters["w"], parameters["b"]

    # Predict on the test and training examples.
    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)

    # Accuracy = 100% minus the mean absolute prediction error in percent.
    print("训练集准确性："  , format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100) ,"%")
    print("测试集准确性："  , format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100) ,"%")

    d = {
        "costs": costs,
        "Y_prediction_test": Y_prediction_test,
        # The misspelled key is kept so existing callers keep working; the
        # correctly spelled key is an alias for the same array.
        "Y_prediciton_train": Y_prediction_train,
        "Y_prediction_train": Y_prediction_train,
        "w": w,
        "b": b,
        "learning_rate": learning_rate,
        "num_iterations": num_iterations,
    }
    return d

主过程

print("====================测试model====================")
# Train on the real data prepared by the code earlier in this file.
d = model(train_set_x, train_set_y, test_set_x, test_set_y,
          num_iterations = 2000, learning_rate = 0.005, print_cost = True)

# Plot the learning curve of that run.
costs = np.squeeze(d['costs'])
plt.plot(costs)
plt.ylabel('cost')
plt.xlabel('iterations (per hundreds)')
plt.title("Learning rate =" + str(d["learning_rate"]))
plt.show()

# Train one model per learning rate and keep each result under str(rate).
learning_rates = [0.01, 0.001, 0.0001]
models = {}
for rate in learning_rates:
    print ("learning rate is: " + str(rate))
    models[str(rate)] = model(train_set_x, train_set_y, test_set_x, test_set_y,
                              num_iterations = 1500, learning_rate = rate,
                              print_cost = False)
    print ('\n' + "-------------------------------------------------------" + '\n')

# Overlay the cost curves of all learning rates in one figure.
for rate in learning_rates:
    trained = models[str(rate)]
    plt.plot(np.squeeze(trained["costs"]), label= str(trained["learning_rate"]))

plt.ylabel('cost')
plt.xlabel('iterations')
legend = plt.legend(loc='upper center', shadow=True)
frame = legend.get_frame()
frame.set_facecolor('0.90')
plt.show()

四：利用训练的模型预测本地图片

利用训练的模型预测本地图片：
步骤1：首先将一张图片利用“画图”工具转化成64*64像素的图片
步骤2：将图片导入IDE，执行代码

import matplotlib.image as mpimg
from package.preparetion import train_set_x, train_set_y
from package.code import model

# Classify a local picture with the trained model.
# NOTE(review): '1.jpg' is assumed to be a 64x64 RGB image so that the
# flattened vector has as many rows as the training data -- confirm.
cat = mpimg.imread('1.jpg')
# Flatten to a single column and scale the pixel values by 255, the same
# standardization used for the training set in this tutorial; without it
# the inputs are on a different scale from what the weights were trained on.
# NOTE(review): assumes the imported train_set_x was scaled that way -- confirm.
cat = cat.reshape(-1, 1) / 255
print(cat.shape[0])
print(cat.shape[1])

# The label 0 is only a placeholder for Y_test; the actual prediction for
# the picture is stored under d["Y_prediction_test"].
d = model(train_set_x, train_set_y, cat, 0, num_iterations = 2000, learning_rate = 0.005, print_cost = True)

第一次神经网络作业，神经网络预测图片是否为猫的代码实现