Hands-On Machine Learning with Scikit-Learn and TensorFlow: Chapter 9 TensorFlow Notes (Part 1)


Python 3.7.6 (default, Jan  8 2020, 20:23:39) [MSC v.1916 64 bit (AMD64)]
Type "copyright", "credits" or "license" for more information.

IPython 7.12.0 -- An enhanced Interactive Python.

from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os
import tensorflow as tf

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

# To plot pretty figures
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "tensorflow"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

#### Creating and running a graph


# Use the TF1-style graph API through the compat module (this rebinds the
# name tf that reset_graph() above relies on)
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
reset_graph()

x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")
f = x*x*y + y + 2
f
WARNING:tensorflow:From E:\anaconda3\lib\site-packages\tensorflow\python\compat\v2_compat.py:101: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.
Instructions for updating:
non-resource variables are not supported in the long term
Out[3]: <tf.Tensor 'add_1:0' shape=() dtype=int32>

# Create a session, initialize the variables, evaluate f, and finally close
# the session (releasing the resources it holds)
sess = tf.Session()
sess.run(x.initializer)
sess.run(y.initializer)
result = sess.run(f)
print(result)
42

sess.close()

# Repeating sess.run() every time is a bit clumsy; fortunately there is a better way:
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()
result
Out[6]: 42

# Rather than running each initializer yourself, global_variables_initializer()
# creates a node that initializes every variable when it is run
init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()
    result = f.eval()
result
Out[7]: 42

# An InteractiveSession sets itself as the default session when it is created,
# so you don't need a with block (but you do have to close it manually)

init = tf.global_variables_initializer()
sess = tf.InteractiveSession()
init.run()
result = f.eval()
print(result)
42

sess.close()

result
Out[10]: 42

###################    Managing graphs    ###################
# Every node you create is automatically added to the default graph

reset_graph()

x1 = tf.Variable(1)
x1.graph is tf.get_default_graph()
Out[11]: True

# Sometimes you may want to manage several independent graphs. You can create
# a new graph and temporarily make it the default inside a with block:

graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(2)

x2.graph is graph
Out[12]: True

# Outside the with block, the previous default graph is in effect again, so x2
# is not on it (the default graph itself can be reset with tf.reset_default_graph())

x2.graph is tf.get_default_graph()
Out[13]: False
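
# A minimal sketch (not from the original notes): to run nodes that live on a
# non-default graph, pass that graph to the Session explicitly
with tf.Session(graph=graph) as sess:
    sess.run(x2.initializer)
    print(sess.run(x2))  # 2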

### Lifecycle of a node value

w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

with tf.Session() as sess:
    print(y.eval())  # 10
    print(z.eval())  # 15
10
15

# In the version above, w and x are recomputed for each eval. If you don't want
# y and z to be evaluated twice, you must ask TensorFlow to evaluate both y and
# z in a single graph run:
with tf.Session() as sess:
    y_val, z_val = sess.run([y, z])
    print(y_val)  # 10
    print(z_val)  # 15
10
15
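
# A short sketch (the counter variable v is hypothetical, not from the book):
# node values are dropped between graph runs, but variable state is kept by
# the session from initialization until the session is closed
reset_graph()
v = tf.Variable(0, name="v")
inc_v = tf.assign(v, v + 1)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for _ in range(3):
        sess.run(inc_v)
    print(v.eval())  # 3: v kept its state across the three runs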

#################    Linear Regression with TensorFlow    ##############################
import numpy as np
from sklearn.datasets import fetch_california_housing

reset_graph()

housing = fetch_california_housing()
m, n = housing.data.shape
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

with tf.Session() as sess:
    theta_value = theta.eval()
theta_value
Downloading Cal. housing from https://ndownloader.figshare.com/files/5976036 to C:\Users\cheng\scikit_learn_data
Out[16]: 
array([[-3.7158638e+01],
       [ 4.3620414e-01],
       [ 9.3944781e-03],
       [-1.0677545e-01],
       [ 6.4319843e-01],
       [-4.0679406e-06],
       [-3.7808842e-03],
       [-4.2343181e-01],
       [-4.3694979e-01]], dtype=float32)

### Compute the Normal Equation, theta = inv(X^T X) X^T y, directly with NumPy
### (in float64, so the values differ slightly from TensorFlow's float32 result)
X = housing_data_plus_bias
y = housing.target.reshape(-1, 1)
theta_numpy = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)

print(theta_numpy)
[[-3.69419202e+01]
 [ 4.36693293e-01]
 [ 9.43577803e-03]
 [-1.07322041e-01]
 [ 6.45065694e-01]
 [-3.97638942e-06]
 [-3.78654265e-03]
 [-4.21314378e-01]
 [-4.34513755e-01]]

#### The same regression with Scikit-Learn's LinearRegression
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(housing.data, housing.target.reshape(-1, 1))

print(np.r_[lin_reg.intercept_.reshape(-1, 1), lin_reg.coef_.T])
[[-3.69419202e+01]
 [ 4.36693293e-01]
 [ 9.43577803e-03]
 [-1.07322041e-01]
 [ 6.45065694e-01]
 [-3.97638942e-06]
 [-3.78654265e-03]
 [-4.21314378e-01]
 [-4.34513755e-01]]

#####################    Implementing Gradient Descent    ##########################
# Gradient Descent requires scaling the feature vectors first. We could do
# that with TensorFlow, but for now let's use Scikit-Learn.

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]

print(scaled_housing_data_plus_bias.mean(axis=0))
print(scaled_housing_data_plus_bias.mean(axis=1))
print(scaled_housing_data_plus_bias.mean())
print(scaled_housing_data_plus_bias.shape)
[ 1.00000000e+00  6.60969987e-17  5.50808322e-18  6.60969987e-17
 -1.06030602e-16 -1.10161664e-17  3.44255201e-18 -1.07958431e-15
 -8.52651283e-15]
[ 0.38915536  0.36424355  0.5116157  ... -0.06612179 -0.06360587
  0.01359031]
0.11111111111111005
(20640, 9)
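
# For comparison, a sketch of the same standardization with plain NumPy:
# StandardScaler subtracts the column means and divides by the column standard
# deviations, so the two results should match
means = housing.data.mean(axis=0)
stds = housing.data.std(axis=0)
scaled_manually = (housing.data - means) / stds
print(np.allclose(scaled_manually, scaled_housing_data))  # True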

#### Manually computing the gradients
# The main loop executes the training step n_epochs times, printing out the
# current Mean Squared Error every 100 iterations. The MSE should keep decreasing.

reset_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    
    best_theta = theta.eval()
Epoch 0 MSE = 9.161542
Epoch 100 MSE = 0.71450037
Epoch 200 MSE = 0.56670487
Epoch 300 MSE = 0.55557173
Epoch 400 MSE = 0.5488112
Epoch 500 MSE = 0.5436363
Epoch 600 MSE = 0.5396291
Epoch 700 MSE = 0.5365093
Epoch 800 MSE = 0.53406775
Epoch 900 MSE = 0.5321473

best_theta
Out[22]: 
array([[ 2.0685525 ],
       [ 0.8874027 ],
       [ 0.14401658],
       [-0.34770885],
       [ 0.3617837 ],
       [ 0.00393811],
       [-0.04269556],
       [-0.6614528 ],
       [-0.63752776]], dtype=float32)
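
# A quick sanity check (a sketch, reusing the graph built above): the
# hand-coded gradient 2/m * X^T (X theta - y) should match what autodiff
# computes for the same theta, up to float32 rounding
auto_gradients = tf.gradients(mse, [theta])[0]
with tf.Session() as sess:
    sess.run(init)
    manual_val, auto_val = sess.run([gradients, auto_gradients])
print(np.allclose(manual_val, auto_val, rtol=1e-4, atol=1e-4))  # expect True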

#### Using autodiff
# tf.gradients() takes an op (here mse) and a list of variables, and creates a
# list of ops to compute the gradient of the op with respect to each variable,
# using reverse-mode automatic differentiation

reset_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

gradients = tf.gradients(mse, [theta])[0]

training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    
    best_theta = theta.eval()

print("Best theta:")
print(best_theta)
Epoch 0 MSE = 9.161542
Epoch 100 MSE = 0.71450037
Epoch 200 MSE = 0.56670487
Epoch 300 MSE = 0.55557173
Epoch 400 MSE = 0.5488112
Epoch 500 MSE = 0.5436363
Epoch 600 MSE = 0.5396291
Epoch 700 MSE = 0.5365092
Epoch 800 MSE = 0.53406775
Epoch 900 MSE = 0.5321473
Best theta:
[[ 2.0685525 ]
 [ 0.8874027 ]
 [ 0.14401658]
 [-0.34770882]
 [ 0.36178368]
 [ 0.00393811]
 [-0.04269556]
 [-0.6614528 ]
 [-0.6375277 ]]

# How would you find the partial derivatives of the following function with
# respect to a and b?

def my_func(a, b):
    z = 0
    for i in range(100):
        z = a * np.cos(z + i) + z * np.sin(b - i)
    return z

my_func(0.2, 0.3)
Out[27]: -0.21253923284754914

reset_graph()

a = tf.Variable(0.2, name="a")
b = tf.Variable(0.3, name="b")
z = tf.constant(0.0, name="z0")
for i in range(100):
    z = a * tf.cos(z + i) + z * tf.sin(b - i)

grads = tf.gradients(z, [a, b])
init = tf.global_variables_initializer()
with tf.Session() as sess:
    init.run()
    print(z.eval())
    print(sess.run(grads))
-0.21253741
[-1.1388495, 0.19671395]
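
# A sanity check (a sketch): central finite differences on the NumPy version
# of my_func should approximate the autodiff results above
eps = 1e-6
print((my_func(0.2 + eps, 0.3) - my_func(0.2 - eps, 0.3)) / (2 * eps))  # ~ -1.1388
print((my_func(0.2, 0.3 + eps) - my_func(0.2, 0.3 - eps)) / (2 * eps))  # ~ 0.1967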

#### Using a GradientDescentOptimizer
# TensorFlow's optimizers can compute the gradients and apply the update step
# for you, replacing the manual gradients/training_op lines:

reset_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    
    best_theta = theta.eval()

print("Best theta:")
print(best_theta)
Epoch 0 MSE = 9.161542
Epoch 100 MSE = 0.71450037
Epoch 200 MSE = 0.56670487
Epoch 300 MSE = 0.55557173
Epoch 400 MSE = 0.5488112
Epoch 500 MSE = 0.5436363
Epoch 600 MSE = 0.5396291
Epoch 700 MSE = 0.5365092
Epoch 800 MSE = 0.53406775
Epoch 900 MSE = 0.5321473
Best theta:
[[ 2.0685525 ]
 [ 0.8874027 ]
 [ 0.14401658]
 [-0.34770882]
 [ 0.36178368]
 [ 0.00393811]
 [-0.04269556]
 [-0.6614528 ]
 [-0.6375277 ]]

#### Using a momentum optimizer
# Swapping in a different optimizer is a one-line change; momentum usually
# converges much faster than plain Gradient Descent:

reset_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                       momentum=0.9)

training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        sess.run(training_op)
    
    best_theta = theta.eval()

print("Best theta:")
print(best_theta)
Best theta:
[[ 2.068558  ]
 [ 0.8296286 ]
 [ 0.11875337]
 [-0.26554456]
 [ 0.3057109 ]
 [-0.00450251]
 [-0.03932662]
 [-0.89986444]
 [-0.87052065]]
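
# A follow-up sketch (not in the original notes): plug best_theta back into
# the NumPy formula to check the final training MSE of the momentum solution;
# expect a value near the ~0.52-0.53 MSE floor seen in the runs above
y_pred_np = scaled_housing_data_plus_bias.dot(best_theta)
print(((y_pred_np - housing.target.reshape(-1, 1)) ** 2).mean())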


Reposted from blog.csdn.net/c1z2w3456789/article/details/120285881