import tensorflow as tf
import numpy as np
# Eager execution has to be enabled once, at program startup, before any
# other TensorFlow operation runs (TF 1.x API).
tf.enable_eager_execution()

x = tf.ones((2, 2))
with tf.GradientTape() as t:
    # x is a plain tensor (not a variable), so it is watched explicitly.
    t.watch(x)
    y = tf.reduce_sum(x)   # y = sum of the four ones = 4.0
    z = tf.multiply(y, y)  # z = y * y = 16.0
print(y)
print(z)

# Derivative of z with respect to the original input tensor x:
# every element of dz/dx equals 2 * y = 8.0.
dz_dx = t.gradient(z, x)
for i in [0, 1]:
    for j in [0, 1]:
        print(dz_dx[i][j])
        print(dz_dx[i][j].numpy())
        assert dz_dx[i][j].numpy() == 8.0
tf.Tensor(4.0, shape=(), dtype=float32)
tf.Tensor(16.0, shape=(), dtype=float32)
tf.Tensor(8.0, shape=(), dtype=float32)
8.0
tf.Tensor(8.0, shape=(), dtype=float32)
8.0
tf.Tensor(8.0, shape=(), dtype=float32)
8.0
tf.Tensor(8.0, shape=(), dtype=float32)
8.0
# Build a scalar tensor from a 0-d NumPy array and echo it.
x = tf.constant(np.array(3))
x
<tf.Tensor: id=513, shape=(), dtype=int32, numpy=3>
# Build the same scalar tensor directly from a Python int and echo it.
y = tf.constant(3)
y
<tf.Tensor: id=515, shape=(), dtype=int32, numpy=3>
tf.GradientTape:
非持久(默认)的 GradientTape 只能调用一次 gradient():调用后会立即释放其持有的资源,再次调用会抛出 RuntimeError:
x = tf.constant(3.0)
with tf.GradientTape() as g:
    g.watch(x)
    y = x * x
    z = 2 * y
dz_dy = g.gradient(z, y)  # dz/dy = 2.0; this first call uses up the tape
# Intentional failure: a second gradient() call on a non-persistent tape
# raises RuntimeError, so the prints below are never reached.
dz_dx = g.gradient(z, x)
print(dz_dy)
print(dz_dx)
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-34-9060b9af2a1a> in <module>
5 z = 2*y
6 dz_dy = g.gradient(z, y) # Will compute to 6.0
----> 7 dz_dx = g.gradient(z, x) # Will compute to 6.0
8 print(dz_dy)
9 print(dz_dx)
D:\Anaconda2\envs\py3\lib\site-packages\tensorflow\python\eager\backprop.py in gradient(self, target, sources, output_gradients)
871 """
872 if self._tape is None:
--> 873 raise RuntimeError("GradientTape.gradient can only be called once on "
874 "non-persistent tapes.")
875 if self._recording:
RuntimeError: GradientTape.gradient can only be called once on non-persistent tapes.
如果要在同一个 tape 上多次调用 gradient()(例如对链上的多个变量分别求导),需要在创建 GradientTape 时设置 persistent=True:
x = tf.constant(3.0)
with tf.GradientTape(persistent=True) as g:
    g.watch(x)
    y = x * x
    z = 2 * y
# A persistent tape allows gradient() to be called more than once.
dz_dy = g.gradient(z, y)  # dz/dy = 2.0
dz_dx = g.gradient(z, x)  # dz/dx = 4 * x = 12.0 at x = 3
print(dz_dy)
print(dz_dx)
tf.Tensor(2.0, shape=(), dtype=float32)
tf.Tensor(12.0, shape=(), dtype=float32)
对 tf.constant 这样的普通张量,不调用 watch 就无法求导(结果为 None);调用 watch 之后,链上依赖它的运算都会被记录下来:
x = tf.constant(3.0)
with tf.GradientTape(persistent=True) as g:
    # g.watch(x) is deliberately left out: x is a constant tensor, so the
    # operations computed from it are not recorded on the tape.
    y = x * x
    z = 2 * y
dz_dy = g.gradient(z, y)  # None: nothing was recorded for y
dz_dx = g.gradient(z, x)  # None: x was never watched
print(dz_dy)
print(dz_dx)
None
None
reset(): Clears all information stored in this tape.
# Illustrative snippet: loss_fn / other_loss_fn are placeholders and `...`
# stands for the sources to differentiate with respect to.
with tf.GradientTape() as t:
    loss = loss_fn()
with tf.GradientTape() as t:
    loss += other_loss_fn()
t.gradient(loss, ...)  # Only differentiates other_loss_fn, not loss_fn

# The following is equivalent to the above
with tf.GradientTape() as t:
    loss = loss_fn()
    # Discard what the tape has recorded so far (the loss_fn computation).
    t.reset()
    loss += other_loss_fn()
t.gradient(loss, ...)  # Only differentiates other_loss_fn, not loss_fn
stop_recording(): Temporarily stops recording operations on this tape.
节省内存
# Illustrative snippet: compute_loss and model are placeholders.
with tf.GradientTape(persistent=True) as t:
    loss = compute_loss(model)
    with t.stop_recording():
        # The gradient computation below is not traced, saving memory.
        grads = t.gradient(loss, model.variables)
del
x = tf.ones((2, 2))
with tf.GradientTape() as t:
    t.watch(x)
    y = tf.reduce_sum(x)
    z = tf.multiply(y, y)

# Use the tape to compute the derivative of z with respect to the
# intermediate value y: dz/dy = 2 * y = 8.0.
dz_dy = t.gradient(z, y)
assert dz_dy.numpy() == 8.0
# The name t is still bound to the tape object after gradient() was called.
t
<tensorflow.python.eager.backprop.GradientTape at 0x2f1e59530b8>
x = tf.constant(3.0)
with tf.GradientTape(persistent=True) as t:
    t.watch(x)
    y = x * x
    z = y * y
dz_dx = t.gradient(z, x)  # 108.0 (4*x^3 at x = 3)
dy_dx = t.gradient(y, x)  # 6.0
del t  # Drop the reference to the tape
# Intentional failure: t was just deleted, so this lookup raises NameError.
t
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-84-5c1aa7ffd189> in <module>
7 dy_dx = t.gradient(y, x) # 6.0
8 del t # Drop the reference to the tape
----> 9 t
NameError: name 't' is not defined
def f(x, y):
    """Multiply 1.0 by ``x`` once for every ``i`` in ``range(y)`` with 1 < i < 5.

    Args:
        x: Value passed to ``tf.multiply`` as the right-hand factor.
        y: Loop bound; an integer accepted by ``range``.

    Returns:
        The Python float ``1.0`` when no iteration satisfies the condition
        (``y <= 2``); otherwise the result of the chained ``tf.multiply``
        calls (``x`` multiplied in at most three times, for i = 2, 3, 4).
    """
    output = 1.0
    for i in range(y):
        if 1 < i < 5:
            output = tf.multiply(output, x)
    return output
def grad(x, y):
    """Return the gradient of ``f(x, y)`` with respect to ``x``.

    The call to ``f`` runs inside a ``tf.GradientTape`` context with ``x``
    explicitly watched, so the Python loop and branch inside ``f`` are
    differentiated as they actually executed.

    Args:
        x: Tensor to differentiate with respect to.
        y: Loop bound forwarded to ``f``.

    Returns:
        Whatever ``t.gradient(out, x)`` yields for the recorded computation.
    """
    with tf.GradientTape() as t:
        t.watch(x)
        out = f(x, y)
    return t.gradient(out, x)
x = tf.convert_to_tensor(2.0)
# (loop bound, expected gradient at x = 2): bounds 5 and 6 both multiply x in
# three times (gradient 3 * x**2 = 12), bound 4 only twice (gradient 2 * x = 4).
for bound, expected in ((6, 12.0), (5, 12.0), (4, 4.0)):
    assert grad(x, bound).numpy() == expected
print(grad(x, 6))
tf.Tensor(12.0, shape=(), dtype=float32)
tf.convert_to_tensor() 与 tf.constant() 对比:
# Convert a Python float to a tensor and echo it.
x = tf.convert_to_tensor(2.0)
x
<tf.Tensor: id=1193, shape=(), dtype=float32, numpy=2.0>
# Build a tensor from the same Python float with tf.constant and echo it.
x = tf.constant(2.0)
x
<tf.Tensor: id=1195, shape=(), dtype=float32, numpy=2.0>
高阶导数:
x = tf.constant(3.0)
with tf.GradientTape() as g:
    g.watch(x)
    with tf.GradientTape() as gg:
        gg.watch(x)
        y = x * x
    # The first-order gradient is taken inside g's context so that g records
    # it and can differentiate it again.
    dy_dx = gg.gradient(y, x)  # Will compute to 6.0
d2y_dx2 = g.gradient(dy_dx, x)  # Will compute to 2.0
print(dy_dx)
print(d2y_dx2)
tf.Tensor(6.0, shape=(), dtype=float32)
tf.Tensor(2.0, shape=(), dtype=float32)
x = tf.constant(3.0)
with tf.GradientTape(persistent=True) as g:
    g.watch(x)
    y = x * x
    z = 2 * y
dz_dy = g.gradient(z, y)  # dz/dy = 2.0
# This gradient() call runs after the `with` block has ended, so dz_dy is not
# linked to x on the tape: the result is None, not a second-order derivative.
# (The name dz_dx is kept from the earlier examples.)
dz_dx = g.gradient(dz_dy, x)
print(dz_dy)
print(dz_dx)
tf.Tensor(2.0, shape=(), dtype=float32)
None