TensorFlow official tutorial notes -- Automatic differentiation and gradient tape

import tensorflow as tf
import numpy as np
tf.enable_eager_execution() # this must be called at the very start of the program, before any other TF operations
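(Note: tf.enable_eager_execution() is TensorFlow 1.x API; in TensorFlow 2.x eager execution is on by default and the function only survives as tf.compat.v1.enable_eager_execution. To confirm eager mode is active in either version:)

print(tf.executing_eagerly())  # True when eager execution is enabled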

x = tf.ones((2, 2))

with tf.GradientTape() as t:
  t.watch(x)
  y = tf.reduce_sum(x)
  z = tf.multiply(y, y)

print(y)
print(z)
# Derivative of z with respect to the original input tensor x
dz_dx = t.gradient(z, x)
for i in [0, 1]:
  for j in [0, 1]:
    print(dz_dx[i][j])
    print(dz_dx[i][j].numpy())
    assert dz_dx[i][j].numpy() == 8.0
tf.Tensor(4.0, shape=(), dtype=float32)
tf.Tensor(16.0, shape=(), dtype=float32)
tf.Tensor(8.0, shape=(), dtype=float32)
8.0
tf.Tensor(8.0, shape=(), dtype=float32)
8.0
tf.Tensor(8.0, shape=(), dtype=float32)
8.0
tf.Tensor(8.0, shape=(), dtype=float32)
8.0
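Why each entry is 8.0: y = sum(x) = 4 and z = y*y, so by the chain rule dz/dx[i][j] = 2*y * dy/dx[i][j] = 2 * 4 * 1 = 8.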
x = tf.constant(np.array(3))
x
<tf.Tensor: id=513, shape=(), dtype=int32, numpy=3>
y = tf.constant(3)
y
<tf.Tensor: id=515, shape=(), dtype=int32, numpy=3>
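Both calls above produce the same int32 scalar. One difference worth keeping in mind (a minimal sketch): tf.constant preserves a NumPy array's dtype, while a Python float literal defaults to float32:

x = tf.constant(np.array(3.0))  # NumPy's default float dtype is float64
print(x.dtype)                  # <dtype: 'float64'>
y = tf.constant(3.0)            # Python float literal becomes float32
print(y.dtype)                  # <dtype: 'float32'>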

tf.GradientTape:

A non-persistent tape releases its resources as soon as gradient() is called once; a second call raises an error:

x = tf.constant(3.0)
with tf.GradientTape() as g:
  g.watch(x)
  y = x * x
  z = 2*y
dz_dy = g.gradient(z, y) # Will compute to 2.0
dz_dx = g.gradient(z, x) # Raises: the non-persistent tape was already consumed
print(dz_dy)
print(dz_dx)
---------------------------------------------------------------------------

RuntimeError                              Traceback (most recent call last)

<ipython-input-34-9060b9af2a1a> in <module>
      5   z = 2*y
      6 dz_dy = g.gradient(z, y) # Will compute to 2.0
----> 7 dz_dx = g.gradient(z, x) # Raises: the non-persistent tape was already consumed
      8 print(dz_dy)
      9 print(dz_dx)


D:\Anaconda2\envs\py3\lib\site-packages\tensorflow\python\eager\backprop.py in gradient(self, target, sources, output_gradients)
    871     """
    872     if self._tape is None:
--> 873       raise RuntimeError("GradientTape.gradient can only be called once on "
    874                          "non-persistent tapes.")
    875     if self._recording:


RuntimeError: GradientTape.gradient can only be called once on non-persistent tapes.

To call gradient() more than once (for example, to differentiate with respect to several tensors along the chain), create the GradientTape with persistent=True:

x = tf.constant(3.0)
with tf.GradientTape(persistent=True) as g:
  g.watch(x)
  y = x * x
  z = 2*y
dz_dy = g.gradient(z, y) # Will compute to 2.0
dz_dx = g.gradient(z, x) # Will compute to 12.0 (dz/dx = 4x at x = 3)
print(dz_dy)
print(dz_dx)
tf.Tensor(2.0, shape=(), dtype=float32)
tf.Tensor(12.0, shape=(), dtype=float32)

Without watch(), a constant tensor is not traced and every gradient comes back as None; once watch(x) is called, everything computed downstream of x is recorded:

x = tf.constant(3.0)
with tf.GradientTape(persistent=True) as g:
  #g.watch(x)
  y = x * x
  z = 2*y
dz_dy = g.gradient(z, y) # None: x was never watched, so nothing was recorded
dz_dx = g.gradient(z, x) # None for the same reason
print(dz_dy)
print(dz_dx)
None
None
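watch() is only needed for plain tensors such as tf.constant results; trainable tf.Variable objects are watched by the tape automatically. A minimal sketch:

x = tf.Variable(3.0)     # variables are watched automatically, no watch() needed
with tf.GradientTape() as g:
  y = x * x
print(g.gradient(y, x))  # tf.Tensor(6.0, shape=(), dtype=float32)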

reset(): Clears all information stored in this tape.

with tf.GradientTape() as t:
  loss = loss_fn()
with tf.GradientTape() as t:
  loss += other_loss_fn()
t.gradient(loss, ...)  # Only differentiates other_loss_fn, not loss_fn


# The following is equivalent to the above
with tf.GradientTape() as t:
  loss = loss_fn()
  t.reset()
  loss += other_loss_fn()
t.gradient(loss, ...)  # Only differentiates other_loss_fn, not loss_fn
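A runnable version of the same idea, with hypothetical stand-ins for loss_fn and other_loss_fn (both names above are placeholders from the docs, not concrete functions):

x = tf.constant(2.0)
with tf.GradientTape() as t:
  t.watch(x)
  loss = x * x          # stand-in for loss_fn(); would contribute d/dx = 2x
  t.reset()             # wipe everything recorded so far
  t.watch(x)            # reset also forgets watched tensors, so watch again
  loss += 3 * x         # stand-in for other_loss_fn(); contributes d/dx = 3
print(t.gradient(loss, x))  # tf.Tensor(3.0, shape=(), dtype=float32)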

stop_recording(): Temporarily stops recording operations on this tape.

Wrapping computations that need no gradient (such as the gradient call itself) saves memory:

with tf.GradientTape(persistent=True) as t:
    loss = compute_loss(model)
    with t.stop_recording():
      # The gradient computation below is not traced, saving memory.
      grads = t.gradient(loss, model.variables)
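A runnable sketch of the same behavior (compute_loss and model above are placeholders from the docs): operations executed inside stop_recording() leave no trace on the tape, so gradients of values produced there come back as None:

x = tf.constant(3.0)
with tf.GradientTape(persistent=True) as t:
  t.watch(x)
  y = x * x                  # recorded
  with t.stop_recording():
    w = 2 * y                # NOT recorded
print(t.gradient(y, x))      # tf.Tensor(6.0, shape=(), dtype=float32)
print(t.gradient(w, x))      # None: w's computation was not traced
del t                        # free the persistent tape's resources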

del: when finished with a (persistent) tape, drop the reference so its resources are released:

x = tf.ones((2, 2))
  
with tf.GradientTape() as t:
  t.watch(x)
  y = tf.reduce_sum(x)
  z = tf.multiply(y, y)

# Use the tape to compute the derivative of z with respect to the
# intermediate value y.
dz_dy = t.gradient(z, y)
assert dz_dy.numpy() == 8.0
t
<tensorflow.python.eager.backprop.GradientTape at 0x2f1e59530b8>
x = tf.constant(3.0)
with tf.GradientTape(persistent=True) as t:
  t.watch(x)
  y = x * x
  z = y * y
dz_dx = t.gradient(z, x)  # 108.0 (4*x^3 at x = 3)
dy_dx = t.gradient(y, x)  # 6.0
del t  # Drop the reference to the tape
t
---------------------------------------------------------------------------

NameError                                 Traceback (most recent call last)

<ipython-input-84-5c1aa7ffd189> in <module>
      7 dy_dx = t.gradient(y, x)  # 6.0
      8 del t  # Drop the reference to the tape
----> 9 t


NameError: name 't' is not defined
Because the tape records operations as they are executed, Python control flow (loops, conditionals) is handled naturally:

def f(x, y):
  output = 1.0
  for i in range(y):
    if i > 1 and i < 5:
      output = tf.multiply(output, x)
  return output

def grad(x, y):
  with tf.GradientTape() as t:
    t.watch(x)
    out = f(x, y)
  return t.gradient(out, x) 

x = tf.convert_to_tensor(2.0)

assert grad(x, 6).numpy() == 12.0  # loop multiplies for i = 2, 3, 4: f = x**3, f' = 3x**2 = 12 at x = 2
assert grad(x, 5).numpy() == 12.0  # same three iterations, hence the same gradient
assert grad(x, 4).numpy() == 4.0   # loop multiplies for i = 2, 3: f = x**2, f' = 2x = 4

print(grad(x, 6))
tf.Tensor(12.0, shape=(), dtype=float32)

tf.convert_to_tensor() vs. tf.constant():

x = tf.convert_to_tensor(2.0)
x
<tf.Tensor: id=1193, shape=(), dtype=float32, numpy=2.0>
x = tf.constant(2.0)
x
<tf.Tensor: id=1195, shape=(), dtype=float32, numpy=2.0>
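For scalar Python inputs the two are interchangeable, as shown above; tf.convert_to_tensor additionally accepts lists, NumPy arrays, and existing tensors:

print(tf.convert_to_tensor([1.0, 2.0]))       # a list becomes a float32 tensor
print(tf.convert_to_tensor(tf.constant(5.0))) # existing tensors are accepted too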

Higher-order derivatives: nest one GradientTape inside another to differentiate a gradient:

x = tf.constant(3.0)
with tf.GradientTape() as g:
  g.watch(x)
  with tf.GradientTape() as gg:
    gg.watch(x)
    y = x * x
  dy_dx = gg.gradient(y, x)     # Will compute to 6.0
d2y_dx2 = g.gradient(dy_dx, x)  # Will compute to 2.0

print(dy_dx)
print(d2y_dx2)
tf.Tensor(6.0, shape=(), dtype=float32)
tf.Tensor(2.0, shape=(), dtype=float32)
x = tf.constant(3.0)
with tf.GradientTape(persistent=True) as g:
  g.watch(x)
  y = x * x
  z = 2*y
dz_dy = g.gradient(z, y) # Will compute to 2.0
dz_dx = g.gradient(dz_dy, x) # None: dz_dy was computed outside the tape's context
print(dz_dy)
print(dz_dx)


tf.Tensor(2.0, shape=(), dtype=float32)
None
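The None above is expected: a tape only records operations executed inside its with block, and the first gradient() call here runs after the block, so dz_dy has no recorded connection to x (dz_dy = 2.0 is a constant anyway). To differentiate a gradient, compute it inside an outer tape, as in the nested example above. A sketch for this same z = 2y chain:

x = tf.constant(3.0)
with tf.GradientTape() as g:
  g.watch(x)
  with tf.GradientTape() as gg:
    gg.watch(x)
    y = x * x
    z = 2 * y
  dz_dx = gg.gradient(z, x)    # 4x = 12.0; runs while g is still recording
d2z_dx2 = g.gradient(dz_dx, x) # d(4x)/dx = 4.0
print(dz_dx)                   # tf.Tensor(12.0, shape=(), dtype=float32)
print(d2z_dx2)                 # tf.Tensor(4.0, shape=(), dtype=float32)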


Reposted from blog.csdn.net/yskyskyer123/article/details/86774582