Use Tensorflow structures from the encoder (Autoencoder)

From the encoder is a data compression algorithm, in which the data compression and decompression of data compression function is related, from the training sample came. Most from the encoder, the compression and decompression functions are implemented by the neural network.

1. convolutional neural network structures from the encoder

Import MNIST data set (grayscale pixel range of 0 to 1)

import numpy as np
 import tensorflow as tf
 import matplotlib.pyplot as plt
 from tensorflow.examples.tutorials.mnist import input_data
 mnist = input_data.read_data_sets('MNIST_data', validation_size=0)

View Code

Build networks

  inputs_ = tf.placeholder(tf.float32, (None, 28, 28, 1), name='inputs')
  targets_ = tf.placeholder(tf.float32, (None, 28, 28, 1), name='targets')
  ### Encoder
  conv1 = tf.layers.conv2d(inputs_, 16, (3,3), padding='same', activation=tf.nn.relu) # 28x28x16
  maxpool1 = tf.layers.max_pooling2d(conv1, (2,2), (2,2), padding='same') # 14x14x16
  conv2 = tf.layers.conv2d(maxpool1, 8, (3,3), padding='same', activation=tf.nn.relu) # 14x14x8
  maxpool2 = tf.layers.max_pooling2d(conv2, (2,2), (2,2), padding='same') # 7x7x8
  conv3 = tf.layers.conv2d(maxpool2, 8, (3,3), padding='same', activation=tf.nn.relu) # 7x7x8
  encoded = tf.layers.max_pooling2d(conv3, (2,2), (2,2), padding='same') # 4x4x8
  ### Decoder
  upsample1 = tf.image.resize_nearest_neighbor(encoded, (7,7)) # 7x7x8
  conv4 = tf.layers.conv2d(upsample1, 8, (3,3), padding='same', activation=tf.nn.relu) # 7x7x8
  upsample2 = tf.image.resize_nearest_neighbor(conv4, (14,14)) # 14x14x8
  conv5 = tf.layers.conv2d(upsample2, 8, (3,3), padding='same', activation=tf.nn.relu) # 14x14x8
  upsample3 = tf.image.resize_nearest_neighbor(conv5, (28,28)) # 28x28x8
  conv6 = tf.layers.conv2d(upsample3, 16, (3,3), padding='same', activation=tf.nn.relu) # 28x28x16
  logits = tf.layers.conv2d(conv6, 1, (3,3), padding='same', activation=None) # 28x28x1
  decoded = tf.nn.sigmoid(logits, name='decoded') # 28x28x1
  ### Loss and Optimization:
  loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=targets_, logits=logits)
  cost = tf.reduce_mean(loss)
  opt = tf.train.AdamOptimizer(0.001).minimize(cost)

View Code

The model used in the decoding section is not upsample + convolution transposed convolution ( Reference )

Training Network

  sess = tf.Session()
  epochs = 20
  batch_size = 200
  sess.run(tf.global_variables_initializer())
  for e in range(epochs):
      for ii in range(mnist.train.num_examples//batch_size):
          batch = mnist.train.next_batch(batch_size)
          imgs = batch[0].reshape((-1, 28, 28, 1))
          batch_cost, _ = sess.run([cost, opt], feed_dict={inputs_: imgs, targets_: imgs})
          print("Epoch: {}/{}...".format(e+1, epochs), "Training loss: {:.4f}".format(batch_cost))

View Code

Network test

  fig, axes = plt.subplots(nrows=2, ncols=10, sharex=True, sharey=True, figsize=(20,4))
  in_imgs = mnist.test.images[:10]
  reconstructed, compressed = sess.run([decoded, encoded], feed_dict={inputs_: in_imgs.reshape((10, 28, 28, 1))})
  # plot
  for images, row in zip([in_imgs, reconstructed], axes):
      for img, ax in zip(images, row):
          ax.imshow(img.reshape((28, 28)), cmap='Greys_r')
          ax.get_xaxis().set_visible(False)
          ax.get_yaxis().set_visible(False)
  fig.tight_layout(pad=0.1)
  sess.close()

View Code

2. Noise from the encoder

Network structures (ibid., But the number of feature map by 16-8-8-8-8-16 becomes 32-32-16-16-32-32)

Training Network

  sess = tf.Session()
  epochs = 100
  batch_size = 200
  # Set's how much noise we're adding to the MNIST images
  noise_factor = 0.5
  sess.run(tf.global_variables_initializer())
  for e in range(epochs):
      for ii in range(mnist.train.num_examples//batch_size):
          batch = mnist.train.next_batch(batch_size)
          # Get images from the batch
          imgs = batch[0].reshape((-1, 28, 28, 1))
          # Add random noise to the input images
          noisy_imgs = imgs + noise_factor * np.random.randn(*imgs.shape)
          # Clip the images to be between 0 and 1
          noisy_imgs = np.clip(noisy_imgs, 0., 1.)       
          # Noisy images as inputs, original images as targets
          batch_cost, _ = sess.run([cost, opt], feed_dict={inputs_: noisy_imgs, targets_: imgs})
          print("Epoch: {}/{}...".format(e+1, epochs), "Training loss: {:.4f}".format(batch_cost))

View Code

检验网络

  fig, axes = plt.subplots(nrows=2, ncols=10, sharex=True, sharey=True, figsize=(20,4))
  in_imgs = mnist.test.images[:10]
  noisy_imgs = in_imgs + noise_factor * np.random.randn(*in_imgs.shape)
  noisy_imgs = np.clip(noisy_imgs, 0., 1.)
  reconstructed = sess.run(decoded, feed_dict={inputs_: noisy_imgs.reshape((10, 28, 28, 1))})
  for images, row in zip([noisy_imgs, reconstructed], axes):
      for img, ax in zip(images, row):
          ax.imshow(img.reshape((28, 28)), cmap='Greys_r')
          ax.get_xaxis().set_visible(False)
          ax.get_yaxis().set_visible(False)
  fig.tight_layout(pad=0.1)
  sess.close()

View Code

Use Tensorflow structures from the encoder (Autoencoder)

Guess you like