[keras] Creating custom layers, and some confusion about the build function

  • On the build confusion: build is called once before the layer's call logic runs for the first time; if it has already been called, it will not be called again. The flag that records this is self.built — once it is True, the next __call__ skips build. That is why we never need an extra build step when using the official layers (a toy sketch of this gating follows below).
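  • A toy sketch of that gating logic (illustration only, not the real Keras implementation — just the idea of the self.built flag):
import numpy as np

class LazyBuildSketch(object):
    """Illustration only: how __call__ gates build() with self.built."""

    def __init__(self):
        self.built = False

    def build(self, input_shape):
        print('build() runs once, input_shape =', input_shape)
        self.built = True

    def __call__(self, x):
        if not self.built:       # only the very first call reaches build()
            self.build(x.shape)
        return x                 # a real layer would return call(x) here

layer = LazyBuildSketch()
x = np.zeros((2, 16))
layer(x)   # prints: build() runs once, ...
layer(x)   # silent: self.built is already True, so build() is skipped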
  • Only the custom layer's __init__ needs to call super; in build it is enough to set self.built = True manually (calling super().build(input_shape) at the end does exactly that for you). Below is the latest official example.
from keras import backend as K
from keras.layers import Layer

class MyLayer(Layer):

    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        assert isinstance(input_shape, list)
        # Create a trainable weight variable for this layer.
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[0][1], self.output_dim),
                                      initializer='uniform',
                                      trainable=True)
        super(MyLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        assert isinstance(x, list)
        a, b = x
        return [K.dot(a, self.kernel) + b, K.mean(b, axis=-1)]

    def compute_output_shape(self, input_shape):
        assert isinstance(input_shape, list)
        shape_a, shape_b = input_shape
        return [(shape_a[0], self.output_dim), shape_b[:-1]]
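  • A quick usage sketch for the layer above (shapes are made up; note that the second input must match output_dim so the addition inside call is valid, and that this first call is what triggers build):
from keras.layers import Input
from keras.models import Model

a = Input(shape=(16,))
b = Input(shape=(32,))                    # must equal output_dim for "K.dot(a, kernel) + b"
out_sum, out_mean = MyLayer(32)([a, b])   # __call__ triggers build() here, exactly once
model = Model(inputs=[a, b], outputs=[out_sum, out_mean])
model.summary()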
  • Here is another example, this time taken from the tf.keras source: the layer is not built separately for every call, and build here does not call super either — it simply sets self.built = True at the end:
@tf_export('keras.layers.Dense')
class Dense(Layer):
  """Just your regular densely-connected NN layer.
  `Dense` implements the operation:
  `output = activation(dot(input, kernel) + bias)`
  where `activation` is the element-wise activation function
  passed as the `activation` argument, `kernel` is a weights matrix
  created by the layer, and `bias` is a bias vector created by the layer
  (only applicable if `use_bias` is `True`).
  Note: if the input to the layer has a rank greater than 2, then
  it is flattened prior to the initial dot product with `kernel`.
  Example:
  python
      # as first layer in a sequential model:
      model = Sequential()
      model.add(Dense(32, input_shape=(16,)))
      # now the model will take as input arrays of shape (*, 16)
      # and output arrays of shape (*, 32)
      # after the first layer, you don't need to specify
      # the size of the input anymore:
      model.add(Dense(32))

  Arguments:
      units: Positive integer, dimensionality of the output space.
      activation: Activation function to use.
          If you don't specify anything, no activation is applied
          (ie. "linear" activation: `a(x) = x`).
      use_bias: Boolean, whether the layer uses a bias vector.
      kernel_initializer: Initializer for the `kernel` weights matrix.
      bias_initializer: Initializer for the bias vector.
      kernel_regularizer: Regularizer function applied to
          the `kernel` weights matrix.
      bias_regularizer: Regularizer function applied to the bias vector.
      activity_regularizer: Regularizer function applied to
          the output of the layer (its "activation")..
      kernel_constraint: Constraint function applied to
          the `kernel` weights matrix.
      bias_constraint: Constraint function applied to the bias vector.
  Input shape:
      nD tensor with shape: `(batch_size, ..., input_dim)`.
      The most common situation would be
      a 2D input with shape `(batch_size, input_dim)`.
  Output shape:
      nD tensor with shape: `(batch_size, ..., units)`.
      For instance, for a 2D input with shape `(batch_size, input_dim)`,
      the output would have shape `(batch_size, units)`.
  """

  def __init__(self,
               units,
               activation=None,
               use_bias=True,
               kernel_initializer='glorot_uniform',
               bias_initializer='zeros',
               kernel_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               bias_constraint=None,
               **kwargs):
    if 'input_shape' not in kwargs and 'input_dim' in kwargs:
      kwargs['input_shape'] = (kwargs.pop('input_dim'),)

    super(Dense, self).__init__(
        activity_regularizer=regularizers.get(activity_regularizer), **kwargs)
    self.units = int(units)
    self.activation = activations.get(activation)
    self.use_bias = use_bias
    self.kernel_initializer = initializers.get(kernel_initializer)
    self.bias_initializer = initializers.get(bias_initializer)
    self.kernel_regularizer = regularizers.get(kernel_regularizer)
    self.bias_regularizer = regularizers.get(bias_regularizer)
    self.kernel_constraint = constraints.get(kernel_constraint)
    self.bias_constraint = constraints.get(bias_constraint)

    self.supports_masking = True
    self.input_spec = InputSpec(min_ndim=2)

  def build(self, input_shape):
    input_shape = tensor_shape.TensorShape(input_shape)
    if input_shape[-1].value is None:
      raise ValueError('The last dimension of the inputs to `Dense` '
                       'should be defined. Found `None`.')
    self.input_spec = InputSpec(min_ndim=2,
                                axes={-1: input_shape[-1].value})
    self.kernel = self.add_weight(
        'kernel',
        shape=[input_shape[-1].value, self.units],
        initializer=self.kernel_initializer,
        regularizer=self.kernel_regularizer,
        constraint=self.kernel_constraint,
        dtype=self.dtype,
        trainable=True)
    if self.use_bias:
      self.bias = self.add_weight(
          'bias',
          shape=[self.units,],
          initializer=self.bias_initializer,
          regularizer=self.bias_regularizer,
          constraint=self.bias_constraint,
          dtype=self.dtype,
          trainable=True)
    else:
      self.bias = None
    self.built = True

  def call(self, inputs):
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    rank = common_shapes.rank(inputs)
    if rank > 2:
      # Broadcasting is required for the inputs.
      outputs = standard_ops.tensordot(inputs, self.kernel, [[rank - 1], [0]])
      # Reshape the output back to the original ndim of the input.
      if not context.executing_eagerly():
        shape = inputs.get_shape().as_list()
        output_shape = shape[:-1] + [self.units]
        outputs.set_shape(output_shape)
    else:
      outputs = gen_math_ops.mat_mul(inputs, self.kernel)
    if self.use_bias:
      outputs = nn.bias_add(outputs, self.bias)
    if self.activation is not None:
      return self.activation(outputs)  # pylint: disable=not-callable
    return outputs

  def compute_output_shape(self, input_shape):
    input_shape = tensor_shape.TensorShape(input_shape)
    input_shape = input_shape.with_rank_at_least(2)
    if input_shape[-1].value is None:
      raise ValueError(
          'The innermost dimension of input_shape must be defined, but saw: %s'
          % input_shape)
    return input_shape[:-1].concatenate(self.units)

  def get_config(self):
    config = {
        'units': self.units,
        'activation': activations.serialize(self.activation),
        'use_bias': self.use_bias,
        'kernel_initializer': initializers.serialize(self.kernel_initializer),
        'bias_initializer': initializers.serialize(self.bias_initializer),
        'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
        'bias_regularizer': regularizers.serialize(self.bias_regularizer),
        'activity_regularizer':
            regularizers.serialize(self.activity_regularizer),
        'kernel_constraint': constraints.serialize(self.kernel_constraint),
        'bias_constraint': constraints.serialize(self.bias_constraint)
    }
    base_config = super(Dense, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))
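  • To see that no manual build is needed, a small check (assuming a TF 1.x-style tf.keras, matching the source quoted above):
import tensorflow as tf

dense = tf.keras.layers.Dense(32)
print(dense.built)                        # False: no weights exist yet
x = tf.keras.layers.Input(shape=(16,))
y = dense(x)                              # first call -> build() -> kernel and bias created
print(dense.built)                        # True
print([w.shape.as_list() for w in dense.weights])  # [[16, 32], [32]]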


  • A layer I wrote myself:
import tensorflow as tf
K = tf.keras

class Actionselect(object):
  """Thin wrapper (not a Layer subclass) that just delegates to an inner Dense."""

  def __init__(self, action_class, **kwargs):
    # The inner Dense is configured here and builds its weights lazily on its first call.
    self.multiclass_dense_layer = K.layers.Dense(action_class)

  def __call__(self, input_data):
    return self.multiclass_dense_layer(input_data)
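  • A usage sketch for this wrapper (the input size is made up):
selector = Actionselect(action_class=4)
x = K.layers.Input(shape=(128,))
logits = selector(x)   # the inner Dense builds itself on this first call; output shape (None, 4)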
  • Basically a layer gets initialized in two places: in __init__ and in build. That is the whole story as long as it does not contain other layers inside it. If it does, instantiating the inner layer in __init__ only configures it; its weights still have to be created, so you additionally call self.multiclass_dense_layer.build(input_shape) inside your own build (as the SoftmaxLoss example below does).
  • super(SoftmaxLoss, self).__init__(**kwargs) is there to initialize the Layer itself. The inner layer added with
    self.multiclass_dense_layer = K.layers.Dense(self.vocab_size) needs no further explicit initialization, because passing self.vocab_size to it is already its initialization.
  • If you need multiple inputs, concatenate them before they go into the network; multiple outputs are even simpler — just return several tensors from call (a small functional-API sketch of this appears after the SoftmaxLoss code below).
import tensorflow as tf
K = tf.keras

class SoftmaxLoss(K.layers.Layer):
  """Softmax xentropy loss with candidate sampling."""

  def __init__(self,
               vocab_size,
               num_candidate_samples=-1,
               vocab_freqs=None,
               **kwargs):
    self.vocab_size = vocab_size
    self.num_candidate_samples = num_candidate_samples
    self.vocab_freqs = vocab_freqs
    super(SoftmaxLoss, self).__init__(**kwargs)
    self.multiclass_dense_layer = K.layers.Dense(self.vocab_size)  

  def build(self, input_shape):
    input_shape = input_shape[0]
    with tf.device('/cpu:0'):
      self.lin_w = self.add_weight(
          shape=(input_shape[-1], self.vocab_size),
          name='lm_lin_w',
          initializer=K.initializers.glorot_uniform())
      self.lin_b = self.add_weight(
          shape=(self.vocab_size,),
          name='lm_lin_b',
          initializer=K.initializers.glorot_uniform())
      self.multiclass_dense_layer.build(input_shape)

    super(SoftmaxLoss, self).build(input_shape)

  def call(self, inputs):
    x, labels, weights = inputs
    if self.num_candidate_samples > -1:
      assert self.vocab_freqs is not None
      labels_reshaped = tf.reshape(labels, [-1])
      labels_reshaped = tf.expand_dims(labels_reshaped, -1)
      sampled = tf.nn.fixed_unigram_candidate_sampler(
          true_classes=labels_reshaped,
          num_true=1,
          num_sampled=self.num_candidate_samples,
          unique=True,
          range_max=self.vocab_size,
          unigrams=self.vocab_freqs)
      inputs_reshaped = tf.reshape(x, [-1, int(x.get_shape()[2])])

      lm_loss = tf.nn.sampled_softmax_loss(
          weights=tf.transpose(self.lin_w),
          biases=self.lin_b,
          labels=labels_reshaped,
          inputs=inputs_reshaped,
          num_sampled=self.num_candidate_samples,
          num_classes=self.vocab_size,
          sampled_values=sampled)
      lm_loss = tf.reshape(
          lm_loss,
          [int(x.get_shape()[0]), int(x.get_shape()[1])])
    else:
      logits = self.multiclass_dense_layer(x)
      lm_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logits, labels=labels)

    # _num_labels is a helper defined elsewhere in the original source
    # (roughly the sum of the label weights, floored at 1 to avoid division by zero).
    lm_loss = tf.identity(
        tf.reduce_sum(lm_loss * weights) / _num_labels(weights),
        name='lm_xentropy_loss')
    return lm_loss
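  • A small functional-API sketch of the multiple-input / multiple-output point above (layer sizes are made up; K is tf.keras as defined earlier):
in_a = K.layers.Input(shape=(16,))
in_b = K.layers.Input(shape=(8,))
merged = K.layers.concatenate([in_a, in_b])          # concatenate before feeding the network
hidden = K.layers.Dense(32, activation='relu')(merged)
out_1 = K.layers.Dense(10)(hidden)                   # multiple outputs: just hand the model
out_2 = K.layers.Dense(1)(hidden)                    # (or return from call) several tensors
model = K.models.Model(inputs=[in_a, in_b], outputs=[out_1, out_2])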


Reposted from blog.csdn.net/weixin_34198881/article/details/87337827