MXNet parameter regularization

get_internals()

Gets a new grouped symbol sgroup. The output of sgroup is a list of outputs of all of the internal nodes.

>>> a = mx.sym.var('a')
>>> b = mx.sym.var('b')
>>> c = a + b
>>> d = c.get_internals()
>>> d
<Symbol Grouped>

>>> d.list_outputs()
['a', 'b', '_plus4_output']
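
One common use of get_internals(), seen again in getMTL below, is to cut a network at an internal layer by indexing the grouped symbol with '<layer_name>_output'. A minimal sketch on a toy symbol (the layer names here are illustrative only):

import mxnet as mx

data = mx.sym.Variable('data')
fc1 = mx.sym.FullyConnected(data=data, name='fc1', num_hidden=64)
out = mx.sym.softmax(data=fc1, name='softmax')

internals = out.get_internals()
feat = internals['fc1_output']     # symbol truncated right after the fc1 layer
# internals.list_outputs() is roughly:
# ['data', 'fc1_weight', 'fc1_bias', 'fc1_output', 'softmax_output']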

The same machinery lets us pull every weight and bias symbol out of a network and feed it into a symbolic L2 penalty:

def L2_penalty(w, b):
    # w and b are the weight and bias symbols of one layer;
    # return the sum of their squared entries as a symbol
    return mx.sym.sum(mx.sym.square(w)) + mx.sym.sum(mx.sym.square(b))

def get_symbol():
    ...
    fc3 = mx.sym.FullyConnected(
        name='fc3', data=dropout2, num_hidden=num_classes)
    if dtype == 'float16':
        fc3 = mx.sym.Cast(data=fc3, dtype=np.float32)
    output = mx.sym.softmax(data=fc3, axis=1, name='softmax_layer')

    print(output.get_internals()['conv1_weight'])

    # list_arguments() is ['data', w1, b1, w2, b2, ...]; skip 'data' at index 0
    # and walk the remaining parameter names in (weight, bias) pairs.
    arg_names = output.list_arguments()
    internals = output.get_internals()
    mate_cnn_fc = [L2_penalty(internals[arg_names[i]], internals[arg_names[i + 1]])
                   for i in range(1, len(arg_names), 2)]
    mates_sum = mx.sym.add_n(*mate_cnn_fc)

    loss = mx.sym.mean(emd_l2(output, label, num_classes)) + WeightDecay * mates_sum
    emd2_loss = mx.sym.MakeLoss(loss, name='loss')
    pred_loss = mx.sym.Group(
        [mx.sym.BlockGrad(output, name='pred'), emd2_loss])
    # softmax = mx.sym.SoftmaxOutput(data=fc3, name='softmax')
    # return softmax
    return pred_loss
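
For reference, a self-contained sketch of the same weight/bias penalty on a toy two-layer network (the fc1/fc2 names are illustrative, not taken from the model above); filtering argument names by their _weight/_bias suffix avoids relying on the exact argument order:

import mxnet as mx

data = mx.sym.Variable('data')
fc1 = mx.sym.FullyConnected(data=data, name='fc1', num_hidden=64)
act1 = mx.sym.Activation(data=fc1, act_type='relu', name='relu1')
fc2 = mx.sym.FullyConnected(data=act1, name='fc2', num_hidden=10)

internals = fc2.get_internals()
penalties = [mx.sym.sum(mx.sym.square(internals[name]))
             for name in fc2.list_arguments()
             if name.endswith('_weight') or name.endswith('_bias')]
l2_term = mx.sym.add_n(*penalties)   # symbolic scalar: sum of all squared parameters
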
# Network definition
def getMTL(sym, layer_name):
    all_layers = sym.get_internals()
    flat = all_layers[layer_name + '_output']

    pred_gender = mx.symbol.FullyConnected(data=flat, num_hidden=2, name='pred_gender')
#     pred_gender = mx.symbol.FullyConnected(data=flat, num_hidden=1, name='pred_gender')
    pred_age = mx.symbol.FullyConnected(data=flat, num_hidden=1, name='pred_age')
    pred_mask = mx.symbol.FullyConnected(data=flat, num_hidden=2, name='pred_mask')
    pred_glass = mx.symbol.FullyConnected(data=flat, num_hidden=2, name='pred_glass')
    pred_sunglass = mx.symbol.FullyConnected(data=flat, num_hidden=2, name='pred_sunglass')
    pred_hat = mx.symbol.FullyConnected(data=flat, num_hidden=2, name='pred_hat') 

    labels = mx.symbol.Variable('attr_label')

    label_gender = mx.symbol.slice_axis(data=labels, axis=1, begin=0, end=1,name='slice01')
    label_gender = mx.symbol.Flatten(data=label_gender)
    label_gender_reshape = mx.symbol.Reshape(data=label_gender, shape=(-1,))
    loss_gender = mx.symbol.SoftmaxOutput(data=pred_gender, label=label_gender_reshape ,grad_scale=1, use_ignore=True, ignore_label=-1,name='gender_out')
#     loss_gender = mx.symbol.LogisticRegressionOutput(data=pred_gender, label=label_gender_reshape) * (label_gender != -1)

    label_age = mx.symbol.slice_axis(data=labels, axis=1, begin=1, end=2,name='slice12')
    label_age = mx.symbol.Flatten(data=label_age)
#     label_age=label_age/50.0
#     pred_age = pred_age/50.0
    label_age_reshape = mx.symbol.Reshape(data=label_age, shape=(-1,))
#     loss_age = mx.symbol.LogisticRegressionOutput(data=pred_age, label=label_age_reshape) * (label_age != -1)
    loss_age = mx.symbol.Custom(data=pred_age, label=label_age, op_type='l2_regression')* (label_age != -1)

    label_mask = mx.symbol.slice_axis(data=labels, axis=1, begin=2, end=3,name='slice23')
    label_mask = mx.symbol.Flatten(data=label_mask)
    label_mask_reshape = mx.symbol.Reshape(data=label_mask, shape=(-1,))
    loss_mask = mx.symbol.SoftmaxOutput(data=pred_mask, label=label_mask_reshape, grad_scale=1, use_ignore=True, ignore_label=-1 ,name='mask_out')

    label_glass = mx.symbol.slice_axis(data=labels, axis=1, begin=3, end=4,name='slice34')
    label_glass = mx.symbol.Flatten(data=label_glass)
    label_glass_reshape = mx.symbol.Reshape(data=label_glass, shape=(-1,))
    loss_glass = mx.symbol.SoftmaxOutput(data=pred_glass, label=label_glass_reshape, grad_scale=1, use_ignore=True, ignore_label=-1 ,name='glass_out')

    label_sunglass = mx.symbol.slice_axis(data=labels, axis=1, begin=4, end=5,name='slice45')
    label_sunglass = mx.symbol.Flatten(data=label_sunglass)
    label_sunglass_reshape = mx.symbol.Reshape(data=label_sunglass, shape=(-1,))
    loss_sunglass = mx.symbol.SoftmaxOutput(data=pred_sunglass, label=label_sunglass_reshape, grad_scale=1, use_ignore=True, ignore_label=-1,name='sunglass_out')

    label_hat = mx.symbol.slice_axis(data=labels, axis=1, begin=5, end=6,name='slice56')
    label_hat = mx.symbol.Flatten(data=label_hat)
    label_hat_reshape = mx.symbol.Reshape(data=label_hat, shape=(-1,))
    loss_hat = mx.symbol.SoftmaxOutput(data=pred_hat, label=label_hat_reshape, grad_scale=1, use_ignore=True, ignore_label=-1,name='hat_out')

    return mx.symbol.Group([loss_gender, loss_age,loss_mask,loss_glass,loss_sunglass, loss_hat])
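
A hypothetical usage sketch (the backbone symbol, the layer name 'flatten0', the input shape, and the six-column attr_label are all assumptions for illustration, not taken from the original training script):

# get_backbone_symbol() is a stand-in for whatever feature extractor is used;
# 'flatten0' is an assumed name for its flattened feature layer.
backbone = get_backbone_symbol()
sym = getMTL(backbone, 'flatten0')
mod = mx.mod.Module(symbol=sym, data_names=['data'], label_names=['attr_label'])
mod.bind(data_shapes=[('data', (32, 3, 112, 112))],
         label_shapes=[('attr_label', (32, 6))])
mod.init_params()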

list_arguments()

Lists all the arguments in the symbol.

Example

>>> a = mx.sym.var('a')
>>> b = mx.sym.var('b')
>>> c = a + b
>>> c.list_arguments()
['a', 'b']
Returns:    args – List containing the names of all the arguments required to compute the symbol.
>>> data = mx.sym.Variable('data')
>>> prev = mx.sym.Variable('prev')
>>> fc1  = mx.sym.FullyConnected(data=data, name='fc1', num_hidden=128)
>>> fc2  = mx.sym.FullyConnected(data=prev, name='fc2', num_hidden=128)
>>> out  = mx.sym.Activation(data=mx.sym.elemwise_add(fc1, fc2), act_type='relu')
>>> out.list_arguments()
['data', 'fc1_weight', 'fc1_bias', 'prev', 'fc2_weight', 'fc2_bias']
>>> out.infer_shape(data=(10,64))
(None, None, None)
>>> out.infer_shape_partial(data=(10,64))
([(10L, 64L), (128L, 64L), (128L,), (), (), ()], [(10L, 128L)], [])
>>> # infers shape if you give information about fc2
>>> out.infer_shape(data=(10,64), prev=(10,128))
([(10L, 64L), (128L, 64L), (128L,), (10L, 128L), (128L, 128L), (128L,)], [(10L, 128L)], [])

- parameters (for modules with parameters)
        - `get_params()`: returns a tuple `(arg_params, aux_params)`. Each of those
          is a dictionary mapping parameter names to ``NDArray``. Those ``NDArray`` always
          live on CPU, while the actual parameters used for computation might live on other
          devices (GPUs); this function retrieves (a copy of) the latest parameters, so
          modifying the returned dictionaries does not by itself change the parameters on
          the devices.
        - ``set_params(arg_params, aux_params)``: assigns parameters to the devices
          doing the computation, as sketched below.
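
A minimal sketch of this contract (assuming `mod` is an already bound and initialized mx.mod.Module):

arg_params, aux_params = mod.get_params()             # CPU copies of the current parameters
halved = {k: v * 0.5 for k, v in arg_params.items()}  # editing the copies changes nothing on its own
mod.set_params(halved, aux_params)                    # push the modified values back to the devices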

def get_params(self):
        """Gets parameters, those are potentially copies of the the actual parameters used
        to do computation on the device.

        Returns
        -------
        ``(arg_params, aux_params)``
            A pair of dictionaries each mapping parameter names to NDArray values.

        Examples
        --------
        >>> # An example of getting module parameters.
        >>> print mod.get_params()
        ({'fc2_weight': <NDArray ...>, 'fc1_weight': <NDArray ...>,
        'fc3_bias': <NDArray ...>, 'fc3_weight': <NDArray ...>,
        'fc2_bias': <NDArray ...>, 'fc1_bias': <NDArray ...>}, {})
        """
        raise NotImplementedError()

def update(self):
        """Updates parameters according to the installed optimizer and the gradients computed
        in the previous forward-backward batch.

        Examples
        --------
        >>> # An example of updating module parameters.
        >>> mod.init_optimizer(kvstore='local', optimizer='sgd',
        ...     optimizer_params=(('learning_rate', 0.01), ))
        >>> mod.backward()
        >>> mod.update()
        >>> print mod.get_params()[0]['fc3_weight'].asnumpy()
        [[  5.86930104e-03   5.28078526e-03  -8.88729654e-03  -1.08308345e-03
            6.13054074e-03   4.27560415e-03   1.53817423e-03   4.62131854e-03
            4.69872449e-03  -2.42400169e-03   9.94111411e-04   1.12386420e-03
            ...]]
        """
        raise NotImplementedError()


def save_params(self, fname):
        """Saves model parameters to file.

        Parameters
        ----------
        fname : str
            Path to output param file.

        Examples
        --------
        >>> # An example of saving module parameters.
        >>> mod.save_params('myfile')
        """
        arg_params, aux_params = self.get_params()
        save_dict = {('arg:%s' % k) : v.as_in_context(cpu()) for k, v in arg_params.items()}
        save_dict.update({('aux:%s' % k) : v.as_in_context(cpu()) for k, v in aux_params.items()})
        ndarray.save(fname, save_dict)

def load_params(self, fname):
        """Loads model parameters from file.

        Parameters
        ----------
        fname : str
            Path to input param file.

        Examples
        --------
        >>> # An example of loading module parameters.
        >>> mod.load_params('myfile')
        """
        save_dict = ndarray.load(fname)
        arg_params = {}
        aux_params = {}
        for k, value in save_dict.items():
            arg_type, name = k.split(':', 1)
            if arg_type == 'arg':
                arg_params[name] = value
            elif arg_type == 'aux':
                aux_params[name] = value
            else:
                raise ValueError("Invalid param file " + fname)
        self.set_params(arg_params, aux_params)

def forward(self, data_batch, is_train=None):
        """Forward computation. It supports data batches with different shapes, such as
        different batch sizes or different image sizes.
        If reshaping of data batch relates to modification of symbol or module, such as
        changing image layout ordering or switching from training to predicting, module
        rebinding is required.

        Parameters
        ----------
        data_batch : DataBatch
            Could be anything with similar API implemented.
        is_train : bool
            Default is ``None``, which means `is_train` takes the value of ``self.for_training``.

        Examples
        --------
        >>> import mxnet as mx
        >>> from collections import namedtuple
        >>> Batch = namedtuple('Batch', ['data'])
        >>> data = mx.sym.Variable('data')
        >>> out = data * 2
        >>> mod = mx.mod.Module(symbol=out, label_names=None)
        >>> mod.bind(data_shapes=[('data', (1, 10))])
        >>> mod.init_params()
        >>> data1 = [mx.nd.ones((1, 10))]
        >>> mod.forward(Batch(data1))
        >>> print mod.get_outputs()[0].asnumpy()
        [[ 2.  2.  2.  2.  2.  2.  2.  2.  2.  2.]]
        >>> # Forward with data batch of different shape
        >>> data2 = [mx.nd.ones((3, 5))]
        >>> mod.forward(Batch(data2))
        >>> print mod.get_outputs()[0].asnumpy()
        [[ 2.  2.  2.  2.  2.]
         [ 2.  2.  2.  2.  2.]
         [ 2.  2.  2.  2.  2.]]
        """
        raise NotImplementedError()

def backward(self, out_grads=None):
        """Backward computation.

        Parameters
        ----------
        out_grads : NDArray or list of NDArray, optional
            Gradient on the outputs to be propagated back.
            This parameter is only needed when bind is called
            on outputs that are not a loss function.

        Examples
        --------
        >>> # An example of backward computation.
        >>> mod.backward()
        >>> print mod.get_input_grads()[0].asnumpy()
        [[[  1.10182791e-05   5.12257748e-06   4.01927764e-06   8.32566820e-06
            -1.59775993e-06   7.24269375e-06   7.28067835e-06  -1.65902311e-05
             5.46342608e-06   8.44196393e-07]
             ...]]
        """
        raise NotImplementedError()

def get_outputs(self, merge_multi_context=True):
        """Gets outputs of the previous forward computation.

        If `merge_multi_context` is ``True``, it is like ``[out1, out2]``. Otherwise,
        it returns output of the form ``[[out1_dev1, out1_dev2], [out2_dev1, out2_dev2]]``.
        All the output elements have type `NDArray`. When `merge_multi_context` is ``False``,
        those `NDArray` instances might live on different devices.

        Parameters
        ----------
        merge_multi_context : bool
            Defaults to ``True``. In the case when data-parallelism is used, the outputs
            will be collected from multiple devices. A ``True`` value indicates that we
            should merge the collected results so that they look as if they came from a single
            executor.

        Returns
        -------
        list of `NDArray` or list of list of `NDArray`.
            Output

        Examples
        --------
        >>> # An example of getting forward output.
        >>> print mod.get_outputs()[0].asnumpy()
        [[ 0.09999977  0.10000153  0.10000716  0.10000195  0.09999853  0.09999743
           0.10000272  0.10000113  0.09999088  0.09999888]]
        """
        raise NotImplementedError()

def get_input_grads(self, merge_multi_context=True):
        """Gets the gradients to the inputs, computed in the previous backward computation.

        If `merge_multi_context` is ``True``, it is like ``[grad1, grad2]``. Otherwise, it
        is like ``[[grad1_dev1, grad1_dev2], [grad2_dev1, grad2_dev2]]``. All the output
        elements have type `NDArray`. When `merge_multi_context` is ``False``, those `NDArray`
        instances might live on different devices.

        Parameters
        ----------
        merge_multi_context : bool
            Defaults to ``True``. In the case when data-parallelism is used, the gradients
            will be collected from multiple devices. A ``True`` value indicates that we
            should merge the collected results so that they look as if they came from a single
            executor.

        Returns
        -------
        list of NDArray or list of list of NDArray
              Input gradients.

        Examples
        --------
        >>> # An example of getting input gradients.
        >>> print mod.get_input_grads()[0].asnumpy()
        [[[  1.10182791e-05   5.12257748e-06   4.01927764e-06   8.32566820e-06
            -1.59775993e-06   7.24269375e-06   7.28067835e-06  -1.65902311e-05
            5.46342608e-06   8.44196393e-07]
            ...]]
        """
        raise NotImplementedError()

class mxnet.metric.CompositeEvalMetric(metrics=None, name='composite', output_names=None, label_names=None)[source]
Manages multiple evaluation metrics.

Parameters: 
metrics (list of EvalMetric) – List of child metrics.
name (str) – Name of this metric instance for display.
output_names (list of str, or None) – Name of predictions that should be used when updating with update_dict. By default include all predictions.
label_names (list of str, or None) – Name of labels that should be used when updating with update_dict. By default include all labels.

Examples

>>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])]
>>> labels   = [mx.nd.array([0, 1, 1])]
>>> eval_metrics_1 = mx.metric.Accuracy()
>>> eval_metrics_2 = mx.metric.F1()
>>> eval_metrics = mx.metric.CompositeEvalMetric()
>>> for child_metric in [eval_metrics_1, eval_metrics_2]:
>>>     eval_metrics.add(child_metric)
>>> eval_metrics.update(labels = labels, preds = predicts)
>>> print eval_metrics.get()
(['accuracy', 'f1'], [0.6666666666666666, 0.8])

View all of the variables in an mxnet symbol graph, along with their shapes:

>>> import mxnet as mx
>>> 
>>> a = mx.sym.Variable('data')
>>> b = mx.sym.FullyConnected(data=a,name='fc1',num_hidden=100)
>>> data_shape = {'data':(256,64)}
>>> arg_shape,_,_ = b.infer_shape(**data_shape)
>>> b.list_arguments()               # list all arguments of the symbol: the input plus the FC layer's weight and bias
['data', 'fc1_weight', 'fc1_bias']
>>> arg_shape                        # the shapes of the three arguments above
[(256L, 64L), (100L, 64L), (100L,)] 

Implementing regularization from scratch
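
The objective minimized in the code below is the squared error plus an L2 penalty on the weights and the bias, scaled by lambd (written for a single example, matching batch_size = 1):

$$\ell(\mathbf{w}, b) = \big(\mathbf{x}^\top \mathbf{w} + b - y\big)^2 + \lambda \big(\lVert \mathbf{w} \rVert_2^2 + b^2\big)$$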
Code:

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Regularization example, following Mu Li's (沐神) tutorial
# using a synthetically generated dataset


from mxnet import ndarray as nd
from mxnet import autograd
from mxnet import gluon
num_train = 20
num_test = 100
num_inputs = 200

# Generate the dataset
# Ground-truth model parameters
true_w = nd.ones((num_inputs, 1)) * 0.01
true_b = 0.05

# Generate the training and test data sets
X = nd.random.normal(shape=(num_train + num_test, num_inputs))
Y = nd.dot(X, true_w)
Y += .01 * nd.random.normal(shape=Y.shape)


X_train, X_test = X[:num_train], X[num_train:]
Y_train, Y_test = Y[:num_train], Y[num_train:]


# Define a function that returns batch_size random examples and their targets on each call
import random
batch_size = 1
def data_iter(num_examples):
    idx = list(range(num_examples))
    random.shuffle(idx)
    for i in range(0, num_examples, batch_size):
        j = nd.array(idx[i:min(i + batch_size, num_examples)])
        yield X.take(j), Y.take(j)

# Initialize the model parameters
def get_params():
    w = nd.random.normal(shape=(num_inputs, 1)) * 0.1
    b = nd.zeros((1,))
    for param in (w, b):
        param.attach_grad()
    return (w, b)

# L2-norm regularization
def L2_penalty(w, b):
    return (w ** 2).sum() + b ** 2


# Define training and testing
import matplotlib as mpl
mpl.rcParams['figure.dpi']= 120
import matplotlib.pyplot as plt

def net(X, lambd, w, b):
    return nd.dot(X, w) + b

def square_loss(yhat, y):
    return (yhat - y.reshape(yhat.shape)) ** 2

def SGD(params, lr):
    for param in params:
        param[:] = param - lr * param.grad


def test(params, X, y):
    return square_loss(net(X, 0, *params), y).mean().asscalar()


def train(lambd):
    epochs = 10
    learning_rate = 0.002
    params = get_params()
    train_loss = []
    test_loss = []
    for e in range(epochs):
        for data, label in data_iter(num_train):
            with autograd.record():
                output = net(data, lambd, *params)
                # Add the L2 penalty to punish overly complex models
                loss = square_loss(output, label) + lambd * L2_penalty(*params)
            loss.backward()
            SGD(params, learning_rate)
        train_loss.append(test(params, X_train, Y_train))
        test_loss.append(test(params, X_test, Y_test))
    plt.plot(train_loss)
    plt.plot(test_loss)
    plt.legend(['train', 'test'])
    plt.show()
    return 'learned w[:10]:', params[0][:10], 'learned b:', params[1]

# Test
# First without regularization, lambda = 0
train(0)
# With regularization, lambda = 2.5
# train(2.5)

Using Gluon
Code:

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Regularization example, following Mu Li's (沐神) tutorial
# using a synthetically generated dataset
# implemented with gluon


from mxnet import ndarray as nd
from mxnet import autograd
from mxnet import gluon
num_train = 20
num_test = 100
num_inputs = 200

true_w = nd.ones((num_inputs, 1)) * 0.01
true_b = 0.05

X = nd.random.normal(shape=(num_train + num_test, num_inputs))
y = nd.dot(X, true_w)
y += .01 * nd.random.normal(shape=y.shape)

X_train, X_test = X[:num_train, :], X[num_train:, :]
y_train, y_test = y[:num_train], y[num_train:]


import matplotlib.pyplot as plt
import matplotlib as mpl

batch_size = 1
dataset_train = gluon.data.ArrayDataset(X_train, y_train)
data_iter_train = gluon.data.DataLoader(dataset_train, batch_size, shuffle=True)

square_loss = gluon.loss.L2Loss()

def test(net, X, y):
    return square_loss(net(X), y).mean().asscalar()


def train(weight_decay):
    learning_rate = 0.005
    epochs = 10
    net = gluon.nn.Sequential()
    with net.name_scope():
        net.add(gluon.nn.Dense(1))
    net.initialize()
    # Note the 'wd' (weight decay) key here:
    # the optimizer's wd parameter regularizes the model (equivalent to L2-norm regularization)
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {
        'learning_rate': learning_rate, 'wd': weight_decay})
    train_loss = []
    test_loss = []
    for e in range(epochs):
        for data, label in data_iter_train:
            with autograd.record():
                output = net(data)
                loss = square_loss(output, label)
            loss.backward()
            trainer.step(batch_size)
        train_loss.append(test(net, X_train, y_train))
        test_loss.append(test(net, X_test, y_test))
    plt.plot(train_loss)
    plt.plot(test_loss)
    plt.legend(['train', 'test'])
    plt.show()
    return ('learned w[:10]:', net[0].weight.data()[:, :10],
            'learned b:', net[0].bias.data())

train(6)
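
For intuition, ignoring momentum, gradient rescaling and clipping, plain sgd with weight decay updates each parameter roughly as sketched below; adding wd * w to the gradient is what makes 'wd' act like an L2 penalty (it is the gradient of (wd / 2) * ||w||^2):

from mxnet import nd

learning_rate, weight_decay = 0.005, 6.0
w = nd.random.normal(shape=(3, 1))
grad = nd.random.normal(shape=(3, 1))   # stand-in for the gradient computed by backward()
w[:] = w - learning_rate * (grad + weight_decay * w)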


Reposted from blog.csdn.net/u013381011/article/details/79680493