Deep Learning: Implementing tensorflow-style Layers, Deep Learning with numpy (Part 2)

Implementing the base class Layer:

As mentioned previously, a layer comprises: a shape, an activation function, the handling of gradients, and the handling of the output layer.

import numpy as np

class Layer:

    def __init__(self, shape):
        self.shape = shape

    def _activate(self, x, predict):
        pass
    
    def __str__(self):
        return self.__class__.__name__
    
    def __repr__(self):
        return str(self)

    @property
    def name(self):
        return str(self)
    
    
    # v^i = f(u^i); the y passed to derivative and bp below is this output v^i
    def derivative(self, y):
        pass

    # forward pass: v = f(xW + b)
    def activate(self, x, w, bias, predict=False):
        return self._activate(x.dot(w) + bias, predict)

    # Backpropagation: computes the error of a non-output layer.
    # y = v^i, w = W^(i+1), pre_delta = delta^(i+1)
    #   delta^i = (delta^(i+1) . (W^(i+1))^T) * f'(v^i)
    # For Sigmoid, f' = f * (1 - f), so f' is cheap to evaluate from y = v^i alone.
    def bp(self, y, w, pre_delta):
        return pre_delta.dot(w.T) * self.derivative(y)

Implementing the activation layers:

class Sigmoid(Layer):
    # sigmoid activation: f(x) = 1 / (1 + exp(-x))
    def _activate(self, x, predict):
        return 1 / (1 + np.exp(-x))

    # derivative in terms of the output y = f(x): f'(x) = y * (1 - y)
    def derivative(self, y):
        return y * (1 - y)
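
Other activation layers can be added in exactly the same way: override _activate with the forward function and derivative with the derivative written in terms of the layer's output. As an illustration (these two classes are not part of the original post), a Tanh layer and a ReLU layer might look like this:

class Tanh(Layer):
    # tanh activation
    def _activate(self, x, predict):
        return np.tanh(x)

    # for y = tanh(x), f'(x) = 1 - y^2
    def derivative(self, y):
        return 1 - y ** 2


class ReLU(Layer):
    # rectified linear unit
    def _activate(self, x, predict):
        return np.maximum(0, x)

    # for y = max(0, x), f'(x) = 1 where y > 0 and 0 elsewhere
    def derivative(self, y):
        return (y > 0).astype(np.float64)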
    

Implementing CostLayer:

This involves two parts: one is how the loss is computed, the other is the special transform function; in addition, the error of the last layer has to be computed separately.

# =============================================================================
#     The cost layer has no activation function, but it may have a special
#     transform function (e.g. Softmax), and it also needs a loss function.
#     When defining its derivative, the special transform has to be taken into
#     account so that the combined gradient is computed correctly.
# =============================================================================
# The output layer does two things: apply the special transform, and compute the
# gradient of the last layer, which has to be handled separately.
class CostLayer(Layer):
    """
        初始化结构
        self._available_cost_functions:记录所有损失函数的字典
        self._available_transform_functions:记录所有特殊变换函数的字典
        self._cost_function、self._cost_function_name:记录损失函数及其名字的两个属性
        self._transform_function 、self._transform:记录特殊变换函数及其名字的两个属性
    """
    def __init__(self, shape, cost_function="MSE", transform=None):
        super(CostLayer, self).__init__(shape)
        self._available_cost_functions = {
                "MSE": CostLayer._mse,
                "SVM": CostLayer._svm,
                "CrossEntropy": CostLayer._cross_entropy
                }
        
        self._available_transform_functions = {
                "Softmax": CostLayer._softmax,
                "Sigmoid": CostLayer._sigmoid
                }
        
        self._cost_function_name = cost_function
        self._cost_function = self._available_cost_functions[cost_function]
        
        if transform is None and cost_function == "CrossEntropy":
            self._transform = "Softmax"
            self._transform_function = CostLayer._softmax
            
        else:
            self._transform = transform
            self._transform_function = self._available_transform_functions.get(transform,None)
            
    def __str__(self):
        return self._cost_function_name
    
    def _activate(self, x, predict):
        if self._transform_function is None:
            return x
        return self._transform_function(x)
    
    # left as a no-op here; the output layer's error is computed by bp_first below
    def _derivative(self, y, delta=None):
        pass
    
    @staticmethod
    def safe_exp(x):
        # numerically stable exp: subtract the row-wise maximum before exponentiating
        return np.exp(x - np.max(x, axis=1, keepdims=True))

    # Special transform functions
    @staticmethod
    def _softmax(y, diff=False):
        if diff:
            return y * (1 - y)
        exp_y = CostLayer.safe_exp(y)
        return exp_y / np.sum(exp_y, axis=1, keepdims=True)
    
    @staticmethod
    def _sigmoid(y, diff=False):
        if diff:
            return y * (1 - y)
        return 1 / (1 + np.exp(-y))
    
    # Compute the error of the output layer separately
    def bp_first(self, y, y_pred):
        # For CrossEntropy with a Softmax (or Sigmoid) transform, the derivatives of the
        # loss and the transform cancel, giving delta = y - y_pred directly
        if self._cost_function_name == "CrossEntropy" and self._transform in ("Softmax", "Sigmoid"):
            return y - y_pred
        # Otherwise, fall back to the general formula:
        #   delta = -dL/dy_pred                         (no special transform)
        #   delta = -dL/dy_pred * transform'(y_pred)    (with a special transform)
        dy = -self._cost_function(y, y_pred)
        if self._transform_function is None:
            return dy
        return dy * self._transform_function(y_pred, diff=True)
    
    # returns the loss value itself (diff=False)
    @property
    def calculate(self):
        return lambda y, y_pred: self._cost_function(y, y_pred, False)
    
    # Loss functions: diff=True returns dL/dy_pred, diff=False returns the loss value
    @staticmethod
    def _mse(y, y_pred, diff=True):
        if diff:
            return y_pred - y
        return 0.5 * np.average((y - y_pred) ** 2)

    @staticmethod
    def _svm(y, y_pred, diff=True):
        # Multi-class hinge loss, a minimal sketch (assumes one-hot y and margin 1);
        # the original post registers "SVM" but does not show its definition
        n = y_pred.shape[0]
        correct = np.sum(y * y_pred, axis=1, keepdims=True)
        margins = np.maximum(0, y_pred - correct + 1) * (1 - y)
        if diff:
            grad = (margins > 0).astype(y_pred.dtype)
            grad -= y * np.sum(grad, axis=1, keepdims=True)
            return grad / n
        return np.sum(margins) / n

    @staticmethod
    def _cross_entropy(y, y_pred, diff=True, eps=1e-8):
        if diff:
            return -y / (y_pred + eps) + (1 - y) / (1 - y_pred + eps)
        return np.average(-y * np.log(y_pred + eps) - (1 - y) * np.log(1 - y_pred + eps))
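
To see how the pieces fit together, here is a minimal end-to-end sketch (not from the original post; the layer sizes, the learning rate lr and the update step are assumptions) that wires a Sigmoid hidden layer to a Softmax/CrossEntropy CostLayer, runs one forward pass, and backpropagates the errors:

np.random.seed(0)

# toy data: 5 samples, 3 features, 2 classes (one-hot labels)
x = np.random.randn(5, 3)
y = np.eye(2)[np.random.randint(0, 2, 5)]

# a 3 -> 4 -> 2 network: Sigmoid hidden layer + Softmax/CrossEntropy output layer
hidden = Sigmoid((3, 4))
output = CostLayer((4, 2), cost_function="CrossEntropy")  # transform defaults to Softmax

w1, b1 = np.random.randn(3, 4) * 0.1, np.zeros(4)
w2, b2 = np.random.randn(4, 2) * 0.1, np.zeros(2)

# forward pass
v1 = hidden.activate(x, w1, b1)         # sigmoid(x.W1 + b1), shape (5, 4)
y_pred = output.activate(v1, w2, b2)    # softmax(v1.W2 + b2), shape (5, 2)
print("loss:", output.calculate(y, y_pred))

# backward pass: output-layer error first, then propagate to the hidden layer
delta2 = output.bp_first(y, y_pred)     # y - y_pred for Softmax + CrossEntropy
delta1 = hidden.bp(v1, w2, delta2)      # (delta2 . W2^T) * v1 * (1 - v1)

# the deltas already carry the minus sign, so "+=" performs gradient descent
lr = 0.1
w2 += lr * v1.T.dot(delta2)
b2 += lr * delta2.sum(axis=0)
w1 += lr * x.T.dot(delta1)
b1 += lr * delta1.sum(axis=0)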

Reposted from blog.csdn.net/weixin_40759186/article/details/86440818