基类Layer的实现:
前面已经提到过,一个 Layer 包含:shape、激活函数、梯度的处理,以及输出层的特殊处理。
import numpy as np
class Layer:
    """Abstract base class for a network layer.

    A layer knows its shape, provides the forward affine transform,
    and implements the generic back-propagation rule used by every
    non-output layer. Concrete layers override ``_activate`` and
    ``derivative``.
    """

    def __init__(self, shape):
        # shape: the layer's dimensions — presumably (n_in, n_out);
        # TODO(review): confirm against the callers that build layers.
        self.shape = shape

    def _activate(self, x, predict):
        """Apply the activation function; overridden by subclasses."""
        pass

    def __str__(self):
        return self.__class__.__name__

    def __repr__(self):
        return str(self)

    @property
    def name(self):
        """Human-readable layer name (the class name by default)."""
        return str(self)

    def derivative(self, y):
        """Derivative of the activation, expressed through its output.

        Given v^i = f(u^i), subclasses return f'(u^i) written as a
        function of y = v^i; overridden by subclasses.
        """
        pass

    def activate(self, x, w, bias):
        """Forward pass: the affine transform X @ W + b."""
        return x.dot(w) + bias

    def bp(self, y, w, pre_delta):
        """Back-propagate the error through a hidden layer.

        Implements delta^i = (delta^{i+1} @ W^T) * f'(v^i), where
        ``y`` is v^i, ``w`` is W^i and ``pre_delta`` is delta^{i+1}.
        """
        propagated = pre_delta.dot(w.T)
        return propagated * self.derivative(y)
激活层的实现:
class Sigmoid(Layer):
    """Sigmoid activation layer: f(x) = 1 / (1 + exp(-x))."""

    def _activate(self, x, predict=None):
        # `predict` matches the base-class signature Layer._activate(x, predict)
        # (the original omitted it, so framework calls passing `predict`
        # would raise TypeError). Sigmoid behaves identically at train and
        # predict time, so the flag is unused; the default keeps old
        # two-argument call sites working.
        return 1 / (1 + np.exp(-x))

    def derivative(self, y):
        # With y = f(x), f'(x) = f(x) * (1 - f(x)) = y * (1 - y).
        return y * (1 - y)
CostLayer的实现:
涉及到三个部分的处理:一是损失的计算方式,二是特殊的变换,三是最后一层的误差需要单独计算。
# =============================================================================
# 没有激活函数、但可能会有特殊的变换函数(比如说 Softmax),同时还需要定义某个损失函数
# 定义导函数时,需要考虑到自身特殊的变换函数并计算相应的、整合后的梯度
# =============================================================================
# 输出层有两个功能:一是特殊的变换,二是计算最后一层的梯度,最后一层的梯度需要特殊处理
class CostLayer(Layer):
    """Output (cost) layer.

    Has no activation of its own, but may apply a special transform
    (e.g. Softmax) and owns the loss function. It has two jobs: apply
    the transform in the forward pass, and compute the output layer's
    delta separately in the backward pass (``bp_first``).

    Attributes:
        _available_cost_functions: dict mapping name -> loss function
        _available_transform_functions: dict mapping name -> transform
        _cost_function, _cost_function_name: chosen loss and its name
        _transform_function, _transform: chosen transform and its name

    Bug fixed: the original registered ``CostLayer._svm`` in
    ``_available_cost_functions`` but never defined it, so *every*
    instantiation raised AttributeError. ``_svm`` (multiclass hinge
    loss) is now implemented.
    """

    def __init__(self, shape, cost_function="MSE", transform=None):
        super(CostLayer, self).__init__(shape)
        self._available_cost_functions = {
            "MSE": CostLayer._mse,
            "SVM": CostLayer._svm,
            "CrossEntropy": CostLayer._cross_entropy
        }
        self._available_transform_functions = {
            "Softmax": CostLayer._softmax,
            "Sigmoid": CostLayer._sigmoid
        }
        self._cost_function_name = cost_function
        self._cost_function = self._available_cost_functions[cost_function]
        if transform is None and cost_function == "CrossEntropy":
            # CrossEntropy pairs naturally with Softmax by default.
            self._transform = "Softmax"
            self._transform_function = CostLayer._softmax
        else:
            self._transform = transform
            self._transform_function = self._available_transform_functions.get(transform, None)

    def __str__(self):
        return self._cost_function_name

    def _activate(self, x, predict):
        """Forward pass: apply the special transform, if any."""
        if self._transform_function is None:
            return x
        return self._transform_function(x)

    def _derivative(self, y, delta=None):
        # The cost layer's gradient is handled by bp_first instead.
        pass

    @staticmethod
    def safe_exp(x):
        """Numerically stable exp: shift by the per-row max (assumes 2D input)."""
        return np.exp(x - np.max(x, axis=1, keepdims=True))

    # Special transform functions
    @staticmethod
    def _softmax(y, diff=False):
        if diff:
            return y * (1 - y)
        exp_y = CostLayer.safe_exp(y)
        return exp_y / np.sum(exp_y, axis=1, keepdims=True)

    @staticmethod
    def _sigmoid(y, diff=False):
        if diff:
            return y * (1 - y)
        return 1 / (1 + np.exp(-y))

    def bp_first(self, y, y_pred):
        """Compute the output layer's delta (handled separately).

        With CrossEntropy + Softmax/Sigmoid the two derivatives cancel
        and the delta collapses to (y - y_pred); otherwise the generic
        chain rule is used: -dL/dv, times the transform's derivative
        when a transform is present.
        """
        if self._cost_function_name == "CrossEntropy" and (self._transform == "Softmax" or self._transform == "Sigmoid"):
            return y - y_pred
        dy = -self._cost_function(y, y_pred)
        if self._transform_function is None:
            return dy
        return dy * self._transform_function(y_pred, diff=True)

    @property
    def calculate(self):
        """Callable computing the loss value (diff=False) for (y, y_pred)."""
        return lambda y, y_pred: self._cost_function(y, y_pred, False)

    # Loss functions: diff=True returns dL/d(y_pred), diff=False the loss value.
    @staticmethod
    def _mse(y, y_pred, diff=True):
        if diff:
            return y_pred - y
        return 0.5 * np.average((y - y_pred) ** 2)

    @staticmethod
    def _svm(y, y_pred, diff=True):
        # Multiclass hinge loss. `y` is one-hot; the correct class index
        # per sample is its argmax. Margin: max(0, s_j - s_correct + 1).
        n = y_pred.shape[0]
        correct = np.argmax(y, axis=1)
        correct_scores = y_pred[np.arange(n), correct][:, None]
        margins = np.maximum(0, y_pred - correct_scores + 1.0)
        margins[np.arange(n), correct] = 0  # the correct class contributes no margin
        if diff:
            grad = np.zeros_like(y_pred)
            grad[margins > 0] = 1
            # Each violated margin pushes the correct-class score down by 1.
            grad[np.arange(n), correct] -= np.sum(grad, axis=1)
            return grad / n
        return np.sum(margins) / n

    @staticmethod
    def _cross_entropy(y, y_pred, diff=True, eps=1e-8):
        # eps guards against log(0) / division by zero.
        if diff:
            return -y / (y_pred + eps) + (1 - y) / (1 - y_pred + eps)
        return np.average(-y * np.log(y_pred + eps) - (1 - y) * np.log(1 - y_pred + eps))