How to Write a Neural Network in Less than 200 Lines of C++

We will build a simple neural network consisting of input nodes, a linear layer, an activation function, and a loss function. To keep the code short, we use Eigen in place of numpy for the matrix operations.

First, we define a Node class as the base class. It holds the node's input and output node lists, the current value of the node, the gradients propagated back to the nodes connected to it, and the forward-propagation and back-propagation interfaces. In addition, to make debugging easier, we give Node a name member variable that identifies the data corresponding to the node.

class Node {
public:
  virtual void forward() = 0;   // forward propagation
  virtual void backward() = 0;  // back propagation
protected:
  Eigen::MatrixXf _value;                       // current value of the node
  std::vector<Node*> _inputs;                   // input nodes
  std::vector<Node*> _outputs;                  // output nodes
  std::map<Node*, Eigen::MatrixXf> _gradients;  // gradients with respect to connected nodes
  std::string _name;                            // name, for debugging
};
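
The subclasses and helper functions below also call a few accessors that are not shown here: getValue(), setValue(), getGradient(), getInputs(), and getOutputs(). A minimal sketch of how they could be added to the public section of Node follows; the signatures are assumptions, and the actual definitions are in the reference repository.

  // Sketch only: accessors assumed by the rest of this post.
  const Eigen::MatrixXf& getValue() const { return _value; }
  void setValue(const Eigen::MatrixXf& v) { _value = v; }
  const Eigen::MatrixXf& getGradient(Node* node) { return _gradients[node]; }
  const std::vector<Node*>& getInputs() const { return _inputs; }
  const std::vector<Node*>& getOutputs() const { return _outputs; }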

  The Input node inherits from Node and represents the input variables that will be assigned data; it corresponds to tensorflow's Variable class. Its size is initialized in the constructor via rows and cols.

class Input : public Node {
public:
    Input(const char* name, size_t rows = 0, size_t cols = 0);
};
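
A sketch of what the constructor might look like: record the name and, if a shape is given, pre-size the value matrix. This is illustrative only; the actual implementation is in the reference repository.

Input::Input(const char* name, size_t rows, size_t cols) {
  _name = name;
  // pre-size the value matrix when a shape is provided (assumption)
  if (rows > 0 && cols > 0)
    _value = Eigen::MatrixXf::Zero(rows, cols);
}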

  The Linear node represents a fully connected layer. Its forward pass computes X·W + bias; since bias does not have the same shape as the X·W matrix, it has to be broadcast across the rows. Its backward pass takes the gradient coming from the output node and computes the gradients with respect to W, X, and bias.

class Linear : public Node {
public:
    Linear(Node* nodes, Node* weights, Node* bias);

    virtual void forward() {
      // X * W, then broadcast the bias row vector onto every row of the result
      _value = (_nodes->getValue() * _weights->getValue()).rowwise()
               + Eigen::VectorXf(_bias->getValue()).transpose();
    }

    virtual void backward() {
      for (auto node : _outputs) {
        auto grad = node->getGradient(this);
        _gradients[_weights] = _nodes->getValue().transpose() * grad;
        _gradients[_bias] = grad.colwise().sum().transpose();
        _gradients[_nodes] = grad * _weights->getValue().transpose();
      }
    }

private:
    Node* _nodes = nullptr;
    Node* _weights = nullptr;
    Node* _bias = nullptr;
};
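
To see what the rowwise() broadcast in forward() does, here is a small standalone illustration (not part of the engine): every row of the X·W matrix gets the same bias row vector added to it.

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXf XW(2, 3);          // stands in for X * W
  XW << 1, 2, 3,
        4, 5, 6;
  Eigen::VectorXf bias(3);
  bias << 10, 20, 30;
  // broadcast: add the bias row vector to every row
  Eigen::MatrixXf out = XW.rowwise() + bias.transpose();
  std::cout << out << std::endl;     // prints: 11 22 33 / 14 25 36
  return 0;
}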

  The Sigmoid node represents the activation function. The forward pass computes the result of the sigmoid function; the backward pass computes the derivative of the sigmoid.

class Sigmoid : public Node {
public:
  Sigmoid(Node* node);
  virtual void forward() { _value = _impl(_node->getValue()); }
  virtual void backward() {
    // sigmoid'(x) = y * (1 - y) = y - y^2, where y = sigmoid(x)
    auto y = _value;
    auto y2 = y.cwiseProduct(y);
    _partial = y - y2;

    for (auto node : _outputs) {
      auto grad = node->getGradient(this);
      _gradients[_node] = grad.cwiseProduct(_partial);
    }
  }
private:
  Eigen::MatrixXf _impl(const Eigen::MatrixXf& x) {
    // sigmoid(x) = 1 / (1 + exp(-x)), applied element-wise
    return (1.f / (1.f + (-x).array().exp())).matrix();
  }
private:
  Node* _node = nullptr;
  // partial derivative of the sigmoid
  Eigen::MatrixXf _partial;
};
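
Note that the derivative of the sigmoid can be written in terms of its own output: sigmoid'(x) = y·(1 − y) = y − y², with y = sigmoid(x). That is why backward() computes _partial directly from the value cached during the forward pass instead of re-evaluating the input.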

 The MSE node represents the loss function. The forward pass computes the mean squared error between the true values and the predictions; the backward pass computes the derivative of that mean squared error.

class MSE : public Node {
public:
  MSE(Node* y, Node* y_hat);
  virtual void forward() {
    _diff = _y->getValue() - _y_hat->getValue();
    auto diff2 = _diff.cwiseProduct(_diff);
    auto v = Eigen::MatrixXf(1, 1);
    v << diff2.mean();
    _value = v;
  }
  virtual void backward() {
    auto r = _y_hat->getValue().rows();
    _gradients[_y] = _diff * (2.f / r);
    _gradients[_y_hat] = _diff * (-2.f / r);
  }
private:
  Node* _y;
  Node* _y_hat;
  Eigen::MatrixXf _diff;
};
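
For a column of r target values the loss is (1/r)·Σ(y − ŷ)², so its gradient with respect to y is 2(y − ŷ)/r and with respect to ŷ it is −2(y − ŷ)/r. This is exactly what backward() computes by scaling _diff with ±2/r.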

 These nodes are the basic building blocks of our framework. Next we need to implement a topological sort of the graph: iterating forward over the sorted nodes computes the prediction, and iterating over them in reverse computes the gradient for each connection.

std::vector<Node*> topological_sort(Node* input_nodes) {
  // build the graph structure starting from the given input node
  Node* pNode = nullptr;
  // for each node, pair.first holds its inputs and pair.second holds its outputs
  std::map<Node*, std::pair<std::set<Node*>, std::set<Node*>>> g;
  // nodes waiting to be traversed
  std::list<Node*> vNodes;
  vNodes.emplace_back(input_nodes);
  // breadth-first traversal: visit the output nodes first, then the input nodes
  // nodes already traversed
  std::set<Node*> sVisited;
  while (vNodes.size() && (pNode = vNodes.front())) {
    if (sVisited.find(pNode) != sVisited.end()) { vNodes.pop_front(); continue; }
    const auto& outputs = pNode->getOutputs();
    for (auto item : outputs) {
      g[pNode].second.insert(item);   // add item as an output of pNode
      g[item].first.insert(pNode);    // add pNode as an input of item
      if (sVisited.find(item) == sVisited.end()) vNodes.emplace_back(item);   // enqueue nodes not yet visited
    }
    const auto& inputs = pNode->getInputs();
    for (auto item : inputs) {
      g[pNode].first.insert(item);    // add item as an input of pNode
      g[item].second.insert(pNode);   // add pNode as an output of item
      if (sVisited.find(item) == sVisited.end()) vNodes.emplace_back(item);
    }
    sVisited.emplace(pNode);
    vNodes.pop_front();
  }

  // build the topological order from the graph
  std::vector<Node*> vSorted;
  while (g.size()) {
    for (auto itr = g.begin(); itr != g.end(); ++itr) {
      // find a node with no remaining inputs
      auto& f = g[itr->first];
      if (f.first.size() == 0) {
        vSorted.push_back(itr->first);
        // remove this node from the input sets of all of its outputs
        auto outputs = f.second;
        for (auto& output : outputs) g[output].first.erase(itr->first);
        // then remove the node itself from the graph
        g.erase(itr->first);
        break;
      }
    }
  }
  return vSorted;
}

 In the test program, we define each node and configure the connections between them, then pass the input node to topological_sort. This function starts from the input node, performs a breadth-first traversal to build the graph structure, and then applies the topological sort algorithm, returning the sorted graph as a queue; in tensorflow this queue is what is called the "graph".
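
To make the wiring concrete, a minimal sketch of such a test setup might look like the following. It assumes the constructors register the connections between nodes (as topological_sort requires) and uses the accessors sketched earlier; the real test code is in the linked repository.

Input X("X"), y("y");          // data and labels
Input W1("W1"), b1("b1");      // trainable parameters
Linear l1(&X, &W1, &b1);       // fully connected layer
Sigmoid s1(&l1);               // activation
MSE cost(&y, &s1);             // loss

// build the computation graph starting from an input node
std::vector<Node*> graph = topological_sort(&X);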

The test then runs train_one_batch, which traverses the sorted graph forward once to obtain the prediction, and then traverses it backward once to obtain the gradient of each connection.

void train_one_batch(std::vector<Node*>& graph){
  for (auto node : graph) {
    node->forward();
  }
  for (int idx = graph.size() - 1; idx >= 0; --idx) {
    graph[idx]->backward();
  }
}

 Then, using the computed gradients, we update the weight nodes W and b to complete one training step.

void sgd_update(std::vector<Node*> update_nodes, float learning_rate){
  for (auto node : update_nodes) {
    Eigen::MatrixXf delta = -1 * learning_rate * node->getGradient(node);
    node->setValue(node->getValue() + delta);
  }
}
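
Putting the pieces together, one training loop might look roughly like this. It is only a sketch: X, y, W1, b1 and graph come from the wiring example above, while X_batch, y_batch, the epoch count, and the learning rate are placeholders supplied by the caller.

for (int epoch = 0; epoch < 1000; ++epoch) {
  X.setValue(X_batch);               // feed the inputs (placeholder data)
  y.setValue(y_batch);               // feed the labels (placeholder data)
  train_one_batch(graph);            // forward pass, then backward pass
  sgd_update({&W1, &b1}, 0.01f);     // apply the gradients to the trainable nodes
}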

 

Reference code: https://github.com/webbery/MiniEngine

