Simple RNN pytorch code implementation

Before getting into this post, I want to say one thing up front: many of the simple RNN implementations floating around online are wrong, and strictly speaking so is mine. Why?
Because for a simple RNN the gradient update really has to be written by hand. If you rely on PyTorch's default mechanism (autograd plus an off-the-shelf optimizer), the gradients effectively vanish. I ran a lot of experiments and at first kept assuming my code was simply buggy, but my conclusion is that a simple RNN cannot be trained with the generic gradient descent setup; it has to be trained with backpropagation through time. In other words, if you want to reproduce a simple RNN you need to write the gradient code yourself instead of just building a model and letting the framework train it directly. With only two or three recurrent layers it is still acceptable, though, because the vanishing is not yet severe. A tiny demonstration of the vanishing effect is sketched below.
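
Here is a minimal, self-contained sketch (my own illustration, not part of the original code) of that effect: the gradient that reaches the first time step of a simple recurrence shrinks rapidly as the sequence gets longer. The sizes, the tanh nonlinearity, and the 0.1 weight scale are arbitrary choices for the demo.

import torch

hidden_size = 10                                         # toy size, not the one used below
for seq_len in (5, 20, 50):
    torch.manual_seed(0)
    U = torch.randn(hidden_size, hidden_size) * 0.1     # hidden-to-hidden weights
    x = torch.randn(seq_len, 1, hidden_size)             # a random input sequence
    h0 = torch.zeros(1, hidden_size, requires_grad=True)
    h = h0
    for t in range(seq_len):
        h = torch.tanh(h @ U + x[t])                      # simple RNN recurrence
    h.sum().backward()
    # the gradient flowing back to the first hidden state shrinks with seq_len
    print(f"seq_len={seq_len:3d}  grad norm at step 0: {h0.grad.norm().item():.2e}")

With a longer sequence the gradient reaching the first step is orders of magnitude smaller, which is exactly what makes the plain training loop below struggle. With that said, here is my code: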

#coding=gbk

import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
sample_num = 1000        # number of generated sequences
sequense_num = 10        # time steps per sequence
input_length = 10        # feature dimension of each time step
train_de_test = 0.8      # train/test split ratio (not actually used below)
hidden_size = 10
num_epochs = 100
batch = 32
learning_rate = 0.01
#torch.manual_seed(10)

x_data = []
y_data = []
for i in range(sample_num):
    if i % 2 == 0:
        # even samples: values in [0, 5) -> label 1
        x_gene = torch.randint(0, 5, (sequense_num, input_length))
        y_data.append(1)
    else:
        # odd samples: values in [6, 10) -> label 0
        x_gene = torch.randint(6, 10, (sequense_num, input_length))
        y_data.append(0)
    x_data.append(x_gene)

x_data = torch.stack(x_data, 0)
x_data = x_data.type(dtype=torch.float32)
y_data = torch.as_tensor(y_data)
print(x_data, y_data)
print(x_data.size())

# Build the network
class sRNN(nn.Module):
    def __init__(self, sequense_num, input_length, hidden_size):
        super().__init__()
        self.sequense_num = sequense_num
        self.input_length = input_length
        self.hidden_size = hidden_size

        # recurrence: h_t = relu(h_{t-1} U + x_t W + b), output: y = sigmoid(h_T V)
        self.W = nn.Parameter(torch.randn(input_length, hidden_size))   # input-to-hidden
        self.U = nn.Parameter(torch.randn(hidden_size, hidden_size))    # hidden-to-hidden
        self.b = nn.Parameter(torch.randn(1, hidden_size))              # hidden bias
        self.V = nn.Parameter(torch.randn(hidden_size, 1))              # hidden-to-output
        self.f = nn.Sigmoid()

    def forward(self, input):  # input is the whole batch: (batch, sequense_num, input_length)
        hidden_state_pre = torch.zeros(input.size(0), self.hidden_size)
        for i in range(self.sequense_num):
            z = torch.matmul(hidden_state_pre, self.U) \
                + torch.matmul(input[:, i, :], self.W) + self.b
            hidden_state_pre = F.relu(z)
        y = self.f(torch.matmul(hidden_state_pre, self.V))
        return y

    def backward(self):
        # placeholder: a hand-written backpropagation-through-time step would go here
        pass

def sampling(sample_num):
    # a random permutation of sample indices, used to shuffle the data
    return torch.randperm(sample_num)

def get_batch(index_sequense, X_data, Y_data, index, batch):
    # take the next `batch` shuffled indices starting at `index`
    idx = index_sequense[index:index + batch]
    return X_data[idx], Y_data[idx]

srnn = sRNN(sequense_num, input_length, hidden_size)

loss_fn = nn.BCELoss()
index_sequense = sampling(sample_num)
optimizer = torch.optim.Adam(srnn.parameters(), lr=0.1)
co = 0

# iterating a state_dict iterates over its keys, so param_tensor is the parameter name
for param_tensor in srnn.state_dict():
    print(param_tensor, '\t', srnn.state_dict()[param_tensor].size())
    print(srnn.state_dict()[param_tensor])
acc_list = []
loss_list = []
index = 0
for k in range(num_epochs):
    if index + batch >= sample_num - 1:
        index = 0
        index_sequense = torch.randperm(sample_num)

    x_batch, y_batch = get_batch(index_sequense, x_data, y_data, index, batch)
    y_batch = y_batch.type(dtype=torch.float32)
    y_batch = y_batch.reshape(batch, 1)

    predict = srnn(x_batch)
    co = torch.sum(torch.abs(y_batch - predict) < 0.5)   # number of correct predictions

    loss = loss_fn(predict, y_batch)
    index = index + batch

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # print the gradients so you can watch them shrink (the vanishing-gradient problem)
    for p in srnn.parameters():
        print("p.grad.data", p.grad.data)

    print("loss :", loss)
    print("accuracy :", co / batch)
    loss_list.append(loss.item())          # store plain floats so matplotlib can plot them
    acc_list.append((co / batch).item())
# quick sanity check: predictions of the trained model on one batch
x_batch, y_batch = get_batch(index_sequense, x_data, y_data, 0, batch)
print(x_batch.size())
print("#")
print(srnn(x_batch))

epoch_list = list(range(num_epochs))

plt.plot(epoch_list, acc_list, label='adam')
plt.title("accuracy")
plt.legend()
plt.show()

plt.plot(epoch_list, loss_list, label='adam')
plt.title("loss")
plt.legend()
plt.show()




os.system("pause")

Output: (the original post shows screenshots of the printed gradients/loss and of the accuracy and loss plots here)

The code above, then, is still not a fully correct simple RNN: what remains is to improve the gradient descent step yourself, i.e. to compute the gradient backwards through time.
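
For reference, here is a hedged sketch (my own addition, not the blogger's missing code) of what that hand-written step could look like for exactly this architecture, i.e. h_t = relu(h_{t-1} U + x_t W + b), p = sigmoid(h_T V), with a mean-reduced BCE loss. The function name bptt_step and the plain SGD update at the end are assumptions.

import torch
import torch.nn.functional as F

def bptt_step(x, y, W, U, b, V, lr=0.01):
    """x: (B, T, D) float, y: (B, 1) float 0/1 labels.
    W: (D, H), U: (H, H), b: (1, H), V: (H, 1)."""
    B, T, _ = x.shape
    hs = [torch.zeros(B, W.shape[1])]           # h_0 ... h_T
    zs = []                                     # pre-activations z_1 ... z_T
    for t in range(T):                          # forward pass, caching what BPTT needs
        z = hs[-1] @ U + x[:, t, :] @ W + b
        zs.append(z)
        hs.append(F.relu(z))
    p = torch.sigmoid(hs[-1] @ V)               # (B, 1)
    loss = F.binary_cross_entropy(p, y)

    d_out = (p - y) / B                         # dL/d(h_T V) for sigmoid + mean BCE
    gV = hs[-1].t() @ d_out
    gW, gU, gb = torch.zeros_like(W), torch.zeros_like(U), torch.zeros_like(b)
    delta = d_out @ V.t()                       # dL/dh_T
    for t in reversed(range(T)):                # walk backwards through time
        dz = delta * (zs[t] > 0).float()        # ReLU derivative
        gW += x[:, t, :].t() @ dz
        gU += hs[t].t() @ dz                    # hs[t] is h_{t-1}
        gb += dz.sum(dim=0, keepdim=True)
        delta = dz @ U.t()                      # pass the gradient on to h_{t-1}
    with torch.no_grad():                       # plain SGD on the four weight tensors
        W -= lr * gW
        U -= lr * gU
        b -= lr * gb
        V -= lr * gV
    return loss.item()

Inside the training loop above, a call like bptt_step(x_batch, y_batch, srnn.W, srnn.U, srnn.b, srnn.V) would take the place of loss.backward() and optimizer.step().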


Origin blog.csdn.net/weixin_43327597/article/details/131715117