[Study notes] Combining a CNN with an RNN

Problem

A few days ago I worked through the derivation and implementation of RNNs, which raised a question: can a CNN and an RNN be used together, that is, can the features extracted by a CNN also be seen as a sequence? The answer is yes.
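The basic trick is a reshape: a convolutional feature map of shape (N, C, H, W) can be read as H*W positions, each carrying a C-dimensional feature vector, and fed to an RNN as a sequence of length H*W. A minimal sketch (the shapes here are illustrative only):

import torch

feat = torch.randn(8, 128, 2, 2)        # (batch, channels, H, W) from some CNN
seq = feat.flatten(2).permute(2, 0, 1)  # (H*W, batch, channels) = (4, 8, 128)
rnn = torch.nn.RNN(input_size=128, hidden_size=256, num_layers=2)
out, h_n = rnn(seq)                     # out: (4, 8, 256), one hidden state per position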

That said, I think directly feeding the extracted features into an RNN makes little sense, because an RNN's strength is handling variable-length sequences, i.e. cases where seq_len is uncertain, while the number of feature neurons a CNN produces for an image is fixed. So tacking an RNN on afterwards honestly adds little, unless you design a structure that gives the network variable-length behavior. (My idea is simple: add a side branch that learns a 0-1 weight mask over the neurons, filters some neurons out according to the learned rule, and then feeds the rest into the RNN or LSTM for training; a sketch of this follows.)
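As a rough illustration of that mask idea (a weight that truly lies in (0, 1) calls for a sigmoid gate), here is a minimal sketch; the class name is made up, and the code later in this post takes a slightly different route, a ReLU-gated attention:

import torch
import torch.nn as nn

class MaskedFeatures(nn.Module):
    # hypothetical sketch: a 1x1 conv learns a per-position gate squashed into (0, 1)
    def __init__(self, channels=128):
        super().__init__()
        self.gate = nn.Conv2d(channels, 1, kernel_size=1)

    def forward(self, x):                   # x: (N, C, H, W)
        mask = torch.sigmoid(self.gate(x))  # (N, 1, H, W), each value in (0, 1)
        return x * mask                     # positions with mask near 0 are filtered out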

How to implement it

import torch
import torch.nn as nn
from torchsummary import summary
from torchvision import datasets,transforms
import torch.optim as optim
from tqdm import tqdm
class Model(nn.Module):
    def __init__(self):
        super(Model,self).__init__()
        
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(1,16,kernel_size = 3,stride=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Conv2d(16,64,kernel_size = 3,stride=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64,128,kernel_size = 3,stride=2),
            nn.BatchNorm2d(128),
            nn.ReLU(),
        )
        self.rnn = nn.RNN(128, 256, 2)  # input_size, hidden_size, num_layers
        # the initial hidden state is built per batch in forward(), since the batch size may vary
        self.predictor = nn.Linear(4*256,10)
    def forward(self, x):
        x = self.feature_extractor(x)        # (N, 128, 2, 2): 4 spatial positions, each a 128-dim feature
        x = x.flatten(2).permute(2, 0, 1)    # (h*w, batch_size, 128): treat the 4 positions as a sequence
        h0 = torch.zeros(2, x.size(1), 256)  # (num_layers, batch_size, hidden_dim)
        x, ht = self.rnn(x, h0)              # (h*w, batch_size, hidden_dim)
        x = self.predictor(x.permute(1, 0, 2).reshape(-1, 4 * 256))  # concat the 4 step outputs per sample
        return x

if __name__ == "__main__":
    model = Model()
    #summary(model,(1,28,28),device = "cpu")
    loss_fn = nn.CrossEntropyLoss()
    train_dataset = datasets.MNIST(root="./data/",train = True,transform = transforms.ToTensor(),download = True)
    test_dataset = datasets.MNIST(root="./data/",train = False,transform = transforms.ToTensor(),download = True)
    
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=32,
                                           shuffle=True)

    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                            batch_size=128,
                                            shuffle=False)
    optimizer = optim.Adam(model.parameters(),lr = 1e-3)
    print(len(train_loader))
    for epoch in range(100):
        epoch_loss = 0.
        for x,target in train_loader:
            #print(x.size())
            y = model(x)
            loss = loss_fn(y,target)
            epoch_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
        print("epoch : {} and loss is : {}".format(epoch +1,epoch_loss))
    torch.save(model.state_dict(),"rnn_cnn.pth")
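To see where the sequence length of 4 comes from: with kernel size 3, stride 2 and no padding, each conv maps n to floor((n - 3) / 2) + 1, so a 28x28 MNIST image shrinks 28 -> 13 -> 6 -> 2, i.e. a 2x2 map with 128 channels. A quick check (assuming the Model class above is in scope):

feat = Model().feature_extractor(torch.zeros(1, 1, 28, 28))
print(feat.shape)  # torch.Size([1, 128, 2, 2]) -> 2*2 = 4 sequence positions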

As you can see, the code above fixes the number of RNN input positions, so the input is necessarily fixed-length; in my runs, training converges.
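(As an aside, and not what the code in this post does: when sequence lengths genuinely differ per sample, PyTorch's torch.nn.utils.rnn.pack_padded_sequence lets an RNN skip the padded steps of each sequence. The shapes below are illustrative only.)

import torch
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

rnn = torch.nn.RNN(128, 256, 2)
padded = torch.zeros(5, 3, 128)    # (max_len, batch, feat), zero-padded to the longest
lengths = torch.tensor([5, 3, 2])  # true length of each sequence, sorted descending
packed = pack_padded_sequence(padded, lengths)
out, h_n = rnn(packed)             # padded steps are never fed through the RNN
out, _ = pad_packed_sequence(out)  # back to a padded (max_len, batch, hidden) tensor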

As for variable length in this setup, there is still no way to change seq_len from batch to batch, because the longest seq_len has to be specified up front, so truly variable-length input is out of reach. What I can do instead is learn a weight that is applied to the original features, ideally a 0-1 weight, which in effect achieves the desired masking.

import torch
import torch.nn as nn
from torchsummary import summary
from torchvision import datasets,transforms
import torch.optim as optim
import torch.nn.functional as F
from tqdm import tqdm
class Model(nn.Module):
    def __init__(self):
        super(Model,self).__init__()
        
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(1,16,kernel_size = 3,stride=2),
            nn.BatchNorm2d(16),
            nn.ReLU6(),
            nn.Conv2d(16,64,kernel_size = 3,stride=2),
            nn.BatchNorm2d(64),
            nn.ReLU6(),
            nn.Conv2d(64,128,kernel_size = 3,stride=2),
            nn.BatchNorm2d(128),
            nn.ReLU6(),
        )
        self.attn = nn.Conv2d(128,1,kernel_size = 1)
        self.rnn = nn.RNN(128, 256, 2)  # input_size, hidden_size, num_layers
        
        # the initial hidden state is built per batch in forward(), since the batch size may vary
        self.predictor = nn.Linear(4*256,10)
    def forward(self, x):
        x = self.feature_extractor(x)        # (N, 128, 2, 2): 4 spatial positions, each a 128-dim feature
        attn = F.relu(self.attn(x))          # (N, 1, 2, 2) per-position gate; ReLU keeps it >= 0 (not capped at 1)
        x = x * attn                         # broadcasts over the 128 channels; a gate near 0 suppresses a position
        x = x.flatten(2).permute(2, 0, 1)    # (h*w, batch_size, 128): treat the 4 positions as a sequence
        h0 = torch.zeros(2, x.size(1), 256)  # (num_layers, batch_size, hidden_dim)
        x, ht = self.rnn(x, h0)              # (h*w, batch_size, hidden_dim)
        x = self.predictor(x.permute(1, 0, 2).reshape(-1, 4 * 256))
        return x

if __name__ == "__main__":
    model = Model()
    #summary(model,(1,28,28),device = "cpu")
    #exit()
    loss_fn = nn.CrossEntropyLoss()
    train_dataset = datasets.MNIST(root="./data/",train = True,transform = transforms.ToTensor(),download = True)
    test_dataset = datasets.MNIST(root="./data/",train = False,transform = transforms.ToTensor(),download = True)
    
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=32,
                                           shuffle=True)

    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                            batch_size=128,
                                            shuffle=False)
    optimizer = optim.Adam(model.parameters(),lr = 1e-3)
    print(len(train_loader))
    for epoch in range(100):
        epoch_loss = 0.
        for x,target in train_loader:
            #print(x.size())
            y = model(x)
            loss = loss_fn(y,target)
            epoch_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
        print("epoch : {} and loss is : {}".format(epoch +1,epoch_loss))
    torch.save(model.state_dict(),"rnn_cnn.pth")

I trained both for a while; the latter converges faster than the former.
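Neither script actually uses the test_loader it builds. A minimal accuracy check might look like this (a sketch reusing model, test_loader and test_dataset from the script above):

model.eval()
correct = 0
with torch.no_grad():
    for x, target in test_loader:
        pred = model(x).argmax(dim=1)             # class with the highest logit
        correct += (pred == target).sum().item()
print("test accuracy: {:.4f}".format(correct / len(test_dataset)))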

Source: www.cnblogs.com/aoru45/p/11576023.html