"The Gods Are Silent" — personal CSDN blog post directory
This article shows how to implement sequence classification (typically a text classification task in NLP) with a Bi-GRU model in PyTorch, how to obtain a sequence representation from the GRU outputs using different pooling methods, and how those pooling methods differ in implementation and in results.
Note: The variable-length (packed-sequence) RNN version is recommended because it speeds up the RNN computation (the underlying principles will be added later).
Article Directory
1. Output at the last valid time step
1.1 Fixed-length RNN version
Omitted for now; to be filled in.
1.2 Variable length RNN version
class GRUEncoder(nn.Module):
    """GRU sequence classifier that represents a sequence by its last valid step.

    The embedding table is sized by the module-level ``word_num``, initialised
    from the module-level ``embedding`` tensor and frozen. The output layer
    size is taken from the module-level ``label_num``.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, dropout_rate,
                 bias=True, bidirectional=True):
        """Build the embedding, GRU and linear output layers.

        Args:
            input_dim: Embedding size, also used as the GRU input size.
            hidden_dim: GRU hidden size (per direction).
            num_layers: Number of stacked GRU layers.
            dropout_rate: Dropout value passed to ``nn.GRU``.
            bias: Whether the GRU uses bias terms.
            bidirectional: Whether the GRU runs in both directions.
        """
        super().__init__()
        self.embs = nn.Embedding(word_num, input_dim)
        # Copy in the pre-computed vectors and keep them fixed during training.
        self.embs.weight.data.copy_(embedding)
        self.embs.weight.requires_grad = False
        self.rnns = nn.GRU(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            bias=bias,
            dropout=dropout_rate,
            bidirectional=bidirectional,
            batch_first=True,
        )
        # A bidirectional GRU concatenates both directions, doubling the width.
        self.lin = nn.Linear(
            in_features=hidden_dim * 2 if bidirectional else hidden_dim,
            out_features=label_num,
        )

    def forward(self, x, sent_len):
        """Classify a padded batch using the output at the last valid time step.

        Args:
            x: Padded input tensor of shape [batch_size, max_sequence_length].
            sent_len: List whose i-th entry is the number of valid tokens in
                the i-th sequence.

        Returns:
            The linear layer's output for each sequence in the batch.
        """
        x = self.embs(x)
        # [batch_size, max_sequence_length, input_dim]
        packed_input = nn.utils.rnn.pack_padded_sequence(
            x, lengths=sent_len, batch_first=True, enforce_sorted=False
        )
        op, _ = self.rnns(packed_input)
        op, lens_unpacked = nn.utils.rnn.pad_packed_sequence(op, batch_first=True)
        # [batch_size, longest_length_in_batch, hidden_dim * num_directions]
        # Use the output at the last valid time step as the final representation.
        # Index tensors are created on the output's own device so the module
        # works wherever it has been moved, instead of relying on a global device.
        batch_idx = torch.arange(op.size(0), device=op.device)
        last_idx = (lens_unpacked - 1).to(op.device)
        outputs = op[batch_idx, last_idx]
        return self.lin(outputs)
2. Average pooling of outputs over all valid time steps
2.1 Fixed-length RNN version
Omitted for now; to be filled in.
2.2 Variable length RNN version
class GRUEncoder(nn.Module):
    """GRU sequence classifier that mean-pools outputs over valid time steps.

    The embedding table is sized by the module-level ``word_num``, initialised
    from the module-level ``embedding`` tensor and frozen. The output layer
    size is taken from the module-level ``label_num``.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, dropout_rate,
                 bias=True, bidirectional=True):
        """Build the embedding, GRU and linear output layers.

        Args:
            input_dim: Embedding size, also used as the GRU input size.
            hidden_dim: GRU hidden size (per direction).
            num_layers: Number of stacked GRU layers.
            dropout_rate: Dropout value passed to ``nn.GRU``.
            bias: Whether the GRU uses bias terms.
            bidirectional: Whether the GRU runs in both directions.
        """
        super().__init__()
        self.embs = nn.Embedding(word_num, input_dim)
        # Copy in the pre-computed vectors and keep them fixed during training.
        self.embs.weight.data.copy_(embedding)
        self.embs.weight.requires_grad = False
        self.rnns = nn.GRU(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            bias=bias,
            dropout=dropout_rate,
            bidirectional=bidirectional,
            batch_first=True,
        )
        # A bidirectional GRU concatenates both directions, doubling the width.
        self.lin = nn.Linear(
            in_features=hidden_dim * 2 if bidirectional else hidden_dim,
            out_features=label_num,
        )

    def forward(self, x, sent_len):
        """Classify a padded batch using the mean of the valid-step outputs.

        Args:
            x: Padded input tensor of token ids, one row per sequence.
            sent_len: List whose i-th entry is the number of valid tokens in
                the i-th sequence.

        Returns:
            The linear layer's output for each sequence in the batch.
        """
        x = self.embs(x)
        packed_input = nn.utils.rnn.pack_padded_sequence(
            x, lengths=sent_len, batch_first=True, enforce_sorted=False
        )
        op, _ = self.rnns(packed_input)
        op, lens_unpacked = nn.utils.rnn.pad_packed_sequence(op, batch_first=True)
        # [batch_size, longest_length_in_batch, hidden_dim * num_directions]
        # Average the outputs over all valid time steps as the final
        # representation. Padded positions come back as zeros, so summing over
        # the time axis and dividing by the true length gives that mean.
        summed = op.sum(dim=1)
        # lens_unpacked is returned on the CPU; move it to the output's own
        # device rather than to a hard-coded global device.
        lengths = lens_unpacked.to(op.device).unsqueeze(1)
        outputs = summed / lengths
        return self.lin(outputs)
3. Weighted summation of outputs over all valid time steps (attention)
Omitted for now; to be filled in.
4. The state of the last 2 hidden layers
Omitted for now; to be filled in.
5. Experimental results
Omitted for now; to be filled in.