# DPCNN text classifier.
# Network walkthrough and training-code explanation:
#   https://blog.csdn.net/qq_42363032/article/details/126758368
from torch import nn
import torch
import numpy as np
import torch.nn.functional as F
class Config(object):
    """Hyper-parameter container for the DPCNN text classifier.

    Attributes:
        embedding_pretrained: pre-trained embedding matrix (None for random init)
        emb_dim: embedding dimension; overridden by the pre-trained matrix width
        dropout: dropout probability
        num_classes: number of target classes
        n_vocab: vocabulary size
        pad_size: sequence length every sample is padded/truncated to
        channel_size: number of convolution filters / output channels
    """

    def __init__(self, emb_way, emb_dim, pad_size, num_classes, n_vocab):
        """
        :param emb_way: embedding init: 'embedding_SougouNews.npz' (Sogou news),
                        'embedding_Tencent.npz' (Tencent), or 'random'
        :param emb_dim: embedding dimension used when emb_way == 'random'
        :param pad_size: sequence length every sample is padded/truncated to
        :param num_classes: number of target classes
        :param n_vocab: vocabulary size
        """
        # Load the pre-trained matrix from ./data/ unless random init was requested.
        self.embedding_pretrained = (
            torch.tensor(np.load('./data/' + emb_way)["embeddings"].astype('float32'))
            if emb_way != 'random' else None
        )
        # When pre-trained vectors are used, their width takes precedence over emb_dim.
        self.emb_dim = (
            self.embedding_pretrained.size(1)
            if self.embedding_pretrained is not None else emb_dim
        )
        self.dropout = 0.5
        self.num_classes = num_classes
        self.n_vocab = n_vocab
        # FIX: pad_size was accepted but silently discarded; store it for callers.
        self.pad_size = pad_size
        self.channel_size = 250
class DPCNN(nn.Module):
    """Deep Pyramid CNN for text classification (Johnson & Zhang, 2017).

    Pipeline: region embedding -> two equal-length convolutions -> repeated
    pyramid blocks (downsample-by-2 pooling + two residual convolutions) until
    the sequence axis collapses to length 1 -> linear classifier.
    """

    def __init__(self, config):
        """
        :param config: object exposing embedding_pretrained, n_vocab, emb_dim,
                       channel_size and num_classes (see Config).
        """
        super(DPCNN, self).__init__()
        if config.embedding_pretrained is not None:
            # freeze=False: keep fine-tuning the pre-trained vectors.
            self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False)
        else:
            # FIX: was `config.embed`, which Config never defines (AttributeError
            # on the random-init path); the attribute is named emb_dim.
            self.embedding = nn.Embedding(config.n_vocab, config.emb_dim, padding_idx=config.n_vocab - 1)
        # "Region embedding" in the paper: width-3 conv spanning the full emb_dim.
        self.conv_region = nn.Conv2d(1, config.channel_size, (3, config.emb_dim), stride=1)
        self.conv = nn.Conv2d(config.channel_size, config.channel_size, (3, 1), stride=1)
        self.pooling = nn.MaxPool2d((3, 1), stride=2)
        # ZeroPad2d args are (left, right, top, bottom) on the (seq, 1) plane.
        self.padding_conv = nn.ZeroPad2d((0, 0, 1, 1))  # keeps length through a width-3 conv
        self.padding_pool = nn.ZeroPad2d((0, 0, 0, 1))  # lets pooling halve the length
        self.relu = nn.ReLU()
        self.fc = nn.Linear(config.channel_size, config.num_classes)

    def forward(self, x):
        """Classify a batch.

        :param x: (token_ids, lengths) pair; token_ids is [batch, seq_len],
                  lengths is unused here.
        :return: logits of shape [batch, num_classes]
        """
        x, x_len = x
        batch_size = x.size(0)
        x = self.embedding(x)                 # [batch, seq_len, emb_dim]
        x = x.unsqueeze(1)                    # [batch, 1, seq_len, emb_dim]
        x = self.conv_region(x)               # [batch, channels, seq_len-2, 1]
        x = self._padd_and_conv(self.padding_conv, self.relu, self.conv, x)
        x = self._padd_and_conv(self.padding_conv, self.relu, self.conv, x)
        # FIX: was `> 2`, which could leave the sequence axis at length 2
        # (e.g. 8 -> 4 -> 2) so view() yields 2*channels features and the
        # channels-wide fc crashes; `> 1` always reduces to length 1 and is
        # identical for every input that already worked.
        while x.size(2) > 1:
            x = self._block(x)                # halves the sequence axis
        x = x.view(batch_size, -1)            # [batch, channels]
        return self.fc(x)                     # [batch, num_classes]

    def _block(self, x):
        """One pyramid stage: downsample by 2, then two residual convolutions."""
        x = self.padding_pool(x)
        px = self.pooling(x)                  # sequence axis roughly halved
        x = self._padd_and_conv(self.padding_conv, F.relu, self.conv, px)
        x = self._padd_and_conv(self.padding_conv, F.relu, self.conv, x)
        # Residual shortcut around the two convolutions.
        return x + px

    def _padd_and_conv(self, padd, relu, conv, x):
        """Equal-length convolution: pad, activate, then convolve.

        Per the paper, the activation is applied BEFORE the convolution
        (pre-activation), not after.
        """
        x = padd(x)
        x = relu(x)
        return conv(x)