Hands-on CBOW with PyTorch

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

CONTEXT_SIZE = 2      # 2 words to the left, 2 to the right
EMBEDDING_DIM = 100
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

# By deriving a set from `raw_text`, we deduplicate the array
vocab = set(raw_text)
vocab_size = len(vocab)

word_to_ix = {word: i for i, word in enumerate(vocab)}
data = []
for i in range(CONTEXT_SIZE, len(raw_text) - CONTEXT_SIZE):
    # CONTEXT_SIZE words to the left of the target, then CONTEXT_SIZE to the right
    context = ([raw_text[i - j] for j in range(CONTEXT_SIZE, 0, -1)]
               + [raw_text[i + j] for j in range(1, CONTEXT_SIZE + 1)])
    target = raw_text[i]
    data.append((context, target))
print(data[:5])

Printing data[:5] gives:

[(['We', 'are', 'to', 'study'], 'about'), 
(['are', 'about', 'study', 'the'], 'to'), 
(['about', 'to', 'the', 'idea'], 'study'), 
(['to', 'study', 'idea', 'of'], 'the'), 
(['study', 'the', 'of', 'a'], 'idea')]
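
Since each window needs CONTEXT_SIZE words on both sides of the target, the loop yields exactly len(raw_text) - 2*CONTEXT_SIZE training pairs, which is easy to verify:

assert len(data) == len(raw_text) - 2 * CONTEXT_SIZE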

Build the model:

class CBOW(nn.Module):

    def __init__(self, vocab_size, n_dim, context_size):
        super(CBOW, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, n_dim)
        # the 2*context_size context embeddings are concatenated, hence the input width
        self.linear1 = nn.Linear(2 * context_size * n_dim, 128)
        self.linear2 = nn.Linear(128, vocab_size)

    def forward(self, inputs):
        # inputs: LongTensor of 2*context_size word indices for one example
        embeds = self.embeddings(inputs).view(1, -1)  # (1, 2*context_size*n_dim)
        out = F.relu(self.linear1(embeds))
        out = self.linear2(out)
        log_probs = F.log_softmax(out, dim=1)  # log-probabilities over the vocab
        return log_probs
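
Note that the classic CBOW of word2vec sums or averages the context embeddings instead of concatenating them. For comparison, a minimal sketch of the averaging variant (the class name CBOWMean is my own, not from the original post):

class CBOWMean(nn.Module):

    def __init__(self, vocab_size, n_dim):
        super(CBOWMean, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, n_dim)
        self.linear = nn.Linear(n_dim, vocab_size)

    def forward(self, inputs):
        # average the context embeddings into a single (1, n_dim) vector
        embeds = self.embeddings(inputs).mean(dim=0, keepdim=True)
        return F.log_softmax(self.linear(embeds), dim=1)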
# Create your model and train it. Here are some helper functions to make
# the data ready for use by your module.

def make_context_vector(context, word_to_ix):
    idxs = [word_to_ix[w] for w in context]
    return torch.tensor(idxs, dtype=torch.long)

print(make_context_vector(data[0][0], word_to_ix))  # example

model = CBOW(vocab_size, EMBEDDING_DIM, CONTEXT_SIZE)
if torch.cuda.is_available():
    model = model.cuda()
losses = []
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)
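
Before training, a quick sanity check (not in the original post) confirms that the shapes line up: one forward pass on the first example should return a (1, vocab_size) tensor of log-probabilities.

with torch.no_grad():
    example = make_context_vector(data[0][0], word_to_ix)
    if torch.cuda.is_available():
        example = example.cuda()
    print(model(example).shape)  # expected: torch.Size([1, vocab_size])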

for epoch in range(200):
    total_loss = 0
    for context, target in data:
        context_vector = make_context_vector(context, word_to_ix)
        target = torch.tensor([word_to_ix[target]], dtype=torch.long)
        if torch.cuda.is_available():
            context_vector = context_vector.cuda()
            target = target.cuda()
        
        optimizer.zero_grad()
        
        log_probs = model(context_vector)
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    print("epoch", epoch, " -->", total_loss)
    losses.append(total_loss)
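
After training, the model can be probed qualitatively. A minimal sketch (ix_to_word and predict_word are hypothetical helpers, not part of the original post): feed a context back in, take the argmax of the log-probabilities, and read a word's learned vector straight out of the embedding table.

ix_to_word = {i: w for w, i in word_to_ix.items()}

def predict_word(context):
    vec = make_context_vector(context, word_to_ix)
    if torch.cuda.is_available():
        vec = vec.cuda()
    with torch.no_grad():
        log_probs = model(vec)
    return ix_to_word[log_probs.argmax(dim=1).item()]

# On this tiny corpus, 200 epochs are usually enough to memorize the text,
# so this should recover the original target 'about'.
print(predict_word(['We', 'are', 'to', 'study']))

# A word's embedding is the corresponding row of the embedding matrix.
print(model.embeddings.weight[word_to_ix['process']].shape)  # torch.Size([100])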
Reposted from blog.csdn.net/SMith7412/article/details/95627538