PyTorchによるCRNN画像シーケンス予測コード — トレーニングプロセスと一般的なエラー

目次
1、PyTorchによるCRNN画像シーケンス予測コード — 独自データセットの読み込み
2、PyTorchによるCRNN画像シーケンス予測コード — モデルの説明
3、PyTorchによるCRNN画像シーケンス予測コード — トレーニングプロセスと一般的なエラー

ここではVGG_LSTMを例に、最適化アルゴリズムにはAdam、損失関数にはCrossEntropyLoss()を使用します。詳細なトレーニングコードは次のとおりです。

if __name__ == "__main__":
    # Train VGG_LSTM with Adam and cross-entropy for 100 epochs, printing the
    # per-sample loss and accuracy on the train and test sets after each epoch.

    # Use the GPU when present, otherwise fall back to the CPU instead of
    # crashing on an unconditional .cuda() call.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = VGG_LSTM()
    print(model)
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_func = nn.CrossEntropyLoss()
    for epoch in range(100):
        print('epoch {}'.format(epoch + 1))
        # The evaluation pass below puts the model in eval mode; switch back
        # so every epoch (not only the first) trains in training mode.
        model.train()
        train_loss = 0.
        train_acc = 0.
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            out = model(batch_x)
            loss = loss_func(out, batch_y)
            # CrossEntropyLoss returns the batch mean; weight it by the batch
            # size so that dividing by the dataset size gives a per-sample mean.
            train_loss += loss.item() * batch_y.size(0)
            pred = torch.max(out, 1)[1]  # index of the highest class score
            train_acc += (pred == batch_y).sum().item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('Train Loss: {:.6f}, Acc: {:.6f}'.format(train_loss / (len(
            train_data)), train_acc / (len(train_data))))

        # -----------------------evaluation--------------------------------
        model.eval()
        eval_loss = 0.
        eval_acc = 0.
        # no_grad() replaces the removed `volatile=True` flag: no autograd
        # graph is built, which saves memory during evaluation.
        with torch.no_grad():
            for batch_x, batch_y in test_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                out = model(batch_x)
                loss = loss_func(out, batch_y)
                eval_loss += loss.item() * batch_y.size(0)
                pred = torch.max(out, 1)[1]
                eval_acc += (pred == batch_y).sum().item()
        print('Test Loss: {:.6f}, Acc: {:.6f}'.format(eval_loss / (len(
            test_data)), eval_acc / (len(test_data))))

データの読み込み、モデルの定義、詳細なトレーニングコードを含む全体のコードは次のとおりです。

import torch.nn.functional as F
import torch
import torch.nn as nn
from torch.autograd import Variable
import torchvision.models as models
from torchvision import transforms, utils
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
import torch.optim as optim

# Number of image sequences per mini-batch handed to the DataLoaders.
BATCH_SIZE = 4
# Learning rate passed to the Adam optimizer.
learning_rate = 0.0001

# Training-time preprocessing: a random scale/aspect-ratio crop resized to
# 224x224 (the frame size VGG_LSTM.forward expects), then conversion to a tensor.
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),
    # transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    # transforms.Normalize((.5, .5, .5), (.5, .5, .5))
])
# Evaluation-time preprocessing must be deterministic, otherwise the reported
# test loss/accuracy changes from run to run. Resize + center crop yields the
# same 224x224 input size without any randomness.
val_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    # transforms.Normalize((.5, .5, .5), (.5, .5, .5))
])

def default_loader(path):
    """Load the image stored at ``path`` and return it converted to RGB.

    The file is opened explicitly and closed before returning, so the handle
    is not left for the garbage collector to release. ``convert`` reads the
    pixel data while the file is still open.
    """
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')


class MyDataset(Dataset):
    """Dataset of fixed-length image sequences listed in a text file.

    Every non-blank line of ``txt`` holds an image path followed by an integer
    label, separated by whitespace. Entries are sorted by path. A sample is a
    window of ``num_samples_per_iteration`` (9) consecutive entries, paired
    with the label of the entry that immediately follows the window.

    NOTE: ``__getitem__`` ignores the requested index and draws the window
    position at random, so reading the same index twice generally returns
    different samples. ``transform`` is applied to each frame independently,
    so random transforms differ between the frames of one window.

    Args:
        txt: Path of the listing file.
        transform: Optional callable applied to every loaded image.
        target_transform: Optional callable applied to the label.
        loader: Callable that maps an image path to an image object.
    """

    def __init__(self, txt, transform=None, target_transform=None, loader=default_loader):
        imgs = []
        # The context manager closes the listing file even when a line fails
        # to parse.
        with open(txt, 'r') as fh:
            for line in fh:
                words = line.split()
                if not words:
                    continue  # tolerate blank lines (e.g. a trailing newline)
                imgs.append((words[0], int(words[1])))
        # Sorting by path puts the frames in sequence order.
        imgs.sort(key=lambda x: x[0], reverse=False)
        self.num_samples = len(imgs)
        self.num_samples_per_iteration = 9
        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    def __getitem__(self, index):
        """Return ``(frames, label)`` for a randomly positioned window.

        ``index`` is accepted for the ``Dataset`` protocol but not used.

        Returns:
            A tuple of the stacked frames (a NumPy array whose first axis has
            length ``num_samples_per_iteration``) and the label.

        Raises:
            ValueError: If the listing has too few entries to form a window
                plus the entry that provides its label.
        """
        window = self.num_samples_per_iteration
        if self.num_samples <= window:
            raise ValueError(
                'need more than {} listed images to build a sequence, got {}'.format(
                    window, self.num_samples))
        # Position of the labelled entry; the window is the `window` entries
        # right before it. Upper bound is exclusive.
        current_index = np.random.randint(window, self.num_samples)
        current_label = self.imgs[current_index][1]
        current_imgs = []
        for fn, _ in self.imgs[current_index - window:current_index]:
            img = self.loader(fn)
            if self.transform is not None:
                img = self.transform(img)
            current_imgs.append(img)
        # e.g. [9, 3, 224, 224] when the transform yields 3x224x224 tensors
        batch_cur_imgs = np.stack(current_imgs, axis=0)
        if self.target_transform is not None:
            current_label = self.target_transform(current_label)
        return batch_cur_imgs, current_label

    def __len__(self):
        """Return the number of entries read from the listing file."""
        return len(self.imgs)


# Build the two datasets from their listing files, each with its own
# preprocessing pipeline.
train_data = MyDataset(transform=train_transforms, txt='trainset256.txt')
test_data = MyDataset(transform=val_transforms, txt='testset256.txt')

# Wrap both datasets in unshuffled loaders that share the same batch size.
train_loader = DataLoader(dataset=train_data, shuffle=False, batch_size=BATCH_SIZE)
test_loader = DataLoader(dataset=test_data, shuffle=False, batch_size=BATCH_SIZE)

# Report how many entries each dataset holds.
print('num_of_trainData:', len(train_data))
print('num_of_testData:', len(test_data))


class VGG_LSTM(nn.Module):
    """VGG16 feature extractor followed by an LSTM sequence classifier.

    Each frame of an input sequence goes through a pretrained VGG16 whose
    classifier head has been replaced by an empty ``nn.Sequential``. The
    per-frame feature vectors are fed to an LSTM, and its final hidden
    state(s) are mapped to class scores by two linear layers.

    Args:
        lstm_hidden_size: Size of the LSTM hidden state.
        num_lstm_layers: Number of stacked LSTM layers.
        bidirectional: Whether the LSTM runs in both directions.
        num_classes: Number of output scores per sequence (default 3).
    """

    def __init__(self, lstm_hidden_size=256, num_lstm_layers=1, bidirectional=True,
                 num_classes=3):
        super(VGG_LSTM, self).__init__()
        net = models.vgg16(pretrained=True)
        # With an empty classifier, VGG returns its flattened conv features.
        net.classifier = nn.Sequential()
        self.num_directions = 2 if bidirectional else 1
        self.num_lstm_layers = num_lstm_layers
        self.lstm_hidden_size = lstm_hidden_size
        # [N, 3, 224, 224] -> [N, 512 * 7 * 7]
        self.features = net
        self.lstm1 = nn.LSTM(input_size=512 * 7 * 7,
                             hidden_size=lstm_hidden_size,
                             num_layers=num_lstm_layers,
                             batch_first=True,
                             # nn.LSTM applies dropout only between stacked
                             # layers; with one layer it does nothing and
                             # only triggers a warning.
                             dropout=0.5 if num_lstm_layers > 1 else 0.,
                             bidirectional=bidirectional)
        # The final hidden states of all layers/directions are concatenated.
        self.linear1 = nn.Sequential(nn.Linear(lstm_hidden_size * self.num_directions * num_lstm_layers, 64),
                                     nn.ReLU(inplace=True))
        self.output_layer = nn.Linear(64, num_classes)

    def init_hidden(self, x):
        """Return zero-filled initial LSTM states ``(h0, c0)`` for batch ``x``.

        The states take their dtype and device from ``x``, so the model works
        on CPU as well as on GPU. Each has shape
        ``[num_lstm_layers * num_directions, x.size(0), lstm_hidden_size]``.
        """
        batch_size = x.size(0)
        shape = (self.num_directions * self.num_lstm_layers, batch_size, self.lstm_hidden_size)
        return x.new_zeros(shape), x.new_zeros(shape)

    def forward(self, x):
        """Classify a batch of image sequences.

        Args:
            x: Tensor of shape ``[B, T, C, H, W]``. The stock configuration
                uses T = 9 frames of size 3x224x224, which matches the LSTM
                input size of 512 * 7 * 7.

        Returns:
            Tensor of shape ``[B, num_classes]`` with unnormalised scores.
        """
        B, T = x.size(0), x.size(1)
        # Fold the time axis into the batch axis so VGG sees plain images.
        frames = x.view(B * T, *x.shape[2:])
        feats = self.features(frames)  # [B*T, 512*7*7]
        # Row b*T + t holds frame t of sequence b, so a plain view restores
        # the [B, T, features] layout expected by the batch_first LSTM.
        output = feats.view(B, T, -1)
        h, c = self.init_hidden(output)
        output, (h, c) = self.lstm1(output, (h, c))  # h: [num_layers * num_directions, B, lstm_hidden_size]
        h = h.transpose(0, 1).contiguous().view(B, -1)  # -> [B, num_layers * num_directions * lstm_hidden_size]
        output = self.linear1(h)  # [B, 64]
        output = self.output_layer(output)  # [B, num_classes]
        return output


if __name__ == "__main__":
    # Train VGG_LSTM with Adam and cross-entropy for 100 epochs, printing the
    # per-sample loss and accuracy on the train and test sets after each epoch.

    # Use the GPU when present, otherwise fall back to the CPU instead of
    # crashing on an unconditional .cuda() call.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = VGG_LSTM()
    print(model)
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_func = nn.CrossEntropyLoss()
    for epoch in range(100):
        print('epoch {}'.format(epoch + 1))
        # The evaluation pass below puts the model in eval mode; switch back
        # so every epoch (not only the first) trains in training mode.
        model.train()
        train_loss = 0.
        train_acc = 0.
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            out = model(batch_x)
            loss = loss_func(out, batch_y)
            # CrossEntropyLoss returns the batch mean; weight it by the batch
            # size so that dividing by the dataset size gives a per-sample mean.
            train_loss += loss.item() * batch_y.size(0)
            pred = torch.max(out, 1)[1]  # index of the highest class score
            train_acc += (pred == batch_y).sum().item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('Train Loss: {:.6f}, Acc: {:.6f}'.format(train_loss / (len(
            train_data)), train_acc / (len(train_data))))

        # -----------------------evaluation--------------------------------
        model.eval()
        eval_loss = 0.
        eval_acc = 0.
        # no_grad() replaces the removed `volatile=True` flag: no autograd
        # graph is built, which saves memory during evaluation.
        with torch.no_grad():
            for batch_x, batch_y in test_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                out = model(batch_x)
                loss = loss_func(out, batch_y)
                eval_loss += loss.item() * batch_y.size(0)
                pred = torch.max(out, 1)[1]
                eval_acc += (pred == batch_y).sum().item()
        print('Test Loss: {:.6f}, Acc: {:.6f}'.format(eval_loss / (len(
            test_data)), eval_acc / (len(test_data))))

一般的なエラー
1、エラー:サイズの不一致(size mismatch)。以下のとおりです。
(ここに画像の説明を挿入)解決策:このエラーは、畳み込み層などの層と層の間で次元が一致していないために発生します。各ステップの入力と出力のサイズを把握するには、デバッグ出力で各ステップのテンソルサイズを表示するとよいでしょう。LSTMの入力と出力の形式については、PyTorchの公式サイトを参照してください。
(ここに画像の説明を挿入)(ここに画像の説明を挿入)
2、エラー:メモリ不足(out of memory)。以下のとおりです。
(ここに画像の説明を挿入)
解決策
第一に、BATCH_SIZEの値を小さくします。

第二に、入力画像を小さくします。Resizeを使用できます。

第三に、ビデオカード(GPU)を交換します。

また、ネットワークの畳み込み部分は、深く(複雑に)すればするほど良いというわけではない点に注意してください。私のタスクでは、畳み込み部分にVGGやResNetを転移学習で用いると、かえって精度が低下しました。そのため、具体的なタスクに応じてネットワークの構造を調整する必要があります。

おすすめ

転載: blog.csdn.net/hnu_zzt/article/details/86519448