Key points for using PyTorch

The first is visdom, a visualization tool commonly used with PyTorch.
It needs to be installed before use:
pip3 install visdom
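Note that visdom works as a client–server pair: start the server first with python -m visdom.server and open http://localhost:8097 in a browser, otherwise the Visdom() client in the code below cannot connect.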

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

from visdom import Visdom

batch_size=200
learning_rate=0.01
epochs=10

Load the data:

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       # transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        # transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=batch_size, shuffle=True)

Build the multilayer perceptron (MLP):

class MLP(nn.Module):

    def __init__(self):
        super(MLP, self).__init__()

        self.model = nn.Sequential(
            nn.Linear(784, 200),
            nn.LeakyReLU(inplace=True),
            nn.Linear(200, 200),
            nn.LeakyReLU(inplace=True),
            nn.Linear(200, 10),
            nn.LeakyReLU(inplace=True),
        )

    # forward pass
    def forward(self, x):
        x = self.model(x)
        return x

net = MLP()
optimizer = optim.SGD(net.parameters(), lr=learning_rate)
criteon = nn.CrossEntropyLoss()

Initialize the visualization client:

viz = Visdom()

viz.line([0.], [0.], win='train_loss', opts=dict(title='train loss'))
viz.line([[0.0, 0.0]], [0.], win='test', opts=dict(title='test loss&acc.',
                                                   legend=['loss', 'acc.']))
global_step = 0

for epoch in range(epochs):

    # training pass
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.view(-1, 28*28)

        logits = net(data)
        loss = criteon(logits, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        global_step += 1
        # append the new point to the curve and render
        viz.line([loss.item()], [global_step], win='train_loss', update='append')

        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


    # evaluation pass
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data = data.view(-1, 28 * 28)
        logits = net(data)
        test_loss += criteon(logits, target).item()

        pred = logits.argmax(dim=1)
        correct += pred.eq(target).float().sum().item()

    # show the test metrics, a batch of images, and the predictions in visdom
    viz.line([[test_loss, correct / len(test_loader.dataset)]],
             [global_step], win='test', update='append')
    viz.images(data.view(-1, 1, 28, 28), win='x')
    viz.text(str(pred.numpy()), win='pred',
             opts=dict(title='pred'))

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

if __name__ == '__main__':
    pass

The second point is dealing with overfitting. To check whether a model overfits, use cross-validation: split the data into three parts, for training, validation, and testing.

The "cross" part means that any fold of the training data can take a turn as the validation set; if the validation results and the later test results come out roughly the same, the model is not overfitting. A sketch of this rotation follows.
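A minimal sketch of the rotation idea, assuming train_db is the MNIST training set created in the code below; Subset and the fold count k=6 are my own choices for illustration:

import numpy as np
from torch.utils.data import DataLoader, Subset

k = 6
indices = np.random.permutation(len(train_db))
folds = np.array_split(indices, k)

for i in range(k):
    # fold i serves as the validation set this round, the rest as training data
    val_idx = folds[i].tolist()
    train_idx = np.concatenate([folds[j] for j in range(k) if j != i]).tolist()
    train_loader = DataLoader(Subset(train_db, train_idx), batch_size=200, shuffle=True)
    val_loader = DataLoader(Subset(train_db, val_idx), batch_size=200, shuffle=True)
    # train on train_loader, evaluate on val_loader, then average the k results

The full code below keeps things simpler and carves out a single fixed validation set with random_split.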

The CPU version of the code:

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

batch_size=200
learning_rate=0.01
epochs=10

train_db = datasets.MNIST('../data', train=True, download=True,
                          transform=transforms.Compose([
                              transforms.ToTensor(),
                              transforms.Normalize((0.1307,), (0.3081,))
                          ]))
train_loader = torch.utils.data.DataLoader(
    train_db,
    batch_size=batch_size, shuffle=True)

test_db = datasets.MNIST('../data', train=False, transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
]))
test_loader = torch.utils.data.DataLoader(test_db,
                                          batch_size=batch_size, shuffle=True)

print('train:', len(train_db), 'test:', len(test_db))
train_db, val_db = torch.utils.data.random_split(train_db, [50000, 10000])
print('db1:', len(train_db), 'db2:', len(val_db))
train_loader = torch.utils.data.DataLoader(
    train_db,
    batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(
    val_db,
    batch_size=batch_size, shuffle=True)

class MLP(nn.Module):

    def __init__(self):
        super(MLP, self).__init__()

        self.model = nn.Sequential(
            nn.Linear(784, 200),
            nn.LeakyReLU(inplace=True),
            nn.Linear(200, 200),
            nn.LeakyReLU(inplace=True),
            nn.Linear(200, 10),
            nn.LeakyReLU(inplace=True),
        )

    def forward(self, x):
        x = self.model(x)
        return x

net = MLP()
optimizer = optim.SGD(net.parameters(), lr=learning_rate)
criteon = nn.CrossEntropyLoss()

for epoch in range(epochs):

    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.view(-1, 28*28)

        logits = net(data)
        loss = criteon(logits, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


    # evaluate on the validation split each epoch
    val_loss = 0
    correct = 0
    for data, target in val_loader:
        data = data.view(-1, 28 * 28)
        logits = net(data)
        val_loss += criteon(logits, target).item()

        pred = logits.argmax(dim=1)
        correct += pred.eq(target).sum().item()

    val_loss /= len(val_loader.dataset)
    print('\nVAL set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        val_loss, correct, len(val_loader.dataset),
        100. * correct / len(val_loader.dataset)))

# final check on the held-out test set
test_loss = 0
correct = 0
for data, target in test_loader:
    data = data.view(-1, 28 * 28)
    logits = net(data)
    test_loss += criteon(logits, target).item()

    pred = logits.argmax(dim=1)
    correct += pred.eq(target).sum().item()

test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))

if __name__ == '__main__':
    pass

The GPU version of the code:

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

batch_size=200
learning_rate=0.01
epochs=10

train_db = datasets.MNIST('../data', train=True, download=True,
                          transform=transforms.Compose([
                              transforms.ToTensor(),
                              transforms.Normalize((0.1307,), (0.3081,))
                          ]))
train_loader = torch.utils.data.DataLoader(
    train_db,
    batch_size=batch_size, shuffle=True)

test_db = datasets.MNIST('../data', train=False, transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
]))
test_loader = torch.utils.data.DataLoader(test_db,
                                          batch_size=batch_size, shuffle=True)

print('train:', len(train_db), 'test:', len(test_db))
train_db, val_db = torch.utils.data.random_split(train_db, [50000, 10000])
print('db1:', len(train_db), 'db2:', len(val_db))
train_loader = torch.utils.data.DataLoader(
    train_db,
    batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(
    val_db,
    batch_size=batch_size, shuffle=True)

class MLP(nn.Module):

    def __init__(self):
        super(MLP, self).__init__()

        self.model = nn.Sequential(
            nn.Linear(784, 200),
            nn.LeakyReLU(inplace=True),
            nn.Linear(200, 200),
            nn.LeakyReLU(inplace=True),
            nn.Linear(200, 10),
            nn.LeakyReLU(inplace=True),
        )

    def forward(self, x):
        x = self.model(x)
        return x

device = torch.device('cuda:0')
net = MLP().to(device)
optimizer = optim.SGD(net.parameters(), lr=learning_rate)
criteon = nn.CrossEntropyLoss().to(device)

for epoch in range(epochs):

    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.view(-1, 28*28)
        # move the batch to the GPU
        data, target = data.to(device), target.to(device)

        logits = net(data)
        loss = criteon(logits, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


    # evaluate on the validation split each epoch
    val_loss = 0
    correct = 0
    for data, target in val_loader:
        data = data.view(-1, 28 * 28)
        data, target = data.to(device), target.to(device)
        logits = net(data)
        val_loss += criteon(logits, target).item()

        pred = logits.argmax(dim=1)
        correct += pred.eq(target).sum().item()

    val_loss /= len(val_loader.dataset)
    print('\nVAL set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        val_loss, correct, len(val_loader.dataset),
        100. * correct / len(val_loader.dataset)))

# final check on the held-out test set
test_loss = 0
correct = 0
for data, target in test_loader:
    data = data.view(-1, 28 * 28)
    data, target = data.to(device), target.to(device)
    logits = net(data)
    test_loss += criteon(logits, target).item()

    pred = logits.argmax(dim=1)
    correct += pred.eq(target).sum().item()

test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))

So how do we deal with overfitting? There are a few simple methods. The first is early stopping: stop training as soon as the loss stops improving noticeably, as sketched below.
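A minimal early-stopping sketch, assuming net, criteon, epochs, train_loader, and val_loader are defined as in the surrounding examples; the evaluate() helper and the patience value are my own additions for illustration:

def evaluate(model, loader):
    # mean validation loss over all batches
    model.eval()
    total = 0
    with torch.no_grad():
        for data, target in loader:
            data = data.view(-1, 28 * 28)
            total += criteon(model(data), target).item()
    return total / len(loader)

best_val_loss = float('inf')
patience = 5        # how many epochs without improvement to tolerate
bad_epochs = 0

for epoch in range(epochs):
    # ... one training pass over train_loader goes here ...
    val_loss = evaluate(net, val_loader)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        bad_epochs = 0
        torch.save(net.state_dict(), 'best.pt')   # keep the best weights so far
    else:
        bad_epochs += 1
        if bad_epochs >= patience:
            print('early stopping at epoch', epoch)
            break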
Using dropout also helps avoid overfitting, and it can help backpropagation settle into a smaller minimum; the basic idea is to randomly drop some nodes during training. Since dropout is only active in training mode, call net.train() before training and net.eval() before testing. A dropout example:

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

from visdom import Visdom

batch_size=200
learning_rate=0.01
epochs=10

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       # transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        # transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=batch_size, shuffle=True)

class MLP(nn.Module):

    def __init__(self):
        super(MLP, self).__init__()

        self.model = nn.Sequential(
            nn.Linear(784, 200),
            # drop 50% of the nodes at random during training; this regularizes
            # the network and can also help it settle into a smaller minimum
            nn.Dropout(0.5),
            nn.LeakyReLU(inplace=True),
            nn.Linear(200, 200),
            nn.Dropout(0.5),
            nn.LeakyReLU(inplace=True),
            nn.Linear(200, 10),
            nn.LeakyReLU(inplace=True),
        )

    def forward(self, x):
        x = self.model(x)
        return x

device = torch.device('cuda:0')
net = MLP().to(device)
optimizer = optim.SGD(net.parameters(), lr=learning_rate)
criteon = nn.CrossEntropyLoss().to(device)

viz = Visdom()

viz.line([0.], [0.], win='train_loss', opts=dict(title='train loss'))
viz.line([[0.0, 0.0]], [0.], win='test', opts=dict(title='test loss&acc.',
                                                   legend=['loss', 'acc.']))
global_step = 0

for epoch in range(epochs):

    net.train()   # enable dropout for training
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.view(-1, 28*28)
        data, target = data.to(device), target.to(device)

        logits = net(data)
        loss = criteon(logits, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        global_step += 1
        viz.line([loss.item()], [global_step], win='train_loss', update='append')

        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


    net.eval()    # disable dropout for evaluation
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data = data.view(-1, 28 * 28)
        data, target = data.to(device), target.to(device)
        logits = net(data)
        test_loss += criteon(logits, target).item()

        pred = logits.argmax(dim=1)
        correct += pred.eq(target).float().sum().item()

    viz.line([[test_loss, correct / len(test_loader.dataset)]],
             [global_step], win='test', update='append')
    viz.images(data.view(-1, 1, 28, 28).cpu(), win='x')
    viz.text(str(pred.detach().cpu().numpy()), win='pred',
             opts=dict(title='pred'))

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
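One detail worth remembering: nn.Dropout(p) in PyTorch takes the probability of dropping a node, whereas tf.nn.dropout in TensorFlow takes the probability of keeping one, so a value of 0.5 happens to coincide but other values do not transfer directly between the two frameworks.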

Another method is regularization. An example:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

batch_size=200
learning_rate=0.01
epochs=10

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       # transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        # transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=batch_size, shuffle=True)

class MLP(nn.Module):

    def __init__(self):
        super(MLP, self).__init__()

        self.model = nn.Sequential(
            nn.Linear(784, 200),
            nn.LeakyReLU(inplace=True),
            nn.Linear(200, 200),
            nn.LeakyReLU(inplace=True),
            nn.Linear(200, 10),
            nn.LeakyReLU(inplace=True),
        )

    def forward(self, x):
        x = self.model(x)
        return x

device = torch.device('cuda:0')
net = MLP().to(device)

For L2 regularization, set the weight_decay argument of the optimizer; the training loop below additionally builds an L1 penalty by hand:

optimizer = optim.SGD(net.parameters(), lr=learning_rate, weight_decay=0.01)
criteon = nn.CrossEntropyLoss().to(device)

for epoch in range(epochs):

    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.view(-1, 28*28)
        data, target = data.to(device), target.to(device)
        logits = net(data)

        # L1 penalty: sum of the absolute values of all parameters
        regularization_loss = 0
        for param in net.parameters():
            regularization_loss += torch.sum(torch.abs(param))

        classify_loss = criteon(logits, target)
        loss = classify_loss + 0.01 * regularization_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


    # evaluation pass
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data = data.view(-1, 28 * 28)
        data, target = data.to(device), target.to(device)
        logits = net(data)
        test_loss += criteon(logits, target).item()

        pred = logits.argmax(dim=1)
        correct += pred.eq(target).float().sum().item()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

The last point is tuning the learning rate, mainly so that gradient descent can find a better minimum.

The idea is to adjust the learning rate as training goes on, lowering it once the loss stops improving; a sketch follows.
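A minimal sketch of this idea using PyTorch's ReduceLROnPlateau scheduler (my assumption of what was intended; the one-layer model and the constant loss are stand-ins for illustration):

import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

net = nn.Linear(784, 10)                      # stand-in model
optimizer = optim.SGD(net.parameters(), lr=0.01)
# cut the learning rate by 10x once the monitored metric has stopped
# improving for `patience` epochs
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2)

for epoch in range(10):
    val_loss = 1.0                            # replace with the real validation loss
    scheduler.step(val_loss)                  # the scheduler watches the metric
    print('epoch', epoch, 'lr:', optimizer.param_groups[0]['lr'])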
