Datawhale AI Summer Camp: Baseline Walkthrough for the Brain PET Image Analysis and Disease Prediction Challenge

This code implements a complete deep learning training and prediction pipeline. Each step is explained below.

First, the code imports the necessary libraries, including PyTorch, NumPy, and pandas. It then prints the CUDA version and whether a GPU is available, and selects the GPU as the compute device if one is present.

Next comes data preprocessing. glob.glob collects the paths of the training and test images, which are then shuffled. A custom Dataset class, XunFeiDataset, reads and processes the image data. In __getitem__, it first checks whether the volume has already been loaded into an in-memory cache; if so it is reused, otherwise the file is read with nibabel and cached. It then randomly samples 50 slices along the last axis to use as channels, applies optional augmentations, and returns the processed image together with its label (1 for NC, 0 for MCI, derived from the file path). __len__ returns the dataset size.

The data is then split into training, validation, and test sets, and three DataLoader objects are created: train_loader, val_loader, and test_loader. The training and validation loaders use different augmentation pipelines.

Next, a custom CNN, XunFeiNet, is defined. It wraps a pretrained ResNet34, replacing the first convolution so it accepts 50 input channels and the final fully connected layer so it outputs 2 classes. The forward method simply passes the input through the modified ResNet.

Then comes training and validation. The train function runs one epoch over the training set: forward pass, loss computation, backpropagation, and parameter update, returning the average training loss. The validate function runs forward passes without gradients and returns the accuracy on the given dataset. These two functions are called in a loop, printing the loss and accuracies after each epoch.

Finally comes prediction and submission. The predict function runs the model over the test set and returns the raw logits. Because the test transform includes random crops and flips, each call sees a slightly different view of the data; calling predict many times and summing the outputs is a simple form of test-time augmentation (TTA). The summed logits are argmax-ed into class labels and saved as a CSV file.

The full pipeline thus covers data preprocessing, model definition, training and validation, and prediction and submission.
Baseline walkthrough

step0: Import the required libraries

```python
import torch
import os, sys, glob, argparse
import pandas as pd
import numpy as np
import albumentations as A
from tqdm import tqdm
import cv2 as cv
from PIL import Image
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset
import nibabel as nib
from nibabel.viewers import OrthoSlicer3D
```

step1: Check that CUDA is available and, if so, use the GPU

```python
print('CUDA version:', torch.version.cuda)
print('GPU available:', torch.cuda.is_available())
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
```

step2: Data preprocessing

```python
# Train/ contains NC/ and MCI/ subfolders of .nii files; Test/ is flat
train_path = glob.glob('./脑PET图像分析和疾病预测挑战赛公开数据/Train/*/*')
test_path = glob.glob('./脑PET图像分析和疾病预测挑战赛公开数据/Test/*')

np.random.shuffle(train_path)
np.random.shuffle(test_path)
```
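The last 10 entries of the shuffled train_path become the validation set in step3, so the split changes on every run. A minimal sketch of making it reproducible (the seed value is an arbitrary choice, not part of the original baseline):

```python
# Fix NumPy's RNG before shuffling so the train/val split is stable across runs
np.random.seed(0)  # arbitrary seed, not part of the original baseline
np.random.shuffle(train_path)
np.random.shuffle(test_path)
```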

```python
# In-memory cache: avoid re-reading the same .nii file from disk every epoch
DATA_LOADER = {}

class XunFeiDataset(Dataset):
    def __init__(self, img_path, transform=None):
        self.img_path = img_path
        self.transform = transform

    def __getitem__(self, index):
        if self.img_path[index] in DATA_LOADER:
            img = DATA_LOADER[self.img_path[index]]
        else:
            img = nib.load(self.img_path[index])
            img = img.dataobj[:, :, :, 0]
            DATA_LOADER[self.img_path[index]] = img

        # Randomly sample 50 slices along the last axis to use as channels
        idx = np.random.choice(range(img.shape[-1]), 50)
        img = img[:, :, idx]
        img = img.astype(np.float32)

        if self.transform is not None:
            img = self.transform(image=img)['image']

        # HWC -> CHW for PyTorch
        img = img.transpose([2, 0, 1])
        # Label from the path: 1 if 'NC' (normal control), else 0 (MCI)
        return img, torch.from_numpy(np.array(int('NC' in self.img_path[index])))

    def __len__(self):
        return len(self.img_path)
```

step3: Building the DataLoaders (train, val, test)

```python
train_loader = torch.utils.data.DataLoader(
    XunFeiDataset(train_path[:-10],
                  A.Compose([
                      A.RandomRotate90(),
                      A.RandomCrop(120, 120),
                      A.HorizontalFlip(p=0.5),
                      A.RandomContrast(p=0.5),
                      A.RandomBrightnessContrast(p=0.5),
                  ])
                  ), batch_size=2, shuffle=True, num_workers=0, pin_memory=False
)

val_loader = torch.utils.data.DataLoader(
    XunFeiDataset(train_path[-10:],
                  A.Compose([
                      A.RandomCrop(120, 120),
                  ])
                  ), batch_size=2, shuffle=False, num_workers=0, pin_memory=False
)

# Note: the test transform keeps random crop/flip/contrast on purpose, so that
# repeated predict() calls see different views (test-time augmentation, step6)
test_loader = torch.utils.data.DataLoader(
    XunFeiDataset(test_path,
                  A.Compose([
                      A.RandomCrop(128, 128),
                      A.HorizontalFlip(p=0.5),
                      A.RandomContrast(p=0.5),
                  ])
                  ), batch_size=2, shuffle=False, num_workers=0, pin_memory=False
)
```
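A quick sanity check (a sketch that assumes the data directory above is in place): pull one batch and confirm the shapes the model will see.

```python
# Smoke test: inspect one training batch
imgs, labels = next(iter(train_loader))
print(imgs.shape)    # expected: torch.Size([2, 50, 120, 120]) after RandomCrop(120, 120)
print(labels.shape)  # expected: torch.Size([2]), values in {0, 1}
```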

step4: Custom CNN

```python
class XunFeiNet(nn.Module):
    def __init__(self):
        super(XunFeiNet, self).__init__()
        model = models.resnet34(True)  # load ImageNet-pretrained weights
        # Replace the stem so the network accepts 50 input channels
        model.conv1 = torch.nn.Conv2d(50, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        model.avgpool = nn.AdaptiveAvgPool2d(1)
        model.fc = nn.Linear(512, 2)  # binary head: NC vs. MCI
        self.resnet = model

    def forward(self, img):
        out = self.resnet(img)
        return out
```
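A quick shape check with a dummy tensor (no data required) confirms that the modified stem and head fit together; thanks to AdaptiveAvgPool2d, the network also tolerates the 128x128 test crops.

```python
# Dummy forward pass: 2 samples, 50 channels, 120x120 crops
net = XunFeiNet()
with torch.no_grad():
    out = net(torch.randn(2, 50, 120, 120))
print(out.shape)  # expected: torch.Size([2, 2]), one logit per class
```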

Move the model to the GPU and set up the loss and optimizer:

```python
model = XunFeiNet().to(device)
Loss_Function = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
```

step5: Training and validation

```python
def train(train_loader, model, Loss_Function, optimizer):
    """
    :param train_loader: training split of the brain PET dataset
    :param model: the ResNet34-based network
    :param Loss_Function: cross-entropy loss
    :param optimizer: SGD optimizer
    :return: average training loss over the epoch
    """
    model.train()
    train_loss = 0.0
    for i, (input, target) in enumerate(train_loader):
        input = input.to(device)
        target = target.to(device)

        output = model(input)
        loss = Loss_Function(output, target.long())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # optional per-batch logging:
        # if i % 20 == 0:
        #     print(loss.item())

        train_loss += loss.item()

    return train_loss / len(train_loader)
```

```python
def validate(val_loader, model, Loss_Function):
    """
    :param val_loader: validation split of the brain PET dataset
    :param model: the ResNet34-based network
    :param Loss_Function: cross-entropy loss
    :return: accuracy on the given dataset
    """
    model.eval()
    val_acc = 0.0

    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            input = input.to(device)
            target = target.to(device)

            # compute output
            output = model(input)
            loss = Loss_Function(output, target.long())

            val_acc += (output.argmax(1) == target).sum().item()

    return val_acc / len(val_loader.dataset)
```
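If the leaderboard metric is F1 rather than accuracy (an assumption here, not something the baseline states), a small scikit-learn variant would look like this sketch:

```python
from sklearn.metrics import f1_score

def validate_f1(val_loader, model):
    # Sketch only: collect predictions and targets, then compute macro F1
    model.eval()
    preds, targets = [], []
    with torch.no_grad():
        for input, target in val_loader:
            output = model(input.to(device))
            preds.append(output.argmax(1).cpu().numpy())
            targets.append(target.numpy())
    return f1_score(np.concatenate(targets), np.concatenate(preds), average='macro')
```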

Training loop (30 epochs):

```python
for epoch in range(1, 31):
    train_loss = train(train_loader, model, Loss_Function, optimizer)
    val_acc = validate(val_loader, model, Loss_Function)
    train_acc = validate(train_loader, model, Loss_Function)
    print(f'Epoch {epoch}\ntrain loss: {train_loss}\ttrain acc: {train_acc}\tval acc: {val_acc}\n')
```
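The baseline keeps whatever weights the final epoch produces. A common refinement (a sketch, not part of the original code; the checkpoint file name is an arbitrary choice) is to save the model whenever validation accuracy improves:

```python
# Sketch: checkpoint the best weights seen during training
best_acc = 0.0
for epoch in range(1, 31):
    train_loss = train(train_loader, model, Loss_Function, optimizer)
    val_acc = validate(val_loader, model, Loss_Function)
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), 'best_model.pt')  # 'best_model.pt' is an assumed name
```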

step6: Prediction and submission

```python
def predict(test_loader, model, Loss_Function):
    model.eval()

    test_pred = []
    with torch.no_grad():
        for i, (input, target) in enumerate(test_loader):
            input = input.to(device)
            target = target.to(device)
            output = model(input)
            test_pred.append(output.data.cpu().numpy())

    return np.vstack(test_pred)
```

```python
# Run 50 stochastic passes over the test set and sum the logits (TTA)
pred = None
for i in range(50):
    if pred is None:
        pred = predict(test_loader, model, Loss_Function)
    else:
        pred += predict(test_loader, model, Loss_Function)
```
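Summing raw logits works because argmax is unchanged by dividing by the number of passes. An alternative (a sketch, assuming SciPy is installed; it is not used elsewhere in this baseline) is to average softmax probabilities, which can be more robust when logit scales vary between passes:

```python
import scipy.special  # provides softmax

# Sketch: average class probabilities over the same 50 stochastic passes
prob = np.mean(
    [scipy.special.softmax(predict(test_loader, model, Loss_Function), axis=1)
     for _ in range(50)],
    axis=0,
)
labels = prob.argmax(1)
```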

```python
print('Prediction finished, writing the .csv file')
submit = pd.DataFrame(
    {
        # The file name without the .nii extension is the sample uuid;
        # os.path.basename handles both '/' and '\\' path separators
        'uuid': [int(os.path.basename(x)[:-4]) for x in test_path],
        'label': pred.argmax(1)
    })
submit['label'] = submit['label'].map({1: 'NC', 0: 'MCI'})
submit = submit.sort_values(by='uuid')
submit.to_csv('submit.csv', index=None)
```

Reposted from blog.csdn.net/m0_68165821/article/details/131879243