sofasofa-shape recognition: is it square or round

Subject address: sofasofa-6

Convolutional Neural Network (Pytorch, Python)

The approach follows the official Keras answer, simplified a bit to suit my own level. The official answer holds out 30% of the training set as a test (validation) set and also plots a figure; I omitted both.

important point

PyTorch tensors and NumPy arrays are different types. Convert a NumPy array with `torch_tensor = torch.from_numpy(numpy_array)` and append `.float()` to the result. `from_numpy` keeps the NumPy dtype, so integer data becomes a Long tensor, while the network's weights are Float; feeding the Long tensor to the model then fails with a dtype-mismatch error (one side is Long, the other is Float). The corresponding line in the code:

# Convert the NumPy arrays to torch tensors and cast them to float; without the
# cast the author hit a long-vs-float dtype error against the model weights.
   train_x,train_y=torch.from_numpy(train_x).float(),torch.from_numpy(train_y).float()

The main idea

Build a network

def build_model():
    """Build the CNN used to classify 40x40 single-channel images.

    Input batches have shape (n, 1, 40, 40).  Convolutions use no padding and
    pooling divides with truncation, so the spatial size shrinks as noted on
    each layer below.

    Returns:
        An ``nn.Sequential`` whose output has shape (n, 1), squashed into
        (0, 1) by the final sigmoid.
    """
    layers = [
        nn.Conv2d(1, 8, 5),                # 40 - 4 = 36
        nn.ReLU(),
        nn.Conv2d(8, 16, 3),               # 36 - 2 = 34
        nn.ReLU(),
        nn.MaxPool2d(4),                   # 34 // 4 = 8
        nn.Conv2d(16, 16, kernel_size=3),  # 8 - 2 = 6
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=4),       # 6 // 4 = 1
        nn.Flatten(),                      # 16 channels * 1 * 1 = 16 features
        nn.Linear(16, 128),
        nn.Dropout(0.5),
        nn.Linear(128, 1),
        nn.Sigmoid(),
    ]
    return nn.Sequential(*layers)

Dataset and dataloader processing

`train_x` and `train_y` were preprocessed earlier (that step is omitted here) and are NumPy arrays at this point.

# Convert the NumPy arrays to torch tensors and cast them to float; without the
# cast the author hit a long-vs-float dtype error against the model weights.
    train_x,train_y=torch.from_numpy(train_x).float(),torch.from_numpy(train_y).float()
    # Quick way to get a torch dataset: TensorDataset pairs train_x[i] with
    # train_y[i]. Most online examples write a custom Dataset class instead
    # (TensorDataset is itself implemented as such a class).
    torch_dataset=Data.TensorDataset(train_x,train_y)
    data_loader=Data.DataLoader(
        dataset=torch_dataset,
        batch_size=batch_size,
        shuffle=True,  # reshuffle the samples every epoch
        num_workers=2,  # load batches in two worker subprocesses
    )

Training and prediction

optimizer=torch.optim.Adam(model.parameters(),lr=0.001)

# Training: put the model in training mode so Dropout is active while fitting.
model.train()
for epoch in range(epochs):
    for step,(b_x,b_y) in enumerate(data_loader):   # iterate over mini-batches
        output = model(b_x)               # sigmoid output of the CNN
        # binary_cross_entropy requires input and target to have the same
        # shape: the labels arrive as (batch,), the model emits (batch, 1).
        loss=F.binary_cross_entropy(output,b_y.reshape(output.shape))
        optimizer.zero_grad()           # clear gradients for this training step
        loss.backward()                 # backpropagation, compute gradients
        optimizer.step()                # apply gradients

# Prediction: switch Dropout off and skip gradient tracking, so the output is
# deterministic and can be converted with .numpy() without .detach().
model.eval()
test=torch.from_numpy(test).float()
with torch.no_grad():
    pred=model(test).numpy()

Complete code

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as Data

def load_train_test_data(train,test):
    """Turn the raw train/test arrays into model-ready images and labels.

    Args:
        train: 2-D NumPy array, one sample per row; the last column is the
            label, the preceding columns are the flattened 40x40 pixels.
            The rows of this array are shuffled IN PLACE.
        test: 2-D NumPy array of flattened 40x40 pixel rows (no label column).

    Returns:
        A tuple ``(data, labels, data_test)`` where ``data`` and ``data_test``
        have shape (n, 1, 40, 40) and ``labels[i]`` belongs to ``data[i]``.
    """
    # data_modify_suitable_train(..., True) shuffles `train` in place before
    # slicing off the pixels, so no extra shuffle is needed here.
    data = data_modify_suitable_train(train, True)
    # Read the labels only AFTER that in-place shuffle, and copy them, so the
    # label order is guaranteed to match the row order of `data` and cannot be
    # disturbed by later mutation of `train`.
    labels = train[:, -1].copy()
    data_test = data_modify_suitable_train(test, False)
    return data, labels, data_test

def data_modify_suitable_train(data_set=None, type=True):
    """Reshape flattened pixel rows into a batch of single-channel images.

    Args:
        data_set: 2-D NumPy array with one sample per row.  When ``type`` is
            True the last column is treated as the label and dropped, and the
            rows are shuffled IN PLACE first.  Otherwise every column is a
            pixel.  Each row must provide exactly 40 * 40 pixel values.
        type: ``True`` for training data (shuffle + drop label column); any
            other value for test data.  The name shadows the builtin but is
            kept so existing keyword callers keep working.

    Returns:
        A new array of shape (n, 1, 40, 40); it never shares memory with
        ``data_set``.

    Raises:
        ValueError: if ``data_set`` is None, or if the rows do not hold
            40 * 40 pixel values.
    """
    if data_set is None:
        # Previously this fell through to an UnboundLocalError.
        raise ValueError("data_set must not be None")

    if type is True:
        np.random.shuffle(data_set)  # in place: callers rely on the new row order
        pixels = data_set[:, :-1]    # everything except the trailing label column
    else:
        pixels = data_set

    # np.array copies, so the result is independent of the caller's array.
    # Using len(pixels) as the batch size makes a wrong column count fail
    # loudly instead of silently regrouping pixels into a different row count.
    pixels = np.array(pixels)
    return pixels.reshape(len(pixels), 1, 40, 40)

def f1(y_true, y_pred):
    """Compute the F1 score of binary predictions.

    The original version called the Keras backend ``K``, which this file never
    imports, so any call raised NameError.  This version uses torch, which the
    file already imports, and keeps the same formula.

    Args:
        y_true: ground-truth labels (0 or 1); a tensor or anything
            ``torch.as_tensor`` accepts.  Must have the same shape as y_pred.
        y_pred: predicted probabilities or labels in [0, 1]; values are
            rounded to the nearest integer before counting.

    Returns:
        A 0-dim float tensor holding the F1 score (0 when there are no true
        positives).
    """
    eps = 1e-7  # same fuzz factor as Keras' default K.epsilon()
    y_true = torch.as_tensor(y_true, dtype=torch.float32)
    y_pred = torch.as_tensor(y_pred, dtype=torch.float32)

    true_positives = torch.sum(torch.round(torch.clamp(y_true * y_pred, 0, 1)))
    possible_positives = torch.sum(torch.round(torch.clamp(y_true, 0, 1)))
    predicted_positives = torch.sum(torch.round(torch.clamp(y_pred, 0, 1)))

    precision = true_positives / (predicted_positives + eps)
    recall = true_positives / (possible_positives + eps)
    # eps in the denominator avoids 0/0 = NaN when precision and recall are 0.
    return 2 * ((precision * recall) / (precision + recall + eps))

def build_model():
    """Build the CNN used to classify 40x40 single-channel images.

    Input batches have shape (n, 1, 40, 40).  Convolutions use no padding and
    pooling divides with truncation, so the spatial size shrinks as noted on
    each layer below.

    Returns:
        An ``nn.Sequential`` whose output has shape (n, 1), squashed into
        (0, 1) by the final sigmoid.
    """
    layers = [
        nn.Conv2d(1, 8, 5),                # 40 - 4 = 36
        nn.ReLU(),
        nn.Conv2d(8, 16, 3),               # 36 - 2 = 34
        nn.ReLU(),
        nn.MaxPool2d(4),                   # 34 // 4 = 8
        nn.Conv2d(16, 16, kernel_size=3),  # 8 - 2 = 6
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=4),       # 6 // 4 = 1
        nn.Flatten(),                      # 16 channels * 1 * 1 = 16 features
        nn.Linear(16, 128),
        nn.Dropout(0.5),
        nn.Linear(128, 1),
        nn.Sigmoid(),
    ]
    return nn.Sequential(*layers)

def train_model(train,test,batch_size=64,epochs=10,model=None):
    """Train the CNN on ``train`` and return its predictions for ``test``.

    Args:
        train: 2-D NumPy array of training rows (pixels followed by the label
            in the last column); passed to ``load_train_test_data``, which
            shuffles it in place.
        test: 2-D NumPy array of test rows (pixels only).
        batch_size: mini-batch size for the DataLoader.
        epochs: number of passes over the training data.
        model: optional ``nn.Module`` to train; a fresh ``build_model()`` is
            used when None.  The model is left in eval mode on return.

    Returns:
        NumPy array with the model's sigmoid outputs for the test set, one
        row per test sample (shape (n_test, 1) for the default model).
    """
    train_x, train_y, test = load_train_test_data(train, test)
    if model is None:
        model=build_model()

    # Convert NumPy -> torch and cast to float so the inputs match the dtype
    # of the model weights (integer data would otherwise become Long tensors).
    train_x = torch.from_numpy(train_x).float()
    train_y = torch.from_numpy(train_y).float()
    # TensorDataset pairs train_x[i] with train_y[i]; no custom Dataset needed.
    torch_dataset=Data.TensorDataset(train_x,train_y)
    data_loader=Data.DataLoader(
        dataset=torch_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=2,
    )

    optimizer=torch.optim.Adam(model.parameters(),lr=0.001)

    model.train()  # Dropout must be active while fitting
    for _ in range(epochs):
        for b_x, b_y in data_loader:
            output = model(b_x)
            # binary_cross_entropy requires input and target of identical
            # shape; labels come out of the loader as (batch,) while the
            # model emits (batch, 1), which current PyTorch rejects.
            loss = F.binary_cross_entropy(output, b_y.reshape(output.shape))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Inference: disable Dropout (otherwise predictions are randomly perturbed)
    # and skip autograd bookkeeping, which also makes .detach() unnecessary.
    model.eval()
    test = torch.from_numpy(test).float()
    with torch.no_grad():
        pred = model(test).numpy()
    return pred

if __name__ == '__main__':
    # Load the competition CSVs and drop the 'id' column so that only the
    # pixel columns (plus the label column for the training set) remain.
    train_frame = pd.read_csv('train.csv')
    test_frame = pd.read_csv('test.csv')
    train = np.array(train_frame.drop('id', axis=1))
    test = np.array(test_frame.drop('id', axis=1))

    # Train the model, then threshold its outputs at 0.5 to get 0/1 labels.
    probabilities = train_model(train, test)
    predicted_labels = (probabilities > 0.5).astype(int)

    # Fill the sample submission's 'y' column and write the result.
    submission = pd.read_csv('sample_submit.csv')
    submission['y'] = predicted_labels
    submission.to_csv('Pytorch_my_CNN_prediction.csv', index=False)

Guess you like

Origin blog.csdn.net/nanfeizhenkuangou/article/details/103890679