Python Project in Practice: CIFAR-10 Classification with a CNN in TensorFlow 2.0

%matplotlib inline
import matplotlib as mpl
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import sklearn
import sys
import tensorflow as tf
import time
from tensorflow import keras
print(tf.__version__)
print(sys.version_info)
for module in mpl,np,pd,sklearn,tf,keras:
    print(module.__name__,module.__version__)

class_names=['airplane','automobile','bird','cat',
             'deer','dog','frog','horse','ship','truck',] # maps label indices to class names; the class names are what get written to the submission file
train_labels_file='./cifar10/trainLabels.csv'
test_csv_file='./cifar10/sampleSubmission.csv'
train_folder='./cifar10/train'
test_folder='./cifar10/test'
def parse_csv_file(filepath,folder):
    '''parse csv into (path,label)'''
    results=[]
    with open(filepath,'r') as f:
        lines=f.readlines()[1:]
    for line in lines:
        image_id,label_str=line.strip('\n').split(',')
        image_full_path=os.path.join(folder,image_id + '.png')
        results.append((image_full_path,label_str))
    return results
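For reference, the same parsing can be done with pandas in a few lines. A minimal sketch (the function name parse_csv_file_pd is hypothetical), assuming trainLabels.csv keeps its id,label header:

def parse_csv_file_pd(filepath,folder):
    '''same (path,label) output as parse_csv_file, via pandas'''
    df = pd.read_csv(filepath) # columns: id,label
    return [(os.path.join(folder,'%s.png' % image_id),label)
            for image_id,label in zip(df['id'],df['label'])]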

train_labels_info=parse_csv_file(train_labels_file,train_folder)
test_csv_info=parse_csv_file(test_csv_file,test_folder)
import pprint
pprint.pprint(train_labels_info[0:5])
pprint.pprint(test_csv_info[0:5])
print(len(train_labels_info),len(test_csv_info))
train_df = pd.DataFrame(train_labels_info[0:45000])
valid_df = pd.DataFrame(train_labels_info[45000:]) # split the 50,000 training samples into train/validation
test_df = pd.DataFrame(test_csv_info)
# set the column names
train_df.columns = ['filepath','class']
valid_df.columns = ['filepath','class']
test_df.columns = ['filepath','class']

print(train_df.head())
print(valid_df.head())
print(test_df.head())
height = 32
width = 32 # target size the images are resized to
channels = 3
batch_size=32
num_classes = 10
# Augmentation: rescaling, rotation, random horizontal/vertical shifts, shear, zoom, random horizontal flips, and a fill mode for newly exposed pixels
# ImageDataGenerator reads the images from disk, which is more convenient here than building a tf.data pipeline
train_datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale = 1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

train_generator = train_datagen.flow_from_dataframe(
    dataframe = train_df,
    directory =r'./',
    x_col = 'filepath',
    y_col = 'class',
    classes = class_names,
    target_size =(height,width),
    batch_size = batch_size,
    seed =  7,
    shuffle = True,
    class_mode = 'sparse', # labels come out as integer indices
)


valid_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
valid_generator = valid_datagen.flow_from_dataframe(
    dataframe = valid_df,
    directory = r'./',
    x_col = 'filepath',
    y_col = 'class',
    classes = class_names,
    target_size = (height,width),
    batch_size = batch_size,
    seed = 7,
    shuffle = False,
    class_mode = 'sparse',
)
train_num = train_generator.samples
valid_num = valid_generator.samples
print(train_num,valid_num)
# Sanity-check what the generator yields
for i in range(2):
    x,y = next(train_generator)
    print(x.shape,y.shape)
    print(y) # one integer label for each of the 32 samples in the batch
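To eyeball what the augmentation actually produces, one batch can be plotted directly; a minimal sketch using the imports already loaded above:

x,y = next(train_generator)
fig,axes = plt.subplots(2,5,figsize=(10,4))
for img,label,ax in zip(x,y,axes.flatten()):
    ax.imshow(img) # pixels were already rescaled to [0,1] by the generator
    ax.set_title(class_names[int(label)])
    ax.axis('off')
plt.show()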
model = keras.models.Sequential([
    # with small input images we can afford a fairly large number of filters
    keras.layers.Conv2D(filters=128,kernel_size=3,padding='same',activation='relu',input_shape=[width,height,channels]),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=128,kernel_size=3,padding='same',activation='relu'),
    keras.layers.BatchNormalization(), # batch normalization speeds up training
    keras.layers.MaxPool2D(pool_size=2),
    keras.layers.Conv2D(filters=256,kernel_size=3,padding='same',activation='relu'), # double the filter count after each pooling
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=256,kernel_size=3,padding='same',activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=2),
    keras.layers.Conv2D(filters=512,kernel_size=3,padding='same',activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=512,kernel_size=3,padding='same',activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=2),
    keras.layers.Flatten(),
    keras.layers.Dense(128,activation='relu'),
    keras.layers.Dense(num_classes,activation='softmax'),
])
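For reference on the shapes: padding='same' keeps the spatial size through each convolution, and each MaxPool2D halves it, so the 32x32 input shrinks to 16x16, then 8x8, then 4x4. Flatten therefore emits 4 * 4 * 512 = 8192 features, which is why the following Dense(128) layer holds the bulk of the fully connected parameters (8192 * 128 + 128 weights).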

model.compile(loss='sparse_categorical_crossentropy',
             optimizer = 'adam',
             metrics=['accuracy'])
model.summary()
# Warning: with this many filters the model is memory-hungry; it exhausted GPU memory on my laptop
epochs = 5
history = model.fit_generator(train_generator,
                             steps_per_epoch=train_num//batch_size,
                             epochs=epochs,
                             validation_data=valid_generator,
                             validation_steps=valid_num//batch_size,
                             )
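fit_generator still works on TensorFlow 2.0 but is deprecated in later releases; from TF 2.1 onward Model.fit accepts Python generators directly. An equivalent call on a newer version would be:

history = model.fit(train_generator,
                    steps_per_epoch=train_num//batch_size,
                    epochs=epochs,
                    validation_data=valid_generator,
                    validation_steps=valid_num//batch_size)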
def plot_learning_curves(history,label,epochs,min_value,max_value): # plot training vs. validation curves
    data = {}
    data[label] = history.history[label]
    data['val_'+label] = history.history['val_'+label]
    pd.DataFrame(data).plot(figsize=(8,5))
    plt.grid(True)
    plt.axis([0,epochs,min_value,max_value])
    plt.show()
    
plot_learning_curves(history,'accuracy',epochs,0,1)
plot_learning_curves(history,'loss',epochs,0,3) # loss starts near ln(10) ≈ 2.3 and soon drops below 2, so plot the full 0-3 range
test_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255) # generator for the test images
test_generator = test_datagen.flow_from_dataframe(
    dataframe = test_df,
    directory = r'./',
    x_col = 'filepath',
    y_col = 'class',
    classes = class_names,
    target_size = (height,width),
    batch_size = batch_size,
    seed = 7,
    shuffle = False, # keep the order fixed so predictions line up with the ids
    class_mode = 'sparse',
)
test_num =test_generator.samples
print(test_num)
# Run inference on the test set with the trained model
test_predict = model.predict_generator(test_generator,workers = 10,
                                      use_multiprocessing=True) # 10 worker processes (not threads) feed the data
print(test_predict.shape)
print(test_predict[0:5])
test_predict_classes_indices = np.argmax(test_predict,axis=1) # one integer index per test image, 300,000 in total
print(test_predict_classes_indices[0:5])
test_predict_class = [class_names[index] for index in test_predict_classes_indices] # map indices back to class names
print(test_predict_class[0:5])
def generate_submissions(filename,predict_class):
    with open(filename,'w') as f:
        f.write('id,label\n')
        for i in range(len(predict_class)):
            f.write('%d,%s\n'%(i+1,predict_class[i])) # ids are 1-based, matching sampleSubmission.csv
output_file = './cifar10/submission.csv'
generate_submissions(output_file,test_predict_class)
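As a quick sanity check before submitting, the file can be read back with pandas; the column names follow the id,label header written above:

submission = pd.read_csv(output_file)
print(submission.shape) # expect (300000, 2) for the full test set
print(submission.head())
print(submission['label'].value_counts()) # rough class balance of the predictions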

 
