import pandas as pd
import numpy as np
from pandas import DataFrame as df
import os
import math
import keras
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input
from keras.utils import np_utils, Sequence
from sklearn.model_selection import StratifiedShuffleSplit
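# Notes on the `header` argument of pd.read_csv:
# - header=None: declares that the raw file has no header row, so read_csv
#   assigns default column labels automatically unless you pass explicit
#   column names.
# - header=0: the first row of the file is used as the header row.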
# Directory containing the training images. `root` and `file_folder` hold the
# same path; both names are kept because code elsewhere may use either.
root = r'C:\Users\liulu\Desktop\huaweicloud\file\train_data\train_data'
file_folder = r'C:\Users\liulu\Desktop\huaweicloud\file\train_data\train_data'

# header=0: the first row of train.csv is the column header, not data.
csv = pd.read_csv(r"C:\Users\liulu\Desktop\huaweicloud\file\train.csv", header=0)

# The first column is used as the image file name and the second as its
# integer label. The columns are read directly instead of looking up each row
# with csv.loc[index].values, for two reasons:
#   * a row lookup coerces both cells to one common dtype, which can turn a
#     numeric file name into e.g. "7.0";
#   * a module-level loop variable called `image` would overwrite the `image`
#     module imported from keras.preprocessing at the top of this file.
# Comprehension variables do not leak into the module namespace.
img_path = [os.path.join(file_folder, str(file_name)) for file_name in csv.iloc[:, 0]]  # path of every image
labels = [int(raw_label) for raw_label in csv.iloc[:, 1]]  # label of every image
print(len(img_path), len(labels))
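# Next, the image paths for both the training set and the validation set need
# to be resolved and paired with their labels; this can be wrapped in a
# single function.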
def get_data(label_file, image_dir=None):
    """Read a label CSV and return shuffled image paths with their labels.

    The CSV is read with ``header=0`` (the first row is the header). The
    first column is used as the image file name and the second column as its
    integer label. Rows are shuffled with ``DataFrame.sample(frac=1.0)``
    before extraction, so the order differs between calls while every path
    stays aligned with its own label.

    Args:
        label_file: Anything ``pd.read_csv`` accepts (path or file-like).
        image_dir: Directory joined in front of every file name. When
            omitted, the module-level ``file_folder`` is used.

    Returns:
        A tuple ``(img_paths, labels)`` of two equally long lists holding
        ``str`` paths and ``int`` labels respectively.
    """
    if image_dir is None:
        image_dir = file_folder

    table = pd.read_csv(label_file, header=0)
    # frac=1.0 keeps every row and only randomises the order.
    table = table.sample(frac=1.0)

    # Read each column on its own: a per-row lookup (table.loc[i].values)
    # coerces both cells to one dtype and is far slower on large files.
    img_paths = [os.path.join(image_dir, str(file_name)) for file_name in table.iloc[:, 0]]
    labels = [int(raw_label) for raw_label in table.iloc[:, 1]]
    return img_paths, labels
# Build the (image paths, labels) lists for the training and validation
# splits. get_data shuffles its rows, so the two calls are kept in this order.
# NOTE(review): train_label_file and val_label_file are not defined in the
# visible part of this file — confirm they are assigned before this point.
train_img_paths,train_labels=get_data(train_label_file) # image-path list and label list read from the training CSV
val_img_paths, val_labels = get_data(val_label_file)