# data-eda.ipynb
from glob import glob
import pandas as pd
import numpy as np
import os
import cv2
from PIL import Image
from matplotlib import pyplot as plt
from tqdm import tqdm
# Training-set exploration.
# The glob below looks one directory level under TRAIN_DATASET_PATH and
# matches entries whose name contains a dot; the name of that intermediate
# directory is used as the label of each matched file.
TRAIN_DATASET_PATH = '/data/nextcloud/dbc2017/files/jupyter/train_data'

# Every file matching <TRAIN_DATASET_PATH>/<subdir>/<name>.<ext>.
image_fns = glob(os.path.join(TRAIN_DATASET_PATH, '*', '*.*'))

# Label of each file = name of its parent directory. os.path is used instead
# of splitting on '/' so the extraction does not depend on the path separator.
label_names = [os.path.basename(os.path.dirname(fn)) for fn in image_fns]

# Sorted so the label order is reproducible between runs (iteration order of
# a set of strings is not).
unique_labels = sorted(set(label_names))

# Number of classes.
print(len(unique_labels))

# Total number of images.
print(len(image_fns))
# Number of entries in each class directory.
# dir_lst holds every entry name directly under TRAIN_DATASET_PATH;
# number_lst receives one count per entry that is a directory, in the same
# order, so it can be shorter than dir_lst when non-directory entries exist.
dir_lst = os.listdir(TRAIN_DATASET_PATH)
number_lst = []
for i in dir_lst:
    path = os.path.join(TRAIN_DATASET_PATH, i)
    if os.path.isdir(path):
        # Count everything matched by '*' inside the class directory
        # (glob's '*' does not match names starting with a dot).
        num = len(glob(os.path.join(path, '*')))
        number_lst.append(num)
        # Report class directories that contain nothing.
        if num == 0:
            print(i)