data_eda.ipynb

data-eda.ipynb

from glob import glob
import pandas as pd
import numpy as np
import os
import cv2
from PIL import Image
from matplotlib import pyplot as plt
from tqdm import tqdm
# Training-set exploration
TRAIN_DATASET_PATH = '/data/nextcloud/dbc2017/files/jupyter/train_data'
# Match every file one level below the root: <root>/<class_dir>/<file.ext>.
image_fns = glob(os.path.join(TRAIN_DATASET_PATH, '*', '*.*'))
# An image's label is the name of its parent directory. Use os.path instead
# of splitting on '/' so the result does not depend on which path separator
# glob returns on the current platform.
label_names = [os.path.basename(os.path.dirname(fn)) for fn in image_fns]
unique_labels = list(set(label_names))
# Number of classes
print(len(unique_labels))

在这里插入图片描述

# Total number of images
print(len(image_fns))

在这里插入图片描述

# Number of images in each class: one entry in number_lst per class
# directory; the names of directories that contain nothing are printed.
number_lst = []
dir_lst = os.listdir(TRAIN_DATASET_PATH)
for class_name in dir_lst:
    class_dir = os.path.join(TRAIN_DATASET_PATH, class_name)
    # Skip stray files that sit next to the class directories.
    if not os.path.isdir(class_dir):
        continue
    n_images = len(glob(os.path.join(class_dir, '*')))
    number_lst.append(n_images)
    if n_images == 0:
        print(class_name)

在这里插入图片描述

# Histogram of the per-class image counts. The old `normed` keyword is no
# longer accepted by Matplotlib's hist(); `density=False` is its replacement
# and keeps raw counts on the y-axis, matching the original `normed=0`.
# The trailing semicolon suppresses the returned value in notebook output.
plt.hist(number_lst, bins=40, density=False, facecolor="blue", edgecolor="black", alpha=0.7);

在这里插入图片描述

# Largest, then smallest, number of images found in a single class.
for extreme in (np.max, np.min):
    print(extreme(number_lst))

在这里插入图片描述

# Collect the size (as reported by PIL's Image.size) of every file inside
# each class directory of the training set.
# NOTE: number_lst is deliberately not reset here; the per-class counts
# computed earlier stay available to other cells.
dir_lst = os.listdir(TRAIN_DATASET_PATH)
size_lst = []
for class_name in tqdm(dir_lst):
    class_dir = os.path.join(TRAIN_DATASET_PATH, class_name)
    if not os.path.isdir(class_dir):
        continue
    for img_path in glob(os.path.join(class_dir, '*')):
        # Context manager closes the file handle right away; opening many
        # images without closing them can exhaust file descriptors.
        with Image.open(img_path) as img:
            size_lst.append(img.size)
# Series.value_counts() replaces the deprecated top-level pd.value_counts().
temp = pd.Series(size_lst).value_counts()
# Resolution distribution: keep only sizes shared by more than 200 images.
temp[temp > 200]

在这里插入图片描述

# Display one randomly chosen training image as a visual sanity check.
rd_index = np.random.randint(len(image_fns))
sample_path = image_fns[rd_index]
sample_pixels = plt.imread(sample_path)
plt.imshow(sample_pixels)

在这里插入图片描述

# Test-set exploration
# The test root gets its own constant so that TRAIN_DATASET_PATH keeps
# pointing at the training data instead of being silently overwritten.
TEST_DATASET_PATH = '/data/nextcloud/dbc2017/files/jupyter/test_data_A'
# The test set is split into a 'gallery' folder and a 'query' folder.
image_gal = glob(os.path.join(TEST_DATASET_PATH, 'gallery', '*.*'))
image_que = glob(os.path.join(TEST_DATASET_PATH, 'query', '*.*'))
# Number of gallery images, then number of query images.
print(len(image_gal))
print(len(image_que))

在这里插入图片描述

# Collect the size (as reported by PIL's Image.size) of every gallery image.
size_lst_gal = []
for img_path in tqdm(image_gal):
    # Close each file as soon as its size has been read so that handles
    # do not accumulate over a large image set.
    with Image.open(img_path) as img:
        size_lst_gal.append(img.size)
# Same for the query images.
size_lst_que = []
for img_path in tqdm(image_que):
    with Image.open(img_path) as img:
        size_lst_que.append(img.size)
# Series.value_counts() replaces the deprecated top-level pd.value_counts().
temp_gal = pd.Series(size_lst_gal).value_counts()
temp_que = pd.Series(size_lst_que).value_counts()
# Gallery resolutions shared by more than 200 images.
temp_gal[temp_gal > 200]

在这里插入图片描述

# Query resolutions shared by more than 100 images.
temp_que[temp_que>100]

在这里插入图片描述

猜你喜欢

转载自blog.csdn.net/qq_41375318/article/details/108549923
今日推荐