制作数据集图例

例如GTSRB数据集
（图：GTSRB 数据集图例示例）

要求 1. 按类别生成数据集图例：类别顺序不排序（随机），每个类别取一张图像

import os
import matplotlib.pyplot as plt
from PIL import Image

# Root directory of the dataset: one sub-folder per class.
root_dir = '/path/to/your/dataset'

# Legend grid: 7 columns and at least 7 rows (adjust to the dataset).
# Extra rows are added automatically when there are more classes than cells,
# because fig.add_subplot raises ValueError for an index beyond rows * cols.
n_cols = 7
min_rows = 7

# File suffixes treated as images. They are compared case-insensitively so
# that files such as "0001.JPG" are not silently ignored.
image_exts = ('.jpg', '.jpeg', '.png')

# Collect every class folder. The list is deliberately left in os.listdir
# order: this variant of the script does not sort the classes.
class_dirs = [d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))]

# Pick one sample image per class before plotting, so that classes without
# any image are reported and skipped instead of raising IndexError.
sample_paths = []
for class_dir in class_dirs:
    class_path = os.path.join(root_dir, class_dir)
    image_files = [f for f in os.listdir(class_path) if f.lower().endswith(image_exts)]

    if not image_files:
        print(f'Skipping class {class_dir!r}: no image files found')
        continue

    # Only the first listed image of the class is used.
    sample_paths.append(os.path.join(class_path, image_files[0]))

# Ceiling division without importing math.
n_rows = max(min_rows, -(-len(sample_paths) // n_cols))

# Create the matplotlib figure that holds the whole legend.
fig = plt.figure(figsize=(10, 10))

for i, sample_path in enumerate(sample_paths):
    # One grid cell per class.
    ax = fig.add_subplot(n_rows, n_cols, i + 1)

    # imshow converts the PIL image to an array right away, so the file can
    # be closed as soon as the call returns.
    with Image.open(sample_path) as image:
        ax.imshow(image)

    # Hide the axes: the legend shows pictures only.
    ax.axis('off')

# Display the legend.
plt.show()

要求 2. 按照数据集类别名称从小到大生成数据集图例

import os
import matplotlib.pyplot as plt
from PIL import Image

# Root directory of the dataset: one sub-folder per class.
root_dir = '/path/to/your/dataset'

# Legend grid: 7 columns and at least 7 rows (adjust to the dataset).
# Extra rows are added automatically when there are more classes than cells,
# because fig.add_subplot raises ValueError for an index beyond rows * cols.
n_cols = 7
min_rows = 7

# File suffixes treated as images. They are compared case-insensitively so
# that files such as "0001.JPG" are not silently ignored.
image_exts = ('.jpg', '.jpeg', '.png')

# Collect every class folder.
class_dirs = [d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))]

# Sort the classes from small to large. Purely numeric names are compared as
# integers so that "2" comes before "10"; a plain string sort would give
# 0, 1, 10, 11, ..., 2. All other names are sorted alphabetically after the
# numeric ones.
class_dirs.sort(key=lambda d: (0, int(d)) if d.isdecimal() else (1, d))

# Pick one sample image per class before plotting, so that classes without
# any image are reported and skipped instead of raising IndexError.
sample_paths = []
for class_dir in class_dirs:
    class_path = os.path.join(root_dir, class_dir)
    image_files = [f for f in os.listdir(class_path) if f.lower().endswith(image_exts)]

    if not image_files:
        print(f'Skipping class {class_dir!r}: no image files found')
        continue

    # os.listdir returns names in arbitrary order; taking the smallest file
    # name makes the chosen image reproducible across runs and machines.
    sample_paths.append(os.path.join(class_path, min(image_files)))

# Ceiling division without importing math.
n_rows = max(min_rows, -(-len(sample_paths) // n_cols))

# Create the matplotlib figure that holds the whole legend.
fig = plt.figure(figsize=(10, 10))

for i, sample_path in enumerate(sample_paths):
    # One grid cell per class, filled in sorted class order.
    ax = fig.add_subplot(n_rows, n_cols, i + 1)

    # imshow converts the PIL image to an array right away, so the file can
    # be closed as soon as the call returns.
    with Image.open(sample_path) as image:
        ax.imshow(image)

    # Hide the axes: the legend shows pictures only.
    ax.axis('off')

# Display the legend.
plt.show()

要求 3. 从数据集的每个类别中挑出最清晰的一张图像绘制数据集图例：背景色为白色，不显示类别名，但按照类别名称从小到大、以 7 行 7 列排列（行列数可根据数据集的类别数自行调整）

import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image


def calculate_clearness(image):
    """Return a sharpness score for a BGR image.

    The image is converted to grayscale, its Laplacian is computed with
    64-bit float output, and the standard deviation of that Laplacian is
    returned as the measure of sharpness.
    """
    laplacian = cv2.Laplacian(
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY),
        cv2.CV_64F,
    )
    return np.std(laplacian)
# Root directory of the dataset: one sub-folder per class.
root_dir = '/path/to/your/dataset'

# Legend grid: 7 columns and at least 7 rows (adjust to the dataset).
# Extra rows are added automatically when there are more classes than cells,
# because fig.add_subplot raises ValueError for an index beyond rows * cols.
n_cols = 7
min_rows = 7

# File suffixes treated as images. They are compared case-insensitively so
# that files such as "0001.JPG" are not silently ignored.
image_exts = ('.jpg', '.jpeg', '.png')

# Collect every class folder.
class_dirs = [d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))]

# Sort the classes from small to large. Purely numeric names are compared as
# integers so that "2" comes before "10"; all other names are sorted
# alphabetically after the numeric ones.
class_dirs.sort(key=lambda d: (0, int(d)) if d.isdecimal() else (1, d))

# Find the clearest image of every class before plotting, so that classes
# without any image are reported and skipped instead of crashing in imshow.
clearest_images = []
for class_dir in class_dirs:
    class_path = os.path.join(root_dir, class_dir)
    image_files = [f for f in os.listdir(class_path) if f.lower().endswith(image_exts)]

    clearest_image = None
    # Start below any possible standard deviation so that the first image is
    # always accepted, even when its sharpness score is exactly 0.
    max_clearness = -1.0

    for image_file in image_files:
        with Image.open(os.path.join(class_path, image_file)) as raw:
            # copy()/convert() load the pixel data, so the result stays valid
            # after the file is closed. RGB/RGBA images are kept as they are;
            # any other mode (grayscale, palette, ...) is converted to RGB.
            if raw.mode in ('RGB', 'RGBA'):
                image = raw.copy()
            else:
                image = raw.convert('RGB')

        # calculate_clearness expects a 3-channel BGR array, so normalise to
        # RGB first and then swap the channel order for OpenCV.
        image_cv = cv2.cvtColor(np.array(image.convert('RGB')), cv2.COLOR_RGB2BGR)
        clearness = calculate_clearness(image_cv)

        # Keep the image with the highest sharpness score seen so far.
        if clearness > max_clearness:
            clearest_image = image
            max_clearness = clearness

    if clearest_image is None:
        print(f'Skipping class {class_dir!r}: no image files found')
        continue

    clearest_images.append(clearest_image)

# Ceiling division without importing math.
n_rows = max(min_rows, -(-len(clearest_images) // n_cols))

# Create the matplotlib figure that holds the whole legend.
fig = plt.figure(figsize=(10, 10))

for i, clearest_image in enumerate(clearest_images):
    # One grid cell per class, filled in sorted class order; no class name
    # is drawn.
    ax = fig.add_subplot(n_rows, n_cols, i + 1)
    ax.imshow(clearest_image)

    # Hide the axes: the legend shows pictures only.
    ax.axis('off')

# Use a white figure background.
fig.patch.set_facecolor('white')

# Display the legend.
plt.show()

猜你喜欢