Generating an Image-Label Dataset with the opencv-python Library

Method 1: generating an image-label dataset with the opencv-python library

Note: the OpenCV image library is used on its own here, i.e. only cv2 is used to read and display images.

When installing OpenCV, install the opencv_python package:
pip install opencv-python
and import it with: import cv2
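
A quick sanity check after installing (this just prints the version of the OpenCV build that was bound):

import cv2
print(cv2.__version__)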

Clothing keypoint dataset download link: https://pan.baidu.com/s/1A_UEaulqsz60OhC5BStA9g?pwd=hr47
Extraction code: hr47
Dataset description: see "Three ways to generate image-label datasets with PyTorch - Preface".

Commonly used OpenCV functions

# Commonly used cv2 functions:
import cv2

cv2.imread(filepath, flags)  # read an image
cv2.imshow(wname, img)  # display an image in a window
cv2.imwrite(file, img, params)  # write an image to disk (params is an optional list of encoding flags)
img.copy()  # copy an image

cv2.cvtColor()  # convert between color spaces

cv2.resize(image, dsize)  # resize an image: (source image, output size as (width, height))
cv2.flip(img, flipcode)  # flip an image; flipcode selects horizontal/vertical/both
cv2.warpAffine(img, M, (400, 600))  # affine transform: translation, crop, shear, rotation, driven by matrices such as M, M_crop, M_shear, M_rotate

cv2.putText(img, 'text', (50, 150), font, fontScale, color, thickness)  # draw text: (image, text, bottom-left corner of the text, font, font scale, color, thickness)
cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)  # draw a rectangle: img is the image, (x, y) the top-left corner, (x + w, y + h) the bottom-right corner, (0, 255, 0) the BGR line color, 2 the line width
cv2.circle(img, (x, y), point_size, point_color, thickness)  # draw a circle
cv2.boundingRect(img)  # bounding rectangle of a binary image (or of a point set such as a contour); returns x, y, w, h, where (x, y) is the top-left corner and w, h are the width and height
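
A minimal sketch tying several of these calls together (the file name sample.jpg is a placeholder for any local image):

import cv2

img = cv2.imread('sample.jpg', cv2.IMREAD_COLOR)  # BGR array of shape (H, W, C); assumes sample.jpg exists
img = cv2.resize(img, (256, 256))  # dsize is (width, height)
cv2.rectangle(img, (20, 20), (120, 120), (0, 255, 0), 2)  # green box, 2 px line width
cv2.circle(img, (70, 70), 3, (0, 0, 255), -1)  # filled red dot
cv2.putText(img, 'demo', (20, 200), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)  # blue text
cv2.imshow('demo', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
cv2.imwrite('demo_out.png', img)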

Generating an image / keypoint-coordinate-label dataset

In this example, the clothing-type and keypoint image-label dataset is processed entirely with cv2 functions.
Output of the dataset generator: (image, coordinates, type)

Code: dataset_by_cv2.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2022/5/16 11:05
# @Author  : Hyan Tan 
# @File    : dataset_by_cv2.py
import os
import cv2
import numpy as np
import pandas as pd
import torch
from torch import nn
from torchvision import transforms
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from collections import OrderedDict

from abc import ABCMeta, abstractmethod
from transformation import RandomAdd, ImageResize, RandomCrop, RandomFlip, RandomRotate, Distort

train_transform = transforms.Compose([
    # RandomAdd(add_path="12"),
    ImageResize(size=256),
    # RandomCrop(in_size=256, out_size=224),  # random crop: not usable here, because the labels are coordinate sets and would no longer match
    # RandomFlip(),  # random flip: not usable here, for the same reason
    # RandomRotate(),  # random rotation: not usable here, for the same reason
    # Distort()  # color distortion
])
val_transform = transforms.Compose([
    ImageResize(size=256),
    # RandomCrop(in_size=256, out_size=224),  # usable at test time, but not for validation
])
test_transform = transforms.Compose([
    ImageResize(size=288),
    RandomCrop(in_size=288, out_size=256),  # random crop: usable at test time, but not for validation
    RandomFlip(),  # random flip
    RandomRotate(),  # random rotation
    Distort()  # color distortion
])

# clothes = {'trousers': 7, 'skirt': 4, 'outwear': 15, 'blouse': 13, 'dress': 15}


# ------ Dataset ------
class KeyPointsDataSet(Dataset):
    """服装-类型-关键点群标记数据集"""
    def __init__(self, root_dir=r'E:/Datasets/Fashion/Fashion AI-keypoints_24/train/', image_set='train', transforms=None):
        """
        初始化数据集
        :param root_dir: 数据目录(.csv和images的根目录)
        :param image_set: train训练,val验证,test测试
        :param transforms(callable,optional):图像变换-可选
        标签数据文件格式为csv_file: 标签csv文件(内容:图像相对地址-category类型-标签coordination坐标)
        """
        super(KeyPointsDataSet, self).__init__()
        self._imgset = image_set
        self._image_paths = []
        self._labels = []
        self._cates = []  # labels: clothing categories
        self._csv_file = os.path.join(root_dir, image_set + '.csv')  # path of the csv label file
        self._categories = ['blouse', 'outwear', 'dress', 'trousers', 'skirt', ]
        self._root_dir = root_dir
        self._transform = transforms

        self.__getFileList()  # build the lists of image paths and labels

    def __getFileList(self):
        file_info = pd.read_csv(self._csv_file)
        self._image_paths = file_info.iloc[:, 0]  # image paths are in the first column
        self._cates = file_info.iloc[:, 1]  # second column: clothing type (blouse, trousers, skirt, dress, outwear)
        if self._imgset == 'train':  # only train/val have labels; the test split has none
            landmarks = file_info.iloc[:, 2:26].values  # read the DataFrame values as a numpy array
            for i in range(len(landmarks)):
                label = []
                for j in range(24):
                    plot = landmarks[i][j].split('_')
                    coor = []
                    for per in plot:
                        coor.append(int(per))
                    label.append(coor)
                self._labels.append(np.concatenate(label))
            self._labels = np.array(self._labels).reshape((-1, 24, 3))
        else:
            self._labels = np.ones((len(self._image_paths), 24, 3))*(-1)

    def __getitem__(self, idx):
        label = self._labels[idx].astype(np.float32)  # copy as float, so the scaling below neither truncates nor modifies the stored labels in place
        image = cv2.imread(os.path.join(self._root_dir, self._image_paths[idx]), cv2.IMREAD_COLOR)
        imgSize = image.shape  # cv2 returns a numpy array in BGR order, shape (H, W, C)
        # print("old size:", imgSize)
        category = self._categories.index(self._cates[idx])  # 0,1,2,3,4

        if self._transform:
            image = self._transform(image)
            afterSize = image.shape
            # bi = np.array(afterSize[0:2]) / np.array(imgSize[0:2])
            # the coordinates (x, y) correspond to (w, h), while the image shape is (h, w, c), so the order is reversed here
            bi = np.array((afterSize[1], afterSize[0])) / np.array((imgSize[1], imgSize[0]))
            label[:, 0:2] = label[:, 0:2] * bi  # the image was resized, so scale the coordinates accordingly

        image = image.astype(np.float32)
        # image = image.transpose((2, 0, 1))
        return image, label, category

    def __len__(self):
        return len(self._image_paths)


def showImageAndCoor(img, coords):
    point_size = 1
    point_color = (0, 0, 255)  # BGR
    thickness = 4  # e.g. 0, 4 or 8
    img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)  # cast the float array back to uint8 and convert BGR to RGB
    cv2.namedWindow("image")
    for coor in coords:
        if coor[2] == -1:  # skip keypoints that do not exist for this garment
            pass
        else:
            cv2.circle(img, (int(coor[0]), int(coor[1])), point_size, point_color, thickness)  # draw the keypoint on the image
    cv2.imwrite('1.png', img, [int(cv2.IMWRITE_JPEG_QUALITY), 95])  # note: the JPEG quality flag has no effect when saving a PNG
    cv2.imshow("image", img)
    cv2.waitKey(1500)


if __name__ == "__main__":
    num_workers = 4
    data_root = r'E:/Datasets/Fashion/Fashion AI-keypoints_24/train/'  # data folder containing the train and test splits
    train_dataset = KeyPointsDataSet(data_root, 'train', train_transform)
    train_loader = DataLoader(train_dataset, batch_size=4, shuffle=False,
                              num_workers=num_workers, drop_last=True)
    for i_batch, data in enumerate(train_loader):
        img, label, category = data
        img, label, category = img.numpy(), label.numpy(), category.numpy()
        print(img.shape, label.shape, category)
        showImageAndCoor(img[0], label[0])
        # break
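
For reference, each of the 24 keypoint columns in the csv stores a string of the form "x_y_visibility" (a visibility of -1 marks a point that does not exist for that garment type). A minimal sketch of the parsing that __getFileList performs, using one made-up cell value:

import numpy as np

cell = '245_178_1'  # hypothetical csv cell: x=245, y=178, visibility=1
coor = [int(v) for v in cell.split('_')]
print(coor)  # [245, 178, 1]

# 24 such cells per row are stacked into one (24, 3) label array per image
label = np.array([coor] * 24)
print(label.shape)  # (24, 3)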

Code: transformation.py

import cv2
import random
import os


class RandomAdd:
    def __init__(self, add_path=""):
        self.imgs = []
        for files in os.walk(add_path):
            for file_name in files[2]:
                self.imgs.append(cv2.imread(os.path.join(add_path, file_name)))

    def __call__(self, image):
        if random.random() < 0.2:
            idx = random.randrange(len(self.imgs))
            select_img = self.imgs[idx]
            h, w, _ = select_img.shape
            if h < image.shape[0] // 2 and w < image.shape[1] // 2:
                begin_x = random.randrange(image.shape[1] - w)
                begin_y = random.randrange(image.shape[0] - h)
                image[begin_y:begin_y + h, begin_x:begin_x + w, :] = select_img
        return image


class ImageResize:
    def __init__(self, size=256):
        self._size = size

    def __call__(self, image):
        image = cv2.resize(image, (self._size, self._size))
        return image


class RandomCrop:
    def __init__(self, in_size=256, out_size=224):
        self._in_size = in_size
        self._out_size = out_size

    def __call__(self, image):
        dis = self._in_size - self._out_size
        col_start = random.randint(0, dis)
        row_start = random.randint(0, dis)

        image = image[row_start:(row_start + self._out_size), col_start:(col_start + self._out_size)]
        return image


class RandomFlip:
    def __call__(self, image):
        if random.random() < 0.5:
            image = image[:, ::-1, :]
        return image


class RandomRotate:
    def rotateImg(self, image, angle):
        rows, cols, _ = image.shape
        M = cv2.getRotationMatrix2D(((cols - 1) / 2.0, (rows - 1) / 2.0), angle, 1)
        image = cv2.warpAffine(image, M, (cols, rows))
        return image

    def __call__(self, image):
        if random.randrange(2):
            if random.randrange(2):
                image = self.rotateImg(image, 5)
            else:
                image = self.rotateImg(image, -5)
        return image


class Distort:
    def _convert(self, image, alpha=1, beta=0):
        tmp = image.astype(float) * alpha + beta
        tmp[tmp < 0] = 0
        tmp[tmp > 255] = 255
        image[:] = tmp

    def __call__(self, image):
        if random.randrange(2):

            # brightness distortion
            if random.randrange(2):
                self._convert(image, beta=random.uniform(-32, 32))

            # contrast distortion
            if random.randrange(2):
                self._convert(image, alpha=random.uniform(0.5, 1.5))

            image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

            # saturation distortion
            if random.randrange(2):
                self._convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))

            # hue distortion
            if random.randrange(2):
                tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
                tmp %= 180
                image[:, :, 0] = tmp

            image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)

        else:

            # brightness distortion
            if random.randrange(2):
                self._convert(image, beta=random.uniform(-32, 32))

            image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

            # saturation distortion
            if random.randrange(2):
                self._convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))

            # hue distortion
            if random.randrange(2):
                tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
                tmp %= 180
                image[:, :, 0] = tmp

            image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)

            # contrast distortion
            if random.randrange(2):
                self._convert(image, alpha=random.uniform(0.5, 1.5))

        return image
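
Because these transform classes operate directly on the numpy arrays returned by cv2.imread (rather than on PIL Images, as the built-in torchvision transforms do), they can also be tried out on a single image. A minimal sketch, assuming a local file sample.jpg exists:

import cv2
from torchvision import transforms
from transformation import ImageResize, Distort

aug = transforms.Compose([ImageResize(size=256), Distort()])
img = cv2.imread('sample.jpg', cv2.IMREAD_COLOR)  # hypothetical local image
out = aug(img)  # still a BGR numpy array, now 256x256 and color-jittered
cv2.imshow('augmented', out)
cv2.waitKey(1500)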

Notes

  1. cv2 reads images as arrays of shape (h, w, c) = (height, width, channels), while each keypoint coordinate is (x, y) = (width, height). Keep the two orders consistent when scaling both.
  2. For display purposes the dataset above does not normalize or standardize the image arrays; add normalization before training (a minimal sketch follows this list).
  3. Images are displayed with cv2's own window tools.
  4. For the interface/design pattern, see the blog post "Three ways to generate image-label datasets".
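
Regarding note 2, a minimal sketch of the normalization that could be added inside __getitem__ before the image is returned; the mean/std values here are the common ImageNet statistics (given in RGB order) and are only an assumption, so pick values that match your data and channel order:

import numpy as np

def normalize(image):
    # scale to [0, 1], then standardize per channel (assumed statistics)
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    return (image.astype(np.float32) / 255.0 - mean) / std

# inside KeyPointsDataSet.__getitem__, before returning:
# image = normalize(image)
# image = image.transpose((2, 0, 1))  # HWC -> CHW for PyTorch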


Reposted from blog.csdn.net/beauthy/article/details/124885056