YOLOv5:LoadImages类、letterbox函数

前言

  • 由于水平有限,难免出现错漏,敬请批评指正。
  • 更多精彩内容,可点击进入YOLO系列专栏或我的个人主页查看

前提条件

相关介绍

  • Python是一种跨平台的计算机程序设计语言,是一个高层次的结合了解释性、编译性、互动性和面向对象的脚本语言。最初被设计用于编写自动化脚本(shell),随着版本的不断更新和语言新功能的添加,越来越多地被用于独立的、大型项目的开发。
  • Python OS模块是负责程序与操作系统的交互,提供了访问操作系统底层的接口和非常丰富的方法用来处理文件和目录。

letterbox()函数:自适应图片缩放

  • letterbox的主要思想是尽可能地利用网络感受野的信息特征。在YOLOv5中,最后一层的特征图中每个点,可以对应原图中32×32的区域信息,在保证图片变换比例一致的情况下,长宽均可以被32整除,那么就可以有效地利用感受野的信息。
  • 假设原图尺寸为(720, 640),目标缩放尺寸为(640, 640)。要想满足收缩的要求,应该选取较小的收缩比例 $640 \div 720 \approx 0.89$,则图片被缩放为(640, 569)。然后,要填充边界至可以被stride=32整除,则569填充至576,最终得到图片尺寸为(640, 576)。
import cv2
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize an image to fit ``new_shape`` without distortion, then pad its borders.

    Args:
        im: Image array whose first two axes are (height, width); it is handed
            to ``cv2.resize`` and ``cv2.copyMakeBorder``.
        new_shape: Target (height, width), or a single int used for both sides.
        color: Constant border value passed to ``cv2.copyMakeBorder``.
        auto: If True, pad only by the remainder modulo ``stride`` (minimum
            rectangle) instead of padding all the way out to ``new_shape``.
        scaleFill: If True (and ``auto`` is False), stretch the image to
            ``new_shape`` with no padding; the aspect ratio is not preserved.
        scaleup: If False, the image is only ever shrunk, never enlarged.
        stride: Modulus used for the padding when ``auto`` is True.

    Returns:
        A tuple ``(im, ratio, (dw, dh))``: the resized and padded image, the
        (width, height) scale ratios that were applied, and the padding added
        on each side along width and height (half of the total, so it may be
        fractional).
    """
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old): the smaller ratio makes the whole image fit.
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)

    # Size after scaling and the total padding still missing on each axis.
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))  # (width, height)
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        # The built-in % keeps this function free of a NumPy dependency; for
        # these paddings it yields the same remainder as np.mod.
        dw, dh = dw % stride, dh % stride
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    # Divide the padding between the two sides of each axis.
    dw /= 2
    dh /= 2

    if shape[::-1] != new_unpad:  # resize only when the size actually changes
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

    # The -0.1 / +0.1 offsets send an odd pixel of padding to the bottom/right
    # side when the half-padding ends in .5.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)

if __name__ == "__main__":
    # Demo: force a 720x640 (height x width) input and show the letterboxed size.
    image_path = '1.jpg'
    frame = cv2.resize(cv2.imread(image_path), (640, 720))
    print('原图大小:', frame.shape)  # (720, 640, 3)
    padded, scale, (pad_w, pad_h) = letterbox(frame)
    print('缩放后大小:', padded.shape)  # (640, 576, 3)
原图大小: (720, 640, 3)
缩放后大小: (640, 576, 3)

LoadImages类:读取测试图片数据集

  • LoadImages类实现涉及到两个魔法函数__iter__和__next__,这使其拥有迭代器的特性。
import os
import glob
import cv2
import numpy as np
from pathlib import Path
from utils.augmentations import letterbox

# Module-level settings
HELP_URL = 'See https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'

# Lower-case file extensions (without the dot) treated as images.
IMG_FORMATS = (
    'bmp', 'dng', 'jpeg', 'jpg', 'mpo',
    'png', 'tif', 'tiff', 'webp', 'pfm',
)

# Lower-case file extensions (without the dot) treated as videos.
VID_FORMATS = (
    'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov',
    'mp4', 'mpeg', 'mpg', 'ts', 'wmv',
)

class LoadImages:
    """YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`.

    The instance is its own iterator: ``__iter__`` resets the position and
    ``__next__`` yields ``(path, im, im0, cap, s)`` for each file (or video frame).
    """

    def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
        """Collect the image and video files described by ``path``.

        Args:
            path: A file, a directory, a glob pattern containing ``*``, a list
                or tuple of those, or a ``.txt`` file whose whitespace-separated
                entries are such paths.
            img_size: Target size forwarded to ``letterbox``.
            stride: Stride forwarded to ``letterbox``.
            auto: ``auto`` flag forwarded to ``letterbox``.
            transforms: Optional callable applied to the raw image in place of
                the letterbox preprocessing.
            vid_stride: Number of frames grabbed per yielded video frame.

        Raises:
            FileNotFoundError: If an entry is neither a glob, a directory nor a file.
            AssertionError: If no file with a supported extension was found.
        """
        print("__init__")

        if isinstance(path, str) and Path(path).suffix == ".txt":  # *.txt file with img/vid/dir on each line
            path = Path(path).read_text().rsplit()

        # Expand every entry into concrete file paths.
        files = []
        for p in sorted(path) if isinstance(path, (list, tuple)) else [path]:
            p = str(Path(p).resolve())
            if '*' in p:
                files.extend(sorted(glob.glob(p, recursive=True)))  # glob
            elif os.path.isdir(p):
                files.extend(sorted(glob.glob(os.path.join(p, '*.*'))))  # dir
            elif os.path.isfile(p):
                files.append(p)  # files
            else:
                raise FileNotFoundError(f'{p} does not exist')

        # Classify by extension; anything else is silently dropped.
        images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS]
        videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS]

        ni, nv = len(images), len(videos)  # number of images, number of videos

        self.img_size = img_size
        self.stride = stride
        self.files = images + videos  # images first, then videos
        self.nf = ni + nv  # number of files
        self.video_flag = [False] * ni + [True] * nv  # parallel to self.files
        self.mode = 'image'
        self.auto = auto
        self.transforms = transforms  # optional
        self.vid_stride = vid_stride  # video frame-rate stride
        if any(videos):
            self._new_video(videos[0])  # new video
        else:
            self.cap = None
        assert self.nf > 0, f'No images or videos found in {p}. ' \
                            f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}'

    def __iter__(self):
        """Reset the file position and return the loader itself as the iterator."""
        print("__iter__")
        self.count = 0
        return self

    def __next__(self):
        """Return ``(path, im, im0, cap, s)`` for the next image or video frame.

        ``im0`` is the image as read by OpenCV, ``im`` is the preprocessed
        version, ``cap`` is the current video capture (``None`` when no video
        was found) and ``s`` is a progress string.

        Raises:
            StopIteration: When every file has been consumed.
        """
        print("__next__")
        if self.count == self.nf:
            raise StopIteration
        path = self.files[self.count]

        if self.video_flag[self.count]:
            # Read video: grab vid_stride frames and decode only the last one.
            self.mode = 'video'
            for _ in range(self.vid_stride):
                self.cap.grab()
            ret_val, im0 = self.cap.retrieve()
            while not ret_val:
                # Current video is exhausted: move on to the next file.
                self.count += 1
                self.cap.release()
                if self.count == self.nf:  # last video
                    raise StopIteration
                path = self.files[self.count]
                self._new_video(path)
                ret_val, im0 = self.cap.read()

            self.frame += 1
            # im0 = self._cv2_rotate(im0)  # for use if cv2 autorotation is False
            s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: '

        else:
            # Read image
            self.count += 1
            im0 = cv2.imread(path)  # BGR
            assert im0 is not None, f'Image Not Found {path}'
            s = f'image {self.count}/{self.nf} {path}: '

        if self.transforms:
            im = self.transforms(im0)  # transforms
        else:
            im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0]  # padded resize
            im = im.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
            im = np.ascontiguousarray(im)  # contiguous

        return path, im, im0, self.cap, s

    def _new_video(self, path):
        """Open ``path`` as the current video capture and reset the frame counters."""
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)

    def __len__(self):
        """Return the number of files (images plus videos)."""
        return self.nf  # number of files
    
if __name__ == "__main__":
    # Demo: run the loader over a single image and print everything it yields.
    loader = LoadImages('1.jpg', img_size=640, stride=32, auto=True, vid_stride=1)
    for img_path, processed, original, capture, info in loader:
        print('img_path:', img_path)
        print('im0s.shape', original.shape)
        print('im.shape:', processed.shape)
        print('vid_cap:', capture)
        print('相关信息string:', info)
__init__
__iter__
__next__
img_path: 1.jpg
im0s.shape (416, 416, 3)
im.shape: (3, 640, 640)
vid_cap: None
相关信息string: image 1/1 1.jpg: 
__next__

猜你喜欢

转载自blog.csdn.net/FriendshipTang/article/details/129252159