前言

由于水平有限，难免出现错漏，敬请批评指正。

更多精彩内容，可点击进入YOLO系列专栏或我的个人主页查看

前提条件

熟悉Python

letterbox()函数：自适应图片缩放

letterbox的主要思想是尽可能地利用网络感受野的信息特征。在YOLOv5中，最后一层特征图中的每个点，可以对应原图中32×32的区域信息；在保证图片变换比例一致的情况下，只要长宽均可以被32整除，就可以有效地利用感受野的信息。

假设原图尺寸为(720, 640)（高, 宽），目标缩放尺寸为(640, 640)。要想满足收缩的要求，应该在两个方向的缩放比例中选取较小的一个，即 $\min(640 \div 720,\ 640 \div 640) = 640 \div 720 \approx 0.89$，则图片被缩放为(640, 569)。然后，要填充边界至可以被stride=32整除，则569填充至576，最终得到图片尺寸为(640, 576)。

import cv2
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize an image to fit ``new_shape`` and pad it with a constant border.

    The image is scaled by a single ratio (aspect ratio preserved) so that it
    fits inside ``new_shape``; the remaining space is then filled with
    ``color``, split evenly between the two opposite sides.

    Args:
        im: Image array. Only ``im.shape[:2]`` is read here, interpreted as
            (height, width); the array itself is handed to ``cv2.resize`` and
            ``cv2.copyMakeBorder``.
        new_shape: Target (height, width), or a single int used for both.
        color: Border fill value passed to ``cv2.copyMakeBorder``.
        auto: If true, pad only up to the next multiple of ``stride`` instead
            of all the way to ``new_shape`` ("minimum rectangle").
        scaleFill: If true (and ``auto`` is false), stretch the image to
            exactly ``new_shape`` with no padding; the aspect ratio is not
            preserved in that case.
        scaleup: If false, the scale ratio is capped at 1.0 so the image is
            only ever shrunk.
        stride: Multiple that the padded size must satisfy when ``auto`` is
            true.

    Returns:
        A tuple ``(im, ratio, (dw, dh))`` where ``im`` is the resized and
        padded image, ``ratio`` is the (width, height) scale ratio that was
        applied, and ``dw`` / ``dh`` are the per-side paddings (half of the
        total padding, so they may end in ``.5``).
    """
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old): take the smaller of the two so the whole image fits.
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    # Size of the image after scaling, as (width, height)
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    # Pixels still missing on each axis to reach the target shape
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        # Keep only the remainder, i.e. pad just enough to reach a stride multiple.
        # The built-in % is used (instead of np.mod) so this function does not
        # depend on numpy being imported; both follow the sign of the divisor.
        dw, dh = dw % stride, dh % stride  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize only when the size actually changes
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    # When the total padding is odd, dw/dh end in .5; the -0.1 / +0.1 offsets
    # make round() give the smaller half to top/left and the larger half to
    # bottom/right, so the two sides always sum to the total padding.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)

if __name__ == "__main__":
    # Demo: read an image, force it to 720x640 (height x width), then letterbox it
    # with the default arguments and print the shape before and after.
    source = '1.jpg'
    im = cv2.resize(cv2.imread(source), (640, 720))
    print('原图大小：', im.shape)  # (720, 640, 3)
    result = letterbox(im)
    im, ratio, (dw, dh) = result
    print('缩放后大小：', im.shape)  # (640, 576, 3)

原图大小： (720, 640, 3)
缩放后大小： (640, 576, 3)

LoadImages类：读取测试图片数据集

LoadImages类实现涉及到两个魔法函数__iter__和__next__，这使其拥有迭代器的特性。

import os
import glob
import cv2
import numpy as np
from pathlib import Path
from utils.augmentations import letterbox

# Module-level parameters
HELP_URL = 'See https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'

# File suffixes (lower-case, without the dot) treated as images
IMG_FORMATS = (
    'bmp', 'dng', 'jpeg', 'jpg', 'mpo',
    'png', 'tif', 'tiff', 'webp', 'pfm',
)

# File suffixes (lower-case, without the dot) treated as videos
VID_FORMATS = (
    'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov',
    'mp4', 'mpeg', 'mpg', 'ts', 'wmv',
)

class LoadImages:
    """Iterable loader over image and video files.

    YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`.

    The instance is its own iterator: ``__iter__`` resets the file counter and
    ``__next__`` yields one ``(path, im, im0, cap, s)`` tuple per image or per
    video frame. The ``print`` calls at the top of each method are kept on
    purpose; they trace when each method is invoked.
    """

    def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
        """Collect the files to iterate over.

        Args:
            path: A file, a directory, a glob pattern containing ``*``, a
                ``.txt`` file whose whitespace-separated entries are such
                paths, or a list/tuple of any of these.
            img_size: Target size forwarded to ``letterbox``.
            stride: Stride forwarded to ``letterbox``.
            auto: ``auto`` flag forwarded to ``letterbox``.
            transforms: Optional callable applied to the raw image instead of
                the letterbox / transpose pipeline.
            vid_stride: Number of frames grabbed per yielded video frame.

        Raises:
            FileNotFoundError: If an entry is not a glob, directory or file.
            AssertionError: If no file with a supported suffix was found.
        """
        print("__init__")

        if isinstance(path, str) and Path(path).suffix == ".txt":  # *.txt file with img/vid/dir on each line
            path = Path(path).read_text().rsplit()

        files = []
        # Expand every entry into concrete file paths
        for p in sorted(path) if isinstance(path, (list, tuple)) else [path]:
            p = str(Path(p).resolve())
            if '*' in p:
                files.extend(sorted(glob.glob(p, recursive=True)))  # glob
            elif os.path.isdir(p):
                files.extend(sorted(glob.glob(os.path.join(p, '*.*'))))  # dir
            elif os.path.isfile(p):
                files.append(p)  # files
            else:
                raise FileNotFoundError(f'{p} does not exist')

        # Classify by lower-cased suffix; files with any other suffix are dropped
        images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS]
        videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS]

        ni, nv = len(images), len(videos)  # number of images, number of videos

        self.img_size = img_size  # target image size
        self.stride = stride  # stride passed to letterbox
        self.files = images + videos  # all files, images first and then videos
        self.nf = ni + nv  # number of files
        self.video_flag = [False] * ni + [True] * nv  # parallel to self.files
        self.mode = 'image'
        self.auto = auto  # defaults to True
        self.transforms = transforms  # optional, defaults to None
        self.vid_stride = vid_stride  # video frame-rate stride, defaults to 1
        if any(videos):  # any() is True as soon as one element is truthy
            self._new_video(videos[0])  # new video
        else:
            self.cap = None
        assert self.nf > 0, f'No images or videos found in {p}. ' \
                            f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}'

    def __iter__(self):
        """Reset the file counter and return the loader itself as the iterator."""
        print("__iter__")
        self.count = 0
        return self

    def __next__(self):
        """Return the next image or video frame.

        Returns:
            ``(path, im, im0, self.cap, s)``: the file path, the processed
            image, the image as read, the current capture object (``None``
            when the loader was built without videos) and a progress string.

        Raises:
            StopIteration: When every file has been consumed.
        """
        print("__next__")
        if self.count == self.nf:
            raise StopIteration
        path = self.files[self.count]

        if self.video_flag[self.count]:
            # Read video
            self.mode = 'video'
            # Advance vid_stride frames but decode only the last one
            for _ in range(self.vid_stride):
                self.cap.grab()
            ret_val, im0 = self.cap.retrieve()
            while not ret_val:
                # Current video is exhausted: move on to the next file
                self.count += 1
                self.cap.release()
                if self.count == self.nf:  # last video
                    raise StopIteration
                path = self.files[self.count]
                self._new_video(path)
                ret_val, im0 = self.cap.read()

            self.frame += 1
            # im0 = self._cv2_rotate(im0)  # for use if cv2 autorotation is False
            s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: '

        else:
            # Read image
            self.count += 1
            im0 = cv2.imread(path)  # BGR
            assert im0 is not None, f'Image Not Found {path}'
            s = f'image {self.count}/{self.nf} {path}: '

        if self.transforms:
            im = self.transforms(im0)  # transforms
        else:
            im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0]  # padded resize
            im = im.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
            im = np.ascontiguousarray(im)  # contiguous

        return path, im, im0, self.cap, s

    def _new_video(self, path):
        """Open ``path`` as the current video and reset the per-video frame counters."""
        self.frame = 0  # frames yielded so far from this video
        self.cap = cv2.VideoCapture(path)
        # Number of frames that will be yielded given the configured stride
        self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)
    
if __name__ == "__main__":
    # Demo: build a loader for a single image and print what each iteration yields.
    source, imgsz, stride, pt, vid_stride = '1.jpg', 640, 32, True, 1
    dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
    for path, im, im0s, vid_cap, s in dataset:
        # (label, value) pairs, printed in the same order and format as before
        report = (
            ('img_path:', path),
            ('im0s.shape', im0s.shape),
            ('im.shape:', im.shape),
            ('vid_cap:', vid_cap),
            ('相关信息string:', s),
        )
        for label, value in report:
            print(label, value)

__init__
__iter__
__next__
img_path: 1.jpg
im0s.shape (416, 416, 3)
im.shape: (3, 640, 640)
vid_cap: None
相关信息string: image 1/1 1.jpg: 
__next__

更多精彩内容，可点击进入YOLO系列专栏或我的个人主页查看

YOLOv5：LoadImages类、letterbox函数

YOLOv5：LoadImages类、letterbox函数

前言

前提条件

相关介绍

letterbox()函数：自适应图片缩放

LoadImages类：读取测试图片数据集

猜你喜欢