siamese-fc PyTorch code walkthrough

demo_siamfc.py

1. os.path.join(video_dir, "img/*.jpg") joins two or more path components

video_dir = '../Car1/'
b = os.path.join(video_dir, "img/*.jpg")
print(b)
The output is
../Car1/img/*.jpg
If video_dir = '../Car1' (no trailing slash),
the output is ../Car1\img/*.jpg: os.path.join automatically inserts the platform path separator, a backslash '\' on Windows.

2. glob.glob()

The argument is a string pattern:
test = glob.glob(os.path.join(video_dir, "img/*.jpg"))
print('test: ', test)
The output is
test:  ['../Car1/img\\0001.jpg', '../Car1/img\\0002.jpg', '../Car1/img\\0003.jpg', ...]
i.e. every jpg file in that folder.

3. os.path.basename(x).split('.')[0]

path = 'D:/honey/0001.jpg'
print('basepath: ', os.path.basename(path))
print('split: ', os.path.basename(path).split('.'))
The output is
basepath:  0001.jpg
split:  ['0001', 'jpg']
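
Putting it together: the demo feeds the split result to int() so frames sort numerically instead of lexicographically. A minimal sketch with hypothetical file names:

import os

names = ['img/2.jpg', 'img/10.jpg', 'img/1.jpg']   # hypothetical file names
print(sorted(names))
# ['img/1.jpg', 'img/10.jpg', 'img/2.jpg'] -- lexicographic, wrong order
print(sorted(names, key=lambda x: int(os.path.basename(x).split('.')[0])))
# ['img/1.jpg', 'img/2.jpg', 'img/10.jpg'] -- numeric, correct order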

4. demo_siamfc.py (the full annotated source)

import glob
import os
import pandas as pd
import argparse
import numpy as np
import cv2
import time
import sys
sys.path.append(os.getcwd())

from fire import Fire
from tqdm import tqdm

from siamfc import SiamFCTracker

def main(video_dir, gpu_id,  model_path):
    #savepath = cv2.VideoWriter('test_track.avi', cv2.VideoWriter_fourcc('M', 'P', '4', '2'), 25, (320,240),True)
    # collect all matching frame paths, sorted numerically by file name
    filenames = sorted(glob.glob(os.path.join(video_dir, "img/*.jpg")),
           key=lambda x: int(os.path.basename(x).split('.')[0]))  # os.path.basename() returns the final path component (the file name)
    # read every frame and convert from OpenCV's BGR order to RGB
    frames = [cv2.cvtColor(cv2.imread(filename), cv2.COLOR_BGR2RGB) for filename in filenames]
    # read the ground-truth target box for every frame
    gt_bboxes = pd.read_csv(os.path.join(video_dir, "groundtruth_rect.txt"), sep='\t|,| ',
            header=None, names=['xmin', 'ymin', 'width', 'height'],
            engine='python')
    #print('gt_bboxes: ', gt_bboxes)
    title = video_dir.split('/')[-1]  # sequence name; note this is an empty string when video_dir ends with '/'
    #print('title: ', title)
    # starting tracking
    # build the tracker from the model path and gpu id
    tracker = SiamFCTracker(model_path, gpu_id)
    for idx, frame in enumerate(frames):
        if idx == 0:
            # first row: the target box in the first frame
            bbox = gt_bboxes.iloc[0].values
            # initialize the tracker with the first frame and box
            tracker.init(frame, bbox)
            # convert the 1-based [x, y, w, h] box to 0-based corner coordinates (xmin, ymin, xmax, ymax)
            bbox = (bbox[0]-1, bbox[1]-1,
                    bbox[0]+bbox[2]-1, bbox[1]+bbox[3]-1)
        else:
            # track: predict the box in the current frame
            bbox = tracker.update(frame)
        # draw the tracker's predicted box
        frame = cv2.rectangle(frame,
                              (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])),
                              (0, 255, 0),
                              2)
        # draw the ground-truth box from groundtruth_rect.txt
        gt_bbox = gt_bboxes.iloc[idx].values
        gt_bbox = (gt_bbox[0], gt_bbox[1],
                   gt_bbox[0]+gt_bbox[2], gt_bbox[1]+gt_bbox[3])
        frame = cv2.rectangle(frame,
                              (int(gt_bbox[0]-1), int(gt_bbox[1]-1)), # 0-index
                              (int(gt_bbox[2]-1), int(gt_bbox[3]-1)),
                              (255, 0, 0),
                              1)
        # convert back to BGR for cv2.imshow
        if len(frame.shape) == 3:
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        frame = cv2.putText(frame, str(idx), (5, 20), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1)
        cv2.imshow(title, frame)
        #savepath.write(frame)
        cv2.waitKey(30)

if __name__ == "__main__":
    video_dir = '../Car1/'  # video sequence directory
    gpu_id = 0  # gpu id
    model_path = '../models/siamfc_pretrained.pth'  # pretrained model path
    main(video_dir, gpu_id, model_path)
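
One detail worth noting: sep='\t|,| ' is a regex (hence engine='python'), so the same reader handles ground-truth files delimited by tabs, commas, or spaces. A minimal sketch with inline data:

import io
import pandas as pd

raw = "10,20,30,40\n11\t21\t31\t41\n"   # hypothetical lines mixing delimiters
df = pd.read_csv(io.StringIO(raw), sep='\t|,| ', header=None,
                 names=['xmin', 'ymin', 'width', 'height'], engine='python')
print(df.iloc[0].values)  # [10 20 30 40]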

tracker.py

1. torch.nn.Module.eval(): evaluation mode; BatchNorm uses its running statistics and Dropout is disabled (a sketch follows the np.newaxis example below)
2. torchvision.transforms.Compose([ToTensor()]): converts a PIL Image or ndarray to a tensor; torchvision's built-in ToTensor also scales pixel values to [0, 1] (note the tracker actually composes the repo's custom ToTensor from custom_transforms)
3. np.newaxis: inserts a new axis into an array

import numpy as np
x = np.array([1, 2, 3, 4])
print('x: ', x)
print('x.shape: ', x.shape)
x1 = x[np.newaxis, :]
print('x1: ', x1)
print('x1.shape: ', x1.shape)
x2 = x[:, np.newaxis]
print('x2: ', x2)
print('x2.shape: ', x2.shape)
Output:
x:  [1 2 3 4]
x.shape:  (4,)
x1:  [[1 2 3 4]]
x1.shape:  (1, 4)
x2:  [[1]
 [2]
 [3]
 [4]]
x2.shape:  (4, 1)
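
A minimal sketch for items 1 and 2 (illustrative only; it uses torchvision's built-in ToTensor, whereas the tracker composes the repo's custom ToTensor):

import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms

# 1. eval(): a model containing Dropout behaves differently in train vs eval mode
model = nn.Sequential(nn.Linear(4, 4), nn.Dropout(p=0.5))
x = torch.ones(1, 4)
model.train()
print(model(x))  # Dropout zeroes random entries: output changes on every call
model.eval()     # Dropout disabled; BatchNorm would use running statistics
print(model(x))  # deterministic: same output on every call

# 2. torchvision's ToTensor: HWC uint8 ndarray -> CHW float tensor in [0, 1]
to_tensor = transforms.Compose([transforms.ToTensor()])
img = np.random.randint(0, 256, (8, 8, 3), dtype=np.uint8)
t = to_tensor(img)
print(t.shape, float(t.min()), float(t.max()))  # torch.Size([3, 8, 8]), values in [0, 1]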

4. numpy.dot(): matrix multiplication (see the sketch below; this is exactly how _cosine_window in tracker.py builds its 2-D window)
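
A quick sketch of the outer-product pattern: np.dot of a column vector and a row vector yields a 2-D matrix.

import numpy as np

a = np.hanning(4)[:, np.newaxis]  # column vector, shape (4, 1)
b = np.hanning(4)[np.newaxis, :]  # row vector, shape (1, 4)
w = a.dot(b)                      # outer product, shape (4, 4)
print(w.shape)  # (4, 4)
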
5. tracker.py (the full annotated source)

import numpy as np
import cv2
import torch
import torch.nn.functional as F
import time
import warnings
import torchvision.transforms as transforms

from torch.autograd import Variable

from .alexnet import SiameseAlexNet
from .config import config
from .custom_transforms import ToTensor
from .utils import get_exemplar_image, get_pyramid_instance_image, get_instance_image

torch.set_num_threads(1) # otherwise pytorch will take all cpus

class SiamFCTracker:
    def __init__(self, model_path, gpu_id):
        self.gpu_id = gpu_id
        with torch.cuda.device(gpu_id):
            self.model = SiameseAlexNet(gpu_id, train=False)
            # torch.load() deserializes a pickled object into memory
            # nn.Module.load_state_dict() copies the deserialized state_dict into the model
            self.model.load_state_dict(torch.load(model_path))
            self.model = self.model.cuda()
            # evaluation mode: BatchNorm and Dropout are frozen (counterpart of nn.Module.train())
            self.model.eval()
        # convert an ndarray patch to a tensor (custom ToTensor from custom_transforms)
        self.transforms = transforms.Compose([
            ToTensor()
        ])
    # build the cosine (Hanning) window
    def _cosine_window(self, size):
        """
            get the cosine window
        """
        # outer product of two 1-D Hanning windows (np.newaxis adds the needed axes)
        # result has shape (size[0], size[1])
        cos_window = np.hanning(int(size[0]))[:, np.newaxis].dot(np.hanning(int(size[1]))[np.newaxis, :])
        cos_window = cos_window.astype(np.float32)
        cos_window /= np.sum(cos_window)
        return cos_window
    # initialize with an RGB frame and the first-frame target box
    def init(self, frame, bbox):
        """ initialize siamfc tracker
        Args:
            frame: an RGB image
            bbox: one-based bounding box [x, y, width, height]
        """
        self.bbox = (bbox[0]-1, bbox[1]-1, bbox[0]-1+bbox[2], bbox[1]-1+bbox[3]) # zero based
        # target center coordinates
        self.pos = np.array([bbox[0]-1+(bbox[2]-1)/2, bbox[1]-1+(bbox[3]-1)/2])  # center x, center y, zero based
        # target size
        self.target_sz = np.array([bbox[2], bbox[3]])                            # width, height
        # matches the MATLAB code: avgChans = gather([mean(mean(im(:,:,1))) mean(mean(im(:,:,2))) mean(mean(im(:,:,3)))]);
        # per-channel image mean, used later to pad crops
        self.img_mean = tuple(map(int, frame.mean(axis=(0, 1))))
        '''
        print('img_mean: ', self.img_mean)
        img_mean:  (140, 140, 140)
        '''
        # get_exemplar_image(frame, box, 127, 0.5, self.img_mean)
        # crop the exemplar (template) patch around the target center
        # scale_z is the factor that resizes s_z to 127; s_z is the side of the context-padded square around the target
        exemplar_img, scale_z, s_z = get_exemplar_image(frame, self.bbox,
                config.exemplar_size, config.context_amount, self.img_mean)

        # get exemplar feature
        # convert the patch to a tensor and add a batch dimension
        exemplar_img = self.transforms(exemplar_img)[None,:,:,:]
        # move the exemplar to the GPU
        with torch.cuda.device(self.gpu_id):
            exemplar_img_var = Variable(exemplar_img.cuda())
            # forward pass: compute and cache the exemplar features
            self.model((exemplar_img_var, None))
        # config.num_scale = 3
        self.penalty = np.ones((config.num_scale)) * config.scale_penalty  # scale_penalty = 0.9745
        # penalty becomes (0.9745, 1, 0.9745): no penalty on the unchanged middle scale
        self.penalty[config.num_scale//2] = 1

        # create cosine window
        # config.response_up_stride = 16 (upsampling stride), config.response_sz = 17
        self.interp_response_sz = config.response_up_stride * config.response_sz  # 16 * 17 = 272
        # build the cosine window
        self.cosine_window = self._cosine_window((self.interp_response_sz, self.interp_response_sz))

        # create scales
        # three scales with scale_step = 1.0375: (1/1.0375, 1, 1.0375)
        self.scales = config.scale_step ** np.arange(np.ceil(config.num_scale/2)-config.num_scale,
                np.floor(config.num_scale/2)+1)

        # instance_size = 255, exemplar_size = 127
        # search region size in the frame: s_z plus (255-127)/scale_z, i.e. the extra context measured at the exemplar's scale
        self.s_x = s_z + (config.instance_size-config.exemplar_size) / scale_z

        # arbitrary scale saturation
        # allowed range for the search region size
        self.min_s_x = 0.2 * self.s_x
        self.max_s_x = 5 * self.s_x
    # track: update on a new frame
    def update(self, frame):
        """track object based on the previous frame
        Args:
            frame: an RGB image

        Returns:
            bbox: tuple of 1-based bounding box(xmin, ymin, xmax, ymax)
        """
        # search region sizes at the three scales
        size_x_scales = self.s_x * self.scales
        # crop the three-scale pyramid of instance patches
        pyramid = get_pyramid_instance_image(frame, self.pos, config.instance_size, size_x_scales, self.img_mean)
        # convert each patch to a tensor and stack them along the batch dimension
        instance_imgs = torch.cat([self.transforms(x)[None,:,:,:] for x in pyramid], dim=0)
        #print('instance_imgs: ', instance_imgs.size())
        with torch.cuda.device(self.gpu_id):
            # move the instance patches to the GPU
            instance_imgs_var = Variable(instance_imgs.cuda())
            # forward pass: cross-correlation response maps
            response_maps = self.model((None, instance_imgs_var))
            response_maps = response_maps.data.cpu().numpy().squeeze()
            # upsample each 17x17 response map to 272x272
            # (interpolation must be passed by keyword: a third positional argument is cv2.resize's dst, not interpolation)
            response_maps_up = [cv2.resize(x, (self.interp_response_sz, self.interp_response_sz),
                                           interpolation=cv2.INTER_CUBIC)
                                for x in response_maps]
        # max score at each scale, weighted by the scale penalty
        max_score = np.array([x.max() for x in response_maps_up]) * self.penalty
        # penalty scale change
        scale_idx = max_score.argmax()  # index of the best-scoring scale
        response_map = response_maps_up[scale_idx]
        # normalize the response map to sum to 1
        response_map -= response_map.min()
        response_map /= response_map.sum()
        # config.window_influence = 0.176; blend with the cosine window to penalize large displacements
        response_map = (1 - config.window_influence) * response_map + \
                config.window_influence * self.cosine_window
        # locate the peak of the response
        max_r, max_c = np.unravel_index(response_map.argmax(), response_map.shape)
        # displacement in interpolation response
        # displacement of the peak from the center of the upsampled response map
        disp_response_interp = np.array([max_c, max_r]) - (self.interp_response_sz-1) / 2.
        # displacement in input
        disp_response_input = disp_response_interp * config.total_stride / config.response_up_stride
        # displacement in frame
        # best scale: disp_response_input is measured on the 255x255 instance patch, so map it back to the frame by multiplying by (s_x * scale) / 255
        scale = self.scales[scale_idx]
        disp_response_frame = disp_response_input * (self.s_x * scale) / config.instance_size
        # absolute target center in the frame
        self.pos += disp_response_frame
        # config.scale_lr = 0.59: learning rate for the scale update
        self.s_x *= ((1 - config.scale_lr) + config.scale_lr * scale)
        self.s_x = max(self.min_s_x, min(self.max_s_x, self.s_x))
        self.target_sz = ((1 - config.scale_lr) + config.scale_lr * scale) * self.target_sz
        bbox = (self.pos[0] - self.target_sz[0]/2 + 1, # xmin   convert to 1-based
                self.pos[1] - self.target_sz[1]/2 + 1, # ymin
                self.pos[0] + self.target_sz[0]/2 + 1, # xmax
                self.pos[1] + self.target_sz[1]/2 + 1) # ymax
        return bbox
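
To make the displacement arithmetic concrete, here is a small worked example; response_up_stride, response_sz, and instance_size are the config values quoted in the comments above, total_stride = 8 (the network's overall stride) is an assumption, and the peak location and s_x are made up:

import numpy as np

response_up_stride, response_sz = 16, 17
total_stride, instance_size = 8, 255        # total_stride = 8 assumed
interp_response_sz = response_up_stride * response_sz   # 272

max_r, max_c = 150, 140                     # hypothetical peak in the 272x272 map
disp_interp = np.array([max_c, max_r]) - (interp_response_sz - 1) / 2.  # [4.5, 14.5]
disp_input = disp_interp * total_stride / response_up_stride  # [2.25, 7.25] on the 255x255 patch
s_x, scale = 200.0, 1.0                     # hypothetical search region size and best scale
disp_frame = disp_input * (s_x * scale) / instance_size       # displacement in frame pixels
print(disp_frame)   # approximately [1.76, 5.69]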

utils.py

import numpy as np
import cv2

def get_center(x):
    return (x - 1.) / 2.

# convert (xmin, ymin, xmax, ymax) to (cx, cy, w, h)
def xyxy2cxcywh(bbox):
    return get_center(bbox[0]+bbox[2]), \
           get_center(bbox[1]+bbox[3]), \
           (bbox[2]-bbox[0]), \
           (bbox[3]-bbox[1])

def crop_and_pad(img, cx, cy, model_sz, original_sz, img_mean=None):
    # x, y of the crop's top-left and bottom-right corners
    xmin = cx - original_sz // 2
    xmax = cx + original_sz // 2
    ymin = cy - original_sz // 2
    ymax = cy + original_sz // 2
    im_h, im_w, _ = img.shape

    left = right = top = bottom = 0
    if xmin < 0:
        left = int(abs(xmin))
    if xmax > im_w:
        right = int(xmax - im_w)
    if ymin < 0:
        top = int(abs(ymin))
    if ymax > im_h:
        bottom = int(ymax - im_h)

    xmin = int(max(0, xmin))
    xmax = int(min(im_w, xmax))
    ymin = int(max(0, ymin))
    ymax = int(min(im_h, ymax))
    # crop the part of the patch that lies inside the image
    im_patch = img[ymin:ymax, xmin:xmax]
    if left != 0 or right !=0 or top!=0 or bottom!=0:
        # compute the image mean here if it was not passed in
        if img_mean is None:
            img_mean = tuple(map(int, img.mean(axis=(0, 1))))
        # pad im_patch with the mean color up to original_sz
        im_patch = cv2.copyMakeBorder(im_patch, top, bottom, left, right,
                cv2.BORDER_CONSTANT, value=img_mean)
    if model_sz != original_sz:
        im_patch = cv2.resize(im_patch, (model_sz, model_sz))  # resize to model_sz x model_sz (e.g. 127x127)
    return im_patch

# get_exemplar_image(frame, box, 127, 0.5, self.img_mean): crop the exemplar patch used for initialization
def get_exemplar_image(img, bbox, size_z, context_amount, img_mean=None):
    # convert to center coordinates
    cx, cy, w, h = xyxy2cxcywh(bbox)
    # context-padded width and height
    wc_z = w + context_amount * (w+h)
    hc_z = h + context_amount * (w+h)
    # square root of the padded area
    s_z = np.sqrt(wc_z * hc_z)
    # resize factor: scale_z = 127 / s_z
    scale_z = size_z / s_z
    # crop the patch centered on the target
    exemplar_img = crop_and_pad(img, cx, cy, size_z, s_z, img_mean)
    return exemplar_img, scale_z, s_z
# get_instance_image: crop the search (instance) patch around the target
def get_instance_image(img, bbox, size_z, size_x, context_amount, img_mean=None):
    cx, cy, w, h = xyxy2cxcywh(bbox)
    wc_z = w + context_amount * (w+h)
    hc_z = h + context_amount * (w+h)
    s_z = np.sqrt(wc_z * hc_z)
    scale_z = size_z / s_z
    d_search = (size_x - size_z) / 2
    pad = d_search / scale_z
    s_x = s_z + 2 * pad
    scale_x = size_x / s_x
    instance_img = crop_and_pad(img, cx, cy, size_x, s_x, img_mean)
    return instance_img, scale_x, s_x

# build the pyramid of search patches (image, center, 255, the three scaled sizes, image mean), each resized to 255x255
def get_pyramid_instance_image(img, center, size_x, size_x_scales, img_mean=None):
    if img_mean is None:
        img_mean = tuple(map(int, img.mean(axis=(0, 1))))
    pyramid = [crop_and_pad(img, center[0], center[1], size_x, size_x_scale, img_mean)
            for size_x_scale in size_x_scales]
    return pyramid
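
A worked example of the context arithmetic above, with an illustrative 60x40 target and the constants quoted earlier (size_z = 127, size_x = 255, context_amount = 0.5):

import numpy as np

w, h = 60., 40.                         # illustrative target size
context_amount, size_z, size_x = 0.5, 127, 255
wc_z = w + context_amount * (w + h)     # 110.0
hc_z = h + context_amount * (w + h)     # 90.0
s_z = np.sqrt(wc_z * hc_z)              # ~99.5, side of the context-padded square
scale_z = size_z / s_z                  # ~1.28, resize factor to 127
d_search = (size_x - size_z) / 2        # 64 extra pixels per side at exemplar scale
pad = d_search / scale_z                # ~50.1 pixels per side in the frame
s_x = s_z + 2 * pad                     # ~199.8, side of the search square in the frame
print(s_z, scale_z, s_x)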
