Implementing the KLT Tracking Algorithm in Python (Source Code Included)

NVIDIA Vision Programming Interface (VPI) is NVIDIA's computer vision and image processing software library. It lets you implement algorithms that are accelerated on the different hardware backends available on NVIDIA Jetson embedded devices and on discrete GPUs.

The algorithms in the library include filtering methods, perspective warp, temporal noise reduction, histogram equalization, stereo disparity, and lens distortion correction. VPI provides easy-to-use Python bindings in addition to its C++ API.

Besides interfacing with OpenCV, VPI can also interoperate with PyTorch and other Python-based libraries.
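
To see this interoperability in action, here is a minimal sketch (it uses only calls that also appear in the sample below; the file name is hypothetical): an OpenCV frame is wrapped as a VPI image, then locked for CPU access and viewed again as a NumPy array.

import cv2
import vpi

# Read a grayscale frame with OpenCV; 'frame.png' is a hypothetical file name
cvFrame = cv2.imread('frame.png', cv2.IMREAD_GRAYSCALE)

# Wrap the NumPy array as a VPI image, as the sample below does with vpi.asimage
vpiImage = vpi.asimage(cvFrame)

# Lock the image for CPU read access; the lock yields a NumPy view of the pixels
with vpiImage.rlock_cpu() as arr:
    print(arr.shape, arr.dtype)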

The example below tracks bounding boxes on an input video, draws them on every frame, and saves the result to a video file. The user can choose which backend is used for the processing.
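
Assuming the listing is saved as main.py (the script and input file names here are placeholders), a typical invocation looks like this:

python3 main.py cuda input_video.mp4 bboxes.txt

The first positional argument selects the backend (cpu, cuda, or pva); the result is written to a file such as klt_python3_cuda.mp4, whose name the script builds from the Python major version and the chosen backend.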

Output: [demo image from the original post showing the tracked bounding boxes drawn on the video frames]

from __future__ import print_function
  
import sys
from argparse import ArgumentParser
import numpy as np
import cv2
import vpi
  
  
# Convert a colored input frame to grayscale (if needed)
# and then, if using PVA backend, convert it to 16-bit unsigned pixels;
# The converted frame is copied before wrapping it as a VPI image so
# later draws in the gray frame do not change the reference VPI image.
def convertFrameImage(inputFrame, backend):
    if inputFrame.ndim == 3 and inputFrame.shape[2] == 3:
        grayFrame = cv2.cvtColor(inputFrame, cv2.COLOR_BGR2GRAY)
    else:
        grayFrame = inputFrame
    if backend == vpi.Backend.PVA:
        # PVA only supports 16-bit unsigned inputs,
        # where each element is in 0-255 range, so
        # no rescaling is needed.
        grayFrame = grayFrame.astype(np.uint16)
    grayImage = vpi.asimage(grayFrame.copy())
    return grayFrame, grayImage


# Write the input gray frame to output video with
# input bounding boxes and predictions
def writeOutput(outVideo, cvGray, inBoxes, inPreds, colors, backend):
    try:
        if cvGray.dtype == np.uint16:
            cvGray = cvGray.astype(np.uint8)
        if cvGray.dtype != np.uint8:
            raise Exception('Input frame format must be grayscale, 8-bit unsigned')
        cvGrayBGR = cv2.cvtColor(cvGray, cv2.COLOR_GRAY2BGR)

        # Tracking the number of valid bounding boxes in the current frame
        numValidBoxes = 0

        # Draw the input bounding boxes considering the input predictions
        with inBoxes.rlock_cpu(), inPreds.rlock_cpu() as pred:
            # Array of bounding boxes (bbox) and predictions (pred)
            bbox = inBoxes.cpu().view(np.recarray)

            for i in range(inBoxes.size):
                if bbox[i].tracking_status == vpi.KLTTrackStatus.LOST:
                    # If the tracking status of the current bounding box is lost, skip it
                    continue

                # Gather information of the current (i) bounding box and prediction
                # Prediction scaling width, height and x, y
                predScaleWidth = pred[i][0, 0]
                predScaleHeight = pred[i][1, 1]
                predX = pred[i][0, 2]
                predY = pred[i][1, 2]

                # Bounding box scaling width, height and x, y and bbox width, height
                bboxScaleWidth = bbox[i].bbox.xform.mat3[0, 0]
                bboxScaleHeight = bbox[i].bbox.xform.mat3[1, 1]
                bboxX = bbox[i].bbox.xform.mat3[0, 2]
                bboxY = bbox[i].bbox.xform.mat3[1, 2]
                bboxWidth = bbox[i].bbox.width
                bboxHeight = bbox[i].bbox.height

                # Compute corrected x, y and width, height (w, h) by proper adding
                # bounding box and prediction x, y and by proper multiplying
                # bounding box w, h with its own scaling and prediction scaling
                x = bboxX + predX
                y = bboxY + predY
                w = bboxWidth * bboxScaleWidth * predScaleWidth
                h = bboxHeight * bboxScaleHeight * predScaleHeight

                # Start point and end point of the bounding box for OpenCV drawing
                startPoint = tuple(np.array([x, y], dtype=int))
                endPoint = tuple(np.array([x, y], dtype=int) + np.array([w, h], dtype=int))

                # The color of the bounding box to be drawn
                bboxColor = tuple([ int(c) for c in colors[0, i] ])
                cv2.rectangle(cvGrayBGR, startPoint, endPoint, bboxColor, 2)

                # Incrementing the number of valid bounding boxes in the current frame
                numValidBoxes += 1

        print(' Valid: {:02d} boxes'.format(numValidBoxes))

        outVideo.write(cvGrayBGR)
    except Exception as e:
        print('Error while writing output video:\n', e, file=sys.stderr)
        exit(1)


# ----------------------------
# Parse command line arguments

parser = ArgumentParser()
parser.add_argument('backend', choices=['cpu','cuda','pva'],
                    help='Backend to be used for processing')

parser.add_argument('input',
                    help='Input video')

parser.add_argument('boxes',
                    help='Text file with bounding boxes description')

args = parser.parse_args()

if args.backend == 'cpu':
    backend = vpi.Backend.CPU
elif args.backend == 'cuda':
    backend = vpi.Backend.CUDA
else:
    assert args.backend == 'pva'
    backend = vpi.Backend.PVA

# -----------------------------
# Open input and output videos

inVideo = cv2.VideoCapture(args.input)

fourcc = cv2.VideoWriter_fourcc(*'MPEG')
inSize = (int(inVideo.get(cv2.CAP_PROP_FRAME_WIDTH)), int(inVideo.get(cv2.CAP_PROP_FRAME_HEIGHT)))
fps = inVideo.get(cv2.CAP_PROP_FPS)

outVideo = cv2.VideoWriter('klt_python'+str(sys.version_info[0])+'_'+args.backend+'.mp4',
                           fourcc, fps, inSize)

if not outVideo.isOpened():
    print("Error creating output video", file=sys.stderr)
    exit(1)

# -----------------------------
# Reading input bounding boxes

# allBoxes is a dictionary of all bounding boxes to be tracked in the input video,
# where each value is a list of new bounding boxes to track at the frame indicated by its key
allBoxes = {}
totalNumBoxes = 0

# Array capacity 0 means no restricted maximum number of bounding boxes
arrayCapacity = 0

if backend == vpi.Backend.PVA:
    # PVA requires 128 array capacity or maximum number of bounding boxes
    arrayCapacity = 128
  
with open(args.boxes) as f:
    # The input file (f) should have one bounding box per line as:
    # "startFrame bboxX bboxY bboxWidth bboxHeight"; e.g.: "61 547 337 14 11"
    for line in f.readlines():
        line = line.replace('\n', '').replace('\r', '')
        startFrame, x, y, w, h = [ float(v) for v in line.split(' ') ]
        bb = (x, y, w, h)
        if startFrame not in allBoxes:
            allBoxes[startFrame] = [bb]
        else:
            allBoxes[startFrame].append(bb)
        totalNumBoxes += 1
        if totalNumBoxes == arrayCapacity:
            # Stop adding boxes if the total reached the array capacity
            break
  
curFrame    = 0
curNumBoxes = len(allBoxes[curFrame])

# ------------------------------------------------------------------------------
# Initialize VPI array with all input bounding boxes (same as C++ KLT sample)

if arrayCapacity == 0:
    arrayCapacity = totalNumBoxes

inBoxes = vpi.Array(arrayCapacity, vpi.Type.KLT_TRACKED_BOUNDING_BOX)

inBoxes.size = totalNumBoxes
with inBoxes.wlock_cpu():
    data = inBoxes.cpu().view(np.recarray)

    # Global index i of all bounding boxes data, starting at 0
    i = 0

    for f in sorted(allBoxes.keys()):
        for bb in allBoxes[f]:
            # Each bounding box bb is a tuple of (x, y, w, h)
            x, y, w, h = bb

            # The bounding box data is the identity for the scaling part,
            # meaning no scaling, and the offset part is its position x, y
            data[i].bbox.xform.mat3[0, 0] = 1
            data[i].bbox.xform.mat3[1, 1] = 1
            data[i].bbox.xform.mat3[2, 2] = 1
            data[i].bbox.xform.mat3[0, 2] = x
            data[i].bbox.xform.mat3[1, 2] = y

            # The bounding box data stores its width and height w, h
            data[i].bbox.width = w
            data[i].bbox.height = h

            # Initially all boxes have status tracked and update needed
            data[i].tracking_status = vpi.KLTTrackStatus.TRACKED
            data[i].template_status = vpi.KLTTemplateStatus.UPDATE_NEEDED

            # Incrementing the global index for the next bounding box
            i += 1

#-------------------------------------------------------------------------------
# Generate random colors for bounding boxes equal to the C++ KLT sample

hues = np.zeros((totalNumBoxes,), dtype=np.uint8)

if int(cv2.__version__.split('.')[0]) >= 3:
    cv2.setRNGSeed(1)
    hues = cv2.randu(hues, 0, 180)
else:
    # Random differs in OpenCV-2.4
    rng = cv2.cv.RNG(1)
    hues = cv2.cv.fromarray(np.array([[ h for h in hues ]], dtype=np.uint8))
    cv2.cv.RandArr(rng, hues, cv2.cv.CV_RAND_UNI, 0, 180)
    hues = [ hues[0, i] for i in range(totalNumBoxes) ]

colors = np.array([[ [int(h), 255, 255] for h in hues ]], dtype=np.uint8)
colors = cv2.cvtColor(colors, cv2.COLOR_HSV2BGR)

#-------------------------------------------------------------------------------
# Initialize the KLT Feature Tracker algorithm

# Load up first frame
validFrame, cvFrame = inVideo.read()
if not validFrame:
    print("Error reading first input frame", file=sys.stderr)
    exit(1)

# Convert OpenCV frame to gray returning also the VPI image for given backend
cvGray, imgTemplate = convertFrameImage(cvFrame, backend)

# Create the KLT Feature Tracker object using the backend specified by the user
klt = vpi.KLTFeatureTracker(imgTemplate, inBoxes, backend=backend)

#-------------------------------------------------------------------------------
# Main processing loop

while validFrame:
    print('Frame: {:04d} ; Total: {:02d} boxes ;'.format(curFrame, curNumBoxes), end='')

    # Adjust input boxes and predictions to the current number of boxes
    inPreds = klt.in_predictions()

    inPreds.size = curNumBoxes
    inBoxes.size = curNumBoxes

    # Write current frame to the output video
    writeOutput(outVideo, cvGray, inBoxes, inPreds, colors, backend)

    # Read next input frame
    curFrame += 1
    validFrame, cvFrame = inVideo.read()
    if not validFrame:
        break

    cvGray, imgReference = convertFrameImage(cvFrame, backend)

    outBoxes = klt(imgReference)

    if curFrame in allBoxes:
        curNumBoxes += len(allBoxes[curFrame])

outVideo.release()

# vim: ts=8:sw=4:sts=4:et:ai
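
For reference, the boxes file parsed above holds one bounding box per line in the form "startFrame bboxX bboxY bboxWidth bboxHeight". A small illustrative example (the values are hypothetical, apart from the line quoted in the code comment):

0 547 337 14 11
0 600 400 20 16
61 547 337 14 11

Note that curNumBoxes is initialized from allBoxes[curFrame] with curFrame equal to 0, so the file must contain at least one box starting at frame 0. With the PVA backend, reading stops after 128 boxes to match the required array capacity.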

Reposted from blog.csdn.net/kunhe0512/article/details/125352095