Use ffmpeg to draw motion vectors (MV)

This article shows how to use ffmpeg to extract the motion vectors (MVs) of each video frame and draw them onto the decoded frames with OpenCV.

Motion vector MV

Anyone familiar with video coding knows the motion vector: in inter-frame prediction, it is the vector that describes the positional relationship between the current block and its reference block. Inter-frame prediction can be unidirectional (P frames) or bidirectional (B frames); unidirectional prediction needs only one MV, while bidirectional prediction needs two.
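
Incidentally, a reasonably recent ffmpeg build can visualize MVs on its own, with no code at all, through the codecview filter; for example, ffplay -flags2 +export_mvs input.mp4 -vf codecview=mv=pf+bf+bb overlays the forward and backward MVs of P and B frames during playback (input.mp4 is a placeholder for your file). The rest of this article does the same thing programmatically, which gives full control over the drawing.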

The definition of MV in ffmpeg is as follows:

typedef struct AVMotionVector {
    /**
     * Where the current macroblock comes from; negative value when it comes
     * from the past, positive value when it comes from the future.
     * XXX: set exact relative ref frame reference instead of a +/- 1 "direction".
     */
    // Whether the reference block is in a past frame (negative) or a future frame (positive)
    int32_t source;
    /**
     * Width and height of the block.
     */
    // Width and height of the block this MV belongs to
    uint8_t w, h;
    /**
     * Absolute source position. Can be outside the frame area.
     */
    int16_t src_x, src_y;
    /**
     * Absolute destination position. Can be outside the frame area.
     */
    int16_t dst_x, dst_y;
    /**
     * Extra flag information.
     * Currently unused.
     */
    uint64_t flags;
    /**
     * Motion vector
     * src_x = dst_x + motion_x / motion_scale
     * src_y = dst_y + motion_y / motion_scale
     */
    int32_t motion_x, motion_y;
    uint16_t motion_scale;
} AVMotionVector;
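
As the comments show, motion_x and motion_y are stored at sub-pel precision, and motion_scale is the denominator. A minimal worked example of recovering the integer source position (the values are made up for illustration; motion_scale is 4 for H.264's quarter-pel MVs):

    // Illustrative values only: a quarter-pel MV of (-6, 2) on a block
    // whose destination position is (64, 32)
    int16_t dst_x = 64, dst_y = 32;
    int32_t motion_x = -6, motion_y = 2;
    uint16_t motion_scale = 4;                        // quarter-pel, as in H.264
    int16_t src_x = dst_x + motion_x / motion_scale;  // 64 + (-6 / 4) = 64 - 1 = 63
    int16_t src_y = dst_y + motion_y / motion_scale;  // 32 + (2 / 4)  = 32 + 0 = 32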

The ffmpeg source tree ships a sample program for MV extraction (doc/examples/extract_mvs.c). After extracting the MVs, OpenCV can draw them onto the decoded frames, as in the adapted program below.

extern "C"
{
#include <libavutil/motion_vector.h>
#include <libavformat/avformat.h>
}
#include <opencv2/opencv.hpp>
using namespace cv;
static AVFormatContext *fmt_ctx = NULL;
static AVCodecContext *video_dec_ctx = NULL;
static AVStream *video_stream = NULL;
static const char *src_filename = NULL;
static int video_stream_idx = -1;
static AVFrame *frame = NULL;
static int video_frame_count = 0;
FILE *fout;
VideoWriter out;
static int decode_packet(const AVPacket *pkt)
{
    int ret = avcodec_send_packet(video_dec_ctx, pkt);
    if (ret < 0) {
        fprintf(stderr, "Error while sending a packet to the decoder\n");
        return ret;
    }
    while (ret >= 0)  {
        ret = avcodec_receive_frame(video_dec_ctx, frame);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            break;
        }
        else if (ret < 0) {
            fprintf(stderr, "Error while receiving a frame from the decoder\n");
            return ret;
        }
        if (ret >= 0) {
            int i;
            AVFrameSideData *sd;
            video_frame_count++;
            sd = av_frame_get_side_data(frame, AV_FRAME_DATA_MOTION_VECTORS);
            // Copy the decoded YUV420P frame into one contiguous buffer,
            // plane by plane and row by row, since the stride (linesize)
            // can be larger than the visible width
            cv::Mat yuvImg;
            yuvImg.create(frame->height * 3 / 2, frame->width, CV_8UC1);
            uint8_t *dst = yuvImg.data;
            for (int r = 0; r < frame->height; r++, dst += frame->width)
                memcpy(dst, frame->data[0] + r * frame->linesize[0], frame->width);
            for (int r = 0; r < frame->height / 2; r++, dst += frame->width / 2)
                memcpy(dst, frame->data[1] + r * frame->linesize[1], frame->width / 2);
            for (int r = 0; r < frame->height / 2; r++, dst += frame->width / 2)
                memcpy(dst, frame->data[2] + r * frame->linesize[2], frame->width / 2);
            cv::Mat rgbImg;
            cv::cvtColor(yuvImg, rgbImg, CV_YUV2BGR_I420);
            if (sd) {
                const AVMotionVector *mvs = (const AVMotionVector *)sd->data;
                for (i = 0; i < sd->size / sizeof(*mvs); i++) {
                    const AVMotionVector *mv = &mvs[i];
                    // Draw each MV as a line from the source position to the destination position
                    line(rgbImg, Point(mv->src_x, mv->src_y), Point(mv->dst_x, mv->dst_y), Scalar(0, 0, 255));
                }
            }
            // Write the annotated frame to the output video
            out << rgbImg;
            av_frame_unref(frame);
        }
    }
    return 0;
}
static int open_codec_context(AVFormatContext *fmt_ctx, enum AVMediaType type)
{
    int ret;
    AVStream *st;
    AVCodecContext *dec_ctx = NULL;
    AVCodec *dec = NULL;
    AVDictionary *opts = NULL;
    ret = av_find_best_stream(fmt_ctx, type, -1, -1, &dec, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not find %s stream in input file '%s'\n",
            av_get_media_type_string(type), src_filename);
        return ret;
    }
    else {
        int stream_idx = ret;
        st = fmt_ctx->streams[stream_idx];
        dec_ctx = avcodec_alloc_context3(dec);
        if (!dec_ctx) {
            fprintf(stderr, "Failed to allocate codec\n");
            return AVERROR(EINVAL);
        }
        ret = avcodec_parameters_to_context(dec_ctx, st->codecpar);
        if (ret < 0) {
            fprintf(stderr, "Failed to copy codec parameters to codec context\n");
            return ret;
        }
        /* Init the video decoder; "+export_mvs" makes the decoder export
           motion vectors as AV_FRAME_DATA_MOTION_VECTORS side data */
        av_dict_set(&opts, "flags2", "+export_mvs", 0);
        if ((ret = avcodec_open2(dec_ctx, dec, &opts)) < 0) {
            fprintf(stderr, "Failed to open %s codec\n",
                av_get_media_type_string(type));
            return ret;
        }
        video_stream_idx = stream_idx;
        video_stream = fmt_ctx->streams[video_stream_idx];
        video_dec_ctx = dec_ctx;
    }
    return 0;
}
int main(int argc, char **argv)
{
    fout = fopen("out.yuv", "wb");  // opened for optional raw YUV dumps; unused below
    // Note: the frame rate and frame size are hard-coded and must match the input video
    //out.open("out.avi", CV_FOURCC('X', 'V', 'I', 'D'), 25, Size(640, 272));
    out.open("out.mp4", CV_FOURCC('D', 'I', 'V', 'X'), 25, Size(640, 272));
    int ret = 0;
    AVPacket pkt = { 0 };
    if (argc != 2) {
        fprintf(stderr, "Usage: %s <video>\n", argv[0]);
        exit(1);
    }
    src_filename = argv[1];
    if (avformat_open_input(&fmt_ctx, src_filename, NULL, NULL) < 0) {
        fprintf(stderr, "Could not open source file %s\n", src_filename);
        exit(1);
    }
    if (avformat_find_stream_info(fmt_ctx, NULL) < 0) {
        fprintf(stderr, "Could not find stream information\n");
        exit(1);
    }
    open_codec_context(fmt_ctx, AVMEDIA_TYPE_VIDEO);
    av_dump_format(fmt_ctx, 0, src_filename, 0);
    if (!video_stream) {
        fprintf(stderr, "Could not find video stream in the input, aborting\n");
        ret = 1;
        goto end;
    }
    frame = av_frame_alloc();
    if (!frame) {
        fprintf(stderr, "Could not allocate frame\n");
        ret = AVERROR(ENOMEM);
        goto end;
    }
    printf("framenum,source,blockw,blockh,srcx,srcy,dstx,dsty,flags\n");
    /* read frames from the file */
    while (av_read_frame(fmt_ctx, &pkt) >= 0) {
        if (pkt.stream_index == video_stream_idx)
            ret = decode_packet(&pkt);
        av_packet_unref(&pkt);
        if (ret < 0)
            break;
    }
    /* flush cached frames */
    decode_packet(NULL);
end:
    avcodec_free_context(&video_dec_ctx);
    avformat_close_input(&fmt_ctx);
    av_frame_free(&frame);
    fclose(fout);
    system("pause");
    return ret < 0;
}
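
To build the sample, link against the FFmpeg development libraries (libavformat, libavcodec, libavutil) and OpenCV; on Linux the compiler and linker flags can usually be pulled in with pkg-config (the exact package names, e.g. opencv vs. opencv4, depend on your installation). Run the program with the input file as its only argument; the annotated video is written to out.mp4.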

Problems

The MVs exported by ffmpeg have a few limitations (a partial workaround is sketched after this list):

  • The position of the macroblock within the image is not given explicitly

  • For bidirectional prediction, the two MVs of a block are not specially distinguished

  • The exact reference picture is not identified, only a past/future direction
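
The first two points can be partially worked around in the drawing loop. A hedged sketch, not part of the ffmpeg sample: it assumes the exported dst_x/dst_y are the centre of the block (consistent with the src_x = dst_x + motion_x / motion_scale relation above) and separates the two MVs of a bidirectional block by the sign of source:

    for (i = 0; i < sd->size / sizeof(*mvs); i++) {
        const AVMotionVector *mv = &mvs[i];
        // Recover the block's top-left corner from its centre and size
        int block_left = mv->dst_x - mv->w / 2;
        int block_top  = mv->dst_y - mv->h / 2;
        rectangle(rgbImg, Rect(block_left, block_top, mv->w, mv->h), Scalar(0, 255, 0));
        // source < 0: reference in a past frame (forward MV)
        // source > 0: reference in a future frame (backward MV)
        Scalar color = (mv->source < 0) ? Scalar(0, 0, 255)    // forward: red
                                        : Scalar(255, 0, 0);   // backward: blue
        line(rgbImg, Point(mv->src_x, mv->src_y), Point(mv->dst_x, mv->dst_y), color);
    }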

