[OpenCV+FFMpeg] FFMpeg を使用して cv::Mat をエンコードし、ビデオに書き込みます

OpenCV 自体には、ビデオの読み取りと書き込みのための多くの基本機能がすでに含まれていますが、より包括的なオーディオおよびビデオのエンコードおよびデコード機能が必要な場合、最も一般的なクロスプラットフォーム ソリューションは OpenCV+FFMpeg です。以下では、FFMpeg を使用して cv::Mat をエンコードし、ファイルに書き込む方法を示します。

まず、必要なヘッダー ファイルをいくつか導入する必要がありますが、C++ で導入する場合は、FFMpeg のヘッダーを extern "C" に追加する必要があることに注意してください。

#include <iostream>
#include <string>
#include <vector>

#include <opencv2/core/mat.hpp>
#include <opencv2/imgcodecs.hpp>


extern "C" {
#include <libavformat/avformat.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
#include <libavcodec/avcodec.h>
#include <libavutil/pixdesc.h>
#include <libavutil/opt.h>
}

次に、次のようにメソッドを作成します

int writeVideo(const std::string& video_path, std::vector<cv::Mat>& frames, int width, int height, int fps);

まず、エンコード形式を指定する必要があります。ここではコーデックとして MPEG-4 (MP4 コンテナでよく使われる形式) を例に挙げていますが、必要に応じて変更できます。

    const AVCodec* videoCodec = avcodec_find_encoder(AV_CODEC_ID_MPEG4);
    AVCodecContext* videoCodecContext = avcodec_alloc_context3(videoCodec);
    if (!videoCodecContext) {
        std::cerr << "Error allocating video codec context" << std::endl;
        exit(EXIT_FAILURE);
    }

次に、エンコードされたビデオの Context を構築する必要があります。パラメータの詳細については、こちらを参照してください。

    videoCodecContext->bit_rate = 200000;
    videoCodecContext->width = width;
    videoCodecContext->height = height;
    //videoCodecContext->time_base = (AVRational){ 1, fps };  //error C4576: a parenthesized type followed by an initializer list is a non-standard explicit type conversion syntax
    //videoCodecContext->framerate = (AVRational){ fps, 1 };
    videoCodecContext->time_base.num = 1;
    videoCodecContext->time_base.den = fps;
    videoCodecContext->framerate.num = fps;
    videoCodecContext->framerate.den = 1;
    videoCodecContext->gop_size = 12;
    videoCodecContext->max_b_frames = 0;
    videoCodecContext->pix_fmt = AV_PIX_FMT_YUV420P;
    if (formatContext->oformat->flags & AVFMT_GLOBALHEADER) {
        videoCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    }

このようにしてエンコーダーのパラメーターを設定することができます 詳しい紹介はこちらをご覧ください

// エンコーダーのパラメータを設定
AVDictionary *param = 0;
av_dict_set(&param, "preset", "medium", 0);
av_dict_set(&param, "tune", "zerolatency", 0);

// Codec を開いて設定を適用
error = avcodec_open2(videoCodecContext, videoCodec,  &param);

次に、 cv::Mat へのループを開始できます。

for (const cv::Mat& frame : frames) {
        // convert the cv::Mat to an AVFrame
        AVFrame* avFrame = av_frame_alloc();
        avFrame->format = videoCodecContext->pix_fmt;
        avFrame->width = width;
        avFrame->height = height;
        error = av_frame_get_buffer(avFrame, 0);
        checkError(error, "Error allocating frame buffer");
        struct SwsContext* frameConverter = sws_getContext(width, height, AV_PIX_FMT_BGR24, width, height, videoCodecContext->pix_fmt, SWS_BICUBIC, nullptr, nullptr, nullptr);
        uint8_t* srcData[AV_NUM_DATA_POINTERS] = { frame.data };
        int srcLinesize[AV_NUM_DATA_POINTERS] = { static_cast<int>(frame.step) };
        sws_scale(frameConverter, srcData, srcLinesize, 0, height, avFrame->data, avFrame->linesize);
        sws_freeContext(frameConverter);

...
}

 完全なコードを以下に添付します

#include <iostream>
#include <string>
#include <vector>

#include <opencv2/core/mat.hpp>
#include <opencv2/imgcodecs.hpp>


extern "C" {
#include <libavformat/avformat.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
#include <libavcodec/avcodec.h>
#include <libavutil/pixdesc.h>
#include <libavutil/opt.h>
}

// helper function to check for FFmpeg errors
// Abort the process with a diagnostic if an FFmpeg call reported failure.
// FFmpeg APIs signal errors with negative return codes; non-negative is success.
// (av_err2str() would give a readable message, but its compound-literal
// expansion triggers MSVC error C4576, so the raw code is printed instead.)
inline void checkError(int error, const std::string& message) {
    if (error >= 0) {
        return;  // success — nothing to report
    }
    std::cerr << message << ": " << std::to_string(error) << std::endl;
    exit(EXIT_FAILURE);
}


/**
 * Encode a sequence of BGR cv::Mat frames and write them to a video file.
 *
 * @param video_path output file path; the container format is deduced from
 *                   the file extension by avformat_alloc_output_context2().
 * @param frames     BGR (CV_8UC3) frames, all expected to be width x height.
 * @param width      frame width in pixels.
 * @param height     frame height in pixels.
 * @param fps        target frame rate in frames per second.
 * @return EXIT_SUCCESS on success; exits the process on any FFmpeg error.
 */
int writeVideo(const std::string& video_path, std::vector<cv::Mat>& frames, int width, int height, int fps) {
    // initialize FFmpeg
    av_log_set_level(AV_LOG_ERROR);
    avformat_network_init();

    // create the output container context (format deduced from the file name)
    AVFormatContext* formatContext = nullptr;
    int error = avformat_alloc_output_context2(&formatContext, nullptr, nullptr, video_path.c_str());
    checkError(error, "Error creating output context");

    // add a single video stream to the container
    AVStream* videoStream = avformat_new_stream(formatContext, nullptr);
    if (!videoStream) {
        std::cerr << "Error creating video stream" << std::endl;
        exit(EXIT_FAILURE);
    }

    // set up the MPEG-4 encoder
    const AVCodec* videoCodec = avcodec_find_encoder(AV_CODEC_ID_MPEG4);
    AVCodecContext* videoCodecContext = avcodec_alloc_context3(videoCodec);
    if (!videoCodecContext) {
        std::cerr << "Error allocating video codec context" << std::endl;
        exit(EXIT_FAILURE);
    }
    videoCodecContext->bit_rate = 200000;
    videoCodecContext->width = width;
    videoCodecContext->height = height;
    // (AVRational){ 1, fps } compound literals are rejected by MSVC (error
    // C4576), so the num/den fields are assigned directly instead.
    videoCodecContext->time_base.num = 1;
    videoCodecContext->time_base.den = fps;
    videoCodecContext->framerate.num = fps;
    videoCodecContext->framerate.den = 1;
    videoCodecContext->gop_size = 12;     // one keyframe every 12 frames
    videoCodecContext->max_b_frames = 0;  // no B-frames: simple monotonic pts
    videoCodecContext->pix_fmt = AV_PIX_FMT_YUV420P;
    if (formatContext->oformat->flags & AVFMT_GLOBALHEADER) {
        videoCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    }

    // encoder options
    // NOTE(review): "preset" and "tune" are libx264 options and are ignored by
    // the MPEG-4 encoder; kept here for when the codec is switched to H.264.
    AVDictionary* param = nullptr;
    av_dict_set(&param, "preset", "medium", 0);
    av_dict_set(&param, "tune", "zerolatency", 0);

    error = avcodec_open2(videoCodecContext, videoCodec, &param);
    av_dict_free(&param);  // fix: the options dictionary was previously leaked
    checkError(error, "Error opening");
    error = avcodec_parameters_from_context(videoStream->codecpar, videoCodecContext);
    checkError(error, "Error setting video codec parameters");

    // open the output file
    error = avio_open(&formatContext->pb, video_path.c_str(), AVIO_FLAG_WRITE);
    checkError(error, "Error opening output file");

    // write the video file header
    error = avformat_write_header(formatContext, nullptr);
    checkError(error, "Error writing video file header");

    AVPacket* packet = av_packet_alloc();
    if (!packet) {
        std::cerr << "Error allocating packet" << std::endl;
        exit(EXIT_FAILURE);
    }

    int64_t frame_count = 0;  // fix: was used below without ever being declared
    for (const cv::Mat& frame : frames) {
        // convert the BGR cv::Mat into a YUV420P AVFrame
        AVFrame* avFrame = av_frame_alloc();
        avFrame->format = videoCodecContext->pix_fmt;
        avFrame->width = width;
        avFrame->height = height;
        error = av_frame_get_buffer(avFrame, 0);
        checkError(error, "Error allocating frame buffer");
        struct SwsContext* frameConverter = sws_getContext(width, height, AV_PIX_FMT_BGR24, width, height, videoCodecContext->pix_fmt, SWS_BICUBIC, nullptr, nullptr, nullptr);
        uint8_t* srcData[AV_NUM_DATA_POINTERS] = { frame.data };
        int srcLinesize[AV_NUM_DATA_POINTERS] = { static_cast<int>(frame.step) };
        sws_scale(frameConverter, srcData, srcLinesize, 0, height, avFrame->data, avFrame->linesize);
        sws_freeContext(frameConverter);

        // timestamp expressed in the stream's time base (the muxer may have
        // changed it from the 1/fps we requested, so convert explicitly)
        avFrame->pts = frame_count * (videoStream->time_base.den) / (static_cast<int64_t>(videoStream->time_base.num) * fps);
        frame_count += 1;

        // push the frame into the encoder and drain any ready packets
        error = avcodec_send_frame(videoCodecContext, avFrame);
        checkError(error, "Error sending frame to video codec");
        while (error >= 0) {
            error = avcodec_receive_packet(videoCodecContext, packet);
            if (error == AVERROR(EAGAIN) || error == AVERROR_EOF) {
                break;
            }
            checkError(error, "Error encoding video frame");

            // write the encoded packet to the output file
            packet->stream_index = videoStream->index;
            error = av_interleaved_write_frame(formatContext, packet);
            checkError(error, "Error writing video packet");
            av_packet_unref(packet);
        }
        av_frame_free(&avFrame);
    }

    // flush the encoder: a null frame signals end-of-stream
    int ret = 0;
    avcodec_send_frame(videoCodecContext, nullptr);
    do {
        av_packet_unref(packet);
        ret = avcodec_receive_packet(videoCodecContext, packet);
        if (!ret) {
            packet->stream_index = videoStream->index;
            error = av_interleaved_write_frame(formatContext, packet);
            checkError(error, "Error writing video packet");
        }
    } while (!ret);

    av_write_trailer(formatContext);

    // clean up
    // fix: the original called avformat_close_input() on an OUTPUT context;
    // the output I/O handle must be closed with avio_closep() instead.
    avio_closep(&formatContext->pb);
    av_packet_free(&packet);
    avcodec_free_context(&videoCodecContext);
    avformat_free_context(formatContext);
    avformat_network_deinit();

    return EXIT_SUCCESS;
}

std::vector<cv::Mat> readVideo(const std::string video_path) {
    // initialize FFmpeg
    av_log_set_level(AV_LOG_ERROR);
    avformat_network_init();

    AVFormatContext* formatContext = nullptr;
    int error = avformat_open_input(&formatContext, video_path.c_str(), nullptr, nullptr);
    checkError(error, "Error opening input file");

    //Read packets of a media file to get stream information.
    
    error = avformat_find_stream_info(formatContext, nullptr);
    checkError(error, "Error avformat find stream info");
    


    // find the video stream
    AVStream* videoStream = nullptr;
    for (unsigned int i = 0; i < formatContext->nb_streams; i++) {
        if (formatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO && !videoStream) {
            videoStream = formatContext->streams[i];
        }
    }
    if (!videoStream) {
        std::cerr << "Error: input file does not contain a video stream" << std::endl;
        exit(EXIT_FAILURE);
    }

    // create the video codec context
    const AVCodec* videoCodec = avcodec_find_decoder(videoStream->codecpar->codec_id);
    AVCodecContext* videoCodecContext = avcodec_alloc_context3(videoCodec);
    if (!videoCodecContext) {
        std::cerr << "Error allocating video codec context" << std::endl;
        exit(EXIT_FAILURE);
    }

    std::cout << "::informations::\n";
    std::cout << "  bit_rate:" << videoCodecContext->bit_rate << "\n";
    std::cout << "  width:" << videoCodecContext->width << "\n";
    std::cout << "  height:" << videoCodecContext->height << "\n";
    std::cout << "  gop_size:" << videoCodecContext->gop_size << "\n";
    std::cout << "  max_b_frames:" << videoCodecContext->max_b_frames << "\n";
    std::cout << "  pix_fmt:" << videoCodecContext->pix_fmt << "\n";

    error = avcodec_parameters_to_context(videoCodecContext, videoStream->codecpar);
    checkError(error, "Error setting video codec context parameters");
    error = avcodec_open2(videoCodecContext, videoCodec, nullptr);
    checkError(error, "Error opening video codec");

    // create the frame scaler
    int width = videoCodecContext->width;
    int height = videoCodecContext->height;
    struct SwsContext* frameScaler = sws_getContext(width, height, videoCodecContext->pix_fmt, width, height, AV_PIX_FMT_BGR24, SWS_BICUBIC, nullptr, nullptr, nullptr);

    // read the packets and decode the video frames
    std::vector<cv::Mat> videoFrames;
    AVPacket packet;
    while (av_read_frame(formatContext, &packet) == 0) {
        if (packet.stream_index == videoStream->index) {
            // decode the video frame
            AVFrame* frame = av_frame_alloc();
            int gotFrame = 0;
            error = avcodec_send_packet(videoCodecContext, &packet);
            checkError(error, "Error sending packet to video codec");
            error = avcodec_receive_frame(videoCodecContext, frame);

            //There is not enough data for decoding the frame, have to free and get more data
            
            if (error == AVERROR(EAGAIN))
            {
                av_frame_unref(frame);
                av_freep(frame);
                continue;
            }

            if (error == AVERROR_EOF)
            {
                std::cerr << "AVERROR_EOF" << std::endl;
                break;
            }
            

            checkError(error, "Error receiving frame from video codec");


            if (error == 0) {
                gotFrame = 1;
            }
            if (gotFrame) {
                // scale the frame to the desired format
                AVFrame* scaledFrame = av_frame_alloc();
                av_image_alloc(scaledFrame->data, scaledFrame->linesize, width, height, AV_PIX_FMT_BGR24, 32);
                sws_scale(frameScaler, frame->data, frame->linesize, 0, height, scaledFrame->data, scaledFrame->linesize);

                // copy the frame data to a cv::Mat object
                cv::Mat mat(height, width, CV_8UC3, scaledFrame->data[0], scaledFrame->linesize[0]);

                //Show mat image for testing
                
                //cv::imshow("mat", mat);
                //cv::waitKey(100);   //Wait 100msec (relativly long time - for testing).
                


                videoFrames.push_back(mat.clone());

                // clean up
                av_freep(&scaledFrame->data[0]);
                av_frame_free(&scaledFrame);
            }
            av_frame_free(&frame);
        }
        av_packet_unref(&packet);
    }


    // clean up
    sws_freeContext(frameScaler);
    avcodec_free_context(&videoCodecContext);
    avformat_close_input(&formatContext);
    return videoFrames;
}

int main() {
    auto videoFrames = readVideo("input.mp4");
    cv::imwrite("test.png", videoFrames[10]);
    writeVideo("outnow.mp4", videoFrames, videoFrames[0].cols, videoFrames[0].rows, 30);

おすすめ

転載: blog.csdn.net/weixin_44491772/article/details/130960685