ffmpeg.c（4.3.1）源码剖析

文章目录

前言
一、FFmpeg 源码结构图
二、ffmpeg.h 头文件详解
三、main 函数主要流程分析
四、ffmpeg_parse_options
五、transcode 函数
- 1、transcode_init 函数
- 2、transcode_step 函数

前言

本文对 ffmpeg.c 源码进行学习及剖析。

一、FFmpeg 源码结构图

请添加图片描述
链接：ffmpeg整体流程.jpg

下面对上述图片进行介绍：

函数背景色
- 函数在图中以方框的形式表现出来。不同的背景色标志了该函数不同的作用：
  - 粉红色背景函数：FFmpeg 的 API 函数。
  - 白色背景的函数：FFmpeg 的内部函数。
  - 黄色背景的函数：URLProtocol 结构体中的函数，包含了读写各种协议的功能。
  - 绿色背景的函数：AVOutputFormat 结构体中的函数，包含了读写各种封装格式的功能。
  - 蓝色背景的函数：AVCodec 结构体中的函数，包含了编解码的功能。
区域
- 整个关系图可以分为以下几个区域：
  - 左边区域——架构函数区域：这些函数并不针对某一特定的视频格式。
  - 右上方黄色区域——协议处理函数区域：不同的协议（RTP，RTMP，FILE）会调用不同的协议处理函数。
  - 右边中间绿色区域——封装格式处理函数区域：不同的封装格式（MKV，FLV，MPEG2TS，AVI）会调用不同的封装格式处理函数。
  - 右边下方蓝色区域——编解码函数区域：不同的编码标准（HEVC，H.264，MPEG2）会调用不同的编解码函数。
箭头线
- 为了把调用关系表示的更明显，图中的箭头线也使用了不同的颜色：
  - 红色的箭头线：标志了编码的流程。
  - 其他颜色的箭头线：标志了函数之间的调用关系。其中：
    - 调用 URLProtocol 结构体中的函数用黄色箭头线标识；
    - 调用 AVOutputFormat 结构体中的函数用绿色箭头线标识；
    - 调用 AVCodec 结构体中的函数用蓝色箭头线标识。
函数所在的文件
- 每个函数标识了它所在的文件路径。
  - 左边区域（架构函数）
  - 右上区域（URLProtocol 协议处理函数），URLProtocol 结构体包含如下协议处理函数指针：
    - url_open()：打开
    - url_read()：读取
    - url_write()：写入
    - url_seek()：调整进度
    - url_close()：关闭
      - 下面举个例子，说明不同的协议对应着上述接口有不同的实现函数：
        
        File 协议（即文件）对应的 URLProtocol 结构体 ff_file_protocol：
        
        url_open() -> file_open() -> open()
        
        url_read() -> file_read() -> read()
        
        url_write() -> file_write() -> write()
        
        url_seek() -> file_seek() -> lseek()
        
        url_close() -> file_close() -> close()
        
        RTMP 协议（libRTMP）对应的 URLProtocol 结构体 ff_librtmp_protocol：
        
        url_open() -> rtmp_open() -> RTMP_Init()，RTMP_SetupURL()，RTMP_Connect()，RTMP_ConnectStream()
        
        url_read() -> rtmp_read() -> RTMP_Read()
        
        url_write() -> rtmp_write() -> RTMP_Write()
        
        url_read_seek() -> rtmp_read_seek() -> RTMP_SendSeek()（注意：librtmp 实现的是按时间戳定位的 url_read_seek()，而不是 url_seek()）
        
        url_close() -> rtmp_close() -> RTMP_Close()
        
        UDP 协议对应的 URLProtocol 结构体 ff_udp_protocol：
        
        url_open() -> udp_open()
        
        url_read() -> udp_read()
        
        url_write() -> udp_write()
        
        url_seek() -> （未实现：UDP 是流式协议，不支持 seek，ff_udp_protocol 中没有设置 url_seek）
        
        url_close() -> udp_close()
  - 右中区域（AVOutputFormat 封装格式处理函数）
    - AVOutputFormat 包含如下封装格式处理函数指针：
      - write_header()：写文件头
      - write_packet()：写一帧数据
      - write_trailer()：写文件尾
        
        下面举个例子，说明不同的封装格式对应着上述接口有不同的实现函数：
        
        FLV 封装格式对应的 AVOutputFormat 结构体 ff_flv_muxer：
        
        write_header() -> flv_write_header()
        
        write_packet() -> flv_write_packet()
        
        write_trailer() -> flv_write_trailer()
        
        MKV 封装格式对应的 AVOutputFormat 结构体 ff_matroska_muxer：
        
        write_header() -> mkv_write_header()
        
        write_packet() -> mkv_write_flush_packet()
        
        write_trailer() -> mkv_write_trailer()
        
        MPEG2TS 封装格式对应的 AVOutputFormat 结构体 ff_mpegts_muxer：
        
        write_header() -> mpegts_write_header()
        
        write_packet() -> mpegts_write_packet()
        
        write_trailer() -> mpegts_write_end()
        
        AVI 封装格式对应的 AVOutputFormat 结构体 ff_avi_muxer：
        
        write_header() -> avi_write_header()
        
        write_packet() -> avi_write_packet()
        
        write_trailer() -> avi_write_trailer()
  - 右下区域（AVCodec 编解码函数）
    - AVCodec 包含如下编解码函数指针：
      - init()：初始化
      - encode2()：编码一帧数据
      - close()：关闭
        
        下面举个例子，说明不同的编解码器对应着上述接口有不同的实现函数：
        
        HEVC 编码器对应的 AVCodec 结构体 ff_libx265_encoder：
        
        init() -> libx265_encode_init() -> x265_param_alloc()，x265_param_default_preset()，
        x265_encoder_open()
        
        encode2() -> libx265_encode_frame() -> x265_encoder_encode()
        
        close() -> libx265_encode_close() -> x265_param_free()，x265_encoder_close()
        
        H.264 编码器对应的 AVCodec 结构体 ff_libx264_encoder：
        
        init() -> X264_init() -> x264_param_default()，x264_encoder_open()，x264_encoder_headers()
        
        encode2() -> X264_frame() -> x264_encoder_encode()
        
        close() -> X264_close() -> x264_encoder_close()
        
        VP8 编码器（libVPX）对应的 AVCodec 结构体 ff_libvpx_vp8_encoder：
        
        init() -> vpx_init() -> vpx_codec_enc_config_default()
        
        encode2() -> vp8_encode() -> vpx_codec_enc_init(), vpx_codec_encode()
        
        close() -> vp8_free() -> vpx_codec_destroy()

二、ffmpeg.h 头文件详解

ffmpeg.h 文件内容如下所示：

/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef FFTOOLS_FFMPEG_H
#define FFTOOLS_FFMPEG_H

#include "config.h"

#include <stdint.h>
#include <stdio.h>
#include <signal.h>

#include "cmdutils.h"

#include "libavformat/avformat.h"
#include "libavformat/avio.h"

#include "libavcodec/avcodec.h"

#include "libavfilter/avfilter.h"

#include "libavutil/avutil.h"
#include "libavutil/dict.h"
#include "libavutil/eval.h"
#include "libavutil/fifo.h"
#include "libavutil/hwcontext.h"
#include "libavutil/pixfmt.h"
#include "libavutil/rational.h"
#include "libavutil/thread.h"
#include "libavutil/threadmessage.h"

#include "libswresample/swresample.h"

#define VSYNC_AUTO       -1
#define VSYNC_PASSTHROUGH 0
#define VSYNC_CFR         1
#define VSYNC_VFR         2
#define VSYNC_VSCFR       0xfe
#define VSYNC_DROP        0xff

#define MAX_STREAMS 1024    /* arbitrary sanity check value */

/* Identifiers of the hardware-acceleration methods known to the tool.
 * Used as HWAccel.id and InputStream.hwaccel_id. */
enum HWAccelID {
    HWACCEL_NONE = 0,
    HWACCEL_AUTO,
    HWACCEL_GENERIC,
    HWACCEL_VIDEOTOOLBOX,
    HWACCEL_QSV,
};

/* Description of one hardware-acceleration method (element type of the
 * hwaccels[] table declared below): a name, an init callback taking the
 * codec context, the matching HWAccelID and a pixel format. */
typedef struct HWAccel {
    const char *name;
    int (*init)(AVCodecContext *s);
    enum HWAccelID id;
    enum AVPixelFormat pix_fmt;
} HWAccel;

/* A named hardware device: its name, its AVHWDeviceType and a buffer
 * reference (device_ref) to the device. Looked up by name through
 * hw_device_get_by_name(). */
typedef struct HWDevice {
    const char *name;
    enum AVHWDeviceType type;
    AVBufferRef *device_ref;
} HWDevice;

/* select an input stream for an output stream */
typedef struct StreamMap {
    int disabled;           /* 1 if this mapping is disabled by a negative map */
    int file_index;
    int stream_index;
    int sync_file_index;
    int sync_stream_index;
    char *linklabel;       /* name of an output link, for mapping lavfi outputs */
} StreamMap;

/* One audio channel mapping (one entry per -map_channel, see
 * OptionsContext.audio_channel_maps): the input file/stream/channel
 * indices and the output file/stream indices. */
typedef struct {
    int  file_idx,  stream_idx,  channel_idx; // input
    int ofile_idx, ostream_idx;               // output
} AudioChannelMap;

/* Typed option values for one option group (g).
 * Fields are grouped into options valid for both inputs and outputs,
 * input-only options and output-only options. Each SpecifierOpt array is
 * paired with an nb_<name> counter holding its element count; the
 * MATCH_PER_*_OPT macros rely on that naming (o->name / o->nb_name). */
typedef struct OptionsContext {
    OptionGroup *g;

    /* input/output options */
    int64_t start_time;
    int64_t start_time_eof;
    int seek_timestamp;
    const char *format;

    SpecifierOpt *codec_names;
    int        nb_codec_names;
    SpecifierOpt *audio_channels;
    int        nb_audio_channels;
    SpecifierOpt *audio_sample_rate;
    int        nb_audio_sample_rate;
    SpecifierOpt *frame_rates;
    int        nb_frame_rates;
    SpecifierOpt *frame_sizes;
    int        nb_frame_sizes;
    SpecifierOpt *frame_pix_fmts;
    int        nb_frame_pix_fmts;

    /* input options */
    int64_t input_ts_offset;
    int loop;
    int rate_emu;
    int accurate_seek;
    int thread_queue_size;

    SpecifierOpt *ts_scale;
    int        nb_ts_scale;
    SpecifierOpt *dump_attachment;
    int        nb_dump_attachment;
    SpecifierOpt *hwaccels;
    int        nb_hwaccels;
    SpecifierOpt *hwaccel_devices;
    int        nb_hwaccel_devices;
    SpecifierOpt *hwaccel_output_formats;
    int        nb_hwaccel_output_formats;
    SpecifierOpt *autorotate;
    int        nb_autorotate;

    /* output options */
    StreamMap *stream_maps;
    int     nb_stream_maps;
    AudioChannelMap *audio_channel_maps; /* one info entry per -map_channel */
    int           nb_audio_channel_maps; /* number of (valid) -map_channel settings */
    int metadata_global_manual;
    int metadata_streams_manual;
    int metadata_chapters_manual;
    const char **attachments;
    int       nb_attachments;

    int chapters_input_file;

    int64_t recording_time;
    int64_t stop_time;
    uint64_t limit_filesize;
    float mux_preload;
    float mux_max_delay;
    int shortest;
    int bitexact;

    int video_disable;
    int audio_disable;
    int subtitle_disable;
    int data_disable;

    /* indexed by output file stream index */
    int   *streamid_map;
    int nb_streamid_map;

    SpecifierOpt *metadata;
    int        nb_metadata;
    SpecifierOpt *max_frames;
    int        nb_max_frames;
    SpecifierOpt *bitstream_filters;
    int        nb_bitstream_filters;
    SpecifierOpt *codec_tags;
    int        nb_codec_tags;
    SpecifierOpt *sample_fmts;
    int        nb_sample_fmts;
    SpecifierOpt *qscale;
    int        nb_qscale;
    SpecifierOpt *forced_key_frames;
    int        nb_forced_key_frames;
    SpecifierOpt *force_fps;
    int        nb_force_fps;
    SpecifierOpt *frame_aspect_ratios;
    int        nb_frame_aspect_ratios;
    SpecifierOpt *rc_overrides;
    int        nb_rc_overrides;
    SpecifierOpt *intra_matrices;
    int        nb_intra_matrices;
    SpecifierOpt *inter_matrices;
    int        nb_inter_matrices;
    SpecifierOpt *chroma_intra_matrices;
    int        nb_chroma_intra_matrices;
    SpecifierOpt *top_field_first;
    int        nb_top_field_first;
    SpecifierOpt *metadata_map;
    int        nb_metadata_map;
    SpecifierOpt *presets;
    int        nb_presets;
    SpecifierOpt *copy_initial_nonkeyframes;
    int        nb_copy_initial_nonkeyframes;
    SpecifierOpt *copy_prior_start;
    int        nb_copy_prior_start;
    SpecifierOpt *filters;
    int        nb_filters;
    SpecifierOpt *filter_scripts;
    int        nb_filter_scripts;
    SpecifierOpt *reinit_filters;
    int        nb_reinit_filters;
    SpecifierOpt *fix_sub_duration;
    int        nb_fix_sub_duration;
    SpecifierOpt *canvas_sizes;
    int        nb_canvas_sizes;
    SpecifierOpt *pass;
    int        nb_pass;
    SpecifierOpt *passlogfiles;
    int        nb_passlogfiles;
    SpecifierOpt *max_muxing_queue_size;
    int        nb_max_muxing_queue_size;
    SpecifierOpt *guess_layout_max;
    int        nb_guess_layout_max;
    SpecifierOpt *apad;
    int        nb_apad;
    SpecifierOpt *discard;
    int        nb_discard;
    SpecifierOpt *disposition;
    int        nb_disposition;
    SpecifierOpt *program;
    int        nb_program;
    SpecifierOpt *time_bases;
    int        nb_time_bases;
    SpecifierOpt *enc_time_bases;
    int        nb_enc_time_bases;
} OptionsContext;

/* One input of a FilterGraph: links a filter context to the InputStream
 * (ist) that feeds it and to the owning graph, and records the media
 * parameters configured for this input. */
typedef struct InputFilter {
    AVFilterContext    *filter;
    struct InputStream *ist;
    struct FilterGraph *graph;
    uint8_t            *name;
    enum AVMediaType    type;   // AVMEDIA_TYPE_SUBTITLE for sub2video

    AVFifoBuffer *frame_queue;

    // parameters configured for this input
    int format;

    int width, height;
    AVRational sample_aspect_ratio;

    int sample_rate;
    int channels;
    uint64_t channel_layout;

    AVBufferRef *hw_frames_ctx;

    int eof;
} InputFilter;

/* One output of a FilterGraph: links a filter context to the OutputStream
 * (ost) it feeds and to the owning graph, and records the properties
 * desired for that output stream. */
typedef struct OutputFilter {
    AVFilterContext     *filter;
    struct OutputStream *ost;
    struct FilterGraph  *graph;
    uint8_t             *name;

    /* temporary storage until stream maps are processed */
    AVFilterInOut       *out_tmp;
    enum AVMediaType     type;

    /* desired output stream properties */
    int width, height;
    AVRational frame_rate;
    int format;
    int sample_rate;
    uint64_t channel_layout;

    // those are only set if no format is specified and the encoder gives us multiple options
    int *formats;
    uint64_t *channel_layouts;
    int *sample_rates;
} OutputFilter;

/* A filter graph used by the tool: its index, its textual description
 * (graph_desc), the underlying AVFilterGraph, and the arrays of
 * InputFilter / OutputFilter pointers with their element counts. */
typedef struct FilterGraph {
    int            index;
    const char    *graph_desc;

    AVFilterGraph *graph;
    int reconfiguration;

    InputFilter   **inputs;
    int          nb_inputs;
    OutputFilter **outputs;
    int         nb_outputs;
} FilterGraph;

/* Per-input-stream state (element type of the global input_streams array).
 * file_index is the index of the InputFile this stream belongs to. The
 * struct also carries decoder state, timestamp bookkeeping, subtitle
 * handling, the filters fed by this stream, hwaccel settings and
 * statistics. */
typedef struct InputStream {
    int file_index;
    AVStream *st;
    int discard;             /* true if stream data should be discarded */
    int user_set_discard;
    int decoding_needed;     /* non zero if the packets must be decoded in 'raw_fifo', see DECODING_FOR_* */
#define DECODING_FOR_OST    1
#define DECODING_FOR_FILTER 2

    AVCodecContext *dec_ctx;
    AVCodec *dec;
    AVFrame *decoded_frame;
    AVFrame *filter_frame; /* a ref of decoded_frame, to be sent to filters */

    int64_t       start;     /* time when read started */
    /* predicted dts of the next packet read for this stream or (when there are
     * several frames in a packet) of the next frame in current packet (in AV_TIME_BASE units) */
    int64_t       next_dts;
    int64_t       dts;       ///< dts of the last packet read for this stream (in AV_TIME_BASE units)

    int64_t       next_pts;  ///< synthetic pts for the next decode frame (in AV_TIME_BASE units)
    int64_t       pts;       ///< current pts of the decoded frame  (in AV_TIME_BASE units)
    int           wrap_correction_done;

    int64_t filter_in_rescale_delta_last;

    int64_t min_pts; /* pts with the smallest value in a current stream */
    int64_t max_pts; /* pts with the higher value in a current stream */

    // when forcing constant input framerate through -r,
    // this contains the pts that will be given to the next decoded frame
    int64_t cfr_next_pts;

    int64_t nb_samples; /* number of samples in the last decoded audio frame before looping */

    double ts_scale;
    int saw_first_ts;
    AVDictionary *decoder_opts;
    AVRational framerate;               /* framerate forced with -r */
    int top_field_first;
    int guess_layout_max;

    int autorotate;

    int fix_sub_duration;
    struct { /* previous decoded subtitle and related variables */
        int got_output;
        int ret;
        AVSubtitle subtitle;
    } prev_sub;

    struct sub2video {
        int64_t last_pts;
        int64_t end_pts;
        AVFifoBuffer *sub_queue;    ///< queue of AVSubtitle* before filter init
        AVFrame *frame;
        int w, h;
        unsigned int initialize; ///< marks if sub2video_update should force an initialization
    } sub2video;

    int dr1;

    /* decoded data from this stream goes into all those filters
     * currently video and audio only */
    InputFilter **filters;
    int        nb_filters;

    int reinit_filters;

    /* hwaccel options */
    enum HWAccelID hwaccel_id;
    enum AVHWDeviceType hwaccel_device_type;
    char  *hwaccel_device;
    enum AVPixelFormat hwaccel_output_format;

    /* hwaccel context */
    void  *hwaccel_ctx;
    void (*hwaccel_uninit)(AVCodecContext *s);
    int  (*hwaccel_get_buffer)(AVCodecContext *s, AVFrame *frame, int flags);
    int  (*hwaccel_retrieve_data)(AVCodecContext *s, AVFrame *frame);
    enum AVPixelFormat hwaccel_pix_fmt;
    enum AVPixelFormat hwaccel_retrieved_pix_fmt;
    AVBufferRef *hw_frames_ctx;

    /* stats */
    // combined size of all the packets read
    uint64_t data_size;
    /* number of packets successfully read for this stream */
    uint64_t nb_packets;
    // number of frames/samples retrieved from the decoder
    uint64_t frames_decoded;
    uint64_t samples_decoded;

    int64_t *dts_buffer;
    int nb_dts_buffer;

    int got_output;
} InputStream;

/* Per-input-file state (element type of the global input_files array).
 * The streams of one file are addressed through ist_index (first stream
 * in input_streams) and nb_streams. The reader-thread fields exist only
 * when HAVE_THREADS is set. */
typedef struct InputFile {
    AVFormatContext *ctx;
    int eof_reached;      /* true if eof reached */
    int eagain;           /* true if last read attempt returned EAGAIN */
    int ist_index;        /* index of first stream in input_streams */
    int loop;             /* set number of times input stream should be looped */
    int64_t duration;     /* actual duration of the longest stream in a file
                             at the moment when looping happens */
    AVRational time_base; /* time base of the duration */
    int64_t input_ts_offset;

    int64_t ts_offset;
    int64_t last_ts;
    int64_t start_time;   /* user-specified start time in AV_TIME_BASE or AV_NOPTS_VALUE */
    int seek_timestamp;
    int64_t recording_time;
    int nb_streams;       /* number of stream that ffmpeg is aware of; may be different
                             from ctx.nb_streams if new streams appear during av_read_frame() */
    int nb_streams_warn;  /* number of streams that the user was warned of */
    int rate_emu;
    int accurate_seek;

#if HAVE_THREADS
    AVThreadMessageQueue *in_thread_queue;
    pthread_t thread;           /* thread reading from this file */
    int non_blocking;           /* reading packets from the thread should not block */
    int joined;                 /* the thread has been joined */
    int thread_queue_size;      /* maximum number of queued packets */
#endif
} InputFile;

/* Indices into OutputStream.forced_keyframes_expr_const_values[]; FKF_NB is
 * the number of entries and sizes that array. The matching names are
 * declared in forced_keyframes_const_names[]. */
enum forced_keyframes_const {
    FKF_N,
    FKF_N_FORCED,
    FKF_PREV_FORCED_N,
    FKF_PREV_FORCED_T,
    FKF_T,
    FKF_NB
};

#define ABORT_ON_FLAG_EMPTY_OUTPUT        (1 <<  0)
#define ABORT_ON_FLAG_EMPTY_OUTPUT_STREAM (1 <<  1)

extern const char *const forced_keyframes_const_names[];

/* Values stored in OutputStream.finished. The two constants are distinct
 * bits (1 and 2). */
typedef enum {
    ENCODER_FINISHED = 1,
    MUXER_FINISHED = 2,
} OSTFinished ;

/* Per-output-stream state (element type of the global output_streams array).
 * file_index identifies the owning output file, index is the stream's
 * position inside that file, and source_index is the InputStream it is
 * produced from. The struct also carries encoder state, video-only and
 * audio-only settings, filter linkage, option dictionaries and
 * statistics. */
typedef struct OutputStream {
    int file_index;          /* file index */
    int index;               /* stream index in the output file */
    int source_index;        /* InputStream index */
    AVStream *st;            /* stream in the output file */
    int encoding_needed;     /* true if encoding needed for this stream */
    int frame_number;
    /* input pts and corresponding output pts
       for A/V sync */
    struct InputStream *sync_ist; /* input stream to sync against */
    int64_t sync_opts;       /* output frame counter, could be changed to some true timestamp */ // FIXME look at frame_number
    /* pts of the first frame encoded for this stream, used for limiting
     * recording time */
    int64_t first_pts;
    /* dts of the last packet sent to the muxer */
    int64_t last_mux_dts;
    // the timebase of the packets sent to the muxer
    AVRational mux_timebase;
    AVRational enc_timebase;

    AVBSFContext            *bsf_ctx;

    AVCodecContext *enc_ctx;
    AVCodecParameters *ref_par; /* associated input codec parameters with encoders options applied */
    AVCodec *enc;
    int64_t max_frames;
    AVFrame *filtered_frame;
    AVFrame *last_frame;
    int last_dropped;
    int last_nb0_frames[3];

    void  *hwaccel_ctx;

    /* video only */
    AVRational frame_rate;
    int is_cfr;
    int force_fps;
    int top_field_first;
    int rotate_overridden;
    double rotate_override_value;

    AVRational frame_aspect_ratio;

    /* forced key frames */
    int64_t forced_kf_ref_pts;
    int64_t *forced_kf_pts;
    int forced_kf_count;
    int forced_kf_index;
    char *forced_keyframes;
    AVExpr *forced_keyframes_pexpr;
    double forced_keyframes_expr_const_values[FKF_NB];

    /* audio only */
    int *audio_channels_map;             /* list of the channels id to pick from the source stream */
    int audio_channels_mapped;           /* number of channels in audio_channels_map */

    char *logfile_prefix;
    FILE *logfile;

    OutputFilter *filter;
    char *avfilter;
    char *filters;         ///< filtergraph associated to the -filter option
    char *filters_script;  ///< filtergraph script associated to the -filter_script option

    AVDictionary *encoder_opts;
    AVDictionary *sws_dict;
    AVDictionary *swr_opts;
    AVDictionary *resample_opts;
    char *apad;
    OSTFinished finished;        /* no more packets should be written for this stream */
    int unavailable;                     /* true if the stream is unavailable (possibly temporarily) */
    int stream_copy;

    // init_output_stream() has been called for this stream
    // The encoder and the bitstream filters have been initialized and the stream
    // parameters are set in the AVStream.
    int initialized;

    int inputs_done;

    const char *attachment_filename;
    int copy_initial_nonkeyframes;
    int copy_prior_start;
    char *disposition;

    int keep_pix_fmt;

    /* stats */
    // combined size of all the packets written
    uint64_t data_size;
    // number of packets sent to the muxer
    uint64_t packets_written;
    // number of frames/samples sent to the encoder
    uint64_t frames_encoded;
    uint64_t samples_encoded;

    /* packet quality factor */
    int quality;

    int max_muxing_queue_size;

    /* the packets are buffered here until the muxer is ready to be initialized */
    AVFifoBuffer *muxing_queue;

    /* packet picture type */
    int pict_type;

    /* frame encode sum of squared error values */
    int64_t error[4];
} OutputStream;

/* Per-output-file state (element type of the global output_files array).
 * Only the index of the file's first stream in output_streams (ost_index)
 * is stored here. */
typedef struct OutputFile {
    AVFormatContext *ctx;
    AVDictionary *opts;
    int ost_index;       /* index of the first stream in output_streams */
    int64_t recording_time;  ///< desired length of the resulting file in microseconds == AV_TIME_BASE units
    int64_t start_time;      ///< start time in microseconds == AV_TIME_BASE units
    uint64_t limit_filesize; /* filesize limit expressed in bytes */

    int shortest;

    int header_written;
} OutputFile;

extern InputStream **input_streams;
extern int        nb_input_streams;
extern InputFile   **input_files;
extern int        nb_input_files;

extern OutputStream **output_streams;
extern int         nb_output_streams;
extern OutputFile   **output_files;
extern int         nb_output_files;

extern FilterGraph **filtergraphs;
extern int        nb_filtergraphs;

extern char *vstats_filename;
extern char *sdp_filename;

extern float audio_drift_threshold;
extern float dts_delta_threshold;
extern float dts_error_threshold;

extern int audio_volume;
extern int audio_sync_method;
extern int video_sync_method;
extern float frame_drop_threshold;
extern int do_benchmark;
extern int do_benchmark_all;
extern int do_deinterlace;
extern int do_hex_dump;
extern int do_pkt_dump;
extern int copy_ts;
extern int start_at_zero;
extern int copy_tb;
extern int debug_ts;
extern int exit_on_error;
extern int abort_on_flags;
extern int print_stats;
extern int qp_hist;
extern int stdin_interaction;
extern int frame_bits_per_raw_sample;
extern AVIOContext *progress_avio;
extern float max_error_rate;
extern char *videotoolbox_pixfmt;

extern int filter_nbthreads;
extern int filter_complex_nbthreads;
extern int vstats_version;

extern const AVIOInterruptCB int_cb;

extern const OptionDef options[];
extern const HWAccel hwaccels[];
#if CONFIG_QSV
extern char *qsv_device;
#endif
extern HWDevice *filter_hw_device;


void term_init(void);
void term_exit(void);

void reset_options(OptionsContext *o, int is_input);
void show_usage(void);

void opt_output_file(void *optctx, const char *filename);

void remove_avoptions(AVDictionary **a, AVDictionary *b);
void assert_avoptions(AVDictionary *m);

int guess_input_channel_layout(InputStream *ist);

enum AVPixelFormat choose_pixel_fmt(AVStream *st, AVCodecContext *avctx, AVCodec *codec, enum AVPixelFormat target);
void choose_sample_fmt(AVStream *st, AVCodec *codec);

int configure_filtergraph(FilterGraph *fg);
int configure_output_filter(FilterGraph *fg, OutputFilter *ofilter, AVFilterInOut *out);
void check_filter_outputs(void);
int ist_in_filtergraph(FilterGraph *fg, InputStream *ist);
int filtergraph_is_simple(FilterGraph *fg);
int init_simple_filtergraph(InputStream *ist, OutputStream *ost);
int init_complex_filtergraph(FilterGraph *fg);

void sub2video_update(InputStream *ist, int64_t heartbeat_pts, AVSubtitle *sub);

int ifilter_parameters_from_frame(InputFilter *ifilter, const AVFrame *frame);

int ffmpeg_parse_options(int argc, char **argv);

int videotoolbox_init(AVCodecContext *s);
int qsv_init(AVCodecContext *s);

HWDevice *hw_device_get_by_name(const char *name);
int hw_device_init_from_string(const char *arg, HWDevice **dev);
void hw_device_free_all(void);

int hw_device_setup_for_decode(InputStream *ist);
int hw_device_setup_for_encode(OutputStream *ost);
int hw_device_setup_for_filter(FilterGraph *fg);

int hwaccel_decode_init(AVCodecContext *avctx);

int main_ffmpeg431(int argc, char** argv);

#endif /* FFTOOLS_FFMPEG_H */

全局变量与结构体：

输入：
- InputStream **input_streams = NULL;
- int nb_input_streams = 0;
- InputFile **input_files = NULL;
- int nb_input_files = 0;
输出：
- OutputStream **output_streams = NULL;
- int nb_output_streams = 0;
- OutputFile **output_files = NULL;
- int nb_output_files = 0;

其中： input_streams 是输入流的数组，nb_input_streams 是输入流的个数。 input_files 是输入文件（也可能是设备）的数组，nb_input_files 是输入文件的个数。下面的输出相关的变量们就不用解释了。

可以看出，文件和流是分别保存的。于是，可以想象，结构 InputStream 中应有其所属的文件在 InputFile 中的序号（file_index）。输入流数组应是这样填充的：每当在输入文件中找到一个流时，就把它添加到 input_streams 中，所以一个输入文件对应的输入流在 input_streams 中是紧靠着的，于是 InputFile 结构中应有其第一个流在 input_streams 中的开始序号（ist_index）和被放在 input_streams 中的流的总个数（nb_streams）。

在输出流 output_streams 中，除了要保存其所在的输出文件在 output_files 中的序号（file_index），还应保存其对应的输入流在 input_streams 中的序号（source_index），也应保存其在所属输出文件中的流序号（index）。而输出文件中呢，只需保存它的第一个流在 output_streams 中的序号（ost_index）。

流和文件都准备好了，下面就是转换，那么转换过程是怎样的呢？
答：首先打开输入文件们，然后根据输入流们准备并打开解码器们，然后根据输出流们准备并打开编码器们，然后创建输出文件们，然后为所有输出文件们写好头部，然后就在循环中把输入流转换到输出流并写入输出文件中，转换完后跳出循环，然后写入文件尾，最后关闭所有的输出文件。

三、main 函数主要流程分析

main 函数如下：

/**
 * Entry point of the ffmpeg tool (main() renamed to main_ffmpeg431()).
 *
 * Steps, in order: set up dynamic loading, the exit handler and logging;
 * consume an optional leading "-d" argument; register devices (when
 * CONFIG_AVDEVICE is set) and initialise networking; parse the command line
 * with ffmpeg_parse_options(); run transcode(); finally log benchmark and
 * decode statistics and leave through exit_program().
 *
 * @param argc number of command-line arguments
 * @param argv command-line arguments
 * @return main_return_code (only reached if exit_program() returns)
 */
int main_ffmpeg431(int argc, char **argv)
{
    BenchmarkTimeStamps start_ts;
    int idx, status;

    init_dynload();

    register_exit(ffmpeg_cleanup);

    setvbuf(stderr, NULL, _IONBF, 0); /* win32 runtime needs this */

    av_log_set_flags(AV_LOG_SKIP_REPEATED);
    parse_loglevel(argc, argv, options);

    /* a leading "-d" selects daemon mode: logging goes to the null callback
     * and the argument is dropped before option parsing */
    if (argc > 1 && strcmp(argv[1], "-d") == 0) {
        run_as_daemon = 1;
        av_log_set_callback(log_callback_null);
        --argc;
        ++argv;
    }

#if CONFIG_AVDEVICE
    avdevice_register_all();
#endif
    avformat_network_init();

    //show_banner(argc, argv, options);

    /* parse options and open all input/output files */
    status = ffmpeg_parse_options(argc, argv);
    if (status < 0)
        exit_program(1);

    if (nb_output_files <= 0) {
        /* nothing at all on the command line: print usage first */
        if (nb_input_files == 0) {
            show_usage();
            av_log(NULL, AV_LOG_WARNING, "Use -h to get full help or, even better, run 'man %s'\n", program_name);
            exit_program(1);
        }

        /* file converter / grab */
        av_log(NULL, AV_LOG_FATAL, "At least one output file must be specified\n");
        exit_program(1);
    }

    /* any output whose format is not "rtp" clears want_sdp */
    for (idx = 0; idx < nb_output_files; idx++) {
        const char *fmt_name = output_files[idx]->ctx->oformat->name;

        if (strcmp(fmt_name, "rtp") != 0)
            want_sdp = 0;
    }

    start_ts     = get_benchmark_time_stamps();
    current_time = start_ts;
    if (transcode() < 0)
        exit_program(1);

    if (do_benchmark) {
        int64_t user_us, sys_us, real_us;

        current_time = get_benchmark_time_stamps();
        user_us = current_time.user_usec - start_ts.user_usec;
        sys_us  = current_time.sys_usec  - start_ts.sys_usec;
        real_us = current_time.real_usec - start_ts.real_usec;
        av_log(NULL, AV_LOG_INFO,
               "bench: utime=%0.3fs stime=%0.3fs rtime=%0.3fs\n",
               user_us / 1000000.0, sys_us / 1000000.0, real_us / 1000000.0);
    }

    av_log(NULL, AV_LOG_DEBUG, "%"PRIu64" frames successfully decoded, %"PRIu64" decoding errors\n",
           decode_error_stat[0], decode_error_stat[1]);
    /* exit with code 69 when the share of decoding errors exceeds max_error_rate */
    if ((decode_error_stat[0] + decode_error_stat[1]) * max_error_rate < decode_error_stat[1])
        exit_program(69);

    exit_program(received_nb_signals ? 255 : main_return_code);
    return main_return_code;
}

总结起来，分为以下几个步骤：

1、初始化工作
2、解析命令行参数
3、转码（调用 transcode()）
4、收尾

四、ffmpeg_parse_options

下面是 ffmpeg_parse_options 的调用关系
在这里插入图片描述

1、命令行例子

ffmpeg -i abc.mp4 -i bbb.avi -vcodec libx264 -acodec aac -vf scale=640:480 -f flv -y abc.flv

命令行包括三个部分：输入参数，输出参数，和全局选项。
以上面的命令为例：-i abc.mp4 和 -i bbb.avi 是输入参数，abc.flv 是输出参数，-y 是全局选项。输入/输出参数可以有专属的选项，这些选项应该紧挨着放在对应的输入/输出参数前面。如 -vcodec libx264 -acodec aac -vf scale=640:480 -f flv 就是输出参数 abc.flv 的选项。
全局选项的位置不需要限定，因为选项是以选项名字查找的。
可以有多组输入参数和多组输出参数。

①、解析命令行 split_commandline()

split_commandline() 负责解析命令行。

/**
 * Split the commandline into an intermediate form convenient for further
 * processing.
 *
 * The commandline is assumed to be composed of options which either belong to a
 * group (those with OPT_SPEC, OPT_OFFSET or OPT_PERFILE) or are global
 * (everything else).
 *
 * A group (defined by an OptionGroupDef struct) is a sequence of options
 * terminated by either a group separator option (e.g. -i) or a parameter that
 * is not an option (doesn't start with -). A group without a separator option
 * must always be first in the supplied groups list.
 *
 * All options within the same group are stored in one OptionGroup struct in an
 * OptionGroupList, all groups with the same group definition are stored in one
 * OptionGroupList in OptionParseContext.groups. The order of group lists is the
 * same as the order of group definitions.
 */
int split_commandline(OptionParseContext *octx, int argc, char *argv[],
                      const OptionDef *options,
                      const OptionGroupDef *groups, int nb_groups);

解析的结果保存在 OptionParseContext 中。解析时需要参考 OptionDef 和 OptionGroupDef。OptionDef[] 是 ffmpeg 支持的选项列表，OptionGroupDef[] 是支持的组列表，包括输入类和输出类：前者以 -i 开头，后面跟文件名或设备名；后者只有文件名。

下面的类图显示了涉及的类：
在这里插入图片描述

OptionGroup 保存一个输入（或输出）和它的选项列表。Option 表示一个选项。
OptionParseContext 中包括多个 OptionGroup。全局选项保存在 global_opts 中。所有输入设备的选项保存在一个 OptionGroupList 实例中，所有输出设备的选项保存在另一个实例中。两者合起来组成数组 groups。

split_commandline() 在一个循环中解析命令行，主要涉及如下函数。

函数	功能
find_option()	查询支持的 option 列表，检查当前元素是否一个option
add_option()	将 option 加入一个临时组。（因为 option 先于 group 出现，还不知道应该加入到哪个组。）
match_group_separator()	查询支持的 group 列表，检查当前元素是否是一个 Group
finish_group()	设置临时组的参数，并用它填充 OptionParseContext.groups（现在知道应该加入哪个组了）

②、parse_optgroup()

parse_optgroup() 负责将 OptionGroup 转换成 OptionsContext。

/**
 * Parse an options group and write results into optctx.
 *
 * @param optctx an app-specific options context. NULL for global options group
 */
int parse_optgroup(void *optctx, OptionGroup *g);

OptionGroup 保存的选项值是字符串，而 OptionsContext 保存的值是由 OptionDef 定义的实际类型。parse_optgroup() 的第一个参数 optctx 实际上是 OptionsContext。

下面的类图显示了涉及的类：
在这里插入图片描述

SpecifierOpt 保存实际类型的选项。OptionsContext 有若干个 SpecifierOpt 数组的成员，每个 SpecifierOpt 数组保存一类选项。如 filters 保存 "filter" 选项。但 filter 可以是 "filter:v"，属于 video，也可以是 "filter:a"，属于 audio。SpecifierOpt.specifier 成员就是用来标记这个选项应该属于谁的。对于 "filter:v"，SpecifierOpt.specifier 就是 "v"。
这里顺便提一下 AVDictionary。解析过程没有用到它。用户设置的选项可能不成功，而选项的最终值会保存在这里。用 av_dict_set()函数设置它。

parse_optgroup() 函数遍历 OptionGroup 中的 Option，调用 write_option() 将其写入 OptionsContext。

对于基本的选项，它的 OptionDef 中定义了它在 OptionsContext 的偏移，所以将字符串转化后，直接写入就好了。比如”filter:v”。
有的选项可能是其他选项的别名。这时它的 OptionDef 指定了一个回调函数。这个函数会重定向到所指向的选项上去。如”vf”就是”filter:v”的别名，它的 OptionDef 指定了回调函数 opt_video_filter()。这个函数会调用 parse_option() 和 find_option() 查找”filter:v”对应的 OptionDef，并再次调用 write_option()。
全局选项。它的 OptionDef 也定义了一个回调函数。这个函数直接设置全局变量。如 loglevel，它的 OptionDef 定义了 opt_loglevel()。这个函数调用 av_log_set_level() 设置日志输出等级。

③、MATCH_PER_XXX_OPT()

宏 MATCH_PER_TYPE_OPT() 和 MATCH_PER_STREAM_OPT() 用于从 OptionsContext 读值。

前者指定参数 mediatype，用它跟 SpecifierOpt.specifier 做字符串比较（strcmp），找出 option 并读出。
后者指定参数 AVStream，调用 check_stream_specifier()，用 AVStream 的属性与 SpecifierOpt.specifier 匹配，找出 option 并读出。

/*
 * Read the per-stream option `name` that applies to stream `st` into `outvar`.
 *
 * The macro expects a pointer named `o` in the caller's scope that has an
 * array member `name` and a count member `nb_<name>`. Every entry of that
 * array is tested with check_stream_specifier(fmtctx, st, specifier):
 *   - a positive result copies the entry's `u.type` member into `outvar`
 *     (later matches overwrite earlier ones, so the last match wins);
 *   - a negative result terminates the program via exit_program(1).
 * If more than one entry matched, WARN_MULTIPLE_OPT_USAGE() is invoked with
 * the last matching entry.
 *
 * Every line of the definition except the last one must end with a
 * backslash; a line without it (or a blank line) ends the macro early.
 */
#define MATCH_PER_STREAM_OPT(name, type, outvar, fmtctx, st)\
{\
    int i, ret, matches = 0;\
    SpecifierOpt *so = NULL;\
    for (i = 0; i < o->nb_ ## name; i++) {\
        char *spec = o->name[i].specifier;\
        if ((ret = check_stream_specifier(fmtctx, st, spec)) > 0) {\
            outvar = o->name[i].u.type;\
            so = &o->name[i];\
            matches++;\
        } else if (ret < 0)\
            exit_program(1);\
    }\
    if (matches > 1)\
       WARN_MULTIPLE_OPT_USAGE(name, type, so, st);\
}

/*
 * Read the per-media-type option `name` for `mediatype` into `outvar`.
 *
 * The macro expects a pointer named `o` in the caller's scope that has an
 * array member `name` and a count member `nb_<name>`. Every entry whose
 * specifier string is equal to `mediatype` (strcmp) copies its `u.type`
 * member into `outvar`; the last matching entry wins. `outvar` is left
 * untouched when nothing matches. The `fmtctx` argument is not used here.
 *
 * Every line of the definition except the last one must end with a
 * backslash; a line without it (or a blank line) ends the macro early.
 */
#define MATCH_PER_TYPE_OPT(name, type, outvar, fmtctx, mediatype)\
{\
    int i;\
    for (i = 0; i < o->nb_ ## name; i++) {\
        char *spec = o->name[i].specifier;\
        if (!strcmp(spec, mediatype))\
            outvar = o->name[i].u.type;\
    }\
}

2、vf 选项解析

下图是 avfilter_graph_parse2() 的函数调用关系。
在这里插入图片描述

①、filters

如下是 filters 的一个例子。它来自 ffmpeg 的文档：https://ffmpeg.org//ffmpeg-filters.html#Filtergraph-description

ffmpeg -i INPUT -vf "split [main][tmp]; [tmp] crop=iw:ih/2:0:0, vflip [flip]; [main][flip] overlay=0:H/2" OUTPUT

对应 FilterGraph 的结构示意图如下。矩形框内是 vf 的内容对应的部分。其中 split 应该导出到 inputs 中，overlay 应该导出到 outputs 中。
在这里插入图片描述

②、vf 术语

描述 vf 的解析过程需要使用一些术语。其中一部分是关于 vf 语法的，另外一部分是关于生成的 FilterGraph 结构的。

上图标出了 vf 语法的术语。

过滤器。过滤器用红色标出，包括它的名字和参数。如”split”，只有名字。又如”overlay=0:H/2”，overlay 是名字， ”0:H/2”是参数。名字和参数用 = 连接。
位置点。有两类位置点，有名的和无名的。有名位置点用绿色标出，名字用 [] 包住，如 main, flip, tmp。无名位置点不必标出，如下图所示。
路径。路径是一条从位置点开始，中间过滤器和位置点交错，在位置点结束的处理流程。多条路径组成整个 filtergraph。中间的位置点都是无名的，开始和结束的位置点应该是有名的，除非这条路径在 filtergraph 的开始和结束位置。路径之间用;隔开。如 [tmp] crop=iw:ih/2:0:0, vflip [flip]。以 tmp 开始，中间包括 crop 和 vflip 和一个无名位置点，在 flip 结束。有名位置点是该路径与其他路径的连接点，所以需要有一个名字来标记，而无名位置点只存在该路径内部的两个过滤器之间，是隐含的，所以不需要名字。

下图是 FilterGraph 的结构图。
在这里插入图片描述

FilterGraph 是由一系列的过滤器，Pad 和 Pad Link 构成的。
过滤器来自 FilterGraph 语法中的过滤器，它有一组 In Pad 和一组 OutPad，Pad 与语法中的位置点对应。过滤器之间通过 Pad 联系，Pad Link 用来将一个 In Pad 连接到一个 OutPad。Pad Link 没有对应的语法元素。
Input/Output 用于解析过程，也用于保存整个解析的结果，以返回给调用者。open_inputs 标记当前还没有解析（与其他 OutPad 连接）的 InPad，open_outputs 标记当前还没有解析的 OutPad，curr_inputs 标记当前将要解析的 InPad。

③、avfilter_graph_parse2()

avfilter_graph_parse2() 负责解析 vf 选项内容。

/**
 * Add a graph described by a string to a graph.
 *
 * @param[in]  graph   the filter graph where to link the parsed graph context
 * @param[in]  filters string to be parsed
 * @param[out] inputs  a linked list of all free (unlinked) inputs of the
 *                     parsed graph will be returned here. It is to be freed
 *                     by the caller using avfilter_inout_free().
 * @param[out] outputs a linked list of all free (unlinked) outputs of the
 *                     parsed graph will be returned here. It is to be freed by the
 *                     caller using avfilter_inout_free().
 * @return zero on success, a negative AVERROR code on error
 *
 * @note This function returns the inputs and outputs that are left
 * unlinked after parsing the graph and the caller then deals with
 * them.
 * @note This function makes no reference whatsoever to already
 * existing parts of the graph and the inputs parameter will on return
 * contain inputs of the newly parsed part of the graph.  Analogously
 * the outputs parameter will contain outputs of the newly created
 * filters.
 */
int avfilter_graph_parse2(AVFilterGraph *graph, const char *filters,
                          AVFilterInOut **inputs,
                          AVFilterInOut **outputs);

输入参数 filters 是 vf 选项内容。输出参数 inputs 是 filters 导出的输入接口，outputs 是 filters 导出的输出接口。

avfilter_graph_parse2() 主要调用四个函数进行解析。

函数	功能
parse_inputs()	选取若干 open_outputs，以更新 curr_inputs
parse_filter()	解析过滤器
link_filter_inouts()	将新的过滤器连入当前的 curr_inputs，并更新 curr_inputs
parse_outputs()	结束当前的 curr_inputs，加入 open_outputs。

④、FilterGraph 类

下面的类图显示了 FilterGraph 各元素对应的类。
在这里插入图片描述

AVFilterContext 表示过滤器。AVFilter 是它的属性类。
AVFilterPad 是 Pad 类。一个 AVFilterContext 实例包括 AVFilterPad 的一组 In Pad 实例和一组 Out Pad 实例。 AVFilterLink 是 Pad Link 类，它连接两个 AVFilterPad 实例。
AVFilterLink 有一个 FFFrameQueue，用于保存过滤的中间结果。这是一个 frame 的数据通道。
AVFilterContext 有一个空间，用于保存该特定类型 Filter 的私有信息，可以是 CropContext，SplitContext 或其他 filter 的一种。
AVFilterInOut 用于解析过程标记 open_inputs、open_outputs 和 curr_inputs。它没有直接引用 AVFilterPad，而是引用 AVFilterContext，并用序号间接指向 AVFilterPad。

五、transcode 函数

transcode 是用于实现媒体文件转码的函数之一。转码是指将一个媒体文件从一种编码格式转换为另一种编码格式的过程。这可以包括视频编解码器、音频编解码器、容器格式或其他媒体属性的更改。

其主要包括以下两个核心函数：

transcode_init()
- 初始化，打开所有输出流的编码器,打开所有输入流的解码器,写入所有输出文件的文件头。
transcode_step()
- 用于实现 FFmpeg 转码过程中单个步骤的函数。

1、transcode_init 函数

初始化工作：

AVFormatContext *oc;//输出文件的封装格式上下文（output_files[i]->ctx）
OutputStream *ost;//输出流
InputStream *ist; //输入流
init_input_stream
init_output_stream

/*
 * transcode_init() does the preparation work before the conversion starts.
 *
 * In order, it:
 *   1. fills in source_index for output streams fed by a filtergraph that has
 *      exactly one input;
 *   2. records a start time for every stream of input files that requested
 *      rate emulation;
 *   3. calls init_input_stream() for every input stream;
 *   4. calls init_output_stream() for every output stream that is not fed by
 *      a filtergraph;
 *   5. sets the discard level of every program of every input file;
 *   6. calls check_init_output_file() for outputs that have no streams;
 *   7. logs the stream mapping (this also runs after a failure in steps 3-6).
 *
 * Returns 0 on success. On failure the message collected in `error` is logged
 * and the non-zero error code is returned.
 */
static int transcode_init(void)
{
    
    
    int ret = 0, i, j, k;
    AVFormatContext *oc;// format context of the output file being examined (output_files[i]->ctx)
    OutputStream *ost;	// output stream
    InputStream *ist;	// input stream
    char error[1024] = {
    
    0};
    
    /* For outputs of single-input filtergraphs that have no source stream yet,
     * look up the index of the graph's input stream (searching from the last
     * input stream backwards; k ends at -1 when nothing matches). */
    for (i = 0; i < nb_filtergraphs; i++) {
    
    
        FilterGraph *fg = filtergraphs[i];
        for (j = 0; j < fg->nb_outputs; j++) {
    
    
            OutputFilter *ofilter = fg->outputs[j];
            if (!ofilter->ost || ofilter->ost->source_index >= 0)
                continue;
            if (fg->nb_inputs != 1)
                continue;
            for (k = nb_input_streams-1; k >= 0 ; k--)
                if (fg->inputs[0]->ist == input_streams[k])
                    break;
            ofilter->ost->source_index = k;
        }
    }

    /* init framerate emulation */
    // (a conversion normally does not follow the frame rate, but it can when rate emulation is requested)
    for (i = 0; i < nb_input_files; i++) {
    
    
        InputFile *ifile = input_files[i];
        // if rate emulation was requested for this input file, record the start time for all of its streams
        if (ifile->rate_emu)
            for (j = 0; j < ifile->nb_streams; j++)
                input_streams[j + ifile->ist_index]->start = av_gettime_relative();
    }

    /* init input streams */
    // call init_input_stream() for every input stream; on failure close the
    // encoder context of every output stream and jump to the mapping dump
    for (i = 0; i < nb_input_streams; i++)
        if ((ret = init_input_stream(i, error, sizeof(error))) < 0) {
    
    
            for (i = 0; i < nb_output_streams; i++) {
    
    
                ost = output_streams[i];
                avcodec_close(ost->enc_ctx);
            }
            goto dump_format;
        }

    /* open each encoder */
    // loop over all output streams and initialize each one that is not fed by a filtergraph
    for (i = 0; i < nb_output_streams; i++) {
    
    
        // skip streams fed from filtergraphs until we have a frame for them
        if (output_streams[i]->filter)
            continue;

        ret = init_output_stream(output_streams[i], error, sizeof(error));
        if (ret < 0)
            goto dump_format;
    }

    /* discard unused programs */
    // a program keeps AVDISCARD_ALL unless at least one of its streams is not discarded
    for (i = 0; i < nb_input_files; i++) {
    
    
        InputFile *ifile = input_files[i];
        for (j = 0; j < ifile->ctx->nb_programs; j++) {
    
    
            AVProgram *p = ifile->ctx->programs[j];
            int discard  = AVDISCARD_ALL;

            for (k = 0; k < p->nb_stream_indexes; k++)
                if (!input_streams[ifile->ist_index + p->stream_index[k]]->discard) {
    
    
                    discard = AVDISCARD_DEFAULT;
                    break;
                }
            p->discard = discard;
        }
    }

    /* write headers for files with no streams */
    // only outputs whose format has AVFMT_NOSTREAMS set and that contain zero
    // streams are handled here, through check_init_output_file()
    for (i = 0; i < nb_output_files; i++) {
    
    
        oc = output_files[i]->ctx;
        if (oc->oformat->flags & AVFMT_NOSTREAMS && oc->nb_streams == 0) {
    
    
            ret = check_init_output_file(output_files[i], i);
            if (ret < 0)
                goto dump_format;
        }
    }

 dump_format:
    /* dump the stream mapping */
    av_log(NULL, AV_LOG_INFO, "Stream mapping:\n");
    // input streams that feed a non-simple (complex) filtergraph
    for (i = 0; i < nb_input_streams; i++) {
    
    
        ist = input_streams[i];

        for (j = 0; j < ist->nb_filters; j++) {
    
    
            if (!filtergraph_is_simple(ist->filters[j]->graph)) {
    
    
                av_log(NULL, AV_LOG_INFO, "  Stream #%d:%d (%s) -> %s",
                       ist->file_index, ist->st->index, ist->dec ? ist->dec->name : "?",
                       ist->filters[j]->name);
                if (nb_filtergraphs > 1)
                    av_log(NULL, AV_LOG_INFO, " (graph %d)", ist->filters[j]->graph->index);
                av_log(NULL, AV_LOG_INFO, "\n");
            }
        }
    }

    for (i = 0; i < nb_output_streams; i++) {
    
    
        ost = output_streams[i];

        if (ost->attachment_filename) {
    
    
            /* an attached file */
            av_log(NULL, AV_LOG_INFO, "  File %s -> Stream #%d:%d\n",
                   ost->attachment_filename, ost->file_index, ost->index);
            continue;
        }

        // complex filtergraph
        if (ost->filter && !filtergraph_is_simple(ost->filter->graph)) {
    
    
            /* output from a complex graph */
            av_log(NULL, AV_LOG_INFO, "  %s", ost->filter->name);
            if (nb_filtergraphs > 1)
                av_log(NULL, AV_LOG_INFO, " (graph %d)", ost->filter->graph->index);

            av_log(NULL, AV_LOG_INFO, " -> Stream #%d:%d (%s)\n", ost->file_index,
                   ost->index, ost->enc ? ost->enc->name : "?");
            continue;
        }

        av_log(NULL, AV_LOG_INFO, "  Stream #%d:%d -> #%d:%d",
               input_streams[ost->source_index]->file_index,
               input_streams[ost->source_index]->st->index,
               ost->file_index,
               ost->index);
        if (ost->sync_ist != input_streams[ost->source_index])
            av_log(NULL, AV_LOG_INFO, " [sync #%d:%d]",
                   ost->sync_ist->file_index,
                   ost->sync_ist->st->index);
        // stream copy (no decode + re-encode): only the " (copy)" tag is logged here;
        // otherwise the codec names and the decoder/encoder names are logged
        if (ost->stream_copy)
            av_log(NULL, AV_LOG_INFO, " (copy)");
        else {
    
    
            const AVCodec *in_codec    = input_streams[ost->source_index]->dec;
            const AVCodec *out_codec   = ost->enc;
            const char *decoder_name   = "?";
            const char *in_codec_name  = "?";
            const char *encoder_name   = "?";
            const char *out_codec_name = "?";
            const AVCodecDescriptor *desc;

            if (in_codec) {
    
    
                decoder_name  = in_codec->name;
                desc = avcodec_descriptor_get(in_codec->id);
                if (desc)
                    in_codec_name = desc->name;
                // a decoder named exactly like its codec is reported as "native"
                if (!strcmp(decoder_name, in_codec_name))
                    decoder_name = "native";
            }

            if (out_codec) {
    
    
                encoder_name   = out_codec->name;
                desc = avcodec_descriptor_get(out_codec->id);
                if (desc)
                    out_codec_name = desc->name;
                // an encoder named exactly like its codec is reported as "native"
                if (!strcmp(encoder_name, out_codec_name))
                    encoder_name = "native";
            }

            av_log(NULL, AV_LOG_INFO, " (%s (%s) -> %s (%s))",
                   in_codec_name, decoder_name,
                   out_codec_name, encoder_name);
        }
        av_log(NULL, AV_LOG_INFO, "\n");
    }

    // a failure from any earlier step is reported only after the mapping has been printed
    if (ret) {
    
    
        av_log(NULL, AV_LOG_ERROR, "%s\n", error);
        return ret;
    }

    atomic_store(&transcode_init_done, 1);// mark initialization as done

    return 0;
}

/**
 * Prepare the input stream with index ist_index for transcoding.
 *
 * When the stream needs decoding, its decoder context is configured
 * (callbacks, decoder options, packet time base), hardware device setup is
 * run and the decoder is opened with avcodec_open2(). In every successful
 * case next_pts and next_dts are reset to AV_NOPTS_VALUE.
 *
 * @param ist_index index into input_streams
 * @param error     buffer that receives a description when a step fails
 * @param error_len size of the error buffer
 * @return 0 on success, a negative error code otherwise
 */
static int init_input_stream(int ist_index, char *error, int error_len)
{
    InputStream *ist = input_streams[ist_index];
    int err;

    if (ist->decoding_needed) {
        AVCodec *decoder = ist->dec;

        if (decoder == NULL) {
            snprintf(error, error_len, "Decoder (codec %s) not found for input stream #%d:%d",
                    avcodec_get_name(ist->dec_ctx->codec_id), ist->file_index, ist->st->index);
            return AVERROR(EINVAL);
        }

        /* Hook this stream and the ffmpeg.c callbacks into the decoder context. */
        ist->dec_ctx->opaque                = ist;
        ist->dec_ctx->get_format            = get_format;
        ist->dec_ctx->get_buffer2           = get_buffer;
        ist->dec_ctx->thread_safe_callbacks = 1;

        av_opt_set_int(ist->dec_ctx, "refcounted_frames", 1, 0);

        /* DVB subtitles decoded for an output stream get compute_edt by default. */
        if (ist->dec_ctx->codec_id == AV_CODEC_ID_DVB_SUBTITLE) {
            if (ist->decoding_needed & DECODING_FOR_OST) {
                av_dict_set(&ist->decoder_opts, "compute_edt", "1", AV_DICT_DONT_OVERWRITE);
                if (ist->decoding_needed & DECODING_FOR_FILTER)
                    av_log(NULL, AV_LOG_WARNING, "Warning using DVB subtitles for filtering and output at the same time is not fully supported, also see -compute_edt [0|1]\n");
            }
        }

        av_dict_set(&ist->decoder_opts, "sub_text_format", "ass", AV_DICT_DONT_OVERWRITE);

        /* Useful for subtitles retiming by lavf (FIXME), skipping samples in
         * audio, and video decoders such as cuvid or mediacodec */
        ist->dec_ctx->pkt_timebase = ist->st->time_base;

        /* Default to automatic threading unless a "threads" entry already exists. */
        if (av_dict_get(ist->decoder_opts, "threads", NULL, 0) == NULL)
            av_dict_set(&ist->decoder_opts, "threads", "auto", 0);
        /* Attached pics are sparse, therefore we would not want to delay their decoding till EOF. */
        if (ist->st->disposition & AV_DISPOSITION_ATTACHED_PIC)
            av_dict_set(&ist->decoder_opts, "threads", "1", 0);

        err = hw_device_setup_for_decode(ist);
        if (err < 0) {
            snprintf(error, error_len, "Device setup failed for "
                     "decoder on input stream #%d:%d : %s",
                     ist->file_index, ist->st->index, av_err2str(err));
            return err;
        }

        /* Open the decoder. */
        err = avcodec_open2(ist->dec_ctx, decoder, &ist->decoder_opts);
        if (err < 0) {
            if (err == AVERROR_EXPERIMENTAL)
                abort_codec_experimental(decoder, 0);

            snprintf(error, error_len,
                     "Error while opening decoder for input stream "
                     "#%d:%d : %s",
                     ist->file_index, ist->st->index, av_err2str(err));
            return err;
        }
        assert_avoptions(ist->decoder_opts);
    }

    ist->next_pts = AV_NOPTS_VALUE;
    ist->next_dts = AV_NOPTS_VALUE;

    return 0;
}

2、transcode_step 函数

/**
 * Run a single step of transcoding.
 *
 * @return  0 for success, <0 for error
 */
 /*
 解码流程是：
 process_input() -> output_packet() -> decode_audio()/decode_video()/transcode_subtitles()
 而decode_audio() 是调用 avcodec_decode_audio4() 来完成工作的。
 decode_video() 则是通过调用 avcodec_decode_video2() 来完成的。

 编码流程是：
 reap_filters() -> do_video_out()或 do_audio_out() 
 -> avcodec_encode_video2() 或 avcodec_encode_audio2()。
 */
static int transcode_step(void)
{
    
    
    OutputStream *ost;
    InputStream  *ist = NULL;
    int ret;

	//选择一个有效的输出流进行处理
    ost = choose_output();
    if (!ost) {
    
    
        if (got_eagain()) {
    
    
            reset_eagain();
            av_usleep(10000);
            return 0;
        }
        av_log(NULL, AV_LOG_VERBOSE, "No more inputs to read from, finishing.\n");
        return AVERROR_EOF;
    }

	//选择一个输入流
    if (ost->filter && !ost->filter->graph->graph) {
    
    
        if (ifilter_has_all_input_formats(ost->filter->graph)) {
    
    
            ret = configure_filtergraph(ost->filter->graph);
            if (ret < 0) {
    
    
                av_log(NULL, AV_LOG_ERROR, "Error reinitializing filters!\n");
                return ret;
            }
        }
    }

    if (ost->filter && ost->filter->graph->graph) {
    
    
        if (!ost->initialized) {
    
    
            char error[1024] = {
    
    0};
            ret = init_output_stream(ost, error, sizeof(error));
            if (ret < 0) {
    
    
                av_log(NULL, AV_LOG_ERROR, "Error initializing output stream %d:%d -- %s\n",
                       ost->file_index, ost->index, error);
                exit_program(1);
            }
        }
        if ((ret = transcode_from_filter(ost->filter->graph, &ist)) < 0)
            return ret;
        if (!ist)
            return 0;
    } else if (ost->filter) {
    
    
        int i;
        for (i = 0; i < ost->filter->graph->nb_inputs; i++) {
    
    
            InputFilter *ifilter = ost->filter->graph->inputs[i];
            if (!ifilter->ist->got_output && !input_files[ifilter->ist->file_index]->eof_reached) {
    
    
                ist = ifilter->ist;
                break;
            }
        }
        if (!ist) {
    
    
            ost->inputs_done = 1;
            return 0;
        }
    } else {
    
    
        av_assert0(ost->source_index >= 0);
        ist = input_streams[ost->source_index];
    }

	//读取并处理每一个包
    ret = process_input(ist->file_index);
    if (ret == AVERROR(EAGAIN)) {
    
    
        if (input_files[ist->file_index]->eagain)
            ost->unavailable = 1;
        return 0;
    }

    if (ret < 0)
        return ret == AVERROR_EOF ? 0 : ret;

	//根据滤波器做滤波处理，并把处理完的音视频输出到输出文件中
    return reap_filters(0);

}

我的qq：2442391036，欢迎交流！