FFmpeg source code analysis: av_parser_parse2() parses data packets

FFmpeg's libavcodec module provides APIs for packet parsing as well as encoding and decoding. Among them, the av_parser_parse2() function splits a raw byte stream into complete packets. When av_read_frame() reads an audio or video frame, it calls this function to parse the packet; for a source-level walkthrough of frame reading, see the companion av_read_frame() article.

Contents

I. av_parser_parse2 source code analysis

1. av_parser_parse2 call example

2. av_parser_parse2 function source code

3. AVCodecParserContext parser context

4. AVCodecParser parser structure

5. Parser list

II. H.264 parser analysis

1. H.264 parser structure

2. h264_parse function source code

3. ff_h264_decode_extradata function

4. h264_find_frame_end function

5. ff_combine_frame function

6. parse_nal_units function


(Figure: the complete parsing flow of the av_parser_parse2 function.)

I. av_parser_parse2 source code analysis

1. av_parser_parse2 call example

The av_parser_parse2() function is declared in libavcodec/avcodec.h. A typical call loop looks like this (myparser is the AVCodecParserContext, codec_ctx the AVCodecContext):

while (in_len) {
    len = av_parser_parse2(myparser, codec_ctx, &data, &size,
                           in_data, in_len, pts, dts, pos);
    in_data += len;
    in_len  -= len;

    if (size)
        decode_frame(data, size);
}
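
The snippet above is only a fragment. For context, here is a fuller sketch of parser setup and teardown for a raw H.264 elementary stream; read_chunk() and decode_frame() are hypothetical helpers, and error handling is omitted:

#include <libavcodec/avcodec.h>

extern int  read_chunk(uint8_t *buf, int size);        /* hypothetical */
extern void decode_frame(const uint8_t *data, int size); /* hypothetical */

void parse_h264_stream(void)
{
    const AVCodec *codec         = avcodec_find_decoder(AV_CODEC_ID_H264);
    AVCodecContext *codec_ctx    = avcodec_alloc_context3(codec);
    AVCodecParserContext *parser = av_parser_init(AV_CODEC_ID_H264);
    uint8_t chunk[4096];
    int chunk_len;

    while ((chunk_len = read_chunk(chunk, sizeof(chunk))) > 0) {
        const uint8_t *in = chunk;
        while (chunk_len > 0) {
            uint8_t *data;
            int size;
            int len = av_parser_parse2(parser, codec_ctx, &data, &size,
                                       in, chunk_len,
                                       AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
            in        += len;            /* consume the parsed bytes */
            chunk_len -= len;
            if (size)                    /* a complete frame was assembled */
                decode_frame(data, size);
        }
    }
    av_parser_close(parser);
    avcodec_free_context(&codec_ctx);
}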

2. av_parser_parse2 function source code

The function implementation is located in libavcodec/parser.c, and the specific code is as follows:

int av_parser_parse2(AVCodecParserContext *s, AVCodecContext *avctx,
                     uint8_t **poutbuf, int *poutbuf_size,
                     const uint8_t *buf, int buf_size,
                     int64_t pts, int64_t dts, int64_t pos)
{
    int index, i;
    uint8_t dummy_buf[AV_INPUT_BUFFER_PADDING_SIZE];

    if (!(s->flags & PARSER_FLAG_FETCHED_OFFSET)) {
        s->next_frame_offset =
        s->cur_offset        = pos;
        s->flags            |= PARSER_FLAG_FETCHED_OFFSET;
    }
    if (buf_size == 0) {
        /* padding is always necessary even if EOF, so we add it here */
        memset(dummy_buf, 0, sizeof(dummy_buf));
        buf = dummy_buf;
    } else if (s->cur_offset + buf_size != s->cur_frame_end[s->cur_frame_start_index]) {
        /* add a new packet descriptor */
        i = (s->cur_frame_start_index + 1) & (AV_PARSER_PTS_NB - 1);
        s->cur_frame_start_index = i;
        s->cur_frame_offset[i]   = s->cur_offset;
        s->cur_frame_end[i]      = s->cur_offset + buf_size;
        s->cur_frame_pts[i]      = pts;
        s->cur_frame_dts[i]      = dts;
        s->cur_frame_pos[i]      = pos;
    }
    if (s->fetch_timestamp) {
        s->fetch_timestamp = 0;
        s->last_pts        = s->pts;
        s->last_dts        = s->dts;
        s->last_pos        = s->pos;
        ff_fetch_timestamp(s, 0, 0, 0);
    }
    // parse the packet; the return value may be negative
    index = s->parser->parser_parse(s, avctx, (const uint8_t **) poutbuf,
                                    poutbuf_size, buf, buf_size);
    // update the stream offsets
    if (*poutbuf_size) {
        s->frame_offset = s->next_frame_offset;
        // advance to the start of the next frame
        s->next_frame_offset = s->cur_offset + index;
        s->fetch_timestamp   = 1;
    } else {
        *poutbuf = NULL;
    }
    if (index < 0)
        index = 0;
    s->cur_offset += index;
    return index;
}

3. AVCodecParserContext parser context

The core of the function is the call to s->parser->parser_parse(). Here s is an AVCodecParserContext, defined in libavcodec/avcodec.h as follows:

typedef struct AVCodecParserContext {
    void *priv_data;
    struct AVCodecParser *parser;
    int64_t frame_offset;
    int64_t cur_offset;
    int64_t next_frame_offset;
    int pict_type;
    int repeat_pict;
    int64_t pts;      // pts of the current frame
    int64_t dts;      // dts of the current frame
    int64_t last_pts; // pts of the previous frame
    int64_t last_dts; // dts of the previous frame
    int fetch_timestamp;

    int cur_frame_start_index;
    int64_t cur_frame_offset[AV_PARSER_PTS_NB];
    int64_t cur_frame_pts[AV_PARSER_PTS_NB];
    int64_t cur_frame_dts[AV_PARSER_PTS_NB];
    int flags;
    int64_t offset;
    int64_t cur_frame_end[AV_PARSER_PTS_NB];

    // keyframe flag: 1 = keyframe, 0 = non-keyframe
    int key_frame;
    int dts_sync_point;
    int dts_ref_dts_delta;
    int pts_dts_delta;
    int64_t cur_frame_pos[AV_PARSER_PTS_NB];

    // byte position of the current frame in the stream
    int64_t pos;
    // byte position of the previous frame
    int64_t last_pos;
    // duration of the current frame
    int duration;

    enum AVFieldOrder field_order;
    enum AVPictureStructure picture_structure;
    int output_picture_number;
    int width;
    int height;
    int coded_width;
    int coded_height;

    // AVPixelFormat for video, AVSampleFormat for audio
    int format;
} AVCodecParserContext;
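
Once av_parser_parse2() has emitted a frame (size > 0 in the section 1 example), the caller can read the parsed metadata straight from these fields. A small sketch, reusing myparser from the call example above (PRId64 requires <inttypes.h>):

if (size > 0) {
    printf("pict_type=%c key_frame=%d %dx%d pts=%" PRId64 " dts=%" PRId64 "\n",
           av_get_picture_type_char(myparser->pict_type),
           myparser->key_frame,
           myparser->width, myparser->height,
           myparser->pts, myparser->dts);
}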

4. AVCodecParser parser structure

AVCodecParserContext contains a parser member, which points to an AVCodecParser structure, defined as follows:

typedef struct AVCodecParser {
    int codec_ids[5];
    int priv_data_size;
    int (*parser_init)(AVCodecParserContext *s);
    int (*parser_parse)(AVCodecParserContext *s,
                        AVCodecContext *avctx,
                        const uint8_t **poutbuf, int *poutbuf_size,
                        const uint8_t *buf, int buf_size);
    void (*parser_close)(AVCodecParserContext *s);
    int (*split)(AVCodecContext *avctx, const uint8_t *buf, int buf_size);
#if FF_API_NEXT
    struct AVCodecParser *next;
#endif
} AVCodecParser;
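
For illustration, a hypothetical skeleton of a parser that fills in this interface might look as follows (the my_* names are invented; note that ParseContext lives in the private header libavcodec/parser.h, so real parsers are built inside the FFmpeg tree):

typedef struct MyParseContext {
    ParseContext pc;    /* most parsers embed ParseContext for buffering */
} MyParseContext;

static int my_parse(AVCodecParserContext *s, AVCodecContext *avctx,
                    const uint8_t **poutbuf, int *poutbuf_size,
                    const uint8_t *buf, int buf_size)
{
    /* a real parser would find a frame boundary and call ff_combine_frame();
     * this stub just passes every input buffer through as one "frame" */
    *poutbuf      = buf;
    *poutbuf_size = buf_size;
    return buf_size;     /* number of input bytes consumed */
}

AVCodecParser ff_my_parser = {
    .codec_ids      = { AV_CODEC_ID_NONE },  /* up to 5 codec ids */
    .priv_data_size = sizeof(MyParseContext),
    .parser_parse   = my_parse,
};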

5. Parser list

All parsers are declared in libavcodec/parsers.c; the actual registration table, parser_list.c, is generated automatically at build time. (Older FFmpeg versions also chained parsers into a linked list via the next pointer guarded by FF_API_NEXT above.) The declarations are as follows:

extern AVCodecParser ff_aac_parser;
extern AVCodecParser ff_aac_latm_parser;
extern AVCodecParser ff_ac3_parser;
extern AVCodecParser ff_adx_parser;
extern AVCodecParser ff_av1_parser;
extern AVCodecParser ff_avs2_parser;
extern AVCodecParser ff_avs3_parser;
extern AVCodecParser ff_bmp_parser;
extern AVCodecParser ff_cavsvideo_parser;
extern AVCodecParser ff_cook_parser;
extern AVCodecParser ff_cri_parser;
extern AVCodecParser ff_dca_parser;
extern AVCodecParser ff_dirac_parser;
extern AVCodecParser ff_dnxhd_parser;
extern AVCodecParser ff_dolby_e_parser;
extern AVCodecParser ff_dpx_parser;
extern AVCodecParser ff_dvaudio_parser;
extern AVCodecParser ff_dvbsub_parser;
extern AVCodecParser ff_dvdsub_parser;
extern AVCodecParser ff_dvd_nav_parser;
extern AVCodecParser ff_flac_parser;
extern AVCodecParser ff_g723_1_parser;
extern AVCodecParser ff_g729_parser;
extern AVCodecParser ff_gif_parser;
extern AVCodecParser ff_gsm_parser;
extern AVCodecParser ff_h261_parser;
extern AVCodecParser ff_h263_parser;
extern AVCodecParser ff_h264_parser;
extern AVCodecParser ff_hevc_parser;
extern AVCodecParser ff_ipu_parser;
extern AVCodecParser ff_jpeg2000_parser;
extern AVCodecParser ff_mjpeg_parser;
extern AVCodecParser ff_mlp_parser;
extern AVCodecParser ff_mpeg4video_parser;
extern AVCodecParser ff_mpegaudio_parser;
extern AVCodecParser ff_mpegvideo_parser;
extern AVCodecParser ff_opus_parser;
extern AVCodecParser ff_png_parser;
extern AVCodecParser ff_pnm_parser;
extern AVCodecParser ff_rv30_parser;
extern AVCodecParser ff_rv40_parser;
extern AVCodecParser ff_sbc_parser;
extern AVCodecParser ff_sipr_parser;
extern AVCodecParser ff_tak_parser;
extern AVCodecParser ff_vc1_parser;
extern AVCodecParser ff_vorbis_parser;
extern AVCodecParser ff_vp3_parser;
extern AVCodecParser ff_vp8_parser;
extern AVCodecParser ff_vp9_parser;
extern AVCodecParser ff_webp_parser;
extern AVCodecParser ff_xbm_parser;
extern AVCodecParser ff_xma_parser;
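
Given these declarations, av_parser_init() selects the matching parser by codec id. A simplified sketch of that lookup, assuming FFmpeg 4.x+ where the public av_parser_iterate() walks the generated parser_list.c table:

const AVCodecParser *find_parser(enum AVCodecID codec_id)
{
    const AVCodecParser *parser;
    void *opaque = NULL;

    while ((parser = av_parser_iterate(&opaque))) {
        for (int i = 0; i < 5; i++)          /* codec_ids has 5 slots */
            if (parser->codec_ids[i] == codec_id)
                return parser;
    }
    return NULL;                             /* no parser for this codec */
}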

II. H.264 parser analysis

1. H.264 parser structure

Taking the H.264 parser as an example, the corresponding parser structure is ff_h264_parser, located in libavcodec/h264_parser.c:

AVCodecParser ff_h264_parser = {
    .codec_ids      = { AV_CODEC_ID_H264 },
    .priv_data_size = sizeof(H264ParseContext),
    .parser_init    = init,
    .parser_parse   = h264_parse,
    .parser_close   = h264_close,
    .split          = h264_split,
};

2. h264_parse function source code

Here the parser_parse function pointer points to h264_parse(). Let's look at its source code (located in libavcodec/h264_parser.c):

static int h264_parse(AVCodecParserContext *s,
                      AVCodecContext *avctx,
                      const uint8_t **poutbuf, int *poutbuf_size,
                      const uint8_t *buf, int buf_size)
{
    H264ParseContext *p = s->priv_data;
    ParseContext *pc = &p->pc;
    int next;

    if (!p->got_first) {
        p->got_first = 1;
        if (avctx->extradata_size) {
            // parse the extradata (avcC), which carries the SPS and PPS
            ff_h264_decode_extradata(avctx->extradata, avctx->extradata_size,
                                     &p->ps, &p->is_avc, &p->nal_length_size,
                                     avctx->err_recognition, avctx);
        }
    }
    if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
        next = buf_size;
    } else {
        // find the frame end: FFmpeg treats the next frame's start code as the end of the current frame
        next = h264_find_frame_end(p, buf, buf_size, avctx);
        // assemble the frame
        if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
            *poutbuf      = NULL;
            *poutbuf_size = 0;
            return buf_size;
        }
        if (next < 0 && next != END_NOT_FOUND) {
            av_assert1(pc->last_index + next >= 0);
            h264_find_frame_end(p, &pc->buffer[pc->last_index + next], -next, avctx);
        }
    }
    // parse the NAL units
    parse_nal_units(s, avctx, buf, buf_size);

    if (avctx->framerate.num)
        avctx->time_base = av_inv_q(av_mul_q(avctx->framerate, (AVRational){avctx->ticks_per_frame, 1}));
    if (p->sei.picture_timing.cpb_removal_delay >= 0) {
        s->dts_sync_point    = p->sei.buffering_period.present;
        s->dts_ref_dts_delta = p->sei.picture_timing.cpb_removal_delay;
        s->pts_dts_delta     = p->sei.picture_timing.dpb_output_delay;
    } else {
        s->dts_sync_point    = INT_MIN;
        s->dts_ref_dts_delta = INT_MIN;
        s->pts_dts_delta     = INT_MIN;
    }
    if (s->flags & PARSER_FLAG_ONCE) {
        s->flags &= PARSER_FLAG_COMPLETE_FRAMES;
    }
    if (s->dts_sync_point >= 0) {
        int64_t den = avctx->time_base.den * (int64_t)avctx->pkt_timebase.num;
        if (den > 0) {
            int64_t num = avctx->time_base.num * (int64_t)avctx->pkt_timebase.den;
            if (s->dts != AV_NOPTS_VALUE) {
                // dts came from the stream: update the reference timestamp
                p->reference_dts = s->dts - av_rescale(s->dts_ref_dts_delta, num, den);
            } else if (p->reference_dts != AV_NOPTS_VALUE) {
                // derive dts from the reference timestamp
                s->dts = p->reference_dts + av_rescale(s->dts_ref_dts_delta, num, den);
            }
            if (p->reference_dts != AV_NOPTS_VALUE && s->pts == AV_NOPTS_VALUE)
                s->pts = s->dts + av_rescale(s->pts_dts_delta, num, den);
            if (s->dts_sync_point > 0)
                p->reference_dts = s->dts;
        }
    }
    *poutbuf      = buf;
    *poutbuf_size = buf_size;
    return next;
}

As can be seen, the H.264 parsing process breaks down into four steps:

  • Call ff_h264_decode_extradata() to parse extra data;
  • Call h264_find_frame_end() to find the end of the frame;
  • Call ff_combine_frame() to perform framing;
  • Call parse_nal_units() to parse NAL units;

3. ff_h264_decode_extradata function

On the first call, the extradata is parsed first: for H.264 in MP4-style packaging this is the avcC box, which carries the SPS and PPS parameter sets. The parser then locates the end of the frame; by default the start code of the next frame marks the end of the current one. Finally the NAL units are parsed and the dts timestamp is updated. Let's look at the extradata-parsing code (located in libavcodec/h264_parse.c):

int ff_h264_decode_extradata(const uint8_t *data, int size, H264ParamSets *ps,
                             int *is_avc, int *nal_length_size,
                             int err_recognition, void *logctx)
{
    int ret;
    if (!data || size <= 0)
        return -1;
    if (data[0] == 1) {
        int i, cnt, nalsize;
        const uint8_t *p = data;
        *is_avc = 1;
        if (size < 7) {
            return AVERROR_INVALIDDATA;
        }
        // parse the SPS entries from avcC
        cnt = *(p + 5) & 0x1f; // number of SPS
        p  += 6;
        for (i = 0; i < cnt; i++) {
            nalsize = AV_RB16(p) + 2;
            if (nalsize > size - (p - data))
                return AVERROR_INVALIDDATA;
            ret = decode_extradata_ps_mp4(p, nalsize, ps, err_recognition, logctx);
            if (ret < 0) {
                return ret;
            }
            p += nalsize;
        }
        // parse the PPS entries from avcC
        cnt = *(p++); // number of PPS
        for (i = 0; i < cnt; i++) {
            nalsize = AV_RB16(p) + 2;
            if (nalsize > size - (p - data))
                return AVERROR_INVALIDDATA;
            ret = decode_extradata_ps_mp4(p, nalsize, ps, err_recognition, logctx);
            if (ret < 0) {
                return ret;
            }
            p += nalsize;
        }
        // store the size of the NAL length field (1-4 bytes)
        *nal_length_size = (data[4] & 0x03) + 1;
    } else {
        *is_avc = 0;
        ret = decode_extradata_ps(data, size, ps, 0, logctx);
        if (ret < 0)
            return ret;
    }
    return size;
}
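
For reference, the avcC layout this code walks (the AVCDecoderConfigurationRecord from ISO/IEC 14496-15) can be summarized as a byte map; this matches the offsets used above:

/*
 * data[0]  configurationVersion, always 1 (hence the data[0] == 1 check)
 * data[1]  AVCProfileIndication
 * data[2]  profile_compatibility
 * data[3]  AVCLevelIndication
 * data[4]  6 reserved bits + 2-bit lengthSizeMinusOne
 *          -> *nal_length_size = (data[4] & 0x03) + 1
 * data[5]  3 reserved bits + 5-bit numOfSequenceParameterSets
 *          -> cnt = *(p + 5) & 0x1f
 * then per SPS: 16-bit big-endian length (AV_RB16) + SPS NAL unit
 * then 1 byte:  numOfPictureParameterSets
 * then per PPS: 16-bit big-endian length + PPS NAL unit
 */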

4. h264_find_frame_end function

Next, h264_find_frame_end() is called to locate the frame boundary. It runs a small state machine over the input bytes, handling both Annex B start codes and, for avc/MP4 streams, length-prefixed NAL units. The code is located in libavcodec/h264_parser.c:

static int h264_find_frame_end(H264ParseContext *p, const uint8_t *buf,
                               int buf_size, void *logctx)
{
    int i, j;
    uint32_t state;
    ParseContext *pc = &p->pc;
    int next_avc = p->is_avc ? 0 : buf_size;
    state = pc->state;
    if (state > 13)
        state = 7;

    for (i = 0; i < buf_size; i++) {
        if (i >= next_avc) {
            int nalsize = 0;
            i = next_avc;
            for (j = 0; j < p->nal_length_size; j++)
                nalsize = (nalsize << 8) | buf[i++];
            if (nalsize <= 0 || nalsize > buf_size - i) {
                return buf_size;
            }
            next_avc = i + nalsize;
            state    = 5;
        }

        if (state == 7) {
            i += p->h264dsp.startcode_find_candidate(buf + i, next_avc - i);
            if (i < next_avc)
                state = 2;
        } else if (state <= 2) {
            if (buf[i] == 1)
                state ^= 5;
            else if (buf[i])
                state = 7;
            else
                state >>= 1;
        } else if (state <= 5) {
            int nalu_type = buf[i] & 0x1F;
            if (nalu_type == H264_NAL_SEI || nalu_type == H264_NAL_SPS ||
                nalu_type == H264_NAL_PPS || nalu_type == H264_NAL_AUD) {
                if (pc->frame_start_found) {
                    i++;
                    goto found;
                }
            } else if (nalu_type == H264_NAL_SLICE || nalu_type == H264_NAL_DPA ||
                       nalu_type == H264_NAL_IDR_SLICE) {
                state += 8;
                continue;
            }
            state = 7;
        } else {
            unsigned int mb, last_mb = p->parse_last_mb;
            GetBitContext gb;
            p->parse_history[p->parse_history_count++] = buf[i];

            init_get_bits(&gb, p->parse_history, 8*p->parse_history_count);
            mb= get_ue_golomb_long(&gb);
            if (get_bits_left(&gb) > 0 || p->parse_history_count > 5) {
                p->parse_last_mb = mb;
                if (pc->frame_start_found) {
                    if (mb <= last_mb) {
                        i -= p->parse_history_count - 1;
                        p->parse_history_count = 0;
                        goto found;
                    }
                } else
                    pc->frame_start_found = 1;
                p->parse_history_count = 0;
                state = 7;
            }
        }
    }
    pc->state = state;
    if (p->is_avc)
        return next_avc;
    return END_NOT_FOUND;

found:
    pc->state             = 7;
    pc->frame_start_found = 0;
    if (p->is_avc)
        return next_avc;
    return i - (state & 5);
}
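
The state machine above is an optimized scanner, so it is hard to read in isolation. Conceptually, for Annex B input it searches for the 0x000001 start code that opens each NAL unit, inspects the NAL type, and for slice NALs decodes first_mb_in_slice (the Exp-Golomb read in the final else branch) to detect where a new picture begins. A naive, unoptimized sketch of just the start-code search:

/* Hypothetical helper, shown only to illustrate what the state machine
 * is searching for; the real code avoids rescanning via pc->state. */
static int naive_find_start_code(const uint8_t *buf, int buf_size)
{
    for (int i = 0; i + 2 < buf_size; i++)
        if (buf[i] == 0 && buf[i + 1] == 0 && buf[i + 2] == 1)
            return i;        /* offset of the 3-byte start code */
    return -1;               /* no start code in this buffer */
}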

5. ff_combine_frame function

Once the frame boundary is found, ff_combine_frame() is called to splice buffered and current data into one complete frame. The code is located in libavcodec/parser.c:

int ff_combine_frame(ParseContext *pc, int next, const uint8_t **buf, int *buf_size)
{
    /* Copy overread bytes from last frame into buffer. */
    for (; pc->overread > 0; pc->overread--)
        pc->buffer[pc->index++] = pc->buffer[pc->overread_index++];

    if (next > *buf_size)
        return AVERROR(EINVAL);

    /* flush remaining if EOF */
    if (!*buf_size && next == END_NOT_FOUND)
        next = 0;

    pc->last_index = pc->index;
    /* copy into buffer end return */
    if (next == END_NOT_FOUND) {
        void *new_buffer = av_fast_realloc(pc->buffer, &pc->buffer_size,
                                           *buf_size + pc->index +
                                           AV_INPUT_BUFFER_PADDING_SIZE);
        if (!new_buffer) {
            pc->index = 0;
            return AVERROR(ENOMEM);
        }
        pc->buffer = new_buffer;
        memcpy(&pc->buffer[pc->index], *buf, *buf_size);
        pc->index += *buf_size;
        return -1;
    }

    *buf_size          =
    pc->overread_index = pc->index + next;
    if (pc->index) {
        void *new_buffer = av_fast_realloc(pc->buffer, &pc->buffer_size,
                                           next + pc->index +
                                           AV_INPUT_BUFFER_PADDING_SIZE);
        if (!new_buffer) {
            pc->overread_index =
            pc->index = 0;
            return AVERROR(ENOMEM);
        }
        pc->buffer = new_buffer;
        if (next > -AV_INPUT_BUFFER_PADDING_SIZE)
            memcpy(&pc->buffer[pc->index], *buf,
                   next + AV_INPUT_BUFFER_PADDING_SIZE);
        pc->index = 0;
        *buf      = pc->buffer;
    }

    if (next < -8) {
        pc->overread += -8 - next;
        next = -8;
    }
    /* store overread bytes */
    for (; next < 0; next++) {
        pc->state   = pc->state   << 8 | pc->buffer[pc->last_index + next];
        pc->state64 = pc->state64 << 8 | pc->buffer[pc->last_index + next];
        pc->overread++;
    }

    return 0;
}
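
Summarizing the contract between h264_parse() and ff_combine_frame() (my reading of the code above, not official FFmpeg documentation):

/*
 * next >= 0:             a frame ends `next` bytes into buf; the call
 *                        returns 0 and rewrites *buf/*buf_size to span the
 *                        assembled frame, including bytes buffered from
 *                        previous calls.
 * next == END_NOT_FOUND: no boundary yet; the input is appended to
 *                        pc->buffer and -1 is returned, so the parser emits
 *                        nothing and waits for more data.
 * other next < 0:        the boundary lies -next bytes before the end of
 *                        the data seen so far; the trailing bytes are kept
 *                        as "overread" and replayed on the next call.
 */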

6. parse_nal_units function

Finally, parse_nal_units() parses the NAL units. It is located in libavcodec/h264_parser.c; the code is as follows:

static inline int parse_nal_units(AVCodecParserContext *s,
                                  AVCodecContext *avctx,
                                  const uint8_t * const buf, int buf_size)
{
    H264ParseContext *p = s->priv_data;
    H2645RBSP rbsp = { NULL };
    H2645NAL nal = { NULL };
    int buf_index, next_avc;
    unsigned int pps_id;
    unsigned int slice_type;
    int state = -1, got_reset = 0;
    int q264 = buf_size >=4 && !memcmp("Q264", buf, 4);
    int field_poc[2];
    int ret;
    s->pict_type         = AV_PICTURE_TYPE_I;
    s->key_frame         = 0;
    s->picture_structure = AV_PICTURE_STRUCTURE_UNKNOWN;

    ff_h264_sei_uninit(&p->sei);
    p->sei.frame_packing.arrangement_cancel_flag = -1;
    if (!buf_size)
        return 0;
    av_fast_padded_malloc(&rbsp.rbsp_buffer, &rbsp.rbsp_buffer_alloc_size, buf_size);
    if (!rbsp.rbsp_buffer)
        return AVERROR(ENOMEM);
    buf_index     = 0;
    next_avc      = p->is_avc ? 0 : buf_size;
    for (;;) {
        const SPS *sps;
        int src_length, consumed, nalsize = 0;

        if (buf_index >= next_avc) {
            // read the NAL size (length-prefixed avc/MP4 format)
            nalsize = get_nalsize(p->nal_length_size, buf, buf_size, &buf_index, avctx);
            if (nalsize < 0)
                break;
            next_avc = buf_index + nalsize;
        } else {
            // locate the next start code (Annex B format)
            buf_index = find_start_code(buf, buf_size, buf_index, next_avc);
            if (buf_index >= buf_size)
                break;
            if (buf_index >= next_avc)
                continue;
        }
        src_length = next_avc - buf_index;
        state = buf[buf_index];
        switch (state & 0x1f) {
        case H264_NAL_SLICE:
        case H264_NAL_IDR_SLICE:
            // only the slice header needs parsing, so truncate src_length
            if ((state & 0x1f) == H264_NAL_IDR_SLICE || ((state >> 5) & 0x3) == 0) {
                if (src_length > 60)
                    src_length = 60;
            } else {
                if (src_length > 1000)
                    src_length = 1000;
            }
            break;
        }
        // extract the RBSP from the NAL unit (strip emulation prevention bytes)
        consumed = ff_h2645_extract_rbsp(buf + buf_index, src_length, &rbsp, &nal, 1);
        if (consumed < 0)
            break;
        buf_index += consumed;
        ret = init_get_bits8(&nal.gb, nal.data, nal.size);
        if (ret < 0)
            goto fail;
        get_bits1(&nal.gb);
        nal.ref_idc = get_bits(&nal.gb, 2);
        nal.type    = get_bits(&nal.gb, 5);

        switch (nal.type) {
        case H264_NAL_SPS: // sequence parameter set
            ff_h264_decode_seq_parameter_set(&nal.gb, avctx, &p->ps, 0);
            break;
        case H264_NAL_PPS: // picture parameter set
            ff_h264_decode_picture_parameter_set(&nal.gb, avctx, &p->ps,
                                                 nal.size_bits);
            break;
        case H264_NAL_SEI: // supplemental enhancement information
            ff_h264_sei_decode(&p->sei, &nal.gb, &p->ps, avctx);
            break;
        case H264_NAL_IDR_SLICE: // IDR (instantaneous decoder refresh); falls through
            s->key_frame = 1;
            p->poc.prev_frame_num        = 0;
            p->poc.prev_frame_num_offset = 0;
            p->poc.prev_poc_msb          =
            p->poc.prev_poc_lsb          = 0;
        case H264_NAL_SLICE: // coded slice
            get_ue_golomb_long(&nal.gb);
            slice_type   = get_ue_golomb_31(&nal.gb);
            s->pict_type = ff_h264_golomb_to_pict_type[slice_type % 5];
            if (p->sei.recovery_point.recovery_frame_cnt >= 0) {
                s->key_frame = 1;
            }
            pps_id = get_ue_golomb(&nal.gb);
            if (pps_id >= MAX_PPS_COUNT) {
                goto fail;
            }
            if (!p->ps.pps_list[pps_id]) {
                goto fail;
            }
            av_buffer_unref(&p->ps.pps_ref);
            p->ps.pps = NULL;
            p->ps.sps = NULL;
            p->ps.pps_ref = av_buffer_ref(p->ps.pps_list[pps_id]);
            if (!p->ps.pps_ref)
                goto fail;
            p->ps.pps = (const PPS*)p->ps.pps_ref->data;
            p->ps.sps = p->ps.pps->sps;
            sps       = p->ps.sps;
            // detect keyframes that are not explicitly marked
            if (p->ps.sps->ref_frame_count <= 1 && p->ps.pps->ref_count[0] <= 1 && s->pict_type == AV_PICTURE_TYPE_I)
                s->key_frame = 1;

            p->poc.frame_num = get_bits(&nal.gb, sps->log2_max_frame_num);
            s->coded_width  = 16 * sps->mb_width;
            s->coded_height = 16 * sps->mb_height;
            s->width        = s->coded_width  - (sps->crop_right + sps->crop_left);
            s->height       = s->coded_height - (sps->crop_top   + sps->crop_bottom);
            if (s->width <= 0 || s->height <= 0) {
                s->width  = s->coded_width;
                s->height = s->coded_height;
            }
            // infer the pixel format from bit depth and chroma_format_idc
            switch (sps->bit_depth_luma) {
            case 9:
                if (sps->chroma_format_idc == 3)      s->format = AV_PIX_FMT_YUV444P9;
                else if (sps->chroma_format_idc == 2) s->format = AV_PIX_FMT_YUV422P9;
                else                                  s->format = AV_PIX_FMT_YUV420P9;
                break;
            case 10:
                if (sps->chroma_format_idc == 3)      s->format = AV_PIX_FMT_YUV444P10;
                else if (sps->chroma_format_idc == 2) s->format = AV_PIX_FMT_YUV422P10;
                else                                  s->format = AV_PIX_FMT_YUV420P10;
                break;
            case 8:
                if (sps->chroma_format_idc == 3)      s->format = AV_PIX_FMT_YUV444P;
                else if (sps->chroma_format_idc == 2) s->format = AV_PIX_FMT_YUV422P;
                else                                  s->format = AV_PIX_FMT_YUV420P;
                break;
            default:
                s->format = AV_PIX_FMT_NONE;
            }
            avctx->profile = ff_h264_get_profile(sps);
            avctx->level   = sps->level_idc;
            if (sps->frame_mbs_only_flag) {
                p->picture_structure = PICT_FRAME;
            } else {
                if (get_bits1(&nal.gb)) {
                    p->picture_structure = PICT_TOP_FIELD + get_bits1(&nal.gb);
                } else {
                    p->picture_structure = PICT_FRAME;
                }
            }
            if (nal.type == H264_NAL_IDR_SLICE)
                get_ue_golomb_long(&nal.gb); /* idr_pic_id */
            if (sps->poc_type == 0) {
                p->poc.poc_lsb = get_bits(&nal.gb, sps->log2_max_poc_lsb);

                if (p->ps.pps->pic_order_present == 1 &&
                    p->picture_structure == PICT_FRAME)
                    p->poc.delta_poc_bottom = get_se_golomb(&nal.gb);
            }
            if (sps->poc_type == 1 &&
                !sps->delta_pic_order_always_zero_flag) {
                p->poc.delta_poc[0] = get_se_golomb(&nal.gb);

                if (p->ps.pps->pic_order_present == 1 &&
                    p->picture_structure == PICT_FRAME)
                    p->poc.delta_poc[1] = get_se_golomb(&nal.gb);
            }
            field_poc[0] = field_poc[1] = INT_MAX;
            ret = ff_h264_init_poc(field_poc, &s->output_picture_number, sps,
                             &p->poc, p->picture_structure, nal.ref_idc);
            if (ret < 0)
                goto fail;
            if (nal.ref_idc && nal.type != H264_NAL_IDR_SLICE) {
                got_reset = scan_mmco_reset(s, &nal.gb, avctx);
                if (got_reset < 0)
                    goto fail;
            }
            p->poc.prev_frame_num        = got_reset ? 0 : p->poc.frame_num;
            p->poc.prev_frame_num_offset = got_reset ? 0 : p->poc.frame_num_offset;
            if (nal.ref_idc != 0) {
                if (!got_reset) {
                    p->poc.prev_poc_msb = p->poc.poc_msb;
                    p->poc.prev_poc_lsb = p->poc.poc_lsb;
                } else {
                    p->poc.prev_poc_msb = 0;
                    p->poc.prev_poc_lsb =
                        p->picture_structure == PICT_BOTTOM_FIELD ? 0 : field_poc[0];
                }
            }
            if (p->sei.picture_timing.present) {
                // process the SEI picture timing information
                ret = ff_h264_sei_process_picture_timing(&p->sei.picture_timing,
                                                         sps, avctx);
                if (ret < 0) {
                    p->sei.picture_timing.present = 0;
                }
            }
            if (sps->pic_struct_present_flag && p->sei.picture_timing.present) {
                switch (p->sei.picture_timing.pic_struct) {
                case H264_SEI_PIC_STRUCT_TOP_FIELD:
                case H264_SEI_PIC_STRUCT_BOTTOM_FIELD:
                    s->repeat_pict = 0;
                    break;
                case H264_SEI_PIC_STRUCT_FRAME:
                case H264_SEI_PIC_STRUCT_TOP_BOTTOM:
                case H264_SEI_PIC_STRUCT_BOTTOM_TOP:
                    s->repeat_pict = 1;
                    break;
                case H264_SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
                case H264_SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
                    s->repeat_pict = 2;
                    break;
                case H264_SEI_PIC_STRUCT_FRAME_DOUBLING:
                    s->repeat_pict = 3;
                    break;
                case H264_SEI_PIC_STRUCT_FRAME_TRIPLING:
                    s->repeat_pict = 5;
                    break;
                default:
                    s->repeat_pict = p->picture_structure == PICT_FRAME ? 1 : 0;
                    break;
                }
            } else {
                s->repeat_pict = p->picture_structure == PICT_FRAME ? 1 : 0;
            }

            if (p->picture_structure == PICT_FRAME) {
                s->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
                if (sps->pic_struct_present_flag && p->sei.picture_timing.present) {
                    switch (p->sei.picture_timing.pic_struct) {
                    case H264_SEI_PIC_STRUCT_TOP_BOTTOM:
                    case H264_SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
                        s->field_order = AV_FIELD_TT;
                        break;
                    case H264_SEI_PIC_STRUCT_BOTTOM_TOP:
                    case H264_SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
                        s->field_order = AV_FIELD_BB;
                        break;
                    default:
                        s->field_order = AV_FIELD_PROGRESSIVE;
                        break;
                    }
                } else {
                    if (field_poc[0] < field_poc[1])
                        s->field_order = AV_FIELD_TT;
                    else if (field_poc[0] > field_poc[1])
                        s->field_order = AV_FIELD_BB;
                    else
                        s->field_order = AV_FIELD_PROGRESSIVE;
                }
            } else {
                if (p->picture_structure == PICT_TOP_FIELD)
                    s->picture_structure = AV_PICTURE_STRUCTURE_TOP_FIELD;
                else
                    s->picture_structure = AV_PICTURE_STRUCTURE_BOTTOM_FIELD;
                if (p->poc.frame_num == p->last_frame_num &&
                    p->last_picture_structure != AV_PICTURE_STRUCTURE_UNKNOWN &&
                    p->last_picture_structure != AV_PICTURE_STRUCTURE_FRAME &&
                    p->last_picture_structure != s->picture_structure) {
                    if (p->last_picture_structure == AV_PICTURE_STRUCTURE_TOP_FIELD)
                        s->field_order = AV_FIELD_TT;
                    else
                        s->field_order = AV_FIELD_BB;
                } else {
                    s->field_order = AV_FIELD_UNKNOWN;
                }
                p->last_picture_structure = s->picture_structure;
                p->last_frame_num = p->poc.frame_num;
            }
            av_freep(&rbsp.rbsp_buffer);
            return 0;
        }
    }
    if (q264) {
        av_freep(&rbsp.rbsp_buffer);
        return 0;
    }
    av_log(avctx, AV_LOG_ERROR, "missing picture in access unit with size %d\n", buf_size);
fail:
    av_freep(&rbsp.rbsp_buffer);
    return -1;
}

Parsing NAL units is driven by the NAL type: SPS, PPS, SEI, IDR and slice. For slice types, the parser additionally detects keyframes that are not explicitly marked, infers the pixel format from the bit depth and chroma_format_idc, and processes the SEI picture-timing information.

This completes the analysis of FFmpeg's packet-parsing process.
