ffplay.c source code analysis [3]

  In the previous chapters, ffplay.c source code analysis [1] and ffplay.c source code analysis [2], we saw how ffplay decodes frames and stores them in the corresponding queues. This article covers audio output, video output, and audio-video synchronization.

Audio output

  The audio output of ffplay is implemented with SDL, an open-source cross-platform multimedia library. Once SDL audio is enabled, SDL requests data through a user callback whenever the device needs output, telling the callback exactly how many bytes to provide. Here a problem arises: after ffmpeg decodes an audio AVPacket into an AVFrame, the size of the decoded AVFrame rarely equals the amount of data the callback asks for. Moreover, if variable-speed playback is implemented, each AVFrame's size after the speed change also differs from what the SDL callback requires. The solution is a buffer: after a Frame is read from the FrameQueue, it is first cached in the buffer, and the data handed to the SDL callback is then read from that buffer.
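  The following minimal sketch shows that buffering pattern in isolation (this is not the actual ffplay code, and decode_one_frame is a hypothetical helper standing in for decode + resample):

#include <stdint.h>
#include <string.h> /* memcpy */

static uint8_t *audio_buf;       /* current decoded/resampled chunk    */
static unsigned audio_buf_size;  /* total bytes available in audio_buf */
static unsigned audio_buf_index; /* bytes already handed over to SDL   */

/* hypothetical helper: decode one frame, resample it, point *buf at the
 * result and return its size in bytes */
unsigned decode_one_frame(uint8_t **buf);

static void audio_callback_sketch(void *opaque, uint8_t *stream, int len)
{
    (void)opaque;
    while (len > 0) {
        if (audio_buf_index >= audio_buf_size) { /* chunk used up: refill */
            audio_buf_size = decode_one_frame(&audio_buf);
            audio_buf_index = 0;
        }
        unsigned chunk = audio_buf_size - audio_buf_index;
        if (chunk > (unsigned)len)
            chunk = (unsigned)len;
        memcpy(stream, audio_buf + audio_buf_index, chunk);
        audio_buf_index += chunk;
        stream += chunk;
        len -= (int)chunk;
    }
}

  This is exactly the shape of ffplay's real sdl_audio_callback / audio_decode_frame pair shown below.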


  SDL requests audio data from ffplay through the sdl_audio_callback function. ffplay takes data out of sampq via audio_decode_frame, puts it into is->audio_buf, and hands it to SDL; whenever audio_buf runs out, audio_decode_frame is called again to refill it. First, audio_open sets up the device:

static int audio_open(void *opaque, int64_t wanted_channel_layout, int wanted_nb_channels, int wanted_sample_rate, struct AudioParams *audio_hw_params)
{
    SDL_AudioSpec wanted_spec, spec;
    const char *env;
    static const int next_nb_channels[] = {0, 0, 1, 6, 2, 6, 4, 6};
    static const int next_sample_rates[] = {0, 44100, 48000, 96000, 192000};
    int next_sample_rate_idx = FF_ARRAY_ELEMS(next_sample_rates) - 1;
 
    env = SDL_getenv("SDL_AUDIO_CHANNELS");
    if (env) { // if the environment variable is set, take the channel count and layout from it first
        wanted_nb_channels = atoi(env);
        wanted_channel_layout = av_get_default_channel_layout(wanted_nb_channels);
    }
    if (!wanted_channel_layout || wanted_nb_channels != av_get_channel_layout_nb_channels(wanted_channel_layout)) {
        wanted_channel_layout = av_get_default_channel_layout(wanted_nb_channels);
        wanted_channel_layout &= ~AV_CH_LAYOUT_STEREO_DOWNMIX;
    }
    // derive nb_channels from channel_layout; if the wanted_nb_channels argument does not match, it is corrected here
    wanted_nb_channels = av_get_channel_layout_nb_channels(wanted_channel_layout);
    wanted_spec.channels = wanted_nb_channels;
    wanted_spec.freq = wanted_sample_rate;
    if (wanted_spec.freq <= 0 || wanted_spec.channels <= 0) {
        av_log(NULL, AV_LOG_ERROR, "Invalid sample rate or channel count!\n");
        return -1;
    }
    while (next_sample_rate_idx && next_sample_rates[next_sample_rate_idx] >= wanted_spec.freq)
        next_sample_rate_idx--;
    wanted_spec.format = AUDIO_S16SYS;
    wanted_spec.silence = 0;
    // SDL_AUDIO_MAX_CALLBACKS_PER_SEC caps the callbacks per second, avoiding overly frequent callbacks
    wanted_spec.samples = FFMAX(SDL_AUDIO_MIN_BUFFER_SIZE, 2 << av_log2(wanted_spec.freq / SDL_AUDIO_MAX_CALLBACKS_PER_SEC));
    wanted_spec.callback = sdl_audio_callback; // set the data callback
    wanted_spec.userdata = opaque;
    while (!(audio_dev = SDL_OpenAudioDevice(NULL, 0, &wanted_spec, &spec, SDL_AUDIO_ALLOW_FREQUENCY_CHANGE | SDL_AUDIO_ALLOW_CHANNELS_CHANGE))) {
        av_log(NULL, AV_LOG_WARNING, "SDL_OpenAudio (%d channels, %d Hz): %s\n",
               wanted_spec.channels, wanted_spec.freq, SDL_GetError());
        wanted_spec.channels = next_nb_channels[FFMIN(7, wanted_spec.channels)];
        if (!wanted_spec.channels) {
            wanted_spec.freq = next_sample_rates[next_sample_rate_idx--];
            wanted_spec.channels = wanted_nb_channels;
            if (!wanted_spec.freq) {
                av_log(NULL, AV_LOG_ERROR,
                       "No more combinations to try, audio open failed\n");
                return -1;
            }
        }
        wanted_channel_layout = av_get_default_channel_layout(wanted_spec.channels);
    }
    // check the actual parameters of the opened audio device: sample format
    if (spec.format != AUDIO_S16SYS) {
        av_log(NULL, AV_LOG_ERROR,
               "SDL advised audio format %d is not supported!\n", spec.format);
        return -1;
    }
    // channel count: spec.channels is the actual value, wanted_spec.channels the requested one
    if (spec.channels != wanted_spec.channels) {
        wanted_channel_layout = av_get_default_channel_layout(spec.channels);
        if (!wanted_channel_layout) {
            av_log(NULL, AV_LOG_ERROR,
                   "SDL advised channel count %d is not supported!\n", spec.channels);
            return -1;
        }
    }
 
    audio_hw_params->fmt = AV_SAMPLE_FMT_S16;
    audio_hw_params->freq = spec.freq;
    audio_hw_params->channel_layout = wanted_channel_layout;
    audio_hw_params->channels =  spec.channels;
    // audio_hw_params->frame_size is the number of bytes occupied by one sample frame (all channels)
    audio_hw_params->frame_size = av_samples_get_buffer_size(NULL, audio_hw_params->channels, 1, audio_hw_params->fmt, 1);
    audio_hw_params->bytes_per_sec = av_samples_get_buffer_size(NULL, audio_hw_params->channels, audio_hw_params->freq, audio_hw_params->fmt, 1);
    if (audio_hw_params->bytes_per_sec <= 0 || audio_hw_params->frame_size <= 0) {
        av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size failed\n");
        return -1;
    }
    return spec.size; // bytes in the hardware audio buffer: samples * channels * bytes_per_sample
}
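  As a quick sanity check on these sizes (hypothetical values): suppose SDL grants 48 kHz stereo S16 with spec.samples = 1024. Then frame_size = 2 channels * 2 bytes = 4 bytes, bytes_per_sec = 48000 * 4 = 192000 bytes/s, and the returned spec.size is 1024 * 4 = 4096 bytes, i.e. SDL will ask for 4096 bytes of audio on each callback.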

  Next, look at the implementation of sdl_audio_callback:

static void sdl_audio_callback(void *opaque, Uint8 *stream, int len)
{
    VideoState *is = opaque;
    int audio_size, len1;
 
    audio_callback_time = av_gettime_relative();
    // loop until enough data has been copied out
    while (len > 0) {
        if (is->audio_buf_index >= is->audio_buf_size) { // buffer exhausted: fetch more data via audio_decode_frame
           audio_size = audio_decode_frame(is); // read a frame and resample it
           if (audio_size < 0) {
                /* if error, just output silence */
               is->audio_buf = NULL;
               is->audio_buf_size = SDL_AUDIO_MIN_BUFFER_SIZE / is->audio_tgt.frame_size * is->audio_tgt.frame_size;
           } else {
               if (is->show_mode != SHOW_MODE_VIDEO)
                   update_sample_display(is, (int16_t *)is->audio_buf, audio_size);
               is->audio_buf_size = audio_size;
           }
           is->audio_buf_index = 0;
        }
    // copy no more than what remains in the buffer
        len1 = is->audio_buf_size - is->audio_buf_index;
        if (len1 > len)
            len1 = len;
    // decide how to output audio_buf based on audio_volume:
    // if not muted and the volume is at maximum, just memcpy the data
        if (!is->muted && is->audio_buf && is->audio_volume == SDL_MIX_MAXVOLUME)
            memcpy(stream, (uint8_t *)is->audio_buf + is->audio_buf_index, len1);
        else {
            memset(stream, 0, len1); // when muted, simply fill stream with zeros
             
            if (!is->muted && is->audio_buf) // not muted but volume in (0, SDL_MIX_MAXVOLUME): use SDL_MixAudioFormat to adjust volume while mixing
                SDL_MixAudioFormat(stream, (uint8_t *)is->audio_buf + is->audio_buf_index, AUDIO_S16SYS, len1, is->audio_volume);
        }
        len -= len1;
        stream += len1;
    // advance is->audio_buf_index to the first byte of audio_buf not yet copied to stream
        is->audio_buf_index += len1;
    }
    is->audio_write_buf_size = is->audio_buf_size - is->audio_buf_index;
    /* Let's assume the audio driver that is used by SDL has two periods. */
    if (!isnan(is->audio_clock)) {
        set_clock_at(&is->audclk, is->audio_clock - (double)(2 * is->audio_hw_buf_size + is->audio_write_buf_size) / is->audio_tgt.bytes_per_sec, is->audio_clock_serial, audio_callback_time / 1000000.0);
        sync_clock_to_slave(&is->extclk, &is->audclk);
    }
}

  The callback outputs audio_buf to stream. If audio_volume is at the maximum, a plain memcpy into stream suffices; otherwise SDL_MixAudioFormat is called to scale the volume (for example after the user adjusts it). Whenever audio_buf is exhausted, audio_decode_frame is called to refill it. Next, let's analyze the audio_decode_frame function.

static int audio_decode_frame(VideoState *is)
{
    int data_size, resampled_data_size;
    int64_t dec_channel_layout;
    av_unused double audio_clock0;
    int wanted_nb_samples;
    Frame *af;
 
    if (is->paused) // paused: return at once, take nothing from the queue, play no audio
        return -1;
 
    do { // 1. take a frame from sampq; after a seek the serial becomes discontinuous and such frames must be dropped
#if defined(_WIN32)
        while (frame_queue_nb_remaining(&is->sampq) == 0) {
            if ((av_gettime_relative() - audio_callback_time) > 1000000LL * is->audio_hw_buf_size / is->audio_tgt.bytes_per_sec / 2)
                return -1;
            av_usleep (1000);
        }
#endif
        if (!(af = frame_queue_peek_readable(&is->sampq))) // wait until a frame is readable at the queue head
            return -1;
        frame_queue_next(&is->sampq); // advance the read index: only now does the Frame really leave the queue (this is also how frames get dropped)
    } while (af->serial != is->audioq.serial); // check serial continuity
    // compute the number of bytes this frame occupies
    data_size = av_samples_get_buffer_size(NULL, af->frame->channels,
                                           af->frame->nb_samples,
                                           af->frame->format, 1);
 
    dec_channel_layout =
        (af->frame->channel_layout && af->frame->channels == av_get_channel_layout_nb_channels(af->frame->channel_layout)) ?
        af->frame->channel_layout : av_get_default_channel_layout(af->frame->channels);
    wanted_nb_samples = synchronize_audio(is, af->frame->nb_samples);
    // check whether the resampler needs to be (re)initialized
    if (af->frame->format        != is->audio_src.fmt            ||
        dec_channel_layout       != is->audio_src.channel_layout ||
        af->frame->sample_rate   != is->audio_src.freq           ||
        (wanted_nb_samples       != af->frame->nb_samples && !is->swr_ctx)) {
        swr_free(&is->swr_ctx);
    // set the output and input parameters
        is->swr_ctx = swr_alloc_set_opts(NULL,
                                         is->audio_tgt.channel_layout, is->audio_tgt.fmt, is->audio_tgt.freq,
                                         dec_channel_layout,           af->frame->format, af->frame->sample_rate,
                                         0, NULL);
        if (!is->swr_ctx || swr_init(is->swr_ctx) < 0) {
            av_log(NULL, AV_LOG_ERROR,
                   "Cannot create sample rate converter for conversion of %d Hz %s %d channels to %d Hz %s %d channels!\n",
                    af->frame->sample_rate, av_get_sample_fmt_name(af->frame->format), af->frame->channels,
                    is->audio_tgt.freq, av_get_sample_fmt_name(is->audio_tgt.fmt), is->audio_tgt.channels);
            swr_free(&is->swr_ctx);
            return -1;
        }
        is->audio_src.channel_layout = dec_channel_layout;
        is->audio_src.channels       = af->frame->channels;
        is->audio_src.freq = af->frame->sample_rate;
        is->audio_src.fmt = af->frame->format;
    }
    // if the resampler was initialized
    if (is->swr_ctx) {
        const uint8_t **in = (const uint8_t **)af->frame->extended_data;
        uint8_t **out = &is->audio_buf1;
        int out_count = (int64_t)wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate + 256;
        int out_size  = av_samples_get_buffer_size(NULL, is->audio_tgt.channels, out_count, is->audio_tgt.fmt, 0);
        int len2;
        if (out_size < 0) {
            av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size() failed\n");
            return -1;
        }
        if (wanted_nb_samples != af->frame->nb_samples) {
            if (swr_set_compensation(is->swr_ctx, (wanted_nb_samples - af->frame->nb_samples) * is->audio_tgt.freq / af->frame->sample_rate,
                                        wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate) < 0) {
                av_log(NULL, AV_LOG_ERROR, "swr_set_compensation() failed\n");
                return -1;
            }
        }
        av_fast_malloc(&is->audio_buf1, &is->audio_buf1_size, out_size);
        if (!is->audio_buf1)
            return AVERROR(ENOMEM);
    // audio resampling: the return value is the number of samples per channel in the resampled output
        len2 = swr_convert(is->swr_ctx, out, out_count, in, af->frame->nb_samples);
        if (len2 < 0) {
            av_log(NULL, AV_LOG_ERROR, "swr_convert() failed\n");
            return -1;
        }
        if (len2 == out_count) {
            av_log(NULL, AV_LOG_WARNING, "audio buffer is probably too small\n");
            if (swr_init(is->swr_ctx) < 0)
                swr_free(&is->swr_ctx);
        }
        is->audio_buf = is->audio_buf1;
        resampled_data_size = len2 * is->audio_tgt.channels * av_get_bytes_per_sample(is->audio_tgt.fmt);
    } else {
    // no resampling needed: point audio_buf directly at the frame's audio data
        is->audio_buf = af->frame->data[0];
        resampled_data_size = data_size;
    }
 
    audio_clock0 = is->audio_clock;
    /* update the audio clock with the pts */
    if (!isnan(af->pts))
        is->audio_clock = af->pts + (double) af->frame->nb_samples / af->frame->sample_rate;
    else
        is->audio_clock = NAN;
    is->audio_clock_serial = af->serial;
#ifdef DEBUG
    {
        static double last_clock;
        printf("audio: delay=%0.3f clock=%0.3f clock0=%0.3f\n",
               is->audio_clock - last_clock,
               is->audio_clock, audio_clock0);
        last_clock = is->audio_clock;
    }
#endif
    return resampled_data_size; // size of the data now in audio_buf
}

  First a frame is read from sampq and its serial is checked for continuity; if it is discontinuous, for example after a seek, the frame is dropped. Then the frame's size in bytes is computed with av_samples_get_buffer_size. Finally comes resampling: the decoded frame's format may not be one SDL supports, in which case the audio must be resampled, i.e. converted into a format SDL can play.
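  To make the compensation arithmetic concrete (hypothetical numbers): suppose the source is 44100 Hz, the target 48000 Hz, the frame holds nb_samples = 1024, and synchronize_audio asks for wanted_nb_samples = 1034. swr_set_compensation is then called with sample_delta = (1034 - 1024) * 48000 / 44100 = 10 and compensation_distance = 1034 * 48000 / 44100 = 1125, i.e. about 10 extra output samples are spread smoothly over the next ~1125 output samples. Note also that out_count adds a 256-sample safety margin on top of the converted size, so swr_convert has room for such adjustments.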

Video output

  ffplay also displays video through SDL; the display setup is handled in the main function.

int main(int argc, char **argv)
{
     ..............................
 
    flags = SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER;
    if (audio_disable)
        flags &= ~SDL_INIT_AUDIO;
    else {
        /* Try to work around an occasional ALSA buffer underflow issue when the
         * period size is NPOT due to ALSA resampling by forcing the buffer size. */
        if (!SDL_getenv("SDL_AUDIO_ALSA_SET_BUFFER_SIZE"))
            SDL_setenv("SDL_AUDIO_ALSA_SET_BUFFER_SIZE","1", 1);
    }
    if (display_disable)
        flags &= ~SDL_INIT_VIDEO;
    // 1. initialize SDL
    if (SDL_Init (flags)) {
        av_log(NULL, AV_LOG_FATAL, "Could not initialize SDL - %s\n", SDL_GetError());
        av_log(NULL, AV_LOG_FATAL, "(Did you set the DISPLAY variable?)\n");
        exit(1);
    }
 
    SDL_EventState(SDL_SYSWMEVENT, SDL_IGNORE);
    SDL_EventState(SDL_USEREVENT, SDL_IGNORE);
 
    av_init_packet(&flush_pkt);
    flush_pkt.data = (uint8_t *)&flush_pkt;
 
    if (!display_disable) {
        int flags = SDL_WINDOW_HIDDEN;
        if (alwaysontop)
#if SDL_VERSION_ATLEAST(2,0,5)
            flags |= SDL_WINDOW_ALWAYS_ON_TOP;
#else
            av_log(NULL, AV_LOG_WARNING, "Your SDL version doesn't support SDL_WINDOW_ALWAYS_ON_TOP. Feature will be inactive.\n");
#endif
        if (borderless)
            flags |= SDL_WINDOW_BORDERLESS;
        else
            flags |= SDL_WINDOW_RESIZABLE;
    // 2. create the window
        window = SDL_CreateWindow(program_name, SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, default_width, default_height, flags);
        SDL_SetHint(SDL_HINT_RENDER_SCALE_QUALITY, "linear");
        if (window) {
            renderer = SDL_CreateRenderer(window, -1, SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC);
            if (!renderer) {
                av_log(NULL, AV_LOG_WARNING, "Failed to initialize a hardware accelerated renderer: %s\n", SDL_GetError());
                renderer = SDL_CreateRenderer(window, -1, 0);
            }
            if (renderer) {
                if (!SDL_GetRendererInfo(renderer, &renderer_info))
                    av_log(NULL, AV_LOG_VERBOSE, "Initialized %s renderer.\n", renderer_info.name);
            }
        }
        if (!window || !renderer || !renderer_info.num_texture_formats) {
            av_log(NULL, AV_LOG_FATAL, "Failed to create window or renderer: %s", SDL_GetError());
            do_exit(NULL);
        }
    }
  // 3. initialize the queues and create the data-reading thread read_thread
    is = stream_open(input_filename, file_iformat);
    if (!is) {
        av_log(NULL, AV_LOG_FATAL, "Failed to initialize VideoState!\n");
        do_exit(NULL);
    }
  // 4. event handling loop
    event_loop(is);
 
    /* never returns */
 
    return 0;
}

  Essentially all window operations are handled in main: after SDL creates the main window, a renderer is created for output, and event_loop then polls for playback-control events; the video display is driven from there as well.

  A breakpoint shows the call chain of the video output: main -> event_loop -> refresh_loop_wait_event -> video_refresh -> video_display -> video_image_display.

  event_loop starts processing SDL events:

static void event_loop(VideoState *cur_stream)
{
    SDL_Event event;
    double incr, pos, frac;
 
    for (;;) {
        double x;
        refresh_loop_wait_event(cur_stream, &event); // video display happens in here
        switch (event.type) {
            // key and mouse event handling
            ............................
        }
    }
}

  The display of the video is mainly in refresh_loop_wait_event:

static void refresh_loop_wait_event(VideoState *is, SDL_Event *event) {
    double remaining_time = 0.0;
    SDL_PumpEvents();
    while (!SDL_PeepEvents(event, 1, SDL_GETEVENT, SDL_FIRSTEVENT, SDL_LASTEVENT)) {
        if (!cursor_hidden && av_gettime_relative() - cursor_last_shown > CURSOR_HIDE_DELAY) {
            SDL_ShowCursor(0);
            cursor_hidden = 1;
        }
        if (remaining_time > 0.0)
            av_usleep((int64_t)(remaining_time * 1000000.0));
        remaining_time = REFRESH_RATE;
        if (is->show_mode != SHOW_MODE_NONE && (!is->paused || is->force_refresh))
            video_refresh(is, &remaining_time); // render the picture; remaining_time is how long the next iteration should sleep to keep a steady output cadence
        SDL_PumpEvents();
    }
}
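  REFRESH_RATE bounds how often the loop polls while idle. As far as I recall it is defined in ffplay.c as follows (verify against your source tree):

#define REFRESH_RATE 0.01 /* seconds: poll for events at most every 10 ms */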

  The implementation of video_refresh is as follows:

static void video_refresh(void *opaque, double *remaining_time)
{
    VideoState *is = opaque;
    double time;
 
    Frame *sp, *sp2;
 
    if (!is->paused && get_master_sync_type(is) == AV_SYNC_EXTERNAL_CLOCK && is->realtime)
        check_external_clock_speed(is);
 
    if (!display_disable && is->show_mode != SHOW_MODE_VIDEO && is->audio_st) {
        time = av_gettime_relative() / 1000000.0;
        if (is->force_refresh || is->last_vis_time + rdftspeed < time) {
            video_display(is);
            is->last_vis_time = time;
        }
        *remaining_time = FFMIN(*remaining_time, is->last_vis_time + rdftspeed - time);
    }
 
    if (is->video_st) {
retry:
        if (frame_queue_nb_remaining(&is->pictq) == 0) { // is the queue empty?
            // nothing to do, no picture to display in the queue
        } else {
            double last_duration, duration, delay;
            Frame *vp, *lastvp;
 
            /* dequeue the picture */
            lastvp = frame_queue_peek_last(&is->pictq); // the previous frame, currently on screen
            vp = frame_queue_peek(&is->pictq); // the frame waiting to be shown
 
            if (vp->serial != is->videoq.serial) { // discontinuous serial (e.g. after a seek): drop it so the newest serial is reached quickly
                frame_queue_next(&is->pictq); // drop the frame
                goto retry;
            }
 
            if (lastvp->serial != vp->serial) // a new playback serial resets the frame timer
                is->frame_timer = av_gettime_relative() / 1000000.0; // frame_timer runs on its own timeline
 
            if (is->paused) // while paused, keep showing the previous frame
                goto display;
 
            /* compute nominal last_duration */
            last_duration = vp_duration(is, lastvp, vp); // nominal interval between the two adjacent frames
            delay = compute_target_delay(last_duration, is); // how much longer the previous frame lastvp should stay on screen
 
            time= av_gettime_relative()/1000000.0; // current time
            if (time < is->frame_timer + delay) { // not yet vp's turn: the current frame stays on screen
                *remaining_time = FFMIN(is->frame_timer + delay - time, *remaining_time); // sleep until then
                goto display;
            }
            // time to play vp
            is->frame_timer += delay;
            if (delay > 0 && time - is->frame_timer > AV_SYNC_THRESHOLD_MAX)
                is->frame_timer = time;
 
            SDL_LockMutex(is->pictq.mutex);
            if (!isnan(vp->pts))
                update_video_pts(is, vp->pts, vp->pos, vp->serial);
            SDL_UnlockMutex(is->pictq.mutex);
       // frame-dropping check
            if (frame_queue_nb_remaining(&is->pictq) > 1) { // the queue holds at least 2 frames
                Frame *nextvp = frame_queue_peek_next(&is->pictq); // peek at the frame after vp
                duration = vp_duration(is, vp, nextvp);
                if (!is->step && // only consider dropping when not stepping frame by frame
                    (framedrop > 0 // framedrop == 0 disables dropping
                     || (framedrop && get_master_sync_type(is) != AV_SYNC_VIDEO_MASTER)) && // drop only when video is not the master clock
                    time > is->frame_timer + duration) { // we have fallen behind, so drop
                    is->frame_drops_late++; // count late-dropped frames
                    frame_queue_next(&is->pictq);
                    goto retry;
                }
            }
       // subtitle display
            if (is->subtitle_st) {      
                ........
            }
 
            frame_queue_next(&is->pictq);
            is->force_refresh = 1;
 
            if (is->step && !is->paused)
                stream_toggle_pause(is);
        }
display:
        /* display picture */
        if (!display_disable && is->force_refresh && is->show_mode == SHOW_MODE_VIDEO && is->pictq.rindex_shown)
            video_display(is); // render the picture
    }
    is->force_refresh = 0;
    if (show_status) {
        AVBPrint buf;
        static int64_t last_time;
        int64_t cur_time;
        int aqsize, vqsize, sqsize;
        double av_diff;
 
        cur_time = av_gettime_relative();
        if (!last_time || (cur_time - last_time) >= 30000) {
            aqsize = 0;
            vqsize = 0;
            sqsize = 0;
            if (is->audio_st)
                aqsize = is->audioq.size;
            if (is->video_st)
                vqsize = is->videoq.size;
            if (is->subtitle_st)
                sqsize = is->subtitleq.size;
            av_diff = 0;
            if (is->audio_st && is->video_st)
                av_diff = get_clock(&is->audclk) - get_clock(&is->vidclk);
            else if (is->video_st)
                av_diff = get_master_clock(is) - get_clock(&is->vidclk);
            else if (is->audio_st)
                av_diff = get_master_clock(is) - get_clock(&is->audclk);
 
            av_bprint_init(&buf, 0, AV_BPRINT_SIZE_AUTOMATIC);
            av_bprintf(&buf,
                      "%7.2f %s:%7.3f fd=%4d aq=%5dKB vq=%5dKB sq=%5dB f=%"PRId64"/%"PRId64"   \r",
                      get_master_clock(is),
                      (is->audio_st && is->video_st) ? "A-V" : (is->video_st ? "M-V" : (is->audio_st ? "M-A" : "   ")),
                      av_diff,
                      is->frame_drops_early + is->frame_drops_late,
                      aqsize / 1024,
                      vqsize / 1024,
                      sqsize,
                      is->video_st ? is->viddec.avctx->pts_correction_num_faulty_dts : 0,
                      is->video_st ? is->viddec.avctx->pts_correction_num_faulty_pts : 0);
 
            if (show_status == 1 && AV_LOG_INFO > av_log_get_level())
                fprintf(stderr, "%s", buf.str);
            else
                av_log(NULL, AV_LOG_INFO, "%s", buf.str);
 
            fflush(stderr);
            av_bprint_finalize(&buf, NULL);
 
            last_time = cur_time;
        }
    }
}    

  Its processing flow ends in video_display, which draws either the video picture or an audio visualization:


static void video_display(VideoState *is)
{
    if (!is->width)
        video_open(is);
 
    SDL_SetRenderDrawColor(renderer, 0, 0, 0, 255);
    SDL_RenderClear(renderer); // clear the renderer
    if (is->audio_st && is->show_mode != SHOW_MODE_VIDEO)
        video_audio_display(is); // audio visualization (which kind depends on show_mode)
    else if (is->video_st)
        video_image_display(is); // image display
    SDL_RenderPresent(renderer);
}

  The logic of video_image_display: first fetch the frame to display with frame_queue_peek_last, then upload it to an SDL_Texture via upload_texture, and finally copy the texture to the renderer with SDL_RenderCopyEx.

static void video_image_display(VideoState *is)
{
    Frame *vp;
    Frame *sp = NULL;
    SDL_Rect rect;
 
    vp = frame_queue_peek_last(&is->pictq); // the video frame to display
    if (is->subtitle_st) { // subtitle display
        if (frame_queue_nb_remaining(&is->subpq) > 0) {
            sp = frame_queue_peek(&is->subpq);
 
            if (vp->pts >= sp->pts + ((float) sp->sub.start_display_time / 1000)) {
                if (!sp->uploaded) {
                    uint8_t* pixels[4];
                    int pitch[4];
                    int i;
                    if (!sp->width || !sp->height) {
                        sp->width = vp->width;
                        sp->height = vp->height;
                    }
                    if (realloc_texture(&is->sub_texture, SDL_PIXELFORMAT_ARGB8888, sp->width, sp->height, SDL_BLENDMODE_BLEND, 1) < 0)
                        return;
 
                    for (i = 0; i < sp->sub.num_rects; i++) {
                        AVSubtitleRect *sub_rect = sp->sub.rects[i];
 
                        sub_rect->x = av_clip(sub_rect->x, 0, sp->width );
                        sub_rect->y = av_clip(sub_rect->y, 0, sp->height);
                        sub_rect->w = av_clip(sub_rect->w, 0, sp->width  - sub_rect->x);
                        sub_rect->h = av_clip(sub_rect->h, 0, sp->height - sub_rect->y);
 
                        is->sub_convert_ctx = sws_getCachedContext(is->sub_convert_ctx,
                            sub_rect->w, sub_rect->h, AV_PIX_FMT_PAL8,
                            sub_rect->w, sub_rect->h, AV_PIX_FMT_BGRA,
                            0, NULL, NULL, NULL);
                        if (!is->sub_convert_ctx) {
                            av_log(NULL, AV_LOG_FATAL, "Cannot initialize the conversion context\n");
                            return;
                        }
                        if (!SDL_LockTexture(is->sub_texture, (SDL_Rect *)sub_rect, (void **)pixels, pitch)) {
                            sws_scale(is->sub_convert_ctx, (const uint8_t * const *)sub_rect->data, sub_rect->linesize,
                                      0, sub_rect->h, pixels, pitch);
                            SDL_UnlockTexture(is->sub_texture);
                        }
                    }
                    sp->uploaded = 1;
                }
            } else
                sp = NULL;
        }
    }
    // the display rectangle is recomputed on every display
    calculate_display_rect(&rect, is->xleft, is->ytop, is->width, is->height, vp->width, vp->height, vp->sar);
 
    if (!vp->uploaded) { // has this frame been uploaded to the texture yet?
        if (upload_texture(&is->vid_texture, vp->frame, &is->img_convert_ctx) < 0)
            return;
        vp->uploaded = 1;
        vp->flip_v = vp->frame->linesize[0] < 0;
    }
 
    set_sdl_yuv_conversion_mode(vp->frame);
    SDL_RenderCopyEx(renderer, is->vid_texture, NULL, &rect, 0, NULL, vp->flip_v ? SDL_FLIP_VERTICAL : 0); // copy the texture to the renderer
    set_sdl_yuv_conversion_mode(NULL);
    if (sp) {
  // subtitle rendering
#if USE_ONEPASS_SUBTITLE_RENDER
        SDL_RenderCopy(renderer, is->sub_texture, NULL, &rect);
#else
        int i;
        double xratio = (double)rect.w / (double)sp->width;
        double yratio = (double)rect.h / (double)sp->height;
        for (i = 0; i < sp->sub.num_rects; i++) {
            SDL_Rect *sub_rect = (SDL_Rect*)sp->sub.rects[i];
            SDL_Rect target = {.x = rect.x + sub_rect->x * xratio,
                               .y = rect.y + sub_rect->y * yratio,
                               .w = sub_rect->w * xratio,
                               .h = sub_rect->h * yratio};
            SDL_RenderCopy(renderer, is->sub_texture, sub_rect, &target);
        }
#endif
    }
}
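  To illustrate what calculate_display_rect does (hypothetical numbers): a 1920x1080 frame with a 1:1 sample aspect ratio shown in an 800x600 window keeps its 16:9 picture aspect, so it is scaled to 800x450 and centered vertically, leaving 75-pixel black bars at the top and bottom of the window.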

  upload_texture copies the AVFrame's image data into the SDL texture.

static int upload_texture(SDL_Texture **tex, AVFrame *frame, struct SwsContext **img_convert_ctx) {
    int ret = 0;
    Uint32 sdl_pix_fmt;
    SDL_BlendMode sdl_blendmode;
    // get the SDL pixel format matching the frame's pixel format
    get_sdl_pix_fmt_and_blendmode(frame->format, &sdl_pix_fmt, &sdl_blendmode);
    // realloc_texture (re)allocates is->vid_texture for the chosen SDL pixel format if necessary
    if (realloc_texture(tex, sdl_pix_fmt == SDL_PIXELFORMAT_UNKNOWN ? SDL_PIXELFORMAT_ARGB8888 : sdl_pix_fmt, frame->width, frame->height, sdl_blendmode, 0) < 0)
        return -1;
    switch (sdl_pix_fmt) {
        case SDL_PIXELFORMAT_UNKNOWN: // SDL does not support the frame's format: convert it to AV_PIX_FMT_BGRA, which maps to SDL_PIXELFORMAT_BGRA32
            /* This should only happen if we are not using avfilter... */
            *img_convert_ctx = sws_getCachedContext(*img_convert_ctx,
                frame->width, frame->height, frame->format, frame->width, frame->height,
                AV_PIX_FMT_BGRA, sws_flags, NULL, NULL, NULL);
            if (*img_convert_ctx != NULL) {
                uint8_t *pixels[4];
                int pitch[4];
                if (!SDL_LockTexture(*tex, NULL, (void **)pixels, pitch)) {
                    // pixel format / resolution conversion (e.g. scaling); libyuv or a shader would actually be more efficient
                    sws_scale(*img_convert_ctx, (const uint8_t * const *)frame->data, frame->linesize,
                              0, frame->height, pixels, pitch);
                    SDL_UnlockTexture(*tex);
                }
            } else {
                av_log(NULL, AV_LOG_FATAL, "Cannot initialize the conversion context\n");
                ret = -1;
            }
            break;
        case SDL_PIXELFORMAT_IYUV: // the frame maps to SDL_PIXELFORMAT_IYUV: no conversion needed, update the texture with SDL_UpdateYUVTexture
            if (frame->linesize[0] > 0 && frame->linesize[1] > 0 && frame->linesize[2] > 0) {
                ret = SDL_UpdateYUVTexture(*tex, NULL, frame->data[0], frame->linesize[0],
                                                       frame->data[1], frame->linesize[1],
                                                       frame->data[2], frame->linesize[2]);
            } else if (frame->linesize[0] < 0 && frame->linesize[1] < 0 && frame->linesize[2] < 0) {
                ret = SDL_UpdateYUVTexture(*tex, NULL, frame->data[0] + frame->linesize[0] * (frame->height                    - 1), -frame->linesize[0],
                                                       frame->data[1] + frame->linesize[1] * (AV_CEIL_RSHIFT(frame->height, 1) - 1), -frame->linesize[1],
                                                       frame->data[2] + frame->linesize[2] * (AV_CEIL_RSHIFT(frame->height, 1) - 1), -frame->linesize[2]);
            } else {
                av_log(NULL, AV_LOG_ERROR, "Mixed negative and positive linesizes are not supported.\n");
                return -1;
            }
            break;

        // the frame maps to another SDL format directly: no conversion needed, update the texture with SDL_UpdateTexture
        default:
            if (frame->linesize[0] < 0) {
                ret = SDL_UpdateTexture(*tex, NULL, frame->data[0] + frame->linesize[0] * (frame->height - 1), -frame->linesize[0]);
            } else {
                ret = SDL_UpdateTexture(*tex, NULL, frame->data[0], frame->linesize[0]);
            }
            break;
    }
    return ret;
}
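  For reference, get_sdl_pix_fmt_and_blendmode consults a static table mapping AVPixelFormat values to SDL pixel formats. A few representative entries of ffplay's sdl_texture_format_map, recalled from memory (abridged; verify the exact contents against your source tree):

static const struct TextureFormatEntry {
    enum AVPixelFormat format;
    int texture_fmt;
} sdl_texture_format_map[] = {
    { AV_PIX_FMT_RGB24,   SDL_PIXELFORMAT_RGB24 },
    { AV_PIX_FMT_BGR24,   SDL_PIXELFORMAT_BGR24 },
    { AV_PIX_FMT_YUV420P, SDL_PIXELFORMAT_IYUV },
    { AV_PIX_FMT_NONE,    SDL_PIXELFORMAT_UNKNOWN }, /* sentinel: falls back to BGRA conversion */
};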

Audio and video synchronization

  First of all, why is audio-video synchronization needed? From the output paths above we know audio and video are rendered on different threads, so an audio frame and a video frame with the same pts are generally not decoded or presented at the same moment. On top of that, pts values may be discontinuous from encoding or muxing. Without any correction, sound and picture drift apart during playback, so the player must control the playback speed and timing of both streams to keep them in sync.

  ffplay offers three synchronization strategies: sync to audio, to video, or to an external clock. Because the human ear is more sensitive to disruptions than the eye, the usual strategy takes audio as the reference, i.e. the video is synchronized to the audio, dropping or repeating pictures as needed to catch up with or wait for it. ffplay defaults to the audio clock as the master.

  Synchronization requires the notion of a "clock". Audio, video, and the external clock each maintain an independent clock and set it themselves; whichever is chosen as the reference (master) is only read by the others, which synchronize against it. The clock structure is defined as follows:

typedef struct Clock {
    double pts;           // clock base: pts of the current (to-be-played) frame; once played, it becomes the previous frame
    double pts_drift;     // difference between the current pts and the system time; independent for audio and video
    double last_updated;  // system time of the last update
    double speed;         // clock speed, used for playback-rate control
    int serial;           // playback serial: one run of continuous playback; a seek starts a new serial
    int paused;           // = 1 means paused
    int *queue_serial;    // points to the serial of the corresponding packet queue
} Clock;

  The clock works by being "time-aligned" over and over: set_clock_at(Clock *c, double pts, int serial, double time) aligns the clock using pts, serial, and time (the system time). A later read returns an estimated value, extrapolated from the pts_drift recorded at the last alignment.
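  For reference, here are the two core clock functions, reconstructed from ffplay.c (details recalled from memory, so verify against your source tree):

static void set_clock_at(Clock *c, double pts, int serial, double time)
{
    c->pts = pts;
    c->last_updated = time;
    c->pts_drift = c->pts - time; // remember pts relative to the system time
    c->serial = serial;
}

static double get_clock(Clock *c)
{
    if (*c->queue_serial != c->serial)
        return NAN; // the clock belongs to an obsolete playback serial
    if (c->paused) {
        return c->pts;
    } else {
        double time = av_gettime_relative() / 1000000.0;
        // extrapolate from the last alignment, honoring the speed factor
        return c->pts_drift + time - (time - c->last_updated) * (1.0 - c->speed);
    }
}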

Audio as the master clock

  By default, ffplay synchronizes to the audio clock. In this mode, the audio clock is set in sdl_audio_callback:

static void sdl_audio_callback(void *opaque, Uint8 *stream, int len)
{
    audio_callback_time = av_gettime_relative();
 
    ..........................................
 
    /* Let's assume the audio driver that is used by SDL has two periods. */
    if (!isnan(is->audio_clock)) {
        set_clock_at(&is->audclk, is->audio_clock - (double)(2 * is->audio_hw_buf_size + is->audio_write_buf_size) / is->audio_tgt.bytes_per_sec, is->audio_clock_serial, audio_callback_time / 1000000.0);
    sync_clock_to_slave(&is->extclk, &is->audclk);
    }
}

  is->audio_clock is assigned in audio_decode_frame:

static int audio_decode_frame(VideoState *is)
{
    ........................................................
 
    /* update the audio clock with the pts */
    if (!isnan(af->pts))
        is->audio_clock = af->pts + (double) af->frame->nb_samples / af->frame->sample_rate;
    else
        is->audio_clock = NAN;
    is->audio_clock_serial = af->serial;
 
    ................................................
    
}

  Note that this timestamp marks the end of audio_buf, not its start. So while part of audio_buf is still pending (its remaining length is tracked in audio_write_buf_size), the pts of the data actually being written becomes is->audio_clock - (double)(is->audio_write_buf_size) / is->audio_tgt.bytes_per_sec.

   On top of that, about 2 * audio_hw_buf_size bytes are still sitting unplayed in the driver's buffers (ffplay assumes the SDL audio driver uses two periods), hence:

is->audio_clock - (double)(2 * is->audio_hw_buf_size + is->audio_write_buf_size) / is->audio_tgt.bytes_per_sec

set_clock_at(&is->audclk, is->audio_clock - (double)(2 * is->audio_hw_buf_size + is->audio_write_buf_size) / is->audio_tgt.bytes_per_sec, is->audio_clock_serial, audio_callback_time / 1000000.0);
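  A quick numeric example (hypothetical values): at 44100 Hz stereo S16, bytes_per_sec = 176400. With audio_hw_buf_size = 8192 and audio_write_buf_size = 1024, the clock is set to audio_clock - (2 * 8192 + 1024) / 176400 ≈ audio_clock - 0.099 s, i.e. about 99 ms behind the end-of-buffer timestamp, which estimates what is actually audible at the moment of the callback.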

  Next, the video must be synchronized against the audio clock: if the video runs ahead, the previous frame is repeated to wait for the audio; if it runs behind, frames are dropped to catch up. This logic lives in video_refresh:

static void video_refresh(void *opaque, double *remaining_time)
{
        ...............................................................
 
            /* compute nominal last_duration */
            last_duration = vp_duration(is, lastvp, vp); // nominal interval between the two adjacent frames
            delay = compute_target_delay(last_duration, is); // how much longer the previous frame lastvp should stay on screen
 
            time= av_gettime_relative()/1000000.0; // current time
            if (time < is->frame_timer + delay) { // not yet vp's turn: the current frame stays on screen
                *remaining_time = FFMIN(is->frame_timer + delay - time, *remaining_time); // sleep until then
                goto display;
            }
      // time to play vp
            is->frame_timer += delay;
            if (delay > 0 && time - is->frame_timer > AV_SYNC_THRESHOLD_MAX)
                is->frame_timer = time;
 
            SDL_LockMutex(is->pictq.mutex);
            if (!isnan(vp->pts))
                update_video_pts(is, vp->pts, vp->pos, vp->serial);
            SDL_UnlockMutex(is->pictq.mutex);
 
            if (frame_queue_nb_remaining(&is->pictq) > 1) { // the queue holds at least 2 frames
                Frame *nextvp = frame_queue_peek_next(&is->pictq); // peek at the frame after vp
                duration = vp_duration(is, vp, nextvp);
                if (!is->step && // only consider dropping when not stepping frame by frame
                    (framedrop > 0 // framedrop == 0 disables dropping
                     || (framedrop && get_master_sync_type(is) != AV_SYNC_VIDEO_MASTER)) && // drop only when video is not the master clock
                    time > is->frame_timer + duration) { // we have fallen behind, so drop
                    is->frame_drops_late++; // count late-dropped frames
                    frame_queue_next(&is->pictq);
                    goto retry;
                }
            }
    .............................................................  
}

  This introduces another concept, frame_timer, which can be understood as the time at which the current frame started displaying. Before the update it is the display time of the previous frame lastvp; after the update (is->frame_timer += delay) it is the display time of the current frame vp. The previous frame's display time plus delay (how long it should stay on screen, including the frame's own duration) gives the moment at which the previous frame should stop displaying.

  time1: the system time is earlier than the moment lastvp should end (frame_timer + delay); lastvp should keep displaying;

  time2: the system time is past lastvp's end time but before vp's end time (vp's display slot starts where lastvp's ends); lastvp is neither repeated nor vp dropped, and vp should be displayed;

  time3: the system time is past vp's end time (which is also nextvp's expected start time); vp should be dropped.

  How long lastvp stays on screen, the delay, is computed by compute_target_delay. The larger its return value, the slower the picture advances, i.e. the longer the previous frame keeps displaying.

static double compute_target_delay(double delay, VideoState *is)
{
    double sync_threshold, diff = 0;
 
    /* update delay to follow master synchronisation source */
    if (get_master_sync_type(is) != AV_SYNC_VIDEO_MASTER) {
        /* if video is slave, we try to correct big delays by
           duplicating or deleting a frame */
        diff = get_clock(&is->vidclk) - get_master_clock(is);
 
        /* skip or repeat frame. We take into account the
           delay to compute the threshold. I still don't know
           if it is the best guess */
        sync_threshold = FFMAX(AV_SYNC_THRESHOLD_MIN, FFMIN(AV_SYNC_THRESHOLD_MAX, delay));
        if (!isnan(diff) && fabs(diff) < is->max_frame_duration) {
            if (diff <= -sync_threshold)
                delay = FFMAX(0, delay + diff);
            else if (diff >= sync_threshold && delay > AV_SYNC_FRAMEDUP_THRESHOLD)
                delay = delay + diff;
            else if (diff >= sync_threshold)
                delay = 2 * delay;
        }
    }
 
    av_log(NULL, AV_LOG_TRACE, "video: delay=%0.3f A-V=%f\n",delay, -diff);
 
    return delay;
}
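  For context, the thresholds above are compile-time constants; their values as I recall them from ffplay.c (verify against your source tree):

#define AV_SYNC_THRESHOLD_MIN 0.04      /* below this delay, no correction */
#define AV_SYNC_THRESHOLD_MAX 0.1       /* above this delay, always correct */
#define AV_SYNC_FRAMEDUP_THRESHOLD 0.1  /* frame durations above this use delay + diff instead of 2 * delay */

  For example, with a 25 fps stream delay = 0.040 s, so sync_threshold = 0.04; a diff of -0.1 s (video behind the master) gives delay = FFMAX(0, 0.040 + (-0.1)) = 0, so vp is shown as soon as possible.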

  (1) diff <= -sync_threshold: the video is behind the master clock; delay is shrunk (down to 0) so the next frame is shown as soon as possible;

  (2) diff >= sync_threshold && delay > AV_SYNC_FRAMEDUP_THRESHOLD: the video is ahead and the frame duration is already long; delay + diff is returned, so how long the frame keeps displaying depends directly on diff;

  (3) diff >= sync_threshold: the video is ahead; lastvp must be displayed longer, and 2 * delay is returned, i.e. lastvp stays on screen for one extra frame duration;

  (4) -sync_threshold < diff < sync_threshold: within tolerance; the video displays at its nominal frame duration and delay is returned unchanged.

  As this shows, the streams are not forced into lockstep at every instant; there is a "near-sync" tolerance band.

Video as the master clock

  When video is the master and the audio must synchronize to it, the drop-or-repeat scheme used for video is unreliable for audio: audio is far more continuity-sensitive, and repeating or discarding a few hundred samples produces clearly audible glitches. ffplay's approach is resampling compensation: when the audio runs slow, fewer samples than normal are produced so the next frame is reached sooner; when it runs fast, extra samples are produced so playback slows down.

  On the video side, video_refresh() -> update_video_pts() plays frames at their nominal intervals and keeps correcting the video clock in real time; the actual adjustment happens on the audio side, mainly in audio_decode_frame:

static int audio_decode_frame(VideoState *is)
{
    .......................................
    // 1. compute the number of samples that should be output, based on the difference from the video clock
    wanted_nb_samples = synchronize_audio(is, af->frame->nb_samples);
    // 2. check whether the resampler needs to be (re)initialized
    if (af->frame->format        != is->audio_src.fmt            ||
        dec_channel_layout       != is->audio_src.channel_layout ||
        af->frame->sample_rate   != is->audio_src.freq           ||
        (wanted_nb_samples       != af->frame->nb_samples && !is->swr_ctx)) {
        swr_free(&is->swr_ctx);
    // set the output and input parameters
        is->swr_ctx = swr_alloc_set_opts(NULL,
                                         is->audio_tgt.channel_layout, is->audio_tgt.fmt, is->audio_tgt.freq,
                                         dec_channel_layout,           af->frame->format, af->frame->sample_rate,
                                         0, NULL);
        if (!is->swr_ctx || swr_init(is->swr_ctx) < 0) {
            av_log(NULL, AV_LOG_ERROR,
                   "Cannot create sample rate converter for conversion of %d Hz %s %d channels to %d Hz %s %d channels!\n",
                    af->frame->sample_rate, av_get_sample_fmt_name(af->frame->format), af->frame->channels,
                    is->audio_tgt.freq, av_get_sample_fmt_name(is->audio_tgt.fmt), is->audio_tgt.channels);
            swr_free(&is->swr_ctx);
            return -1;
        }
    // remember the new source parameters
        is->audio_src.channel_layout = dec_channel_layout;
        is->audio_src.channels       = af->frame->channels;
        is->audio_src.freq = af->frame->sample_rate;
        is->audio_src.fmt = af->frame->format;
    }
    // 3. resample: the resampler is what actually adds or removes samples
    if (is->swr_ctx) {
        const uint8_t **in = (const uint8_t **)af->frame->extended_data;
        uint8_t **out = &is->audio_buf1;
        int out_count = (int64_t)wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate + 256;
        int out_size  = av_samples_get_buffer_size(NULL, is->audio_tgt.channels, out_count, is->audio_tgt.fmt, 0);
        int len2;
        if (out_size < 0) {
            av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size() failed\n");
            return -1;
        }
        if (wanted_nb_samples != af->frame->nb_samples) {
            if (swr_set_compensation(is->swr_ctx, (wanted_nb_samples - af->frame->nb_samples) * is->audio_tgt.freq / af->frame->sample_rate,
                                        wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate) < 0) {
                av_log(NULL, AV_LOG_ERROR, "swr_set_compensation() failed\n");
                return -1;
            }
        }
        av_fast_malloc(&is->audio_buf1, &is->audio_buf1_size, out_size);
        if (!is->audio_buf1)
            return AVERROR(ENOMEM);
    // audio resampling: the return value is the number of samples per channel in the resampled output
        len2 = swr_convert(is->swr_ctx, out, out_count, in, af->frame->nb_samples);
        if (len2 < 0) {
            av_log(NULL, AV_LOG_ERROR, "swr_convert() failed\n");
            return -1;
        }
        if (len2 == out_count) {
            av_log(NULL, AV_LOG_WARNING, "audio buffer is probably too small\n");
            if (swr_init(is->swr_ctx) < 0)
                swr_free(&is->swr_ctx);
        }
        is->audio_buf = is->audio_buf1;
        resampled_data_size = len2 * is->audio_tgt.channels * av_get_bytes_per_sample(is->audio_tgt.fmt);
    } else {
    // no resampling needed: point audio_buf directly at the frame's audio data
        is->audio_buf = af->frame->data[0];
        resampled_data_size = data_size;
    }
    ...............................................
     
    return resampled_data_size; // size of the data now in audio_buf
}

  (1) Compute the number of samples that should be output, based on the difference from the video clock. This is done by synchronize_audio: when the audio runs slow the sample count is reduced, when it runs fast the count is increased;

  (2) Decide whether resampling is required: if the number of samples to output differs from the number in the frame, samples must be added or removed, which needs the resampler;

  (3) Add or remove samples smoothly via the resampling library: once the adjusted target wanted_nb_samples is known, the combination of swr_set_compensation and swr_convert completes the resampling. The core of step (1) is sketched below.
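  Since synchronize_audio does the heavy lifting, here is its core logic, reconstructed from ffplay.c (abridged and recalled from memory, so verify against your source tree; AUDIO_DIFF_AVG_NB and SAMPLE_CORRECTION_PERCENT_MAX are ffplay constants, 20 and 10 as far as I recall):

static int synchronize_audio(VideoState *is, int nb_samples)
{
    int wanted_nb_samples = nb_samples;

    /* only adjust when audio is NOT the master clock */
    if (get_master_sync_type(is) != AV_SYNC_AUDIO_MASTER) {
        double diff, avg_diff;
        int min_nb_samples, max_nb_samples;

        diff = get_clock(&is->audclk) - get_master_clock(is);
        if (!isnan(diff) && fabs(diff) < AV_NOSYNC_THRESHOLD) {
            /* low-pass filter the clock difference */
            is->audio_diff_cum = diff + is->audio_diff_avg_coef * is->audio_diff_cum;
            if (is->audio_diff_avg_count < AUDIO_DIFF_AVG_NB) {
                is->audio_diff_avg_count++; /* not enough measurements yet */
            } else {
                avg_diff = is->audio_diff_cum * (1.0 - is->audio_diff_avg_coef);
                if (fabs(avg_diff) >= is->audio_diff_threshold) {
                    /* adjust the sample count, clamped to +/- SAMPLE_CORRECTION_PERCENT_MAX percent */
                    wanted_nb_samples = nb_samples + (int)(diff * is->audio_src.freq);
                    min_nb_samples = nb_samples * (100 - SAMPLE_CORRECTION_PERCENT_MAX) / 100;
                    max_nb_samples = nb_samples * (100 + SAMPLE_CORRECTION_PERCENT_MAX) / 100;
                    wanted_nb_samples = av_clip(wanted_nb_samples, min_nb_samples, max_nb_samples);
                }
            }
        } else {
            /* difference too large: probably bad initial PTS, reset the filter */
            is->audio_diff_avg_count = 0;
            is->audio_diff_cum = 0;
        }
    }
    return wanted_nb_samples;
}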

