背景

本文主要从URLProtocol的视角探究ffplay在demux数据之前是如何拉取数据的。首先看一下函数调用的基本流程，如下图：

stream_open : 在ffplay::main函数中,负责根据filename决定拉取数据的协议，以及生成URLProtocol等数据。

avformat_open_input这个函数的作用是打开文件的链接，如果是网络连接，还会发起网络请求，并一直等待网络数据的返回，然后读取视频流的数据。

ffurl_open_whitelist函数的功能主要是打开文件链接，并填充一个URLContext *h结构体。该结构体的声明是在url.h文件里面，其定义如下：

/**
 * State of one opened URL. ffurl_open_whitelist() fills one of these in;
 * the protocol-specific callbacks reached through prot operate on it.
 */
typedef struct URLContext {
    const AVClass *av_class;    /**< information for av_log(). Set by url_open(). */
    const struct URLProtocol *prot;     /* protocol implementation serving this URL, e.g. ff_http_protocol */
    void *priv_data;                    /* protocol-private state; http_open() reads it as an HTTPContext */
    char *filename;             /**< specified URL */
    int flags;                          /* open flags; http_connect() tests AVIO_FLAG_WRITE on it */
    int max_packet_size;        /**< if non zero, the stream is packetized with this max packet size */
    int is_streamed;            /**< true if streamed (no seek possible), default = false */
    int is_connected;                   /* NOTE(review): presumably set once the open callback succeeded -- confirm */
    AVIOInterruptCB interrupt_callback; /* handed on to the lower-level protocol when http opens its tcp/tls URL */
    int64_t rw_timeout;         /**< maximum time to wait for (network) read/write operation completion, in mcs */
    const char *protocol_whitelist;     /* forwarded to ffurl_open_whitelist() for nested opens */
    const char *protocol_blacklist;     /* forwarded to ffurl_open_whitelist() for nested opens */
} URLContext;

这个结构体很重要，里面prot指向了具体URLProtocol结构体，该结构体里面包含有打开该协议的url的回调函数，如http，tcp，都有对应的open函数来处理。

ffurl_alloc

在上述的流程调用中，从ffurl_alloc函数开始进行URLProtocol和URLContext的初始化。如下所示:

/**
 * Create a URLContext for filename.
 *
 * The protocol is resolved from the URL scheme by url_find_protocol()
 * (e.g. "http://..." yields ff_http_protocol); the context itself is then
 * built by url_alloc_for_protocol().
 *
 * @param puc      receives the new context, or NULL if no protocol matches
 * @param filename URL or file name to open
 * @param flags    open flags, passed through unchanged
 * @param int_cb   interrupt callback, passed through unchanged
 * @return the result of url_alloc_for_protocol(), or
 *         AVERROR_PROTOCOL_NOT_FOUND when no protocol handles filename
 */
int ffurl_alloc(URLContext **puc, const char *filename, int flags,
                const AVIOInterruptCB *int_cb)
{
    const URLProtocol *proto = url_find_protocol(filename);

    if (!proto) {
        /* No handler registered for this scheme: clear the output and,
         * for https URLs, hint that a TLS backend is missing. */
        *puc = NULL;
        if (av_strstart(filename, "https:", NULL))
            av_log(NULL, AV_LOG_WARNING, "https protocol not found, recompile FFmpeg with "
                                         "openssl, gnutls "
                                         "or securetransport enabled.\n");
        return AVERROR_PROTOCOL_NOT_FOUND;
    }

    /* Protocol found: allocate the context and attach the protocol to it. */
    return url_alloc_for_protocol(puc, proto, filename, flags, int_cb);
}

url_find_protocol: 根据输入的文件名或者url进行protocol探查，如filename以http开头的则探查返回的结果为ff_http_protocol

url_alloc_for_protocol: 初始化URLContext,并将上一个步骤返回的URLProtocol赋值其中

const URLProtocol ff_http_protocol = {
  .name = “http”,
  .url_open2 = http_open,
  .url_read = http_read,
  .url_write = http_write,
  .priv_data_size = sizeof(HTTPContext),
};

url_find_protocol

url_find_protocol函数主要通过filename进行探查.如果匹配，则返回对应的URLProtocol，ffmpeg将所有支持的protocol以URLProtocol常量保存在protocol_list.c中。

/**
 * Map a file name or URL to the URLProtocol that handles it.
 *
 * The scheme is the leading run of URL_SCHEME_CHARS in filename. Names with
 * no "scheme:" prefix (and DOS-style paths) are treated as "file".
 * Everything after a ',' in the scheme is dropped, and for protocols flagged
 * URL_PROTOCOL_FLAG_NESTED_SCHEME only the part before '+' has to match.
 *
 * @param filename URL or path, e.g. "http://host/a.mp4"
 * @return the matching protocol, or NULL if none matches or the protocol
 *         list could not be obtained
 */
static const struct URLProtocol *url_find_protocol(const char *filename)
{
    const URLProtocol **list;
    const URLProtocol *match = NULL;
    char proto_str[128], proto_nested[128], *sep;
    /* For "http://xxxx" the scheme run is "http", so scheme_len == 4. */
    size_t scheme_len = strspn(filename, URL_SCHEME_CHARS);
    int idx;

    if ((filename[scheme_len] != ':' &&
         (strncmp(filename, "subfile,", 8) || !strchr(filename + scheme_len + 1, ':'))) ||
        is_dos_path(filename)) {
        strcpy(proto_str, "file");
    } else {
        /* av_strlcpy size includes the terminator, hence scheme_len + 1. */
        av_strlcpy(proto_str, filename,
                   FFMIN(scheme_len + 1, sizeof(proto_str)));
    }

    /* Drop anything after a ',' in the scheme. */
    sep = strchr(proto_str, ',');
    if (sep)
        *sep = '\0';

    /* Nested form: keep only the part before '+'. */
    av_strlcpy(proto_nested, proto_str, sizeof(proto_nested));
    sep = strchr(proto_nested, '+');
    if (sep)
        *sep = '\0';

    /* Array of all available protocols, NULL-terminated; freed below. */
    list = ffurl_get_protocols(NULL, NULL);
    if (!list)
        return NULL;

    for (idx = 0; list[idx] && !match; idx++) {
        const URLProtocol *cand = list[idx];

        /* Exact name match, or nested-scheme match where the protocol allows it. */
        if (!strcmp(proto_str, cand->name) ||
            ((cand->flags & URL_PROTOCOL_FLAG_NESTED_SCHEME) &&
             !strcmp(proto_nested, cand->name)))
            match = cand;
    }

    av_freep(&list);
    return match;
}

在protocol_list.c中罗列了所有ffmpeg支持的协议，例如http、rtmp、udp、file等。这些协议常量以URLProtocol结构的形式保存在url_protocols数组中。而url_find_protocol函数就是要通过filename匹配到特定的URLProtocol实例。
首先，url_find_protocol通过strspn(filename, URL_SCHEME_CHARS)（并非正则表达式）计算出filename开头协议名的长度，也就是filename中首个不属于URL_SCHEME_CHARS（字母、数字以及"+"、"-"、"."）的字符的下标。例如http://www.baidu.com/xxx.mp4,则首个不属于该字符集的字符为[:],其下标为4。所以proto_len = 4
随后，根据proto_len截取出filename中的协议字符串,例如http.再逐个跟ff_http_protocol等URLProtocol的name成员进行比较，如果相等，则代表匹配到URLProtocol。

ffurl_connect函数

该函数中唯一一个比较重要的函数就是

/* Use the protocol's url_open2 callback (which also receives the options)
 * when it is set; otherwise fall back to url_open, which takes no options. */
err = uc->prot->url_open2 ? uc->prot->url_open2(uc,
                                                  uc->filename,
                                                  uc->flags,
                                                  options) :
        uc->prot->url_open(uc, uc->filename, uc->flags);

首先判断prot->url_open2函数指针是否有赋值。我们从http.c中ff_http_protocol结构体的定义中可以发现

.url_open2           = http_open,

所以在ffurl_connect函数中这里实际调用的是http_open函数。那么接下来就进入到我们的关键函数了。

http_open

http协议的基本实现都是在http.c文件中实现的。确定URLProtocol的实例是http之后，调用的就是http_open

/**
 * url_open2 callback of ff_http_protocol: prepare the HTTPContext stored in
 * h->priv_data and open the HTTP connection for uri.
 *
 * @param h        URL context; h->priv_data is used as the HTTPContext
 * @param uri      URL to open; duplicated into s->location
 * @param flags    open flags, forwarded to http_listen() in listen mode
 * @param options  optional dictionary; its entries are copied into
 *                 s->chained_options
 * @return the result of http_open_cnx() (or of http_listen() in listen
 *         mode), or a negative AVERROR code if preparation fails
 */
static int http_open(URLContext *h, const char *uri, int flags,
                     AVDictionary **options)
{
    av_log(NULL, AV_LOG_WARNING, "This is %s!\n",__FUNCTION__);
    HTTPContext *s = h->priv_data;
    int ret;

    s->app_ctx = (AVApplicationContext *)(intptr_t)s->app_ctx_intptr;

    /* Only an explicit seekable == 1 marks the stream as seekable. */
    if( s->seekable == 1 )
        h->is_streamed = 0;
    else
        h->is_streamed = 1;

    s->filesize = UINT64_MAX;
    s->location = av_strdup(uri);
    if (!s->location)
        return AVERROR(ENOMEM);
    if (options)
        av_dict_copy(&s->chained_options, *options, 0);

    if (s->headers) {
        int len = strlen(s->headers);
        /* User-supplied headers must end in CRLF; append one if missing. */
        if (len < 2 || strcmp("\r\n", s->headers + len - 2)) {
            av_log(h, AV_LOG_WARNING,
                   "No trailing CRLF found in HTTP header.\n");
            ret = av_reallocp(&s->headers, len + 3);
            /* Leave through bail_out so the options copied above are
             * released, exactly as on an http_open_cnx() failure. */
            if (ret < 0)
                goto bail_out;
            s->headers[len]     = '\r';
            s->headers[len + 1] = '\n';
            s->headers[len + 2] = '\0';
        }
    }

    if (s->listen) {
        return http_listen(h, uri, flags, options);
    }
    av_log(NULL, AV_LOG_WARNING,"mdx http_listen %s!\n",__FUNCTION__);
    /* Notify the application before and after the actual connect. */
    av_application_will_http_open(s->app_ctx, (void*)h, uri);
    ret = http_open_cnx(h, options);
    av_application_did_http_open(s->app_ctx, (void*)h, uri, ret, s->http_code, s->filesize);
bail_out:
    if (ret < 0)
        av_dict_free(&s->chained_options);
    return ret;
}

在这个函数中，首先看一下av_application_will_http_open，这个函数主要是调用了av_application_on_http_event发送了AVAPP_EVENT_WILL_HTTP_OPEN的event，代表开始做http的open操作

#define AVAPP_EVENT_WILL_HTTP_OPEN  1 //AVAppHttpEvent

真正的open操作是在后续的http_open_cnx。

另外，在http_open_cnx之后，会去调用 av_application_did_http_open发送了AVAPP_EVENT_DID_HTTP_OPEN的event，表明完成了connect，可以传输数据

#define AVAPP_EVENT_DID_HTTP_OPEN   2 //AVAppHttpEvent

http_open_cnx

/**
 * Open the HTTP connection, retrying for authentication challenges and
 * following redirects.
 *
 * Each pass calls http_open_cnx_internal(). A 401 or 407 response is retried
 * (fewer than 4 attempts) when the server or proxy has just announced an
 * auth scheme or reported stale credentials. A 301/302/303/307 with a
 * changed location restarts the process against the new target, up to
 * MAX_REDIRECTS times.
 *
 * @param h        URL context whose priv_data is the HTTPContext
 * @param options  option dictionary; s->chained_options is copied into it
 *                 before every attempt
 * @return 0 on success, a negative AVERROR code on failure
 */
static int http_open_cnx(URLContext *h, AVDictionary **options)
{
    HTTPAuthType cur_auth_type, cur_proxy_auth_type;
    HTTPContext *s = h->priv_data;
    int location_changed, attempts = 0, redirects = 0;
redo:
    av_dict_copy(options, s->chained_options, 0);

    /* Record the auth types before the request so the checks below can tell
     * whether this response introduced a scheme. The proxy snapshot must
     * come from proxy_auth_state, which is what the 407 branch compares
     * against; reading auth_state here would mix the two state machines. */
    cur_auth_type       = s->auth_state.auth_type;
    cur_proxy_auth_type = s->proxy_auth_state.auth_type;

    location_changed = http_open_cnx_internal(h, options);
    if (location_changed < 0)
        goto fail;

    attempts++;
    if (s->http_code == 401) {
        if ((cur_auth_type == HTTP_AUTH_NONE || s->auth_state.stale) &&
            s->auth_state.auth_type != HTTP_AUTH_NONE && attempts < 4) {
            ffurl_closep(&s->hd);
            goto redo;
        } else
            goto fail;
    }
    if (s->http_code == 407) {
        if ((cur_proxy_auth_type == HTTP_AUTH_NONE || s->proxy_auth_state.stale) &&
            s->proxy_auth_state.auth_type != HTTP_AUTH_NONE && attempts < 4) {
            ffurl_closep(&s->hd);
            goto redo;
        } else
            goto fail;
    }
    if ((s->http_code == 301 || s->http_code == 302 ||
         s->http_code == 303 || s->http_code == 307) &&
        location_changed == 1) {
        /* url moved, get next */
        ffurl_closep(&s->hd);
        if (redirects++ >= MAX_REDIRECTS)
            return AVERROR(EIO);
        /* Restart the authentication process with the new target, which
         * might use a different auth mechanism. */
        memset(&s->auth_state, 0, sizeof(s->auth_state));
        attempts         = 0;
        location_changed = 0;
        goto redo;
    }
    return 0;

fail:
    if (s->hd)
        ffurl_closep(&s->hd);
    /* Prefer the transport-level error; otherwise map the HTTP status. */
    if (location_changed < 0)
        return location_changed;
    return ff_http_averror(s->http_code, AVERROR(EIO));
}

http_open_cnx_internal

由http_open_cnx函数进入，http_open_cnx_internal函数主要完成底层protocol的生成，以及与服务器进行握手(通过底层protocol发送报文，即调用http_connect)

/**
 * Perform one connection attempt: open the lower-level transport if it is
 * not open yet, then send the HTTP request through http_connect().
 *
 * (The "...." line marks code the article left out.)
 *
 * @param h        URL context whose priv_data is the HTTPContext
 * @param options  options passed on to the lower-level protocol
 * @return the location_changed value set by http_connect() on success,
 *         a negative AVERROR code on failure
 */
static int http_open_cnx_internal(URLContext *h, AVDictionary **options)
{
    /* The lower-level protocol defaults to tcp. */
    const char *path, *proxy_path, *lower_proto = "tcp", *local_path;

    char hostname[1024], hoststr[1024], proto[10];
    char auth[1024], proxyauth[1024] = "";
    char path1[MAX_URL_SIZE];
    char buf[1024], urlbuf[MAX_URL_SIZE];
    int port, use_proxy, err, location_changed = 0;

    HTTPContext *s = h->priv_data;
    /* Split s->location (the media URL) into scheme, auth, hostname, port
     * and path. */
    av_url_split(proto, sizeof(proto), auth, sizeof(auth),
                 hostname, sizeof(hostname), &port,
                 path1, sizeof(path1), s->location);
    ff_url_join(hoststr, sizeof(hoststr), NULL, NULL, hostname, port, NULL);
    if (!strcmp(proto, "https")) {
        /* https runs over tls instead of tcp and defaults to port 443. */
        av_dict_set_int(options, "fastopen", 0, 0);
        lower_proto = "tls";
        use_proxy   = 0;
        if (port < 0)
            port = 443;
    }
    if (port < 0)
        port = 80;

    if (path1[0] == '\0')
        path = "/";
    else
        path = path1;
    local_path = path;
    ....

    /* Build the lower-protocol URL in buf: tcp://{hostname}:{port} */
    ff_url_join(buf, sizeof(buf), lower_proto, NULL, hostname, port, NULL);
    /* s is the HTTPContext; s->hd is the URLContext of the lower protocol. */
    if (!s->hd) {
        /* Resolves to the tcp (or tls) protocol and connects it. */
        err = ffurl_open_whitelist(&s->hd, buf, AVIO_FLAG_READ_WRITE,
                                   &h->interrupt_callback, options,
                                   h->protocol_whitelist, h->protocol_blacklist, h);
        /* Bail out only on failure; on success fall through so that the
         * HTTP request is actually sent below. */
        if (err < 0)
            return err;
    }
    /* Send the HTTP request and read the response headers. */
    err = http_connect(h, path, local_path, hoststr,
                       auth, proxyauth, &location_changed);
    if (err < 0)
        return err;

    return location_changed;
}

1、底层protocol默认采用的是tcp,如果是https协议的话会更改为tls.

2、调用ffurl_open_whitelist生成对应tcp的URLProtocol(ff_tcp_protocol)

3、调用底层protocol进行发送报文

关于建立TCP链接，这里多分析一下：

ff_url_join前面用到的时候没有传入协议名，只是把hostname和port拼成hoststr；但在这里，由于传入了lower_proto（它表示http 协议的下一层协议，一般都是tcp，所以该值初始化的时候就是tcp），所以buf的值是由lower_proto拼凑起来的tcp链接tcp://flv-meipai.8686c.com:80，就是tcp+域名+端口。

接下来就是判断 s->hd 是否存在。默认情况下，是为NULL值，所以调用ffurl_open_whitelist开始打开tcp://flv-meipai.8686c.com:80，现在回到ffurl_open_whitelist函数了。这时通过url_find_protocol找到的就是tcp类型的URLProtocol，那么在ffurl_connect调用时调用的就是tcp.c里面的tcp_open函数，那么tcp的握手连接就在这个函数里面解析了。首先在tcp_open函数368行

av_url_split(proto, sizeof(proto), NULL, 0, hostname, sizeof(hostname),
        &port, path, sizeof(path), uri);

先根据uri解析出协议名以及hostname，然后调用以下的

ret = ijk_tcp_getaddrinfo_nonblock(hostname, portstr, &hints, &ai, s->addrinfo_timeout, &h->interrupt_callback, s->addrinfo_one_by_one);

做DNS解析。这个函数是ijkplayer作者加上去的，标准的ffmpeg 里面并没有。它的功能是利用多线程来解析DNS。但实际上从代码上并没有看到有什么优势，其实还是阻塞等结果解析出来了才返回的，这个地方不是很懂为什么要这么改。

接下来就是创建socket了

fd = ff_socket(cur_ai->ai_family, cur_ai->ai_socktype, cur_ai->ai_protocol);

调用ff_listen_connect函数进行tcp握手。

之后668行会有一个调用av_application_on_tcp_will_open(s->app_ctx)，主要是发送了AVAPP_CTRL_WILL_TCP_OPEN（#define AVAPP_CTRL_WILL_TCP_OPEN 0x20001 //AVAppTcpIOControl）消息，表示要开始tcp连接；在此之后会调用av_application_on_tcp_did_open，发送一个AVAPP_CTRL_DID_TCP_OPEN（#define AVAPP_CTRL_DID_TCP_OPEN 0x20002 //AVAppTcpIOControl）消息，表示tcp连接成功。

至此，调用tcp协议的ffurl_open_whitelist函数就调用完成了，tcp握手连接也建立成功。再回到http_open_cnx_internal函数。继续调用

err = http_connect(h, path, local_path, hoststr, auth, proxyauth, &location_changed);

http_connect

该函数的主要作用是调用底层protocol发送报文，写入请求header并读取服务器返回的结果存入HTTPContext。例如，当filename为http://xxxx:port时，URLProtocol对应ff_http_protocol,同时priv_data（即HTTPContext）的hd成员中嵌套了一个URLContext，该成员中还存放着对应ff_tcp_protocol的URLProtocol，这也是tcp在ffmpeg中视作lower protocol的来由。

/**
 * Send one HTTP request over the already opened lower-level connection
 * (s->hd) and read the response headers.
 *
 * The request line and headers are assembled into s->buffer: default
 * headers are added unless s->headers already contains them, followed by
 * the custom headers and the auth strings. The request (and any post_data)
 * is written with ffurl_write(), the input-buffer state is reset, and
 * http_read_header() parses the reply.
 *
 * (The "...." line marks code the article left out; authstr, proxyauthstr
 * and send_expect_100 are presumably assigned there -- confirm against
 * http.c.)
 *
 * @param h             URL context whose priv_data is the HTTPContext
 * @param path          request path placed on the request line
 * @param local_path    not used in the visible part of the function
 * @param hoststr       value for the default Host header
 * @param auth          not used in the visible part of the function
 * @param proxyauth     not used in the visible part of the function
 * @param new_location  passed to http_read_header(); when it is non-zero
 *                      afterwards, s->off is restored to its entry value
 * @return 0 on success, a negative value on failure
 */
static int http_connect(URLContext *h, const char *path, const char *local_path,
                        const char *hoststr, const char *auth,
                        const char *proxyauth, int *new_location)
{
    HTTPContext *s = h->priv_data;
    int post, err;
    char headers[HTTP_HEADERS_SIZE] = "";
    char *authstr = NULL, *proxyauthstr = NULL;
    uint64_t off = s->off;
    int len = 0;
    const char *method;
    int send_expect_100 = 0;
    int ret;

    /* send http header */
    /* When the URL was opened for reading only (as init_input() does),
     * AVIO_FLAG_WRITE is clear and post is 0. */
    post = h->flags & AVIO_FLAG_WRITE;

    if (s->post_data) {
        /* force POST method and disable chunked encoding when
         * custom HTTP post data is set */
        post            = 1;
        s->chunked_post = 0;
    }

    /* An explicit s->method wins; otherwise POST when writing, else "GET". */
    if (s->method)
        method = s->method;
    else
        method = post ? "POST" : "GET";
	....
    
    /* set default headers if needed */
    if (!has_header(s->headers, "\r\nUser-Agent: "))
        len += av_strlcatf(headers + len, sizeof(headers) - len,
                           "User-Agent: %s\r\n", s->user_agent);
    if (!has_header(s->headers, "\r\nAccept: "))
        len += av_strlcpy(headers + len, "Accept: */*\r\n",
                          sizeof(headers) - len);
    // Note: we send this on purpose even when s->off is 0 when we're probing,
    // since it allows us to detect more reliably if a (non-conforming)
    // server supports seeking by analysing the reply headers.
    if (!has_header(s->headers, "\r\nRange: ") && !post && (s->off > 0 || s->end_off || s->seekable == -1)) {
        len += av_strlcatf(headers + len, sizeof(headers) - len,
                           "Range: bytes=%"PRIu64"-", s->off);
        if (s->end_off)
            len += av_strlcatf(headers + len, sizeof(headers) - len,
                               "%"PRId64, s->end_off - 1);
        len += av_strlcpy(headers + len, "\r\n",
                          sizeof(headers) - len);
    }
    if (send_expect_100 && !has_header(s->headers, "\r\nExpect: "))
        len += av_strlcatf(headers + len, sizeof(headers) - len,
                           "Expect: 100-continue\r\n");

    if (!has_header(s->headers, "\r\nConnection: ")) {
        if (s->multiple_requests)
            len += av_strlcpy(headers + len, "Connection: keep-alive\r\n",
                              sizeof(headers) - len);
        else
            len += av_strlcpy(headers + len, "Connection: close\r\n",
                              sizeof(headers) - len);
    }

    if (!has_header(s->headers, "\r\nHost: "))
        len += av_strlcatf(headers + len, sizeof(headers) - len,
                           "Host: %s\r\n", hoststr);
    if (!has_header(s->headers, "\r\nContent-Length: ") && s->post_data)
        len += av_strlcatf(headers + len, sizeof(headers) - len,
                           "Content-Length: %d\r\n", s->post_datalen);

    if (!has_header(s->headers, "\r\nContent-Type: ") && s->content_type)
        len += av_strlcatf(headers + len, sizeof(headers) - len,
                           "Content-Type: %s\r\n", s->content_type);
    if (!has_header(s->headers, "\r\nCookie: ") && s->cookies) {
        char *cookies = NULL;
        if (!get_cookies(s, &cookies, path, hoststr) && cookies) {
            len += av_strlcatf(headers + len, sizeof(headers) - len,
                               "Cookie: %s\r\n", cookies);
            av_free(cookies);
        }
    }
    if (!has_header(s->headers, "\r\nIcy-MetaData: ") && s->icy)
        len += av_strlcatf(headers + len, sizeof(headers) - len,
                           "Icy-MetaData: %d\r\n", 1);

    /* now add in custom headers */
    if (s->headers)
        av_strlcpy(headers + len, s->headers, sizeof(headers) - len);

    /* Assemble the full request: request line, optional chunked header,
     * collected headers, then the (proxy) authorization strings. */
    ret = snprintf(s->buffer, sizeof(s->buffer),
             "%s %s HTTP/1.1\r\n"
             "%s"
             "%s"
             "%s"
             "%s%s"
             "\r\n",
             method,
             path,
             post && s->chunked_post ? "Transfer-Encoding: chunked\r\n" : "",
             headers,
             authstr ? authstr : "",
             proxyauthstr ? "Proxy-" : "", proxyauthstr ? proxyauthstr : "");

    av_log(h, AV_LOG_DEBUG, "request: %s\n", s->buffer);

    /* A completely full headers[] or a truncated snprintf() means the
     * request did not fit; refuse to send it. */
    if (strlen(headers) + 1 == sizeof(headers) ||
        ret >= sizeof(s->buffer)) {
        av_log(h, AV_LOG_ERROR, "overlong headers\n");
        err = AVERROR(EINVAL);
        goto done;
    }

    /* Write the request through the lower-level protocol context s->hd. */
    if ((err = ffurl_write(s->hd, s->buffer, strlen(s->buffer))) < 0)
        goto done;

    if (s->post_data)
        if ((err = ffurl_write(s->hd, s->post_data, s->post_datalen)) < 0)
            goto done;

    /* init input buffer */
    s->buf_ptr          = s->buffer;
    s->buf_end          = s->buffer;
    s->line_count       = 0;
    s->off              = 0;
    s->icy_data_read    = 0;
    s->filesize         = UINT64_MAX;
    s->willclose        = 0;
    s->end_chunked_post = 0;
    s->end_header       = 0;
    if (post && !s->post_data && !send_expect_100) {
        /* Pretend that it did work. We didn't read any header yet, since
         * we've still to send the POST data, but the code calling this
         * function will check http_code after we return. */
        s->http_code = 200;
        err = 0;
        goto done;
    }

    /* wait for header */
    /* Read and parse the response headers. */
    err = http_read_header(h, new_location);
    if (err < 0)
        goto done;

    if (*new_location)
        s->off = off;

    /* Succeed only if the offset after header parsing equals the one on entry. */
    err = (off == s->off) ? 0 : -1;
done:
    av_freep(&authstr);
    av_freep(&proxyauthstr);
    return err;
}

1、ffurl_write: 通过lower protocol(例如tcp)写入协议头

从ffurl_write代码中可以看出，它实际调用的是传入的URLContext所对应prot的url_write方法。这里传入的是s->hd，也就是http协议下一层的协议tcp对应的URLContext，因此调用的是tcp_write方法（如果上层对http的URLContext调用ffurl_write，则先进入http_write，它内部同样是调用ffurl_write(s->hd, buf, size);转交给下一层的tcp）。tcp_write方法最终调用就是ret = send(s->fd, buf, size, MSG_NOSIGNAL);系统的send方法。所以最终都是调用系统实现的Socket接口。至此，http_connect方法的发送request的请求就完毕了。剩下就是等待响应了。

2、http_read_header: 读取服务器返回的header,并保存在HTTPContext

http_read_header就是不断的读取网络返回的数据，并解析出来。

至此http_open_cnx_internal函数也调用完了。回到http_open_cnx函数。这时如果能正常获取数据，那么s->http_code的值应该是200，至此，http_open_cnx，http_open函数也返回了，流程可以直接返回到ffio_open_whitelist函数中，ffio_fdopen函数只是对AVIOContext结构体根据http request获取的数据进行一些赋值。那就可以直接返回到init_input函数了。接下来是

/* Tail of init_input(): nothing to probe when the input format is already
 * known; otherwise detect it from the data read through s->pb. */
if (s->iformat)
    return 0;
return av_probe_input_buffer2(s->pb, &s->iformat, filename,
                             s, 0, s->format_probesize);

判断如果s->iformat没有值，就根据filename解析出s->iformat。这也是在前面开头提到的，如果没有加av_find_input_format("flv")这个代码，那就要重新根据filename来解析数据了，这个函数比较耗时，需要读取到一定数据后才能解析出来。

probe(探查)

在根据url生成URLProtocol以及根据读取服务器返回的header初始化HTTPContext后，需要进一步的判断媒体资源属于哪种格式，适用于哪种解码器进行demux.

/**
 * Open the input and probe its format if necessary.
 *
 * Three cases are handled in order:
 *  1. the caller supplied its own AVIOContext (s->pb): mark custom I/O and
 *     probe from it unless the format is already set;
 *  2. the format needs no file (AVFMT_NOFILE), or can be guessed from the
 *     file name alone: return the probe score without opening anything;
 *  3. otherwise open the URL through s->io_open and probe the data read.
 *
 * @return a probe score or 0 on success, a negative error code on failure
 */
static int init_input(AVFormatContext *s, const char *filename,
                      AVDictionary **options)
{
    AVProbeData pd = { filename, NULL, 0 };
    int score = AVPROBE_SCORE_RETRY;
    int ret;

    /* Case 1: custom I/O supplied by the caller (not the usual path). */
    if (s->pb) {
        s->flags |= AVFMT_FLAG_CUSTOM_IO;
        if (!s->iformat)
            return av_probe_input_buffer2(s->pb, &s->iformat, filename,
                                         s, 0, s->format_probesize);
        if (s->iformat->flags & AVFMT_NOFILE)
            av_log(s, AV_LOG_WARNING, "Custom AVIOContext makes no sense and "
                                      "will be ignored with AVFMT_NOFILE format.\n");
        return 0;
    }

    /* Case 2: the format is known and needs no file, or it can be
     * determined without reading any data. */
    if (s->iformat) {
        if (s->iformat->flags & AVFMT_NOFILE)
            return score;
    } else {
        s->iformat = av_probe_input_format2(&pd, 0, &score);
        if (s->iformat)
            return score;
    }

    /* Case 3: open the URL (this fills s->pb), then probe the stream. */
    ret = s->io_open(s, &s->pb, filename, AVIO_FLAG_READ | s->avio_flags, options);
    if (ret < 0)
        return ret;

    return av_probe_input_buffer2(s->pb, &s->iformat, filename,
                                 s, 0, s->format_probesize);
}

初始化时，AVInputFormat是无效的。因此会调用av_probe_input_buffer2进行媒体资源的格式探查

av_probe_input_buffer2

该函数的主要行为是通过读取数据匹配到合适的解复用器(demuxer),比如针对aac音频，那匹配的就是ff_aac_demuxer。

/**
 * Detect the input format by reading progressively more data from pb.
 *
 * Starting at PROBE_BUF_MIN, the probe window is doubled each round (capped
 * at max_probe_size) until av_probe_input_format2() accepts a format or the
 * cap is reached. The bytes consumed are handed back to pb afterwards
 * through ffio_rewind_with_probe_data().
 *
 * @param pb              I/O context to read from
 * @param fmt             in/out: probing runs only while *fmt is NULL and
 *                        stores the detected format here
 * @param filename        name given to the probers (may be NULL)
 * @param logctx          context for av_log()
 * @param offset          number of leading bytes to skip before probing
 * @param max_probe_size  upper bound on probed bytes; 0 selects PROBE_BUF_MAX
 * @return the probe score on success, a negative error code on failure
 */
int av_probe_input_buffer2(AVIOContext *pb, AVInputFormat **fmt,
                          const char *filename, void *logctx,
                          unsigned int offset, unsigned int max_probe_size)
{
    AVProbeData pd = { filename ? filename : "" };
    uint8_t *buf = NULL;
    int ret = 0, probe_size, buf_offset = 0;
    int score = 0;
    int ret2;
    
    /* A zero max_probe_size (s->format_probesize is 0 by default according
     * to the original annotation) selects PROBE_BUF_MAX, annotated as 1<<20. */
    if (!max_probe_size)
      max_probe_size = PROBE_BUF_MAX;


    /* Only when pb carries an AVClass can its options be queried. */
    if (pb->av_class) {
        uint8_t *mime_type_opt = NULL;
        char *semi;
        /* Fetch the MIME type, which the article says was obtained while
         * reading the HTTP response headers. */
        av_opt_get(pb, "mime_type", AV_OPT_SEARCH_CHILDREN, &mime_type_opt);
        pd.mime_type = (const char *)mime_type_opt;
        /* Cut off any parameters following ';'. */
        semi = pd.mime_type ? strchr(pd.mime_type, ';') : NULL;
        if (semi) {
            *semi = '\0';
        }
    }
    /* PROBE_BUF_MIN is annotated as 2048 in the original. */
    /* The probe size starts there and is doubled on every round. */
    for (probe_size = PROBE_BUF_MIN; probe_size <= max_probe_size && !*fmt;
        /* Double probe_size, capped at max_probe_size; the FFMAX term lets
         * the loop condition fail once the cap has been probed. */
        probe_size = FFMIN(probe_size << 1,
                            FFMAX(max_probe_size, probe_size + 1))) {
                              
        /* AVPROBE_SCORE_RETRY is annotated as 25; it is the minimum score
         * required while more data could still be read. */
        score = probe_size < max_probe_size ? AVPROBE_SCORE_RETRY : 0;

        /* Read probe data. */
        /* (Re)allocate the buffer for this round, plus padding. */
        if ((ret = av_reallocp(&buf, probe_size + AVPROBE_PADDING_SIZE)) < 0)
            goto fail;
            
        /* Read media data to fill the window up to probe_size: */
        /* 1. data was read: ret > 0 */
        /* 2. a negative return takes the branch below */
        if ((ret = avio_read(pb, buf + buf_offset,
                             probe_size - buf_offset)) < 0) {
            /* Fail if error was not end of file, otherwise, lower score. */
            if (ret != AVERROR_EOF)
                goto fail;

            score = 0;
            ret   = 0;          /* error was end of file, nothing read */
        }
        
        /* buf_offset starts at 0 and accumulates the bytes read so far. */
        buf_offset += ret;
        if (buf_offset < offset)
            continue;
        pd.buf_size = buf_offset - offset;
        pd.buf = &buf[offset];
        
        /* Zero the extra padding bytes after the probe data. */
        memset(pd.buf + pd.buf_size, 0, AVPROBE_PADDING_SIZE);
      
        /* Guess file format. */
        *fmt = av_probe_input_format2(&pd, 1, &score);
        if (*fmt) {
            /* This can only be true in the last iteration. */
            if (score <= AVPROBE_SCORE_RETRY) {
                av_log(logctx, AV_LOG_WARNING,
                       "Format %s detected only with low score of %d, "
                       "misdetection possible!\n", (*fmt)->name, score);
            } else
                av_log(logctx, AV_LOG_DEBUG,
                       "Format %s probed with size=%d and score=%d\n",
                       (*fmt)->name, probe_size, score);
#if 0
            FILE *f = fopen("probestat.tmp", "ab");
            fprintf(f, "probe_size:%d format:%s score:%d filename:%s\n", probe_size, (*fmt)->name, score, filename);
            fclose(f);
#endif
        }
    }

    if (!*fmt)
        ret = AVERROR_INVALIDDATA;

fail:
    /* Rewind. Reuse probe buffer to avoid seeking. */
    ret2 = ffio_rewind_with_probe_data(pb, &buf, buf_offset);
    /* Keep an earlier error; otherwise report the rewind result. */
    if (ret >= 0)
        ret = ret2;

    av_freep(&pd.mime_type);
    return ret < 0 ? ret : score;
}

初始化AVFormatContext时并没有确定probe data的大小,所以这里会统一设置为1<<20
创建AVProbeData结构，并把之前URLProtocol读取到的mime_type赋值到其中
循环多次进行probe行为，每次循环探查的数据大小逐渐增加，当匹配到特定解码器后跳出循环。
probe行为：a).通过tcp读取数据，读取数据的上限为probe_max(1<<20)。b).读取数据的过程中，如果遇到end of file之外的错误则探查行为失败，进入失败流程。c).读取数据之后，调用av_probe_input_format2(实质上调用的是av_probe_input_format3)进行format guess,并打分。

avio_read

ffmpeg自带的数据读取函数,读取数据之后会存放在AVIOContext->buffer之中

/**
 * Read up to size bytes from s into buf.
 *
 * Data is normally served from the internal buffer of s, which is refilled
 * with fill_buffer() when empty. Requests larger than that buffer (or made
 * in direct mode) bypass it and read straight into buf, as long as no
 * checksum is being maintained.
 *
 * When called from av_probe_input_buffer2(), size is the remaining probe
 * window; the first probe size is annotated as 2048 in the original.
 *
 * (The "...." line marks code the article left out.)
 *
 * @return in the visible code, the number of bytes copied into buf
 **/
int avio_read(AVIOContext *s, unsigned char *buf, int size)
{
    int len, size1;
    size1 = size;
    while (size > 0) {
        /* Bytes available in the internal buffer, capped at the request. */
        len = FFMIN(s->buf_end - s->buf_ptr, size);
        /* s->write_flag is 0 for a context opened for reading (original note). */
        if (len == 0 || s->write_flag) {
            /* Original notes: s->update_checksum is NULL here and
             * s->buffer_size is IO_BUFFER_SIZE (32768). */
            if((s->direct || size > s->buffer_size) && !s->update_checksum) {
                // bypass the buffer and read data directly into buf
                if(s->read_packet)
                    len = s->read_packet(s->opaque, buf, size);

                if (len <= 0) {
                    /* do not modify buffer if EOF reached so that a seek back can
                    be done without rereading data */
                    s->eof_reached = 1;
                    if(len<0)
                        s->error= len;
                    break;
                } else {
                    s->pos += len;
                    s->bytes_read += len;
                    size -= len;
                    buf += len;
                    // reset the buffer
                    s->buf_ptr = s->buffer;
                    s->buf_end = s->buffer/* + len*/;
                }
            } else {
                /* Usual path on the first pass: refill the internal buffer
                 * (for an http URL the article says this reads over TCP). */
                fill_buffer(s);
                len = s->buf_end - s->buf_ptr;
                /* Nothing could be read: stop. */
                if (len == 0)
                    break;
            }
        } else {
            /* Taken on the pass after a refill: */
            /* copy the buffered bytes at s->buf_ptr into buf. */
            memcpy(buf, s->buf_ptr, len);
            buf += len;
            s->buf_ptr += len;
            /* Reduce the outstanding request by what was copied. */
            size -= len;
        }
    }
    ....
    return size1 - size;
}

1、首次读取通过调用fill_buffer进行,如果是http协议的媒体资源,会通过底层的tcp去读取数据,并存放在AVIOContext->buffer中

2、进入第二次循环之后，会将AVIOContext->buffer拷贝到目标buf中

av_probe_input_format3

av_probe_input_format3函数的主要行为是format guess.

/**
 * Guess the input format for the given probe data.
 *
 * Every registered AVInputFormat is scored, using its read_probe callback,
 * a file-extension match and a MIME-type match. The format with the
 * strictly highest score wins; a tie for the top score yields NULL.
 *
 * @param pd         probe data (buffer, file name, MIME type)
 * @param is_opened  whether the input has been opened; used together with
 *                   AVFMT_NOFILE to decide which formats to consider
 * @param score_ret  receives the best score found
 * @return the best-matching format, or NULL if there is none or the best
 *         score is shared by several formats
 */
AVInputFormat *av_probe_input_format3(AVProbeData *pd, int is_opened,
                                      int *score_ret)
{
    AVProbeData lpd = *pd;
    AVInputFormat *fmt1 = NULL, *fmt;
    int score, score_max = 0;
    const static uint8_t zerobuffer[AVPROBE_PADDING_SIZE];
    /* How the size of a leading ID3v2 tag relates to the probe buffer. */
    enum nodat {
        NO_ID3,
        ID3_ALMOST_GREATER_PROBE,
        ID3_GREATER_PROBE,
        ID3_GREATER_MAX_PROBE,
    } nodat = NO_ID3;

    if (!lpd.buf)
        lpd.buf = (unsigned char *) zerobuffer;
    /* If the data starts with an ID3v2 tag (typical for mp3), advance the
     * buffer past the tag when enough data follows it. */
    if (lpd.buf_size > 10 && ff_id3v2_match(lpd.buf, ID3v2_DEFAULT_MAGIC)) {
        int id3len = ff_id3v2_tag_len(lpd.buf);
        if (lpd.buf_size > id3len + 16) {
            if (lpd.buf_size < 2LL*id3len + 16)
                nodat = ID3_ALMOST_GREATER_PROBE;
            lpd.buf      += id3len;
            lpd.buf_size -= id3len;
        } else if (id3len >= PROBE_BUF_MAX) {
            nodat = ID3_GREATER_MAX_PROBE;
        } else
            nodat = ID3_GREATER_PROBE;
    }

    fmt = NULL;
    
    /* 0. fmt1 starts as NULL, so iteration begins at the first format. */
    /* 1. Per the article, ffplay's main registers all formats at startup. */
    /* 2. REGISTER_DEMUXER(AAC, aac) adds a demuxer such as ff_aac_demuxer
     *    (an AVInputFormat) to the list. */
    
    /* Walk the list of registered AVInputFormats, e.g. ff_mp3_demuxer. */
    while ((fmt1 = av_iformat_next(fmt1))) {
        /* Skip formats whose AVFMT_NOFILE flag does not fit is_opened;
         * "image2" is exempt from this filter. */
        if (!is_opened == !(fmt1->flags & AVFMT_NOFILE) && strcmp(fmt1->name, "image2"))
            continue;
        score = 0;
        
        if (fmt1->read_probe) {
            /* Content-based probe, e.g. mp3_read_probe for mp3. */
            score = fmt1->read_probe(&lpd);
            if (score)
                av_log(NULL, AV_LOG_TRACE, "Probing %s score:%d size:%d\n", fmt1->name, score, lpd.buf_size);
            /* A matching extension sets a floor whose height depends on how
             * much of the probe data the ID3 tag took up. */
            if (fmt1->extensions && av_match_ext(lpd.filename, fmt1->extensions)) {
                switch (nodat) {
                case NO_ID3:
                    score = FFMAX(score, 1);
                    break;
                case ID3_GREATER_PROBE:
                case ID3_ALMOST_GREATER_PROBE:
                    score = FFMAX(score, AVPROBE_SCORE_EXTENSION / 2 - 1);
                    break;
                case ID3_GREATER_MAX_PROBE:
                    score = FFMAX(score, AVPROBE_SCORE_EXTENSION);
                    break;
                }
            }
        } else if (fmt1->extensions) {  /* no probe callback: rely on the demuxer's registered extensions, e.g. 'mp2,mp3,m2a,mpa' for ff_mp3_demuxer */
            if (av_match_ext(lpd.filename, fmt1->extensions))
                score = AVPROBE_SCORE_EXTENSION;
        }
        /* A MIME-type match raises the score to at least AVPROBE_SCORE_MIME. */
        if (av_match_name(lpd.mime_type, fmt1->mime_type)) {
            if (AVPROBE_SCORE_MIME > score) {
                av_log(NULL, AV_LOG_DEBUG, "Probing %s score:%d increased to %d due to MIME type\n", fmt1->name, score, AVPROBE_SCORE_MIME);
                score = AVPROBE_SCORE_MIME;
            }
        }
        /* Track the best score; a tie makes the result ambiguous. */
        if (score > score_max) {
            score_max = score;
            fmt       = fmt1;
        } else if (score == score_max)
            fmt = NULL;
    }
    /* When the ID3 tag left too little data to probe, cap the reported score. */
    if (nodat == ID3_GREATER_PROBE)
        score_max = FFMIN(AVPROBE_SCORE_EXTENSION / 2 - 1, score_max);
    *score_ret = score_max;

    return fmt;
}

1、首先在AVProbeData数据中，查看是否有id3信息。id3一般在mp3音频中，该信息用于存放比如专辑名称等一些数据。

2、调用av_iformat_next遍历已注册的解复用器(demuxer，即AVInputFormat)。在初始化AVFormatContext之后,会调用av_register_all注册解复用器、复用器和编解码器。在单次循环中，调用对应demuxer的read_probe进行打分，比如mp3音频对应的就是mp3_read_probe函数，随后根据最高分选出适合的demuxer。

参考链接：ffplay如何通过URLProtocol拉取http数据【源码篇】_哲学家♂的博客-CSDN博客

参考链接：Avformat_open_input函数的分析之--HTTP篇_baohonglai的博客-CSDN博客_avformat_open_input参数

ijkplayer通过URLProtocol拉取http数据源码分析丰富篇

背景