Nginx upstream (一) 整体流程分析

Nginx访问上游服务器的流程大致分为以下几个阶段：启动upstream、连接上游服务器、向上游发送请求、接收上游响应（包头/包体）、结束请求。本篇主要从代码流程的角度，梳理upstream整体的数据处理流程。下面先看一下upstream相关的两个重要数据结构ngx_http_upstream_t和ngx_http_upstream_conf_t：
相关数据结构


typedef struct ngx_http_upstream_s    ngx_http_upstream_t;

struct ngx_http_upstream_s {
    ngx_http_upstream_handler_pt read_event_handler;    // 处理读事件的回调方法
    ngx_http_upstream_handler_pt write_event_handler;   // 处理写事件的回调方法

    ngx_peer_connection_t peer;                         // 主动向上游发起的连接，稍后会详细分析
    ngx_event_pipe_t *pipe;                             // 当开启缓存配置，会用pipe来转发响应，需要http模块在使用upstream机制前构造pipe结构体
    ngx_chain_t *request_bufs;                          // 用链表将ngx_buf_t缓冲区链接起来，表示所有需要发送到上游的请求内容，
                                                        // create_request回调在于构造request_buf链表

    ngx_output_chain_ctx_t output;                      // 向下游发送响应的方式，稍后会详细分析
    ngx_chain_writer_ctx_t writer;                      // 向下游发送响应的方式，稍后会详细分析

    ngx_http_upstream_conf_t *conf;                     // upstream相关的配置信息
#if (NGX_HTTP_CACHE)
    ngx_array_t *caches;                                // 缓存数组，稍后会单独介绍缓存相关内容
#endif

    ngx_http_upstream_headers_in_t headers_in;          // 当直接转发时，process_header将解析的头部适配为http头部，同时将包头信息放在headers_in中
    ngx_http_upstream_resolved_t *resolved;             // 用于解析主机域名，后面会详细介绍

    ngx_buf_t from_client;                              // ToDo....
    ngx_buf_t buffer;                                   // 接收上游服务器响应包头的缓存区，当不需要直接响应或buffering为0时，也作为转发包体缓冲区  
    off_t length;                                       // 来自上游服务器的响应包体的长度

    ngx_chain_t *out_bufs;                              // 使用时再具体介绍，不同场景下有不同意义
    ngx_chain_t *busy_bufs;                             // 当buffering为0时，表示上一次向下游转发响应时没有发送完成的内容
    ngx_chain_t *free_bufs;                             // 当buffering为0时，用于回收out_bufs中已经发送给下游的ngx_buf_t结构体

    ngx_int_t (*input_filter_init)(void *data);         // 处理包体前的初始化方法，其中data用于传递用户数据结构，即下方的input_filter_ctx
    ngx_int_t (*input_filter)(void *data, ssize_t bytes)// 处理包体的方法，bytes表示本次接收到的包体长度，data同上
    void *input_filter_ctx;                             // 传递http模块的自定义的数据结构

#if (NGX_HTTP_CACHE)
    ngx_int_t (*create_key)(ngx_http_request_t *r);     // cache部分，后面再分析
#endif
    ngx_int_t (*create_request)(ngx_http_request_t *r); // 用于构造发往上游服务器的请求
    ngx_int_t (*reinit_request)(ngx_http_request_t *r); // 与上游通讯失败，需要重新发起连接时，用该方法重新初始化请求信息
    ngx_int_t (*process_header)(ngx_http_request_t *r); // 解析上游服务器返回响应的包头，NGX_AGAIN接收不完整，NGX_OK解析到完整包头
    void (*abort_request)(ngx_http_request_t *r);       // 暂时没有用到
    void (*finalize_request)(ngx_http_request_t *r,     // 请求结束时会调用，目前没有实际作用
                                         ngx_int_t rc);
    ngx_int_t (*rewrite_redirect)(ngx_http_request_t *r,// 上游返回响应中含Location或Refresh时，process_header会调用http模块实现的该方法
                     ngx_table_elt_t *h, size_t prefix);
    ngx_int_t (*rewrite_cookie)(ngx_http_request_t *r,  // 同上，当响应中含Set-Cookie时，会调用http模块实现的该方法
                               ngx_table_elt_t *h);
    ngx_msec_t timeout;                                 // 暂时没有用到
    ngx_http_upstream_state_t *state;                   // 用于表示上游响应的错误码、包体长度等信息

    ngx_str_t method;                                   // 用于文件缓存，稍后再进行分析
    ngx_str_t schema;                                   // 记录日志时使用
    ngx_str_t uri;                                      // 记录日志时使用

    ngx_http_cleanup_pt *cleanup;                       // 用于标识是否需要清理资源，相当于一个标志位，实际不会调用该方法

    unsigned store:1;                                   // 是否指定文件缓存路径的标志位
    unsigned cacheable:1;                               // 是否启用文件缓存
    unsigned accel:1;                                   // 目前没有用到
    unsigned ssl:1;                                     // 是否基于SSL协议访问上游服务器

    unsigned buffering:1;                               // 向下游转发响应包体时，是否开启更大内存及临时磁盘文件用于缓存来不及发送到下游的响应包体
    unsigned keepalive:1;                               // 标识与后端是否开启keepalive ?
    unsigned upgrade:1;                                 // 是否存在upgrade header

    unsigned request_sent:1;                            // 是否向上游服务器发送了请求
    unsigned header_sent:1;                             // 为1时，表示包头已经转发给客户端了
}

ngx_http_upstream_conf_t：指定了upstream的运行方式，必须在启动upstream之前设置

点击(此处)折叠或打开


typedef struct {
    ngx_http_upstream_srv_conf_t *upstream; // 当上面没有实现resolved成员时，用该结构体定义上游服务器的配置

    ngx_msec_t connect_timeout;       // 建立tcp连接的超时时间，即写事件添加到定时器中设置的超时时间
    ngx_msec_t send_timeout;          // 发送请求的超时时间，即写事件添加到定时器中设置的超时时间
    ngx_msec_t read_timeout;          // 接收响应的超时时间，即读事件添加到定时器中设置的超时时间
    ngx_msec_t timeout;               // 暂时没有使用
    ngx_msec_t next_upstream_timeout; // 

    size_t send_lowat;                // 发送缓存区的下限，即TCP的SO_SNOLOWAT选项
    size_t buffer_size;               // 指定接收头部缓冲区分配的内存大小，当buffering为0时，由于上述buffer同时用于接收包体，也表示接收包体缓冲区大小
    size_t limit_rate;                // 

    size_t busy_buffers_size;         // 当buffering为1，且向下游转发响应时生效，会设置到ngx_event_pipe_t结构体的busy_size中
    size_t max_temp_file_size;        // 指定临时文件的大小，限制ngx_event_pipe_t中的temp_file
    size_t temp_file_write_size;      // 将缓冲区的响应写入临时文件时，一次写入字符流的最大长度
    ......
    ngx_bufs_t bufs;                  // 以缓存响应的方式转发上游服务器的包体时所使用的内存大小
    ngx_uint_t ignore_headers;        // 以位图的形式标识在转发时需要忽略的headers
    ngx_uint_t next_upstream;         // 以位图的方式表示一些错误码，当处理上游响应时发现该错误码，选择下一个上游服务器重发请求
    ngx_uint_t store_access;          // 表示创建的临时目录和文件的权限
    ngx_uint_t next_upstream_tries;   // 
    ngx_flag_t buffering;             // 为1时表示打开缓存，尽量在内存和磁盘中缓存来自上游的响应，为0时则开辟固定大小内存块作为缓存来转发响应
    ......
    ngx_flag_t ignore_client_abort;   // 为1时，表示与上游服务器交互时不检查nginx与下游服务器是否断开，即使下游主动关闭连接，也不会中断与上游交互
    ngx_flag_t intercept_errors;      // 详见ngx_http_upstream_intercept_errors
    ngx_flag_t cyclic_temp_file;      // 为1时，会尝试复用临时文件中已经使用过的空间
    ......
    ngx_path_t *temp_path;            // buffering为1的情况下转发响应时，存放临时文件的路径
    ngx_hash_t hide_headers_hash;     // 不转发的头部，根据hide_headers和pass_headers动态数组构造出的需要隐藏的http头部散列表
    ngx_array_t *hide_headers;        // 当转发上游头部给下游时，如果不希望将某些头部转发给下游，则设置到该数组中
    ngx_array_t *pass_headers;        // 转发头部时upstream机制默认不会转发某些头部，当确定需要转发时，需要设置到该数组中

    ngx_http_upstream_local_t *local; // 连接上游服务器时，需要使用的本机地址

    ngx_array_t *store_lengths;       // 当需要将上游响应缓存到文件中时，表示存放路径的长度
    ngx_array_t *store_values;        // 当需要将上游响应缓存到文件中时，表示存放路径
    ......
    signed store:2;                   // 同ngx_http_upstream_t中的store
    unsigned intercept_404:1;         // 如果该值设为1，当上游返回404时直接转发该错误码给下游，而不会去与error_page进行比较
    unsigned change_buffering:1;      // 当为1时，根据上游服务器返回的响应头部，动态决定是以上游网速优先，还是下游网速优先
    ......
    ngx_str_t module;                 // 使用upstream的模块名称，仅用于记录日志
} ngx_http_upstream_conf_t

启动upstream
当收到请求后，http的代理模块是ngx_http_proxy_module，其NGX_HTTP_CONTENT_PHASE阶段的处理函数为ngx_http_proxy_handler

点击(此处)折叠或打开


// Content-phase handler of ngx_http_proxy_module (abridged listing): creates
// the upstream object for the request, wires the proxy module's callbacks and
// configuration into it, then starts reading the client request body.
static ngx_int_t
ngx_http_proxy_handler(ngx_http_request_t *r)
{
    // Create the ngx_http_upstream_t structure and store it in r->upstream
    if (ngx_http_upstream_create(r) != NGX_OK) {
        return NGX_HTTP_INTERNAL_SERVER_ERROR;
    }
    .....
    plcf = ngx_http_get_module_loc_conf(r, ngx_http_proxy_module);
    .....
    u = r->upstream;
    .....
    // Point the upstream's conf member at the proxy location's upstream configuration
    u->conf = &plcf->upstream;
    // Register the proxy module's callbacks
    u->create_request = ngx_http_proxy_create_request;
    u->reinit_request = ngx_http_proxy_reinit_request;
    u->process_header = ngx_http_proxy_process_status_line;
    u->abort_request = ngx_http_proxy_abort_request;
    u->finalize_request = ngx_http_proxy_finalize_request;
    ......
    u->buffering = plcf->upstream.buffering;
    .....
    // Read the client request body; ngx_http_upstream_init is passed as the
    // handler (presumably invoked once the body has been read -- confirm)
    rc = ngx_http_read_client_request_body(r, ngx_http_upstream_init);
    .....
    return NGX_DONE;
}

首先创建upstream的结构并进行设置，然后设置ngx_http_upstream_conf_t配置结构体给upstream->conf。ngx_http_upstream_init函数会根据
ngx_http_upstream_conf_t配置的信息初始化upstream，同时开始连接上游服务器，由此展开整个upstream的处理流程。
点击(此处)折叠或打开


// Entry point of the upstream mechanism (abridged listing): stops the client
// read timer and continues with ngx_http_upstream_init_request.
void ngx_http_upstream_init(ngx_http_request_t *r)
{
    ngx_connection_t *c;
    // The client (downstream) connection
    c = r->connection;
    ......
    // Once upstream is in use, remove the client's read event from the timer;
    // from here on the events of the upstream connection are what matter
    if (c->read->timer_set) {
        ngx_del_timer(c->read);
    }
    ......
    ngx_http_upstream_init_request(r);
}

继续看ngx_http_upstream_init_request函数
点击(此处)折叠或打开


// Prepares the upstream request (abridged listing): installs the client
// connection checks, builds the request via create_request, registers a
// cleanup handler, and starts connecting to the upstream server.
static void ngx_http_upstream_init_request(ngx_http_request_t *r)
{

    u = r->upstream;
    u->store = u->conf->store;
    ......
    // Install the handlers that check the client TCP connection, unless the
    // response is being stored, this is a post action, or
    // ignore_client_abort is configured
    if (!u->store && !r->post_action && !u->conf->ignore_client_abort) {
        r->read_event_handler = ngx_http_upstream_rd_check_broken_connection;
        r->write_event_handler = ngx_http_upstream_wr_check_broken_connection;
    }
    ......
    // Call the module's create_request callback (ngx_http_proxy_create_request
    // for the proxy module) to build the request that goes to upstream
    if (u->create_request(r) != NGX_OK) {
        ngx_http_finalize_request(r, NGX_HTTP_INTERNAL_SERVER_ERROR);
        return;
    }
    ......
    // Add a new entry to the request's cleanup list
    cln = ngx_http_cleanup_add(r, 0);
    // Its handler is ngx_http_upstream_cleanup, with the request as data
    cln->handler = ngx_http_upstream_cleanup;
    cln->data = r;
    u->cleanup = &cln->handler;
    ......
    // Start connecting to the upstream server
    ngx_http_upstream_connect(r, u);
}

与上游服务器建立连接
upstream机制与上游服务器之间通过TCP建立连接，为了保证三次握手的过程中不阻塞进程，Nginx采用非阻塞的套接字来连接上游服务器。
ngx_http_upstream_connect负责发起建连动作，如果没有立即返回成功，需要在epoll中监控该套接字，当出现可写事件时，说明建连过程已经结束（连接是否真正建立成功，还需要进一步检测，参见后文的ngx_http_upstream_test_connect）。

点击(此处)折叠或打开


// First look at ngx_http_upstream_connect (fragment; the full listing appears
// later in the article).
static void ngx_http_upstream_connect(ngx_http_request_t *r, ngx_http_upstream_t *u)
{
    // The connection itself is established by the function below
    rc = ngx_event_connect_peer(&u->peer);
    ....

点击(此处)折叠或打开


// Opens a non-blocking TCP connection to the peer (abridged, unterminated
// listing). NOTE(review): the "......" elisions hide the bodies and closing
// braces of several `if` blocks, so the braces below are not balanced.
ngx_int_t ngx_event_connect_peer(ngx_peer_connection_t *pc)
{
    // Create the TCP socket
    s = ngx_socket(pc->sockaddr->sa_family, SOCK_STREAM, 0);
    ......
    // Obtain an ngx_connection_t for the socket (per the original annotation,
    // taken from the free connection pool of ngx_cycle_t)
    c = ngx_get_connection(s, pc->log);
    ......
    // Switch the socket to non-blocking mode
    if (ngx_nonblocking(s) == -1) {
        ......
    // Bind the local address when one is configured
    if (pc->local) {
        if (bind(s, pc->local->sockaddr, pc->local->socklen) == -1) {
        ......
    // Install the send/receive callbacks of the connection
    c->recv = ngx_recv;
    c->send = ngx_send;
    c->recv_chain = ngx_recv_chain;
    c->send_chain = ngx_send_chain;
    // Enable sendfile support
    c->sendfile = 1;
    ......
    rev = c->read;
    wev = c->write;
    ......
    pc->connection = c;
    // If the event module provides add_conn, register the socket with the
    // event mechanism (epoll here) for both readable and writable events
    if (ngx_add_conn) {
        if (ngx_add_conn(c) == NGX_ERROR) {
            goto failed;
        }
    }
    // Start connecting to the upstream server; the socket is non-blocking, so
    // the call returns immediately
    rc = connect(s, pc->sockaddr, pc->socklen);
    ......

回到ngx_http_upstream_connect继续分析
点击(此处)折叠或打开


// Connects to the upstream server and installs the event handlers (abridged
// listing). If the connect is still in progress it arms the connect timer and
// returns; otherwise it starts sending the request right away.
static void ngx_http_upstream_connect(ngx_http_request_t *r, ngx_http_upstream_t *u)
{
    ......
    // Establish the connection to the upstream server
    rc = ngx_event_connect_peer(&u->peer);
    ......
    c = u->peer.connection;
    c->data = r;
    // Both the read and the write event of the upstream connection are
    // dispatched through ngx_http_upstream_handler
    c->write->handler = ngx_http_upstream_handler;
    c->read->handler = ngx_http_upstream_handler;

    // Set the upstream-level write/read handlers that ngx_http_upstream_handler calls.
    // ngx_http_upstream_send_request_handler sends the request to upstream
    u->write_event_handler = ngx_http_upstream_send_request_handler;

    // ngx_http_upstream_process_header receives and parses the upstream response
    u->read_event_handler = ngx_http_upstream_process_header;
    ......
    if (rc == NGX_AGAIN) {
        // Connect not finished yet: the socket is already registered with
        // epoll, so only arm the write-event timer with conf->connect_timeout
        ngx_add_timer(c->write, u->conf->connect_timeout);
        return;
    }
    ......
    // Connection established: send the request now. Note this calls
    // ngx_http_upstream_send_request directly, not the write-event handler
    // installed above
    ngx_http_upstream_send_request(r, u);
}

下面先简单看一下connection的读写回调函数——ngx_http_upstream_handler
点击(此处)折叠或打开


// Event handler installed on both the read and the write event of the upstream
// connection (abridged listing); dispatches to the upstream's
// write_event_handler or read_event_handler.
static void
ngx_http_upstream_handler(ngx_event_t *ev)
{
    ......
    // ev->data is the ngx_connection_t between nginx and the upstream server
    c = ev->data;
    // The connection's data member is the ngx_http_request_t
    r = c->data;
    // The request's upstream member is the ngx_http_upstream_t
    u = r->upstream;
    // From here on c is the connection between the client and nginx
    c = r->connection;
    ......
    if (ev->write) {
        // Write event on the upstream connection
        u->write_event_handler(r, u);
    } else {
        // Read event on the upstream connection
        u->read_event_handler(r, u);
    }
    // Finally run the posted requests of the client connection (per the
    // original annotation, the same last step as ngx_http_request_handler)
    ngx_http_run_posted_requests(c);
}

发送请求到上游服务器
前面在介绍ngx_http_upstream_connect函数时，我们看到将ngx_http_upstream_t中的write_event_handler设置为了ngx_http_upstream_send_request_handler，而ngx_http_upstream_connect的最后直接调用了ngx_http_upstream_send_request发送请求。
下面先来看一下两者的区别
点击(此处)折叠或打开


// Write-event handler for the upstream connection (abridged listing): checks
// for a send timeout and for an already-forwarded response header before
// delegating to ngx_http_upstream_send_request.
static void ngx_http_upstream_send_request_handler(ngx_http_request_t *r, ngx_http_upstream_t *u)
{
    ngx_connection_t *c;

    // The ngx_connection_t of the connection to the upstream server
    c = u->peer.connection;

    // The write event's timedout flag is set: the write timer has expired
    if (c->write->timedout) {
        // Report the timeout to ngx_http_upstream_next, which decides (based
        // on the retry policy) whether to reconnect or to end the request
        ngx_http_upstream_next(r, u, NGX_HTTP_UPSTREAM_FT_TIMEOUT);
        return;
    }
    ......
    // header_sent == 1: the response header has already been forwarded to the client
    if (u->header_sent) {
        // Nothing more has to be sent upstream, so replace the write handler
        // with the dummy handler
        u->write_event_handler = ngx_http_upstream_dummy_handler;
        // Update the write event registration; the result is deliberately ignored
        (void) ngx_handle_write_event(c->write, 0);

        return;
    }
    // Send the HTTP request to upstream
    ngx_http_upstream_send_request(r, u);
}

通过上面的分析，现在很容易看出两者的区别，ngx_http_upstream_send_request_handler更多的是在检测请求的状态，而实际的发送函数是
ngx_http_upstream_send_request，下面继续看一下该函数。
点击(此处)折叠或打开


// Sends the request chain to the upstream server (abridged listing). On
// NGX_AGAIN it arms the send timer and waits for the next write event; once
// everything is sent it switches to waiting for (or processing) the response.
static void ngx_http_upstream_send_request(ngx_http_request_t *r, ngx_http_upstream_t *u)
{
    ......
    // Send u->request_bufs. Per the original annotation ngx_output_chain keeps
    // whatever could not be sent, so later calls pass NULL instead of the chain
    rc = ngx_output_chain(&u->output, u->request_sent ? NULL : u->request_bufs);
    
    // Remember that ngx_output_chain has been called once, so request_bufs is
    // only passed on the first call
    u->request_sent = 1;
    ......
    // Remove a pending write timer; whether a new one is needed depends on rc
    if (c->write->timer_set) {
        ngx_del_timer(c->write);
    }
    // NGX_AGAIN: the request is not completely sent yet, arm the send timer
    if (rc == NGX_AGAIN) {
        ngx_add_timer(c->write, u->conf->send_timeout);
        // Register the write event, passing the configured send_lowat
        if (ngx_handle_write_event(c->write, u->conf->send_lowat) != NGX_OK) {
            ngx_http_upstream_finalize_request(r, u,
                                               NGX_HTTP_INTERNAL_SERVER_ERROR);
            return;
        }
        // Return and wait for the next write event
        return;
    }

    /* rc == NGX_OK */
    // The whole request has been sent: the write handler becomes the dummy handler
    ......
    u->write_event_handler = ngx_http_upstream_dummy_handler;
    // Update the write event registration
    if (ngx_handle_write_event(c->write, 0) != NGX_OK) {
        ngx_http_upstream_finalize_request(r, u,
                                           NGX_HTTP_INTERNAL_SERVER_ERROR);
        return;
    }
    // Now wait for the upstream response: arm the read timer with read_timeout
    ngx_add_timer(c->read, u->conf->read_timeout);
    // If the read event is already ready, start processing the response header immediately
    if (c->read->ready) {
        ngx_http_upstream_process_header(r, u);
        return;
    }
}

接收上游服务器的响应
Nginx的upstream机制支持三种响应包体的处理方式：不转发响应、转发响应时以下游网速优先、转发响应时以上游网速优先。当ngx_http_request_t结构体的
subrequest_in_memory标志位为1时，即不转发响应；当subrequest_in_memory为0时，则转发响应；而ngx_http_upstream_conf_t配置结构中的buffering
为0时，则以下游网速优先，即使用固定大小的内存作为缓存；当buffering为1时，则以上游网速优先，即采用更多的内存、硬盘文件作为缓存。

下面看一下用于接收、解析响应头部的ngx_http_upstream_process_header方法
点击(此处)折叠或打开


// Receives and parses the upstream response header (abridged listing). After a
// complete header it either forwards the response to the client
// (ngx_http_upstream_send_response) or, for subrequest_in_memory, keeps the
// body in memory.
static void
ngx_http_upstream_process_header(ngx_http_request_t *r, ngx_http_upstream_t *u)
{
    ......
    // The connection to the upstream server
    c = u->peer.connection;
    ......
    // Read timeout: let ngx_http_upstream_next decide what to do
    if (c->read->timedout) {
        ngx_http_upstream_next(r, u, NGX_HTTP_UPSTREAM_FT_TIMEOUT);
        return;
    }
    // No request has been sent yet: verify the connection with
    // ngx_http_upstream_test_connect. If that check fails,
    // ngx_http_upstream_next decides whether to end the request or to try the
    // next upstream server
    if (!u->request_sent && ngx_http_upstream_test_connect(c) != NGX_OK) {
        ngx_http_upstream_next(r, u, NGX_HTTP_UPSTREAM_FT_ERROR);
        return;
    }
    // buffer.start == NULL means the receive buffer is not allocated yet;
    // allocate conf->buffer_size bytes from the request pool
    if (u->buffer.start == NULL) {
        u->buffer.start = ngx_palloc(r->pool, u->conf->buffer_size);
        if (u->buffer.start == NULL) {
            ngx_http_upstream_finalize_request(r, u, NGX_HTTP_INTERNAL_SERVER_ERROR);
            return;
        }
        // Initialisation of the new buffer is elided
        .......
    }

    for ( ;; ) {
        // Read response data into the free space that remains in the buffer
        n = c->recv(c, u->buffer.last, u->buffer.end - u->buffer.last);

        // NGX_AGAIN: nothing more to read right now; register the read event
        // and wait for the next one
        if (n == NGX_AGAIN) {
            if (ngx_handle_read_event(c->read, 0) != NGX_OK) {
                ngx_http_upstream_finalize_request(r, u, NGX_HTTP_INTERNAL_SERVER_ERROR);
                return;
            }

            return;
        }
        // Read error or connection closed: ngx_http_upstream_next decides
        // whether to give up or to pick another upstream server
        if (n == NGX_ERROR || n == 0) {
            ngx_http_upstream_next(r, u, NGX_HTTP_UPSTREAM_FT_ERROR);
            return;
        }
        // n > 0 bytes were read: advance last by n
        u->buffer.last += n;
        // Parse what has been read so far
        rc = u->process_header(r);
        // NGX_AGAIN: the header is incomplete; check whether the buffer is full
        if (rc == NGX_AGAIN) {
            // The buffer is full but the header is still incomplete: buffer_size
            // cannot hold the whole header, so hand over to ngx_http_upstream_next
            if (u->buffer.last == u->buffer.end) {
                ngx_http_upstream_next(r, u, NGX_HTTP_UPSTREAM_FT_INVALID_HEADER);
            // NOTE(review): the elision below hides the rest of the loop body,
            // including the closing braces of the two `if` blocks, so the
            // listing is not brace-balanced from here on
            ......
    }
    // A complete header was processed: an invalid header goes to ngx_http_upstream_next
    if (rc == NGX_HTTP_UPSTREAM_INVALID_HEADER) {
        ngx_http_upstream_next(r, u, NGX_HTTP_UPSTREAM_FT_INVALID_HEADER);
        return;
    }
    // NGX_ERROR from process_header ends the request
    if (rc == NGX_ERROR) {
        ngx_http_upstream_finalize_request(r, u, NGX_HTTP_INTERNAL_SERVER_ERROR);
        return;
    }
    // At this point process_header succeeded and the header is fully parsed
    /* rc == NGX_OK */
    .......
    // Process the parsed headers; per the original annotation this copies them
    // into r->headers_out, from where ngx_http_send_header sends them to the client
    if (ngx_http_upstream_process_headers(r, u) != NGX_OK) {
        return;
    }
    // subrequest_in_memory == 0: the response is forwarded to the client
    if (!r->subrequest_in_memory) {
        // Send the response to the client
        ngx_http_upstream_send_response(r, u);
        return;
    }
    // From here on: subrequest_in_memory == 1, the response is not forwarded
    /* subrequest content in memory */
    // If the module did not set an input_filter, use the default non-buffered filter
    if (u->input_filter == NULL) {
        u->input_filter_init = ngx_http_upstream_non_buffered_filter_init; 
        u->input_filter = ngx_http_upstream_non_buffered_filter;  
        u->input_filter_ctx = r; 
    }
    // Let the filter initialise itself before body processing starts
    if (u->input_filter_init(u->input_filter_ctx) == NGX_ERROR) {
        ngx_http_upstream_finalize_request(r, u, NGX_ERROR);
        return;
    }
    // Data between pos and last has been read but not processed yet
    n = u->buffer.last - u->buffer.pos;
    // Anything left after the header is body data that was read along with it
    if (n) {
        // Rewind last to the start of that body data; the default filter
        // advances it again by the byte count it is given
        u->buffer.last = u->buffer.pos;
        u->state->response_length += n;
        // Let input_filter process the body data already read
        if (u->input_filter(u->input_filter_ctx, n) == NGX_ERROR) {
            ngx_http_upstream_finalize_request(r, u, NGX_ERROR);
            return;
        }
    }
    ......
    // Further read events are handled by the in-memory body handler
    u->read_event_handler = ngx_http_upstream_process_body_in_memory;
    // Start processing the body
    ngx_http_upstream_process_body_in_memory(r, u);
}

下面继续分析一下upstream不转发响应时的具体处理流程，即上面subrequest_in_memory为1的场景，此时该请求属于一个子请求。
我们看一下上面分析时提到的默认的input_filter的处理方法，在上面的分析中，如果读取包头时同时读到了包体信息，会调用input_filter方法处理：

点击(此处)折叠或打开


// Default input_filter (abridged listing): appends a chain link that describes
// the `bytes` bytes just received in u->buffer to u->out_bufs and updates the
// remaining body length. Returns NGX_OK, or NGX_ERROR when no chain link could
// be obtained.
static ngx_int_t ngx_http_upstream_non_buffered_filter(void *data, ssize_t bytes)
{
    // data is the ngx_http_request_t: the callers that install this default
    // filter set input_filter_ctx to the request
    ngx_http_request_t *r = data;
    ......
    u = r->upstream;
    
    // Walk out_bufs so that ll ends up pointing at the `next` field of the
    // last link (or at out_bufs itself when the chain is empty)
    for (cl = u->out_bufs, ll = &u->out_bufs; cl; cl = cl->next) {
        ll = &cl->next;
    }
    // Get a chain link, reusing one from free_bufs when possible
    cl = ngx_chain_get_free_buf(r->pool, &u->free_bufs);
    if (cl == NULL) {
        return NGX_ERROR;
    }
    // Append the new link to the end of out_bufs
    *ll = cl;
    ......
    // buffer is the buffer that receives the upstream response body
    b = &u->buffer;
    // On entry b->last points at the start of the newly received body data;
    // the new buf covers [b->last, b->last + bytes) and b->last is advanced
    // past it
    cl->buf->pos = b->last;
    b->last += bytes;
    cl->buf->last = b->last;
    cl->buf->tag = u->output.tag;
    // No body length was set (-1): nothing to account for
    if (u->length == -1) {
        return NGX_OK;
    }
    // Otherwise reduce the body length that is still expected
    u->length -= bytes;

    return NGX_OK;
}

继续向下分析，ngx_http_upstream_process_header调用input_filter处理完已读到的包体后，最后调用的函数是ngx_http_upstream_process_body_in_memory，
该函数实际上会接收上游服务器的包体内容，下面看一下具体实现。
点击(此处)折叠或打开


// Read handler used when the response body is kept in memory (abridged
// listing): reads body data from upstream into u->buffer and feeds it to
// input_filter until no more data is available or the body is complete.
static void ngx_http_upstream_process_body_in_memory(ngx_http_request_t *r, ngx_http_upstream_t *u)
{
    ......
    // The connection to the upstream server
    c = u->peer.connection;
    // Its read event; a read timeout ends the request with a gateway timeout
    rev = c->read;
    if (rev->timedout) {
        ngx_connection_error(c, NGX_ETIMEDOUT, "upstream timed out");
        ngx_http_upstream_finalize_request(r, u, NGX_HTTP_GATEWAY_TIME_OUT);
        return;
    }
    // buffer stores the upstream response body
    b = &u->buffer;

    for ( ;; ) {
        // Free space left in the buffer
        size = b->end - b->last;
        ......
        // Read more of the response into the free space
        n = c->recv(c, b->last, size);
        // NGX_AGAIN: wait for the next read event
        if (n == NGX_AGAIN) {
            break;
        }
        // Upstream closed the connection (0) or the read failed (NGX_ERROR):
        // finalize the upstream request, passing n as the result code
        if (n == 0 || n == NGX_ERROR) {
            ngx_http_upstream_finalize_request(r, u, n);
            return;
        }
        // Account for the body bytes just read
        u->state->response_length += n;
        // Process the body data just read
        if (u->input_filter(u->input_filter_ctx, n) == NGX_ERROR) {
            ngx_http_upstream_finalize_request(r, u, NGX_ERROR);
            return;
        }
            
        if (!rev->ready) {
            break;
        }
    }
    // u->length == 0: no body bytes are expected any more (the default
    // input_filter counts length down), so the request can be finalized
    if (u->length == 0) {
        ngx_http_upstream_finalize_request(r, u, 0);
        return;
    }
    // Register the read event
    if (ngx_handle_read_event(rev, 0) != NGX_OK) {
        ngx_http_upstream_finalize_request(r, u, NGX_ERROR);
        return;
    }
    // If the read event is active, (re)arm its timer with read_timeout;
    // otherwise remove a timer that is still set
    if (rev->active) {
        ngx_add_timer(rev, u->conf->read_timeout);
    
    } else if (rev->timer_set) {
        ngx_del_timer(rev);
    }
}

上面流程很容易看出一个问题，那就是接收响应的buffer空间可能不足，导致处理出现问题。使用时关键还在于input_filter方法中对buffer的管理。
分析完不转发响应的过程后，继续看一下转发响应的两种实现方式，下游网速优先和上游网速优先的实现。由于上游网速优先的方式，实现较为复杂，下面先看一下下游网速优先的方式，即采用固定的内存大小，作为响应的缓冲区。代码上也删减不必要的逻辑。

下游网速优先
点击(此处)折叠或打开


// Forwards the upstream response to the client (abridged listing showing only
// the non-buffered path, u->buffering == 0): sends the response header, then
// relays body data through the fixed-size u->buffer.
static void
ngx_http_upstream_send_response(ngx_http_request_t *r, ngx_http_upstream_t *u)
{
    ......
    // Send the response header to the client. process_header put the parsed
    // headers into headers_in, ngx_http_upstream_process_headers moved them to
    // headers_out, and ngx_http_send_header sends headers_out
    rc = ngx_http_send_header(r);
    ......
    // Mark the header as sent
    u->header_sent = 1;
    ......
    // If the request body was saved in a temporary file, clean that file up
    // now: the response has arrived, so the request body file is no longer needed
    if (r->request_body && r->request_body->temp_file) {
        ngx_pool_run_cleanup_file(r->pool, r->request_body->temp_file->file.fd);
        r->request_body->temp_file->file.fd = NGX_INVALID_FILE;
    }
    ......
    // buffering == 0: non-buffered forwarding through the fixed-size buffer
    if (!u->buffering) {
        // Use the default input_filter if the module did not provide one
        if (u->input_filter == NULL) {
            u->input_filter_init = ngx_http_upstream_non_buffered_filter_init;
            u->input_filter = ngx_http_upstream_non_buffered_filter;
            u->input_filter_ctx = r;
        }
        // Handler for read events on the upstream connection
        u->read_event_handler = ngx_http_upstream_process_non_buffered_upstream;
        // Handler for write events on the client request
        r->write_event_handler = ngx_http_upstream_process_non_buffered_downstream;
    
        // Reset the request's limit_rate to 0
        r->limit_rate = 0;
        // Let the filter initialise itself before body processing
        if (u->input_filter_init(u->input_filter_ctx) == NGX_ERROR) {
            ngx_http_upstream_finalize_request(r, u, NGX_ERROR);
            return;
        }
        ......
        // Body data that was read together with the header is processed first,
        // exactly as in the in-memory (not forwarded) case
        n = u->buffer.last - u->buffer.pos;
        if (n) {
            // Rewind last to the start of the pending body data, account for
            // it in response_length and run input_filter on it
            u->buffer.last = u->buffer.pos;
            u->state->response_length += n;
            if (u->input_filter(u->input_filter_ctx, n) == NGX_ERROR) {
                ngx_http_upstream_finalize_request(r, u, NGX_ERROR);
                return;
            }
            // Forward this body data to the client
            ngx_http_upstream_process_non_buffered_downstream(r);
        } else {
            // Empty the buffer by resetting pos and last to its start
            u->buffer.pos = u->buffer.start;
            u->buffer.last = u->buffer.start;
            // Send a flush to the client (NGX_HTTP_FLUSH)
            if (ngx_http_send_special(r, NGX_HTTP_FLUSH) == NGX_ERROR) {
                ngx_http_upstream_finalize_request(r, u, NGX_ERROR);
                return;
            }
            // If the upstream read event is already ready, or no body bytes
            // are expected (u->length == 0), run the upstream read handler now
            if (u->peer.connection->read->ready || u->length == 0) {
                ngx_http_upstream_process_non_buffered_upstream(r, u);
            }
        }
        // Hand control back to the nginx framework
        return;
    }
    ......
}

ngx_http_upstream_process_non_buffered_downstream函数，用于处理下游客户端连接上的写事件，即向客户端转发响应
点击(此处)折叠或打开


// Write handler of the client request in non-buffered mode (abridged listing):
// checks for a client write timeout, then runs the shared non-buffered
// processing with do_write = 1.
static void ngx_http_upstream_process_non_buffered_downstream(ngx_http_request_t *r)
{
    // The client (downstream) connection, the upstream object and the
    // client connection's write event
    c = r->connection;
    u = r->upstream;
    wev = c->write;
    ......
    // Write timeout towards the client: flag the connection as timed out and
    // finalize the upstream request
    if (wev->timedout) {
        c->timedout = 1;
        ngx_connection_error(c, NGX_ETIMEDOUT, "client timed out");
        ngx_http_upstream_finalize_request(r, u, NGX_HTTP_REQUEST_TIME_OUT);
        return;
    }
    // Forward the response using the fixed buffer; the second argument is the
    // do_write flag: 1 = start by sending to the client, 0 = start by reading
    // from upstream
    ngx_http_upstream_process_non_buffered_request(r, 1);
}

下面继续分析一下ngx_http_upstream_process_non_buffered_request
点击(此处)折叠或打开


// Core of non-buffered forwarding (abridged listing): alternates between
// sending pending data to the client and reading more data from upstream into
// the fixed buffer, then re-registers events and timers on both connections.
static void
ngx_http_upstream_process_non_buffered_request(ngx_http_request_t *r, ngx_uint_t do_write)
{
    // downstream is the client connection, upstream the connection to the
    // upstream server
    u = r->upstream;
    downstream = r->connection;
    upstream = u->peer.connection;

    b = &u->buffer;
    // Write to the client if the caller asked for it, or if no more upstream
    // data is expected (u->length == 0)
    do_write = do_write || u->length == 0;
    for ( ;; ) {
        // Is there anything to write to the client?
        if (do_write) {
            // out_bufs holds data to send to the client; busy_bufs holds what
            // an earlier send could not finish
            if (u->out_bufs || u->busy_bufs) {
                // Send out_bufs to the client through the output filter chain
                rc = ngx_http_output_filter(r, u->out_bufs);
                // Recycle the bufs that were sent into free_bufs, keep the
                // unsent ones on busy_bufs, and empty out_bufs
                ngx_chain_update_chains(r->pool, &u->free_bufs, &u->busy_bufs, &u->out_bufs, u->output.tag);
            }
            // busy_bufs empty: nothing is waiting to be sent to the client
            if (u->busy_bufs == NULL) {
                ......
                // Reset pos and last so the whole buffer can be reused
                b->pos = b->start;
                b->last = b->start;
            }
        }
        // Free space left in the buffer
        size = b->end - b->last;
        // Read more of the response if there is room and upstream is readable
        if (size && upstream->read->ready) {
            n = upstream->recv(upstream, b->last, size);
            // NGX_AGAIN: wait for the next read event
            if (n == NGX_AGAIN) {
                break;
            }
            // n > 0: n bytes of response body were read
            if (n > 0) {
                // Account for them in response_length
                u->state->response_length += n;
                // Let input_filter process them
                if (u->input_filter(u->input_filter_ctx, n) == NGX_ERROR) {
                    ngx_http_upstream_finalize_request(r, u, NGX_ERROR);
                    return;
                }
            }
            // Any result other than NGX_AGAIN (including n <= 0) triggers a
            // write pass on the next iteration
            do_write = 1;
            continue;
        }
        break;
    }
    ......
    if (downstream->data == r) {
        // Register the client's write event
        // NOTE(review): the body of this `if` is elided in the listing
        if (ngx_handle_write_event(downstream->write, clcf->send_lowat) != NGX_OK)
        ......
    }
    // Arm the client write timer (send_timeout) while the write event is
    // active but not ready
    if (downstream->write->active && !downstream->write->ready) {
        ngx_add_timer(downstream->write, clcf->send_timeout);
    } 
    ......
    // Register the upstream read event as well
    if (ngx_handle_read_event(upstream->read, 0) != NGX_OK) {
        ngx_http_upstream_finalize_request(r, u, NGX_ERROR);
        return;
    }
    // Arm the upstream read timer (read_timeout) while the read event is
    // active but not ready
    if (upstream->read->active && !upstream->read->ready) {
        ngx_add_timer(upstream->read, u->conf->read_timeout);
    }
    ......
}

上游网速优先
上游网速优先的实现比较复杂，目前官方携带的ngx_http_proxy_module即采用了上游网速优先的实现方式。当ngx_http_upstream_conf_t中的buffering设置
为1时，则说明需要使用上游网速优先的方式。此时需要用ngx_event_pipe_t结构，这个结构维护着上下游间转发的响应包体，用于解决内存复制的问题。
点击(此处)折叠或打开


typedef struct ngx_event_pipe_s ngx_event_pipe_t;

struct ngx_event_pipe_s {
    ngx_connection_t *upstream;                        // 与上游服务器间的连接
    ngx_connection_t *downstream;                      // 与下游客户端间的连接                        

    ngx_chain_t *free_raw_bufs;                        // 用于接收上游服务器响应的缓冲区链表，新收到的响应向链表头部插入
    ngx_chain_t *in;                                   // 接收到上游响应的缓冲区，ngx_event_pipe_copy_input_filter将buffer中的数据设置到in中
    ngx_chain_t **last_in;                             // 指向刚刚接收到的缓冲区

    ngx_chain_t *out;                                  // 将要发给客户端的缓冲区链表，
    ngx_chain_t *free;                                 // 等待释放的缓冲区
    ngx_chain_t *busy;                                 // 表示上次发送响应时未发完的缓冲区链表，下一次发送时会合并到out链表中

    /*
     * the input filter i.e. that moves HTTP/1.1 chunks
     * from the raw bufs to an incoming chain
     */

    ngx_event_pipe_input_filter_pt input_filter;       // 处理接收到的来自上游服务器的缓冲区，接收响应的处理方法
    void *input_ctx;                                   // input_filter函数的参数，通常设置为ngx_http_request_t

    ngx_event_pipe_output_filter_pt output_filter;     // 向下游发送响应的方法，默认为ngx_http_output_filter
    void *output_ctx;                                  // output_filter函数的参数，通常设置为ngx_http_request_t

    unsigned read:1;                                   // 为1表示当前已经读到来自上游的响应
    unsigned cacheable:1;                              // 为1时表示启用文件缓存
    unsigned single_buf:1;                             // 为1时表示接收上游的响应时一次只能接收一个ngx_buf_t缓冲区
    unsigned free_bufs:1;                              // 为1时表示当不再接收上游的响应包体时，尽可能快的释放缓冲区
    unsigned upstream_done:1;                          // input_filter中用到的标识位，表示Nginx与上游间的交互已经结束
    unsigned upstream_error:1;                         // 与上游连接出现错误时，将该标识为置为1，比如超时，解析错误等
    unsigned upstream_eof:1;                           // 与上游的连接已经关闭时，该标志位置为1
    unsigned upstream_blocked:1;                       // 表示暂时阻塞读取上游响应的流程，先发送响应，再用释放的缓冲区接收响应
    unsigned downstream_done:1;                        // 为1时表示与下游的交互已经结束
    unsigned downstream_error:1;                       // 与下游连接出现错误时，设置为1
    unsigned cyclic_temp_file:1;                       // 为1时会试图复用临时文件中曾用过的空间

    ngx_int_t allocated;                               // 表示已经分配的缓冲区的数目，其受bufs.num成员的限制
    ngx_bufs_t bufs;                                   // 记录了接收上游响应的内存缓冲区的大小，bufs.size记录每个缓冲区大小，bufs.num记录缓冲区个数
    ngx_buf_tag_t tag;                                 // 用于设置、比较缓冲区链表中ngx_buf_t结构体的tag标志位

    ssize_t busy_size;

    off_t read_length;                                 // 已经接收到上游响应包体长度
    off_t length;                                      // 表示临时文件的最大长度

    off_t max_temp_file_size;                          // 表示临时文件的最大长度
    ssize_t temp_file_write_size;                      // 表示一次写入文件时的最大长度

    ngx_msec_t read_timeout;                           // 读取上游响应的超时时间
    ngx_msec_t send_timeout;                           // 向下游发送响应的超时时间
    ssize_t send_lowat;                                // 向下游发送响应时，TCP连接中设置的参数

    ngx_pool_t *pool;                                  // 用于分配内存缓冲区的连接池对象
    ngx_log_t *log;                                    // 用于记录日志的ngx_log_t对象

    ngx_chain_t *preread_bufs;                         // 表示接收上游服务器响应头部的阶段，已经读到的响应包体
    size_t preread_size;                               // 表示接收上游服务器响应头部的阶段，已经读到的响应包体长度
    ngx_buf_t *buf_to_file;                            // 

    size_t limit_rate;                                 // 发送速率的限制
    time_t start_sec;                                  // 连接的启动时间

    ngx_temp_file_t *temp_file;                        // 存放上游响应的临时文件

    /* STUB */ int num;                                // 已经使用的ngx_buf_t的数目
}

不管上游网速优先还是下游网速优先，响应的转发都是通过ngx_http_upstream_send_response函数进行的。前面分析过下游网速优先的部分流程，
下面再继续分析一下剩下的部分

点击(此处)折叠或打开


// Abridged excerpt of ngx_http_upstream_send_response(): sends the response
// header to the client and, when u->buffering is set, initializes u->pipe and
// starts processing the upstream body. Lines consisting only of dots mark code
// omitted from the excerpt; the locals rc, p, c and clcf are declared and
// assigned in those omitted parts.
static void ngx_http_upstream_send_response(ngx_http_request_t *r, ngx_http_upstream_t *u)
{
    // Send the response header for request r
    rc = ngx_http_send_header(r);
    ......
    // If the client request body was saved to a temporary file, run the pool's file cleanup for it
    // and mark the descriptor as invalid
    if (r->request_body && r->request_body->temp_file) {
        ngx_pool_run_cleanup_file(r->pool, r->request_body->temp_file->file.fd);
        r->request_body->temp_file->file.fd = NGX_INVALID_FILE;
    }
    ......
    // buffering == 0: the unbuffered path is handled in this (omitted) branch, which returns;
    // everything below is the buffered path (the article calls it "upstream speed first")
    if (!u->buffering) {
        ......
        return ;
    }
    
    /* TODO: preallocate event_pipe bufs, look "Content-Length" */

    // Use the pipe stored in u->pipe and initialize it (per the article it was allocated when the
    // upstream was started; that allocation is not visible in this excerpt)
    p = u->pipe;
    // Callback used to send the response downstream
    p->output_filter = (ngx_event_pipe_output_filter_pt) ngx_http_output_filter;
    // output_ctx carries the request, so code that is handed only the pipe can reach r through it
    p->output_ctx = r;
    // Tag for the buffers used while forwarding the response
    p->tag = u->output.tag;
    // Buffer limits taken from the upstream configuration
    p->bufs = u->conf->bufs;
    // Threshold for the amount of data held in busy buffers
    p->busy_size = u->conf->busy_buffers_size;
    // upstream: the connection to the upstream server
    p->upstream = u->peer.connection;
    // downstream: the client connection (c is assigned in omitted code - presumably r->connection)
    p->downstream = c;
    // Pool the pipe allocates from
    p->pool = r->pool;
    // Log object used by the pipe
    p->log = c->log;
    // Rate limit from the upstream configuration
    p->limit_rate = u->conf->limit_rate;
    // Record the current time
    p->start_sec = ngx_time();
    // The pipe is cacheable when the response is cacheable or is to be stored
    p->cacheable = u->cacheable || u->store;
    // Allocate the temp-file structure from the request pool; finalize with NGX_ERROR on failure
    p->temp_file = ngx_pcalloc(r->pool, sizeof(ngx_temp_file_t));
    if (p->temp_file == NULL) {
        ngx_http_upstream_finalize_request(r, u, NGX_ERROR);
        return;
    }
    // Initialize the temp-file structure: no descriptor yet, log, path and pool
    p->temp_file->file.fd = NGX_INVALID_FILE;
    p->temp_file->file.log = c->log;
    p->temp_file->path = u->conf->temp_path;
    p->temp_file->pool = r->pool;
    ......
    // Maximum size of the temp file holding the upstream response
    p->max_temp_file_size = u->conf->max_temp_file_size;
    // Maximum amount written to the temp file in one write
    p->temp_file_write_size = u->conf->temp_file_write_size;
    // Allocate only a chain link for the preread data; no buffer memory is allocated here because
    // the link is pointed at the existing u->buffer below
    p->preread_bufs = ngx_alloc_chain_link(r->pool);
    if (p->preread_bufs == NULL) {
        ngx_http_upstream_finalize_request(r, u, NGX_ERROR);
        return;
    }
    // The preread chain is the single buffer u->buffer; preread_size is what lies between its
    // pos and last (body bytes that were read together with the header)
    p->preread_bufs->buf = &u->buffer;
    p->preread_bufs->next = NULL;
    u->buffer.recycled = 1;
    p->preread_size = u->buffer.last - u->buffer.pos;
    .......
    // Timeout for reading the upstream response
    p->read_timeout = u->conf->read_timeout;
    // Timeout for sending to the client
    p->send_timeout = clcf->send_timeout;
    // send_lowat setting used when sending to the client
    p->send_lowat = clcf->send_lowat;
    .......
    // Handler for upstream read events
    u->read_event_handler = ngx_http_upstream_process_upstream;
    // Handler for downstream (client) write events
    r->write_event_handler = ngx_http_upstream_process_downstream;
    // Start processing the upstream response body right away
    ngx_http_upstream_process_upstream(r, u);
}

不管是读取上游的响应事件process_upstream，还是向客户端写数据的process_downstream，最终都是通过ngx_event_pipe实现缓存转发响应的。
下面来看一下ngx_event_pipe的具体实现：
点击(此处)折叠或打开


// Abridged excerpt of ngx_event_pipe(): alternates between writing to the
// downstream client and reading from the upstream server, then re-registers
// the read/write events and their timers. Lines consisting only of dots mark
// omitted code (rc, rev, flags and wev are declared/assigned there, and some
// closing braces fall inside the omitted parts).
ngx_int_t ngx_event_pipe(ngx_event_pipe_t *p, ngx_int_t do_write)
{
    // do_write != 0: begin by sending to the downstream client;
    // do_write == 0: begin by reading the response from the upstream server
    for ( ;; ) {
        if (do_write) {
            // Send response body downstream; the result is checked in the omitted code
            rc = ngx_event_pipe_write_to_downstream(p);
            ......
            // Per the article: on NGX_OK execution continues with the read step below, any other
            // result ends ngx_event_pipe (that check is in the omitted code)
        }
        ......
        // Read response data from the upstream
        if (ngx_event_pipe_read_upstream(p) == NGX_ABORT) {
            return NGX_ABORT;
        }
        // Nothing was read and reading is not blocked: leave the loop without requesting a write pass
        if (!p->read && !p->upstream_blocked) {
            break;
        }
        // Data was read, or reading is blocked until buffers are freed by sending to the client:
        // do a write pass on the next iteration
        do_write = 1;
    }

    if (p->upstream->fd != (ngx_socket_t) -1) {
        // Register the upstream read event with the event mechanism
        if (ngx_handle_read_event(rev, flags) != NGX_OK) {
            return NGX_ABORT;
        }
        // Arm the read timeout only when the event is not delayed, is active and is not ready
        if (!rev->delayed) {
            if (rev->active && !rev->ready) {
                ngx_add_timer(rev, p->read_timeout);
            ......
    }
    // Likewise register the downstream write event and arm its send timeout
    if (p->downstream->fd != (ngx_socket_t) -1 && p->downstream->data == p->output_ctx)
    {
        wev = p->downstream->write;
        if (ngx_handle_write_event(wev, p->send_lowat) != NGX_OK) {
            return NGX_ABORT;
        }
        if (!wev->delayed) {
            if (wev->active && !wev->ready) {
                ngx_add_timer(wev, p->send_timeout);
            .......
    }

    return NGX_OK;
}

上面函数中提到的ngx_event_pipe_read_upstream用于接收上游的响应，下面来具体看一下：

点击(此处)折叠或打开


// Abridged excerpt of ngx_event_pipe_read_upstream(): reads the upstream
// response into buffers and hands filled buffers to p->input_filter. Lines
// consisting only of dots mark omitted code (locals such as chain, cl, ln, b,
// n, size, limit and rc are declared there, and some braces open or close
// inside the omitted parts).
static ngx_int_t ngx_event_pipe_read_upstream(ngx_event_pipe_t *p)
{
    ......
    for ( ;; ) {
        // Stop reading once the upstream side is finished: EOF, error or done
        if (p->upstream_eof || p->upstream_error || p->upstream_done) {
            break;
        }
        // No preread data left and the upstream read event is not ready: nothing to read now
        if (p->preread_bufs == NULL && !p->upstream->read->ready) {
            break;
        }
        // preread_bufs holds body bytes that may have arrived while the header was being read;
        // they are consumed before anything is read from the connection
        if (p->preread_bufs) {
            chain = p->preread_bufs;
            // Hand the preread chain to 'chain' and clear preread_bufs so this branch runs only once
            p->preread_bufs = NULL;
            n = p->preread_size;
            // Non-empty preread data counts as data read
            if (n) {
                p->read = 1;
            }
        } else {
            .......
            } else {
                limit = 0;
            }
            // Take the read buffers from free_raw_bufs when it is non-empty: only the first link
            // when single_buf is set, otherwise the whole list
            if (p->free_raw_bufs) {
                chain = p->free_raw_bufs;
                if (p->single_buf) {
                    p->free_raw_bufs = p->free_raw_bufs->next;
                    chain->next = NULL;
                } else {
                    p->free_raw_bufs = NULL;
                }
            // Otherwise allocate a new buffer of bufs.size while fewer than bufs.num are allocated
            } else if (p->allocated < p->bufs.num) {
                b = ngx_create_temp_buf(p->pool, p->bufs.size);
                if (b == NULL) {
                    return NGX_ABORT;
                }
                p->allocated++;
                chain = ngx_alloc_chain_link(p->pool);
                if (chain == NULL) {
                    return NGX_ABORT;
                }
                chain->buf = b;
                chain->next = NULL;
            // Buffer limit reached. If the response is not cacheable and the downstream write event
            // is ready and not delayed, stop reading (upstream_blocked) so that sending to the
            // client can free buffers
            } else if (!p->cacheable
                       && p->downstream->data == p->output_ctx
                       && p->downstream->write->ready
                       && !p->downstream->write->delayed)
            {
                p->upstream_blocked = 1;
                break;
            // Otherwise, when the response is cacheable or the temp file offset is still below
            // max_temp_file_size, spill buffered data to the temp file (the handling of the other
            // case is in the omitted code)
            } else if (p->cacheable
                       || p->temp_file->offset < p->max_temp_file_size)
            {

                /*
                 * if it is allowed, then save some bufs from p->in
                 * to a temporary file, and add them to a p->out chain
                 */
                // Writes buffers from the in chain to the temp file and moves them to the out chain
                rc = ngx_event_pipe_write_chain_to_temp_file(p);
                ......
            // Read from the upstream connection into 'chain'
            n = p->upstream->recv_chain(p->upstream, chain, limit);
            // Link the current free_raw_bufs list after 'chain', i.e. 'chain' goes in front of it
            if (p->free_raw_bufs) {
                chain->next = p->free_raw_bufs;
            }
            ......

        while (cl && n > 0) {
            // Remove the shadow links of this buffer
            ngx_event_pipe_remove_shadow_links(cl->buf);
            // size is the free space left in this buffer; if the n bytes still to be accounted for
            // fill it completely it is passed to input_filter, otherwise only 'last' is advanced
            size = cl->buf->end - cl->buf->last;
            if (n >= size) {
                // Buffer is full: pass it to input_filter (per the article this is
                // ngx_event_pipe_copy_input_filter, which adds the buffer to the in chain)
                cl->buf->last = cl->buf->end;
                if (p->input_filter(p, cl->buf) == NGX_ERROR) {
                    return NGX_ABORT;
                }
                // Account for the consumed bytes and free the chain link that held the buffer
                n -= size;
                ln = cl;
                cl = cl->next;
                ngx_free_chain(p->pool, ln);
            } else {
                // Buffer not full: advance last by n; all bytes read are now accounted for
                cl->buf->last += n;
                n = 0;
            }
        }
        // cl is non-NULL when links remain (e.g. a partially filled buffer); it is NULL when every
        // link was consumed above
        if (cl) {
            for (ln = cl; ln->next; ln = ln->next) { /* void */ }
            // Put the remaining links back in front of free_raw_bufs (the article notes that
            // free_raw_bufs is NULL at this point; the code that resets it is omitted)
            ln->next = p->free_raw_bufs;
            p->free_raw_bufs = cl;
        }
        ......
    }
    ......
    // The upstream closed the connection or failed while free_raw_bufs is non-empty
    if ((p->upstream_eof || p->upstream_error) && p->free_raw_bufs) {
        // Pass the first buffer of free_raw_bufs to input_filter
        if (p->input_filter(p, p->free_raw_bufs->buf) == NGX_ERROR) {
            return NGX_ABORT;
        }
        p->free_raw_bufs = p->free_raw_bufs->next;
        // When free_bufs is set and there is no buf_to_file, free the memory of the remaining
        // buffers that have no shadow
        if (p->free_bufs && p->buf_to_file == NULL) {
            for (cl = p->free_raw_bufs; cl; cl = cl->next) {
                if (cl->buf->shadow == NULL) {
                    ngx_pfree(p->pool, cl->buf->start);
                    ......
    }
    .......
}

看完接收响应的处理过程，再来看一下发送响应的处理流程，对应的函数是ngx_event_pipe_write_to_downstream

点击(此处)折叠或打开


// Abridged excerpt of ngx_event_pipe_write_to_downstream(): sends buffered
// response data from the out/in chains to the client through p->output_filter.
// Lines consisting only of dots mark omitted code (locals such as cl, rc,
// bsize, prev, flush, out and ll are declared and initialized there).
static ngx_int_t ngx_event_pipe_write_to_downstream(ngx_event_pipe_t *p)
{
    ......
    for ( ;; ) {
        // On a downstream error nothing is sent; return the result of ngx_event_pipe_drain_chains
        if (p->downstream_error) {
            return ngx_event_pipe_drain_chains(p);
        }
        // The upstream side is finished (EOF, error or done): send whatever is left
        if (p->upstream_eof || p->upstream_error || p->upstream_done) {
            // Send the out chain to the client, clearing 'recycled' on each buffer first
            if (p->out) {
                for (cl = p->out; cl; cl = cl->next) {
                    cl->buf->recycled = 0;
                }
                rc = p->output_filter(p->output_ctx, p->out);
                ......
            }
            // Send the in chain to the client, clearing 'recycled' on each buffer first
            if (p->in) {
                for (cl = p->in; cl; cl = cl->next) {
                    cl->buf->recycled = 0;
                }
                rc = p->output_filter(p->output_ctx, p->in);
                ......
            }
            // Mark the downstream side as done
            p->downstream_done = 1;
            break;
        }
        ......
        // Accumulate into bsize the size (end - start) of the recycled buffers in the busy chain
        for (cl = p->busy; cl; cl = cl->next) {
            if (cl->buf->recycled) {
                ......
                bsize += cl->buf->end - cl->buf->start;
                prev = cl->buf->start;
            }
        }
        ......
        // busy_size reached: skip collecting more buffers and jump straight to the flush step
        if (bsize >= (size_t) p->busy_size) {
            flush = 1;
            goto flush;
        }
        ......
        for ( ;; ) {
            // Take buffers from the out chain first
            if (p->out) {
                cl = p->out;
                p->out = p->out->next;
            // Once the out chain is empty, take from the in chain (non-cacheable responses only)
            } else if (!p->cacheable && p->in) {
                cl = p->in;
                .....
            } else {
                break;
            }
            // Detach the link and append it to the local 'out' list
            cl->next = NULL;
            if (out) {
                *ll = cl;
            } else {
                out = cl;
            }
            ll = &cl->next;
        }

    flush:
        ......
        // Send the collected chain to the client
        rc = p->output_filter(p->output_ctx, out);
        // Update the free, busy and out chains
        ngx_chain_update_chains(p->pool, &p->free, &p->busy, &out, p->tag);
        ......
        // Walk the free chain: for a buffer marked last_shadow, pass its shadow buffer to
        // ngx_event_pipe_add_free_buf; then clear the shadow pointer
        for (cl = p->free; cl; cl = cl->next) {
            ......
            if (cl->buf->last_shadow) {
                if (ngx_event_pipe_add_free_buf(p, cl->buf->shadow) != NGX_OK) {
                    return NGX_ABORT;
                }
                cl->buf->last_shadow = 0;
            }
            cl->buf->shadow = NULL;
        }
    }
    return NGX_OK;
}

终于快要结束了，upstream的流程还是比较复杂的，最后看一下结束upstream的请求
结束upstream的请求
upstream请求的结束流程有三个入口函数：ngx_http_upstream_finalize_request、ngx_http_upstream_cleanup、ngx_http_upstream_next。
其中cleanup和next真正终止upstream时还是会调用到finalize_request函数。ngx_http_upstream_cleanup函数在启动upstream时，会挂载到请求的cleanup
链表中，当HTTP框架结束http请求时一定会调用到upstream_cleanup函数。
点击(此处)折叠或打开


// Cleanup callback for an upstream request: 'data' is the request, and the
// upstream is finalized with NGX_DONE.
static void ngx_http_upstream_cleanup(void *data)
{
    ngx_http_request_t *r = data;
    ngx_http_upstream_finalize_request(r, r->upstream, NGX_DONE);
}

可以看到upstream_cleanup的实现，其实是直接调用了ngx_http_upstream_finalize_request，这个流程是我们期待的关闭方式。

而ngx_http_upstream_next函数，是在处理请求的流程中出现错误时才会主动调用，该函数通过重连服务器、选取新的服务器等策略来提高服务的可用性。目前
nginx负载均衡中的失败重试（切换到下一台上游服务器）就是通过next函数来实现的，我们后面会进行详细分析，这里只简单说明一下。
点击(此处)折叠或打开


// Abridged excerpt of ngx_http_upstream_next(): after a failure of type
// ft_type, either finalizes the request or closes the current upstream
// connection and connects again. The line of dots marks omitted code
// (status and timeout are declared there, and status is computed there).
static void ngx_http_upstream_next(ngx_http_request_t *r, ngx_http_upstream_t *u, ngx_uint_t ft_type)
{
    ......
    if (status) {
        u->state->status = status;
        timeout = u->conf->next_upstream_timeout;
        // Give up and finalize the request when no tries are left, when this failure type is not
        // enabled in conf->next_upstream, or when next_upstream_timeout is set and has elapsed
        // since peer.start_time
        if (u->peer.tries == 0
            || !(u->conf->next_upstream & ft_type)
            || (timeout && ngx_current_msec - u->peer.start_time >= timeout))
        {
            ngx_http_upstream_finalize_request(r, u, status);
            return;
        }
    }
    // A new connection is about to be made, so close the existing upstream connection first
    // (destroying its pool if it has one)
    if (u->peer.connection) {     
        if (u->peer.connection->pool) {
            ngx_destroy_pool(u->peer.connection->pool);
        }
        ngx_close_connection(u->peer.connection);
        u->peer.connection = NULL;
    }
    // Connect to an upstream again
    ngx_http_upstream_connect(r, u);
}

最后看一下ngx_http_upstream_finalize_request的具体实现
点击(此处)折叠或打开


// Abridged excerpt of ngx_http_upstream_finalize_request(): releases the
// resources held by the upstream and then finalizes the HTTP request with rc.
// Lines consisting only of dots mark omitted code.
static void ngx_http_upstream_finalize_request(ngx_http_request_t *r, ngx_http_upstream_t *u, ngx_int_t rc)
{
    // Set the cleanup callback that u->cleanup points at to NULL, then drop the pointer itself
    if (u->cleanup) {
        *u->cleanup = NULL;
        u->cleanup = NULL;
    }
    // Finish the host name resolution context if one is still attached
    if (u->resolved && u->resolved->ctx) {
        ngx_resolve_name_done(u->resolved->ctx);
        u->resolved->ctx = NULL;
    }
    ......
    // Call the finalize_request callback provided by the HTTP module using the upstream
    u->finalize_request(r, rc);
    // Release the connection to the upstream: destroy its pool (if any) and close it
    if (u->peer.connection) {
        if (u->peer.connection->pool) {
            ngx_destroy_pool(u->peer.connection->pool);
        }
        ngx_close_connection(u->peer.connection);
    }

    u->peer.connection = NULL;
    // With store enabled and a temp file open in the pipe, delete that temp file
    if (u->store && u->pipe && u->pipe->temp_file
        && u->pipe->temp_file->file.fd != NGX_INVALID_FILE)
    {
        if (ngx_delete_file(u->pipe->temp_file->file.name.data)
            == NGX_FILE_ERROR)
        ......
    }
    ......
    // Finally, finalize the request through the HTTP framework
    ngx_http_finalize_request(r, rc);
}

至此，大概地梳理了一下upstream的处理流程，后面会针对目前已经实现的负载均衡各类算法，以及Nginx cache功能进行分析。

Nginx upstream (一) 整体流程分析

猜你喜欢