El servidor TCP de la pila de protocolos del kernel de Linux recibe el paquete ACK

Tabla de contenido

1 Descripción general del proceso de recepción de mensajes ACK

2 Entrada de paquete de datos tcp_v4_do_rcv

3 Buscar req_sock y crear un nuevo sock (núcleo de tcp_v4_hnd_req())

3.1 Buscar req_sock (inet_csk_search_req) en la cola de solicitudes SYN

3.2 Crear un nuevo sock y migrar req_sock a la cola de conexiones completas (núcleo de tcp_check_req())

4 Proceso de creación del nuevo sock: tcp_v4_syn_recv_sock

4.1 Crear un nuevo sock TCP (tcp_create_openreq_child)

4.2 El nuevo sock hereda el puerto del socket de escucha (__inet_inherit_port)

5 Migrar el estado del sock y despertar la llamada al sistema accept: tcp_child_process()

5.1 Procesando el mensaje ACK recibido bajo SYN_RECV tcp_rcv_state_process ()


1 Descripción general del proceso de recepción de mensajes ACK

El servidor recibe el mensaje de confirmación (ACK) del cliente como último paso del protocolo de enlace de tres vías. La pila de llamadas es la siguiente:

tcp_v4_do_rcv
	--tcp_v4_hnd_req
		--inet_csk_search_req
		--tcp_check_req
			--tcp_v4_syn_recv_sock//child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb,req, NULL);
				--tcp_create_openreq_child
			--inet_csk_reqsk_queue_unlink(sk, req, prev);
			--inet_csk_reqsk_queue_removed(sk, req);
			--inet_csk_reqsk_queue_add(sk, req, child);
	--tcp_child_process
		--tcp_rcv_state_process
			--tcp_set_state(sk, TCP_ESTABLISHED);
		--sock_def_readable //parent->sk_data_ready(parent, 0);

La lógica de procesamiento principal es la siguiente:

  1. Buscar el socket de solicitud req_sock en la cola de semiconexiones (cola SYN)
  2. Crear un nuevo sock (hijo) para la conexión e inicializarlo en el estado SYN_RECV
  3. Eliminar el socket de solicitud req_sock de la cola de semiconexiones
  4. Agregar el req_sock a la cola de conexiones completas (cola de accept) y enlazar con él el nuevo sock (hijo): req->sk = child
  5. Llamar a tcp_rcv_state_process para pasar el nuevo sock al estado ESTABLISHED
  6. Despertar a los hilos que esperan en el socket de escucha; aquí se trata principalmente de despertar la llamada accept()

2 Entrada de paquete de datos tcp_v4_do_rcv

/*
 * Entry point for an IPv4 TCP segment delivered to socket @sk.
 * (Excerpt: parts of the body are elided with "...".)
 *
 * The visible code dispatches on sk->sk_state:
 *   - TCP_ESTABLISHED: fast path through tcp_rcv_established().
 *   - TCP_LISTEN: tcp_v4_hnd_req() picks the socket that should handle
 *     the segment; a socket other than @sk is fed to tcp_child_process().
 *   - everything else (and LISTEN when tcp_v4_hnd_req() returns @sk):
 *     tcp_rcv_state_process() on @sk.
 *
 * Returns 0 on the visible non-error paths. A non-zero result from any of
 * the processing helpers jumps to "reset", which sends a RST on behalf of
 * the socket stored in rsk.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
        ....
	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		sock_rps_save_rxhash(sk, skb->rxhash);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
			rsk = sk;
			goto reset;
		}
		return 0;
	}

	/* Segment shorter than its own TCP header, or bad checksum. */
	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		/*
		 * NULL:      error, drop the packet.
		 * nsk == sk: this is the SYN of the first handshake step.
		 * nsk != sk: this is the ACK of the third handshake step.
		 */
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
			/* Reached when the handshake ACK was received. */
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb->rxhash);

	/*
	 * For a listening socket this point is reached with nsk == sk,
	 * i.e. the first-handshake SYN is processed here.
	 */
	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
...
}

3 Buscar req_sock y crear un nuevo sock (núcleo de tcp_v4_hnd_req())

Esta función busca en la cola de solicitudes de conexión del socket de escucha para determinar si el paquete recibido es un SYN o un ACK.

/*
 * Choose the socket that should process a segment received on the
 * listening socket @sk.
 *
 * Returns:
 *   - the result of tcp_check_req() when a pending request matching the
 *     segment is found in the SYN queue;
 *   - an already established socket (returned locked via bh_lock_sock())
 *     when the ehash lookup hits a socket that is not in TIME_WAIT;
 *   - NULL when the ehash lookup hits a TIME_WAIT socket;
 *   - otherwise @sk itself, or, with CONFIG_SYN_COOKIES, whatever
 *     cookie_v4_check() returns for a pure ACK.
 */
static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct request_sock **prev_link;
	struct request_sock *pending;
	struct sock *est_sk;

	/*
	 * A hit in the SYN queue means the first two handshake steps were
	 * done, so this is most likely the final ACK. tcp_check_req() still
	 * validates the segment before accepting it as such.
	 */
	pending = inet_csk_search_req(sk, &prev_link, th->source,
				      iph->saddr, iph->daddr);
	if (pending != NULL)
		return tcp_check_req(sk, skb, pending, prev_link);

	/*
	 * No pending request: try the established hash.
	 * NOTE(review): the original author did not know why ehash is
	 * searched here, nor why TIME_WAIT needs the special case below.
	 */
	est_sk = inet_lookup_established(sk->sk_net, &tcp_hashinfo, iph->saddr,
					 th->source, iph->daddr, th->dest,
					 inet_iif(skb));
	if (est_sk != NULL) {
		if (est_sk->sk_state == TCP_TIME_WAIT) {
			/* Drop the reference taken by the lookup and give up. */
			inet_twsk_put(inet_twsk(est_sk));
			return NULL;
		}
		bh_lock_sock(est_sk);
		return est_sk;
	}

#ifdef CONFIG_SYN_COOKIES
	/* Pure ACK (no RST, no SYN): may be the answer to a SYN cookie. */
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	/* Every remaining case is handled by the listening socket. */
	return sk;
}

3.1 Buscar req_sock (inet_csk_search_req) en la cola de solicitudes SYN

/*
 * Find the pending connection request that matches an incoming segment
 * in the SYN (half-open) queue of a listening socket.
 *
 * @sk:    listening socket
 * @prevp: output; on a match receives the address of the link pointer
 *         that points at the matching request (either the hash bucket
 *         slot or the dl_next field of the preceding entry). It is not
 *         written when nothing matches.
 * @rport: remote port to match (the caller passes the segment's source port)
 * @raddr: remote address to match (the caller passes the source IP)
 * @laddr: local address to match (the caller passes the destination IP)
 *
 * Returns the matching request_sock, or NULL if there is none.
 */
struct request_sock *inet_csk_search_req(const struct sock *sk,
					 struct request_sock ***prevp,
					 const __be16 rport, const __be32 raddr,
					 const __be32 laddr)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	/* SYN request queue of the listener. */
	struct listen_sock *synq = icsk->icsk_accept_queue.listen_opt;
	struct request_sock **link;
	struct request_sock *req;

	/* The bucket is selected by hashing the remote address and port. */
	link = &synq->syn_table[inet_synq_hash(raddr, rport, synq->hash_rnd,
					       synq->nr_table_entries)];

	while ((req = *link) != NULL) {
		const struct inet_request_sock *ireq = inet_rsk(req);

		/* Match on remote port/address, local address and family. */
		if (ireq->rmt_port == rport &&
		    ireq->rmt_addr == raddr &&
		    ireq->loc_addr == laddr &&
		    AF_INET_FAMILY(req->rsk_ops->family)) {
			/* A request still in this queue must have no child yet. */
			BUG_TRAP(!req->sk);
			*prevp = link;
			return req;
		}
		link = &req->dl_next;
	}

	/* Chain exhausted without a match. */
	return NULL;
}

3.2 Crear un nuevo sock y migrar req_sock a la cola de conexiones completas (núcleo de tcp_check_req())

  1. Crear un nuevo sock (hijo) y establecer su estado en SYN_RECV
  2. Eliminar el req_sock correspondiente de la cola de semiconexiones
  3. Agregar ese req_sock a la cola de conexiones completas y colgar de él el sock recién creado

Como dice el comentario, esta función procesa los paquetes de datos destinados a un socket en estado SYN_RECV; en este estado lo que se espera es recibir el ACK del cliente, de modo que se pueda completar el protocolo de enlace de tres vías. Una aclaración sobre el estado SYN_RECV: cuando el socket de escucha recibe un paquete SYN, su propio estado no pasa de TCP_LISTEN a TCP_SYN_RECV. Es decir, en la implementación, es el socket representado por el bloque de solicitud de conexión (request_sock) el que se considera en estado TCP_SYN_RECV, no el socket de escucha.

/*
 *	Process an incoming packet for SYN_RECV sockets represented
 *	as a request_sock.
 *
 *	@sk:   listening socket
 *	@skb:  incoming segment
 *	@req:  connection request found in the SYN queue
 *	@prev: address of the link pointer that points at @req in the
 *	       SYN queue (as produced by inet_csk_search_req())
 *
 *	Returns the newly created child socket when the segment is an
 *	acceptable handshake ACK, @sk when the ACK number is wrong (the
 *	caller then processes the segment on the listener), and NULL when
 *	the segment has been fully dealt with here (retransmitted SYN,
 *	out-of-window segment, dropped bare ACK, overflow, reset).
 */
struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
			   struct request_sock *req,
			   struct request_sock **prev)
{
	const struct tcphdr *th = tcp_hdr(skb);
	/* Keep only the RST, SYN and ACK flag bits of the incoming header. */
	__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
	int paws_reject = 0;
	struct tcp_options_received tmp_opt;
	struct sock *child;

	/*
	 * Options are parsed only when the header is longer than the base
	 * TCP header; of the parsed options only the timestamp is used
	 * here, to run the PAWS check.
	 */
	tmp_opt.saw_tstamp = 0;
	if (th->doff > (sizeof(struct tcphdr)>>2)) {
		tcp_parse_options(skb, &tmp_opt, 0);

		if (tmp_opt.saw_tstamp) {
			tmp_opt.ts_recent = req->ts_recent;
			/* We do not store true stamp, but it is not required,
			 * it can be estimated (approximately)
			 * from another data.
			 */
			tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
			paws_reject = tcp_paws_check(&tmp_opt, th->rst);
		}
	}

	/*
	 * A pure SYN carrying the same sequence number as the original one
	 * is a retransmitted SYN: answer it by retransmitting the SYN+ACK
	 * through the request's rtx_syn_ack() callback (per the original
	 * author this is tcp_v4_send_synack() for IPv4 - not visible here).
	 * NULL tells the caller that processing of this segment is over.
	 */
	if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn &&
	    	flg == TCP_FLAG_SYN && !paws_reject) {
		req->rsk_ops->rtx_syn_ack(sk, req, NULL);
		return NULL;
	}

	/* Further reproduces section "SEGMENT ARRIVES"
	   for state SYN-RECEIVED of RFC793.
	   It is broken, however, it does not work only
	   when SYNs are crossed.

	   You would think that SYN crossing is impossible here, since
	   we should have a SYN_SENT socket (from connect()) on our end,
	   but this is not true if the crossed SYNs were sent to both
	   ends by a malicious third party.  We must defend against this,
	   and to do that we first verify the ACK (as per RFC793, page
	   36) and reset if it is invalid.  Is this a true full defense?
	   To convince ourselves, let us consider a way in which the ACK
	   test can still pass in this 'malicious crossed SYNs' case.
	   Malicious sender sends identical SYNs (and thus identical sequence
	   numbers) to both A and B:

		A: gets SYN, seq=7
		B: gets SYN, seq=7

	   By our good fortune, both A and B select the same initial
	   send sequence number of seven :-)

		A: sends SYN|ACK, seq=7, ack_seq=8
		B: sends SYN|ACK, seq=7, ack_seq=8

	   So we are now A eating this SYN|ACK, ACK test passes.  So
	   does sequence test, SYN is truncated, and thus we consider
	   it a bare ACK.

	   If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
	   bare ACK.  Otherwise, we create an established connection.  Both
	   ends (listening sockets) accept the new incoming connection and try
	   to talk to each other. 8-)

	   Note: This case is both harmless, and rare.  Possibility is about the
	   same as us discovering intelligent life on another planet tomorrow.

	   But generally, we should (RFC lies!) to accept ACK
	   from SYNACK both here and in tcp_rcv_state_process().
	   tcp_rcv_state_process() does not, hence, we do not too.

	   Note that the case is absolutely generic:
	   we cannot optimize anything here without
	   violating protocol. All the checks must be made
	   before attempt to create socket.
	 */

	/* RFC793 page 36: "If the connection is in any non-synchronized state ...
	 *                  and the incoming segment acknowledges something not yet
	 *                  sent (the segment carries an unacceptable ACK) ...
	 *                  a reset is sent."
	 *
	 * Invalid ACK: reset will be sent by listening socket
	 */
	/*
	 * The segment carries an ACK whose number does not acknowledge our
	 * SYN+ACK (snt_isn + 1). It is not handled here: the listening
	 * socket is returned so the caller processes the segment on it
	 * (per the original author the RST is then sent from
	 * tcp_rcv_state_process()).
	 */
	if ((flg & TCP_FLAG_ACK) &&
	    (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1))
		return sk;

	/* Also, it would be not so bad idea to check rcv_tsecr, which
	 * is essentially ACK extension and too early or too late values
	 * should cause reset in unsynchronized states.
	 */
	/* RFC793: "first check sequence number". */
	/*
	 * Segment rejected by PAWS or lying outside the receive window.
	 * NOTE(review): tcp_in_window(a, b, c, d) presumably tests whether
	 * [a, b] falls inside [c, d]; it is defined elsewhere.
	 */
	if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
					  tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) {
		/* Out of window: send ACK and drop. */
		/*
		 * The ACK lets the sender notice the problem quickly and
		 * resend with a correct sequence number (per the original
		 * author send_ack() is tcp_v4_reqsk_send_ack() for IPv4).
		 * No ACK is sent in reply to a RST.
		 */
		if (!(flg & TCP_FLAG_RST))
			req->rsk_ops->send_ack(skb, req);
		if (paws_reject)
			NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
		/* NULL: processing of this segment ends here. */
		return NULL;
	}

	/* In sequence, PAWS is OK. */
	/* Remember the peer's timestamp value for the request. */
	if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1))
			req->ts_recent = tmp_opt.rcv_tsval;

	/*
	 * The segment starts at the SYN's own sequence number. The SYN
	 * occupies rcv_isn, which lies before the window start
	 * (rcv_isn + 1), so the SYN flag is masked off here and does not
	 * trigger the RST/SYN check below.
	 */
	if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
		/* Truncate SYN, it is out of window starting
		at tcp_rsk(req)->rcv_isn + 1. */
		flg &= ~TCP_FLAG_SYN;
	}
    /* RFC793: "second check the RST bit" and
     *	   "fourth, check the SYN bit"
     */
    if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
        TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
        goto embryonic_reset;
    }

    /* ACK sequence verified above, just make sure ACK is
     * set.  If ACK not set, just silently drop the packet.
     */
	/* No ACK flag: do nothing and return NULL to end processing. */
    if (!(flg & TCP_FLAG_ACK))
        return NULL;

    /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
	/*
	 * With rskq_defer_accept set, a segment whose end_seq equals
	 * rcv_isn + 1 (a bare ACK, no payload) does not complete the
	 * handshake: the request is only marked as acked and the segment
	 * is dropped, waiting for a segment that carries data.
	 *
	 * Original author's opinion: this option can hurt highly concurrent
	 * servers, because the SYN queue / accept queue may fill up and
	 * new connection requests can no longer be taken in time.
	 */
    if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
        TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
        inet_rsk(req)->acked = 1;
        return NULL;
    }

    /* OK, ACK is valid, create big socket and
     * feed this segment to it. It will repeat all
     * the tests. THIS SEGMENT MUST MOVE SOCKET TO
     * ESTABLISHED STATE. If it will be dropped after
     * socket is created, wait for troubles.
     */
    /* All checks passed: create the child through the listener's
     * syn_recv_sock() callback.
     */
    child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb,
                             req, NULL);
	/* Creation failed; handled as a listen overflow. */
    if (child == NULL)
        goto listen_overflow;

    /* Take the request out of the SYN queue ... */
    inet_csk_reqsk_queue_unlink(sk, req, prev);
    inet_csk_reqsk_queue_removed(sk, req);

    /* ... and put it, with its child, on the accept queue where it waits
     * for the application's accept().
     */
    inet_csk_reqsk_queue_add(sk, req, child);
	/* Hand the new socket back to the caller. */
    return child;

listen_overflow:
	/*
	 * sysctl_tcp_abort_on_overflow (per the original author:
	 * /proc/sys/net/ipv4/tcp_abort_on_overflow, default 0) selects
	 * whether a RST is sent to the peer when the server cannot accept
	 * the new connection. When it is zero no RST is sent, since the
	 * load may drop again shortly: the request is just marked acked
	 * and the segment is dropped. For how acked is used, the original
	 * author refers to the SYN+ACK retransmission timer write-up.
	 */
    if (!sysctl_tcp_abort_on_overflow) {
        inet_rsk(req)->acked = 1;
        return NULL;
    }

embryonic_reset:
    NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS);
	/*
	 * Answer with a RST unless the peer's segment was itself a RST,
	 * i.e. a RST is never answered with another RST.
	 */
    if (!(flg & TCP_FLAG_RST))
        req->rsk_ops->send_reset(sk, skb);
	/* On this error path the request is dropped from the SYN queue. */
    inet_csk_reqsk_queue_drop(sk, req, prev);
    return NULL;
}

4 Proceso de creación del nuevo sock: tcp_v4_syn_recv_sock

Como se ve arriba, tras recibir el ACK se invoca inet_csk(sk)->icsk_af_ops->syn_recv_sock() del socket de escucha. Para TCP sobre IPv4 se trata en realidad de tcp_v4_syn_recv_sock(); esto puede confirmarse revisando la función init() que se invoca durante la creación del socket.

/*
 * The three way handshake has completed - we got a valid ACK for our
 * SYN-ACK - now create the new (child) socket.
 *
 * @sk:  listening socket
 * @skb: the segment that completed the handshake
 * @req: connection request the child is built from
 * @dst: route to use, or NULL to have it looked up from @req
 *
 * Returns the new socket, or NULL if the accept queue is full, no route
 * could be found or the child could not be allocated. On the failure
 * paths the LISTENDROPS counter is bumped (plus LISTENOVERFLOWS for a
 * full accept queue) and @dst is released.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	/* Refuse to create a child when the accept queue is already full. */
	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	/* Look the route up from the request unless the caller supplied one. */
	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto exit;

	/* Allocate the child socket. */
	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(newsk, dst);

	/* Fill the child's addressing fields from the connection request. */
	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	newinet->daddr	      = ireq->rmt_addr;
	newinet->rcv_saddr    = ireq->loc_addr;
	newinet->saddr	      = ireq->loc_addr;
	/* The IP options move from the request to the child. */
	newinet->opt	      = ireq->opt;
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (newinet->opt)
		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
	newinet->id = newtp->write_seq ^ jiffies;

	/* Path MTU / MSS initialisation. */
	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
		/*
		 * Store the key under the peer address of the child, the
		 * same address it was looked up with above. Using the
		 * listening socket's daddr here would file the key under
		 * the wrong address, so a later lookup by the child's peer
		 * address would not find it.
		 */
		if (newkey != NULL)
			tcp_v4_md5_do_add(newsk, newinet->daddr,
					  newkey, key->keylen);
	}
#endif

	/* Insert the child into the established hash table. */
	__inet_hash_nolisten(newsk);
	/*
	 * Attach the child to the bind bucket of its local port.
	 * NOTE(review): the __inet_inherit_port() shown in this file can
	 * return -ENOMEM; that result is not checked here - confirm against
	 * the kernel version actually in use.
	 */
	__inet_inherit_port(sk, newsk);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
exit:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	dst_release(dst);
	return NULL;
}

4.1 Crear un nuevo sock TCP (tcp_create_openreq_child)

Crea un nuevo sock (tcp_sock), lo inicializa y establece su estado en TCP_SYN_RECV.

/*
 * Create the child socket for connection request @req of listening
 * socket @sk. (Excerpt: the initialisation of the child is elided.)
 *
 * The child is obtained from inet_csk_clone() with GFP_ATOMIC.
 * Returns the new socket, or NULL if cloning failed.
 */
struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb)
{
	struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);

	if (newsk != NULL) {
        ...
	}
	return newsk;
}

/*
 * Clone listening socket @sk into a new connection socket for @req.
 * (Excerpt: the tail of the initialisation is elided.)
 *
 * @priority is passed through to sk_clone() as the allocation mode.
 * Returns the clone, or NULL if sk_clone() failed.
 */
struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
			    const gfp_t priority)
{
	struct sock *newsk = sk_clone(sk, priority);

	if (newsk != NULL) {
		struct inet_connection_sock *newicsk = inet_csk(newsk);

		/* The child starts its life in SYN_RECV ... */
		newsk->sk_state = TCP_SYN_RECV;
		/* ... and is not attached to any bind bucket yet. */
		newicsk->icsk_bind_hash = NULL;

		/* Ports come from the request; inet_num is kept in host order. */
		inet_sk(newsk)->inet_dport = inet_rsk(req)->rmt_port;
		inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->loc_port);
		inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port;
		newsk->sk_write_space = sk_stream_write_space;
        ...
	}
	return newsk;
}

4.2 El nuevo sock hereda el puerto del socket de escucha (__inet_inherit_port)

/*
 * Attach @child to the bind bucket of its local port, normally the
 * bucket already used by listening socket @sk.
 *
 * The whole operation runs under the lock of the bind hash bucket that
 * the child's port hashes to.
 *
 * Returns 0 on success, -ENOMEM if a new bind bucket was needed and
 * could not be created.
 */
int __inet_inherit_port(struct sock *sk, struct sock *child)
{
	struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
	unsigned short port = inet_sk(child)->inet_num;
	const int slot = inet_bhashfn(sock_net(sk), port, hinfo->bhash_size);
	struct inet_bind_hashbucket *bucket = &hinfo->bhash[slot];
	struct inet_bind_bucket *tb;
	int err = 0;

	spin_lock(&bucket->lock);

	tb = inet_csk(sk)->icsk_bind_hash;
	if (tb->port != port) {
		/* NOTE: using tproxy and redirecting skbs to a proxy
		 * on a different listener port breaks the assumption
		 * that the listener socket's icsk_bind_hash is the same
		 * as that of the child socket. We have to look up or
		 * create a new bind bucket for the child here. */
		struct hlist_node *node;

		inet_bind_bucket_for_each(tb, node, &bucket->chain) {
			if (net_eq(ib_net(tb), sock_net(sk)) &&
			    tb->port == port)
				break;
		}

		/* node is NULL when the walk ended without a match. */
		if (node == NULL) {
			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
						     sock_net(sk), bucket, port);
			if (tb == NULL) {
				err = -ENOMEM;
				goto out_unlock;
			}
		}
	}

	inet_bind_hash(child, tb, port);

out_unlock:
	spin_unlock(&bucket->lock);
	return err;
}

void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
		    const unsigned short snum)
{
	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;

	atomic_inc(&hashinfo->bsockets);

	inet_sk(sk)->inet_num = snum;
	//将新生成的TCB加入到监听套接字对应的端口的owner链表中,
	//从这里可以看出,新生成的TCB和监听套接字是共享同一个端口的
	sk_add_bind_node(sk, &tb->owners);
	tb->num_owners++;
	inet_csk(sk)->icsk_bind_hash = tb;
}

5 Migrar el estado del sock y despertar la llamada al sistema accept: tcp_child_process()

/*
 * Feed segment @skb to the freshly created @child of listener @parent.
 *
 * If the child is currently owned by a user context the segment is put
 * on the child's backlog for later processing. Otherwise it is run
 * through tcp_rcv_state_process(), and when that moves the child out of
 * TCP_SYN_RECV the parent's sk_data_ready() is called to wake anyone
 * waiting on the listener (e.g. blocked in accept()).
 *
 * The child is unlocked and its reference dropped before returning.
 * Returns the result of tcp_rcv_state_process(), or 0 when the segment
 * was backlogged.
 */
int tcp_child_process(struct sock *parent, struct sock *child,
		      struct sk_buff *skb)
{
	const int old_state = child->sk_state;
	int rc = 0;

	if (sock_owned_by_user(child)) {
		/* Alas, it is possible again, because we do lookup
		 * in main socket hash table and lock on listening
		 * socket does not protect us more.
		 */
		/* Queue the skb; it is processed later. */
		sk_add_backlog(child, skb);
	} else {
		/* Let the child process the ACK itself. */
		rc = tcp_rcv_state_process(child, skb, tcp_hdr(skb),
					   skb->len);

		/* Wakeup parent, send SIGIO */
		if (old_state == TCP_SYN_RECV && child->sk_state != old_state)
			parent->sk_data_ready(parent, 0);
	}

	bh_unlock_sock(child);
	sock_put(child);
	return rc;
}

Las operaciones de cola y de activación se tratarán en la parte dedicada a la recepción de datos; veamos ahora cómo tcp_rcv_state_process() procesa el mensaje ACK.

5.1 Procesando el mensaje ACK recibido bajo SYN_RECV tcp_rcv_state_process ()

Aquí solo prestamos atención al procesamiento de mensajes ACK en el estado TCP_SYN_RECV.

/*
 * State machine processing of a segment for socket @sk.
 * (Excerpt: only the handling of an ACK in TCP_SYN_RECV is shown; the
 * rest of the function is elided with "..." and the braces of the
 * excerpt are therefore not balanced.)
 *
 * In the visible part, an acceptable ACK received in TCP_SYN_RECV moves
 * the socket to TCP_ESTABLISHED and initialises its send window, metrics,
 * congestion control and buffers; an unacceptable ACK makes the function
 * return 1. A segment without ACK is discarded.
 */
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
			  struct tcphdr *th, unsigned len)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	int queued = 0;

...
	/* step 5: check the ACK field */
	if (th->ack) {
		int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH);

		switch (sk->sk_state) {
		case TCP_SYN_RECV:
			/* Mostly field initialisation from here on. */
			if (acceptable) {
				tp->copied_seq = tp->rcv_nxt;
				smp_mb();
				/* TCP_SYN_RECV -> TCP_ESTABLISHED */
				tcp_set_state(sk, TCP_ESTABLISHED);
				sk->sk_state_change(sk);

				/* Note, that this wakeup is only for marginal
				 * crossed SYN case. Passively open sockets
				 * are not waked up, because sk->sk_sleep ==
				 * NULL and sk->sk_socket == NULL.
				 */
				if (sk->sk_socket)
					sk_wake_async(sk,
						      SOCK_WAKE_IO, POLL_OUT);

				/* Send window taken from this ACK, scaled by
				 * the peer's window scale.
				 */
				tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
				tp->snd_wnd = ntohs(th->window) <<
					      tp->rx_opt.snd_wscale;
				tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq,
					    TCP_SKB_CB(skb)->seq);

				/* tcp_ack considers this ACK as duplicate
				 * and does not calculate rtt.
				 * Fix it at least with timestamps.
				 */
				if (tp->rx_opt.saw_tstamp &&
				    tp->rx_opt.rcv_tsecr && !tp->srtt)
					tcp_ack_saw_tstamp(sk, 0);

				/* Timestamps in use: shrink the advertised MSS
				 * by the aligned timestamp option length.
				 */
				if (tp->rx_opt.tstamp_ok)
					tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;

				/* Make sure socket is routed, for
				 * correct metrics.
				 */
				icsk->icsk_af_ops->rebuild_header(sk);

				tcp_init_metrics(sk);

				tcp_init_congestion_control(sk);

				/* Prevent spurious tcp_cwnd_restart() on
				 * first data packet.
				 */
				tp->lsndtime = tcp_time_stamp;

				tcp_mtup_init(sk);
				tcp_initialize_rcv_mss(sk);
				tcp_init_buffer_space(sk);
				tcp_fast_path_on(tp);
			} else {
				/* Unacceptable ACK: report failure to the caller. */
				return 1;
			}
			break;


	} else
		goto discard;

...
	return 0;
}

Supongo que te gusta

Origin blog.csdn.net/wangquan1992/article/details/108914464
Recomendado
Clasificación