Linux kernel protocol stack: how the TCP server handles the received ACK packet

Table of Contents

1 Overview of the process of receiving ACK messages

2 Data packet entry tcp_v4_do_rcv

3 Search for req_sock and create a new sock (tcp_v4_hnd_req() core)

3.1 Search the SYN request queue for req_sock (inet_csk_search_req)

3.2 Create a new sock, migrate req_sock to the fully connected queue (tcp_check_req() core) 

4 Create a new sock process tcp_v4_syn_recv_sock

4.1 Create a new tcp sock (tcp_create_openreq_child)

4.2 The new socket inherits the listening socket's port (__inet_inherit_port)

5 Migrate the state of the sock and wake up the accept() system call (tcp_child_process())

5.1 Processing the ACK message received under SYN_RECV tcp_rcv_state_process()


1 Overview of the process of receiving ACK messages

The server receives the ACK message from the client as the last step of the three-way handshake. The call chain is as follows:

tcp_v4_do_rcv
	--tcp_v4_hnd_req
		--inet_csk_search_req
		--tcp_check_req
			--tcp_v4_syn_recv_sock//child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb,req, NULL);
				--tcp_create_openreq_child
			--inet_csk_reqsk_queue_unlink(sk, req, prev);
			--inet_csk_reqsk_queue_removed(sk, req);
			--inet_csk_reqsk_queue_add(sk, req, child);
	--tcp_child_process
		--tcp_rcv_state_process
			--tcp_set_state(sk, TCP_ESTABLISHED);
		--sock_def_readable //parent->sk_data_ready(parent, 0);

The main processing logic is as follows:

  1. Look up the request socket req_sock in the semi-connection (SYN) queue
  2. Create a new socket sock (child) for the connection and initialize its state to SYN_RECV
  3. Remove the request socket req_sock from the semi-connection queue
  4. Add the request socket req_sock to the full connection (accept) queue, and attach the new socket sock (child) to the request socket: req->sk = child
  5. Call tcp_rcv_state_process to move the new socket's state to ESTABLISHED
  6. Wake up the threads waiting on the listening socket; this mainly wakes up a blocked accept() call

2 Data packet entry tcp_v4_do_rcv

/*
 * Receive entry for a TCP segment delivered to socket sk (excerpt: parts of
 * the original function are elided and marked with "...." / "...", and the
 * csum_err / discard labels are not shown).
 *
 * For a listening socket the segment is first handed to tcp_v4_hnd_req(),
 * whose return value decides what happens next:
 *   - NULL      : the segment is discarded;
 *   - nsk != sk : a different socket was returned and the segment is passed
 *                 to tcp_child_process();
 *   - nsk == sk : the listener itself keeps processing the segment in
 *                 tcp_rcv_state_process().
 * Whenever one of the processing functions returns non-zero, a reset is sent
 * on behalf of the socket stored in rsk.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
        ....
	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		sock_rps_save_rxhash(sk, skb->rxhash);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
			rsk = sk;
			goto reset;
		}
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		// Returns NULL: error, drop the packet
		// nsk == sk: what was received is the SYN of the first handshake
		// nsk != sk: what was received is the ACK of the third handshake
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
                        // This function is called when the ACK message is received
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb->rxhash);

        // For a listening socket, reaching this point means nsk == sk,
        // i.e. what was received is the SYN of the first handshake
	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
...
}

3 Search for req_sock and create a new sock (tcp_v4_hnd_req() core)

This function is used to search the connection request queue of the listening socket and determine whether it is a SYN packet or an ACK packet.

/*
 * Decide which socket should handle a segment that arrived on the listening
 * socket sk.
 *
 * Returns:
 *   - the result of tcp_check_req() when a matching request_sock is found
 *     in the listener's SYN request queue (most likely the final ACK);
 *   - an already hashed, non-TIME_WAIT socket for the same 4-tuple, returned
 *     with bh_lock_sock() held;
 *   - NULL when the 4-tuple matches a TIME_WAIT socket;
 *   - otherwise the listening socket itself (possibly replaced by the result
 *     of cookie_v4_check() when CONFIG_SYN_COOKIES is enabled).
 */
static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *tcph = tcp_hdr(skb);
	const struct iphdr *ip4h = ip_hdr(skb);
	struct request_sock **prev;
	struct request_sock *req;
	struct sock *found;

	/*
	 * Search the listener's SYN request queue first. A hit means the
	 * first two handshake steps already happened, so this segment is
	 * probably the ACK; tcp_check_req() still validates it.
	 */
	req = inet_csk_search_req(sk, &prev, tcph->source,
				  ip4h->saddr, ip4h->daddr);
	if (req != NULL)
		return tcp_check_req(sk, skb, req, prev);

	/*
	 * No pending request: look the 4-tuple up in the established hash.
	 * NOTE(review): the original author was unsure why this lookup is
	 * needed; presumably the child may already have been created by an
	 * earlier segment - confirm.
	 */
	found = inet_lookup_established(sk->sk_net, &tcp_hashinfo, ip4h->saddr,
			tcph->source, ip4h->daddr, tcph->dest, inet_iif(skb));
	if (found != NULL) {
		if (found->sk_state == TCP_TIME_WAIT) {
			/* TIME_WAIT entries are only released, never returned. */
			inet_twsk_put(inet_twsk(found));
			return NULL;
		}
		bh_lock_sock(found);
		return found;
	}

#ifdef CONFIG_SYN_COOKIES
	/* A bare ACK (no RST, no SYN) may carry a SYN cookie. */
	if (tcph->ack && !(tcph->rst || tcph->syn))
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	/* Every remaining case is handled by the listening socket. */
	return sk;
}

3.1 Search the SYN request queue for req_sock (inet_csk_search_req)

/*
 * Look up a pending connection request in the SYN request queue of a
 * listening socket.
 *
 * @sk:    TCB of the listening socket
 * @prevp: on a match, receives the address of the link pointer (hash bucket
 *         head or the previous entry's dl_next) that points at the returned
 *         request_sock; it is left untouched when nothing matches
 * @rport: source port of the incoming packet
 * @raddr: source IP address of the incoming packet
 * @laddr: destination IP address of the incoming packet
 *
 * Returns the matching request_sock, or NULL when the chain holds no match.
 */
struct request_sock *inet_csk_search_req(const struct sock *sk,
					 struct request_sock ***prevp,
					 const __be16 rport, const __be32 raddr,
					 const __be32 laddr)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	/* SYN request queue of the listener */
	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
	struct request_sock **link;
	struct request_sock *req;

	/* The bucket is selected from the peer's IP address and port. */
	link = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd,
					       lopt->nr_table_entries)];

	while ((req = *link) != NULL) {
		const struct inet_request_sock *ireq = inet_rsk(req);

		/* Match on peer IP/port, local IP and address family. */
		if (ireq->rmt_port == rport &&
		    ireq->rmt_addr == raddr &&
		    ireq->loc_addr == laddr &&
		    AF_INET_FAMILY(req->rsk_ops->family)) {
			BUG_TRAP(!req->sk);
			*prevp = link;
			break;
		}
		link = &req->dl_next;
	}

	/* req is the matching request block, or NULL at the end of the chain. */
	return req;
}

3.2 Create a new sock, migrate req_sock to the fully connected queue (tcp_check_req() core) 

  1. Create a new sock (child) and set the state of the sock to SYN_RECV
  2. Remove the corresponding req_sock from the semi-connected queue
  3. Add that req_sock to the fully connected queue, and attach the newly created sock to the req_sock

As the comment says, this function processes the data packets that belong to a socket in the SYN_RECV state. In this state, the packet we most expect to receive is the ACK message from the client, which completes the three-way handshake. One more note about the SYN_RECV state: when the listening socket receives a SYN packet, its own state does not migrate from TCP_LISTEN to TCP_SYN_RECV. In the implementation, it is the connection request block (request_sock) that represents the socket in the TCP_SYN_RECV state.

/*
 *	Process an incoming packet for SYN_RECV sockets represented
 *	as a request_sock.
 *
 *	Returns the newly created child socket when the handshake is
 *	completed, the listening socket sk when the ACK number is invalid,
 *	or NULL when processing of the packet ends here.
 */
//@sk: TCB of the listening socket
//@skb: the incoming packet
//@req: the connection request block found in the SYN request queue
//@prev: address of the link pointer in the SYN request queue that points at req
struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
			   struct request_sock *req,
			   struct request_sock **prev)
{
	const struct tcphdr *th = tcp_hdr(skb);
	// Extract the RST/SYN/ACK flag bits of the incoming packet
	__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
	int paws_reject = 0;
	struct tcp_options_received tmp_opt;
	struct sock *child;

	// TCP option handling; can be skipped on a first reading. (Original
	// author's remark: in practice the ACK message rarely carries options.)
	tmp_opt.saw_tstamp = 0;
	if (th->doff > (sizeof(struct tcphdr)>>2)) {
		tcp_parse_options(skb, &tmp_opt, 0);

		if (tmp_opt.saw_tstamp) {
			tmp_opt.ts_recent = req->ts_recent;
			/* We do not store true stamp, but it is not required,
			 * it can be estimated (approximately)
			 * from another data.
			 */
			tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
			paws_reject = tcp_paws_check(&tmp_opt, th->rst);
		}
	}

	// The packet is a pure SYN with the original sequence number, i.e. a
	// retransmitted SYN request. The SYN+ACK is retransmitted to the client
	// through the request block's rtx_syn_ack() callback (according to the
	// original author this is tcp_v4_send_synack()).
	// NULL is returned: processing of this packet ends here.
	if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn &&
	    	flg == TCP_FLAG_SYN && !paws_reject) {
		req->rsk_ops->rtx_syn_ack(sk, req, NULL);
		return NULL;
	}

	/* Further reproduces section "SEGMENT ARRIVES"
	   for state SYN-RECEIVED of RFC793.
	   It is broken, however, it does not work only
	   when SYNs are crossed.

	   You would think that SYN crossing is impossible here, since
	   we should have a SYN_SENT socket (from connect()) on our end,
	   but this is not true if the crossed SYNs were sent to both
	   ends by a malicious third party.  We must defend against this,
	   and to do that we first verify the ACK (as per RFC793, page
	   36) and reset if it is invalid.  Is this a true full defense?
	   To convince ourselves, let us consider a way in which the ACK
	   test can still pass in this 'malicious crossed SYNs' case.
	   Malicious sender sends identical SYNs (and thus identical sequence
	   numbers) to both A and B:

		A: gets SYN, seq=7
		B: gets SYN, seq=7

	   By our good fortune, both A and B select the same initial
	   send sequence number of seven :-)

		A: sends SYN|ACK, seq=7, ack_seq=8
		B: sends SYN|ACK, seq=7, ack_seq=8

	   So we are now A eating this SYN|ACK, ACK test passes.  So
	   does sequence test, SYN is truncated, and thus we consider
	   it a bare ACK.

	   If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
	   bare ACK.  Otherwise, we create an established connection.  Both
	   ends (listening sockets) accept the new incoming connection and try
	   to talk to each other. 8-)

	   Note: This case is both harmless, and rare.  Possibility is about the
	   same as us discovering intelligent life on another planet tomorrow.

	   But generally, we should (RFC lies!) to accept ACK
	   from SYNACK both here and in tcp_rcv_state_process().
	   tcp_rcv_state_process() does not, hence, we do not too.

	   Note that the case is absolutely generic:
	   we cannot optimize anything here without
	   violating protocol. All the checks must be made
	   before attempt to create socket.
	 */

	/* RFC793 page 36: "If the connection is in any non-synchronized state ...
	 *                  and the incoming segment acknowledges something not yet
	 *                  sent (the segment carries an unacceptable ACK) ...
	 *                  a reset is sent."
	 *
	 * Invalid ACK: reset will be sent by listening socket
	 */
	// An ACK was received, but its ACK number does not match the sequence
	// number of the SYN+ACK that was sent (snt_isn + 1). Nothing is done
	// here: the listening socket is returned and the RST is sent later in
	// tcp_rcv_state_process().
	if ((flg & TCP_FLAG_ACK) &&
	    (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1))
		return sk;

	/* Also, it would be not so bad idea to check rcv_tsecr, which
	 * is essentially ACK extension and too early or too late values
	 * should cause reset in unsynchronized states.
	 */
	/* RFC793: "first check sequence number". */
	// Handle a segment that lies outside the receive window. According to
	// the original author, tcp_in_window(a,b,c,d) tests whether [a,b] falls
	// inside [c,d] (the helper itself is not shown here).
	if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
					  tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) {
		/* Out of window: send ACK and drop. */
		// Reply with an ACK so that the sender notices the error as soon
		// as possible and sends packets with the correct sequence number.
		// According to the original author the callback is
		// tcp_v4_reqsk_send_ack().
		if (!(flg & TCP_FLAG_RST))
			req->rsk_ops->send_ack(skb, req);
		if (paws_reject)
			NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
		// Return NULL: processing of this incoming packet ends here
		return NULL;
	}

	/* In sequence, PAWS is OK. */
	// Timestamp option; can be skipped on a first reading
	if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1))
			req->ts_recent = tmp_opt.rcv_tsval;

	// The segment starts at the same sequence number as the original SYN.
	// The SYN consumes one sequence number, so the window starts at
	// rcv_isn + 1 and the SYN itself lies outside of it. The SYN flag is
	// therefore cleared ("truncated") so that the segment is treated as a
	// bare ACK by the checks below instead of triggering a reset.
	if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
		/* Truncate SYN, it is out of window starting
		at tcp_rsk(req)->rcv_isn + 1. */
		flg &= ~TCP_FLAG_SYN;
	}
    /* RFC793: "second check the RST bit" and
     *	   "fourth, check the SYN bit"
     */
    if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
        TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
        goto embryonic_reset;
    }

    /* ACK sequence verified above, just make sure ACK is
     * set.  If ACK not set, just silently drop the packet.
     */
	// The segment does not have the ACK flag set: do nothing and return
	// NULL, which ends processing of this packet
    if (!(flg & TCP_FLAG_ACK))
        return NULL;

    /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
	// With TCP_DEFER_ACCEPT set, a pure ACK is not accepted; the three-way
	// handshake is only completed once a segment carrying payload arrives.
	// NULL is returned here and the bare ACK is dropped.
	
	// Original author's opinion: this option can hurt highly concurrent
	// servers, because the SYN request queue / accept queue may fill up and
	// new connection requests can then not be accepted in time.
    if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
        TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
        inet_rsk(req)->acked = 1;
        return NULL;
    }

    /* OK, ACK is valid, create big socket and
     * feed this segment to it. It will repeat all
     * the tests. THIS SEGMENT MUST MOVE SOCKET TO
     * ESTABLISHED STATE. If it will be dropped after
     * socket is created, wait for troubles.
     */
    // Everything checks out: call the listening socket's syn_recv_sock()
    // callback to create the new TCB
    child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb,
                             req, NULL);
	// Creation failed, which indicates the listening socket is currently busy
    if (child == NULL)
        goto listen_overflow;

    // Remove the connection request block from the SYN request queue
    inet_csk_reqsk_queue_unlink(sk, req, prev);
    inet_csk_reqsk_queue_removed(sk, req);

    // Add the connection request block to the accept queue, where it waits
    // for the application to call accept()
    inet_csk_reqsk_queue_add(sk, req, child);
	// Return the new TCB
    return child;

listen_overflow:
	// The sysctl tcp_abort_on_overflow (/proc/sys/net/ipv4/tcp_abort_on_overflow)
	// controls whether an RST is sent to the peer when the server is too busy
	// to accept a new connection. According to the original author the
	// default is 0 (no RST), because the server may soon be able to lower its
	// load and resume service. For the use of "acked", see the discussion of
	// timeout retransmission in the companion article about the server
	// sending the SYN+ACK packet.
    if (!sysctl_tcp_abort_on_overflow) {
        inet_rsk(req)->acked = 1;
        return NULL;
    }

embryonic_reset:
    NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS);
	// If the peer did not send an RST, reply with an RST. This check shows
	// that the server never answers a client's RST with another RST
	// (see tcp_v4_send_reset()).
    if (!(flg & TCP_FLAG_RST))
        req->rsk_ops->send_reset(sk, skb);
	// On error, the request socket is removed from the semi-connection queue
    inet_csk_reqsk_queue_drop(sk, req, prev);
    return NULL;
}

4 Create a new sock process tcp_v4_syn_recv_sock

As shown above, after a valid ACK is received, the listening socket's inet_csk(sk)->icsk_af_ops->syn_recv_sock() callback is called. For TCP over IPv4 this is actually tcp_v4_syn_recv_sock(); this can be confirmed by looking at the init() function call in the socket creation process.

/*
 * The three way handshake has completed - we got a valid ACK for our
 * SYN+ACK - now create the new (child) socket.
 *
 * @sk:  the listening socket
 * @skb: the segment that completed the handshake
 * @req: the connection request block describing the pending connection
 * @dst: route to the peer, or NULL to have it looked up here through
 *       inet_csk_route_req()
 *
 * Returns the new socket, already inserted into the hash tables, or NULL
 * when the accept queue is full, no route is available or the socket cannot
 * be allocated. On failure the LISTENDROPS counter is incremented
 * (LISTENOVERFLOWS as well when the accept queue is full) and dst is
 * released.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	/* If the accept queue is already full, report creation failure. */
	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	/* Route lookup, only needed when the caller did not supply one. */
	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto exit;

	/* Allocate a new TCB. */
	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(newsk, dst);

	/* Initialise the new TCB from the contents of the request block. */
	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	newinet->daddr	      = ireq->rmt_addr;
	newinet->rcv_saddr    = ireq->loc_addr;
	newinet->saddr	      = ireq->loc_addr;
	/* The IP options move from the request block to the new socket. */
	newinet->opt	      = ireq->opt;
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (newinet->opt)
		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
	newinet->id = newtp->write_seq ^ jiffies;

	/* Path MTU related initialisation. */
	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
		/*
		 * Register the key under the peer address it was looked up
		 * with (newinet->daddr). The listening socket's own daddr is
		 * not the peer of this connection, so a key stored under it
		 * would not be found by later lookups on newsk.
		 */
		if (newkey != NULL)
			tcp_v4_md5_do_add(newsk, newinet->daddr,
					  newkey, key->keylen);
	}
#endif

	/* Insert the new TCB into TCP's ehash table. */
	__inet_hash_nolisten(newsk);
	/*
	 * Record the port information of the new TCB.
	 * NOTE(review): the __inet_inherit_port() variant quoted in this
	 * article can return -ENOMEM; the result is not checked here -
	 * confirm against the kernel version in use.
	 */
	__inet_inherit_port(sk, newsk);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
exit:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	dst_release(dst);
	return NULL;
}

4.1 Create a new tcp sock (tcp_create_openreq_child)

Create a new sock (tcp sock), and initialize, set the status to TCP_SYN_RECV

/*
 * Create the child socket for a pending connection request (excerpt: the
 * initialisation performed when the clone succeeds is elided as "...").
 *
 * The child is cloned from the listening socket sk with inet_csk_clone()
 * using GFP_ATOMIC. Returns the new socket, or NULL when cloning failed.
 */
struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb)
{
	struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);

	if (newsk != NULL) {
        ...
	}
	return newsk;
}

/*
 * Clone the listening socket sk into a new connection socket (excerpt: the
 * rest of the initialisation is elided as "...").
 *
 * @sk:       the socket to clone
 * @req:      connection request block supplying the remote and local ports
 * @priority: allocation flags passed on to sk_clone()
 *
 * Returns the clone, or NULL when sk_clone() failed.
 */
struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
			    const gfp_t priority)
{
	struct sock *newsk = sk_clone(sk, priority);

	if (newsk != NULL) {
		struct inet_connection_sock *newicsk = inet_csk(newsk);

		/* The new socket starts out in TCP_SYN_RECV. */
		newsk->sk_state = TCP_SYN_RECV;
		/* Not attached to any bind bucket yet. */
		newicsk->icsk_bind_hash = NULL;

		/* Ports come from the request block; inet_num is the local
		 * port converted with ntohs(). */
		inet_sk(newsk)->inet_dport = inet_rsk(req)->rmt_port;
		inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->loc_port);
		inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port;
		newsk->sk_write_space = sk_stream_write_space;
        ...
	}
	return newsk;
}

4.2 The new socket inherits the listening socket's port (__inet_inherit_port)

/*
 * Attach the child socket to the bind bucket of its local port.
 *
 * @sk:    the listening socket
 * @child: the newly created connection socket
 *
 * Normally the listener's own bind bucket is reused. When the child's port
 * differs from the port of that bucket, a bucket for the child's port is
 * looked up in the hash chain or created.
 *
 * Returns 0 on success, or -ENOMEM when a new bind bucket cannot be
 * allocated. The bucket lock is held for the duration of the update.
 */
int __inet_inherit_port(struct sock *sk, struct sock *child)
{
	struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
	unsigned short lport = inet_sk(child)->inet_num;
	const int slot = inet_bhashfn(sock_net(sk), lport,
			hinfo->bhash_size);
	struct inet_bind_hashbucket *bucket = &hinfo->bhash[slot];
	struct inet_bind_bucket *tb;
	int err = 0;

	spin_lock(&bucket->lock);

	tb = inet_csk(sk)->icsk_bind_hash;
	if (tb->port != lport) {
		/* NOTE: using tproxy and redirecting skbs to a proxy
		 * on a different listener port breaks the assumption
		 * that the listener socket's icsk_bind_hash is the same
		 * as that of the child socket. We have to look up or
		 * create a new bind bucket for the child here. */
		struct hlist_node *node;

		inet_bind_bucket_for_each(tb, node, &bucket->chain) {
			if (net_eq(ib_net(tb), sock_net(sk)) &&
			    tb->port == lport)
				break;
		}
		if (!node) {
			/* No bucket for this port yet: create one. */
			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
						     sock_net(sk), bucket, lport);
			if (!tb) {
				err = -ENOMEM;
				goto unlock;
			}
		}
	}
	inet_bind_hash(child, tb, lport);

unlock:
	spin_unlock(&bucket->lock);
	return err;
}

/*
 * Bind socket sk to local port snum by linking it into bind bucket tb.
 *
 * @sk:   the socket being bound
 * @tb:   the bind bucket of the port
 * @snum: the local port number
 *
 * Increments the hash table's bound-socket counter and the bucket's owner
 * count, and records the bucket in the socket.
 */
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
		    const unsigned short snum)
{
	struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;

	atomic_inc(&hinfo->bsockets);
	inet_sk(sk)->inet_num = snum;

	/*
	 * Link the socket into the owners list of the bucket. When called
	 * for a freshly created child with the listener's bucket, the child
	 * and the listening socket end up sharing the same port.
	 */
	sk_add_bind_node(sk, &tb->owners);
	tb->num_owners += 1;

	inet_csk(sk)->icsk_bind_hash = tb;
}

5 Migrate the state of the sock and wake up the accept() system call (tcp_child_process())

/*
 * Feed the segment that completed the handshake to the child socket.
 *
 * @parent: the listening socket
 * @child:  the newly created connection socket
 * @skb:    the incoming segment
 *
 * Returns the result of tcp_rcv_state_process() when the child could be
 * processed immediately, or 0 when the segment was queued on the child's
 * backlog. In both cases the child is unlocked and one reference to it is
 * dropped before returning.
 */
int tcp_child_process(struct sock *parent, struct sock *child,
		      struct sk_buff *skb)
{
	const int old_state = child->sk_state;
	int ret = 0;

	if (sock_owned_by_user(child)) {
		/* Alas, it is possible again, because we do lookup
		 * in main socket hash table and lock on listening
		 * socket does not protect us more.
		 */
		/* Queue the skb on the backlog for later processing. */
		sk_add_backlog(child, skb);
	} else {
		/*
		 * No user context holds the child: let the child process the
		 * ACK itself, which can move it from TCP_SYN_RECV to
		 * TCP_ESTABLISHED.
		 */
		ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb),
					    skb->len);
		/* Wakeup parent, send SIGIO */
		/*
		 * The child left TCP_SYN_RECV: wake up whoever waits on the
		 * listening socket, e.g. a thread blocked in accept().
		 */
		if (old_state == TCP_SYN_RECV && child->sk_state != old_state)
			parent->sk_data_ready(parent, 0);
	}

	bh_unlock_sock(child);
	sock_put(child);
	return ret;
}

Queue operations and wake-up operations are discussed in the data-receiving part. Here, let's take a look at how tcp_rcv_state_process() handles the ACK message.

5.1 Processing the ACK message received under SYN_RECV tcp_rcv_state_process()

Here we only pay attention to the processing of ACK messages in the TCP_SYN_RECV state.

/*
 * State machine processing of a received segment (excerpt: only the ACK
 * handling for a socket in TCP_SYN_RECV is shown; "..." marks elided code,
 * so the braces of this excerpt are not balanced and the discard label is
 * not shown).
 *
 * For an acceptable ACK in TCP_SYN_RECV the socket is moved to
 * TCP_ESTABLISHED and its send window, metrics, congestion control and
 * buffer space are initialised. An unacceptable ACK in that state makes the
 * function return 1.
 */
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
			  struct tcphdr *th, unsigned len)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	int queued = 0;

...
	/* step 5: check the ACK field */
	if (th->ack) {
		int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH);

		switch (sk->sk_state) {
		case TCP_SYN_RECV:
			// What follows is mostly further field initialisation;
			// look into the details when needed
			if (acceptable) {
				tp->copied_seq = tp->rcv_nxt;
				smp_mb();
				// Move from TCP_SYN_RECV to TCP_ESTABLISHED
				tcp_set_state(sk, TCP_ESTABLISHED);
				sk->sk_state_change(sk);

				/* Note, that this wakeup is only for marginal
				 * crossed SYN case. Passively open sockets
				 * are not waked up, because sk->sk_sleep ==
				 * NULL and sk->sk_socket == NULL.
				 */
				if (sk->sk_socket)
					sk_wake_async(sk,
						      SOCK_WAKE_IO, POLL_OUT);

				tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
				tp->snd_wnd = ntohs(th->window) <<
					      tp->rx_opt.snd_wscale;
				tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq,
					    TCP_SKB_CB(skb)->seq);

				/* tcp_ack considers this ACK as duplicate
				 * and does not calculate rtt.
				 * Fix it at least with timestamps.
				 */
				if (tp->rx_opt.saw_tstamp &&
				    tp->rx_opt.rcv_tsecr && !tp->srtt)
					tcp_ack_saw_tstamp(sk, 0);

				if (tp->rx_opt.tstamp_ok)
					tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;

				/* Make sure socket is routed, for
				 * correct metrics.
				 */
				icsk->icsk_af_ops->rebuild_header(sk);

				tcp_init_metrics(sk);

				tcp_init_congestion_control(sk);

				/* Prevent spurious tcp_cwnd_restart() on
				 * first data packet.
				 */
				tp->lsndtime = tcp_time_stamp;

				tcp_mtup_init(sk);
				tcp_initialize_rcv_mss(sk);
				tcp_init_buffer_space(sk);
				tcp_fast_path_on(tp);
			} else {
				/* Unacceptable ACK: report failure to the caller. */
				return 1;
			}
			break;


	} else
		goto discard;

...
	return 0;
}

Guess you like

Origin blog.csdn.net/wangquan1992/article/details/108914464