Linux kernel protocol stack: TCP data reception - cleaning the send queue and RTT sampling

Table of Contents

1 Clear the sending queue tcp_clean_rtx_queue()

1.1 Processing TSO segment confirmation tcp_tso_acked


1 Clear the sending queue tcp_clean_rtx_queue()

During ACK processing, one very important task is to remove the acknowledged data from the send queue (and the retransmission queue); this is done by tcp_clean_rtx_queue(). Because the TCP implementation keeps both the send queue and the retransmission queue in the single sk_write_queue, the two queues are processed together.

/* Remove acknowledged frames from the retransmission queue. If our packet
 * is before the ack sequence we can discard it as it's confirmed to have
 * arrived at the other end.
 *
 * Walks the write queue from its head up to (but not including) the send
 * head. Fully acknowledged skbs are unlinked and freed; a partially
 * acknowledged TSO skb is trimmed via tcp_tso_acked(). Along the way the
 * packets_out / sacked_out / lost_out / retrans_out counters are reduced
 * and RTT samples are collected for tcp_ack_update_rtt() and the congestion
 * control module's pkts_acked() hook.
 *
 * @sk:            socket whose write queue is cleaned.
 * @prior_fackets: threshold compared against the reordering candidate
 *                 'reord'; presumably the fackets_out value from before
 *                 this ACK was processed -- confirm against the caller.
 *
 * Returns the FLAG_* bits accumulated while processing the queue.
 */
static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct sk_buff *skb;
	u32 now = tcp_time_stamp;
	// 1 while every skb handled so far was acknowledged in full;
	// cleared to 0 when an skb is only partially acknowledged.
	int fully_acked = 1;
	int flag = 0;
	// Running total of TSO segments acknowledged by this call.
	u32 pkts_acked = 0;
	u32 reord = tp->packets_out;
	s32 seq_rtt = -1;
	s32 ca_seq_rtt = -1;
	ktime_t last_ackt = net_invalid_timestamp();

	// Walk the already-transmitted skbs: from the queue head up to the send head.
	while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
		// One past the highest sequence number of this skb that is acknowledged.
		u32 end_seq;
		// Number of TSO segments of this skb that are acknowledged.
		u32 acked_pcount;
		// Scoreboard bits of this skb (retransmitted / SACKed / lost state),
		// sampled before the skb may be trimmed or freed.
		u8 sacked = scb->sacked;

		// The skb ends after snd_una (presumably already advanced by the
		// caller for this ACK), so at most part of it is acknowledged.
		if (after(scb->end_seq, tp->snd_una)) {
			// A single-segment skb cannot be split, and if snd_una is not
			// after the skb's start nothing in it is acknowledged. Either
			// way this skb cannot be acknowledged, so stop walking here.
			if (tcp_skb_pcount(skb) == 1 || !after(tp->snd_una, scb->seq))
				break;
			// Trim the acknowledged data off the skb; the return value is
			// the number of TSO segments that were acknowledged.
			acked_pcount = tcp_tso_acked(sk, skb);
			// Not even one whole segment was acknowledged (or the trim
			// failed): nothing more to do.
			if (!acked_pcount)
				break;
			// Only the leading part of this skb is acknowledged.
			fully_acked = 0;
			end_seq = tp->snd_una;
		} else {
			// The whole skb is acknowledged; count all of its segments.
			acked_pcount = tcp_skb_pcount(skb);
			end_seq = scb->end_seq;
		}

		/* MTU probing checks */
		// Path-MTU probing: report success when a fully acknowledged skb
		// reaches the end of the outstanding probe.
		if (fully_acked && icsk->icsk_mtup.probe_size &&
		    !after(tp->mtu_probe.probe_seq_end, scb->end_seq)) {
			tcp_mtup_probe_success(sk, skb);
		}

		// RTT sampling. A retransmitted skb invalidates the samples (both
		// are reset to -1); otherwise ca_seq_rtt holds the sample from the
		// most recent skb, and seq_rtt is only set while it is still negative.
		if (sacked & TCPCB_RETRANS) {
			// Only skbs still marked TCPCB_SACKED_RETRANS are counted in
			// retrans_out, so only those reduce it.
			if (sacked & TCPCB_SACKED_RETRANS)
				tp->retrans_out -= acked_pcount;
			flag |= FLAG_RETRANS_DATA_ACKED;
			ca_seq_rtt = -1;
			seq_rtt = -1;
			if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1))
				flag |= FLAG_NONHEAD_RETRANS_ACKED;
		} else {
			ca_seq_rtt = now - scb->when;
			last_ackt = skb->tstamp;
			if (seq_rtt < 0) {
				seq_rtt = ca_seq_rtt;
			}
			// Never retransmitted and never SACKed: remember the lowest
			// segment offset at which such an skb was seen; it is used for
			// reordering detection after the loop.
			if (!(sacked & TCPCB_SACKED_ACKED))
				reord = min(pkts_acked, reord);
		}

		// An skb that was SACKed earlier and is now covered by the
		// cumulative ACK no longer belongs to the outstanding window, so
		// drop its segments from sacked_out. lost_out and packets_out are
		// reduced below for the same reason.
		if (sacked & TCPCB_SACKED_ACKED)
			tp->sacked_out -= acked_pcount;
		if (sacked & TCPCB_LOST)
			tp->lost_out -= acked_pcount;

		if (unlikely(tp->urg_mode && !before(end_seq, tp->snd_up)))
			tp->urg_mode = 0;
		tp->packets_out -= acked_pcount;
		// Add to the total of acknowledged TSO segments.
		pkts_acked += acked_pcount;

		/* Initial outgoing SYN's get put onto the write_queue
		 * just like anything else we transmit.  It is not
		 * true data, and if we misinform our callers that
		 * this ACK acks real data, we will erroneously exit
		 * connection startup slow start one packet too
		 * quickly.  This is severely frowned upon behavior.
		 */
		// As explained above, an acknowledged SYN must be reported
		// separately from acknowledged data.
		if (!(scb->flags & TCPCB_FLAG_SYN)) {
			flag |= FLAG_DATA_ACKED;
		} else {
			flag |= FLAG_SYN_ACKED;
			tp->retrans_stamp = 0;
		}
		// A partially acknowledged skb stays on the queue; stop walking.
		if (!fully_acked)
			break;
		// The skb is acknowledged in full: take it off the write queue
		// (unlinking alone does not free it) ...
		tcp_unlink_write_queue(skb, sk);

		// ... and then release its memory.
		sk_wmem_free_skb(sk, skb);
		tcp_clear_all_retrans_hints(tp);
	}// end of while

	// From here on we are outside the loop; skb is the skb the walk
	// stopped on, or NULL when the queue is empty.

	// The skb the walk stopped on was SACKed earlier, yet the cumulative
	// ACK does not cover it. This is flagged as SACK reneging (the peer
	// appears to have discarded data it had previously SACKed).
	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
		flag |= FLAG_SACK_RENEGING;

	// RTT sampling and congestion-control notification; not covered in
	// detail here.
	if (flag & FLAG_ACKED) {
		const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;

		tcp_ack_update_rtt(sk, flag, seq_rtt);
		tcp_rearm_rto(sk);

		if (tcp_is_reno(tp)) {
			tcp_remove_reno_sacks(sk, pkts_acked);
		} else {
			/* Non-retransmitted hole got filled? That's reordering */
			if (reord < prior_fackets)
				tcp_update_reordering(sk, tp->fackets_out - reord, 0);
		}

		// Clamp with min() so that fackets_out never underflows.
		tp->fackets_out -= min(pkts_acked, tp->fackets_out);

		if (ca_ops->pkts_acked) {
			// -1 tells the hook that no usable RTT sample is available.
			s32 rtt_us = -1;

			/* Is the ACK triggering packet unambiguous? */
			if (!(flag & FLAG_RETRANS_DATA_ACKED)) {
				/* High resolution needed and available? */
				if (ca_ops->flags & TCP_CONG_RTT_STAMP &&
				    !ktime_equal(last_ackt, net_invalid_timestamp()))
					rtt_us = ktime_us_delta(ktime_get_real(),
								last_ackt);
				else if (ca_seq_rtt > 0)
					rtt_us = jiffies_to_usecs(ca_seq_rtt);
			}

			ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
		}
	}
	return flag;
}

In summary, tcp_clean_rtx_queue() does two things:

  1. It removes acknowledged data from the send queue and the retransmission queue;
  2. It collects RTT samples.

1.1 Processing TSO segment confirmation tcp_tso_acked

When a TSO skb is found to be only partially acknowledged, tcp_tso_acked() is called to trim the acknowledged data from it; the function returns the number of segments that were acknowledged.

/* Called for a TSO skb that the cumulative ACK covers only in part.
 *
 * Trims the acknowledged bytes (everything before snd_una) off the front
 * of the skb and returns how many segments disappeared as a result.
 * Returns 0 when tcp_trim_head() fails or when no whole segment was
 * acknowledged; in that case the skb is dealt with later, once it is
 * acknowledged in full.
 */
static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 pcount_before;
	u32 pcount_after;

	/* The caller must only pass an skb that ends after snd_una. */
	BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));

	/* Segment count before trimming. */
	pcount_before = tcp_skb_pcount(skb);

	/* Drop the acknowledged prefix; a non-zero result means the trim
	 * could not be performed, so report nothing as acknowledged.
	 */
	if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq) != 0)
		return 0;

	/* Segments still left in the skb after trimming. */
	pcount_after = tcp_skb_pcount(skb);
	if (pcount_before == pcount_after)
		return 0;

	/* A partially acknowledged skb must still hold data. */
	BUG_ON(pcount_after == 0);
	BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));

	/* The difference is the number of segments acknowledged just now. */
	return pcount_before - pcount_after;
}

Guess you like

Origin blog.csdn.net/wangquan1992/article/details/109072268