TCP输出 之 tcp_transmit_skb

概述

tcp_transmit_skb的作用是克隆或者复制skb,构造skb中的tcp首部,并调用网络层的发送函数发送skb;在发送前,首先需要克隆或者复制skb,因为skb成功发送到网络设备之后会被释放,而tcp层不能真正释放该数据,需要等到收到对该数据段的ack之后才可以释放;然后构造tcp首部和选项;最后调用网络层提供的发送回调函数发送skb,ip层的回调函数为ip_queue_xmit;

源码分析
  1 static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
  2                 gfp_t gfp_mask)
  3 {
  4     const struct inet_connection_sock *icsk = inet_csk(sk);
  5     struct inet_sock *inet;
  6     struct tcp_sock *tp;
  7     struct tcp_skb_cb *tcb;
  8     struct tcp_out_options opts;
  9     unsigned int tcp_options_size, tcp_header_size;
 10     struct tcp_md5sig_key *md5;
 11     struct tcphdr *th;
 12     int err;
 13 
 14     BUG_ON(!skb || !tcp_skb_pcount(skb));
 15     tp = tcp_sk(sk);
 16 
 17     /* clone_it set: transmit a clone/copy instead of the skb passed in */
 18     if (clone_it) {
 19         skb_mstamp_get(&skb->skb_mstamp);
 20         TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
 21             - tp->snd_una;
 22         tcp_rate_skb_sent(sk, skb);
 23 
 24         /* skb is already cloned, so it has to be copied (pskb_copy) */
 25         if (unlikely(skb_cloned(skb)))
 26             skb = pskb_copy(skb, gfp_mask);
 27         /* Not cloned yet: clone it */
 28         else
 29             skb = skb_clone(skb, gfp_mask);
 30 
 31         /* Copy or clone failed */
 32         if (unlikely(!skb))
 33             return -ENOBUFS;
 34     }
 35 
 36     inet = inet_sk(sk);
 37     tcb = TCP_SKB_CB(skb);
 38     memset(&opts, 0, sizeof(opts));
 39 
 40     /* SYN segment: compute the TCP option length via tcp_syn_options() */
 41     if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
 42         tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
 43     /* Non-SYN segment: compute it via tcp_established_options() */
 44     else
 45         tcp_options_size = tcp_established_options(sk, skb, &opts,
 46                                &md5);
 47     /* TCP header length = option length + fixed header size */
 48     tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
 49 
 50     /* if no packet is in qdisc/device queue, then allow XPS to select
 51      * another queue. We can be called from tcp_tsq_handler()
 52      * which holds one reference to sk_wmem_alloc.
 53      *
 54      * TODO: Ideally, in-flight pure ACK packets should not matter here.
 55      * One way to get this would be to set skb->truesize = 2 on them.
 56      */
 57     skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1);
 58 
 59     /* If we had to use memory reserve to allocate this skb,
 60      * this might cause drops if packet is looped back :
 61      * Other socket might not have SOCK_MEMALLOC.
 62      * Packets not looped back do not care about pfmemalloc.
 63      */
 64     skb->pfmemalloc = 0;
 65 
 66     /* Make room for the TCP header in front of the payload */
 67     skb_push(skb, tcp_header_size);
 68     skb_reset_transport_header(skb);
 69 
 70     /* Detach the skb from its current owner socket */
 71     skb_orphan(skb);
 72 
 73     /* Attach the skb to this socket and install the destructor */
 74     skb->sk = sk;
 75     skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
 76     skb_set_hash_from_sk(skb, sk);
 77 
 78     /* Charge skb->truesize to the socket's sk_wmem_alloc */
 79     atomic_add(skb->truesize, &sk->sk_wmem_alloc);
 80 
 81     skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);
 82 
 83     /* Build TCP header and checksum it. */
 84     /* Fill in the TCP header fields */
 85     th = (struct tcphdr *)skb->data;
 86     th->source        = inet->inet_sport;
 87     th->dest        = inet->inet_dport;
 88     th->seq            = htonl(tcb->seq);
 89     th->ack_seq        = htonl(tp->rcv_nxt);
 90     *(((__be16 *)th) + 6)    = htons(((tcp_header_size >> 2) << 12) |
 91                     tcb->tcp_flags);
 92 
 93     th->check        = 0;
 94     th->urg_ptr        = 0;
 95 
 96     /* The urg_mode check is necessary during a below snd_una win probe */
 97     /* Urgent mode: set the urgent pointer and URG flag */
 98     if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
 99         if (before(tp->snd_up, tcb->seq + 0x10000)) {
100             th->urg_ptr = htons(tp->snd_up - tcb->seq);
101             th->urg = 1;
102         } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
103             th->urg_ptr = htons(0xFFFF);
104             th->urg = 1;
105         }
106     }
107 
108     /* Write the TCP options right after the fixed header */
109     tcp_options_write((__be32 *)(th + 1), tp, &opts);
110     skb_shinfo(skb)->gso_type = sk->sk_gso_type;
111 
112     /* Non-SYN segment: select the advertised window */
113     if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) {
114         th->window      = htons(tcp_select_window(sk));
115         tcp_ecn_send(sk, skb, th, tcp_header_size);
116     } 
117     /* SYN segment: advertise rcv_wnd, capped at 65535 */
118     else {
119         /* RFC1323: The window in SYN & SYN/ACK segments
120          * is never scaled.
121          */
122         th->window    = htons(min(tp->rcv_wnd, 65535U));
123     }
124 #ifdef CONFIG_TCP_MD5SIG
125     /* Calculate the MD5 hash, as we have all we need now */
126     if (md5) {
127         sk_nocaps_add(sk, NETIF_F_GSO_MASK);
128         tp->af_specific->calc_md5_hash(opts.hash_location,
129                            md5, sk, skb);
130     }
131 #endif
132     /* Checksum is handled by the address-family send_check callback */
133     icsk->icsk_af_ops->send_check(sk, skb);
134 
135     /* Segment carries ACK: notify via tcp_event_ack_sent() */
136     if (likely(tcb->tcp_flags & TCPHDR_ACK))
137         tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
138 
139     /* skb is longer than the TCP header, i.e. it carries payload */
140     if (skb->len != tcp_header_size) {
141         tcp_event_data_sent(tp, sk);
142         tp->data_segs_out += tcp_skb_pcount(skb);
143     }
144 
145     /* Bump TCP_MIB_OUTSEGS when end_seq is past snd_nxt or seq == end_seq */
146     if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
147         TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
148                   tcp_skb_pcount(skb));
149 
150     /* Account the total number of segments sent on this socket */
151     tp->segs_out += tcp_skb_pcount(skb);
152     
153     /* OK, its time to fill skb_shinfo(skb)->gso_{segs|size} */
154     /* Record the segment count and MSS in the skb shared info */
155     skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
156     skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
157 
158     /* Our usage of tstamp should remain private */
159     skb->tstamp = 0;
160 
161     /* Cleanup our debris for IP stacks */
162     /* Zero the part of skb->cb that the IP layer will use */
163     memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
164                    sizeof(struct inet6_skb_parm)));
165 
166     /* Hand the skb to the network layer through queue_xmit */
167     err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
168 
169     /* err <= 0 (success or negative error): return it unchanged */
170     if (likely(err <= 0))
171         return err;
172 
173     /* Positive err: presumably a congestion indication from below */
174 
175     /* Enter CWR */
176     tcp_enter_cwr(sk);
177 
178     /* Map err to the final return value via net_xmit_eval() */
179     return net_xmit_eval(err);
180 }

猜你喜欢

转载自www.cnblogs.com/wanpengcoder/p/11755347.html