深入理解TCP协议及其源代码——TCP三次握手中客户端TCP层SYN包的发送

1、TCP基本概念

传输控制协议TCP是一种面向连接的、可靠的、基于字节流的运输层通信协议。TCP层是位于IP层之上,应用层之下的传输层。

2、TCP连接时三次握手示意

3、TCP协议栈从上到下提供的接口

创建socket

创建TCP socket调用接口

 在创建socket套接字描述符时,sys_socket内核函数会根据指定的协议(例如socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP))挂载对应的协议处理函数

250 static int inet_create(struct net *net, struct socket *sock, int protocol,int kern)
251{
...
262     /* Look for the requested type/protocol pair. */
263     lookup_protocol:
264     err = -ESOCKTNOSUPPORT;
265     rcu_read_lock();

           // TCP套接字、UDP套接字、原始套接字的inet_protosw实例都在inetsw_array数组中定义,
           //这些实例会调用inet_register_protosw()注册到inetsw中
          //根据protocol查找要创建的套接字对应的四层传输协议。
266     list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
268           ...
283     }
284
           //如果没有找到,则调用request_module()来尝试加载协议所属的模块,正常情况下不会发生。
285     if (unlikely(err)) {
286             if (try_loading_module < 2) {
287                     rcu_read_unlock();
...
}

  三次握手

 结构体变量struct proto tcp_prot指定了TCP协议栈的访问接口函数

 首先客户端发送SYN报文

调用tcp_v4_connect函数建立与服务器联系并发送SYN段:

tcp_v4_connect函数

140/* This will initiate an outgoing connection. */
141int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
142{
...
171    rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
172                          RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
173                          IPPROTO_TCP,
174                          orig_sport, orig_dport, sk);
...
215    /* Socket identity is still unknown (sport may be zero).
216     * However we set state to SYN-SENT and not releasing socket
217     * lock select source port, enter ourselves into the hash tables and
218     * complete initialization after this.
219     */
220   tcp_set_state(sk, TCP_SYN_SENT);
...
227    rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
228                           inet->inet_sport, inet->inet_dport, sk);
...
246    err = tcp_connect(sk);
...
}
265EXPORT_SYMBOL(tcp_v4_connect);

此函数前面部分是确定socket的源端口,目的ip及端口。目的IP和目的端口是由connect系统调用的入参指定。tcp_connect函数用于构建并发送一个SYN请求。

tcp_connect函数

  • 构造一个携带SYN标志位的TCP头,tcp_init_nondata_skb函数实现
  • 发送带有SYN的TCP报文,tcp_transmit_skb函数实现
  • 设置计时器超时重发,inet_csk_reset_xmit_timer函数实现

3090/* Build a SYN and send it off. */
3091int tcp_connect(struct sock *sk)
3092{
...
3108       /* Reserve space for headers. */
3109       skb_reserve(buff, MAX_TCP_HEADER);
3110
3111       tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
3112       tp->retrans_stamp = tcp_time_stamp;
3113       tcp_connect_queue_skb(sk, buff);
3114       tcp_ecn_send_syn(sk, buff);
3115
3116       /* Send off SYN; include data in Fast Open. */
3117       err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
3118             tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
...
3129       /* Timer for repeating the SYN until an answer. */
3130      inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3131                                 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
...
}
3134EXPORT_SYMBOL(tcp_connect);

tcp_transmit_skb函数

__tcp_transmit_skb函数的主要任务是向ip层发送数据包,其中包括

初始化TCP协议头等数据结构

根据clone_it判断是否需要克隆Socket Buffer:因为Socket Buffer可能正被其他进程使用,此时就要克隆一份

构建TCP协议选项

拥塞控制,确定网络上有多少数据包最合适

构建TCP协议头主要的数据域:源端口、目的端口、数据段初始序列号,计算窗口大小;如果是SYN请求包,则不调用tcp_select_window计算窗口,而是直接使用未经缩放的rcv_wnd(上限为65535)

发送数据包到ip层。注意状态机的切换并不在这里完成:在调用tcp_connect发送SYN包之前,tcp_v4_connect已经通过tcp_set_state把状态切换为SYN_SENT

// net/ipv4/tcp_output.c
/*
 * Convenience wrapper around __tcp_transmit_skb().
 *
 * Forwards every argument unchanged and supplies the socket's current
 * tcp_sk(sk)->rcv_nxt as the rcv_nxt argument.  Returns whatever
 * __tcp_transmit_skb() returns.
 */
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
                gfp_t gfp_mask)
{
    const u32 ack_seq = tcp_sk(sk)->rcv_nxt;

    return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask, ack_seq);
}

tcp_transmit_skb是对__tcp_transmit_skb的封装,继续调用,进入__tcp_transmit_skb发送SYN报文

__tcp_transmit_skb函数

// net/ipv4/tcp_output.c
/*
 * Build the TCP header for @skb and hand the segment to the address-family
 * specific queue_xmit() operation.
 *
 * @sk:       socket the segment belongs to
 * @skb:      buffer to send; must be non-NULL with a non-zero segment count
 * @clone_it: when non-zero, a clone (or private copy) of @skb is transmitted
 *            and the original is kept for post-send bookkeeping
 * @gfp_mask: allocation flags used for the clone/copy
 * @rcv_nxt:  value written (in network byte order) into the ACK sequence
 *            field of the header
 *
 * Returns -ENOBUFS if the clone/copy could not be allocated.  Otherwise
 * returns the result of queue_xmit(); a positive result is first mapped
 * through net_xmit_eval() after entering CWR.
 */
static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
                  int clone_it, gfp_t gfp_mask, u32 rcv_nxt)
{
    const struct inet_connection_sock *icsk = inet_csk(sk);
    struct inet_sock *inet;
    struct tcp_sock *tp;
    struct tcp_skb_cb *tcb;
    struct tcp_out_options opts;
    unsigned int tcp_options_size, tcp_header_size;
    struct sk_buff *oskb = NULL;
    struct tcp_md5sig_key *md5;
    struct tcphdr *th;
    u64 prior_wstamp;
    int err;

    BUG_ON(!skb || !tcp_skb_pcount(skb));
    tp = tcp_sk(sk);

    if (clone_it) {
        /* Transmit a clone of the socket buffer; the original stays in
         * oskb so it can be updated once the send has succeeded.
         */
        TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
            - tp->snd_una;
        oskb = skb;

        /* An already-cloned buffer gets a private copy via pskb_copy();
         * otherwise a plain skb_clone() is taken.
         */
        tcp_skb_tsorted_save(oskb) {
            if (unlikely(skb_cloned(oskb)))
                skb = pskb_copy(oskb, gfp_mask);
            else
                skb = skb_clone(oskb, gfp_mask);
        } tcp_skb_tsorted_restore(oskb);

        if (unlikely(!skb))
            return -ENOBUFS;
    }

    /* Remember the previous write stamp for tcp_update_skb_after_send(),
     * then advance it to at least the cached clock value.
     */
    prior_wstamp = tp->tcp_wstamp_ns;
    tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);

    skb->skb_mstamp_ns = tp->tcp_wstamp_ns;

    inet = inet_sk(sk);
    tcb = TCP_SKB_CB(skb);
    memset(&opts, 0, sizeof(opts));

    /* SYN segments and all other segments use different option builders. */
    if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
        tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
    else
        tcp_options_size = tcp_established_options(sk, skb, &opts,
                               &md5);
    tcp_header_size = tcp_options_size + sizeof(struct tcphdr);

    /* if no packet is in qdisc/device queue, then allow XPS to select
     * another queue. We can be called from tcp_tsq_handler()
     * which holds one reference to sk.
     *
     * TODO: Ideally, in-flight pure ACK packets should not matter here.
     * One way to get this would be to set skb->truesize = 2 on them.
     */
    skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1);

    /* If we had to use memory reserve to allocate this skb,
     * this might cause drops if packet is looped back :
     * Other socket might not have SOCK_MEMALLOC.
     * Packets not looped back do not care about pfmemalloc.
     */
    skb->pfmemalloc = 0;

    /* Make room for the TCP header (fixed part plus options) in front of
     * the payload and mark it as the transport header.
     */
    skb_push(skb, tcp_header_size);
    skb_reset_transport_header(skb);

    /* Re-attach the buffer to this socket and add its truesize to
     * sk_wmem_alloc; pure ACKs get a different destructor than other
     * segments.
     */
    skb_orphan(skb);
    skb->sk = sk;
    skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
    skb_set_hash_from_sk(skb, sk);
    refcount_add(skb->truesize, &sk->sk_wmem_alloc);

    skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);

    /* Build TCP header and checksum it. */
    th = (struct tcphdr *)skb->data;
    th->source      = inet->inet_sport;
    th->dest        = inet->inet_dport;
    th->seq         = htonl(tcb->seq);
    th->ack_seq     = htonl(rcv_nxt);
    /* 16-bit word at byte offset 12: header length in 32-bit words in the
     * top four bits, OR'ed with the TCP flags.
     */
    *(((__be16 *)th) + 6)   = htons(((tcp_header_size >> 2) << 12) |
                    tcb->tcp_flags);

    th->check       = 0;
    th->urg_ptr     = 0;

    /* The urg_mode check is necessary during a below snd_una win probe */
    if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
        if (before(tp->snd_up, tcb->seq + 0x10000)) {
            th->urg_ptr = htons(tp->snd_up - tcb->seq);
            th->urg = 1;
        } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
            th->urg_ptr = htons(0xFFFF);
            th->urg = 1;
        }
    }

    /* Options are written immediately after the fixed header. */
    tcp_options_write((__be32 *)(th + 1), tp, &opts);
    skb_shinfo(skb)->gso_type = sk->sk_gso_type;
    if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) {
        th->window      = htons(tcp_select_window(sk));
        tcp_ecn_send(sk, skb, th, tcp_header_size);
    } else {
        /* RFC1323: The window in SYN & SYN/ACK segments
         * is never scaled.
         */
        th->window  = htons(min(tp->rcv_wnd, 65535U));
    }
#ifdef CONFIG_TCP_MD5SIG
    /* Calculate the MD5 hash, as we have all we need now */
    if (md5) {
        sk_nocaps_add(sk, NETIF_F_GSO_MASK);
        tp->af_specific->calc_md5_hash(opts.hash_location,
                           md5, sk, skb);
    }
#endif

    icsk->icsk_af_ops->send_check(sk, skb);

    if (likely(tcb->tcp_flags & TCPHDR_ACK))
        tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);

    /* Anything beyond the header is payload: account for sent data. */
    if (skb->len != tcp_header_size) {
        tcp_event_data_sent(tp, sk);
        tp->data_segs_out += tcp_skb_pcount(skb);
        tp->bytes_sent += skb->len - tcp_header_size;
    }

    if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
        TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
                  tcp_skb_pcount(skb));

    tp->segs_out += tcp_skb_pcount(skb);
    /* OK, its time to fill skb_shinfo(skb)->gso_{segs|size} */
    skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
    skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);

    /* Leave earliest departure time in skb->tstamp (skb->skb_mstamp_ns) */

    /* Cleanup our debris for IP stacks */
    memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
                   sizeof(struct inet6_skb_parm)));

    err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
    if (unlikely(err > 0)) {
        /* Positive return from the lower layer: enter CWR and let
         * net_xmit_eval() decide the value reported to the caller.
         */
        tcp_enter_cwr(sk);
        err = net_xmit_eval(err);
    }
    /* Only on success, and only when a clone was sent, update the
     * original buffer's send bookkeeping.
     */
    if (!err && oskb) {
        tcp_update_skb_after_send(sk, oskb, prior_wstamp);
        tcp_rate_skb_sent(sk, oskb);
    }
    return err;
}

至此,客户端TCP层完成了SYN包的发送,报文经过下层协议栈传输到网卡。之后服务端接收客户端发来的TCP报文,并回复SYN+ACK。

 

猜你喜欢

转载自www.cnblogs.com/zzydexiaowu/p/12103552.html
今日推荐