Netfilter 之 连接跟踪钩子函数分析

ipv4_conntrack_defrag

ipv4_conntrack_defrag对输入包进行检查,如果是分片包,则调用nf_ct_ipv4_gather_frags函数进行重组;

 1 static unsigned int ipv4_conntrack_defrag(void *priv,
 2                       struct sk_buff *skb,
 3                       const struct nf_hook_state *state)
 4 {
 5     struct sock *sk = skb->sk;
 6 
 7     if (sk && sk_fullsock(sk) && (sk->sk_family == PF_INET) &&
 8         inet_sk(sk)->nodefrag)
 9         return NF_ACCEPT;
10 
11 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
12 #if !IS_ENABLED(CONFIG_NF_NAT)
13     /* Previously seen (loopback)?  Ignore.  Do this before
14        fragment check. */
15     if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb)))
16         return NF_ACCEPT;
17 #endif
18 #endif
19     /* Gather fragments. */
20     /* If this is an IP fragment, hand it to nf_ct_ipv4_gather_frags(); a non-zero result returns NF_STOLEN */
21     if (ip_is_fragment(ip_hdr(skb))) {
22         enum ip_defrag_users user =
23             nf_ct_defrag_user(state->hook, skb);
24 
25         if (nf_ct_ipv4_gather_frags(state->net, skb, user))
26             return NF_STOLEN;
27     }
28     return NF_ACCEPT;
29 }

nf_ct_ipv4_gather_frags内部调用了ip_defrag进行重组,ip_defrag相关分析,请移步IP分片重组;

 1 static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
 2                    u_int32_t user)
 3 {
 4     int err;
 5 
 6     local_bh_disable();
 7     /* Reassemble via ip_defrag(), called between local_bh_disable()/local_bh_enable() */
 8     err = ip_defrag(net, skb, user);
 9     local_bh_enable();
10 
11     if (!err)
12         skb->ignore_df = 1;
13 
14     return err;
15 }
ipv4_conntrack_in

ipv4_conntrack_in是对nf_conntrack_in的封装,是发往本机或经本机转发的数据包进入连接跟踪的入口函数;nf_conntrack_in获取l3proto、l4proto,调用resolve_normal_ct查找该数据包tuple对应的连接跟踪项,没有则创建,并将其与skb关联,最后调用l4proto->packet函数对连接状态进行处理;

1 static unsigned int ipv4_conntrack_in(void *priv,
2                       struct sk_buff *skb,
3                       const struct nf_hook_state *state)
4 {
      /* Thin IPv4 wrapper: forwards to nf_conntrack_in() with PF_INET and the net/hook taken from state. */
5     return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
6 }
  1 unsigned int
  2 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
  3         struct sk_buff *skb)
  4 {
  5     struct nf_conn *ct, *tmpl;
  6     enum ip_conntrack_info ctinfo;
  7     struct nf_conntrack_l3proto *l3proto;
  8     struct nf_conntrack_l4proto *l4proto;
  9     unsigned int *timeouts;
 10     unsigned int dataoff;
 11     u_int8_t protonum;
 12     int ret;
 13 
 14     /* Fetch the nf_conn (and ctinfo) currently attached to the skb */
 15     tmpl = nf_ct_get(skb, &ctinfo);
 16 
 17     /* The skb already carries an nf_conn, or is marked untracked */
 18     if (tmpl || ctinfo == IP_CT_UNTRACKED) {
 19         /* Previously seen (loopback or untracked)?  Ignore. */
 20         /* Attached non-template conntrack (loopback) or untracked: accept as-is */
 21         if ((tmpl && !nf_ct_is_template(tmpl)) ||
 22              ctinfo == IP_CT_UNTRACKED) {
 23             NF_CT_STAT_INC_ATOMIC(net, ignore);
 24             return NF_ACCEPT;
 25         }
 26 
 27         /* Only a template was attached: detach it from the skb (tmpl itself is released at out:) */
 28         skb->_nfct = 0;
 29     }
 30 
 31     /* rcu_read_lock()ed by nf_hook_thresh */
 32     /* Look up the l3proto handler for protocol family pf */
 33     l3proto = __nf_ct_l3proto_find(pf);
 34 
 35     /* Obtain the L4 data offset and the L4 protocol number */
 36     ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
 37                    &dataoff, &protonum);
 38     if (ret <= 0) {
 39         pr_debug("not prepared to track yet or error occurred\n");
 40         NF_CT_STAT_INC_ATOMIC(net, error);
 41         NF_CT_STAT_INC_ATOMIC(net, invalid);
 42         ret = -ret;
 43         goto out;
 44     }
 45 
 46     /* Look up the l4proto handler for (pf, protonum) */
 47     l4proto = __nf_ct_l4proto_find(pf, protonum);
 48 
 49     /* It may be an special packet, error, unclean...
 50      * inverse of the return code tells to the netfilter
 51      * core what to do with the packet. */
 52     /* If the l4proto provides an error() callback, run it */
 53     if (l4proto->error != NULL) {
 54         ret = l4proto->error(net, tmpl, skb, dataoff, pf, hooknum);
 55         if (ret <= 0) {
 56             NF_CT_STAT_INC_ATOMIC(net, error);
 57             NF_CT_STAT_INC_ATOMIC(net, invalid);
 58             ret = -ret;
 59             goto out;
 60         }
 61         /* ICMP[v6] protocol trackers may assign one conntrack. */
 62         if (skb->_nfct)
 63             goto out;
 64     }
 65 repeat:
 66     /* Find the conntrack matching this packet's tuple (creating one if absent), compute ctinfo and attach both to the skb */
 67     ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
 68                 l3proto, l4proto);
 69     if (ret < 0) {
 70         /* Too stressed to deal. */
 71         NF_CT_STAT_INC_ATOMIC(net, drop);
 72         ret = NF_DROP;
 73         goto out;
 74     }
 75 
 76     /* Re-read the nf_conn now attached to the skb */
 77     ct = nf_ct_get(skb, &ctinfo);
 78     /* Nothing attached: the packet is not a valid part of a connection */
 79     if (!ct) {
 80         /* Not valid part of a connection */
 81         NF_CT_STAT_INC_ATOMIC(net, invalid);
 82         ret = NF_ACCEPT;
 83         goto out;
 84     }
 85 
 86     /* Decide what timeout policy we want to apply to this flow. */
 87     /* NOTE(review): presumably a per-ct timeout extension when present, else the l4proto defaults - confirm in nf_ct_timeout_lookup() */
 88     timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
 89 
 90     /* Let the L4 tracker process the packet (per the original note, tcp_packet for TCP - not shown here) */
 91     ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts);
 92     if (ret <= 0) {
 93         /* Invalid: inverse of the return code tells
 94          * the netfilter core what to do */
 95         pr_debug("nf_conntrack_in: Can't track with proto module\n");
 96         nf_conntrack_put(&ct->ct_general);
 97         skb->_nfct = 0;
 98         NF_CT_STAT_INC_ATOMIC(net, invalid);
 99         if (ret == -NF_DROP)
100             NF_CT_STAT_INC_ATOMIC(net, drop);
101         /* Special case: TCP tracker reports an attempt to reopen a
102          * closed/aborted connection. We have to go back and create a
103          * fresh conntrack.
104          */
105         if (ret == -NF_REPEAT)
106             goto repeat;
107         ret = -ret;
108         goto out;
109     }
110 
111     /* On an established-reply packet, set IPS_SEEN_REPLY_BIT; if the bit was previously clear, cache an IPCT_REPLY event */
112     if (ctinfo == IP_CT_ESTABLISHED_REPLY &&
113         !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
114         nf_conntrack_event_cache(IPCT_REPLY, ct);
115 out:
116     if (tmpl)
117         nf_ct_put(tmpl);
118 
119     return ret;
120 }

resolve_normal_ct函数根据数据包中的相关字段构造tuple,并在hash中查找该tuple;如果没有找到则调用init_conntrack新建连接跟踪项,而后根据方向和状态位确定ctinfo,并将nf_conn与skb进行关联;

 1 static int
 2 resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 3           struct sk_buff *skb,
 4           unsigned int dataoff,
 5           u_int16_t l3num,
 6           u_int8_t protonum,
 7           struct nf_conntrack_l3proto *l3proto,
 8           struct nf_conntrack_l4proto *l4proto)
 9 {
10     const struct nf_conntrack_zone *zone;
11     struct nf_conntrack_tuple tuple;
12     struct nf_conntrack_tuple_hash *h;
13     enum ip_conntrack_info ctinfo;
14     struct nf_conntrack_zone tmp;
15     struct nf_conn *ct;
16     u32 hash;
17 
18     /* Fill the tuple from the packet via nf_ct_get_tuple(); return 0 without attaching anything if that fails */
19     if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
20                  dataoff, l3num, protonum, net, &tuple, l3proto,
21                  l4proto)) {
22         pr_debug("Can't get tuple\n");
23         return 0;
24     }
25 
26     /* look for tuple match */
27     /* Look the tuple up in the conntrack hash, using the zone derived from tmpl/skb */
28     zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
29     hash = hash_conntrack_raw(&tuple, net);
30     h = __nf_conntrack_find_get(net, zone, &tuple, hash);
31 
32     /* No existing entry for this tuple */
33     if (!h) {
34         /* Create a new conntrack entry */
35         h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
36                    skb, dataoff, hash);
37         if (!h)
38             return 0;
39         if (IS_ERR(h))
40             return PTR_ERR(h);
41     }
42 
43     /* Convert the tuple hash entry to its owning nf_conn */
44     ct = nf_ct_tuplehash_to_ctrack(h);
45 
46     /* It exists; we have (non-exclusive) reference. */
47     /* Matched the reply-direction tuple: established, reply direction */
48     if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
49         ctinfo = IP_CT_ESTABLISHED_REPLY;
50     } 
51     /* Original direction */
52     else {
53         /* Once we've had two way comms, always ESTABLISHED. */
54         /* A reply has already been seen: established */
55         if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
56             pr_debug("normal packet for %p\n", ct);
57             ctinfo = IP_CT_ESTABLISHED;
58         } 
59         /* Flagged as an expected connection: related */
60         else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
61             pr_debug("related packet for %p\n", ct);
62             ctinfo = IP_CT_RELATED;
63         } 
64         /* Otherwise: new connection */
65         else {
66             pr_debug("new packet for %p\n", ct);
67             ctinfo = IP_CT_NEW;
68         }
69     }
70 
71     /* Attach ct and ctinfo to the skb */
72     nf_ct_set(skb, ct, ctinfo);
73     return 0;
74 }
ipv4_conntrack_local

ipv4_conntrack_local是由本机发出的数据包连接跟踪的入口,是对nf_conntrack_in函数的封装;对于长度不足或IP头长度不合法的包以及分片包,直接返回NF_ACCEPT,不进入nf_conntrack_in;

 1 static unsigned int ipv4_conntrack_local(void *priv,
 2                      struct sk_buff *skb,
 3                      const struct nf_hook_state *state)
 4 {
 5     /* root is playing with raw sockets. */
 6     if (skb->len < sizeof(struct iphdr) ||
 7         ip_hdrlen(skb) < sizeof(struct iphdr))
 8         return NF_ACCEPT;
 9 
10     /* Fragment: accept without calling nf_conntrack_in() */
11     if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */
12         return NF_ACCEPT;
13 
14     /* Hand over to nf_conntrack_in() */
15     return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
16 }
ipv4_helper

ipv4_helper函数获取连接上的help扩展及其关联的helper,如果存在则调用该helper的help函数;

 1 static unsigned int ipv4_helper(void *priv,
 2                 struct sk_buff *skb,
 3                 const struct nf_hook_state *state)
 4 {
 5     struct nf_conn *ct;
 6     enum ip_conntrack_info ctinfo;
 7     const struct nf_conn_help *help;
 8     const struct nf_conntrack_helper *helper;
 9 
10     /* This is where we call the helper: as the packet goes out. */
11     /* Fetch the nf_conn attached to the skb */
12     ct = nf_ct_get(skb, &ctinfo);
13     /* No conntrack attached, or ctinfo is IP_CT_RELATED_REPLY: nothing to do */
14     if (!ct || ctinfo == IP_CT_RELATED_REPLY)
15         return NF_ACCEPT;
16 
17     /* Fetch the help extension of the conntrack */
18     help = nfct_help(ct);
19 
20     /* No help extension */
21     if (!help)
22         return NF_ACCEPT;
23 
24     /* rcu_read_lock()ed by nf_hook_thresh */
25     /* Fetch the helper through rcu_dereference() */
26     helper = rcu_dereference(help->helper);
27     if (!helper)
28         return NF_ACCEPT;
29 
30     /* Invoke the helper's help() callback, passing the offset just past the IP header */
31     return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
32                 ct, ctinfo);
33 }
ipv4_confirm

ipv4_confirm相关函数完成对连接的确认,并且将连接按照方向加入到对应的hash表中;

 1 static unsigned int ipv4_confirm(void *priv,
 2                  struct sk_buff *skb,
 3                  const struct nf_hook_state *state)
 4 {
 5     struct nf_conn *ct;
 6     enum ip_conntrack_info ctinfo;
 7 
 8     /* Fetch the nf_conn attached to the skb */
 9     ct = nf_ct_get(skb, &ctinfo);
10     /* No conntrack attached, or ctinfo is IP_CT_RELATED_REPLY: skip sequence adjustment */
11     if (!ct || ctinfo == IP_CT_RELATED_REPLY)
12         goto out;
13 
14     /* adjust seqs for loopback traffic only in outgoing direction */
15     /* IPS_SEQ_ADJUST_BIT is set and this is not a loopback packet: adjust sequence numbers, drop on failure */
16     if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
17         !nf_is_loopback_packet(skb)) {
18         if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
19             NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
20             return NF_DROP;
21         }
22     }
23 out:
24     /* We've seen it coming out the other side: confirm it */
25     /* Hand over to nf_conntrack_confirm() */
26     return nf_conntrack_confirm(skb);
27 }
 1 static inline int nf_conntrack_confirm(struct sk_buff *skb)
 2 {
 3     struct nf_conn *ct = (struct nf_conn *)skb_nfct(skb);
 4     int ret = NF_ACCEPT;
 5 
 6     /* The skb has a conntrack attached */
 7     if (ct) {
 8         /* Not confirmed yet: confirm it now */
 9         if (!nf_ct_is_confirmed(ct))
10             ret = __nf_conntrack_confirm(skb);
11         /* On NF_ACCEPT, deliver the cached conntrack events */
12         if (likely(ret == NF_ACCEPT))
13             nf_ct_deliver_cached_events(ct);
14     }
15     return ret;
16 }
  1 int
  2 __nf_conntrack_confirm(struct sk_buff *skb)
  3 {
  4     const struct nf_conntrack_zone *zone;
  5     unsigned int hash, reply_hash;
  6     struct nf_conntrack_tuple_hash *h;
  7     struct nf_conn *ct;
  8     struct nf_conn_help *help;
  9     struct nf_conn_tstamp *tstamp;
 10     struct hlist_nulls_node *n;
 11     enum ip_conntrack_info ctinfo;
 12     struct net *net;
 13     unsigned int sequence;
 14     int ret = NF_DROP;
 15 
 16     ct = nf_ct_get(skb, &ctinfo);
 17     net = nf_ct_net(ct);
 18 
 19     /* ipt_REJECT uses nf_conntrack_attach to attach related
 20        ICMP/TCP RST packets in other direction.  Actual packet
 21        which created connection will be IP_CT_NEW or for an
 22        expected connection, IP_CT_RELATED. */
 23     /* Only packets in the original direction confirm a conntrack; anything else is accepted as-is */
 24     if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
 25         return NF_ACCEPT;
 26 
 27     zone = nf_ct_zone(ct);
 28     local_bh_disable();
 29 
 30     /* Compute the original- and reply-direction hashes; retry while nf_conntrack_double_lock() returns non-zero */
 31     do {
 32         sequence = read_seqcount_begin(&nf_conntrack_generation);
 33         /* reuse the hash saved before */
 34         hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
 35         hash = scale_hash(hash);
 36         reply_hash = hash_conntrack(net,
 37                        &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 38 
 39     } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
 40 
 41     /* We're not in hash table, and we refuse to set up related
 42      * connections for unconfirmed conns.  But packet copies and
 43      * REJECT will give spurious warnings here.
 44      */
 45     /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
 46 
 47     /* No external references means no one else could have
 48      * confirmed us.
 49      */
 50     NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
 51     pr_debug("Confirming conntrack %p\n", ct);
 52     /* We have to check the DYING flag after unlink to prevent
 53      * a race against nf_ct_get_next_corpse() possibly called from
 54      * user context, else we insert an already 'dead' hash, blocking
 55      * further use of that particular connection -JM.
 56      */
 57     nf_ct_del_from_dying_or_unconfirmed_list(ct);
 58 
 59     if (unlikely(nf_ct_is_dying(ct))) {
 60         nf_ct_add_to_dying_list(ct);
 61         goto dying;
 62     }
 63 
 64     /* See if there's one in the list already, including reverse:
 65        NAT could have grabbed it without realizing, since we're
 66        not in the hash.  If there is, we lost race. */
 67 
 68     /* A match in either of the two lookups below means a clash */
 69     
 70     /* Walk the original-direction bucket looking for an entry equal to our original tuple */
 71     hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
 72         if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
 73                     zone, net))
 74             goto out;
 75 
 76     /* Walk the reply-direction bucket looking for an entry equal to our reply tuple */
 77     hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
 78         if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
 79                     zone, net))
 80             goto out;
 81 
 82     /* Timer relative to confirmation time, not original
 83        setting time, otherwise we'd get timer wrap in
 84        weird delay cases. */
 85     /* Rebase the timeout onto the current timestamp */
 86     ct->timeout += nfct_time_stamp;
 87     /* Take an extra reference */
 88     atomic_inc(&ct->ct_general.use);
 89     /* Mark the conntrack as confirmed */
 90     ct->status |= IPS_CONFIRMED;
 91 
 92     /* set conntrack timestamp, if enabled. */
 93     /* If the timestamp extension is present, record the start time from skb->tstamp */
 94     tstamp = nf_conn_tstamp_find(ct);
 95     if (tstamp) {
 96         if (skb->tstamp == 0)
 97             __net_timestamp(skb);
 98 
 99         tstamp->start = ktime_to_ns(skb->tstamp);
100     }
101     /* Since the lookup is lockless, hash insertion must be done after
102      * starting the timer and setting the CONFIRMED bit. The RCU barriers
103      * guarantee that no other CPU can find the conntrack before the above
104      * stores are visible.
105      */
106     /* Insert the original and reply tuple hash nodes into their buckets */
107     __nf_conntrack_hash_insert(ct, hash, reply_hash);
108     nf_conntrack_double_unlock(hash, reply_hash);
109     local_bh_enable();
110 
111     /* Cache event notifications (helper, then related/new) */
112     help = nfct_help(ct);
113     if (help && help->helper)
114         nf_conntrack_event_cache(IPCT_HELPER, ct);
115 
116     nf_conntrack_event_cache(master_ct(ct) ?
117                  IPCT_RELATED : IPCT_NEW, ct);
118     return NF_ACCEPT;
119 
120 out:
121     /* Put ct on the dying list */
122     nf_ct_add_to_dying_list(ct);
123     /* Try to resolve the clash against the existing entry h; its result becomes the verdict */
124     ret = nf_ct_resolve_clash(net, skb, ctinfo, h);
125 dying:
126     nf_conntrack_double_unlock(hash, reply_hash);
127     NF_CT_STAT_INC(net, insert_failed);
128     local_bh_enable();
129     return ret;
130 }

猜你喜欢

转载自www.cnblogs.com/wanpengcoder/p/11755703.html
今日推荐