Linux netfilter HOOK机制

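    Linux系统上TCP/IP协议栈运行在内核态(DPDK等在用户态收包的情况例外),用户态如果想要干预报文的处理,就需要向内核态注入hook函数(如Linux的iptables),netfilter框架中的HOOK机制正是用来提供该功能的。通过之前对IP报文内核处理路径的分析(见原文链接)可知,内核中有如下5个hook点,它们和iptables中的chain一一对应:NF_INET_PRE_ROUTING(PREROUTING)、NF_INET_LOCAL_IN(INPUT)、NF_INET_FORWARD(FORWARD)、NF_INET_LOCAL_OUT(OUTPUT)、NF_INET_POST_ROUTING(POSTROUTING)。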


对应的hook函数在内核中是通过NF_HOOK调用的,NF_HOOK定义在include/linux/netfilter.h文件中。(上图中的NF_IP_xx等定义较老,最新的定义是NF_INET_xx)


/**
 *  nf_hook_thresh - run the netfilter hooks registered for (pf, hook)
 *
 *  Returns 1 when the packet has been allowed to pass; the caller must
 *  then invoke okfn itself.  Any other return value indicates the packet
 *  has been consumed by the hook.
 *
 *  Unless CONFIG_NETFILTER_DEBUG is defined, an empty hook list is
 *  answered with 1 immediately, without going through nf_hook_slow().
 */
static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
                 struct sk_buff *skb,
                 struct net_device *indev,
                 struct net_device *outdev,
                 int (*okfn)(struct sk_buff *), int thresh)
{
#ifdef CONFIG_NETFILTER_DEBUG
    /* Debug builds always take the slow path. */
    const int nothing_registered = 0;
#else
    /* Fast path candidate: no hook is registered on this hook point. */
    const int nothing_registered = list_empty(&nf_hooks[pf][hook]);
#endif

    if (nothing_registered)
        return 1;

    return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh);
}
/*
 * nf_hook - call nf_hook_thresh() with the lowest possible priority
 * threshold (INT_MIN) and hand its result back unchanged.
 */
static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
              struct net_device *indev, struct net_device *outdev,
              int (*okfn)(struct sk_buff *))
{
    const int lowest_thresh = INT_MIN;

    return nf_hook_thresh(pf, hook, skb, indev, outdev, okfn,
                  lowest_thresh);
}

/* Activate hook; either okfn or kfree_skb called, unless a hook
   returns NF_STOLEN (in which case, it's up to the hook to deal with
   the consequences).

   Returns -ERRNO if packet dropped.  Zero means queued, stolen or
   accepted.
*/

/* RR:
   > I don't want nf_hook to return anything because people might forget
   > about async and trust the return value to mean "packet was ok".

   AK:
   Just document it clearly, then you can expect some sense from kernel
   coders :)
*/

/*
 * NF_HOOK_THRESH - run the hooks via nf_hook_thresh() and, when that
 * returns 1, continue the packet's processing by calling okfn(skb).
 *
 * Returns okfn's result in that case; any other value coming from
 * nf_hook_thresh() is returned unchanged and okfn is not called here.
 */
static inline int
NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sk_buff *skb,
           struct net_device *in, struct net_device *out,
           int (*okfn)(struct sk_buff *), int thresh)
{
    int rc = nf_hook_thresh(pf, hook, skb, in, out, okfn, thresh);

    /* Only a return value of 1 hands the packet on to okfn. */
    if (rc != 1)
        return rc;

    return okfn(skb);
}
/*
 * NF_HOOK - NF_HOOK_THRESH() with the lowest possible priority
 * threshold (INT_MIN); its result is returned unchanged.
 */
static inline int
NF_HOOK(uint8_t pf, unsigned int hook, struct sk_buff *skb,
    struct net_device *in, struct net_device *out,
    int (*okfn)(struct sk_buff *))
{
    const int lowest_thresh = INT_MIN;

    return NF_HOOK_THRESH(pf, hook, skb, in, out, okfn, lowest_thresh);
}

/*
 * nf_hook_slow - iterate the hooks of (pf, hook) and act on the verdict.
 *
 * Returns 1 if okfn() needs to be executed by the caller (verdict was
 * NF_ACCEPT or NF_STOP), a negative error for NF_DROP (-EPERM when the
 * verdict carries no error code), and 0 otherwise.
 *
 * The skb is freed here on NF_DROP, and on a failed nf_queue() that is
 * neither -ECANCELED nor a bypassable -ESRCH.
 */
int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
         struct net_device *indev,
         struct net_device *outdev,
         int (*okfn)(struct sk_buff *),
         int hook_thresh)
{
    struct list_head *pos;
    unsigned int verdict;
    int ret = 0;

    /* We may already have this, but read-locks nest anyway.
     * RCU synchronizes the access to nf_hooks[]. */
    rcu_read_lock();

    pos = &nf_hooks[pf][hook];
    for (;;) {
        int err;

        /* Resume the walk after the element pos currently points at. */
        verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev,
                     outdev, &pos, okfn, hook_thresh);

        if (verdict == NF_ACCEPT || verdict == NF_STOP) {
            ret = 1;
            break;
        }

        if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
            /* A hook dropped the packet: release the skb here. */
            kfree_skb(skb);
            ret = NF_DROP_GETERR(verdict);
            if (ret == 0)
                ret = -EPERM;
            break;
        }

        /* Every remaining verdict except NF_QUEUE leaves ret at 0. */
        if ((verdict & NF_VERDICT_MASK) != NF_QUEUE)
            break;

        err = nf_queue(skb, pos, pf, hook, indev, outdev, okfn,
                   verdict >> NF_VERDICT_QBITS);
        if (err >= 0)
            break;

        /* These two failures carry on with the next hook. */
        if (err == -ECANCELED)
            continue;
        if (err == -ESRCH &&
            (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
            continue;

        /* Any other queueing failure: the packet is freed. */
        kfree_skb(skb);
        break;
    }

    rcu_read_unlock();
    return ret;
}
/*
 * nf_iterate - call the hooks on a list one after another.
 *
 * The walk continues from the element after *i; *i serves as the list
 * cursor and is advanced as the walk proceeds.  Hooks whose priority is
 * below hook_thresh are skipped.  A hook returning NF_REPEAT is called
 * again; a hook returning NF_ACCEPT lets the walk move on.
 *
 * Returns the first verdict that is neither NF_ACCEPT nor NF_REPEAT, or
 * NF_ACCEPT once the end of the list is reached.
 */
unsigned int nf_iterate(struct list_head *head,
            struct sk_buff *skb,
            unsigned int hook,
            const struct net_device *indev,
            const struct net_device *outdev,
            struct list_head **i,
            int (*okfn)(struct sk_buff *),
            int hook_thresh)
{
    unsigned int verdict;

    /*
     * The caller must not block between calls to this
     * function because of risk of continuing from deleted element.
     */
    list_for_each_continue_rcu(*i, head) {
        struct nf_hook_ops *ops = (struct nf_hook_ops *)*i;

        if (ops->priority < hook_thresh)
            continue;

        /* Optimization: we don't need to hold module
           reference here, since function can't sleep. --RR */
        for (;;) {
            /* Invoke this hook; loop again only on NF_REPEAT. */
            verdict = ops->hook(hook, skb, indev, outdev, okfn);
            if (verdict == NF_ACCEPT)
                break;  /* move on to the next hook */
#ifdef CONFIG_NETFILTER_DEBUG
            if (unlikely((verdict & NF_VERDICT_MASK)
                            > NF_MAX_VERDICT)) {
                NFDEBUG("Evil return from %p(%u).\n",
                    ops->hook, hook);
                break;  /* ignore it, move on to the next hook */
            }
#endif
            if (verdict != NF_REPEAT)
                return verdict;
        }
    }
    return NF_ACCEPT;
}

过程比较简单,NF_HOOK直接遍历nf_hooks上的所有钩子函数,将报文交给它们处理,如果nf_hook_thresh返回1(即报文被钩子放行)则交给okfn函数进一步处理。所有钩子函数都存储在nf_hooks上,nf_hooks是一个二维数组(以协议族pf和hook点为下标),数组元素是链表。下图以NF_INET_LOCAL_IN为例展示数据结构的关系,链表按priority从小到大排序。


内核插入钩子函数的API是nf_register_hook函数。

/*
 * nf_register_hook - insert a hook into nf_hooks[reg->pf][reg->hooknum].
 *
 * The new entry is placed in front of the first existing entry whose
 * priority is greater than reg->priority, or at the tail when there is
 * no such entry.  The list is modified under nf_hook_mutex.
 *
 * Returns 0 on success, or the negative value returned by
 * mutex_lock_interruptible() when taking the mutex fails.
 */
int nf_register_hook(struct nf_hook_ops *reg)
{
    struct list_head *chain;
    struct nf_hook_ops *pos;
    int rc;

    rc = mutex_lock_interruptible(&nf_hook_mutex);
    if (rc < 0)
        return rc;

    chain = &nf_hooks[reg->pf][reg->hooknum];
    list_for_each_entry(pos, chain, list) {
        if (pos->priority > reg->priority)
            break;
    }
    /* If the loop ran to the end, pos->list is the list head itself,
     * so its prev is the current tail. */
    list_add_rcu(&reg->list, pos->list.prev);

    mutex_unlock(&nf_hook_mutex);
    return 0;
}

参考:

https://blog.csdn.net/windeal3203/article/details/51204911



猜你喜欢

转载自blog.csdn.net/kklvsports/article/details/80285727
今日推荐