netfilter之nat代码分析

nat主要在PRE_ROUTING、LOCAL_OUT、LOCAL_IN、POST_ROUTING四个链上注册了hook函数,PRE_ROUTING、LOCAL_OUT这两个链上做DNAT,LOCAL_IN和POST_ROUTING链上做SNAT。nat表没有LOCAL_IN链,但在LOCAL_IN上注册了钩子函数nf_nat_fn,主要作用是修改数据包的源端口。

/*
 * IPv4 NAT hook table: one entry per netfilter hook point that NAT
 * attaches to.  The two NF_IP_PRI_NAT_DST entries rewrite the destination,
 * the two NF_IP_PRI_NAT_SRC entries rewrite the source.
 */
static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
	/* PRE_ROUTING: before packet filtering, change destination (DNAT) */
	{
		.hooknum	= NF_INET_PRE_ROUTING,
		.priority	= NF_IP_PRI_NAT_DST,
		.pf		= NFPROTO_IPV4,
		.hook		= nf_nat_in,
		.owner		= THIS_MODULE,
	},
	/* POST_ROUTING: after packet filtering, change source (SNAT) */
	{
		.hooknum	= NF_INET_POST_ROUTING,
		.priority	= NF_IP_PRI_NAT_SRC,
		.pf		= NFPROTO_IPV4,
		.hook		= nf_nat_out,
		.owner		= THIS_MODULE,
	},
	/* LOCAL_OUT: before packet filtering, change destination (DNAT) */
	{
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP_PRI_NAT_DST,
		.pf		= NFPROTO_IPV4,
		.hook		= nf_nat_local_fn,
		.owner		= THIS_MODULE,
	},
	/* LOCAL_IN: after packet filtering, change source; calls nf_nat_fn directly */
	{
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP_PRI_NAT_SRC,
		.pf		= NFPROTO_IPV4,
		.hook		= nf_nat_fn,
		.owner		= THIS_MODULE,
	},
};

1、nf_nat_in

nf_nat_in钩子函数注册在PRE_ROUTING链上,最终做DNAT的处理函数是nf_nat_fn,这个函数后面再讲。做了DNAT后,如果目的地址改变而且数据包没有被丢弃,就调用skb_dst_drop,它最终调用dst_release,将skb->dst设置为NULL,并将对应dst_entry的引用计数减1。

/*
 * PRE_ROUTING hook wrapper around nf_nat_fn().
 *
 * Remembers the destination address before NAT runs.  If the packet was
 * neither dropped nor stolen and the destination address differs
 * afterwards, skb_dst_drop() is called on the skb.
 *
 * Returns the verdict produced by nf_nat_fn().
 */
static unsigned int
nf_nat_in(unsigned int hooknum,
	  struct sk_buff *skb,
	  const struct net_device *in,
	  const struct net_device *out,
	  int (*okfn)(struct sk_buff *))
{
	__be32 orig_daddr = ip_hdr(skb)->daddr;
	unsigned int verdict;

	/* The actual NAT work happens here. */
	verdict = nf_nat_fn(hooknum, skb, in, out, okfn);

	/* Do not look at the skb again if it was dropped or stolen. */
	if (verdict == NF_DROP || verdict == NF_STOLEN)
		return verdict;

	/* Destination rewritten: discard the dst attached to the skb. */
	if (ip_hdr(skb)->daddr != orig_daddr)
		skb_dst_drop(skb);

	return verdict;
}

2、nf_nat_out

nf_nat_out注册在POST_ROUTING链上,实现的功能是做SNAT,最终处理的函数也是nf_nat_fn。

/*
 * POST_ROUTING hook wrapper around nf_nat_fn().
 *
 * Packets too short to hold an IP header are accepted untouched.  With
 * CONFIG_XFRM, when the packet survives NAT and the source address or
 * source protocol part of its direction no longer mirrors the destination
 * of the opposite direction, ip_xfrm_me_harder() is called and a non-zero
 * result turns the verdict into NF_DROP.
 *
 * Returns the (possibly overridden) verdict from nf_nat_fn().
 */
static unsigned int
nf_nat_out(unsigned int hooknum,
	   struct sk_buff *skb,
	   const struct net_device *in,
	   const struct net_device *out,
	   int (*okfn)(struct sk_buff *))
{
#ifdef CONFIG_XFRM
	const struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	enum ip_conntrack_dir dir;
#endif
	unsigned int verdict;

	/* root is playing with raw sockets. */
	if (skb->len < sizeof(struct iphdr) ||
	    ip_hdrlen(skb) < sizeof(struct iphdr))
		return NF_ACCEPT;

	/* Source NAT is performed by the shared worker. */
	verdict = nf_nat_fn(hooknum, skb, in, out, okfn);
#ifdef CONFIG_XFRM
	if (verdict == NF_DROP || verdict == NF_STOLEN)
		return verdict;

	ct = nf_ct_get(skb, &ctinfo);
	if (ct == NULL)
		return verdict;

	dir = CTINFO2DIR(ctinfo);

	/* Source address or source proto part was changed by NAT. */
	if ((ct->tuplehash[dir].tuple.src.u3.ip !=
	     ct->tuplehash[!dir].tuple.dst.u3.ip) ||
	    (ct->tuplehash[dir].tuple.src.u.all !=
	     ct->tuplehash[!dir].tuple.dst.u.all)) {
		if (ip_xfrm_me_harder(skb) != 0)
			return NF_DROP;
	}
#endif
	return verdict;
}

3、nf_nat_local_fn

nf_nat_local_fn注册在LOCAL_OUT链上,最终也是调用nf_nat_fn做DNAT。在LOCAL_OUT链之前数据包已经做了路由选择,因为做DNAT后目的地址改变,所以要调用ip_route_me_harder重新选择路由。

/*
 * LOCAL_OUT hook wrapper around nf_nat_fn().
 *
 * Packets too short to hold an IP header are accepted untouched.  When
 * the packet survives NAT and the destination address of its direction no
 * longer mirrors the source of the opposite direction, the packet is
 * re-routed with ip_route_me_harder().  Otherwise, with CONFIG_XFRM, a
 * changed destination protocol part triggers ip_xfrm_me_harder().  A
 * non-zero result from either helper turns the verdict into NF_DROP.
 */
static unsigned int
nf_nat_local_fn(unsigned int hooknum,
		struct sk_buff *skb,
		const struct net_device *in,
		const struct net_device *out,
		int (*okfn)(struct sk_buff *))
{
	const struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	enum ip_conntrack_dir dir;
	unsigned int verdict;

	/* root is playing with raw sockets. */
	if (skb->len < sizeof(struct iphdr) ||
	    ip_hdrlen(skb) < sizeof(struct iphdr))
		return NF_ACCEPT;

	/* Destination NAT is performed by the shared worker. */
	verdict = nf_nat_fn(hooknum, skb, in, out, okfn);
	if (verdict == NF_DROP || verdict == NF_STOLEN)
		return verdict;

	ct = nf_ct_get(skb, &ctinfo);
	if (ct == NULL)
		return verdict;

	dir = CTINFO2DIR(ctinfo);

	if (ct->tuplehash[dir].tuple.dst.u3.ip !=
	    ct->tuplehash[!dir].tuple.src.u3.ip) {
		/* Destination address changed: pick a route again. */
		if (ip_route_me_harder(skb, RTN_UNSPEC))
			verdict = NF_DROP;
	}
#ifdef CONFIG_XFRM
	else if (ct->tuplehash[dir].tuple.dst.u.all !=
		 ct->tuplehash[!dir].tuple.src.u.all) {
		if (ip_xfrm_me_harder(skb))
			verdict = NF_DROP;
	}
#endif
	return verdict;
}

4、nf_nat_fn

nf_nat_fn对数据包的连接跟踪项的NAT只做一次,后续的数据包根据连接跟踪做NAT。nf_nat_fn主要做以下几件事

(1)判断数据包的链接跟踪是否建立,如果没有建立(或者是不做跟踪的untracked连接)直接返回;如果链接跟踪没有关联nf_conn_nat扩展,连接已确认则直接返回,否则为其添加nf_conn_nat扩展

(2)如果数据包状态是一个期望链接(IP_CT_RELATED)或者其reply方向,而且协议是icmp,就调用nf_nat_icmp_reply_translation对icmp做nat

扫描二维码关注公众号,回复: 4531282 查看本文章

(3)如果数据包的状态是IP_CT_NEW,就调用nf_nat_initialized判断该数据包的链接跟踪是否已经做 NAT,如果还没有做NAT而且是LOCAL_IN链上的钩子函数,就调用alloc_null_binding修改链接跟踪reply方向

(4)调用函数nf_nat_rule_find查找nat表最后由nf_nat_packet根据链接跟踪做nat

/*
 * Common NAT worker used by all four hooks.
 *
 * Accepts the packet unchanged when no conntrack entry is attached, when
 * the entry is nf_conntrack_untracked, or when the NAT extension is
 * missing and cannot be added.  RELATED ICMP packets are handed to
 * nf_nat_icmp_reply_translation().  For NEW (and non-ICMP RELATED)
 * packets whose conntrack has not been set up for this manip type yet,
 * the binding is created first: alloc_null_binding() on LOCAL_IN,
 * nf_nat_rule_find() on every other hook.  Finally the packet itself is
 * rewritten by nf_nat_packet(), whose verdict is returned.
 */
static unsigned int
nf_nat_fn(unsigned int hooknum,
	  struct sk_buff *skb,
	  const struct net_device *in,
	  const struct net_device *out,
	  int (*okfn)(struct sk_buff *))
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	/* maniptype == SRC for postrouting. */
	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);

	/* We never see fragments: conntrack defrags on pre-routing
	   and local-out, and nf_nat_out protects post-routing. */
	NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));

	/* Fetch the conntrack entry and the packet's state (ctinfo). */
	ct = nf_ct_get(skb, &ctinfo);
	/* Can't track?  It's not due to stress, or conntrack would
	   have dropped it.  Hence it's the user's responsibility to
	   packet filter it out, or implement conntrack/NAT for that
	   protocol. 8) --RR */
	if (!ct)
		return NF_ACCEPT;

	/* Don't try to NAT if this packet is not conntracked */
	if (ct == &nf_conntrack_untracked)
		return NF_ACCEPT;

	/* Look up the NAT extension of the conntrack; add it if missing. */
	nat = nfct_nat(ct);
	if (!nat) {
		/* NAT module was loaded late. */
		/* An already confirmed conntrack is left alone. */
		if (nf_ct_is_confirmed(ct))
			return NF_ACCEPT;
		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
		if (nat == NULL) {
			pr_debug("failed to add NAT extension\n");
			return NF_ACCEPT;
		}
	}

	switch (ctinfo) {
	case IP_CT_RELATED:
	case IP_CT_RELATED+IP_CT_IS_REPLY:
		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
			/* RELATED ICMP (either direction) is translated by
			   nf_nat_icmp_reply_translation(); failure drops. */
			if (!nf_nat_icmp_reply_translation(ct, ctinfo,
							   hooknum, skb))
				return NF_DROP;
			else
				return NF_ACCEPT;
		}
		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
	case IP_CT_NEW:

		/* Seen it before?  This can happen for loopback, retrans,
		   or local packets.. */
		  /* Has NAT of this manip type been set up on the conntrack? */
		if (!nf_nat_initialized(ct, maniptype)) {
			unsigned int ret;

			/* LOCAL_IN has no nat chain to consult, so a null
			   binding is allocated directly. */
			if (hooknum == NF_INET_LOCAL_IN)
				/* LOCAL_IN hook doesn't have a chain!  */
				ret = alloc_null_binding(ct, hooknum);
			else
				/* Consult the nat table rules to set up the binding. */
				ret = nf_nat_rule_find(skb, hooknum, in, out,
						       ct);

			if (ret != NF_ACCEPT)
				return ret;
		} else
			pr_debug("Already setup manip %s for ct %p\n",
				 maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
				 ct);
		break;

	default:
		/* ESTABLISHED */
		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
			     ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
	}

	/* Rewrite the packet according to the conntrack tuples. */
	return nf_nat_packet(ct, ctinfo, hooknum, skb);
}

4.1 nf_nat_initialized

nf_nat_initialized判断链接跟踪项是否做了NAT,如果做了NAT,那么ct->status就会设置IPS_SRC_NAT_DONE_BIT或IPS_DST_NAT_DONE_BIT。

/*
 * Report whether NAT setup of the given manip type has already been done
 * on this conntrack: tests IPS_SRC_NAT_DONE_BIT for IP_NAT_MANIP_SRC and
 * IPS_DST_NAT_DONE_BIT for any other manip type in ct->status.
 */
static inline int nf_nat_initialized(struct nf_conn *ct,
				     enum nf_nat_manip_type manip)
{
	int done_bit;

	if (manip == IP_NAT_MANIP_SRC)
		done_bit = IPS_SRC_NAT_DONE_BIT;
	else
		done_bit = IPS_DST_NAT_DONE_BIT;

	return test_bit(done_bit, &ct->status);
}

4.2 alloc_null_binding

当数据包的状态是IP_CT_NEW并且是在LOCAL_IN链上,就调用alloc_null_binding对链接跟踪做NAT、修改reply方向。因为LOCAL_IN是netfilter框架的一个出口,如果这时链接跟踪没做NAT,那么数据包出去就会有问题。

/*
 * Set up a "null" NAT binding: the range is pinned to the address the
 * conntrack's reply tuple already carries, so only the per-protocol part
 * is left for the protocol to map (hence no IP_NAT_RANGE_PROTO_SPECIFIED).
 * The reply tuple is used in case the original was already mangled
 * (e.g. a local packet).
 *
 * Returns the result of nf_nat_setup_info().
 */
unsigned int
alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
{
	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
	__be32 ip;

	/* SRC manip keeps the reply destination, DST manip the reply source. */
	if (manip == IP_NAT_MANIP_SRC)
		ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip;
	else
		ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip;

	{
		struct nf_nat_range range
			= { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } };

		pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, &ip);
		return nf_nat_setup_info(ct, &range, manip);
	}
}

4.3 nf_nat_rule_find

链接状态是IP_CT_NEW、IP_CT_RELATED、IP_CT_RELATED+IP_CT_IS_REPLY,而且不是在LOCAL_IN上,就调用nf_nat_rule_find查找NAT表匹配规则,找到就调用相应的target函数(ipt_snat_target或者ipt_dnat_target)实现连接跟踪项的转换。如果没有找到,就调用alloc_null_binding做链接跟踪的NAT。alloc_null_binding实际调用的是nf_nat_setup_info,这个函数下一节再分析。

/*
 * Run the packet through the nat table of the conntrack's network
 * namespace.  If the table accepts the packet but no NAT of this hook's
 * manip type was set up on the conntrack, a null binding is allocated.
 *
 * Returns the table verdict, or the result of alloc_null_binding() when
 * that fallback was taken.
 */
int nf_nat_rule_find(struct sk_buff *skb,
		     unsigned int hooknum,
		     const struct net_device *in,
		     const struct net_device *out,
		     struct nf_conn *ct)
{
	struct net *net = nf_ct_net(ct);
	int verdict = ipt_do_table(skb, hooknum, in, out,
				   net->ipv4.nat_table);

	/* Any verdict other than ACCEPT is passed straight back. */
	if (verdict != NF_ACCEPT)
		return verdict;

	/* A rule already set up NAT for this manip type. */
	if (nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
		return verdict;

	/* NUL mapping */
	return alloc_null_binding(ct, hooknum);
}

5、nf_nat_packet

当数据包的链接跟踪已经做了NAT,就调用nf_nat_packet根据链接跟踪修改数据包的ip、端口做NAT。这个函数很巧妙,此时链接跟踪已经做了NAT,就取dir的反方向的tuple,然后再对tuple中的源ip、目的ip、源端口、目的端口颠倒,得到target,最后调用manip_pkt做NAT修改数据包的ip地址和端口。

这个地方有点绕,举一个例子:比如一个网关112.112.112.112,它下面的局域网有一个A设备ip是192.168.0.100,这个A设备要访问一个外网服务器,地址是113.113.113.113,这样必须由网关做SNAT。首先链接跟踪做了SNAT后tuple如下

orig
src dst
192.168.0.100 113.113.113.113
reply
src dst
113.113.113.113 112.112.112.112

设备A的数据包访问服务器是orig方向:192.168.0.100 -> 113.113.113.113,调用nf_nat_packet取反也就是reply:113.113.113.113 -> 112.112.112.112,再颠倒过来得到target:112.112.112.112 -> 113.113.113.113,然后按target:112.112.112.112 -> 113.113.113.113修改数据包的源IP、目的IP完成SNAT转换。

当外部服务器有数据包reply:113.113.113.113 -> 112.112.112.112,调用nf_nat_packet取反方向也就是orig:192.168.0.100 -> 113.113.113.113,再颠倒过来得到target:113.113.113.113 -> 192.168.0.100,然后按target:113.113.113.113 -> 192.168.0.100修改数据包的源IP、目的IP。

比如110.110.110.110的网关地址要做DNAT到内部一个地址192.168.0.200,一个外网地址111.111.111.111访问网关110.110.110.110就会做DNAT到192.168.0.200,链接跟踪做DNAT后tuple如下

orig
src dst
111.111.111.111 110.110.110.110
reply
src dst
192.168.0.200 111.111.111.111

当外网地址访问网关也就是orig方向:111.111.111.111->110.110.110.110,调用nf_nat_packet会取相反方向的tuple也就是reply:192.168.0.200->111.111.111.111,然后颠倒得到target:111.111.111.111->192.168.0.200,然后修改源IP、目的IP完成DNAT转换。

192.168.0.200有回复包也就是reply方向:192.168.0.200->111.111.111.111,调用nf_nat_packet会取相反方向的tuple也就是orig:111.111.111.111->110.110.110.110,然后颠倒得到target:110.110.110.110->111.111.111.111,修改数据包的源地址、目的地地址完成reply。

所以说NAT其实就是基于链接跟踪实现的。

/*
 * Do packet manipulations according to nf_nat_setup_info.
 *
 * Picks the status bit matching this hook's manip type (inverted for the
 * reply direction).  When that bit is set on the conntrack, the packet is
 * rewritten to look like the inverse of the opposite direction's tuple.
 *
 * Returns NF_DROP if manip_pkt() fails, NF_ACCEPT otherwise.
 */
unsigned int nf_nat_packet(struct nf_conn *ct,
			   enum ip_conntrack_info ctinfo,
			   unsigned int hooknum,
			   struct sk_buff *skb)
{
	enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	unsigned long statusbit =
		(mtype == IP_NAT_MANIP_SRC) ? IPS_SRC_NAT : IPS_DST_NAT;
	struct nf_conntrack_tuple target;

	/* Invert if this is reply dir. */
	if (dir == IP_CT_DIR_REPLY)
		statusbit ^= IPS_NAT_MASK;

	/* Non-atomic: these bits don't change. */
	if (!(ct->status & statusbit))
		return NF_ACCEPT;

	/* We are aiming to look like inverse of other direction:
	   take the opposite direction's tuple and invert it. */
	nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);

	/* Apply the target tuple to the packet. */
	if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype))
		return NF_DROP;

	return NF_ACCEPT;
}

6、manip_pkt

manip_pkt主要根据传进来的target和maniptype完成三层、四层的NAT转换。先获取四层的struct nf_nat_protocol结构体实例,然后调用四层协议的manip_pkt完成四层端口的NAT,最后修改IP头中的源地址或目的地址并更新IP校验和。

/*
 * Rewrite one packet according to a target tuple.
 *
 * @proto:     protocol number used to look up the L4 NAT handler
 * @skb:       packet to modify
 * @iphdroff:  offset of the IP header from skb->data
 * @target:    tuple whose src (SRC manip) or dst (DST manip) is applied
 * @maniptype: IP_NAT_MANIP_SRC rewrites the source, anything else the
 *             destination
 *
 * The L4 handler runs first, then the IP address is replaced and the IP
 * header checksum adjusted with csum_replace4().
 *
 * Returns false if the header cannot be made writable or the L4 handler
 * fails, true otherwise.
 */
static bool
manip_pkt(u_int16_t proto,
	  struct sk_buff *skb,
	  unsigned int iphdroff,
	  const struct nf_conntrack_tuple *target,
	  enum nf_nat_manip_type maniptype)
{
	struct iphdr *iph;
	const struct nf_nat_protocol *p;

	if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
		return false;

	/* Manipulate protocol part. */

	/* rcu_read_lock()ed by nf_hook_slow */
	p = __nf_nat_proto_find(proto);
	if (!p->manip_pkt(skb, iphdroff, target, maniptype))
		return false;

	/*
	 * Derive the header pointer only now, after the L4 handler has run.
	 * NOTE(review): presumably the handler may relocate skb->data, so a
	 * pointer taken earlier would be stale -- confirm.
	 */
	iph = (void *)skb->data + iphdroff;

	if (maniptype == IP_NAT_MANIP_SRC) {
		/* SNAT: patch the checksum, then the source address. */
		csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
		iph->saddr = target->src.u3.ip;
	} else {
		/* DNAT: patch the checksum, then the destination address. */
		csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
		iph->daddr = target->dst.u3.ip;
	}
	return true;
}

猜你喜欢

转载自blog.csdn.net/City_of_skey/article/details/84981175