The effect of ip_forward parameter on Linux kernel forwarding

Reprinted from: http://10495372.blog.51cto.com/10485372/1671453

To make the Linux kernel forward packets, you need to set the forwarding parameter under /proc/sys in the proc file system. You can view the current value with cat /proc/sys/net/ipv4/ip_forward; the default value of this parameter is 0. The following command sets it to 1, which enables packet forwarding at the IP layer of the Linux kernel, but the change is lost when the system restarts.
echo 1 > /proc/sys/net/ipv4/ip_forward
Linux also provides a system configuration tool, sysctl, which can be used to read and configure some parameters of the Linux kernel. This tool relies on the proc file system, so the kernel must support the proc file system for it to work. Below, sysctl is used to configure the kernel's forwarding parameter.
# sysctl net.ipv4.ip_forward
net.ipv4.ip_forward = 0
/ # sysctl -w net.ipv4.ip_forward=1
net.ipv4.ip_forward = 1
/ # sysctl net.ipv4.ip_forward
net.ipv4.ip_forward = 1
/ #
Note that the parameter net.ipv4.ip_forward corresponds to the proc file /proc/sys/net/ipv4/ip_forward. The -w option writes a kernel configuration parameter; without any option, sysctl only reads the parameter.
With the methods above we can set and read the IP forwarding parameter. However, the focus of this article is not how to configure this parameter, but how it takes effect in the kernel's forwarding path once it has been configured, and how the configured value reaches the kernel. Since this parameter enables forwarding at the IP layer, it must be checked somewhere in the forwarding part of the Linux kernel. In fact, it is checked during the route lookup.

During the route lookup, in the function ip_route_input_slow, if the packet is one that has to be forwarded, the following macro is called to determine whether the forwarding parameter is enabled.
/* If forwarding is not enabled for the input device, jump to the e_hostunreach label (outside this excerpt). */
if (!IN_DEV_FORWARD(in_dev))
   goto e_hostunreach;
To see how this macro works, look at its definition in the include/linux/inetdevice.h file:
/* IN_DEV_FORWARD(in_dev): reads the FORWARDING attribute of in_dev through IN_DEV_CONF_GET. */
#define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING)
Further expand the IN_DEV_CONF_GET macro:
/*
 * IN_DEV_CONF_GET(in_dev, attr): expands to a call of ipv4_devconf_get() with
 * the index NET_IPV4_CONF_<attr>.  The ## operator is token pasting: it joins
 * the NET_IPV4_CONF_ prefix and attr into a single identifier (it does not
 * operate on strings).
 */
#define IN_DEV_CONF_GET(in_dev, attr) \
ipv4_devconf_get((in_dev), NET_IPV4_CONF_ ##attr)
The following is the definition of the ipv4_devconf_get function:
/*
 * Return the per-device IPv4 configuration value selected by index.
 *
 * index is a NET_IPV4_CONF_* identifier, which starts at 1, while
 * cnf.data[] is indexed from 0; hence the "- 1".  For example,
 * NET_IPV4_CONF_FORWARDING (1) reads in_dev->cnf.data[0].
 */
static inline int ipv4_devconf_get(struct in_device *in_dev, int index)
{
	return in_dev->cnf.data[index - 1];
}
(1) NET_IPV4_CONF_FORWARDING is a constant of an enumeration type, defined in the include/linux/sysctl.h file.
/*
 * Identifiers of the per-device IPv4 configuration entries.
 *
 * The values start at 1; the accessors subtract 1 before indexing the
 * cnf.data[] array.  Value 23 is not listed in this excerpt, so
 * NET_IPV4_CONF_SRC_VMARK is given its value (24) explicitly.
 */
enum
{
	NET_IPV4_CONF_FORWARDING = 1,
	NET_IPV4_CONF_MC_FORWARDING = 2,
	NET_IPV4_CONF_PROXY_ARP = 3,
	NET_IPV4_CONF_ACCEPT_REDIRECTS = 4,
	NET_IPV4_CONF_SECURE_REDIRECTS = 5,
	NET_IPV4_CONF_SEND_REDIRECTS = 6,
	NET_IPV4_CONF_SHARED_MEDIA = 7,
	NET_IPV4_CONF_RP_FILTER = 8,
	NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE = 9,
	NET_IPV4_CONF_BOOTP_RELAY = 10,
	NET_IPV4_CONF_LOG_MARTIANS = 11,
	NET_IPV4_CONF_TAG = 12,
	NET_IPV4_CONF_ARPFILTER = 13,
	NET_IPV4_CONF_MEDIUM_ID = 14,
	NET_IPV4_CONF_NOXFRM = 15,
	NET_IPV4_CONF_NOPOLICY = 16,
	NET_IPV4_CONF_FORCE_IGMP_VERSION = 17,
	NET_IPV4_CONF_ARP_ANNOUNCE = 18,
	NET_IPV4_CONF_ARP_IGNORE = 19,
	NET_IPV4_CONF_PROMOTE_SECONDARIES = 20,
	NET_IPV4_CONF_ARP_ACCEPT = 21,
	NET_IPV4_CONF_ARP_NOTIFY = 22,
	NET_IPV4_CONF_SRC_VMARK = 24,
	__NET_IPV4_CONF_MAX	/* one past the last listed identifier */
};
(2) The statement return in_dev->cnf.data[index]; therefore returns in_dev->cnf.data[0] (NET_IPV4_CONF_FORWARDING is 1 and index is decremented first), so let's see how its initial value is produced.
First, how is in_dev obtained? In the ip_route_input_slow function it is obtained through struct in_device *in_dev = in_dev_get(dev);. The in_dev_get function calls __in_dev_get_rcu, which takes the pointer from the device with the assignment struct in_device *in_dev = dev->ip_ptr;
static inline struct in_device *__in_dev_get_rcu(const struct net_device *dev)
{
struct in_device *in_dev = dev->ip_ptr;
if (in_dev)
 in_dev = rcu_dereference(in_dev);
return in_dev;
}
 
static __inline__ struct in_device *
in_dev_get(const struct net_device *dev)
{
struct in_device *in_dev;
 
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
if (in_dev)
 atomic_inc(&in_dev->refcnt);
rcu_read_unlock();
return in_dev;
}
So when is dev->ip_ptr assigned? The answer is in inetdev_init, the initialization function that runs when a net_device is registered:
/*
 * Allocate and set up the IPv4 state (struct in_device) for a net_device.
 *
 * The new in_device is zero-allocated, its configuration is copied from the
 * defaults of the device's network namespace (ipv4.devconf_dflt), and it is
 * finally published through dev->ip_ptr.
 *
 * Returns the new in_device, or NULL when the in_device or its ARP
 * parameters could not be allocated.
 */
static struct in_device *inetdev_init(struct net_device *dev)
{
struct in_device *in_dev;
 
ASSERT_RTNL();
 
in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
if (!in_dev)
 goto out;
memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
 sizeof(in_dev->cnf));// initialize in_dev->cnf from the namespace defaults ---(1)
in_dev->cnf.sysctl = NULL;
in_dev->dev = dev;
if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
 goto out_kfree;
/* the copied defaults already have forwarding enabled: turn LRO off on the device */
if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
 dev_disable_lro(dev);
/* Reference in_dev->dev */
dev_hold(dev);
/* Account for reference dev->ip_ptr (below) */
in_dev_hold(in_dev);
 
devinet_sysctl_register(in_dev);
ip_mc_init_dev(in_dev);
if (dev->flags & IFF_UP)
 ip_mc_up(in_dev);
 
/* we can receive as soon as ip_ptr is set -- do this last */
rcu_assign_pointer(dev->ip_ptr, in_dev);// publish in_dev through dev->ip_ptr using the RCU assignment primitive
out:
return in_dev;
out_kfree:
/* ARP parameter allocation failed: free the in_device and return NULL */
kfree(in_dev);
in_dev = NULL;
goto out;
}
(1) For the initial network namespace, dev_net(dev)->ipv4.devconf_dflt is equivalent to init_net->ipv4.devconf_dflt, and devconf_dflt is initialized in the devinet_init_net function in the net/ipv4/devinet.c file:
static struct ipv4_devconf ipv4_devconf_dflt = {
.data = {
 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1, [NET_SHA_REDIRECTS - 1] = 1,
 [NET_SHA_RED_IPV4] = 1 ,
 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
},
};//There is no assignment to FORWARDING
/*
 * Set up the IPv4 device configuration tables for a network namespace.
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt tables directly;
 * for any other namespace the tables (and, with CONFIG_SYSCTL, the ip_forward
 * sysctl entry) are duplicated with kmemdup() and the duplicated entry is
 * pointed at the namespace's own copy.  With CONFIG_SYSCTL the "all" and
 * "default" trees and the ip_forward entry are then registered.
 *
 * Returns 0 on success.  The error-handling labels targeted by the gotos are
 * elided in this excerpt.
 */
static __net_init int devinet_init_net(struct net *net)
{
int err;
struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
struct ctl_table *tbl = ctl_forward_entry;
struct ctl_table_header *forw_hdr;
#endif
 
err = -ENOMEM;
all = &ipv4_devconf; // start from the static tables
dflt = &ipv4_devconf_dflt;
 
if (net != &init_net) {
 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
 if (all == NULL)
 goto err_alloc_all;
 
 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
 if (dflt == NULL)
 goto err_alloc_dflt;
 
#ifdef CONFIG_SYSCTL
 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
 if (tbl == NULL)
 goto err_alloc_ctl;
 
 /* make the duplicated ip_forward entry refer to this namespace's copy */
 tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
 tbl[0].extra1 = all;
 tbl[0].extra2 = net;
#endif
}
 
#ifdef CONFIG_SYSCTL
err = __devinet_sysctl_register(net, "all",
 NET_PROTO_CONF_ALL, all);
if (err < 0)
 goto err_reg_all;
 
err = __devinet_sysctl_register(net, "default",
 NET_PROTO_CONF_DEFAULT, dflt);
if (err < 0)
 goto err_reg_dflt;
 
err = -ENOMEM;
forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
if (forw_hdr == NULL)
 goto err_reg_ctl;
net->ipv4.forw_hdr = forw_hdr;
#endif
 
net->ipv4.devconf_all = all;// net->ipv4.devconf_all is initialized here
net->ipv4.devconf_dflt = dflt;// net->ipv4.devconf_dflt is initialized here
return 0;
………………………….
}
The function above initializes the per-net configuration. The devinet.c file also contains a variable ipv4_devconf that is similar to ipv4_devconf_dflt, but the IN_DEV_FORWARD(in_dev) macro reads in_dev->cnf, which inetdev_init copies from ipv4_devconf_dflt. So if you want to change the default forwarding setting inside the Linux kernel, the value must be added to ipv4_devconf_dflt to take effect.
/*
 * IPv4 device configuration table behind the "all" sysctl tree and the
 * ip_forward entry of init_net (see ctl_forward_entry).
 *
 * data[] is indexed by NET_IPV4_CONF_* - 1.  FORWARDING is not assigned
 * here, so it starts out as 0.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
	},
};
(3) Now let's see how the configuration command echo 1 > /proc/sys/net/ipv4/ip_forward makes IP forwarding take effect in the Linux kernel.
The devinet_init_net() function above contains the following two lines of code:
/* Excerpt from devinet_init_net(): take the ip_forward table, then register it under net_ipv4_path. */
struct ctl_table *tbl = ctl_forward_entry;
forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
Here ctl_forward_entry is defined as the following structure array:
static struct ctl_table ctl_forward_entry[] = {
{
 .ctl_name= NET_IPV4_FORWARD,//an ID,
 .procname = "ip_forward",//string, which is included in the directory entry under proc/sys, which is actually The file name in the proc/sys directory.data
 = &ipv4_devconf.data[
  NET_IPV4_CONF_FORWARDING - 1],//The value set by the callback function.maxlen
 = sizeof(int),//The maximum length of the set
 value.mode = 0644,//File , that is, the permission of the ip_forward file.
 proc_handler = devinet_sysctl_forward,// This callback function is called when the file under /proc/sys is modified.
 .strategy = devinet_conf_sysctl,// The callback function is called when sysctl is used to read and write system parameters.extra1
 = &ipv4_devconf,
 .extra2 = &init_net,
},
{ },
};
forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl); dynamically registers the sysctl entries. net_ipv4_path, defined below, gives the directory path under /proc/sys/, and tbl is the ctl_forward_entry[] array above.
/*
 * Path components ("net", then "ipv4") under which the ip_forward table
 * is registered, followed by an empty entry.
 */
static __net_initdata struct ctl_path net_ipv4_path[] = {
	{
		.procname	= "net",
		.ctl_name	= CTL_NET,
	},
	{
		.procname	= "ipv4",
		.ctl_name	= NET_IPV4,
	},
	{ },
};
Running echo 1 > /proc/sys/net/ipv4/ip_forward causes the devinet_sysctl_forward function to be called. Its definition and implementation follow. The parameter write is 1 when the configuration is being written and 0 when it is being read, buffer holds the value to be written, lenp is the size of the buffer, and ppos is the position. The __user annotation marks buffer as a user-space pointer that kernel code must not dereference directly, because the address is meaningless in the kernel's address space; for such pointers the kernel uses copy_to_user and copy_from_user.
/*
 * proc handler for a forwarding sysctl entry.
 *
 * proc_dointvec() performs the integer read or write on ctl->data.  When
 * a write changed the value and the entry is not the namespace's "default"
 * FORWARDING slot, the change is propagated under the RTNL lock:
 *   - for the "all" slot, inet_forward_change() applies it;
 *   - for any other slot that is now non-zero, LRO is disabled on the
 *     device owning the configuration in ctl->extra1.
 * The route cache is flushed afterwards.
 *
 * If the RTNL lock cannot be taken, the previous value and file position
 * are restored and the system call is restarted.
 *
 * Returns the result of proc_dointvec(), or of restart_syscall().
 */
static int devinet_sysctl_forward(ctl_table *ctl, int write,
				  void __user *buffer,
				  size_t *lenp, loff_t *ppos)
{
	int *slot = ctl->data;
	int old_val = *slot;
	loff_t old_pos = *ppos;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	struct net *net;

	/* Nothing to propagate on a read or when the value did not change. */
	if (!write || *slot == old_val)
		return ret;

	net = ctl->extra2;
	if (slot == &IPV4_DEVCONF_DFLT(net, FORWARDING))
		return ret;

	if (!rtnl_trylock()) {
		/* Restore the original values before restarting */
		*slot = old_val;
		*ppos = old_pos;
		return restart_syscall();
	}

	if (slot == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
		/* "all" changed: inet_forward_change() sets in_dev->cnf.data */
		inet_forward_change(net);
	} else if (*slot) {
		struct ipv4_devconf *devconf = ctl->extra1;
		struct in_device *owner =
			container_of(devconf, struct in_device, cnf);

		dev_disable_lro(owner->dev);
	}

	rtnl_unlock();
	rt_cache_flush(net, 0);

	return ret;
}
The following function applies the changed forward parameter:
/*
 * Propagate the namespace-wide ("all") FORWARDING value.
 *
 * ACCEPT_REDIRECTS in "all" is set to the logical negation of the
 * forwarding value, the "default" FORWARDING slot is set to the same
 * value, and every net_device of the namespace that has an in_device
 * gets its FORWARDING entry updated.  When forwarding is being enabled,
 * LRO is disabled on each device as well.
 */
static void inet_forward_change(struct net *net)
{
	struct net_device *netdev;
	int enabled = IPV4_DEVCONF_ALL(net, FORWARDING);	/* the configured value */

	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !enabled;
	IPV4_DEVCONF_DFLT(net, FORWARDING) = enabled;	/* default table */

	read_lock(&dev_base_lock);
	for_each_netdev(net, netdev) {
		struct in_device *idev;

		if (enabled)
			dev_disable_lro(netdev);

		rcu_read_lock();
		idev = __in_dev_get_rcu(netdev);
		if (idev != NULL)
			IN_DEV_CONF_SET(idev, FORWARDING, enabled);	/* sets idev->cnf.data */
		rcu_read_unlock();
	}
	read_unlock(&dev_base_lock);
}
 
/*
 * Store val as the per-device IPv4 configuration entry selected by index.
 *
 * index is a NET_IPV4_CONF_* identifier; 1 is subtracted to obtain the
 * position in cnf.data[].  The matching bit in cnf.state is set before
 * the value is written.
 */
static inline void ipv4_devconf_set(struct in_device *in_dev, int index,
				    int val)
{
	const int slot = index - 1;

	set_bit(slot, in_dev->cnf.state);
	in_dev->cnf.data[slot] = val;
}

Guess you like

Origin http://10.200.1.11:23101/article/api/json?id=327100755&siteId=291194637