ipset如何与netfilter内核模块进行通信

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/haolipengzhanshen/article/details/85005730

最近需要使用ipset、iptables和netfilter,所以把三者的源代码大致阅读了一遍。

前面我们学习过应用层ipset和netfilter模块之间通信是采用的netlink套接字

用户空间的ipset命令通过 libipset.so 这个库和内核通讯

一、ipset主流程

下面是我总结的主流程

二、用户层如何将创建set的名称和类型传递到内核层的

我们都知道ipset可以创建不同类型set,如"hash:ip","hash:ip,port","hash:net,port"等

从执行命令到内核态,其流程为

ipset命令行 -> libipset.so -> ip_set.ko内核模块 ->根据set类型选择ip_set_hash_ip.ko内核模块

那么应用层是如何解析set的命令和类型的,并且是如何将set名称和类型传递到内核态的呢?

ipset_parse_argv函数中去解析ipset的Create命令

扫描二维码关注公众号,回复: 4684187 查看本文章

ipset_parse_setname是解析刚创建ipset集合的名称

ipset_parse_typename是解析刚创建ipset集合的类型

ret = ipset_parse_setname(session, IPSET_SETNAME, arg0);函数是将arg0的值传递给session的setname成员

因为我更关注set type类型,所以进入ipset_parse_typename函数

/* Find the corresponding type */
typename = ipset_typename_resolve(str);

通过注释可得知,ipset_typename_resolve是找到对应的set类型,大胆猜想下,命令行是“hash:ip”,通过“hash:ip”我们能够获取到typename类型名称

报告大哥,发现线索typelist链表,函数意思是遍历typelist链表,用ipset_match_typename()来匹配类型名称,匹配成功则返回类型名称。现在需要找到往typelist链表中添加元素的函数!!!

ipset_type_add函数!

ipset_type_add函数!

ipset_type_add函数!

重要的事情说三遍!看看它的英文注释,如下:

/**
 * ipset_type_add - add (register) a userspace set type
 * @type: pointer to the set type structure
 *
 * Add the given set type to the type list. The types
 * are added sorted, in descending revision number.
 *
 * Returns 0 on success or a negative error code.
 */

添加一个用户态的set集合类型,ok,找到了,only you

何人在调用ipset_type_add函数?

打开ipset_hash_ip.c文件,找到其_init函数

看看ipset_hash_ip0结构体定义和初始化

/*
 * Initial release: userspace descriptor of the "hash:ip" set type,
 * revision 0, which can also be referred to by the alias "iphash".
 *
 * The structure bundles the parse/print callbacks for the single
 * element dimension of the type and, for each command, the list of
 * accepted arguments plus the help text shown to the user.
 */
static struct ipset_type ipset_hash_ip0 = {
	.name = "hash:ip",
	.alias = { "iphash", NULL },
	.revision = 0,
	.family = NFPROTO_IPSET_IPV46,
	.dimension = IPSET_DIM_ONE,
	/* Per-dimension element handling; this type has one dimension only. */
	.elem = {
		[IPSET_DIM_ONE - 1] = {
			.parse = ipset_parse_ip4_single6,
			.print = ipset_print_ip,
			.opt = IPSET_OPT_IP
		},
	},
	/*
	 * Per-command descriptions.
	 * NOTE(review): every args list ends with IPSET_ARG_NONE, presumably
	 * as the list terminator; .need/.full look like bitmasks of mandatory
	 * and of all accepted element options - confirm against the
	 * definition of struct ipset_type.
	 */
	.cmd = {
		[IPSET_CREATE] = {
			.args = {
				IPSET_ARG_FAMILY,
				/* Aliases */
				IPSET_ARG_INET,
				IPSET_ARG_INET6,
				IPSET_ARG_HASHSIZE,
				IPSET_ARG_MAXELEM,
				IPSET_ARG_NETMASK,
				IPSET_ARG_TIMEOUT,
				/* Ignored options: backward compatibility */
				IPSET_ARG_PROBES,
				IPSET_ARG_RESIZE,
				IPSET_ARG_GC,
				IPSET_ARG_NONE,
			},
			.need = 0,
			.full = 0,
			.help = "",
		},
		/* Adding an element: the only extra argument listed is the timeout. */
		[IPSET_ADD] = {
			.args = {
				IPSET_ARG_TIMEOUT,
				IPSET_ARG_NONE,
			},
			.need = IPSET_FLAG(IPSET_OPT_IP),
			.full = IPSET_FLAG(IPSET_OPT_IP)
				| IPSET_FLAG(IPSET_OPT_IP_TO),
			.help = "IP",
		},
		/* Deleting an element: no extra arguments are listed. */
		[IPSET_DEL] = {
			.args = {
				IPSET_ARG_NONE,
			},
			.need = IPSET_FLAG(IPSET_OPT_IP),
			.full = IPSET_FLAG(IPSET_OPT_IP)
				| IPSET_FLAG(IPSET_OPT_IP_TO),
			.help = "IP",
		},
		/* Testing an element: same option flags as for deletion. */
		[IPSET_TEST] = {
			.args = {
				IPSET_ARG_NONE,
			},
			.need = IPSET_FLAG(IPSET_OPT_IP),
			.full = IPSET_FLAG(IPSET_OPT_IP)
				| IPSET_FLAG(IPSET_OPT_IP_TO),
			.help = "IP",
		},
	},
	/* Free-form usage text explaining the element syntax to the user. */
	.usage = "where depending on the INET family\n"
		 "      IP is a valid IPv4 or IPv6 address (or hostname),\n"
		 "      CIDR is a valid IPv4 or IPv6 CIDR prefix.\n"
		 "      Adding/deleting multiple elements in IP/CIDR or FROM-TO form\n"
		 "      is supported for IPv4.",
	.description = "Initial revision",
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

三、netlink套接字初始化

想使用netlink套接字,必然要先创建netlink套接字,应该有如下代码

skfd = socket(AF_NETLINK, SOCK_RAW, NETLINK_TEST);

但是我并没有在ipset源代码中查找到类似的socket()调用。

后来在书上看到ipset源代码中是采用libmnl库来使用netlink套接字,使用ipset_mnl_init函数来进行初始化操作

/**
 * ipset_mnl_init - allocate a handle and open the netlink channel
 * @cb_ctl: callback table to store in the handle (must not be NULL)
 * @data: opaque pointer to store in the handle (must not be NULL)
 *
 * Allocates a zeroed handle, opens a NETLINK_NETFILTER socket through
 * libmnl, binds it with MNL_SOCKET_AUTOPID and then records the port
 * id of the socket, the callback table, the user data and an initial
 * sequence number taken from the current time.
 *
 * Returns the initialized handle, or NULL if the allocation, the
 * socket open or the bind failed. On failure everything acquired up
 * to that point is released again.
 */
static struct ipset_handle *
ipset_mnl_init(mnl_cb_t *cb_ctl, void *data)
{
	struct ipset_handle *hdl;

	assert(cb_ctl);
	assert(data);

	hdl = calloc(1, sizeof(*hdl));
	if (hdl == NULL)
		return NULL;

	hdl->h = mnl_socket_open(NETLINK_NETFILTER);
	if (hdl->h == NULL) {
		/* No socket yet: only the handle itself has to go. */
		free(hdl);
		return NULL;
	}

	if (mnl_socket_bind(hdl->h, 0, MNL_SOCKET_AUTOPID) < 0) {
		/* Undo in reverse order: socket first, then the handle. */
		mnl_socket_close(hdl->h);
		free(hdl);
		return NULL;
	}

	hdl->portid = mnl_socket_get_portid(hdl->h);
	hdl->data = data;
	hdl->cb_ctl = cb_ctl;
	hdl->seq = time(NULL);

	return hdl;
}

mnl_socket_open函数传递NETLINK_NETFILTER类型,创建netlink套接字

mnl_socket_bind将netlink套接字绑定到端口id(pid),此处传递的是MNL_SOCKET_AUTOPID,表示由内核自动分配端口id

mnl_socket_get_portid 通过给定的netlink套接字获取netlink端口id

四、通过netlink函数和内核态进行交互

在同一个文件mnl.c中发现ipset_mnl_query函数,其中调用了mnl_socket_recvfrom和mnl_socket_sendto,和内核态进行通信

下面就是看下libmnl的api官方文档

/**
 * ipset_mnl_query - send one netlink request and process the replies
 * @handle: handle holding the socket, sequence number and callbacks
 * @buffer: buffer containing the request; reused for the replies
 * @len: size of @buffer as passed to the receive call
 *
 * Stamps the request with the next sequence number, sends it and then
 * keeps receiving into the same buffer, running every received chunk
 * through mnl_cb_run2() with the callbacks stored in the handle. The
 * loop ends as soon as either the receive call or the callback runner
 * returns a value <= 0.
 *
 * Returns -ECOMM if sending failed, otherwise the last value returned
 * by the receive call or by mnl_cb_run2().
 */
static int
ipset_mnl_query(struct ipset_handle *handle, void *buffer, size_t len)
{
	struct nlmsghdr *msg = buffer;
	int rc;

	assert(handle);
	assert(buffer);

	/* Advance the sequence number first, then tag the request with it. */
	handle->seq++;
	msg->nlmsg_seq = handle->seq;
#ifdef IPSET_DEBUG
	ipset_debug_msg("sent", msg, msg->nlmsg_len);
#endif
	if (mnl_socket_sendto(handle->h, msg, msg->nlmsg_len) < 0)
		return -ECOMM;

	rc = mnl_socket_recvfrom(handle->h, buffer, len);
#ifdef IPSET_DEBUG
	ipset_debug_msg("received", buffer, rc);
#endif
	for (;;) {
		/* Nothing (more) received, or the receive call failed. */
		if (rc <= 0)
			break;

		rc = mnl_cb_run2(buffer, rc,
				 handle->seq, handle->portid,
				 handle->cb_ctl[NLMSG_MIN_TYPE],
				 handle->data,
				 handle->cb_ctl, NLMSG_MIN_TYPE);
		D("nfln_cb_run2, ret: %d, errno %d", rc, errno);
		/* The callback runner reported a value <= 0: stop reading. */
		if (rc <= 0)
			break;

		rc = mnl_socket_recvfrom(handle->h, buffer, len);
		D("message received, ret: %d", rc);
	}

	return rc;
}

关于mnl_socket_recvfrom和mnl_socket_sendto和mnl_cb_run2函数的含义,请自行查找api

用户态和内核态通信,必然会遵循某种特定的规则,我们称之为通信规则

在ip_set.h文件中,有如下命令的定义

/*
 * Message types and commands.
 *
 * Values below IPSET_MSG_MAX are netlink message commands; values from
 * IPSET_MSG_MAX up to (but excluding) IPSET_CMD_MAX are commands in
 * userspace. IPSET_CMD_RESTORE shares its value with IPSET_MSG_MAX and
 * IPSET_CMD_COMMIT shares its value with IPSET_CMD_MAX.
 */
enum ipset_cmd {
	IPSET_CMD_NONE		= 0,
	IPSET_CMD_PROTOCOL	= 1,	/* Return protocol version */
	IPSET_CMD_CREATE	= 2,	/* Create a new (empty) set */
	IPSET_CMD_DESTROY	= 3,	/* Destroy an (empty) set */
	IPSET_CMD_FLUSH		= 4,	/* Remove all elements from a set */
	IPSET_CMD_RENAME	= 5,	/* Rename a set */
	IPSET_CMD_SWAP		= 6,	/* Swap two sets */
	IPSET_CMD_LIST		= 7,	/* List sets */
	IPSET_CMD_SAVE		= 8,	/* Save sets */
	IPSET_CMD_ADD		= 9,	/* Add an element to a set */
	IPSET_CMD_DEL		= 10,	/* Delete an element from a set */
	IPSET_CMD_TEST		= 11,	/* Test an element in a set */
	IPSET_CMD_HEADER	= 12,	/* Get set header data only */
	IPSET_CMD_TYPE		= 13,	/* Get set type */
	IPSET_CMD_GET_BYNAME	= 14,	/* Get set index by name */
	IPSET_CMD_GET_BYINDEX	= 15,	/* Get set name by index */
	IPSET_MSG_MAX		= 16,	/* Netlink message commands */

	/* Commands in userspace: */
	IPSET_CMD_RESTORE	= IPSET_MSG_MAX,	/* Enter restore mode */
	IPSET_CMD_HELP		= 17,	/* Get help */
	IPSET_CMD_VERSION	= 18,	/* Get program version */
	IPSET_CMD_QUIT		= 19,	/* Quit from interactive mode */

	IPSET_CMD_MAX		= 20,

	IPSET_CMD_COMMIT	= IPSET_CMD_MAX,	/* Commit buffered commands */
};

这里我们以IPSET_CMD_CREATE为例子,在内核代码(我的内核版本是3.10)搜索IPSET_CMD_CREATE

找到如下的结构体

static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
    [IPSET_CMD_NONE]    = {
        .call        = ip_set_none,
        .attr_count    = IPSET_ATTR_CMD_MAX,
    },
    [IPSET_CMD_CREATE]    = {
        .call        = ip_set_create,
        .attr_count    = IPSET_ATTR_CMD_MAX,
        .policy        = ip_set_create_policy,
    },
    [IPSET_CMD_DESTROY]    = {
        .call        = ip_set_destroy,
        .attr_count    = IPSET_ATTR_CMD_MAX,
        .policy        = ip_set_setname_policy,
    },
    [IPSET_CMD_FLUSH]    = {
        .call        = ip_set_flush,
        .attr_count    = IPSET_ATTR_CMD_MAX,
        .policy        = ip_set_setname_policy,
    },

}

上面标明IPSET_CMD_CREATE命令的处理函数为ip_set_create

此时从用户态发送命令到内核态

内核态响应用户态的命令

流程已经跑通了。

猜你喜欢

转载自blog.csdn.net/haolipengzhanshen/article/details/85005730