DPDK flow_filtering 源码阅读

代码部分

main.c

/*-
 *   BSD LICENSE
 *
 *   Copyright 2017 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Mellanox. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/queue.h>
#include <netinet/in.h>
#include <setjmp.h>
#include <stdarg.h>
#include <ctype.h>
#include <errno.h>
#include <getopt.h>
#include <signal.h>
#include <stdbool.h>

#include <rte_eal.h>
#include <rte_common.h>
#include <rte_malloc.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_net.h>
#include <rte_flow.h>
#include <rte_cycles.h>

/*
 * Exit flag: set to true by signal_handler() and polled by main_loop().
 * volatile keeps the compiler from optimizing the accesses away, so the
 * variable is re-read on every use instead of reusing a copy held in a
 * register.
 */
static volatile bool force_quit;

static uint16_t port_id;           /* port used by every call in this file; main() sets it to 0 */
static uint16_t nr_queues = 5;     /* number of Rx queues and of Tx queues (5 each) */
static uint8_t selected_queue = 1; /* Rx queue index passed to generate_ipv4_flow() */
struct rte_mempool *mbuf_pool;     /* mbuf pool created in main() and handed to the Rx queues */
struct rte_flow *flow;             /* flow rule handle returned by generate_ipv4_flow() */

/*
 * IPv4 addresses and masks passed to generate_ipv4_flow(), in host byte
 * order (the callee converts the addresses with htonl()).
 *
 * The octets are shifted as unsigned values: 192 << 24 does not fit in a
 * signed int, so the signed form overflowed, which is undefined behavior.
 */
#define SRC_IP ((0U<<24) + (0U<<16) + (0U<<8) + 0U) /* src ip = 0.0.0.0 */
#define DEST_IP ((192U<<24) + (168U<<16) + (1U<<8) + 1U) /* dest ip = 192.168.1.1 */
#define FULL_MASK 0xffffffff /* full mask */
#define EMPTY_MASK 0x0 /* empty mask */

#include "flow_blocks.c" // generate_ipv4_flow

/*
 * Print a label immediately followed by an Ethernet address rendered as
 * xx:xx:xx:xx:xx:xx. No separator or newline is added.
 *
 * what:     text printed in front of the address.
 * eth_addr: address to format.
 */
static inline void
print_ether_addr(const char *what, struct ether_addr *eth_addr)
{
    char formatted[ETHER_ADDR_FMT_SIZE];

    /* Render the 48-bit address into the local text buffer. */
    ether_format_addr(formatted, sizeof(formatted), eth_addr);
    printf("%s%s", what, formatted);
}

static void
main_loop(void)
{
    struct rte_mbuf *mbufs[32];
    struct ether_hdr *eth_hdr;
    struct rte_flow_error error;
    uint16_t nb_rx;
    uint16_t i;
    uint16_t j;

    while (!force_quit) {
        for (i = 0; i < nr_queues; i++) {
            nb_rx = rte_eth_rx_burst(port_id,
                        i, mbufs, 32); // 收包
            if (nb_rx) {  // 该应用程序的主要工作是从所有队列读取数据包并打印目标队列的每个数据包
                for (j = 0; j < nb_rx; j++) {
                    struct rte_mbuf *m = mbufs[j];

                    eth_hdr = rte_pktmbuf_mtod(m,
                            struct ether_hdr *);
                    
                    /*rte_pktmbuf_mtod(m,t) 是一个宏,m 是 mbuf 指针
                    返回一个被强制转换成 t * 的指针。指向 mbuf 里的数据的开始处。
                    */
                        
                    print_ether_addr("src=",
                            &eth_hdr->s_addr); // 见文件 rte_ether.h 
                    print_ether_addr(" - dst=",
                            &eth_hdr->d_addr);
                    printf(" - queue=0x%x",
                            (unsigned int)i);
                    printf("\n");

                    rte_pktmbuf_free(m); // 收的包就直接free掉
                }
            }
        }
    }

    /* closing and releasing resources */
    rte_flow_flush(port_id, &error); // Destroy all flow rules associated with a port.
    rte_eth_dev_stop(port_id); // Stop an Ethernet device. 是函数 rte_eth_dev_start()的反义词
    rte_eth_dev_close(port_id); // Close a stopped Ethernet device. The device cannot be restarted
}

/*
 * Link polling budget. CHECK_INTERVAL is passed to rte_delay_ms(), so it is
 * expressed in milliseconds; it was 1000, which contradicted the documented
 * 100ms step and 9s total (it waited up to 90s).
 */
#define CHECK_INTERVAL 100   /* 100ms */
#define MAX_REPEAT_TIMES 90  /* 9s (90 * 100ms) in total */

/*
 * Wait for the link of port_id to come up.
 *
 * Queries the link up to MAX_REPEAT_TIMES times, sleeping CHECK_INTERVAL
 * milliseconds after each query that does not report the link as up.
 * Terminates the application through rte_exit() if the link is still
 * reported down afterwards.
 */
static void
assert_link_status(void)
{
    struct rte_eth_link link; /* link-level information of an Ethernet port */
    uint8_t rep_cnt = MAX_REPEAT_TIMES;

    memset(&link, 0, sizeof(link));
    do {
        /*
         * Retrieve the link state: up/down, speed (Mbps) and duplex
         * mode. The call takes the port id and a pointer to the
         * rte_eth_link to fill in, and may itself block for up to
         * 9 seconds.
         */
        rte_eth_link_get(port_id, &link);
        if (link.link_status == ETH_LINK_UP)
            break;
        rte_delay_ms(CHECK_INTERVAL); /* wait at least that many ms (rte_cycles.h) */
    } while (--rep_cnt);

    if (link.link_status == ETH_LINK_DOWN)
        rte_exit(EXIT_FAILURE, ":: error: link is still down\n");
}

/*
 * Bring up the port identified by port_id.
 *
 * Steps, in order: configure the device with nr_queues Rx and nr_queues Tx
 * queues, set up every Rx queue (backed by mbuf_pool) and every Tx queue
 * with 512 descriptors each, enable promiscuous mode, start the device and
 * wait for the link through assert_link_status(). Any failing step ends
 * the application through rte_exit().
 */
static void
init_port(void)
{
    struct rte_eth_dev_info dev_info;
    struct rte_eth_rxconf rxq_conf;
    struct rte_eth_txconf txq_conf;
    /* Port-level configuration handed to rte_eth_dev_configure(). */
    struct rte_eth_conf port_conf = {
        .rxmode = {
            .split_hdr_size = 0,
            /* NOTE(review): presumably selects the .offloads mask over
             * the legacy per-feature bitfields - confirm for this DPDK
             * release. */
            .ignore_offload_bitfield = 1,
            .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
        },
        .txmode = {
            .offloads =
                DEV_TX_OFFLOAD_VLAN_INSERT |
                DEV_TX_OFFLOAD_IPV4_CKSUM  |
                DEV_TX_OFFLOAD_UDP_CKSUM   |
                DEV_TX_OFFLOAD_TCP_CKSUM   |
                DEV_TX_OFFLOAD_SCTP_CKSUM  |
                DEV_TX_OFFLOAD_TCP_TSO,
        },
    };
    uint16_t q;
    int rc;

    printf(":: initializing port: %d\n", port_id);

    /* Same number of Rx and Tx queues. */
    rc = rte_eth_dev_configure(port_id, nr_queues, nr_queues, &port_conf);
    if (rc < 0)
        rte_exit(EXIT_FAILURE,
            ":: cannot configure device: err=%d, port=%u\n",
            rc, port_id);

    /* Start from the device's default queue settings and apply the
     * port-level offloads to them. */
    rte_eth_dev_info_get(port_id, &dev_info);
    rxq_conf = dev_info.default_rxconf;
    rxq_conf.offloads = port_conf.rxmode.offloads;
    txq_conf = dev_info.default_txconf;
    txq_conf.offloads = port_conf.txmode.offloads;

    /* Rx queues: the only ones this application actually reads from. */
    for (q = 0; q < nr_queues; q++) {
        rc = rte_eth_rx_queue_setup(port_id, q, 512,
                     rte_eth_dev_socket_id(port_id),
                     &rxq_conf,
                     mbuf_pool);
        if (rc < 0)
            rte_exit(EXIT_FAILURE,
                ":: Rx queue setup failed: err=%d, port=%u\n",
                rc, port_id);
    }

    /* Tx queues. */
    for (q = 0; q < nr_queues; q++) {
        rc = rte_eth_tx_queue_setup(port_id, q, 512,
                rte_eth_dev_socket_id(port_id),
                &txq_conf);
        if (rc < 0)
            rte_exit(EXIT_FAILURE,
                ":: Tx queue setup failed: err=%d, port=%u\n",
                rc, port_id);
    }

    rte_eth_promiscuous_enable(port_id);

    rc = rte_eth_dev_start(port_id);
    if (rc < 0)
        rte_exit(EXIT_FAILURE,
            "rte_eth_dev_start:err=%d, port=%u\n",
            rc, port_id);

    assert_link_status();

    printf(":: initializing port: %d done\n", port_id);
}

/*
 * Handler installed for SIGINT and SIGTERM: announces the signal and raises
 * force_quit. Any other signal number is ignored.
 *
 * NOTE(review): printf() is not on the POSIX list of async-signal-safe
 * functions; it is kept here so the program's output stays unchanged.
 */
static void
signal_handler(int signum)
{
    switch (signum) {
    case SIGINT:
    case SIGTERM:
        printf("\n\nSignal %d received, preparing to exit...\n",
                signum);
        force_quit = true;
        break;
    default:
        break;
    }
}

int
main(int argc, char **argv)
{
    int ret;
    uint8_t nr_ports;
    struct rte_flow_error error;

    ret = rte_eal_init(argc, argv); // 初始化 EAL
    if (ret < 0)
        rte_exit(EXIT_FAILURE, ":: invalid EAL arguments\n");

    /* signal(sig, handler) 设置某一信号的对应动作
    第一个参数signum指明了所要处理的信号类型,它可以取除了SIGKILL和SIGSTOP外的任何一种信号。
    第二个参数handler描述了与信号关联的动作,它可以取以下三种值:
    1.一个无返回值的函数地址
    2. SIG_IGN :忽略
    3. SIG_DFL :恢复默认处理
    */

    force_quit = false;
    signal(SIGINT, signal_handler);  // 中断信号
    signal(SIGTERM, signal_handler); // 程序结束(terminate)信号

    nr_ports = rte_eth_dev_count();
    if (nr_ports == 0)
        rte_exit(EXIT_FAILURE, ":: no Ethernet ports found\n");
    port_id = 0;
    if (nr_ports != 1) { // 本程序只需要使用 1 个以太网设备
        printf(":: warn: %d ports detected, but we use only one: port %u\n",
            nr_ports, port_id);
    }
    mbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", 4096, 128, 0,
                        RTE_MBUF_DEFAULT_BUF_SIZE,
                        rte_socket_id()); // 创建 mempool
    if (mbuf_pool == NULL)
        rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");

    init_port(); // 端口初始化

    /* create flow for send packet with */
    // 参数:port id、queue index、IP四元组、error
    // flow 是 struct rte_flow
    flow = generate_ipv4_flow(port_id, selected_queue, // 将目的地ip等于192.168.1.1的数据包发送到队列号1
                SRC_IP, EMPTY_MASK,
                DEST_IP, FULL_MASK, &error);
    // 虽然配置了5条队列,但会把特定IP地址的流量放到一条特定的队列。
    // 是对 rte_flow_create() 这个函数的一层封装

    if (!flow) {
        printf("Flow can't be created %d message: %s\n",
            error.type,
            error.message ? error.message : "(no stated reason)");
        rte_exit(EXIT_FAILURE, "error in creating flow");
    }

    main_loop();

    return 0;
}

flow_blocks.c

/*-
 *   BSD LICENSE
 *
 *   Copyright 2017 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Mellanox nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Number of elements in the pattern[] and action[] arrays that
 * generate_ipv4_flow() builds. */
#define MAX_PATTERN_NUM     4

/* Forward declaration; the definition and its parameter documentation
 * follow below. */
struct rte_flow *
generate_ipv4_flow(uint16_t port_id, uint16_t rx_q,
        uint32_t src_ip, uint32_t src_mask,
        uint32_t dest_ip, uint32_t dest_mask,
        struct rte_flow_error *error);


/**
 * create a flow rule that sends packets with matching src and dest ip
 * to selected queue.
 *
 * Addresses and masks are given in host byte order; both are converted to
 * network byte order before being placed in the IPv4 item.
 *
 * @param port_id
 *   The selected port.
 * @param rx_q
 *   The selected target queue.
 * @param src_ip
 *   The src ip value to match the input packet.
 * @param src_mask
 *   The mask to apply to the src ip.
 * @param dest_ip
 *   The dest ip value to match the input packet.
 * @param dest_mask
 *   The mask to apply to the dest ip.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created else return NULL.
 */
struct rte_flow *
generate_ipv4_flow(uint16_t port_id, uint16_t rx_q,
        uint32_t src_ip, uint32_t src_mask,
        uint32_t dest_ip, uint32_t dest_mask,
        struct rte_flow_error *error)
{
    /* attr, pattern and action are the three inputs that define a rule. */
    struct rte_flow_attr attr;
    struct rte_flow_item pattern[MAX_PATTERN_NUM];
    struct rte_flow_action action[MAX_PATTERN_NUM];
    struct rte_flow *flow = NULL;
    struct rte_flow_action_queue queue = { .index = rx_q };
    /*
     * Each pattern item carries untyped spec/mask pointers; they must
     * point at the structure that corresponds to the item's type.
     */
    struct rte_flow_item_eth eth_spec;
    struct rte_flow_item_eth eth_mask;
    struct rte_flow_item_vlan vlan_spec;
    struct rte_flow_item_vlan vlan_mask;
    struct rte_flow_item_ipv4 ip_spec;
    struct rte_flow_item_ipv4 ip_mask;
    int res;

    memset(pattern, 0, sizeof(pattern));
    memset(action, 0, sizeof(action));

    /*
     * set the rule attribute.
     * in this case only ingress packets will be checked.
     */
    memset(&attr, 0, sizeof(struct rte_flow_attr));
    attr.ingress = 1;

    /*
     * create the action sequence.
     * one action only,  move packet to queue
     * The action list must be terminated by an END action.
     */
    action[0].type = RTE_FLOW_ACTION_TYPE_QUEUE;
    action[0].conf = &queue;
    action[1].type = RTE_FLOW_ACTION_TYPE_END;

    /*
     * set the first level of the pattern (eth).
     * since in this example we just want to get the
     * ipv4 we set this level to allow all: spec and mask stay all-zero,
     * so every Ethernet header is accepted.
     */
    memset(&eth_spec, 0, sizeof(struct rte_flow_item_eth));
    memset(&eth_mask, 0, sizeof(struct rte_flow_item_eth));
    pattern[0].type = RTE_FLOW_ITEM_TYPE_ETH;
    pattern[0].spec = &eth_spec;
    pattern[0].mask = &eth_mask;

    /*
     * How spec, mask and last combine in a pattern item: for an IPv4
     * address field, spec 10.1.2.3 with last 10.3.4.5 and mask
     * 255.255.0.0 gives the effective range 10.1.0.0 to 10.3.255.255.
     */

    /*
     * setting the second level of the pattern (vlan).
     * since in this example we just want to get the
     * ipv4 we also set this level to allow all.
     */
    memset(&vlan_spec, 0, sizeof(struct rte_flow_item_vlan));
    memset(&vlan_mask, 0, sizeof(struct rte_flow_item_vlan));
    pattern[1].type = RTE_FLOW_ITEM_TYPE_VLAN;
    pattern[1].spec = &vlan_spec;
    pattern[1].mask = &vlan_mask;

    /*
     * setting the third level of the pattern (ip).
     * in this example this is the level we care about
     * so we set it according to the parameters.
     */
    memset(&ip_spec, 0, sizeof(struct rte_flow_item_ipv4));
    memset(&ip_mask, 0, sizeof(struct rte_flow_item_ipv4));

    /*
     * Convert masks as well as addresses: a mask left in host order only
     * happens to work for byte-symmetric values such as 0xffffffff or 0
     * and would select the wrong bits for e.g. 0xffffff00 on a
     * little-endian host.
     */
    ip_spec.hdr.dst_addr = htonl(dest_ip);
    ip_mask.hdr.dst_addr = htonl(dest_mask);
    ip_spec.hdr.src_addr = htonl(src_ip);
    ip_mask.hdr.src_addr = htonl(src_mask);
    pattern[2].type = RTE_FLOW_ITEM_TYPE_IPV4;
    pattern[2].spec = &ip_spec;
    pattern[2].mask = &ip_mask;

    /* the final level must be always type end */
    pattern[3].type = RTE_FLOW_ITEM_TYPE_END;

    /*
     * Validate the rule first and create it only if validation succeeds.
     * rte_flow_create() installs the rule on the given port from the
     * attributes, pattern and actions; on failure the PMD reports details
     * through *error.
     */
    res = rte_flow_validate(port_id, &attr, pattern, action, error);
    if (!res)
        flow = rte_flow_create(port_id, &attr, pattern, action, error);

    return flow;
}

执行情况

猜你喜欢

转载自www.cnblogs.com/ZCplayground/p/9350354.html
今日推荐