基于libpcap多网卡抓包编程心得

因公司业务需要，要做一个基于libpcap的抓包流量分析服务。网上关于此类项目的资料不多，因此自己实现了一个。

关于libpcap的一些基础部分，这里不再赘述了

注：本文的代码因为频繁使用malloc操作，为了减少内存碎片，均采用jemalloc库

前提数据结构[ 注：下面的结构照搬自nginx的ngx_str_t。说实话，就此类数据结构的实用性而言，redis的sds实现得更为全面，接口多，不用你再重复造轮子；我只是个人喜欢nginx的小巧和自定义的灵活，因此拿来使用了。如果各位自己实现，建议直接使用redis的sds ]

typedef mini_str_s { int len; u_char* data; }mini_str_t;

/*
 * Brace initialiser for a mini_str_t built from a string literal or char array.
 * sizeof counts the terminating NUL, hence the "- 1"; do not pass a char
 * pointer, because sizeof would then yield the pointer size.
 */
#define mini_string(str) { (int) (sizeof(str) - 1), (u_char *) (str) }

/*
 * Point an existing mini_str_t (passed by address) at a string literal or
 * char array. Wrapped in do { } while (0) so the two assignments behave as a
 * single statement, e.g. inside an un-braced if/else.
 */
#define ngx_str_set(str, text)                                              \
    do {                                                                    \
        (str)->len = (int) (sizeof(text) - 1);                              \
        (str)->data = (u_char *) (text);                                    \
    } while (0)

1 : 基本数据结构定义

定义每个采集网卡数据结构

/* Capture context for a single network interface. */
typedef struct cap_ctx_s {
    pcap_t     *handle;   /* libpcap session handle */
    mini_str_t  device;   /* interface name, e.g. "eth0" or "vlan0" */
    char       *errbuf;   /* buffer for error messages */
    mini_str_t  filter;   /* filter expression text */
} cap_ctx_t;

/* Collection of capture contexts, one entry per capturing interface. */
typedef struct cap_array_s {
    zlist_t *context;   /* list holding cap_ctx_t pointers */
    int      capnums;   /* number of cap_ctx_t entries stored */
} cap_array_t;

2 ：数据操作

2.1 初始化一个采集端口

// 参数 device : 设备名称， filter_str : 过滤的表达式，分开可以保证每个端口过滤器不一样， flag: 是否开启过滤表达式

cap_ctx_t* init_libpcap_packet( char* device, char* filter_str, int flag )

{
cap_ctx_t* ctx = (cap_ctx_t*)calloc(sizeof(cap_ctx_t));
ctx->errbuf = (char*)calloc(1, PCAP_ERRBUF_SIZE );
ctx->device->data = (char*)calloc(1, MAX_DEVICE_LEN );

memcpy( ctx->device->data, device, strlen(device) );

ctx->device->len = strlen(device);

ctx->handle = pcap_open_live(ctx->device->data, 65535, 1, 0, ctx->errbuf);
ctx->filter->data = (char*)calloc(1, FILTER_BUFFER_LEN );

memcpy( ctx->filter->data, filter_str, strlen(filter_str) );

ctx->filter->len = strlen(filter_str);

if (ctx->handle == NULL)
{
fprintf(stderr, "Couldn't open capture socket %s\n", ctx->errbuf);
return NULL;
}
struct bpf_program filter; /* The compiled filter */
bpf_u_int32 net;
bpf_u_int32 mask;
if( flag )
{
if (pcap_compile(ctx->handle, &filter, ctx->filter, 0, net) == -1) {
fprintf(stderr, "Couldn't parse filter %s: %s\n",ctx->filter->data, pcap_geterr(ctx->handle));
return NULL;
}
/* apply the compiled filter */
if (pcap_setfilter(ctx->handle, &filter) == -1) {
fprintf(stderr, "Couldn't install filter %s: %s\n",ctx->filter->data, pcap_geterr(ctx->handle));
return NULL;

}

return ctx;

}

2.2初始化采集网卡数组,加入网卡数组

/**
 * Create an empty capture array.
 *
 * @return a zero-initialised cap_array_t with an empty list, or NULL when
 *         either the array or its list cannot be allocated.
 */
cap_array_t* init_libpcap_array(void)
{
    cap_array_t* arr = calloc(1, sizeof *arr);
    if (arr == NULL)
        return NULL;

    arr->context = zlist_new();
    if (arr->context == NULL) {
        /* Do not hand back an array whose list pointer is unusable. */
        free(arr);
        return NULL;
    }

    arr->capnums = 0;
    return arr;
}
/**
 * Append one capture context to the capture array.
 *
 * Does nothing when either argument is NULL or the array has no list.
 *
 * @param carray  array to add to
 * @param ptx     capture context to store
 */
void libpcap_array_add( cap_array_t* carray, cap_ctx_t* ptx )
{
    if (carray == NULL || carray->context == NULL || ptx == NULL)
        return;

    /* zlist_append() returns 0 on success; count only entries that were actually stored. */
    if (zlist_append(carray->context, ptx) == 0)
        carray->capnums++;
}

3 ：多网卡操作使用

我的多网卡配置选项是： eth0:eth1:vlan0:eno2 四个网卡的试验环境

//定义一个网卡采集数组

cap_array_t* capturer = init_libpcap_array();

char* fields[100] = {NULL};

int num = split(g_conf_cap_device, fields, 100, ":"); 配置文件网卡字符串切分

//我这里开了一个线程池，每个网卡采集用一个线程去做，

threadpool_t *thp = threadpool_create(num * 2 ,100,12);

//循环每个网卡接口，激活每个网卡，同时加入到这个激活网卡数组里面

for(int i = 0 ; i < num ;i++)

{

//激活配置的每个网卡，每个激活的网卡返回一个 cap_ctx_t对象

cap_ctx_t* handle = init_libpcap_packet( fields[i], "dst port 80", g_just80 ) ;
if(NULL != handle)
{
libpcap_array_add( capturer, handle ); // 加入到之前定义好的采集数组里面
}
}

//下面就是遍历采集数组，把每个网卡的采集函数激活，并开始采集数据了

cap_ctx_t* header = zlist_first(capturer->context);
int index = 0;
while( header != NULL)
{
//线程池里面加入这些网卡开始数据采集，pkt_cap函数就是简单的数据循环采集工作
threadpool_add(thp, pkt_cap, (void*)(header->handle) );
header = zlist_next(capturer->context);
index++;

}

/**
 * Thread-pool task: run the capture loop for one interface.
 *
 * @param arg  the interface's pcap_t handle, passed as void*
 * @return always NULL
 */
void* pkt_cap( void *arg )
{
    pcap_t *handle = (pcap_t*)arg;
    if (handle == NULL) {
        fprintf(stderr, "pkt_cap: no capture handle supplied\n");
        return NULL;
    }

    /* cnt = -1: keep delivering packets to package_handler until the loop is stopped or fails. */
    int rc = pcap_loop(handle, -1, package_handler, NULL);
    if (rc == -1) {
        /* -1 is pcap_loop()'s error return; report it instead of claiming success. */
        fprintf(stderr, "Capture failed: %s\n", pcap_geterr(handle));
        return NULL;
    }

    printf("\nCapture complete.\n");
    return NULL;
}

后面有时间再分享一篇libpcap多线程采集的例子文档，以及libpcap性能优化的例子。libpcap在千兆网卡下最高能达到1.2G/s的采集速度，这已经是我优化的极限了，主要是受限于libpcap的双重拷贝机制。pfring的非零拷贝方式已经提供了基于mmap的网卡采集方案，如果不想继续自己优化的话，可以直接使用pfring的免费版：免费版实现了内存映射，去除了内核到用户层的一层拷贝工作，拷贝开销已经大大降低，性能明显提升

但是如果在万兆网卡下工作，还是建议使用netmap或者pfring收费版，它们实现了真正的零拷贝

基于libpcap多网卡抓包编程心得

猜你喜欢