网站压测工具 Webbench 源码分析

介绍

Webbench是一个在Linux下使用的非常简单的网站压测工具。它的源代码只有500多行，是一个挺值得一读的开源项目。

实现原理

Webbench只是简单地fork()出多个子进程来模拟客户端访问设定的URL，以测试网站在压力下的工作性能；每个子进程把自己的统计结果写入管道，父进程从管道中读取、汇总后打印到屏幕。

工作流程图

在这里插入图片描述

源码分析

执行结果

下文代码中的一些打印信息，可以对照下面的执行结果来理解。

[luxizheng@VM-12-17-centos WebBench-master]$ ./webbench -f -t 10 -c 10 -2 http://www.baidu.com/
Webbench - Simple Web Benchmark 1.5
Copyright (c) Radim Kolar 1997-2004, GPL Open Source Software.

Request:
GET / HTTP/1.1
User-Agent: WebBench 1.5
Host: www.baidu.com
Connection: close


Runing info: 10 clients, running 10 sec, early socket close.

Speed=43116 pages/min, 0 bytes/sec.
Requests: 7186 susceed, 0 failed.

socket.c文件

该文件中只有一个函数Socket()，它用来与指定主机和端口的服务器建立TCP连接，成功时返回socket描述符，出错时返回-1。

int Socket(const char *host, int clientPort)
{
    
    
    // (Implementation omitted in this excerpt; the comments describe its contract.)
    // Opens a socket connection using host as the server address and
    // clientPort as the server port number.
    // The connection is TCP over IPv4.
    // Returns -1 on any error.
    // Returns the socket descriptor on success.
}

头文件

#include "socket.c"
#include <unistd.h>
#include <sys/param.h>
#include <rpc/types.h>
#include <getopt.h>
#include <strings.h>
#include <time.h>
#include <signal.h>

全局变量

一些全局变量和一些宏定义的介绍，具体用途在注释中给出

/* values */
/* Written from the SIGALRM handler, so it must be volatile sig_atomic_t. */
volatile sig_atomic_t timerexpired=0; /* becomes 1 once the benchmark time is up */
int speed=0;                    /* number of requests that completed successfully */
int failed=0;                   /* number of requests that failed */
int bytes=0;                    /* number of bytes read back from the server */

/* globals */
int http10=1; /* 0 - http/0.9, 1 - http/1.0, 2 - http/1.1 */
/* Allow: GET, HEAD, OPTIONS, TRACE */
#define METHOD_GET 0
#define METHOD_HEAD 1
#define METHOD_OPTIONS 2
#define METHOD_TRACE 3
#define PROGRAM_VERSION "1.5"   /* version string printed in the banner and User-Agent */
int method=METHOD_GET;          /* request method, GET by default */
int clients=1;                  /* number of concurrent clients, 1 by default */
int force=0;                    /* 0: read the server's reply; 1: do not wait for it */
int force_reload=0;             /* 1: ask for an uncached page (Pragma: no-cache) */
int proxyport=80;               /* port to connect to: the proxy's port, or the
                                   target server's port when no proxy is used */
char *proxyhost=NULL;           /* proxy server host, NULL when no proxy is used */
int benchtime=30;               /* benchmark duration in seconds, 30 by default */

/* internal */
int mypipe[2];                  /* result pipe: mypipe[0] read end, mypipe[1] write end */
char host[MAXHOSTNAMELEN];      /* target server host taken from the URL */
#define REQUEST_SIZE 2048       /* capacity of the request buffer */
char request[REQUEST_SIZE];     /* the HTTP request text that is sent */

SIGALRM 信号处理函数 `alarm_handler()`

webbench有一个压测时间，一旦时间到就会停止压测，进而把压测结果写到管道。webbench使用的是定时器：每个执行压测的子进程在benchcore()中用alarm()设置定时器，定时时间一到，该进程就会收到SIGALRM信号，信号处理函数把全局变量timerexpired置1，表示不再进行压测。在压测函数中，是用一个循环来不停发送请求的，所以该变量的作用就是判断何时退出循环。

/* SIGALRM handler: records that the benchmark timer has fired. */
static void alarm_handler(int signal)
{
    (void)signal;       /* the signal number itself is not used */
    timerexpired = 1;   /* raise the "time is up" flag */
}

webbench命令参数的使用信息 `usage()`

一些命令的使用方法。

/* Write the command-line help text to stderr. */
static void usage(void)
{
    /* The text contains no conversion specifiers, so it is emitted verbatim. */
    static const char help_text[] =
        "webbench [option]... URL\n"
        "  -f|--force               Don't wait for reply from server.\n"
        "  -r|--reload              Send reload request - Pragma: no-cache.\n"
        "  -t|--time <sec>          Run benchmark for <sec> seconds. Default 30.\n"
        "  -p|--proxy <server:port> Use proxy server for request.\n"
        "  -c|--clients <n>         Run <n> HTTP clients at once. Default one.\n"
        "  -9|--http09              Use HTTP/0.9 style requests.\n"
        "  -1|--http10              Use HTTP/1.0 protocol.\n"
        "  -2|--http11              Use HTTP/1.1 protocol.\n"
        "  --get                    Use GET request method.\n"
        "  --head                   Use HEAD request method.\n"
        "  --options                Use OPTIONS request method.\n"
        "  --trace                  Use TRACE request method.\n"
        "  -?|-h|--help             This information.\n"
        "  -V|--version             Display program version.\n";

    fputs(help_text, stderr);
}

构建请求消息 `build_request()`

该函数是根据命令中的 url 构建一个请求消息字符串，请求消息的格式如下：

GET /test.jpg HTTP/1.1
User-Agent: WebBench 1.5
Host:192.168.10.1
Pragma: no-cache
Connection: close
\r\n(这里有一个空行，用\r\n表示)

根据上面请求消息拼接字符串，函数使用了大量的字符串操作函数，例如strcpy，strstr，strncasecmp，strlen，strchr，index，strncpy，strcat。如有不懂的可点这里了解，传送门

/*
 * Build the HTTP request text for `url` into the global `request` buffer.
 *
 * When no proxy is configured, the host name (and optional ":port") is
 * extracted from the URL into the globals `host` and `proxyport`, and only
 * the path part of the URL is placed on the request line.  When a proxy is
 * configured, the full URL is placed on the request line instead.
 *
 * `http10` may be raised so that the chosen method/options are valid for
 * the protocol version.  The finished request is printed to stdout.
 *
 * On a malformed or oversized URL an error is written to stderr and the
 * process exits with status 2.
 *
 * Size note: the URL is limited to 1500 bytes and the host to
 * MAXHOSTNAMELEN-1 bytes, so the request line plus all headers stays well
 * below REQUEST_SIZE.
 */
void build_request(const char *url)
{
    const char *authority;  /* first character after "://" */
    const char *path;       /* first '/' following the authority */
    const char *colon;      /* ':' introducing an explicit port, if any */
    char tmp[10];           /* textual port number */
    size_t host_len;
    size_t port_len;

    /* start from empty, NUL-filled buffers */
    memset(host,0,MAXHOSTNAMELEN);
    memset(request,0,REQUEST_SIZE);

    /* raise the protocol version to the minimum the selected options need */
    if(force_reload && proxyhost!=NULL && http10<1)
        http10=1;
    if(method==METHOD_HEAD && http10<1)
        http10=1;   /* HEAD is sent as HTTP/1.0 at least */
    if(method==METHOD_OPTIONS && http10<2)
        http10=2;
    if(method==METHOD_TRACE && http10<2)
        http10=2;   /* OPTIONS and TRACE are sent as HTTP/1.1 */

    /* request line: method */
    switch(method)
    {
        default:
        case METHOD_GET: strcpy(request,"GET");break;
        case METHOD_HEAD: strcpy(request,"HEAD");break;
        case METHOD_OPTIONS: strcpy(request,"OPTIONS");break;
        case METHOD_TRACE: strcpy(request,"TRACE");break;
    }
    strcat(request," ");

    /* the URL must contain a scheme separator */
    if(NULL==strstr(url,"://"))
    {
        fprintf(stderr, "\n%s: is not a valid URL.\n",url);
        exit(2);
    }
    /* keep the URL short enough to fit into the request buffer */
    if(strlen(url)>1500)
    {
        fprintf(stderr,"URL is too long.\n");
        exit(2);
    }
    /* only "http://" (case-insensitive) is accepted */
    if (0!=strncasecmp("http://",url,7))
    {
        fprintf(stderr,"\nOnly HTTP protocol is directly supported, set --proxy for others.\n");
        exit(2);
    }

    /* split scheme from host: authority starts right after "://" */
    authority=strstr(url,"://")+3;

    path=strchr(authority,'/');
    if(path==NULL) {
        fprintf(stderr,"\nInvalid URL syntax - hostname don't ends with '/'.\n");
        exit(2);
    }

    if(proxyhost==NULL)
    {
        /* no proxy: take host (and optional port) from the URL itself */
        colon=strchr(authority,':');
        if(colon!=NULL && colon<path)
        {
            /* "host:port/..." */
            host_len=(size_t)(colon-authority);
            port_len=(size_t)(path-colon-1);
            /* the port text must fit into tmp including its terminator */
            if(port_len>=sizeof tmp)
            {
                fprintf(stderr,"\nInvalid URL - port number is too long.\n");
                exit(2);
            }
            memcpy(tmp,colon+1,port_len);
            tmp[port_len]='\0';
            proxyport=atoi(tmp);
            if(proxyport==0) proxyport=80;  /* empty or non-numeric port: default */
            if(proxyport<0 || proxyport>65535)
            {
                fprintf(stderr,"\nInvalid URL - port number out of range.\n");
                exit(2);
            }
        }
        else
        {
            /* "host/..." */
            host_len=(size_t)(path-authority);
        }

        /* the host name must fit into host[] including its terminator */
        if(host_len>=MAXHOSTNAMELEN)
        {
            fprintf(stderr,"\nInvalid URL - hostname is too long.\n");
            exit(2);
        }
        memcpy(host,authority,host_len);
        host[host_len]='\0';

        /* request target is the path part of the URL */
        strcat(request,path);
    }
    else
    {
        /* via proxy: request target is the complete URL */
        strcat(request,url);
    }

    /* protocol version (HTTP/0.9 request lines carry none) */
    if(http10==1)
        strcat(request," HTTP/1.0");
    else if (http10==2)
        strcat(request," HTTP/1.1");
    strcat(request,"\r\n");

    /* User-Agent header */
    if(http10>0)
        strcat(request,"User-Agent: WebBench "PROGRAM_VERSION"\r\n");

    /* Host header, only when talking to the server directly */
    if(proxyhost==NULL && http10>0)
    {
        strcat(request,"Host: ");
        strcat(request,host);
        strcat(request,"\r\n");
    }
    /* ask the proxy not to serve a cached copy */
    if(force_reload && proxyhost!=NULL)
    {
        strcat(request,"Pragma: no-cache\r\n");
    }
    /* HTTP/1.1 keeps connections open by default; ask for close instead */
    if(http10>1)
        strcat(request,"Connection: close\r\n");

    /* blank line terminating the header section */
    if(http10>0) strcat(request,"\r\n");

    /* show the request that will be sent */
    printf("\nRequest:\n%s\n",request);
}

main 函数

在main函数中，使用getopt_long函数来处理命令参数，并根据参数的值给一些相关变量赋值，可去这里了解getopt_long函数传送门
在main函数中都做完了准备工作，才开始执行核心工作，即压测工作，在main函数的最后一行代码 bench()

/*
 * Program entry point: parse the command line into the global settings,
 * build the request for the URL argument, print the run parameters and
 * start the benchmark.
 *
 * Returns 2 on usage errors; otherwise returns whatever bench() returns.
 */
int main(int argc, char *argv[])
{
    int opt=0;
    int options_index=0;
    char *tmp=NULL;

    /* no arguments at all: show the help text */
    if(argc==1)
    {
        usage();
        return 2;
    }

    /* getopt_long walks the options; optarg is the current option's argument */
    while((opt=getopt_long(argc,argv,"912Vfrt:p:c:?h",long_options,&options_index))!=EOF )
    {
        switch(opt)
        {
            /* presumably a long option that stored its value through a flag
               pointer; long_options is defined outside this excerpt */
            case  0 : break;
            case 'f': force=1;break;
            case 'r': force_reload=1;break;
            case '9': http10=0;break;
            case '1': http10=1;break;
            case '2': http10=2;break;
            /* each `break` above leaves the switch; the option loop continues */
            case 'V': printf(PROGRAM_VERSION"\n");exit(0);  /* print version */
            case 't': benchtime=atoi(optarg);break;
            case 'p':
                /* proxy server parsing server:port */
                tmp=strrchr(optarg,':');
                proxyhost=optarg;
                if(tmp==NULL)
                {
                    /* host only, keep the current port */
                    break;
                }
                if(tmp==optarg)
                {
                    fprintf(stderr,"Error in option --proxy %s: Missing hostname.\n",optarg);
                    return 2;
                }
                if(tmp==optarg+strlen(optarg)-1)
                {
                    fprintf(stderr,"Error in option --proxy %s Port number is missing.\n",optarg);
                    return 2;
                }
                /* cut optarg at the ':' so proxyhost holds just the host */
                *tmp='\0';
                proxyport=atoi(tmp+1);
                break;
            case ':':
            case 'h':
            case '?': usage();return 2;
            case 'c': clients=atoi(optarg);break;
            default: break;
        }
    }
    /* optind is the index of the next unprocessed argv element: the URL */
    if(optind==argc) {
        fprintf(stderr,"webbench: Missing URL!\n");
        usage();
        return 2;
    }

    /* fall back to the defaults for zero, negative or non-numeric values;
       a negative count or duration must never reach fork()/alarm() */
    if(clients<=0) clients=1;
    if(benchtime<=0) benchtime=30;

    /* banner: first two lines of the output */
    fprintf(stderr,"Webbench - Simple Web Benchmark "PROGRAM_VERSION"\n"
            "Copyright (c) Radim Kolar 1997-2004, GPL Open Source Software.\n"
            );

    /* build the request from the URL argument */
    build_request(argv[optind]);

    /* summary of the run parameters */
    printf("Runing info: ");

    if(clients==1)
        printf("1 client");
    else
        printf("%d clients",clients);

    printf(", running %d sec", benchtime);

    if(force) printf(", early socket close");
    if(proxyhost!=NULL) printf(", via proxy server %s:%d",proxyhost,proxyport);
    if(force_reload) printf(", forcing reload");

    printf(".\n");

    return bench();
}

核心函数 `bench()`

在该函数中，它会首先进行一次连接，看是否能够连通，检测结束后，就会把连接给关闭。如果连接不通，就不会执行下面的工作。
当连接是连通的，就会创建多个子进程来作为客户端去访问指定的url。在创建子进程过程中，当其中一个子进程创建失败，也不会继续下面的工作，直接跳出函数。
fork()是用来创建子进程的：创建成功时，在子进程中返回0，在父进程中返回子进程的进程id号；返回值小于0就是fork error。
返回值为0，表示是子进程，大于0表示父进程，在它们各自的作用域内执行自己的逻辑。
子进程逻辑：调用benchcore()来发起访问，并把结果写入管道。
父进程逻辑：从管道中读取子进程写入的数据，并打印到屏幕

/*
 * Run the benchmark: verify the server is reachable, fork `clients` worker
 * processes that each run benchcore(), then collect and print their results.
 *
 * Each worker writes "speed failed bytes" to the shared pipe; the parent
 * sums them up.
 *
 * Returns 0 on success (in both the parent and each worker), 1 when the
 * initial connection test fails, 3 on pipe/fork/fdopen errors.
 */
static int bench(void)
{
    int i,j,k;
    int nread;      /* number of fields fscanf converted */
    pid_t pid=0;
    FILE *f;

    /* connectivity check only; this connection is not part of the benchmark */
    i=Socket(proxyhost==NULL?host:proxyhost,proxyport);
    if(i<0) {
        fprintf(stderr,"\nConnect to server failed. Aborting benchmark.\n");
        return 1;
    }
    close(i);

    /* pipe through which the workers report their counters */
    if(pipe(mypipe))
    {
        perror("pipe failed.");
        return 3;
    }

    /* fork the workers */
    for(i=0;i<clients;i++)
    {
        pid=fork();
        /* leave the loop in the child (pid==0) and on fork failure (pid<0);
           only the parent keeps forking */
        if(pid <= (pid_t) 0)
        {
            sleep(1); /* make childs faster */
            break;
        }
    }

    /* a fork failed: report it and give up */
    if( pid < (pid_t) 0)
    {
        fprintf(stderr,"problems forking worker no. %d\n",i);
        perror("fork failed.");
        return 3;
    }

    if(pid == (pid_t) 0)
    {
        /* worker: run the load loop */
        if(proxyhost==NULL)
            benchcore(host,proxyport,request);
        else
            benchcore(proxyhost,proxyport,request);

        /* report the counters through the write end of the pipe */
        f=fdopen(mypipe[1],"w");
        if(f==NULL)
        {
            perror("open pipe for writing failed.");
            return 3;
        }
        fprintf(f,"%d %d %d\n",speed,failed,bytes);
        fclose(f);

        return 0;
    }

    /* parent: drop our copy of the write end so that reading hits EOF
       (instead of blocking forever) if a worker exits without reporting */
    close(mypipe[1]);

    f=fdopen(mypipe[0],"r");
    if(f==NULL)
    {
        perror("open pipe for reading failed.");
        return 3;
    }

    /* unbuffered reads from the pipe */
    setvbuf(f,NULL,_IONBF,0);

    /* totals start from zero */
    speed=0;
    failed=0;
    bytes=0;

    while(1)
    {
        /* one "speed failed bytes" record per worker; all three are required */
        nread=fscanf(f,"%d %d %d",&i,&j,&k);
        if(nread!=3)
        {
            fprintf(stderr,"Some of our childrens died.\n");
            break;
        }

        speed+=i;
        failed+=j;
        bytes+=k;

        /* stop once every worker has reported */
        if(--clients==0) break;
    }

    fclose(f);

    /* final report */
    printf("\nSpeed=%d pages/min, %d bytes/sec.\nRequests: %d susceed, %d failed.\n",
        (int)((speed+failed)/(benchtime/60.0f)),
        (int)(bytes/(float)benchtime),
        speed,
        failed);

    /* exit status is success, not the last worker's request count */
    return 0;
}

核心函数 `benchcore()`

在该函数内先为SIGALRM信号注册信号处理函数 alarm_handler()，并用alarm()启动定时器；定时时间一到就会调用该信号处理函数，在其中把标志位timerexpired置1来结束下面的循环。
它把访问url的行为都放在while循环里，只要定时器没到，就一直访问url,即把 build_request() 中拼接的请求消息字符串发送到指定的url中。

/*
 * Worker load loop: until the benchmark timer fires, repeatedly connect to
 * host:port, send `req`, optionally read the whole reply, and close.
 *
 * Updates the globals `speed` (successful requests), `failed` (failed
 * requests) and `bytes` (bytes read).  Exits the process with status 3 if
 * the SIGALRM handler cannot be installed.
 */
void benchcore(const char *host,const int port,const char *req)
{
    int rlen;               /* length of the request text */
    char buf[1500];         /* receive buffer */
    int s,i;
    struct sigaction sa;

    /* setup alarm signal handler */
    sa.sa_handler=alarm_handler;
    sigemptyset(&sa.sa_mask);   /* every field must be set before sigaction() */
    sa.sa_flags=0;
    if(sigaction(SIGALRM,&sa,NULL))
        exit(3);

    /* deliver SIGALRM after benchtime seconds */
    alarm(benchtime); // after benchtime,then exit

    rlen=strlen(req);
    nexttry:while(1)
    {
        /* time is up: stop */
        if(timerexpired)
        {
            /* NOTE(review): presumably compensates for the request that was
               cut short by the alarm and counted as failed - confirm */
            if(failed>0)
            {
                failed--;
            }
            return;
        }

        /* connect to the target */
        s=Socket(host,port);
        if(s<0)
        {
            failed++;
            continue;
        }
        /* a short write counts as a failure */
        if(rlen!=write(s,req,rlen))
        {
            failed++;
            close(s);
            continue;
        }

        if (http10 == 0)
        {
            /* HTTP/0.9: shut down the sending direction (how=1), reading
               stays possible; non-zero return means failure */
            if (shutdown(s, 1)) {
                failed++;
                close(s);
                continue;
            }
        }

        /* read the reply unless -f/--force was given */
        if(force==0)
        {
            /* read all available data from socket */
            while(1)
            {
                if(timerexpired)
                    break;
                i=read(s,buf,sizeof buf);
                if(i<0)
                {
                    /* read error */
                    failed++;
                    close(s);
                    goto nexttry;
                }
                else {
                    if (i == 0)
                        break;      /* end of stream: reply complete */
                    else
                        bytes += i;
                }
            }
        }

        /* a failing close also counts as a failed request */
        if(close(s)) {
            failed++;
            continue;
        }

        speed++;    /* request completed */
    }
}