epoll IO多路复用器

epoll IO多路复用器


最近在浏览webrtc代码时看到其socket中使用了epoll机制,由于之前对linux的epoll机制较为陌生,故研究记录如下:
IO多路复用存在的意义在于应用程序可以同时监测多个fd的事件,便于单线程处理多个fd,epoll是众多多路复用器的一种,类似的还有select、poll等。服务器程序通常需要具备较高处理用户并发的能力,使用多路复用器意味着可以用一个线程同时处理多个用户并发请求。

  • 阻塞:
    阻塞指的是用户态程序调用系统api进入内核态后,如果条件不满足则被加入到对应的等待队列中,直到条件满足。比如:sleep 2s。在此期间线程得不到CPU调度,自然也就不会往下执行,表现的现象为线程卡在系统api不返回。
  • 非阻塞:
    非阻塞则相反,不论条件是否满足都会立即返回到用户态,线程的CPU资源不会被剥夺,也就意味着程序可以继续往下执行。

在一次发送大量数据(超过发送缓冲区大小)的情况下,如果使用阻塞方式,程序一直阻塞,直到所有的数据都写入到缓冲区中。例如,要发送M字节数据,套接字发送缓冲区大小为B字节,只有当对端向本机返回ack表明其接收到大于等于M-B字节时,才意味着所有的数据都写入到缓冲区中。很明显,如果一次发送的数据量非常大,比如M=10GB、B=64KB,则:
1)一次发送过程中本机线程会在一个fd上阻塞相当长一段时间,其他fd得不到及时处理;
2)如果出现发送失败,无从得知到底有多少数据发送成功,应用程序只能选择重新发送这10G数据。
总之,上述两点都是无法接受的。因此,对性能有要求的服务器一般不采用阻塞而采用非阻塞。


采用非阻塞套接字一次发送大量数据的流程:

1)使劲往发送缓冲区中写数据,直到返回不可写;
2)等待下一次缓冲区可写;
可以有两种方式:

  1. 查询式,程序不停地查询是否可写,这种方式不仅效率低下,而且存在不确定性的处理延迟;
  2. 程序去干其他的事情,等多路复用器监测到可写事件后再接着写;很明显方式2更加高效。

3)要发送的数据写完;

EPOLLOUT事件 就是以事件的方式通知用户程序,可以继续往缓冲区写数据了
EPOLLOUT事件 表示fd的发送缓冲区可写,在一次发送大量数据(超过发送缓冲区大小)的情况下很有用。
EPOLLIN事件 就是以事件的方式通知用户程序,可以接着从缓冲区读数据了
EPOLLIN事件 表示fd的接收缓冲区可读,在一次接收大块数据(超过接收缓冲区大小)的情况下很有用。


客户端每次按下任意键(由epoll监听处理),触发一次模拟的http请求,服务器接收请求并返回响应,此处用‘\0’ 填充了一个超大数据体,来模拟超过发送缓冲区的大块数据,服务器和客户端都采用epoll来处理对该大数据块的发送和接收;为了简化示例,此处客户端事先知道服务器要发送的数据大小。

server端

#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/socket.h>
#include <unistd.h>
#include <map>
#include <string>

using namespace std;

/** Listening socket fd; used to label fds in log output. */
int g_socketfd = -1;
/** Most recently accepted client fd; used to label fds in log output. */
int g_clientfd = -1;

/** TCP port the demo server listens on. */
#define SERV_PORT   8787

/*
 * exit_if(cond, fmt, ...): when `cond` is true, print the formatted message
 * followed by the current function, line, errno and its description, then
 * terminate the process with status 1.
 *
 * The body is wrapped in do { } while (0) so the macro behaves like a single
 * statement (safe inside an unbraced if/else). errno is captured before the
 * first printf because printf itself may modify it.
 */
#define exit_if(ret, ...)                                                     \
    do {                                                                      \
        if (ret) {                                                            \
            const int exit_if_errno_ = errno;                                 \
            printf(__VA_ARGS__);                                              \
            printf("->>> %s: %d error no: %d error msg %s\n",                 \
                   __func__, __LINE__, exit_if_errno_,                        \
                   strerror(exit_if_errno_));                                 \
            exit(1);                                                          \
        }                                                                     \
    } while (0)

/* log(fmt, ...): printf-style trace line prefixed with "function(line): ". */
#define log(...)                                                              \
    do {                                                                      \
        printf("%s(%d): ", __func__, __LINE__);                               \
        printf(__VA_ARGS__);                                                  \
    } while (0)

/* err_log(msg): like log(), but reports `msg` through perror(). */
#define err_log(errlog)                                                       \
    do {                                                                      \
        printf("%s(%d): ", __func__, __LINE__);                               \
        perror(errlog);                                                       \
    } while (0)


/**
 * Put `fd` into non-blocking mode by OR-ing O_NONBLOCK into its file status
 * flags. Exits the process (via exit_if) if either fcntl call fails.
 */
void SetNonBlock(int fd)
{
    const int current_flags = ::fcntl(fd, F_GETFL, 0);
    exit_if(current_flags < 0, "fcntl failed");

    const char *label = (fd == g_socketfd) ? "socketfd" : "clientfd";
    log("set %s O_NONBLOCK\n", label);

    const int rc = ::fcntl(fd, F_SETFL, current_flags | O_NONBLOCK);
    exit_if(rc < 0, "fcntl failed");
}

/**
 * Apply epoll_ctl operation `op` for `fd` on epoll instance `efd`, using
 * `events` as the interest mask and storing `fd` as the event's user data.
 * Logs the request first and exits the process if epoll_ctl fails.
 */
void UpdateEvents(int efd, int fd, int events, int op)
{
    struct epoll_event request;
    ::memset(&request, 0, sizeof(request));
    request.events = events;
    request.data.fd = fd;

    const bool wants_read = (request.events & EPOLLIN) != 0;
    const bool wants_write = (request.events & EPOLLOUT) != 0;
    const char *op_name = (op == EPOLL_CTL_MOD) ? "mod" : "add";
    const char *fd_name = (fd == g_clientfd) ? "clientfd" : "socketfd";

    log("%s %s[%d], events read [%s] write [%s]\n",
        op_name, fd_name, fd,
        wants_read ? "SET" : "UNSET",
        wants_write ? "SET" : "UNSET");

    const int rc = ::epoll_ctl(efd, op, fd, &request);
    exit_if(rc, "epoll_ctl failed");
}

/**
 * Accept one pending connection on listening socket `fd`, make it
 * non-blocking and register it for EPOLLIN on epoll instance `efd`.
 *
 * Transient accept() failures (nothing pending on the non-blocking listener,
 * interrupted call, connection aborted by the peer) are logged and ignored;
 * any other failure terminates the process via exit_if.
 */
void HandleAccept(int efd, int fd)
{
    struct sockaddr_in raddr;
    ::memset(&raddr, 0, sizeof(raddr));
    socklen_t rsz = sizeof(raddr);

    const int clientfd = ::accept(fd, (struct sockaddr *) &raddr, &rsz);
    if (clientfd < 0) {
        const int accept_errno = errno;  // capture before log() can touch errno
        if (accept_errno == EAGAIN || accept_errno == EWOULDBLOCK ||
            accept_errno == EINTR || accept_errno == ECONNABORTED) {
            log("accept would block or was aborted: %d %s\n",
                accept_errno, strerror(accept_errno));
            return;
        }
        errno = accept_errno;
    }
    exit_if(clientfd < 0, "accept failed");

    // Only publish the fd once we know it is valid.
    g_clientfd = clientfd;
    log("accept a connection from %s\n", inet_ntoa(raddr.sin_addr));

    SetNonBlock(clientfd);
    UpdateEvents(efd, clientfd, EPOLLIN, EPOLL_CTL_ADD);
}

/** Per-connection transfer state, keyed by fd in g_counters. */
struct Counter
{
    Counter()
        : n_wt(0)
        , n_rd(0)
        , write_en(false)
    {
    }

    string readed;   ///< request bytes accumulated so far
    size_t n_wt;     ///< number of response bytes already written
    size_t n_rd;     ///< read counter (initialised to 0)
    bool write_en;   ///< true while EPOLLOUT is registered for the fd
};

/** fd -> transfer state. */
std::map<int, Counter> g_counters;
/** Response payload sent by SendResp. */
string g_http_resp;

/**
 * Thin logging wrapper around write(2).
 *
 * @return whatever write() returned (bytes written, or -1 with errno set).
 *
 * The result is kept as ssize_t (not narrowed to int), and errno is restored
 * after logging because printf may overwrite it while callers rely on errno
 * to distinguish EAGAIN from real errors.
 */
ssize_t WrapperWrite(int fd, const void *buf, size_t count)
{
    const ssize_t ret = ::write(fd, buf, count);
    const int saved_errno = errno;
    log("write return = %ld\n", (long) ret);
    errno = saved_errno;
    return ret;
}

/**
 * Write as much of g_http_resp to `fd` as the socket accepts right now,
 * resuming from the offset stored in g_counters[fd].n_wt.
 *
 *  - Everything sent: stop watching EPOLLOUT (if it was enabled) and drop the
 *    per-fd state.
 *  - Socket buffer full (EAGAIN/EWOULDBLOCK): enable EPOLLOUT so the main
 *    loop calls back when the socket is writable again.
 *  - Any other failure: log it, close `fd` and drop the per-fd state.
 */
void SendResp(int efd, int fd)
{
    Counter &cter = g_counters[fd];
    const char *who = (fd == g_clientfd) ? "clientfd" : "socketfd";
    size_t left = g_http_resp.length() - cter.n_wt;
    log("g_http_resp.length = %ld bytes, counter.n_wt= %ld bytes, left: %lu bytes\n",
        (long) g_http_resp.length(), (long) cter.n_wt, (unsigned long) left);

    ssize_t wn = 0;
    int write_errno = 0;
    while (left > 0) {  // never issue a zero-length write once we are done
        wn = WrapperWrite(fd, g_http_resp.data() + cter.n_wt, left);
        if (wn < 0 && errno == EINTR)
            continue;  // interrupted before any byte was written: retry
        if (wn <= 0) {
            write_errno = errno;  // capture before any logging can change it
            break;
        }
        cter.n_wt += wn;
        left -= wn;
        log("write %d bytes, left: %lu bytes\n", (int) wn, (unsigned long) left);
    }

    if (left == 0) {
        //close(fd);
        if (cter.write_en) {
            log("left is 0, %s[%d] updateEvents(EPOLL_CTL_MOD) to EPOLLIN aka monitor read\n",
                who, fd);
            // All data has been sent: stop watching for writability.
            UpdateEvents(efd, fd, EPOLLIN, EPOLL_CTL_MOD);
        }
        g_counters.erase(fd);
        return;
    }

    if (wn < 0 && (write_errno == EAGAIN || write_errno == EWOULDBLOCK)) {
        if (!cter.write_en) {  ///< default is false
            log("write return %d, && errno is: %s, %s[%d] updateEvents(EPOLL_CTL_MOD) to EPOLLIN | EPOLLOUT\n",
                (int) wn, (write_errno == EAGAIN) ? "EAGAIN" : "EWOULDBLOCK",
                who, fd);
            UpdateEvents(efd, fd, EPOLLIN | EPOLLOUT, EPOLL_CTL_MOD);
            cter.write_en = true;
        }
        return;
    }

    // The loop stopped with data still pending and it was not EAGAIN.
    log("write error for %s[%d]: %d %s\n",
        who, fd, write_errno, strerror(write_errno));
    ::close(fd);
    g_counters.erase(fd);
}

/**
 * Drain readable data from client `fd` into g_counters[fd].readed. Once the
 * accumulated text ends with a blank line ("\n\n" or "\r\n\r\n") the request
 * is considered complete and SendResp() is started.
 *
 * The function returns right after SendResp(): SendResp may close `fd` and
 * erase its state on a write error, so reading on (and closing again) would
 * touch a dead descriptor. Since the fd is registered level-triggered, any
 * bytes still unread are reported again by the next epoll_wait.
 *
 * EOF and read errors other than EAGAIN/EWOULDBLOCK close the connection.
 */
void HandleRead(int efd, int fd)
{
    char buf[4096];
    ssize_t rn = 0;
    const char *who = (fd == g_clientfd) ? "clientfd" : "socketfd";
    log("reading data form %s[%d]\n", who, fd);

    for (;;) {
        rn = ::read(fd, buf, sizeof buf);
        if (rn < 0 && errno == EINTR)
            continue;  // interrupted by a signal: retry
        if (rn <= 0)
            break;

        log("read %d bytes\n", (int) rn);
        string &readed = g_counters[fd].readed;
        readed.append(buf, rn);

        const size_t len = readed.length();
        const bool request_done =
            len > 4 &&
            (readed.compare(len - 2, 2, "\n\n") == 0 ||
             readed.compare(len - 4, 4, "\r\n\r\n") == 0);
        if (request_done) {
            // A complete HTTP request has been read: send the test response.
            log("parse http request success. sending response data to client\n");
            SendResp(efd, fd);
            return;
        }
    }

    const int read_errno = errno;  // capture before logging
    if (rn < 0 && (read_errno == EAGAIN || read_errno == EWOULDBLOCK))
        return;  // nothing more to read for now

    if (rn < 0) {
        log("read %s[%d]error: %d %s\n",
            who, fd, read_errno, strerror(read_errno));
    } else {
        log("%s[%d] closed\n", who, fd);
    }

    ::close(fd);
    g_counters.erase(fd);
}

/**
 * EPOLLOUT handler: the socket is writable again, so resume sending the
 * pending response by delegating to SendResp().
 */
void HandleWrite(int efd, int fd) 
{
    SendResp(efd, fd);
}

/**
 * Run one iteration of the event loop: wait up to `waitms` milliseconds for
 * events on `efd` and dispatch each one.
 *
 *  - readable / error / hang-up on `sockfd`  -> HandleAccept
 *  - readable / error / hang-up on a client  -> HandleRead (which observes
 *    EOF or the error and closes the connection)
 *  - writable                                -> HandleWrite
 *
 * EPOLLHUP is always reported by epoll regardless of the interest mask, so it
 * is routed to the read path instead of falling through to "unknown event".
 * An epoll_wait failure other than EINTR terminates the process.
 */
void MainLoop(int efd, int sockfd, int waitms)
{
    const int kMaxEvents = 20;
    struct epoll_event activeEvs[kMaxEvents];  // sized to match maxevents

    const int n = ::epoll_wait(efd, activeEvs, kMaxEvents, waitms);
    const int wait_errno = errno;  // capture before log() can change it
    log("epoll_wait return %d\n", n);
    if (n < 0) {
        if (wait_errno == EINTR)
            return;  // interrupted by a signal: just try again next round
        errno = wait_errno;
        exit_if(1, "epoll_wait failed");
    }

    for (int i = 0; i < n; i++) {
        const int fd = activeEvs[i].data.fd;
        const unsigned int events = activeEvs[i].events;
        log("epoll get events from %s[%d], ",
            fd == g_clientfd ? "clientfd" : "socketfd", fd);

        if (events & (EPOLLIN | EPOLLERR | EPOLLHUP)) {
            if (fd == sockfd) {
                log("handling EPOLLIN(accept)\n");
                HandleAccept(efd, fd);
            } else {
                log("handling EPOLLIN(read)\n");
                HandleRead(efd, fd);
            }
        } else if (events & EPOLLOUT) {
            log("handling EPOLLOUT(write)\n");
            HandleWrite(efd, fd);
        } else {
            exit_if(1, "unknown event");
        }
    }
}

/**
 * Demo epoll server. Usage: ./xxx <ipv4-address>
 *
 * Builds a large response (a 120-byte header/prefix followed by 20971520 NUL
 * bytes), listens on <ip>:SERV_PORT and serves it through the epoll loop.
 */
int main(int argc, const char *argv[])
{
    exit_if(argc < 2, "./xxx ip");

    // A peer closing mid-transfer must surface as a write() error, not kill us.
    ::signal(SIGPIPE, SIG_IGN);

    // NOTE: the header text is kept byte-for-byte. Its Content-Length value is
    // not a valid decimal number, but the demo client counts the total
    // response size (120 + 20971520 = 20971640 bytes), so changing the header
    // length would break it.
    g_http_resp =
        "HTTP/1.1 200 OK\r\n"
        "Connection: Keep-Alive\r\n"
        "Content-Type: text/html; charset=UTF-8\r\n"
        "Content-Length: 104857600*2+6\r\n"
        "\r\n"
        "123456";
    // Pad with NUL bytes in one call instead of 20M single-character appends.
    g_http_resp.append(static_cast<size_t>(10485760) * 2, '\0');

    int epollfd = ::epoll_create(1);
    exit_if(epollfd < 0, "epoll_create failed");

    int sockfd = ::socket(AF_INET, SOCK_STREAM, 0);
    exit_if(sockfd < 0, "socket failed");
    g_socketfd = sockfd;

    struct sockaddr_in addr;
    memset(&addr, 0, sizeof addr);
    addr.sin_family = AF_INET;
    addr.sin_port = htons(SERV_PORT);
    // inet_pton returns 1 only for a valid address; otherwise sin_addr would
    // silently stay 0.0.0.0.
    int ret = ::inet_pton(AF_INET, argv[1], &addr.sin_addr);
    exit_if(ret != 1, "invalid IPv4 address %s", argv[1]);

    ret = ::bind(sockfd, (struct sockaddr *) &addr, sizeof(addr));
    exit_if(ret, "bind to %s:%d failed %d %s", argv[1], SERV_PORT, errno, strerror(errno));

    ret = ::listen(sockfd, 20);
    exit_if(ret, "listen failed %d %s", errno, strerror(errno));
    log("fd %d listening at %s[%d]\n", sockfd, argv[1], SERV_PORT);
    SetNonBlock(sockfd);

    UpdateEvents(epollfd, sockfd, EPOLLIN, EPOLL_CTL_ADD);
    for (;;) {  // a real application should install signal handlers and clean up on exit
        MainLoop(epollfd, sockfd, 10000);
    }

    return 0;
}

客户端

#include <netinet/in.h>
#include <sys/socket.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/epoll.h>
#include <signal.h>
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#include <arpa/inet.h>
#include <iostream>
#include <fcntl.h>
#include <map>

using namespace std;

/** Socket connected to the server; used by the handlers and for log labels. */
int g_socketfd = -1;

/** Size of the read buffer used by the handlers. */
#define MAXSIZE     4096
/** TCP port of the demo server. */
#define SERV_PORT   8787

/*
 * exit_if(cond, fmt, ...): when `cond` is true, print the formatted message
 * followed by the current function, line, errno and its description, then
 * terminate the process with status 1.
 *
 * The body is wrapped in do { } while (0) so the macro behaves like a single
 * statement (safe inside an unbraced if/else). errno is captured before the
 * first printf because printf itself may modify it.
 */
#define exit_if(ret, ...)                                                     \
    do {                                                                      \
        if (ret) {                                                            \
            const int exit_if_errno_ = errno;                                 \
            printf(__VA_ARGS__);                                              \
            printf("->>> %s: %d error no: %d error msg %s\n",                 \
                   __func__, __LINE__, exit_if_errno_,                        \
                   strerror(exit_if_errno_));                                 \
            exit(1);                                                          \
        }                                                                     \
    } while (0)

/* log(fmt, ...): printf-style trace line prefixed with "function(line): ". */
#define log(...)                                                              \
    do {                                                                      \
        printf("%s(%d): ", __func__, __LINE__);                               \
        printf(__VA_ARGS__);                                                  \
    } while (0)

/* err_log(msg): like log(), but reports `msg` through perror(). */
#define err_log(errlog)                                                       \
    do {                                                                      \
        printf("%s(%d): ", __func__, __LINE__);                               \
        perror(errlog);                                                       \
    } while (0)

/**<
 *  typedef union epoll_data {
 *     void    *ptr;
 *     int      fd;
 *     uint32_t u32;
 *     uint64_t u64;
 *  } epoll_data_t;
 *
 *  struct epoll_event {
 *     uint32_t     events;    ///< Epoll events 
 *     epoll_data_t data;      ///< User data variable 
 *  };
 */


/**
 * Put `fd` into non-blocking mode by OR-ing O_NONBLOCK into its file status
 * flags. Exits the process (via exit_if) if either fcntl call fails.
 */
void SetNonBlock(int fd)
{
    const int current_flags = ::fcntl(fd, F_GETFL, 0);
    exit_if(current_flags < 0, "fcntl failed");

    const char *label = (fd == g_socketfd) ? "socketfd" : "clientfd";
    log("set %s O_NONBLOCK\n", label);

    const int rc = ::fcntl(fd, F_SETFL, current_flags | O_NONBLOCK);
    exit_if(rc < 0, "fcntl failed");
}

/**
 * Apply epoll_ctl operation `op` (add / mod / del) for `fd` on epoll instance
 * `efd`, using `events` as the interest mask and storing `fd` as the event's
 * user data. Logs the request first and exits the process if epoll_ctl fails.
 */
void UpdateEvents(int efd, int fd, int events, int op)
{
    struct epoll_event request;
    ::memset(&request, 0, sizeof(request));
    request.events = events;
    request.data.fd = fd;

    const bool wants_read = (request.events & EPOLLIN) != 0;
    const bool wants_write = (request.events & EPOLLOUT) != 0;

    const char *op_name = "del";
    if (op == EPOLL_CTL_MOD) {
        op_name = "mod";
    } else if (op == EPOLL_CTL_ADD) {
        op_name = "add";
    }
    const char *fd_name = (fd == STDIN_FILENO) ? "STDIN_FILENO" : "socketfd";

    log("%s %s[%d], events read [%s] write [%s]\n",
        op_name, fd_name, fd,
        wants_read ? "SET" : "UNSET",
        wants_write ? "SET" : "UNSET");

    const int rc = ::epoll_ctl(efd, op, fd, &request);
    exit_if(rc, "epoll_ctl failed");
}

/** Per-fd transfer state, keyed by fd in g_counters. */
struct Counter
{
    Counter()
        : n_wt(0)
        , n_rd(0)
        , write_en(false)
    {
    }

    std::string readed;    ///< bytes received so far
    unsigned long n_wt;    ///< number of request bytes already written
    unsigned long n_rd;    ///< number of bytes read so far
    bool write_en;         ///< true while EPOLLIN | EPOLLOUT is registered
};

/** fd -> transfer state. */
std::map<int, Counter> g_counters;

/**
 * Thin logging wrapper around read(2).
 *
 * Reads at most `count` bytes into `buf` (the caller's buffer size is
 * honoured rather than a fixed MAXSIZE, so a smaller buffer cannot be
 * overrun).
 *
 * @return whatever read() returned (bytes read, 0 on EOF, -1 with errno set).
 *
 * errno is restored after logging because printf may overwrite it while
 * callers rely on errno to distinguish EAGAIN from real errors.
 */
ssize_t WrapperRead(int fd, void *buf, size_t count)
{
    log("prepare to read...\n");
    const ssize_t ret = ::read(fd, buf, count);
    const int saved_errno = errno;
    log("read %s[%d] ret = %d\n",
        fd == STDIN_FILENO ? "STDIN_FILENO" : "socketfd", fd, (int) ret);
    errno = saved_errno;

    return ret;
}

/**
 * EPOLLIN handler for both stdin and the server socket.
 *
 *  - stdin: a key press triggers a request. Stdin is removed from the epoll
 *    set and the socket is added for EPOLLOUT so HandleWrite sends it.
 *  - socket: received bytes are accumulated until the whole response
 *    (kExpectedBytes) has arrived; then stdin is watched again and the socket
 *    is removed from the epoll set.
 *
 * Both paths return immediately after erasing the fd's Counter: `cter` is a
 * reference into g_counters and must not be used once the entry is gone.
 * EOF and read errors other than EAGAIN/EWOULDBLOCK close `fd`.
 */
static void HandleRead(int efd, int fd)
{
    // Total size of the demo server's response: 120-byte header/prefix plus
    // 10485760*2 padding bytes.
    const unsigned long kExpectedBytes = 20971640;

    ssize_t nr = 0;
    char buf[MAXSIZE];
    Counter &cter = g_counters[fd];
    log("fd is %s\n", fd == g_socketfd ? "socketfd" : "STDIN_FILENO");

    for (;;) {
        nr = WrapperRead(fd, buf, sizeof(buf));
        if (nr < 0 && errno == EINTR)
            continue;  // interrupted by a signal: retry
        if (nr <= 0)
            break;

        cter.readed.append(buf, nr);
        cter.n_rd += nr;
        log("10485760*2 + 6(%d) vs %ld\n", (int) kExpectedBytes, (long) cter.n_rd);

        if (fd == STDIN_FILENO) {
            UpdateEvents(efd, STDIN_FILENO, EPOLLIN, EPOLL_CTL_DEL);  ///< stop watching stdin
            UpdateEvents(efd, g_socketfd, EPOLLOUT, EPOLL_CTL_ADD);   ///< wait until the socket is writable
            // The typed characters are not needed; do not let them pile up.
            g_counters.erase(fd);
            return;
        }

        if (cter.n_rd == kExpectedBytes) {  ///< whole response received
            UpdateEvents(efd, STDIN_FILENO, EPOLLIN, EPOLL_CTL_ADD);  ///< watch stdin again
            UpdateEvents(efd, fd, EPOLLIN, EPOLL_CTL_DEL);            ///< stop watching the socket

            log("received: %s\n", cter.readed.c_str());
            g_counters.erase(fd);
            return;  // `cter` is dangling from here on
        }
    }

    const int read_errno = errno;  // capture before logging
    if (nr < 0 && (read_errno == EAGAIN || read_errno == EWOULDBLOCK))
        return;  // nothing more to read for now

    if (nr == 0) {
        log("server closed.\n");
    } else {
        log("read %s[%d]error: %d %s\n",
            fd == STDIN_FILENO ? "STDIN_FILENO" : "socketfd", fd,
            read_errno, strerror(read_errno));
    }

    ::close(fd);
    g_counters.erase(fd);
}

/** Request text sent to the server by HandleWrite. */
std::string g_http_rqst;

/**
 * Thin logging wrapper around write(2).
 *
 * @return whatever write() returned (bytes written, or -1 with errno set).
 *
 * The result is kept as ssize_t (not narrowed to int), and errno is restored
 * after logging because printf may overwrite it while callers rely on errno
 * to distinguish EAGAIN from real errors.
 */
ssize_t WrapperWrite(int fd, const void *buf, size_t count)
{
    const ssize_t ret = ::write(fd, buf, count);
    const int saved_errno = errno;
    log("write return = %ld\n", (long) ret);
    errno = saved_errno;

    return ret;
}

/**
 * EPOLLOUT handler: write as much of g_http_rqst to `fd` as the socket
 * accepts right now, resuming from g_counters[fd].n_wt.
 *
 *  - Everything sent: switch the fd to EPOLLIN only (wait for the response)
 *    and drop the per-fd state.
 *  - Socket buffer full (EAGAIN/EWOULDBLOCK): register EPOLLIN | EPOLLOUT so
 *    the main loop calls back when the socket is writable again.
 *  - Any other failure: log it, close `fd` and drop the per-fd state.
 */
static void HandleWrite(int efd, int fd)
{
    Counter &cter = g_counters[fd];
    const char *who = (fd == STDIN_FILENO) ? "STDIN_FILENO" : "socketfd";
    unsigned long left = g_http_rqst.length() - cter.n_wt;

    log("g_http_rqst.length = %ld bytes, counter.n_wt= %ld bytes, left: %lu bytes\n",
        (long) g_http_rqst.length(), (long) cter.n_wt, left);

    ssize_t wn = 0;
    int write_errno = 0;
    while (left > 0) {  // never issue a zero-length write once we are done
        wn = WrapperWrite(fd, g_http_rqst.data() + cter.n_wt, left);
        if (wn < 0 && errno == EINTR)
            continue;  // interrupted before any byte was written: retry
        if (wn <= 0) {
            write_errno = errno;  // capture before any logging can change it
            break;
        }
        cter.n_wt += wn;
        left -= wn;
        log("write %d bytes, left: %lu bytes\n", (int) wn, left);
    }

    if (left == 0) {
        log("left is 0, %s[%d] UpdateEvents(EPOLL_CTL_MOD) to EPOLLIN aka monitor read\n",
            who, fd);
        // All data has been sent: stop watching for writability.
        UpdateEvents(efd, fd, EPOLLIN, EPOLL_CTL_MOD);
        g_counters.erase(fd);
        return;
    }

    if (wn < 0 && (write_errno == EAGAIN || write_errno == EWOULDBLOCK)) {
        if (!cter.write_en) {  ///< default is false
            log("write return %d, && errno is: %s, %s[%d] UpdateEvents(EPOLL_CTL_MOD) to EPOLLIN | EPOLLOUT\n",
                (int) wn, (write_errno == EAGAIN) ? "EAGAIN" : "EWOULDBLOCK",
                who, fd);
            UpdateEvents(efd, fd, EPOLLIN | EPOLLOUT, EPOLL_CTL_MOD);
            cter.write_en = true;
        }
        return;
    }

    // The loop stopped with data still pending and it was not EAGAIN.
    log("write error for %s[%d]: %d %s\n",
        who, fd, write_errno, strerror(write_errno));
    ::close(fd);
    g_counters.erase(fd);
}

/**
 * Run one iteration of the client event loop: wait up to `waitms`
 * milliseconds for events on `efd` and dispatch each ready fd.
 *
 *  - EPOLLIN  -> HandleRead
 *  - EPOLLOUT -> HandleWrite
 *  - EPOLLERR / EPOLLHUP alone -> HandleRead, so the read path observes the
 *    EOF/error and closes the fd. These two are reported by epoll regardless
 *    of the interest mask; leaving them unhandled would make the
 *    level-triggered loop spin on the same event.
 *
 * `sockfd` is accepted for interface compatibility but not used here.
 * epoll_wait returns 0 on timeout and -1 on error; in both cases nothing is
 * dispatched.
 */
static void MainLoop(int efd, int sockfd, int waitms)
{
    (void) sockfd;

    const int kMaxEvents = 20;
    struct epoll_event activeEvs[kMaxEvents];  // sized to match maxevents

    const int num = ::epoll_wait(efd, activeEvs, kMaxEvents, waitms);
    for (int i = 0; i < num; i++) {
        const int afd = activeEvs[i].data.fd;  ///< fd the event occurred on
        const unsigned int events = activeEvs[i].events;

        if (events & EPOLLIN) {                       ///< fd can be read
            HandleRead(efd, afd);
        } else if (events & EPOLLOUT) {               ///< fd can be written
            HandleWrite(efd, afd);
        } else if (events & (EPOLLERR | EPOLLHUP)) {  ///< error / hang-up only
            HandleRead(efd, afd);
        }
    }
}


/**
 * Demo epoll client. Usage: ./xxx <server-ipv4-address>
 *
 * Connects to <ip>:SERV_PORT, then watches stdin: every key press sends the
 * prepared HTTP request and the response is received through the epoll loop.
 */
int main(int argc,char *argv[])
{
    struct sockaddr_in  servaddr;

    exit_if(argc < 2, "./xxx ip");
    // The server closing mid-transfer must surface as a write() error.
    ::signal(SIGPIPE, SIG_IGN);
    g_http_rqst = "GET /cia/arts/stories/LuAenOdp11Es9xEbHLoqEnqXad07tlnk4Ovdm0n1rauI5iQ3xmdjOwRRXJBQ3jMQ.mp3 HTTP/1.1\r\n";
    g_http_rqst += "Host: public01-1255411705.file.myqcloud.com\r\n";
    g_http_rqst += "Connection: close\r\n\r\n";

    int epollfd = ::epoll_create(1);
    exit_if(epollfd < 0, "epoll_create failed");

    int sockfd = ::socket(AF_INET,SOCK_STREAM,0);
    exit_if(sockfd < 0, "socket failed");
    g_socketfd = sockfd;

    ::memset(&servaddr, 0, sizeof(servaddr));
    servaddr.sin_family = AF_INET;
    servaddr.sin_port = htons(SERV_PORT);
    // inet_pton returns 1 only for a valid address; otherwise sin_addr would
    // silently stay 0.0.0.0.
    int ret = ::inet_pton(AF_INET, argv[1], &servaddr.sin_addr);
    exit_if(ret != 1, "invalid IPv4 address %s", argv[1]);

    // Connect while the socket is still blocking, then switch to non-blocking.
    ret = ::connect(sockfd,(struct sockaddr*)&servaddr,sizeof(servaddr));
    exit_if(ret, "connect failed %d %s", errno, strerror(errno));
    log("fd %d connect to %s[%d]\n", sockfd, argv[1],SERV_PORT);

    SetNonBlock(sockfd);

    /**< Register interest in stdin with the epoll object.
     * EPOLLIN: the descriptor is readable (this includes the peer closing
     * the connection normally).
     */
    UpdateEvents(epollfd, STDIN_FILENO, EPOLLIN, EPOLL_CTL_ADD); ///< watch standard input
    for (;;) {
        MainLoop(epollfd, sockfd, 10000);
    }

    // Not reached: the loop above never terminates.
    ::close(epollfd);
    ::close(sockfd);

    return 0;
}

发布了134 篇原创文章 · 获赞 20 · 访问量 6万+

猜你喜欢

转载自blog.csdn.net/u011583798/article/details/82895632