多路IO复用模型之epoll
与select和poll相比,epoll更加灵活,没有描述符数量的限制。epoll使用一个文件描述符来管理多个描述符,将用户关心的文件描述符的事件存放到内核的一个事件表中,这样用户空间和内核空间之间的数据拷贝只需要一次。
epoll底层采用红黑树作为索引结构,用一个双向链表实现就绪队列。
epoll函数原型
int epoll_create(int size);
int epoll_ctl(int epfd,
int op,
int fd,
struct epoll_event * event);
int epoll_wait(int epfd,
struct epoll_event * events,
int maxevents,
int timeout);
还是用一张图来解释各个参数的作用
epoll实战
server.cpp
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <sys/epoll.h>
#include <unistd.h>
#include <sys/types.h>
#define IPADDRESS "127.0.0.1"
#define PORT 6666
#define MAXSIZE 1024
#define LISTENQ 5
#define FDSIZE 1000
#define EPOLLEVENTS 100
int socket_bind(const char* ip, int port); // create a socket and bind it to ip:port
void do_epoll(int listenfd); // run the epoll event loop
void handle_events(int epollfd, struct epoll_event *events, int num, int listenfd,char * buf); // dispatch the ready events
void handle_accept(int epollfd, int listenfd); // handle an incoming connection
void do_read(int epollfd, int fd, char *buf); // read handling
void do_write(int epollfd, int fd, char *buf); // write handling
void add_event(int epollfd, int fd, int state); // add an event
void modify_event(int epollfd, int fd, int state); // modify an event
void delete_event(int epollfd, int fd, int state); // delete an event
/*
 * Server entry point: bind the listening socket to IPADDRESS:PORT, put it
 * into listening state and hand it to the epoll loop.
 */
int main(){
    int listenfd;

    listenfd = socket_bind(IPADDRESS, PORT);
    /* Without a successful listen() the epoll loop would wait on a socket
     * that can never accept a connection, so fail loudly instead. */
    if (listen(listenfd, LISTENQ) == -1) {
        perror("listen error:");
        close(listenfd);
        exit(1);
    }
    do_epoll(listenfd);
    return 0;
}
/*
 * Create a TCP/IPv4 socket and bind it to ip:port.
 *
 * ip   - dotted-decimal IPv4 address to bind to
 * port - port number in host byte order
 *
 * Returns the bound socket descriptor. Any failure (socket creation,
 * address parsing, bind) prints a diagnostic and terminates the process
 * with exit(1).
 */
int socket_bind(const char* ip, int port){
    int listenfd;
    int rc;
    struct sockaddr_in servaddr;

    listenfd = socket(AF_INET, SOCK_STREAM, 0);
    if (listenfd == -1) {
        perror("socket error:");
        exit(1);
    }

    memset(&servaddr, 0, sizeof(servaddr));
    servaddr.sin_family = AF_INET;
    servaddr.sin_port = htons((unsigned short)port);

    /* inet_pton returns 1 on success, 0 for an unparsable address and
     * -1 on error; anything but 1 would leave sin_addr meaningless. */
    rc = inet_pton(AF_INET, ip, &servaddr.sin_addr);
    if (rc != 1) {
        if (rc == 0) {
            fprintf(stderr, "invalid IPv4 address: %s\n", ip);
        }
        else {
            perror("inet_pton error:");
        }
        close(listenfd);
        exit(1);
    }

    if (bind(listenfd, (struct sockaddr*)&servaddr, sizeof(servaddr)) == -1) {
        perror("bind error:");
        close(listenfd);
        exit(1);
    }
    return listenfd;
}
/*
 * Server event loop: create an epoll instance, register the listening
 * socket for readability and dispatch ready events until epoll_wait fails.
 *
 * listenfd - listening socket to watch for incoming connections
 *
 * A single MAXSIZE buffer is handed to every handler invocation.
 */
void do_epoll(int listenfd){
    int epollfd;
    struct epoll_event events[EPOLLEVENTS];
    int ret;
    char buf[MAXSIZE];

    memset(buf, 0, MAXSIZE);

    epollfd = epoll_create(FDSIZE);
    if (epollfd == -1) {
        perror("epoll_create error:");
        exit(1);
    }
    add_event(epollfd, listenfd, EPOLLIN);

    while (1) {
        /* Block (timeout -1) until at least one descriptor is ready. */
        ret = epoll_wait(epollfd, events, EPOLLEVENTS, -1);
        if (ret == -1) {
            /* An interrupted wait is harmless: retry. Any other error
             * would repeat on every iteration, so leave the loop. */
            if (errno == EINTR) {
                continue;
            }
            perror("epoll_wait error:");
            break;
        }
        handle_events(epollfd, events, ret, listenfd, buf);
    }
    close(epollfd);
}
/*
 * Dispatch the first `num` entries of `events`.
 *
 * For each entry exactly one handler runs, chosen in this order:
 *   - EPOLLIN on the listening socket -> handle_accept
 *   - EPOLLIN on any other descriptor -> do_read
 *   - EPOLLOUT                        -> do_write
 * Entries matching none of these are skipped. The same `buf` is passed
 * to every do_read/do_write call.
 */
void handle_events(int epollfd, struct epoll_event *events, int num, int listenfd, char *buf){
    int idx = 0;

    while (idx < num) {
        const struct epoll_event *ready = &events[idx];
        const int ready_fd = ready->data.fd;
        const int readable = (ready->events & EPOLLIN) != 0;

        idx++;

        if (readable && ready_fd == listenfd) {
            /* Readability on the listening socket means a pending connection. */
            handle_accept(epollfd, listenfd);
            continue;
        }
        if (readable) {
            do_read(epollfd, ready_fd, buf);
            continue;
        }
        if ((ready->events & EPOLLOUT) != 0) {
            do_write(epollfd, ready_fd, buf);
        }
    }
}
/*
 * Accept one pending connection on listenfd and register the new client
 * descriptor with epollfd for readability.
 *
 * On accept failure the error is reported and nothing is registered.
 */
void handle_accept(int epollfd, int listenfd){
    int clifd;
    struct sockaddr_in cliaddr;
    /* accept() reads this as the capacity of cliaddr before writing the
     * actual length back, so it must be initialized. */
    socklen_t cliaddrlen = sizeof(cliaddr);

    clifd = accept(listenfd, (struct sockaddr*)&cliaddr, &cliaddrlen);
    if (clifd == -1) {
        perror("accept error:");
    }
    else{
        /* sin_port is in network byte order; convert before printing. */
        printf("accept a new client: %s:%d\n", inet_ntoa(cliaddr.sin_addr), ntohs(cliaddr.sin_port));
        add_event(epollfd, clifd, EPOLLIN); /* watch the client for incoming data */
    }
}
/*
 * Read one chunk from a client into buf.
 *
 * epollfd - epoll instance the client is registered with
 * fd      - client descriptor reported readable
 * buf     - destination buffer of MAXSIZE bytes
 *
 * On data: buf is NUL-terminated, the message is printed and the
 * descriptor is switched to EPOLLOUT so the write handler can reply.
 * On error or end-of-file: the descriptor is removed from the epoll set
 * and closed.
 */
void do_read(int epollfd, int fd, char *buf){
    int nread;

    /* Leave room for the terminator: buf is later used as a C string. */
    nread = read(fd, buf, MAXSIZE - 1);
    if (nread == -1) {
        perror("read error:");
        /* Deregister while the descriptor is still open; after close()
         * the EPOLL_CTL_DEL request could only fail. */
        delete_event(epollfd, fd, EPOLLIN);
        close(fd);
    }
    else if (nread == 0){
        fprintf(stderr, "client close. \n");
        delete_event(epollfd, fd, EPOLLIN);
        close(fd);
    }
    else{
        buf[nread] = '\0';
        printf("read message is : %s", buf);
        modify_event(epollfd, fd, EPOLLOUT); /* switch from reading to writing */
    }
}
/*
 * Send the NUL-terminated contents of buf to a client, then clear buf.
 *
 * epollfd - epoll instance the client is registered with
 * fd      - client descriptor reported writable
 * buf     - MAXSIZE-byte buffer holding the reply
 *
 * On success the descriptor is switched back to EPOLLIN. On write error
 * it is removed from the epoll set and closed.
 */
void do_write(int epollfd, int fd, char *buf){
    int nwrite;

    nwrite = write(fd, buf, strlen(buf));
    if (nwrite == -1) {
        perror("write error:");
        /* Deregister while the descriptor is still open; after close()
         * the EPOLL_CTL_DEL request could only fail. */
        delete_event(epollfd, fd, EPOLLOUT);
        close(fd);
    }
    else{
        modify_event(epollfd, fd, EPOLLIN); /* go back to waiting for input */
    }
    memset(buf, 0, MAXSIZE);
}
/*
 * Register fd with the epoll instance epollfd.
 *
 * state - event mask (e.g. EPOLLIN, EPOLLOUT) to watch for
 *
 * The descriptor itself is stored as the event's user data. A failed
 * registration is reported on stderr.
 */
void add_event(int epollfd, int fd, int state){
    struct epoll_event ev;

    /* data is a union wider than fd; clear it so no indeterminate bytes
     * are handed to the kernel. */
    memset(&ev, 0, sizeof(ev));
    ev.events = state;
    ev.data.fd = fd;
    if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) == -1) {
        perror("epoll_ctl add error:");
    }
}
/*
 * Remove fd from the epoll instance epollfd.
 *
 * state - event mask copied into the request structure
 *
 * The result of epoll_ctl is not inspected.
 */
void delete_event(int epollfd, int fd, int state){
    struct epoll_event request;

    request.data.fd = fd;
    request.events = state;
    (void)epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &request);
}
/*
 * Replace the event mask of an already registered fd.
 *
 * state - new event mask (e.g. EPOLLIN, EPOLLOUT)
 *
 * The descriptor itself is stored as the event's user data. A failed
 * update is reported on stderr.
 */
void modify_event(int epollfd, int fd, int state){
    struct epoll_event ev;

    /* data is a union wider than fd; clear it so no indeterminate bytes
     * are handed to the kernel. */
    memset(&ev, 0, sizeof(ev));
    ev.events = state;
    ev.data.fd = fd;
    if (epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &ev) == -1) {
        perror("epoll_ctl mod error:");
    }
}
client.cpp
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <sys/epoll.h>
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#define IPADDRESS "127.0.0.1"
#define SERV_PORT 6666
#define MAXSIZE 1024
#define FDSIZE 1024
#define EPOLLEVENTS 20
void handle_connection(int sockfd); // run the client's epoll loop for the connected socket
void handle_events(int epollfd, struct epoll_event *events, int num, int sockfd, char *buf); // dispatch the ready events
void do_read(int epollfd, int fd, int sockfd, char *buf); // read handling
void do_write(int epollfd, int fd, int sockfd, char *buf); // write handling
void add_event(int epollfd, int fd, int state); // add an event
void modify_event(int epollfd, int fd, int state); // modify an event
void delete_event(int epollfd, int fd, int state); // delete an event
int count = 0; // sequence number that do_write appends to each message it writes
/*
 * Client entry point: connect a TCP socket to IPADDRESS:SERV_PORT and run
 * the interactive epoll loop on it.
 *
 * Any setup failure (socket, address parsing, connect) prints a
 * diagnostic and terminates the process with exit(1).
 */
int main(){
    int sockfd;
    struct sockaddr_in servaddr;

    sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd == -1) {
        perror("socket error:");
        exit(1);
    }

    memset(&servaddr, 0, sizeof(servaddr));
    servaddr.sin_family = AF_INET;
    servaddr.sin_port = htons(SERV_PORT);
    /* inet_pton returns 1 only when the address was parsed successfully. */
    if (inet_pton(AF_INET, IPADDRESS, &servaddr.sin_addr) != 1) {
        fprintf(stderr, "invalid server address: %s\n", IPADDRESS);
        close(sockfd);
        exit(1);
    }

    /* Running the event loop on an unconnected socket is pointless. */
    if (connect(sockfd, (struct sockaddr*)&servaddr, sizeof(servaddr)) == -1) {
        perror("connect error:");
        close(sockfd);
        exit(1);
    }

    handle_connection(sockfd); /* connection handling */
    close(sockfd);
    return 0;
}
/*
 * Client event loop: create an epoll instance, watch standard input for
 * readability and dispatch ready events until epoll_wait fails.
 *
 * sockfd - connected server socket, forwarded to the event handlers
 */
void handle_connection(int sockfd){
    int epollfd;
    struct epoll_event events[EPOLLEVENTS];
    char buf[MAXSIZE];
    int ret;

    /* The write handler treats buf as a C string, so it must never hold
     * indeterminate bytes. */
    memset(buf, 0, MAXSIZE);

    epollfd = epoll_create(FDSIZE);
    if (epollfd == -1) {
        perror("epoll_create error:");
        exit(1);
    }
    add_event(epollfd, STDIN_FILENO, EPOLLIN);

    while (1) {
        /* Block (timeout -1) until at least one descriptor is ready. */
        ret = epoll_wait(epollfd, events, EPOLLEVENTS, -1);
        if (ret == -1) {
            /* An interrupted wait is harmless: retry. Any other error
             * would repeat on every iteration, so leave the loop. */
            if (errno == EINTR) {
                continue;
            }
            perror("epoll_wait error:");
            break;
        }
        handle_events(epollfd, events, ret, sockfd, buf);
    }
    close(epollfd);
}
/*
 * Dispatch the first `num` entries of `events`.
 *
 * An entry with EPOLLIN set goes to do_read; otherwise an entry with
 * EPOLLOUT set goes to do_write. Entries with neither flag are skipped.
 * `sockfd` and `buf` are forwarded unchanged to the handlers.
 */
void handle_events(int epollfd, struct epoll_event *events, int num, int sockfd, char *buf){
    int idx = 0;

    while (idx < num) {
        const struct epoll_event *ready = &events[idx];
        const int ready_fd = ready->data.fd;

        idx++;

        if ((ready->events & EPOLLIN) != 0) {
            do_read(epollfd, ready_fd, sockfd, buf);
            continue;
        }
        if ((ready->events & EPOLLOUT) != 0) {
            do_write(epollfd, ready_fd, sockfd, buf);
        }
    }
}
/*
 * Read one chunk from fd (standard input or the server socket) into buf.
 *
 * epollfd - epoll instance driving the client
 * fd      - descriptor reported readable
 * sockfd  - connected server socket
 * buf     - destination buffer of MAXSIZE bytes
 *
 * Data from standard input arms the socket for writing so the line is
 * sent to the server. Data from the socket stops watching the socket and
 * arms standard output so the reply is printed. On error or end-of-file
 * the descriptor is closed.
 */
void do_read(int epollfd, int fd, int sockfd, char *buf){
    int nread;

    /* Leave room for the terminator: do_write treats buf as a C string. */
    nread = read(fd, buf, MAXSIZE - 1);
    if (nread == -1) {
        perror("read error:");
        close(fd);
    }
    else if (nread == 0){
        fprintf(stderr, "server close. \n");
        close(fd);
    }
    else{
        buf[nread] = '\0';
        if (fd == STDIN_FILENO) {
            /* User input: wait until the socket can take it. */
            add_event(epollfd, sockfd, EPOLLOUT);
        }
        else{
            /* Server reply: stop watching the socket and print it. */
            delete_event(epollfd, sockfd, EPOLLIN);
            add_event(epollfd, STDOUT_FILENO, EPOLLOUT);
        }
    }
}
/*
 * Write the contents of buf, suffixed with "_NN" (the running message
 * counter) and a newline, to fd, then clear buf.
 *
 * epollfd - epoll instance driving the client
 * fd      - descriptor reported writable (server socket or standard output)
 * sockfd  - connected server socket (unused here)
 * buf     - MAXSIZE-byte buffer holding the NUL-terminated message
 *
 * After writing to standard output the descriptor is removed from the
 * epoll set; after writing to the socket the socket is switched to
 * EPOLLIN to wait for the reply. On write error the descriptor is closed.
 */
void do_write(int epollfd, int fd, int sockfd, char *buf){
    int nwrite;
    /* Large enough for a full buffer plus "_", the counter digits,
     * the newline and the terminator, so nothing is truncated. */
    char temp[MAXSIZE + 16];
    size_t len;

    (void)sockfd;

    /* Strip the trailing newline only if one is present; an empty buffer
     * must not be indexed at -1. */
    len = strlen(buf);
    if (len > 0 && buf[len - 1] == '\n') {
        buf[len - 1] = '\0';
    }
    snprintf(temp, sizeof(temp), "%s_%02d\n", buf, count++);
    nwrite = write(fd, temp, strlen(temp));
    if (nwrite == -1) {
        perror("write error:");
        close(fd);
    }
    else{
        if (fd == STDOUT_FILENO) {
            /* Reply printed: standard output needs no further watching. */
            delete_event(epollfd, fd, EPOLLOUT);
        }
        else{
            /* Request sent: wait for the server's reply. */
            modify_event(epollfd, fd, EPOLLIN);
        }
    }
    memset(buf, 0, MAXSIZE);
}
/*
 * Register fd with the epoll instance epollfd.
 *
 * state - event mask (e.g. EPOLLIN, EPOLLOUT) to watch for
 *
 * The descriptor itself is stored as the event's user data. A failed
 * registration is reported on stderr.
 */
void add_event(int epollfd, int fd, int state){
    struct epoll_event ev;

    /* data is a union wider than fd; clear it so no indeterminate bytes
     * are handed to the kernel. */
    memset(&ev, 0, sizeof(ev));
    ev.events = state;
    ev.data.fd = fd;
    if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) == -1) {
        perror("epoll_ctl add error:");
    }
}
/*
 * Remove fd from the epoll instance epollfd.
 *
 * state - event mask copied into the request structure
 *
 * The result of epoll_ctl is not inspected.
 */
void delete_event(int epollfd, int fd, int state){
    struct epoll_event request;

    request.data.fd = fd;
    request.events = state;
    (void)epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &request);
}
/*
 * Replace the event mask of an already registered fd.
 *
 * state - new event mask (e.g. EPOLLIN, EPOLLOUT)
 *
 * The descriptor itself is stored as the event's user data. A failed
 * update is reported on stderr.
 */
void modify_event(int epollfd, int fd, int state){
    struct epoll_event ev;

    /* data is a union wider than fd; clear it so no indeterminate bytes
     * are handed to the kernel. */
    memset(&ev, 0, sizeof(ev));
    ev.events = state;
    ev.data.fd = fd;
    if (epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &ev) == -1) {
        perror("epoll_ctl mod error:");
    }
}
值得一提的是,服务器端设置了在获取到客户端发来的数据后要回包,读到数据后,把事件转为了可写状态,由写事件发包。
总结
select、poll和epoll都是多路IO复用的机制。本质上就是监听多个描述符,当某个描述符就绪时能够通知程序进行相应的读写操作。
1、epoll没有打开最大文件描述符的限制,具体数目可以在/proc/sys/fs/file-max中查看;
2、select/poll采用线性扫描全部集合的方式,导致效率随描述符数量增加呈线性下降。而epoll对每个fd使用回调函数实现,只有活跃的套接字才会主动调用回调函数,所以epoll仅会对活跃的套接字进行操作。
3、epoll使用mmap(内存映射)加速内核与用户空间的消息传递,通过内核与用户空间mmap处于同一块内存实现。对于select/poll,它们都有数据从用户空间到内核空间的大量拷贝,事件发生后又要将数据传到用户空间,并执行释放内存和剥离等待队列等工作,又是一次拷贝开销。
4、支持两种工作模式
- ET模式,是一种高速的工作模式,效率要高于LT。只有当事件到来时,内核才会通知进程文件描述符已就绪,之后若不再发生文件描述符变化的情况下,内核不会再去通知。此时若没有彻底将缓冲区的数据读完,则会导致缓冲区中的用户请求得不到响应(通知一次后不再通知);
- LT模式,是一种缺省的工作模式。事件到来后,只要文件描述符仍处于就绪状态(例如缓冲区中的数据尚未读完),内核就会不断地通知进程(每次调用epoll_wait都会再次返回该描述符)。