Beanstalk源码分析--数据结构设计

概述

beanstalk是多年前使用过的一个分布式任务队列，通过C实现，十分高效。和Redis（默认）的事件驱动框架一样，都是通过异步的epoll来实现，所以，能够高效的处理大量请求。
但不知什么原因，作者几年前已经不再维护其代码了。但我发现国内还是有一些人在使用该软件，为了能够更好的理解其运行机制，几年前对其代码进行了一些研究。先把分析的几篇发出来。
另外，我对其代码进行了fork，若有问题可以一起探讨解决之道。

beanstalk数据结构概览

这里写图片描述

基本数据结构解析

socket结构

beanstalk 对底层socket做了一层封装，通过面向对象的思想把操作封装在一起。

struct Socket {
    int    fd;   //监听socket fd
    Handle f;
    void   *x;
    int    added;
};

Handle是一个函数指针，参数是void* 和一个整数（这里是以完成三次握手的socket的fd）

typedef void(*Handle)(void*, int rw);

tube结构

该结构定义一个有名的任务队列，用来存储统一类型的job，是producer和consumer操作的对象。

struct tube {
    uint refs;
    char name[MAX_TUBE_NAME_LEN];    //队列名字，最大长度为201个字节
    Heap ready;
    Heap delay;
    struct ms waiting;  /* set of conns */
    struct stats stat;
    uint using_ct;
    uint watching_ct;
    int64 pause;
    int64 deadline_at;
    struct job buried;
};

// 队列名的最大长度为201个字符？作者为什么要限制到201?

#define MAX_TUBE_NAME_LEN 201

状态信息结构

struct stats {
    uint urgent_ct;
    uint waiting_ct;
    uint buried_ct;
    uint reserved_ct;
    uint pause_ct;
    uint64   total_delete_ct;
    uint64   total_jobs_ct;
};

ms结构定义

struct ms {
    size_t used, cap, last;
    void **items;
    ms_event_fn oninsert, onremove;
};
typedef void(*ms_event_fn)(ms a, void *item, size_t i);

Heap(堆)结构定义

struct Heap {
    int     cap;
    int     len;
    void    **data;
    Less    less;    // 函数指针，用来操作data
    Record  rec;    // 函数指针，对data进行操作
};
typedef int(*Less)(void*, void*);
typedef void(*Record)(void*, int);

job结构

该结构定义一个需要异步处理的任务，是Beanstalkd中的基本单元，需要放在一个tube中。

struct job {
    Jobrec r; // persistent fields; these get written to the wal

    /* bookeeping fields; these are in-memory only */
    char pad[6];  // 该字段不保存任何数据，为内存对齐而使用的填充位
    tube tube;  //定义一个tube指针，指向该job属于的队列
    job prev, next; /* linked list of jobs */ // 每个job都被保存到一个双向链表中
    job ht_next; /* Next job in a hash table list */ //job还被添加到hash表中
    size_t heap_index; /* where is this job in its current heap */ //该job属于哪个heap(堆)
    File *file;
    job  fnext; //?
    job  fprev; //?
    void *reserver;
    int walresv;
    int walused;

    char body[]; // written separately to the wal
};

job描述结构

// if you modify this struct, you must increment Walver above
struct Jobrec {
    uint64 id;
    uint32 pri;
    int64  delay;
    int64  ttr;
    int32  body_size;
    int64  created_at;
    int64  deadline_at;
    uint32 reserve_ct;
    uint32 timeout_ct;
    uint32 release_ct;
    uint32 bury_ct;
    uint32 kick_ct;
    byte   state;
};

ms结构

struct ms {
    size_t used, cap, last;
    void **items;
    ms_event_fn oninsert, onremove;
};

Server结构

该结构保存服务器的一些配置信息。

struct Server {
    char *port;    // 服务的端口
    char *addr;    // 服务的绑定地址
    char *user;    // 服务的启动用户

    Wal    wal;
    Socket sock;   //服务的socket信息
    Heap   conns;
};

Wal结构

struct Wal {
    int    filesize;
    int    use;
    char   *dir;
    File   *head;
    File   *cur;
    File   *tail;
    int    nfile;
    int    next;
    int    resv;  // bytes reserved
    int    alive; // bytes in use
    int64  nmig;  // migrations
    int64  nrec;  // records written ever
    int    wantsync;
    int64  syncrate;
    int64  lastsync;
    int    nocomp; // disable binlog compaction?
};

File结构

该结构维护了打开的文件和job的关系。

struct File {
    File *next;
    uint refs;
    int  seq;
    int  iswopen; // is open for writing
    int  fd;
    int  free;
    int  resv;
    char *path;
    Wal  *w;
    struct job jlist; // jobs written in this file
};

连接结构Conn

struct Conn {
    Server *srv;  // 该Conn结构对应的Server实体指针
    Socket sock;
    char   state;
    char   type;
    Conn   *next; // 下一个Conn的指针
    tube   use;
    int64  tickat;      // time at which to do more work
    int    tickpos;     // position in srv->conns
    job    soonest_job; // memoization of the soonest job
    int    rw;          // currently want: 'r', 'w', or 'h'
    int    pending_timeout;
    char   halfclosed;

    char cmd[LINE_BUF_SIZE]; // this string is NOT NUL-terminated
    int  cmd_len;
    int  cmd_read;

    char *reply;
    int  reply_len;
    int  reply_sent;
    char reply_buf[LINE_BUF_SIZE]; // this string IS NUL-terminated

    // How many bytes of in_job->body have been read so far. If in_job is NULL
    // while in_job_read is nonzero, we are in bit bucket mode and
    // in_job_read's meaning is inverted -- then it counts the bytes that
    // remain to be thrown away.
    int in_job_read;
    job in_job;  // a job to be read from the client

    job out_job;
    int out_job_sent;

    struct ms  watch;
    struct job reserved_jobs; // linked list header
};

总结

本文描述了beanstalk的基本数据结构，要理解系统的设计原理，首先要理解其数据结构的设计。