FastDFS源码目录介绍

 

源码目录包括了common,test,client,stroage,tracker

按文件夹顺序和首字母进行分析:

common文件夹:

common_define.h:

跳过首字母a的文件先介绍这个,是因为这个文件定义了整个系统的一些环境变量,包括bool类型,全局变量等等。下文中你没见过,我也没提的变量或者宏都取自这里。


avltree.c/avltree.h:

对于avl树的定义和实现,这是FastDFS实现trunk功能和单盘恢复功能所依赖的数据结构

 
  1. typedef struct tagAVLTreeNode {

  2. void *data;

  3. struct tagAVLTreeNode *left;

  4. struct tagAVLTreeNode *right;

  5. byte balance;

  6. } AVLTreeNode;

  7.  
  8. typedef struct tagAVLTreeInfo {

  9. AVLTreeNode *root;

  10. FreeDataFunc free_data_func;

  11. CompareFunc compare_func;

  12. } AVLTreeInfo;

经典的数据结构,没有修改的原汁原味。


base64.c/base64.h:

FastDFS得到文件包含的信息后,用base64算法对其编码生成文件ID。


chain.c/chain.hi:

对于链表的实现。

 
  1. typedef struct tagChainNode

  2. {

  3. void *data;

  4. struct tagChainNode *next;

  5. } ChainNode;

  6.  
  7. typedef struct

  8. {

  9. int type;

  10. ChainNode *head;

  11. ChainNode *tail;

  12. FreeDataFunc freeDataFunc;

  13. CompareFunc compareFunc;

  14. } ChainList;

type变量是定义链表的使用方式的:

CHAINTYPEINSERT: insert new node before head

CHAINTYPEAPPEND: insert new node after tail

CHAINTYPESORTED: sorted chain

在fast_mblock中#include了它,但是并没有使用,直接注释了这个include也成功编译无报错,可能后续会使用吧?这里会和鱼大咨询下。mark。


connectpool.c/connectpool.h:

连接池的定义与实现

 
  1. typedef struct

  2. {

  3. int sock;

  4. int port;

  5. char ip_addr[IP_ADDRESS_SIZE];

  6. } ConnectionInfo;

  7.  
  8. struct tagConnectionManager;

  9.  
  10. typedef struct tagConnectionNode {

  11. ConnectionInfo *conn;

  12. struct tagConnectionManager *manager;

  13. struct tagConnectionNode *next;

  14. time_t atime; //last access time

  15. } ConnectionNode;

  16.  
  17. typedef struct tagConnectionManager {

  18. ConnectionNode *head;

  19. int total_count; //total connections

  20. int free_count; //free connections

  21. pthread_mutex_t lock;

  22. } ConnectionManager;

  23.  
  24. typedef struct tagConnectionPool {

  25. HashArray hash_array; //key is ip:port, value is ConnectionManager

  26. pthread_mutex_t lock;

  27. int connect_timeout;

  28. int max_count_per_entry; //0 means no limit

  29.  
  30. /*

  31. connections whose the idle time exceeds this time will be closed

  32. */

  33. int max_idle_time;

  34. } ConnectionPool;

呃,注释已经一目了然了。

三层结构

pool->manager->node

pool使用哈希来定位manager,因为作为key的ip:port是唯一的,而后用链表来管理该节点的所有连接。


fastmblock.c/fastmblock.h:

链表的一个变种,存储有已分配的对象和已经释放的对象,大致相当于一个对象池,在trunk功能中被使用。

 
  1. /* free node chain */

  2. struct fast_mblock_node

  3. {

  4. struct fast_mblock_node *next;

  5. char data[0]; //the data buffer

  6. };

  7.  
  8. /* malloc chain */

  9. struct fast_mblock_malloc

  10. {

  11. struct fast_mblock_malloc *next;

  12. };

  13.  
  14. struct fast_mblock_man

  15. {

  16. struct fast_mblock_node *free_chain_head; //free node chain

  17. struct fast_mblock_malloc *malloc_chain_head; //malloc chain to be freed

  18. int element_size; //element size

  19. int alloc_elements_once; //alloc elements once

  20. pthread_mutex_t lock; //the lock for read / write free node chain

  21. };


fasttaskqueue.c/fasttaskqueue.h:

任务队列,挺重要的一个数据结构

 
  1. typedef struct ioevent_entry

  2. {

  3. int fd;

  4. FastTimerEntry timer;

  5. IOEventCallback callback;

  6. } IOEventEntry;

  7.  
  8. struct nio_thread_data

  9. {

  10. struct ioevent_puller ev_puller;

  11. struct fast_timer timer;

  12. int pipe_fds[2];

  13. struct fast_task_info *deleted_list; //链向已被删除的任务指针,复用了已经分配的内存

  14. };

  15.  
  16. struct fast_task_info

  17. {

  18. IOEventEntry event;

  19. char client_ip[IP_ADDRESS_SIZE];

  20. void *arg; //extra argument pointer

  21. char *data; //buffer for write or recv

  22. int size; //alloc size

  23. int length; //data length

  24. int offset; //current offset

  25. int req_count; //request count

  26. TaskFinishCallBack finish_callback; //任务结束回调

  27. struct nio_thread_data *thread_data;

  28. struct fast_task_info *next;

  29. };

  30.  
  31. struct fast_task_queue

  32. {

  33. struct fast_task_info *head; //头尾指针都存在,分别用来做队列的出队和入队

  34. struct fast_task_info *tail;

  35. pthread_mutex_t lock;

  36. int max_connections;

  37. int min_buff_size;

  38. int max_buff_size;

  39. int arg_size;

  40. bool malloc_whole_block;

  41. };


fasttimer.c/fasttimer.h:

时间哈希表,以unix时间戳作为key,用双向链表解决冲突,可以根据当前的使用量进行rehash等操作。

在刚才的fasttaskqueue中被使用

 
  1. typedef struct fast_timer_entry {

  2. int64_t expires;

  3. void *data;

  4. struct fast_timer_entry *prev;

  5. struct fast_timer_entry *next;

  6. bool rehash;

  7. } FastTimerEntry;

  8.  
  9. typedef struct fast_timer_slot {

  10. struct fast_timer_entry head;

  11. } FastTimerSlot;

  12.  
  13. typedef struct fast_timer {

  14. int slot_count; //time wheel slot count

  15. int64_t base_time; //base time for slot 0

  16. int64_t current_time;

  17. FastTimerSlot *slots;

  18. } FastTimer;


fdfsglobal.c/fdfsglobal.h:

定义了fdfs系统所使用的全局变量,包括超时,版本号等等

 
  1. int g_fdfs_connect_timeout = DEFAULT_CONNECT_TIMEOUT;

  2. int g_fdfs_network_timeout = DEFAULT_NETWORK_TIMEOUT;

  3. char g_fdfs_base_path[MAX_PATH_SIZE] = {'/', 't', 'm', 'p', '\0'};

  4. Version g_fdfs_version = {5, 1};

  5. bool g_use_connection_pool = false;

  6. ConnectionPool g_connection_pool;

  7. int g_connection_pool_max_idle_time = 3600;


fdfshttpshared.c/fdfshttpshare.h:

FastDFS使用token来防盗链和分享图片,这一段我也不确定。回头再来看。


hash.c/hash.h:

经典的哈希结构,在FastDFS中应用的很广

哈希找到域,而后用链表解决冲突

 
  1. typedef struct tagHashData

  2. {

  3. int key_len;

  4. int value_len;

  5. int malloc_value_size;

  6.  
  7. #ifdef HASH_STORE_HASH_CODE

  8. unsigned int hash_code;

  9. #endif

  10.  
  11. char *value;

  12. struct tagHashData *next; //解决冲突

  13. char key[0];

  14. } HashData;

  15.  
  16. typedef struct tagHashArray

  17. {

  18. HashData **buckets;

  19. HashFunc hash_func;

  20. int item_count;

  21. unsigned int *capacity;

  22. double load_factor; //hash的负载因子,在FastDFS中大于1.0进行rehash

  23. int64_t max_bytes; //最大占用字节,用于计算负载因子

  24. int64_t bytes_used; //已经使用字节,用于计算负载因子

  25. bool is_malloc_capacity;

  26. bool is_malloc_value;

  27. unsigned int lock_count; //锁总数,为了线程安全

  28. pthread_mutex_t *locks;

  29. } HashArray;

  30.  
  31. typedef struct tagHashStat //所有hash的统计情况

  32. {

  33. unsigned int capacity;

  34. int item_count;

  35. int bucket_used;

  36. double bucket_avg_length;

  37. int bucket_max_length;

  38. } HashStat;


httpfunc.c/httpfunc.h:

http功能已经被砍掉了,这个也回头来看。


inifilereader.c/inifilereader.h:

FastDFS用于初始化加载配置文件的函数。


ioevent.c/ioevent.h && ioeventloop.c/ioeventloop.h:

对epoll,kqueue进行简单封装,成为一个有时间和网络的事件库。这部分逻辑应该会开独立的一章来分析


linuxstacktrace.c/linuxstacktrace.h:

 
  1. /**

  2. * This source file is used to print out a stack-trace when your program

  3. * segfaults. It is relatively reliable and spot-on accurate.

  4. */

这个模块是在程序段错误后输出栈跟踪信息,呃似乎不是鱼大写的


localipfunc.c/localipfunc.h:

基于系统调用getifaddrs来获取本地IP


logger.c/logger.h:

这个太明显了,log模块


md5.c/md5.h:

fdfshttpshared.c中被调用,在fdfshttpgentoken的方法中对secretkey,file_id,timestamp进行md5得到token


mimefileparser.c/mimefileparser.h:

从配置文件中加载mime识别的配置,至于什么是mime。。我也不知道,我问问大神们看看。


osbits.h:

定义了OS的位数


processctrl.c/processctrl.h:

从配置文件中载入pid路径,定义了pid文件的增删查改,并且提供了进程停止,重启等方法


pthreadfunc.c/pthreadfunc.h:

线程相关的操作,包括初始化,创建,杀死线程


schedthread.c/schedthread.h:

定时任务线程的模块,按照hour:minute的期限执行任务

 
  1. typedef struct tagScheduleEntry

  2. {

  3. int id; //the task id

  4.  
  5. /* the time base to execute task, such as 00:00, interval is 3600,

  6. means execute the task every hour as 1:00, 2:00, 3:00 etc. */

  7. TimeInfo time_base;

  8.  
  9. int interval; //the interval for execute task, unit is second

  10.  
  11. TaskFunc task_func; //callback function

  12. void *func_args; //arguments pass to callback function

  13.  
  14. /* following are internal fields, do not set manually! */

  15. time_t next_call_time;

  16. struct tagScheduleEntry *next;

  17. } ScheduleEntry;

  18.  
  19. typedef struct

  20. {

  21. ScheduleEntry *entries;

  22. int count;

  23. } ScheduleArray;

  24.  
  25. typedef struct

  26. {

  27. ScheduleArray scheduleArray;

  28. ScheduleEntry *head; //schedule chain head

  29. ScheduleEntry *tail; //schedule chain tail

  30. bool *pcontinue_flag;

  31. } ScheduleContext;

稍微看了下实现的算法,这是一个变种的链表,实现了一个变种的队列。

但是所有的数据都存在scheduleArray这个数组里面,每次新任务插入后,会对数组按时间进行一次排序

这样可以保证头指针的是最先需要执行的。

而后每次对head进行出队,初始化next域以后重新从tail入队。

总体来看是非常的简单高效的。


sharedfunc.c/sharedfunc.h:

一些工具函数,比如设置随机种子什么的,没必要单独开个文件,所以放在一起了。


sockopt.c/sockopt.h:

socket的一些工具函数,进行了简单的封装。


tracker文件夹:

先分析tracker是因为tracker只集成了网络部分,而storage还有处理磁盘吞吐的,相对复杂一些

fdfssharefunc.c/fdfssharefunc.h

tracker和storage共用的一些工具函数,比如根据IP和端口获取tracker的ID诸如此类的


fdfs_trackerd.c:

tracker的入口函数


trackerdump.c/trackerdump.h:

实现了fdfsdumptrackerglobalvarstofile这个函数

当tracker收到了SIGUSR1或者SIGUSR2信号,将启动sigDumpHandler来调用这个函数,将tracker当前的状态dump进FastDFS跟目录的logs/tracker_dump.log中

关于如何根据该dump文件恢复的,目前没看到,后面再补充


trackerfunc.c/trackerfunc.h:

实现了trackerloadfromconffile这个函数

将tracker的一些基本必要信息,从conf_file中导出


trackerglobal.c/trackerglobal.h:

记录了tracker使用的一些全局变量


trackerhttpcheck.c/trackerhttpcheck.h:

这个模块会对tracker所管理的所有group的可用storage做检测,测试所有的http端口是否可用


trackermem.c/trackermem.h:

这个模块维护了内存的所有数据,包括集群运行情况等等,提供了save,change和load的接口对集群的总情况进行修改


trackernio.c/trackernio.h:

nio的模块在common/ioevent和common/ioevent_loop的基础上进行调用


trackerproto.c/trackerproto.h:

定义了tracker通信的协议,有时间可以分析下。


trackerrelationship.c/trackerrelationship.h:

定义了tracker之间通信的方式,并且定义了选出leader,ping leader等功能,有时间可以分析下。


trackerservice.c/trackerservice.h:

tracker的逻辑层处理,各个请求在nio后进入work线程,而后分发到各个模块


trackerstatus.c/trackerstatus.h:

tracker状态的save和load模块


tracker_types.h:

定义了tracker所用到的所有类型


storage文件夹:

fdfs_storage.c:storage的入口函数


storagedio.c/storagedio.h:

使用common/fasttaskqueue实现了异步的磁盘IO,新任务由storagedioqueue_push方法入队

同时包含了trunk模块的处理,trunk模块后面再提


storagediskrecovery.c/storagediskrecovery.h:

storage的单盘恢复算法,用于故障恢复


storagedump.c/storagedump.h:

和tracker_dump原理相同


storagefunc.c/storagefunc.h:

storagefuncinit函数对应着tracker的trackerloadfromconffile函数

除此之外,还提供了根据storage_id或者ip判断是否是本机的函数

还提供了一些数据持久化的接口


storageglobal.c/storageglobal.h:

定义了storage使用的全局变量


storageipchangeddealer.c/storageipchangerdealer.h:

storage实现ip地址改变的模块

 
  1. int storage_get_my_tracker_client_ip(); //获取storage作为tracker客户端的ip

  2.  
  3. int storage_changelog_req(); //接入tracker的changelog

  4. int storage_check_ip_changed(); //检查ip是否改变


storagenio.c/storagenio.h:

nio的模块在common/ioevent和common/ioevent_loop的基础上进行调用


storageparamgetter.c/storageparamgetter.h:

storagegetparamsfromtracker函数,顾名思义,从tracker获取自身的参数


storageservice.c/storageservice.h:

storage的逻辑层处理,各个请求在nio后进入work线程,而后分发到各个模块


storagesync.c/storagesync.h:

storage的同步模块,众所周知,FastDFS的同步模块是根据时间戳进行的弱一致性同步


trackerclientthread.c/trackerclientthread.h

tracker_report的前缀提示的很明显,这部分是storage作为tracker的客户端,向tracker发送心跳,汇报自己的状态等等

全部接口如下:

 
  1. int tracker_report_init();

  2. int tracker_report_destroy();

  3. int tracker_report_thread_start();

  4. int kill_tracker_report_threads();

  5.  
  6. int tracker_report_join(ConnectionInfo *pTrackerServer, \

  7. const int tracker_index, const bool sync_old_done);

  8. int tracker_report_storage_status(ConnectionInfo *pTrackerServer, \

  9. FDFSStorageBrief *briefServer);

  10. int tracker_sync_src_req(ConnectionInfo *pTrackerServer, \

  11. StorageBinLogReader *pReader);

  12. int tracker_sync_diff_servers(ConnectionInfo *pTrackerServer, \

  13. FDFSStorageBrief *briefServers, const int server_count);

  14. int tracker_deal_changelog_response(ConnectionInfo *pTrackerServer);

trunk_mgr:

这是storage文件的子目录,实现了trunk功能

trunk功能比较零碎,我目前还没搞明白,比如为什么storage和trunk模块交互,storage是作为client出现的,而不是直接调用trunk。

这部分内容应该要单独开一章来分析。


FastDFS源码解析(2)--------trunk模块分析

trunk功能是把大量小文件合并存储,大量的小文件会大量消耗linux文件系统的node,使树变的过于庞大,降低了读写效率

因此小文件合并存储能显著缓解这一压力

我将对上传和下载流程分析来追踪trunk模块的行为。

在storageservice模块中,storageservice.c/storagedealtask对请求安装cmd进行分离逻辑来处理

在storageuploadfile中处理上传逻辑

 
  1. /**

  2. 1 byte: store path index

  3. 8 bytes: file size

  4. FDFS_FILE_EXT_NAME_MAX_LEN bytes: file ext name, do not include dot (.)

  5. file size bytes: file content

  6. **/

  7. static int storage_upload_file(struct fast_task_info *pTask, bool bAppenderFile)

  8. {

  9. StorageClientInfo *pClientInfo;

  10. StorageFileContext *pFileContext;

  11. DisconnectCleanFunc clean_func;

  12. char *p;

  13. char filename[128];

  14. char file_ext_name[FDFS_FILE_PREFIX_MAX_LEN + 1];

  15. int64_t nInPackLen;

  16. int64_t file_offset;

  17. int64_t file_bytes;

  18. int crc32;

  19. int store_path_index;

  20. int result;

  21. int filename_len;

  22.  
  23. pClientInfo = (StorageClientInfo *)pTask->arg;

  24. pFileContext = &(pClientInfo->file_context);

  25. nInPackLen = pClientInfo->total_length - sizeof(TrackerHeader);

  26.  
  27. //对包头大小进行验证

  28.  
  29. if (nInPackLen < 1 + FDFS_PROTO_PKG_LEN_SIZE +

  30. FDFS_FILE_EXT_NAME_MAX_LEN)

  31. {

  32. logError("file: "__FILE__", line: %d, " \

  33. "cmd=%d, client ip: %s, package size " \

  34. INT64_PRINTF_FORMAT" is not correct, " \

  35. "expect length >= %d", __LINE__, \

  36. STORAGE_PROTO_CMD_UPLOAD_FILE, \

  37. pTask->client_ip, nInPackLen, \

  38. 1 + FDFS_PROTO_PKG_LEN_SIZE + \

  39. FDFS_FILE_EXT_NAME_MAX_LEN);

  40. pClientInfo->total_length = sizeof(TrackerHeader);

  41. return EINVAL;

  42. }

  43.  
  44. //跳过包头第一段,获得文件路径索引号

  45. p = pTask->data + sizeof(TrackerHeader);

  46. store_path_index = *p++;

  47.  
  48. if (store_path_index == -1)

  49. {

  50. if ((result=storage_get_storage_path_index( \

  51. &store_path_index)) != 0)

  52. {

  53. logError("file: "__FILE__", line: %d, " \

  54. "get_storage_path_index fail, " \

  55. "errno: %d, error info: %s", __LINE__, \

  56. result, STRERROR(result));

  57. pClientInfo->total_length = sizeof(TrackerHeader);

  58. return result;

  59. }

  60. }

  61. else if (store_path_index < 0 || store_path_index >= \

  62. g_fdfs_store_paths.count)

  63. {

  64. logError("file: "__FILE__", line: %d, " \

  65. "client ip: %s, store_path_index: %d " \

  66. "is invalid", __LINE__, \

  67. pTask->client_ip, store_path_index);

  68. pClientInfo->total_length = sizeof(TrackerHeader);

  69. return EINVAL;

  70. }

  71.  
  72. //获取文件大小

  73. file_bytes = buff2long(p);

  74. p += FDFS_PROTO_PKG_LEN_SIZE;

  75. if (file_bytes < 0 || file_bytes != nInPackLen - \

  76. (1 + FDFS_PROTO_PKG_LEN_SIZE + \

  77. FDFS_FILE_EXT_NAME_MAX_LEN))

  78. {

  79. logError("file: "__FILE__", line: %d, " \

  80. "client ip: %s, pkg length is not correct, " \

  81. "invalid file bytes: "INT64_PRINTF_FORMAT \

  82. ", total body length: "INT64_PRINTF_FORMAT, \

  83. __LINE__, pTask->client_ip, file_bytes, nInPackLen);

  84. pClientInfo->total_length = sizeof(TrackerHeader);

  85. return EINVAL;

  86. }

  87.  
  88. //获取文件名

  89. memcpy(file_ext_name, p, FDFS_FILE_EXT_NAME_MAX_LEN);

  90. *(file_ext_name + FDFS_FILE_EXT_NAME_MAX_LEN) = '\0';

  91. p += FDFS_FILE_EXT_NAME_MAX_LEN;

  92. if ((result=fdfs_validate_filename(file_ext_name)) != 0)

  93. {

  94. logError("file: "__FILE__", line: %d, " \

  95. "client ip: %s, file_ext_name: %s " \

  96. "is invalid!", __LINE__, \

  97. pTask->client_ip, file_ext_name);

  98. pClientInfo->total_length = sizeof(TrackerHeader);

  99. return result;

  100. }

  101.  
  102. pFileContext->calc_crc32 = true;

  103. pFileContext->calc_file_hash = g_check_file_duplicate;

  104. pFileContext->extra_info.upload.start_time = g_current_time;

  105.  
  106. strcpy(pFileContext->extra_info.upload.file_ext_name, file_ext_name);

  107. storage_format_ext_name(file_ext_name, \

  108. pFileContext->extra_info.upload.formatted_ext_name);

  109. pFileContext->extra_info.upload.trunk_info.path. \

  110. store_path_index = store_path_index;

  111. pFileContext->extra_info.upload.file_type = _FILE_TYPE_REGULAR;

  112. pFileContext->sync_flag = STORAGE_OP_TYPE_SOURCE_CREATE_FILE;

  113. pFileContext->timestamp2log = pFileContext->extra_info.upload.start_time;

  114. pFileContext->op = FDFS_STORAGE_FILE_OP_WRITE;

  115.  
  116. //如果是追加写文件,注目额外的文件追加命令值

  117. if (bAppenderFile)

  118. {

  119. pFileContext->extra_info.upload.file_type |= \

  120. _FILE_TYPE_APPENDER;

  121. }

  122. else

  123. {

  124. //判断是否开了trunk_file功能,根据大小检查是否需要trunk合并存储

  125. if (g_if_use_trunk_file && trunk_check_size( \

  126. TRUNK_CALC_SIZE(file_bytes)))

  127. {

  128. pFileContext->extra_info.upload.file_type |= \

  129. _FILE_TYPE_TRUNK;

  130. }

  131. }

  132.  
  133. //根据上一步的检查需要开启trunk合并存储

  134. if (pFileContext->extra_info.upload.file_type & _FILE_TYPE_TRUNK)

  135. {

  136. FDFSTrunkFullInfo *pTrunkInfo;

  137.  
  138. pFileContext->extra_info.upload.if_sub_path_alloced = true;

  139. pTrunkInfo = &(pFileContext->extra_info.upload.trunk_info);

  140. //为trunk文件名分配空间,并添加到缓存

  141. if ((result=trunk_client_trunk_alloc_space( \

  142. TRUNK_CALC_SIZE(file_bytes), pTrunkInfo)) != 0)

  143. {

  144. pClientInfo->total_length = sizeof(TrackerHeader);

  145. return result;

  146. }

  147.  
  148. clean_func = dio_trunk_write_finish_clean_up;

  149. file_offset = TRUNK_FILE_START_OFFSET((*pTrunkInfo));

  150. pFileContext->extra_info.upload.if_gen_filename = true;

  151. trunk_get_full_filename(pTrunkInfo, pFileContext->filename, \

  152. sizeof(pFileContext->filename));

  153. //注册trunk操作的回调

  154. pFileContext->extra_info.upload.before_open_callback = \

  155. dio_check_trunk_file_when_upload;

  156. pFileContext->extra_info.upload.before_close_callback = \

  157. dio_write_chunk_header;

  158. pFileContext->open_flags = O_RDWR | g_extra_open_file_flags;

  159. }

  160. else

  161. {

  162. //普通文件的方式,略过

  163. ...

  164. }

  165.  
  166. return storage_write_to_file(pTask, file_offset, file_bytes, \

  167. p - pTask->data, dio_write_file, \

  168. storage_upload_file_done_callback, \

  169. clean_func, store_path_index);

  170. }

追踪一下trunkclienttrunkallocspace的实现

 
  1. int trunk_client_trunk_alloc_space(const int file_size, \

  2. FDFSTrunkFullInfo *pTrunkInfo)

  3. {

  4. int result;

  5. ConnectionInfo trunk_server;

  6. ConnectionInfo *pTrunkServer;

  7.  
  8. //如果自己就是trunker,直接操作

  9. if (g_if_trunker_self)

  10. {

  11. return trunk_alloc_space(file_size, pTrunkInfo);

  12. }

  13.  
  14. //否则根据trunk_server的ip和port进行连接

  15. if (*(g_trunk_server.ip_addr) == '\0')

  16. {

  17. logError("file: "__FILE__", line: %d, " \

  18. "no trunk server", __LINE__);

  19. return EAGAIN;

  20. }

  21.  
  22. memcpy(&trunk_server, &g_trunk_server, sizeof(ConnectionInfo));

  23. if ((pTrunkServer=tracker_connect_server(&trunk_server, &result)) == NULL)

  24. {

  25. logError("file: "__FILE__", line: %d, " \

  26. "can't alloc trunk space because connect to trunk " \

  27. "server %s:%d fail, errno: %d", __LINE__, \

  28. trunk_server.ip_addr, trunk_server.port, result);

  29. return result;

  30. }

  31.  
  32. //使用client api进行操作

  33. result = trunk_client_trunk_do_alloc_space(pTrunkServer, \

  34. file_size, pTrunkInfo);

  35.  
  36. tracker_disconnect_server_ex(pTrunkServer, result != 0);

  37. return result;

  38. }

对直接调用和client_api操作分别追踪

 
  1. nt trunk_alloc_space(const int size, FDFSTrunkFullInfo *pResult)

  2. {

  3. FDFSTrunkSlot target_slot;

  4. FDFSTrunkSlot *pSlot;

  5. FDFSTrunkNode *pPreviousNode;

  6. FDFSTrunkNode *pTrunkNode;

  7. int result;

  8.  
  9. STORAGE_TRUNK_CHECK_STATUS();

  10.  
  11. target_slot.size = (size > g_slot_min_size) ? size : g_slot_min_size;

  12. target_slot.head = NULL;

  13.  
  14. pPreviousNode = NULL;

  15. pTrunkNode = NULL;

  16. //分配trunk需要锁

  17. pthread_mutex_lock(&trunk_mem_lock);

  18. //寻找可以插入该文件的地方

  19. while (1)

  20. {

  21. pSlot = (FDFSTrunkSlot *)avl_tree_find_ge(tree_info_by_sizes \

  22. + pResult->path.store_path_index, &target_slot);

  23. if (pSlot == NULL)

  24. {

  25. break;

  26. }

  27.  
  28. pPreviousNode = NULL;

  29. pTrunkNode = pSlot->head;

  30. while (pTrunkNode != NULL && \

  31. pTrunkNode->trunk.status == FDFS_TRUNK_STATUS_HOLD)

  32. {

  33. pPreviousNode = pTrunkNode;

  34. pTrunkNode = pTrunkNode->next;

  35. }

  36.  
  37. if (pTrunkNode != NULL)

  38. {

  39. break;

  40. }

  41.  
  42. target_slot.size = pSlot->size + 1;

  43. }

  44.  
  45. //找到了,于是插入

  46. if (pTrunkNode != NULL)

  47. {

  48. if (pPreviousNode == NULL)

  49. {

  50. pSlot->head = pTrunkNode->next;

  51. if (pSlot->head == NULL)

  52. {

  53. trunk_delete_size_tree_entry(pResult->path. \

  54. store_path_index, pSlot);

  55. }

  56. }

  57. else

  58. {

  59. pPreviousNode->next = pTrunkNode->next;

  60. }

  61.  
  62. trunk_free_block_delete(&(pTrunkNode->trunk));

  63. }

  64. else

  65. {

  66. //没找到,为他创建一个单独的trunk_file

  67. pTrunkNode = trunk_create_trunk_file(pResult->path. \

  68. store_path_index, &result);

  69. if (pTrunkNode == NULL)

  70. {

  71. pthread_mutex_unlock(&trunk_mem_lock);

  72. return result;

  73. }

  74. }

  75. pthread_mutex_unlock(&trunk_mem_lock);

  76.  
  77. result = trunk_split(pTrunkNode, size);

  78. if (result != 0)

  79. {

  80. return result;

  81. }

  82.  
  83. pTrunkNode->trunk.status = FDFS_TRUNK_STATUS_HOLD;

  84. result = trunk_add_free_block(pTrunkNode, true);

  85. if (result == 0)

  86. {

  87. memcpy(pResult, &(pTrunkNode->trunk), \

  88. sizeof(FDFSTrunkFullInfo));

  89. }

  90.  
  91. return result;

  92. }

 
  1. static int trunk_client_trunk_do_alloc_space(ConnectionInfo *pTrunkServer, \

  2. const int file_size, FDFSTrunkFullInfo *pTrunkInfo)

  3. {

  4. TrackerHeader *pHeader;

  5.  
  6. //初始化请求包等等数据,略过

  7. ...

  8.  
  9. pHeader->cmd = STORAGE_PROTO_CMD_TRUNK_ALLOC_SPACE;

  10.  
  11. if ((result=tcpsenddata_nb(pTrunkServer->sock, out_buff, \

  12. sizeof(out_buff), g_fdfs_network_timeout)) != 0)

  13. {

  14. logError("file: "__FILE__", line: %d, " \

  15. "send data to storage server %s:%d fail, " \

  16. "errno: %d, error info: %s", __LINE__, \

  17. pTrunkServer->ip_addr, pTrunkServer->port, \

  18. result, STRERROR(result));

  19.  
  20. return result;

  21. }

  22.  
  23. p = (char *)&trunkBuff;

  24. if ((result=fdfs_recv_response(pTrunkServer, \

  25. &p, sizeof(FDFSTrunkInfoBuff), &in_bytes)) != 0)

  26. {

  27. return result;

  28. }

  29.  
  30. //设置pTrunckInfo信息,略过

  31. ...

  32.  
  33. return 0;

  34. }

追踪解析STORAGEPROTOCMDTRUNKALLOC_SPACE行为的服务端函数

storageservice.c会将其由storageservertrunkalloc_space函数来解析

 
  1. /**

  2. * request package format:

  3. * FDFS_GROUP_NAME_MAX_LEN bytes: group_name

  4. * 4 bytes: file size

  5. * 1 bytes: store_path_index

  6. *

  7. * response package format:

  8. * 1 byte: store_path_index

  9. * 1 byte: sub_path_high

  10. * 1 byte: sub_path_low

  11. * 4 bytes: trunk file id

  12. * 4 bytes: trunk offset

  13. * 4 bytes: trunk size

  14. * **/

  15. static int storage_server_trunk_alloc_space(struct fast_task_info *pTask)

  16. {

  17. StorageClientInfo *pClientInfo;

  18. FDFSTrunkInfoBuff *pApplyBody;

  19. char *in_buff;

  20. char group_name[FDFS_GROUP_NAME_MAX_LEN + 1];

  21. FDFSTrunkFullInfo trunkInfo;

  22. int64_t nInPackLen;

  23. int file_size;

  24. int result;

  25.  
  26. pClientInfo = (StorageClientInfo *)pTask->arg;

  27. nInPackLen = pClientInfo->total_length - sizeof(TrackerHeader);

  28. pClientInfo->total_length = sizeof(TrackerHeader);

  29.  
  30. CHECK_TRUNK_SERVER(pTask)

  31.  
  32. if (nInPackLen != FDFS_GROUP_NAME_MAX_LEN + 5)

  33. {

  34. logError("file: "__FILE__", line: %d, " \

  35. "cmd=%d, client ip: %s, package size " \

  36. INT64_PRINTF_FORMAT" is not correct, " \

  37. "expect length: %d", __LINE__, \

  38. STORAGE_PROTO_CMD_TRUNK_ALLOC_SPACE, \

  39. pTask->client_ip, nInPackLen, \

  40. FDFS_GROUP_NAME_MAX_LEN + 5);

  41. return EINVAL;

  42. }

  43.  
  44. in_buff = pTask->data + sizeof(TrackerHeader);

  45. memcpy(group_name, in_buff, FDFS_GROUP_NAME_MAX_LEN);

  46. *(group_name + FDFS_GROUP_NAME_MAX_LEN) = '\0';

  47. if (strcmp(group_name, g_group_name) != 0)

  48. {

  49. logError("file: "__FILE__", line: %d, " \

  50. "client ip:%s, group_name: %s " \

  51. "not correct, should be: %s", \

  52. __LINE__, pTask->client_ip, \

  53. group_name, g_group_name);

  54. return EINVAL;

  55. }

  56.  
  57. file_size = buff2int(in_buff + FDFS_GROUP_NAME_MAX_LEN);

  58. if (file_size < 0 || !trunk_check_size(file_size))

  59. {

  60. logError("file: "__FILE__", line: %d, " \

  61. "client ip:%s, invalid file size: %d", \

  62. __LINE__, pTask->client_ip, file_size);

  63. return EINVAL;

  64. }

  65.  
  66. trunkInfo.path.store_path_index = *(in_buff+FDFS_GROUP_NAME_MAX_LEN+4);

  67. //实质还是调用的trunk_alloc_space

  68. if ((result=trunk_alloc_space(file_size, &trunkInfo)) != 0)

  69. {

  70. return result;

  71. }

  72.  
  73. pApplyBody = (FDFSTrunkInfoBuff *)(pTask->data+sizeof(TrackerHeader));

  74. pApplyBody->store_path_index = trunkInfo.path.store_path_index;

  75. pApplyBody->sub_path_high = trunkInfo.path.sub_path_high;

  76. pApplyBody->sub_path_low = trunkInfo.path.sub_path_low;

  77. int2buff(trunkInfo.file.id, pApplyBody->id);

  78. int2buff(trunkInfo.file.offset, pApplyBody->offset);

  79. int2buff(trunkInfo.file.size, pApplyBody->size);

  80.  
  81. pClientInfo->total_length = sizeof(TrackerHeader) + \

  82. sizeof(FDFSTrunkInfoBuff);

  83. return 0;

  84. }

trunkclienttrunkallocspace会向同组内唯一的trunk_server申请空间

最终的实现还是trunkallocspace函数

trunk相当于一个KV吧。介个会不会出现单点问题,这台trunk失效以后如何冗余故障,接着往下分析看看

以下这段函数是在trackerclientthread里面的,大致是storage和tracker的一个交互,如果有故障冗余,这里应该存在机制

 
  1. static int tracker_check_response(ConnectionInfo *pTrackerServer, \

  2. bool *bServerPortChanged)

  3. {

  4. int64_t nInPackLen;

  5. TrackerHeader resp;

  6. int server_count;

  7. int result;

  8. char in_buff[1 + (2 + FDFS_MAX_SERVERS_EACH_GROUP) * \

  9. sizeof(FDFSStorageBrief)];

  10. FDFSStorageBrief *pBriefServers;

  11. char *pFlags;

  12.  
  13. //解析包

  14. ...

  15.  
  16. //tracker_leader变化

  17. if ((*pFlags) & FDFS_CHANGE_FLAG_TRACKER_LEADER)

  18. {

  19. ...

  20. }

  21.  
  22. //trunk_leader变化

  23. if ((*pFlags) & FDFS_CHANGE_FLAG_TRUNK_SERVER)

  24. {

  25. if (server_count < 1)

  26. {

  27. logError("file: "__FILE__", line: %d, " \

  28. "tracker server %s:%d, reponse server " \

  29. "count: %d < 1", __LINE__, \

  30. pTrackerServer->ip_addr, \

  31. pTrackerServer->port, server_count);

  32. return EINVAL;

  33. }

  34.  
  35. //未启动trunk服务,从tracker重新加载

  36. if (!g_if_use_trunk_file)

  37. {

  38. logInfo("file: "__FILE__", line: %d, " \

  39. "reload parameters from tracker server", \

  40. __LINE__);

  41. storage_get_params_from_tracker();

  42. }

  43.  
  44. //还未启动trunk服务,报错

  45. if (!g_if_use_trunk_file)

  46. {

  47. logWarning("file: "__FILE__", line: %d, " \

  48. "tracker server %s:%d, " \

  49. "my g_if_use_trunk_file is false, " \

  50. "can't support trunk server!", \

  51. __LINE__, pTrackerServer->ip_addr, \

  52. pTrackerServer->port);

  53. }

  54. else

  55. {

  56. memcpy(g_trunk_server.ip_addr, pBriefServers->ip_addr, \

  57. IP_ADDRESS_SIZE - 1);

  58. *(g_trunk_server.ip_addr + (IP_ADDRESS_SIZE - 1)) = '\0';

  59. g_trunk_server.port = buff2int(pBriefServers->port);

  60. //如果本地的ip端口和trunk_server一致

  61. if (is_local_host_ip(g_trunk_server.ip_addr) && \

  62. g_trunk_server.port == g_server_port)

  63. {

  64. //我已经是trunk了,tracker重启把我重新选为trunk了

  65. if (g_if_trunker_self)

  66. {

  67. logWarning("file: "__FILE__", line: %d, " \

  68. "I am already the trunk server %s:%d, " \

  69. "may be the tracker server restart", \

  70. __LINE__, g_trunk_server.ip_addr, \

  71. g_trunk_server.port);

  72. }

  73. else

  74. {

  75. //我成为了新的trunk

  76. logInfo("file: "__FILE__", line: %d, " \

  77. "I am the the trunk server %s:%d", __LINE__, \

  78. g_trunk_server.ip_addr, g_trunk_server.port);

  79.  
  80. tracker_fetch_trunk_fid(pTrackerServer);

  81. g_if_trunker_self = true;

  82.  
  83. if ((result=storage_trunk_init()) != 0)

  84. {

  85. return result;

  86. }

  87.  
  88. if (g_trunk_create_file_advance && \

  89. g_trunk_create_file_interval > 0)

  90. {

  91. ScheduleArray scheduleArray;

  92. ScheduleEntry entries[1];

  93.  
  94. entries[0].id = TRUNK_FILE_CREATOR_TASK_ID;

  95. entries[0].time_base = g_trunk_create_file_time_base;

  96. entries[0].interval = g_trunk_create_file_interval;

  97. entries[0].task_func = trunk_create_trunk_file_advance;

  98. entries[0].func_args = NULL;

  99.  
  100. scheduleArray.count = 1;

  101. scheduleArray.entries = entries;

  102. sched_add_entries(&scheduleArray);

  103. }

  104.  
  105. trunk_sync_thread_start_all();

  106. }

  107. }

  108. else

  109. {

  110. logInfo("file: "__FILE__", line: %d, " \

  111. "the trunk server is %s:%d", __LINE__, \

  112. g_trunk_server.ip_addr, g_trunk_server.port);

  113.  
  114. //我以前是trunk,我让权

  115. if (g_if_trunker_self)

  116. {

  117. int saved_trunk_sync_thread_count;

  118.  
  119. logWarning("file: "__FILE__", line: %d, " \

  120. "I am the old trunk server, " \

  121. "the new trunk server is %s:%d", \

  122. __LINE__, g_trunk_server.ip_addr, \

  123. g_trunk_server.port);

  124.  
  125. tracker_report_trunk_fid(pTrackerServer);

  126. g_if_trunker_self = false;

  127.  
  128. saved_trunk_sync_thread_count = \

  129. g_trunk_sync_thread_count;

  130. if (saved_trunk_sync_thread_count > 0)

  131. {

  132. logInfo("file: "__FILE__", line: %d, "\

  133. "waiting %d trunk sync " \

  134. "threads exit ...", __LINE__, \

  135. saved_trunk_sync_thread_count);

  136. }

  137.  
  138. while (g_trunk_sync_thread_count > 0)

  139. {

  140. usleep(50000);

  141. }

  142.  
  143. if (saved_trunk_sync_thread_count > 0)

  144. {

  145. logInfo("file: "__FILE__", line: %d, " \

  146. "%d trunk sync threads exited",\

  147. __LINE__, \

  148. saved_trunk_sync_thread_count);

  149. }

  150.  
  151. storage_trunk_destroy_ex(true);

  152. if (g_trunk_create_file_advance && \

  153. g_trunk_create_file_interval > 0)

  154. {

  155. sched_del_entry(TRUNK_FILE_CREATOR_TASK_ID);

  156. }

  157. }

  158. }

  159. }

  160.  
  161. pBriefServers += 1;

  162. server_count -= 1;

  163. }

  164.  
  165. if (!((*pFlags) & FDFS_CHANGE_FLAG_GROUP_SERVER))

  166. {

  167. return 0;

  168. }

  169.  
  170. /*

  171. //printf("resp server count=%d\n", server_count);

  172. {

  173. int i;

  174. for (i=0; i<server_count; i++)

  175. {

  176. //printf("%d. %d:%s\n", i+1, pBriefServers[i].status, \

  177. pBriefServers[i].ip_addr);

  178. }

  179. }

  180. */

  181.  
  182. if (*bServerPortChanged)

  183. {

  184. if (!g_use_storage_id)

  185. {

  186. FDFSStorageBrief *pStorageEnd;

  187. FDFSStorageBrief *pStorage;

  188.  
  189. *bServerPortChanged = false;

  190. pStorageEnd = pBriefServers + server_count;

  191. for (pStorage=pBriefServers; pStorage<pStorageEnd;

  192. pStorage++)

  193. {

  194. if (strcmp(pStorage->id, g_my_server_id_str) == 0)

  195. {

  196. continue;

  197. }

  198.  
  199. tracker_rename_mark_files(pStorage->ip_addr, \

  200. g_last_server_port, pStorage->ip_addr, \

  201. g_server_port);

  202. }

  203. }

  204.  
  205. if (g_server_port != g_last_server_port)

  206. {

  207. g_last_server_port = g_server_port;

  208. if ((result=storage_write_to_sync_ini_file()) != 0)

  209. {

  210. return result;

  211. }

  212. }

  213. }

  214.  
  215. return tracker_merge_servers(pTrackerServer, \

  216. pBriefServers, server_count);

  217. }

可以看到,trunk的失败确实是存在冗余机制,由tracker来选出trunk。

trunk的分析暂告一段落,删除文件后是否存在文件空洞,空洞的利用率如何,都得用数据说话才行哈。

总结:

每个组都有唯一的trunk leader,组内所有trunk文件的信息,由这个trunk leader内部组织的avl树来保存。

上传文件后,storage会向trunk leader发起申请空间的请求,这时trunk leader会使用一个全局的锁,获得了trunk存储的位置后,storage在本地写磁盘。

下载文件时,trunk信息在文件名里面已经包含,只需要直接读即可。

使用trunk方式主要是为了解决node过多造成读写性能下降的问题,但是引入trunk方式本身也会造成一定的性能损耗。

目前感觉我对trunk功能还是hold不住,包括如果trunk出错,怎么样恢复trunk文件的数据,因为没有提供的官方的工具,所以不太敢用。

以后如果有需求在跟进,先告一段落了吧。


FastDFS源码解析(3)--------通信协议分析

就上传和下载进行分析,其他暂时略过

上传:

1 根据ip,port连接上tracker

2 发送一个10字节的包,其中第9个字节为TRACKER_PROTO_CMD_SERVICE_QUERY_STORE_WITHOUT_GROUP_ONE,也就是101

3 接受一个10字节的包,其中第10个字节为返回状态,如果是0,说明一切正常

4 接受的这个包,0-8字节是下面要接收的包的大小,通过以下算法可以还原成数字

 
  1. int64_t buff2long(const char *buff)

  2. {

  3. unsigned char *p;

  4. p = (unsigned char *)buff;

  5. return (((int64_t)(*p)) << 56) | \

  6. (((int64_t)(*(p+1))) << 48) | \

  7. (((int64_t)(*(p+2))) << 40) | \

  8. (((int64_t)(*(p+3))) << 32) | \

  9. (((int64_t)(*(p+4))) << 24) | \

  10. (((int64_t)(*(p+5))) << 16) | \

  11. (((int64_t)(*(p+6))) << 8) | \

  12. ((int64_t)(*(p+7)));

  13. }

  14.  
  15. void long2buff(int64_t n, char *buff)

  16. {

  17. unsigned char *p;

  18. p = (unsigned char *)buff;

  19. *p++ = (n >> 56) & 0xFF;

  20. *p++ = (n >> 48) & 0xFF;

  21. *p++ = (n >> 40) & 0xFF;

  22. *p++ = (n >> 32) & 0xFF;

  23. *p++ = (n >> 24) & 0xFF;

  24. *p++ = (n >> 16) & 0xFF;

  25. *p++ = (n >> 8) & 0xFF;

  26. *p++ = n & 0xFF;

  27. }

5 读完这个数字对应的字节数目,这个数字应当有TRACKER_QUERY_STORAGE_STORE_BODY_LEN长,否则出错

 
  1. #define TRACKER_QUERY_STORAGE_STORE_BODY_LEN (FDFS_GROUP_NAME_MAX_LEN \

  2. + IP_ADDRESS_SIZE - 1 + FDFS_PROTO_PKG_LEN_SIZE + 1)

也就是16+16-1+8+1 = 40

6 这40个字节,头16字节是组名,接着15字节是IP地址,接着8字节是端口号,还是根据buff2long算法还原成数字,最后1字节是store_path_index

tracker交互完毕,此时进行storage操作

7 根据ip和端口连接storage

8 发送25字节的包

头10字节是TrackerHeader一样的结构,其中1-8字节的内容为filesize+这个包的大小(25)-头的大小(10),也就是file_size+15这个数,通过long2buff,转换的8字节字串,然后其中第9字节的内容是STORAGE_PROTO_CMD_UPLOAD_FILE,也就是11

第11字节是刚才接受的storage_path_index

第12-19字节是file_size,通过long2buff算法转换为8字节字串

19-25字节是ext_name相关,这里设置为0即可

9 发送file_size字节内容,即为文件信息

10 接受一个10字节的包,其中第10个字节为返回状态,如果是0,说明一切正常

11 接受的这个包,0-8字节是下面要接收的包的大小,通过buff2long还原为数字

12 这个数字应该大于FDFS_GROUP_NAME_MAX_LEN,也就是16字节,否则出错

13 头16字节为组名,后面全部的字节为remote_filename

14 上传流程完成

下载:

下载需要上传时rsp返回的文件ID,这里命名为file_id

1 连接tracker

2 切分file_id,第一个/前出现的即为group_name,后面的都是remote_filename

3 发送一个10字节的pHeader,其中1-8字节是FDFS_GROUP_NAME_MAX_LEN(值为16) 加上 remote_filename的长度,通过long2buff转化而成的

第9字节是CMD TRACKER_PROTO_CMD_SERVICE_QUERY_FETCH_ONE,即为102

4 发送16字节是group_name

5 发送remote_filename这个字串

6 接受一个10字节的包,其中第10个字节为返回状态,如果是0,说明一切正常

7 接受的这个包,1-8字节是下面要接收的包的大小,通过buff2long可以还原成数字

8 读完这个数字对应的字节数目,这个数字应当有TRACKERQUERYSTORAGEFETCHBODYLEN(TRACKERQUERYSTORAGESTOREBODYLEN - 1,也就是39)长,否则出错

9 这39个字节,头16字节是组名(下载逻辑时可以忽略),接着15字节是IP地址,接着8字节是端口号,还是根据buff2long算法还原成数字

10 和tracker的交互完成,下面是storage

11 根据ip和端口连接storage

12 发送一个pHeader+file_offset+download_bytes+group_name(补全16字节)+filename的数据包

也就是10+8+8+16+filename_size

1-8字节是8+8+16+filename_size的大小根据long2buff转换的字串

9字节是STORAGE_PROTO_CMD_DOWNLOAD_FILE也就是14

11-18字节是file_offset的long2buff字串

19-26是download_bytes的long2buff字串

27-42是group_name

再往后就是finename

13 接受一个10字节的包,其中第10个字节为返回状态,如果是0,说明一切正常

14 接受的这个包,1-8字节是下面要接收的包的大小,通过buff2long可以还原成数字

15 将接收到的包写入文件,一次下载逻辑完毕

上传下载是最经典的逻辑,其他逻辑都可以从这里衍生,不做详细介绍了


FastDFS源码解析(4)--------storage运行流程分析

大致来分析一下fdfs storage是如何提供服务的,以上传文件为例。

从storage的初始化函数来入手

 
  1. int storage_service_init()

  2. {

  3. int result;

  4. int bytes;

  5. struct storage_nio_thread_data *pThreadData;

  6. struct storage_nio_thread_data *pDataEnd;

  7. pthread_t tid;

  8. pthread_attr_t thread_attr;

  9.  
  10. //storage任务线程锁

  11. if ((result=init_pthread_lock(&g_storage_thread_lock)) != 0)

  12. {

  13. return result;

  14. }

  15.  
  16. //路径索引锁

  17. if ((result=init_pthread_lock(&path_index_thread_lock)) != 0)

  18. {

  19. return result;

  20. }

  21.  
  22. //状态计数锁

  23. if ((result=init_pthread_lock(&stat_count_thread_lock)) != 0)

  24. {

  25. return result;

  26. }

  27.  
  28. //初始化线程堆栈大小

  29. if ((result=init_pthread_attr(&thread_attr, g_thread_stack_size)) != 0)

  30. {

  31. logError("file: "__FILE__", line: %d, " \

  32. "init_pthread_attr fail, program exit!", __LINE__);

  33. return result;

  34. }

  35.  
  36. //建立任务task对象池,复用task类型

  37. if ((result=free_queue_init(g_max_connections, g_buff_size, \

  38. g_buff_size, sizeof(StorageClientInfo))) != 0)

  39. {

  40. return result;

  41. }

  42.  
  43. bytes = sizeof(struct storage_nio_thread_data) * g_work_threads;

  44. g_nio_thread_data = (struct storage_nio_thread_data *)malloc(bytes);

  45. if (g_nio_thread_data == NULL)

  46. {

  47. logError("file: "__FILE__", line: %d, " \

  48. "malloc %d bytes fail, errno: %d, error info: %s", \

  49. __LINE__, bytes, errno, STRERROR(errno));

  50. return errno != 0 ? errno : ENOMEM;

  51. }

  52. memset(g_nio_thread_data, 0, bytes);

  53.  
  54. g_storage_thread_count = 0;

  55. pDataEnd = g_nio_thread_data + g_work_threads;

  56. for (pThreadData=g_nio_thread_data; pThreadData<pDataEnd; pThreadData++)

  57. {

  58. if (ioevent_init(&pThreadData->thread_data.ev_puller,

  59. g_max_connections + 2, 1000, 0) != 0)

  60. {

  61. result = errno != 0 ? errno : ENOMEM;

  62. logError("file: "__FILE__", line: %d, " \

  63. "ioevent_init fail, " \

  64. "errno: %d, error info: %s", \

  65. __LINE__, result, STRERROR(result));

  66. return result;

  67. }

  68. result = fast_timer_init(&pThreadData->thread_data.timer,

  69. 2 * g_fdfs_network_timeout, g_current_time);

  70. if (result != 0)

  71. {

  72. logError("file: "__FILE__", line: %d, " \

  73. "fast_timer_init fail, " \

  74. "errno: %d, error info: %s", \

  75. __LINE__, result, STRERROR(result));

  76. return result;

  77. }

  78.  
  79. if (pipe(pThreadData->thread_data.pipe_fds) != 0)

  80. {

  81. result = errno != 0 ? errno : EPERM;

  82. logError("file: "__FILE__", line: %d, " \

  83. "call pipe fail, " \

  84. "errno: %d, error info: %s", \

  85. __LINE__, result, STRERROR(result));

  86. break;

  87. }

  88.  
  89. #if defined(OS_LINUX)

  90. if ((result=fd_add_flags(pThreadData->thread_data.pipe_fds[0], \

  91. O_NONBLOCK | O_NOATIME)) != 0)

  92. {

  93. break;

  94. }

  95. #else

  96. if ((result=fd_add_flags(pThreadData->thread_data.pipe_fds[0], \

  97. O_NONBLOCK)) != 0)

  98. {

  99. break;

  100. }

  101. #endif

  102.  
  103. //创建工作线程

  104. if ((result=pthread_create(&tid, &thread_attr, \

  105. work_thread_entrance, pThreadData)) != 0)

  106. {

  107. logError("file: "__FILE__", line: %d, " \

  108. "create thread failed, startup threads: %d, " \

  109. "errno: %d, error info: %s", \

  110. __LINE__, g_storage_thread_count, \

  111. result, STRERROR(result));

  112. break;

  113. }

  114. else

  115. {

  116. if ((result=pthread_mutex_lock(&g_storage_thread_lock)) != 0)

  117. {

  118. logError("file: "__FILE__", line: %d, " \

  119. "call pthread_mutex_lock fail, " \

  120. "errno: %d, error info: %s", \

  121. __LINE__, result, STRERROR(result));

  122. }

  123. g_storage_thread_count++;

  124. if ((result=pthread_mutex_unlock(&g_storage_thread_lock)) != 0)

  125. {

  126. logError("file: "__FILE__", line: %d, " \

  127. "call pthread_mutex_lock fail, " \

  128. "errno: %d, error info: %s", \

  129. __LINE__, result, STRERROR(result));

  130. }

  131. }

  132. }

  133.  
  134. pthread_attr_destroy(&thread_attr);

  135.  
  136. last_stat_change_count = g_stat_change_count;

  137.  
  138. //DO NOT support direct IO !!!

  139. //g_extra_open_file_flags = g_disk_rw_direct ? O_DIRECT : 0;

  140.  
  141. if (result != 0)

  142. {

  143. return result;

  144. }

  145.  
  146. return result;

  147. }

跟进工作线程

 
  1. static void *work_thread_entrance(void* arg)

  2. {

  3. int result;

  4. struct storage_nio_thread_data *pThreadData;

  5.  
  6. pThreadData = (struct storage_nio_thread_data *)arg;

  7. if (g_check_file_duplicate)

  8. {

  9. if ((result=fdht_copy_group_array(&(pThreadData->group_array),\

  10. &g_group_array)) != 0)

  11. {

  12. pthread_mutex_lock(&g_storage_thread_lock);

  13. g_storage_thread_count--;

  14. pthread_mutex_unlock(&g_storage_thread_lock);

  15. return NULL;

  16. }

  17. }

  18.  
  19. //启动主io主循环,为pThreadData->thread_data对应的pipe_fd注册回调函数

  20. //storage_recv_notify_read

  21. ioevent_loop(&pThreadData->thread_data, storage_recv_notify_read,

  22. task_finish_clean_up, &g_continue_flag);

  23. //循环退出,销毁响应数据结构

  24. ioevent_destroy(&pThreadData->thread_data.ev_puller);

  25.  
  26. if (g_check_file_duplicate)

  27. {

  28. if (g_keep_alive)

  29. {

  30. fdht_disconnect_all_servers(&(pThreadData->group_array));

  31. }

  32.  
  33. fdht_free_group_array(&(pThreadData->group_array));

  34. }

  35.  
  36. //总线程数目自减

  37. if ((result=pthread_mutex_lock(&g_storage_thread_lock)) != 0)

  38. {

  39. logError("file: "__FILE__", line: %d, " \

  40. "call pthread_mutex_lock fail, " \

  41. "errno: %d, error info: %s", \

  42. __LINE__, result, STRERROR(result));

  43. }

  44. g_storage_thread_count--;

  45. if ((result=pthread_mutex_unlock(&g_storage_thread_lock)) != 0)

  46. {

  47. logError("file: "__FILE__", line: %d, " \

  48. "call pthread_mutex_lock fail, " \

  49. "errno: %d, error info: %s", \

  50. __LINE__, result, STRERROR(result));

  51. }

  52.  
  53. logDebug("file: "__FILE__", line: %d, " \

  54. "nio thread exited, thread count: %d", \

  55. __LINE__, g_storage_thread_count);

  56.  
  57. return NULL;

  58. }

除了workthreadentrance线程,还有一个叫做acceptthreadentrance的线程,专门用来accept请求,防止大量的操作阻塞了accept的性能

 
  1. static void *accept_thread_entrance(void* arg)

  2. {

  3. int server_sock;

  4. int incomesock;

  5. struct sockaddr_in inaddr;

  6. socklen_t sockaddr_len;

  7. in_addr_t client_addr;

  8. char szClientIp[IP_ADDRESS_SIZE];

  9. long task_addr;

  10. struct fast_task_info *pTask;

  11. StorageClientInfo *pClientInfo;

  12. struct storage_nio_thread_data *pThreadData;

  13.  
  14. server_sock = (long)arg;

  15. while (g_continue_flag)

  16. {

  17. sockaddr_len = sizeof(inaddr);

  18. incomesock = accept(server_sock, (struct sockaddr*)&inaddr, \

  19. &sockaddr_len);

  20. if (incomesock < 0) //error

  21. {

  22. if (!(errno == EINTR || errno == EAGAIN))

  23. {

  24. logError("file: "__FILE__", line: %d, " \

  25. "accept failed, " \

  26. "errno: %d, error info: %s", \

  27. __LINE__, errno, STRERROR(errno));

  28. }

  29.  
  30. continue;

  31. }

  32.  
  33. client_addr = getPeerIpaddr(incomesock, \

  34. szClientIp, IP_ADDRESS_SIZE);

  35. if (g_allow_ip_count >= 0)

  36. {

  37. if (bsearch(&client_addr, g_allow_ip_addrs, \

  38. g_allow_ip_count, sizeof(in_addr_t), \

  39. cmp_by_ip_addr_t) == NULL)

  40. {

  41. logError("file: "__FILE__", line: %d, " \

  42. "ip addr %s is not allowed to access", \

  43. __LINE__, szClientIp);

  44.  
  45. close(incomesock);

  46. continue;

  47. }

  48. }

  49.  
  50. if (tcpsetnonblockopt(incomesock) != 0)

  51. {

  52. close(incomesock);

  53. continue;

  54. }

  55.  
  56. pTask = free_queue_pop();

  57. if (pTask == NULL)

  58. {

  59. logError("file: "__FILE__", line: %d, " \

  60. "malloc task buff failed", \

  61. __LINE__);

  62. close(incomesock);

  63. continue;

  64. }

  65.  
  66. pClientInfo = (StorageClientInfo *)pTask->arg;

  67.  
  68. //从task对象池里拿出一个task,将fd域填充为incomesock

  69. pTask->event.fd = incomesock;

  70. pClientInfo->stage = FDFS_STORAGE_STAGE_NIO_INIT;

  71. pClientInfo->nio_thread_index = pTask->event.fd % g_work_threads;

  72. pThreadData = g_nio_thread_data + pClientInfo->nio_thread_index;

  73.  
  74. strcpy(pTask->client_ip, szClientIp);

  75.  
  76. task_addr = (long)pTask;

  77.  
  78. //通过pThreadData->thread_data.pipe_fds[1]将task传给work_thread

  79. //work_thread监视着pThreadData->thread_data.pipe_fds[0]

  80. //storage_recv_notify_read将被调用

  81. if (write(pThreadData->thread_data.pipe_fds[1], &task_addr, \

  82. sizeof(task_addr)) != sizeof(task_addr))

  83. {

  84. close(incomesock);

  85. free_queue_push(pTask);

  86. logError("file: "__FILE__", line: %d, " \

  87. "call write failed, " \

  88. "errno: %d, error info: %s", \

  89. __LINE__, errno, STRERROR(errno));

  90. }

  91. }

  92.  
  93. return NULL;

  94. }

关注一下storagerecvnotify_read函数

 
  1. void storage_recv_notify_read(int sock, short event, void *arg)

  2. {

  3. struct fast_task_info *pTask;

  4. StorageClientInfo *pClientInfo;

  5. long task_addr;

  6. int64_t remain_bytes;

  7. int bytes;

  8. int result;

  9.  
  10. while (1)

  11. {

  12. //读取这个task结构

  13. if ((bytes=read(sock, &task_addr, sizeof(task_addr))) < 0)

  14. {

  15. if (!(errno == EAGAIN || errno == EWOULDBLOCK))

  16. {

  17. logError("file: "__FILE__", line: %d, " \

  18. "call read failed, " \

  19. "errno: %d, error info: %s", \

  20. __LINE__, errno, STRERROR(errno));

  21. }

  22.  
  23. break;

  24. }

  25. else if (bytes == 0)

  26. {

  27. logError("file: "__FILE__", line: %d, " \

  28. "call read failed, end of file", __LINE__);

  29. break;

  30. }

  31.  
  32. pTask = (struct fast_task_info *)task_addr;

  33. pClientInfo = (StorageClientInfo *)pTask->arg;

  34.  
  35. if (pTask->event.fd < 0) //quit flag

  36. {

  37. return;

  38. }

  39.  
  40. /* //logInfo("=====thread index: %d, pTask->event.fd=%d", \

  41. pClientInfo->nio_thread_index, pTask->event.fd);

  42. */

  43.  
  44. if (pClientInfo->stage & FDFS_STORAGE_STAGE_DIO_THREAD)

  45. {

  46. pClientInfo->stage &= ~FDFS_STORAGE_STAGE_DIO_THREAD;

  47. }

  48. switch (pClientInfo->stage)

  49. {

  50. //初始化阶段,进行数据初始化

  51. case FDFS_STORAGE_STAGE_NIO_INIT:

  52. result = storage_nio_init(pTask);

  53. break;

  54. //暂时略过,先看storage_nio_init

  55. case FDFS_STORAGE_STAGE_NIO_RECV:

  56. pTask->offset = 0;

  57. remain_bytes = pClientInfo->total_length - \

  58. pClientInfo->total_offset;

  59. if (remain_bytes > pTask->size)

  60. {

  61. pTask->length = pTask->size;

  62. }

  63. else

  64. {

  65. pTask->length = remain_bytes;

  66. }

  67.  
  68. if (set_recv_event(pTask) == 0)

  69. {

  70. client_sock_read(pTask->event.fd,

  71. IOEVENT_READ, pTask);

  72. }

  73. result = 0;

  74. break;

  75. case FDFS_STORAGE_STAGE_NIO_SEND:

  76. result = storage_send_add_event(pTask);

  77. break;

  78. case FDFS_STORAGE_STAGE_NIO_CLOSE:

  79. result = EIO; //close this socket

  80. break;

  81. default:

  82. logError("file: "__FILE__", line: %d, " \

  83. "invalid stage: %d", __LINE__, \

  84. pClientInfo->stage);

  85. result = EINVAL;

  86. break;

  87. }

  88.  
  89. if (result != 0)

  90. {

  91. add_to_deleted_list(pTask);

  92. }

  93. }

  94. }

初始化实质上是将task对应的fd,注册clientsockread函数同时将task状态设置为FDFSSTORAGESTAGENIORECV

 
  1. static int storage_nio_init(struct fast_task_info *pTask)

  2. {

  3. StorageClientInfo *pClientInfo;

  4. struct storage_nio_thread_data *pThreadData;

  5.  
  6. pClientInfo = (StorageClientInfo *)pTask->arg;

  7. pThreadData = g_nio_thread_data + pClientInfo->nio_thread_index;

  8.  
  9. pClientInfo->stage = FDFS_STORAGE_STAGE_NIO_RECV;

  10. return ioevent_set(pTask, &pThreadData->thread_data,

  11. pTask->event.fd, IOEVENT_READ, client_sock_read,

  12. g_fdfs_network_timeout);

  13. }

看看这个clientsockread函数

 
  1. static void client_sock_read(int sock, short event, void *arg)

  2. {

  3. int bytes;

  4. int recv_bytes;

  5. struct fast_task_info *pTask;

  6. StorageClientInfo *pClientInfo;

  7.  
  8. pTask = (struct fast_task_info *)arg;

  9. pClientInfo = (StorageClientInfo *)pTask->arg;

  10. if (pClientInfo->canceled)

  11. {

  12. return;

  13. }

  14.  
  15. if (pClientInfo->stage != FDFS_STORAGE_STAGE_NIO_RECV)

  16. {

  17. if (event & IOEVENT_TIMEOUT) {

  18. pTask->event.timer.expires = g_current_time +

  19. g_fdfs_network_timeout;

  20. fast_timer_add(&pTask->thread_data->timer,

  21. &pTask->event.timer);

  22. }

  23.  
  24. return;

  25. }

  26.  
  27. //超时了,删除这个task

  28. if (event & IOEVENT_TIMEOUT)

  29. {

  30. if (pClientInfo->total_offset == 0 && pTask->req_count > 0)

  31. {

  32. pTask->event.timer.expires = g_current_time +

  33. g_fdfs_network_timeout;

  34. fast_timer_add(&pTask->thread_data->timer,

  35. &pTask->event.timer);

  36. }

  37. else

  38. {

  39. logError("file: "__FILE__", line: %d, " \

  40. "client ip: %s, recv timeout, " \

  41. "recv offset: %d, expect length: %d", \

  42. __LINE__, pTask->client_ip, \

  43. pTask->offset, pTask->length);

  44.  
  45. task_finish_clean_up(pTask);

  46. }

  47.  
  48. return;

  49. }

  50.  
  51. //io错误,一样删

  52. if (event & IOEVENT_ERROR)

  53. {

  54. logError("file: "__FILE__", line: %d, " \

  55. "client ip: %s, recv error event: %d, "

  56. "close connection", __LINE__, pTask->client_ip, event);

  57.  
  58. task_finish_clean_up(pTask);

  59. return;

  60. }

  61.  
  62. fast_timer_modify(&pTask->thread_data->timer,

  63. &pTask->event.timer, g_current_time +

  64. g_fdfs_network_timeout);

  65. while (1)

  66. {

  67. //pClientInfo的total_length域为0,说明头还没接收,接收一个头

  68. if (pClientInfo->total_length == 0) //recv header

  69. {

  70. recv_bytes = sizeof(TrackerHeader) - pTask->offset;

  71. }

  72. else

  73. {

  74. recv_bytes = pTask->length - pTask->offset;

  75. }

  76.  
  77. /*

  78. logInfo("total_length="INT64_PRINTF_FORMAT", recv_bytes=%d, "

  79. "pTask->length=%d, pTask->offset=%d",

  80. pClientInfo->total_length, recv_bytes,

  81. pTask->length, pTask->offset);

  82. */

  83.  
  84. bytes = recv(sock, pTask->data + pTask->offset, recv_bytes, 0);

  85. if (bytes < 0)

  86. {

  87. if (errno == EAGAIN || errno == EWOULDBLOCK)

  88. {

  89. }

  90. else

  91. {

  92. logError("file: "__FILE__", line: %d, " \

  93. "client ip: %s, recv failed, " \

  94. "errno: %d, error info: %s", \

  95. __LINE__, pTask->client_ip, \

  96. errno, STRERROR(errno));

  97.  
  98. task_finish_clean_up(pTask);

  99. }

  100.  
  101. return;

  102. }

  103. else if (bytes == 0)

  104. {

  105. logDebug("file: "__FILE__", line: %d, " \

  106. "client ip: %s, recv failed, " \

  107. "connection disconnected.", \

  108. __LINE__, pTask->client_ip);

  109.  
  110. task_finish_clean_up(pTask);

  111. return;

  112. }

  113.  
  114. //用包头数据对pClientInfo进行初始化

  115. if (pClientInfo->total_length == 0) //header

  116. {

  117. if (pTask->offset + bytes < sizeof(TrackerHeader))

  118. {

  119. pTask->offset += bytes;

  120. return;

  121. }

  122.  
  123. pClientInfo->total_length=buff2long(((TrackerHeader *) \

  124. pTask->data)->pkg_len);

  125. if (pClientInfo->total_length < 0)

  126. {

  127. logError("file: "__FILE__", line: %d, " \

  128. "client ip: %s, pkg length: " \

  129. INT64_PRINTF_FORMAT" < 0", \

  130. __LINE__, pTask->client_ip, \

  131. pClientInfo->total_length);

  132.  
  133. task_finish_clean_up(pTask);

  134. return;

  135. }

  136.  
  137. pClientInfo->total_length += sizeof(TrackerHeader);

  138.  
  139. //如果需要接受的数据总长大于pTask的固定长度阀值,那么暂时只接受那么长

  140. if (pClientInfo->total_length > pTask->size)

  141. {

  142. pTask->length = pTask->size;

  143. }

  144. else

  145. {

  146. pTask->length = pClientInfo->total_length;

  147. }

  148. }

  149.  
  150. pTask->offset += bytes;

  151.  
  152. //接受完了当前的包

  153. if (pTask->offset >= pTask->length) //recv current pkg done

  154. {

  155. //略过先看下面

  156. if (pClientInfo->total_offset + pTask->length >= \

  157. pClientInfo->total_length)

  158. {

  159. /* current req recv done */

  160. pClientInfo->stage = FDFS_STORAGE_STAGE_NIO_SEND;

  161. pTask->req_count++;

  162. }

  163.  
  164. //刚接受了包头,那么由storage_deal_task分发任务

  165. if (pClientInfo->total_offset == 0)

  166. {

  167. pClientInfo->total_offset = pTask->length;

  168. storage_deal_task(pTask);

  169. }

  170. else

  171. {

  172. //略过先看下面

  173. pClientInfo->total_offset += pTask->length;

  174.  
  175. /* continue write to file */

  176. storage_dio_queue_push(pTask);

  177. }

  178.  
  179. return;

  180. }

  181. }

  182.  
  183. return;

  184. }

storagedealtask将上传请求分发给storageuploadfile

storageuploadfile注册一些基本的函数而后调用 storagewriteto_file

 
  1. static int storage_upload_file(struct fast_task_info *pTask, bool bAppenderFile)

  2. {

  3. //略过

  4. ...

  5.  
  6. return storage_write_to_file(pTask, file_offset, file_bytes, \

  7. p - pTask->data, dio_write_file, \

  8. storage_upload_file_done_callback, \

  9. clean_func, store_path_index);

  10. }

 
  1. static int storage_write_to_file(struct fast_task_info *pTask, \

  2. const int64_t file_offset, const int64_t upload_bytes, \

  3. const int buff_offset, TaskDealFunc deal_func, \

  4. FileDealDoneCallback done_callback, \

  5. DisconnectCleanFunc clean_func, const int store_path_index)

  6. {

  7. StorageClientInfo *pClientInfo;

  8. StorageFileContext *pFileContext;

  9. int result;

  10.  
  11. pClientInfo = (StorageClientInfo *)pTask->arg;

  12. pFileContext = &(pClientInfo->file_context);

  13.  
  14. pClientInfo->deal_func = deal_func;

  15. pClientInfo->clean_func = clean_func;

  16.  
  17. pFileContext->fd = -1;

  18. pFileContext->buff_offset = buff_offset;

  19. pFileContext->offset = file_offset;

  20. pFileContext->start = file_offset;

  21. pFileContext->end = file_offset + upload_bytes;

  22. pFileContext->dio_thread_index = storage_dio_get_thread_index( \

  23. pTask, store_path_index, pFileContext->op);

  24. pFileContext->done_callback = done_callback;

  25.  
  26. if (pFileContext->calc_crc32)

  27. {

  28. pFileContext->crc32 = CRC32_XINIT;

  29. }

  30.  
  31. if (pFileContext->calc_file_hash)

  32. {

  33. if (g_file_signature_method == STORAGE_FILE_SIGNATURE_METHOD_HASH)

  34. {

  35. INIT_HASH_CODES4(pFileContext->file_hash_codes)

  36. }

  37. else

  38. {

  39. my_md5_init(&pFileContext->md5_context);

  40. }

  41. }

  42.  
  43. //将任务压入磁盘队列

  44. if ((result=storage_dio_queue_push(pTask)) != 0)

  45. {

  46. pClientInfo->total_length = sizeof(TrackerHeader);

  47. return result;

  48. }

  49.  
  50. return STORAGE_STATUE_DEAL_FILE;

  51. }

压入磁盘队列的处理函数

 
  1. int storage_dio_queue_push(struct fast_task_info *pTask)

  2. {

  3. StorageClientInfo *pClientInfo;

  4. StorageFileContext *pFileContext;

  5. struct storage_dio_context *pContext;

  6. int result;

  7.  
  8. pClientInfo = (StorageClientInfo *)pTask->arg;

  9. pFileContext = &(pClientInfo->file_context);

  10. pContext = g_dio_contexts + pFileContext->dio_thread_index;

  11.  
  12. //这里为什么要或上这个呢,因为在LT模式的工作下,client_sock_read会被不断的触发

  13. //pTask的数据就会被刷掉了,所以改变当前FDFS_STORAGE_STAGE_NIO_RECV的状态,让client_sock_read调用就被返回

  14. pClientInfo->stage |= FDFS_STORAGE_STAGE_DIO_THREAD;

  15. if ((result=task_queue_push(&(pContext->queue), pTask)) != 0)

  16. {

  17. add_to_deleted_list(pTask);

  18. return result;

  19. }

  20.  
  21. if ((result=pthread_cond_signal(&(pContext->cond))) != 0)

  22. {

  23. logError("file: "__FILE__", line: %d, " \

  24. "pthread_cond_signal fail, " \

  25. "errno: %d, error info: %s", \

  26. __LINE__, result, STRERROR(result));

  27.  
  28. add_to_deleted_list(pTask);

  29. return result;

  30. }

  31.  
  32. return 0;

  33. }

下面就是磁盘线程取task了

 
  1. static void *dio_thread_entrance(void* arg)

  2. {

  3. int result;

  4. struct storage_dio_context *pContext;

  5. struct fast_task_info *pTask;

  6.  
  7. pContext = (struct storage_dio_context *)arg;

  8.  
  9. pthread_mutex_lock(&(pContext->lock));

  10. while (g_continue_flag)

  11. {

  12. if ((result=pthread_cond_wait(&(pContext->cond), \

  13. &(pContext->lock))) != 0)

  14. {

  15. logError("file: "__FILE__", line: %d, " \

  16. "call pthread_cond_wait fail, " \

  17. "errno: %d, error info: %s", \

  18. __LINE__, result, STRERROR(result));

  19. }

  20.  
  21. //循环取队列里的任务,执行他的deal_func

  22. while ((pTask=task_queue_pop(&(pContext->queue))) != NULL)

  23. {

  24. ((StorageClientInfo *)pTask->arg)->deal_func(pTask);

  25. }

  26. }

  27. pthread_mutex_unlock(&(pContext->lock));

  28.  
  29. if ((result=pthread_mutex_lock(&g_dio_thread_lock)) != 0)

  30. {

  31. logError("file: "__FILE__", line: %d, " \

  32. "call pthread_mutex_lock fail, " \

  33. "errno: %d, error info: %s", \

  34. __LINE__, result, STRERROR(result));

  35. }

  36. g_dio_thread_count--;

  37. if ((result=pthread_mutex_unlock(&g_dio_thread_lock)) != 0)

  38. {

  39. logError("file: "__FILE__", line: %d, " \

  40. "call pthread_mutex_lock fail, " \

  41. "errno: %d, error info: %s", \

  42. __LINE__, result, STRERROR(result));

  43. }

  44.  
  45. logDebug("file: "__FILE__", line: %d, " \

  46. "dio thread exited, thread count: %d", \

  47. __LINE__, g_dio_thread_count);

  48.  
  49. return NULL;

  50. }

对于上传任务来说,dealtask实际上是dowrite_file

 
  1. int dio_write_file(struct fast_task_info *pTask)

  2. {

  3. StorageClientInfo *pClientInfo;

  4. StorageFileContext *pFileContext;

  5. int result;

  6. int write_bytes;

  7. char *pDataBuff;

  8.  
  9. pClientInfo = (StorageClientInfo *)pTask->arg;

  10. pFileContext = &(pClientInfo->file_context);

  11. result = 0;

  12. do

  13. {

  14. if (pFileContext->fd < 0)

  15. {

  16. if (pFileContext->extra_info.upload.before_open_callback!=NULL)

  17. {

  18. result = pFileContext->extra_info.upload. \

  19. before_open_callback(pTask);

  20. if (result != 0)

  21. {

  22. break;

  23. }

  24. }

  25.  
  26. if ((result=dio_open_file(pFileContext)) != 0)

  27. {

  28. break;

  29. }

  30. }

  31.  
  32. pDataBuff = pTask->data + pFileContext->buff_offset;

  33. write_bytes = pTask->length - pFileContext->buff_offset;

  34. if (write(pFileContext->fd, pDataBuff, write_bytes) != write_bytes)

  35. {

  36. result = errno != 0 ? errno : EIO;

  37. logError("file: "__FILE__", line: %d, " \

  38. "write to file: %s fail, fd=%d, write_bytes=%d, " \

  39. "errno: %d, error info: %s", \

  40. __LINE__, pFileContext->filename, \

  41. pFileContext->fd, write_bytes, \

  42. result, STRERROR(result));

  43. }

  44.  
  45. pthread_mutex_lock(&g_dio_thread_lock);

  46. g_storage_stat.total_file_write_count++;

  47. if (result == 0)

  48. {

  49. g_storage_stat.success_file_write_count++;

  50. }

  51. pthread_mutex_unlock(&g_dio_thread_lock);

  52.  
  53. if (result != 0)

  54. {

  55. break;

  56. }

  57.  
  58. if (pFileContext->calc_crc32)

  59. {

  60. pFileContext->crc32 = CRC32_ex(pDataBuff, write_bytes, \

  61. pFileContext->crc32);

  62. }

  63.  
  64. if (pFileContext->calc_file_hash)

  65. {

  66. if (g_file_signature_method == STORAGE_FILE_SIGNATURE_METHOD_HASH)

  67. {

  68. CALC_HASH_CODES4(pDataBuff, write_bytes, \

  69. pFileContext->file_hash_codes)

  70. }

  71. else

  72. {

  73. my_md5_update(&pFileContext->md5_context, \

  74. (unsigned char *)pDataBuff, write_bytes);

  75. }

  76. }

  77.  
  78. /*

  79. logInfo("###dio write bytes: %d, pTask->length=%d, buff_offset=%d", \

  80. write_bytes, pTask->length, pFileContext->buff_offset);

  81. */

  82.  
  83. pFileContext->offset += write_bytes;

  84. if (pFileContext->offset < pFileContext->end)

  85. {

  86. pFileContext->buff_offset = 0;

  87. storage_nio_notify(pTask); //notify nio to deal

  88. }

  89. else

  90. {

  91. if (pFileContext->calc_crc32)

  92. {

  93. pFileContext->crc32 = CRC32_FINAL( \

  94. pFileContext->crc32);

  95. }

  96.  
  97. if (pFileContext->calc_file_hash)

  98. {

  99. if (g_file_signature_method == STORAGE_FILE_SIGNATURE_METHOD_HASH)

  100. {

  101. FINISH_HASH_CODES4(pFileContext->file_hash_codes)

  102. }

  103. else

  104. {

  105. my_md5_final((unsigned char *)(pFileContext-> \

  106. file_hash_codes), &pFileContext->md5_context);

  107. }

  108. }

  109.  
  110. if (pFileContext->extra_info.upload.before_close_callback != NULL)

  111. {

  112. result = pFileContext->extra_info.upload. \

  113. before_close_callback(pTask);

  114. }

  115.  
  116. /* file write done, close it */

  117. close(pFileContext->fd);

  118. pFileContext->fd = -1;

  119.  
  120. if (pFileContext->done_callback != NULL)

  121. {

  122. pFileContext->done_callback(pTask, result);

  123. }

  124. }

  125.  
  126. return 0;

  127. } while (0);

  128.  
  129. pClientInfo->clean_func(pTask);

  130.  
  131. if (pFileContext->done_callback != NULL)

  132. {

  133. pFileContext->done_callback(pTask, result);

  134. }

  135. return result;

  136. }

pFileContext->donecallback对应了storageuploadfiledone_callback

 
  1. static void storage_upload_file_done_callback(struct fast_task_info *pTask, \

  2. const int err_no)

  3. {

  4. StorageClientInfo *pClientInfo;

  5. StorageFileContext *pFileContext;

  6. TrackerHeader *pHeader;

  7. int result;

  8.  
  9. pClientInfo = (StorageClientInfo *)pTask->arg;

  10. pFileContext = &(pClientInfo->file_context);

  11.  
  12. if (pFileContext->extra_info.upload.file_type & _FILE_TYPE_TRUNK)

  13. {

  14. result = trunk_client_trunk_alloc_confirm( \

  15. &(pFileContext->extra_info.upload.trunk_info), err_no);

  16. if (err_no != 0)

  17. {

  18. result = err_no;

  19. }

  20. }

  21. else

  22. {

  23. result = err_no;

  24. }

  25.  
  26. if (result == 0)

  27. {

  28. result = storage_service_upload_file_done(pTask);

  29. if (result == 0)

  30. {

  31. if (pFileContext->create_flag & STORAGE_CREATE_FLAG_FILE)

  32. {

  33. result = storage_binlog_write(\

  34. pFileContext->timestamp2log, \

  35. STORAGE_OP_TYPE_SOURCE_CREATE_FILE, \

  36. pFileContext->fname2log);

  37. }

  38. }

  39. }

  40.  
  41. if (result == 0)

  42. {

  43. int filename_len;

  44. char *p;

  45.  
  46. if (pFileContext->create_flag & STORAGE_CREATE_FLAG_FILE)

  47. {

  48. CHECK_AND_WRITE_TO_STAT_FILE3_WITH_BYTES( \

  49. g_storage_stat.total_upload_count, \

  50. g_storage_stat.success_upload_count, \

  51. g_storage_stat.last_source_update, \

  52. g_storage_stat.total_upload_bytes, \

  53. g_storage_stat.success_upload_bytes, \

  54. pFileContext->end - pFileContext->start)

  55. }

  56.  
  57. filename_len = strlen(pFileContext->fname2log);

  58. pClientInfo->total_length = sizeof(TrackerHeader) + \

  59. FDFS_GROUP_NAME_MAX_LEN + filename_len;

  60. p = pTask->data + sizeof(TrackerHeader);

  61. memcpy(p, pFileContext->extra_info.upload.group_name, \

  62. FDFS_GROUP_NAME_MAX_LEN);

  63. p += FDFS_GROUP_NAME_MAX_LEN;

  64. memcpy(p, pFileContext->fname2log, filename_len);

  65. }

  66. else

  67. {

  68. pthread_mutex_lock(&stat_count_thread_lock);

  69. if (pFileContext->create_flag & STORAGE_CREATE_FLAG_FILE)

  70. {

  71. g_storage_stat.total_upload_count++;

  72. g_storage_stat.total_upload_bytes += \

  73. pClientInfo->total_offset;

  74. }

  75. pthread_mutex_unlock(&stat_count_thread_lock);

  76.  
  77. pClientInfo->total_length = sizeof(TrackerHeader);

  78. }

  79.  
  80. STORAGE_ACCESS_LOG(pTask, ACCESS_LOG_ACTION_UPLOAD_FILE, result);

  81.  
  82. pClientInfo->total_offset = 0;

  83. pTask->length = pClientInfo->total_length;

  84.  
  85. pHeader = (TrackerHeader *)pTask->data;

  86. pHeader->status = result;

  87. pHeader->cmd = STORAGE_PROTO_CMD_RESP;

  88. long2buff(pClientInfo->total_length - sizeof(TrackerHeader), \

  89. pHeader->pkg_len);

  90.  
  91. //又看到熟悉的函数了,这完成以后将pTask从磁盘线程压入work线程

  92. //work线程调用storage_recv_notify_read函数来做下一步处理

  93. storage_nio_notify(pTask);

  94. }

 
  1. void storage_recv_notify_read(int sock, short event, void *arg)

  2. {

  3. //前文已有,略过

  4. ...

  5. //刚从磁盘线程里出来的任务状态依然是dio_thread,去掉dio_thread状态

  6. if (pClientInfo->stage & FDFS_STORAGE_STAGE_DIO_THREAD)

  7. {

  8. pClientInfo->stage &= ~FDFS_STORAGE_STAGE_DIO_THREAD;

  9. }

  10. switch (pClientInfo->stage)

  11. {

  12. //前文已有,略过

  13. ...

  14. case FDFS_STORAGE_STAGE_NIO_RECV:

  15. pTask->offset = 0;

  16. remain_bytes = pClientInfo->total_length - \

  17. pClientInfo->total_offset;

  18. if (remain_bytes > pTask->size)

  19. {

  20. pTask->length = pTask->size;

  21. }

  22. else

  23. {

  24. pTask->length = remain_bytes;

  25. }

  26.  
  27. if (set_recv_event(pTask) == 0)

  28. {

  29. client_sock_read(pTask->event.fd,

  30. IOEVENT_READ, pTask);

  31. }

  32. result = 0;

  33. break;

  34. case FDFS_STORAGE_STAGE_NIO_SEND:

  35. result = storage_send_add_event(pTask);

  36. break;

  37. case FDFS_STORAGE_STAGE_NIO_CLOSE:

  38. result = EIO; //close this socket

  39. break;

  40. default:

  41. logError("file: "__FILE__", line: %d, " \

  42. "invalid stage: %d", __LINE__, \

  43. pClientInfo->stage);

  44. result = EINVAL;

  45. break;

  46. }

  47.  
  48. if (result != 0)

  49. {

  50. add_to_deleted_list(pTask);

  51. }

  52. }

调用了clientsockread函数进行处理

 
  1. static void client_sock_read(int sock, short event, void *arg)

  2. {

  3. //前文已有,略

  4. ...

  5. pTask->offset += bytes;

  6. if (pTask->offset >= pTask->length) //recv current pkg done

  7. {

  8. //这个req接受完毕,准备反馈rsp

  9. if (pClientInfo->total_offset + pTask->length >= \

  10. pClientInfo->total_length)

  11. {

  12. /* current req recv done */

  13. pClientInfo->stage = FDFS_STORAGE_STAGE_NIO_SEND;

  14. pTask->req_count++;

  15. }

  16.  
  17. if (pClientInfo->total_offset == 0)

  18. {

  19. pClientInfo->total_offset = pTask->length;

  20. storage_deal_task(pTask);

  21. }

  22. else

  23. {

  24. //接受的是数据包,压入磁盘线程

  25. pClientInfo->total_offset += pTask->length;

  26.  
  27. /* continue write to file */

  28. storage_dio_queue_push(pTask);

  29. }

  30.  
  31. return;

  32. }

  33.  
  34. return;

  35. }

数据包的网络接收和磁盘的处理成为一个环,接收完一部分,通过队列压入磁盘队列,磁盘线程处理完以后又通过像工作线程的fd进行写,触发网络线程读取这个task。自此源源不断将数据传过来。

总结:

还是上图吧,整个处理流程如下图

fastdfs storage流程分析图

1 client发出请求,accept线程catch到描述符,初始化pTask结构,填入描述符,然后将pTask通过管道给work_entrance

2 进入storagerecvnotify_read函数

3 根据当前的pTask->stage等于FDFSSTORAGESTAGEINIT为fd创建读事件,绑定函数clientsock_read

4 调用storageuploadfile

5 storageuploadfile调用storagewriteto_file

6 storagewritetofile调用压磁盘队列函数storagedioqueuepush

7 storagedioqueuepush将pTask->stage |= FDFSSTORAGESTAGEDIO_THREAD

8 根

开始:

源码在sourceforge,github上都能找到。这里我使用的FastDFS v5.01版本,值得注意的是,这个版本干掉了该死了libevent,直接使用epoll,kqueue,可读性提高了不少,而且0依赖了,赞一个。

源码目录包括了common,test,client,stroage,tracker

按文件夹顺序和首字母进行分析:

common文件夹:

common_define.h:

跳过首字母a的文件先介绍这个,是因为这个文件定义了整个系统的一些环境变量,包括bool类型,全局变量等等。下文中你没见过,我也没提的变量或者宏都取自这里。


avltree.c/avltree.h:

对于avl树的定义和实现,这是FastDFS实现trunk功能和单盘恢复功能所依赖的数据结构

 
  1. typedef struct tagAVLTreeNode {

  2. void *data;

  3. struct tagAVLTreeNode *left;

  4. struct tagAVLTreeNode *right;

  5. byte balance;

  6. } AVLTreeNode;

  7.  
  8. typedef struct tagAVLTreeInfo {

  9. AVLTreeNode *root;

  10. FreeDataFunc free_data_func;

  11. CompareFunc compare_func;

  12. } AVLTreeInfo;

经典的数据结构,没有修改的原汁原味。


base64.c/base64.h:

FastDFS得到文件包含的信息后,用base64算法对其编码生成文件ID。


chain.c/chain.hi:

对于链表的实现。

 
  1. typedef struct tagChainNode

  2. {

  3. void *data;

  4. struct tagChainNode *next;

  5. } ChainNode;

  6.  
  7. typedef struct

  8. {

  9. int type;

  10. ChainNode *head;

  11. ChainNode *tail;

  12. FreeDataFunc freeDataFunc;

  13. CompareFunc compareFunc;

  14. } ChainList;

type变量是定义链表的使用方式的:

CHAINTYPEINSERT: insert new node before head

CHAINTYPEAPPEND: insert new node after tail

CHAINTYPESORTED: sorted chain

在fast_mblock中#include了它,但是并没有使用,直接注释了这个include也成功编译无报错,可能后续会使用吧?这里会和鱼大咨询下。mark。


connectpool.c/connectpool.h:

连接池的定义与实现

 
  1. typedef struct

  2. {

  3. int sock;

  4. int port;

  5. char ip_addr[IP_ADDRESS_SIZE];

  6. } ConnectionInfo;

  7.  
  8. struct tagConnectionManager;

  9.  
  10. typedef struct tagConnectionNode {

  11. ConnectionInfo *conn;

  12. struct tagConnectionManager *manager;

  13. struct tagConnectionNode *next;

  14. time_t atime; //last access time

  15. } ConnectionNode;

  16.  
  17. typedef struct tagConnectionManager {

  18. ConnectionNode *head;

  19. int total_count; //total connections

  20. int free_count; //free connections

  21. pthread_mutex_t lock;

  22. } ConnectionManager;

  23.  
  24. typedef struct tagConnectionPool {

  25. HashArray hash_array; //key is ip:port, value is ConnectionManager

  26. pthread_mutex_t lock;

  27. int connect_timeout;

  28. int max_count_per_entry; //0 means no limit

  29.  
  30. /*

  31. connections whose the idle time exceeds this time will be closed

  32. */

  33. int max_idle_time;

  34. } ConnectionPool;

呃,注释已经一目了然了。

三层结构

pool->manager->node

pool使用哈希来定位manager,因为作为key的ip:port是唯一的,而后用链表来管理该节点的所有连接。


fastmblock.c/fastmblock.h:

链表的一个变种,存储有已分配的对象和已经释放的对象,大致相当于一个对象池,在trunk功能中被使用。

 
  1. /* free node chain */

  2. struct fast_mblock_node

  3. {

  4. struct fast_mblock_node *next;

  5. char data[0]; //the data buffer

  6. };

  7.  
  8. /* malloc chain */

  9. struct fast_mblock_malloc

  10. {

  11. struct fast_mblock_malloc *next;

  12. };

  13.  
  14. struct fast_mblock_man

  15. {

  16. struct fast_mblock_node *free_chain_head; //free node chain

  17. struct fast_mblock_malloc *malloc_chain_head; //malloc chain to be freed

  18. int element_size; //element size

  19. int alloc_elements_once; //alloc elements once

  20. pthread_mutex_t lock; //the lock for read / write free node chain

  21. };


fasttaskqueue.c/fasttaskqueue.h:

任务队列,挺重要的一个数据结构

 
  1. typedef struct ioevent_entry

  2. {

  3. int fd;

  4. FastTimerEntry timer;

  5. IOEventCallback callback;

  6. } IOEventEntry;

  7.  
  8. struct nio_thread_data

  9. {

  10. struct ioevent_puller ev_puller;

  11. struct fast_timer timer;

  12. int pipe_fds[2];

  13. struct fast_task_info *deleted_list; //链向已被删除的任务指针,复用了已经分配的内存

  14. };

  15.  
  16. struct fast_task_info

  17. {

  18. IOEventEntry event;

  19. char client_ip[IP_ADDRESS_SIZE];

  20. void *arg; //extra argument pointer

  21. char *data; //buffer for write or recv

  22. int size; //alloc size

  23. int length; //data length

  24. int offset; //current offset

  25. int req_count; //request count

  26. TaskFinishCallBack finish_callback; //任务结束回调

  27. struct nio_thread_data *thread_data;

  28. struct fast_task_info *next;

  29. };

  30.  
  31. struct fast_task_queue

  32. {

  33. struct fast_task_info *head; //头尾指针都存在,分别用来做队列的出队和入队

  34. struct fast_task_info *tail;

  35. pthread_mutex_t lock;

  36. int max_connections;

  37. int min_buff_size;

  38. int max_buff_size;

  39. int arg_size;

  40. bool malloc_whole_block;

  41. };


fasttimer.c/fasttimer.h:

时间哈希表,以unix时间戳作为key,用双向链表解决冲突,可以根据当前的使用量进行rehash等操作。

在刚才的fasttaskqueue中被使用

 
  1. typedef struct fast_timer_entry {

  2. int64_t expires;

  3. void *data;

  4. struct fast_timer_entry *prev;

  5. struct fast_timer_entry *next;

  6. bool rehash;

  7. } FastTimerEntry;

  8.  
  9. typedef struct fast_timer_slot {

  10. struct fast_timer_entry head;

  11. } FastTimerSlot;

  12.  
  13. typedef struct fast_timer {

  14. int slot_count; //time wheel slot count

  15. int64_t base_time; //base time for slot 0

  16. int64_t current_time;

  17. FastTimerSlot *slots;

  18. } FastTimer;


fdfsglobal.c/fdfsglobal.h:

定义了fdfs系统所使用的全局变量,包括超时,版本号等等

 
  1. int g_fdfs_connect_timeout = DEFAULT_CONNECT_TIMEOUT;

  2. int g_fdfs_network_timeout = DEFAULT_NETWORK_TIMEOUT;

  3. char g_fdfs_base_path[MAX_PATH_SIZE] = {'/', 't', 'm', 'p', '\0'};

  4. Version g_fdfs_version = {5, 1};

  5. bool g_use_connection_pool = false;

  6. ConnectionPool g_connection_pool;

  7. int g_connection_pool_max_idle_time = 3600;


fdfshttpshared.c/fdfshttpshare.h:

FastDFS使用token来防盗链和分享图片,这一段我也不确定。回头再来看。


hash.c/hash.h:

经典的哈希结构,在FastDFS中应用的很广

哈希找到域,而后用链表解决冲突

 
  1. typedef struct tagHashData

  2. {

  3. int key_len;

  4. int value_len;

  5. int malloc_value_size;

  6.  
  7. #ifdef HASH_STORE_HASH_CODE

  8. unsigned int hash_code;

  9. #endif

  10.  
  11. char *value;

  12. struct tagHashData *next; //解决冲突

  13. char key[0];

  14. } HashData;

  15.  
  16. typedef struct tagHashArray

  17. {

  18. HashData **buckets;

  19. HashFunc hash_func;

  20. int item_count;

  21. unsigned int *capacity;

  22. double load_factor; //hash的负载因子,在FastDFS中大于1.0进行rehash

  23. int64_t max_bytes; //最大占用字节,用于计算负载因子

  24. int64_t bytes_used; //已经使用字节,用于计算负载因子

  25. bool is_malloc_capacity;

  26. bool is_malloc_value;

  27. unsigned int lock_count; //锁总数,为了线程安全

  28. pthread_mutex_t *locks;

  29. } HashArray;

  30.  
  31. typedef struct tagHashStat //所有hash的统计情况

  32. {

  33. unsigned int capacity;

  34. int item_count;

  35. int bucket_used;

  36. double bucket_avg_length;

  37. int bucket_max_length;

  38. } HashStat;


httpfunc.c/httpfunc.h:

http功能已经被砍掉了,这个也回头来看。


inifilereader.c/inifilereader.h:

FastDFS用于初始化加载配置文件的函数。


ioevent.c/ioevent.h && ioeventloop.c/ioeventloop.h:

对epoll,kqueue进行简单封装,成为一个有时间和网络的事件库。这部分逻辑应该会开独立的一章来分析


linuxstacktrace.c/linuxstacktrace.h:

 
  1. /**

  2. * This source file is used to print out a stack-trace when your program

  3. * segfaults. It is relatively reliable and spot-on accurate.

  4. */

这个模块是在程序段错误后输出栈跟踪信息,呃似乎不是鱼大写的


localipfunc.c/localipfunc.h:

基于系统调用getifaddrs来获取本地IP


logger.c/logger.h:

这个太明显了,log模块


md5.c/md5.h:

fdfshttpshared.c中被调用,在fdfshttpgentoken的方法中对secretkey,file_id,timestamp进行md5得到token


mimefileparser.c/mimefileparser.h:

从配置文件中加载mime识别的配置,至于什么是mime。。我也不知道,我问问大神们看看。


osbits.h:

定义了OS的位数


processctrl.c/processctrl.h:

从配置文件中载入pid路径,定义了pid文件的增删查改,并且提供了进程停止,重启等方法


pthreadfunc.c/pthreadfunc.h:

线程相关的操作,包括初始化,创建,杀死线程


schedthread.c/schedthread.h:

定时任务线程的模块,按照hour:minute的期限执行任务

 
  1. typedef struct tagScheduleEntry

  2. {

  3. int id; //the task id

  4.  
  5. /* the time base to execute task, such as 00:00, interval is 3600,

  6. means execute the task every hour as 1:00, 2:00, 3:00 etc. */

  7. TimeInfo time_base;

  8.  
  9. int interval; //the interval for execute task, unit is second

  10.  
  11. TaskFunc task_func; //callback function

  12. void *func_args; //arguments pass to callback function

  13.  
  14. /* following are internal fields, do not set manually! */

  15. time_t next_call_time;

  16. struct tagScheduleEntry *next;

  17. } ScheduleEntry;

  18.  
  19. typedef struct

  20. {

  21. ScheduleEntry *entries;

  22. int count;

  23. } ScheduleArray;

  24.  
  25. typedef struct

  26. {

  27. ScheduleArray scheduleArray;

  28. ScheduleEntry *head; //schedule chain head

  29. ScheduleEntry *tail; //schedule chain tail

  30. bool *pcontinue_flag;

  31. } ScheduleContext;

稍微看了下实现的算法,这是一个变种的链表,实现了一个变种的队列。

但是所有的数据都存在scheduleArray这个数组里面,每次新任务插入后,会对数组按时间进行一次排序

这样可以保证头指针的是最先需要执行的。

而后每次对head进行出队,初始化next域以后重新从tail入队。

总体来看是非常的简单高效的。


sharedfunc.c/sharedfunc.h:

一些工具函数,比如设置随机种子什么的,没必要单独开个文件,所以放在一起了。


sockopt.c/sockopt.h:

socket的一些工具函数,进行了简单的封装。


tracker文件夹:

先分析tracker是因为tracker只集成了网络部分,而storage还有处理磁盘吞吐的,相对复杂一些

fdfssharefunc.c/fdfssharefunc.h

tracker和storage共用的一些工具函数,比如根据IP和端口获取tracker的ID诸如此类的


fdfs_trackerd.c:

tracker的入口函数


trackerdump.c/trackerdump.h:

实现了fdfsdumptrackerglobalvarstofile这个函数

当tracker收到了SIGUSR1或者SIGUSR2信号,将启动sigDumpHandler来调用这个函数,将tracker当前的状态dump进FastDFS跟目录的logs/tracker_dump.log中

关于如何根据该dump文件恢复的,目前没看到,后面再补充


trackerfunc.c/trackerfunc.h:

实现了trackerloadfromconffile这个函数

将tracker的一些基本必要信息,从conf_file中导出


trackerglobal.c/trackerglobal.h:

记录了tracker使用的一些全局变量


trackerhttpcheck.c/trackerhttpcheck.h:

这个模块会对tracker所管理的所有group的可用storage做检测,测试所有的http端口是否可用


trackermem.c/trackermem.h:

这个模块维护了内存的所有数据,包括集群运行情况等等,提供了save,change和load的接口对集群的总情况进行修改


trackernio.c/trackernio.h:

nio的模块在common/ioevent和common/ioevent_loop的基础上进行调用


trackerproto.c/trackerproto.h:

定义了tracker通信的协议,有时间可以分析下。


trackerrelationship.c/trackerrelationship.h:

定义了tracker之间通信的方式,并且定义了选出leader,ping leader等功能,有时间可以分析下。


trackerservice.c/trackerservice.h:

tracker的逻辑层处理,各个请求在nio后进入work线程,而后分发到各个模块


trackerstatus.c/trackerstatus.h:

tracker状态的save和load模块


tracker_types.h:

定义了tracker所用到的所有类型


storage文件夹:

fdfs_storage.c:storage的入口函数


storagedio.c/storagedio.h:

使用common/fasttaskqueue实现了异步的磁盘IO,新任务由storagedioqueue_push方法入队

同时包含了trunk模块的处理,trunk模块后面再提


storagediskrecovery.c/storagediskrecovery.h:

storage的单盘恢复算法,用于故障恢复


storagedump.c/storagedump.h:

和tracker_dump原理相同


storagefunc.c/storagefunc.h:

storagefuncinit函数对应着tracker的trackerloadfromconffile函数

除此之外,还提供了根据storage_id或者ip判断是否是本机的函数

还提供了一些数据持久化的接口


storageglobal.c/storageglobal.h:

定义了storage使用的全局变量


storageipchangeddealer.c/storageipchangerdealer.h:

storage实现ip地址改变的模块

 
  1. int storage_get_my_tracker_client_ip(); //获取storage作为tracker客户端的ip

  2.  
  3. int storage_changelog_req(); //接入tracker的changelog

  4. int storage_check_ip_changed(); //检查ip是否改变


storagenio.c/storagenio.h:

nio的模块在common/ioevent和common/ioevent_loop的基础上进行调用


storageparamgetter.c/storageparamgetter.h:

storagegetparamsfromtracker函数,顾名思义,从tracker获取自身的参数


storageservice.c/storageservice.h:

storage的逻辑层处理,各个请求在nio后进入work线程,而后分发到各个模块


storagesync.c/storagesync.h:

storage的同步模块,众所周知,FastDFS的同步模块是根据时间戳进行的弱一致性同步


trackerclientthread.c/trackerclientthread.h

tracker_report的前缀提示的很明显,这部分是storage作为tracker的客户端,向tracker发送心跳,汇报自己的状态等等

全部接口如下:

 
  1. int tracker_report_init();

  2. int tracker_report_destroy();

  3. int tracker_report_thread_start();

  4. int kill_tracker_report_threads();

  5.  
  6. int tracker_report_join(ConnectionInfo *pTrackerServer, \

  7. const int tracker_index, const bool sync_old_done);

  8. int tracker_report_storage_status(ConnectionInfo *pTrackerServer, \

  9. FDFSStorageBrief *briefServer);

  10. int tracker_sync_src_req(ConnectionInfo *pTrackerServer, \

  11. StorageBinLogReader *pReader);

  12. int tracker_sync_diff_servers(ConnectionInfo *pTrackerServer, \

  13. FDFSStorageBrief *briefServers, const int server_count);

  14. int tracker_deal_changelog_response(ConnectionInfo *pTrackerServer);

trunk_mgr:

这是storage文件的子目录,实现了trunk功能

trunk功能比较零碎,我目前还没搞明白,比如为什么storage和trunk模块交互,storage是作为client出现的,而不是直接调用trunk。

这部分内容应该要单独开一章来分析。


FastDFS源码解析(2)--------trunk模块分析

trunk功能是把大量小文件合并存储,大量的小文件会大量消耗linux文件系统的node,使树变的过于庞大,降低了读写效率

因此小文件合并存储能显著缓解这一压力

我将对上传和下载流程分析来追踪trunk模块的行为。

在storageservice模块中,storageservice.c/storagedealtask对请求安装cmd进行分离逻辑来处理

在storageuploadfile中处理上传逻辑

 
  1. /**

  2. 1 byte: store path index

  3. 8 bytes: file size

  4. FDFS_FILE_EXT_NAME_MAX_LEN bytes: file ext name, do not include dot (.)

  5. file size bytes: file content

  6. **/

  7. static int storage_upload_file(struct fast_task_info *pTask, bool bAppenderFile)

  8. {

  9. StorageClientInfo *pClientInfo;

  10. StorageFileContext *pFileContext;

  11. DisconnectCleanFunc clean_func;

  12. char *p;

  13. char filename[128];

  14. char file_ext_name[FDFS_FILE_PREFIX_MAX_LEN + 1];

  15. int64_t nInPackLen;

  16. int64_t file_offset;

  17. int64_t file_bytes;

  18. int crc32;

  19. int store_path_index;

  20. int result;

  21. int filename_len;

  22.  
  23. pClientInfo = (StorageClientInfo *)pTask->arg;

  24. pFileContext = &(pClientInfo->file_context);

  25. nInPackLen = pClientInfo->total_length - sizeof(TrackerHeader);

  26.  
  27. //对包头大小进行验证

  28.  
  29. if (nInPackLen < 1 + FDFS_PROTO_PKG_LEN_SIZE +

  30. FDFS_FILE_EXT_NAME_MAX_LEN)

  31. {

  32. logError("file: "__FILE__", line: %d, " \

  33. "cmd=%d, client ip: %s, package size " \

  34. INT64_PRINTF_FORMAT" is not correct, " \

  35. "expect length >= %d", __LINE__, \

  36. STORAGE_PROTO_CMD_UPLOAD_FILE, \

  37. pTask->client_ip, nInPackLen, \

  38. 1 + FDFS_PROTO_PKG_LEN_SIZE + \

  39. FDFS_FILE_EXT_NAME_MAX_LEN);

  40. pClientInfo->total_length = sizeof(TrackerHeader);

  41. return EINVAL;

  42. }

  43.  
  44. //跳过包头第一段,获得文件路径索引号

  45. p = pTask->data + sizeof(TrackerHeader);

  46. store_path_index = *p++;

  47.  
  48. if (store_path_index == -1)

  49. {

  50. if ((result=storage_get_storage_path_index( \

  51. &store_path_index)) != 0)

  52. {

  53. logError("file: "__FILE__", line: %d, " \

  54. "get_storage_path_index fail, " \

  55. "errno: %d, error info: %s", __LINE__, \

  56. result, STRERROR(result));

  57. pClientInfo->total_length = sizeof(TrackerHeader);

  58. return result;

  59. }

  60. }

  61. else if (store_path_index < 0 || store_path_index >= \

  62. g_fdfs_store_paths.count)

  63. {

  64. logError("file: "__FILE__", line: %d, " \

  65. "client ip: %s, store_path_index: %d " \

  66. "is invalid", __LINE__, \

  67. pTask->client_ip, store_path_index);

  68. pClientInfo->total_length = sizeof(TrackerHeader);

  69. return EINVAL;

  70. }

  71.  
  72. //获取文件大小

  73. file_bytes = buff2long(p);

  74. p += FDFS_PROTO_PKG_LEN_SIZE;

  75. if (file_bytes < 0 || file_bytes != nInPackLen - \

  76. (1 + FDFS_PROTO_PKG_LEN_SIZE + \

  77. FDFS_FILE_EXT_NAME_MAX_LEN))

  78. {

  79. logError("file: "__FILE__", line: %d, " \

  80. "client ip: %s, pkg length is not correct, " \

  81. "invalid file bytes: "INT64_PRINTF_FORMAT \

  82. ", total body length: "INT64_PRINTF_FORMAT, \

  83. __LINE__, pTask->client_ip, file_bytes, nInPackLen);

  84. pClientInfo->total_length = sizeof(TrackerHeader);

  85. return EINVAL;

  86. }

  87.  
  88. //获取文件名

  89. memcpy(file_ext_name, p, FDFS_FILE_EXT_NAME_MAX_LEN);

  90. *(file_ext_name + FDFS_FILE_EXT_NAME_MAX_LEN) = '\0';

  91. p += FDFS_FILE_EXT_NAME_MAX_LEN;

  92. if ((result=fdfs_validate_filename(file_ext_name)) != 0)

  93. {

  94. logError("file: "__FILE__", line: %d, " \

  95. "client ip: %s, file_ext_name: %s " \

  96. "is invalid!", __LINE__, \

  97. pTask->client_ip, file_ext_name);

  98. pClientInfo->total_length = sizeof(TrackerHeader);

  99. return result;

  100. }

  101.  
  102. pFileContext->calc_crc32 = true;

  103. pFileContext->calc_file_hash = g_check_file_duplicate;

  104. pFileContext->extra_info.upload.start_time = g_current_time;

  105.  
  106. strcpy(pFileContext->extra_info.upload.file_ext_name, file_ext_name);

  107. storage_format_ext_name(file_ext_name, \

  108. pFileContext->extra_info.upload.formatted_ext_name);

  109. pFileContext->extra_info.upload.trunk_info.path. \

  110. store_path_index = store_path_index;

  111. pFileContext->extra_info.upload.file_type = _FILE_TYPE_REGULAR;

  112. pFileContext->sync_flag = STORAGE_OP_TYPE_SOURCE_CREATE_FILE;

  113. pFileContext->timestamp2log = pFileContext->extra_info.upload.start_time;

  114. pFileContext->op = FDFS_STORAGE_FILE_OP_WRITE;

  115.  
  116. //如果是追加写文件,注目额外的文件追加命令值

  117. if (bAppenderFile)

  118. {

  119. pFileContext->extra_info.upload.file_type |= \

  120. _FILE_TYPE_APPENDER;

  121. }

  122. else

  123. {

  124. //判断是否开了trunk_file功能,根据大小检查是否需要trunk合并存储

  125. if (g_if_use_trunk_file && trunk_check_size( \

  126. TRUNK_CALC_SIZE(file_bytes)))

  127. {

  128. pFileContext->extra_info.upload.file_type |= \

  129. _FILE_TYPE_TRUNK;

  130. }

  131. }

  132.  
  133. //根据上一步的检查需要开启trunk合并存储

  134. if (pFileContext->extra_info.upload.file_type & _FILE_TYPE_TRUNK)

  135. {

  136. FDFSTrunkFullInfo *pTrunkInfo;

  137.  
  138. pFileContext->extra_info.upload.if_sub_path_alloced = true;

  139. pTrunkInfo = &(pFileContext->extra_info.upload.trunk_info);

  140. //为trunk文件名分配空间,并添加到缓存

  141. if ((result=trunk_client_trunk_alloc_space( \

  142. TRUNK_CALC_SIZE(file_bytes), pTrunkInfo)) != 0)

  143. {

  144. pClientInfo->total_length = sizeof(TrackerHeader);

  145. return result;

  146. }

  147.  
  148. clean_func = dio_trunk_write_finish_clean_up;

  149. file_offset = TRUNK_FILE_START_OFFSET((*pTrunkInfo));

  150. pFileContext->extra_info.upload.if_gen_filename = true;

  151. trunk_get_full_filename(pTrunkInfo, pFileContext->filename, \

  152. sizeof(pFileContext->filename));

  153. //注册trunk操作的回调

  154. pFileContext->extra_info.upload.before_open_callback = \

  155. dio_check_trunk_file_when_upload;

  156. pFileContext->extra_info.upload.before_close_callback = \

  157. dio_write_chunk_header;

  158. pFileContext->open_flags = O_RDWR | g_extra_open_file_flags;

  159. }

  160. else

  161. {

  162. //普通文件的方式,略过

  163. ...

  164. }

  165.  
  166. return storage_write_to_file(pTask, file_offset, file_bytes, \

  167. p - pTask->data, dio_write_file, \

  168. storage_upload_file_done_callback, \

  169. clean_func, store_path_index);

  170. }

追踪一下trunkclienttrunkallocspace的实现

 
  1. int trunk_client_trunk_alloc_space(const int file_size, \

  2. FDFSTrunkFullInfo *pTrunkInfo)

  3. {

  4. int result;

  5. ConnectionInfo trunk_server;

  6. ConnectionInfo *pTrunkServer;

  7.  
  8. //如果自己就是trunker,直接操作

  9. if (g_if_trunker_self)

  10. {

  11. return trunk_alloc_space(file_size, pTrunkInfo);

  12. }

  13.  
  14. //否则根据trunk_server的ip和port进行连接

  15. if (*(g_trunk_server.ip_addr) == '\0')

  16. {

  17. logError("file: "__FILE__", line: %d, " \

  18. "no trunk server", __LINE__);

  19. return EAGAIN;

  20. }

  21.  
  22. memcpy(&trunk_server, &g_trunk_server, sizeof(ConnectionInfo));

  23. if ((pTrunkServer=tracker_connect_server(&trunk_server, &result)) == NULL)

  24. {

  25. logError("file: "__FILE__", line: %d, " \

  26. "can't alloc trunk space because connect to trunk " \

  27. "server %s:%d fail, errno: %d", __LINE__, \

  28. trunk_server.ip_addr, trunk_server.port, result);

  29. return result;

  30. }

  31.  
  32. //使用client api进行操作

  33. result = trunk_client_trunk_do_alloc_space(pTrunkServer, \

  34. file_size, pTrunkInfo);

  35.  
  36. tracker_disconnect_server_ex(pTrunkServer, result != 0);

  37. return result;

  38. }

对直接调用和client_api操作分别追踪

 
  1. nt trunk_alloc_space(const int size, FDFSTrunkFullInfo *pResult)

  2. {

  3. FDFSTrunkSlot target_slot;

  4. FDFSTrunkSlot *pSlot;

  5. FDFSTrunkNode *pPreviousNode;

  6. FDFSTrunkNode *pTrunkNode;

  7. int result;

  8.  
  9. STORAGE_TRUNK_CHECK_STATUS();

  10.  
  11. target_slot.size = (size > g_slot_min_size) ? size : g_slot_min_size;

  12. target_slot.head = NULL;

  13.  
  14. pPreviousNode = NULL;

  15. pTrunkNode = NULL;

  16. //分配trunk需要锁

  17. pthread_mutex_lock(&trunk_mem_lock);

  18. //寻找可以插入该文件的地方

  19. while (1)

  20. {

  21. pSlot = (FDFSTrunkSlot *)avl_tree_find_ge(tree_info_by_sizes \

  22. + pResult->path.store_path_index, &target_slot);

  23. if (pSlot == NULL)

  24. {

  25. break;

  26. }

  27.  
  28. pPreviousNode = NULL;

  29. pTrunkNode = pSlot->head;

  30. while (pTrunkNode != NULL && \

  31. pTrunkNode->trunk.status == FDFS_TRUNK_STATUS_HOLD)

  32. {

  33. pPreviousNode = pTrunkNode;

  34. pTrunkNode = pTrunkNode->next;

  35. }

  36.  
  37. if (pTrunkNode != NULL)

  38. {

  39. break;

  40. }

  41.  
  42. target_slot.size = pSlot->size + 1;

  43. }

  44.  
  45. //找到了,于是插入

  46. if (pTrunkNode != NULL)

  47. {

  48. if (pPreviousNode == NULL)

  49. {

  50. pSlot->head = pTrunkNode->next;

  51. if (pSlot->head == NULL)

  52. {

  53. trunk_delete_size_tree_entry(pResult->path. \

  54. store_path_index, pSlot);

  55. }

  56. }

  57. else

  58. {

  59. pPreviousNode->next = pTrunkNode->next;

  60. }

  61.  
  62. trunk_free_block_delete(&(pTrunkNode->trunk));

  63. }

  64. else

  65. {

  66. //没找到,为他创建一个单独的trunk_file

  67. pTrunkNode = trunk_create_trunk_file(pResult->path. \

  68. store_path_index, &result);

  69. if (pTrunkNode == NULL)

  70. {

  71. pthread_mutex_unlock(&trunk_mem_lock);

  72. return result;

  73. }

  74. }

  75. pthread_mutex_unlock(&trunk_mem_lock);

  76.  
  77. result = trunk_split(pTrunkNode, size);

  78. if (result != 0)

  79. {

  80. return result;

  81. }

  82.  
  83. pTrunkNode->trunk.status = FDFS_TRUNK_STATUS_HOLD;

  84. result = trunk_add_free_block(pTrunkNode, true);

  85. if (result == 0)

  86. {

  87. memcpy(pResult, &(pTrunkNode->trunk), \

  88. sizeof(FDFSTrunkFullInfo));

  89. }

  90.  
  91. return result;

  92. }

 
  1. static int trunk_client_trunk_do_alloc_space(ConnectionInfo *pTrunkServer, \

  2. const int file_size, FDFSTrunkFullInfo *pTrunkInfo)

  3. {

  4. TrackerHeader *pHeader;

  5.  
  6. //初始化请求包等等数据,略过

  7. ...

  8.  
  9. pHeader->cmd = STORAGE_PROTO_CMD_TRUNK_ALLOC_SPACE;

  10.  
  11. if ((result=tcpsenddata_nb(pTrunkServer->sock, out_buff, \

  12. sizeof(out_buff), g_fdfs_network_timeout)) != 0)

  13. {

  14. logError("file: "__FILE__", line: %d, " \

  15. "send data to storage server %s:%d fail, " \

  16. "errno: %d, error info: %s", __LINE__, \

  17. pTrunkServer->ip_addr, pTrunkServer->port, \

  18. result, STRERROR(result));

  19.  
  20. return result;

  21. }

  22.  
  23. p = (char *)&trunkBuff;

  24. if ((result=fdfs_recv_response(pTrunkServer, \

  25. &p, sizeof(FDFSTrunkInfoBuff), &in_bytes)) != 0)

  26. {

  27. return result;

  28. }

  29.  
  30. //设置pTrunckInfo信息,略过

  31. ...

  32.  
  33. return 0;

  34. }

追踪解析STORAGEPROTOCMDTRUNKALLOC_SPACE行为的服务端函数

storageservice.c会将其由storageservertrunkalloc_space函数来解析

 
  1. /**

  2. * request package format:

  3. * FDFS_GROUP_NAME_MAX_LEN bytes: group_name

  4. * 4 bytes: file size

  5. * 1 bytes: store_path_index

  6. *

  7. * response package format:

  8. * 1 byte: store_path_index

  9. * 1 byte: sub_path_high

  10. * 1 byte: sub_path_low

  11. * 4 bytes: trunk file id

  12. * 4 bytes: trunk offset

  13. * 4 bytes: trunk size

  14. * **/

  15. static int storage_server_trunk_alloc_space(struct fast_task_info *pTask)

  16. {

  17. StorageClientInfo *pClientInfo;

  18. FDFSTrunkInfoBuff *pApplyBody;

  19. char *in_buff;

  20. char group_name[FDFS_GROUP_NAME_MAX_LEN + 1];

  21. FDFSTrunkFullInfo trunkInfo;

  22. int64_t nInPackLen;

  23. int file_size;

  24. int result;

  25.  
  26. pClientInfo = (StorageClientInfo *)pTask->arg;

  27. nInPackLen = pClientInfo->total_length - sizeof(TrackerHeader);

  28. pClientInfo->total_length = sizeof(TrackerHeader);

  29.  
  30. CHECK_TRUNK_SERVER(pTask)

  31.  
  32. if (nInPackLen != FDFS_GROUP_NAME_MAX_LEN + 5)

  33. {

  34. logError("file: "__FILE__", line: %d, " \

  35. "cmd=%d, client ip: %s, package size " \

  36. INT64_PRINTF_FORMAT" is not correct, " \

  37. "expect length: %d", __LINE__, \

  38. STORAGE_PROTO_CMD_TRUNK_ALLOC_SPACE, \

  39. pTask->client_ip, nInPackLen, \

  40. FDFS_GROUP_NAME_MAX_LEN + 5);

  41. return EINVAL;

  42. }

  43.  
  44. in_buff = pTask->data + sizeof(TrackerHeader);

  45. memcpy(group_name, in_buff, FDFS_GROUP_NAME_MAX_LEN);

  46. *(group_name + FDFS_GROUP_NAME_MAX_LEN) = '\0';

  47. if (strcmp(group_name, g_group_name) != 0)

  48. {

  49. logError("file: "__FILE__", line: %d, " \

  50. "client ip:%s, group_name: %s " \

  51. "not correct, should be: %s", \

  52. __LINE__, pTask->client_ip, \

  53. group_name, g_group_name);

  54. return EINVAL;

  55. }

  56.  
  57. file_size = buff2int(in_buff + FDFS_GROUP_NAME_MAX_LEN);

  58. if (file_size < 0 || !trunk_check_size(file_size))

  59. {

  60. logError("file: "__FILE__", line: %d, " \

  61. "client ip:%s, invalid file size: %d", \

  62. __LINE__, pTask->client_ip, file_size);

  63. return EINVAL;

  64. }

  65.  
  66. trunkInfo.path.store_path_index = *(in_buff+FDFS_GROUP_NAME_MAX_LEN+4);

  67. //实质还是调用的trunk_alloc_space

  68. if ((result=trunk_alloc_space(file_size, &trunkInfo)) != 0)

  69. {

  70. return result;

  71. }

  72.  
  73. pApplyBody = (FDFSTrunkInfoBuff *)(pTask->data+sizeof(TrackerHeader));

  74. pApplyBody->store_path_index = trunkInfo.path.store_path_index;

  75. pApplyBody->sub_path_high = trunkInfo.path.sub_path_high;

  76. pApplyBody->sub_path_low = trunkInfo.path.sub_path_low;

  77. int2buff(trunkInfo.file.id, pApplyBody->id);

  78. int2buff(trunkInfo.file.offset, pApplyBody->offset);

  79. int2buff(trunkInfo.file.size, pApplyBody->size);

  80.  
  81. pClientInfo->total_length = sizeof(TrackerHeader) + \

  82. sizeof(FDFSTrunkInfoBuff);

  83. return 0;

  84. }

trunkclienttrunkallocspace会向同组内唯一的trunk_server申请空间

最终的实现还是trunkallocspace函数

trunk相当于一个KV吧。介个会不会出现单点问题,这台trunk失效以后如何冗余故障,接着往下分析看看

以下这段函数是在trackerclientthread里面的,大致是storage和tracker的一个交互,如果有故障冗余,这里应该存在机制

 
  1. static int tracker_check_response(ConnectionInfo *pTrackerServer, \

  2. bool *bServerPortChanged)

  3. {

  4. int64_t nInPackLen;

  5. TrackerHeader resp;

  6. int server_count;

  7. int result;

  8. char in_buff[1 + (2 + FDFS_MAX_SERVERS_EACH_GROUP) * \

  9. sizeof(FDFSStorageBrief)];

  10. FDFSStorageBrief *pBriefServers;

  11. char *pFlags;

  12.  
  13. //解析包

  14. ...

  15.  
  16. //tracker_leader变化

  17. if ((*pFlags) & FDFS_CHANGE_FLAG_TRACKER_LEADER)

  18. {

  19. ...

  20. }

  21.  
  22. //trunk_leader变化

  23. if ((*pFlags) & FDFS_CHANGE_FLAG_TRUNK_SERVER)

  24. {

  25. if (server_count < 1)

  26. {

  27. logError("file: "__FILE__", line: %d, " \

  28. "tracker server %s:%d, reponse server " \

  29. "count: %d < 1", __LINE__, \

  30. pTrackerServer->ip_addr, \

  31. pTrackerServer->port, server_count);

  32. return EINVAL;

  33. }

  34.  
  35. //未启动trunk服务,从tracker重新加载

  36. if (!g_if_use_trunk_file)

  37. {

  38. logInfo("file: "__FILE__", line: %d, " \

  39. "reload parameters from tracker server", \

  40. __LINE__);

  41. storage_get_params_from_tracker();

  42. }

  43.  
  44. //还未启动trunk服务,报错

  45. if (!g_if_use_trunk_file)

  46. {

  47. logWarning("file: "__FILE__", line: %d, " \

  48. "tracker server %s:%d, " \

  49. "my g_if_use_trunk_file is false, " \

  50. "can't support trunk server!", \

  51. __LINE__, pTrackerServer->ip_addr, \

  52. pTrackerServer->port);

  53. }

  54. else

  55. {

  56. memcpy(g_trunk_server.ip_addr, pBriefServers->ip_addr, \

  57. IP_ADDRESS_SIZE - 1);

  58. *(g_trunk_server.ip_addr + (IP_ADDRESS_SIZE - 1)) = '\0';

  59. g_trunk_server.port = buff2int(pBriefServers->port);

  60. //如果本地的ip端口和trunk_server一致

  61. if (is_local_host_ip(g_trunk_server.ip_addr) && \

  62. g_trunk_server.port == g_server_port)

  63. {

  64. //我已经是trunk了,tracker重启把我重新选为trunk了

  65. if (g_if_trunker_self)

  66. {

  67. logWarning("file: "__FILE__", line: %d, " \

  68. "I am already the trunk server %s:%d, " \

  69. "may be the tracker server restart", \

  70. __LINE__, g_trunk_server.ip_addr, \

  71. g_trunk_server.port);

  72. }

  73. else

  74. {

  75. //我成为了新的trunk

  76. logInfo("file: "__FILE__", line: %d, " \

  77. "I am the the trunk server %s:%d", __LINE__, \

  78. g_trunk_server.ip_addr, g_trunk_server.port);

  79.  
  80. tracker_fetch_trunk_fid(pTrackerServer);

  81. g_if_trunker_self = true;

  82.  
  83. if ((result=storage_trunk_init()) != 0)

  84. {

  85. return result;

  86. }

  87.  
  88. if (g_trunk_create_file_advance && \

  89. g_trunk_create_file_interval > 0)

  90. {

  91. ScheduleArray scheduleArray;

  92. ScheduleEntry entries[1];

  93.  
  94. entries[0].id = TRUNK_FILE_CREATOR_TASK_ID;

  95. entries[0].time_base = g_trunk_create_file_time_base;

  96. entries[0].interval = g_trunk_create_file_interval;

  97. entries[0].task_func = trunk_create_trunk_file_advance;

  98. entries[0].func_args = NULL;

  99.  
  100. scheduleArray.count = 1;

  101. scheduleArray.entries = entries;

  102. sched_add_entries(&scheduleArray);

  103. }

  104.  
  105. trunk_sync_thread_start_all();

  106. }

  107. }

  108. else

  109. {

  110. logInfo("file: "__FILE__", line: %d, " \

  111. "the trunk server is %s:%d", __LINE__, \

  112. g_trunk_server.ip_addr, g_trunk_server.port);

  113.  
  114. //我以前是trunk,我让权

  115. if (g_if_trunker_self)

  116. {

  117. int saved_trunk_sync_thread_count;

  118.  
  119. logWarning("file: "__FILE__", line: %d, " \

  120. "I am the old trunk server, " \

  121. "the new trunk server is %s:%d", \

  122. __LINE__, g_trunk_server.ip_addr, \

  123. g_trunk_server.port);

  124.  
  125. tracker_report_trunk_fid(pTrackerServer);

  126. g_if_trunker_self = false;

  127.  
  128. saved_trunk_sync_thread_count = \

  129. g_trunk_sync_thread_count;

  130. if (saved_trunk_sync_thread_count > 0)

  131. {

  132. logInfo("file: "__FILE__", line: %d, "\

  133. "waiting %d trunk sync " \

  134. "threads exit ...", __LINE__, \

  135. saved_trunk_sync_thread_count);

  136. }

  137.  
  138. while (g_trunk_sync_thread_count > 0)

  139. {

  140. usleep(50000);

  141. }

  142.  
  143. if (saved_trunk_sync_thread_count > 0)

  144. {

  145. logInfo("file: "__FILE__", line: %d, " \

  146. "%d trunk sync threads exited",\

  147. __LINE__, \

  148. saved_trunk_sync_thread_count);

  149. }

  150.  
  151. storage_trunk_destroy_ex(true);

  152. if (g_trunk_create_file_advance && \

  153. g_trunk_create_file_interval > 0)

  154. {

  155. sched_del_entry(TRUNK_FILE_CREATOR_TASK_ID);

  156. }

  157. }

  158. }

  159. }

  160.  
  161. pBriefServers += 1;

  162. server_count -= 1;

  163. }

  164.  
  165. if (!((*pFlags) & FDFS_CHANGE_FLAG_GROUP_SERVER))

  166. {

  167. return 0;

  168. }

  169.  
  170. /*

  171. //printf("resp server count=%d\n", server_count);

  172. {

  173. int i;

  174. for (i=0; i<server_count; i++)

  175. {

  176. //printf("%d. %d:%s\n", i+1, pBriefServers[i].status, \

  177. pBriefServers[i].ip_addr);

  178. }

  179. }

  180. */

  181.  
  182. if (*bServerPortChanged)

  183. {

  184. if (!g_use_storage_id)

  185. {

  186. FDFSStorageBrief *pStorageEnd;

  187. FDFSStorageBrief *pStorage;

  188.  
  189. *bServerPortChanged = false;

  190. pStorageEnd = pBriefServers + server_count;

  191. for (pStorage=pBriefServers; pStorage<pStorageEnd;

  192. pStorage++)

  193. {

  194. if (strcmp(pStorage->id, g_my_server_id_str) == 0)

  195. {

  196. continue;

  197. }

  198.  
  199. tracker_rename_mark_files(pStorage->ip_addr, \

  200. g_last_server_port, pStorage->ip_addr, \

  201. g_server_port);

  202. }

  203. }

  204.  
  205. if (g_server_port != g_last_server_port)

  206. {

  207. g_last_server_port = g_server_port;

  208. if ((result=storage_write_to_sync_ini_file()) != 0)

  209. {

  210. return result;

  211. }

  212. }

  213. }

  214.  
  215. return tracker_merge_servers(pTrackerServer, \

  216. pBriefServers, server_count);

  217. }

可以看到,trunk的失败确实是存在冗余机制,由tracker来选出trunk。

trunk的分析暂告一段落,删除文件后是否存在文件空洞,空洞的利用率如何,都得用数据说话才行哈。

总结:

每个组都有唯一的trunk leader,组内所有trunk文件的信息,由这个trunk leader内部组织的avl树来保存。

上传文件后,storage会向trunk leader发起申请空间的请求,这时trunk leader会使用一个全局的锁,获得了trunk存储的位置后,storage在本地写磁盘。

下载文件时,trunk信息在文件名里面已经包含,只需要直接读即可。

使用trunk方式主要是为了解决node过多造成读写性能下降的问题,但是引入trunk方式本身也会造成一定的性能损耗。

目前感觉我对trunk功能还是hold不住,包括如果trunk出错,怎么样恢复trunk文件的数据,因为没有提供的官方的工具,所以不太敢用。

以后如果有需求在跟进,先告一段落了吧。


FastDFS源码解析(3)--------通信协议分析

就上传和下载进行分析,其他暂时略过

上传:

1 根据ip,port连接上tracker

2 发送一个10字节的包,其中第9个字节为TRACKER_PROTO_CMD_SERVICE_QUERY_STORE_WITHOUT_GROUP_ONE,也就是101

3 接受一个10字节的包,其中第10个字节为返回状态,如果是0,说明一切正常

4 接受的这个包,0-8字节是下面要接收的包的大小,通过以下算法可以还原成数字

 
  1. int64_t buff2long(const char *buff)

  2. {

  3. unsigned char *p;

  4. p = (unsigned char *)buff;

  5. return (((int64_t)(*p)) << 56) | \

  6. (((int64_t)(*(p+1))) << 48) | \

  7. (((int64_t)(*(p+2))) << 40) | \

  8. (((int64_t)(*(p+3))) << 32) | \

  9. (((int64_t)(*(p+4))) << 24) | \

  10. (((int64_t)(*(p+5))) << 16) | \

  11. (((int64_t)(*(p+6))) << 8) | \

  12. ((int64_t)(*(p+7)));

  13. }

  14.  
  15. void long2buff(int64_t n, char *buff)

  16. {

  17. unsigned char *p;

  18. p = (unsigned char *)buff;

  19. *p++ = (n >> 56) & 0xFF;

  20. *p++ = (n >> 48) & 0xFF;

  21. *p++ = (n >> 40) & 0xFF;

  22. *p++ = (n >> 32) & 0xFF;

  23. *p++ = (n >> 24) & 0xFF;

  24. *p++ = (n >> 16) & 0xFF;

  25. *p++ = (n >> 8) & 0xFF;

  26. *p++ = n & 0xFF;

  27. }

5 读完这个数字对应的字节数目,这个数字应当有TRACKER_QUERY_STORAGE_STORE_BODY_LEN长,否则出错

 
  1. #define TRACKER_QUERY_STORAGE_STORE_BODY_LEN (FDFS_GROUP_NAME_MAX_LEN \

  2. + IP_ADDRESS_SIZE - 1 + FDFS_PROTO_PKG_LEN_SIZE + 1)

也就是16+16-1+8+1 = 40

6 这40个字节,头16字节是组名,接着15字节是IP地址,接着8字节是端口号,还是根据buff2long算法还原成数字,最后1字节是store_path_index

tracker交互完毕,此时进行storage操作

7 根据ip和端口连接storage

8 发送25字节的包

头10字节是TrackerHeader一样的结构,其中1-8字节的内容为filesize+这个包的大小(25)-头的大小(10),也就是file_size+15这个数,通过long2buff,转换的8字节字串,然后其中第9字节的内容是STORAGE_PROTO_CMD_UPLOAD_FILE,也就是11

第11字节是刚才接受的storage_path_index

第12-19字节是file_size,通过long2buff算法转换为8字节字串

19-25字节是ext_name相关,这里设置为0即可

9 发送file_size字节内容,即为文件信息

10 接受一个10字节的包,其中第10个字节为返回状态,如果是0,说明一切正常

11 接受的这个包,0-8字节是下面要接收的包的大小,通过buff2long还原为数字

12 这个数字应该大于FDFS_GROUP_NAME_MAX_LEN,也就是16字节,否则出错

13 头16字节为组名,后面全部的字节为remote_filename

14 上传流程完成

下载:

下载需要上传时rsp返回的文件ID,这里命名为file_id

1 连接tracker

2 切分file_id,第一个/前出现的即为group_name,后面的都是remote_filename

3 发送一个10字节的pHeader,其中1-8字节是FDFS_GROUP_NAME_MAX_LEN(值为16) 加上 remote_filename的长度,通过long2buff转化而成的

第9字节是CMD TRACKER_PROTO_CMD_SERVICE_QUERY_FETCH_ONE,即为102

4 发送16字节是group_name

5 发送remote_filename这个字串

6 接受一个10字节的包,其中第10个字节为返回状态,如果是0,说明一切正常

7 接受的这个包,1-8字节是下面要接收的包的大小,通过buff2long可以还原成数字

8 读完这个数字对应的字节数目,这个数字应当有TRACKERQUERYSTORAGEFETCHBODYLEN(TRACKERQUERYSTORAGESTOREBODYLEN - 1,也就是39)长,否则出错

9 这39个字节,头16字节是组名(下载逻辑时可以忽略),接着15字节是IP地址,接着8字节是端口号,还是根据buff2long算法还原成数字

10 和tracker的交互完成,下面是storage

11 根据ip和端口连接storage

12 发送一个pHeader+file_offset+download_bytes+group_name(补全16字节)+filename的数据包

也就是10+8+8+16+filename_size

1-8字节是8+8+16+filename_size的大小根据long2buff转换的字串

9字节是STORAGE_PROTO_CMD_DOWNLOAD_FILE也就是14

11-18字节是file_offset的long2buff字串

19-26是download_bytes的long2buff字串

27-42是group_name

再往后就是finename

13 接受一个10字节的包,其中第10个字节为返回状态,如果是0,说明一切正常

14 接受的这个包,1-8字节是下面要接收的包的大小,通过buff2long可以还原成数字

15 将接收到的包写入文件,一次下载逻辑完毕

上传下载是最经典的逻辑,其他逻辑都可以从这里衍生,不做详细介绍了


FastDFS源码解析(4)--------storage运行流程分析

大致来分析一下fdfs storage是如何提供服务的,以上传文件为例。

从storage的初始化函数来入手

 
  1. int storage_service_init()

  2. {

  3. int result;

  4. int bytes;

  5. struct storage_nio_thread_data *pThreadData;

  6. struct storage_nio_thread_data *pDataEnd;

  7. pthread_t tid;

  8. pthread_attr_t thread_attr;

  9.  
  10. //storage任务线程锁

  11. if ((result=init_pthread_lock(&g_storage_thread_lock)) != 0)

  12. {

  13. return result;

  14. }

  15.  
  16. //路径索引锁

  17. if ((result=init_pthread_lock(&path_index_thread_lock)) != 0)

  18. {

  19. return result;

  20. }

  21.  
  22. //状态计数锁

  23. if ((result=init_pthread_lock(&stat_count_thread_lock)) != 0)

  24. {

  25. return result;

  26. }

  27.  
  28. //初始化线程堆栈大小

  29. if ((result=init_pthread_attr(&thread_attr, g_thread_stack_size)) != 0)

  30. {

  31. logError("file: "__FILE__", line: %d, " \

  32. "init_pthread_attr fail, program exit!", __LINE__);

  33. return result;

  34. }

  35.  
  36. //建立任务task对象池,复用task类型

  37. if ((result=free_queue_init(g_max_connections, g_buff_size, \

  38. g_buff_size, sizeof(StorageClientInfo))) != 0)

  39. {

  40. return result;

  41. }

  42.  
  43. bytes = sizeof(struct storage_nio_thread_data) * g_work_threads;

  44. g_nio_thread_data = (struct storage_nio_thread_data *)malloc(bytes);

  45. if (g_nio_thread_data == NULL)

  46. {

  47. logError("file: "__FILE__", line: %d, " \

  48. "malloc %d bytes fail, errno: %d, error info: %s", \

  49. __LINE__, bytes, errno, STRERROR(errno));

  50. return errno != 0 ? errno : ENOMEM;

  51. }

  52. memset(g_nio_thread_data, 0, bytes);

  53.  
  54. g_storage_thread_count = 0;

  55. pDataEnd = g_nio_thread_data + g_work_threads;

  56. for (pThreadData=g_nio_thread_data; pThreadData<pDataEnd; pThreadData++)

  57. {

  58. if (ioevent_init(&pThreadData->thread_data.ev_puller,

  59. g_max_connections + 2, 1000, 0) != 0)

  60. {

  61. result = errno != 0 ? errno : ENOMEM;

  62. logError("file: "__FILE__", line: %d, " \

  63. "ioevent_init fail, " \

  64. "errno: %d, error info: %s", \

  65. __LINE__, result, STRERROR(result));

  66. return result;

  67. }

  68. result = fast_timer_init(&pThreadData->thread_data.timer,

  69. 2 * g_fdfs_network_timeout, g_current_time);

  70. if (result != 0)

  71. {

  72. logError("file: "__FILE__", line: %d, " \

  73. "fast_timer_init fail, " \

  74. "errno: %d, error info: %s", \

  75. __LINE__, result, STRERROR(result));

  76. return result;

  77. }

  78.  
  79. if (pipe(pThreadData->thread_data.pipe_fds) != 0)

  80. {

  81. result = errno != 0 ? errno : EPERM;

  82. logError("file: "__FILE__", line: %d, " \

  83. "call pipe fail, " \

  84. "errno: %d, error info: %s", \

  85. __LINE__, result, STRERROR(result));

  86. break;

  87. }

  88.  
  89. #if defined(OS_LINUX)

  90. if ((result=fd_add_flags(pThreadData->thread_data.pipe_fds[0], \

  91. O_NONBLOCK | O_NOATIME)) != 0)

  92. {

  93. break;

  94. }

  95. #else

  96. if ((result=fd_add_flags(pThreadData->thread_data.pipe_fds[0], \

  97. O_NONBLOCK)) != 0)

  98. {

  99. break;

  100. }

  101. #endif

  102.  
  103. //创建工作线程

  104. if ((result=pthread_create(&tid, &thread_attr, \

  105. work_thread_entrance, pThreadData)) != 0)

  106. {

  107. logError("file: "__FILE__", line: %d, " \

  108. "create thread failed, startup threads: %d, " \

  109. "errno: %d, error info: %s", \

  110. __LINE__, g_storage_thread_count, \

  111. result, STRERROR(result));

  112. break;

  113. }

  114. else

  115. {

  116. if ((result=pthread_mutex_lock(&g_storage_thread_lock)) != 0)

  117. {

  118. logError("file: "__FILE__", line: %d, " \

  119. "call pthread_mutex_lock fail, " \

  120. "errno: %d, error info: %s", \

  121. __LINE__, result, STRERROR(result));

  122. }

  123. g_storage_thread_count++;

  124. if ((result=pthread_mutex_unlock(&g_storage_thread_lock)) != 0)

  125. {

  126. logError("file: "__FILE__", line: %d, " \

  127. "call pthread_mutex_lock fail, " \

  128. "errno: %d, error info: %s", \

  129. __LINE__, result, STRERROR(result));

  130. }

  131. }

  132. }

  133.  
  134. pthread_attr_destroy(&thread_attr);

  135.  
  136. last_stat_change_count = g_stat_change_count;

  137.  
  138. //DO NOT support direct IO !!!

  139. //g_extra_open_file_flags = g_disk_rw_direct ? O_DIRECT : 0;

  140.  
  141. if (result != 0)

  142. {

  143. return result;

  144. }

  145.  
  146. return result;

  147. }

跟进工作线程

 
  1. static void *work_thread_entrance(void* arg)

  2. {

  3. int result;

  4. struct storage_nio_thread_data *pThreadData;

  5.  
  6. pThreadData = (struct storage_nio_thread_data *)arg;

  7. if (g_check_file_duplicate)

  8. {

  9. if ((result=fdht_copy_group_array(&(pThreadData->group_array),\

  10. &g_group_array)) != 0)

  11. {

  12. pthread_mutex_lock(&g_storage_thread_lock);

  13. g_storage_thread_count--;

  14. pthread_mutex_unlock(&g_storage_thread_lock);

  15. return NULL;

  16. }

  17. }

  18.  
  19. //启动主io主循环,为pThreadData->thread_data对应的pipe_fd注册回调函数

  20. //storage_recv_notify_read

  21. ioevent_loop(&pThreadData->thread_data, storage_recv_notify_read,

  22. task_finish_clean_up, &g_continue_flag);

  23. //循环退出,销毁响应数据结构

  24. ioevent_destroy(&pThreadData->thread_data.ev_puller);

  25.  
  26. if (g_check_file_duplicate)

  27. {

  28. if (g_keep_alive)

  29. {

  30. fdht_disconnect_all_servers(&(pThreadData->group_array));

  31. }

  32.  
  33. fdht_free_group_array(&(pThreadData->group_array));

  34. }

  35.  
  36. //总线程数目自减

  37. if ((result=pthread_mutex_lock(&g_storage_thread_lock)) != 0)

  38. {

  39. logError("file: "__FILE__", line: %d, " \

  40. "call pthread_mutex_lock fail, " \

  41. "errno: %d, error info: %s", \

  42. __LINE__, result, STRERROR(result));

  43. }

  44. g_storage_thread_count--;

  45. if ((result=pthread_mutex_unlock(&g_storage_thread_lock)) != 0)

  46. {

  47. logError("file: "__FILE__", line: %d, " \

  48. "call pthread_mutex_lock fail, " \

  49. "errno: %d, error info: %s", \

  50. __LINE__, result, STRERROR(result));

  51. }

  52.  
  53. logDebug("file: "__FILE__", line: %d, " \

  54. "nio thread exited, thread count: %d", \

  55. __LINE__, g_storage_thread_count);

  56.  
  57. return NULL;

  58. }

除了workthreadentrance线程,还有一个叫做acceptthreadentrance的线程,专门用来accept请求,防止大量的操作阻塞了accept的性能

 
  1. static void *accept_thread_entrance(void* arg)

  2. {

  3. int server_sock;

  4. int incomesock;

  5. struct sockaddr_in inaddr;

  6. socklen_t sockaddr_len;

  7. in_addr_t client_addr;

  8. char szClientIp[IP_ADDRESS_SIZE];

  9. long task_addr;

  10. struct fast_task_info *pTask;

  11. StorageClientInfo *pClientInfo;

  12. struct storage_nio_thread_data *pThreadData;

  13.  
  14. server_sock = (long)arg;

  15. while (g_continue_flag)

  16. {

  17. sockaddr_len = sizeof(inaddr);

  18. incomesock = accept(server_sock, (struct sockaddr*)&inaddr, \

  19. &sockaddr_len);

  20. if (incomesock < 0) //error

  21. {

  22. if (!(errno == EINTR || errno == EAGAIN))

  23. {

  24. logError("file: "__FILE__", line: %d, " \

  25. "accept failed, " \

  26. "errno: %d, error info: %s", \

  27. __LINE__, errno, STRERROR(errno));

  28. }

  29.  
  30. continue;

  31. }

  32.  
  33. client_addr = getPeerIpaddr(incomesock, \

  34. szClientIp, IP_ADDRESS_SIZE);

  35. if (g_allow_ip_count >= 0)

  36. {

  37. if (bsearch(&client_addr, g_allow_ip_addrs, \

  38. g_allow_ip_count, sizeof(in_addr_t), \

  39. cmp_by_ip_addr_t) == NULL)

  40. {

  41. logError("file: "__FILE__", line: %d, " \

  42. "ip addr %s is not allowed to access", \

  43. __LINE__, szClientIp);

  44.  
  45. close(incomesock);

  46. continue;

  47. }

  48. }

  49.  
  50. if (tcpsetnonblockopt(incomesock) != 0)

  51. {

  52. close(incomesock);

  53. continue;

  54. }

  55.  
  56. pTask = free_queue_pop();

  57. if (pTask == NULL)

  58. {

  59. logError("file: "__FILE__", line: %d, " \

  60. "malloc task buff failed", \

  61. __LINE__);

  62. close(incomesock);

  63. continue;

  64. }

  65.  
  66. pClientInfo = (StorageClientInfo *)pTask->arg;

  67.  
  68. //从task对象池里拿出一个task,将fd域填充为incomesock

  69. pTask->event.fd = incomesock;

  70. pClientInfo->stage = FDFS_STORAGE_STAGE_NIO_INIT;

  71. pClientInfo->nio_thread_index = pTask->event.fd % g_work_threads;

  72. pThreadData = g_nio_thread_data + pClientInfo->nio_thread_index;

  73.  
  74. strcpy(pTask->client_ip, szClientIp);

  75.  
  76. task_addr = (long)pTask;

  77.  
  78. //通过pThreadData->thread_data.pipe_fds[1]将task传给work_thread

  79. //work_thread监视着pThreadData->thread_data.pipe_fds[0]

  80. //storage_recv_notify_read将被调用

  81. if (write(pThreadData->thread_data.pipe_fds[1], &task_addr, \

  82. sizeof(task_addr)) != sizeof(task_addr))

  83. {

  84. close(incomesock);

  85. free_queue_push(pTask);

  86. logError("file: "__FILE__", line: %d, " \

  87. "call write failed, " \

  88. "errno: %d, error info: %s", \

  89. __LINE__, errno, STRERROR(errno));

  90. }

  91. }

  92.  
  93. return NULL;

  94. }

关注一下storagerecvnotify_read函数

 
  1. void storage_recv_notify_read(int sock, short event, void *arg)

  2. {

  3. struct fast_task_info *pTask;

  4. StorageClientInfo *pClientInfo;

  5. long task_addr;

  6. int64_t remain_bytes;

  7. int bytes;

  8. int result;

  9.  
  10. while (1)

  11. {

  12. //读取这个task结构

  13. if ((bytes=read(sock, &task_addr, sizeof(task_addr))) < 0)

  14. {

  15. if (!(errno == EAGAIN || errno == EWOULDBLOCK))

  16. {

  17. logError("file: "__FILE__", line: %d, " \

  18. "call read failed, " \

  19. "errno: %d, error info: %s", \

  20. __LINE__, errno, STRERROR(errno));

  21. }

  22.  
  23. break;

  24. }

  25. else if (bytes == 0)

  26. {

  27. logError("file: "__FILE__", line: %d, " \

  28. "call read failed, end of file", __LINE__);

  29. break;

  30. }

  31.  
  32. pTask = (struct fast_task_info *)task_addr;

  33. pClientInfo = (StorageClientInfo *)pTask->arg;

  34.  
  35. if (pTask->event.fd < 0) //quit flag

  36. {

  37. return;

  38. }

  39.  
  40. /* //logInfo("=====thread index: %d, pTask->event.fd=%d", \

  41. pClientInfo->nio_thread_index, pTask->event.fd);

  42. */

  43.  
  44. if (pClientInfo->stage & FDFS_STORAGE_STAGE_DIO_THREAD)

  45. {

  46. pClientInfo->stage &= ~FDFS_STORAGE_STAGE_DIO_THREAD;

  47. }

  48. switch (pClientInfo->stage)

  49. {

  50. //初始化阶段,进行数据初始化

  51. case FDFS_STORAGE_STAGE_NIO_INIT:

  52. result = storage_nio_init(pTask);

  53. break;

  54. //暂时略过,先看storage_nio_init

  55. case FDFS_STORAGE_STAGE_NIO_RECV:

  56. pTask->offset = 0;

  57. remain_bytes = pClientInfo->total_length - \

  58. pClientInfo->total_offset;

  59. if (remain_bytes > pTask->size)

  60. {

  61. pTask->length = pTask->size;

  62. }

  63. else

  64. {

  65. pTask->length = remain_bytes;

  66. }

  67.  
  68. if (set_recv_event(pTask) == 0)

  69. {

  70. client_sock_read(pTask->event.fd,

  71. IOEVENT_READ, pTask);

  72. }

  73. result = 0;

  74. break;

  75. case FDFS_STORAGE_STAGE_NIO_SEND:

  76. result = storage_send_add_event(pTask);

  77. break;

  78. case FDFS_STORAGE_STAGE_NIO_CLOSE:

  79. result = EIO; //close this socket

  80. break;

  81. default:

  82. logError("file: "__FILE__", line: %d, " \

  83. "invalid stage: %d", __LINE__, \

  84. pClientInfo->stage);

  85. result = EINVAL;

  86. break;

  87. }

  88.  
  89. if (result != 0)

  90. {

  91. add_to_deleted_list(pTask);

  92. }

  93. }

  94. }

初始化实质上是将task对应的fd,注册clientsockread函数同时将task状态设置为FDFSSTORAGESTAGENIORECV

 
  1. static int storage_nio_init(struct fast_task_info *pTask)

  2. {

  3. StorageClientInfo *pClientInfo;

  4. struct storage_nio_thread_data *pThreadData;

  5.  
  6. pClientInfo = (StorageClientInfo *)pTask->arg;

  7. pThreadData = g_nio_thread_data + pClientInfo->nio_thread_index;

  8.  
  9. pClientInfo->stage = FDFS_STORAGE_STAGE_NIO_RECV;

  10. return ioevent_set(pTask, &pThreadData->thread_data,

  11. pTask->event.fd, IOEVENT_READ, client_sock_read,

  12. g_fdfs_network_timeout);

  13. }

看看这个clientsockread函数

 
  1. static void client_sock_read(int sock, short event, void *arg)

  2. {

  3. int bytes;

  4. int recv_bytes;

  5. struct fast_task_info *pTask;

  6. StorageClientInfo *pClientInfo;

  7.  
  8. pTask = (struct fast_task_info *)arg;

  9. pClientInfo = (StorageClientInfo *)pTask->arg;

  10. if (pClientInfo->canceled)

  11. {

  12. return;

  13. }

  14.  
  15. if (pClientInfo->stage != FDFS_STORAGE_STAGE_NIO_RECV)

  16. {

  17. if (event & IOEVENT_TIMEOUT) {

  18. pTask->event.timer.expires = g_current_time +

  19. g_fdfs_network_timeout;

  20. fast_timer_add(&pTask->thread_data->timer,

  21. &pTask->event.timer);

  22. }

  23.  
  24. return;

  25. }

  26.  
  27. //超时了,删除这个task

  28. if (event & IOEVENT_TIMEOUT)

  29. {

  30. if (pClientInfo->total_offset == 0 && pTask->req_count > 0)

  31. {

  32. pTask->event.timer.expires = g_current_time +

  33. g_fdfs_network_timeout;

  34. fast_timer_add(&pTask->thread_data->timer,

  35. &pTask->event.timer);

  36. }

  37. else

  38. {

  39. logError("file: "__FILE__", line: %d, " \

  40. "client ip: %s, recv timeout, " \

  41. "recv offset: %d, expect length: %d", \

  42. __LINE__, pTask->client_ip, \

  43. pTask->offset, pTask->length);

  44.  
  45. task_finish_clean_up(pTask);

  46. }

  47.  
  48. return;

  49. }

  50.  
  51. //io错误,一样删

  52. if (event & IOEVENT_ERROR)

  53. {

  54. logError("file: "__FILE__", line: %d, " \

  55. "client ip: %s, recv error event: %d, "

  56. "close connection", __LINE__, pTask->client_ip, event);

  57.  
  58. task_finish_clean_up(pTask);

  59. return;

  60. }

  61.  
  62. fast_timer_modify(&pTask->thread_data->timer,

  63. &pTask->event.timer, g_current_time +

  64. g_fdfs_network_timeout);

  65. while (1)

  66. {

  67. //pClientInfo的total_length域为0,说明头还没接收,接收一个头

  68. if (pClientInfo->total_length == 0) //recv header

  69. {

  70. recv_bytes = sizeof(TrackerHeader) - pTask->offset;

  71. }

  72. else

  73. {

  74. recv_bytes = pTask->length - pTask->offset;

  75. }

  76.  
  77. /*

  78. logInfo("total_length="INT64_PRINTF_FORMAT", recv_bytes=%d, "

  79. "pTask->length=%d, pTask->offset=%d",

  80. pClientInfo->total_length, recv_bytes,

  81. pTask->length, pTask->offset);

  82. */

  83.  
  84. bytes = recv(sock, pTask->data + pTask->offset, recv_bytes, 0);

  85. if (bytes < 0)

  86. {

  87. if (errno == EAGAIN || errno == EWOULDBLOCK)

  88. {

  89. }

  90. else

  91. {

  92. logError("file: "__FILE__", line: %d, " \

  93. "client ip: %s, recv failed, " \

  94. "errno: %d, error info: %s", \

  95. __LINE__, pTask->client_ip, \

  96. errno, STRERROR(errno));

  97.  
  98. task_finish_clean_up(pTask);

  99. }

  100.  
  101. return;

  102. }

  103. else if (bytes == 0)

  104. {

  105. logDebug("file: "__FILE__", line: %d, " \

  106. "client ip: %s, recv failed, " \

  107. "connection disconnected.", \

  108. __LINE__, pTask->client_ip);

  109.  
  110. task_finish_clean_up(pTask);

  111. return;

  112. }

  113.  
  114. //用包头数据对pClientInfo进行初始化

  115. if (pClientInfo->total_length == 0) //header

  116. {

  117. if (pTask->offset + bytes < sizeof(TrackerHeader))

  118. {

  119. pTask->offset += bytes;

  120. return;

  121. }

  122.  
  123. pClientInfo->total_length=buff2long(((TrackerHeader *) \

  124. pTask->data)->pkg_len);

  125. if (pClientInfo->total_length < 0)

  126. {

  127. logError("file: "__FILE__", line: %d, " \

  128. "client ip: %s, pkg length: " \

  129. INT64_PRINTF_FORMAT" < 0", \

  130. __LINE__, pTask->client_ip, \

  131. pClientInfo->total_length);

  132.  
  133. task_finish_clean_up(pTask);

  134. return;

  135. }

  136.  
  137. pClientInfo->total_length += sizeof(TrackerHeader);

  138.  
  139. //如果需要接受的数据总长大于pTask的固定长度阀值,那么暂时只接受那么长

  140. if (pClientInfo->total_length > pTask->size)

  141. {

  142. pTask->length = pTask->size;

  143. }

  144. else

  145. {

  146. pTask->length = pClientInfo->total_length;

  147. }

  148. }

  149.  
  150. pTask->offset += bytes;

  151.  
  152. //接受完了当前的包

  153. if (pTask->offset >= pTask->length) //recv current pkg done

  154. {

  155. //略过先看下面

  156. if (pClientInfo->total_offset + pTask->length >= \

  157. pClientInfo->total_length)

  158. {

  159. /* current req recv done */

  160. pClientInfo->stage = FDFS_STORAGE_STAGE_NIO_SEND;

  161. pTask->req_count++;

  162. }

  163.  
  164. //刚接受了包头,那么由storage_deal_task分发任务

  165. if (pClientInfo->total_offset == 0)

  166. {

  167. pClientInfo->total_offset = pTask->length;

  168. storage_deal_task(pTask);

  169. }

  170. else

  171. {

  172. //略过先看下面

  173. pClientInfo->total_offset += pTask->length;

  174.  
  175. /* continue write to file */

  176. storage_dio_queue_push(pTask);

  177. }

  178.  
  179. return;

  180. }

  181. }

  182.  
  183. return;

  184. }

storagedealtask将上传请求分发给storageuploadfile

storageuploadfile注册一些基本的函数而后调用 storagewriteto_file

 
  1. static int storage_upload_file(struct fast_task_info *pTask, bool bAppenderFile)

  2. {

  3. //略过

  4. ...

  5.  
  6. return storage_write_to_file(pTask, file_offset, file_bytes, \

  7. p - pTask->data, dio_write_file, \

  8. storage_upload_file_done_callback, \

  9. clean_func, store_path_index);

  10. }

 
  1. static int storage_write_to_file(struct fast_task_info *pTask, \

  2. const int64_t file_offset, const int64_t upload_bytes, \

  3. const int buff_offset, TaskDealFunc deal_func, \

  4. FileDealDoneCallback done_callback, \

  5. DisconnectCleanFunc clean_func, const int store_path_index)

  6. {

  7. StorageClientInfo *pClientInfo;

  8. StorageFileContext *pFileContext;

  9. int result;

  10.  
  11. pClientInfo = (StorageClientInfo *)pTask->arg;

  12. pFileContext = &(pClientInfo->file_context);

  13.  
  14. pClientInfo->deal_func = deal_func;

  15. pClientInfo->clean_func = clean_func;

  16.  
  17. pFileContext->fd = -1;

  18. pFileContext->buff_offset = buff_offset;

  19. pFileContext->offset = file_offset;

  20. pFileContext->start = file_offset;

  21. pFileContext->end = file_offset + upload_bytes;

  22. pFileContext->dio_thread_index = storage_dio_get_thread_index( \

  23. pTask, store_path_index, pFileContext->op);

  24. pFileContext->done_callback = done_callback;

  25.  
  26. if (pFileContext->calc_crc32)

  27. {

  28. pFileContext->crc32 = CRC32_XINIT;

  29. }

  30.  
  31. if (pFileContext->calc_file_hash)

  32. {

  33. if (g_file_signature_method == STORAGE_FILE_SIGNATURE_METHOD_HASH)

  34. {

  35. INIT_HASH_CODES4(pFileContext->file_hash_codes)

  36. }

  37. else

  38. {

  39. my_md5_init(&pFileContext->md5_context);

  40. }

  41. }

  42.  
  43. //将任务压入磁盘队列

  44. if ((result=storage_dio_queue_push(pTask)) != 0)

  45. {

  46. pClientInfo->total_length = sizeof(TrackerHeader);

  47. return result;

  48. }

  49.  
  50. return STORAGE_STATUE_DEAL_FILE;

  51. }

压入磁盘队列的处理函数

 
  1. int storage_dio_queue_push(struct fast_task_info *pTask)

  2. {

  3. StorageClientInfo *pClientInfo;

  4. StorageFileContext *pFileContext;

  5. struct storage_dio_context *pContext;

  6. int result;

  7.  
  8. pClientInfo = (StorageClientInfo *)pTask->arg;

  9. pFileContext = &(pClientInfo->file_context);

  10. pContext = g_dio_contexts + pFileContext->dio_thread_index;

  11.  
  12. //这里为什么要或上这个呢,因为在LT模式的工作下,client_sock_read会被不断的触发

  13. //pTask的数据就会被刷掉了,所以改变当前FDFS_STORAGE_STAGE_NIO_RECV的状态,让client_sock_read调用就被返回

  14. pClientInfo->stage |= FDFS_STORAGE_STAGE_DIO_THREAD;

  15. if ((result=task_queue_push(&(pContext->queue), pTask)) != 0)

  16. {

  17. add_to_deleted_list(pTask);

  18. return result;

  19. }

  20.  
  21. if ((result=pthread_cond_signal(&(pContext->cond))) != 0)

  22. {

  23. logError("file: "__FILE__", line: %d, " \

  24. "pthread_cond_signal fail, " \

  25. "errno: %d, error info: %s", \

  26. __LINE__, result, STRERROR(result));

  27.  
  28. add_to_deleted_list(pTask);

  29. return result;

  30. }

  31.  
  32. return 0;

  33. }

下面就是磁盘线程取task了

 
  1. static void *dio_thread_entrance(void* arg)

  2. {

  3. int result;

  4. struct storage_dio_context *pContext;

  5. struct fast_task_info *pTask;

  6.  
  7. pContext = (struct storage_dio_context *)arg;

  8.  
  9. pthread_mutex_lock(&(pContext->lock));

  10. while (g_continue_flag)

  11. {

  12. if ((result=pthread_cond_wait(&(pContext->cond), \

  13. &(pContext->lock))) != 0)

  14. {

  15. logError("file: "__FILE__", line: %d, " \

  16. "call pthread_cond_wait fail, " \

  17. "errno: %d, error info: %s", \

  18. __LINE__, result, STRERROR(result));

  19. }

  20.  
  21. //循环取队列里的任务,执行他的deal_func

  22. while ((pTask=task_queue_pop(&(pContext->queue))) != NULL)

  23. {

  24. ((StorageClientInfo *)pTask->arg)->deal_func(pTask);

  25. }

  26. }

  27. pthread_mutex_unlock(&(pContext->lock));

  28.  
  29. if ((result=pthread_mutex_lock(&g_dio_thread_lock)) != 0)

  30. {

  31. logError("file: "__FILE__", line: %d, " \

  32. "call pthread_mutex_lock fail, " \

  33. "errno: %d, error info: %s", \

  34. __LINE__, result, STRERROR(result));

  35. }

  36. g_dio_thread_count--;

  37. if ((result=pthread_mutex_unlock(&g_dio_thread_lock)) != 0)

  38. {

  39. logError("file: "__FILE__", line: %d, " \

  40. "call pthread_mutex_lock fail, " \

  41. "errno: %d, error info: %s", \

  42. __LINE__, result, STRERROR(result));

  43. }

  44.  
  45. logDebug("file: "__FILE__", line: %d, " \

  46. "dio thread exited, thread count: %d", \

  47. __LINE__, g_dio_thread_count);

  48.  
  49. return NULL;

  50. }

对于上传任务来说,dealtask实际上是dowrite_file

 
  1. int dio_write_file(struct fast_task_info *pTask)

  2. {

  3. StorageClientInfo *pClientInfo;

  4. StorageFileContext *pFileContext;

  5. int result;

  6. int write_bytes;

  7. char *pDataBuff;

  8.  
  9. pClientInfo = (StorageClientInfo *)pTask->arg;

  10. pFileContext = &(pClientInfo->file_context);

  11. result = 0;

  12. do

  13. {

  14. if (pFileContext->fd < 0)

  15. {

  16. if (pFileContext->extra_info.upload.before_open_callback!=NULL)

  17. {

  18. result = pFileContext->extra_info.upload. \

  19. before_open_callback(pTask);

  20. if (result != 0)

  21. {

  22. break;

  23. }

  24. }

  25.  
  26. if ((result=dio_open_file(pFileContext)) != 0)

  27. {

  28. break;

  29. }

  30. }

  31.  
  32. pDataBuff = pTask->data + pFileContext->buff_offset;

  33. write_bytes = pTask->length - pFileContext->buff_offset;

  34. if (write(pFileContext->fd, pDataBuff, write_bytes) != write_bytes)

  35. {

  36. result = errno != 0 ? errno : EIO;

  37. logError("file: "__FILE__", line: %d, " \

  38. "write to file: %s fail, fd=%d, write_bytes=%d, " \

  39. "errno: %d, error info: %s", \

  40. __LINE__, pFileContext->filename, \

  41. pFileContext->fd, write_bytes, \

  42. result, STRERROR(result));

  43. }

  44.  
  45. pthread_mutex_lock(&g_dio_thread_lock);

  46. g_storage_stat.total_file_write_count++;

  47. if (result == 0)

  48. {

  49. g_storage_stat.success_file_write_count++;

  50. }

  51. pthread_mutex_unlock(&g_dio_thread_lock);

  52.  
  53. if (result != 0)

  54. {

  55. break;

  56. }

  57.  
  58. if (pFileContext->calc_crc32)

  59. {

  60. pFileContext->crc32 = CRC32_ex(pDataBuff, write_bytes, \

  61. pFileContext->crc32);

  62. }

  63.  
  64. if (pFileContext->calc_file_hash)

  65. {

  66. if (g_file_signature_method == STORAGE_FILE_SIGNATURE_METHOD_HASH)

  67. {

  68. CALC_HASH_CODES4(pDataBuff, write_bytes, \

  69. pFileContext->file_hash_codes)

  70. }

  71. else

  72. {

  73. my_md5_update(&pFileContext->md5_context, \

  74. (unsigned char *)pDataBuff, write_bytes);

  75. }

  76. }

  77.  
  78. /*

  79. logInfo("###dio write bytes: %d, pTask->length=%d, buff_offset=%d", \

  80. write_bytes, pTask->length, pFileContext->buff_offset);

  81. */

  82.  
  83. pFileContext->offset += write_bytes;

  84. if (pFileContext->offset < pFileContext->end)

  85. {

  86. pFileContext->buff_offset = 0;

  87. storage_nio_notify(pTask); //notify nio to deal

  88. }

  89. else

  90. {

  91. if (pFileContext->calc_crc32)

  92. {

  93. pFileContext->crc32 = CRC32_FINAL( \

  94. pFileContext->crc32);

  95. }

  96.  
  97. if (pFileContext->calc_file_hash)

  98. {

  99. if (g_file_signature_method == STORAGE_FILE_SIGNATURE_METHOD_HASH)

  100. {

  101. FINISH_HASH_CODES4(pFileContext->file_hash_codes)

  102. }

  103. else

  104. {

  105. my_md5_final((unsigned char *)(pFileContext-> \

  106. file_hash_codes), &pFileContext->md5_context);

  107. }

  108. }

  109.  
  110. if (pFileContext->extra_info.upload.before_close_callback != NULL)

  111. {

  112. result = pFileContext->extra_info.upload. \

  113. before_close_callback(pTask);

  114. }

  115.  
  116. /* file write done, close it */

  117. close(pFileContext->fd);

  118. pFileContext->fd = -1;

  119.  
  120. if (pFileContext->done_callback != NULL)

  121. {

  122. pFileContext->done_callback(pTask, result);

  123. }

  124. }

  125.  
  126. return 0;

  127. } while (0);

  128.  
  129. pClientInfo->clean_func(pTask);

  130.  
  131. if (pFileContext->done_callback != NULL)

  132. {

  133. pFileContext->done_callback(pTask, result);

  134. }

  135. return result;

  136. }

pFileContext->donecallback对应了storageuploadfiledone_callback

 
  1. static void storage_upload_file_done_callback(struct fast_task_info *pTask, \

  2. const int err_no)

  3. {

  4. StorageClientInfo *pClientInfo;

  5. StorageFileContext *pFileContext;

  6. TrackerHeader *pHeader;

  7. int result;

  8.  
  9. pClientInfo = (StorageClientInfo *)pTask->arg;

  10. pFileContext = &(pClientInfo->file_context);

  11.  
  12. if (pFileContext->extra_info.upload.file_type & _FILE_TYPE_TRUNK)

  13. {

  14. result = trunk_client_trunk_alloc_confirm( \

  15. &(pFileContext->extra_info.upload.trunk_info), err_no);

  16. if (err_no != 0)

  17. {

  18. result = err_no;

  19. }

  20. }

  21. else

  22. {

  23. result = err_no;

  24. }

  25.  
  26. if (result == 0)

  27. {

  28. result = storage_service_upload_file_done(pTask);

  29. if (result == 0)

  30. {

  31. if (pFileContext->create_flag & STORAGE_CREATE_FLAG_FILE)

  32. {

  33. result = storage_binlog_write(\

  34. pFileContext->timestamp2log, \

  35. STORAGE_OP_TYPE_SOURCE_CREATE_FILE, \

  36. pFileContext->fname2log);

  37. }

  38. }

  39. }

  40.  
  41. if (result == 0)

  42. {

  43. int filename_len;

  44. char *p;

  45.  
  46. if (pFileContext->create_flag & STORAGE_CREATE_FLAG_FILE)

  47. {

  48. CHECK_AND_WRITE_TO_STAT_FILE3_WITH_BYTES( \

  49. g_storage_stat.total_upload_count, \

  50. g_storage_stat.success_upload_count, \

  51. g_storage_stat.last_source_update, \

  52. g_storage_stat.total_upload_bytes, \

  53. g_storage_stat.success_upload_bytes, \

  54. pFileContext->end - pFileContext->start)

  55. }

  56.  
  57. filename_len = strlen(pFileContext->fname2log);

  58. pClientInfo->total_length = sizeof(TrackerHeader) + \

  59. FDFS_GROUP_NAME_MAX_LEN + filename_len;

  60. p = pTask->data + sizeof(TrackerHeader);

  61. memcpy(p, pFileContext->extra_info.upload.group_name, \

  62. FDFS_GROUP_NAME_MAX_LEN);

  63. p += FDFS_GROUP_NAME_MAX_LEN;

  64. memcpy(p, pFileContext->fname2log, filename_len);

  65. }

  66. else

  67. {

  68. pthread_mutex_lock(&stat_count_thread_lock);

  69. if (pFileContext->create_flag & STORAGE_CREATE_FLAG_FILE)

  70. {

  71. g_storage_stat.total_upload_count++;

  72. g_storage_stat.total_upload_bytes += \

  73. pClientInfo->total_offset;

  74. }

  75. pthread_mutex_unlock(&stat_count_thread_lock);

  76.  
  77. pClientInfo->total_length = sizeof(TrackerHeader);

  78. }

  79.  
  80. STORAGE_ACCESS_LOG(pTask, ACCESS_LOG_ACTION_UPLOAD_FILE, result);

  81.  
  82. pClientInfo->total_offset = 0;

  83. pTask->length = pClientInfo->total_length;

  84.  
  85. pHeader = (TrackerHeader *)pTask->data;

  86. pHeader->status = result;

  87. pHeader->cmd = STORAGE_PROTO_CMD_RESP;

  88. long2buff(pClientInfo->total_length - sizeof(TrackerHeader), \

  89. pHeader->pkg_len);

  90.  
  91. //又看到熟悉的函数了,这完成以后将pTask从磁盘线程压入work线程

  92. //work线程调用storage_recv_notify_read函数来做下一步处理

  93. storage_nio_notify(pTask);

  94. }

 
  1. void storage_recv_notify_read(int sock, short event, void *arg)

  2. {

  3. //前文已有,略过

  4. ...

  5. //刚从磁盘线程里出来的任务状态依然是dio_thread,去掉dio_thread状态

  6. if (pClientInfo->stage & FDFS_STORAGE_STAGE_DIO_THREAD)

  7. {

  8. pClientInfo->stage &= ~FDFS_STORAGE_STAGE_DIO_THREAD;

  9. }

  10. switch (pClientInfo->stage)

  11. {

  12. //前文已有,略过

  13. ...

  14. case FDFS_STORAGE_STAGE_NIO_RECV:

  15. pTask->offset = 0;

  16. remain_bytes = pClientInfo->total_length - \

  17. pClientInfo->total_offset;

  18. if (remain_bytes > pTask->size)

  19. {

  20. pTask->length = pTask->size;

  21. }

  22. else

  23. {

  24. pTask->length = remain_bytes;

  25. }

  26.  
  27. if (set_recv_event(pTask) == 0)

  28. {

  29. client_sock_read(pTask->event.fd,

  30. IOEVENT_READ, pTask);

  31. }

  32. result = 0;

  33. break;

  34. case FDFS_STORAGE_STAGE_NIO_SEND:

  35. result = storage_send_add_event(pTask);

  36. break;

  37. case FDFS_STORAGE_STAGE_NIO_CLOSE:

  38. result = EIO; //close this socket

  39. break;

  40. default:

  41. logError("file: "__FILE__", line: %d, " \

  42. "invalid stage: %d", __LINE__, \

  43. pClientInfo->stage);

  44. result = EINVAL;

  45. break;

  46. }

  47.  
  48. if (result != 0)

  49. {

  50. add_to_deleted_list(pTask);

  51. }

  52. }

调用了clientsockread函数进行处理

 
  1. static void client_sock_read(int sock, short event, void *arg)

  2. {

  3. //前文已有,略

  4. ...

  5. pTask->offset += bytes;

  6. if (pTask->offset >= pTask->length) //recv current pkg done

  7. {

  8. //这个req接受完毕,准备反馈rsp

  9. if (pClientInfo->total_offset + pTask->length >= \

  10. pClientInfo->total_length)

  11. {

  12. /* current req recv done */

  13. pClientInfo->stage = FDFS_STORAGE_STAGE_NIO_SEND;

  14. pTask->req_count++;

  15. }

  16.  
  17. if (pClientInfo->total_offset == 0)

  18. {

  19. pClientInfo->total_offset = pTask->length;

  20. storage_deal_task(pTask);

  21. }

  22. else

  23. {

  24. //接受的是数据包,压入磁盘线程

  25. pClientInfo->total_offset += pTask->length;

  26.  
  27. /* continue write to file */

  28. storage_dio_queue_push(pTask);

  29. }

  30.  
  31. return;

  32. }

  33.  
  34. return;

  35. }

数据包的网络接收和磁盘的处理成为一个环,接收完一部分,通过队列压入磁盘队列,磁盘线程处理完以后又通过像工作线程的fd进行写,触发网络线程读取这个task。自此源源不断将数据传过来。

总结:

还是上图吧,整个处理流程如下图

fastdfs storage流程分析图

1 client发出请求,accept线程catch到描述符,初始化pTask结构,填入描述符,然后将pTask通过管道给work_entrance

2 进入storagerecvnotify_read函数

3 根据当前的pTask->stage等于FDFSSTORAGESTAGEINIT为fd创建读事件,绑定函数clientsock_read

4 调用storageuploadfile

5 storageuploadfile调用storagewriteto_file

6 storagewritetofile调用压磁盘队列函数storagedioqueuepush

7 storagedioqueuepush将pTask->stage |= FDFSSTORAGESTAGEDIO_THREAD

8 根据事件触发机制,clientsockread将被不断的调用,然而由于pTask->stage != FDFSSTORAGESTAGE_RECV,所以返回

9 磁盘线程通过队列取pTask,调用pTask的处理函数diowritefile

10 调用storageuploadfiledonecallback,调用storagenionotify,通过管道的形式将pTask压入工作进程

11 触发storagerecvnotifyread,将task->stage的FDFSSTORAGESTAGEDIO_THREAD标志去除

12 根据task->stage的FDFSSTORAGESTAGERECV状态,调用函数clientsock_read

13 clientsockread读取完以后调用磁盘队列函数storagedioqueue_push

14 重复7

15 直到结束


一次上传逻辑分析完成

另外pTask的大小是在配置文件里指定的,默认256KB,补充说明一下

每个连接只提供一个pTask来做数据接受和写,猜测是怕大并发占用太多的系统内存吧。

比如1W并发下,256K的pTask大致是存在1W个,也就是2.5G左右内存

我以前自己写的那个分布式文件系统也是这个串行化的逻辑,因为这样开发简单有效。

有一点不足,我以前把数据压入磁盘IO后,我就删除了这个事件,等到磁盘线程读写完毕,我再建立这个事件。

看鱼大是通过判断pTask->stage的状态来暂时忽略回调的,这样在逻辑上比较好,毕竟有事件发生了就要去处理,删掉了始终不是什么好办法。

据事件触发机制,clientsockread将被不断的调用,然而由于pTask->stage != FDFSSTORAGESTAGE_RECV,所以返回

9 磁盘线程通过队列取pTask,调用pTask的处理函数diowritefile

10 调用storageuploadfiledonecallback,调用storagenionotify,通过管道的形式将pTask压入工作进程

11 触发storagerecvnotifyread,将task->stage的FDFSSTORAGESTAGEDIO_THREAD标志去除

12 根据task->stage的FDFSSTORAGESTAGERECV状态,调用函数clientsock_read

13 clientsockread读取完以后调用磁盘队列函数storagedioqueue_push

14 重复7

15 直到结束


一次上传逻辑分析完成

另外pTask的大小是在配置文件里指定的,默认256KB,补充说明一下

每个连接只提供一个pTask来做数据接受和写,猜测是怕大并发占用太多的系统内存吧。

比如1W并发下,256K的pTask大致是存在1W个,也就是2.5G左右内存

我以前自己写的那个分布式文件系统也是这个串行化的逻辑,因为这样开发简单有效。

有一点不足,我以前把数据压入磁盘IO后,我就删除了这个事件,等到磁盘线程读写完毕,我再建立这个事件。

看鱼大是通过判断pTask->stage的状态来暂时忽略回调的,这样在逻辑上比较好,毕竟有事件发生了就要去处理,删掉了始终不是什么好办法。

猜你喜欢

转载自blog.csdn.net/ws1296931325/article/details/86074126
今日推荐