“ModSecurity2”源码分析

一、相关结构体

/*
 * Rule-engine registry: one APR table per kind of component that can be
 * registered with the engine.
 */
struct msre_engine{
  apr_pool_t *mp;                  /* memory pool of the engine -- presumably the one created in msre_engine_create(); confirm */
  apr_table_t *variables;          /* variable name -> msre_var_metadata, filled by msre_engine_variable_register() */
  apr_table_t *operators;          /* presumably operator name -> msre_op_metadata -- confirm */
  apr_table_t *actions;            /* presumably action name -> msre_action_metadata -- confirm */
  apr_table_t *tfns;               /* created with apr_table_make(); presumably tfn name -> msre_tfn_metadata -- confirm */
  apr_table_t *reqbody_processors; /* presumably the registered request-body processors -- confirm */
};

//在msre_engine_variable_register()函数中初始化metadata使用的结构体
/*
 * Description of one rule variable (for example "ARGS_POST").  The fields
 * appear in the same order as the arguments of msre_engine_variable_register().
 */
struct msre_var_metadata {
  const char *name;            /* variable name as written in rules */
  unsigned int type;           /* VAR_TYPE_ constants */
  unsigned int argc_min;       /* minimum number of parameters -- presumably; confirm */
  unsigned int argc_max;       /* maximum number of parameters -- presumably; confirm */
  fn_var_validate_t validate;  /* callback that validates the variable's parameter */
  fn_var_generate_t generate;  /* callback invoked as generate(msr, target, rule, vartab, mptmp); its result is used as a list count */
  unsigned int is_cacheable;   /* 0 - no, 1 - yes */
  unsigned int availability;   /* when does this variable become available? */
};

/*
 * A name/value pair whose lengths are stored explicitly next to the
 * pointers.
 */
struct msc_string {
  char *name;              /* name bytes */
  unsigned int name_len;   /* length of name */
  char *value;             /* value bytes */
  unsigned int value_len;  /* length of value */
};

//运算符元数据结构体
/*
 * Description of one rule operator (for example "contains").  The fields
 * appear in the same order as the arguments of msre_engine_op_register().
 */
struct msre_op_metadata {
  const char   *name;             /* operator name as written after '@' in a rule */
  fn_op_param_init_t param_init;  /* optional init callback; the "contains" and "detectSQLi" registrations pass NULL */
  fn_op_execute_t execute;        /* callback invoked as execute(msr, rule, var, &error_msg) */
};

//转换函数结构体
/*
 * Description of one transformation function (for example "lowercase").
 */
struct msre_tfn_metadata {
  const char *name;          /* transformation name as written in t:<name> */
  fn_tfn_execute_t execute;  /* callback invoked as execute(mptmp, input, input_len, &rval, &rval_length); its result tells whether the value changed */
};

//引擎动作结构体
/*
 * Description of one rule action (for example "phase").  The fields appear
 * in the same order as the arguments of msre_engine_action_register().
 */
struct msre_action_metadata {
  const char *name;                    /* action name as written in a rule */
  unsigned int type;                   /* e.g. ACTION_DISRUPTIVE */
  unsigned int argc_min;               /* minimum number of parameters -- presumably; confirm */
  unsigned int argc_max;               /* maximum number of parameters -- presumably; confirm */
  unsigned int allow_param_plusminus;  /* e.g. NO_PLUS_MINUS */
  unsigned int cardinality;            /* e.g. ACTION_CARDINALITY_ONE */
  unsigned int cardinality_group;      /* e.g. ACTION_CGROUP_NONE */
  fn_action_validate_t validate;       /* validation callback */
  fn_action_init_t init;               /* init callback; for "phase" it sets actionset->phase */
  fn_action_execute_t execute;         /* callback invoked as execute(msr, mptmp, rule, action); NULL for "phase" */
};

//在modsecurity_tx_init()函数出现的modsec_rec结构体
struct modsec_rec {
  apr_pool_t *mp;
  msc_engine  *modsecurity;
  request_rec *r_early;
  request_rec *r;
  directory_config *dcfg1;
  directory_config *dcfg2;
  directory_config *usercfg;
  directory_config *txcfg;
  
  unsigned int reqbody_should_exist;
  unsigned int reqbody_chunked;
  
  unsigned int phase;
  unsigned int phase_request_headers_complete;
  unsigned int phase_request_body_complete;
  apr_bucket_brigade *if_brigade;
  unsigned int if_seen_eos;
  unsigned int if_status;
  unsigned int if_started_forwarding;

  apr_size_t reqbody_length;

  apr_bucket_brigade *of_brigade;
  unsigned int of_status;
  unsigned int of_done_reading;
  unsigned int of_skipping;
  unsigned int of_partial;
  unsigned int of_is_error;

  unsigned int resbody_status;
  apr_size_t resbody_length;
  char *resbody_data;
  unsigned int resbody_contains_html;
  apr_size_t stream_input_length;
  char *stream_input_data;
  apr_size_t stream_output_length;
  char *stream_output_data;
  unsigned int of_stream_changed;
  unsigned int if_stream_changed;

  apr_array_header_t *error_messages;
  apr_array_header_t *alerts;

  const char *txid;
  const char *sessionid;
  const char *userid;

  const char *server_software;
  const char *local_addr;
  unsigned int local_port;
  const char *local_user;

  /*client*/
  const char *remote_addr;
  unsigned int remote_port;
  const char *remote_user;

  /*useragent*/
  const char *useragent_ip;
  /*request*/
  const char *request_line;
  const char *request_method;
  const char *request_uri;
  const char *query_string;
  const char *request_protocol;
  const char *hostname;
  apr_table_t *request_headers;
  apr_off_t request_content_length;
  const char *request_content_type;
  apr_table_t *arguments;
  apr_table_t *arguments_to_sanitize;
  apr_table_t *request_headers_to_sanitize;
  apr_table_t *response_headers_to_sanitize;
  apr_table_t *request_cookies;
  apr_table_t *pattern_to_sanitize;

  unsigned int urlencoded_error;
  unsigned int inbound_error;
  unsigned int outbound_error;

  unsigned int is_relevant;
  apr_table_t *tx_vars;
  apr_table_t *geo_vars;
  /*response*/
  unsigned int response_status;
  const char *status_line;
  const char *response_protocol;
  apr_table_t *response_headers;
  unsigned int response_headers_sent;
  apr_off_t bytes_sent;
  /* modsecurity request body processing stuff*/
  unsigned int msc_reqbody_storage;  /*on disk or in memory*/
  unsigned int msc_reqbody_spilltodisk;
  unsigned int msc_reqbody_read;
  apr_pool_t *msc_reqbody_mp;
  apr_array_header_t *msc_reqbody_chunks;
  unsigned int msc_reqbody_length;
  int msc_reqbody_chunk_position;
  unsigned int msc_reqbody_chunk_offset;
  msc_data_chunk *msc_reqbody_chunk_current;
  char *msc_reqbody_buffer;
  const char *msc_reqbody_filename;
  int msc_reqbody_fd;
  msc_data_chunk *msc_reqbody_disk_chunk;

  const char *msc_reqbody_processor;
  int msc_reqbody_error;
  cosnt char *msc_reqbody_error_msg;

  apr_size_t msc_reqbody_no_files_length;

  char *msc_full_request_buffer;
  int msc_full_request_length;
  char *multipart_filename;
  char *multipart_name;
  multipart_data *mpd;
  xml_data *xml;
#ifdef WITH_YAJL
  json_data *json
#endif
  /* audit logging */
  char *new_auditlog_boundary;
  char *new_auditlog_filename;
  apr_file_t *new_auditlog_fd;
  unsigned int new_auditlog_size;
  apr_md5_ctx_t new_auditlog_md5ctx;
  
  unsigned int was_intercepted;
  unsigned int rule_was_intercepted;
  unsigned int intercept_phase;
  msre_actionset *intercept_actionset;
  const char *intercept_message;

  /*performance measurement*/
  apr_time_t request_time;
  apr_time_t time_phase1;
  apr_time_t time_phase2;
  apr_time_t time_phase3;
  apr_time_t time_phase4;
  apr_time_t time_phase5;
  apr_time_t time_storage_read;
  apr_time_t time_storage_write;
  apr_time_t time_logging;
  apr_time_t time_gc;
  apr_table_t *perf_rules;
 
  apr_array_header_t *matched_rules;
  msc_string *matched_var;
  int highest_severity;

  /* upload */
  int upload_extract_files;
  int upload_remove_files;
  int upload_files_count;
  /* other*/
  apr_table_t *collections_original;
  apr_table_t *collections;
  apr_table_t *collections_dirty;
  /* rule processing temp pool */
  apr_pool_t *msc_rule_mptmp;
  /* content injection*/
  const char *content_prepend;
  apr_off_t content_prepend_len;
  const char *content_append;
  apr_off_t content_append_len;

  /*data cache*/
  apr_hash_t *tcache;
  apr_size_t tcache_items;
  /*removed rules*/
  apr_array_header_t *removed_rules;
  apr_arrya_header_t *removed_rules_tag;
  apr_array_header_t *removed_rules_msg;
  /*removed targets*/
  apr_table_t *removed_targets;
  unsigned int allow_scope;
  /*matched vars*/
  apr_table_t *matched_vars;
  void *reqbody_processor_ctx;
  htmlDocPtr crypto_html_tree;
#if defined(WITH_LUA)
  #ifdef CACHE_LUA
  lua_State *L;
  #endif
#endif
  int msc_sdbm_delete_error;
};

//在parse_arguments()函数中出现的msc_arg结构体
struct msc_arg {
  const char *name;
  unsigned int name_len; 
  unsigned int name_origin_offset;
  unsigned int name_origin_len;
  const char *value;
  unsigned int value_len;
  unsigned int value_origin_offset;
  unsigned int value_origin_len;
  const char *origin;
};

//出现在msc_regexec()函数中的msc_regex_t结构体
/*
 * Regular-expression handle passed to msc_regexec().
 */
struct msc_regex_t {
  void *re;             /* opaque; presumably the compiled expression -- confirm */
  void *pe;             /* opaque; presumably extra/study data of the regex library -- confirm */
  const char *pattern;  /* presumably the pattern text the expression was built from -- confirm */
};

//出现在msre_ruleset_process_phase()函数中的msre_ruleset结构体
/*
 * A set of rules, kept in one array per processing phase.
 * msre_ruleset_process_phase() first selects the array of the phase being
 * processed and then walks its elements.
 */
struct msre_ruleset {
  apr_pool_t *mp;                              /* memory pool of the ruleset */
  msre_engine *engine;                         /* engine this ruleset belongs to -- presumably; confirm */
  apr_array_header_t *phase_request_headers;   /* rules of the request-headers phase */
  apr_array_header_t *phase_request_body;      /* rules of the request-body phase */
  apr_array_header_t *phase_response_headers;  /* rules of the response-headers phase */
  apr_array_header_t *phase_response_body;     /* rules of the response-body phase */
  apr_array_header_t *phase_logging;           /* rules of the logging phase */
};

//出现在modsecurity_process_phase()函数中的msre_cache_rec结构体
/*
 * One record of the transformation cache consulted while a rule is processed.
 * NOTE(review): the field meanings below are inferred from the names only -- confirm.
 */
struct msre_cache_rec {
  int hits;            /* presumably how many times this record was reused */
  int changed;         /* presumably whether the transformation changed the value */
  int num;             /* presumably the position of the transformation in its chain */
  const char *path;    /* presumably the chain of transformation names leading to this value */
  const char *val;     /* cached value */
  apr_size_t val_len;  /* length of val */
};

1.1 位于apache2/mod_security2.c文件中，有个模块的入口点，这是挂载到apache主程序的入口:

/* Module entry points 模块的入口点*/
/*
 * Module descriptor through which the module is attached to Apache.
 * The slots marked a-d are the ones discussed in the surrounding text:
 *   a - creates a directory_config, gives it initial values and returns its address
 *   b - merges a parent and a child directory configuration
 *   c - table of the configuration directives
 *   d - registers the module's Apache hooks
 */
module AP_MODULE_DECLARE_DATA security2_module = {
    STANDARD20_MODULE_STUFF,
    create_directory_config, //a
    merge_directory_configs, //b
    NULL,    /* create_server_config */
    NULL,    /* merge_server_configs */
    module_directives,   //c
    register_hooks        //d
};

上述代码块中的a是创建一个directory_config结构体变量，然后赋初值，并返回该结构体的地址。

我们来看看这个结构体内容:

/*
 * Per-directory configuration.  Instances are created by
 * create_directory_config() and combined by merge_directory_configs(); the
 * merged result used for a transaction is msr->txcfg.
 */
struct directory_config {
  apr_pool_t *mp;
  msre_ruleset *ruleset;  /* rules evaluated by msre_ruleset_process_phase() */
  int is_enabled;
  int reqbody_access;
  int reqintercept_oe;
  int reqbody_buffering;
  long int reqbody_inmemory_limit;
  long int reqbody_limit;
  long int reqbody_no_files_limit;
  int resbody_access;
  
  long int of_limit;
  apr_table_t *of_mime_types;
  int of_mime_types_cleared;
  int of_limit_action;
  int if_limit_action;

  const char *debuglog_name;
  int debuglog_level;
  apr_file_t *debuglog_fd;
  
  int cookie_format;  /* COOKIES_V0 selects parse_cookies_v0(), anything else parse_cookies_v1() */
  int argument_separator;
  const char *cookiev0_separator;

  int rule_inheritance;
  apr_array_header_t *rule_exceptions;

  /* -- Audit log -- */
  
  /* Max rule time */
  int max_rule_time;
  
  /* Whether audit log should be enabled in the context or not */
  int auditlog_flag;
  /* AUDITLOG_SERIAL (single file) or AUDITLOG_CONCURRENT (multiple files) */
  int auditlog_type;
#ifdef WITH_YAJL
  /* AUDITLOGFORMAT_NATIVE or AUDITLOGFORMAT_JSON */
  int auditlog_format;
#endif
  /* Mode for audit log directories and files */
  apr_fileperms_t auditlog_dirperms;
  apr_fileperms_t auditlog_fileperms;
  char *auditlog_name;
  char *auditlog2_name;

  /* The file descriptors for the files above */
  apr_file_t *auditlog_fd;
  apr_file_t *auditlog2_fd;
   /* For the new-style audit log only, the path where audit log entries will be stored */
  char *auditlog_storage_dir;
  /* String of AUDITLOG_PART_* letters; the audit logger tests it with strchr() */
  char *auditlog_parts;
  /* A regular expression that determines if a response status is treated as relevant */
  msc_regex_t *auditlog_relevant_regex;
  /* Upload */
  const char *tmp_dir;
  const char *upload_dir;
  int upload_keep_files;  /* compared against KEEP_FILES_ON in the logging phase */
  int upload_validates_files;
  int upload_filemode; /* int only so NOT_SET works */
  int upload_file_limit;
  
  /* Used only in the configuration phase */
  msre_rule *tmp_chain_starter;
  msre_actionset *tmp_default_actionset;
  apr_table_t *tmp_rule_placeholders;
  /* Misc */
  const char *data_dir;
  const char *webappid;
  const char *sensor_id;
  const char *httpBlkey;
  /* Content injection*/
  int content_injection_enabled;
  /* Stream Inspection */
  int stream_inbody_inspection;
  int stream_outbody_inspection;

  /* Geo Lookup */
  geo_db *geo;
  /* Gsb Lookup */
  gsb_db *gsb;
  /* Unicode map*/
  unicode_map *u_map;
  /*Cache */
  int cache_trans;  /* compared against MODSEC_CACHE_DISABLED while a rule is processed */
  int cache_trans_incremental;
  apr_size_t cache_trans_min;
  apr_size_t cache_trans_max;
  apr_size_t cache_trans_maxitems;
  
  apr_array_header_t *component_signatures;
  /* Request character encoding */
  const char *request_encoding;
  int disable_backend_compression;
  /* Collection timeout */
  int col_timeout;
  /*hash of ids*/
  apr_hash_t *rule_id_htab;
  /* Hash */
  apr_array_header_t *hash_method;
  const char *crypto_key;
  int crypto_key_len;
  const char *crypto_param_name;
  int hash_is_enabled;
  int hash_enforcement;
  int crypto_key_add;
  int crypto_hash_href_rx;
  int crypto_hash_faction_rx;
  int crypto_hash_location_rx;
  int crypto_hash_iframesrc_rx;
  int crypto_hash_framesrc_rx;
  int crypto_hash_href_pm;
  int crypto_hash_faction_pm;
  int crypto_hash_location_pm;
  int crypto_hash_iframesrc_pm;
  int crypto_hash_framesrc_pm;

  /* xml */
  int xml_external_entity;
};

上述代码块中的b作用是:合并两个目录配置，参数2和参数3分别是_parent和_child,说明是合并两个父子目录。

上述代码块中的c中结构体叫做module_directives，这里面跟apache中的module的参数写法一样，调用的函数分别是AP_INIT_TAKE1、AP_INIT_TAKE12等，主要是指令名和参数的个数区别。

上述代码块中的d是注册Apache的模块钩子。在此钩子中，相继调用了多个函数，比如初始化函数等。

下面分析register_hooks()中所做的事情:

1.1.1 注册可选函数

#if (!defined(NO_MODSEC_API))
  /* Export the optional functions.
     Registering them inside the module's register_hooks function adds each
     of them to the global optional-function hash table maintained by the
     Apache core. */
  APR_REGISTER_OPTIONAL_FN(modsec_register_tfn);
  APR_REGISTER_OPTIONAL_FN(modsec_register_operator);
  APR_REGISTER_OPTIONAL_FN(modsec_register_variable);
  APR_REGISTER_OPTIONAL_FN(modsec_register_reqbody_processor);
#endif

1.1.2 主要的钩子函数

1.1.2.1

ap_hook_pre_config(hook_pre_config, NULL, NULL, APR_HOOK_FIRST);

上面函数的目的是预配置的初始化，在hook_pre_config()函数中初始化创建ModSecurity引擎，其中modsecurity是全局变量。然后有条件地注册了一个modsec_var_log_handler()，看起来是用于log作用的，这里先不分析这个log函数。

hook_pre_config()
  |->modsecurity=modsecurity_create(mp,MODSEC_ONLINE)
    |->msre_engine_create(msce->mp);
      |->apr_pool_create()
      |->engine=apr_pcalloc()
      |->engine->tfns=apr_table_make()
    |->msre_engine_register_default_variables(msce->msre);//在此函数中使用msre_engine_variable_register()函数向引擎中注册很多个默认变量
      |->msre_engine_variable_register(); 
        |->msre_var_metadata *metadata = apr_pcalloc();
        |->赋值(包括变量名，回调函数等,回调函数放到后面举例介绍)然后apr_table_setn(engine->variables, name,(void *)metadata);
    |->msre_engine_register_default_operators(msce->msre);//注册了很多运算符
      |->msre_engine_op_register()
        |->msre_op_metadata *metadata = apr_pcalloc()
        |->赋值(包括变量名，回调函数等，回调函数放到后面举例介绍)然后apr_table_setn();
    |->msre_engine_register_default_tfns(msce->msre);
      |->msre_engine_tfn_register()
        |->msre_tfn_metadata *metadata = apr_pcalloc()
        |->赋值(包括变量名，回调函数等，回调函数放到后面举例介绍)然后apr_table_setn()
    |->msre_engine_register_default_actions(msce->msre);
      |->msre_engine_action_register()
        |->msre_action_metadata *metadata = apr_pcalloc()
        |->赋值(包括变量名，回调函数等，回调函数放到后面举例介绍)然后apr_table_setn()

通过举例来说明msre_engine_register_default_variables()函数的需要完成的任务:

/*ARGS_POST*/
msre_engine_variable_register(engine,
      "ARGS_POST",
      VAR_LIST,
      0, 1,
      var_generic_list_validate,
      var_args_post_generate,
      VAR_CACHE,
      PHASE_REQUEST_BODY
);
其中，var_generic_list_validate()函数中主要判断了参数是否是一个正则表达式
var_args_post_generate()函数:
  |->for(i=0;i<arr->nelts;i++) 
       if(strcmp("BODY",arg->origin)!=0) continue;
       if(var->param==NULL)match=1;
       else
         if(var->param_data!=NULL) //正则表达式
           msc_regexec((msc_regex_t *)var->param_data,...)
         else
           if(strcasecmp(arg->name,var->param)==0) match=1//简单的比较
       if(match)  //如果我们有一个匹配，将这个参数添加到集合中
         apr_table_addn(vartab,rvar->name,(void *)rvar)

通过举例来说明msre_engine_register_default_operators()函数需要做的工作，此处举两个例子:

1./* contains*/
msre_engine_op_register(engine,
     "contains",
      NULL,/*init function to flag var substitution*/
      msre_op_contains_execute
);
其中，msre_op_contains_execute()函数，参考一个SecRule例子:
SecRule REQUEST_LINE "!@contains .php" t:none,deny,status:403
SecRule ARGS:ip "!@contains %{TX.1}"
|->msre_op_contains_execute() 
  |->expand_macros(msr,str,rule,msr->mp)//在给定的变量中扩展宏("%{NAME}"实体)
  |->for(i=0;i<=i_max;i++) { 
       if(target[i] == match[0]) { 
         if((match_length==1) || (memcmp((match+1),(target+i+1),(match_length-1)) == 0)) 
           return 1;//匹配 
  |->return 0;//没有匹配
2./* detectSQLi */
    msre_engine_op_register(engine,
        "detectSQLi",
         NULL,
         msre_op_detectSQLi_execute
    );
其中msre_op_detectSQLi_execute()函数会使用libinjection/目录下的相关文件的函数，具体的分析看源代码，暂不介绍

通过举例来说明msre_engine_register_default_tfns()函数的需要完成的任务:

/*lowercase*/
msre_engine_tfn_register(engine,
        "lowercase",
        msre_fn_lowercase_execute
);
其中，msre_fn_lowercase_execute()函数具有小写化的功能
|->msre_fn_lowercase_execute()
  |->while(i<input_len) {
       int x = input[i];
       input[i]=tolower(x);
       if(x!=input[i]) changed=1;
       i++;
     }

通过举例来说明msre_engine_register_default_actions()函数的需要完成的任务:

/*phase*/
msre_engine_action_register(engine,
        "phase",
        ACTION_DISRUPTIVE,
        1, 1,
        NO_PLUS_MINUS,
        ACTION_CARDINALITY_ONE,
        ACTION_CGROUP_NONE,
        msre_action_phase_validate,
        msre_action_phase_init,
        NULL
    );
其中，msre_action_phase_validate()函数什么也没做，msre_action_phase_init()函数根据参数名将actionset->phase设置成相应的值
if(strcasecmp(action->param,"request") == 0)
        actionset->phase = 2;
    else if(strcasecmp(action->param,"response") == 0)
        actionset->phase = 4;
    else if(strcasecmp(action->param,"logging") == 0)
        actionset->phase = 5;

1.1.2.2

ap_hook_post_config(hook_post_config, postconfig_beforeme_list,postconfig_afterme_list,APR_HOOK_REALLY_LAST);

由于没有找到ap_hook_post_config()函数的定义，所以上面函数中的postconfig_beforeme_list和postconfig_afterme_list参数暂时不清楚，我们将重点放在hook_post_config()函数上:

//此函数是(后配置)模块初始化
|->hook_post_config()
  |->apr_pool_userdata_get(&init_flag,...)//通过apr函数获取在当前池中的key的value
  |->如果init_flag==NULL，调用apr_pool_userdata_set(),否则调用modsecurity_init(modsecurity,mp);//在hook_pre_config()中已经初始化好了modsecurity对象
    |->modsecurity_init()预置modsecurity引擎，这个函数必须在配置处理完成后被调用，因为Apache需要知道正在运行的用户名
      |->rc=apr_global_mutex_create(&msce->auditlog_lock,...)
      |->rc=apr_global_mutex_create(&msce->geo_lock,...)
      |->rc=apr_global_mutex_create(&msce->dbm_lock,...)
  |->real_server_signature=apr_pstrdup(mp, apache_get_server_version()) //存储原始服务器签名
  |->如果real_server_signature不是NULL,则ap_add_version_component()和change_server_signature()//忽略此函数的过程
  |->#if (!(defined(WIN32) || defined(NETWARE))) 则执行内部一系列chroot功能
  |->apr_pool_cleanup_register(mp,(void *)s, module_cleanup,apr_pool_cleanup_null);//在主池被销毁时，为稍后的时间安排主要的清理工作

1.1.2.3

ap_hook_child_init(hook_child_init,NULL,NULL,APR_HOOK_MIDDLE);

上面函数中hook_child_init()函数为每个新的子进程执行初始化

|->hook_child_init()
  |->modsecurity_child_init(modsecurity);
    |->xmlInitParser();//在任何其他XML调用之前，需要将此过程调用一次
    |->apr_status_t rc = apr_global_mutex_child_init()//apr_global_mutex_child_init在子进程中重新打开互斥锁
    |->apr_global_mutex_child_init()
    |->apr_global_mutex_child_init()

1.1.2.4 连接进程钩子

ap_hook_process_connection(hook_connection_early, NULL, NULL, APR_HOOK_FIRST)

上面函数中hook_connection_early()函数目的是为连接钩子限制繁忙状态的连接数

|->hook_connection_early()
  |->ap_get_scoreboard_worker(sbh)
  |->ws_record=ap_get_scoreboard_worker_from_indexes(i,j)
  |->tree_contains_ip()

1.1.2.5 事务进程钩子

ap_hook_post_read_request(hook_request_early,postread_beforeme_list, postread_afterme_list, APR_HOOK_REALLY_FIRST);

上面函数中的hook_request_early()函数初始请求处理，在Apache接受请求头之后立即执行，该函数将创建事务上下文。在下面的函数分析中，有几个定义需要了解一下:

/*
 * Single-letter identifiers of the audit-log parts.  The audit logger checks
 * whether a part is wanted with strchr() on txcfg->auditlog_parts.
 */
#define AUDITLOG_PART_FIRST                 'A'  /* same letter as AUDITLOG_PART_HEADER */
#define AUDITLOG_PART_HEADER                'A'
#define AUDITLOG_PART_REQUEST_HEADERS       'B'
#define AUDITLOG_PART_REQUEST_BODY          'C'
#define AUDITLOG_PART_RESPONSE_HEADERS      'D'
#define AUDITLOG_PART_RESPONSE_BODY         'E'
#define AUDITLOG_PART_A_RESPONSE_HEADERS    'F'
#define AUDITLOG_PART_A_RESPONSE_BODY       'G'
#define AUDITLOG_PART_TRAILER               'H'
#define AUDITLOG_PART_FAKE_REQUEST_BODY     'I'
#define AUDITLOG_PART_UPLOADS               'J'
#define AUDITLOG_PART_MATCHEDRULES          'K'
#define AUDITLOG_PART_LAST                  'K'  /* same letter as AUDITLOG_PART_MATCHEDRULES */
#define AUDITLOG_PART_ENDMARKER             'Z'

#define NEXT_CHAIN 1
#define NEXT_RULE  2
#define SKIP_RULES 3

|->hook_request_early()
  |->msr=create_tx_context(r);//初始化事务上下文并创建初始配置
    |->msr=apr_pcalloc(r->pool,..)//创建一个新的msr并赋值
    |->apr_allocator_create(&allocator);//创建一个新的分配器
    |->apr_allocator_max_free_set(allocator, 1024);//设置当前的阈值，在该阈值中，分配器应该开始向系统返回块
    |->apr_pool_create_ex(&msr->mp,r->pool,NULL,allocator);//创建新pool,这个函数是线程安全的，因为多个线程可以同时安全地创建同一个父池的子池，类似地，一个线程可以在另一个线程访问父池的同时创建一个子池
    |->apr_allocator_owner_set(allocator, msr->mp);//设置分配器的所有者
    |->msr->dcfg1=ap_get_module_config(r->per_dir_config,&security2_module)
    |->msr->usercfg=create_directory_config()//创建特殊的用户配置，将被用来覆盖默认设置
    |->msr->txcfg=create_directory_config() //创建一个事务上下文并用我们刚从Apache得到的目录配置填充它
    |->msr->txcfg=merge_directory_configs(msr->mp,msr->txcfg,msr->dcfg1)
    |->init_directory_config(msr->txcfg);//初始化目录配置
    |->msr->txid=get_env_var(r, "UNIQUE_ID")//检索指定的环境变量,当mod_unique_id模块注册的时候这个值存在
      |->apr_table_get(r->notes, name)
    |->msr->request_uri=r->uri//这里有很多赋值操作，目的是填充tx字段，从r的字段到msr的相关字段的赋值
    |->msr->request_headers = apr_table_copy(msr->mp,r->headers_in)//创建一个新表，并将另一个表复制到其中
    |->msr->hostname=ap_get_server_name(r)//从请求中获取当前的服务器名称
    |->modsecurity_tx_init(msr) //调用引擎以继续初始化,继续给msr的相关字段赋值
      |->apr_pool_cleanup_register(msr->mp,msr,modsecurity_tx_cleanup,apr_pool_cleanup_null);
      |->apr_table_get(msr->request_headers,"Content-Length");//这里判断了请求是否有正文，总共两种情况有正文
      |->apr_table_get(msr->request_headers,"Content-Type")
      |->parse_arguments()  //解析QUERY_STRING字段值
        |->urldecode_nonstrict_inplace_ex()  //进行urldecode处理
        |->add_argument(msr,arguments,arg) //向msr的成员arguments成员中增加key-value对
          |->apr_table_addn(arguments,log_escape_nq_ex(msr->mp,arg->name,arg->name_len),(void *)arg)
      |->if(msr->txcfg->cookie_format==COOKIES_V0) parse_cookies_v0(msr,te[i].val, msr->request_cookies,";")
        |->apr_strtok(cookie_header,delim,&saveptr)
      |->else parse_cookies_v1(msr, te[i].val,msr->request_cookies)
    |->store_tx_context(msr,r);  //存储事务上下文，可以在随后的阶段、重定向或子请求中找到它
      |->apr_table_setn(r->notes,NOTE_MSR,(void *)msr);//apr_table_setn()向表中添加键/值对。如果另一个元素已经具有相同的键，那么覆盖之
  |->#ifdef REQUEST_EARLY
  |->if (modsecurity_process_phase(msr, PHASE_REQUEST_HEADERS) > 0) //一个事务阶段，由于在modsec_rec结构中已经可用，所以不需要显式地提供阶段号
    |->modsecurity_process_phase_request_headers(msr); //处理进程请求头(REQUEST_HEADERS)阶段
      |->rc=msre_ruleset_process_phase(msr->txcfg->ruleset,msr)
        |->首先确定我们需要使用哪一组规则(包括PHASE_REQUEST_HEADERS,PHASE_REQUEST_BODY等)
        |->apr_table_clear(msr->matched_vars)//从表中删除所有元素
        |->for(i=0;i<arr->nelts;i++)//这是一个循环，针对每一个ruleset中的相应成员(阶段)的元素来做处理，一直到整个函数结束
        |->if(mode==SKIP_RULES) //SKIP_RULES用于跳过所有规则，直到我们用指定的规则ID命中一个占位符，然后在此之后继续执行
        |->if(rule->placeholder != RULE_PH_NONE)//跳过任何标记为占位符的规则
        |->if(mode==NEXT_CHAIN) //当链中的一个规则不匹配时，就会使用NEXT_CHAIN,然后我们需要跳过该链中的剩余规则，以获得可以执行的下一个规则
        |->if((mode == NEXT_RULE)&&(skip>0))//如果我们在这里意味着是NEXT_RULE,如果设置"跳过"参数，则需要跳过
        |->if(((rule->actionset->id!=NULL) && !apr_is_empty_array(msr->removed_rules)) ||(apr_is_empty_array(msr->removed_rules_tag)==0) ||(
           apr_is_empty_array(msr->removed_rules_msg)==0)) //检查该规则是否在运行时被删除，此处的逻辑块不分析
        |->rc=msre_rule_process(rule,msr);//使用一个新的内存子池来处理每个规则
          |->apr_pool_create(&msr->msc_rule_mptmp,msr->mp)//创建规则处理临时池
          |->#if defined(WITH_LUA) msre_rule_process_lua(rule,msr)//处理lua脚本，这里直接不介绍
          |->msre_rule_process_normal(rule,msr) //对给定的事务执行规则
            |->apr_table_get(rule->actionset->actions, "multiMatch")  //获取multiMatch字段的值
            |->for(i=0;i<rule->targets->nelts;i++) {
                  list_count=targets[i]->metadata->generate(msr,targets[i],rule,vartab,mptmp)//这里调用之前初始化的回调函数
            |->for(i=0;i<arr->nelts;i++) //循环一直到函数结尾，循环遍历最终目标列表中的目标，根据需要执行转换，并调用操作符
            |->if(msr->txcfg->cache_trans != MODSEC_CACHE_DISABLED) //判断这是不是var缓存
            |->for(k=0;k<tarr->nelts;k++) //构建转换函数的最终列表
                apr_table_addn(normtab,action->param,(void *)action) //增加t的参数到表中
            |->if(usecache && !multi_match && (crec != NULL) &&(crec == last_crec)) //如果最后一个缓存的tfn是列表中的最后一个，那么我们可以在这里停止并立即执行该操作
            |->rc = execute_operator(var,rule,msr,acting_actionset, mptmp)//根据给定值调用规则操作符,例如: SecRule REQUEST_HEADERS:Content-Type "text/xml" ...或者 SecRule REQUEST_HEADERS:User-Agent "@contains SECRET_PASSWORD"
              |->tarr=apr_table_elts(msr->removed_targets)
              |->telts=(const apr_table_entry_t*)tarr->elts
              |->for(i=0;i<tarr->nelts;i++) //循环处理msr的removed_targets成员
                   rc=msre_ruleset_rule_matches_exception(rule,re) //
                   if(rc>0) rc=fetch_target_exception(rule,msr,var,exceptions)
              |->rc=rule->op_metadata->execute(msr,rule, var, &my_error_msg) //此函数调用了op_metadata的回调执行函数，是最关键的函数之一，另一个是转换metadata的回调执行函数和action_metadata的回调函数
              |->if(((rc==0)&&(rule->op_negated == 0)) || ((rc==1)&&(rule->op_negated==1)))//返回RULE_NO_MATCH
              |->else  //匹配
                   if(rc==0) //记录日志
                   *(const msre_rule **)apr_array_push(msr->matched_rules) = rule;
                   if (var!=NULL && msr !=NULL)//保存最后匹配的var数据给msr->matched_var的各个成员赋值，给创建的变量mvar赋值
                     apr_table_addn(msr->matched_vars, mvar->name, (void *)mvar)
                   if((acting_actionset->severity>0)&&(acting_actionset->severity<msr->highest_severity)&&!rule->actionset->is_chained)
                   msre_perform_nondisruptive_actions(msr,rule,rule->actionset,mptmp)//执行非破坏性操作
                     |->for(i=0;i<tarr->nelts;i++)
                          action->metadata->execute(msr,mptmp,rule,action)//执行action_metadata的回调执行函数
                   if(rule->actionset->is_chained==0)
                     msre_perform_disruptive_actions(msr,rule,acting_actionset,mptmp,my_error_msg)//执行破坏性操作
                       |->for(i=0;i<tarr->nelts;i++)
                            action->metadata->execute(msr,mptmp,rule,action)
                       |->if(actionset->intercept_action_rec->metadata->type==ACTION_DISRUPTIVE)
                            actionset->intercept_action_rec->metadata->execute(msr,mptmp,rule,actionset->intercept_action_rec)
                       |->if((msr->phase==PHASE_LOGGING)||...)
                            apr_array_push(msr->alerts)=msc_alert_message(msr,actionset,NULL,message)//msc_alert_message()格式化一个警告信息
                       |->msc_alert(msr, log_level, actionset, "Warning", message)
            |->tarr=apr_table_elts(normtab)//从normtab表中获取元素的，返回整个元素数组的地址
            |->for(;k<tarr->nelts;k++) 
                 if(multi_match && (k==0||tfnchanged)) //在多匹配模式下，我们在开始时执行一次运算符，然后每次变量被转换函数改变一次
                   rc=execute_operator(var,rule,msr,acting_actionset,mptmp)
                 metadata=(msre_tfn_metadata *)action->param_data;
                 tfnchanged=metadata->execute(mptmp,(unsigned char *)var->value,var->value_len,&rval,&rval_length)//调用metadata的回调函数
                 if(usecache) //这里不介绍，忽略
            |->if(!multi_match || tfnchanged) //如果没有启用多匹配，则执行操作符，或者如果是，我们需要处理最后一个转换的结果
                 rc=execute_operator(var,rule,msr,acting_actionset,mptmp)
        |->if(rc==RULE_NO_MATCH) //如果返回值rc==RULE_NO_MATCH
        |->else if(rc==RULE_MATCH)//如果返回值rc==RULE_MATCH
        |->else if(rc<0) //如果返回值rc小于0，表示规则匹配失败
        |->else   //剩余的情况表示规则匹配失败而且未知的返回码
    |->modsecurity_process_phase_request_body(msr)
      |->rc=msre_ruleset_process_phase(msr->txcfg->ruleset,msr)//到这儿，请求体和请求头的处理基本相同
    |->modsecurity_process_phase_response_headers(msr);
      |->rc=msre_ruleset_process_phase(msr->txcfg->ruleset,msr);//到这儿，响应头和请求头的处理基本相同
    |->modsecurity_process_phase_response_body(msr);
      |->msre_ruleset_process_phase(msr->txcfg->ruleset,msr)//到这儿，响应体和请求头的处理基本相同
    |->modsecurity_process_phase_logging(msr);
      |->msre_ruleset_process_phase(msr->txcfg->ruleset,msr)
      |->modsecurity_persist_data(msr)
        |->collection_store(msr,col)
        |->collections_remove_stale(msr,te[i].key)
      |->if(msr->is_relevant==0) //这个请求是否与日志记录有关?
           is_response_status_relevant(msr,msr->r->status) //检查状态
      |->if((msr->txcfg->upload_keep_files==KEEP_FILES_ON)||...)//如果我们想保存这些文件(如果有的话)
      |->sec_audit_logger(msr) //调用审计日志记录器
        |->#ifdef WITH_YAJL  sec_audit_logger_json(msr) //这里不介绍
        |->sec_audit_logger_native(msr) //以本机格式生成审计日志条目
          |->msr->new_auditlog_boundary=create_auditlog_boundary(msr->r)
          |->if(msr->txcfg->auditlog_type != AUDITLOG_CONCURRENT) //串行日志记录-我们已经有一个打开的文件描述符
          |->else
               apr_md5_init(&msr->new_auditlog_md5ctx)//MD5初始化，开始MD5操作，编写新的上下文
               msr->new_auditlog_filename=construct_auditlog_filename(msr->mp,msr->txid)//构造一个文件名，用于存储审计日志条目
               entry_filename=msr->txcfg->auditlog_storage_dir
               entry_basename=file_dirname(msr->mp,entry_filename)
               apr_dir_make_recursive()//在文件系统上创建一个新目录，但行为类似于“mkdir -p”。根据需要创建中间目录。如果路径已经存在，则不会报告错误。
               apr_file_open()
          |->apr_global_mutex_lock(msr->modsecurity->auditlog_lock)
          |->sec_auditlog_write(msr,text,strlen(text))
          |->if(strchr(msr->txcfg->auditlog_parts,AUDITLOG_PART_REQUEST_HEADERS)!=NULL) //REQUEST_HEADERS的日志
          |->if(strchr(msr->txcfg->auditlog_parts,AUDITLOG_PART_REQUEST_BODY)!=NULL) //REQUEST_BODY
          |->if(strchr(msr->txcfg->auditlog_parts,AUDITLOG_PART_RESPONSE_HEADERS) !=NULL) //RESPONSE_HEADERS
          |->if(strchr(msr->txcfg->auditlog_parts,AUDITLOG_PART_RESPONSE_BODY) !=NULL) //RESPONSE_BODY
          |->剩下的if分支在这里不显示
  |->rc=perform_interception(msr)  //使用结构本身指定的方法拦截事务，必须返回一个HTTP状态码，它将被用来终止事务
    |->switch(actionset->intercept_action) //确定如何响应和准备日志消息
         case ACTION_DENY:
         case ACTION_PROXY:
         case ACTION_DROP:
         case ACTION_REDIRECT:
           expand_macros(msr, var, NULL, msr->mp)
         case ACTION_ALLOW:
         case ACTION_PAUSE:
         case ACTION_ALLOW_PHASE:
         case ACTION_ALLOW_REQUEST:
         default:
     |->msc_alert_message(msr,actionset,NULL,message)
     |->msc_alert()

1.1.2.6

ap_hook_fixups(hook_request_late, fixups_beforeme_list, NULL,APR_HOOK_REALLY_FIRST)

上述函数中的hook_request_late()函数作为处理程序链中的第一个钩子，该函数执行ModSecurity请求处理的第二阶段

|->hook_request_late()
  |->msr=retrieve_tx_context(r) //找到事务上下文并确保我们继续进行
  |->if(msr->phase_request_body_complete) //这个阶段已经完成了吗?
  |->msr->dcfg2=(directory_config *)ap_get_module_config(r->per_dir_config,&security2_module)//获取第二个配置上下文
  |->msr->txcfg=create_directory_config(msr->mp,NULL)//创建一个事务上下文
  |->msr->txcfg=merge_directory_configs(msr->mp,msr->txcfg,msr->dcfg2) 
  |->msr->txcfg=merge_directory_configs(msr->mp,msr->txcfg,msr->usercfg);//使用显式的用户设置更新
  |->init_directory_config(msr->txcfg)
  |->rc=read_request_body(msr,&my_error_msg) //从客户端读取请求体
    |->modsecurity_request_body_start(msr, error_msg) 
    |->bb_in=apr_brigade_create()
    |->do{
          rc=ap_get_brigade(r->input_filters,...)
          for(bucket=APR_BRIGADE_FIRST(bb_in);...) //循环遍历brigade中的Bucket，以便提取可用数据的大小
            rc=apr_bucket_read(bucket,&buf,&buflen,APR_BLOCK_READ)
            if(buflen!=0)
              modsecurity_request_body_store(msr,buf,buflen,error_msg)//存储一大块请求体数据
            if(APR_BUCKET_IS_EOS(bucket))
               finished_reading=1;msr->if_seen_eos=1;
       }while(!finished_reading);
     |->modsecurity_request_body_end(msr,error_msg) //停止接收请求体

1.1.2.7 Logging

ap_hook_error_log(hook_error_log,NULL,NULL,APR_HOOK_MIDDLE)

此函数中的hook_error_log()函数在每次Apache都有要写入的错误日志的东西时调用

|->hook_error_log
  |->retrieve_tx_context((request_rec *)info->r) //通过查看主请求和之前的请求来检索之前存储的事务上下文

ap_hook_log_transaction(hook_log_transaction,NULL,transaction_afterme_list,APR_HOOK_MIDDLE)

上述函数中的hook_log_transaction()函数在每个事务结束时调用

|->hook_log_transaction()
  |->msr=retrieve_tx_context(r)
    |->msr->response_protocol=get_response_protocol(origr)
    |->sec_guardian_logger(r,origr,msr)  //Guardian日志记录器用于连接到web服务器保护的外部脚本——httpd_guardian。
    |->modsecurity_process_phase(msr, PHASE_LOGGING) //调用引擎来完成剩余的工作

1.1.2.8 Filter hooks

ap_hook_insert_filter(hook_insert_filter,NULL,NULL,APR_HOOK_FIRST)

上述函数中的hook_insert_filter()在请求处理开始之前调用，这是我们需要决定是否要连接到输出过滤器链的时候

|->hook_insert_filter()
  |->msr=retrieve_tx_context(r)//首先发现事务上下文
  |->ap_add_input_filter("MODSECURITY_IN", msr, r, r->connection) //增加输入过滤器
  |->ap_add_output_filter("MODSECURITY_OUT", msr, r, r->connection)  //增加输出过滤器

ap_hook_insert_error_filter(hook_insert_error_filter,NULL,NULL,APR_HOOK_FIRST)

上述函数中的hook_insert_error_filter()在Apache开始处理错误时调用，这是一个插入到输出过滤器链中的机会。

|->hook_insert_error_filter()
  |->msr=retrieve_tx_context(r)
  |->ap_add_output_filter("MODSECURITY_OUT",msr,r,r->connection)//如果输出过滤器已经完成，不要运行此行

1.1.2.9 注册一个输入过滤器

ap_register_input_filter("MODSECURITY_IN", input_filter, NULL, AP_FTYPE_CONTENT_SET)

上述函数用于在系统中注册一个输入过滤器，在执行此注册之后，可以使用ap_add_input_filter()将过滤器添加到过滤器链中，并简单地指定名称。其中input_filter()函数会将先前存储的请求体转发到链。

|->input_filter()
  |->rc=modsecurity_request_body_retrieve_start(msr,&my_error_msg)  //准备转发请求体
  |->rc=modsecurity_request_body_retrieve(msr,&chunk,(unsigned int)nbytes,&my_error_msg)
  |->if(rc==0) modsecurity_request_body_retrieve_end(msr)

1.1.2.10 注册输出过滤器

ap_register_output_filter("MODSECURITY_OUT", output_filter, NULL, AP_FTYPE_CONTENT_SET - 3)

确保输出过滤器在其他模块之前运行，这样我们就可以得到一个不被修改的更好的请求

|->output_filter()
  |->msr->response_protocol=get_response_protocol(r)
  |->rc=modify_response_header(msr)
  |->rc=modsecurity_process_phase(msr,PHASE_RESPONSE_HEADERS)
  |->if(rc>0) perform_interception(msr) //事务需要被中断
  |->rc=output_filter_init(msr,f,bb_in) //初始化输出过滤器
     switch(rc)
       case -2:
       case -1:
       case 0:
  |->for(bucket=APR_BRIGADE_FIRST(bb_in);...) {//循环遍历brigade中的bucket,以便提取可用数据的大小
       rc=apr_bucket_read(bucket,&buf,&buflen, APR_BLOCK_READ);
       if(APR_BUCKET_IS_EOS(bucket))
         bucket_ci=apr_bucket_heap_create(msr->content_append,...)
         APR_BUCKET_INSERT_BEFORE(bucket,bucket_ci);//在指定的桶前插入一个桶
  |->ap_save_brigade(f,&msr->of_brigade,&bb_in,msr->mp)
  |->flatten_response_body(msr) 
  |->rc=modsecurity_process_phase(msr,PHASE_RESPONSE_BODY);//处理阶段RESPONSE_BODY
  |->if(rc>0) perform_interception(msr)
  |->prepend_content_to_of_brigade(msr, f)
  |->rc=send_of_brigade(msr, f)
  |->if(msr->phase<PHASE_RESPONSE_BODY)
       flatten_response_body(msr)
       modsecurity_process_phase(msr,PHASE_RESPONSE_BODY)
  |->inject_content_to_of_brigade(msr,f)
  |->prepend_content_to_of_brigade(msr, f)
  |->rc=send_of_brigade(msr,f)//将数据发送到过滤器流

“ModSecurity2”源码分析

猜你喜欢