浅析uthash系列之一:数据结构

uthash版本 :2.0.2

uthash用三个结构体来实现散列表的存储管理。

  • UT_hash_table:用于散列表整体管理。
/* Bookkeeping record for one whole hash table: the bucket array, size
 * counters, append tail, and the statistics that steer bucket expansion. */
typedef struct UT_hash_table {
   /* array of buckets */
   UT_hash_bucket *buckets;
   /* number of buckets in the array */
   unsigned num_buckets;
   /* power-of-two exponent matching num_buckets */
   unsigned log2_num_buckets;
   unsigned num_items;
   /* hash handle of the last element in application order; kept so that
    * appending is fast */
   struct UT_hash_handle *tail;
   /* hash handle offset: byte position of the hash handle inside the
    * enclosing element */
   ptrdiff_t hho;

   /* Ideal chain length. If every bucket were used equally, no bucket would
    * hold more than ceil(#items/#buckets) items. */
   unsigned ideal_chain_maxlen;

   /* Count of items whose position in their chain lies beyond
    * ideal_chain_maxlen. Such items bear the cost of an uneven hash
    * distribution: a chain traversal needs more than the ideal number of
    * steps to reach them. */
   unsigned nonideal_items;

   /* An expansion is "ineffective" when the buckets were doubled and yet,
    * afterwards, more than half of the items still sat at nonideal chain
    * positions. Two such expansions in a row cause further expansion to be
    * inhibited because it is not helping, which is what happens when the hash
    * function suits the key domain poorly. With expansion inhibited the hash
    * keeps working, though no longer in constant time. */
   unsigned ineff_expands;
   unsigned noexpand;

   /* only used by external analysis to locate hash tables */
   uint32_t signature;
#ifdef HASH_BLOOM
   /* only used by external analysis to test that a bloom filter exists */
   uint32_t bloom_sig;
   /* NOTE(review): presumably the bloom filter bit vector and its size in
    * bits (as a count or exponent) -- confirm against uthash.h */
   uint8_t *bloom_bv;
   uint8_t bloom_nbits;
#endif

} UT_hash_table;
  1. UT_hash_bucket *buckets :指向通过uthash_malloc分配的数组结构,在第一次使用时会自动分配,个数为HASH_INITIAL_NUM_BUCKETS。
  2. unsigned num_buckets :bucket的个数,也就是散列表条数。
  3. unsigned log2_num_buckets :num_buckets以2为底的对数(即num_buckets等于2的log2_num_buckets次方),散列表的扩容是按照2的幂次进行的,初始值为HASH_INITIAL_NUM_BUCKETS_LOG2。
  4. struct UT_hash_handle *tail :指向应用顺序中最后一个(即最后插入的)元素的UT_hash_handle,用于快速追加(append)。
  5. ptrdiff_t hho :UT_hash_handle结构在用户结构中的偏移位置,通过UT_hash_handle对象减去hho定位到用户结构对象。
  6. unsigned ideal_chain_maxlen :每个bucket理想的最大元素数目,即ceil(元素总数/bucket个数);元素应该能平均地分配到每个bucket,这与散列函数和key值取法相关。
  7. unsigned nonideal_items :在bucket链中的位置超过ideal_chain_maxlen的元素个数,访问这些元素需要多于理想步数的链遍历。
  8. unsigned ineff_expands :标志非平均分布(无效扩容):当bucket翻倍扩容后,仍有超过一半的元素处于非理想链位置(即偏离了理想分布)时会置位。
  9. unsigned noexpand :标志不要再进行bucket幂次扩容,原因是元素已经偏离理想分布:连续两次扩容都无效时置位,此后散列表仍可使用,但操作不再是常数时间。
  10. uint32_t signature :签名值。
  • UT_hash_bucket:hashkey值与(num_buckets-1)按位与运算结果相同的元素(自然也包括hashkey值相同的元素),将存储在相同的bucket中。
/* One bucket of the hash table: the head of its chain of hash handles plus
 * the per-bucket counters that govern when expansion is triggered. */
typedef struct UT_hash_bucket {
   /* first hash handle stored in this bucket */
   struct UT_hash_handle *hh_head;
   /* number of items stored in this bucket */
   unsigned count;

   /* Expansion multiplier, normally 0. With 0, the maximum chain length is
    * held to its default threshold, HASH_BKT_CAPACITY_THRESH, and a chain
    * growing past that length triggers bucket expansion. A non-zero value
    * postpones the expansion that additions to this particular bucket would
    * trigger until the chain length reaches a *multiple* of
    * HASH_BKT_CAPACITY_THRESH; that multiple is expand_mult+1.
    * Expansions are expensive, so for a bucket known to be overused it is
    * preferable to let the chain grow to a longer but still bounded length
    * than to run an O(n) bucket expansion too often.
    */
   unsigned expand_mult;

} UT_hash_bucket;
  1. struct UT_hash_handle *hh_head :指向bucket中存储的头元素。
  2. unsigned count :bucket中存储的元素个数。
  3. unsigned expand_mult :扩容控制乘数,用于减少扩容操作。当bucket出现扩容操作时,会进行大量的计算和复制操作。
  • UT_hash_handle:嵌入在用户数据结构中,代表散列表元素。
/* Hash handle embedded in each user structure; it is what links one element
 * into the table, into application order, and into its bucket chain. */
typedef struct UT_hash_handle {
   /* table this handle belongs to */
   struct UT_hash_table *tbl;
   /* neighbouring elements in application order */
   void *prev;
   void *next;
   /* neighbouring hash handles in bucket order */
   struct UT_hash_handle *hh_prev;
   struct UT_hash_handle *hh_next;
   /* pointer to the key of the enclosing struct */
   void *key;
   /* length of the enclosing struct's key */
   unsigned keylen;
   /* value produced by the hash function for the key */
   unsigned hashv;
} UT_hash_handle;
  1. struct UT_hash_table *tbl :指向管理表。
  2. void *prev :指向应用添加顺序前一个元素。
  3. void *next :指向应用添加顺序后一个元素。
  4. struct UT_hash_handle *hh_prev :指向在同一个bucket中的前一个UT_hash_handle。
  5. struct UT_hash_handle *hh_next : 指向在同一个bucket中的后一个UT_hash_handle。
  6. void *key :指向用于计算hashkey值用的数据。
  7. unsigned keylen :用于计算hashkey值用的数据长度。
  8. unsigned hashv :hashkey。

猜你喜欢

转载自blog.csdn.net/jt_notes/article/details/81168152