哈希(Hash)算法 DJB/ELF/PJW/SDBM/FNV1(a)

1 DJB hash function

/*
 * DJB "times 33" additive hash (DJBX33A) over a length-delimited buffer.
 *
 * str: first byte of the data to hash; exactly len bytes are read, so the
 *      buffer does not need to be NUL-terminated.
 * len: number of bytes to hash; 0 yields 0.
 *
 * Returns the accumulated hash, starting from 0. Arithmetic wraps modulo
 * the width of unsigned long.
 */
unsigned long hash_djbx33a(const char *str, size_t len)
{
    /* Read the input as unsigned bytes. Plain char may be signed, and a
     * sign-extended negative char would make the result depend on the
     * platform for any byte >= 0x80. */
    const unsigned char *bytes = (const unsigned char *)str;
    unsigned long hash = 0U;

    for (size_t i = 0; i < len; ++i) {
        /* hash * 33 is the same as ((hash << 5) + hash). */
        hash = hash * 33 + bytes[i];
    }

    return hash;
}

2 ELF hash function

ELF 哈希在 Linux 系统中使用较多,Linux 内核中的 ELF 哈希实现: [linux 2.4.0 -> irqueue.c]

/*
 *    Hash the NUL-terminated input string 'name' using the ELF hash
 *    function for strings.
 *
 *    name: string to hash; it is only read, never modified.
 *
 *    Returns the hash value. The bits selected by 0xf0000000 are always
 *    clear in the result.
 */
static unsigned int hash(char* name)
{
    /* Read the string as unsigned bytes. Plain char may be signed, and
     * adding a sign-extended negative char would give platform-dependent
     * results for bytes >= 0x80. */
    const unsigned char *p = (const unsigned char *)name;
    unsigned int h = 0;
    unsigned int g;

    while (*p) {
        h = (h << 4) + *p++;
        /* Fold the top nibble down into bits 4-7, then clear it. */
        g = h & 0xf0000000;
        if (g)
            h ^= g >> 24;
        h &= ~g;
    }
    return h;
}

UCC C语言编译器中ELF: [UCC(your C compiler) -> str.c]

/*
 * ELF string hash over the first len bytes of str.
 *
 * str: data to hash; exactly len bytes are read, so embedded NUL bytes are
 *      hashed like any other byte.
 * len: number of bytes to hash; a value <= 0 hashes nothing and returns 0.
 *
 * Returns the hash value. The bits selected by 0xF0000000 are always clear
 * in the result.
 */
static unsigned int ELFHash(char *str, int len)
{
    /* Read the data as unsigned bytes. Plain char may be signed, and a
     * sign-extended negative char would give platform-dependent results
     * for bytes >= 0x80. */
    const unsigned char *p = (const unsigned char *)str;
    unsigned int h = 0;

    for (int i = 0; i < len; ++i)
    {
        unsigned int x;

        h = (h << 4) + p[i];
        x = h & 0xF0000000;
        if (x != 0)
        {
            /* Fold the top nibble down into bits 4-7, then clear it. */
            h ^= x >> 24;
            h &= ~x;
        }
    }

    return h;
}

3 PJW hash function

/* HashPJW: a generic string hash function. It can be slightly better than ElfHash, depending on the context. */
#include <limits.h>
/* Number of bits in an int. */
#define BITS_IN_int     ( sizeof(int) * CHAR_BIT )
/* Three quarters of the bit width of int. */
#define THREE_QUARTERS  ((int) ((BITS_IN_int * 3) / 4))
/* One eighth of the bit width of int. */
#define ONE_EIGHTH      ((int) (BITS_IN_int / 8))
/* Mask with only the top ONE_EIGHTH bits of an unsigned int set. */
#define HIGH_BITS       ( ~((unsigned int)(~0) >> ONE_EIGHTH ))

/*
 * PJW hash of a NUL-terminated string.
 *
 * datum: string to hash; it is only read.
 *
 * Returns the hash value. The HIGH_BITS of the result are always clear.
 */
unsigned int HashPJW ( const char * datum )
{
    /* Read the string as unsigned bytes. Plain char may be signed, and a
     * sign-extended negative char would give platform-dependent results
     * for bytes >= 0x80. */
    const unsigned char *p = (const unsigned char *) datum;
    unsigned int hash_value = 0;
    unsigned int i;

    for ( ; *p; ++p )
    {
        hash_value = ( hash_value << ONE_EIGHTH ) + *p;
        /* When bits reach the top ONE_EIGHTH, fold them back into the
         * low part and clear them. */
        if (( i = hash_value & HIGH_BITS ) != 0 )
            hash_value = ( hash_value ^ ( i >> THREE_QUARTERS )) & ~HIGH_BITS;
    }
    return ( hash_value );
}

4 SDBM hash function

this algorithm was created for sdbm (a public-domain reimplementation of ndbm) database library. it was found to do well in scrambling bits(置乱位), causing better distribution of the keys and fewer splits. it also happens to be a good general hashing function with good distribution. the actual function is hash(i) = hash(i - 1) * 65599 + str[i]; what is included below is the faster version used in gawk. the magic constant 65599 was picked out of thin air while experimenting with different constants, and turns out to be a prime. this is one of the algorithms used in berkeley db (see sleepycat) and elsewhere.

/*
 * sdbm hash of a NUL-terminated byte string.
 *
 * Each step computes acc = byte + acc * 65599, written with shifts as
 * (acc << 6) + (acc << 16) - acc. Arithmetic wraps modulo the width of
 * unsigned long.
 *
 * str: string to hash; it is only read.
 *
 * Returns the accumulated hash (0 for the empty string).
 */
static unsigned long sdbm(unsigned char *str)
{
    unsigned long acc = 0;

    for (; *str != 0; ++str) {
        acc = *str + (acc << 6) + (acc << 16) - acc;
    }

    return acc;
}

sdbm_hash.c

/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * sdbm - ndbm work-alike hashed database library
 * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
 * author: [email protected]
 * status: ex-public domain. keep it that way.
 *
 * hashing routine
 */

#include "apr_sdbm.h"
#include "sdbm_private.h"

/*
 * polynomial conversion ignoring overflows
 * [this seems to work remarkably well, in fact better
 * than the ndbm hash function. Replace at your own risk]
 * use: 65599  nice.
 *      65587  even better.
 *
 * Hashes the first len bytes of str as n = byte + 65599 * n, starting
 * from n = 0.
 *
 * str: data to hash; exactly len bytes are read.
 * len: number of bytes to hash; a value <= 0 hashes nothing and returns 0.
 *
 * Returns the accumulated unsigned long value converted to long.
 */
long sdbm_hash(const char *str, int len)
{
    register unsigned long n = 0;

#define DUFF /* go ahead and use the loop-unrolled version */
#ifdef DUFF

/* One hashing step. The macro name and its expansion must be separated by
 * whitespace; otherwise a macro with a different name is defined and HASHC
 * is left undefined. */
#define HASHC   n = *str++ + 65599 * n

    if (len > 0) {
        register int loop = (len + 8 - 1) >> 3;

        /* Duff's device: enter the 8-way unrolled loop at the case that
         * consumes the len % 8 leftover bytes first; every later pass
         * consumes a full 8 bytes. */
        switch (len & (8 - 1)) {
        case 0: do {
                HASHC;  case 7: HASHC;
        case 6: HASHC;  case 5: HASHC;
        case 4: HASHC;  case 3: HASHC;
        case 2: HASHC;  case 1: HASHC;
                } while (--loop);
        }
    }
#else
    /* Plain loop; "> 0" keeps a negative len from running away, matching
     * the len > 0 guard of the unrolled version. */
    while (len-- > 0)
        n = *str++ + 65599 * n;
#endif
    return n;
}

5 FNV1/FNV1a hash function

One of FNV’s key advantages is that it is very simple to implement. Start with an initial hash value of FNV offset basis. For each byte in the input, multiply hash by the FNV prime, then XOR it with the byte from the input. The alternate algorithm, FNV-1a, reverses the multiply and XOR steps.[Fowler–Noll–Vo hash function From Wikipedia]

FNV-1 hash

hash = FNV_offset_basis
for each byte_of_data to be hashed
    hash = hash × FNV_prime
    hash = hash XOR byte_of_data
return hash

FNV-1a hash

hash = FNV_offset_basis
for each byte_of_data to be hashed
    hash = hash XOR byte_of_data
    hash = hash × FNV_prime
return hash

其中:

| Size in bits ($n = 2^s$) | FNV prime | FNV offset basis |
|---|---|---|
| 32 | $2^{24} + 2^{8} + \mathtt{0x93} = 16777619$ | $2166136261 = \mathtt{0x811c9dc5}$ |
| 64 | $2^{40} + 2^{8} + \mathtt{0xb3} = 1099511628211$ | $14695981039346656037 = \mathtt{0xcbf29ce484222325}$ |

/* Apache HTTP server
 * Provide a string hashing function for the proxy.
 * We offer 2 methods: one is the APR model but we
 * also provide our own, based on either FNV or SDBM.
 * The reason is in case we want to use both to ensure no
 * collisions.
 *
 * str:    NUL-terminated string to hash.
 * method: PROXY_HASHFUNC_APR selects apr_hashfunc_default(),
 *         PROXY_HASHFUNC_FNV selects 32-bit FNV-1, and any other value
 *         selects the SDBM model.
 *
 * Returns the hash of str under the selected method.
 */
PROXY_DECLARE(unsigned int)
ap_proxy_hashfunc(const char *str, proxy_hash_t method)
{
    if (method == PROXY_HASHFUNC_APR) {
        apr_ssize_t slen = strlen(str);
        return apr_hashfunc_default(str, &slen);
    }
    else if (method == PROXY_HASHFUNC_FNV) {
        /* FNV model: 32-bit FNV-1. Start from the offset basis; for each
         * byte, multiply by the FNV prime and then XOR the byte in.
         * 0x811C9DC5 is the offset basis, not the prime, so it must seed
         * the hash rather than act as the multiplier.
         * NOTE(review): assumes unsigned int is 32 bits wide, so the
         * multiplication wraps modulo 2^32 - confirm for the target. */
        const unsigned int fnv_offset_basis = 0x811C9DC5; /* 2166136261 */
        const unsigned int fnv_prime = 0x01000193;        /* 16777619 */
        unsigned int hash;
        for (hash = fnv_offset_basis; *str; str++) {
            hash *= fnv_prime;
            /* FNV is defined over bytes; avoid sign-extending plain char. */
            hash ^= (unsigned char)(*str);
        }
        return hash;
    }
    else { /* method == PROXY_HASHFUNC_DEFAULT */
        /* SDBM model: hash = c + hash * 65599, written with shifts. */
        unsigned int hash;
        for (hash = 0; *str; str++) {
            hash = (*str) + (hash << 6) + (hash << 16) - hash;
        }
        return hash;
    }
}

FNV source code: fnv-5.0.3.tar.gz

6 SGI STL string hash function

// Hash a NUL-terminated string: for each character, multiply the running
// value by 5 and add the character. Returns the result converted to size_t
// (0 for the empty string).
inline size_t __stl_hash_string(const char* __s)
{
    unsigned long __acc = 0;

    while (*__s != '\0') {
        __acc = __acc * 5 + *__s;
        ++__s;
    }

    return static_cast<size_t>(__acc);
}

7 lose lose

This hash function appeared in K&R (1st ed), but at least the reader was warned: “This is not the best possible algorithm, but it has the merit of extreme simplicity.” This is an understatement; it is a terrible hashing algorithm, and it could have been much better without sacrificing its “extreme simplicity.” Many C programmers use this function without actually testing it, or checking something like Knuth’s Sorting and Searching, so it stuck (沿用至今). It is now found mixed with otherwise respectable code, e.g. cnews. Sigh.

/*
 * "Lose lose" hash: the sum of the bytes of a NUL-terminated string.
 *
 * str: string to hash; it is only read.
 *
 * Returns the byte sum, accumulated in an unsigned int and widened to
 * unsigned long on return (0 for the empty string).
 */
unsigned long hash(unsigned char *str)
{
    unsigned int sum = 0;

    for (; *str != 0; ++str) {
        sum += *str;
    }

    return sum;
}

猜你喜欢

转载自blog.csdn.net/wwchao2012/article/details/80329766
今日推荐