几种hash函数实现垃圾邮件过滤的比较

题目要求:给你两个txt文档,一个是发送垃圾邮件的黑名单邮箱地址,另一个是待检测的邮箱地址,现要求给出一种方案,尽快将黑名单当中的地址从待测邮箱地址中分离出来。

大家都知道,对于这一类问题,散列表所给出的解决方案虽然需要消耗大量的物理空间,但在时间上却有很大的优势。不同的哈希函数有不同的效果,在特定的环境下表现也各不相同。在这里作者给出了十余种哈希函数,并分别进行了测试,具体如下。不多说,上C代码:

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
/* Modulus applied to raw hash values; bucket indices lie in [0, MOD - 1]. */
#define  MOD  10000007
/*
 * Number of buckets in the table.  It must be at least MOD: the previous
 * value (10000005) was smaller than MOD, so indices MOD - 2 and MOD - 1
 * addressed memory past the end of the array.
 */
#define N  MOD

/*
 * One hash-table entry.  The bucket array holds the head entry of each
 * chain in place; further entries that land on the same bucket are linked
 * through `next`.
 */
typedef struct node
{
	char address[20];       /* NUL-terminated address; "" marks an unused head */
	struct node *next;      /* next entry in the same bucket, or NULL */
}node, *ptr;

/* Bucket array, zero-initialised so every head starts empty with next == NULL. */
node str[N] = { 0 };


/*unsigned int Hash(char* str)  // 154.4s
{  
   unsigned int hash = 0;  
   unsigned int x    = 0;  
   unsigned int i    = 0;  
   for(i = 0; i < sizeof(str); str++, i++)  
   {  
      hash = (hash << 4) + (*str);  
      if((x = hash & 0xF0000000L) != 0)  
      {  
         hash ^= (x >> 24);  
      }  
      hash &= ~x;  
   }  
   return hash;  
}  */


/*unsigned int Hash(char* str)  // 148.9s
{  
   const unsigned int BitsInUnsignedInt = (unsigned int)(sizeof(unsigned int) * 8);  
   const unsigned int ThreeQuarters     = (unsigned int)((BitsInUnsignedInt  * 3) / 4);  
   const unsigned int OneEighth         = (unsigned int)(BitsInUnsignedInt / 8);  
   const unsigned int HighBits          = (unsigned int)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth);  
   unsigned int hash              = 0;  
   unsigned int test              = 0;  
   unsigned int i                 = 0;  
   for(i = 0; i < sizeof(str); str++, i++)  
   {  
      hash = (hash << OneEighth) + (*str);  
      if((test = hash & HighBits)  != 0)  
      {  
         hash = (( hash ^ (test >> ThreeQuarters)) & (~HighBits));  
      }  
   }   
   return hash;  
}  */


/*unsigned int Hash(char* str)  // 151.7s
{  
   unsigned int hash = 1315423911;  
   unsigned int i    = 0;  
   for(i = 0; i < sizeof(str); str++, i++)  
   {  
      hash ^= ((hash << 5) + (*str) + (hash >> 2));  
   }  
   return hash;  
}  */


/*unsigned int Hash(char* str)  // 148.1s
{  
   unsigned int b    = 378551;  
   unsigned int a    = 63689;  
   unsigned int hash = 0;  
   unsigned int i    = 0;    
   for(i = 0; i < sizeof(str); str++, i++)  
   {  
      hash = hash * a + (*str);  
      a    = a * b;  
   }   
   return hash;  
}  */


/*unsigned int Hash(char* str)  // 155.8s
{  
   unsigned int hash = 0;  
   unsigned int x    = 0;  
   unsigned int i    = 0;    
   for(i = 0; i <sizeof(str); str++, i++)  
   {  
      hash = (hash << 4) + (*str);  
      if((x = hash & 0xF0000000L) != 0)  
      {  
         hash ^= (x >> 24);  
      }  
      hash &= ~x;  
   }   
   return hash;  
} */


/*unsigned int Hash(char* str)  // 152.9s 
{  
   unsigned int seed = 131; // 31 131 1313 13131 131313 etc.. 
   unsigned int hash = 0;  
   unsigned int i    = 0;   
   for(i = 0; i <sizeof(str); str++, i++)  
   {  
      hash = (hash * seed) + (*str);  
   }   
   return hash;  
} */


/*unsigned int Hash(char str[])   // 200s
{  
   unsigned int b    = 217;  
   unsigned int a    = 119;  
   unsigned int hash = 0;  
   unsigned int i    = 0;   
   for(i = 0; i <15;  i++)  
   {  
      hash = hash  + (str[i]+str[i+1]+str[i+2])*a  +b*str[i];  
      a    = a + str[i];  
   }  
   hash=hash-2800000;
   return hash;  
} */


/*unsigned int Hash(char* str)  //146.4s
{  
   unsigned int hash = 0;  
   unsigned int i    = 0;   
   for(i = 0; i < sizeof(str); str++, i++)  
   {  
      hash = (*str) + (hash << 6) + (hash << 16) - hash;  
   }   
   return hash;  
}  */


/*unsigned int Hash(char* str)  //149.6s
{  
   unsigned int hash = 5381;  
   unsigned int i    = 0;    
   for(i = 0; i < sizeof(str); str++, i++)  
   {  
      hash = ((hash << 5) + hash) + (*str);  
   }    
   return hash;  
}  */


/*unsigned int Hash(char* str)  //147s
{  
   unsigned int hash = sizeof(str);  
   unsigned int i    = 0;    
   for(i = 0; i < sizeof(str); str++, i++)  
   {  
      hash = ((hash << 5) ^ (hash >> 27)) ^ (*str);  
   }  
   return hash;  
}  */


/*unsigned int Hash(char* str)  //141.2s
{  
   unsigned int hash = 0;  
   unsigned int i    = 0;  
   for(i = 0; i < sizeof(str); str++, i++)  
   {  
      hash = hash << 7 ^ (*str);  
   }   
   return hash;  
}  */


/*unsigned int Hash(char* str)  //151.3s
{  
   const unsigned int fnv_prime = 0x811C9DC5;  
   unsigned int hash      = 0;  
   unsigned int i         = 0;  
   for(i = 0; i < sizeof(str); str++, i++)  
   {  
      hash *= fnv_prime;  
      hash ^= (*str);  
   }   
   return hash;  
}  */

/*
 * Hash a NUL-terminated string (the variant active in this file; the
 * original listing annotated it with "140.7s", a figure that predates the
 * loop-bound fix below).
 *
 * Starting from 0xAAAAAAAA, characters at even positions and characters at
 * odd positions are folded into the state with two different shift/xor
 * mixes.
 *
 * The loop stops at the terminating NUL.  The earlier bound, sizeof(str),
 * is the size of a pointer, so only the first 4 or 8 bytes were hashed and
 * strings shorter than that pulled in whatever bytes followed the
 * terminator -- the same address could hash differently from call to call.
 *
 * str:     NUL-terminated string to hash; not modified.
 * returns: 32-bit style hash value; 0xAAAAAAAA for the empty string.
 */
unsigned int Hash(const char* str)
{
   unsigned int hash = 0xAAAAAAAA;
   unsigned int i;
   for(i = 0; str[i] != '\0'; i++)
   {
      /* Go through unsigned char so bytes >= 0x80 mix the same way
         regardless of whether plain char is signed on this platform. */
      unsigned int c = (unsigned char)str[i];
      if((i & 1) == 0)
      {
         hash ^= (hash << 7) ^ (c * (hash >> 3));
      }
      else
      {
         hash ^= ~((hash << 11) + (c ^ (hash >> 5)));
      }
   }
   return hash;
}


/*
 * Load the blacklist file into the hash table.
 *
 * Every whitespace-separated token of "黑名单.txt" is hashed with Hash(),
 * reduced modulo MOD and stored in that bucket: in the head entry when the
 * head is still empty, otherwise in a newly allocated node appended to the
 * bucket's chain.  Tokens already present in the bucket are skipped.
 *
 * str: bucket array to fill; it must provide at least MOD entries and start
 *      out zeroed (empty address, NULL next).
 *
 * The process is terminated with a failure status if the file cannot be
 * opened or a chain node cannot be allocated.
 *
 * NOTE(review): the function name coincides with POSIX read(); it is kept
 * because main() calls it under this name.
 */
void read(node *str)
{
	FILE *fp;
	char str_read[20];
	unsigned int hash_value;                            /* raw hash value */
	unsigned int hash_solved;                           /* hash reduced modulo MOD */
	ptr p, q;
	int found;

	fp = fopen("黑名单.txt", "r");
	if (fp == NULL)
	{
		printf("cannot open address.txt file!\n");
		exit(EXIT_FAILURE);
	}
	/* The field width keeps each token inside str_read (19 chars + NUL);
	   a longer token is consumed in several pieces instead of overflowing. */
	while (fscanf(fp, "%19s", str_read) == 1)
	{
		hash_value = Hash(str_read);
		hash_solved = hash_value%MOD;
		p = &str[hash_solved];

		/* Unused bucket: store the address directly in the head entry. */
		if (p->address[0] == '\0')
		{
			strcpy(p->address, str_read);
			continue;
		}

		/* Walk the whole chain, last node included, looking for a duplicate.
		   `found` is recomputed for every token so one duplicate cannot
		   suppress later insertions. */
		found = 0;
		for (;;)
		{
			if (strcmp(p->address, str_read) == 0)
			{
				found = 1;
				break;
			}
			if (p->next == NULL)
				break;
			p = p->next;
		}
		if (found)
			continue;

		/* p is now the tail of the chain: append a new node after it. */
		q = malloc(sizeof *q);
		if (q == NULL)
		{
			printf("out of memory while loading the blacklist!\n");
			fclose(fp);
			exit(EXIT_FAILURE);
		}
		q->next = NULL;
		strcpy(q->address, str_read);
		p->next = q;
	}
	fclose(fp);
}
/*
 * Look an address up in the hash table.
 *
 * The bucket is selected with Hash(str_search) % MOD and every entry of
 * that bucket's chain -- the head and all linked nodes -- is compared with
 * the query.  (The earlier loop re-compared the head on every step and
 * never looked at the last node, so chained addresses were never found.)
 *
 * str_search: NUL-terminated address to look for.
 * str:        bucket array with at least MOD entries.
 * returns:    1 if the address is stored in the table, 0 otherwise.
 */
int search(char *str_search, node *str)
{
	ptr p;
	unsigned int hash_value;
	unsigned int hash_solved;

	/* An empty query would compare equal to an unused head entry, whose
	   address is also ""; it is never a stored address. */
	if (str_search[0] == '\0')
		return 0;

	hash_value = Hash(str_search);
	hash_solved = hash_value%MOD;
	for (p = &str[hash_solved]; p != NULL; p = p->next)
	{
		if (strcmp(p->address, str_search) == 0)
			return 1;
	}
	return 0;
}
/*
 * Program entry point.
 *
 * Loads the blacklist into the global table via read(), then reads the
 * addresses to test from "待测地址.txt" and writes one line per address to
 * the output file: the address, three tabs, and 1 if search() found it in
 * the blacklist or 0 if not.
 *
 * Returns 0 on success; exits with a failure status if either file cannot
 * be opened.
 */
int main()
{
	int a;
	FILE *fp, *fp_1;
	char str_search[20];

	fp = fopen("待测地址.txt", "r");
	if (fp == NULL){
		printf("cannot open this input.txt file!\n");
		exit(EXIT_FAILURE);
	}
	fp_1 = fopen("F:\\test\\test3(2.1.1).txt", "w");
	/* Check the output handle itself; testing fp again here would let a
	   failed open slip through and pass NULL to fprintf below. */
	if (fp_1 == NULL)
	{
		printf("cannot open this output.txt file!\n");
		fclose(fp);
		exit(EXIT_FAILURE);
	}
	read(str);
	/* The field width keeps each token inside str_search (19 chars + NUL). */
	while (fscanf(fp, "%19s", str_search) == 1)
	{
		a = search(str_search, str);
		fprintf(fp_1, "%s\t\t\t%d\n", str_search, a);
	}
	fclose(fp);
	fclose(fp_1);
	return 0;
}

猜你喜欢

转载自blog.csdn.net/goodboydan/article/details/79070802