进阶实验5-3.4 迷你搜索引擎 (35分)-哈希

解题思路：
1、存储：用哈希存储单词，并用文件链表记录单词所在文件，以及行号链表记录单词所在文件中的行号
2、查询：取各单词文件名交集，再取各文件名再行号并集
#include <stdio.h>
#include <string.h>
#include <malloc.h>
#include <ctype.h>
typedef char Element[51];//每行字符最多50个
//行号结点
typedef struct LNode {
    int LineNo;
    struct LNode *NextLine;
}*LList,LNode;
char word[101][101][51];//注意由于将#号也存入在word数组中，故文件行号最大101行
int F[101];
int L[101][101];
int ans=0;
//文件结点
typedef struct FNode {
    int FileNo;
    struct FNode *NextFile;
    LList Line;
}*FList,FNode;
//哈希结点
struct HashTbl {
    Element word;
    int flag;
    FList Next;
};
//哈希表
typedef struct {
    int TableSize;
    struct HashTbl *TheCells;
}*HashTable;
//创建行号结点
LNode *CreateLNode(int LineNo) {
    LNode *l=(LNode*)malloc(sizeof(LNode));
    l->LineNo=LineNo;
    l->NextLine=NULL;
    return l;
}
//创建文件结点
FNode *CreateFNode(int FileNo,LNode *l) {
    FNode *f=(FNode*)malloc(sizeof(FNode));
    f->FileNo=FileNo;
    f->NextFile=NULL;
    f->Line=l;
    return f;
}
//哈希表初始化
HashTable Create(int size) {
    HashTable H=(HashTable)malloc(sizeof(HashTable));
    H->TableSize=size;
    H->TheCells=(struct HashTbl *)malloc(sizeof(struct HashTbl)*size);
    while(size) {
        H->TheCells[--size].Next=NULL;
        H->TheCells[size].flag=0;
    }
    return H;
}
//哈希函数
int Hash(HashTable H,Element Key) {
    unsigned int h=0;
    while(*Key!='\0') {
        h=h<<5+(*Key++-'a');
    }
    return (h&(32*32*32-1))%H->TableSize;
}
//求存储位置
int FindPos(HashTable H,Element Key) {
    int pos=Hash(H,Key);
    while(H->TheCells[pos].flag&&strcmp(H->TheCells[pos].word,Key)!=0) {
        pos++;
        if(pos==H->TableSize)
            pos-=H->TableSize;
    }
    return pos;
}
//单词插入
void Insert(HashTable H,Element Key,int FileNo,int LineNo) {
    int pos=FindPos(H,Key);
    if(!H->TheCells[pos].flag) {//位置为空则插入
        strcpy(H->TheCells[pos].word,Key);
        H->TheCells[pos].flag=1;
        LNode *l=CreateLNode(LineNo);
        FNode *f=CreateFNode(FileNo,l);
        H->TheCells[pos].Next=f;
    } else if(strcmp(H->TheCells[pos].word,Key)==0) {//位置不空且存储单词一致
        FNode *p=H->TheCells[pos].Next;
        FNode *s=NULL;
        while(p&&p->FileNo!=FileNo) {//文件号不空且文件号不等
            s=p;
            p=p->NextFile;
        }
        if(!p) {//文件指针空，尾插
            LNode *l=CreateLNode(LineNo);
            FNode *f=CreateFNode(FileNo,l);
            s->NextFile=f;
        } else if(p->FileNo==FileNo) {//文件号一致，则比较行号
            LNode *q=p->Line;
            LNode *r=NULL;;
            while(q&&q->LineNo!=LineNo) {
                r=q;
                q=q->NextLine;
            }
            if(!q) {//行号指针空，尾插
                LNode *l=CreateLNode(LineNo);
                r->NextLine=l;
            }

        }

    }
}
//分离出单词
void GetWord(HashTable H,Element word,int FileNo,int LineNo) {
    int i=0,k=0;
    Element str;
    while(word[i]!='\0') {
        if(isalpha(word[i])) {
            str[k++]=tolower(word[i]);
        } else {
            if(isalpha(word[i+1])) {
                str[k]='\0';
                Insert(H,str,FileNo,LineNo);
                k=0;
            }
        }
        i++;
    }
    str[k]='\0';
    Insert(H,str,FileNo,LineNo);
}
//求文件名交集，行号并集
int Merge(HashTable H,FNode *p) {
    int w=0;
    int tmpF[101];
    int tmpL[101];
    int v=0;
    while(p&&F[w]!=-1) {
        if(F[w]>p->FileNo) {
            p=p->NextFile;
        } else if(F[w]<p->FileNo) {
            w++;
        } else if(F[w]==p->FileNo) {
            tmpF[v++]=p->FileNo;
            LNode *q=p->Line;
            int u=0,h=0;
            while(q&&L[p->FileNo][u]!=-1) {
                if(q->LineNo<L[p->FileNo][u]) {
                    tmpL[h++]=q->LineNo;
                    q=q->NextLine;
                } else if(q->LineNo>L[p->FileNo][u]) {
                    tmpL[h++]=L[p->FileNo][u];
                    u++;
                } else {
                    tmpL[h++]=q->LineNo;
                    u++;
                    q=q->NextLine;
                }
            }
            while(q&&L[p->FileNo][u]==-1) {
                tmpL[h++]=q->LineNo;
                q=q->NextLine;
            }
            while(!q&&L[p->FileNo][u]!=-1) {
                tmpL[h++]=L[p->FileNo][u];
                u++;
            }
            int i;
            for(i=0; i<h; i++) {
                L[p->FileNo][i]=tmpL[i];
            }
            L[p->FileNo][i]=-1;
            w++;
            p=p->NextFile;
        }
    }
    int i;
    for(i=0; i<v; i++) {
        F[i]=tmpF[i];
    }
    F[i]=-1;
    ans=v;
    return v;
}
//输出
void Out() {
    int i,t;
    for(i=0; F[i]!=-1; i++) {
        printf("%s\n",word[F[i]][0]);
        for(t=0; L[F[i]][t]!=-1; t++) {
            int line=L[F[i]][t];
            printf("%s\n",word[F[i]][line]);
        }
    }
}
//求首个单词的文件名和行号
int Get(HashTable H,Element str) {
    int flag=0;
    int pos=FindPos(H,str);
    FNode *p=H->TheCells[pos].Next;
    if(!p) {
        flag=1;
        printf("0\nNot Found\n");
    } else {
        int t=0;
        while(p) {
            int s=0;
            F[t++]=p->FileNo;
            LNode *q=p->Line;
            while(q) {
                L[p->FileNo][s++]=q->LineNo;
                q=q->NextLine;
            }
            L[p->FileNo][s]=-1;
            p=p->NextFile;
        }
        F[t]=-1;
        ans=t;
    }
    return flag;
}
//最终结果
void GetAns(HashTable H) {
    int m;
    scanf("%d",&m);
    char str[10][11];
    char c;
    getchar();
    int i;
    for(i=0; i<m; i++) {
        int flag=0;
        int j=0,k=0;
        while(c=getchar(),c!='\n') {
            if(isalpha(c))
                str[j][k++]=tolower(c);
            else {
                str[j][k]='\0';
                j++;
                k=0;
            }
        }
        str[j][k]='\0';
        j++;
        flag=Get(H,str[0]);
        if(!flag) {
            if(j>1) {
                for(k=1; k<j; k++) {
                    int pos=FindPos(H,str[k]);
                    FNode *p=H->TheCells[pos].Next;
                    flag=Merge(H,p);
                    if(!flag) {
                        printf("0\nNot Found\n");
                        break;
                    }
                }
                if(flag) {
                    printf("%d\n",ans);
                    Out();
                }
            } else if(j==1) {
                printf("%d\n",ans);
                Out();
            }
        }
    }
}
int main() {
    int n;
    scanf("%d",&n);

    getchar();
    int i,j;
    HashTable H=Create(500009);
    for(i=0; i<n; i++) {
        j=0;
        while(gets(word[i][j]),strcmp(word[i][j],"#")!=0) {
            if(j)
                GetWord(H,word[i][j],i,j);
            j++;
        }
    }
    GetAns(H);
    return 0;
}
进阶实验5-3.4 迷你搜索引擎 (35分)-哈希

猜你喜欢