HDU 5880 Family View (AC自动机 屏蔽敏感词)

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/Tc_To_Top/article/details/84670754

Family View

Time Limit: 3000/1000 MS (Java/Others)    Memory Limit: 65536/65536 K (Java/Others)
Total Submission(s): 3272    Accepted Submission(s): 709

 

Problem Description

Steam is a digital distribution platform developed by Valve Corporation offering digital rights management (DRM), multiplayer gaming and social networking services. A family view can help you to prevent your children access to some content which are not suitable for them. 
Take an MMORPG game as an example: given a sentence T and a list of forbidden words {P}, your job is to use '*' to substitute every character that is part of a substring matching at least one forbidden word in the list (case-insensitive).
For example, T is: "I love Beijing's Tiananmen, the sun rises over Tiananmen. Our great leader Chairman Mao, he leades us marching on."
And {P} is: {"tiananmen", "eat"}
The result should be: "I love Beijing's *********, the sun rises over *********. Our gr*** leader Chairman Mao, he leades us marching on."

Input

The first line contains the number of test cases. For each test case:
The first line contains an integer n, representing the size of the forbidden words list P. Each of the next n lines contains a forbidden word Pi (1≤|Pi|≤1000000,∑|Pi|≤1000000), where Pi only contains lowercase letters.
The last line contains a string T (|T|≤1000000).

Output

For each case output the sentence in a line.

Sample Input

1
3
trump
ri
o
Donald John Trump (born June 14, 1946) is an American businessman, television personality, author, politician, and the Republican Party nominee for President of the United States in the 2016 election. He is chairman of The Trump Organization, which is the principal holding company for his real estate ventures and other business interests.

Sample Output

D*nald J*hn ***** (b*rn June 14, 1946) is an Ame**can businessman, televisi*n pers*nality, auth*r, p*litician, and the Republican Party n*minee f*r President *f the United States in the 2016 electi*n. He is chairman *f The ***** *rganizati*n, which is the p**ncipal h*lding c*mpany f*r his real estate ventures and *ther business interests.

Source

2016 ACM/ICPC Asia Regional Qingdao Online

题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=5880

题目大意:将文章中包含字典里单词的部分全变成*

题目分析:在每个单词的结尾节点记录串长,然后在文章上跑一遍AC自动机;输出时维护需要输出'*'的最远位置,因为匹配区间可能会有重叠(e.g. 字典abc, bcdef,文章abcdef)。文章的某个字符如果不是字母,需将当前指针重新指向root(e.g. 字典a, b,文章a_b)。注意不要写成for(int i=0;i<(int)strlen(t);i++):每次循环都重新计算strlen会使复杂度退化到O(|T|^2),此题我因此T(超时)到自闭。

#include <cstdio>
#include <cstring>
#include <algorithm>
#include <queue>
using namespace std;
// Capacity used for the trie node arrays and for the per-position match table.
int const MAX = 1000005;
// s: buffer for one dictionary word; t: buffer for the text to be censored.
char s[1000005], t[1000005];
// mp[i] = length of the longest dictionary word found starting at text index i
// (0 when no word starts there). Written by AC_Automaton::Search.
int mp[MAX];

// Aho-Corasick automaton over the 26 lowercase letters.
//
// Usage per test case: Init() -> Insert() for every word -> Build() -> Search().
//
//   nxt[u][c]  child / resolved transition of node u on letter c (-1 = absent
//              until Build() fills it in)
//   fail[u]    failure link of node u
//   len[u]     after Insert(): length of the word ending exactly at u (0 if none)
//              after Build():  length of the longest word that is a suffix of
//                              the string spelled by u (0 if none)
//   tot        number of nodes currently allocated
struct AC_Automaton {
    int nxt[MAX][26], tot, root, fail[MAX], len[MAX];
    std::queue<int> q;

    // Allocates a fresh node with no children, no word and fail link 0,
    // and returns its index. Nodes are reset lazily here, so Init() is O(1).
    int NewNode() {
        memset(nxt[tot], -1, sizeof(nxt[tot]));
        fail[tot] = 0;
        len[tot] = 0;
        return tot++;
    }

    // Discards all previously inserted words and creates the root node.
    void Init() {
        tot = 0;
        root = NewNode();
    }

    // Inserts the word s into the trie.
    // Precondition: s consists only of 'a'..'z' (any other character would
    // index outside nxt[cur]). Must be called before Build().
    void Insert(char *s) {
        int cur = root, slen = strlen(s);
        for (int i = 0; i < slen; i++) {
            int idx = s[i] - 'a';
            if (nxt[cur][idx] == -1) {
                nxt[cur][idx] = NewNode();
            }
            cur = nxt[cur][idx];
        }
        len[cur] = slen;
    }

    // Computes failure links by BFS and replaces every missing transition with
    // the transition of the failure state, so that afterwards nxt[u][c] is a
    // valid node for every u and c.
    //
    // While doing so, a node that is not itself a word end inherits len[] from
    // its failure node. The failure node is strictly shallower, so BFS order
    // guarantees its len[] is already final. This lets Search() read the
    // longest word ending at a position in O(1) instead of walking the whole
    // failure chain for every text character (quadratic in the worst case).
    void Build() {
        fail[root] = root;
        for (int c = 0; c < 26; c++) {
            int child = nxt[root][c];
            if (child == -1) {
                nxt[root][c] = root;
            } else {
                fail[child] = root;
                q.push(child);
            }
        }
        while (!q.empty()) {
            int cur = q.front();
            q.pop();
            for (int c = 0; c < 26; c++) {
                int child = nxt[cur][c];
                int viaFail = nxt[fail[cur]][c];
                if (child == -1) {
                    nxt[cur][c] = viaFail;
                } else {
                    fail[child] = viaFail;
                    if (len[child] == 0) {
                        len[child] = len[viaFail];
                    }
                    q.push(child);
                }
            }
        }
    }

    // Scans the text t (case-insensitively) and, for every position where a
    // dictionary word ends, records the longest such word in mp[] at the index
    // where that word starts. Any non-letter character resets the automaton to
    // the root, so a match never spans a non-letter.
    //
    // Only mp[0 .. strlen(t)-1] is cleared and written; entries beyond the text
    // length are left untouched. Requires Build() to have been called.
    void Search(char *t) {
        int cur = root, lent = strlen(t);
        memset(mp, 0, sizeof(mp[0]) * lent);
        for (int i = 0; i < lent; i++) {
            int idx;
            if (t[i] >= 'a' && t[i] <= 'z') {
                idx = t[i] - 'a';
            } else if (t[i] >= 'A' && t[i] <= 'Z') {
                idx = t[i] - 'A';
            } else {
                cur = root;
                continue;
            }

            // Build() resolved every transition, so this never yields -1.
            cur = nxt[cur][idx];
            if (len[cur]) {
                // Longest word ending at i. A later, longer word with the same
                // start simply overwrites this entry with a larger length.
                mp[i - len[cur] + 1] = len[cur];
            }
        }
    }
}ac;

// Reads the number of test cases; for each case reads n forbidden words and
// one line of text, then prints the text with every character covered by a
// forbidden word replaced by '*'.
int main() {
    int T, n;
    if (scanf("%d", &T) != 1) {
        return 0;
    }
    while (T--) {
        ac.Init();
        if (scanf("%d", &n) != 1) {
            break;
        }
        for (int i = 0; i < n; i++) {
            // Field width keeps the read inside s (1000005 bytes incl. NUL).
            if (scanf("%1000004s", s) != 1) {
                break;
            }
            ac.Insert(s);
        }
        ac.Build();

        // Skip whatever is left on the current line (normally just '\n') so
        // the next read starts at the beginning of the text line.
        int ch;
        while ((ch = getchar()) != '\n' && ch != EOF) {
        }

        // gets() was removed in C++14 and cannot bound its write; fgets() is
        // bounded but keeps the trailing newline, which is stripped here.
        if (!fgets(t, sizeof(t), stdin)) {
            t[0] = '\0';
        }
        int lent = strlen(t);
        if (lent > 0 && t[lent - 1] == '\n') {
            t[--lent] = '\0';
        }

        ac.Search(t);

        // la is one past the furthest index that must be masked so far;
        // matches may overlap, so it only ever grows.
        int la = 0;
        for (int i = 0; i < lent; i++) {
            la = std::max(la, i + mp[i]);
            if (i < la) {
                t[i] = '*';
            }
        }
        puts(t);
    }
    return 0;
}

猜你喜欢

转载自blog.csdn.net/Tc_To_Top/article/details/84670754