Matrix Matcher UVA - 11019 AC自动机

题目链接

Given an N*M matrix, your task is to find the number of occurrences of an X*Y pattern.

分析：对矩阵二（模式矩阵）的每一行建立 Trie，并在单词结尾结点记录该行在矩阵二中的行号（若有多行内容相同，可另开数组记录）。用 cnt[r][c] 数组记录：在矩阵一中以 (r, c) 为左上角、大小与矩阵二相同的子矩阵里，有多少行与矩阵二的对应行匹配。对矩阵一的每一行各执行一次 find()，最后扫描 cnt 数组，统计值等于 x 的位置个数即为答案。

#include <cstdio>
#include <cstring>
#include <queue>
using namespace std; 
const int MAXNODE = 10000+5; // capacity of the trie, root included
const int SIGMA_SIZE = 26;   // alphabet size: lowercase letters 'a'..'z'

// Callback invoked by ACautomata::report() once per match found by find():
// pos is the index in the text of the last matched character, v is the value
// that was passed to insert() for the matched pattern.
void process_match(int pos, int v);

// Aho-Corasick automaton stored in fixed-size arrays.
// Node 0 is the root, and a 0 entry in ch[][] means "no such child".
//
// Usage: init(), insert() every pattern, getFail(), then find() on each text.
// Preconditions (not checked): every character of every pattern and text is
// in 'a'..'z', and the patterns need fewer than MAXNODE trie nodes in total.
struct ACautomata{
	int ch[MAXNODE][SIGMA_SIZE]; // trie edges: ch[u][c] is the child of u via letter c
	int f[MAXNODE];    // failure link of each node
	int val[MAXNODE];  // non-zero exactly at nodes where an inserted pattern ends
	int last[MAXNODE]; // output link: next pattern-ending node on the failure chain, 0 if none
	int size;          // number of nodes currently in use

	// Resets the automaton to a lone root so it can be rebuilt from scratch.
	void init(){
		size = 1;
		std::memset(ch[0], 0, sizeof(ch[0]));
		// Clear the root's bookkeeping too, so nothing from an earlier build
		// can be observed through node 0.
		f[0] = 0;
		val[0] = 0;
		last[0] = 0;
	}

	// Maps a lowercase letter to its edge index.
	int idx (char c){
		return c - 'a';
	}

	// Inserts pattern s and tags its end node with v. v must be non-zero.
	void insert(const char *s, int v){
		int u = 0;
		const int n = static_cast<int>(std::strlen(s));
		for(int i = 0; i < n; i++){
			const int c = idx(s[i]);
			if(!ch[u][c]){
				// Allocate a fresh node and clear whatever an earlier build left there.
				std::memset(ch[size], 0, sizeof(ch[size]));
				val[size] = 0;
				ch[u][c] = size++;
			}
			u = ch[u][c];
		}
		val[u] = v;
	}

	// Computes failure links (f) and output links (last) in BFS order.
	// Must be called after the last insert() and before find().
	void getFail(){
		std::queue<int> q;
		f[0] = 0;
		last[0] = 0;
		// Depth-1 nodes always fail to the root and have no output link.
		// last[] has to be cleared explicitly here: node ids are reused across
		// builds and the BFS loop below only assigns last[] for deeper nodes.
		for(int c = 0; c < SIGMA_SIZE; c++){
			const int u = ch[0][c];
			if(u){
				f[u] = 0;
				last[u] = 0;
				q.push(u);
			}
		}
		while(!q.empty()){
			const int r = q.front(); q.pop();
			for(int c = 0; c < SIGMA_SIZE; c++){
				const int u = ch[r][c];
				if(!u) continue;
				q.push(u);
				int v = f[r];
				// Follow failure links until a node with a c-edge is found or
				// the root is reached. This must be a loop: stopping after a
				// single hop can land on a node without a c-edge and wrongly
				// set f[u] to the root.
				while(v && !ch[v][c]) v = f[v];
				f[u] = ch[v][c];
				// If the failure target itself ends a pattern it is the next
				// output node; otherwise inherit the target's output link.
				last[u] = val[f[u]] ? f[u] : last[f[u]];
			}
		}
	}

	// Scans text T and calls process_match() for every pattern occurrence.
	void find(const char *T){
		const int n = static_cast<int>(std::strlen(T));
		int j = 0; // current automaton state, starting at the root
		for(int i = 0; i < n; i++){
			const int c = idx(T[i]);
			while(j && !ch[j][c]) j = f[j]; // fall back until letter c can be consumed
			j = ch[j][c];
			if(val[j]) report(i, j);
			else if(last[j]) report(i, last[j]); // only proper suffixes of the state match
		}
	}

	// Reports the pattern ending at node j and every pattern reachable from it
	// through output links, all ending at text position pos.
	void report(int pos, int j) {
		for(; j; j = last[j])
			process_match(pos, val[j]);
	}
};
ACautomata ac;
const int N = 1000+5; // row/column capacity of the text matrix buffers
const int X = 100+5;  // row/column capacity of the pattern matrix buffers
char text[N][N];      // text matrix, one NUL-terminated string per row
char p[X][X];         // pattern matrix, one NUL-terminated string per row
int repr[X];   // repr[i]: index of the representative row that pattern row i is equal to
int follow[X]; // follow[i]: next pattern row equal to row i, or -1 at the end of the chain
int len[X];    // len[i]: length of pattern row i

int tr;        // index of the text row currently being scanned
int cnt[N][N]; // cnt[r][c]: pattern rows found in place for the window whose top-left corner is (r, c)

// Match callback: a pattern row tagged v ends at column pos of text row tr.
// Every pattern row sharing that content (the representative, then its
// follow chain) credits the window in which it would sit on text row tr.
void process_match(int pos, int v){
	const int first = repr[v - 1];             // representative row of the matched pattern
	const int col = pos - len[first] + 1;      // column where the match starts
	for(int row = first; row >= 0; row = follow[row]){
		if(row > tr) continue;                 // the window would start above the text
		++cnt[tr - row][col];
	}
}

int main(int argc, char** argv) {
	int t;
	scanf("%d",&t);
	while( t--){
		int n, m, x, y;
		scanf("%d%d",&n, &m);
		for(int i = 0; i < n; i++){
			scanf("%s",&text[i]);
		}
		scanf("%d%d",&x, &y);
		ac.init();
		for(int i = 0; i < x; i++){
			scanf("%s",&p[i]);
			len[i] = strlen(p[i]);
			repr[i] = i;
			follow[i] = -1;
			for(int j = 0; j < i; j++){
				if( strcmp(p[i],p[j]) == 0){ //防止有重复的情况 
					repr[i] = j; //代表元相同 
					follow[i] = follow[j]; // 形成链表链接 
					follow[j] = i; // i at head of list j; 
					break;
				}
			}
			if(repr[i] == i) ac.insert(p[i], i+1);
		}
		
		ac.getFail();
		memset(cnt, 0, sizeof(cnt));
		for(tr = 0; tr < n; tr++)
			ac.find(text[tr]);
			
		int ans = 0;
		for(int i = 0; i < n-x+1; i++)
      		for(int j = 0; j < m-y+1; j++)
        		if(cnt[i][j] == x) ans++;
        printf("%d\n",ans);
	}
	return 0;
}

Input
The first line contains a single integer t (t ≤ 15), the number of test cases.
For each case, the first line contains two integers N and M (N, M ≤ 1000). The next N lines
contain M characters each.
The next line contains two integers X and Y (X, Y ≤ 100). The next X lines contain Y characters
each.
Output
For each case, output a single integer in its own line, the number of occurrences.
Sample Input
2
1 1
x
1 1
y
3 3
abc
bcd
cde
2 2
bc
cd
Sample Output
0
2

Matrix Matcher UVA - 11019 AC自动机

猜你喜欢