[Compiler principles] Implementing LL(1) parsing in C++ (very detailed, broken down step by step): algorithm ideas for the FIRST set, the FOLLOW set, and the predictive parsing table

Purpose:

Write and debug an LL(1) parsing program for a given grammar so that it can analyze any input symbol string. The purpose of this experiment is to deepen the understanding of the predictive (LL(1)) parsing method.

Simple requirements:

At a minimum, handle the following known grammar, using the LL(1) parsing method to analyze any input symbol string:

（1） E-> TG

（2）G->+TG

（3）G->ε

（4）T->FS

（5）S->*FS

（6）S->ε

（7）F->(E)

（8）F->I

High requirements:

(1) Enter the grammar manually;

(2) Display the FOLLOW set of each non-terminal symbol of the grammar;

(3) Display the SELECT set of each production rule;

(4) Construct the predictive parsing table;

(5) Analyze arbitrary input symbol strings

experiment procedure:

The algorithmic idea of the first set:
If the first symbol on the right-hand side of a production is a terminal, add it to the FIRST set of the left-hand side. If the first symbol on the right-hand side is a non-terminal, perform the following steps (here $ denotes the empty string ε):
add the non-$ part of that non-terminal's FIRST set to the FIRST set of the left-hand side;
if that FIRST set contains $, move the production pointer one symbol to the right and repeat;
if it does not contain $, stop scanning this production and move on to the next production;
if the pointer has moved past the rightmost symbol of the production, add $ to the FIRST set of the left-hand side;
finally, remove the terminal symbols that appear more than once in the FIRST set.

The algorithmic idea of the follow set:
   FOLLOW(A) is constructed for each non-terminal symbol A of grammar G by applying the following rules repeatedly until no FOLLOW set grows any further.
  For the start symbol S of the grammar, put # into FOLLOW(S);
  if A->αBβ is a production, add FIRST(β) \ {ε} to FOLLOW(B);

If A->αB is a production, or A->αBβ is a production and β=>ε (that is, ε∈FIRST(β)), add FOLLOW(A) to FOLLOW(B).

The algorithm idea of generating the predictive analysis table:
The algorithm for constructing the analysis table M is:
(1) perform steps (2) and (3) for each production A->α of grammar G;
(2) for each terminal a∈FIRST(α), add A->α to M[A,a];
(3) if ε∈FIRST(α), then for every b∈FOLLOW(A), add A->α to M[A,b];
(4) mark every M[A,a] that is still undefined with an error flag.

The analysis process of the symbol string:
The master control program of the predictive parser always acts on the symbol X at the top of the STACK and the current input symbol a. For any pair (X, a), the master control program performs one of the following three actions each time:
If X=a="#", the analysis is declared successful and the analysis process stops.
If X=a≠"#", X is popped from the top of the STACK and a is advanced to the next input symbol.
If X is a non-terminal symbol, look up the analysis table M. If M[X,a] holds a production for X, first pop X from the top of the STACK, then push the symbols of the production's right-hand side onto the STACK one by one in reverse order (if the right-hand side is ε, nothing is pushed). While pushing the right-hand side, the semantic actions associated with the production should be carried out. If M[X,a] holds the error flag, call the error diagnosis routine ERROR.

Experiment code:

#include<iostream>
#include<string>
#include<map>
#include<vector>
#include<stack>
#include<set>
#include<cstring>
using namespace std;

map<char, int>getnum;        // symbol character -> numeric index; readin assigns '#' = 0, then the terminals, then '@', then the non-terminals
char getchar_[100];         // inverse of getnum: numeric index -> symbol character
vector<string>proce;       // productions; each entry is the left-hand symbol followed by the right-hand symbols (one entry per '|' alternative)
int table[100][100];      // predictive parsing table: table[non-terminal index][terminal index] = index into proce, or -1 when no production applies
int num = 0;     // highest symbol index assigned so far
int numvt = 0;     // index of the empty-word symbol '@'; index 0 is '#', indices 1..numvt-1 are the terminals
string first[100];     // FIRST sets by symbol index; each member is stored as the character '0' + member index
string follow[200];     // FOLLOW sets by symbol index, using the same '0' + index encoding
// Registers symbol x under the next free index in getnum / getchar_.
// Symbols that are already registered, and the reserved empty-word marker '@',
// are ignored. Returns false (after a warning on cerr) when the next index
// would exceed max_index.
static bool register_symbol(char x, int max_index)
{
	if (x == '@' || getnum.count(x))
		return true;                      // nothing to do, but not an error
	if (num + 1 > max_index)
	{
		cerr << "warning: symbol table full, ignoring '" << x << "'" << endl;
		return false;
	}
	getnum[x] = ++num;
	getchar_[num] = x;
	return true;
}

// Reads one line from cin (skipping leading blank lines) and registers every
// non-blank character of it as a symbol. Does nothing when cin is exhausted.
static void read_symbol_line(int max_index)
{
	string line;
	if (!getline(cin >> ws, line))
		return;                           // EOF or stream error: no symbols
	for (size_t i = 0; i < line.size(); i++)
	{
		const char x = line[i];
		if (x == ' ' || x == '\t' || x == '\r' || x == '\v' || x == '\f')
			continue;                     // separators and a stray CR from CRLF input
		if (!register_symbol(x, max_index))
			break;
	}
}

// Reads the grammar from standard input and fills the global tables.
//
// Input format, in order:
//   1. one line with all terminal characters,
//   2. one line with all non-terminal characters,
//   3. productions of the form X->rhs1|rhs2|... separated by whitespace,
//      terminated by the word "end" ('@' stands for the empty word).
//
// Resulting numbering: '#' = 0, terminals = 1..numvt-1, '@' = numvt,
// non-terminals = numvt+1..num. Every '|' alternative becomes its own entry in
// proce, stored as the left-hand symbol followed by the right-hand symbols.
void readin()
{
	memset(table, -1, sizeof(table));   // all bytes 0xFF, so every int cell reads back as -1 ("no production")
	getnum['#'] = 0;
	getchar_[0] = '#';

	// Indices are stored in 100-element arrays and, inside the FIRST/FOLLOW
	// strings, as the character '0' + index. That character must stay
	// non-negative when char is signed, which is the tighter of the two bounds.
	const int max_index = 127 - '0';

	cout << "请输入所有的终结符：" << endl;
	read_symbol_line(max_index - 1);      // keep one index free for '@'

	numvt = ++num;
	getnum['@'] = numvt;                  // empty word; '@' is used because ε cannot be typed
	getchar_[num] = ('@');

	cout << "请输入所有非终结符：" << endl;
	read_symbol_line(max_index);

	cout << "输入产生式集合（空字用‘@’表示）,以‘end’结束:" << endl;
	string pro;
	while (cin >> pro && pro != "end")
	{
		// The right-hand side starts at offset 3, which is only meaningful
		// when the token really begins with "X->".
		if (pro.size() < 3 || pro[1] != '-' || pro[2] != '>')
		{
			cerr << "warning: ignoring malformed production \"" << pro << "\"" << endl;
			continue;
		}
		string ss(1, pro[0]);
		for (size_t i = 3; i < pro.size(); i++)
		{
			if (pro[i] == '|')
			{
				// end of one alternative: store it and start the next one
				// with the same left-hand symbol
				proce.push_back(ss);
				ss.assign(1, pro[0]);
			}
			else
			{
				ss += pro[i];
			}
		}
		proce.push_back(ss);              // last (or only) alternative
	}
}
// Set union in place ("bingji" is pinyin for "union"): afterwards a holds every
// character that occurred in a or in b, each exactly once, in ascending order.
void bingji(string& a, string b)
{
	// std::set both removes duplicates and keeps the characters sorted.
	set<char> merged(a.begin(), a.end());
	merged.insert(b.begin(), b.end());
	a.assign(merged.begin(), merged.end());
}
// Recursive worker for get_f. `active` holds the non-terminals whose expansion
// is currently in progress, so that a (directly or indirectly) left-recursive
// grammar cannot cause unbounded recursion: a non-terminal met again while it
// is being expanded contributes nothing and is treated as not nullable.
static string get_f_walk(int vn, int& has_0, set<int>& active)
{
	if (vn == numvt)                      // the empty word itself
	{
		has_0 = 1;
		return string();
	}
	if (vn < numvt)                       // terminal (or '#'): its own stored FIRST set
		return first[vn];
	if (!active.insert(vn).second)        // already being expanded further up the call chain
		return string();

	string ans;
	for (size_t i = 0; i < proce.size(); i++)
	{
		const string& p = proce[i];
		if (getnum[p[0]] != vn)
			continue;
		// Walk the right-hand side: each symbol contributes its terminals, and
		// the walk only moves on while the symbols seen so far can all vanish.
		bool all_nullable = true;
		for (size_t k = 1; k < p.size() && all_nullable; k++)
		{
			int sym_has_0 = 0;
			ans += get_f_walk(getnum[p[k]], sym_has_0, active);
			all_nullable = (sym_has_0 != 0);
		}
		if (all_nullable)                 // the whole alternative can derive the empty word
			has_0 = 1;
	}
	active.erase(vn);
	return ans;
}

// Returns the terminals (encoded as '0' + index, possibly with repeats) that
// can begin a string derived from symbol index vn, never including the empty
// word. has_0 is set to 1 when vn can derive the empty word; it is left
// untouched otherwise, so callers clear it before the call.
//
// Unlike a first-symbol-only lookup, every alternative of a non-terminal is
// scanned past its nullable prefix, and vn is reported nullable only if some
// alternative is nullable as a whole.
string get_f(int vn, int& has_0)
{
	set<int> active;
	return get_f_walk(vn, has_0, active);
}
// Fills the global first[] array.
//
// Terminals 1..numvt-1 and the empty word numvt each get a one-element set
// containing themselves. For every production the right-hand side is then
// scanned from the left: the terminals each symbol can start with (get_f) are
// merged into the left-hand symbol's set, and scanning continues only while
// the current symbol can derive the empty word. If the whole right-hand side
// can vanish, the empty word itself is added.
//
// Sets are merged with bingji, so the empty word is stored at most once even
// when several alternatives are nullable, and the terminal sets are assigned
// rather than appended so calling the function again does not duplicate them.
void getfirst()
{
	for (int i = 1; i <= numvt; i++)
	{
		first[i] = string(1, static_cast<char>('0' + i));
	}
	const string eps(1, static_cast<char>('0' + numvt));
	for (size_t j = 0; j < proce.size(); j++)
	{
		const string& p = proce[j];
		string& target = first[getnum[p[0]]];
		size_t k = 0;                     // position inside the production; 0 is the left-hand symbol
		int has_0 = 0;
		do {
			has_0 = 0;
			k++;
			if (k >= p.size())            // ran off the end: every symbol was nullable
			{
				bingji(target, eps);
				break;
			}
			bingji(target, get_f(getnum[p[k]], has_0));
		} while (has_0);                  // stop at the first symbol that cannot vanish
	}
}
// Prints the FIRST set of every non-terminal (indices numvt+1..num), one per
// line, decoding each stored member ('0' + index) back to its symbol character.
void print_first()
{
	cout << "first集:" << endl;
	int vn = numvt + 1;
	while (vn <= num)
	{
		const string& members = first[vn];
		cout << "first [" << getchar_[vn] << "]: ";
		for (string::const_iterator it = members.begin(); it != members.end(); ++it)
			cout << getchar_[*it - '0'] << " ";
		cout << endl;
		++vn;
	}
	cout << endl;
}
// Fills the global follow[] array for every non-terminal.
//
// '#' (encoded as "0") is put into the FOLLOW set of the left-hand symbol of
// the first production, which is used as the start symbol. Then, for every
// non-terminal occurrence B inside a production A->...B..., the terminals that
// can start what comes after B are added to FOLLOW(B); if everything after B
// can derive the empty word (or B is last), FOLLOW(A) is added as well.
//
// Because FOLLOW(A) may itself still be growing, the scan over all productions
// is repeated until a complete pass changes no set. Calling the function more
// than once is harmless.
void getfollow()
{
	if (proce.empty())
		return;                           // no grammar: there is no start symbol to seed
	bingji(follow[getnum[proce[0][0]]], "0");
	bool changed;
	do
	{
		changed = false;
		for (size_t j = 0; j < proce.size(); j++)
		{
			const string& p = proce[j];
			const int lhs = getnum[p[0]];
			for (size_t jj = 1; jj < p.size(); jj++)
			{
				const int target = getnum[p[jj]];
				if (target <= numvt)
					continue;             // terminals and '@' have no FOLLOW set
				const string before = follow[target];
				size_t k = jj;
				int has_0;
				do
				{
					has_0 = 0;
					k++;
					if (k >= p.size())    // everything after the symbol can vanish
					{
						bingji(follow[target], follow[lhs]);
						break;
					}
					bingji(follow[target], get_f(getnum[p[k]], has_0));
				} while (has_0);
				// bingji keeps the sets sorted and duplicate-free and they only
				// grow, so a plain string comparison detects any new member.
				if (follow[target] != before)
					changed = true;
			}
		}
	} while (changed);
}
void gettable()          //得预测分析表
{
	for (int i = 0; i < proce.size(); i++)   //扫所有产生式
	{
		if (proce[i][1] == '@')     //直接推出空字的，把follow集加进去（follow集=产生式左边的follow中元素填）
		{
			string flw = follow[getnum[proce[i][0]]];
			for (int k = 0; k < flw.size(); k++)
			{
				table[getnum[proce[i][0]]][flw[k] - '0'] = i;
			}
		}
		string temps = first[getnum[proce[i][1]]];
		for (int j = 0; j < temps.size(); j++)               //考察first集
		{
			if (temps[j] != ('0' + numvt))//没有空字终结符
			{
				table[getnum[proce[i][0]]][temps[j] - '0'] = i;//字符型减'0'可以得到整数型
			}
			else                                     //有空字终结符的，考察follw集
			{
				string flw = follow[getnum[proce[i][1]]];
				for (int k = 0; k < flw.size(); k++)
				{
					table[getnum[proce[i][0]]][flw[k] - '0'] = i;
				}
			}
		}
	}
}
// Formats production number i as "X->rhs". A negative i (an empty table cell)
// yields a single blank so that table columns stay aligned.
string get_proce(int i)
{
	if (i < 0)
		return " ";
	const string& rule = proce[i];
	string text(1, rule[0]);
	text += "->";
	for (size_t pos = 1; pos < rule.size(); ++pos)
		text.push_back(rule[pos]);
	return text;
}
// Prints the predictive parsing table: a header row with '#' and the terminals
// (indices 0..numvt-1), then one tab-separated row per non-terminal showing the
// production stored in each cell.
void print_table()
{
	cout << "预测分析表：" << endl;
	int col = 0;
	while (col < numvt)
	{
		cout << '\t' << getchar_[col];
		++col;
	}
	cout << endl;
	for (int row = numvt + 1; row <= num; ++row)
	{
		cout << getchar_[row];
		for (col = 0; col < numvt; ++col)
			cout << '\t' << get_proce(table[row][col]);
		cout << endl;
	}
	cout << endl;
}
// Prints the FOLLOW set of every non-terminal (indices numvt+1..num), one per
// line, decoding each stored member ('0' + index) back to its symbol character.
void print_follow()
{
	cout << "follow集：" << endl;
	for (int vn = numvt + 1; vn <= num; ++vn)
	{
		cout << "follow [" << getchar_[vn] << "]: ";
		const string& members = follow[vn];
		size_t pos = 0;
		while (pos < members.size())
		{
			cout << getchar_[members[pos] - '0'] << " ";
			++pos;
		}
		cout << endl;
	}
	cout << endl;
}
string word;     // input string to analyze; read from cin in main
string shuchu;   // display copy of the parse stack that analyze prints in each trace line ("shuchu" is pinyin for "output")
bool analyze()       //总控，分析字word的合法性，若合法，输出所有产生式。
{
	stack<char>sta;
	sta.push('#');
	sta.push(proce[0][0]);
	shuchu.push_back('#');
	shuchu.push_back(proce[0][0]);
	int i = 0;
	while (!sta.empty())
	{
		int cur = sta.top();//取出栈顶元素
		sta.pop();        //删除栈顶元素
		if (cur == word[i])       //是终结符，推进
		{
			word[i] = NULL;
			if (!shuchu.empty())
				shuchu.pop_back();
			i++;
		}
		else  if (cur == '#')   //成功，结束
		{
			return 1;
		}
		else  if (table[getnum[cur]][getnum[word[i]]] != -1) //查表,最初预测分析表全定义为-1
		{
			int k = table[getnum[cur]][getnum[word[i]]];
			cout << shuchu << "   ";
			cout << word << "#" << " ";
			cout << proce[k][0] << "->";
			for (int j = 1; j < proce[k].size(); j++)
				cout << proce[k][j];
			cout << endl; shuchu.pop_back();
			for (int j = proce[k].size() - 1; j > 0; j--)  //逆序入栈
			{
				if (proce[k][j] != '@') {
					sta.push(proce[k][j]);
					shuchu.push_back(proce[k][j]);
				}
			}
		}
		else      //失败！
		{
			return 0;
		}
	}
	return 1;
}
// Program entry: read the grammar, compute the FIRST/FOLLOW sets and the
// parsing table, print them, then analyze one input word typed by the user.
int main()
{
	// Build phase.
	readin();
	getfirst();
	// getfollow is run twice; presumably the second pass picks up members
	// that depend on FOLLOW sets filled in later during the first pass.
	getfollow();
	getfollow();
	gettable();

	// Report phase.
	print_first();
	print_follow();
	print_table();

	// Analysis of one word.
	cout << "请输入字：" << endl;
	cin >> word;
	const bool accepted = analyze();
	cout << (accepted ? "succeed!该字有效，所用产生式如上。" : "error!") << endl;
	system("pause");                      // runs the shell command "pause" before exiting
	return 0;
}