// 编译原理实验一（词法分析） — Compiler Principles, Lab 1: Lexical Analysis

#define _CRT_SECURE_NO_WARNINGS
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <map>
#include <string>
#include <vector>
using namespace std;
// singleSign: characters the lexer reports as one-character tokens.
// emptyStr:   blank characters the lexer skips without reporting.
const string singleSign = "+-*/:=;()#.{}", emptyStr = " \t\n";
string text; // holds the input text being analysed
// Maps a lexeme to its category code; filled by initID().
map<string, int> key2id;
// Table of known lexemes (keywords, operators, delimiters). initID() assigns
// each entry its index in this table as its category code.
// NOTE(review): "do" is listed twice; because initID() assigns by index, the
// later index overwrites the earlier one in key2id — confirm which is intended.
vector<string> keystrVec = { "#", "main", "if", "then", "while", "do",
"static", "int", "double", "struct", "break",
"else", "long", "switch", "case", "typedef",
"char", "return", "const", "float", "short",
"continue", "for", "void", "sizeof",  "+",
"-", "*", "/" , ":", ":=", "<", "!=", "<=", ">",
">=", "=", "default", "do", ";", "(", ")", ".", "{", "}"
}; // table entries, identifiers, strings, number NUM
// N is the table size; the remaining constants are category codes for token
// classes that are not table entries, numbered just past the table.
const int N = keystrVec.size(), NUM = N + 1, STR = N + 2, IDTF = N + 3, UNKNOW = N + 4, CHAR = N+5;
// Fills key2id so that every lexeme in keystrVec maps to its position in the
// vector. When a lexeme occurs more than once, the last position wins.
void initID(vector<string> keystrVec) {
	int position = 0;
	for (const string& lexeme : keystrVec) {
		key2id[lexeme] = position;
		++position;
	}
}
// NOTE(review): none of the functions visible in this file read or write
// `key`; it looks like a leftover — confirm before removing.
map<string, int> key;
// Reads standard input line by line until getline fails and returns all of it
// as one string. Every line read is followed by '\n' in the result, including
// the last one.
string myread() {
	string buffer;
	for (string row; getline(cin, row); ) {
		buffer += row;
		buffer += '\n';
	}
	return buffer;
}
// Reports an error on standard output and terminates the program with a
// failure status. Does not return.
void ErrorProcess(){
	cout << "Grammer Error" << endl;
	// Terminate instead of spinning: an empty infinite loop performs no
	// observable work, keeps a CPU core busy forever, and the compiler is
	// allowed to assume such a loop terminates.
	exit(EXIT_FAILURE);
}
// Returns a copy of `text` with comments removed.
//   - "// ..." line comment: dropped through the end of the line; exactly one
//     '\n' is emitted in its place (also when the comment ends at end of text).
//   - "/* ... */" block comment: replaced by one space so the tokens on either
//     side stay separated; an unterminated block comment swallows the rest.
//   - String literals ("...") and character literals ('c' or '\c') are copied
//     verbatim, so comment markers inside them are preserved. A backslash
//     escapes the character that follows it.
//   - A '/' that does not start a comment (e.g. division) is copied through.
// A character literal that is not closed right after its single (optionally
// escaped) character is reported through ErrorProcess().
string deleteAnnotation (string text) {
	const size_t len = text.length();
	string txt;
	txt.reserve(len);
	size_t cur = 0;
	while (cur < len) {
		const char c = text[cur];
		// One-character look-ahead; '\0' stands for "no next character".
		const char next = (cur + 1 < len) ? text[cur + 1] : '\0';

		if (c == '/' && next == '/') {
			// Line comment: skip to the line break, emit a single newline.
			cur += 2;
			while (cur < len && text[cur] != '\n') ++cur;
			txt += '\n';
			if (cur < len) ++cur; // consume the line break itself
		}
		else if (c == '/' && next == '*') {
			// Block comment: search for the terminator after the opening "/*"
			// so that "/*/" is not mistaken for a complete comment.
			const size_t close = text.find("*/", cur + 2);
			cur = (close == string::npos) ? len : close + 2;
			txt += ' ';
		}
		else if (c == '\"') {
			// String literal: find the first quote that is not escaped.
			size_t pos = cur + 1;
			while (pos < len && text[pos] != '\"') {
				// A backslash consumes the character after it, which makes
				// both \" and \\ come out right.
				pos += (text[pos] == '\\' && pos + 1 < len) ? 2 : 1;
			}
			const size_t stop = (pos < len) ? pos + 1 : len; // include closing quote if present
			txt.append(text, cur, stop - cur);
			cur = stop;
		}
		else if (c == '\'') {
			// Character literal: opening quote, optional backslash, one
			// character, closing quote.
			size_t pos = cur + 1;
			if (pos < len && text[pos] == '\\') ++pos; // escape prefix
			++pos;                                      // pos = index where the closing quote must be
			const bool closed = pos < len && text[pos] == '\'';
			size_t stop = pos + 1;
			if (stop > len) stop = len;                 // never copy past the end of the text
			txt.append(text, cur, stop - cur);
			cur = stop;
			if (!closed) ErrorProcess();
		}
		else {
			txt += c;
			++cur;
		}
	}

	return txt;
}
// Program set-up: fills key2id from the global lexeme table keystrVec.
void init() {
	initID(keystrVec);
}
// Converts a non-negative decimal number, given as a string of digits, to its
// binary representation.
//   num     decimal digits; leading zeros are ignored.
//   returns the binary digits without leading zeros ("0" for any all-zero
//           input), "nan" when the number has more than 18 significant digits
//           (the largest length guaranteed to fit in a long long), or the
//           empty string when `num` is empty.
// The characters of `num` are not validated; callers pass digits only.
string dec2bin(string num) {
	if (num.empty()) return num; // nothing to convert

	// Leading zeros carry no value and must not count toward the size limit.
	const size_t first = num.find_first_not_of('0');
	if (first == string::npos) return "0"; // "0", "00", "000", ...
	if (num.length() - first > 18) return "nan";

	long long n = 0;
	for (size_t i = first; i < num.length(); ++i) {
		n = n * 10 + (num[i] - '0');
	}

	// Collect bits least-significant first, then reverse once at the end.
	string reversedBits;
	while (n > 0) {
		reversedBits += (n & 1) ? '1' : '0';
		n >>= 1;
	}
	return string(reversedBits.rbegin(), reversedBits.rend());
}
// Prints one row of the token table: lexeme, category code, internal value.
static void emitTokenRow(const string& lexeme, int code, const string& value) {
	cout << lexeme << "\t\t\t" << code << "\t\t\t" << value << endl;
}

// Returns the category code registered for `lexeme` in key2id, or UNKNOW when
// it is not registered. Uses find() so that a lookup never inserts a key.
static int codeOfLexeme(const string& lexeme) {
	const auto hit = key2id.find(lexeme);
	return hit == key2id.end() ? UNKNOW : hit->second;
}

// True for characters that may appear inside an identifier. The cast keeps
// the <cctype> call defined for bytes with the high bit set.
static bool isIdentifierChar(char c) {
	return isalnum(static_cast<unsigned char>(c)) || c == '_';
}

// Lexes the global `text` and prints one table row per token to standard
// output: keywords/operators/delimiters with their key2id code, identifiers
// (IDTF), decimal numbers (NUM, with their binary value), string literals
// (STR), character literals (CHAR), and anything else that is not blank
// (UNKNOW). Unterminated string or character literals are reported as UNKNOW.
void work() {
	cout << "单词符号\t\t种别码\t\t\t内码值" << endl;
	cout << "---------------\t\t---------------\t\t---------------" << endl;
	const size_t len = text.length();
	size_t cur = 0;
	while (cur < len) {
		const char c = text[cur];
		const unsigned char uc = static_cast<unsigned char>(c);

		if (isalpha(uc) || c == '_') {
			// Keyword or identifier: take the longest run of identifier
			// characters, then classify it. Collecting first means a word
			// that ends exactly at the end of the text is still reported.
			size_t stop = cur + 1;
			while (stop < len && isIdentifierChar(text[stop])) ++stop;
			const string word = text.substr(cur, stop - cur);
			const auto hit = key2id.find(word);
			if (hit != key2id.end())
				emitTokenRow(word, hit->second, "-");
			else
				emitTokenRow(word, IDTF, word);
			cur = stop;
		}
		else if (isdigit(uc)) {
			// Unsigned decimal number.
			size_t stop = cur + 1;
			while (stop < len && isdigit(static_cast<unsigned char>(text[stop]))) ++stop;
			const string num = text.substr(cur, stop - cur);
			emitTokenRow(num, NUM, dec2bin(num));
			cur = stop;
		}
		else if (cur + 1 < len && text[cur + 1] == '='
				&& key2id.find(text.substr(cur, 2)) != key2id.end()) {
			// Two-character operator registered in the table (":=", "<=",
			// ">=", "!="). Report it once, with its own code, and consume
			// both characters so the '=' is not lexed a second time.
			const string op = text.substr(cur, 2);
			emitTokenRow(op, codeOfLexeme(op), "-");
			cur += 2;
		}
		else if (singleSign.find(c) != string::npos || c == '<' || c == '>') {
			// One-character operator or delimiter.
			const string op(1, c);
			emitTokenRow(op, codeOfLexeme(op), "-");
			++cur;
		}
		else if (c == '\"') {
			// String literal: ends at the first quote that is not escaped.
			size_t pos = cur + 1;
			while (pos < len && text[pos] != '\"') {
				// A backslash consumes the character after it, so both \"
				// and \\ are handled.
				pos += (text[pos] == '\\' && pos + 1 < len) ? 2 : 1;
			}
			const bool closed = pos < len;
			const size_t stop = closed ? pos + 1 : len;
			emitTokenRow(text.substr(cur, stop - cur), closed ? STR : UNKNOW, "-");
			cur = stop;
		}
		else if (c == '\'') {
			// Character literal: quote, optional backslash, one character,
			// closing quote.
			size_t pos = cur + 1;
			if (pos < len && text[pos] == '\\') ++pos; // escape prefix
			++pos;                                      // index where the closing quote must be
			const bool closed = pos < len && text[pos] == '\'';
			size_t stop = pos + 1;
			if (stop > len) stop = len;                 // stay inside the text
			emitTokenRow(text.substr(cur, stop - cur), closed ? CHAR : UNKNOW, "-");
			cur = stop;
		}
		else {
			// Blank characters are skipped; everything else is unknown.
			if (emptyStr.find(c) == string::npos)
				emitTokenRow(string(1, c), UNKNOW, "-");
			++cur;
		}
	}
}
// Entry point: reads the program text from data.in, strips its comments,
// and prints the token table. Returns 0 on success, 1 when data.in cannot be
// opened.
int main()
{
	// Redirect stdin to the input file. To type the input by hand, remove
	// this redirection.
	if (freopen("data.in", "r", stdin) == nullptr) {
		cerr << "cannot open data.in" << endl;
		return 1;
	}
	//freopen("data.out", "w", stdout);
	ios::sync_with_stdio(false);
	init();
	cout << keystrVec.size() << endl; // number of lexemes in the table
	text = myread();
	//cout << text << endl; // show the text as read
	text = deleteAnnotation(text);
	//cout << text << endl; // show the text after comment removal
	work();
	cout << "===处理完毕===" << endl;
	// No busy-wait "pause" here: an empty infinite loop keeps a CPU core
	// spinning and prevents the program from ever terminating. Run it from a
	// terminal (or redirect stdout to a file) to keep the output visible.

	return 0;
}
// 编译原理实验一（词法分析）

// 猜你喜欢