编译原理实验一(词法分析)
// Lexical analyser: reads a C-like source text, strips its comments and
// prints one table row per token (lexeme, category code, internal value).
//
// _CRT_SECURE_NO_WARNINGS silences MSVC's deprecation warning for freopen().
#define _CRT_SECURE_NO_WARNINGS
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <map>
#include <string>
#include <vector>

using namespace std;

// Characters that can form a token on their own, and characters that are
// skipped as blanks ('\r' is included so CRLF input is not reported as an
// unknown symbol).
const string singleSign = "+-*/:=;()#.{}", emptyStr = " \t\r\n";

string text;  // the source text being analysed

map<string, int> key2id;  // lexeme -> category code, filled by initID()

// Keywords and operators; the index of an entry is its category code.
// NOTE(review): "do" appears twice (index 5 and index 38). initID() lets the
// later entry win, so "do" is reported with code 38 - confirm which is meant.
vector<string> keystrVec = {
    "#", "main", "if", "then", "while", "do", "static", "int", "double",
    "struct", "break", "else", "long", "switch", "case", "typedef", "char",
    "return", "const", "float", "short", "continue", "for", "void", "sizeof",
    "+", "-", "*", "/", ":", ":=", "<", "!=", "<=", ">", ">=", "=", "default",
    "do", ";", "(", ")", ".", "{", "}"
};

// Category codes for token classes that are not listed in keystrVec:
// number, string literal, identifier, unknown symbol, character literal.
const int N = static_cast<int>(keystrVec.size()), NUM = N + 1, STR = N + 2,
          IDTF = N + 3, UNKNOW = N + 4, CHAR = N + 5;

// Fills key2id so that every entry of keystrVec maps to its index.
// When a lexeme occurs more than once the last index is kept.
void initID(const vector<string>& keystrVec) {
    for (size_t i = 0; i < keystrVec.size(); ++i) {
        key2id[keystrVec[i]] = static_cast<int>(i);
    }
}

map<string, int> key;  // not referenced anywhere in this file

// Reads standard input to the end and returns it as one string in which
// every line is terminated by '\n'.
string myread() {
    string content, line;
    while (getline(cin, line)) {
        content += line;
        content += '\n';
    }
    return content;
}

// Reports a malformed input and terminates the program with a failure status.
// (The previous implementation spun forever in an empty loop.)
void ErrorProcess() {
    cout << "Grammer Error" << endl;
    exit(EXIT_FAILURE);
}

// Returns the index one past the string literal whose opening quote is at
// src[open]. A backslash escapes the character after it, so both \" and \\
// are handled. `closed` tells whether the closing quote was found; if it was
// not, the returned index is src.length().
static size_t scanStringLiteral(const string& src, size_t open, bool& closed) {
    const size_t len = src.length();
    size_t pos = open + 1;
    while (pos < len) {
        if (src[pos] == '\\' && pos + 1 < len) {
            pos += 2;  // skip the backslash and the character it escapes
            continue;
        }
        if (src[pos] == '"') {
            closed = true;
            return pos + 1;
        }
        ++pos;
    }
    closed = false;
    return len;
}

// Returns the index one past the character literal whose opening quote is at
// src[open]. Accepted shapes are 'c' and '\c'. `closed` tells whether the
// closing quote was found in the expected place; if it was not, the returned
// index stops in front of the offending character.
static size_t scanCharLiteral(const string& src, size_t open, bool& closed) {
    const size_t len = src.length();
    size_t pos = open + 1;
    if (pos < len && src[pos] == '\\') ++pos;  // escape prefix
    if (pos < len) ++pos;                      // the character itself
    closed = pos < len && src[pos] == '\'';
    return closed ? pos + 1 : pos;
}

// Returns `text` with its comments removed.
//  - A "//" comment is replaced by a single '\n' (its own line break is
//    consumed, and one is added if the comment ends the input).
//  - A "/* ... */" comment is replaced by a single blank so that the tokens
//    on both sides stay separate; an unterminated one swallows the rest of
//    the input.
//  - String and character literals are copied verbatim, so comment markers
//    inside them are left alone.
//  - A '/' that does not start a comment is the division operator and is
//    copied through.
// A malformed character literal is reported through ErrorProcess().
string deleteAnnotation(const string& text) {
    string txt;
    txt.reserve(text.length());
    const size_t len = text.length();
    size_t cur = 0;
    while (cur < len) {
        const char c = text[cur];
        const char next = (cur + 1 < len) ? text[cur + 1] : '\0';
        if (c == '/' && next == '/') {
            cur += 2;
            while (cur < len && text[cur] != '\n') ++cur;
            txt += '\n';
            if (cur < len) ++cur;  // consume the line break that was replaced
        } else if (c == '/' && next == '*') {
            const size_t close = text.find("*/", cur + 2);
            cur = (close == string::npos) ? len : close + 2;
            txt += ' ';
        } else if (c == '"') {
            bool closed = false;
            const size_t end = scanStringLiteral(text, cur, closed);
            txt.append(text, cur, end - cur);
            cur = end;
        } else if (c == '\'') {
            bool closed = false;
            const size_t end = scanCharLiteral(text, cur, closed);
            if (!closed) ErrorProcess();
            txt.append(text, cur, end - cur);
            cur = end;
        } else {
            txt += c;
            ++cur;
        }
    }
    return txt;
}

// Prepares the lookup table used by the tokenizer.
void init() {
    initID(keystrVec);
}

// Converts a string of decimal digits to its binary representation.
// Returns "nan" when the input has more than 18 digits (it might not fit in
// a long long), "0" for any non-empty all-zero input such as "0" or "00",
// and an empty string for an empty input.
string dec2bin(const string& num) {
    if (num.length() > 18) return "nan";
    if (num.empty()) return num;
    long long n = 0;
    for (size_t i = 0; i < num.length(); ++i) {
        n = n * 10 + (num[i] - '0');
    }
    if (n == 0) return "0";
    string bits;
    while (n > 0) {
        bits += (n & 1) ? '1' : '0';  // collected least significant bit first
        n >>= 1;
    }
    return string(bits.rbegin(), bits.rend());
}

// One row of the result table.
struct Token {
    string lexeme;  // the token text as it appears in the input
    int code;       // category code
    string value;   // identifier name, binary value of a number, or "-"
};

// The <cctype> classifiers require a value representable as unsigned char,
// hence the casts.
static bool isIdentStart(char c) {
    return c == '_' || isalpha(static_cast<unsigned char>(c)) != 0;
}

static bool isIdentPart(char c) {
    return c == '_' || isalnum(static_cast<unsigned char>(c)) != 0;
}

static bool isDecDigit(char c) {
    return isdigit(static_cast<unsigned char>(c)) != 0;
}

// Category code of `lexeme` according to key2id, or UNKNOW when it is not
// listed. Unlike key2id[lexeme] this never inserts into the map.
static int lookupCode(const string& lexeme) {
    const map<string, int>::const_iterator hit = key2id.find(lexeme);
    return hit == key2id.end() ? UNKNOW : hit->second;
}

// Splits `src` (comments already removed) into tokens. init() must have been
// called first, otherwise no keyword or operator is recognised.
//  - identifiers / keywords: [A-Za-z_][A-Za-z0-9_]*
//  - numbers: runs of decimal digits, value = dec2bin(lexeme)
//  - two-character operators ":=", "<=", ">=", "!=" are matched before their
//    one-character prefixes
//  - an unterminated string or character literal and any other unexpected
//    character (including a lone '!') are reported with code UNKNOW
vector<Token> tokenize(const string& src) {
    vector<Token> tokens;
    const size_t len = src.length();
    size_t cur = 0;
    while (cur < len) {
        const char c = src[cur];
        const char next = (cur + 1 < len) ? src[cur + 1] : '\0';
        if (isIdentStart(c)) {
            size_t end = cur + 1;
            while (end < len && isIdentPart(src[end])) ++end;
            const string identifier = src.substr(cur, end - cur);
            const map<string, int>::const_iterator hit = key2id.find(identifier);
            if (hit != key2id.end()) {
                tokens.push_back(Token{identifier, hit->second, "-"});
            } else {
                tokens.push_back(Token{identifier, IDTF, identifier});
            }
            cur = end;
        } else if (isDecDigit(c)) {
            size_t end = cur + 1;
            while (end < len && isDecDigit(src[end])) ++end;
            const string num = src.substr(cur, end - cur);
            tokens.push_back(Token{num, NUM, dec2bin(num)});
            cur = end;
        } else if ((c == ':' || c == '<' || c == '>' || c == '!') && next == '=') {
            const string op = string(1, c) + "=";
            tokens.push_back(Token{op, lookupCode(op), "-"});
            cur += 2;
        } else if (c == '<' || c == '>' || singleSign.find(c) != string::npos) {
            const string op(1, c);
            tokens.push_back(Token{op, lookupCode(op), "-"});
            ++cur;
        } else if (c == '"') {
            bool closed = false;
            const size_t end = scanStringLiteral(src, cur, closed);
            tokens.push_back(Token{src.substr(cur, end - cur), closed ? STR : UNKNOW, "-"});
            cur = end;
        } else if (c == '\'') {
            bool closed = false;
            const size_t end = scanCharLiteral(src, cur, closed);
            tokens.push_back(Token{src.substr(cur, end - cur), closed ? CHAR : UNKNOW, "-"});
            cur = end;
        } else {
            if (emptyStr.find(c) == string::npos) {
                tokens.push_back(Token{string(1, c), UNKNOW, "-"});
            }
            ++cur;
        }
    }
    return tokens;
}

// Tokenizes the global `text` and prints the result table to standard output.
void work() {
    cout << "单词符号\t\t种别码\t\t\t内码值" << endl;
    cout << "---------------\t\t---------------\t\t---------------" << endl;
    const vector<Token> tokens = tokenize(text);
    for (size_t i = 0; i < tokens.size(); ++i) {
        cout << tokens[i].lexeme << "\t\t\t" << tokens[i].code << "\t\t\t"
             << tokens[i].value << '\n';
    }
    cout.flush();
}

// Define LEXER_NO_MAIN to compile the lexer into a harness that supplies its
// own main().
#ifndef LEXER_NO_MAIN
int main() {
    // Input is taken from data.in; remove this redirection to type the
    // source by hand instead.
    if (freopen("data.in", "r", stdin) == nullptr) {
        perror("data.in");
        return EXIT_FAILURE;
    }
    // freopen("data.out", "w", stdout);
    ios::sync_with_stdio(false);
    init();
    cout << keystrVec.size() << endl;  // number of recognisable keywords/operators
    text = myread();
    // cout << text << endl;           // show the text as read
    text = deleteAnnotation(text);
    // cout << text << endl;           // show the text with comments removed
    work();
    cout << "===处理完毕===" << endl;
    // The former busy-wait "pause" was removed: it never returned and burned
    // a CPU core. Run the program from a terminal to keep the output visible.
    return 0;
}
#endif