1.1 Description
Assume that the keywords, operators, delimiters, identifiers and unsigned integers of a language are stored in the following tables:
// Token tables: `number` is the count of entries currently stored in `str`.
struct {
    int number;
    string str[10];
} keywords = {3, {"int", "main", "return"}};  // class 1: keywords

struct {
    int number;
    string str[10];
} operators = {5, {"+", "*", "=", "+=", "*="}};  // class 2: operators

struct {
    int number;
    string str[10];
} boundaries = {6, {"(", ")", "{", "}", ",", ";"}};  // class 3: delimiters

struct {
    int number;
    string str[100];
} identifieres = {0, {}};  // class 4: identifiers, filled while scanning

struct {
    int number;
    string str[100];
} Unsigned_integer = {0, {}};  // class 5: unsigned integers, filled while scanning
The five tables above have class numbers 1 to 5, in the order listed, and the sequence numbers within each table start from 0;
The identifier is an alphanumeric string starting with a letter; the constant is an unsigned integer; design a program to implement lexical analysis .
1.2 Input
Enter a program, end with "#" ;
1.3 Output
Output one pair per recognized token: <class number, sequence number>. Then output the table of identifiers, separated by spaces, and the table of unsigned integers, separated by spaces;
2. Algorithm description (introduction to program module functions; flow chart)
Module functions:
The input() function is used to read the code string
void input() { char ch; while (true) { ch = getchar(); if (ch == '#') break; // ignore '\n',' ' code.push_back(ch); if (ch == '\n' || ch == ' ') continue; s.push_back(ch); } } |
CheckBoundaries() function is used to check whether it is a delimiter
// Looks up the character s[index] among the delimiters.
// Returns its position in boundaries.str, or -1 when it is not a delimiter.
int CheckBoundaries(int index) {
    int slot = 0;
    while (slot < boundaries.number) {
        const char delimiter = boundaries.str[slot][0];
        if (delimiter == s[index])
            return slot;
        ++slot;
    }
    return -1;
}
The CheckIdentifieres() function looks an identifier up in the identifier table and returns its sequence number, or -1 if it has not been recorded yet
// Searches the identifier table for `s` (the parameter, not the global buffer).
// Returns the index of the matching entry, or -1 when `s` has not been recorded.
int CheckIdentifieres(string s) {
    const string *const first = identifieres.str;
    const string *const last = first + identifieres.number;
    for (const string *entry = first; entry != last; ++entry) {
        if (*entry == s)
            return static_cast<int>(entry - first);
    }
    return -1;
}
The CheckInteger() function looks an unsigned integer up in the unsigned-integer table and returns its sequence number, or -1 if it has not been recorded yet;
// Searches the unsigned-integer table for the digit string `s` (the parameter,
// not the global buffer). Returns the index of the matching entry, or -1.
int CheckInteger(string s) {
    int slot = 0;
    while (slot != Unsigned_integer.number) {
        if (Unsigned_integer.str[slot] == s)
            return slot;
        ++slot;
    }
    return -1;
}
The CheckEachWord() function is used to check the words in the code one by one and output the corresponding results;
void CheckEachWord() { for (int i = 0; i < s.size(); i++) { // "int"? if (s.substr(i, 3) == "int") { i += 2; cout << "<1,0>"; } // "main"? else if (s.substr(i, 4) == "main") { i += 3; cout << "<1,1>"; } // "return"? else if (s.substr(i, 6) == "return") { i += 5; cout << "<1,2>"; } // operators? else if (s[i] == '+' || s[i] == '*' || s[i] == '=') { if (s[i] == '+') { if (s[i + 1] == '=') { i++; cout << "<2,3>"; } else cout << "<2,0>"; } else if (s[i] == '*') { if (s[i + 1] == '=') { i++; cout << "<2,4>"; } else cout << "<2,1>"; } else cout << "<2,2>"; } // boundaries? else if (CheckBoundaries(i) != -1) { cout << "<3," << CheckBoundaries(i) << ">"; } // identifieres 判断标识符是否合法 else if (isalpha(s[i])) { string temp; temp.push_back(s[i]); while (isalnum(s[i + 1])) { i++; temp.push_back(s[i]); } if (CheckIdentifieres(temp) == -1) { identifieres.str[identifieres.number] = temp; cout << "<4," << identifieres.number++ << ">"; } else { cout << "<4," << CheckIdentifieres(temp) << ">"; } } // Unsigned_integer? else if (isdigit(s[i])) { string temp; temp.push_back(s[i]); while (isdigit(s[i + 1])) { i++; temp.push_back(s[i]); } if (CheckInteger(temp) == -1) { Unsigned_integer.str[Unsigned_integer.number] = temp; cout << "<5," << Unsigned_integer.number++ << ">"; } else { cout << "<5," << CheckInteger(temp) << ">"; } } } cout << endl; } |
The print() function outputs all recognized identifiers and unsigned integers;
void print() { //判断标识符是否合法 cout << "identifieres:"; for (int i = 0; i < identifieres.number - 1; i++) { cout << identifieres.str[i] << " "; } cout << identifieres.str[identifieres.number - 1] << endl; cout << "Unsigned_integer:"; for (int i = 0; i < Unsigned_integer.number - 1; i++) { cout << Unsigned_integer.str[i] << " "; } cout << Unsigned_integer.str[Unsigned_integer.number - 1] << endl; } |
The run() function is the program's driver: it calls the other functions in turn to carry out the whole lexical analysis.
// Driver: runs the three phases of the analyser in order.
void run() {
    input();          // read the program text
    CheckEachWord();  // print the <class,index> pair of every token
    print();          // print the identifier and unsigned-integer tables
}
3. 测试数据(2组)
样例1 |
输入 |
main(){int a=2,b=3;return 2*b+a;}# |
输出 |
<1,1><3,0><3,1><3,2><1,0><4,0><2,2><5,0><3,4><4,1><2,2><5,1><3,5><1,2><5,0><2,1><4,1><2,0><4,0><3,5><3,3> identifieres:a b Unsigned_integer:2 3 |
|
样例2 |
输入 |
main(){int a=21;int b_=3;return 2*b_+a;}# |
输出 |
<1,1><3,0><3,1><3,2><1,0><4,0><2,2><5,0><3,5><1,0><4,1><2,2><5,1><3,5><1,2><5,2><2,1><4,1><2,0><4,0><3,5><3,3> identifieres:a b_ Unsigned_integer:21 3 2 |
* 结果截屏
4.程序清单
词法分析的程序,主要功能是根据给定的代码字符串,将其中的每个单词或符号识别出来,并输出对应的类型和编号。具体流程如下:
- 定义了存储关键词、运算符、分界符、标识符和无符号整数等信息的数据结构。
- 从输入中读取代码字符串,按照给定的规则识别其中的各个单词或符号。
- 对于识别出的各个单词或符号,根据其类型和编号输出对应的结果。
- 最后输出所有识别出的合法标识符和无符号整数。
完整代码:需要¥的,你还想要?
#include <cctype>
#include <cstdio>
#include <iostream>
#include <string>
using namespace std;

// Maximum number of distinct identifiers / unsigned integers that can be recorded.
const int kSymbolCapacity = 100;

// Token tables. `number` is the count of valid entries in `str`.
// Token classes are numbered 1..5 in the order declared here; entries from 0.
struct { int number; string str[10]; } keywords = {3, {"int", "main", "return"}};          // keywords
struct { int number; string str[10]; } operators = {5, {"+", "*", "=", "+=", "*="}};       // operators
struct { int number; string str[10]; } boundaries = {6, {"(", ")", "{", "}", ",", ";"}};   // delimiters
struct { int number; string str[kSymbolCapacity]; } identifieres = {0, {}};                // identifiers
struct { int number; string str[kSymbolCapacity]; } Unsigned_integer = {0, {}};            // unsigned integers

string code;  // program text exactly as read, up to the terminating '#'
string s;     // `code` with ' ' and '\n' removed (kept for existing users of `s`)

// Reads the program from standard input up to (not including) the first '#'.
// Reading also stops at end of input, so a missing '#' cannot loop forever.
void input() {
    int ch;  // int rather than char, so EOF stays distinguishable from real characters
    while ((ch = getchar()) != EOF && ch != '#') {
        const char c = static_cast<char>(ch);
        code.push_back(c);
        // ignore '\n',' '
        if (c == '\n' || c == ' ')
            continue;
        s.push_back(c);
    }
}

// Returns the position of `word` within table[0..count), or -1 when absent.
static int FindInTable(const string table[], int count, const string &word) {
    for (int i = 0; i < count; i++) {
        if (table[i] == word)
            return i;
    }
    return -1;
}

// Returns the delimiter index of the character code[index], or -1 when that
// character is not a delimiter or `index` lies outside `code`.
int CheckBoundaries(int index) {
    if (index < 0 || static_cast<size_t>(index) >= code.size())
        return -1;
    return FindInTable(boundaries.str, boundaries.number, string(1, code[index]));
}

// Returns the index of identifier `s` (the parameter, not the global buffer)
// in the identifier table, or -1 when it has not been recorded.
int CheckIdentifieres(const string &s) {
    return FindInTable(identifieres.str, identifieres.number, s);
}

// Returns the index of the digit string `s` (the parameter, not the global
// buffer) in the unsigned-integer table, or -1 when it has not been recorded.
int CheckInteger(const string &s) {
    return FindInTable(Unsigned_integer.str, Unsigned_integer.number, s);
}

// True when `c` may follow the first letter of an identifier.
// '_' is accepted so that a name such as "b_" is kept as a single token.
static bool IsIdentifierTail(char c) {
    return isalnum(static_cast<unsigned char>(c)) || c == '_';
}

// Prints the <tokenClass,index> pair for `lexeme`, appending it to the table the
// first time it is seen. When the fixed-size table is full the lexeme is
// reported on stderr and no pair is printed.
static void EmitSymbol(int tokenClass, string table[], int &count, const string &lexeme) {
    int index = FindInTable(table, count, lexeme);
    if (index == -1) {
        if (count >= kSymbolCapacity) {
            cerr << "symbol table full, dropping: " << lexeme << endl;
            return;
        }
        index = count++;
        table[index] = lexeme;
    }
    cout << "<" << tokenClass << "," << index << ">";
}

// Tokenizes `code` and prints one <class,index> pair per token, then a newline.
// Words are read with maximal munch on the whitespace-preserving text and only
// then compared with the keyword table, so an identifier that merely starts
// with a keyword (e.g. "integer") is not split.
// Characters that belong to no token class, whitespace included, are skipped.
void CheckEachWord() {
    const size_t length = code.size();
    size_t pos = 0;
    while (pos < length) {
        const char ch = code[pos];
        // <cctype> classifiers require a value representable as unsigned char.
        const unsigned char uch = static_cast<unsigned char>(ch);

        if (isalpha(uch)) {
            // keyword or identifier
            size_t end = pos + 1;
            while (end < length && IsIdentifierTail(code[end]))
                ++end;
            const string word = code.substr(pos, end - pos);
            pos = end;
            const int keyword = FindInTable(keywords.str, keywords.number, word);
            if (keyword != -1)
                cout << "<1," << keyword << ">";
            else
                EmitSymbol(4, identifieres.str, identifieres.number, word);
        } else if (isdigit(uch)) {
            // unsigned integer: a maximal run of digits
            size_t end = pos + 1;
            while (end < length && isdigit(static_cast<unsigned char>(code[end])))
                ++end;
            const string digits = code.substr(pos, end - pos);
            pos = end;
            EmitSymbol(5, Unsigned_integer.str, Unsigned_integer.number, digits);
        } else if (ch == '+' || ch == '*' || ch == '=') {
            // operator: "+=" and "*=" take precedence over "+" and "*"
            string op(1, ch);
            if (ch != '=' && pos + 1 < length && code[pos + 1] == '=')
                op.push_back('=');
            cout << "<2," << FindInTable(operators.str, operators.number, op) << ">";
            pos += op.size();
        } else {
            // delimiter, or a character that is silently skipped
            const int boundary = CheckBoundaries(static_cast<int>(pos));
            if (boundary != -1)
                cout << "<3," << boundary << ">";
            ++pos;
        }
    }
    cout << endl;
}

// Prints "<label>" followed by the table entries separated by single spaces.
// An empty table prints just the label.
static void PrintTable(const char *label, const string table[], int count) {
    cout << label;
    for (int i = 0; i < count; i++) {
        if (i > 0)
            cout << " ";
        cout << table[i];
    }
    cout << endl;
}

// Prints the identifier table and the unsigned-integer table, one per line.
void print() {
    PrintTable("identifieres:", identifieres.str, identifieres.number);
    PrintTable("Unsigned_integer:", Unsigned_integer.str, Unsigned_integer.number);
}

// Driver: read the program, print its token pairs, then print both tables.
void run() {
    input();
    CheckEachWord();
    print();
}

int main() {
    run();
    return 0;
}
#别谢哥,哥只是个哥!