Compilation Principle Lexical Analysis Training

1.1 Description

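Assume that the keywords, operators, delimiters, identifiers and unsigned integers of a language are stored in the following tables (each table holds an entry count followed by the entries themselves):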

struct { int number; string str[10]; } keywords={3,"int","main","return"} ; // keywords

struct { int number; string str[10]; } operators ={5,"+","*","=","+=","*="}; // operators

struct { int number; string str[10]; } boundaries ={6,"(",")","{","}",",",";"} ; // delimiters

struct { int number; string str[100];} identifieres={0}; // identifiers (initially empty)

struct { int number; string str[100];} Unsigned_integer={0}; // unsigned integers (initially empty)

The five classes above are numbered 1 to 5 in the order listed, and the sequence numbers within each class start from 0.

An identifier is an alphanumeric string starting with a letter; a constant is an unsigned integer. Design a program that performs lexical analysis.

1.2 Input

Enter a program terminated by "#".

1.3 Output

Output one pair per word: <class number, sequence number>. Then output the table of identifiers, separated by spaces, and the table of unsigned integers, separated by spaces.

2. Algorithm description (introduction to program module functions; flow chart)

Module functions:

The input() function is used to read the code string

// Reads the program text from standard input up to (not including) the
// terminating '#'.
//
// Every character read is appended to the global `code`; every character
// other than '\n' and ' ' is additionally appended to the global `s`.
// Reading also stops at end-of-file, so input that lacks the '#' terminator
// can no longer make the loop run forever.
void input()
{
    // getchar() returns an int so that EOF stays distinguishable from every
    // valid character; storing the result in a char loses that distinction.
    int ch;
    while ((ch = getchar()) != EOF && ch != '#')
    {
        const char c = static_cast<char>(ch);
        code.push_back(c);
        // `s` is the copy without newlines and blanks.
        if (c == '\n' || c == ' ')
            continue;
        s.push_back(c);
    }
}

CheckBoundaries() function is used to check whether it is a delimiter

// Returns the position in the `boundaries` table of the delimiter whose first
// character equals s[index], or -1 when that character is not a delimiter.
int CheckBoundaries(int index)
{
    int slot = 0;
    while (slot < boundaries.number)
    {
        const string &delimiter = boundaries.str[slot];
        if (s[index] == delimiter[0])
            return slot;
        ++slot;
    }
    return -1;
}

The CheckIdentifieres() function looks a word up in the identifier table and returns its index, or -1 if the word has not been recorded yet

// Searches the identifier table for `s`.
// Returns the index of the first matching entry, or -1 if there is none.
int CheckIdentifieres(string s)
{
    int found = -1;
    for (int slot = 0; found == -1 && slot < identifieres.number; ++slot)
    {
        if (identifieres.str[slot] == s)
            found = slot;
    }
    return found;
}

The CheckInteger() function looks a digit string up in the unsigned-integer table and returns its index, or -1 if it has not been recorded yet;

// Searches the unsigned-integer table for the digit string `s`.
// Returns the index of the first matching entry, or -1 if there is none.
int CheckInteger(string s)
{
    int slot = 0;
    while (slot < Unsigned_integer.number)
    {
        if (Unsigned_integer.str[slot] == s)
            return slot;
        ++slot;
    }
    return -1;
}

The CheckEachWord() function is used to check the words in the code one by one and output the corresponding results;

// Scans the program text in `s` from left to right and prints one
// "<class,index>" pair per recognised token, followed by a newline:
//   class 1 - keywords   (int = 0, main = 1, return = 2)
//   class 2 - operators  (+ = 0, * = 1, = = 2, += = 3, *= = 4)
//   class 3 - delimiters (index reported by CheckBoundaries)
//   class 4 - identifiers, class 5 - unsigned integers; a word not seen
//             before is appended to its table and gets the next free index.
// Characters that start none of these tokens are skipped without output.
// A new word that no longer fits into its table is reported on cerr and
// dropped instead of being written past the end of the array.
//
// NOTE(review): keywords are matched as plain prefixes of the remaining text,
// so a name such as "integer" is reported as the keyword int followed by the
// identifier "eger". This looks tied to `s` being stored without blanks
// ("int a" arrives here as "inta"); lifting it would mean scanning the
// unstripped text instead.
void CheckEachWord()
{
    // Number of slots in each table, taken from the arrays themselves.
    const int identifierCapacity =
        static_cast<int>(sizeof(identifieres.str) / sizeof(identifieres.str[0]));
    const int integerCapacity =
        static_cast<int>(sizeof(Unsigned_integer.str) / sizeof(Unsigned_integer.str[0]));

    const string::size_type length = s.size();
    string::size_type i = 0;
    while (i < length)
    {
        const char ch = s[i];

        // Keywords.
        if (s.compare(i, 3, "int") == 0)
        {
            cout << "<1,0>";
            i += 3;
            continue;
        }
        if (s.compare(i, 4, "main") == 0)
        {
            cout << "<1,1>";
            i += 4;
            continue;
        }
        if (s.compare(i, 6, "return") == 0)
        {
            cout << "<1,2>";
            i += 6;
            continue;
        }

        // Operators: '+' and '*' may be followed by '=' to form "+=" / "*=".
        if (ch == '+' || ch == '*')
        {
            const bool compound = (i + 1 < length && s[i + 1] == '=');
            if (ch == '+')
                cout << (compound ? "<2,3>" : "<2,0>");
            else
                cout << (compound ? "<2,4>" : "<2,1>");
            i += compound ? 2 : 1;
            continue;
        }
        if (ch == '=')
        {
            cout << "<2,2>";
            ++i;
            continue;
        }

        // Delimiters.
        const int delimiter = CheckBoundaries(static_cast<int>(i));
        if (delimiter != -1)
        {
            cout << "<3," << delimiter << ">";
            ++i;
            continue;
        }

        // The <cctype> classifiers are only defined for values representable
        // as unsigned char (or EOF), so convert before calling them.
        const unsigned char uch = static_cast<unsigned char>(ch);

        // Identifiers: a letter followed by any number of letters or digits.
        if (isalpha(uch))
        {
            string::size_type end = i + 1;
            while (end < length && isalnum(static_cast<unsigned char>(s[end])))
                ++end;
            const string word = s.substr(i, end - i);
            i = end;

            int index = CheckIdentifieres(word);
            if (index == -1 && identifieres.number < identifierCapacity)
            {
                index = identifieres.number++;
                identifieres.str[index] = word;
            }
            if (index != -1)
                cout << "<4," << index << ">";
            else
                cerr << "identifier table is full; dropped \"" << word << "\"" << endl;
            continue;
        }

        // Unsigned integers: a maximal run of digits.
        if (isdigit(uch))
        {
            string::size_type end = i + 1;
            while (end < length && isdigit(static_cast<unsigned char>(s[end])))
                ++end;
            const string digits = s.substr(i, end - i);
            i = end;

            int index = CheckInteger(digits);
            if (index == -1 && Unsigned_integer.number < integerCapacity)
            {
                index = Unsigned_integer.number++;
                Unsigned_integer.str[index] = digits;
            }
            if (index != -1)
                cout << "<5," << index << ">";
            else
                cerr << "unsigned-integer table is full; dropped \"" << digits << "\"" << endl;
            continue;
        }

        // Anything else is ignored.
        ++i;
    }
    cout << endl;
}

The print() function outputs all the identifiers and unsigned integers that were recognized;

// Prints the identifier table and then the unsigned-integer table, each on
// its own line as "<label>:" followed by the entries separated by single
// spaces. An empty table prints just its label.
void print()
{
    cout << "identifieres:";
    for (int i = 0; i < identifieres.number; i++)
    {
        // The separator goes in front of every entry except the first, so an
        // empty table never causes an access to str[-1].
        if (i > 0)
            cout << " ";
        cout << identifieres.str[i];
    }
    cout << endl;

    cout << "Unsigned_integer:";
    for (int i = 0; i < Unsigned_integer.number; i++)
    {
        if (i > 0)
            cout << " ";
        cout << Unsigned_integer.str[i];
    }
    cout << endl;
}

The run() function is the driver of the program: it calls the other functions to carry out the whole lexical-analysis process.

// Runs the three stages in order: read the program, print the token pairs,
// then print the identifier and unsigned-integer tables.
void run()
{
    input();         // stage 1: load the program text
    CheckEachWord(); // stage 2: emit one <class,index> pair per token
    print();         // stage 3: dump the two tables
}

3. 测试数据(2组)

样例1

输入

main(){int a=2,b=3;return 2*b+a;}#

输出

<1,1><3,0><3,1><3,2><1,0><4,0><2,2><5,0><3,4><4,1><2,2><5,1><3,5><1,2><5,0><2,1><4,1><2,0><4,0><3,5><3,3>

identifieres:a b

Unsigned_integer:2 3

样例2

输入

main(){int a=21;int b_=3;return 2*b_+a;}#

输出

<1,1><3,0><3,1><3,2><1,0><4,0><2,2><5,0><3,5><1,0><4,1><2,2><5,1><3,5><1,2><5,2><2,1><4,1><2,0><4,0><3,5><3,3>

identifieres:a b_

Unsigned_integer:21 3 2

* 结果截屏

 

 

4.程序清单

词法分析的程序,主要功能是根据给定的代码字符串,将其中的每个单词或符号识别出来,并输出对应的类型和编号。具体流程如下:

  1. 定义了存储关键词、运算符、分界符、标识符和无符号整数等信息的数据结构。
  2. 从输入中读取代码字符串,按照给定的规则识别其中的各个单词或符号。
  3. 对于识别出的各个单词或符号,根据其类型和编号输出对应的结果。
  4.  最后输出所有识别出的合法标识符和无符号整数。

完整代码:需要¥的,你还想要?

#include <cctype>
#include <cstdio>
#include <iostream>
#include <string>

using namespace std;

// Each table stores how many entries are in use (`number`) and the entries
// themselves (`str`).

// Keywords.
struct
{
    int number;
    string str[10];
} keywords = {3, {"int", "main", "return"}};

// Operators.
struct
{
    int number;
    string str[10];
} operators = {5, {"+", "*", "=", "+=", "*="}};

// Delimiters.
struct
{
    int number;
    string str[10];
} boundaries = {6, {"(", ")", "{", "}", ",", ";"}};

// Identifiers; starts out empty.
struct
{
    int number;
    string str[100];
} identifieres = {0, {}};

// Unsigned integers; starts out empty.
struct
{
    int number;
    string str[100];
} Unsigned_integer = {0, {}};

// Text buffers shared by the functions below.
string code;
string s;

// Reads the program text from standard input up to (not including) the
// terminating '#'.
//
// Every character read is appended to the global `code`; every character
// other than '\n' and ' ' is additionally appended to the global `s`.
// Reading also stops at end-of-file, so input that lacks the '#' terminator
// can no longer make the loop run forever.
void input()
{
    // getchar() returns an int so that EOF stays distinguishable from every
    // valid character; storing the result in a char loses that distinction.
    int ch;
    while ((ch = getchar()) != EOF && ch != '#')
    {
        const char c = static_cast<char>(ch);
        code.push_back(c);
        // `s` is the copy without newlines and blanks.
        if (c == '\n' || c == ' ')
            continue;
        s.push_back(c);
    }
}

// Returns the position in the `boundaries` table of the delimiter whose first
// character equals s[index], or -1 when that character is not a delimiter.
int CheckBoundaries(int index)
{
    int slot = 0;
    while (slot < boundaries.number)
    {
        const string &delimiter = boundaries.str[slot];
        if (s[index] == delimiter[0])
            return slot;
        ++slot;
    }
    return -1;
}

// Searches the identifier table for `s`.
// Returns the index of the first matching entry, or -1 if there is none.
int CheckIdentifieres(string s)
{
    int found = -1;
    for (int slot = 0; found == -1 && slot < identifieres.number; ++slot)
    {
        if (identifieres.str[slot] == s)
            found = slot;
    }
    return found;
}

// Searches the unsigned-integer table for the digit string `s`.
// Returns the index of the first matching entry, or -1 if there is none.
int CheckInteger(string s)
{
    int slot = 0;
    while (slot < Unsigned_integer.number)
    {
        if (Unsigned_integer.str[slot] == s)
            return slot;
        ++slot;
    }
    return -1;
}

// Scans the program text in `s` from left to right and prints one
// "<class,index>" pair per recognised token, followed by a newline:
//   class 1 - keywords   (int = 0, main = 1, return = 2)
//   class 2 - operators  (+ = 0, * = 1, = = 2, += = 3, *= = 4)
//   class 3 - delimiters (index reported by CheckBoundaries)
//   class 4 - identifiers, class 5 - unsigned integers; a word not seen
//             before is appended to its table and gets the next free index.
// Characters that start none of these tokens are skipped without output.
// A new word that no longer fits into its table is reported on cerr and
// dropped instead of being written past the end of the array.
//
// NOTE(review): keywords are matched as plain prefixes of the remaining text,
// so a name such as "integer" is reported as the keyword int followed by the
// identifier "eger". This looks tied to `s` being stored without blanks
// ("int a" arrives here as "inta"); lifting it would mean scanning the
// unstripped text instead.
void CheckEachWord()
{
    // Number of slots in each table, taken from the arrays themselves.
    const int identifierCapacity =
        static_cast<int>(sizeof(identifieres.str) / sizeof(identifieres.str[0]));
    const int integerCapacity =
        static_cast<int>(sizeof(Unsigned_integer.str) / sizeof(Unsigned_integer.str[0]));

    const string::size_type length = s.size();
    string::size_type i = 0;
    while (i < length)
    {
        const char ch = s[i];

        // Keywords.
        if (s.compare(i, 3, "int") == 0)
        {
            cout << "<1,0>";
            i += 3;
            continue;
        }
        if (s.compare(i, 4, "main") == 0)
        {
            cout << "<1,1>";
            i += 4;
            continue;
        }
        if (s.compare(i, 6, "return") == 0)
        {
            cout << "<1,2>";
            i += 6;
            continue;
        }

        // Operators: '+' and '*' may be followed by '=' to form "+=" / "*=".
        if (ch == '+' || ch == '*')
        {
            const bool compound = (i + 1 < length && s[i + 1] == '=');
            if (ch == '+')
                cout << (compound ? "<2,3>" : "<2,0>");
            else
                cout << (compound ? "<2,4>" : "<2,1>");
            i += compound ? 2 : 1;
            continue;
        }
        if (ch == '=')
        {
            cout << "<2,2>";
            ++i;
            continue;
        }

        // Delimiters.
        const int delimiter = CheckBoundaries(static_cast<int>(i));
        if (delimiter != -1)
        {
            cout << "<3," << delimiter << ">";
            ++i;
            continue;
        }

        // The <cctype> classifiers are only defined for values representable
        // as unsigned char (or EOF), so convert before calling them.
        const unsigned char uch = static_cast<unsigned char>(ch);

        // Identifiers: a letter followed by any number of letters or digits.
        if (isalpha(uch))
        {
            string::size_type end = i + 1;
            while (end < length && isalnum(static_cast<unsigned char>(s[end])))
                ++end;
            const string word = s.substr(i, end - i);
            i = end;

            int index = CheckIdentifieres(word);
            if (index == -1 && identifieres.number < identifierCapacity)
            {
                index = identifieres.number++;
                identifieres.str[index] = word;
            }
            if (index != -1)
                cout << "<4," << index << ">";
            else
                cerr << "identifier table is full; dropped \"" << word << "\"" << endl;
            continue;
        }

        // Unsigned integers: a maximal run of digits.
        if (isdigit(uch))
        {
            string::size_type end = i + 1;
            while (end < length && isdigit(static_cast<unsigned char>(s[end])))
                ++end;
            const string digits = s.substr(i, end - i);
            i = end;

            int index = CheckInteger(digits);
            if (index == -1 && Unsigned_integer.number < integerCapacity)
            {
                index = Unsigned_integer.number++;
                Unsigned_integer.str[index] = digits;
            }
            if (index != -1)
                cout << "<5," << index << ">";
            else
                cerr << "unsigned-integer table is full; dropped \"" << digits << "\"" << endl;
            continue;
        }

        // Anything else is ignored.
        ++i;
    }
    cout << endl;
}

// Prints the identifier table and then the unsigned-integer table, each on
// its own line as "<label>:" followed by the entries separated by single
// spaces. An empty table prints just its label.
void print()
{
    cout << "identifieres:";
    for (int i = 0; i < identifieres.number; i++)
    {
        // The separator goes in front of every entry except the first, so an
        // empty table never causes an access to str[-1].
        if (i > 0)
            cout << " ";
        cout << identifieres.str[i];
    }
    cout << endl;

    cout << "Unsigned_integer:";
    for (int i = 0; i < Unsigned_integer.number; i++)
    {
        if (i > 0)
            cout << " ";
        cout << Unsigned_integer.str[i];
    }
    cout << endl;
}

// Runs the three stages in order: read the program, print the token pairs,
// then print the identifier and unsigned-integer tables.
void run()
{
    input();         // stage 1: load the program text
    CheckEachWord(); // stage 2: emit one <class,index> pair per token
    print();         // stage 3: dump the two tables
}

// Program entry point: hands control to run() and reports success.
int main(int argc, char const *argv[])
{
    // The command-line arguments are not used.
    (void)argc;
    (void)argv;

    run();
    return 0;
}

#别谢哥,哥只是个哥!

Guess you like

Origin blog.csdn.net/m0_70711364/article/details/131747135