Principles of Compilation | Course Design — Lexical Analysis of PL/0 Compiler

1. Task description

Use C/C++ language to write a lexical analysis program for PL/0 compiler. Points to note:

(1) Identify illegal characters: such as @, & and !, etc.;

(2) Identify illegal words: combinations of numbers and letters beginning with numbers;

(3) The length of identifiers and unsigned integers must not exceed 8 characters;

(4) Can automatically recognize and ignore comment information in /* */ and // format;

(5) After encountering an error during the lexical analysis, it can continue to recognize and output an error message.

2. Programming requirements

After completing the above programming tasks, copy and paste the C/C++ language source program to the code editor on the right, click the "Evaluate" button, run the program, and the system will automatically compare the results.

3. Test instructions

The platform will test the code you write:

Test input:

const a = 10;
var   b, c;
procedure fun1;
    if a <= 10 then
        begin
            c := b + a;
        end;
begin
    read(b);
    while b # 0 do
        begin
            call fun1;
            write(2 * c);
            read(b);
        end
end.

Expected output:

(保留字,const)
(标识符,a)
(运算符,=)
(无符号整数,10)
(界符,;)
(保留字,var)
(标识符,b)
(界符,,)
(标识符,c)
(界符,;)
(保留字,procedure)
(标识符,fun1)
(界符,;)
(保留字,if)
(标识符,a)
(运算符,<=)
(无符号整数,10)
(保留字,then)
(保留字,begin)
(标识符,c)
(运算符,:=)
(标识符,b)
(运算符,+)
(标识符,a)
(界符,;)
(保留字,end)
(界符,;)
(保留字,begin)
(保留字,read)
(界符,()
(标识符,b)
(界符,))
(界符,;)
(保留字,while)
(标识符,b)
(运算符,#)
(无符号整数,0)
(保留字,do)
(保留字,begin)
(保留字,call)
(标识符,fun1)
(界符,;)
(保留字,write)
(界符,()
(无符号整数,2)
(运算符,*)
(标识符,c)
(界符,))
(界符,;)
(保留字,read)
(界符,()
(标识符,b)
(界符,))
(界符,;)
(保留字,end)
(保留字,end)
(界符,.)

Test input:

const 2a = 123456789;
var   b, c;
//单行注释
/*
* 多行注释
*/
procedure function1;
    if 2a <= 10 then
        begin
            c := b + a;
        end;
begin
    read(b);
    while b @ 0 do
        begin
            call function1;
            write(2 * c);
            read(b);
        end
end.

Expected output:

(保留字,const)
(非法字符(串),2a,行号:1)
(运算符,=)
(无符号整数越界,123456789,行号:1)
(界符,;)
(保留字,var)
(标识符,b)
(界符,,)
(标识符,c)
(界符,;)
(保留字,procedure)
(标识符长度超长,function1,行号:10)
(界符,;)
(保留字,if)
(非法字符(串),2a,行号:11)
(运算符,<=)
(无符号整数,10)
(保留字,then)
(保留字,begin)
(标识符,c)
(运算符,:=)
(标识符,b)
(运算符,+)
(标识符,a)
(界符,;)
(保留字,end)
(界符,;)
(保留字,begin)
(保留字,read)
(界符,()
(标识符,b)
(界符,))
(界符,;)
(保留字,while)
(标识符,b)
(非法字符(串),@,行号:17)
(无符号整数,0)
(保留字,do)
(保留字,begin)
(保留字,call)
(标识符长度超长,function1,行号:19)
(界符,;)
(保留字,write)
(界符,()
(无符号整数,2)
(运算符,*)
(标识符,c)
(界符,))
(界符,;)
(保留字,read)
(界符,()
(标识符,b)
(界符,))
(界符,;)
(保留字,end)
(保留字,end)
(界符,.)

4. Code

#include<bits/stdc++.h>
#include <string>
#include <vector>
#include <fstream>
using namespace std;

std::vector<std::string> wordTable;  // Word table: text of every token reported, in input order
std::map<std::string, int> B;        // Reserved words (value 1 marks membership)
std::map<std::string, int> Y;        // Operators, both one- and two-character forms
std::map<char, int> J;               // Delimiters
std::map<char, int> y;               // Characters that may begin an operator
std::string s1, s2, s3;              // Comment markers filled in by Init(): "//", "/*", "*/"
int F;                               // Non-zero while inside a /* ... */ comment (persists across lines)

// Longest identifier / unsigned integer accepted without an error report.
static const std::size_t kMaxTokenLength = 8;

// Fills the lookup tables (reserved words, operators, operator start
// characters, delimiters) and the three comment markers.
void Init() {
    static const char* const reserved[] = {
        "const", "var", "procedure", "begin", "end", "odd", "if",
        "then", "call", "while", "do", "read", "write"};  // 13 reserved words
    static const char* const operators[] = {
        "+", "-", "*", "/", "<", "<=", ">", ">=", "#", "=", ":="};  // 11 operators
    static const char operatorStarts[] = "+-*/<>=#:";
    static const char delimiters[] = "(),;.";  // 5 delimiters

    for (const char* word : reserved) B[word] = 1;
    for (const char* op : operators) Y[op] = 1;
    for (const char* p = operatorStarts; *p != '\0'; ++p) y[*p] = 1;
    for (const char* p = delimiters; *p != '\0'; ++p) J[*p] = 1;
    s1 = "//";
    s2 = "/*";
    s3 = "*/";
}

// True when `key` is present in `table` with value 1. Uses find() so that a
// lookup never inserts a new entry into the table.
template <typename Table, typename Key>
static bool InTable(const Table& table, const Key& key) {
    const typename Table::const_iterator it = table.find(key);
    return it != table.end() && it->second == 1;
}

static bool IsDigit(char c) { return c >= '0' && c <= '9'; }

static bool IsLetter(char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); }

// Blank characters skipped between tokens; '\r' is included so that CRLF
// input is not reported as an illegal character at the end of every line.
static bool IsBlank(char c) { return c == ' ' || c == '\t' || c == '\r'; }

// Prints "(category,text)" and records `text` in the word table.
static void EmitToken(const char* category, const std::string& text) {
    std::cout << "(" << category << "," << text << ")" << '\n';
    wordTable.push_back(text);
}

// Prints "(category,text,行号:line)" and records `text` in the word table.
static void EmitError(const char* category, const std::string& text, int line) {
    std::cout << "(" << category << "," << text << ",行号:" << line << ")" << '\n';
    wordTable.push_back(text);
}

// Tokenizes one source line. `line` is the 1-based line number used in error
// reports. Reads and updates the global block-comment flag F.
static void ScanLine(const std::string& input, int line) {
    const std::size_t len = input.size();
    std::size_t i = 0;
    while (i < len) {
        // Inside /* ... */: everything up to the closing marker is ignored,
        // including any "//" or "/*" that appears in the comment text.
        if (F) {
            const std::size_t close = input.find(s3, i);
            if (close == std::string::npos)
                return;
            F = 0;
            i = close + s3.size();
            continue;
        }

        const char c = input[i];
        if (IsBlank(c)) {
            ++i;
            continue;
        }

        // Comment openers are matched on exactly two characters, so "/***/"
        // or "//====" are recognised regardless of what follows them.
        if (input.compare(i, s1.size(), s1) == 0)
            return;  // line comment: the rest of the line is ignored
        if (input.compare(i, s2.size(), s2) == 0) {
            F = 1;
            i += s2.size();
            continue;
        }

        // Maximal run of letters and digits: integer, reserved word,
        // identifier, or an illegal word that starts with a digit.
        if (IsDigit(c) || IsLetter(c)) {
            std::size_t end = i;
            bool hasLetter = false;
            while (end < len && (IsDigit(input[end]) || IsLetter(input[end]))) {
                hasLetter = hasLetter || IsLetter(input[end]);
                ++end;
            }
            const std::string word = input.substr(i, end - i);
            if (!hasLetter) {
                if (word.size() <= kMaxTokenLength) EmitToken("无符号整数", word);
                else EmitError("无符号整数越界", word, line);
            } else if (InTable(B, word)) {
                EmitToken("保留字", word);
            } else if (IsDigit(c)) {
                EmitError("非法字符(串)", word, line);
            } else if (word.size() <= kMaxTokenLength) {
                EmitToken("标识符", word);
            } else {
                EmitError("标识符长度超长", word, line);
            }
            i = end;
            continue;
        }

        // Operators: longest match against the operator table, so that
        // "x:=-1" yields ":=" and "-" instead of the non-operator ":=-".
        if (InTable(y, c)) {
            const std::string pair = input.substr(i, 2);
            const std::string single(1, c);
            if (pair.size() == 2 && InTable(Y, pair)) {
                EmitToken("运算符", pair);
                i += 2;
            } else if (InTable(Y, single)) {
                EmitToken("运算符", single);
                i += 1;
            } else {
                // An operator start character that forms no operator
                // (a lone ':') is an illegal character.
                EmitError("非法字符(串)", single, line);
                i += 1;
            }
            continue;
        }

        if (InTable(J, c)) {
            EmitToken("界符", std::string(1, c));
            ++i;
            continue;
        }

        // Anything else is illegal. A run of bytes with the high bit set is
        // reported as one string so multi-byte characters are not split into
        // separate, unprintable single-byte records.
        std::size_t end = i + 1;
        if (static_cast<unsigned char>(c) >= 0x80) {
            while (end < len && static_cast<unsigned char>(input[end]) >= 0x80)
                ++end;
        }
        EmitError("非法字符(串)", input.substr(i, end - i), line);
        i = end;
    }
}

// Runs the line scanner over every line of `in`, numbering lines from 1.
static void AnalyzeStream(std::istream& in) {
    std::string input;
    for (int line = 1; std::getline(in, input); ++line)
        ScanLine(input, line);
}

// Lexical analysis of PL/0 source read from standard input.
//
// Prints one record per token to standard output and appends the token text
// to `wordTable`. Comments in // and /* */ form are skipped. Errors (illegal
// characters, words starting with a digit, identifiers or integers longer
// than 8 characters) are reported with their line number, after which
// scanning continues.
void LexicalAnalysis() {
    Init();
    AnalyzeStream(std::cin);
}


// Program entry point: tokenizes the PL/0 source supplied on standard input
// and always reports success to the caller.
int main()
{
    LexicalAnalysis();
    return EXIT_SUCCESS;
}

5. Expansion

        File input is added: the program reads the source program text from a .txt file in a specified directory, performs lexical analysis, and writes the resulting word table to a specified file. The code is as follows:

#include<bits/stdc++.h>
#include <string>
#include <vector>
#include <fstream>
using namespace std;

std::ifstream infile;    // Input file stream holding the source program
std::ofstream outfile;   // Output file stream for the word table

std::vector<std::string> wordTable;  // Word table: text of every token reported, in input order
std::map<std::string, int> B;        // Reserved words (value 1 marks membership)
std::map<std::string, int> Y;        // Operators, both one- and two-character forms
std::map<char, int> J;               // Delimiters
std::map<char, int> y;               // Characters that may begin an operator
std::string s1, s2, s3;              // Comment markers filled in by Init(): "//", "/*", "*/"
int F;                               // Non-zero while inside a /* ... */ comment (persists across lines)

// Longest identifier / unsigned integer accepted without an error report.
static const std::size_t kMaxTokenLength = 8;

// Fills the lookup tables (reserved words, operators, operator start
// characters, delimiters) and the three comment markers.
void Init() {
    static const char* const reserved[] = {
        "const", "var", "procedure", "begin", "end", "odd", "if",
        "then", "call", "while", "do", "read", "write"};  // 13 reserved words
    static const char* const operators[] = {
        "+", "-", "*", "/", "<", "<=", ">", ">=", "#", "=", ":="};  // 11 operators
    static const char operatorStarts[] = "+-*/<>=#:";
    static const char delimiters[] = "(),;.";  // 5 delimiters

    for (const char* word : reserved) B[word] = 1;
    for (const char* op : operators) Y[op] = 1;
    for (const char* p = operatorStarts; *p != '\0'; ++p) y[*p] = 1;
    for (const char* p = delimiters; *p != '\0'; ++p) J[*p] = 1;
    s1 = "//";
    s2 = "/*";
    s3 = "*/";
}

// True when `key` is present in `table` with value 1. Uses find() so that a
// lookup never inserts a new entry into the table.
template <typename Table, typename Key>
static bool InTable(const Table& table, const Key& key) {
    const typename Table::const_iterator it = table.find(key);
    return it != table.end() && it->second == 1;
}

static bool IsDigit(char c) { return c >= '0' && c <= '9'; }

static bool IsLetter(char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); }

// Blank characters skipped between tokens; '\r' is included so that CRLF
// input is not reported as an illegal character at the end of every line.
static bool IsBlank(char c) { return c == ' ' || c == '\t' || c == '\r'; }

// Prints "(category,text)" and records `text` in the word table.
static void EmitToken(const char* category, const std::string& text) {
    std::cout << "(" << category << "," << text << ")" << '\n';
    wordTable.push_back(text);
}

// Prints "(category,text,行号:line)" and records `text` in the word table.
static void EmitError(const char* category, const std::string& text, int line) {
    std::cout << "(" << category << "," << text << ",行号:" << line << ")" << '\n';
    wordTable.push_back(text);
}

// Tokenizes one source line. `line` is the 1-based line number used in error
// reports. Reads and updates the global block-comment flag F.
static void ScanLine(const std::string& input, int line) {
    const std::size_t len = input.size();
    std::size_t i = 0;
    while (i < len) {
        // Inside /* ... */: everything up to the closing marker is ignored,
        // including any "//" or "/*" that appears in the comment text.
        if (F) {
            const std::size_t close = input.find(s3, i);
            if (close == std::string::npos)
                return;
            F = 0;
            i = close + s3.size();
            continue;
        }

        const char c = input[i];
        if (IsBlank(c)) {
            ++i;
            continue;
        }

        // Comment openers are matched on exactly two characters, so "/***/"
        // or "//====" are recognised regardless of what follows them.
        if (input.compare(i, s1.size(), s1) == 0)
            return;  // line comment: the rest of the line is ignored
        if (input.compare(i, s2.size(), s2) == 0) {
            F = 1;
            i += s2.size();
            continue;
        }

        // Maximal run of letters and digits: integer, reserved word,
        // identifier, or an illegal word that starts with a digit.
        if (IsDigit(c) || IsLetter(c)) {
            std::size_t end = i;
            bool hasLetter = false;
            while (end < len && (IsDigit(input[end]) || IsLetter(input[end]))) {
                hasLetter = hasLetter || IsLetter(input[end]);
                ++end;
            }
            const std::string word = input.substr(i, end - i);
            if (!hasLetter) {
                if (word.size() <= kMaxTokenLength) EmitToken("无符号整数", word);
                else EmitError("无符号整数越界", word, line);
            } else if (InTable(B, word)) {
                EmitToken("保留字", word);
            } else if (IsDigit(c)) {
                EmitError("非法字符(串)", word, line);
            } else if (word.size() <= kMaxTokenLength) {
                EmitToken("标识符", word);
            } else {
                EmitError("标识符长度超长", word, line);
            }
            i = end;
            continue;
        }

        // Operators: longest match against the operator table, so that
        // "x:=-1" yields ":=" and "-" instead of the non-operator ":=-".
        if (InTable(y, c)) {
            const std::string pair = input.substr(i, 2);
            const std::string single(1, c);
            if (pair.size() == 2 && InTable(Y, pair)) {
                EmitToken("运算符", pair);
                i += 2;
            } else if (InTable(Y, single)) {
                EmitToken("运算符", single);
                i += 1;
            } else {
                // An operator start character that forms no operator
                // (a lone ':') is an illegal character.
                EmitError("非法字符(串)", single, line);
                i += 1;
            }
            continue;
        }

        if (InTable(J, c)) {
            EmitToken("界符", std::string(1, c));
            ++i;
            continue;
        }

        // Anything else is illegal. A run of bytes with the high bit set is
        // reported as one string so multi-byte characters are not split into
        // separate, unprintable single-byte records.
        std::size_t end = i + 1;
        if (static_cast<unsigned char>(c) >= 0x80) {
            while (end < len && static_cast<unsigned char>(input[end]) >= 0x80)
                ++end;
        }
        EmitError("非法字符(串)", input.substr(i, end - i), line);
        i = end;
    }
}

// Runs the line scanner over every line of `in`, numbering lines from 1.
static void AnalyzeStream(std::istream& in) {
    std::string input;
    for (int line = 1; std::getline(in, input); ++line)
        ScanLine(input, line);
}

// Lexical analysis of PL/0 source read from the global stream `infile`.
//
// Prints one record per token to standard output and appends the token text
// to `wordTable`. Comments in // and /* */ form are skipped. Errors (illegal
// characters, words starting with a digit, identifiers or integers longer
// than 8 characters) are reported with their line number, after which
// scanning continues.
void LexicalAnalysis() {
    Init();
    AnalyzeStream(infile);
}

int ReadFile(string path) {
    infile.open(path.c_str());
    if (!infile.is_open()) {
        cout << "输入文件打开失败!" << endl;
        return 0;
    }
    return 1;
}

int WriteFile(string path) {
    outfile.open(path.c_str());
    if (!outfile.is_open()) {
        cout << "输出文件打开失败!" << endl;
        return 0;
    }
    else {
        for (int i = 0; i < wordTable.size(); i++) {
            outfile << wordTable[i] << endl;
        }
        return 1;
    }
}

// Program entry point: reads the source file, runs the lexical analysis and
// saves the word table. Always returns 0, including when a file cannot be
// opened.
int main() {
    // Location of the source program text to analyse
    const std::string source_path = "E:\\...\\source.txt";
    // Location of the file that receives the word table
    const std::string result_path = "E:\\...\\result.txt";

    if (!ReadFile(source_path))
        return 0;

    LexicalAnalysis();
    if (WriteFile(result_path))
        std::cout << "\n单词表已保存到文件中" << std::endl;
    return 0;
}

 The specified files are as follows:

  The result of the operation is as follows:

 Read the source program file:

 Write the result file:

 

Guess you like

Origin blog.csdn.net/sun80760/article/details/131078558