1、任务描述

使用C/C++语言编写PL/0编译程序的词法分析程序。需要注意的点：

（1）识别非法字符：如 @ 、 & 和 ! 等；

（2）识别非法单词：数字开头的数字字母组合；

（3）标识符和无符号整数的长度不超过8位；

（4）能自动识别并忽略/* */及//格式的注释信息；

（5）词法分析过程中遇到错误后能继续往下识别，并输出错误信息。

2、编程要求

完成上述编程任务，将C/C++语言源程序复制粘贴到右侧代码编辑器，点击“评测”按钮，运行程序，系统会自动进行结果对比。

3、测试说明

平台会对你编写的代码进行测试：

测试输入：

const a = 10;
var   b, c;
procedure fun1;
    if a <= 10 then
        begin
            c := b + a;
        end;
begin
    read(b);
    while b # 0 do
        begin
            call fun1;
            write(2 * c);
            read(b);
        end
end.

预期输出：

(保留字,const)
(标识符,a)
(运算符,=)
(无符号整数,10)
(界符,;)
(保留字,var)
(标识符,b)
(界符,,)
(标识符,c)
(界符,;)
(保留字,procedure)
(标识符,fun1)
(界符,;)
(保留字,if)
(标识符,a)
(运算符,<=)
(无符号整数,10)
(保留字,then)
(保留字,begin)
(标识符,c)
(运算符,:=)
(标识符,b)
(运算符,+)
(标识符,a)
(界符,;)
(保留字,end)
(界符,;)
(保留字,begin)
(保留字,read)
(界符,()
(标识符,b)
(界符,))
(界符,;)
(保留字,while)
(标识符,b)
(运算符,#)
(无符号整数,0)
(保留字,do)
(保留字,begin)
(保留字,call)
(标识符,fun1)
(界符,;)
(保留字,write)
(界符,()
(无符号整数,2)
(运算符,*)
(标识符,c)
(界符,))
(界符,;)
(保留字,read)
(界符,()
(标识符,b)
(界符,))
(界符,;)
(保留字,end)
(保留字,end)
(界符,.)

测试输入：

const 2a = 123456789;
var   b, c;
//单行注释
/*
* 多行注释
*/
procedure function1;
    if 2a <= 10 then
        begin
            c := b + a;
        end;
begin
    read(b);
    while b @ 0 do
        begin
            call function1;
            write(2 * c);
            read(b);
        end
end.

预期输出：

(保留字,const)
(非法字符(串),2a,行号:1)
(运算符,=)
(无符号整数越界,123456789,行号:1)
(界符,;)
(保留字,var)
(标识符,b)
(界符,,)
(标识符,c)
(界符,;)
(保留字,procedure)
(标识符长度超长,function1,行号:10)
(界符,;)
(保留字,if)
(非法字符(串),2a,行号:11)
(运算符,<=)
(无符号整数,10)
(保留字,then)
(保留字,begin)
(标识符,c)
(运算符,:=)
(标识符,b)
(运算符,+)
(标识符,a)
(界符,;)
(保留字,end)
(界符,;)
(保留字,begin)
(保留字,read)
(界符,()
(标识符,b)
(界符,))
(界符,;)
(保留字,while)
(标识符,b)
(非法字符(串),@,行号:17)
(无符号整数,0)
(保留字,do)
(保留字,begin)
(保留字,call)
(标识符长度超长,function1,行号:19)
(界符,;)
(保留字,write)
(界符,()
(无符号整数,2)
(运算符,*)
(标识符,c)
(界符,))
(界符,;)
(保留字,read)
(界符,()
(标识符,b)
(界符,))
(界符,;)
(保留字,end)
(保留字,end)
(界符,.)

4、代码

#include<bits/stdc++.h>
#include <string>
#include <vector>
#include <fstream>
using namespace std;

// Global lexer state shared by Init() and LexicalAnalysis().
vector<string> wordTable;	// token table: lexemes appended by LexicalAnalysis()
map<string, int> B;         // reserved words; Init() maps each one to 1
map<string, int> Y;         // operator spellings such as "<=" and ":="; Init() maps each one to 1
map<char, int> J;           // delimiter characters; Init() maps each one to 1
// Characters that operators are built from; Init() maps each one to 1.
map<char, int> y;
// Comment markers; Init() sets them to "//", "/*" and "*/" respectively.
string s1, s2, s3;
int F; // non-zero while the scanner is inside a /* ... */ comment; persists across input lines

// Populates the global lookup tables (B, Y, y, J) and the comment markers
// (s1, s2, s3). Every listed key is mapped to 1.
void Init() {
    // Reserved words (13).
    for (const char* keyword : {"const", "var", "procedure", "begin", "end", "odd", "if",
                                "then", "call", "while", "do", "read", "write"})
        B[keyword] = 1;

    // Operators (11).
    for (const char* op : {"+", "-", "*", "/", "<", "<=", ">", ">=", "#", "=", ":="})
        Y[op] = 1;

    // Individual characters that the operators above are made of.
    for (char ch : {'+', '-', '*', '/', '<', '>', '=', '#', ':'})
        y[ch] = 1;

    // Delimiters (5).
    for (char ch : {'(', ')', ',', ';', '.'})
        J[ch] = 1;

    // Line-comment marker, block-comment opener, block-comment closer.
    s1 = "//";
    s2 = "/*";
    s3 = "*/";
}

void LexicalAnalysis() {
    string input, word;
    Init();
    for (int line = 1; getline(cin, input); line++) {
        int f = 0; // 标记'//'注释
        for (int i = 0; input[i] != '\0'; i++) {
            // 去掉空格与'\t'
            while (input[i] != '\0' && (input[i] == ' ' || input[i] == '\t'))
                i++;

            int numFlag = 0, letterFlag = 0, operatorFlag = 0, boundaryFlag = 0, illegalFlag = 0;
            int j1 = i, j2 = i; //j1(保留字、标识符、无符号整数)、j2(运算符)

            // 标记非法字符
            if (!((input[i] >= '0' && input[i] <= '9') || (input[i] >= 'a' && input[i] <= 'z') || (input[i] >= 'A' && input[i] <= 'Z') || y[input[i]] == 1 || J[input[i]] == 1))
                illegalFlag = 1;

            // 识别数字与字母串
            while (input[j1] != '\0' && ((input[j1] >= '0' && input[j1] <= '9') || (input[j1] >= 'a' && input[j1] <= 'z') || (input[j1] >= 'A' && input[j1] <= 'Z'))) {
                if (input[j1] >= '0' && input[j1] <= '9') numFlag = 1; // 标记数字 
                else letterFlag = 1; // 标记字母 
                j1++;
            }

            // 识别运算符，并标记运算符 
            while (input[j2] != '\0' && y[input[j2]] == 1) {
                operatorFlag = 1;
                j2++;
            }

            // 识别界符，并标记界符
            if (J[input[i]] == 1)
                boundaryFlag = 1;

            //提取字符串
            if (numFlag || letterFlag)
                word = input.substr(i, j1 - i);


            //无符号整数
            if (numFlag == 1 && letterFlag == 0 && !f && !F) {
                if (word.size() <= 8) cout << "(无符号整数," << word << ")" << endl;
                else cout << "(无符号整数越界," << word << ",行号:" << line << ")" << endl;
                wordTable.push_back(word);
            }
            //标识符
            else if (letterFlag == 1 && B[word] != 1 && !f && !F) {
                if (input[i] >= '0' && input[i] <= '9') cout << "(非法字符(串)," << word << ",行号:" << line << ")" << endl; //识别非法字符串
                else if (word.size() <= 8) cout << "(标识符," << word << ")" << endl;
                else cout << "(标识符长度超长," << word << ",行号:" << line << ")" << endl;
                wordTable.push_back(word);
            }
            //保留字
            else if (letterFlag == 1 && B[word] == 1 && !f && !F) {
                cout << "(保留字," << word << ")" << endl;
                wordTable.push_back(word);
            }
            //运算符及注释
            else if (operatorFlag == 1) {
                word = input.substr(i, j2 - i); // 提取字符串
                if (word == s1) f = 1; // '//'注释,标记'//' 
                else if (word == s2) F = 1; // '/*'注释,标记'/*'
                else if (word == s3) F = 0; // '*/'注释,解除'/*/'标记
                else if (!f && !F) {
                    cout << "(运算符," << word << ")" << endl;
                    wordTable.push_back(word);
                }
            }
            //界符 
            else if (boundaryFlag == 1 && !f && !F) {
                word = input[i];
                cout << "(界符," << word << ")" << endl;
                wordTable.push_back(word);
            }
            //识别非法字符
            else if (illegalFlag == 1 && !f && !F) {
                cout << "(非法字符(串)," << input[i] << ",行号:" << line << ")" << endl;
                wordTable.push_back(word);
            }

            if (j1 != i)
                i = j1 - 1;
            else if (j2 != i)
                i = j2 - 1;
        }
    }
}


// Program entry point: runs the lexer over standard input.
int main() {
    LexicalAnalysis();
    return EXIT_SUCCESS;
}

5、扩展

添加文件输入的功能：程序能够读取指定目录下保存源程序字符串的txt文件，进行词法分析，并将结果输出到指定的文件中。代码如下：

#include<bits/stdc++.h>
#include <string>
#include <vector>
#include <fstream>
using namespace std;

// Global streams and lexer state shared by the functions below.
ifstream infile;    // input file stream; opened by ReadFile(), read by LexicalAnalysis()
ofstream outfile;   // output file stream; opened and written by WriteFile()

vector<string> wordTable;	// token table: filled by LexicalAnalysis(), dumped by WriteFile()
map<string, int> B;         // reserved words; Init() maps each one to 1
map<string, int> Y;         // operator spellings such as "<=" and ":="; Init() maps each one to 1
map<char, int> J;           // delimiter characters; Init() maps each one to 1
map<char, int> y;           // characters that operators are built from; Init() maps each one to 1
// Comment markers; Init() sets them to "//", "/*" and "*/" respectively.
string s1 , s2, s3 ;
int F; // non-zero while the scanner is inside a /* ... */ comment; persists across input lines

// Populates the global lookup tables (B, Y, y, J) and the comment markers
// (s1, s2, s3). Every listed key is mapped to 1.
void Init() {
    // Reserved words (13).
    for (const char* keyword : {"const", "var", "procedure", "begin", "end", "odd", "if",
                                "then", "call", "while", "do", "read", "write"})
        B[keyword] = 1;

    // Operators (11).
    for (const char* op : {"+", "-", "*", "/", "<", "<=", ">", ">=", "#", "=", ":="})
        Y[op] = 1;

    // Individual characters that the operators above are made of.
    for (char ch : {'+', '-', '*', '/', '<', '>', '=', '#', ':'})
        y[ch] = 1;

    // Delimiters (5).
    for (char ch : {'(', ')', ',', ';', '.'})
        J[ch] = 1;

    // Line-comment marker, block-comment opener, block-comment closer.
    s1 = "//";
    s2 = "/*";
    s3 = "*/";
}

void LexicalAnalysis() {
    string input, word;
    Init();
    for (int line = 1; getline(infile, input); line++) {
        int f = 0; // 标记'//'注释
        for (int i = 0; input[i] != '\0'; i++) {
            // 去掉空格与'\t'
            while (input[i] != '\0' && (input[i] == ' ' || input[i] == '\t'))
                i++;

            int numFlag = 0, letterFlag = 0, operatorFlag = 0, boundaryFlag = 0, illegalFlag = 0;
            int j1 = i, j2 = i; //j1(保留字、标识符、无符号整数)、j2(运算符)

            // 标记非法字符
            if (!((input[i] >= '0' && input[i] <= '9') || (input[i] >= 'a' && input[i] <= 'z') || (input[i] >= 'A' && input[i] <= 'Z') || y[input[i]] == 1 || J[input[i]] == 1))
                illegalFlag = 1;

            // 识别数字与字母串
            while (input[j1] != '\0' && ((input[j1] >= '0' && input[j1] <= '9') || (input[j1] >= 'a' && input[j1] <= 'z') || (input[j1] >= 'A' && input[j1] <= 'Z'))) {
                if (input[j1] >= '0' && input[j1] <= '9') numFlag = 1; // 标记数字 
                else letterFlag = 1; // 标记字母 
                j1++;
            }

            // 识别运算符，并标记运算符 
            while (input[j2] != '\0' && y[input[j2]] == 1) {
                operatorFlag = 1;
                j2++;
            }

            // 识别界符，并标记界符
            if (J[input[i]] == 1)
                boundaryFlag = 1;

            //提取字符串
            if (numFlag || letterFlag)
                word = input.substr(i, j1 - i);


            //无符号整数
            if (numFlag == 1 && letterFlag == 0 && !f && !F) {
                if (word.size() <= 8) cout << "(无符号整数," << word << ")" << endl;
                else cout << "(无符号整数越界," << word << ",行号:" << line << ")" << endl;
                wordTable.push_back(word);
            }
            //标识符
            else if (letterFlag == 1 && B[word] != 1 && !f && !F) {
                if (input[i] >= '0' && input[i] <= '9') cout << "(非法字符(串)," << word << ",行号:" << line << ")" << endl; //识别非法字符串
                else if (word.size() <= 8) cout << "(标识符," << word << ")" << endl;
                else cout << "(标识符长度超长," << word << ",行号:" << line << ")" << endl;
                wordTable.push_back(word);
            }
            //保留字
            else if (letterFlag == 1 && B[word] == 1 && !f && !F) {
                cout << "(保留字," << word << ")" << endl;
                wordTable.push_back(word);
            }
            //运算符及注释
            else if (operatorFlag == 1) {
                word = input.substr(i, j2 - i); // 提取字符串
                if (word == s1) f = 1; // '//'注释,标记'//' 
                else if (word == s2) F = 1; // '/*'注释,标记'/*'
                else if (word == s3) F = 0; // '*/'注释,解除'/*/'标记
                else if (!f && !F) {
                    cout << "(运算符," << word << ")" << endl;
                    wordTable.push_back(word);
                }
            }
            //界符 
            else if (boundaryFlag == 1 && !f && !F) {
                word = input[i];
                cout << "(界符," << word << ")" << endl;
                wordTable.push_back(word);
            }
            //识别非法字符
            else if (illegalFlag == 1 && !f && !F) {
                cout << "(非法字符(串)," << input[i] << ",行号:" << line << ")" << endl;
                wordTable.push_back(word);
            }

            if (j1 != i)
                i = j1 - 1;
            else if (j2 != i)
                i = j2 - 1;
        }
    }
}

// Opens the source file at `path` on the global stream infile.
// Returns 1 when the stream is open afterwards; otherwise prints an error
// message to standard output and returns 0.
int ReadFile(string path) {
    infile.open(path.c_str());
    if (infile.is_open())
        return 1;

    cout << "输入文件打开失败！" << endl;
    return 0;
}

// Writes every entry of the global wordTable to the file at `path`, one
// lexeme per line, through the global stream outfile.
// Returns 1 on success. Returns 0 after printing an error message to
// standard output when the file cannot be opened or the data cannot be
// written completely.
int WriteFile(string path) {
    outfile.open(path.c_str());
    if (!outfile.is_open()) {
        cout << "输出文件打开失败！" << endl;
        return 0;
    }

    // '\n' instead of endl: a single flush happens in close() below rather
    // than one flush per token.
    for (const string& token : wordTable)
        outfile << token << '\n';

    // close() flushes the buffer and sets failbit if that fails, so checking
    // the stream afterwards catches errors from the writes and from the flush.
    outfile.close();
    if (outfile.fail()) {
        cout << "输出文件写入失败！" << endl;
        return 0;
    }
    return 1;
}

int main() {
    // 要分析的 源程序串 的地址
    string source_path = "E:\\...\\source.txt";
    // 要写入结果的 文件 的地址
    string result_path = "E:\\...\\result.txt";
    if (ReadFile(source_path)) {
        LexicalAnalysis();
        if (WriteFile(result_path)) {
            cout << "\n单词表已保存到文件中" << endl;
        }
    }
    return 0;
}

指定的文件如下：

运行结果如下：

读取源程序文件：

写入结果文件：

编译原理 | 课程设计 — PL/0编译程序词法分析

1、任务描述

2、编程要求

3、测试说明

4、代码

5、扩展

猜你喜欢