Level 1: Write a lexical analysis program for a PL/0 compiler using C/C++ language

mission details

Use C/C++ language to write a lexical analysis program for PL/0 compiler. Points to note:

(1) Identify illegal characters: such as @, & and !, etc.;

(2) Identify illegal words: combinations of numbers and letters beginning with numbers;

(3) The length of identifiers and unsigned integers must not exceed 8 characters (letters/digits); longer ones are reported as errors;

(4) Can automatically recognize and ignore comment information in /* */ and // format;

(5) After encountering an error during lexical analysis, the program outputs an error message (including the line number) and then continues analyzing the rest of the input.

programming requirements

After completing the above programming tasks, copy and paste the C/C++ language source program to the code editor on the right, click the "Evaluate" button, run the program, and the system will automatically compare the results.

Test instruction

The platform will test the code you write:

Test input:

const a = 10;
var   b, c;

procedure fun1;
    if a <= 10 then
        begin
            c := b + a;
        end;
begin
    read(b);
    while b # 0 do
        begin
            call fun1;
            write(2 * c);
            read(b);
        end
end.

Expected output:

(保留字,const)
(标识符,a)
(运算符,=)
(无符号整数,10)
(界符,;)
(保留字,var)
(标识符,b)
(界符,,)
(标识符,c)
(界符,;)
(保留字,procedure)
(标识符,fun1)
(界符,;)
(保留字,if)
(标识符,a)
(运算符,<=)
(无符号整数,10)
(保留字,then)
(保留字,begin)
(标识符,c)
(运算符,:=)
(标识符,b)
(运算符,+)
(标识符,a)
(界符,;)
(保留字,end)
(界符,;)
(保留字,begin)
(保留字,read)
(界符,()
(标识符,b)
(界符,))
(界符,;)
(保留字,while)
(标识符,b)
(运算符,#)
(无符号整数,0)
(保留字,do)
(保留字,begin)
(保留字,call)
(标识符,fun1)
(界符,;)
(保留字,write)
(界符,()
(无符号整数,2)
(运算符,*)
(标识符,c)
(界符,))
(界符,;)
(保留字,read)
(界符,()
(标识符,b)
(界符,))
(界符,;)
(保留字,end)
(保留字,end)
(界符,.)

Test input:

const 2a = 123456789;
var   b, c;

//单行注释

/*
* 多行注释
*/

procedure function1;
    if 2a <= 10 then
        begin
            c := b + a;
        end;
begin
    read(b);
    while b @ 0 do
        begin
            call function1;
            write(2 * c);
            read(b);
        end
end.

Expected output:

(保留字,const)
(非法字符(串),2a,行号:1)
(运算符,=)
(无符号整数越界,123456789,行号:1)
(界符,;)
(保留字,var)
(标识符,b)
(界符,,)
(标识符,c)
(界符,;)
(保留字,procedure)
(标识符长度超长,function1,行号:10)
(界符,;)
(保留字,if)
(非法字符(串),2a,行号:11)
(运算符,<=)
(无符号整数,10)
(保留字,then)
(保留字,begin)
(标识符,c)
(运算符,:=)
(标识符,b)
(运算符,+)
(标识符,a)
(界符,;)
(保留字,end)
(界符,;)
(保留字,begin)
(保留字,read)
(界符,()
(标识符,b)
(界符,))
(界符,;)
(保留字,while)
(标识符,b)
(非法字符(串),@,行号:17)
(无符号整数,0)
(保留字,do)
(保留字,begin)
(保留字,call)
(标识符长度超长,function1,行号:19)
(界符,;)
(保留字,write)
(界符,()
(无符号整数,2)
(运算符,*)
(标识符,c)
(界符,))
(界符,;)
(保留字,read)
(界符,()
(标识符,b)
(界符,))
(界符,;)
(保留字,end)
(保留字,end)
(界符,.)

problem solving ideas

double pointer + map

The idea is divided into two steps: screening and identification. Screening scans the input and extracts candidate strings, preparing them for recognition; this step can be implemented with the two-pointer technique. Identification then classifies each extracted string into one of five categories: reserved words, operators, delimiters, unsigned integers, or identifiers. This step can be implemented with a map.

While scanning and extracting strings with the two-pointer technique, keep track of which kind of token is being read (reserved word, operator, delimiter, unsigned integer, or identifier) so that it can be classified afterwards. Because reserved words, unsigned integers, and identifiers are all composed of digits or letters, you can scan a maximal run of digits and letters in one pass. Delimiters and operators are scanned separately. Once a string has been extracted, it can be classified and printed. Finally, the content inside comments is not part of the program and does not need to be recognized.

the code

// Recognizes reserved words, operators, identifiers, delimiters and unsigned integers (some cases may still be unhandled; for reference only)
#include<bits/stdc++.h>
using namespace std;
// Line-oriented lexical analyzer for PL/0.
//
// Reads source text from standard input and prints one token per line:
//   (保留字,word)  (标识符,name)  (无符号整数,digits)  (运算符,op)  (界符,ch)
// Lexical errors are printed with the 1-based line number and scanning then
// continues with the next character:
//   (非法字符(串),text,行号:n)    illegal character, or a word starting with a digit
//   (标识符长度超长,name,行号:n)   identifier longer than kMaxTokenLength
//   (无符号整数越界,digits,行号:n) integer longer than kMaxTokenLength
// "//" comments run to the end of the line; "/* ... */" comments may span lines.
namespace {

// Identifiers and unsigned integers may have at most this many characters.
const std::size_t kMaxTokenLength = 8;

// The 13 reserved words of PL/0.
const std::set<std::string> kReservedWords = {
    "const", "var",  "procedure", "begin", "end",  "odd",  "if",
    "then",  "call", "while",     "do",    "read", "write"};

// Single-character operators. The two-character operators "<=", ">=" and ":="
// are matched explicitly before these so that the longest operator wins.
const std::string kOneCharOperators = "+-*/<>=#";

// The 5 delimiters.
const std::string kDelimiters = "(),;.";

bool isAsciiDigit(char c)
{
    return c >= '0' && c <= '9';
}

bool isAsciiLetter(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

// True for the characters that make up words: ASCII letters and digits.
bool isWordChar(char c)
{
    return isAsciiDigit(c) || isAsciiLetter(c);
}

// Prints a valid token as "(kind,text)".
void reportToken(const char* kind, const std::string& text)
{
    std::cout << '(' << kind << ',' << text << ")\n";
}

// Prints a lexical error as "(kind,text,行号:lineNo)".
void reportError(const char* kind, const std::string& text, int lineNo)
{
    std::cout << '(' << kind << ',' << text << ",行号:" << lineNo << ")\n";
}

// Classifies and prints a maximal run of letters/digits (word is non-empty).
void emitWord(const std::string& word, int lineNo)
{
    bool hasLetter = false;
    for (std::size_t k = 0; k < word.size(); ++k) {
        if (!isAsciiDigit(word[k])) {
            hasLetter = true;
            break;
        }
    }

    if (!hasLetter) {
        // Digits only: an unsigned integer, subject to the length limit.
        if (word.size() <= kMaxTokenLength) {
            reportToken("无符号整数", word);
        } else {
            reportError("无符号整数越界", word, lineNo);
        }
    } else if (isAsciiDigit(word[0])) {
        // Contains letters but starts with a digit (e.g. "2a"): illegal word.
        reportError("非法字符(串)", word, lineNo);
    } else if (kReservedWords.count(word) != 0) {
        reportToken("保留字", word);
    } else if (word.size() <= kMaxTokenLength) {
        reportToken("标识符", word);
    } else {
        reportError("标识符长度超长", word, lineNo);
    }
}

// Scans one source line and prints its tokens.
//   line           - the line text without the trailing newline
//   lineNo         - 1-based line number used in error messages
//   inBlockComment - in/out: true while inside an unterminated "/* ... */"
//                    comment; carried from one line to the next by the caller
void scanLine(const std::string& line, int lineNo, bool& inBlockComment)
{
    const std::size_t n = line.size();
    std::size_t i = 0;

    // Every access below is guarded by i < n, so trailing whitespace can
    // neither be reported as a character nor push the cursor past the end.
    while (i < n) {
        if (inBlockComment) {
            // Skip to the first "*/"; if there is none, the comment
            // continues on the next line.
            const std::size_t close = line.find("*/", i);
            if (close == std::string::npos) {
                return;
            }
            inBlockComment = false;
            i = close + 2;
            continue;
        }

        const char c = line[i];

        // Blanks, tabs and a CR left over from CRLF line endings separate tokens.
        if (c == ' ' || c == '\t' || c == '\r') {
            ++i;
            continue;
        }

        // Comment openers are checked before operators so that '/' and '*'
        // are not mistaken for division/multiplication.
        if (line.compare(i, 2, "//") == 0) {
            return; // rest of the line is a comment
        }
        if (line.compare(i, 2, "/*") == 0) {
            inBlockComment = true;
            i += 2;
            continue;
        }

        if (isWordChar(c)) {
            std::size_t j = i + 1;
            while (j < n && isWordChar(line[j])) {
                ++j;
            }
            emitWord(line.substr(i, j - i), lineNo);
            i = j;
            continue;
        }

        // Longest match first: two-character operators.
        const std::string two = line.substr(i, 2);
        if (two == "<=" || two == ">=" || two == ":=") {
            reportToken("运算符", two);
            i += 2;
            continue;
        }

        const std::string one(1, c);
        if (kOneCharOperators.find(c) != std::string::npos) {
            reportToken("运算符", one);
        } else if (kDelimiters.find(c) != std::string::npos) {
            reportToken("界符", one);
        } else {
            // Anything else (including a ':' not followed by '=') is illegal.
            reportError("非法字符(串)", one, lineNo);
        }
        ++i;
    }
}

} // namespace

// Reads PL/0 source from standard input line by line and prints its tokens.
int main()
{
    std::string line;
    bool inBlockComment = false; // persists across lines for "/* ... */"
    for (int lineNo = 1; std::getline(std::cin, line); ++lineNo) {
        scanLine(line, lineNo, inBlockComment);
    }
    return 0;
}

Guess you like

Origin blog.csdn.net/qq_51936803/article/details/129508879