编译原理词法分析实验代码(C/C++实现)

临时写了一份词法分析器的代码,基本可以分析常见的C语言程序(至少可以分析它自己)。代码有较强的可维护性,时间和空间效率都不低:分界符的识别采用了一个类似字典树的结构(也可以看作一个简单的自动机),因此可以识别任意长度的分界符;单引号、双引号中的反斜杠转义字符也可以处理。报错位置合理,信息清晰。muuuuuuuuuuuuuua!

#include<bits/stdc++.h>
using namespace std;
const int KEY_WORD_NUM = 22 ;
const int DIVIDE_WORD_NUM = 49 ;
const int CHARACTER_SET_NUM = 256 ;
const string NUM1 = "NUM1" ;
const string NUM2 = "NUM2" ;
const string CONSTANT_STRING = "CONSTANT_STRING" ;
const string ID = "ID" ;
const string ANNOTATIONL1 = "//" ;
const string ANNOTATIONL2 = "/*" ;
const string key_word [ KEY_WORD_NUM ] = { "if", "else", "for", "while", "do", "int", "double", "char", "read", "write", "const", "vector", "struct", "map", "void", "bool",
        "print", "scanf", "return", "inline", "true", "false" } ;
const string divide_word [ DIVIDE_WORD_NUM ]= { "!", "~", "(", ")", "{", "}", "[", "]", "+", "-", "*", "/", "%", "=", "^", "&", "|", "&&", "||", "<<", ">>", "++", "--",
        "<", ">", "<=", ">=", "!=", "==", "+=", "-=", "/=", "*=", "%=", "^=", "&=", "|=", ">>=", "<<=", ".", ",", ":", ";", "'", "\"", "\\", "#", "//", "/*" } ;

struct Node{
    string str;
    bool is_terminator;
    map < char, int > next;
    Node(){
        str.clear();
        is_terminator = false;
        next.clear();
    }
    Node( const string &str ){
        this -> next.clear();
        this -> str = str;
        this -> is_terminator = false;
    }
};
struct Word{
    string type;
    string val;
    Word(){
        type.clear();
        val.clear();
    }
    Word( const string &str , const string &val ){
        this -> type = str;
        this -> val = val;
    }
    void print(){
        printf( " %s  %s\n", this -> type.c_str(), this -> val.c_str() );
    }
};

int now_line = 1;
char ch = 0;
map < string, int > key_word_id;
map < char, int > delimiter_head;
vector < Node > state_graph;
vector < Word > ans ;
string error_information ;
string fin_name , fout_name ;
void read(char &c){
    int ret = scanf("%c",&c);
    if( c == '\n' ) now_line++ ;
    if( ret == EOF ) c = EOF;
}
inline bool Is_Character(char &c){
    return c>='A'&&c<='Z' || c>='a'&&c<='z' || c=='_';
}
inline bool Is_Number(char &c){
    return c>='0'&&c<='9' ;
}
inline bool Is_delimiter_head(char &c){
    return delimiter_head.count(c) > 0 ;
}
void InitTestScan(){
///读取标识符
    key_word_id.clear();
    for( int i=0; i<KEY_WORD_NUM; i++ ) {
        key_word_id[ key_word[i] ] = i;
    }
///读取分界符头字母
    delimiter_head.clear();
    for( int i=0; i<DIVIDE_WORD_NUM; i++ ) {
        delimiter_head[ divide_word[i][0] ] = i;
    }

    state_graph.clear();
    state_graph.push_back( Node() );
    for( int i=0; i<DIVIDE_WORD_NUM; i++ ) {
        const string &temp = divide_word[i];
        int node_index = 0;
        for( int j=0; j<(int)temp.length(); j++ ) {
            Node &now_node = state_graph[node_index];
            if( now_node.next.count( temp[j] ) == 0 ) {
                now_node.next[ temp[j] ] = state_graph.size();
                state_graph.push_back( Node( now_node.str +temp[j] ) );
                node_index = state_graph.size() -1 ;
            }
            else{
                node_index = now_node.next[ temp[j] ];
            }
        }
        state_graph[node_index].is_terminator = true ;
    }
    
    ans.clear();
    now_line = 1;
}
void PrintError( int op ){
    if( op == 1 ) {
        printf( "ERROR: fail to open the source program !\n" );
    }
    if( op == 2 ) { //理论不可能发生
        printf( "ERROR: fail to open the output file !\n" );
    }
    if( op == 3 ) {
        printf( "ERROR: the number %s is not comply with the rules !\n" , error_information.c_str() );
    }
    if( op == 4 ) {
        printf( "ERROR: %s is not a divide word !\n" , error_information.c_str() );
    }
    if( op == 5 ) {
        printf( "ERROR: the character %s in line %d was not clear !\n" , error_information.c_str(), now_line );
    }
    if( op == 6 ) {
        printf( "ERROR: missing terminating \" character in line %d !\n", now_line );
    }
    if( op == 7 ) {
        printf( "ERROR: missing terminating \' character in line %d !\n", now_line );
    }
}
int InputTestScan(){
    FILE *temp ;
    printf( "input the name of the source program...\n" );
    cin >> fin_name ;
    temp = fopen( fin_name.c_str(), "r" );
    if( temp == NULL ) return 1;
    else fclose( temp );
    printf( "input the name of the output file...\n" );
    cin >> fout_name ;
    freopen( fin_name.c_str(), "r", stdin ) ;
    freopen( fout_name.c_str(), "w", stdout ) ;
    return 0;
}
int TestScan(){
    int ret = InputTestScan();
    if( ret != 0 ) return ret;
    InitTestScan();
    read( ch );
    while( ch != EOF ){
        while( ch==' ' || ch=='\t' || ch=='\n' ){
            read( ch );
            if( ch == EOF ) return 0;
        }
        if( Is_Character(ch) == true ) { //处理标识符
            string str;
            while( Is_Character(ch) == true || Is_Number(ch) == true ) {
                str += ch;
                read( ch );
            }
            if( key_word_id.count(str) > 0 ) {
                ans.push_back( Word(str, str) );
            }
            else {
                ans.push_back( Word(ID, str) );
            }
        }
        else if( Is_Number(ch) == true ) { //处理数字常量
            string str;
            bool is_decimal = false;
            while( Is_Number(ch) == true ) {
                str +=ch;
                read( ch );
            }
            if( ch == '.' ) {
                is_decimal = true;
                str +=ch;
                read( ch );
                while( Is_Number(ch) == true ) {
                    str +=ch;
                    read( ch );
                }
            }
            if( is_decimal == true ) {
                ans.push_back( Word( NUM2, str ) );
                if( str.back() == '.' ) {
                    error_information = str ;
                    return 3;
                }
            }
            else{
                ans.push_back( Word( NUM1, str ) );
            }
        }
        else if ( ch == '\'' ) { //处理单引号
            string str;
            str +=ch;
            read( ch );
            while( ch!='\'' && ch!='\n' ) {
                if( ch == '\\' ) {
                    str +=ch;
                    read( ch );
                }
                str +=ch;
                read( ch );
            }
            if( ch != '\'' ) {
                return 7;
            }
            str +=ch;
            ans.push_back( Word(CONSTANT_STRING, str) );
            read( ch );
        }
        else if ( ch == '"' ) { //处理双引号
            string str;
            str +=ch;
            read( ch );
            while( ch!='"' && ch!='\n' ) {
                if( ch == '\\' ) {
                    str +=ch;
                    read( ch );
                }
                str +=ch;
                read( ch );
            }
            if( ch != '"' ) {
                return 6;
            }
            str +=ch;
            ans.push_back( Word(CONSTANT_STRING, str) );
            read( ch );
        }
        else if ( Is_delimiter_head(ch) == true ) { //处理分界符
            int node_index = 0;
            while( state_graph[node_index].next.count(ch) > 0 ) {
                node_index = state_graph[node_index].next[ch] ;
                read( ch );
            }
            Node &now_node = state_graph[node_index];
            if( now_node.str == ANNOTATIONL1 ) { //处理注释1
                while( ch != '\n' ) read( ch );
                read( ch );
            }
            else if( now_node.str == ANNOTATIONL2 ) { //处理注释2
                char pre_ch = ch;
                read( ch );
                while( !(pre_ch == '*' && ch == '/') ) {
                    pre_ch = ch;
                    read( ch );
                }
                read( ch );
            }
            else if( state_graph[node_index].is_terminator == false ) { //标识符不合法
                error_information = state_graph[node_index].str ;
                return 4;
            }
            else { //合法标识符
                ans.push_back( Word(now_node.str, now_node.str) );
            }
        }
        else{ //不合法字符
            error_information = ch;
            return 5;
        }
    }
}
void output(){
    printf( "Lexical analysis completed !\n" );
    for( int i=0; i<ans.size(); i++ ) {
        ans[i].print();
    }
    printf( "----------------------------------------------------------------\n" );
    printf(" Statistical information: \n" );
    int num_int = 0, num_double = 0, num_string = 0, num_variable = 0, num_keyword = 0, num_divide_word = 0 ;
    for( int i=0; i<ans.size() ;i++ ) {
        if( ans[i].type == NUM1) {
            num_int++ ;
        }
        else if( ans[i].type == NUM2 ) {
            num_double++ ;
        }
        else if( ans[i].type == CONSTANT_STRING ) {
            num_string++ ;
        }
        else if( ans[i].type == ID ) {
            num_variable++ ;
        }
        else if( key_word_id.count( ans[i].type ) > 0 ) {
            num_keyword++ ;
        }
        else {
            num_divide_word++ ;
        }
    }
    printf(" num_int = %d\n num_double = %d\n num_string = %d\n num_variable = %d\n num_keyword = %d\n num_divide_word = %d\n ",
           num_int, num_double, num_string, num_variable, num_keyword, num_divide_word ) ;
}
int main()
{
    int flag = TestScan();
    if( flag == 0 ) {
        output();
        freopen( "CON", "w", stdout );
        printf( "Lexical analysis completed, the results have been saved in the \"%s\" file !\n", fout_name.c_str() );
    }
    else {
        freopen( "CON", "w", stdout );
        PrintError( flag );
    }
    return 0;
}

发布了227 篇原创文章 · 获赞 142 · 访问量 9万+

猜你喜欢

转载自blog.csdn.net/qq_36306833/article/details/89786597