编译原理:语法分析

目的:熟练掌握自上而下的语法分析方法,并能用程序实现。



要求:
1. 使用的文法如下(其中 # 表示空串 ε):
    E->TE'
    E'->+TE'|#
    T->FT'
    T'->*FT'|#
    F->(E)|id
2. 对于任意给定的输入串(词法记号流)进行语法分析,递归下降方法和非递归预测分析方法可以任选其一来实现。
3. 要有一定的错误处理功能。即对错误能提示,并且能在一定程度上忽略尽量少的记号来进行接下来的分析。可以参考书上介绍的同步记号集合来处理。
可能的出错情况:idid*id,  id**id,  (id+id, +id*+id ……
4. 输入串以#结尾(此处的#是输入结束标记,程序内部会把它替换为$),输出推导过程中使用到的产生式。例如:
   输入:id+id*id#
   输出:E->TE'
        T->FT'
        F->id
        E'->+TE'
        T->FT'
        .....
如果输入串有错误,则在输出中要体现是跳过输入串的某些记号了,还是弹栈,弹出某个非终结符或者是终结符了,同时给出相应的出错提示信息。比如:
idid*id对应的出错信息是:“输入串跳过记号id,用户多输入了一个id”;
id**id对应的出错信息是:“弹栈,弹出非终结符F,用户少输入了一个id”
(id+id对应的出错信息是:“弹栈,弹出终结符 ) ,用户少输入了一个右括号(或者说,括号不匹配)”

套路:还是先把代码贴出来,回头再找时间写篇文章总结一下,分析一下数据结构和算法。代码写得乱七八糟,不过基本功能实现了。

#pragma once
#include <iostream>
#include <algorithm>
#include <fstream>
#include <map>
#include <set>
#include <string>
#include <stack>

using namespace std;

/*---------------- Global state: grammar symbols, FIRST/FOLLOW sets, parse table and parse stack ----------------*/
set<string> non_terminal;                                                       // non-terminals (every left-hand side recorded by divide_words)
set<string> productions;                                                        // raw right-hand sides, one entry per grammar line
std::map<string, string> match_map;                                             // non-terminal -> its whole right-hand side (alternatives still joined by '|')
std::map<string, set<string>> first;                                            // non-terminal -> its FIRST set
std::map<string, set<string>> follow;                                           // non-terminal -> its FOLLOW set
bool is_not_changing = false;                                                   // fixed-point flag of find_follow: set to true once a pass adds no symbol

/**
Abandoned alternative key type for the parse table, kept for reference only.
NOTE(review): the operator< below returns true for any two unequal keys, so it is
not a strict weak ordering and cannot serve as a std::map / std::set key as written.

typedef struct _nonTerm_input                                                   // (non-terminal, input symbol) pair
{
    string nonTerm;                                                             // non-terminal
    string input;                                                               // input symbol
    friend bool operator < (struct _nonTerm_input const &a, struct _nonTerm_input const &b) {
        if (a.nonTerm == b.nonTerm && a.input == b.input) {
            return false;
        }
        return true;
    }
}nonTerm_input;
set<nonTerm_input> structs;                                                     // one struct per (non-terminal, terminal) combination
*/

// Parse table. The key is a one-entry map {non-terminal -> input symbol}; the value is
// the production text ("A->alpha"), the marker "synch", or "" for an empty cell.
std::map<map<string, string>, string> analyze_table;

///std::map<nonTerm_input, string> analyze_table;                                   // parse table (abandoned variant)

stack<string> analyze_stack;                                                    // parse stack of grammar symbols

/*------------------------------------------------------------------------------------------------------------------*/

/**
 * Splits one grammar line of the form "LHS->RHS" at its first "->" and records it.
 *
 * The left-hand side is added to the global non_terminal set, the right-hand side
 * to the global productions set, and the pair is inserted into match_map
 * (an existing entry for the same left-hand side is left untouched).
 * A line without "->" is ignored.
 *
 * @param grammar   one line of the grammar file
 * @param match_map map from non-terminal to right-hand side that receives the rule
 */
void divide_words(string grammar, map<string, string>& match_map) {
    const size_t arrow = grammar.find("->");
    if (arrow == string::npos) {
        return;                                     // not a production line
    }

    const string lhs = grammar.substr(0, arrow);    // the non-terminal being defined
    const string rhs = grammar.substr(arrow + 2);   // everything after the arrow

    non_terminal.insert(lhs);
    productions.insert(rhs);
    match_map.insert(make_pair(lhs, rhs));
}
/**
 * Splits a right-hand side on '|' and adds every alternative to small_right.
 *
 * Empty pieces are kept: a leading, trailing or doubled '|' adds the empty
 * string, and a right-hand side without any '|' is added as a single piece.
 *
 * @param grammar_right right-hand side such as "+TE'|#"
 * @param small_right   set that receives the alternatives
 */
void divide_right(string grammar_right, set<string>& small_right) {
    size_t piece_begin = 0;
    for (;;) {
        const size_t bar = grammar_right.find('|', piece_begin);
        if (bar == string::npos) {
            // Last (or only) piece: everything up to the end of the string.
            small_right.insert(grammar_right.substr(piece_begin));
            return;
        }
        small_right.insert(grammar_right.substr(piece_begin, bar - piece_begin));
        piece_begin = bar + 1;
    }
}
/**
 * Adds the FIRST set of non_term to `first`.
 *
 * For every alternative of non_term (looked up in the global match_map):
 *  - if it begins with a non-terminal, that non-terminal's FIRST set is added
 *    recursively. The LONGEST matching non-terminal is used, so an alternative
 *    beginning with "E'" is attributed to E' only and no longer to both E and E';
 *  - if it begins with a lowercase letter, the leading terminal is added: two
 *    letters when the next character is also a lowercase letter (e.g. "id"),
 *    otherwise one. Previously the whole alternative was inserted;
 *  - if it begins with one of ( ) + * #, that single character is added.
 * Anything else (e.g. an undeclared upper-case symbol) contributes nothing.
 *
 * NOTE(review): there is no guard against left recursion, and a nullable
 * leading non-terminal does not pull in the FIRST set of the symbol after it.
 *
 * @param non_term non-terminal whose FIRST set is wanted; unknown names add nothing
 * @param first    set that receives the symbols (existing content is kept)
 */
void find_first(string non_term, set<string>& first) {
    static const string kSingleCharSymbols = "()+*#";

    const auto rule = match_map.find(non_term);
    if (rule == match_map.end()) {
        return;
    }

    set<string> alternatives;                       // right-hand side split on '|'
    divide_right(rule->second, alternatives);

    for (const string& alt : alternatives) {
        if (alt.empty()) {
            continue;
        }

        // Pick the longest declared non-terminal that prefixes this alternative.
        string head;
        for (const string& candidate : non_terminal) {
            if (candidate.size() > head.size() &&
                alt.compare(0, candidate.size(), candidate) == 0) {
                head = candidate;
            }
        }
        if (!head.empty()) {
            find_first(head, first);
            continue;
        }

        // The alternative starts with a terminal.
        const char lead = alt[0];
        if (lead >= 'a' && lead <= 'z') {
            const bool two_letters = alt.size() > 1 && alt[1] >= 'a' && alt[1] <= 'z';
            first.insert(alt.substr(0, two_letters ? 2 : 1));
        }
        else if (kSingleCharSymbols.find(lead) != string::npos) {
            first.insert(string(1, lead));
        }
    }
}

// Seeds the global FOLLOW table: the first element of non_terminal (treated as the
// start symbol) gets {"$"}, every other non-terminal gets an empty set.
// Entries that already exist in `follow` are left untouched.
void initial_follow() {
    bool is_start_symbol = true;
    for (const string& symbol : non_terminal) {
        set<string> seed;
        if (is_start_symbol) {
            seed.insert("$");
            is_start_symbol = false;
        }
        follow.insert(make_pair(symbol, seed));
    }
}
// Returns true when the given FIRST set contains the empty-string marker "#".
bool first_contains_null(set<string> &first) {
    return first.count("#") != 0;
}
// Returns true when str is a member of the global non_terminal set.
bool is_non_terminal(string str) {
    return non_terminal.count(str) > 0;
}

// Returns true when a is a lowercase ASCII letter ('a'..'z').
bool is_letter(char a) {
    return 'a' <= a && a <= 'z';
}

// Returns true when a is an uppercase ASCII letter ('A'..'Z').
bool is_cap_letter(char a) {
    if (a < 'A') {
        return false;
    }
    return a <= 'Z';
}

/**
 * Removes the last grammar symbol from str and returns it.
 *
 * The trailing symbol is two characters long when the string ends in a prime
 * (e.g. "E'") or in two lowercase letters (e.g. "id"); otherwise it is the last
 * single character. An empty string yields an empty result.
 *
 * A one-character string is always returned as a single symbol. The previous
 * version indexed position size()-2 in that case and threw std::out_of_range
 * for inputs such as "i" or "'".
 *
 * @param str right-hand side to consume from the back; shortened in place
 * @return the symbol that was removed, or "" when str was empty
 */
string find_last(string &str) {
    if (str.empty()) {
        return string();
    }

    const size_t len = str.size();
    size_t width = 1;
    if (len >= 2) {
        const char last = str[len - 1];
        const bool primed = (last == '\'');
        const bool two_letter_terminal = is_letter(last) && is_letter(str[len - 2]);
        if (primed || two_letter_terminal) {
            width = 2;
        }
    }

    const string symbol = str.substr(len - width);
    str.erase(len - width);
    return symbol;
}



// Returns the number of symbols summed over every set in the global FOLLOW table.
int cal_follow_total_size() {
    int symbol_count = 0;
    for (const auto& entry : follow) {
        symbol_count += entry.second.size();
    }
    return symbol_count;
}

/**
 * Computes FOLLOW sets by iterating over all productions until nothing changes.
 *
 * Each alternative A->X1...Xn is scanned right to left with a "trailer" set that
 * starts as FOLLOW(A). A terminal replaces the trailer with itself. For a
 * non-terminal X, the trailer (minus "#") is added to FOLLOW(X); the trailer then
 * becomes FIRST(X), or is extended by FIRST(X) when FIRST(X) contains "#".
 *
 * Changes from the previous version:
 *  - the global is_not_changing flag is reset on entry, so a second call
 *    actually recomputes instead of returning immediately;
 *  - convergence is measured on the Follow argument itself rather than on the
 *    global follow table;
 *  - missing entries in Follow / first are handled instead of dereferencing an
 *    end iterator.
 *
 * @param Follow FOLLOW table to extend in place (normally pre-seeded by initial_follow)
 */
void find_follow(std::map<string, set<string>>& Follow) {
    static const set<string> kNoSymbols;

    // Total number of symbols currently stored in Follow.
    const auto stored_symbols = [&Follow]() {
        size_t total = 0;
        for (const auto& entry : Follow) {
            total += entry.second.size();
        }
        return total;
    };

    is_not_changing = false;
    while (!is_not_changing) {
        const size_t size_before = stored_symbols();

        for (const auto& rule : match_map) {
            const string& left = rule.first;                    // A in A->alpha
            set<string> alternatives;
            divide_right(rule.second, alternatives);

            for (const string& alternative : alternatives) {
                // Re-read FOLLOW(A) per alternative: an earlier alternative may have grown it.
                const auto left_follow = Follow.find(left);
                set<string> trailer;
                if (left_follow != Follow.end()) {
                    trailer = left_follow->second;
                }

                string rest = alternative;
                while (!rest.empty()) {
                    const string symbol = find_last(rest);      // consumes from the right
                    if (!is_non_terminal(symbol)) {
                        trailer.clear();
                        trailer.insert(symbol);
                        continue;
                    }

                    set<string>& target = Follow[symbol];
                    for (const string& terminal : trailer) {
                        if (terminal != "#") {                  // the empty string never belongs to FOLLOW
                            target.insert(terminal);
                        }
                    }

                    const auto symbol_first = first.find(symbol);
                    const set<string>& first_set =
                        (symbol_first != first.end()) ? symbol_first->second : kNoSymbols;
                    if (first_set.count("#") == 0) {
                        trailer = first_set;                    // not nullable: whatever followed is hidden
                    }
                    else {
                        trailer.insert(first_set.begin(), first_set.end());
                    }
                }
            }
        }

        // Sets only ever grow, so an unchanged total means a fixed point was reached.
        is_not_changing = (stored_symbols() == size_before);
    }
}



/*
 * Creates the empty predictive-parse table.
 *
 * The column symbols are collected from `input`: a lowercase letter followed by
 * another lowercase letter forms one two-letter terminal (e.g. "id"), a lone
 * lowercase letter forms a one-letter terminal, and of all other characters only
 * + * ( ) $ are recognised. For every (non-terminal, symbol) pair a cell is
 * inserted into the global analyze_table: "synch" when the symbol is in the
 * non-terminal's FOLLOW set, "" otherwise. Cells that already exist are kept.
 */
void init_table(string input) {                                 // input: sample string that supplies the column symbols
    const string recognised_symbols = "+*()$";

    set<string> columns;
    size_t pos = 0;
    while (pos < input.size()) {
        const char current = input[pos];
        if (is_letter(current)) {
            if (is_letter(input[pos + 1])) {                    // terminals are assumed to be at most two letters long
                columns.insert(input.substr(pos, 2));
                pos += 2;
            }
            else {
                columns.insert(string(1, current));
                pos += 1;
            }
        }
        else {
            if (recognised_symbols.find(current) != string::npos) {
                columns.insert(string(1, current));
            }
            pos += 1;
        }
    }

    for (const string& row : non_terminal) {
        for (const string& column : columns) {
            map<string, string> key;
            key.insert(make_pair(row, column));

            const set<string>& row_follow = follow.find(row)->second;
            const bool is_sync_token = row_follow.find(column) != row_follow.end();
            analyze_table.insert(make_pair(key, string(is_sync_token ? "synch" : "")));
        }
    }
}

/**
 * Returns the lookahead symbols that select the production left->production.
 *
 * The right-hand side is scanned left to right. For a non-terminal (an uppercase
 * letter, optionally followed by a prime) its FIRST set minus "#" is collected and
 * the scan stops unless that FIRST set contains "#". A terminal (two lowercase
 * letters, one lowercase letter, or any other single character, including the
 * empty-string marker "#") is added and ends the scan. When every symbol turns out
 * to be nullable, FOLLOW(left) is added to what was collected.
 *
 * Changes from the previous version:
 *  - the character after the current one is only inspected when it exists
 *    (at(i + 1) used to throw std::out_of_range on the last character);
 *  - after a primed non-terminal the prime is skipped instead of being re-read
 *    as a terminal;
 *  - a "#" coming from a nullable non-terminal is no longer reported, and the
 *    symbols collected so far are kept when FOLLOW(left) is added.
 *
 * @param left       non-terminal on the left of the production
 * @param production one alternative of its right-hand side
 * @return the set of symbols under which this alternative is chosen
 */
set<string> find_first_s(string left, string production) {
    set<string> FIRST_S;

    size_t i = 0;
    while (i < production.size()) {
        const char current = production[i];
        const bool has_next = i + 1 < production.size();

        if (is_cap_letter(current)) {
            const bool primed = has_next && production[i + 1] == '\'';
            const string symbol = production.substr(i, primed ? 2 : 1);

            set<string> symbol_first;
            find_first(symbol, symbol_first);
            const bool nullable = first_contains_null(symbol_first);
            symbol_first.erase("#");
            FIRST_S.insert(symbol_first.begin(), symbol_first.end());
            if (!nullable) {
                return FIRST_S;
            }
            i += symbol.size();                     // also steps over the prime
            continue;
        }

        // A terminal always ends the scan.
        if (is_letter(current) && has_next && is_letter(production[i + 1])) {
            FIRST_S.insert(production.substr(i, 2));
        }
        else {
            FIRST_S.insert(string(1, current));
        }
        return FIRST_S;
    }

    // Every symbol can vanish: whatever may follow `left` also selects this alternative.
    const auto left_follow = follow.find(left);
    if (left_follow != follow.end()) {
        FIRST_S.insert(left_follow->second.begin(), left_follow->second.end());
    }
    return FIRST_S;
}

/**
 * Fills the cells of the global analyze_table with productions.
 *
 * For every alternative A->alpha the selecting symbols are taken from
 * find_first_s. For a symbol a other than "#", the cell [A, a] is set to
 * "A->alpha". For "#", every cell [A, b] with b in FOLLOW(A) is set to "A->#".
 * Only cells that already exist in the table are written; none are created.
 */
void bulid_table() {
    for (const string& head : non_terminal) {
        const auto rule = match_map.find(head);
        if (rule == match_map.end()) {
            continue;
        }

        set<string> alternatives;
        divide_right(rule->second, alternatives);

        for (const string& body : alternatives) {
            const set<string> selectors = find_first_s(head, body);
            for (const string& selector : selectors) {
                if (selector != "#") {
                    map<string, string> key;
                    key.insert(make_pair(head, selector));
                    const auto cell = analyze_table.find(key);
                    if (cell != analyze_table.end()) {
                        cell->second = head + "->" + body;
                    }
                    continue;
                }

                // Empty-string case: the production applies under every FOLLOW symbol.
                const auto head_follow = follow.find(head);
                if (head_follow == follow.end()) {
                    continue;
                }
                const set<string> followers = head_follow->second;
                for (const string& follower : followers) {
                    map<string, string> key;
                    key.insert(make_pair(head, follower));
                    const auto cell = analyze_table.find(key);
                    if (cell != analyze_table.end()) {
                        cell->second = head + "->#";
                    }
                }
            }
        }
    }
}
/* Prints analyze_table, one line per cell: non-terminal, input symbol, cell content. */
void diaplay_table() {
    cout << "非终结符\t输入符号\t同步记号" << endl;
    for (const auto& cell : analyze_table) {
        const auto& position = *cell.first.begin();     // the single (non-terminal, symbol) entry of the key
        cout << position.first << "\t\t" << position.second;
        cout << "\t\t" << cell.second << "\n";
    }
}

// Returns true when the cell [left, right] exists in analyze_table and is not "".
// A missing cell and an empty cell both yield false.
bool no_empty_in_table(string left, string right) {
    map<string, string> key;
    key.insert(make_pair(left, right));

    const auto cell = analyze_table.find(key);
    return cell != analyze_table.end() && cell->second != "";
}

// Returns true when the cell [left, right] exists in analyze_table and holds "synch".
bool is_synch(string left, string right) {
    map<string, string> key;
    key.insert(make_pair(left, right));

    const auto cell = analyze_table.find(key);
    if (cell == analyze_table.end()) {
        return false;
    }
    return cell->second == "synch";
}


// Reports that the input token `redundant` is being skipped (empty table cell).
void error_empty(string redundant) {
    cout << "出错!跳过!";
    cout << " 多输入了一个" << redundant;
    cout << endl;
}

// Reports that the stack top is being popped because `synch` is a synchronizing token.
void error_synch(string synch) {
    cout << "出错!弹栈!";
    cout << " 多输入了一个" << synch;
    cout << endl;
}

// Reports a parenthesis mismatch.
void error_bracket() {
    cout << "出错!括号不匹配!";
    cout << endl;
}

int main() {
    /*读取文法文件*/
    const char* filename = "wenfa.txt";
    ifstream inFile(filename);
    if (!inFile) {
        cout << "\nFiled to open file " << filename;
        return -1;
    }
    string st = "\0";
    char buf[100];
    while (!inFile.eof()) {
        inFile.getline(buf, 20);
        st = buf;
        if (strlen(buf) == 0 || st == "end") {
            break;
        }
        divide_words(st, match_map);                                        //对每一行文法进行分析找出非终结符和对应的产生式
    }
    inFile.close();
    /*遍历非终结符集合,为每个非终结符寻找first集合*/
    for (set<string>::iterator i = non_terminal.begin(); i != non_terminal.end(); ++i) {
        set<string> the_first;                                              //当前非终结符的first集合
        find_first(*i, the_first);
        first.insert(make_pair(*i, the_first));
    }
    cout << "非终结符" << "\t" << "First集合" << endl;
    for (map<string, set<string>>::iterator i = first.begin(); i != first.end(); i++) {
        cout << "-------------------------" << endl;
        cout << i->first << "\t|\t";
        cout << "{ ";
        //倒序输出first集合中的元素与文法中出现的顺序保持一致
        for (set<string>::reverse_iterator j = (i->second).rbegin(); j != (i->second).rend(); j++) {
            cout << *j << ", ";
        }
        cout << "\b\b }";
        cout << endl;
    }
    cout << endl;
    initial_follow();
    find_follow(follow);
    cout << "非终结符" << "\t" << "Follow集合" << endl;
    for (map<string, set<string>>::iterator i = follow.begin(); i != follow.end(); i++) {
        cout << "------------------------------" << endl;
        cout << i->first << "\t|\t";
        cout << "{ ";
        //倒序输出first集合中的元素与文法中出现的顺序保持一致
        for (set<string>::reverse_iterator j = (i->second).rbegin(); j != (i->second).rend(); j++) {
            cout << *j << ", ";
        }
        cout << "\b\b }";
        cout << endl;
    }
    cout << "PLEASE ENTER YOUR WORDS" << endl;
    string w;                                                       //输入串
    while (true) {
        char temp;
        cin >> temp;
        if ('#' == temp) {
            break;
        }
        w = w + temp;
    }
    w = w + '$';													//在最后再加上$符
    cout << "WHAT YOUR INPUT STREAM IS: " << endl;
    cout << w << endl;
    init_table("id+id*id*(id*id)$");                               //用文法中的一些符号进行初始化
    bulid_table();
    diaplay_table();
    analyze_stack.push("$");										//压入$符
    analyze_stack.push(*non_terminal.begin());                      //压入文法的开始符号
    int token = 0;

    while (analyze_stack.size() > 0) {
        string left;
        string right;

        left = analyze_stack.top();
        if (is_letter(w.at(token))) {
            if (is_letter(w.at(token + 1))) {
                //形如id的
                right = w.substr(token, 2);
                token++;
            }
            else {
                right = w.at(token);
            }
        }
        else {
            right = w.at(token);
        }

        if (left == "(" || left == ")") {
            error_bracket();
            break;
        }

        map<string, string> found_pair;
        found_pair.insert(make_pair(left, right));
        bool _is_empty = no_empty_in_table(left, right);            //true为不空,false为空
        if (analyze_table.find(found_pair) != analyze_table.end() && !_is_empty) {                                          //如果为空的话
            token++;
            error_empty(right);
        }
        else if (is_synch(left, right)) {
            analyze_stack.pop();
            error_synch(right);
        }
        else {
            bool flag = false;
            //递归直到left和right相同
            while (left != right) {
                found_pair.insert(make_pair(left, right));
                string element = analyze_table.find(found_pair)->second;
                cout << element << endl;
                string ele_right = "\0";
                for (int i = 0; i < (int)element.length(); ++i) {
                    if (element[i] == '-' && element[i + 1] == '>') {
                        ele_right = element.substr(i + 2, element.length() - 1);    
                        break;
                    }
                }
                if (ele_right == "#") {
                    analyze_stack.pop();
                    flag = true;
                    break;
                }
                else {
                    analyze_stack.pop();
                    while (ele_right != "\0") {
                        analyze_stack.push(find_last(ele_right));
                    }
                    left = analyze_stack.top();
                }
                found_pair.clear();
            }
            if (!flag) {
                analyze_stack.pop();
                token++;
            }
            found_pair.clear();
        }

    }

    return 0;
}

猜你喜欢

转载自blog.csdn.net/lrwwll/article/details/72853524