编译原理 - 课程设计(简易词法分析器)

版权声明:本文为博主原创文章,欢迎转载,转载请贴上博客地址 http://blog.csdn.net/xdg_blog https://blog.csdn.net/xdg_blog/article/details/52865165
/*------------------------------------
author:XD_G
location:SWUN
time:05/2016
course:Compiler
teacher:Wei Zhou
如果认识周伟老师,请代我向他问好!
------------------------------------*/
#include <algorithm>//find()
#include <cstdio>//scanf(), getchar()
#include <cstdlib>//exit(), system()
#include <cstring>//memset()
#include <ctime>//time
#include <fstream>//files
#include <iomanip>//put_time()
#include <iostream>
#include <iterator>
#include <sstream>//stringstream
#include <string>
#include <vector>
#include <direct.h>//_mkdir()


using namespace std;

#pragma region 全局变量
// Size limit that main() applies while reading input.
const int MAX_PROCESS_SIZE(100);
// Reserved words; scanner() reports a match as its 1-based position in this table (1..6).
const vector<string > reserveWordTable = { "function","if","then","while","do","endfunc" };
// Category code of the most recently scanned token, written by scanner()/singleChar().
// -1 means nothing was recognised, values below -1 encode an error as (-1 - lineNumber),
// and 0 is the '#' end marker.
int SYN(-1);
// Value SYN had after the previous token; process() updates it and scanner() reads it
// to decide whether a leading '+'/'-' belongs to a number.
int SYNPREV(SYN);
// Text of the most recently scanned token.
string token;
// Whitespace-separated pieces of the input; main() passes it to originStrPartition() to fill.
vector<string > subStrSet;
// Line number of every stored piece; originStrPartition() appends one entry per piece.
vector<int > lineNumber;
// Not referenced by any code visible in this file.
bool REC_FINISH_FLAG = false;

// Formatted output lines, appended by process() and printed/written by main().
vector<string > result;

#pragma endregion


#pragma region 创建输出文件名字符串
/// Builds a file name from the current local time.
///
/// The result has the form "<preStr> - <yymmdd_HHMMSS>_<NNNN><suffixalNameStr>".
/// The clock only has one-second resolution, so a zero-padded sequence number
/// is appended: it is 0000 for the first name produced in a given second and
/// increases for every further call that lands in the same second.
///
/// @param preStr           optional prefix placed in front of the timestamp
/// @param suffixalNameStr  optional file extension, e.g. ".txt" or ".log"
/// @return the generated file name (no directory part)
std::string getNowTimeFileName(const std::string &preStr = "", const std::string &suffixalNameStr = "") {
    static std::string pastTime; // timestamp produced by the previous call
    static int sequence = 0;     // disambiguates names created within the same second

    const std::time_t now = std::time(nullptr);
    std::string nowTime;
    // localtime() may return a null pointer; in that case the timestamp stays empty
    // and the sequence number alone keeps the names distinct.
    if (const std::tm *local = std::localtime(&now)) {
        std::ostringstream stamp;
        stamp << std::put_time(local, "%y%m%d_%H%M%S");
        nowTime = stamp.str();
    }

    if (nowTime != pastTime && !nowTime.empty())
        sequence = 0;
    else
        ++sequence;
    pastTime = nowTime;

    // Stream formatting instead of a fixed char buffer: portable, and it cannot
    // overflow when the sequence number needs more than four digits.
    std::ostringstream name;
    name << preStr << " - " << nowTime << '_'
         << std::setw(4) << std::setfill('0') << sequence
         << suffixalNameStr;
    return name.str();
}
#pragma endregion

// Splits source at spaces, tabs and newlines and appends every non-empty piece
// to destination. For each stored piece the 1-based line on which it ends is
// appended to the global lineNumber vector, so both vectors stay index-aligned.
void originStrPartition(const string source, vector<string > &destination) {
    int currentLine = 1;
    string piece;

    for (const char c : source) {
        const bool isSeparator = ('\n' == c || ' ' == c || '\t' == c);
        if (!isSeparator) {
            piece += c;
            continue;
        }

        // A separator closes the piece collected so far (if there is one).
        if (!piece.empty()) {
            destination.push_back(piece);
            lineNumber.push_back(currentLine);
            piece.clear();
        }
        if ('\n' == c)
            ++currentLine;
    }

    // The input may end without a trailing separator.
    if (!piece.empty()) {
        destination.push_back(piece);
        lineNumber.push_back(currentLine);
    }
}

// Classifies a piece that consists of exactly one character.
//
// Writes the outcome to the globals token and SYN:
//   letter -> 10, digit -> 11, '+' 13, '-' 14, '*' 15, '/' 16, '=' 18,
//   '<' 20, '>' 23, ';' 26, '(' 27, ')' 28, '#' 0.
// Any other character (including a lone '!') is stored in token and reported
// as an error by setting SYN to (-1 - lineNum). Whitespace and an empty str
// leave token empty and SYN at -1.
void singleChar(string str, int lineNum) {
    SYN = -1;
    token.clear();
    if (str.empty())
        return;

    const char ch = str.front();
    int code = -1 - lineNum; // error unless one of the cases below recognises ch

    if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
        code = 10;
    }
    else if (ch >= '0' && ch <= '9') {
        code = 11;
    }
    else {
        switch (ch) {
        case '<': code = 20; break; // previously fell through into '>' and produced "<<" / 23
        case '>': code = 23; break;
        case '=': code = 18; break;
        case '+': code = 13; break;
        case '-': code = 14; break;
        case '*': code = 15; break;
        case '/': code = 16; break;
        case ';': code = 26; break;
        case '(': code = 27; break;
        case ')': code = 28; break;
        case '#': code = 0;  break;

        case '\n':
        case ' ':
        case '\t':
            return; // whitespace is not a token

        default:
            break;  // '!' and every other character: keep the error code
        }
    }

    token.push_back(ch);
    SYN = code;
}

// Scans one token out of str, starting at ch, and advances ch past it.
//
// The token text is written to the global token and its category to the
// global SYN:
//   reserved word -> 1..6 (position in reserveWordTable), identifier -> 10,
//   number -> 11, '+' 13, '-' 14, '*' 15, '/' 16, '=' 18, '<' 20, '<=' 21,
//   '!=' 22, '>' 23, '>=' 24, '==' 25, ';' 26, '(' 27, ')' 28, '#' 0.
// A malformed lexeme is reported by setting SYN to (-1 - lineNum); token then
// holds the offending text. SYN stays -1 when nothing was recognised.
//
// Numbers may carry a sign, a fraction and an exponent ("-1.5e+3"). A number
// must be followed by the end of str or by one of + - * / ) ; otherwise the
// text up to the next such character is reported as an error.
//
// ch    in/out: current position inside str
// str   the piece being scanned
// lineNum  line number used to build error codes
void scanner(string::iterator &ch, string &str, int lineNum) {
    SYN = -1;
    token.clear();
    if (ch == str.end())
        return;

    const int errorCode = -1 - lineNum;

    // --- character classes -------------------------------------------------
    const auto isDigit = [](char c) { return c >= '0' && c <= '9'; };
    const auto isIdentChar = [](char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
    };
    // Characters swallowed into the error token of a malformed number.
    const auto isNumberChar = [](char c) {
        return (c >= '0' && c <= '9') || '.' == c || 'e' == c || 'E' == c;
    };
    // "May not follow ..." predicates; the allowed followers differ per number part.
    const auto badAfterExponent = [](char c) {
        return !('+' == c || '-' == c || '*' == c || '/' == c || ')' == c || ';' == c);
    };
    const auto badAfterFraction = [](char c) {
        return !('E' == c || 'e' == c || '+' == c || '-' == c || '*' == c || '/' == c || ')' == c || ';' == c);
    };
    const auto badAfterInteger = [](char c) {
        return !('.' == c || 'E' == c || 'e' == c || '+' == c || '-' == c || '*' == c || '/' == c || ')' == c || ';' == c);
    };

    // --- cursor helpers ----------------------------------------------------
    // Appends characters to token while accept() holds; never runs past the end.
    const auto takeWhile = [&](bool (*accept)(char)) {
        while (ch != str.end() && accept(*ch))
            token.push_back(*ch++);
    };
    // Marks the token as an error and swallows the characters accepted by accept().
    const auto fail = [&](bool (*accept)(char)) {
        SYN = errorCode;
        takeWhile(accept);
    };
    // True when ch points at the final character of str.
    const auto isLast = [&]() { return str.end() - ch == 1; };
    // Character after *ch, or '\0' when *ch is the last one (no read past the end).
    const auto peek = [&]() -> char { return (str.end() - ch > 1) ? *(ch + 1) : '\0'; };
    // Consumes an operator that may be followed by '=' to form a two-character operator.
    const auto takeOperator = [&](int plainCode, int withEqualsCode) {
        token.push_back(*ch++);
        if (ch != str.end() && '=' == *ch) {
            token.push_back(*ch++);
            SYN = withEqualsCode;
        }
        else
            SYN = plainCode;
    };

    // --- identifiers and reserved words -------------------------------------
    if ((*ch >= 'a' && *ch <= 'z') || (*ch >= 'A' && *ch <= 'Z')) {
        takeWhile(isIdentChar);
        SYN = 10;
        for (vector<string >::size_type i = 0; i < reserveWordTable.size(); ++i) {
            if (token == reserveWordTable[i]) {
                SYN = static_cast<int>(i) + 1;
                break;
            }
        }
        return;
    }

    // --- numbers -------------------------------------------------------------
    bool eStatus = false; // set once the mantissa is complete and ch rests on 'e'/'E'

    if ('.' == *ch) { // a number must not start with '.'
        fail(isNumberChar);
        return;
    }

    // A '+'/'-' is a sign only when the previous token was a reserved word (1..6),
    // an operator or punctuation other than ')' (13..27), or nothing at all (-1).
    const bool afterOperator = (SYNPREV >= 13 && SYNPREV <= 27) || (SYNPREV >= 1 && SYNPREV <= 6) || -1 == SYNPREV;
    const bool signAhead = afterOperator && ('+' == *ch || '-' == *ch);

    if ('0' == *ch || (signAhead && '0' == peek())) {
        // Mantissa that starts with 0: "0", "+0", "0.25", "-0.5e3".
        if ('+' == *ch || '-' == *ch)
            token.push_back(*ch++);

        if (isLast()) { // the '0' ends the piece
            // Tested on the cursor, not on the last character of the whole piece;
            // the old test split "0.50" into "0" and an error ".50".
            SYN = 11;
            token.push_back(*ch++);
            return;
        }
        if (isDigit(peek())) { // leading zeros are not allowed
            fail(isNumberChar);
            return;
        }

        token.push_back(*ch++); // the '0'
        if ('.' != *ch) {       // "+0;", "0;"
            SYN = 11;
            return;
        }
        if (!isDigit(peek())) { // '.' must be followed by a digit: "0.", "0..3"
            fail(isNumberChar);
            return;
        }

        SYN = 11;
        token.push_back(*ch++); // the '.'
        takeWhile(isDigit);     // fraction digits
        if (ch == str.end())
            return;

        if (badAfterFraction(*ch)) { // includes a second '.'
            fail(badAfterFraction);
            return;
        }
        if ('e' == *ch || 'E' == *ch)
            eStatus = true;
        else // "+0.001;"
            return;
    }
    else if ((*ch >= '1' && *ch <= '9') || (signAhead && peek() >= '1' && peek() <= '9')) {
        // Mantissa that starts with 1..9: "42", "-7", "3.14", "+2e5".
        if ('+' == *ch || '-' == *ch)
            token.push_back(*ch++);
        takeWhile(isDigit);
        SYN = 11;
        if (ch == str.end())
            return;

        if (badAfterInteger(*ch)) {
            fail(badAfterInteger);
            return;
        }

        if ('.' == *ch) {
            if (!isDigit(peek())) { // '.' must be followed by a digit: "3.", "3..1"
                fail(isNumberChar);
                return;
            }
            token.push_back(*ch++); // the '.'
            takeWhile(isDigit);     // fraction digits
            if (ch == str.end())
                return;

            if ('.' == *ch) { // second decimal point: the rest of the piece is the error
                SYN = errorCode;
                token.append(ch, str.end());
                ch = str.end();
                return;
            }
            if (badAfterFraction(*ch)) {
                fail(badAfterFraction);
                return;
            }
            if ('e' == *ch || 'E' == *ch)
                eStatus = true;
            else // "+11.1;"
                return;
        }
        else if ('e' == *ch || 'E' == *ch)
            eStatus = true;
        else // followed by an operator, ')' or ';'
            return;
    }

    if (eStatus) {
        // Exponent part; ch rests on 'e'/'E' and SYN is 11 from the mantissa.
        if (isLast()) { // "+211e"
            SYN = errorCode;
            token.push_back(*ch++);
            return;
        }
        token.push_back(*ch++); // the 'e'/'E'

        if (('+' == *ch || '-' == *ch) && isLast()) { // "+211e+"
            SYN = errorCode;
            token.push_back(*ch++);
            return;
        }
        if ('+' == *ch || '-' == *ch)
            token.push_back(*ch++);

        if (*ch >= '1' && *ch <= '9') {
            token.push_back(*ch++);
            takeWhile(isDigit);
            if (ch != str.end() && badAfterExponent(*ch)) // e.g. a '.' inside the exponent
                fail(badAfterExponent);
            return;
        }

        // "123e.123", an exponent with a leading zero, or no digits at all ("7.8e+;").
        fail(isNumberChar);
        return;
    }

    // --- operators and punctuation -------------------------------------------
    switch (*ch) {
    case '<':
        takeOperator(20, 21); // "<" or "<="
        break;

    case '>':
        takeOperator(23, 24); // ">" or ">="
        break;

    case '=':
        takeOperator(18, 25); // "=" or "=="
        break;

    case '!':
        // Only "!=" is valid. A '!' followed by anything else is now consumed and
        // reported; it used to be left in place, so the caller never advanced.
        takeOperator(errorCode, 22);
        break;

    case '+':
        token.push_back(*ch++);
        SYN = 13;
        break;

    case '-':
        token.push_back(*ch++);
        SYN = 14;
        break;

    case '*':
        token.push_back(*ch++);
        SYN = 15;
        break;

    case '/':
        token.push_back(*ch++);
        SYN = 16;
        break;

    case ';':
        token.push_back(*ch++);
        SYN = 26;
        break;

    case '(':
        token.push_back(*ch++);
        SYN = 27;
        break;

    case ')':
        token.push_back(*ch++);
        SYN = 28;
        break;

    case '#':
        // End marker: ch is intentionally left in place, SYN == 0 tells the caller to stop.
        token.push_back(*ch);
        SYN = 0;
        break;

    case '\n':
    case ' ':
    case '\t':
        ++ch; // skip whitespace so the caller always makes progress; SYN stays -1
        break;

    default:
        token.push_back(*ch++);
        SYN = errorCode;
    }
}

//将结果格式化为字符串
void process(string str, int lineNum) {
    string resultStr;
    if (str.size() != 1) {
        string::iterator ch = str.begin();
        do {
            if (str.end() == ch)
                break;
            scanner(ch, str, lineNum);
            SYNPREV = SYN;
            if (SYN < -1) {
                int num = -1 - SYN;
                char buff[10];
                itoa(num, buff, 10);
                string temp(buff);
                resultStr = "(ERROR IN LINE:" + temp + ",'" + token + "')";
                result.push_back(resultStr);
            }
            else if (-1 != SYN) {
                char buff[10];
                itoa(SYN, buff, 10);
                string temp(buff);
                if (10 == SYN)
                    resultStr = "(" + temp + ",'" + token + "')";
                else
                    resultStr = "(" + temp + "," + token + ")";
                result.push_back(resultStr);
            }
            else {
                resultStr = "(ERROR)";
                result.push_back(resultStr);
            }

        } while (0 != SYN);
    }
    else {
        singleChar(str, lineNum);
        SYNPREV = SYN;
        if (-1 != SYN) {
            char buff[10];
            itoa(SYN, buff, 10);
            string temp(buff);
            if (10 == SYN)
                resultStr = "(" + temp + "," + "'" + token + "'" + ")";
            else
                resultStr = "(" + temp + "," + token + ")";
            result.push_back(resultStr);
        }
    }
}

int main(int argc, char *argv[]) {
    int displayFlag(argc);
    string originStr;

    if (1 != argc) {//当参数数量大于1时
        cout << "Display in Console?(Y/N):";
        char readKey = getchar();
        if ('Y' == readKey || 'y' == readKey || '\n' == readKey)
            displayFlag = 1;
    }
    if (1 == argc) {//当不指定参数时,手动进行输入数据
        cout << "Please input string:" << endl;

        int temp(0);
        char charSet[MAX_PROCESS_SIZE];
        char ch;

        do {
            scanf("%c", &ch);
            charSet[temp++] = ch;
        } while (ch != '#' && temp < MAX_PROCESS_SIZE);
        string inputStr(charSet);
        originStr = inputStr;
    }
    else if (2 == argc || 3 == argc) {//当指定一个或者两个参数时,从文件读入数据
        if (2 == argc && 1 != displayFlag)//当只指定一个参数时,输出路径为工程目录下的"Output"文件夹
            _mkdir("Output");
        ifstream infile(argv[1]);
        if (!infile)
            exit(1);

        char buff[MAX_PROCESS_SIZE];
        string fileStr;
        bool firstRead = true;
        while (infile.good() && !infile.eof()) {
            memset(buff, 0, MAX_PROCESS_SIZE);
            infile.getline(buff, MAX_PROCESS_SIZE);
            if (true == firstRead) {
                fileStr += buff;
                firstRead = false;
            }
            else
                fileStr = fileStr + '\n' + buff;
        }
        originStr = fileStr;
        infile.close();
    }
    else
        exit(1);
    originStr.erase(find(originStr.begin(), originStr.end(), '#'), originStr.end());
    originStr += " #";

    originStrPartition(originStr, subStrSet);

    for (vector<string >::iterator p = subStrSet.begin(); p != subStrSet.end(); ++p) {
        int lineNum = *(lineNumber.begin() + distance(subStrSet.begin(), p));
        process(*p, lineNum);
    }

    string outputStr;

    for (auto p : result) {
        outputStr += p + '\n';
    }

    if (1 == displayFlag)
        cout << outputStr << endl;
    if (2 == displayFlag) {
        ofstream outfile;
        outfile.open(".\\Output\\" + getNowTimeFileName("Output File", ".txt"));
        outfile << outputStr;
        outfile.close();
    }
    else if (3 == displayFlag) {
        ofstream outfile;
        outfile.open(argv[2]);
        outfile << outputStr;
        outfile.close();
    }

    system("pause");
    return 0;
}

猜你喜欢

转载自blog.csdn.net/xdg_blog/article/details/52865165