// 版权声明:本文为博主原创文章,欢迎转载,转载请贴上博客地址 http://blog.csdn.net/xdg_blog https://blog.csdn.net/xdg_blog/article/details/52865165
/*------------------------------------
author:XD_G
location:SWUN
time:05/2016
course:Compiler
teacher:Wei Zhou
如果认识周伟老师,请代我向他问好!
------------------------------------*/
#include <iostream>
#include <string>
#include <vector>
#include <iterator>
#include <fstream>//files
#include <iomanip>//put_time()
#include <ctime>//time
#include <sstream>//stringstream
#include <direct.h>//_mkdir()
using namespace std;
#pragma region 全局变量
const int MAX_PROCESS_SIZE(100);
const vector<string > reserveWordTable = { "function","if","then","while","do","endfunc" };
int SYN(-1);
int SYNPREV(SYN);
string token;
vector<string > subStrSet;
vector<int > lineNumber;
bool REC_FINISH_FLAG = false;
vector<string > result;
#pragma endregion
#pragma region 创建输出文件名字符串
///函数作用(使用系统时间生成文件名字符串)
///参数(可选参数1:字符串前缀,可选参数2:文件后缀名,例如".txt"、“.log”
///返回值(根据当前的系统时间以及提供的参数生成的带后缀名的字符串)
string getNowTimeFileName(const string preStr = "", const string suffixalNameStr = "") {//获取当前的系统时间以创建文件名
static string pastTime;
time_t t = time(NULL);
tm tm = *localtime(&t);
string nowTime;
stringstream os;
os.clear();
os << put_time(&tm, "%y%m%d_%H%M%S");
nowTime = os.str();
string fileName;
static short k = 0;
if (nowTime != pastTime && (!nowTime.empty())) {//因为获取时间只精确到秒,但是程序可以在一秒之内创建数百个文件,所以要对文件名进行区分
fileName = preStr + " - " + nowTime + "_0000" + suffixalNameStr;
k = 0;
}
else {
char extra[5];
sprintf_s(extra, sizeof(extra), "%04d", ++k);//
fileName = preStr + " - " + nowTime + "_" + extra + suffixalNameStr;
}
pastTime = nowTime;
return fileName;
}
#pragma endregion
//将输入字符串进行预处理并分割
void originStrPartition(const string source, vector<string > &destination) {
if (source.size() == 0)
return;
int num(1);
string temp;
for (string::const_iterator p = source.cbegin(); p != source.cend(); ++p) {
if ('\n' != *p && ' ' != *p && '\t' != *p)
temp.push_back(*p);
else {
if (!temp.empty()) {
destination.push_back(temp);
lineNumber.push_back(num);
}
if ('\n' == *p)
++num;
temp.clear();
}
}
if (!temp.empty()) {
destination.push_back(temp);
lineNumber.push_back(num);
}
temp.clear();
}
//处理预处理之后的字符串为单个字符的情况
void singleChar(string str, int lineNum) {
SYN = -1;
token.clear();
char ch = *str.begin();
if (ch >= 'a' && ch <= 'z' || ch >= 'A'&& ch <= 'Z') {
token.push_back(ch);
SYN = 10;
return;
}
if (ch >= '0' && ch <= '9') {
token.push_back(ch);
SYN = 11;
return;
}
switch (ch) {
case '<':
token.push_back(ch);
SYN = 20;
case '>':
token.push_back(ch);
SYN = 23;
break;
case '=':
token.push_back(ch);
SYN = 18;
break;
case '!':
token.push_back(ch);
SYN = -1 - lineNum;
break;
case '+':
token.push_back(ch);
SYN = 13;
break;
case '-':
token.push_back(ch);
SYN = 14;
break;
case '*':
token.push_back(ch);
SYN = 15;
break;
case '/':
token.push_back(ch);
SYN = 16;
break;
case ';':
token.push_back(ch);
SYN = 26;
break;
case '(':
token.push_back(ch);
SYN = 27;
break;
case ')':
token.push_back(ch);
SYN = 28;
break;
case '#':
token.push_back(ch);
SYN = 0;
break;
case '\n':
break;
case ' ':
break;
case '\t':
break;
default:
token.push_back(ch);
SYN = -1 - lineNum;
}
}
//以字符串为单位处理
void scanner(string::iterator &ch, string &str, int lineNum) {
SYN = -1;
token.clear();
if (*ch >= 'a'&& *ch <= 'z' || *ch >= 'A'&& *ch <= 'Z') {
while (*ch >= 'a' && *ch <= 'z' || *ch >= 'A' && *ch <= 'Z' || *ch >= '0' && *ch <= '9') {//判断字符串为变量名形式
token.push_back(*ch);
if (++ch == str.end())
break;
}
SYN = 10;
for (auto p = reserveWordTable.begin(); p != reserveWordTable.end(); ++p) {
if (token == *p) {
SYN = p - reserveWordTable.begin() + 1;
break;
}
}
if (ch == str.end())
return;
}
else {
bool eStatus = false;//是否进入科学记数法判断阶段
if ('.' == *ch) {
SYN = -1 - lineNum;//+
while ((*ch >= '0' && *ch <= '9') || '.' == *ch || 'e' == *ch || 'E' == *ch) {
token.push_back(*ch++);//+
if (ch == str.end())
break;
}
return;
}
//判断之前识别的单元是否为关键词或者运算符
bool opFlag1 = (SYNPREV >= 13 && SYNPREV <= 27) || (SYNPREV >= 1 && SYNPREV <= 6) || -1 == SYNPREV;
if (*ch == '0' || opFlag1 && (('+' == *ch || '-' == *ch) && *(ch + 1) == '0')) {
if ('+' == *ch || '-' == *ch) {
token.push_back(*ch++);
}
if (*(str.end() - 1) == '0') {
SYN = 11;
token.push_back(*ch++);
return;
}
if (*(ch + 1) >= '0' && *(ch + 1) <= '9') {//如果首字符为0,并且第二个字符依然为数字字符
//不能存在前导零//+
SYN = -1 - lineNum;//+
while ((*ch >= '0' && *ch <= '9') || '.' == *ch || 'e' == *ch || 'E' == *ch) {
token.push_back(*ch++);//+
if (ch == str.end())
break;
}
return;
}
token.push_back(*ch++);
if ('.' == *ch) {//如果0之后的'.'后面没有数字字符
if (*(str.end() - 1) == '.') {
//小数点不能处于数字末端//+
SYN = -1 - lineNum;//+
while (ch != str.end())//+
token.push_back(*ch++);//+
return;
}
if (*(ch + 1) >= '0' && *(ch + 1) <= '9') {//如果小数点之后有数字字符
SYN = 11;
token.push_back(*ch++);
}
else {//"0...3"
//小数点之后需要存在数字//+
SYN = -1 - lineNum;//+
while ((*ch >= '0' && *ch <= '9') || '.' == *ch || 'e' == *ch || 'E' == *ch) {
token.push_back(*ch++);//+
if (ch == str.end())
break;
}
return;
}
while (*ch >= '0' && *ch <= '9') {//循环读取小数点之后的数字字符
token.push_back(*ch);
if (++ch == str.end())
break;
}
if (ch == str.end())
return;
if ('E' != *ch && 'e' != *ch && '+' != *ch && '-' != *ch && '*' != *ch && '/' != *ch && ')' != *ch && ';' != *ch) {
SYN = -1 - lineNum;//+
while ('E' != *ch && 'e' != *ch && '+' != *ch && '-' != *ch && '*' != *ch && '/' != *ch && ')' != *ch && ';' != *ch) {
token.push_back(*ch++);//+
if (ch == str.end())
break;
}
return;
}
if ('.' == *ch) {//存在多个小数点
SYN = -1 - lineNum;//+
while ((*ch >= '0' && *ch <= '9') || '.' == *ch || 'e' == *ch || 'E' == *ch) {
token.push_back(*ch++);
if (ch == str.end())
break;
}
return;
}
else if ('e' == *ch || 'E' == *ch)
eStatus = true;
else {//处理形如"+0.001;"这样的形式
SYN = 11;
return;
}
}
else {//处理形如"+0;"、"0;"这样的形式
SYN = 11;
return;
}
}
//判断之前识别的单元是否为关键词或者运算符
bool opFlag = (SYNPREV >= 13 && SYNPREV <= 27) || (SYNPREV >= 1 && SYNPREV <= 6) || -1 == SYNPREV;
if ((*ch >= '1' && *ch <= '9') || opFlag && (('+' == *ch || '-' == *ch) && (*(ch + 1) >= '1' && *(ch + 1) <= '9'))) {
if ('+' == *ch || '-' == *ch) {
token.push_back(*ch++);
}
while (*ch >= '0' && *ch <= '9') {
token.push_back(*ch);
if (++ch == str.end())
break;
}
SYN = 11;
if (ch == str.end())
return;
if ('.' != *ch && 'E' != *ch && 'e' != *ch && '+' != *ch && '-' != *ch && '*' != *ch && '/' != *ch && ')' != *ch && ';' != *ch) {
SYN = -1 - lineNum;//+
while ('.' != *ch && 'E' != *ch && 'e' != *ch && '+' != *ch && '-' != *ch && '*' != *ch && '/' != *ch && ')' != *ch && ';' != *ch) {
token.push_back(*ch++);//+
if (ch == str.end())
break;
}
return;
}
if ('.' == *ch || 'e' == *ch || 'E' == *ch) {
if ('.' == *ch) {
if (*(str.end() - 1) == '.') {//"3.3e."
//小数点不能处于数字末端//+
SYN = -1 - lineNum;//+
while ((*ch >= '0' && *ch <= '9') || '.' == *ch || 'e' == *ch || 'E' == *ch) {
token.push_back(*ch++);//+
if (ch == str.end())
break;
}
return;
}
if (*(ch + 1) >= '0' && *(ch + 1) <= '9') {
token.push_back(*ch++);
}
else {
//小数点之后需要存在数字//+
SYN = -1 - lineNum;//+
while ((*ch >= '0' && *ch <= '9') || '.' == *ch || 'e' == *ch || 'E' == *ch) {
token.push_back(*ch++);//+
if (ch == str.end())
break;
}
return;
}
while (*ch >= '0' && *ch <= '9') {
token.push_back(*ch);
if (++ch == str.end())
break;
}
if (ch == str.end())
return;
if ('.' == *ch) {
SYN = -1 - lineNum;//+
while (1) {
token.push_back(*ch++);//+
if (ch == str.end())
break;
}
return;
}
if ('E' != *ch && 'e' != *ch && '+' != *ch && '-' != *ch && '*' != *ch && '/' != *ch && ')' != *ch && ';' != *ch) {
SYN = -1 - lineNum;//+
while ('E' != *ch && 'e' != *ch && '+' != *ch && '-' != *ch && '*' != *ch && '/' != *ch && ')' != *ch && ';' != *ch) {
token.push_back(*ch++);//+
if (ch == str.end())
break;
}
return;
}
if ('.' == *ch) {//存在多个小数点
SYN = -1 - lineNum;//+
while ((*ch >= '0' && *ch <= '9') || '.' == *ch || 'e' == *ch || 'E' == *ch) {
token.push_back(*ch++);
if (ch == str.end())
break;
}
return;
}
if ('e' == *ch || 'E' == *ch)
eStatus = true;
else {//"+11.1"
SYN = 11;
return;
}
}
else if ('e' == *ch || 'E' == *ch)
eStatus = true;
else {//处理形如"+1.0;"这样的形式
SYN = 11;
return;
}
}
else {
SYN = 11;
return;
}
}
if (eStatus) {//如果读到E或者e
if (*(str.end() - 1) == 'e' || *(str.end() - 1) == 'E') {//"+211e"
SYN = -1 - lineNum;
token.push_back(*ch++);
return;
}
token.push_back(*ch++);//将字符指针从e或E转到下一位
if (*(str.end() - 1) == '+' || *(str.end() - 1) == '-') {//"'+211e+'"
//不能处于数字末端//+
SYN = -1 - lineNum;//+
token.push_back(*ch++);
return;
}
if ('+' == *ch || '-' == *ch) {
token.push_back(*ch++);
}
if ('.' == *ch) {//"123e.123"
SYN = -1 - lineNum;//+
while ((*ch >= '0' && *ch <= '9') || '.' == *ch || 'e' == *ch || 'E' == *ch) {
token.push_back(*ch++);//+
if (ch == str.end())
break;
}
return;
}
if (*ch >= '1' && *ch <= '9') {
token.push_back(*ch++);
if (ch != str.end())
while (*ch >= '0' && *ch <= '9') {
token.push_back(*ch);
if (++ch == str.end())
break;
}
if (ch == str.end())
return;
if ('+' != *ch && '-' != *ch && '*' != *ch && '/' != *ch && ')' != *ch && ';' != *ch) {
SYN = -1 - lineNum;//+
while ('+' != *ch && '-' != *ch && '*' != *ch && '/' != *ch && ')' != *ch && ';' != *ch) {
token.push_back(*ch++);//+
if (ch == str.end())
break;
}
return;
}
if ('.' == *ch) {//如果在指数部分存在小数点
SYN = -1 - lineNum;//+
while ((*ch >= '0' && *ch <= '9') || '.' == *ch || 'e' == *ch || 'E' == *ch) {
token.push_back(*ch++);
if (ch == str.end())
break;
}
return;
}
}
else {
if ('0' == *ch) {
//不能存在前导零//+
SYN = -1 - lineNum;//+
while ((*ch >= '0' && *ch <= '9') || '.' == *ch || 'e' == *ch || 'E' == *ch) {
token.push_back(*ch++);//+
if (ch == str.end())
break;
}
return;
}
else {//"7.8e+;"
SYN = -1 - lineNum;//+
while ((*ch >= '0' && *ch <= '9') || '.' == *ch || 'e' == *ch || 'E' == *ch) {
token.push_back(*ch++);//+
if (ch == str.end())
break;
}
return;
}
}
}
else if ('.' == *ch && (*(ch + 1) >= '0' && *(ch + 1) <= '9')) {//".123"
token.push_back(*ch++);
SYN = -1 - lineNum;//+
while ((*ch >= '0' && *ch <= '9') || '.' == *ch || 'e' == *ch || 'E' == *ch) {
token.push_back(*ch++);//+
if (ch == str.end())
break;
}
}
else {
switch (*ch) {
case '<':
if (ch == prev(str.end())) {
token.push_back(*ch);
SYN = 20;
++ch;
}
else
if (*(ch + 1) == '=') {//
token.push_back(*ch);
token.push_back(*(ch + 1));
SYN = 21;
ch += 2;
}
else {
token.push_back(*ch);
SYN = 20;
++ch;
}
break;
case '>':
if (ch == prev(str.end())) {
token.push_back(*ch);
SYN = 23;
++ch;
}
else
if (*(ch + 1) == '=') {
token.push_back(*ch);
token.push_back(*(ch + 1));
SYN = 24;
ch += 2;
}
else {
token.push_back(*ch);
SYN = 23;
++ch;
}
break;
case '=':
if (ch == prev(str.end())) {
token.push_back(*ch);
SYN = 18;
++ch;
}
else
if (*(ch + 1) == '=') {
token.push_back(*ch);
token.push_back(*(ch + 1));
SYN = 25;
ch += 2;
}
else {
token.push_back(*ch);
SYN = 18;
++ch;
}
break;
case '!':
if (ch == prev(str.end())) {
SYN = -1 - lineNum;
token.push_back(*ch);
++ch;
}
else
if (*(ch + 1) == '=') {
token.push_back(*ch);
token.push_back(*(ch + 1));
SYN = 22;
ch += 2;
}
break;
case '+':
token.push_back(*ch);
SYN = 13;
++ch;
break;
case '-':
token.push_back(*ch);
SYN = 14;
++ch;
break;
case '*':
token.push_back(*ch);
SYN = 15;
++ch;
break;
case '/':
token.push_back(*ch);
SYN = 16;
++ch;
break;
case ';':
token.push_back(*ch);
SYN = 26;
++ch;
break;
case '(':
token.push_back(*ch);
SYN = 27;
++ch;
break;
case ')':
token.push_back(*ch);
SYN = 28;
++ch;
break;
case '#':
token.push_back(*ch);
SYN = 0;
break;
case '\n':
break;
case ' ':
break;
case '\t':
break;
default:
token.push_back(*ch++);
SYN = -1 - lineNum;
}
}
}
}
//将结果格式化为字符串
void process(string str, int lineNum) {
string resultStr;
if (str.size() != 1) {
string::iterator ch = str.begin();
do {
if (str.end() == ch)
break;
scanner(ch, str, lineNum);
SYNPREV = SYN;
if (SYN < -1) {
int num = -1 - SYN;
char buff[10];
itoa(num, buff, 10);
string temp(buff);
resultStr = "(ERROR IN LINE:" + temp + ",'" + token + "')";
result.push_back(resultStr);
}
else if (-1 != SYN) {
char buff[10];
itoa(SYN, buff, 10);
string temp(buff);
if (10 == SYN)
resultStr = "(" + temp + ",'" + token + "')";
else
resultStr = "(" + temp + "," + token + ")";
result.push_back(resultStr);
}
else {
resultStr = "(ERROR)";
result.push_back(resultStr);
}
} while (0 != SYN);
}
else {
singleChar(str, lineNum);
SYNPREV = SYN;
if (-1 != SYN) {
char buff[10];
itoa(SYN, buff, 10);
string temp(buff);
if (10 == SYN)
resultStr = "(" + temp + "," + "'" + token + "'" + ")";
else
resultStr = "(" + temp + "," + token + ")";
result.push_back(resultStr);
}
}
}
int main(int argc, char *argv[]) {
int displayFlag(argc);
string originStr;
if (1 != argc) {//当参数数量大于1时
cout << "Display in Console?(Y/N):";
char readKey = getchar();
if ('Y' == readKey || 'y' == readKey || '\n' == readKey)
displayFlag = 1;
}
if (1 == argc) {//当不指定参数时,手动进行输入数据
cout << "Please input string:" << endl;
int temp(0);
char charSet[MAX_PROCESS_SIZE];
char ch;
do {
scanf("%c", &ch);
charSet[temp++] = ch;
} while (ch != '#' && temp < MAX_PROCESS_SIZE);
string inputStr(charSet);
originStr = inputStr;
}
else if (2 == argc || 3 == argc) {//当指定一个或者两个参数时,从文件读入数据
if (2 == argc && 1 != displayFlag)//当只指定一个参数时,输出路径为工程目录下的"Output"文件夹
_mkdir("Output");
ifstream infile(argv[1]);
if (!infile)
exit(1);
char buff[MAX_PROCESS_SIZE];
string fileStr;
bool firstRead = true;
while (infile.good() && !infile.eof()) {
memset(buff, 0, MAX_PROCESS_SIZE);
infile.getline(buff, MAX_PROCESS_SIZE);
if (true == firstRead) {
fileStr += buff;
firstRead = false;
}
else
fileStr = fileStr + '\n' + buff;
}
originStr = fileStr;
infile.close();
}
else
exit(1);
originStr.erase(find(originStr.begin(), originStr.end(), '#'), originStr.end());
originStr += " #";
originStrPartition(originStr, subStrSet);
for (vector<string >::iterator p = subStrSet.begin(); p != subStrSet.end(); ++p) {
int lineNum = *(lineNumber.begin() + distance(subStrSet.begin(), p));
process(*p, lineNum);
}
string outputStr;
for (auto p : result) {
outputStr += p + '\n';
}
if (1 == displayFlag)
cout << outputStr << endl;
if (2 == displayFlag) {
ofstream outfile;
outfile.open(".\\Output\\" + getNowTimeFileName("Output File", ".txt"));
outfile << outputStr;
outfile.close();
}
else if (3 == displayFlag) {
ofstream outfile;
outfile.open(argv[2]);
outfile << outputStr;
outfile.close();
}
system("pause");
return 0;
}