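/** Note: this program assigns token codes to the following kinds of symbols: every keyword has its own code; all variables (identifiers) share one code; integer constants share one code; floating-point constants share one code; every delimiter has its own code; every operator has its own code. **/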
#include<iostream>
#include<string>
#include<cstring>
#include<cstdio>
#include<string>
#include<cstdlib>
#include<cctype>
#include<map>
#include<cmath>
using namespace std;
// Size constant for the raw character buffer used while reading the input.
const int maxSize=1000000;
// Number of entries in each lookup table below; init() derives the token
// codes from the position of an entry inside its table.
const int maxSize_keyword=32;
const int maxSize_op_one=14;
const int maxSize_op_more=17;
const int maxSize_delimiter=12;
const int maxSize_Word=256;
const int maxSize_Digit=256;
// NOTE(review): Word and Digit are declared here but are not referenced by
// any code visible in this file.
string Word[maxSize_Word];
string Digit[maxSize_Digit];
// Number of the input line currently being tokenized; maintained by
// read_input() and printed in begin()'s error messages.
int line;
// Set by begin() when it treats a '-' as the sign of the numeric constant
// that follows; cleared again when that constant is printed.
bool isNegative=false;
// Keyword table. The order matters: init() assigns code (index + 1).
string KeyWord[maxSize_keyword]=
{
"auto","break","case","char","const","continue","default",
"do","double","else","enum","extern","float","for",
"goto","if","int","long","register","return","short",
"signed","sizeof","static","struct","switch","typedef",
"union","unsigned","void","volatile","while"
};
// Token codes for single characters (one-character operators and delimiters).
map<char,int> mapchar;
// Token codes for strings (keywords and multi-character operators).
map<string,int> mapstring;
// One-character operators; the order defines their codes in init().
char op_one[maxSize_op_one]={'+','-','*','/','%','>','<','!','&','|','^','~','=','.'};// 14 entries
// Two-character operators; the order defines their codes in init().
string op_more[maxSize_op_more]={"++","--","==","!=",">=","<=","||","&&","<<",">>","+=","-=","*=","/=","%=","&=","^="};// 17 entries
// Delimiters; the order defines their codes in init().
char delimiter[maxSize_delimiter] = {
'(', ')' , ',' , ';','[' ,
']' , ':' , '{' , '}','\'','"','#'};
// Returns true when str is exactly equal to one of the entries of KeyWord.
bool iskeywords(string str){
    int pos=0;
    // Advance until a match is found or the table is exhausted.
    while(pos<maxSize_keyword&&KeyWord[pos]!=str)
        ++pos;
    return pos<maxSize_keyword;
}
// Returns true when op is one of the single-character operators in op_one.
bool isop_one(char op){
    bool found=false;
    for(int k=0;k<maxSize_op_one&&!found;++k)
        found=(op_one[k]==op);
    return found;
}
// Returns true when str is exactly equal to one of the entries of op_more.
bool isop_more(string str){
    const string *cursor=op_more;
    const string *const stop=op_more+maxSize_op_more;
    for(;cursor!=stop;++cursor){
        if(*cursor==str)
            return true;
    }
    return false;
}
// Returns true when deli is one of the characters listed in delimiter.
bool isdelimiter(char deli){
    // Scan order does not matter for a membership test, so walk backwards.
    int remaining=maxSize_delimiter;
    while(remaining-->0){
        if(delimiter[remaining]==deli)
            return true;
    }
    return false;
}
void begin(char *buf,int buf_index);
void init()
{
//其他符号的种别码是0;
//关键字采用一符一码1-32;
//变量33
//整型常量34;
//实型常量采用35;
//字符串常数36
//运算符37-50,51-67;
//界限符68-79;
for(int i=0;i<32;i++)
mapstring[KeyWord[i]]=i+1;
for(int i=37;i<=50;i++)
mapchar[op_one[i-37]]=i;
for(int i=51;i<=67;i++)
mapstring[op_more[i-51]]=i;
for(int i=68;i<=79;i++)
mapchar[delimiter[i-68]]=i;
}
// Hands one buffered line to begin() and empties the buffer afterwards.
// A '\0' sentinel is appended first so that begin()'s one-character
// lookahead never reads beyond memory owned by the buffer.
static void tokenize_line(std::string &text)
{
    const int last=static_cast<int>(text.size())-1;   // -1 for an empty line
    text.push_back('\0');
    begin(&text[0],last);
    text.clear();
}

// Reads "input_file.txt" character by character, splits it into lines and
// passes every line to begin() for tokenization. The global `line` is reset
// to 0 and incremented once per line before that line is tokenized.
//
// If the file cannot be opened, a message is written to stderr and the
// function returns without tokenizing anything.
void read_input()
{
    FILE *stream=fopen("input_file.txt","r");
    if(stream==NULL){
        std::cerr<<"error: cannot open input_file.txt"<<std::endl;
        return;
    }
    // A growable heap buffer: no fixed 1 MB stack array and no overflow on
    // very long lines.
    std::string buf;
    // fgetc returns int; storing it in a char would make EOF either
    // unreachable (unsigned char) or indistinguishable from byte 0xFF.
    int ch;
    line=0;
    while((ch=fgetc(stream))!=EOF){
        if(ch!='\n'){
            buf+=static_cast<char>(ch);
            continue;
        }
        line++;
        tokenize_line(buf);
    }
    // A final line that is not terminated by '\n' still has to be tokenized.
    if(!buf.empty()){
        line++;
        tokenize_line(buf);
    }
    fclose(stream);
}
void begin(char *buf,int buf_index)
{
int index=0;
while(index<=buf_index){
//处理注释
if(buf[index]=='/'&&buf[index+1]=='/')
{
index+=2;
while(index<=buf_index)
index++;
}
else if(buf[index]=='/'&&buf[index+1]=='*')
{
index+=2;
while(buf[index]!='/')
index++;
index++;
}
else if(buf[index]==' '||buf[index]=='\t')
index++;
//标识符
else if(isalpha(buf[index])||buf[index]=='_')
{
enum States{Start,One,Two,Err};
int i = 0,flag = 0;
char ch;
int length;
enum States state = Start;
string str="";
while(index<=buf_index&&(buf[index]!=' '&&buf[index]!='\t')&&!isop_one(buf[index])&&!isdelimiter(buf[index])){
str+=buf[index++];
}
int len=str.length();
while(ch = str[i]){
switch (state){
case Start:
if(isalpha(ch)||ch=='_')
state=One;
i++;
break;
case One:
if (isdigit(ch)||isalpha(ch)||ch=='_')
state=One;
else{
state=Err;
break;
}
i++;
break;
case Err:
if (flag == 0){
cout<<"error:行"<<line<<"中"<<str<<"的第"<<i+1<<"列出现错误!"<<endl;
flag = 1;
}
i++;
break;
}
}
if(state==One){
if(iskeywords(str))
cout<<"("<<mapstring[str]<<","<<str<<")"<<endl;
else
cout<<"(33,"<<str<<")"<<endl;
}
else
cout<<str<<"为不合法变量"<<endl;
}
//数字开头
else if(isdigit(buf[index]))
{
enum States{Start,One,Two,Three,Four,Five,Err};
int i = 0,flag = 0;
char ch;
int length;
enum States state = Start;
string final_str="";
while(index<=buf_index&&(buf[index]!=' '&&buf[index]!='\t'&&!isdelimiter(buf[index])&&!(isop_one(buf[index])&(buf[index]!='.')))){
final_str+=buf[index++];
}
length = final_str.length();
while(ch = final_str[i]){
switch (state){
case Start :
if (isdigit(ch))
state = One;
i++;
break;
case One:
if (isdigit(ch))
state = One;
else if (ch == '.')
state = Two;
else if (ch == 'E'|| ch == 'D'|| ch == 'e'|| ch == 'd')
state = Three;
else{
state = Err;
break;
}
i++;
break;
case Two:
if (isdigit(ch))
state = Two;
else if (ch == 'E'|| ch == 'D'|| ch == 'e'|| ch == 'd')
state = Three;
else{
state = Err;
break;
}
i++;
break;
case Three:
if (ch == '+' || ch == '-')
state = Four;
else if (isdigit(ch))
state = Five;
else{
state = Err;
break;
}
i++;
break;
case Five:
if (isdigit(ch))
state = Five;
else{
state = Err;
break;
}
i++;
break;
case Err:
if (flag == 0){
cout<<"error:行"<<line<<"中"<<final_str<<"的第"<<i+1<<"列出现错误!"<<endl;
flag = 1;
}
i++;
break;
}
}
if (state == One || state == Two || state == Five){
if(state==One){
if(isNegative){
cout<<"(34,-"<<final_str<<")"<<endl;
isNegative=false;
}
else
cout<<"(34,"<<final_str<<")"<<endl;
}
else{
if(isNegative){
cout<<"(35,-"<<final_str<<")"<<endl;
isNegative=false;
}
else
cout<<"(35,"<<final_str<<")"<<endl;
}
}
else
cout<<final_str<<"为不合法的实型常数"<<endl;
}
//运算符和界限符
else
{
string final_str="";
if(isdelimiter(buf[index])){
char a=buf[index++];
string str_charator="";
if(a=='"'){
cout<<"("<<mapchar[a]<<","<<a<<")"<<endl;
while(buf[index]!='"'){
str_charator+=buf[index++];
}
if(isop_more(str_charator)){
cout<<"("<<mapstring[str_charator]<<","<<str_charator<<")"<<endl;
}else{
cout<<"(36"<<","<<str_charator<<")"<<endl;
}
cout<<"("<<mapchar[a]<<","<<a<<")"<<endl;
index++;
}
else
cout<<"("<<mapchar[a]<<","<<a<<")"<<endl;
}
else if(isop_one(buf[index])){
while(index<=buf_index&&(isop_one(buf[index])))
final_str+=buf[index++];
int len=final_str.length();
if(len==2){
if(isop_more(final_str))
cout<<"("<<mapstring[final_str]<<","<<final_str<<")"<<endl;
else if(isop_one(final_str[0])&&final_str[1]=='-'){
cout<<"("<<mapchar[final_str[0]]<<","<<final_str[0]<<")"<<endl;
isNegative=true;
}else{
cout<<"(0,"<<final_str<<")"<<endl;
index++;
}
}
else if(len==1){
char a=final_str[0];
if(isop_one(a))
cout<<"("<<mapchar[a]<<","<<a<<")"<<endl;
else{
cout<<"(0,"<<a<<")"<<endl;
index++;
}
}
}
else{
cout<<"(0,"<<buf[index]<<")"<<endl;
index++;
}
}
}
}
// Program entry point: builds the token-code tables, then tokenizes the
// input file.
int main()
{
    // The lookup maps must be populated before any line is tokenized.
    init();
    read_input();
    return EXIT_SUCCESS;
}
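// Article title: "Compiler Principles: a crude lexical analyzer"
// (web page navigation text: "You may also like")
// Reposted from blog.csdn.net/zyf2695421695/article/details/88581272
// (web page navigation text: "Today's recommendations")
// (web page navigation text: "Weekly ranking")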