Steps:
1. Choose the test code that will be fed to the lexical analyzer: take a bubble sort program (with only the preprocessor directives removed) and store it in the file a.txt, as shown in Figure 1.
2. List the word symbols (tokens) that appear in this test program and design a kind code for each of them, as shown in Figure 2.
3. Design the lexical analyzer.
The code is as follows. (To run the code, first prepare the file a.txt containing the code from Figure 1, and place it in the same directory as the .cpp file.)
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<math.h>
// Sentinel string stored as the last entry of the keyword table rwtab;
// reserve() stops scanning the table when it reaches this entry, so new
// keywords are added to rwtab in front of it.
#define _KEY_WORD_END "waiting for your expanding"
// Two-tuple describing one recognized word (token).
typedef struct
{
int typenum; // kind code of the word
char *word; // text (value) of the word
}WORD;
char *str; // the test source code, held as one NUL-terminated string
char token[255]; // buffer in which the current word is assembled
int p_input; // index of the next character to read from str
int p_token; // index of the next free slot in token
char ch; // the character most recently read from str
char *rwtab[] = {"void","int","if","for","break",_KEY_WORD_END};// table of keyword strings, terminated by the _KEY_WORD_END sentinel (the macro is written without a trailing semicolon)
// User-defined functions
char m_getch(); // read one character from the source string into ch
void getbc(); // skip spaces, carriage returns, line feeds and tabs
void concat(); // append ch to the word being built in token
int letter(); // is the current character ch a letter?
int digit(); // is the current character ch a digit?
int reserve(); // decide whether a letter-led word is an identifier or a keyword; returns its kind code
void retract(); // step back one character in the input
char *dtb(); // convert the decimal string in token to the corresponding binary string
WORD *scaner(); // lexical scan function; returns a pointer to the next word
int main ()
{
FILE *p; //测试代码文件指针
FILE *q; //词法分析器输出的文件指针
int filesize; //代码字符串长度
char *fname_in = "a.txt"; //源程序所在的文件
char *fname_out = "b.txt"; //词法分析得到的单词符号所在的文件
p_input = 0;
int over = 1; //种别码
char typenum_str[10]; //种别码对应的字符串,用于fputs写入文件
WORD *oneword = new WORD; //单词,即二元组结构体的指针
//打开测试程序所在的文件
p = fopen(fname_in,"rb"); //p = fopen("D:\\a.txt","rb");
if(p == NULL)
{
printf("打开文件%s错误\n",fname_in);
exit(1);//exit(-1);
}
fseek(p,0,SEEK_END);
filesize = ftell(p);
str = (char*)malloc(filesize+1);//动态内存分配
rewind(p);
fread(str,sizeof(char),filesize,p);
str[filesize] = '\0';
fclose(p);
//输出文件中的代码
printf("a.txt文件中的代码为:\n");
puts(str);
fclose(p);
//创建并打开词法分析所得目标程序所在的文件
q = fopen(fname_out,"wb");
if(q == NULL)
{
printf("打开文件%s错误\n",fname_out);
exit(1);
}
while(over<1000 && over!=-1) //1000为'\0'的种别码(不会输出,仅作为for循环退出的标志),-1位错误单词的种别码
{
oneword = scaner();
if(oneword->typenum<1000)
{
itoa(oneword->typenum,typenum_str,10);//种别码整数转换为对应字符串
fputs(typenum_str,q); //将种别码写入b.txt
fputs(", ",q); //将,写入b.txt
fputs(oneword->word,q); //将单词的值写入b.txt
fputc('\n',q); //写入一个单词后换行
}
over = oneword->typenum;
}
printf("词法分析所得的单词符号已存入b.txt文件\n");
fclose(q);
return 0;
}
//******************************!! User-defined functions follow !!*************************
//******************************************************************************************
// Fetch the character at the current input position of str into ch,
// advance the input position by one, and return that character.
char m_getch()
{
    ch = str[p_input++];
    return ch;
}
// Skip blanks: as long as ch is a space, carriage return, line feed or tab
// (a line break in a Windows file is "\r\n"), replace it with the next
// character of str and advance the input position.
void getbc()
{
    for(;;)
    {
        if(ch != ' ' && ch != '\r' && ch != '\n' && ch != '\t')
        {
            break;
        }
        ch = str[p_input];
        p_input += 1;
    }
}
// Append the current character ch to the word being assembled in token and
// keep the buffer NUL-terminated after every append.
// Characters that would not fit are dropped, so an over-long word in the
// input is truncated instead of writing past the end of token.
void concat()
{
    // One slot must stay free for the terminating '\0'.
    if(p_token >= (int)sizeof(token) - 1)
    {
        return;
    }
    token[p_token] = ch;
    p_token++;
    token[p_token] = '\0';
}
// Report whether the current character ch is an ASCII letter
// ('a'..'z' or 'A'..'Z'). Returns 1 if so, 0 otherwise.
int letter()
{
    int is_lower = (ch >= 'a' && ch <= 'z');
    int is_upper = (ch >= 'A' && ch <= 'Z');
    return (is_lower || is_upper) ? 1 : 0;
}
// Report whether the current character ch is a decimal digit ('0'..'9').
// Returns 1 if so, 0 otherwise.
int digit()
{
    if(ch < '0' || ch > '9')
    {
        return 0;
    }
    return 1;
}
// Classify the letter-led word currently held in token.
// Walks the keyword table rwtab up to its _KEY_WORD_END sentinel; on a match
// returns the keyword's kind code (its 1-based position in the table),
// otherwise returns 20, the kind code of an identifier.
int reserve()
{
    int idx;
    for(idx = 0; strcmp(rwtab[idx], _KEY_WORD_END) != 0; ++idx)
    {
        if(strcmp(rwtab[idx], token) == 0)
        {
            return idx + 1; // kind code of the keyword
        }
    }
    return 20; // kind code of an identifier
}
// Move the input position back by one character, so the character that was
// read last is read again by the next m_getch().
void retract()
{
    p_input = p_input - 1;
}
// Convert the decimal number held in token to its binary representation,
// written back into token as a string of '0'/'1' characters (no leading
// zeros; the value 0 yields "0"). Returns token.
//
// The bit string is produced directly rather than being packed into a
// decimal int, so constants of 1024 and above no longer overflow, and no
// floating-point pow() is involved.
char *dtb()
{
    unsigned long value = strtoul(token, NULL, 10); // digits only, per the scanner
    char bits[sizeof value * 8 + 1];                // one char per bit (8-bit bytes assumed) + '\0'
    size_t pos = sizeof bits - 1;

    bits[pos] = '\0';
    // Emit bits from least to most significant, filling the buffer backwards;
    // do/while guarantees at least one digit for the value 0.
    do
    {
        bits[--pos] = (char)('0' + (value & 1UL));
        value >>= 1;
    } while(value != 0);

    strcpy(token, bits + pos); // at most 64 digits, well within token[255]
    return token;
}
// Allocate a WORD carrying the given kind code and text.
// The text pointer is stored as-is (not copied); WORD::word is a non-const
// char*, hence the cast — the visible caller only reads through it.
static WORD *make_word(int typenum, const char *text)
{
    WORD *w = new WORD;
    w->typenum = typenum;
    w->word = (char *)text;
    return w;
}

// Lexical scan function: recognize the next word in str, starting at the
// current input position, and return it as a newly allocated WORD (allocated
// with new; the caller owns it).
//
// Kind codes produced:
//   1..5   keywords (position in rwtab), 20 identifier   -> word points at token
//   21     constant, converted to binary by dtb()        -> word points at token
//   22..29 delimiters  [ ] ( ) { } , ;
//   40 <   41 >   42 ++   43 +   44 =   45 ==
//   1000   end of the source string ("OVER")
//   -1     erroneous word ("ERROR"), e.g. a number followed by a letter (56x)
//          or any character not listed above
//
// For kinds whose word points at token, the text is only valid until the
// next call, because token is reused.
WORD *scaner()
{
    p_token = 0;
    m_getch();
    getbc();

    if(letter()) // starts with a letter: identifier or keyword
    {
        while(letter() || digit())
        {
            concat();
            m_getch();
        }
        retract(); // the character that ended the word belongs to the next one
        return make_word(reserve(), token);
    }

    if(digit()) // starts with a digit: constant
    {
        while(digit())
        {
            concat();
            m_getch();
        }
        if(letter()) // not a constant, e.g. 56x is invalid input
        {
            return make_word(-1, "ERROR");
        }
        retract();
        return make_word(21, dtb());
    }

    switch(ch) // neither letter nor digit: delimiter or operator
    {
    case '[': return make_word(22, "[");
    case ']': return make_word(23, "]");
    case '(': return make_word(24, "(");
    case ')': return make_word(25, ")");
    case '{': return make_word(26, "{");
    case '}': return make_word(27, "}");
    case ',': return make_word(28, ",");
    case ';': return make_word(29, ";");
    case '<': return make_word(40, "<");
    case '>': return make_word(41, ">");
    case '=': // "=" or "=="
        m_getch();
        if('=' == ch)
        {
            return make_word(45, "==");
        }
        retract();
        return make_word(44, "=");
    case '+': // "+" or "++"
        m_getch();
        if('+' == ch)
        {
            // NOTE(review): this used to return 44, the same code as "=",
            // making the two indistinguishable in the output. 42 is the only
            // unused code between ">" (41) and "+" (43) — confirm against the
            // kind-code table.
            return make_word(42, "++");
        }
        retract();
        return make_word(43, "+");
    case '\0': // end of the source string
        return make_word(1000, "OVER");
    default: // every other character is undefined: error, kind code -1
        return make_word(-1, "ERROR");
    }
}
4. Inspect the output of the lexical analyzer, i.e. the b.txt file; part of it is shown in the screenshot.