The lexical analyzer scans a program and classifies each token as a keyword, identifier, constant, delimiter, or operator. Token classification schemes are generally of two kinds: one code per symbol, or the classic five categories.
Here I use the classic five categories. This lexical analyzer is written in C++.
/*
Reserved word (keyword): 1
Operator: 2
Delimiter: 3
Identifier: 4
Constant: 5
Unrecognized: 6
*/
The main code is:
#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>

#define MAX 10  // number of entries in keyWord

/*
 * Token category codes, printed in the second output column.
 *   keyword: 1, operator: 2, delimiter: 3,
 *   identifier: 4, constant: 5, unrecognized: 6
 */
enum TokenCode {
    TOK_KEYWORD = 1,
    TOK_OPERATOR = 2,
    TOK_DELIMITER = 3,
    TOK_IDENTIFIER = 4,
    TOK_CONSTANT = 5,
    TOK_UNRECOGNIZED = 6
};

// Reserved words; any other letter-led word is reported as an identifier.
const char *keyWord[MAX] = {"void", "main", "break", "include", "begin",
                            "end",  "if",   "else",  "while",   "switch"};

// Returns true when `token` exactly matches one of the reserved words.
bool isKey(const char *token)
{
    for (int i = 0; i < MAX; i++) {
        if (std::strcmp(token, keyWord[i]) == 0)
            return true;
    }
    return false;
}

// Returns true for the ASCII letters a-z and A-Z.
bool isLetter(char letter)
{
    return (letter >= 'a' && letter <= 'z') || (letter >= 'A' && letter <= 'Z');
}

// Returns true for the ASCII digits 0-9.
bool isDigit(char digit)
{
    return digit >= '0' && digit <= '9';
}

// Maps a category code to the label printed in the third output column.
static const char *labelFor(TokenCode code)
{
    switch (code) {
    case TOK_KEYWORD:      return "关键字";    // keyword
    case TOK_OPERATOR:     return "运算符";    // operator
    case TOK_DELIMITER:    return "分界符";    // delimiter
    case TOK_IDENTIFIER:   return "标识符";    // identifier
    case TOK_CONSTANT:     return "常数";      // constant
    case TOK_UNRECOGNIZED: break;
    }
    return "无识别符";                         // unrecognized
}

// Prints one result line: lexeme, TAB, category code, TAB, category label.
static void report(const std::string &lexeme, TokenCode code)
{
    std::cout << lexeme << '\t' << static_cast<int>(code) << '\t'
              << labelFor(code) << '\n';
}

// Returns a lookahead character to the stream. EOF is never pushed back:
// rewinding after EOF would make the scanner re-read the last character
// forever.
static void pushBack(int c, FILE *fpin)
{
    if (c != EOF)
        std::ungetc(c, fpin);
}

// True when `c` (an fgetc result) can continue an identifier.
static bool isWordChar(int c)
{
    if (c == EOF)
        return false;
    const char byte = static_cast<char>(c);
    return isLetter(byte) || isDigit(byte);
}

// Scans a keyword or identifier whose first letter `c` is already consumed.
static void scanWord(FILE *fpin, int c)
{
    std::string lexeme;  // grows as needed, so long names cannot overflow
    do {
        lexeme += static_cast<char>(c);
        c = std::fgetc(fpin);
    } while (isWordChar(c));
    pushBack(c, fpin);

    report(lexeme, isKey(lexeme.c_str()) ? TOK_KEYWORD : TOK_IDENTIFIER);
}

// Scans an unsigned constant whose first character `c` (a digit or '.') is
// already consumed. A '.' belongs to the constant only when a digit follows
// it; otherwise the constant ends before the '.' and the '.' itself is
// reported as unrecognized. Note: this grammar accepts repeated ".digit"
// groups, so "1.2.3" scans as a single constant.
static void scanNumber(FILE *fpin, int c)
{
    std::string lexeme;
    for (;;) {
        if (c != EOF && isDigit(static_cast<char>(c))) {
            lexeme += static_cast<char>(c);
            c = std::fgetc(fpin);
        } else if (c == '.') {
            const int next = std::fgetc(fpin);
            if (next != EOF && isDigit(static_cast<char>(next))) {
                lexeme += '.';
                c = next;  // appended by the digit branch on the next pass
            } else {
                // Only one character of pushback is guaranteed, so the '.'
                // is reported here instead of being returned to the stream.
                pushBack(next, fpin);
                if (!lexeme.empty())
                    report(lexeme, TOK_CONSTANT);
                report(".", TOK_UNRECOGNIZED);
                return;
            }
        } else {
            break;
        }
    }
    pushBack(c, fpin);
    report(lexeme, TOK_CONSTANT);
}

// Reports the operator `first`, or the two-character operator
// `first``second` when the next input character equals `second`.
static void scanPairOperator(FILE *fpin, char first, char second)
{
    const int next = std::fgetc(fpin);
    std::string lexeme(1, first);
    if (next == second)
        lexeme += second;
    else
        pushBack(next, fpin);
    report(lexeme, TOK_OPERATOR);
}

// Lexical analysis: reads characters from `fpin` until end of file and
// prints one line per token to standard output (see report()). Blanks, tabs
// and line breaks separate tokens and are not reported. The stream is left
// open; the caller closes it.
void analyze(FILE *fpin)
{
    int c;  // int, not char, so EOF stays distinct from every data byte
    while ((c = std::fgetc(fpin)) != EOF) {
        // '\r' is skipped too so CRLF files do not report it as unrecognized.
        if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
            continue;

        const char byte = static_cast<char>(c);
        if (isLetter(byte)) {
            scanWord(fpin, c);
            continue;
        }
        if (isDigit(byte) || c == '.') {
            scanNumber(fpin, c);
            continue;
        }

        switch (c) {
        // operators that may be doubled or followed by '='
        case '+': scanPairOperator(fpin, '+', '+'); break;
        case '-': scanPairOperator(fpin, '-', '-'); break;
        case '=': scanPairOperator(fpin, '=', '='); break;
        case ':': scanPairOperator(fpin, ':', '='); break;
        case '>': scanPairOperator(fpin, '>', '='); break;
        case '<': scanPairOperator(fpin, '<', '='); break;

        // single-character operators
        case '*':
        case '/':
            report(std::string(1, byte), TOK_OPERATOR);
            break;

        // delimiters
        case '(':
        case ')':
        case '[':
        case ']':
        case ';':
        case '{':
        case '}':
            report(std::string(1, byte), TOK_DELIMITER);
            break;

        // anything else is not part of the language
        default:
            report(std::string(1, byte), TOK_UNRECOGNIZED);
            break;
        }
    }
}

// Prompts for a source file name until one can be opened, then prints the
// lexical analysis of that file. Returns 1 if standard input ends before a
// readable file name has been entered.
int main()
{
    std::string input;  // std::string: a long path cannot overflow a buffer
    FILE *fpin = NULL;

    std::cout << "Please enter the source file name:\n" << std::endl;
    while (std::cin >> input) {
        fpin = std::fopen(input.c_str(), "r");
        if (fpin != NULL)
            break;
        std::cout << "Path input error" << std::endl;
    }
    if (fpin == NULL)
        return 1;  // input exhausted; retrying would loop forever

    std::cout << "****************Lexical analysis result********************"
              << std::endl;
    analyze(fpin);
    std::fclose(fpin);
    return 0;
}
Run result: