A lexical analyzer scans a program and classifies each token as a keyword, identifier, constant, delimiter, or operator. Token-coding schemes generally fall into two kinds: "one code per symbol" and the classic five categories.
Here I use the classic five-category scheme. This lexical analyzer is written in Java.
The main code is:
/**
 * A small lexical analyzer: it walks a character array and prints one line per
 * token in the form {@code lexeme<TAB>code<TAB>label}.
 *
 * <p>Category codes: 1 keyword, 2 operator, 3 delimiter, 4 identifier,
 * 5 unsigned constant, 6 unrecognized character.
 *
 * <p>NOTE(review): the label texts (including the mixed-language labels and the
 * stray spaces in a few of them) are reproduced exactly as they were, because
 * anything parsing this output may depend on them.
 */
public class WordAnalyze {

    /** Reserved words; any other letter-led word is reported as an identifier. */
    private static final String[] KEYWORDS =
            {"break", "include", "begin", "end", "if", "else", "while", "switch"};

    /**
     * Determines whether the given word is a keyword.
     *
     * @param str the word to look up
     * @return {@code true} if {@code str} equals one of the reserved words
     */
    boolean isKey(String str) {
        for (String keyword : KEYWORDS) {
            if (keyword.equals(str)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Determines whether the character is an ASCII letter.
     *
     * @param letter the character to test
     * @return {@code true} for {@code a-z} or {@code A-Z}
     */
    boolean isLetter(char letter) {
        return (letter >= 'a' && letter <= 'z') || (letter >= 'A' && letter <= 'Z');
    }

    /**
     * Determines whether the character is an ASCII digit.
     *
     * @param digit the character to test
     * @return {@code true} for {@code 0-9}
     */
    boolean isDigit(char digit) {
        return digit >= '0' && digit <= '9';
    }

    /**
     * Tokenizes {@code chars} and prints one line per token to standard output.
     *
     * <p>Every lookahead is bounds-checked, so the array does not need a spare
     * trailing element, and each scanner returns the index of the first
     * character it did not consume, so no input character is skipped.
     *
     * @param chars the program text to analyze
     */
    void analyze(char[] chars) {
        int pos = 0;
        while (pos < chars.length) {
            char ch = chars[pos];
            if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
                pos++; // whitespace only separates tokens
            } else if (isLetter(ch)) {
                pos = scanWord(chars, pos);
            } else if (isDigit(ch) || isDotBeforeDigit(chars, pos)) {
                pos = scanConstant(chars, pos);
            } else {
                // A '.' that is not followed by a digit also lands here and is
                // reported as unrecognized instead of as an empty constant.
                pos = scanSymbol(chars, pos);
            }
        }
    }

    /** Returns true when chars[pos] is a '.' immediately followed by a digit. */
    private boolean isDotBeforeDigit(char[] chars, int pos) {
        return chars[pos] == '.' && pos + 1 < chars.length && isDigit(chars[pos + 1]);
    }

    /** Scans a letter-led run of letters/digits, prints it, and returns the index after it. */
    private int scanWord(char[] chars, int start) {
        int end = start;
        while (end < chars.length && (isLetter(chars[end]) || isDigit(chars[end]))) {
            end++;
        }
        String word = new String(chars, start, end - start);
        if (isKey(word)) {
            // Keywords get their own code (1) so they can be told apart from identifiers (4).
            System.out.println(word + "\t1" + "\tKeyword");
        } else {
            System.out.println(word + "\t4" + "\tIdentifier");
        }
        return end;
    }

    /**
     * Scans an unsigned constant, prints it, and returns the index after it.
     * The accepted shape is a run of digits and dots in which every dot is
     * directly followed by a digit.
     */
    private int scanConstant(char[] chars, int start) {
        int end = start;
        while (end < chars.length && (isDigit(chars[end]) || isDotBeforeDigit(chars, end))) {
            end++;
        }
        System.out.println(new String(chars, start, end - start) + "\t5" + "\t constant");
        return end;
    }

    /** Prints the operator, delimiter or unrecognized character at pos and returns the index after it. */
    private int scanSymbol(char[] chars, int pos) {
        char ch = chars[pos];
        switch (ch) {
            // operators
            case '+':
                System.out.println(ch + "\t2" + "\toperator");
                return pos + 1;
            case '-':
                System.out.println(ch + " \t2" + "\toperator");
                return pos + 1;
            case '*':
                System.out.println(ch + "\t2" + "\toperator");
                return pos + 1;
            case '/':
                System.out.println(ch + "\t2" + "\t operator");
                return pos + 1;
            // delimiters
            case '(':
                System.out.println(ch + "\t3" + "\tdelimiter");
                return pos + 1;
            case ')':
            case '[':
            case ']':
            case ';':
            case '{':
            case '}':
                System.out.println(ch + "\t3" + "\t分界符");
                return pos + 1;
            // operators that may be followed by '=' to form a two-character operator
            case '=':
            case ':':
            case '>':
            case '<':
                if (pos + 1 < chars.length && chars[pos + 1] == '=') {
                    System.out.println(ch + "=" + "\t2" + "\t运算符");
                    return pos + 2;
                }
                System.out.println(ch + "\t2" + "\t运算符");
                return pos + 1;
            // anything else is not recognized
            default:
                System.out.println(ch + "\t6" + "\tNo identifier");
                return pos + 1;
        }
    }

    /**
     * Reads a source file and prints its tokens.
     *
     * @param args optional: {@code args[0]} is the path of the file to analyze;
     *             when absent, {@code E:\data.txt} is used
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        String path = args.length > 0 ? args[0] : "E:\\data.txt";
        StringBuilder text = new StringBuilder();
        // Collect exactly the characters that were read: the file's byte length
        // is not its character count, and one read() call may return fewer
        // characters than requested. The reader is closed even if read() throws.
        try (FileReader reader = new FileReader(new File(path))) {
            char[] buf = new char[4096];
            int count;
            while ((count = reader.read(buf)) != -1) {
                text.append(buf, 0, count);
            }
        }
        new WordAnalyze().analyze(text.toString().toCharArray());
    }
}