Write a lexical analyzer in Java

The lexical analyzer scans a program and determines whether each token is a keyword, identifier, constant, delimiter, or operator. Token-coding schemes are generally divided into two kinds: one code per symbol, and the classic five categories.

Here I use the classic five categories. This lexical analyzer is written in Java.

The main code is:

/**
 * A small lexical analyzer: it walks a character array and prints one line per
 * token in the form {@code lexeme <TAB> category-code <TAB> category-label}.
 *
 * <p>Category codes: 1 keyword, 2 operator, 3 delimiter, 4 identifier,
 * 5 unsigned constant, 6 unrecognized character.
 *
 * <p>Whitespace (space, tab, CR, LF) and NUL characters are skipped, so callers
 * that pad the buffer with an extra NUL slot keep working, but padding is not
 * required: every lookahead is bounds-checked.
 */
public class WordAnalyze {
    /** Reserved words; a letter-led word found in this list is reported as a keyword. */
    private static final String[] KEYWORDS =
            {"break", "include", "begin", "end", "if", "else", "while", "switch"};

    /**
     * Determines whether the given word is a keyword.
     *
     * @param str the word to look up
     * @return {@code true} if {@code str} equals one of the reserved words
     */
    boolean isKey(String str)
    {
        for (String keyword : KEYWORDS) {
            if (keyword.equals(str)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Determines whether the character is an ASCII letter.
     *
     * @param letter the character to test
     * @return {@code true} for {@code a-z} and {@code A-Z}
     */
    boolean isLetter(char letter)
    {
        return (letter >= 'a' && letter <= 'z') || (letter >= 'A' && letter <= 'Z');
    }

    /**
     * Determines whether the character is an ASCII decimal digit.
     *
     * @param digit the character to test
     * @return {@code true} for {@code 0-9}
     */
    boolean isDigit(char digit)
    {
        return digit >= '0' && digit <= '9';
    }

    /**
     * Performs the lexical analysis and prints every token to {@code System.out}.
     *
     * @param chars the program text; {@code null} or empty input prints nothing
     */
    void analyze(char[] chars)
    {
        if (chars == null) {
            return;
        }
        int pos = 0;
        while (pos < chars.length) {
            char current = chars[pos];
            if (current == ' ' || current == '\t' || current == '\n' || current == '\r'
                    || current == '\0') {
                // Blank characters and NUL padding carry no token.
                pos++;
            } else if (isLetter(current)) {
                pos = scanWord(chars, pos);
            } else if (isDigit(current) || isFractionDot(chars, pos)) {
                pos = scanNumber(chars, pos);
            } else {
                pos = scanSymbol(chars, pos);
            }
        }
    }

    /** Prints the keyword/identifier starting at {@code start}; returns the index just past it. */
    private int scanWord(char[] chars, int start)
    {
        int end = start;
        while (end < chars.length && (isLetter(chars[end]) || isDigit(chars[end]))) {
            end++;
        }
        String word = new String(chars, start, end - start);
        if (isKey(word)) {
            // Keywords get their own category code (1), distinct from identifiers (4).
            System.out.println(word + "\t1" + "\tKeyword");
        } else {
            System.out.println(word + "\t4" + "\tIdentifier");
        }
        return end;
    }

    /** Prints the unsigned constant starting at {@code start}; returns the index just past it. */
    private int scanNumber(char[] chars, int start)
    {
        int end = start;
        while (end < chars.length) {
            if (isDigit(chars[end])) {
                end++;
            } else if (isFractionDot(chars, end)) {
                // Consume the dot together with the digit that justifies it.
                end += 2;
            } else {
                break;
            }
        }
        System.out.println(new String(chars, start, end - start) + "\t5" + "\t constant");
        return end;
    }

    /** Returns true when {@code chars[index]} is a '.' immediately followed by a digit. */
    private boolean isFractionDot(char[] chars, int index)
    {
        return chars[index] == '.' && index + 1 < chars.length && isDigit(chars[index + 1]);
    }

    /**
     * Prints the operator, delimiter or unrecognized character at {@code index};
     * returns the index just past the consumed characters.
     */
    private int scanSymbol(char[] chars, int index)
    {
        char symbol = chars[index];
        switch (symbol) {
            // Single-character operators.
            case '+': System.out.println(symbol + "\t2" + "\toperator"); break;
            case '-': System.out.println(symbol + " \t2" + "\toperator"); break;
            case '*': System.out.println(symbol + "\t2" + "\toperator"); break;
            case '/': System.out.println(symbol + "\t2" + "\t operator"); break;
            // Delimiters.
            case '(': System.out.println(symbol + "\t3" + "\tdelimiter"); break;
            case ')':
            case '[':
            case ']':
            case ';':
            case '{':
            case '}': System.out.println(symbol + "\t3" + "\t分界符"); break;
            // Operators that may be followed by '=' to form a two-character operator.
            case '=':
            case ':':
            case '>':
            case '<':
                return scanOperatorWithOptionalEquals(chars, index);
            // Anything else is not recognized.
            default: System.out.println(symbol + "\t6" + "\tNo identifier");
        }
        return index + 1;
    }

    /**
     * Prints {@code chars[index]} as an operator, merged with a directly following
     * '=' when there is one (==, :=, >=, <=); returns the index just past it.
     */
    private int scanOperatorWithOptionalEquals(char[] chars, int index)
    {
        char first = chars[index];
        if (index + 1 < chars.length && chars[index + 1] == '=') {
            System.out.println(first + "=" + "\t2" + "\t运算符");
            return index + 2;
        }
        System.out.println(first + "\t2" + "\t运算符");
        return index + 1;
    }

    /**
     * Reads a source file and prints its tokens.
     *
     * @param args optional: {@code args[0]} is the file to analyze; when absent,
     *             {@code E:\data.txt} is used
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        String path = args.length > 0 ? args[0] : "E:\\data.txt";
        StringBuilder text = new StringBuilder();
        // Fully qualified names keep this class compilable without an import block.
        // try-with-resources closes the reader even when a read fails.
        try (java.io.Reader reader = new java.io.BufferedReader(new java.io.FileReader(path))) {
            char[] buffer = new char[4096];
            int count;
            // read() may return fewer characters than requested, so loop until end of stream
            // and keep only what was actually read (no NUL-filled tail).
            while ((count = reader.read(buffer)) != -1) {
                text.append(buffer, 0, count);
            }
        }
        new WordAnalyze().analyze(text.toString().toCharArray());
    }
}

 

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325340140&siteId=291194637