编译原理(词法分析) Java 实现

1. 项目目录 

2. 需要进行词法分析的源代码(程序从 ./file/source1.pascal 读取该文件)

PROGRAM SOURCE;
/*定义变量*/
VAR X, Y, Z:INTEGER;
	A:INTEGER;
	B:INTEGER;
//程序开始
BEGIN
	X   := 5;
	A	:= 0;
	WHILE X > 0 DO
			A	:= A + X;
			A 	:= X - 1;
	IF A = 10 THEN
			B 	:= A * 2;
END

3. 程序代码 

import java.io.*;
import java.util.ArrayList;
/**
 * One row of the lexer's category table: a fixed lexeme together with its
 * numeric category code and a short mnemonic.
 *
 * <p>Instances are immutable; all fields are assigned once in the constructor.
 */
class SortCode{
    /** The lexeme text (keyword or operator) this row describes. */
    final String word;
    /** Numeric category code assigned to the lexeme. */
    final Integer encode;
    /** Short mnemonic label for the category. */
    final String symbol;

    /**
     * Creates a table row.
     *
     * @param word   the lexeme text
     * @param encode the numeric category code
     * @param symbol the mnemonic label
     */
    public SortCode(String word, Integer encode, String symbol){
        this.word = word;
        this.encode = encode;
        this.symbol = symbol;
    }
}
/**
 * A single recognized token: its text, its numeric category code, and the
 * source line it was found on.
 *
 * <p>Instances are immutable; all fields are assigned once in the constructor.
 */
class WordItem{
    /** The token text exactly as it was read. */
    final String key;
    /** Numeric category code of the token. */
    final Integer encode;
    /** Line number on which the token was found. */
    final Integer line;

    /**
     * Creates a token record.
     *
     * @param key    the token text
     * @param encode the numeric category code
     * @param line   the line number of the token
     */
    public WordItem(String key, Integer encode, Integer line){
        this.key = key;
        this.encode = encode;
        this.line = line;
    }

    /**
     * Renders the token as {@code WordItem{key='...', encode=..., line=...}}.
     *
     * @return the printable form of this token
     */
    @Override
    public String toString() {
        return "WordItem{"
                + "key='" + key + '\''
                + ", encode=" + encode
                + ", line=" + line
                + '}';
    }
}
/**
 * Hand-written lexical analyzer for a small Pascal-like language.
 *
 * <p>{@link #main(String[])} reads {@code ./file/source1.pascal} character by
 * character, skips whitespace and comments ({@code /* ... *}{@code /} and
 * {@code //}), and prints one {@link WordItem} per recognized token.
 * Letters are limited to {@code A-Z} and {@code _}; any other unexpected
 * character stops the scan with an error message.
 */
public class ParseWords {

    /** Value returned by {@link Reader#read()} at end of stream. */
    private static final int EOF = -1;

    /** Category code used for identifiers that are not keywords. */
    private static final int IDENTIFIER_CODE = 25;

    /** Category code used for integer literals. */
    private static final int INTEGER_CODE = 26;

    /** Single-character tokens that are emitted directly. */
    private static final String SINGLE_CHAR_TOKENS = "+-*,><;=";

    /** Table of known lexemes and their category codes. */
    private static ArrayList<SortCode> sortCodeList = new ArrayList<>();

    /** Tokens recognized by the most recent scan, in source order. */
    private static ArrayList<WordItem> wordItemList = new ArrayList<>();

    /**
     * Fills the category table. Calling it again is a no-op, so the table
     * never contains duplicate rows.
     */
    private static void loadSortCodeList(){
        if (!sortCodeList.isEmpty()) {
            return;
        }
        sortCodeList.add(new SortCode("BEGIN", 1, "Start"));
        sortCodeList.add(new SortCode("END", 2, "End"));
        sortCodeList.add(new SortCode("IF", 3, "If"));
        sortCodeList.add(new SortCode("THEN", 4, "Then"));
        sortCodeList.add(new SortCode("ELSE", 5, "Else"));
        sortCodeList.add(new SortCode("WHILE", 6, "While"));
        sortCodeList.add(new SortCode("DO", 7, "Do"));
        sortCodeList.add(new SortCode("PROGRAM", 8, "Program"));
        sortCodeList.add(new SortCode("INTEGER", 9, "Integer"));
        sortCodeList.add(new SortCode("VAR", 10, "Var"));
        sortCodeList.add(new SortCode("<", 11, "Lt"));
        sortCodeList.add(new SortCode(">", 12, "Gt"));
        sortCodeList.add(new SortCode(",", 13, "Dot"));
        sortCodeList.add(new SortCode(";", 14, "Over"));
        sortCodeList.add(new SortCode(":", 15, "Mao"));
        sortCodeList.add(new SortCode("+", 16, "Add"));
        sortCodeList.add(new SortCode("-", 17, "Sub"));
        sortCodeList.add(new SortCode("*", 18, "Mul"));
        sortCodeList.add(new SortCode("/", 19, "Div"));
        sortCodeList.add(new SortCode("=", 20, "Eq"));
        sortCodeList.add(new SortCode("/*", 21, "NS"));
        sortCodeList.add(new SortCode("*/", 22, "NE"));
        sortCodeList.add(new SortCode("//", 23, "NL"));
        sortCodeList.add(new SortCode(":=", 24, "="));
        sortCodeList.add(new SortCode("标识符", 25, "Id"));
        sortCodeList.add(new SortCode("整数", 26, "Integer"));
    }

    /**
     * Tells whether {@code c} is an ASCII decimal digit.
     *
     * @param c the character to test
     * @return {@code true} for {@code '0'..'9'}
     */
    private static boolean isNumber(char c){
        return c >= '0' && c <= '9';
    }

    /**
     * Tells whether {@code c} may appear in an identifier or keyword.
     * Only upper-case ASCII letters and the underscore qualify.
     *
     * @param c the character to test
     * @return {@code true} for {@code 'A'..'Z'} or {@code '_'}
     */
    private static boolean isLetter(char c){
        return (c >= 'A' && c <= 'Z') || c == '_';
    }

    /**
     * Looks up the category code of a lexeme in the category table.
     *
     * @param key the lexeme text
     * @return the matching code, or {@code -1} when the lexeme is not in the table
     */
    private static Integer search(String key){
        for (SortCode sortCode : sortCodeList){
            if (sortCode.word.equals(key)){
                return sortCode.encode;
            }
        }
        return -1;
    }

    /**
     * Scans {@code reader} to the end of the stream, appending each recognized
     * token to {@link #wordItemList}. Scanning stops early, after printing a
     * diagnostic, on an illegal character or an unterminated block comment.
     *
     * @param reader the character source; not closed by this method
     * @throws IOException if reading fails
     */
    private static void tokenize(Reader reader) throws IOException {
        StringBuilder buffer = new StringBuilder();
        int line = 1;
        // Keep the raw int from read() so that end of stream (-1) is never
        // confused with a real character.
        int key = reader.read();
        while (key != EOF) {
            if (key == '/') { // one of: "/", "/* ... */", "// ..."
                key = reader.read();
                if (key == '*') {
                    int startLine = line;
                    int previous = EOF; // the opening '*' must not count as the closer
                    boolean closed = false;
                    while ((key = reader.read()) != EOF) {
                        if (previous == '*' && key == '/') {
                            closed = true;
                            break;
                        }
                        if (key == '\n') {
                            line++;
                        }
                        previous = key;
                    }
                    if (!closed) {
                        System.out.println("注释未闭合:line:" + startLine);
                        break;
                    }
                    key = reader.read();
                } else if (key == '/') {
                    // Stop ON the line terminator (or EOF) so the main loop
                    // does the line counting; works for CRLF and LF files.
                    do {
                        key = reader.read();
                    } while (key != EOF && key != '\n' && key != '\r');
                } else {
                    // Plain division; key already holds the following character.
                    wordItemList.add(new WordItem("/", search("/"), line));
                }
            } else if (key == ':') { // ":" or ":="
                key = reader.read();
                if (key == '=') {
                    wordItemList.add(new WordItem(":=", search(":="), line));
                    key = reader.read();
                } else {
                    wordItemList.add(new WordItem(":", search(":"), line));
                }
            } else if (key == '\n') {
                line++;
                key = reader.read();
            } else if (key == '\r' || key == ' ' || key == '\t') {
                key = reader.read();
            } else if (SINGLE_CHAR_TOKENS.indexOf(key) >= 0) {
                String word = String.valueOf((char) key);
                wordItemList.add(new WordItem(word, search(word), line));
                key = reader.read();
            } else if (isLetter((char) key)) { // keyword or identifier
                do {
                    buffer.append((char) key);
                    key = reader.read();
                } while (key != EOF && (isLetter((char) key) || isNumber((char) key)));
                String word = buffer.toString();
                int encode = search(word);
                wordItemList.add(new WordItem(word, encode != -1 ? encode : IDENTIFIER_CODE, line));
                buffer.setLength(0);
            } else if (isNumber((char) key)) { // integer literal
                do {
                    buffer.append((char) key);
                    key = reader.read();
                } while (key != EOF && isNumber((char) key));
                String digits = buffer.toString();
                try {
                    // parseInt rejects values outside the int range by throwing.
                    Integer.parseInt(digits);
                    wordItemList.add(new WordItem(digits, INTEGER_CODE, line));
                } catch (NumberFormatException outOfRange) {
                    System.out.println("数字太大越界:line:"+line+" number:"+digits);
                }
                buffer.setLength(0);
            } else {
                System.out.println("非法字符:line:"+line+" key:"+(char) key);
                break;
            }
        }
    }

    /**
     * Tokenizes {@code ./file/source1.pascal} and prints every recognized
     * token, one per line, to standard output.
     *
     * @param args unused
     * @throws Exception if the source file cannot be opened or read
     */
    public static void main(String[] args) throws Exception{
        File file = new File("./file/source1.pascal");
        loadSortCodeList();
        wordItemList.clear(); // do not mix in tokens from an earlier run
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            tokenize(reader);
        }
        for (WordItem wordItem : wordItemList){
            System.out.println(wordItem);
        }
    }
}
发布了92 篇原创文章 · 获赞 23 · 访问量 2万+

猜你喜欢

转载自blog.csdn.net/assiduous_me/article/details/90515483