词法分析器(lexical analyzer)【JavaScript实现】

查看正文内容

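/*
    Token: a plain object { type, value, line, pos }
      type  - numeric token-type code
      value - token text where it matters (identifier name, string body, integer)
      line  - 1-based line on which the token starts
      pos   - 1-based column at which the token starts
*/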
 
// Numeric codes for every token kind the lexer can emit.
// The numbering is contiguous from 1 to 29; End_of_input is the sentinel 99.
const TokenType = {
    // keywords
    Keyword_if: 1,
    Keyword_else: 2,
    Keyword_print: 3,
    Keyword_putc: 4,
    Keyword_while: 5,

    // operators
    Op_add: 6,
    Op_and: 7,
    Op_assign: 8,
    Op_divide: 9,
    Op_equal: 10,
    Op_greater: 11,
    Op_greaterequal: 12,
    Op_less: 13,
    Op_lessequal: 14,
    Op_mod: 15,
    Op_multiply: 16,
    Op_not: 17,
    Op_notequal: 18,
    Op_or: 19,
    Op_subtract: 20,

    // literals and names
    Integer: 21,
    String: 22,
    Identifier: 23,

    // punctuation
    Semicolon: 24,
    Comma: 25,
    LeftBrace: 26,
    RightBrace: 27,
    LeftParen: 28,
    RightParen: 29,

    // sentinel returned once the source is exhausted
    End_of_input: 99
}
 
/**
 * Hand-written scanner that turns a source string into tokens of the form
 * { type, value, line, pos }, where `type` is a TokenType code and
 * `line` / `pos` are the 1-based line and column at which the token starts.
 *
 * End of input is represented internally by `this.chr === undefined`.
 * Lexical errors are reported through `error()`, which prints a message and
 * terminates the process.
 */
class Lexer {
    /**
     * @param {string} source - complete text to tokenize
     */
    constructor(source) {
        this.source = source
        this.pos = 1        // position in line
        this.position = 0   // position in source
        this.line = 1
        // charAt(0) yields "" for an empty source; normalise that to the
        // `undefined` EOF marker so getToken() reports End_of_input.
        this.chr = source.length > 0 ? source.charAt(0) : undefined
        // The first character never passes through getNextChar(), so a
        // leading newline has to be counted here.
        if (this.chr === '\n') {
            this.line++
            this.pos = 0
        }
        this.keywords = {
            "if": TokenType.Keyword_if,
            "else": TokenType.Keyword_else,
            "print": TokenType.Keyword_print,
            "putc": TokenType.Keyword_putc,
            "while": TokenType.Keyword_while
        }
    }

    /**
     * Advance to the next character and make it the current one.
     * A newline bumps `line` and resets `pos` to 0, so the character that
     * follows it is reported at column 1.
     *
     * @returns {string|undefined} the new current character, or undefined at EOF
     */
    getNextChar() {
        this.pos++
        this.position++

        if (this.position >= this.source.length) {
            this.chr = undefined
            return this.chr
        }
        this.chr = this.source.charAt(this.position)
        if (this.chr === '\n') {
            this.line++
            this.pos = 0
        }
        return this.chr
    }

    /**
     * Print a diagnostic and exit the process with status 1.
     * The location is appended only when both line and pos are positive.
     *
     * @param {number} line
     * @param {number} pos
     * @param {string} message
     */
    error(line, pos, message) {
        if (line > 0 && pos > 0) {
            console.log(message + " in line " + line + ", pos " + pos + "\n")
        } else {
            console.log(message)
        }
        process.exit(1)
    }

    /**
     * Scan an operator that may be one or two characters long.
     *
     * @param {string} expect - second character that completes the long form
     * @param {number} ifyes  - token type when `expect` follows
     * @param {number} ifno   - token type otherwise; passing
     *                          TokenType.End_of_input means the short form is
     *                          not a valid token and is reported as an error
     * @param {number} line   - line of the operator's first character
     * @param {number} pos    - column of the operator's first character
     * @returns {{type: number, value: string, line: number, pos: number}}
     */
    follow(expect, ifyes, ifno, line, pos) {
        if (this.getNextChar() === expect) {
            this.getNextChar()
            return { type: ifyes, value: "", line, pos }
        }
        if (ifno === TokenType.End_of_input) {
            // At EOF there is no character to describe; avoid calling
            // charCodeAt on undefined.
            const found = this.chr === undefined
                ? "end of input"
                : "(" + this.chr.charCodeAt(0) + ") '" + this.chr + "'"
            this.error(line, pos, "follow: unrecognized character: " + found)
        }
        return { type: ifno, value: "", line, pos }
    }

    /**
     * Called on '/': returns a divide token, or skips a block comment and
     * returns the token that follows it.
     *
     * @param {number} line
     * @param {number} pos
     * @returns {{type: number, value: string, line: number, pos: number}}
     */
    div_or_comment(line, pos) {
        if (this.getNextChar() !== '*') {
            return { type: TokenType.Op_divide, value: "/", line, pos }
        }
        this.getNextChar()
        while (true) {
            // EOF is signalled by undefined; the NUL check is kept from the
            // original so existing inputs behave the same.
            if (this.chr === undefined || this.chr === '\u0000') {
                this.error(line, pos, "EOF in comment")
                // Only reachable if error() has been replaced by something
                // that returns; stop scanning instead of spinning forever.
                return this.getToken()
            } else if (this.chr === '*') {
                if (this.getNextChar() === '/') {
                    this.getNextChar()
                    return this.getToken()
                }
            } else {
                this.getNextChar()
            }
        }
    }

    /**
     * Scan a character constant ('a', '\n' or '\\') and return it as an
     * Integer token whose value is the character code.
     *
     * @param {number} line
     * @param {number} pos
     * @returns {{type: number, value: number, line: number, pos: number}}
     */
    char_lit(line, pos) {
        let c = this.getNextChar() // skip opening quote
        if (c === undefined) {
            this.error(line, pos, "EOF while scanning character constant")
        }
        let n = c.charCodeAt(0)
        if (c === "\'") {
            this.error(line, pos, "empty character constant")
        } else if (c === "\\") {
            c = this.getNextChar()
            if (c === "n") {
                n = 10
            } else if (c === "\\") {
                n = 92
            } else {
                this.error(line, pos, "unknown escape sequence \\" + c)
            }
        }
        if (this.getNextChar() !== "\'") {
            this.error(line, pos, "multi-character constant")
        }
        this.getNextChar()
        return { type: TokenType.Integer, value: n, line, pos }
    }

    /**
     * Scan a string literal. The characters between the quotes are copied
     * verbatim (escape sequences are not interpreted here).
     *
     * @param {string} start - the opening quote character, also the terminator
     * @param {number} line
     * @param {number} pos
     * @returns {{type: number, value: string, line: number, pos: number}}
     */
    string_lit(start, line, pos) {
        let value = ""
        while (this.getNextChar() !== start) {
            if (this.chr === undefined) {
                this.error(line, pos, "EOF while scanning string literal")
            }
            if (this.chr === "\n") {
                this.error(line, pos, "EOL while scanning string literal")
            }
            value += this.chr
        }
        this.getNextChar()
        return { type: TokenType.String, value, line, pos }
    }

    /**
     * Scan a run of word characters and classify it as an integer, a keyword
     * or an identifier. Integer values are returned as the digit string.
     *
     * @param {number} line
     * @param {number} pos
     * @returns {{type: number, value: string, line: number, pos: number}}
     */
    identifier_or_integer(line, pos) {
        let is_number = true
        let text = ""

        // The undefined check is essential: RegExp#test stringifies its
        // argument, and the string "undefined" matches \w, which would make
        // this loop run forever at end of input. (\w already covers '_'.)
        while (this.chr !== undefined && /\w/.test(this.chr)) {
            text += this.chr
            if (!/\d/.test(this.chr)) {
                is_number = false
            }
            this.getNextChar()
        }
        if (text === "") {
            this.error(line, pos, "identifier_or_integer: unrecognized character: (" + this.chr.charCodeAt(0) + ") '" + this.chr + "'")
        }

        if (/\d/.test(text.charAt(0))) {
            if (!is_number) {
                this.error(line, pos, "invalid number: " + text)
            }
            return { type: TokenType.Integer, value: text, line, pos }
        }

        // Own-property check: the `in` operator also sees inherited names
        // such as "toString" and "constructor", which are not keywords.
        if (Object.prototype.hasOwnProperty.call(this.keywords, text)) {
            return { type: this.keywords[text], value: "", line, pos }
        }
        return { type: TokenType.Identifier, value: text, line, pos }
    }

    /**
     * Skip whitespace and return the next token. Once the source is
     * exhausted every call returns an End_of_input token.
     *
     * @returns {{type: number, value: (string|number), line: number, pos: number}}
     */
    getToken() {
        // Ignore whitespace
        while (this.chr !== undefined && /\s/.test(this.chr)) { this.getNextChar() }
        const line = this.line
        const pos = this.pos
        switch (this.chr) {
            case undefined: return { type: TokenType.End_of_input, value: "", line: this.line, pos: this.pos }
            case "/":       return this.div_or_comment(line, pos)
            case "\'":      return this.char_lit(line, pos)
            case "\"":      return this.string_lit(this.chr, line, pos)

            case "<":       return this.follow("=", TokenType.Op_lessequal, TokenType.Op_less, line, pos)
            case ">":       return this.follow("=", TokenType.Op_greaterequal, TokenType.Op_greater, line, pos)
            case "=":       return this.follow("=", TokenType.Op_equal, TokenType.Op_assign, line, pos)
            case "!":       return this.follow("=", TokenType.Op_notequal, TokenType.Op_not, line, pos)
            // A lone '&' or '|' is not a token: End_of_input tells follow() to raise an error.
            case "&":       return this.follow("&", TokenType.Op_and, TokenType.End_of_input, line, pos)
            case "|":       return this.follow("|", TokenType.Op_or, TokenType.End_of_input, line, pos)

            case "{":       this.getNextChar(); return { type: TokenType.LeftBrace, value: "{", line, pos }
            case "}":       this.getNextChar(); return { type: TokenType.RightBrace, value: "}", line, pos }
            case "(":       this.getNextChar(); return { type: TokenType.LeftParen, value: "(", line, pos }
            case ")":       this.getNextChar(); return { type: TokenType.RightParen, value: ")", line, pos }
            case "+":       this.getNextChar(); return { type: TokenType.Op_add, value: "+", line, pos }
            case "-":       this.getNextChar(); return { type: TokenType.Op_subtract, value: "-", line, pos }
            case "*":       this.getNextChar(); return { type: TokenType.Op_multiply, value: "*", line, pos }
            case "%":       this.getNextChar(); return { type: TokenType.Op_mod, value: "%", line, pos }
            case ";":       this.getNextChar(); return { type: TokenType.Semicolon, value: ";", line, pos }
            case ",":       this.getNextChar(); return { type: TokenType.Comma, value: ",", line, pos }

            default:        return this.identifier_or_integer(line, pos)
        }
    }

    /**
     * Reverse lookup: name of the TokenType key holding the given code.
     *
     * @param {number} value
     * @returns {string|undefined} key name, or undefined if no key has that value
     */
    getTokenType(value) {
        return Object.keys(TokenType).find(key => TokenType[key] === value)
    }

    /**
     * Print one token as fixed-width columns: line right-aligned in 5,
     * column right-aligned in 7, then a space and the type name padded to 15.
     * Integer, Identifier and String tokens are followed by their value.
     *
     * @param {{type: number, value: (string|number), line: number, pos: number}} t
     */
    printToken(t) {
        let result = String(t.line).padStart(5)
        result += String(t.pos).padStart(7)
        result += (" " + this.getTokenType(t.type)).padEnd(16).slice(0, 16)
        switch (t.type) {
            case TokenType.Integer:
                result += "  " + t.value
                break
            case TokenType.Identifier:
                result += " " + t.value
                break
            case TokenType.String:
                result += " \"" + t.value + "\""
                break
        }
        console.log(result)
    }

    /**
     * Tokenize the remaining input and print every token, including the
     * final End_of_input token.
     */
    printTokens() {
        let t
        while ((t = this.getToken()).type !== TokenType.End_of_input) {
            this.printToken(t)
        }
        this.printToken(t)
    }
}
const fs = require("fs")

// Command-line driver: node <this-script> <source-file>
// Reads the file named by the first argument and prints its token stream.
const sourcePath = process.argv[2]
if (sourcePath === undefined) {
    console.error("usage: node " + process.argv[1] + " <source-file>")
    process.exit(1)
}
fs.readFile(sourcePath, "utf8", (err, data) => {
    // Without this check a failed read would hand `undefined` to the Lexer.
    if (err) {
        console.error("cannot read " + sourcePath + ": " + err.message)
        process.exit(1)
    }
    // Declared locally; the original assigned to an undeclared global `l`.
    const lexer = new Lexer(data)
    lexer.printTokens()
})
发布了69 篇原创文章 · 获赞 189 · 访问量 9万+

猜你喜欢

转载自blog.csdn.net/qq_36721220/article/details/103601162
今日推荐