Lexical analyzer (Lexical Analyzer) [realize] Go

View text content

package main
 
import (
    "bufio"
    "fmt"
    "log"
    "os"
)
 
type TokenType int
 
const (
    tkEOI TokenType = iota
    tkMul
    tkDiv
    tkMod
    tkAdd
    tkSub
    tkNegate
    tkNot
    tkLss
    tkLeq
    tkGtr
    tkGeq
    tkEq
    tkNeq
    tkAssign
    tkAnd
    tkOr
    tkIf
    tkElse
    tkWhile
    tkPrint
    tkPutc
    tkLparen
    tkRparen
    tkLbrace
    tkRbrace
    tkSemi
    tkComma
    tkIdent
    tkInteger
    tkString
)
 
type Symbol struct {
    name string
    tok  TokenType
}
 
// symbol table
var symtab []Symbol
 
var scanner *bufio.Scanner
 
var (
    curLine = ""
    curCh   byte
    lineNum = 0
    colNum  = 0
)
 
const etx byte = 4 // used to signify EOI
 
func isDigit(ch byte) bool {
    return ch >= '0' && ch <= '9'
}
 
func isAlnum(ch byte) bool {
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || isDigit(ch)
}
 
func errorMsg(eline, ecol int, msg string) {
    log.Fatalf("(%d:%d) %s", eline, ecol, msg)
}
 
// add an identifier to the symbol table
func install(name string, tok TokenType) {
    sym := Symbol{name, tok}
    symtab = append(symtab, sym)
}
 
// search for an identifier in the symbol table
func lookup(name string) int {
    for i := 0; i < len(symtab); i++ {
        if symtab[i].name == name {
            return i
        }
    }
    return -1
}
 
// read the next line of input from the source file
func nextLine() {
    if scanner.Scan() {
        curLine = scanner.Text()
        lineNum++
        colNum = 0
        if curLine == "" { // skip blank lines
            nextLine()
        }
    } else {
        err := scanner.Err()
        if err == nil { // EOF
            curCh = etx
            curLine = ""
            lineNum++
            colNum = 1
        } else {
            log.Fatal(err)
        }
    }
}
 
// get the next char
func nextChar() {
    if colNum >= len(curLine) {
        nextLine()
    }
    if colNum < len(curLine) {
        curCh = curLine[colNum]
        colNum++
    }
}
 
func follow(eline, ecol int, expect byte, ifyes, ifno TokenType) TokenType {
    if curCh == expect {
        nextChar()
        return ifyes
    }
    if ifno == tkEOI {
        errorMsg(eline, ecol, "follow unrecognized character: "+string(curCh))
    }
    return ifno
}
 
func gettok() (eline, ecol int, tok TokenType, v string) {
    // skip whitespace
    for curCh == ' ' || curCh == '\t' || curCh == '\n' {
        nextChar()
    }
    eline = lineNum
    ecol = colNum
    switch curCh {
    case etx:
        tok = tkEOI
        return
    case '{':
        tok = tkLbrace
        nextChar()
        return
    case '}':
        tok = tkRbrace
        nextChar()
        return
    case '(':
        tok = tkLparen
        nextChar()
        return
    case ')':
        tok = tkRparen
        nextChar()
        return
    case '+':
        tok = tkAdd
        nextChar()
        return
    case '-':
        tok = tkSub
        nextChar()
        return
    case '*':
        tok = tkMul
        nextChar()
        return
    case '%':
        tok = tkMod
        nextChar()
        return
    case ';':
        tok = tkSemi
        nextChar()
        return
    case ',':
        tok = tkComma
        nextChar()
        return
    case '/': // div or comment
        nextChar()
        if curCh != '*' {
            tok = tkDiv
            return
        }
        // skip comments
        nextChar()
        for {
            if curCh == '*' {
                nextChar()
                if curCh == '/' {
                    nextChar()
                    eline, ecol, tok, v = gettok()
                    return
                }
            } else if curCh == etx {
                errorMsg(eline, ecol, "EOF in comment")
            } else {
                nextChar()
            }
        }
    case '\'': // single char literals
        nextChar()
        v = fmt.Sprintf("%d", curCh)
        if curCh == '\'' {
            errorMsg(eline, ecol, "Empty character constant")
        }
        if curCh == '\\' {
            nextChar()
            if curCh == 'n' {
                v = "10"
            } else if curCh == '\\' {
                v = "92"
            } else {
                errorMsg(eline, ecol, "unknown escape sequence: "+string(curCh))
            }
        }
        nextChar()
        if curCh != '\'' {
            errorMsg(eline, ecol, "multi-character constant")
        }
        nextChar()
        tok = tkInteger
        return
    case '<':
        nextChar()
        tok = follow(eline, ecol, '=', tkLeq, tkLss)
        return
    case '>':
        nextChar()
        tok = follow(eline, ecol, '=', tkGeq, tkGtr)
        return
    case '!':
        nextChar()
        tok = follow(eline, ecol, '=', tkNeq, tkNot)
        return
    case '=':
        nextChar()
        tok = follow(eline, ecol, '=', tkEq, tkAssign)
        return
    case '&':
        nextChar()
        tok = follow(eline, ecol, '&', tkAnd, tkEOI)
        return
    case '|':
        nextChar()
        tok = follow(eline, ecol, '|', tkOr, tkEOI)
        return
    case '"': // string
        v = string(curCh)
        nextChar()
        for curCh != '"' {
            if curCh == '\n' {
                errorMsg(eline, ecol, "EOL in string")
            }
            if curCh == etx {
                errorMsg(eline, ecol, "EOF in string")
            }
            v += string(curCh)
            nextChar()
        }
        v += string(curCh)
        nextChar()
        tok = tkString
        return
    default: // integers or identifiers
        isNumber := isDigit(curCh)
        v = ""
        for isAlnum(curCh) || curCh == '_' {
            if !isDigit(curCh) {
                isNumber = false
            }
            v += string(curCh)
            nextChar()
        }
        if len(v) == 0 {
            errorMsg(eline, ecol, "unknown character: "+string(curCh))
        }
        if isDigit(v[0]) {
            if !isNumber {
                errorMsg(eline, ecol, "invalid number: "+string(curCh))
            }
            tok = tkInteger
            return
        }
        index := lookup(v)
        if index == -1 {
            tok = tkIdent
        } else {
            tok = symtab[index].tok
        }
        return
    }
}
 
func initLex() {
    install("else", tkElse)
    install("if", tkIf)
    install("print", tkPrint)
    install("putc", tkPutc)
    install("while", tkWhile)
    nextChar()
}
 
func process() {
    tokMap := make(map[TokenType]string)
    tokMap[tkEOI] = "End_of_input"
    tokMap[tkMul] = "Op_multiply"
    tokMap[tkDiv] = "Op_divide"
    tokMap[tkMod] = "Op_mod"
    tokMap[tkAdd] = "Op_add"
    tokMap[tkSub] = "Op_subtract"
    tokMap[tkNegate] = "Op_negate"
    tokMap[tkNot] = "Op_not"
    tokMap[tkLss] = "Op_less"
    tokMap[tkLeq] = "Op_lessequal"
    tokMap[tkGtr] = "Op_greater"
    tokMap[tkGeq] = "Op_greaterequal"
    tokMap[tkEq] = "Op_equal"
    tokMap[tkNeq] = "Op_notequal"
    tokMap[tkAssign] = "Op_assign"
    tokMap[tkAnd] = "Op_and"
    tokMap[tkOr] = "Op_or"
    tokMap[tkIf] = "Keyword_if"
    tokMap[tkElse] = "Keyword_else"
    tokMap[tkWhile] = "Keyword_while"
    tokMap[tkPrint] = "Keyword_print"
    tokMap[tkPutc] = "Keyword_putc"
    tokMap[tkLparen] = "LeftParen"
    tokMap[tkRparen] = "RightParen"
    tokMap[tkLbrace] = "LeftBrace"
    tokMap[tkRbrace] = "RightBrace"
    tokMap[tkSemi] = "Semicolon"
    tokMap[tkComma] = "Comma"
    tokMap[tkIdent] = "Identifier"
    tokMap[tkInteger] = "Integer"
    tokMap[tkString] = "String"
 
    for {
        eline, ecol, tok, v := gettok()
        fmt.Printf("%5d  %5d %-16s", eline, ecol, tokMap[tok])
        if tok == tkInteger || tok == tkIdent || tok == tkString {
            fmt.Println(v)
        } else {
            fmt.Println()
        }
        if tok == tkEOI {
            return
        }
    }
}
 
func check(err error) {
    if err != nil {
        log.Fatal(err)
    }
}
 
func main() {
    if len(os.Args) < 2 {
        fmt.Println("Filename required")
        return
    }
    f, err := os.Open(os.Args[1])
    check(err)
    defer f.Close()
    scanner = bufio.NewScanner(f)
    initLex()
    process()
}

Output (test III)

    5     16 Keyword_print   
    5     40 Op_subtract     
    6     16 Keyword_putc    
    6     40 Op_less         
    7     16 Keyword_if      
    7     40 Op_greater      
    8     16 Keyword_else    
    8     40 Op_lessequal    
    9     16 Keyword_while   
    9     40 Op_greaterequal 
   10     16 LeftBrace       
   10     40 Op_equal        
   11     16 RightBrace      
   11     40 Op_notequal     
   12     16 LeftParen       
   12     40 Op_and          
   13     16 RightParen      
   13     40 Op_or           
   14     16 Op_subtract     
   14     40 Semicolon       
   15     16 Op_not          
   15     40 Comma           
   16     16 Op_multiply     
   16     40 Op_assign       
   17     16 Op_divide       
   17     40 Integer         42
   18     16 Op_mod          
   18     40 String          "String literal"
   19     16 Op_add          
   19     40 Identifier      variable_name
   20     26 Integer         10
   21     26 Integer         92
   22     26 Integer         32
   23      1 End_of_input    
Published 56 original articles · won praise 166 · views 60000 +

Guess you like

Origin blog.csdn.net/qq_36721220/article/details/103566724