(python)编译原理实验之词法分析

一、前言

本实验的目的是识别四类单词:保留字、自定义标识符(由字母和数字组成)、数字,以及运算符/界符(包括比较符),并据此建立、输出 token 序列和符号表。

二、源代码

#coding:utf8
#编译原理实验一

# Lookup tables for the lexer. judge_reserve() reports a lexeme's position in
# the table it belongs to, so the order of the entries is significant.

# Keywords of the toy language.
reserved = ['if', 'then', 'else', 'while', 'do']

# Characters accepted inside a numeric literal (the ten digits plus 'ε').
num = list('0123456789') + ['ε']

# Characters accepted inside an identifier: a-z followed by A-Z.
letter = list('abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')

# Operators and delimiters: the two-character operators come first, then the
# single-character ones.
op = ['==', '>=', '<=', '!='] + [
    '+', '-', '*', '/',
    '>', '<', '=',
    '(', ')', ';', '‘', '{', '}',
]

def judge_reserve(temp):
    """Classify a lexeme against the module-level lookup tables.

    The tables are consulted in the order reserved, op, num, letter, and the
    first one containing ``temp`` wins.

    Args:
        temp: The candidate lexeme (a single character or a longer string).

    Returns:
        A two-element list ``[category, position]`` where ``category`` is one
        of ``"reserved"``, ``"op"``, ``"num"`` or ``"letter"`` and
        ``position`` is the index of ``temp`` inside that table.  When no
        table contains ``temp`` the result is ``[-1, -1]``.
    """
    categories = (
        ("reserved", reserved),
        ("op", op),
        ("num", num),
        ("letter", letter),
    )
    for label, table in categories:
        if temp in table:
            return [label, table.index(temp)]
    # Not found in any table.
    return [-1, -1]


def tokenize_source(source, classify=None):
    """Split ``source`` into tokens and build the symbol table.

    The scanner walks the text left to right.  Whitespace only separates
    lexemes and is never emitted.  At each position the first character
    decides which kind of lexeme is read:

    * operator: the two-character form is preferred when the classifier
      recognises it, otherwise the single character is taken; the token is
      the classifier's ``["op", position]`` result;
    * letter: the longest run of letters/digits is read; a reserved word
      yields ``[word, "_"]``, anything else yields ``["custom", address]``
      and appends ``[name, "custom", "kind", "value", "field"]`` to the
      symbol table (every occurrence gets its own row);
    * digit: the longest run of digits is read and yields ``["num", text]``;
    * anything else is reported on stdout and skipped.

    A progress trace is printed to stdout for every lexeme.

    Args:
        source: The raw text to scan.  May be empty.
        classify: Callable mapping a lexeme to ``[category, position]`` with
            ``category`` in ``"reserved"``, ``"op"``, ``"num"``, ``"letter"``
            (anything else means "unknown").  Defaults to the module's
            ``judge_reserve``.

    Returns:
        A ``(token, symbol)`` tuple of lists.
    """
    if classify is None:
        # Looked up at call time so this function can be defined anywhere
        # in the module relative to judge_reserve.
        classify = judge_reserve

    token = []
    symbol = []
    length = len(source)
    index = 0

    while index < length:
        ch = source[index]
        if ch.isspace():
            # Whitespace separates lexemes; deleting it beforehand would
            # glue neighbouring words together ("if a" -> "ifa").
            index += 1
            continue

        print("本轮开始处理位置为:", index)
        kind = classify(ch)[0]

        if kind == "reserved":
            # Only reachable if the classifier reports a single character
            # as a reserved word.
            token.append([ch, "_"])
            print("最终得到保留字", ch, "\n")
            index += 1
        elif kind == "op":
            # Prefer the two-character operator ("==", ">=", ...) if any.
            pair = source[index:index + 2]
            if len(pair) == 2 and classify(pair)[0] == "op":
                lexeme = pair
            else:
                lexeme = ch
            entry = classify(lexeme)
            token.append(entry)
            print("最终得到字符", entry, "\n")
            index += len(lexeme)
        elif kind == "letter":
            # Bounded scan: stops at end of input instead of spinning
            # forever on the last character.
            end = index + 1
            while end < length and classify(source[end])[0] in ("letter", "num"):
                end += 1
            name = source[index:end]
            if classify(name)[0] == "reserved":
                token.append([name, "_"])
                print("最终得到保留字", name, "\n")
            else:
                # The address is the row the name is about to occupy.
                token.append(["custom", len(symbol)])
                print("最终得到自定义字", name, "\n")
                symbol.append([name, "custom", "kind", "value", "field"])
            index = end
        elif kind == "num":
            end = index + 1
            while end < length and classify(source[end])[0] == "num":
                end += 1
            value = source[index:end]
            token.append(["num", value])
            print("最终得到数字/字母", value, "\n")
            index = end
        else:
            print("第", index, "位置出现错误,已略过")
            index += 1

    return token, symbol


if __name__ == "__main__":
    # Prompt for a line of source text, tokenize it, then dump the token
    # list followed by the symbol table.
    print("请输入C源代码,保留字有'if', 'then', 'else', 'while', 'do',")
    print("比较符有'==', '>=', '<=', '!=', '+', '-', '*', '/', '>', '<', '=', '(', ')', ';', '‘','{','}',")
    s = input("以及所有数字和字母,如‘if (a==0)do {stha};else(b==15) do {sthb};’\n")

    token, symbol = tokenize_source(s)

    print(token)
    print(symbol)

猜你喜欢

转载自blog.csdn.net/qq_41584385/article/details/104089954