Compilation Principle Experiment 2: Design and Implementation of Predictive Analysis Algorithms (python)

Purpose

Through the design and implementation of a predictive parsing algorithm, deepen the understanding of top-down parsing methods, and in particular of the conditions a grammar must satisfy for top-down parsing.

Experimental content

Input the grammar and the input string to be analyzed, and output the predictive analysis process and results.

experimental report

(1) Text preprocessing:
Read the grammar from a file, convert it into a dictionary that the later steps can use, and record the terminal and non-terminal symbols along the way. This part involves two functions: data_input(), which reads the grammar from the text file, and getGrammar(x), which splits the productions of non-terminal x into a list of alternatives.

def data_input(path="input.txt"): # read the grammar
    """Load a grammar file into the module-level symbol tables.

    Every non-blank line must look like ``X -> body``.  Bodies of the same
    left-hand side are joined with ``'|'`` in ``grammarElement[X]``, ``X`` is
    recorded in ``non_ter`` (in order of first appearance), and every other
    character found in a body is appended to ``terSymblo`` as a terminal.
    The rest of the program indexes production bodies character by
    character, so each grammar symbol is expected to be a single character.

    Args:
        path: Grammar file to read (UTF-8).

    Raises:
        ValueError: If a non-blank line has no ``->`` or no left-hand side.
    """
    # Read-only access is enough; 'r+' would needlessly require write permission.
    with open(path, 'r', encoding="utf-8") as f:
        lines = [raw.strip() for raw in f]

    bodies = []
    for line in lines:
        if not line:
            # Blank (e.g. trailing) lines carry no production.
            continue
        head, arrow, body = line.partition("->")
        head = head.strip()
        body = body.strip()
        if not arrow or not head:
            raise ValueError("malformed production line: %r" % line)
        if head not in non_ter:
            non_ter.append(head)
            grammarElement.setdefault(head, body)
        else:
            grammarElement[head] += "|" + body
        bodies.append(body)

    # Terminals can only be identified once every non-terminal is known.
    for body in bodies:
        for ch in body:
            # 'ε' is the empty string, '|' separates alternatives, and
            # whitespace is layout: none of them is a terminal.
            if ch == 'ε' or ch == '|' or ch.isspace():
                continue
            if ch not in non_ter and ch not in terSymblo:
                terSymblo.append(ch)

def getGrammar(x): # text preprocessing
    """Turn the raw production text of non-terminal ``x`` into a list.

    ``grammarElement[x]`` must hold the ``'|'``-joined bodies of ``x``.  It is
    replaced by a list with one string per alternative.  Characters that the
    symbol pattern does not recognise (for example spaces) are dropped, and
    alternatives left empty by that filtering are skipped.

    Args:
        x: Key of ``grammarElement`` to normalise in place.
    """
    # Raw string: "\-" inside a normal literal is an invalid escape sequence.
    # ',' and '^' are accepted so that this copy agrees with the final program.
    # Compiled once, outside the loop.
    symbol = re.compile(r"[,*+^\-a-zA-Zε()]'?")
    formules = []
    # str.split returns the whole string as a single item when there is no '|'.
    for alternative in grammarElement[x].split('|'):
        kept = symbol.findall(alternative)
        if kept:
            formules.append("".join(kept))
    grammarElement[x] = formules

(2) Compute the FIRST sets:

def getFirst(T): # compute the FIRST set
    """Return FIRST(T) as a string of symbols and cache it in ``firstSET``.

    For a terminal (or ``'ε'``) the result is the symbol itself.  For a
    non-terminal every production body is scanned left to right: the FIRST
    symbols of each body symbol are added, and scanning continues to the next
    symbol only while the current one can derive ``'ε'``.  ``'ε'`` itself is
    added only when the whole body can vanish.

    Left-recursive grammars are not supported: the recursion on the leading
    symbol never reaches a cached entry and does not terminate.

    Args:
        T: A single grammar symbol.

    Returns:
        str: The FIRST symbols, without duplicates, in order of discovery.

    Raises:
        KeyError: If ``T`` is neither a terminal nor a key of ``grammarElement``.
    """
    if T in terSymblo or T == 'ε':
        firstSET[T] = T
        return T
    cached = firstSET.get(T, None)
    if cached:
        return cached

    # A dict is used as an insertion-ordered set so the output is
    # deterministic and free of duplicates.
    found = {}
    for prodForm in grammarElement[T]:
        body_nullable = True
        for symbol in prodForm:
            symbol_first = getFirst(symbol)
            for ch in symbol_first:
                if ch != 'ε':
                    found[ch] = None
            if 'ε' not in symbol_first:
                # This symbol always produces something; later symbols
                # cannot start the body.
                body_nullable = False
                break
        if body_nullable:
            found['ε'] = None
    tempFirst = "".join(found)
    firstSET[T] = tempFirst
    return tempFirst

(3) Compute the FOLLOW sets:

def get_follow(T): # compute the FOLLOW set
    """Extend ``followSET[T]`` with what one pass over the grammar yields.

    For every occurrence of ``T`` in a production body ``A -> ... T rest``:
    the FIRST symbols of ``rest`` (except ``'ε'``) are added, and when
    ``rest`` is empty or can derive ``'ε'`` entirely, FOLLOW(A) is added too.
    FOLLOW sets depend on each other, so callers must repeat the pass over
    all non-terminals until no set changes.

    Args:
        T: The symbol whose FOLLOW set is updated; ``followSET[T]`` must exist.

    Raises:
        KeyError: If a non-terminal after ``T`` has no entry in ``firstSET``,
            or a left-hand side has no entry in ``followSET``.
    """
    # Walk (left-hand side, body) pairs directly: looking a body up by its
    # text would pick the wrong left-hand side when two non-terminals share
    # an identical body.
    for lhs, bodies in grammarElement.items():
        for body in bodies:
            for pos, symbol in enumerate(body):
                if symbol != T:
                    continue
                rest_nullable = True
                for after in body[pos + 1:]:
                    if after in terSymblo:
                        # A terminal starts with itself; no firstSET entry needed.
                        starters = after
                    elif after == 'ε':
                        starters = 'ε'
                    else:
                        starters = firstSET[after]
                    for e in starters:
                        if e != 'ε' and e not in followSET[T]:
                            followSET[T] += e
                    if 'ε' not in starters:
                        rest_nullable = False
                        break
                if rest_nullable:
                    # Nothing (visible) follows T here, so whatever follows
                    # the left-hand side also follows T.
                    for e in followSET[lhs]:
                        if e != 'ε' and e not in followSET[T]:
                            followSET[T] += e

(4) Build the predictive parsing table.
The predictive parsing table in this program is stored as a two-level (nested) dictionary. First, a helper function that sets an entry of a two-level dictionary must be written.

def addtodict2(thedict, key_a, key_b, val): # set an entry of a two-level dictionary
    """Store ``val`` at ``thedict[key_a][key_b]``.

    The inner dictionary for ``key_a`` is created on first use; an existing
    value under ``key_b`` is overwritten.

    Args:
        thedict: Outer dictionary, modified in place.
        key_a: Outer key (row).
        key_b: Inner key (column).
        val: Value to store.
    """
    if key_a not in thedict:
        # First entry of this row: create the inner dictionary with it.
        thedict[key_a] = {key_b: val}
        return
    thedict[key_a][key_b] = val

Next is the construction of the predictive analysis table:

def analy():   # build and print the predictive parsing table
    """Fill the module-level table ``data`` and print it.

    ``data[A][a]`` receives the body of the production to apply when
    non-terminal ``A`` is on top of the stack and terminal ``a`` is the next
    input symbol, or ``'error'`` when no production applies.  A production
    ``A -> body`` is entered under every terminal in FIRST(body); if the body
    can derive ``'ε'`` it is also entered under every terminal in FOLLOW(A)
    whose cell is still free.  If several productions claim the same cell the
    grammar is not LL(1); the later FIRST-derived production wins.
    """
    for i in non_ter:
        # Start from an all-error row so every cell exists.
        for j in terSymblo:
            addtodict2(data, i, j, 'error')
        nullable_bodies = []
        for k in grammarElement[i]:
            # FIRST of the body: scan symbols while they can derive 'ε'.
            body_nullable = True
            for symbol in k:
                if symbol == 'ε':
                    symbol_first = 'ε'
                elif symbol in terSymblo:
                    symbol_first = symbol
                else:
                    symbol_first = firstSET[symbol]
                for j in symbol_first:
                    if j != 'ε' and j in terSymblo:
                        addtodict2(data, i, j, k)
                if 'ε' not in symbol_first:
                    body_nullable = False
                    break
            if body_nullable:
                nullable_bodies.append(k)
        # A vanishing body is chosen on the symbols that may follow i, but
        # never in place of an entry derived from FIRST.
        for k in nullable_bodies:
            for j in followSET[i]:
                if j in terSymblo and data[i][j] == 'error':
                    addtodict2(data, i, j, k)
    print("\t\t",end="")
    for i in terSymblo:
        print(i.ljust(8),end="")
    print()
    for i in data.keys():
        print(i.ljust(8),end="")
        # Print cells in header order so the columns line up.
        for j in terSymblo:
            cell = data[i].get(j, 'error')
            if cell != 'error':
                temp = i+'->'+cell
                print(temp.ljust(8),end="")
            else:
                print('error'.ljust(8),end="")
        print()

(5) Main program
The main program drives the sub-functions above, then runs the analysis of the input string and prints each step.

# Report the intermediate results: preprocessed grammar, FIRST and FOLLOW sets.
print("预处理:")
for i in grammarElement.keys():
    print(i+"->",end="")
    print(grammarElement[i])
print("FIRST集合:")
for i in non_ter:
    print(i+" : "+firstSET[i])
print("FOLLOW集合:")
for i in non_ter:
    print(i+" : "+followSET[i])
# analy() fills this module-level table: data[non_terminal][terminal] -> body.
data= dict()
print("预测分析表:")
analy()
# Read the sentence to analyse and terminate it with the end marker '#'.
instr = input("请输入需要进行分析的语句:\n")
instr+="#"
# The stack starts with the end marker below the start symbol.
stack = ['#', Start]
ind = 0
flag = 0  # becomes 1 once the sentence has been accepted
print("分析过程:\n符号栈\t\t\t输入串\t\t\t\t所用产生式")
while stack and ind < len(instr):
    print("".join(stack).ljust(16), end="")
    top = stack.pop()
    now = instr[ind]
    print(instr[ind:].ljust(20), end="")

    if now == top:
        print()
        if now == "#":
            # End marker matched on both sides: the sentence is accepted.
            flag = 1
            break
        ind += 1
        continue

    # A terminal on the stack that differs from the input has no table row;
    # treat a missing row or cell as an error instead of relying on KeyError.
    line = data.get(top, {}).get(now, 'error')
    if line == 'error':
        # No production applies: reject rather than discarding the stack
        # symbol, which could let an invalid sentence be reported as valid.
        break
    print((top + "->" + line).ljust(16))
    if line != 'ε':
        # Push the body right to left so its first symbol ends up on top.
        stack.extend(reversed(line))

if flag:
    print("句子符合文法")
else:
    print("\n句子不符合文法")

final program

final program

import re

# Module-level tables shared by every function below.
grammarElement = {}  # non-terminal -> productions ('|'-joined text, then a list after getGrammar)
terSymblo = ['#']  # terminals; the end marker '#' is always present
non_ter = []  # non-terminals, in order of first appearance
Start = 'S'  # start symbol of the grammar
allSymbol = []  # all symbols (not referenced by the code visible here)
firstSET = {}  # symbol -> string made of its FIRST symbols
followSET = {}  # symbol -> string made of its FOLLOW symbols

def getGrammar(x): # text preprocessing
    """Turn the raw production text of non-terminal ``x`` into a list.

    ``grammarElement[x]`` must hold the ``'|'``-joined bodies of ``x``.  It is
    replaced by a list with one string per alternative.  Characters that the
    symbol pattern does not recognise (for example spaces) are dropped, and
    alternatives left empty by that filtering are skipped.

    Args:
        x: Key of ``grammarElement`` to normalise in place.
    """
    # Raw string: "\-" inside a normal literal is an invalid escape sequence.
    # Compiled once, outside the loop.
    symbol = re.compile(r"[,*+^\-a-zA-Zε()]'?")
    formules = []
    # str.split returns the whole string as a single item when there is no '|'.
    for alternative in grammarElement[x].split('|'):
        kept = symbol.findall(alternative)
        if kept:
            formules.append("".join(kept))
    grammarElement[x] = formules

def getFirst(T): # compute the FIRST set
    """Return FIRST(T) as a string of symbols and cache it in ``firstSET``.

    For a terminal (or ``'ε'``) the result is the symbol itself.  For a
    non-terminal every production body is scanned left to right: the FIRST
    symbols of each body symbol are added, and scanning continues to the next
    symbol only while the current one can derive ``'ε'``.  ``'ε'`` itself is
    added only when the whole body can vanish.

    Left-recursive grammars are not supported: the recursion on the leading
    symbol never reaches a cached entry and does not terminate.

    Args:
        T: A single grammar symbol.

    Returns:
        str: The FIRST symbols, without duplicates, in order of discovery.

    Raises:
        KeyError: If ``T`` is neither a terminal nor a key of ``grammarElement``.
    """
    if T in terSymblo or T == 'ε':
        firstSET[T] = T
        return T
    cached = firstSET.get(T, None)
    if cached:
        return cached

    # A dict is used as an insertion-ordered set: unlike set(), its iteration
    # order does not change from run to run.
    found = {}
    for prodForm in grammarElement[T]:
        body_nullable = True
        for symbol in prodForm:
            symbol_first = getFirst(symbol)
            for ch in symbol_first:
                if ch != 'ε':
                    found[ch] = None
            if 'ε' not in symbol_first:
                # This symbol always produces something; later symbols
                # cannot start the body.
                body_nullable = False
                break
        if body_nullable:
            found['ε'] = None
    tempFirst = "".join(found)
    firstSET[T] = tempFirst
    return tempFirst

def get_follow(T): # compute the FOLLOW set
    """Extend ``followSET[T]`` with what one pass over the grammar yields.

    For every occurrence of ``T`` in a production body ``A -> ... T rest``:
    the FIRST symbols of ``rest`` (except ``'ε'``) are added, and when
    ``rest`` is empty or can derive ``'ε'`` entirely, FOLLOW(A) is added too.
    FOLLOW sets depend on each other, so callers must repeat the pass over
    all non-terminals until no set changes.

    Args:
        T: The symbol whose FOLLOW set is updated; ``followSET[T]`` must exist.

    Raises:
        KeyError: If a non-terminal after ``T`` has no entry in ``firstSET``,
            or a left-hand side has no entry in ``followSET``.
    """
    # Walk (left-hand side, body) pairs directly: looking a body up by its
    # text would pick the wrong left-hand side when two non-terminals share
    # an identical body.
    for lhs, bodies in grammarElement.items():
        for body in bodies:
            for pos, symbol in enumerate(body):
                if symbol != T:
                    continue
                rest_nullable = True
                for after in body[pos + 1:]:
                    if after in terSymblo:
                        # A terminal starts with itself; no firstSET entry needed.
                        starters = after
                    elif after == 'ε':
                        starters = 'ε'
                    else:
                        starters = firstSET[after]
                    for e in starters:
                        if e != 'ε' and e not in followSET[T]:
                            followSET[T] += e
                    if 'ε' not in starters:
                        rest_nullable = False
                        break
                if rest_nullable:
                    # Nothing (visible) follows T here, so whatever follows
                    # the left-hand side also follows T.
                    for e in followSET[lhs]:
                        if e != 'ε' and e not in followSET[T]:
                            followSET[T] += e



def addtodict2(thedict, key_a, key_b, val): # set an entry of a two-level dictionary
    """Store ``val`` at ``thedict[key_a][key_b]``.

    The inner dictionary for ``key_a`` is created on first use; an existing
    value under ``key_b`` is overwritten.

    Args:
        thedict: Outer dictionary, modified in place.
        key_a: Outer key (row).
        key_b: Inner key (column).
        val: Value to store.
    """
    if key_a not in thedict:
        # First entry of this row: create the inner dictionary with it.
        thedict[key_a] = {key_b: val}
        return
    thedict[key_a][key_b] = val

def analy():   # build and print the predictive parsing table
    """Fill the module-level table ``data`` and print it.

    ``data[A][a]`` receives the body of the production to apply when
    non-terminal ``A`` is on top of the stack and terminal ``a`` is the next
    input symbol, or ``'error'`` when no production applies.  A production
    ``A -> body`` is entered under every terminal in FIRST(body); if the body
    can derive ``'ε'`` it is also entered under every terminal in FOLLOW(A)
    whose cell is still free.  If several productions claim the same cell the
    grammar is not LL(1); the later FIRST-derived production wins.
    """
    for i in non_ter:
        # Start from an all-error row so every cell exists.
        for j in terSymblo:
            addtodict2(data, i, j, 'error')
        nullable_bodies = []
        for k in grammarElement[i]:
            # FIRST of the body: scan symbols while they can derive 'ε'.
            body_nullable = True
            for symbol in k:
                if symbol == 'ε':
                    symbol_first = 'ε'
                elif symbol in terSymblo:
                    symbol_first = symbol
                else:
                    symbol_first = firstSET[symbol]
                for j in symbol_first:
                    if j != 'ε' and j in terSymblo:
                        addtodict2(data, i, j, k)
                if 'ε' not in symbol_first:
                    body_nullable = False
                    break
            if body_nullable:
                nullable_bodies.append(k)
        # A vanishing body is chosen on the symbols that may follow i, but
        # never in place of an entry derived from FIRST.
        for k in nullable_bodies:
            for j in followSET[i]:
                if j in terSymblo and data[i][j] == 'error':
                    addtodict2(data, i, j, k)
    print("\t\t",end="")
    for i in terSymblo:
        print(i.ljust(8),end="")
    print()
    for i in data.keys():
        print(i.ljust(8),end="")
        # Print cells in header order so the columns line up.
        for j in terSymblo:
            cell = data[i].get(j, 'error')
            if cell != 'error':
                temp = i+'->'+cell
                print(temp.ljust(8),end="")
            else:
                print('error'.ljust(8),end="")
        print()

def data_input(path="input2.txt"): # read the grammar
    """Load a grammar file into the module-level symbol tables.

    Every non-blank line must look like ``X -> body``.  Bodies of the same
    left-hand side are joined with ``'|'`` in ``grammarElement[X]``, ``X`` is
    recorded in ``non_ter`` (in order of first appearance), and every other
    character found in a body is appended to ``terSymblo`` as a terminal.
    The rest of the program indexes production bodies character by
    character, so each grammar symbol is expected to be a single character.

    Args:
        path: Grammar file to read (UTF-8).

    Raises:
        ValueError: If a non-blank line has no ``->`` or no left-hand side.
    """
    # Read-only access is enough; 'r+' would needlessly require write permission.
    with open(path, 'r', encoding="utf-8") as f:
        lines = [raw.strip() for raw in f]

    bodies = []
    for line in lines:
        if not line:
            # Blank (e.g. trailing) lines carry no production.
            continue
        head, arrow, body = line.partition("->")
        head = head.strip()
        body = body.strip()
        if not arrow or not head:
            raise ValueError("malformed production line: %r" % line)
        if head not in non_ter:
            non_ter.append(head)
            grammarElement.setdefault(head, body)
        else:
            grammarElement[head] += "|" + body
        bodies.append(body)

    # Terminals can only be identified once every non-terminal is known.
    for body in bodies:
        for ch in body:
            # 'ε' is the empty string, '|' separates alternatives, and
            # whitespace is layout: none of them is a terminal.
            if ch == 'ε' or ch == '|' or ch.isspace():
                continue
            if ch not in non_ter and ch not in terSymblo:
                terSymblo.append(ch)

# Load the grammar, then turn every non-terminal's production text into a list.
data_input()
for i in non_ter:
    getGrammar(i)
# Every symbol starts with an empty FOLLOW set, except that the start symbol
# is followed by the end marker '#'.
sym = non_ter+terSymblo
for i in sym:
    followSET.setdefault(i, "#" if i == Start else "")
# getFirst caches FIRST for non-terminals and registers FIRST(t) = t for a
# terminal t.  Calling it for the terminals too guarantees that every symbol
# has a firstSET entry, instead of special-casing a single terminal.
for i in sym:
    getFirst(i)
# Kept so that firstSET[')'] exists even for grammars that do not use ')'.
firstSET.setdefault(')',"")
# FOLLOW sets depend on each other: repeat the pass until nothing changes
# rather than for a fixed number of rounds.  The sets only ever grow and the
# alphabet is finite, so this terminates.
while True:
    before = dict(followSET)
    for i in non_ter:
        get_follow(i)
    if followSET == before:
        break

# Report the intermediate results: preprocessed grammar, FIRST and FOLLOW sets.
print("预处理:")
for i in grammarElement.keys():
    print(i+"->",end="")
    print(grammarElement[i])
print("FIRST集合:")
for i in non_ter:
    print(i+" : "+firstSET[i])
print("FOLLOW集合:")
for i in non_ter:
    print(i+" : "+followSET[i])
# analy() fills this module-level table: data[non_terminal][terminal] -> body.
data= dict()
print("预测分析表:")
analy()
# Read the sentence to analyse and terminate it with the end marker '#'.
instr = input("请输入需要进行分析的语句:\n")
instr+="#"
# The stack starts with the end marker below the start symbol.
stack = ['#', Start]
ind = 0
flag = 0  # becomes 1 once the sentence has been accepted
print("分析过程:\n符号栈\t\t\t输入串\t\t\t\t所用产生式")
while stack and ind < len(instr):
    print("".join(stack).ljust(16), end="")
    top = stack.pop()
    now = instr[ind]
    print(instr[ind:].ljust(20), end="")

    if now == top:
        print()
        if now == "#":
            # End marker matched on both sides: the sentence is accepted.
            flag = 1
            break
        ind += 1
        continue

    # A terminal on the stack that differs from the input has no table row;
    # treat a missing row or cell as an error instead of relying on KeyError.
    line = data.get(top, {}).get(now, 'error')
    if line == 'error':
        # No production applies: reject rather than discarding the stack
        # symbol, which could let an invalid sentence be reported as valid.
        break
    print((top + "->" + line).ljust(16))
    if line != 'ε':
        # Push the body right to left so its first symbol ends up on top.
        stack.extend(reversed(line))

if flag:
    print("句子符合文法")
else:
    print("\n句子不符合文法")

test

Test grammar:


S -> a
S -> ^
S -> (T)
T -> St
t -> ,St
t -> ε

run
insert image description here
insert image description here
complete

Guess you like

Origin blog.csdn.net/qq_51594676/article/details/128325569