Compiler Principles Experiment 3: Design and Implementation of the Operator-Precedence Analysis Algorithm (Python)

Purpose

Parse expressions with the operator-precedence analysis method so that the program can judge whether a given expression is correct. Implementing operator-precedence analysis deepens the understanding of bottom-up syntax analysis.

Experimental content

1. Input grammar. It can be the grammar of the following arithmetic expression (you can change it as needed):
E→E+T|E-T|T
T→T*F|T/F|F
F→(E)|i
2. Analyze a given expression and output whether the expression is correct or not.
Program input/output example:
Input: 1+2;
Output: Correct
Input: (1+2)/3+4-(5+6/7);
Output: Correct
Input: ((1-2)/3+4
Output: Error
Input: 1+2-3+(*4/5)
Output: Error

Experimental procedure

(1) Read grammar, data preprocessing
Read grammar, terminal symbols and non-terminal symbols from the file. The processing of this part is consistent with the method of reading the grammar in Experiment 2.

def data_input():  # read the grammar
    """Load the grammar from ``input2.txt`` into the module-level tables.

    Every non-blank line is expected to look like ``X -> rhs``: the character
    at index 0 is taken as the left-hand nonterminal and everything from
    index 5 onwards as the right-hand side.

    Nothing is returned; these globals are filled in:
      * ``formules``       - each production line, appended as read.
      * ``non_ter``        - left-hand symbols in first-seen order.
      * ``grammarElement`` - per nonterminal, its right-hand sides joined by '|'.
      * ``terSymblo``      - every other character occurring in a production;
        'ε' is removed again at the end.
    """
    # The file is only read, so plain 'r' is enough; 'r+' would additionally
    # require write permission on the grammar file.
    with open("input2.txt", 'r', encoding="utf-8") as f:
        # Blank lines hold no production and would make line[0] raise below.
        lines = [raw.strip("\n") for raw in f if raw.strip()]
    for line in lines:
        formules.append(line)
        head, body = line[0], line[5:]
        if head not in non_ter:
            non_ter.append(head)
            grammarElement.setdefault(head, body)
        else:
            grammarElement[head] += "|" + body
    # Second pass: terminals can only be told apart once every nonterminal
    # has been collected.
    for line in lines:
        for symbol in line.replace(" -> ", ""):
            if symbol not in non_ter and symbol not in terSymblo:
                terSymblo.append(symbol)
    if 'ε' in terSymblo:
        terSymblo.remove('ε')

(2) Find the firstVT set:

def get_fistVT(formule):
    """Fold one production into the FIRSTVT set of its left-hand symbol.

    ``formule`` is a production line ``X -> rhs`` whose right-hand side starts
    at index 5. ``firstVT[X]`` (a string of terminals) is extended in place:

      * rhs starts with a terminal ``a``     -> ``a`` is added;
      * rhs starts with a nonterminal ``Q``  -> everything currently in
        ``firstVT[Q]`` is added, plus the symbol right after ``Q`` when that
        symbol is a terminal.

    ``firstVT[Q]`` may itself still be incomplete, so a single call is not
    enough; the function has to be applied repeatedly over all productions.
    """
    x = formule[0]
    i = 5  # index of the first right-hand-side symbol
    if len(formule) <= i:
        return  # no right-hand side at all: nothing to contribute
    first = formule[i]
    if first in terSymblo and first not in firstVT[x]:  # leading terminal: P->a...
        firstVT[x] += first
    elif first in non_ter:  # leading nonterminal: P->Q...
        for f in firstVT[first]:
            if f not in firstVT[x]:
                firstVT[x] += f
        if i + 1 < len(formule):
            follower = formule[i + 1]
            if follower in terSymblo and follower not in firstVT[x]:  # P->Qa...
                firstVT[x] += follower

(3) Find the lastVT set

def get_lastVT(formule):
    """Fold one production into the LASTVT set of its left-hand symbol.

    ``formule`` is a production line ``X -> rhs`` whose right-hand side starts
    at index 5. ``lastVT[X]`` (a string of terminals) is extended in place:

      * rhs ends with a terminal ``a``     -> ``a`` is added;
      * rhs ends with a nonterminal ``Q``  -> everything currently in
        ``lastVT[Q]`` is added, plus the symbol right before ``Q`` when that
        symbol is a terminal.

    ``lastVT[Q]`` may itself still be incomplete, so a single call is not
    enough; the function has to be applied repeatedly over all productions.
    """
    x = formule[0]
    rhs_start = 5  # index of the first right-hand-side symbol
    i = len(formule) - 1
    if i < rhs_start:
        return  # no right-hand side at all: nothing to contribute
    last = formule[i]
    if last in terSymblo and last not in lastVT[x]:  # trailing terminal: P->...a
        lastVT[x] += last
    elif last in non_ter:  # trailing nonterminal: P->...Q
        for f in lastVT[last]:
            if f not in lastVT[x]:
                lastVT[x] += f
        # Only look one symbol back while still inside the right-hand side;
        # otherwise a character of the "X -> " prefix would be inspected.
        if i - 1 >= rhs_start:
            previous = formule[i - 1]
            if previous in terSymblo and previous not in lastVT[x]:  # P->...aQ
                lastVT[x] += previous

(4) The operator-precedence analysis table
The table is also stored in a two-dimensional dictionary, so first write the update function for the two-dimensional dictionary

def addtodict2(thedict, key_a, key_b, val):  # setter for the two-level dictionary
    """Store ``val`` at ``thedict[key_a][key_b]``.

    The inner dictionary for ``key_a`` is created on first use; an existing
    entry for ``key_b`` is overwritten. Returns None.
    """
    inner = thedict.setdefault(key_a, {})
    inner[key_b] = val

Then build the operator-precedence analysis table

def analy(formule):  # operator-precedence table
    """Enter the precedence relations implied by one production into ``data``.

    ``formule`` is a production line ``X -> rhs`` whose right-hand side starts
    at index 5. For every pair of adjacent right-hand-side symbols:

      * terminal a, terminal b            ->  a = b
      * terminal a, nonterminal Q         ->  a < t  for every t in firstVT[Q]
      * terminal a, nonterminal, terminal b (three in a row)  ->  a = b
      * nonterminal Q, terminal b         ->  t > b  for every t in lastVT[Q]

    Every adjacent pair is visited, including the single pair of a two-symbol
    right-hand side. A later entry for the same pair of terminals overwrites
    an earlier one.
    """
    rhs_start = 5
    last = len(formule) - 1  # index of the final right-hand-side symbol
    for i in range(rhs_start, last):
        cur, nxt = formule[i], formule[i + 1]
        if cur in terSymblo and nxt in terSymblo:
            addtodict2(data, cur, nxt, "=")
        if cur in terSymblo and nxt in non_ter:
            # The three-symbol rule needs a symbol after the nonterminal.
            if i + 2 <= last and formule[i + 2] in terSymblo:
                addtodict2(data, cur, formule[i + 2], "=")
            for j in firstVT[nxt]:
                addtodict2(data, cur, j, "<")
        if cur in non_ter and nxt in terSymblo:
            for j in lastVT[cur]:
                addtodict2(data, j, nxt, ">")

(5) The analysis process of the operator-precedence algorithm
First come the helper functions used during the process

def reverseString(string):
    """Return ``string`` with its elements in the opposite order."""
    backwards = slice(None, None, -1)
    return string[backwards]

Initialize two stacks

def initStack(string):
    """Build the two stacks for ``string`` and hand them to ``toAnalyze``."""
    # The analysis stack starts out holding only the end marker '#'.
    # The input goes in reversed, so its next symbol is the last character.
    toAnalyze("#", reverseString(string))

Find the top terminal element of the analysis stack, and return the element and its subscript

def findVTele(string):
    """Find the terminal closest to the top (right end) of a stack string.

    Returns a ``(symbol, index)`` pair for the right-most character of
    ``string`` that is listed in ``terSymblo``. When there is no such
    character the pair is the NUL character together with index 0.
    """
    # Scanning from the right and stopping at the first hit yields the same
    # element as scanning from the left and remembering the last hit.
    for position in range(len(string) - 1, -1, -1):
        candidate = string[position]
        if candidate in terSymblo:
            return candidate, position
    return '\0', 0
Then comes the actual analysis process.
Analyze according to the contents of the stacks:
def toAnalyze(analysisStack, currentStack):
    """Run the operator-precedence shift/reduce loop and record the verdict.

    Parameters:
        analysisStack: the analysis stack as a string, top at the right end.
        currentStack:  the remaining input stored reversed, so the next input
                       symbol is its last character.

    Nothing is returned. The outcome is written to the global
    ``analyzeResult`` (True once '#' meets '#' with relation '='; False on any
    error), the global ``analyzeStep`` is incremented once per step, and one
    table row is printed per step.

    Relations are looked up in the global table ``data``; a reduction is
    accepted only if the string to reduce is listed in ``sentencePattern``.
    """
    global analyzeResult
    global analyzeStep
    row = " {:^5} {:^15} {:^9} {:^15} {:^12} "
    # A loop rather than one recursive call per step, so long inputs cannot
    # run into Python's recursion limit.
    while True:
        analyzeStep += 1
        analysisStack_top, _ = findVTele(analysisStack)  # top-most terminal of the analysis stack
        # Next input symbol; None if the input ran out before acceptance.
        currentStack_top = currentStack[-1] if currentStack else None
        # Symbols missing from the table yield "no relation" instead of a KeyError.
        relation = data.get(analysisStack_top, {}).get(currentStack_top)
        if relation == '<':
            print(row.format(analyzeStep, analysisStack, relation,
                             reverseString(currentStack), '移进'))
            analysisStack += currentStack_top
            currentStack = currentStack[:-1]
        elif relation == '>':
            print(row.format(analyzeStep, analysisStack, relation,
                             reverseString(currentStack), '归约'))
            # Walk down the stack collecting the string to reduce. It ends at
            # the first terminal that is '<' the terminal collected just
            # above it.
            handle = ""
            upper = None  # terminal most recently added to the handle
            for i in range(len(analysisStack) - 1, -1, -1):
                symbol = analysisStack[i]
                if 'A' <= symbol <= 'Z':  # upper-case letters stand for nonterminals
                    handle = symbol + handle
                    continue
                # The top-most terminal always belongs to the handle; comparing
                # it with itself would end the search too early when a < a holds.
                if upper is not None and data.get(symbol, {}).get(upper) == '<':
                    break
                handle = symbol + handle
                upper = symbol
            if handle in sentencePattern:
                # Keep the stack up to position i and replace the handle by 'N'.
                analysisStack = analysisStack[0:i + 1] + 'N'
            else:
                print("归约出错!待归约串为:", handle, "--->产生式右部无此句型!")
                analyzeResult = False
                return
        elif relation == '=':
            if analysisStack_top == '#' and currentStack_top == '#':
                print(row.format(analyzeStep, analysisStack, relation,
                                 reverseString(currentStack), '完成'))
                analyzeResult = True
                return
            print(row.format(analyzeStep, analysisStack, relation,
                             reverseString(currentStack), '移进'))
            analysisStack += currentStack_top
            currentStack = currentStack[:-1]
        else:
            # No relation between the two terminals (empty or missing table
            # entry): report a syntax error.
            print(row.format(analyzeStep, analysisStack, 'None',
                             reverseString(currentStack), '报错'))
            analyzeResult = False
            return

(6) Finally comes the main program, which drives each of the steps above and prints the analysis process

# Main program: read the grammar, compute FIRSTVT/LASTVT, build and print the
# precedence table, then analyze one expression read from standard input.
data_input()
data = dict()
for i in non_ter:
    firstVT.setdefault(i, "")
    lastVT.setdefault(i, "")
# Pre-fill the table so that every pair of terminals has an entry; '' means
# "no relation".
for i in terSymblo:
    for j in terSymblo:
        addtodict2(data, i, j, '')
sym = non_ter + terSymblo

# Repeat the passes until a whole pass adds nothing. The sets only ever grow
# and are bounded by the terminals, so this terminates, and unlike a fixed
# number of passes it is sufficient for any grammar.
while True:
    snapshot = (dict(firstVT), dict(lastVT))
    for i in formules:
        get_fistVT(i)
        get_lastVT(i)
    if snapshot == (firstVT, lastVT):
        break
print("firstVT集合:")
for i in non_ter:
    print(i+" : "+firstVT[i])
print("lastVT集合:")
for i in non_ter:
    print(i+" : "+lastVT[i])

# Add the production Start -> #Start# so the end marker '#' gets its
# relations in the table as well.
temp2 = Start +" -> #" +Start+"#"
formules.append(temp2)
for i in formules:
    analy(i)
print("算符优先分析表")
for i in terSymblo:
    print("\t" + i.ljust(4), end="")
print()
for i in terSymblo:
    print(i.ljust(4), end="")
    for j in terSymblo:
        if j in data[i]:
            print(data[i][j].ljust(8), end="")
        else:
            print("\t\t", end="")
    print()

# Right-hand-side shapes a reduction may produce; every nonterminal is
# written as 'N'.
sentencePattern = ["N+N", "N*N", "N/N", "(N)", "i","N^N","N,N","a"]
analyzeResult = False
analyzeStep = 0
print("请输入待分析的字符串:")
string = input()
string = string.replace(" ", "")
string+="#"  # end marker
print(" {:^4} {:^13} {:^6} {:^12} {:^10} ".format('步骤', '分析栈', '优先关系', '当前输入串', '移进或归约'))
initStack(string)
if (analyzeResult):
    print("该字符串是文法的合法句子。\n")
else:
    print("该字符串不是文法的合法句子。\n")

Complete program

import re

# Module-level tables shared by all functions below.
grammarElement = {}  # nonterminal -> its right-hand sides joined by '|'
terSymblo = ['#']  # terminal symbols; the end marker '#' is always present
non_ter = []  # nonterminal symbols
Start = 'S'  # start symbol
allSymbol = []  # all symbols
firstVT = {}  # FIRSTVT sets
lastVT = {}  # LASTVT sets
formules = []  # production lines

def data_input():  # read the grammar
    """Load the grammar from ``input2.txt`` into the module-level tables.

    Every non-blank line is expected to look like ``X -> rhs``: the character
    at index 0 is taken as the left-hand nonterminal and everything from
    index 5 onwards as the right-hand side.

    Nothing is returned; these globals are filled in:
      * ``formules``       - each production line, appended as read.
      * ``non_ter``        - left-hand symbols in first-seen order.
      * ``grammarElement`` - per nonterminal, its right-hand sides joined by '|'.
      * ``terSymblo``      - every other character occurring in a production;
        'ε' is removed again at the end.
    """
    # The file is only read, so plain 'r' is enough; 'r+' would additionally
    # require write permission on the grammar file.
    with open("input2.txt", 'r', encoding="utf-8") as f:
        # Blank lines hold no production and would make line[0] raise below.
        lines = [raw.strip("\n") for raw in f if raw.strip()]
    for line in lines:
        formules.append(line)
        head, body = line[0], line[5:]
        if head not in non_ter:
            non_ter.append(head)
            grammarElement.setdefault(head, body)
        else:
            grammarElement[head] += "|" + body
    # Second pass: terminals can only be told apart once every nonterminal
    # has been collected.
    for line in lines:
        for symbol in line.replace(" -> ", ""):
            if symbol not in non_ter and symbol not in terSymblo:
                terSymblo.append(symbol)
    if 'ε' in terSymblo:
        terSymblo.remove('ε')


def get_fistVT(formule):
    """Fold one production into the FIRSTVT set of its left-hand symbol.

    ``formule`` is a production line ``X -> rhs`` whose right-hand side starts
    at index 5. ``firstVT[X]`` (a string of terminals) is extended in place:

      * rhs starts with a terminal ``a``     -> ``a`` is added;
      * rhs starts with a nonterminal ``Q``  -> everything currently in
        ``firstVT[Q]`` is added, plus the symbol right after ``Q`` when that
        symbol is a terminal.

    ``firstVT[Q]`` may itself still be incomplete, so a single call is not
    enough; the function has to be applied repeatedly over all productions.
    """
    x = formule[0]
    i = 5  # index of the first right-hand-side symbol
    if len(formule) <= i:
        return  # no right-hand side at all: nothing to contribute
    first = formule[i]
    if first in terSymblo and first not in firstVT[x]:  # leading terminal: P->a...
        firstVT[x] += first
    elif first in non_ter:  # leading nonterminal: P->Q...
        for f in firstVT[first]:
            if f not in firstVT[x]:
                firstVT[x] += f
        if i + 1 < len(formule):
            follower = formule[i + 1]
            if follower in terSymblo and follower not in firstVT[x]:  # P->Qa...
                firstVT[x] += follower

def get_lastVT(formule):
    """Fold one production into the LASTVT set of its left-hand symbol.

    ``formule`` is a production line ``X -> rhs`` whose right-hand side starts
    at index 5. ``lastVT[X]`` (a string of terminals) is extended in place:

      * rhs ends with a terminal ``a``     -> ``a`` is added;
      * rhs ends with a nonterminal ``Q``  -> everything currently in
        ``lastVT[Q]`` is added, plus the symbol right before ``Q`` when that
        symbol is a terminal.

    ``lastVT[Q]`` may itself still be incomplete, so a single call is not
    enough; the function has to be applied repeatedly over all productions.
    """
    x = formule[0]
    rhs_start = 5  # index of the first right-hand-side symbol
    i = len(formule) - 1
    if i < rhs_start:
        return  # no right-hand side at all: nothing to contribute
    last = formule[i]
    if last in terSymblo and last not in lastVT[x]:  # trailing terminal: P->...a
        lastVT[x] += last
    elif last in non_ter:  # trailing nonterminal: P->...Q
        for f in lastVT[last]:
            if f not in lastVT[x]:
                lastVT[x] += f
        # Only look one symbol back while still inside the right-hand side;
        # otherwise a character of the "X -> " prefix would be inspected.
        if i - 1 >= rhs_start:
            previous = formule[i - 1]
            if previous in terSymblo and previous not in lastVT[x]:  # P->...aQ
                lastVT[x] += previous

def addtodict2(thedict, key_a, key_b, val):  # setter for the two-level dictionary
    """Store ``val`` at ``thedict[key_a][key_b]``.

    The inner dictionary for ``key_a`` is created on first use; an existing
    entry for ``key_b`` is overwritten. Returns None.
    """
    inner = thedict.setdefault(key_a, {})
    inner[key_b] = val

def analy(formule):  # operator-precedence table
    """Enter the precedence relations implied by one production into ``data``.

    ``formule`` is a production line ``X -> rhs`` whose right-hand side starts
    at index 5. For every pair of adjacent right-hand-side symbols:

      * terminal a, terminal b            ->  a = b
      * terminal a, nonterminal Q         ->  a < t  for every t in firstVT[Q]
      * terminal a, nonterminal, terminal b (three in a row)  ->  a = b
      * nonterminal Q, terminal b         ->  t > b  for every t in lastVT[Q]

    Every adjacent pair is visited, including the single pair of a two-symbol
    right-hand side. A later entry for the same pair of terminals overwrites
    an earlier one.
    """
    rhs_start = 5
    last = len(formule) - 1  # index of the final right-hand-side symbol
    for i in range(rhs_start, last):
        cur, nxt = formule[i], formule[i + 1]
        if cur in terSymblo and nxt in terSymblo:
            addtodict2(data, cur, nxt, "=")
        if cur in terSymblo and nxt in non_ter:
            # The three-symbol rule needs a symbol after the nonterminal.
            if i + 2 <= last and formule[i + 2] in terSymblo:
                addtodict2(data, cur, formule[i + 2], "=")
            for j in firstVT[nxt]:
                addtodict2(data, cur, j, "<")
        if cur in non_ter and nxt in terSymblo:
            for j in lastVT[cur]:
                addtodict2(data, j, nxt, ">")

def reverseString(string):
    """Return ``string`` with its elements in the opposite order."""
    backwards = slice(None, None, -1)
    return string[backwards]

# 初始化两个栈
def initStack(string):
    """Build the two stacks for ``string`` and hand them to ``toAnalyze``."""
    # The analysis stack starts out holding only the end marker '#'.
    # The input goes in reversed, so its next symbol is the last character.
    toAnalyze("#", reverseString(string))

# 寻找分析栈最顶终结符元素,返回该元素及其下标
def findVTele(string):
    """Find the terminal closest to the top (right end) of a stack string.

    Returns a ``(symbol, index)`` pair for the right-most character of
    ``string`` that is listed in ``terSymblo``. When there is no such
    character the pair is the NUL character together with index 0.
    """
    # Scanning from the right and stopping at the first hit yields the same
    # element as scanning from the left and remembering the last hit.
    for position in range(len(string) - 1, -1, -1):
        candidate = string[position]
        if candidate in terSymblo:
            return candidate, position
    return '\0', 0

# 根据栈中内容进行分析
def toAnalyze(analysisStack, currentStack):
    """Run the operator-precedence shift/reduce loop and record the verdict.

    Parameters:
        analysisStack: the analysis stack as a string, top at the right end.
        currentStack:  the remaining input stored reversed, so the next input
                       symbol is its last character.

    Nothing is returned. The outcome is written to the global
    ``analyzeResult`` (True once '#' meets '#' with relation '='; False on any
    error), the global ``analyzeStep`` is incremented once per step, and one
    table row is printed per step.

    Relations are looked up in the global table ``data``; a reduction is
    accepted only if the string to reduce is listed in ``sentencePattern``.
    """
    global analyzeResult
    global analyzeStep
    row = " {:^5} {:^15} {:^9} {:^15} {:^12} "
    # A loop rather than one recursive call per step, so long inputs cannot
    # run into Python's recursion limit.
    while True:
        analyzeStep += 1
        analysisStack_top, _ = findVTele(analysisStack)  # top-most terminal of the analysis stack
        # Next input symbol; None if the input ran out before acceptance.
        currentStack_top = currentStack[-1] if currentStack else None
        # Symbols missing from the table yield "no relation" instead of a KeyError.
        relation = data.get(analysisStack_top, {}).get(currentStack_top)
        if relation == '<':
            print(row.format(analyzeStep, analysisStack, relation,
                             reverseString(currentStack), '移进'))
            analysisStack += currentStack_top
            currentStack = currentStack[:-1]
        elif relation == '>':
            print(row.format(analyzeStep, analysisStack, relation,
                             reverseString(currentStack), '归约'))
            # Walk down the stack collecting the string to reduce. It ends at
            # the first terminal that is '<' the terminal collected just
            # above it.
            handle = ""
            upper = None  # terminal most recently added to the handle
            for i in range(len(analysisStack) - 1, -1, -1):
                symbol = analysisStack[i]
                if 'A' <= symbol <= 'Z':  # upper-case letters stand for nonterminals
                    handle = symbol + handle
                    continue
                # The top-most terminal always belongs to the handle; comparing
                # it with itself would end the search too early when a < a holds.
                if upper is not None and data.get(symbol, {}).get(upper) == '<':
                    break
                handle = symbol + handle
                upper = symbol
            if handle in sentencePattern:
                # Keep the stack up to position i and replace the handle by 'N'.
                analysisStack = analysisStack[0:i + 1] + 'N'
            else:
                print("归约出错!待归约串为:", handle, "--->产生式右部无此句型!")
                analyzeResult = False
                return
        elif relation == '=':
            if analysisStack_top == '#' and currentStack_top == '#':
                print(row.format(analyzeStep, analysisStack, relation,
                                 reverseString(currentStack), '完成'))
                analyzeResult = True
                return
            print(row.format(analyzeStep, analysisStack, relation,
                             reverseString(currentStack), '移进'))
            analysisStack += currentStack_top
            currentStack = currentStack[:-1]
        else:
            # No relation between the two terminals (empty or missing table
            # entry): report a syntax error.
            print(row.format(analyzeStep, analysisStack, 'None',
                             reverseString(currentStack), '报错'))
            analyzeResult = False
            return

# Main program: read the grammar, compute FIRSTVT/LASTVT, build and print the
# precedence table, then analyze one expression read from standard input.
data_input()
data = dict()
for i in non_ter:
    firstVT.setdefault(i, "")
    lastVT.setdefault(i, "")
# Pre-fill the table so that every pair of terminals has an entry; '' means
# "no relation".
for i in terSymblo:
    for j in terSymblo:
        addtodict2(data, i, j, '')
sym = non_ter + terSymblo

# Repeat the passes until a whole pass adds nothing. The sets only ever grow
# and are bounded by the terminals, so this terminates, and unlike a fixed
# number of passes it is sufficient for any grammar.
while True:
    snapshot = (dict(firstVT), dict(lastVT))
    for i in formules:
        get_fistVT(i)
        get_lastVT(i)
    if snapshot == (firstVT, lastVT):
        break
print("firstVT集合:")
for i in non_ter:
    print(i+" : "+firstVT[i])
print("lastVT集合:")
for i in non_ter:
    print(i+" : "+lastVT[i])

# Add the production Start -> #Start# so the end marker '#' gets its
# relations in the table as well.
temp2 = Start +" -> #" +Start+"#"
formules.append(temp2)
for i in formules:
    analy(i)
print("算符优先分析表")
for i in terSymblo:
    print("\t" + i.ljust(4), end="")
print()
for i in terSymblo:
    print(i.ljust(4), end="")
    for j in terSymblo:
        if j in data[i]:
            print(data[i][j].ljust(8), end="")
        else:
            print("\t\t", end="")
    print()

# Right-hand-side shapes a reduction may produce; every nonterminal is
# written as 'N'.
sentencePattern = ["N+N", "N*N", "N/N", "(N)", "i","N^N","N,N","a"]
analyzeResult = False
analyzeStep = 0
print("请输入待分析的字符串:")
string = input()
string = string.replace(" ", "")
string+="#"  # end marker
print(" {:^4} {:^13} {:^6} {:^12} {:^10} ".format('步骤', '分析栈', '优先关系', '当前输入串', '移进或归约'))
initStack(string)
if (analyzeResult):
    print("该字符串是文法的合法句子。\n")
else:
    print("该字符串不是文法的合法句子。\n")

test

test grammar

S -> a
S -> ^
S -> (T)
T -> T,S
T -> S

insert image description here
insert image description here
Finish

Guess you like

Origin blog.csdn.net/qq_51594676/article/details/128326089