编译原理之词法分析与语法分析的小程序

环境:python3.6 + jupyter notebook

词法分析中的NFA 转DFA

原理介绍(a case):

代码实现: 

import numpy as np
K = []  # state names in file order; data_input appends the first character of each state line
Sgm = []  # symbol list; data_input replaces it with the tokens of the first line of nfa.txt
F = []  # transition table; data_input replaces it with a 2-D NumPy int32 array (rows: states, columns: symbols)
Z = []  # not referenced anywhere in the visible code -- NOTE(review): presumably meant for accepting states; confirm
K_num = {}  # state name -> row index in F; data_input also maps '#' to -1

def data_input(path='nfa.txt'):
    """Load an NFA description from *path* into the module-level tables.

    File format: the first non-blank line lists the symbols (column 0 is
    the epsilon column); every following non-blank line starts with a
    one-character state name followed by one cell per symbol.  A cell is
    ``#`` (no transition), a single state name, or several state names
    joined with ``_``.

    Side effects: rebinds ``Sgm`` and ``F`` and refills ``K`` and ``K_num``
    in place, so calling the function again does not accumulate states.

    Encoding of ``F[i, j]`` (this is what ``ns2n`` decodes):
      * ``-1``            -> no transition
      * ``n >= 0``        -> exactly one target, state index ``n``
      * ``-d1d2...dk``    -> several targets, one decimal digit per index

    Args:
        path: file to read; defaults to ``'nfa.txt'``.

    Raises:
        ValueError: if the file has no header line, or if a multi-target
            cell names a state whose index is greater than 9 (it cannot
            be represented by the one-digit-per-state packing).
        KeyError: if a cell names an unknown state.
    """
    global Sgm, F
    with open(path, 'r') as f:
        # Blank lines (typically a trailing one) must not become states.
        rows = [line for line in f if line.strip()]
    if not rows:
        raise ValueError('%s contains no symbol header line' % path)

    Sgm = rows[0].split()
    state_rows = rows[1:]

    K.clear()
    K_num.clear()
    K_num['#'] = -1
    for index, row in enumerate(state_rows):
        K.append(row[0])
        K_num[row[0]] = index

    # Start from "no transition" so that a short row does not silently
    # gain transitions to state 0.
    F = np.full((len(state_rows), len(Sgm)), -1, dtype=np.int32)
    for i, row in enumerate(state_rows):
        for j, cell in enumerate(row[1:].split()):
            names = cell.split('_')
            if len(names) == 1:
                F[i, j] = K_num[cell]
                continue

            indices = [K_num[name] for name in names if name != '#']
            distinct = set(indices)
            if not distinct:
                F[i, j] = -1
            elif len(distinct) == 1:
                # A lone digit must stay non-negative: "-1" means "none".
                F[i, j] = indices[0]
            else:
                if max(distinct) > 9:
                    raise ValueError(
                        'cell %r: state indices above 9 cannot be packed '
                        'into a multi-target entry' % cell)
                # A leading 0 would vanish in int(); the sort is stable, so
                # only zeros move (to the end) and other digits keep order.
                indices.sort(key=lambda idx: idx == 0)
                F[i, j] = -int(''.join(str(idx) for idx in indices))

def ns2n(n):
    """Decode one transition-table entry into a list of state indices.

    ``-1`` means "no transition" and yields an empty list; any value above
    ``-1`` is a single state and is returned as a one-element list; a value
    below ``-1`` packs one state index per decimal digit, and the digits
    are returned least-significant first.
    """
    if n >= -1:
        # Callers iterate over the result, so "none" is an empty list.
        return [] if n == -1 else [n]

    remaining = -n
    digits = []
    while remaining > 0:
        remaining, last = divmod(remaining, 10)
        digits.append(int(last))
    return digits
    
def e_closure(K):
    """Return the epsilon-closure of the states in *K*.

    The closure is every state in *K* plus every state reachable from them
    through column 0 of ``F`` (the epsilon column), decoded with ``ns2n``.

    The result is built in a local set, so it no longer depends on the
    caller having emptied the global ``II`` first.  The finished closure is
    still published to ``II`` for code that reads it afterwards.

    Args:
        K: iterable of state indices (shadows the module-level ``K``).

    Returns:
        A set of plain ``int`` state indices.
    """
    global II
    closure = set()
    pending = list(K)
    # An explicit work list avoids deep recursion on long epsilon chains.
    while pending:
        state = int(pending.pop())  # normalise NumPy integers to int
        if state in closure:
            continue
        closure.add(state)
        pending.extend(ns2n(F[state][0]))
    II = closure
    return closure

def move(k, sgm):
    """Return the list of states reached from state *k* on symbol column *sgm*.

    Looks up ``F[k, sgm]`` and decodes the entry with ``ns2n``.
    """
    return ns2n(F[k, sgm])
    
pre_I_queue = []  # work list: DFA states (sets of NFA states) still to be expanded
I_queue = []  # every DFA state discovered so far, in discovery order
II = set()  # scratch set used by e_closure; emptied before each closure call
F_DFA = np.zeros(3).reshape(-1,3)


def main():
    """Run the subset construction and print each DFA state's transitions.

    Expects ``pre_I_queue`` and ``I_queue`` to be seeded with the start
    state's epsilon-closure.  For every DFA state taken from the work list
    and every non-epsilon symbol column (1 .. len(Sgm)-1) it computes the
    union of ``e_closure(move(i, sgm))`` over the member states, prints it,
    and queues it if it is new and non-empty.  Finally prints the number of
    DFA states found.
    """
    global I, II  # I is the DFA state currently being expanded
    while pre_I_queue:
        I = pre_I_queue.pop(0)
        print(I, end='')
        for sgm in range(1, len(Sgm)):  # column 0 is the epsilon column
            I_next = set()
            print(" SGM:", sgm, end=',')
            for i in I:
                II = set()  # e_closure accumulates into II; start clean
                I_next = I_next.union(e_closure(move(i, sgm)))
            print(I_next, end='')
            if I_next and I_next not in I_queue:
                I_queue.append(I_next)
                pre_I_queue.append(I_next)
        print()
    print("DFA状态数为:",len(I_queue))


# The transition table must be loaded before the first closure is computed:
# with F still empty, e_closure({0}) fails with an IndexError.
data_input()
II = set()
I = e_closure({0})  # start state is row 0, i.e. the first state listed in the file
pre_I_queue.append(I)
I_queue.append(I)
main()

input(保存为 nfa.txt;第一行是符号表,其中第一列 eps 表示 ε 转移;`#` 表示无转移,`_` 用于连接多个目标状态):

   eps a b
X # # A
A B_F_E # # 
B D C #
C # # D 
D B_E # #
E A I #
F # # G 
G # # H
H E # #
I # # Y
Y # # #

data_input结果:

转换完结果:

语法分析中 求first集 

利用算法:

扫描二维码关注公众号,回复: 6186267 查看本文章

代码实现:

NonTermSet = set()  # set of non-terminal symbols
TermSet = set()  # set of terminal symbols
First = {}  # FIRST sets, keyed by symbol
GramaDict = {}  # parsed productions: left-hand side -> set of right-hand-side alternatives
Code = []  # production lines as read from the grammar file
StartSym = ""  # start symbol
EndSym = '#'  # the end marker is "#"
Epsilon = "~"  # "~" stands in for epsilon, which has no plain-text symbol here
# English rendering of the note kept verbatim in the string below:
# (1) If X is a terminal, FIRST(X) = {X}.
# (2) If X -> epsilon is a production, epsilon is in FIRST(X).
# (3) If X is a non-terminal and X -> Y1 Y2 ... Yk, then
#     a) if Y1 => epsilon, all symbols of FIRST(Y1) are in FIRST(X);
#     b) if Y1 Y2 ... Yi-1 => epsilon, all symbols of FIRST(Yi) are in FIRST(X);
#     c) if Y1 Y2 ... Yk => epsilon, epsilon is in FIRST(X).
"""
(1)如果X是终结符,则FIRST(X)={X}
(2)如果X →ε是一个产生式则ε∈ FIRST(X)
(3)如果X是非终结符,且X →Y1 Y2…… Yk,则
   a)如 Y1 =>ε,则FIRST(Y1 )中的所有符号
都在FIRST(X)中
  b) 如Y1 Y2…… Yi-1=> ε, FIRST( Yi ),
中的所有符号都在FIRST(X)中
c) Y1 Y2…… Yk=> ε,则ε ∈ FIRST(X)
"""

def getFirst():
    """Compute FIRST sets for every grammar symbol by fixed-point iteration.

    Reads ``NonTermSet``, ``TermSet``, ``GramaDict`` and ``Epsilon`` and
    fills the module-level ``First`` dict in place.  Every symbol is a
    single character; each alternative in ``GramaDict[X]`` is a string of
    such symbols.

    For each production X -> Y1 Y2 ... Yk the non-epsilon part of
    FIRST(Yi) is added to FIRST(X) for every Yi up to and including the
    first one that cannot derive epsilon.  Epsilon itself is added when
    every Yi can derive it (including an empty right-hand side).
    """
    for X in NonTermSet:
        First[X] = set()  # non-terminals start empty
    for X in TermSet:
        First[X] = {X}  # a terminal's FIRST set is the terminal itself

    eps_only = {Epsilon}
    changed = True
    while changed:  # stop once a full pass adds nothing
        changed = False
        for X in NonTermSet:
            first_x = First[X]
            size_before = len(first_x)
            for Y in GramaDict[X]:
                nullable_prefix = True
                for symbol in Y:
                    # Always contribute FIRST(symbol) minus epsilon, even
                    # when symbol is a non-nullable non-terminal.
                    first_x |= First[symbol] - eps_only
                    if Epsilon not in First[symbol]:
                        nullable_prefix = False
                        break
                if nullable_prefix:
                    first_x.add(Epsilon)
            # Any growth, including a newly added epsilon, can enable
            # further propagation, so it must trigger another pass.
            if len(first_x) != size_before:
                changed = True
                            
# Output formatting
def display(show_list):
    """Print the items of *show_list* on one line.

    Each item is preceded by five spaces and left-justified in a
    25-character column; the line ends with a newline.
    """
    cells = ['     %-25s' % entry for entry in show_list]
    print(''.join(cells))

# Read the grammar
def readGrammar(path='grammar.txt'):
    """Append the productions found in *path* to ``Code`` and return it.

    Each non-blank line is stored without its trailing newline; blank
    lines are skipped so that a trailing empty line does not become a
    malformed production.

    Args:
        path: grammar file to read; defaults to ``'grammar.txt'``.

    Returns:
        The module-level ``Code`` list (mutated in place).

    Raises:
        SystemExit: if the file cannot be opened or read; the I/O error
            is printed first.
    """
    try:
        # "with" closes the file on every path; the old finally-clause
        # failed with an unbound name when open() itself raised.
        with open(path, 'r') as file:
            for line in file:
                line = line.rstrip('\n')
                if line.strip():
                    Code.append(line)
    except IOError as e:
        print(e)
        raise SystemExit(1)
    return Code

# Initialisation
def main():
    """Read the grammar, build the symbol tables and print the FIRST sets.

    Fills ``NonTermSet``, ``TermSet`` and ``GramaDict`` from the production
    lines returned by ``readGrammar``, takes the first character of the
    first line as the start symbol, calls ``getFirst`` and prints the
    FIRST set of every non-terminal.
    """
    global NonTermSet, TermSet, First, StartSym, Code
    Code = readGrammar()
    n = len(Code)
    print('产生式个数:', n)
    StartSym = Code[0][0]
    print("开始符号:", StartSym)
    print('产生式:G[', StartSym, ']:')
    for production in Code:
        lhs, rhs = production.split('->')
        print('     ', production)
        NonTermSet.add(lhs)
        alternatives = rhs.split('|')
        # Every right-hand-side character is a candidate terminal for now.
        TermSet.update(*alternatives)
        GramaDict.setdefault(lhs, set()).update(alternatives)  # collect the alternatives per left-hand side
    # Whatever also appears as a left-hand side is not a terminal.
    TermSet.difference_update(NonTermSet)
    print('非终结符:', NonTermSet)
    print('终结符:', TermSet)
    print('文法字典:',GramaDict)
    getFirst()
    print("FIRST集:")
    for symbol in NonTermSet:
        print('     FIRST[', symbol, ']: ', First[symbol])

# Run the FIRST-set program defined above.
main()

input(保存为 grammar.txt;每行一个产生式,候选式用 `|` 分隔,`~` 表示 ε):

E->eBaA
A->a|bAcB
B->aC|dEd
C->e|dC

output: 

猜你喜欢

转载自blog.csdn.net/weixin_40231212/article/details/89736531