第二章 生成版
一
1
/// Registers the character range delimited by fromChar and toChar as a new
/// entry in the global character-set table.
///
/// Both indexId and segmentId of the new entry are set to the table size
/// before the insertion, plus one.
///
/// @param fromChar lower endpoint stored in the entry
/// @param toChar   upper endpoint stored in the entry
/// @return the indexId stored in the new entry
int range(char fromChar, char toChar) {
    // Convert once, outside the braces: a size_t -> int conversion inside a
    // braced initializer is a narrowing conversion and is ill-formed.
    const int newId = static_cast<int>(pCharSetTable->size()) + 1;

    CharSet* charSet = new CharSet{newId, newId, fromChar, toChar};
    pCharSetTable->push_back(charSet);  // the table keeps the raw pointer
    return charSet->indexId;
}
2
int charUnion(char c1, char c2) {
CharSet* pNewSet = new CharSet;
pNewSet->indexId = pCharSetTable->size();
pNewSet->segmentId = 0;
pNewSet->fromChar = c1;
pNewSet->toChar = c2;
pCharSetTable->push_back(pNewSet);
return pNewSet->indexId;
}
3
int CharSetUnion(int charSetId, char c, List<CharSet *>* pCharSetTable) {
// 创建一个新的字符集
CharSet* newCharSet = new CharSet();
int newCharSetId = pCharSetTable->Add(newCharSet);
// 遍历原字符集,将原字符集中的所有段都添加到新字符集中
CharSet* charSet = pCharSetTable->Get(charSetId);
for (int i = 0; i < charSet->numOfSegments; i++) {
newCharSet->AddSegment(charSet->pSegmentTable[i]->start, charSet->pSegmentTable[i]->end);
}
// 将新字符与字符c的并集加入新字符集中
newCharSet->AddSegment(c, c);
return newCharSetId;
}
4
// Builds a new character set that holds every range of two existing sets and
// appends it to charSetTable.
//
// Parameters:
//   charSetId1, charSetId2 - positions in charSetTable of the sets to combine.
// Returns:
//   the charSetId stored in the new set (the table size before the append).
//
// NOTE(review): `union` is a reserved C++ keyword, so this declaration cannot
// compile under this name; it has to be renamed together with its callers.
// NOTE(review): ranges are appended as-is, first all of set 1 and then all of
// set 2; overlapping or duplicate ranges are not merged.
// NOTE(review): assumes CharSet's constructor allocates `ranges` — TODO confirm.
int union(int charSetId1, int charSetId2) {
// Create the result set; its id is the table size before insertion
CharSet* newCharSet = new CharSet();
newCharSet->charSetId = charSetTable->size();
// Fetch the two operand sets (at() is used for both lookups)
CharSet* charSet1 = charSetTable->at(charSetId1);
CharSet* charSet2 = charSetTable->at(charSetId2);
// Copy the ranges of both operands into the result
for (auto it1 = charSet1->ranges->begin(); it1 != charSet1->ranges->end(); ++it1) {
newCharSet->ranges->push_back(*it1);
}
for (auto it2 = charSet2->ranges->begin(); it2 != charSet2->ranges->end(); ++it2) {
newCharSet->ranges->push_back(*it2);
}
// Register the result in the table and hand back its id
charSetTable->push_back(newCharSet);
return newCharSet->charSetId;
}
5
/// Creates a new character set equal to an existing set with the single
/// character c removed, appends it to charSetTable and returns its id.
///
/// Each range of the source set is either copied unchanged (c lies outside
/// it) or replaced by the non-empty parts to the left and right of c.
/// A range that consists of c alone contributes nothing to the result.
///
/// @param charSetId index into charSetTable of the source set
/// @param c         character to remove
/// @return index of the new set in charSetTable
int difference(int charSetId, char c) {
    CharSet* pCharSet = charSetTable[charSetId];
    CharSet* pNewCharSet = new CharSet();

    for (const auto& pRange : pCharSet->ranges) {
        const int from = pRange->fromChar;
        const int to = pRange->toChar;

        if (c < from || c > to) {
            // c is outside this range: keep it as it is.
            pNewCharSet->addRange(from, to);
            continue;
        }

        // c is inside the range: keep only the pieces that still contain
        // characters, so a one-character range [c, c] never yields an
        // inverted range such as (c + 1, c).
        if (from < c) {
            pNewCharSet->addRange(from, c - 1);
        }
        if (c < to) {
            pNewCharSet->addRange(c + 1, to);
        }
    }

    // The new set's id is its position in the table.
    const int newCharSetId = static_cast<int>(charSetTable.size());
    charSetTable.push_back(pNewCharSet);
    return newCharSetId;
}
二
1
/// Builds a two-state NFA with a single edge between the states.
///
/// The first state is added as UNMATCH, the second as MATCH, and one edge
/// carrying (driverId, driverType) leads from the first to the second.
///
/// @param driverType driver type stored on the edge
/// @param driverId   driver id stored on the edge
/// @return the newly allocated graph; the caller receives the raw pointer
Graph *generateBasicNFA(DriverType driverType, int driverId) {
    Graph *pNFA = new Graph();

    State *pStart = pNFA->addState(UNMATCH, EMPTY);
    State *pAccept = pNFA->addState(MATCH, EMPTY);

    // The edge returned by addEdge is not needed here, so it is not kept.
    pNFA->addEdge(pStart->stateId, pAccept->stateId, driverId, driverType);
    return pNFA;
}
2
// Builds an NFA that joins two NFAs under a fresh start state and a fresh end
// state, using four connecting edges.
//
// Parameters:
//   pNFA1, pNFA2 - the operand NFAs; their start/end states are read through
//                  getStartState() / getEndState().
// Returns:
//   the newly allocated graph constructed from the two fresh states.
//
// NOTE(review): `union` is a reserved C++ keyword, so this declaration cannot
// compile under this name; it has to be renamed together with its callers.
// NOTE(review): `NULL` is normally a preprocessor macro, so `DriverType::NULL`
// does not name an enumerator wherever that macro is defined — confirm the
// intended enumerator name.
// NOTE(review): only the four connecting edges are added here; nothing in
// this function copies the operands' own states or edges into the new graph.
Graph* union(Graph* pNFA1, Graph* pNFA2) {
// Create the new start and end states
State* pStartState = new State();
State* pEndState = new State();
// Create the new NFA from those two states
Graph* pNewNFA = new Graph(pStartState, pEndState);
// Connect: new start -> each operand's start, each operand's end -> new end
pNewNFA->addEdge(pStartState, pNFA1->getStartState(), Driver(DriverType::NULL));
pNewNFA->addEdge(pStartState, pNFA2->getStartState(), Driver(DriverType::NULL));
pNewNFA->addEdge(pNFA1->getEndState(), pEndState, Driver(DriverType::NULL));
pNewNFA->addEdge(pNFA2->getEndState(), pEndState, Driver(DriverType::NULL));
// Return the new NFA
return pNewNFA;
}
3
/// Builds the concatenation ("product") of two NFAs in a fresh graph.
///
/// Nodes of pNFA1 keep their indices; nodes of pNFA2 are renumbered by adding
/// pNFA1's node count. Edges are copied with the same shift applied to the
/// edges of pNFA2. Every node that was final in pNFA1 gets one EPSILON edge to
/// pNFA2's start node and is demoted to non-final, so that only the final
/// nodes of pNFA2 accept in the result.
///
/// The previous version marked every node of pNFA2 as final and linked each
/// final node of pNFA1 to all nodes of pNFA2, which accepts far too much.
///
/// NOTE(review): assumes node 0 of pNFA2 is its start node and that node ids
/// equal node indices — confirm against Graph.
/// NOTE(review): getNode is used in its copy-by-value form only, as in the
/// node-copy loops of the original — confirm its return type.
///
/// @return the newly allocated graph; the operands are not modified
Graph * product(Graph *pNFA1, Graph *pNFA2) {
    Graph *pNewNFA = new Graph();

    const int nN1 = pNFA1->getNumNodes();
    const int nN2 = pNFA2->getNumNodes();

    // Copy the nodes of the first NFA. Final nodes are remembered and demoted
    // before insertion; they are only demoted when there is a second NFA to
    // continue into.
    vector<int> firstFinals;
    for (int i = 0; i < nN1; ++i) {
        Node *pNode = new Node();
        *pNode = pNFA1->getNode(i);
        if (nN2 > 0 && pNode->isFinal()) {
            firstFinals.push_back(i);
            pNode->setFinal(false);
        }
        pNewNFA->addNode(pNode);
    }

    // Copy the nodes of the second NFA, shifting their ids past the first.
    for (int i = 0; i < nN2; ++i) {
        Node *pNode = new Node();
        *pNode = pNFA2->getNode(i);
        pNode->setId(pNode->getId() + nN1);
        pNewNFA->addNode(pNode);
    }

    // Copy the edges; endpoints of the second NFA get the same shift.
    vector<Edge> edges1 = pNFA1->getEdges();
    for (Edge &edge : edges1) {
        pNewNFA->addEdge(edge.getFrom(), edge.getTo(), edge.getCondition());
    }
    vector<Edge> edges2 = pNFA2->getEdges();
    for (Edge &edge : edges2) {
        pNewNFA->addEdge(edge.getFrom() + nN1, edge.getTo() + nN1, edge.getCondition());
    }

    // Glue: former final nodes of the first NFA continue into the start node
    // of the second NFA (index nN1 after the shift).
    for (int finalIdx : firstFinals) {
        pNewNFA->addEdge(finalIdx, nN1, EPSILON);
    }
    return pNewNFA;
}
4
/// Wraps an NFA in a new start/end pair to form its positive closure.
///
/// Three edges (added with a nullptr first argument) are created, in order:
/// new start -> old start, old end -> new end, old end -> old start.
/// The new end state is constructed with `true`.
///
/// @param pNFA operand; edges are added to its existing end state
/// @return a new graph built from the new start and end states
Graph * plusClosure(Graph *pNFA) {
    State *entry = new State();
    State *accept = new State(true);

    // Enter the original automaton from the fresh start state.
    entry->addEdge(nullptr, pNFA->start);

    // From the original end state: first leave to the fresh end state,
    // then loop back to the original start for another pass.
    auto oldEnd = pNFA->end;
    oldEnd->addEdge(nullptr, accept);
    oldEnd->addEdge(nullptr, pNFA->start);

    return new Graph(entry, accept);
}
5
/// Builds the Kleene closure of an NFA in a new graph with two extra states.
///
/// Layout of the new graph: state 0 is the new start, states 1..n are the
/// operand's states shifted by one, and state n + 1 is the new end.
/// EPSILON edges: new start -> old start, new start -> new end, and from each
/// old accepting state -> old start and -> new end.
///
/// The previous version shifted the copied edges by one but used unshifted
/// indices for the EPSILON edges, so those edges pointed at the wrong states.
///
/// @param pNFA operand; it is only read
/// @return the newly allocated graph
Graph* closure(Graph* pNFA) {
    const int shift = 1;                      // offset of old states in the new graph
    const int nStates = pNFA->nStates + 2;    // old states + new start + new end
    const int startState = 0;
    const int endState = nStates - 1;
    const int oldStart = pNFA->startState + shift;

    Graph* pNewNFA = new Graph(nStates, startState, endState);

    // New start: enter the operand, or skip it entirely.
    pNewNFA->addEdge(startState, oldStart, EPSILON);
    pNewNFA->addEdge(startState, endState, EPSILON);

    // Old accepting states: repeat the operand, or finish.
    for (int i = 0; i < pNFA->nStates; i++) {
        if (pNFA->isAccept[i]) {
            pNewNFA->addEdge(i + shift, oldStart, EPSILON);
            pNewNFA->addEdge(i + shift, endState, EPSILON);
        }
    }

    // Copy the operand's edges with both endpoints shifted.
    for (int i = 0; i < pNFA->nStates; i++) {
        for (int j = 0; j < pNFA->nStates; j++) {
            for (auto&& label : pNFA->edges[i][j]) {
                pNewNFA->addEdge(i + shift, j + shift, label);
            }
        }
    }

    return pNewNFA;
}
6
/// Builds the "zero or one" (optional) form of an NFA.
///
/// Two new states are created: q0 (new start) and q1 (new final). Epsilon
/// transitions: q0 -> old start, old final -> q1, and q0 -> q1.
///
/// Fixes over the previous version:
///  - the q0 -> q1 bypass was missing, so the "zero occurrences" case was
///    never accepted;
///  - both states were constructed from stateList.size() before either was
///    pushed, so they received the same value.
///
/// @param pNFA operand; an epsilon transition is added to its final state
/// @return the newly allocated graph whose start/final states are q0/q1
Graph * zeroOrOne(Graph *pNFA) {
    Graph *pNewNFA = new Graph();

    // Push q0 before constructing q1 so the two size-derived values differ.
    State *q0 = new State(pNewNFA->stateList.size());
    pNewNFA->stateList.push_back(q0);

    State *q1 = new State(pNewNFA->stateList.size());
    q1->finalState = true;
    pNewNFA->stateList.push_back(q1);

    // One occurrence: go through the operand.
    q0->addEpsilonTransition(pNFA->startState);
    pNFA->finalState->addEpsilonTransition(q1);

    // Zero occurrences: bypass the operand.
    q0->addEpsilonTransition(q1);

    pNewNFA->startState = q0;
    pNewNFA->finalState = q1;
    return pNewNFA;
}
三
1
/// Computes the set of states reachable from any state in S over one edge
/// whose driverId equals c.
///
/// The edge table is scanned once and each edge's source is looked up in S,
/// instead of rescanning the whole table for every state of S.
///
/// @param nfa graph whose edge table (*nfa.pEdgeTable) is scanned
/// @param S   source states
/// @param c   driver id to match
/// @param T   output; cleared first, then filled with the nextState of every
///            matching edge
void move(const Graph& nfa, const std::unordered_set<int>& S, int c, std::unordered_set<int>& T) {
    T.clear();
    if (S.empty()) {
        return;  // nothing can match; also covers S and T being the same set
    }

    for (const auto& e : *(nfa.pEdgeTable)) {
        if (e.driverId == c && S.count(e.fromState) != 0) {
            T.insert(e.nextState);
        }
    }
}
// Computes the epsilon closure of a single state.
//
// Starting at stateId, follows every edge whose driverId is NULL_DRIVER and
// adds each state reached to `closure`, marking it in `visited`. States that
// are already marked in `visited` are not expanded again. The start state
// itself is always marked and inserted.
void epsilonClosure(int stateId, vector<bool>& visited, set<int>& closure, Graph* pNFA) {
    // Explicit stack of states whose outgoing edges still have to be scanned.
    vector<int> pending;

    visited[stateId] = true;
    closure.insert(stateId);
    pending.push_back(stateId);

    while (!pending.empty()) {
        const int current = pending.back();
        pending.pop_back();

        for (auto edge : pNFA->pEdgeTable[current]) {
            if (edge->driverId != NULL_DRIVER) {
                continue;  // only epsilon-driven edges extend the closure
            }
            const int target = edge->nextState;
            if (visited[target]) {
                continue;
            }
            visited[target] = true;
            closure.insert(target);
            pending.push_back(target);
        }
    }
}
// Computes the epsilon closure of a set of states, in place.
//
// Every state of stateSet that has not been reached yet is expanded with the
// single-state overload; all states found are then merged into stateSet.
// The visited vector is sized from pNFA->numOfStates.
void epsilonClosure(set<int>& stateSet, Graph* pNFA) {
    vector<bool> visited(pNFA->numOfStates, false);

    // Gather everything that is reachable first, then merge once.
    set<int> reached;
    for (int stateId : stateSet) {
        if (visited[stateId]) {
            continue;  // already covered by an earlier expansion
        }
        epsilonClosure(stateId, visited, reached, pNFA);
    }

    stateSet.insert(reached.begin(), reached.end());
}
// Computes the outgoing transitions of one DFA state, creating and recursing
// into any DFA state that does not exist yet.
//
// For each character set of the NFA whose fromChar and toChar are both
// non-zero, the move result of pState's NFA states is taken; if it is
// non-empty, its closure (e_closure) identifies the target DFA state. A
// target is looked up in `states` by comparing nfa_states; when none matches,
// a new DFA_State is appended (id = its position) and processed recursively
// before the edge from pState to it is recorded.
//
//   pState - DFA state whose edges are filled in
//   pNFA   - source NFA (provides charSetSize and pCharSetTable)
//   states - all DFA states found so far; may grow during the call
void DTran(DFA_State *pState, NFA *pNFA, vector<DFA_State *> &states) {
    for (int setIdx = 0; setIdx < pNFA->charSetSize; setIdx++) {
        CharSet *pCharSet = pNFA->pCharSetTable[setIdx];

        // Skip character sets with a zero endpoint.
        if (pCharSet->fromChar == 0 || pCharSet->toChar == 0) {
            continue;
        }

        vector<NFA_State *> moveRes = move(pState->nfa_states, pCharSet->fromChar, pCharSet->toChar, pNFA);
        if (moveRes.empty()) {
            continue;  // no transition on this character set
        }

        vector<NFA_State *> closureRes = e_closure(moveRes, pNFA);

        // Look for an existing DFA state with the same NFA state list.
        DFA_State *pTarget = nullptr;
        for (DFA_State *pCandidate : states) {
            if (pCandidate->nfa_states == closureRes) {
                pTarget = pCandidate;
                break;
            }
        }

        // Unknown target: register it, then expand it right away.
        if (pTarget == nullptr) {
            pTarget = new DFA_State;
            pTarget->nfa_states = closureRes;
            pTarget->id = states.size();
            states.push_back(pTarget);
            DTran(pTarget, pNFA, states);
        }

        // Record the transition pState --pCharSet--> pTarget.
        Edge *pEdge = new Edge;
        pEdge->fromState = pState->id;
        pEdge->nextState = pTarget->id;
        pEdge->driver = pCharSet;
        pState->edges.push_back(pEdge);
    }
}
2
/// Converts an NFA into a DFA by subset construction.
///
/// Each DFA state stands for a set of NFA state ids. The start DFA state is
/// the epsilon closure of {0}. For every DFA state and every character set i
/// of the NFA, the closure of move(...) gives the target subset; an edge
/// (driverId = i, type = CHARSET) is added to pDFA->pEdgeTable.
///
/// Fixes over the previous version:
///  - a State object is now created for the start subset (previously only
///    numOfStates was incremented, so there was no state to attach edges to);
///  - the subset behind every DFA state is recorded locally, so an already
///    known subset is found again instead of relying on a lookup that nothing
///    in this function ever fed;
///  - NFA states are no longer copied into the DFA state table, where their
///    ids collided with the ids handed out during construction;
///  - a DFA state is marked MATCH when its subset contains a MATCH NFA state.
///
/// NOTE(review): assumes NFA state 0 is the start state and that an NFA state
/// id is also its index in pNFA->pStateTable — confirm.
/// NOTE(review): when several MATCH states are in one subset, the category of
/// the one with the smallest id is used — confirm the intended priority.
///
/// @param pNFA source automaton; it is only read
/// @return the newly allocated DFA
Graph *NFA_to_DFA(Graph *pNFA) {
    Graph *pDFA = new Graph();

    // subsets[k] is the NFA state set represented by dfaStates[k]; the two
    // vectors always grow together.
    vector<set<int>> subsets;
    vector<State *> dfaStates;

    // Creates the DFA state for `subset`, records it and returns its slot.
    auto registerSubset = [&](const set<int> &subset) -> int {
        State *pState = new State();
        pState->stateId = pDFA->numOfStates++;
        pState->type = UNMATCH;
        for (int nfaId : subset) {
            State *pNFAState = pNFA->pStateTable->at(nfaId);
            if (pNFAState->type == MATCH) {
                pState->type = MATCH;
                pState->category = pNFAState->category;
                break;
            }
        }
        pDFA->pStateTable->push_back(pState);
        subsets.push_back(subset);
        dfaStates.push_back(pState);
        return static_cast<int>(subsets.size()) - 1;
    };

    // Start state: epsilon closure of NFA state 0.
    set<int> startSet;
    startSet.insert(0);
    startSet = epsilon_closure(startSet, pNFA);

    vector<int> workList;  // slots of DFA states still to be expanded
    workList.push_back(registerSubset(startSet));

    while (!workList.empty()) {
        const int slotT = workList.back();
        workList.pop_back();

        // Copy: `subsets` may reallocate while new states are registered.
        set<int> T = subsets[slotT];
        State *pStateT = dfaStates[slotT];

        const int charSetCount = static_cast<int>(pNFA->pCharSetTable->size());
        for (int i = 0; i < charSetCount; i++) {
            CharSet *pCharSet = pNFA->pCharSetTable->at(i);
            set<int> U = move(T, pCharSet, pNFA);
            U = epsilon_closure(U, pNFA);
            if (U.empty()) {
                continue;  // no transition on this character set
            }

            // Find the DFA state for U, creating it on first sight.
            int slotU = -1;
            for (int k = 0; k < static_cast<int>(subsets.size()); ++k) {
                if (subsets[k] == U) {
                    slotU = k;
                    break;
                }
            }
            if (slotU < 0) {
                slotU = registerSubset(U);
                workList.push_back(slotU);
            }

            Edge *pEdge = new Edge();
            pEdge->fromState = pStateT->stateId;
            pEdge->nextState = dfaStates[slotU]->stateId;
            pEdge->driverId = i;
            pEdge->type = CHARSET;
            pDFA->pEdgeTable->push_back(pEdge);
        }
    }
    return pDFA;
}
3
#include <iostream>
#include <string>
#include "Graph.h"
using namespace std;
int main() {
// 构造 NFA
Graph nfa;
nfa.graphId = 1;
State s1 = {1, UNMATCH, NONE};
State s2 = {2, UNMATCH, NONE};
State s3 = {3, UNMATCH, NONE};
State s4 = {4, UNMATCH, NONE};
State s5 = {5, MATCH, ID};
State s6 = {6, MATCH, NONE};
Edge e1 = {1, 2, 'a', CHAR};
Edge e2 = {1, 2, 'b', CHAR};
Edge e3 = {2, 3, EPSILON, EPSILON};
Edge e4 = {2, 6, EPSILON, EPSILON};
Edge e5 = {3, 4, 'a', CHAR};
Edge e6 = {4, 5, 'b', CHAR};
Edge e7 = {5, 6, EPSILON, EPSILON};
nfa.pStateTable = new List<State *>();
nfa.pStateTable->Add(&s1);
nfa.pStateTable->Add(&s2);
nfa.pStateTable->Add(&s3);
nfa.pStateTable->Add(&s4);
nfa.pStateTable->Add(&s5);
nfa.pStateTable->Add(&s6);
nfa.pEdgeTable = new List<Edge *>();
nfa.pEdgeTable->Add(&e1);
nfa.pEdgeTable->Add(&e2);
nfa.pEdgeTable->Add(&e3);
nfa.pEdgeTable->Add(&e4);
nfa.pEdgeTable->Add(&e5);
nfa.pEdgeTable->Add(&e6);
nfa.pEdgeTable->Add(&e7);
// 将 NFA 转化为 DFA
Graph *pDFA = NFA_to_DFA(&nfa);
// 打印 DFA 状态和边表
cout << "DFA State Table:" << endl;
for (int i = 0; i < pDFA->pStateTable->Count(); i++) {
State *pState = pDFA->pStateTable->Get(i);
cout << "State " << pState->stateId << ": {";
for (int j = 0; j < pState->categorySet.Count(); j++) {
cout << (int)pState->categorySet.Get(j) << ", ";
}
cout << "}" << endl;
}
cout << "DFA Edge Table:" << endl;
for (int i = 0; i < pDFA->pEdgeTable->Count(); i++) {
Edge *pEdge = pDFA->pEdgeTable->Get(i);
cout << "Edge: " << pEdge->fromState << " --" << (char)pEdge->driverId << "--> " << pEdge->nextState << endl;
}
return 0;
}