词法分析器2(ε

#词法分析器2(ε| 来源: 网络整理| 查看: 265

接上一篇我们已经得到了一个完整的ε-NFA，下面来说说如何将ε-NFA转换为DFA（确定有限自动机）。

DFA的状态

在DFA中，某个状态对应到ε-NFA中的若干状态，应此我们将会得到下面这样的一个结构。

struct DFA_State { set content; bool bFlag; #ifdef _DEBUG uint idx; #endif DFA_State(const set& x) : content(x), bFlag(false) { #ifdef _DEBUG idx = inc(); #endif } inline const bool operator==(const DFA_State& x)const { if (&x == this) return true; return content == x.content; } #ifdef _DEBUG inline uint inc() { static uint i = 0; return i++; } #endif };

可以看到，为了调试方便我们在结构中定义了状态的唯一ID以及对应到ε-NFA状态的集合和一个标记位。

DFA的边

根据上一篇的经验，不难想到DFA的边应该是什么样的，下面直接给出代码，不做说明。

struct DFA_Edge { struct { Char_Type char_value; String_Type string_value; }data; enum Edge_Type { TUnknown = 0, TNot = 1, TChar = 2, TString = 4 }; uchar edge_type; DFA_State* pFrom; DFA_State* pTo; DFA_Edge(const Char_Type& x, bool bNot, DFA_State* pFrom, DFA_State* pTo) : pFrom(pFrom), pTo(pTo) { data.char_value = x; edge_type = bNot ? (TChar | TNot) : TChar; } DFA_Edge(const String_Type& x, bool bNot, DFA_State* pFrom, DFA_State* pTo) : pFrom(pFrom), pTo(pTo) { data.string_value = x; edge_type = bNot ? (TString | TNot) : TString; } inline const bool isNot()const { return (edge_type & TNot) == TNot; } inline const bool isChar()const { return (edge_type & TChar) == TChar; } inline const bool isString()const { return (edge_type & TString) == TString; } const Edge_Type edgeType()const { if (isChar()) return TChar; else if (isString()) return TString; else return TUnknown; } const bool operatorisEpsilon()) { if (info.states.insert(j->pTo).second) tmp.insert(j->pTo); } else if (j->isChar()) info.chars.insert(pair(j->data.char_value, j->isNot())); else if (j->isString()) info.strings.insert(pair(j->data.string_value, j->isNot())); } tmp.erase(pState); } } }

其中用到的EpsilonClosureInfo结构为

struct EpsilonClosureInfo { set states; set chars; set strings; EpsilonClosureInfo() {} EpsilonClosureInfo(const set& states, const set& chars, const set& strings) : states(states) , chars(chars) , strings(strings) {} EpsilonClosureInfo(const EpsilonClosureInfo& x) { states = x.states; chars = x.chars; strings = x.strings; } };

需要保存的是状态集和向后字符集。

状态转移函数

通过状态转移函数，输入一个集合T和一个字符a将可得到所有通过T中的每一个状态和a边所能达到的状态的集合。应此代码如下

set move(const DFA_State& t, const Char_Type& c, bool bNot) { set result; for (typename set::const_iterator i = t.content.begin(), m = t.content.end(); i != m; ++i) { for (typename vector::const_iterator j = epsilonNFA_Edges[*i].begin(), n = epsilonNFA_Edges[*i].end(); j != n; ++j) { if (j->isChar() && j->data.char_value == c && j->isNot() == bNot) result.insert(j->pTo); } } return result; } set move(const DFA_State& t, const String_Type& s, bool bNot) { set result; for (typename set::const_iterator i = t.content.begin(), m = t.content.end(); i != m; ++i) { for (typename vector::const_iterator j = epsilonNFA_Edges[*i].begin(), n = epsilonNFA_Edges[*i].end(); j != n; ++j) { if (j->isString() && j->data.string_value == s && j->isNot() == bNot) result.insert(j->pTo); } } return result; }

为了分别支持Char_Type和String_Type的字符我们定义了两个move函数。

最后我们给出子集构造算法的代码

void buildDFA() { set start; start.insert(pEpsilonStart); typedef pair c_type; map c; queue c2; pDFAStart = DFA_State_Alloc::allocate(); EpsilonClosureInfo info; epsilonClosure(start, info); construct(pDFAStart, info.states); c_type ct(pDFAStart, info); c[info.states.size()].push_back(ct); c2.push(ct); if (isEndDFAStatus(pDFAStart)) pDFAEnds.insert(pDFAStart); context.dfa_States.insert(pDFAStart); while (!c2.empty()) { DFA_State* t = c2.front().first; set chars = c2.front().second.chars; set strings = c2.front().second.strings; t->bFlag = true; for (typename set::const_iterator i = chars.begin(), m = chars.end(); i != m; ++i) { EpsilonClosureInfo info; epsilonClosure(move(*t, i->first, i->second), info); DFA_State* p = getDFAState(info.states, c); if (p) // 如果这个状态已存在 { dfa_Edges.insert(DFA_Edge(i->first, i->second, t, p)); } else { DFA_State* pState = DFA_State_Alloc::allocate(); construct(pState, info.states); context.dfa_States.insert(pState); if (isEndDFAStatus(pState)) pDFAEnds.insert(pState); c_type ct(pState, info); c[info.states.size()].push_back(ct); c2.push(ct); dfa_Edges.insert(DFA_Edge(i->first, i->second, t, pState)); } } for (typename set::const_iterator i = strings.begin(), m = strings.end(); i != m; ++i) { EpsilonClosureInfo info; epsilonClosure(move(*t, i->first, i->second), info); DFA_State* p = getDFAState(info.states, c); if (p) // 如果这个状态已存在 { dfa_Edges.insert(DFA_Edge(i->first, i->second, t, p)); } else { DFA_State* pState = DFA_State_Alloc::allocate(); construct(pState, info.states); context.dfa_States.insert(pState); if (isEndDFAStatus(pState)) pDFAEnds.insert(pState); c_type ct(pState, info); c[info.states.size()].push_back(ct); c2.push(ct); dfa_Edges.insert(DFA_Edge(i->first, i->second, t, pState)); } } c2.pop(); } }

尾声

同样我们来编写一个函数来打印出DFA。

void printDFA() { printf("---------- DFA Start ----------\n"); set tmp; for (typename set::const_iterator i = dfa_Edges.begin(), m = dfa_Edges.end(); i != m; ++i) { printf("%03d -> %03d", i->pFrom->idx, i->pTo->idx); switch (i->edgeType()) { case DFA_Edge::TChar: printf("(%c)", i->data.char_value); break; case DFA_Edge::TString: printf("(%s)", i->data.string_value.c_str()); break; default: break; } if (i->isNot()) printf("(not)"); printf("\n"); tmp.insert(i->pFrom); tmp.insert(i->pTo); } printf("start: %03d -> ends: ", pDFAStart->idx); for (typename set::const_iterator i = pDFAEnds.begin(), m = pDFAEnds.end(); i != m; ++i) { printf("%03d ", (*i)->idx); } printf("\n"); #if DEBUG_LEVEL == 3 printf("-------------------------------\n"); for (typename set::const_iterator i = tmp.begin(), m = tmp.end(); i != m; ++i) { printf("State: %03d\n", (*i)->idx); for (typename set::const_iterator j = (*i)->content.begin(), n = (*i)->content.end(); j != n; ++j) { printf("%03d ", (*j)->idx); } printf("\n"); } #endif printf("----------- DFA End -----------\n"); }

最后我们加入测试代码

Rule_Type::Context context; Rule_Type a('a', context), b('b', context), d('d', context); Rule_Type result = (a - d).opt() + (+b | !(a + b)); result.buildDFA(); #ifdef _DEBUG result.printEpsilonNFA(); result.printDFA(); #endif

可打印出如下内容

画成图如下

完整的代码可到http://code.google.com/p/qlanguage下载

【本文地址】

公司简介

联系我们