【编译原理】LALR(1)语法分析方法（c++实现）

2024-02-05 10:48| 来源: 网络整理| 查看: 265

前文回顾

【编译原理】LR(0)分析方法（c++实现）【编译原理】SLR(1)分析方法（c++实现）【编译原理】LR(1)分析方法（c++实现）

这几个程序的代码大部分是一样的，根据不同算法特点做了部分修改而已

代码

LALR(1)的代码就是在LR(1)的基础上合并了同心项

Item类

在LR(1)基础上搜索符由string改成了vector

#include #include #include #include #include #include #include #include #include #include #include using namespace std; //返回s的第一个词 string firstWord(string s) { s+=" "; string first=s.substr(0,s.find(" ")); return first; } //将字符串划分为一个个词 vector split(string s,string separator) { vectorv; string::size_type pos1, pos2; pos2 = s.find(separator); pos1 = 0; while(string::npos != pos2) { v.push_back(s.substr(pos1, pos2-pos1)); pos1 = pos2 + separator.size(); pos2 = s.find(separator, pos1); } if(pos1 != s.length()) v.push_back(s.substr(pos1)); return v; } class Item { private: string item;//项目 string left;//项目左部 string right;//项目右部 vector symbol;//向前搜索符号 static int count; public: int id; //参数是产生式 Item(string i) { id=count++; left=i.substr(0,i.find("->")); right=i.substr(i.find("->")+2); item=left+"->"+right; symbol.push_back("$");//初始向前搜索符为"$" if(right.find(".")==string::npos) addDot(0); } //参数是左部和右部 Item(string l,string r) { id=count++; left=l; right=r; symbol.push_back("$"); item=left+"->"+right; if(right.find(".")==string::npos) addDot(0); } //参数是左部和右部和向前搜索符号 Item(string l,string r,string s) { id=count++; left=l; right=r; symbol.push_back(s); item=left+"->"+right; if(right.find(".")==string::npos) addDot(0); } string getLeft() { return left; } string getRight() { return right; } string getItem() { item=left+"->"+right; return item; } vector getSymbol() { return symbol; } //找点的位置 int getDot(string item) { return item.find("."); } //设置向前搜索符号 //Insert为1，则插入搜索符，为0，则重置搜索符 void setSymbol(int Insert,string new_symbol) { if(Insert&&find(symbol.begin(),symbol.end(),new_symbol)==symbol.end()) symbol.push_back(new_symbol); else { symbol.clear(); symbol.push_back(new_symbol); } sort(symbol.begin(), symbol.end()); } //给文法加点 void addDot(int pos) { if(right[pos]=='@') right="."; else if(pos==0) right.insert(pos,". "); else if(pos==right.size()) right.insert(pos," ."); else right.insert(pos," . "); } //判断一个项目进度是否到结尾 int hasNextDot() { vectorbuffer=split(right,"."); if(buffer.size()>1) return 1; else return 0; } //得到"."后面的一个文法符号 string getPath() { vectorbuffer=split(item,"."); buffer[1].erase(0,1); string first=firstWord(buffer[1]); return first; } //返回下一个点的串 string nextDot() { int dotPos=right.find("."); vectorbuffer=split(item,"."); buffer[1].erase(0,1); string first=firstWord(buffer[1]); int nextPos=dotPos+first.size(); right.erase(right.find("."),2); right.insert(nextPos," ."); return right; } bool operator ==(Item &x) { return getItem()==x.getItem(); } }; int Item::count=0; LALR(1)语法分析过程 #include #include #include #include #include #include #include #include #include #include #include #include #include "Item.h" using namespace std; //DFA的边 struct GOTO { int from; int to; string path; GOTO(int s,string p,int t) { from=s; path=p; to=t; } }; //DFA中状态 struct State { int id;//状态编号 setitems;//项目集 }; /*一些操作符的重载*/ bool operator return x.id==y.id; } bool operator return x.getItem() auto it1=x.begin(); auto it2=y.begin(); for(; it1!=x.end(),it2!=y.end(); it1++,it2++) { Item a=*it1; Item b=*it2; if(a==b&&a.getSymbol()==b.getSymbol()) continue; //有一个项目不相等，两项目集一定不相等 else return false; } return true; } class LR1 { private: int number=0; vectorT;//终结符号集合 vectorNT;//非终结符号集合 string S;//开始符号 mapproduction;//产生式 mapnumPro;//编号的产生式集合，用于规约 vectorStates;//状态集合 vectorGO;//转换函数 mapFIRST;//FIRST集 mapFOLLOW;//FOLLOW集 mapactionTable;//action表 mapgotoTable;//goto表 //读取文法规则 void readGrammar(string fileName) { ifstream input(fileName); if(!input) { cout left+=line[i]; } NT.push_back(left);//左部加入非终结符号集 //读取右部 string right=line.substr(i+2,line.size()-i);//获取产生式右部 addP(left,right);//添加产生式 } addT();//添加终极符 S=*NT.begin(); input.close(); } //填产生式 void addP(string left,string right) { right+="#";//'#'作为每句文法结尾标志 string pRight=""; for(int i=0; i production[left].push_back(pRight); pRight=""; } else { pRight+=right[i]; } } } //带标号的产生式集 void addNumP() { int i=0; for(string left:NT) { for(string right:production[left]) { numPro[left+"->"+right]=i; i++; } } } //填终结符 void addT() { string temp=""; for(string left:NT) { for(string right:production[left]) { right+="#"; for(int i=0; i //不是非终结，且不是空，则加入终结符号 if((find(NT.begin(),NT.end(),temp)==NT.end())&&temp!="@") { T.push_back(temp); } temp=""; } else { temp+=right[i]; } } } }//end left //终结符去重 sort(T.begin(),T.end()); T.erase(unique(T.begin(), T.end()), T.end()); } //判断是否是非终极符号 bool isNT(string token) { if(find(NT.begin(),NT.end(),token)!=NT.end()) return true; return false; } //判断temp在不在集合c中 bool isIn(Item temp,setc) { for(Item i:c) { if(i.getItem()==temp.getItem()&&i.getSymbol()==temp.getSymbol()) return true; } return false; } //判断是否应该规约 string tableReduce(int num) { for(State s:States) { //目标状态 if(s.id==num) { //遍历项目集 for(Item i:s.items) { //还有下一个点，肯定不是规约项目 if(i.hasNextDot()) return ""; //是规约项目 else return i.getLeft();//返回左部NT } } } return ""; } //找到item规约到的产生式，返回其编号 int findReduce(int num) { for(State s:States) { if(s.id==num) { for(Item i:s.items) { string temp=i.getItem(); temp.erase(temp.find(".")); temp.pop_back(); return numPro.find(temp)->second; } } } return -1; } //根据项目找规约的产生式编号 int findReduce(Item item) { string temp=item.getItem(); temp.erase(temp.find(".")); temp.pop_back(); if(numPro.find(temp)!=numPro.end()) return numPro.find(temp)->second; return -1; } //找到产生式序号为pro的产生式右部数量 int rightCount(string &left,int pro) { for(auto it=numPro.begin(); it!=numPro.end(); it++) { if(it->second==pro) { cout Item &item1=const_cast(*s1.items.find(*it1)); Item temp=*it2; item1.setSymbol(1,temp.getSymbol()[0]); } //检查是否有规约-规约冲突 for(Item i1:s1.items) { for(Item i2:s1.items) { //同一个状态中的两个规约项目具有相同搜索符，则判断为冲突 if(!i1.hasNextDot()&&!i2.hasNextDot()&&i1.getItem() if(x.items.size()!=y.items.size()) return false; settemp; //取交集，结果放在temp里 set_intersection(x.items.begin(),x.items.end(),y.items.begin(),y.items.end(),inserter(temp,temp.begin())); if(temp.size()==x.items.size()) return true; else return false; } //删除一个状态 void delete_State(int erase_id,int new_id) { //重复状态后面的所有状态序号-1 for(int i=0; i States[i].id--; } } //状态转移函数 for(int i=0; i //删除这一状态的goto if(new_id==-1) GO.erase(find(GO.begin(),GO.end(),GO[i])); //更改这一状态的goto else { if(GO[i].from==erase_id) GO[i].from=new_id; if(GO[i].to==erase_id) GO[i].to=new_id; } } if(GO[i].from>erase_id) GO[i].from--; if(GO[i].to>erase_id) GO[i].to--; } // printGO(); } public: //构造函数，读入所需的四元组 LR1(string fileName) { readGrammar(fileName); Extension();//拓广文法 } //拓广文法 void Extension() { string newS=S; newS+="'"; NT.insert(NT.begin(),newS); production[newS].push_back(S); S=newS; } //状态集是否已经包含该状态 int hasState(setJ) { for(State s:States) { if(s.items.size()!=J.size()) continue; if(s.items==J) return s.id; else continue; } return -1; } //获得FIRST集合 void getFirst() { FIRST.clear(); //终结符号或@ FIRST["@"].insert("@"); for(string X:T) { FIRST[X].insert(X); } //非终结符号 int j=0; while(j string A=NT[i]; //遍历A的每个产生式 for(int k=0; k for(string firstX:FIRST[X]) { if(firstX=="@") continue; else { FIRST[A].insert(firstX); Continue=0; } } if(Continue) FIRST[A].insert("@"); } } } j++; } } //获得FOLLOW集合 void getFollow() { getFirst(); //将界符加入开始符号的follow集 FOLLOW[S].insert("$"); int j=0; while(j for(string right:production[A]) { for(string B:NT) { //A->Bb if(right.find(B)!=string::npos) { /*找B后的字符b*/ string b; int flag=0; //识别到E' if(right[right.find(B)+B.size()]!=' '&&right[right.find(B)+B.size()]!='\0') { string s=right.substr(right.find(B));//E'b string temp=right.substr(right.find(B)+B.size());//' b //A->E' if(temp.find(" ")==string::npos) { B=s;//B:E->E' FOLLOW[B].insert(FOLLOW[A].begin(),FOLLOW[A].end());//左部的FOLLOW赋给B flag=1; } //A->E'b else { B=s.substr(0,s.find(" ")); temp=temp.substr(temp.find(" ")+1);//b //b后无字符 if(temp.find(" ")==string::npos) b=temp; //b后有字符 else b=temp.substr(0,temp.find(" ")); } } //A->aEb else if(right[right.find(B)+B.size()]==' ') { string temp=right.substr(right.find(B)+B.size()+1);//B后的子串 //b后无字符 if(temp.find(" ")==string::npos) b=temp; //b后有字符 else b=temp.substr(0,temp.find(" ")); } //A->aE else { FOLLOW[B].insert(FOLLOW[A].begin(),FOLLOW[A].end()); flag=1; } //FOLLOW[B]还没求到 if(flag==0) { //FIRST[b]中不包含@ if(FIRST[b].find("@")==FIRST[b].end()) { FOLLOW[B].insert(FIRST[b].begin(),FIRST[b].end()); } else { for(string follow:FIRST[b]) { if(follow=="@") continue; else FOLLOW[B].insert(follow); } FOLLOW[B].insert(FOLLOW[A].begin(),FOLLOW[A].end()); } } } } } } j++; } //printFIRST(); printFOLLOW(); } //项目闭包 set closure(Item item) { setC;//项目闭包 C.insert(item); queuebfs;//bfs求所有闭包项 bfs.push(item); while(!bfs.empty()) { Item now=bfs.front(); bfs.pop(); vectorbuffer=split(now.getRight(),"."); if(buffer.size()>1) { string first=firstWord(buffer[1].erase(0,1)); setfst; vectorcandidate=split(now.getRight(),first); //如果first后面有符号，则first后面符号的FIRST作为新的向前搜索符号 if(candidate.size()>1) { candidate[1].erase(0,1); for(string f:FIRST[firstWord(candidate[1])]) fst.insert(f); } //如果first后面没有符号，则向前搜索符号为新的向前搜索符号 else fst.insert(now.getSymbol()[0]); if(isNT(first))//如果"."后面第一个字符是NT { for(auto it2=production[first].begin(); it2!=production[first].end(); it2++) { Item temp(first,*it2); if(!isIn(temp,C)) { for(string f:fst) { temp.setSymbol(0,f); C.insert(temp); bfs.push(temp); } } } } } } return C; } //构造DFA void dfa() { State s0;//初始项目集 s0.id=number++;//状态序号 //初始项目集 string firstRight=*(production[S].begin()); Item start(S,firstRight); s0.items=closure(start);//加到状态中 States.push_back(s0); //构建DFA State temp; for(int k=0; k now.getItem(); if(now.hasNextDot()) { string path=now.getPath();//path Item nextD(now.getLeft(),now.nextDot(),now.getSymbol()[0]);//新状态核心项，向前搜索符继承 setnext=closure(nextD);//to //该状态已经有这条路径了，则将新产生的闭包添加到原有目的状态中 int oldDes; if(Paths.find(path)!=Paths.end()) { oldDes=Paths.find(path)->second; for(int j=0; j States[j].items.insert(next.begin(),next.end()); next=States[j].items; int tID=hasState(next); if(tID!=-1) { //temp=dest; for(int i=0; i GO[i].to=tID; } } } } } } //如果该目的状态在状态集里没有出现过，就加入状态集 int tID=hasState(next); if(tID==-1) { State t; t.id=number++; t.items=next; States.push_back(t); Paths.insert(pair(path,t.id)); GO.push_back(GOTO(s.id,path,t.id)); } //该目的状态已经在状态集中了 else { Paths.insert(pair(path,tID)); GO.push_back(GOTO(s.id,path,tID)); } } } } //删除重复GOTO sort(GO.begin(),GO.end()); GO.erase(unique(GO.begin(), GO.end()), GO.end()); //处理重复状态 for(int i=0; i State s2=States[j]; //发现重复状态集 if(s2.id>s1.id&&s1.items==s2.items) { int erase_id=s2.id; States.erase(States.begin()+j); delete_State(erase_id,-1); } } } } //合并同心项,成功返回1，失败返回0 int Merge() { for(int i=0; i State s2=States[j]; //cout int erase_id=s2.id; States.erase(States.begin()+j); delete_State(erase_id,s1.id); } else return 0; } } } //删除重复GOTO sort(GO.begin(),GO.end()); GO.erase(unique(GO.begin(), GO.end()), GO.end()); printS(); printGO(); return 1; } //构造LR(1)分析表 void getTable() { addNumP(); string s=S; s=s.erase(s.find("'")); pairtitle(1,"$"); actionTable[title]="acc"; for(GOTO go:GO) { //目的地是NT if(isNT(go.path)) { pairtitle(go.from,go.path); gotoTable[title]=go.to; } //加入action表 else { //shift pairtitle(go.from,go.path); actionTable[title]="s"+to_string(go.to); } //reduce，行为状态号，列为向前搜索符 string rNT=tableReduce(go.to); if(rNT!="") { if(go.path!=s) { //找该状态的项目集 State temp; temp.id=go.to; temp=*(find(States.begin(),States.end(),temp)); setitm(temp.items.begin(),temp.items.end()); //向前搜索符下的规约项规约到对应产生式 for(Item i:itm) { vectorsymbols(i.getSymbol()); for(string ss:symbols) { pairtitle(go.to,ss); int nReduce=findReduce(i); if(nReduce!=-1) actionTable[title]="r"+to_string(nReduce); else actionTable[title]="error"; } } } } } //printTable(); } //语法分析过程 int parsing(string input) { stackAnalysis; input+=" $"; //0状态入栈 Analysis.push("$"); Analysis.push("0"); vectorw=split(input," ");//将输入串分成一个个词 int ip=0;//输入串的指针 while(true) { pairtitle(stoi(Analysis.top()),w[ip]);//stoi函数用于string to int string res=actionTable[title]; cout cout cout ifstream input(fileName); if(!input) { cout cout cout cout cout cout if (it1==x.begin()) cout //cout if(it1==y.begin()) cout pairtitle(i,t); if(gotoTable[title]!=0) { cout cout cout cout" cout string filename="grammar-input.txt"; LR1 grammar(filename); grammar.printP();//输出产生式 grammar.parser("Tokens.txt"); return 0; } /* 测试文法（LALR）： A->( A )|a 测试文法（不是LALR）： S->a A d|b B d|a B e|b A e A->c B->c */

之前状态集States都是用的set，想着可以直接去重，删除查找也更方便，但是LALR里合并同心项要修改的太多了，set修改是真的麻烦，改了一上午也没改好…最后改成用vector了

测试结果 1. LALR文法

在这里插入图片描述

2. 非LALR文法

在这里插入图片描述

【本文地址】

公司简介

联系我们