LR0文法分析器实现：代码优化及SLR1扩展

{ "title": "class LR\n {\n //产生式结点类\n public class LRNode\n {\n public string Left;\n public string Right;\n public LRNode(string Left, string Right)\n {\n this.Left = Left;\n this.Right = Right;\n }\n }\n //项目集类\n public class LRitemsets\n {\n public List Container\n = new List(100);\n //记录项目在项目集合中的序号\n }\n\n //DFA结点\n public struct DFA\n {\n public int from;\n public char symbol;\n public int to;\n public DFA(int from, char symbol, int to)\n {\n this.from = from;\n this.symbol = symbol;\n this.to = to;\n }\n }\n\n //分析表结点\n public class Table\n {\n public bool error;//是否为ERROR\n public char type;//结点类型\n public int id;//数值\n public Table()\n {\n this.error = true;\n }\n public Table(char type, int id)\n {\n this.type = type;\n this.id = id;\n this.error = false;\n }\n }\n\n \n public DFA[] dfa = new DFA[100];\n public int Pindex = 0; //dfa数组指针\n public Table[][] LRAna;//分析表\n public Analyze Jz;\n public bool Success = false;\n public List LRproNum = new List(50);//产生式列表\n public List LRobjNum = new List(50);//项目列表\n public List proitemset = new List(100);//项目集合\n public List Gy_obj = new List(50);//归约项目序号集合\n public List Gy_itemset = new List(50);//含有归约项目的集合的序号的集合\n public List Nchar = new List(50);//非终结符集合\n public List Echar = new List(50);//终结符集合\n\n public string RStr = "";\n public string RStr_obitemset = "";//输出返回\n public string RStr_DFA = "";\n public string RStr_ANA = "";\n\n\n public void Buildprod(string str)\n {\n\n LRNode Lr;\n int i = 0;\n string left = "";\n string right = "";\n left += 'S';\n right += str[0];\n Lr = new LRNode(left, right);//拓广文法开始\n LRproNum.Add(Lr);\n while (i < str.Length)\n {\n left = right = "";//还原\n int j = i;\n while (i < str.Length && str[i] != '\r' && str[i] != '\n')//换行符‘\r\n’\n {\n if (str[i] == ' ')\n {\n i++;\n continue;\n }\n if (str[i] == '|') // 遇到‘|’可构造一条产生式\n {\n Lr = new LRNode(left, right);\n LRproNum.Add(Lr);\n right = ""; //产生式左边相同右边重新积累\n i++; //跳过‘|’\n continue;\n }\n if ((i - j) == 0)\n {\n if (!exist(Nchar, str[i]))//如果非终结符集合中不存在str[i],加入Nchar 产生式左边只有非终结符不必判断终结符\n Nchar.Add(str[i]);\n left += str[i++];\n }\n else if (i - j <= 2)\n i++;\n else\n {\n if (isFinalsymbol(str[i]) && !exist(Nchar, str[i]))//如果非终结符集合中不存在str[i],加入Nchar isfinalsymbol 非终结符返回T 终结符返回F\n Nchar.Add(str[i]);\n else if (!isFinalsymbol(str[i]) && !exist(Echar, str[i]))//产生式右边需要判断终结符\n Echar.Add(str[i]);\n right += str[i++];\n }\n\n\n }//while\n\n i++;//跳过换行符\n if (left != "" && right != "")\n {\n Lr = new LRNode(left, right);//构造每一行最后一个产生式，不存在"|"时就是该行产生式本身\n LRproNum.Add(Lr);\n }\n }//while\n Echar.Add('#');\n\n //构造项目对产生式集合LRproNum中的所有产生式都循环插‘.’\n LRNode Lobj;\n for (i = 0; i < LRproNum.Count; i++)\n {\n left = "";\n right = "";\n for (int j = 0; j <= LRproNum[i].Right.Length; j++)//j可以等于length 项目共length+1个\n {\n left = LRproNum[i].Left;\n right = CreObj(LRproNum[i].Right, j);//在第j个位置插入‘.’\n if (j == LRproNum[i].Right.Length && LRobjNum.Count != 1)\n {\n //在产生式最后的位置插入. 即为归约项目项目集中1号位置为接受项目\n Gy_obj.Add(LRobjNum.Count);//归约项目在项目集中的序号不用+1 本身就是从0开始的\n }\n Lobj = new LRNode(left, right);\n LRobjNum.Add(Lobj);\n left = "";//还原\n right = "";\n }\n }\n Creteitemsets();//项目集\n RStr_obitemset += "\r\n项目集构建：\r\n";\n for (int j = 0; j < proitemset.Count; j++)\n {\n RStr_obitemset += 'I' + j.ToString() + ":" + "\r\n";\n for (i = 0; i < proitemset[j].Container.Count; i++)\n {\n RStr_obitemset += LRobjNum[proitemset[j].Container[i]].Left.ToString() + "->" + LRobjNum[proitemset[j].Container[i]].Right.ToString() + "\r\n";\n }\n }\n //return RStr_obitemset;\n\n\n\n }\n\n\n public Table[][] GET_ANA()\n {\n LRAnaly();\n RStr_ANA += "\r\nLR0分析表:\r\n ";\n int i;\n for (i = 0; i < Echar.Count; i++)\n {\n RStr_ANA += Echar[i].ToString() + " ";\n }\n for (i = 0; i < Nchar.Count; i++)\n {\n RStr_ANA += Nchar[i].ToString() + " ";\n }\n RStr_ANA += "\r\n";\n for (i = 0; i < proitemset.Count; i++)\n {\n RStr_ANA += i.ToString() + " ";\n for (int j = 0; j < Echar.Count + Nchar.Count; j++)\n {\n\n if (LRAna[i][j].error)\n {\n RStr_ANA += " " + " ";\n }\n else if (i == 1 && j == Echar.Count - 1)\n {\n RStr_ANA += "AC" + " ";\n }\n else if (LRAna[i][j].type != 'N')\n {\n RStr_ANA += LRAna[i][j].type.ToString() + LRAna[i][j].id.ToString() + " ";\n }\n else\n RStr_ANA += LRAna[i][j].id.ToString() + " ";\n }\n RStr_ANA += "\r\n";\n }\n\n return LRAna;\n\n }\n \n\n //求项目集\n public void Creteitemsets()\n {\n List lr_item = new List(100);//记录项目的序号\n lr_item.Add(0);\n lr_item = Closure(lr_item);//构造初始项目集求闭包\n\n LRitemsets LR_C = new LRitemsets();\n LR_C.Container = lr_item;//集合----项目集序号的集合\n proitemset.Add(LR_C);//集合的集合----存放项目集序号集合的集合\n\n\n for (int i = 0; i < proitemset.Count; i++)//整体集合中第i个项目集\n {\n proitemset[i].Container.Sort();//排序由小到大后面用于判断是否存在的比较\n int[] flag = new int[proitemset[i].Container.Count];\n for (int fi = 0; fi < proitemset[i].Container.Count; fi++)//标志位，用来判断该序号是否已经构造\n {\n flag[fi] = 0;\n }\n\n for (int j = 0; j < proitemset[i].Container.Count; j++)//第i个项目集的第j个项目\n {\n if (flag[j] == 1)//如果已经访问过就不再构造找下一个项目\n continue;\n int index = proitemset[i].Container[j];\n for (int pi = 0; pi < LRobjNum[index].Right.Length - 1; pi++)//length-1是避免匹配到.在最后的规约项目\n {\n if (LRobjNum[index].Right[pi] == '.')\n {\n\n List lr2_club = new List(100);//记录项目的序号\n char symbol = LRobjNum[index].Right[pi + 1];//记录.a转移状态a.的符号a\n lr2_club.Add((index + 1));//如果遇到.a形式的项目序号为index 那么项目a.的序号为index+1\n for (int m1 = j + 1; m1 < proitemset[i].Container.Count; m1++)\n {\n //在第i个项目集中找到了可以移动的.：.a 重新遍历第i个项目集j项目之后的找到同样可以移动a的项目集\n int index2 = proitemset[i].Container[m1];\n for (int m2 = 0; m2 < LRobjNum[index2].Right.Length - 1; m2++)\n {\n if (LRobjNum[index2].Right[m2] == '.' && LRobjNum[index2].Right[m2 + 1] == symbol)\n {\n flag[m1] = 1;//标记位置为1 已经访问之后不再访问\n lr2_club.Add(index2 + 1);\n }\n }\n }\n lr2_club = Closure(lr2_club);//求闭包\n int value = isexist(lr2_club);\n if (value == -1)//-1表示不存在相同的\n {\n for (int m3 = 0; m3 < Gy_obj.Count; m3++)\n {\n if (isnexist(lr2_club, Gy_obj[m3]))\n {\n Gy_itemset.Add(proitemset.Count);\n }\n }\n LRitemsets LR_C2 = new LRitemsets();\n dfa[Pindex++] = new DFA(i, symbol, proitemset.Count);//count不用加1 本身从0开始\n LR_C2.Container = lr2_club;\n proitemset.Add(LR_C2);\n }\n else\n {\n dfa[Pindex++] = new DFA(i, symbol, value);\n }\n break;\n }\n }\n }//end-forj\n }//end-fori\n\n }//end-Cre_club\n\n //分析表\n public void LRAnaly()\n {\n Table tnode = new Table();\n\n LRAna = new Table[proitemset.Count][];\n for (int i = 0; i < proitemset.Count; i++)\n LRAna[i] = new Table[Echar.Count + Nchar.Count];\n\n for (int i = 0; i < proitemset.Count; i++)//初始化赋予ERROR属性\n for (int j = 0; j < Echar.Count + Nchar.Count; j++)//为终结符加r状态 \n LRAna[i][j] = tnode;\n\n tnode = new Table('A', 0);\n LRAna[1][FindID(Echar, '#')] = tnode;//项目集1必定是接受项目构建[1][#]:acc的情况先直接赋值好 dfa里没有\n\n for (int i = 0; i < Gy_itemset.Count; i++)\n {\n tnode = new Table('r', Find_pro(LRobjNum[proitemset[Gy_itemset[i]].Container[0]]));//归约项目找到原产生式序号添加状态r\n for (int j = 0; j < Echar.Count; j++)\n {\n LRAna[Gy_itemset[i]][j] = tnode;\n }\n }\n\n for (int i = 0; i < Pindex; i++)\n {\n\n if (isFinalsymbol(dfa[i].symbol))//symbol为非终结符添加状态N\n {\n int CID = FindID(Nchar, dfa[i].symbol);\n tnode = new Table('N', dfa[i].to);\n LRAna[dfa[i].from][CID + Echar.Count] = tnode;\n }\n else //不是归约项目添加状态S\n {\n int CID = FindID(Echar, dfa[i].symbol);\n tnode = new Table('S', dfa[i].to);\n LRAna[dfa[i].from][CID] = tnode;\n }\n\n }\n }\n //计算Follow集\n public Dictionary<char, HashSet> Follow()\n {\n Dictionary<char, HashSet> follow = new Dictionary<char, HashSet>(); // 用Dictionary存储每个非终结符的Follow集\n // 初始化\n foreach (char c in Nchar)\n {\n follow[c] = new HashSet(); // 每个非终结符的Follow集初始化为空集\n }\n follow['S'] = new HashSet(); follow['S'].Add('#'); // S' 的Follow集包含#\n\n // 迭代计算Follow集 bool changed = true; // 标记Follow集是否发生变化，用于循环终止条件 while (changed) { // 循环直到Follow集不再发生变化 changed = false; // 假设Follow集没有变化 foreach (char A in Nchar) { // 遍历每个非终结符A foreach (LRNode prod in LRproNum) { // 遍历每个产生式 if (prod.Right.Contains(A)) // 如果产生式右边包含A { // 计算A的Follow集 for (int i = prod.Right.IndexOf(A) + 1; i < prod.Right.Length; i++) { // 从A后面的符号开始遍历 if (i == prod.Right.Length - 1 && follow[prod.Left].Union(follow[A]).Except(new HashSet { '#' }).Count() > 0) { // 如果是产生式的最后一个符号并且prod.Left的Follow集不为空 changed = follow[prod.Left].Union(follow[A]).Except(new HashSet { '#' }).Aggregate(changed, (current, c) => current | follow[A].Add(c)); } else if (isFinalsymbol(prod.Right[i])) { // 如果下一个符号是终结符 changed = follow[A].Add(prod.Right[i]) || changed; // 将终结符加入A的Follow集 break; // 结束循环 } else { // 如果下一个符号是非终结符 if (follow[prod.Right[i]].Except(new HashSet { '#' }).Count() > 0) { // 如果该非终结符的Follow集不为空 changed = follow[A].Union(follow[prod.Right[i]].Except(new HashSet { '#' })).Aggregate(changed, (current, c) => current | follow[A].Add(c)); break; // 结束循环 } else if (follow[prod.Right[i]].Contains('#')) { // 如果该非终结符的Follow集包含#，继续循环 continue; } else { // 如果该非终结符的Follow集为空，结束循环 break; } } } } } } } return follow; // 返回计算后的Follow集 } //SLR1分析表构建 public void SLRAnaly()\n {\n Dictionary<char, HashSet> follow = Follow(); // 计算Follow集 Table tnode = new Table(); // 创建一个新的Table对象

        LRAna = new Table[proitemset.Count][]; // 初始化分析表LRAna
        for (int i = 0; i < proitemset.Count; i++)
            LRAna[i] = new Table[Echar.Count + Nchar.Count]; // 为每个项目集分配足够的空间

        for (int i = 0; i < proitemset.Count; i++) // 初始化分析表，所有位置都设置为error
            for (int j = 0; j < Echar.Count + Nchar.Count; j++)
                LRAna[i][j] = tnode; // 初始化为error状态

        tnode = new Table('A', 0); // 创建一个接受状态
        LRAna[1][FindID(Echar, '#')] = tnode; // 项目集1必定是接受项目，构建[1][#]:acc

        for (int i = 0; i < Gy_itemset.Count; i++)
        { // 遍历每个含有归约项目的项目集
            tnode = new Table('r', Find_pro(LRobjNum[proitemset[Gy_itemset[i]].Container[0]])); // 找到归约项目的原产生式序号
            for (int j = 0; j < Echar.Count; j++)
            { // 对于每个终结符
                LRAna[Gy_itemset[i]][j] = tnode; // 将归约项目添加至分析表中
            }
            foreach (char b in follow[LRobjNum[proitemset[Gy_itemset[i]].Container[0]].Left])
            { // 遍历该产生式左部的非终结符的Follow集
                int j = FindID(Echar, b); // 找到该终结符在Echar中的索引
                if (j >= 0)
                { // 如果该终结符存在于Echar中
                    LRAna[Gy_itemset[i]][j] = new Table('r', Find_pro(LRobjNum[proitemset[Gy_itemset[i]].Container[0]])); // 添加归约状态
                }
            }
        }

        for (int i = 0; i < Pindex; i++)
        { // 遍历dfa数组
            if (isFinalsymbol(dfa[i].symbol))
            { // 如果dfa[i].symbol是非终结符
                int CID = FindID(Nchar, dfa[i].symbol); // 找到该非终结符在Nchar中的索引
                tnode = new Table('N', dfa[i].to); // 创建一个转移状态
                LRAna[dfa[i].from][CID + Echar.Count] = tnode; // 添加转移状态
            }
            else
            { // 如果dfa[i].symbol是终结符
                int CID = FindID(Echar, dfa[i].symbol); // 找到该终结符在Echar中的索引
                tnode = new Table('S', dfa[i].to); // 创建一个移进状态
                LRAna[dfa[i].from][CID] = tnode; // 添加移进状态
            }
        }
    }
}