LR0文法分析器实现 - C# 代码解析与优化 - 常规

{
// A grammar production or an LR item: Left is the head, Right is the body.
// For items, Right also contains the '.' position marker.
public class LRNode
{
    public string Left;
    public string Right;

    public LRNode(string Left, string Right)
    {
        this.Left = Left;
        this.Right = Right;
    }
}

// One item set (a state of the LR automaton).
public class LRitemsets
{
    // Indices of the items that belong to this set. The element type must be
    // int: the analyzer assigns List<int> values to this field and uses its
    // elements as indices into the item list.
    public List<int> Container = new List<int>(100);
}
/// <summary>
/// One edge of the item-set automaton: reading <c>symbol</c> in state
/// <c>from</c> leads to state <c>to</c>.
/// </summary>
public struct DFA
{
    public int from;     // source state number
    public char symbol;  // grammar symbol labelling the edge
    public int to;       // target state number

    /// <summary>Creates an edge from its three components.</summary>
    public DFA(int from, char symbol, int to)
    {
        this.to = to;
        this.symbol = symbol;
        this.from = from;
    }
}

/// <summary>
/// One cell of a parse table. A cell is either an ERROR entry or carries an
/// entry kind plus a number.
/// </summary>
public class Table
{
    public bool error = true; // true while the cell is an ERROR entry
    public char type;         // entry kind
    public int id;            // number attached to the entry

    /// <summary>Creates an ERROR cell.</summary>
    public Table()
    {
    }

    /// <summary>Creates a non-error cell of the given kind and number.</summary>
    public Table(char type, int id)
    {
        this.id = id;
        this.type = type;
        error = false;
    }
}


public DFA[] dfa = new DFA[100];
public int Pindex = 0; // next free slot in dfa (number of edges stored so far)
public Table[][] LRAna; // parse table
public Analyze Jz;
public bool Success = false;
public List<LRNode> LRproNum = new List<LRNode>(50); // productions
public List<LRNode> LRobjNum = new List<LRNode>(50); // items
public List<LRitemsets> proitemset = new List<LRitemsets>(100); // item sets
public List<int> Gy_obj = new List<int>(50); // indices of the reduce items
public List<int> Gy_itemset = new List<int>(50); // indices of the item sets that contain a reduce item
public List<char> Nchar = new List<char>(50); // non-terminals
public List<char> Echar = new List<char>(50); // terminals

// Output buffers. These were written with single quotes (''), which is not a
// valid C# string literal; the empty string is "".
public string RStr = "";
public string RStr_obitemset = ""; // item-set listing returned to the caller
public string RStr_DFA = "";
public string RStr_ANA = "";


/// <summary>
/// Parses the grammar text, then builds the production list (augmented with
/// S'), the item list and the item sets, and appends a listing of the item
/// sets to <c>RStr_obitemset</c>.
/// </summary>
/// <param name="str">
/// Grammar text, one rule per line ('\r' or '\n' ends a line) in the form
/// <c>A-&gt;x|y</c>: a one-character head, a two-character arrow, then
/// alternatives separated by '|'. Every symbol is one character; spaces are
/// ignored.
/// </param>
/// <exception cref="System.ArgumentException">
/// <paramref name="str"/> is null or contains no grammar symbol.
/// </exception>
public void Buildprod(string str)
{
    if (str == null)
    {
        throw new System.ArgumentNullException("str");
    }

    ParseGrammarText(str);
    BuildItemList();
    Creteitemsets(); // item sets
    AppendItemSetDump();
}

// Fills LRproNum, Nchar and Echar from the grammar text.
private void ParseGrammarText(string str)
{
    // The start symbol is the first real character of the text; skipping
    // blanks and line breaks keeps leading whitespace from becoming the start symbol.
    int startIndex = 0;
    while (startIndex < str.Length &&
           (str[startIndex] == ' ' || str[startIndex] == '\r' || str[startIndex] == '\n'))
    {
        startIndex++;
    }
    if (startIndex == str.Length)
    {
        throw new System.ArgumentException("The grammar text contains no symbols.", "str");
    }

    // Augmented grammar: S' -> <start symbol> is always production 0.
    LRproNum.Add(new LRNode("S'", str[startIndex].ToString()));

    int i = 0;
    while (i < str.Length)
    {
        string left = "";
        string right = "";
        // Number of non-blank characters consumed on this line. Counting
        // characters instead of raw offsets keeps the arrow skip correct when
        // the line contains spaces (e.g. "E -> aA").
        int column = 0;

        while (i < str.Length && str[i] != '\r' && str[i] != '\n')
        {
            char c = str[i++];
            if (c == ' ')
            {
                continue;
            }
            if (c == '|')
            {
                // An alternative ends here: same head, start a new body.
                LRproNum.Add(new LRNode(left, right));
                right = "";
                column++;
                continue;
            }

            if (column == 0)
            {
                // The head of a rule is always a non-terminal.
                if (!exist(Nchar, c))
                {
                    Nchar.Add(c);
                }
                left += c;
            }
            else if (column > 2)
            {
                // Body symbol: isFinalsymbol is true for non-terminals and
                // false for terminals.
                if (isFinalsymbol(c))
                {
                    if (!exist(Nchar, c))
                    {
                        Nchar.Add(c);
                    }
                }
                else if (!exist(Echar, c))
                {
                    Echar.Add(c);
                }
                right += c;
            }
            // column 1 and 2 are the two characters of the arrow: skipped.
            column++;
        }

        i++; // step over the line terminator
        if (left != "" && right != "")
        {
            // Last (or only) alternative of the line.
            LRproNum.Add(new LRNode(left, right));
        }
    }
    Echar.Add('#'); // end-of-input marker
}

// Builds LRobjNum: for every production, one item per possible dot position
// (Right.Length + 1 items), and records the reduce items in Gy_obj.
private void BuildItemList()
{
    for (int p = 0; p < LRproNum.Count; p++)
    {
        LRNode production = LRproNum[p];
        for (int dot = 0; dot <= production.Right.Length; dot++)
        {
            string dotted = CreObj(production.Right, dot); // insert '.' at position dot
            // Dot at the very end means a reduce item. Item 1 is the accept
            // item (S' -> S.) and is deliberately not recorded as a reduce item.
            if (dot == production.Right.Length && LRobjNum.Count != 1)
            {
                Gy_obj.Add(LRobjNum.Count); // index the item is about to get
            }
            LRobjNum.Add(new LRNode(production.Left, dotted));
        }
    }
}

// Appends a textual listing of all item sets to RStr_obitemset.
private void AppendItemSetDump()
{
    System.Text.StringBuilder dump = new System.Text.StringBuilder();
    dump.Append("\r\n项目集构建：\r\n");
    for (int s = 0; s < proitemset.Count; s++)
    {
        dump.Append("I").Append(s.ToString()).Append(":").Append("\r\n");
        List<int> members = proitemset[s].Container;
        for (int k = 0; k < members.Count; k++)
        {
            LRNode item = LRobjNum[members[k]];
            dump.Append(item.Left).Append("->").Append(item.Right).Append("\r\n");
        }
    }
    RStr_obitemset += dump.ToString();
}


/// <summary>
/// Builds the LR(0) parse table via <c>LRAnaly</c>, appends a printable
/// rendering of it to <c>RStr_ANA</c> and returns the table.
/// </summary>
/// <returns>The table stored in <c>LRAna</c>.</returns>
public Table[][] GET_ANA()
{
    LRAnaly();

    // Collect the text in a builder and append it once, instead of growing
    // RStr_ANA cell by cell.
    System.Text.StringBuilder text = new System.Text.StringBuilder();

    // Header row: terminals first, then non-terminals (same order as the columns).
    text.Append("\r\nLR0分析表:\r\n    ");
    for (int t = 0; t < Echar.Count; t++)
    {
        text.Append(Echar[t].ToString()).Append("     ");
    }
    for (int n = 0; n < Nchar.Count; n++)
    {
        text.Append(Nchar[n].ToString()).Append("     ");
    }
    text.Append("\r\n");

    int columnCount = Echar.Count + Nchar.Count;
    for (int state = 0; state < proitemset.Count; state++)
    {
        text.Append(state.ToString()).Append("  ");
        for (int column = 0; column < columnCount; column++)
        {
            Table cell = LRAna[state][column];
            if (cell.error)
            {
                text.Append("  ").Append("    ");
            }
            else if (state == 1 && column == Echar.Count - 1)
            {
                // State 1 under the last terminal column is shown as accept.
                text.Append("AC").Append("    ");
            }
            else if (cell.type != 'N')
            {
                // Entry kind followed by its number.
                text.Append(cell.type.ToString()).Append(cell.id.ToString()).Append("    ");
            }
            else
            {
                // 'N' entries are printed as the bare number.
                text.Append(cell.id.ToString()).Append("    ");
            }
        }
        text.Append("\r\n");
    }

    RStr_ANA += text.ToString();
    return LRAna;
}


/// <summary>
/// Builds the collection of item sets (<c>proitemset</c>), the transitions
/// between them (<c>dfa</c> / <c>Pindex</c>) and the list of item sets that
/// contain a reduce item (<c>Gy_itemset</c>).
/// </summary>
public void Creteitemsets()
{
    // I0 is the closure of item 0.
    List<int> seed = new List<int>(100);
    seed.Add(0);
    LRitemsets initialSet = new LRitemsets();
    initialSet.Container = Closure(seed);
    proitemset.Add(initialSet);

    // proitemset grows while it is being walked; every newly discovered set
    // is processed in a later iteration.
    for (int i = 0; i < proitemset.Count; i++)
    {
        List<int> items = proitemset[i].Container;
        items.Sort(); // ascending order, relied on when sets are compared
        bool[] handled = new bool[items.Count]; // items already covered by an earlier transition

        for (int j = 0; j < items.Count; j++)
        {
            if (handled[j])
            {
                continue;
            }

            int index = items[j];
            string body = LRobjNum[index].Right;
            int dot = body.IndexOf('.');
            if (dot < 0 || dot >= body.Length - 1)
            {
                continue; // dot at the end: nothing to move over
            }

            char symbol = body[dot + 1]; // symbol the dot moves over
            List<int> kernel = new List<int>(100);
            // Items of one production are stored consecutively by dot
            // position, so the item with the dot moved one step is index + 1.
            kernel.Add(index + 1);

            // Gather the remaining items of this set that move over the same symbol.
            for (int m = j + 1; m < items.Count; m++)
            {
                int otherIndex = items[m];
                string otherBody = LRobjNum[otherIndex].Right;
                int otherDot = otherBody.IndexOf('.');
                if (otherDot >= 0 && otherDot < otherBody.Length - 1 && otherBody[otherDot + 1] == symbol)
                {
                    handled[m] = true;
                    kernel.Add(otherIndex + 1);
                }
            }

            List<int> target = Closure(kernel);
            int existing = isexist(target); // -1 when no equal set exists yet
            if (existing == -1)
            {
                int newState = proitemset.Count; // index the new set is about to get

                // Record the state once if it holds any reduce item; without
                // the break a set with several reduce items was listed repeatedly.
                for (int r = 0; r < Gy_obj.Count; r++)
                {
                    if (isnexist(target, Gy_obj[r]))
                    {
                        Gy_itemset.Add(newState);
                        break;
                    }
                }

                AppendDfaEdge(i, symbol, newState);
                LRitemsets created = new LRitemsets();
                created.Container = target;
                proitemset.Add(created);
            }
            else
            {
                AppendDfaEdge(i, symbol, existing);
            }
        }
    }
}

// Stores one transition in dfa, growing the array when it is full so that
// grammars with more transitions than the initial capacity do not overflow it.
private void AppendDfaEdge(int from, char symbol, int to)
{
    if (Pindex >= dfa.Length)
    {
        System.Array.Resize(ref dfa, System.Math.Max(4, dfa.Length * 2));
    }
    dfa[Pindex++] = new DFA(from, symbol, to);
}

/// <summary>
/// Fills <c>LRAna</c>, the LR(0) parse table: one row per item set, terminal
/// columns first, then the non-terminal columns.
/// </summary>
public void LRAnaly()
{
    int rowCount = proitemset.Count;
    int columnCount = Echar.Count + Nchar.Count;

    // Every cell starts out as the same shared ERROR entry.
    Table errorCell = new Table();
    LRAna = new Table[rowCount][];
    for (int row = 0; row < rowCount; row++)
    {
        Table[] cells = new Table[columnCount];
        for (int column = 0; column < columnCount; column++)
        {
            cells[column] = errorCell;
        }
        LRAna[row] = cells;
    }

    // Item set 1 is the accepting one; the automaton has no edge for it, so
    // the [1]['#'] entry is written directly.
    LRAna[1][FindID(Echar, '#')] = new Table('A', 0);

    // Reduce states: an 'r' entry under every terminal. The production number
    // is looked up from the first item of the set.
    foreach (int reduceState in Gy_itemset)
    {
        Table reduceCell = new Table('r', Find_pro(LRobjNum[proitemset[reduceState].Container[0]]));
        Table[] cells = LRAna[reduceState];
        for (int column = 0; column < Echar.Count; column++)
        {
            cells[column] = reduceCell;
        }
    }

    // Automaton edges: 'N' entries for non-terminals (isFinalsymbol is true
    // for those), 'S' entries for terminals.
    for (int e = 0; e < Pindex; e++)
    {
        DFA edge = dfa[e];
        if (!isFinalsymbol(edge.symbol))
        {
            int terminalColumn = FindID(Echar, edge.symbol);
            LRAna[edge.from][terminalColumn] = new Table('S', edge.to);
            continue;
        }

        int nonTerminalColumn = FindID(Nchar, edge.symbol) + Echar.Count;
        LRAna[edge.from][nonTerminalColumn] = new Table('N', edge.to);
    }
}
// FOLLOW sets keyed by non-terminal; (re)filled by Follow() and read by FollowSet().
private Dictionary<char, List<char>> followSets = new Dictionary<char, List<char>>();

/// <summary>
/// Computes the FOLLOW set of every non-terminal in <c>Nchar</c> from the
/// productions in <c>LRproNum</c> and stores the result for
/// <see cref="FollowSet"/>. The end marker is '#', the same symbol the parse
/// tables use.
/// </summary>
public void Follow()
{
    Dictionary<char, List<char>> first = new Dictionary<char, List<char>>();
    Dictionary<char, List<char>> follow = new Dictionary<char, List<char>>();
    List<char> nullable = new List<char>(); // non-terminals that can derive the empty string
    foreach (char nonTerminal in Nchar)
    {
        first[nonTerminal] = new List<char>();
        follow[nonTerminal] = new List<char>();
    }

    // Pass 1: nullable + FIRST, iterated until nothing changes.
    bool changed = true;
    while (changed)
    {
        changed = false;
        foreach (LRNode production in LRproNum)
        {
            // Only rules headed by a single known non-terminal take part;
            // this skips the augmented production whose head is "S'".
            if (production.Left.Length != 1 || !first.ContainsKey(production.Left[0]))
            {
                continue;
            }
            char head = production.Left[0];

            bool bodyNullable = true;
            foreach (char symbol in production.Right)
            {
                if (!first.ContainsKey(symbol))
                {
                    // Terminal: it starts the body and ends the scan.
                    changed |= AddToSet(first[head], symbol);
                    bodyNullable = false;
                    break;
                }
                changed |= UnionIntoSet(first[head], first[symbol]);
                if (!nullable.Contains(symbol))
                {
                    bodyNullable = false;
                    break;
                }
            }
            if (bodyNullable && !nullable.Contains(head))
            {
                nullable.Add(head);
                changed = true;
            }
        }
    }

    // The start symbol is the body of production 0 (S' -> start); it is
    // followed by the end marker.
    if (LRproNum.Count > 0 && LRproNum[0].Right.Length > 0 && follow.ContainsKey(LRproNum[0].Right[0]))
    {
        AddToSet(follow[LRproNum[0].Right[0]], '#');
    }

    // Pass 2: FOLLOW, iterated until nothing changes.
    changed = true;
    while (changed)
    {
        changed = false;
        foreach (LRNode production in LRproNum)
        {
            if (production.Left.Length != 1 || !follow.ContainsKey(production.Left[0]))
            {
                continue;
            }
            char head = production.Left[0];
            string body = production.Right;

            for (int k = 0; k < body.Length; k++)
            {
                char symbol = body[k];
                if (!follow.ContainsKey(symbol))
                {
                    continue; // FOLLOW is only defined for non-terminals
                }

                // Everything that can start the rest of the body follows symbol.
                bool restNullable = true;
                for (int t = k + 1; t < body.Length; t++)
                {
                    char next = body[t];
                    if (!first.ContainsKey(next))
                    {
                        changed |= AddToSet(follow[symbol], next);
                        restNullable = false;
                        break;
                    }
                    changed |= UnionIntoSet(follow[symbol], first[next]);
                    if (!nullable.Contains(next))
                    {
                        restNullable = false;
                        break;
                    }
                }

                // If the rest can vanish, whatever follows the head follows symbol too.
                if (restNullable)
                {
                    changed |= UnionIntoSet(follow[symbol], follow[head]);
                }
            }
        }
    }

    followSets = follow;
}

/// <summary>
/// Builds the SLR(1) parse table: same layout and shift/goto entries as the
/// LR(0) table, but a reduce entry is written only under the terminals in
/// FOLLOW of the reduced production's head.
/// </summary>
/// <returns>A new table; <c>LRAna</c> is not modified.</returns>
public Table[][] SLRAnaly()
{
    Follow(); // compute the FOLLOW sets

    int columnCount = Echar.Count + Nchar.Count;
    Table errorCell = new Table();
    Table[][] SLR = new Table[proitemset.Count][];
    for (int i = 0; i < proitemset.Count; i++)
    {
        SLR[i] = new Table[columnCount];
        for (int j = 0; j < columnCount; j++)
        {
            SLR[i][j] = errorCell;
        }
    }

    // Item set 1 is the accepting one (same convention as the LR(0) table).
    if (proitemset.Count > 1)
    {
        SLR[1][FindID(Echar, '#')] = new Table('A', 0);
    }

    // Reduce entries. Every reduce item of the state is considered, because
    // in an SLR state the reduce item is not necessarily the first item.
    foreach (int state in Gy_itemset)
    {
        foreach (int itemIndex in proitemset[state].Container)
        {
            if (!Gy_obj.Contains(itemIndex))
            {
                continue;
            }
            LRNode item = LRobjNum[itemIndex];
            if (item.Left.Length == 0)
            {
                continue;
            }

            Table reduceCell = new Table('r', Find_pro(item));
            foreach (char lookahead in FollowSet(item.Left[0]))
            {
                int column = Echar.IndexOf(lookahead);
                if (column >= 0)
                {
                    SLR[state][column] = reduceCell;
                }
            }
        }
    }

    // Shift / goto entries from the automaton edges; isFinalsymbol is true
    // for non-terminals.
    for (int i = 0; i < Pindex; i++)
    {
        if (isFinalsymbol(dfa[i].symbol))
        {
            int CID = FindID(Nchar, dfa[i].symbol);
            SLR[dfa[i].from][CID + Echar.Count] = new Table('N', dfa[i].to);
        }
        else
        {
            int CID = FindID(Echar, dfa[i].symbol);
            SLR[dfa[i].from][CID] = new Table('S', dfa[i].to);
        }
    }
    return SLR;
}

/// <summary>
/// Returns the FOLLOW set of a non-terminal as computed by the last call to
/// <see cref="Follow"/>.
/// </summary>
/// <param name="ch">The non-terminal to look up.</param>
/// <returns>
/// A copy of the set; empty when <paramref name="ch"/> is unknown or
/// <see cref="Follow"/> has not run yet.
/// </returns>
public List<char> FollowSet(char ch)
{
    List<char> stored;
    if (followSets.TryGetValue(ch, out stored))
    {
        return new List<char>(stored);
    }
    return new List<char>();
}

// Adds value to target unless already present; returns true when it was added.
private static bool AddToSet(List<char> target, char value)
{
    if (target.Contains(value))
    {
        return false;
    }
    target.Add(value);
    return true;
}

// Adds every element of source to target; returns true when target grew.
// Indexed loop so that passing the same list twice is harmless.
private static bool UnionIntoSet(List<char> target, List<char> source)
{
    bool grew = false;
    for (int k = 0; k < source.Count; k++)
    {
        grew |= AddToSet(target, source[k]);
    }
    return grew;
}
// ... 其他函数 ...