SLR1 分析：文法分析表错误分析及修正

问题描述

给定文法：

E->E+T|T
T->T*F|F
F->(E)|d

在进行 SLR(1) 分析时，分析表中的归约动作（r 项）没有被正确填入。

错误分析

在 SLR(1) 分析表中，归约动作应填入含有归约项目的项目集（即状态）所对应的行：对该项目集中的每个归约项目 A→α·，在 FOLLOW(A) 中每个终结符所对应的列填入归约动作 r。行号必须是项目集的序号，而不是归约项目自身的序号。

改正后的代码

在 SLRAnaly() 函数中，修改对于归约状态的添加方式：

// Fill in the reduce ("r") actions of the SLR(1) table.
// Rows are indexed by item-set (state) number; for every completed item
// A -> α· in the state, the columns are the terminals in FOLLOW(A).
for (int i = 0; i < Gy_itemset.Count; i++)
{
    int state = Gy_itemset[i];
    SLRitemsets itemset = proitemset[state];
    foreach (int item in itemset.Container)
    {
        // A state usually mixes shift items with reduce items; only reduce
        // items may contribute "r" entries.
        // NOTE(review): assumes Gy_obj and Container share the same index
        // space (indices into SLRobjNum) — confirm where Gy_obj is filled.
        if (!Gy_obj.Contains(item))
        {
            continue;
        }

        SLRNode node = SLRobjNum[item];

        // No production is special-cased: every reduce item, including
        // F -> d, reduces on all of FOLLOW(left-hand side).
        foreach (char c in GetFollow(node.Left[0]))
        {
            int CID = FindID(Echar, c);
            SLRAna[state][CID] = new Table('r', Find_pro(node));
        }
    }
}

需要调整的代码

在 GET_ANA() 函数中，调用 SLRAnaly() 函数：

// Excerpt: GET_ANA() calls SLRAnaly() first, so the table is rebuilt before
// anything else in the method runs.
public Table[][] GET_ANA()
{
    SLRAnaly();
    // ... (remainder of the method is elided in this excerpt)
}

完整代码示例

/// <summary>
/// A grammar rule held as two strings: a left-hand side and a right-hand side.
/// </summary>
public class SLRNode
{
    /// <summary>Left-hand side of the rule.</summary>
    public string Left;

    /// <summary>Right-hand side of the rule.</summary>
    public string Right;

    /// <summary>Creates a node from its two sides.</summary>
    /// <param name="Left">Left-hand side text.</param>
    /// <param name="Right">Right-hand side text.</param>
    public SLRNode(string Left, string Right)
    {
        this.Right = Right;
        this.Left = Left;
    }
}

/// <summary>
/// An item set: a list of item sequence numbers.
/// </summary>
public class SLRitemsets
{
    /// <summary>
    /// Sequence numbers of the items that belong to this set
    /// (initial capacity 100).
    /// </summary>
    public List<int> Container = new List<int>(capacity: 100);
}

/// <summary>
/// One DFA transition: from state <c>from</c>, on <c>symbol</c>, to state <c>to</c>.
/// </summary>
public struct DFA
{
    /// <summary>Source state number.</summary>
    public int from;

    /// <summary>Grammar symbol labelling the transition.</summary>
    public char symbol;

    /// <summary>Target state number.</summary>
    public int to;

    /// <summary>Creates a transition from its three components.</summary>
    public DFA(int from, char symbol, int to)
    {
        this.to = to;
        this.symbol = symbol;
        this.from = from;
    }
}

/// <summary>
/// One cell of the parsing table.
/// </summary>
public class Table
{
    /// <summary>True when the cell is an ERROR entry.</summary>
    public bool error;

    /// <summary>Kind of the entry.</summary>
    public char type;

    /// <summary>Numeric value attached to the entry.</summary>
    public int id;

    /// <summary>Creates an ERROR cell; <c>type</c> and <c>id</c> keep their defaults.</summary>
    public Table()
    {
        error = true;
    }

    /// <summary>Creates a non-error cell of the given kind and value.</summary>
    /// <param name="type">Kind of the entry.</param>
    /// <param name="id">Numeric value of the entry.</param>
    public Table(char type, int id)
    {
        error = false;
        this.id = id;
        this.type = type;
    }
}

/// <summary>
/// Per-step records kept while analysing a sentence. Each list starts empty
/// with an initial capacity of 100.
/// </summary>
public class Analyze
{
    /// <summary>State-stack record.</summary>
    public List<string> stack_state = new List<string>(capacity: 100);

    /// <summary>Symbol-stack record.</summary>
    public List<string> stack_symbol = new List<string>(capacity: 100);

    /// <summary>Input-string record.</summary>
    public List<string> Input_str = new List<string>(capacity: 100);

    /// <summary>Record of the productions used.</summary>
    public List<string> Tran_pro = new List<string>(capacity: 100);
}

public DFA[] dfa = new DFA[100]; // DFA transitions
public int Pindex = 0; // cursor into dfa; SLRAnaly() reads entries 0..Pindex-1
public Table[][] SLRAna; // parsing table, indexed [item set][symbol column]
public Analyze Jz;
public bool Success = false;
public List<SLRNode> SLRproNum = new List<SLRNode>(50); // production list
public List<SLRNode> SLRobjNum = new List<SLRNode>(50); // item list
public List<SLRitemsets> proitemset = new List<SLRitemsets>(100); // item sets
public List<int> Gy_obj = new List<int>(50); // sequence numbers of the reduce items
public List<int> Gy_itemset = new List<int>(50); // sequence numbers of the item sets that contain a reduce item
public List<char> Nchar = new List<char>(50); // non-terminal symbols
public List<char> Echar = new List<char>(50); // terminal symbols

public List<char>[] Follow; // FOLLOW set of each non-terminal

// The original initialisers were written as '' (an empty character literal),
// which does not compile; an empty string is "".
public string RStr = "";
public string RStr_obitemset = ""; // output text
public string RStr_DFA = "";
public string RStr_ANA = "";

/// <summary>
/// Rebuilds the parsing table with <c>SLRAnaly()</c>, appends a plain-text
/// rendering of it to <c>RStr_ANA</c>, and returns the table.
/// </summary>
/// <returns>The freshly built <c>SLRAna</c> table.</returns>
public Table[][] GET_ANA()
{
    SLRAnaly();

    // All texts below were originally written as character literals ('...'),
    // which cannot hold more than one character; they are string literals now,
    // with byte-identical content.
    // NOTE(review): the heading says "SLR0" although the table is built with
    // FOLLOW sets — left unchanged because it is emitted output.
    RStr_ANA += "\r\nSLR0 分析表:\r\n    ";

    // Header row: terminal columns first, then non-terminal columns.
    for (int i = 0; i < Echar.Count; i++)
    {
        RStr_ANA += Echar[i].ToString() + "     ";
    }
    for (int i = 0; i < Nchar.Count; i++)
    {
        RStr_ANA += Nchar[i].ToString() + "     ";
    }
    RStr_ANA += "\r\n";

    // One output row per item set.
    for (int i = 0; i < proitemset.Count; i++)
    {
        RStr_ANA += i.ToString() + "  ";
        for (int j = 0; j < Echar.Count + Nchar.Count; j++)
        {
            Table cell = SLRAna[i][j];
            if (cell.error)
            {
                RStr_ANA += "  " + "    ";
            }
            else if (i == 1 && j == Echar.Count - 1)
            {
                // Accept cell. Assumes '#' is the last terminal in Echar —
                // TODO confirm against where Echar is filled.
                RStr_ANA += "AC" + "    ";
            }
            else if (cell.type != 'N')
            {
                // Entries other than 'N' are printed as <type><id>.
                RStr_ANA += cell.type.ToString() + cell.id.ToString() + "    ";
            }
            else
            {
                // 'N' entries are printed as the bare id.
                RStr_ANA += cell.id.ToString() + "    ";
            }
        }
        RStr_ANA += "\r\n";
    }

    return SLRAna;
}

/// <summary>
/// Builds the SLR(1) parsing table <c>SLRAna</c>: one row per item set, one
/// column per terminal (<c>Echar</c>) followed by one per non-terminal
/// (<c>Nchar</c>). Cells start as ERROR and are then filled with reduce ('r'),
/// accept ('A'), goto ('N') and shift ('S') entries.
/// </summary>
public void SLRAnaly()
{
    int columns = Echar.Count + Nchar.Count;

    // Every cell starts as the same shared ERROR instance (as before);
    // cells are replaced, never mutated, by the code below.
    Table errorCell = new Table();
    SLRAna = new Table[proitemset.Count][];
    for (int i = 0; i < proitemset.Count; i++)
    {
        SLRAna[i] = new Table[columns];
        for (int j = 0; j < columns; j++)
        {
            SLRAna[i][j] = errorCell;
        }
    }

    // Reduce actions: for every completed item A -> α· in a state, put
    // "r<production>" in the columns of FOLLOW(A).
    for (int i = 0; i < Gy_itemset.Count; i++)
    {
        int state = Gy_itemset[i];
        SLRitemsets itemset = proitemset[state];
        foreach (int item in itemset.Container)
        {
            // A state usually mixes shift items with reduce items; only
            // reduce items may contribute "r" entries.
            // NOTE(review): assumes Gy_obj and Container share the same index
            // space (indices into SLRobjNum) — confirm where Gy_obj is filled.
            if (!Gy_obj.Contains(item))
            {
                continue;
            }

            SLRNode node = SLRobjNum[item];

            // No production is special-cased: every reduce item, including
            // F -> d, reduces on all of FOLLOW(left-hand side).
            foreach (char c in GetFollow(node.Left[0]))
            {
                int CID = FindID(Echar, c);
                SLRAna[state][CID] = new Table('r', Find_pro(node));
            }
        }
    }

    // Item set 1 is the accepting state: [1]['#'] = acc. The DFA has no edge
    // for it, so it is set directly — after the reduce pass, so that a reduce
    // entry can never overwrite it.
    SLRAna[1][FindID(Echar, '#')] = new Table('A', 0);

    // Goto / shift actions, one per DFA transition.
    for (int i = 0; i < Pindex; i++)
    {
        // The branch that indexes Nchar is selected by membership in Nchar.
        // NOTE(review): this replaces isFinalsymbol(), which GetFollow/GetFirst
        // use with the opposite meaning (true = terminal).
        if (Nchar.Contains(dfa[i].symbol))
        {
            // Non-terminal: goto entry, stored after the terminal columns.
            int CID = FindID(Nchar, dfa[i].symbol);
            SLRAna[dfa[i].from][CID + Echar.Count] = new Table('N', dfa[i].to);
        }
        else
        {
            // Terminal: shift entry.
            int CID = FindID(Echar, dfa[i].symbol);
            SLRAna[dfa[i].from][CID] = new Table('S', dfa[i].to);
        }
    }
}


/// <summary>
/// Computes FOLLOW(<paramref name="c"/>) from the productions in <c>SLRproNum</c>.
/// </summary>
/// <param name="c">The grammar symbol whose FOLLOW set is wanted.</param>
/// <returns>The distinct symbols that may follow <paramref name="c"/>.</returns>
public List<char> GetFollow(char c)
{
    List<char> follow = new List<char>();
    CollectFollow(c, new HashSet<char>(), follow);
    return follow.Distinct().ToList();
}

// Adds to 'follow' everything contributed by 'c' and, transitively, by each
// left-hand side whose production can end in 'c'. 'visited' guarantees that
// every symbol is expanded at most once, so right-recursive or mutually
// recursive productions cannot recurse forever.
private void CollectFollow(char c, HashSet<char> visited, List<char> follow)
{
    if (!visited.Add(c))
    {
        return;
    }

    // 'E' is hard-coded as the start symbol, so the end marker follows it.
    if (c == 'E')
    {
        follow.Add('#');
    }

    foreach (SLRNode node in SLRproNum)
    {
        string right = node.Right;

        // Visit every occurrence of c in the right-hand side, not only the first.
        for (int index = right.IndexOf(c); index != -1; index = right.IndexOf(c, index + 1))
        {
            bool reachesEnd = true;
            for (int k = index + 1; k < right.Length; k++)
            {
                char next = right[k];

                // Anything that is not a known non-terminal is a terminal.
                // NOTE(review): replaces isFinalsymbol(), whose meaning is
                // used inconsistently elsewhere in this file.
                if (!Nchar.Contains(next))
                {
                    follow.Add(next);
                    reachesEnd = false;
                    break;
                }

                // '#' inside a FIRST set is treated as the "can derive empty"
                // marker, exactly as before.
                List<char> first = GetFirst(next);
                bool nullable = first.Remove('#');
                follow.AddRange(first);
                if (!nullable)
                {
                    reachesEnd = false;
                    break;
                }
                // 'next' can vanish: keep scanning the symbols after it.
            }

            // c is last, or everything after it can vanish:
            // FOLLOW(left-hand side) is included in FOLLOW(c).
            if (reachesEnd)
            {
                CollectFollow(node.Left[0], visited, follow);
            }
        }
    }
}


/// <summary>
/// Computes FIRST(<paramref name="c"/>) from the productions in <c>SLRproNum</c>.
/// </summary>
/// <param name="c">A grammar symbol.</param>
/// <returns>
/// The distinct symbols that can begin a string derived from
/// <paramref name="c"/>; '#' in the result marks "can derive empty".
/// </returns>
public List<char> GetFirst(char c)
{
    return CollectFirst(c, new HashSet<char>()).Distinct().ToList();
}

// Recursive worker for GetFirst. 'inProgress' holds the non-terminals that
// are currently being expanded further up the call chain; meeting one of
// them again means a left-recursive cycle, which contributes nothing new.
private List<char> CollectFirst(char c, HashSet<char> inProgress)
{
    List<char> first = new List<char>();

    // Anything that is not a known non-terminal is a terminal: FIRST is the
    // symbol itself.
    // NOTE(review): replaces isFinalsymbol(), whose meaning is used
    // inconsistently elsewhere in this file.
    if (!Nchar.Contains(c))
    {
        first.Add(c);
        return first;
    }

    // Direct (A -> A...) and indirect left recursion both end up here.
    if (!inProgress.Add(c))
    {
        return first;
    }

    foreach (SLRNode node in SLRproNum)
    {
        if (node.Left[0] != c)
        {
            continue;
        }

        // Walk the right-hand side while each symbol can derive empty,
        // instead of assuming that a second symbol exists.
        bool allNullable = true;
        foreach (char symbol in node.Right)
        {
            List<char> sub = CollectFirst(symbol, inProgress);
            bool nullable = sub.Remove('#');
            first.AddRange(sub);
            if (!nullable)
            {
                allNullable = false;
                break;
            }
        }

        // Every symbol can vanish (or the right-hand side is empty), so c
        // itself can derive empty.
        if (allNullable)
        {
            first.Add('#');
        }
    }

    inProgress.Remove(c);
    return first;
}

代码解释

代码中使用了以下数据结构：

SLRNode：用于表示一条产生式或一个项目，包含左部（Left）和右部（Right）。
SLRitemsets：用于表示项目集合，包含项目集合中的项目序号。
DFA：用于表示项目集之间的一条状态转移，包含起始状态（from）、转移符号（symbol）和目标状态（to）。
Table：用于表示分析表中的一个单元格，包含是否为错误项（error）、动作类型（type，如 S、r、N、A）以及对应的状态号或产生式序号（id）。
Analyze：用于分析句子，包含状态栈、符号栈、输入串、所用产生式等信息。

代码的主要功能是：

构建 SLR 分析表，包括状态转换和动作信息。
使用 SLR 分析表对输入句子进行分析，并判断是否接受该句子。

总结

本文分析了 SLR1 分析表中归约状态添加错误的原因，并提供了修正后的代码。文章还包括了对 SLR1 分析法的基础介绍以及代码示例，帮助读者理解 SLR1 分析表的构建过程以及常见错误的排查方法。