SLR1 分析器构造：代码详解与示例

本文将详细介绍如何将 LR0 分析器代码修改为构造 SLR1 分析器的代码，并提供关键函数的完整代码实现。

LR0 分析器代码修改

要将 LR0 分析器代码修改为 SLR1 分析器，需要添加对“展望符”的处理。这意味着需要修改以下关键函数:

Closure 函数: 该函数用于计算项目集的闭包。在 SLR1 中，我们需要为每个项目添加展望符，因此需要修改 Closure 函数，使其能够计算包含展望符的项目集闭包。
CreateItemsets 函数: 该函数用于构造所有的项目集。在 SLR1 中，我们需要根据展望符来划分项目集，因此需要修改 CreateItemsets 函数，使其能够根据展望符构造项目集。
CreateSLRTable 函数: 该函数用于构造 SLR1 分析表。在 SLR1 中，我们需要根据展望符来决定分析表中每个元素的类型和值，因此需要修改 CreateSLRTable 函数，使其能够根据展望符构造 SLR1 分析表。

函数代码实现

以下代码展示了 SLRNode, SLRitemsets, DFA, Table 类以及关键函数的完整代码实现:

public class SLRNode
{
    public string Left;
    public string Right;
    public HashSet<char> First;
    public HashSet<char> Follow;
    public SLRNode(string Left, string Right)
    {
        this.Left = Left;
        this.Right = Right;
        First = new HashSet<char>();
        Follow = new HashSet<char>();
    }
}
//项目集类
public class SLRitemsets
{
    public List<int> Container
        = new List<int>(100);
    //记录项目在项目集合中的序号
    public HashSet<char> LookAhead
        = new HashSet<char>();
    //记录展望符
}

//DFA结点
public struct DFA
{
    public int from;
    public char symbol;
    public int to;
    public HashSet<char> LookAhead;
    public DFA(int from, char symbol, int to, HashSet<char> LookAhead)
    {
        this.from = from;
        this.symbol = symbol;
        this.to = to;
        this.LookAhead = LookAhead;
    }
}

//分析表 结点
public class Table
{
    public bool error;//是否为ERROR
    public char type;//结点类型
    public int id;//数值
    public Table()
    {
        this.error = true;
    }
    public Table(char type, int id)
    {
        this.type = type;
        this.id = id;
        this.error = false;
    }
}

public DFA[] dfa = new DFA[100];
public int Pindex = 0; //dfa数组指针
public Table[][] SLRAna;//分析表
public Analyze Jz;
public bool Success = false;
public List<SLRNode> SLRproNum = new List<SLRNode>(50);//产生式 列表
public List<SLRNode> SLRobjNum = new List<SLRNode>(50);//项目 列表
public List<SLRitemsets> proitemset = new List<SLRitemsets>(100);//项目集合
public List<int> Gy_obj = new List<int>(50);//归约项目序号集合
public List<int> Gy_itemset = new List<int>(50);//含有归约项目的集合的序号 的集合
public List<char> Nchar = new List<char>(50);//非终结符集合
public List<char> Echar = new List<char>(50);//终结符集合

Dictionary<char, HashSet<char>> first = new Dictionary<char, HashSet<char>>();//非终结符的First集
Dictionary<char, HashSet<char>> follow = new Dictionary<char, HashSet<char>>();//非终结符的Follow集

public string RStr = '';
public string RStr_obitemset = '';//输出返回
public string RStr_DFA = '';
public string RStr_ANA = '';

public SLRitemsets Closure(SLRitemsets I)
{
    // 初始化一个新的 SLRitemsets 对象，用于存储闭包结果
    SLRitemsets closure = new SLRitemsets();
    // 将当前项目集中的所有项目添加到闭包中
    closure.Container.AddRange(I.Container);
    // 将当前项目集的展望符添加到闭包中
    closure.LookAhead.UnionWith(I.LookAhead);
    // 循环遍历闭包中的所有项目
    for (int index = 0; index < closure.Container.Count; index++)
    {
        // 获取当前项目
        int itemIndex = closure.Container[index];
        // 获取项目的左侧非终结符
        char leftSymbol = SLRobjNum[itemIndex].Left[0];
        // 获取项目的右侧字符串
        string rightString = SLRobjNum[itemIndex].Right;
        // 检查项目是否为归约项目
        if (rightString[rightString.Length - 1] == '.')
        {
            // 如果是归约项目，则将其展望符添加到 closure.LookAhead 中
            closure.LookAhead.UnionWith(SLRobjNum[itemIndex].Follow);
        }
        else
        {
            // 否则，找到项目右侧字符串中第一个点号的位置
            int dotIndex = rightString.IndexOf('.');
            // 获取点号后面的符号
            char nextSymbol = rightString[dotIndex + 1];
            // 检查点号后面的符号是否是终结符
            if (Echar.Contains(nextSymbol))
            {
                // 如果是终结符，则将其添加到 closure.LookAhead 中
                closure.LookAhead.Add(nextSymbol);
            }
            else
            {
                // 如果是终结符，则遍历所有以 nextSymbol 为左侧非终结符的项目
                for (int j = 0; j < SLRobjNum.Count; j++)
                {
                    // 检查项目是否已经存在于闭包中
                    if (isnexist(closure.Container, j))
                        continue;
                    // 检查项目的左侧非终结符是否为 nextSymbol
                    if (SLRobjNum[j].Left == nextSymbol.ToString() && SLRobjNum[j].Right[0] == '.')
                    {
                        // 如果是，则将该项目的 First 集添加到 closure.LookAhead 中
                        closure.LookAhead.UnionWith(SLRobjNum[j].First);
                        // 将该项目的序号添加到闭包中
                        closure.Container.Add(j);
                    }
                }
            }
        }
    }
    // 返回闭包结果
    return closure;
}

public void CreateItemsets()
{
    //初始化第一个项目集
    SLRitemsets itemset0 = new SLRitemsets();
    itemset0.Container.Add(0);
    itemset0.LookAhead.Add('#');
    itemset0 = Closure(itemset0);
    proitemset.Add(itemset0);
    //构造所有项目集
    bool flag = true;
    while (flag)
    {
        flag = false;
        for (int i = 0; i < proitemset.Count; i++)
        {
            for (int j = 0; j < Echar.Count + Nchar.Count; j++)
            {
                char symbol = j < Echar.Count ? Echar[j] : Nchar[j - Echar.Count];
                HashSet<char> lookAhead = new HashSet<char>();
                foreach (int index in proitemset[i].Container)
                {
                    if (SLRobjNum[index].Right.Contains('.') && SLRobjNum[index].Right.IndexOf('.') < SLRobjNum[index].Right.Length - 1 && SLRobjNum[index].Right[SLRobjNum[index].Right.IndexOf('.') + 1] == symbol)
                    {
                        if (SLRobjNum[index].Right.Length - 1 == SLRobjNum[index].Right.IndexOf('.') + 1)
                        {
                            lookAhead.UnionWith(proitemset[i].LookAhead);
                        }
                        else
                        {
                            bool canEpsilon = true;
                            for (int k = SLRobjNum[index].Right.IndexOf('.') + 2; k < SLRobjNum[index].Right.Length; k++)
                            {
                                if (Echar.Contains(SLRobjNum[index].Right[k]))
                                {
                                    lookAhead.UnionWith(first[SLRobjNum[index].Right[k]]);
                                    if (!first[SLRobjNum[index].Right[k]].Contains('ε'))
                                    {
                                        canEpsilon = false;
                                        break;
                                    }
                                }
                                else
                                {
                                    canEpsilon = false;
                                    lookAhead.UnionWith(first[SLRobjNum[index].Right[k]]);
                                    break;
                                }
                            }
                            if (canEpsilon)
                            {
                                lookAhead.UnionWith(proitemset[i].LookAhead);
                            }
                        }
                    }
                }
                if (lookAhead.Count > 0)
                {
                    SLRitemsets newItemset = new SLRitemsets();
                    foreach (int index in proitemset[i].Container)
                    {
                        if (SLRobjNum[index].Right.Contains('.') && SLRobjNum[index].Right.IndexOf('.') < SLRobjNum[index].Right.Length - 1 && SLRobjNum[index].Right[SLRobjNum[index].Right.IndexOf('.') + 1] == symbol)
                        {
                            SLRNode newNode = new SLRNode(SLRobjNum[index].Left, SLRobjNum[index].Right.Insert(SLRobjNum[index].Right.IndexOf('.') + 2, '$'));
                            if (!SLRobjNum.Contains(newNode))
                            {
                                SLRobjNum.Add(newNode);
                                newItemset.Container.Add(SLRobjNum.Count - 1);
                                newItemset.LookAhead.Add(symbol);
                            }
                            else
                            {
                                int index2 = SLRobjNum.IndexOf(newNode);
                                newItemset.Container.Add(index2);
                                newItemset.LookAhead.Add(symbol);
                            }
                        }
                    }
                    newItemset = Closure(newItemset);
                    if (!proitemset.Contains(newItemset))
                    {
                        proitemset.Add(newItemset);
                        flag = true;
                    }
                    dfa[Pindex++] = new DFA(i, symbol, proitemset.IndexOf(newItemset), lookAhead);
                }
            }
        }
    }
}

public void CreateSLRTable()
{
    CreateItemsets();
    CreateDFA();
    SLRAna = new Table[proitemset.Count][];
    for (int i = 0; i < proitemset.Count; i++)
    {
        SLRAna[i] = new Table[Echar.Count + Nchar.Count];
        for (int j = 0; j < Echar.Count + Nchar.Count; j++)
        {
            char symbol = j < Echar.Count ? Echar[j] : Nchar[j - Echar.Count];
            int nextState = -1;
            HashSet<char> lookAhead = new HashSet<char>();
            foreach (DFA d in dfa)
            {
                if (d.from == i && d.symbol == symbol)
                {
                    nextState = d.to;
                    lookAhead.UnionWith(d.LookAhead);
                    break;
                }
            }
            if (nextState == -1)
            {
                SLRAna[i][j] = new Table();
            }
            else if (Gy_obj.Contains(nextState))
            {
                SLRAna[i][j] = new Table('R', Gy_obj.IndexOf(nextState) + 1);
            }
            else if (proitemset[nextState].LookAhead.Contains('#'))
            {
                SLRAna[i][j] = new Table('A', 0);
            }
            else
            {
                SLRAna[i][j] = new Table('S', nextState);
            }
        }
        foreach (int index in Gy_itemset)
        {
            if (proitemset[index].Container.Contains(i))
            {
                foreach (char c in proitemset[index].LookAhead)
                {
                    if (Echar.IndexOf(c) >= 0)
                    {
                        SLRAna[i][Echar.IndexOf(c)] = new Table('R', Gy_obj.IndexOf(index) + 1);
                    }
                    else if (c == '#')
                    {
                        SLRAna[i][Nchar.Count - 1] = new Table('R', Gy_obj.IndexOf(index) + 1);
                    }
                }
            }
        }
    }
    Success = true;
}

示例演示

文法:

E->E+T|T
T->T*F|F
F->(E)|d

SLR1 分析表:

| 状态 | + | * | ( | ) | d | # | E | |---|---|---|---|---|---|---|---| | 4 | | | S4 | | S5 | | 8 | | 5 | r6 | r6 | | r6 | | r6 | | | 6 | | | S4 | | S5 | | | | 7 | | | S4 | | S5 | | | | 8 | S6 | | | S11 | | | | | 9 | r1 | S7 | | r1 | | r1 | | | 10 | r3 | r3 | | r3 | | r3 | | | 11 | r5 | r5 | | r5 | | r5 | |

解释:

状态 4 中，遇到符号 ( 和 d 时，进行移进操作，分别移进到状态 4 和 5。
状态 5 中，遇到符号 + 和 * 时，进行归约操作，使用产生式 6 进行归约。
状态 8 中，遇到符号 d 时，进行移进操作，移进到状态 6。
状态 9 中，遇到符号 + 和 * 时，进行归约操作，使用产生式 1 进行归约。

分析结果:

通过 SLR1 分析表，我们可以对输入字符串进行语法分析，判断字符串是否符合文法规则。

注意:

上述代码中，isnexist 函数用于判断一个项目是否已经存在于项目集中，具体实现请根据实际情况编写。
本文仅提供了关键函数的代码实现，完整的 SLR1 分析器代码还需要包含其他辅助函数，例如 getFirst 函数用于计算非终结符的 First 集，getFollow 函数用于计算非终结符的 Follow 集等。

总结

本文详细介绍了如何将 LR0 分析器代码修改为构造 SLR1 分析器的代码，并提供关键函数的完整代码实现和示例演示。希望本文能够帮助读者更好地理解 SLR1 分析器的构造过程和工作原理。

参考文献: