类C语言词法分析程序：语法描述、实验要求及代码示例

一. 类C语言语法描述和实验要求

类C语言中关键字有20个，见下表

| 序号 | 关键字 | 序号 | 关键字 |
|---|---|---|---|
| 1 | begin | 11 | exit |
| 2 | do | 12 | if |
| 3 | while | 13 | else |
| 4 | for | 14 | select |
| 5 | int | 15 | case |
| 6 | string | 16 | break |
| 7 | double | 17 | return |
| 8 | bool | 18 | end |
| 9 | char | 19 | write |
| 10 | float | 20 | read |

类C语言中运算符见下表：

| 序号 | 运算符 | 序号 | 运算符 |
|---|---|---|---|
| 21 | + | 36 | ! |
| 22 | - | 37 | && |
| 23 | * | 38 | \|\| |
| 24 | / | 39 | ++ |
| 25 | ** (乘方) | 40 | -- |
| 26 | % (求余) | 41 | , |
| 27 | = (赋值) | 42 | - (负号) |
| 28 | += | 43 | & |
| 29 | -= | 44 | % (格式运算符) |
| 30 | == | | |
| 31 | <> | | |
| 32 | > | | |
| 33 | >= | | |
| 34 | < | | |
| 35 | <= | | |

类C语言中分界符见下表：

| 序号 | 分界符 | 序号 | 分界符 |
|---|---|---|---|
| 45 | ( | 48 | ; |
| 46 | ) | 49 | ' |
| 47 | , | 50 | ' |

标识符和关键字的构词规则为：以大写字母（A-Z）或小写字母（a-z）开头，后面可以跟大写字母、小写字母、数字（0-9）或下划线；下划线最多只能出现一次，总长度最多为15个字符。区分大小写。
常量分为整型、实型、字符型、字符串型、布尔型。其中整型常量包括十进制、八进制和十六进制，实型包括十进制和科学计数法。其中：

(1) 整型常量八进制、十进制和十六进制与C语言要求一致。
(2) 实型常量要求与C语言要求一致。注意小数点的个数。
(3) 字符类型和字符串类型常量要求与C语言要求一致。
(4) 布尔型常量有2个，分别是TRUE和FALSE。

能识别出部分单词拼写错误并输出错误行数和错误信息。
以类C语言编写测试用例源程序，以二元式（单词类别、单词值）形式为输出结果。关键字、运算符、分界符、标识符单词类别自己指定或按照表格中的数字表示均可，标识符单词的值则以其在符号表中的地址表示；常量一个类型一种类别，并自己指定每种类型的单词类别，整型和实型常量单词值以二进制形式表示，其余类型照原样输出。

类C语言源程序样例：

int f(int a,double b)
begin 
  bool c=TRUE;
  char d;
  float e;
string f;
read('"%f"',&e);
d='345';
 while(a<=e)
   begin
     b=a**2+b/3;
     select(c)
     case   f='"a"';   break;
     case   e=-4.3+3.52e12.3+0x123-0123;
     case   a+=8; 
     case   c=5<>9;
   end
write('"%f%d"',e,a);
END

二. 实验步骤

用类C语言编写测试用例源程序（如上所示）。用C语言或其他语言编写能够对该源程序进行词法分析的程序。
运行词法分析程序，读入类C语言源程序，进行词法分析，并以二元式形式输出结果。
词法分析程序出错可设立断点，单步运行词法分析程序，依次单个输出单词。分析和理解词法分析程序，解释词法分析程序中的数据和变量变化的原因和输出结果。
运行修改后的词法分析程序，读入修改后的类C语言测试用例源程序，进行词法分析。

类C语言词法分析程序示例 (C语言)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Reserved words of the C-like language, in the order of the keyword table.
// isKeyword() reports the keyword stored at row k as category code k + 1.
char keywords[20][20] = {
    /*  1- 5 */ "begin",  "do",     "while",  "for",  "int",
    /*  6-10 */ "string", "double", "bool",   "char", "float",
    /* 11-15 */ "exit",   "if",     "else",   "select", "case",
    /* 16-20 */ "break",  "return", "end",    "write", "read"
};

// Operator spellings, in the order of the operator table: the operator stored
// at row k has category code k + 21 (codes 21..44).
// Rows 7 and 8 are the compound assignments "+=" and "-=" (codes 28 and 29).
// Some spellings appear twice because the table lists them under two meanings
// ("-" as subtraction and as negation, "%" as remainder and as format operator);
// a linear search always reports the first occurrence.
// Only 24 of the 25 rows are initialised; the last row is an empty string.
char operators[25][5] = {
    "+", "-", "*", "/", "**", "%", "=", "+=", "-=",
    "==", "<>", ">", ">=", "<", "<=", "!", "&&",
    "||", "++", "--", ",", "-", "&", "%"
};

// Delimiter spellings; isDelimiter() reports the delimiter stored at row k
// as category code k + 45. Each row holds one character plus its terminator.
char delimiters[5][2] = {
    "(",   /* 45 */
    ")",   /* 46 */
    ",",   /* 47 */
    ";",   /* 48 */
    "'"    /* 49 */
};

// Returns 1 when c is an ASCII letter (A-Z or a-z), otherwise 0.
int isLetter(char c) {
    if (c >= 'A' && c <= 'Z') {
        return 1;
    }
    if (c >= 'a' && c <= 'z') {
        return 1;
    }
    return 0;
}

// Returns 1 when c is a decimal digit (0-9), otherwise 0.
int isDigit(char c) {
    if (c < '0') {
        return 0;
    }
    if (c > '9') {
        return 0;
    }
    return 1;
}

// Looks word up in the keywords table (exact, case-sensitive comparison).
// Returns the 1-based position of the matching keyword, or 0 when word is
// not a keyword.
int isKeyword(char *word) {
    int code = 1;
    while (code <= 20) {
        if (!strcmp(keywords[code - 1], word)) {
            return code;
        }
        code++;
    }
    return 0;
}

// Looks word up in the operators table (exact comparison).
// Returns the category code of the first matching row (row index + 21),
// or 0 when word is NULL, empty, or not an operator.
int isOperator(char *word) {
    int i;
    // The table has unused, zero-filled rows; without this guard an empty
    // word would match one of them and be reported as a real operator.
    if (word == NULL || word[0] == '\0') {
        return 0;
    }
    for (i = 0; i < 25; i++) {
        if (strcmp(word, operators[i]) == 0) {
            return i + 21;
        }
    }
    return 0;
}

// Looks word up in the delimiters table (exact comparison).
// Returns the category code of the matching row (row index + 45),
// or 0 when word is not a delimiter.
int isDelimiter(char *word) {
    int row = 0;
    while (row < 5) {
        if (!strcmp(delimiters[row], word)) {
            return 45 + row;
        }
        row++;
    }
    return 0;
}

// Returns nonzero when c is one of the characters an operator can be built from.
static int isOperatorChar(char c) {
    // strchr also matches the terminating NUL of the set, so exclude it explicitly.
    return c != '\0' && strchr("+-*/%=<>!&|", c) != NULL;
}

// Lexical analysis: scans sourceCode from left to right and prints one
// "(category, text)" pair per recognised token on stdout.
//
// Recognised token classes: keywords and identifiers, decimal integer
// constants, decimal real constants of the form digits '.' digits, operators
// (longest match of one or two characters) and single-character delimiters.
// Anything else (for example '"', a stray '.', or an operator character that
// is not an operator on its own) is reported on stderr together with its line
// number and skipped, so the scan always makes progress.
//
// sourceCode must be a NUL-terminated string; it is only read.
void lexAnalysis(char *sourceCode) {
    char word[20];
    const int maxLen = (int)sizeof(word) - 1;  // room left for the terminator
    int i = 0, j = 0;
    int line = 1;

    while (sourceCode[i] != '\0') {
        char c = sourceCode[i];
        int truncated = 0;

        // Skip white space, counting lines for error messages.
        if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
            if (c == '\n') {
                line++;
            }
            i++;
            continue;
        }

        // Letter or underscore: keyword or identifier.
        if (isLetter(c) || c == '_') {
            j = 0;
            while (isLetter(sourceCode[i]) || isDigit(sourceCode[i]) || sourceCode[i] == '_') {
                // Consume the whole lexeme but never write past the buffer.
                if (j < maxLen) {
                    word[j++] = sourceCode[i];
                } else {
                    truncated = 1;
                }
                i++;
            }
            word[j] = '\0';
            if (truncated) {
                fprintf(stderr, "Error (line %d): identifier too long, truncated to '%s'\n", line, word);
            }

            int keywordIndex = isKeyword(word);
            if (keywordIndex != 0) {
                printf("(%d, %s)\n", keywordIndex, word);
            } else {
                printf("(ID, %s)\n", word);
            }
        }

        // Digit: integer constant, or real constant when a '.' follows.
        else if (isDigit(c)) {
            int isReal = 0;
            j = 0;
            while (isDigit(sourceCode[i])) {
                if (j < maxLen) {
                    word[j++] = sourceCode[i];
                } else {
                    truncated = 1;
                }
                i++;
            }
            if (sourceCode[i] == '.') {
                isReal = 1;
                if (j < maxLen) {
                    word[j++] = sourceCode[i];
                } else {
                    truncated = 1;
                }
                i++;
                while (isDigit(sourceCode[i])) {
                    if (j < maxLen) {
                        word[j++] = sourceCode[i];
                    } else {
                        truncated = 1;
                    }
                    i++;
                }
            }
            word[j] = '\0';
            if (truncated) {
                fprintf(stderr, "Error (line %d): numeric constant too long, truncated to '%s'\n", line, word);
            }
            if (isReal) {
                printf("(FLOAT_CONST, %s)\n", word);
            } else {
                printf("(INT_CONST, %s)\n", word);
            }
        }

        // Operator: try the two-character spelling first, then fall back to
        // one character, so that "=-" is read as "=" followed by "-".
        else if (isOperatorChar(c)) {
            int operatorIndex = 0;
            word[0] = c;
            if (isOperatorChar(sourceCode[i + 1])) {
                word[1] = sourceCode[i + 1];
                word[2] = '\0';
                operatorIndex = isOperator(word);
            }
            if (operatorIndex != 0) {
                i += 2;
            } else {
                word[1] = '\0';
                operatorIndex = isOperator(word);
                i += 1;
            }

            if (operatorIndex != 0) {
                printf("(%d, %s)\n", operatorIndex, word);
            } else {
                fprintf(stderr, "Error (line %d): unknown operator '%s'\n", line, word);
            }
        }

        // Everything else: a single-character delimiter or an illegal character.
        // Exactly one character is consumed either way.
        else {
            word[0] = c;
            word[1] = '\0';

            int delimiterIndex = isDelimiter(word);
            if (delimiterIndex != 0) {
                printf("(%d, %s)\n", delimiterIndex, word);
            } else {
                fprintf(stderr, "Error (line %d): unrecognized character '%c'\n", line, c);
            }
            i++;
        }
    }
}

// Entry point: runs the lexer over the built-in sample program.
int main(void) {
    // A string literal may not contain raw newlines or unescaped double quotes,
    // so the sample is written as adjacent literals (which the compiler joins)
    // with explicit \n and \" escapes.
    char sourceCode[1000] =
        "int f(int a,double b)\nbegin\n  bool c=TRUE;\n"
        "  char d;\n"
        "  float e;\n"
        "string f;\n"
        "read('\"%f\"',&e);\n"
        "d='345';\n"
        " while(a<=e)\n"
        "   begin\n"
        "     b=a**2+b/3;\n"
        "     select(c)\n"
        "     case   f='\"a\"';   break;\n"
        "     case   e=-4.3+3.52e12.3+0x123-0123;\n"
        "     case   a+=8; \n"
        "     case   c=5<>9;\n"
        "   end\n"
        "write('\"%f%d\"',e,a);\n"
        "END";

    lexAnalysis(sourceCode);

    return 0;
}

注意：

该示例程序仅提供一个基本的词法分析框架，可能无法处理所有情况。您需要根据实际需求进行修改和扩展。
在实际应用中，需要使用更完善的词法分析算法，例如有限自动机 (DFA) 或正则表达式匹配等。
您可以使用更强大的编程语言，例如 Python 或 Java，来编写更加复杂和高效的词法分析程序。

希望这些信息对您有所帮助！如果您还有其他问题，请随时提出。