简单编程语言的语法制导翻译和中间代码生成
#include
// 词法分析器 enum TokenType { FUNC, ARG, OP, NUM, VAR, EQ, COMMA, LPAREN, RPAREN, LBRACKET, RBRACKET, END }; struct Token { TokenType type; string value; };
class Lexer { public: Lexer(string input) { this->input = input; pos = 0; }
Token getNextToken() {
while (pos < input.length()) {
char ch = input[pos];
if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
pos++;
continue;
}
if (ch == '+' || ch == '-' || ch == '*' || ch == '/') {
pos++;
return { OP, string(1, ch) };
}
if (ch >= '0' && ch <= '9') {
string num = "";
while (pos < input.length() && input[pos] >= '0' && input[pos] <= '9') {
num += input[pos];
pos++;
}
return { NUM, num };
}
if (ch >= 'a' && ch <= 'z') {
string var = "";
while (pos < input.length() && input[pos] >= 'a' && input[pos] <= 'z') {
var += input[pos];
pos++;
}
if (var == "f") {
return { FUNC, var };
} else {
return { VAR, var };
}
}
if (ch == '=') {
pos++;
return { EQ, "=" };
}
if (ch == ',') {
pos++;
return { COMMA, "," };
}
if (ch == '(') {
pos++;
return { LPAREN, "(" };
}
if (ch == ')') {
pos++;
return { RPAREN, ")" };
}
if (ch == '[') {
pos++;
return { LBRACKET, "[" };
}
if (ch == ']') {
pos++;
return { RBRACKET, "]" };
}
throw "Invalid character";
}
return { END, "" };
}
private: string input; int pos; };
// 语法分析器 enum NonTerminal { S, A, E, T, F }; enum Action { SHIFT, REDUCE, ACCEPT }; struct SLRAction { Action action; int nextState; int production; };
class Parser {
public:
Parser(vector
void parse() {
stack<int> stateStack;
stack<NonTerminal> symbolStack;
stateStack.push(0);
symbolStack.push(S);
while (true) {
int state = stateStack.top();
Token token = tokens[pos];
int terminal = getTokenType(token);
if (actionTable[state].count(terminal) == 0) {
throw "Syntax error";
}
SLRAction slrAction = actionTable[state][terminal];
if (slrAction.action == SHIFT) {
stateStack.push(slrAction.nextState);
symbolStack.push(getNonTerminal(token));
pos++;
} else if (slrAction.action == REDUCE) {
int production = slrAction.production;
int numSymbols = getNumSymbols(production);
for (int i = 0; i < numSymbols; i++) {
stateStack.pop();
symbolStack.pop();
}
int newState = gotoTable[stateStack.top()][getLHS(production)];
stateStack.push(newState);
symbolStack.push(getLHS(production));
} else {
break;
}
}
cout << "Accepted" << endl;
}
private:
vector
void initActionTable() {
// state 0
actionTable[0][FUNC] = { SHIFT, 1, -1 };
// state 1
actionTable[1][LBRACKET] = { SHIFT, 2, -1 };
// state 2
actionTable[2][VAR] = { SHIFT, 3, -1 };
// state 3
actionTable[3][COMMA] = { SHIFT, 4, -1 };
actionTable[3][RBRACKET] = { REDUCE, -1, 3 };
// state 4
actionTable[4][VAR] = { SHIFT, 5, -1 };
// state 5
actionTable[5][COMMA] = { SHIFT, 6, -1 };
actionTable[5][RBRACKET] = { REDUCE, -1, 3 };
// state 6
actionTable[6][VAR] = { SHIFT, 7, -1 };
// state 7
actionTable[7][RBRACKET] = { REDUCE, -1, 3 };
// state 8
actionTable[8][EQ] = { SHIFT, 9, -1 };
// state 9
actionTable[9][NUM] = { SHIFT, 10, -1 };
actionTable[9][LPAREN] = { SHIFT, 11, -1 };
actionTable[9][VAR] = { SHIFT, 12, -1 };
// state 10
actionTable[10][OP] = { SHIFT, 13, -1 };
actionTable[10][COMMA] = { REDUCE, -1, 5 };
actionTable[10][RPAREN] = { REDUCE, -1, 5 };
// state 11
actionTable[11][NUM] = { SHIFT, 10, -1 };
actionTable[11][LPAREN] = { SHIFT, 11, -1 };
actionTable[11][VAR] = { SHIFT, 12, -1 };
// state 12
actionTable[12][OP] = { REDUCE, -1, 7 };
actionTable[12][COMMA] = { REDUCE, -1, 7 };
actionTable[12][RPAREN] = { REDUCE, -1, 7 };
// state 13
actionTable[13][NUM] = { SHIFT, 14, -1 };
actionTable[13][LPAREN] = { SHIFT, 15, -1 };
actionTable[13][VAR] = { SHIFT, 16, -1 };
// state 14
actionTable[14][COMMA] = { REDUCE, -1, 4 };
actionTable[14][RPAREN] = { REDUCE, -1, 4 };
// state 15
actionTable[15][NUM] = { SHIFT, 14, -1 };
actionTable[15][LPAREN] = { SHIFT, 15, -1 };
actionTable[15][VAR] = { SHIFT, 16, -1 };
// state 16
actionTable[16][OP] = { REDUCE, -1, 6 };
actionTable[16][COMMA] = { REDUCE, -1, 6 };
actionTable[16][RPAREN] = { REDUCE, -1, 6 };
// state 17
actionTable[17][OP] = { SHIFT, 18, -1 };
// state 18
actionTable[18][NUM] = { SHIFT, 19, -1 };
actionTable[18][LPAREN] = { SHIFT, 20, -1 };
actionTable[18][VAR] = { SHIFT, 21, -1 };
// state 19
actionTable[19][OP] = { REDUCE, -1, 1 };
actionTable[19][COMMA] = { REDUCE, -1, 1 };
actionTable[19][RPAREN] = { REDUCE, -1, 1 };
// state 20
actionTable[20][NUM] = { SHIFT, 19, -1 };
actionTable[20][LPAREN] = { SHIFT, 20, -1 };
actionTable[20][VAR] = { SHIFT, 21, -1 };
// state 21
actionTable[21][OP] = { REDUCE, -1, 2 };
actionTable[21][COMMA] = { REDUCE, -1, 2 };
actionTable[21][RPAREN] = { REDUCE, -1, 2 };
}
void initGotoTable() {
gotoTable[0][S] = 8;
gotoTable[2][A] = 17;
gotoTable[5][A] = 22;
gotoTable[6][A] = 23;
gotoTable[7][A] = 24;
gotoTable[9][E] = 25;
gotoTable[11][E] = 26;
gotoTable[13][T] = 27;
gotoTable[15][T] = 28;
gotoTable[16][F] = 29;
}
int getTokenType(Token token) {
switch (token.type) {
case FUNC: return FUNC;
case ARG: return VAR;
case OP: return (token.value == "+" ? 0 : token.value == "-" ? 1 : token.value == "*" ? 2 : 3);
case NUM: return NUM;
case VAR: return VAR;
case EQ: return EQ;
case COMMA: return COMMA;
case LPAREN: return LPAREN;
case RPAREN: return RPAREN;
case LBRACKET: return LBRACKET;
case RBRACKET: return RBRACKET;
case END: return END;
}
return -1;
}
NonTerminal getNonTerminal(Token token) {
switch (token.type) {
case FUNC: return S;
case ARG: return A;
case OP: return E;
case NUM: return F;
case VAR: return F;
case EQ: return -1;
case COMMA: return -1;
case LPAREN: return -1;
case RPAREN: return -1;
case LBRACKET: return -1;
case RBRACKET: return -1;
case END: return -1;
}
return -1;
}
int getLHS(int production) {
return productions[production][0];
}
int getNumSymbols(int production) {
return numSymbols[production];
}
};
// 语法制导翻译 enum OpCode { DEF_FUNC, DEF_ARG, ADD, SUB, MUL, DIV, ASSIGN, END_FUNC }; struct Quad { OpCode op; string arg1; string arg2; string result; };
class Translator {
public:
Translator(vector
void translate() {
Quad quad;
parseS();
quad.op = END_FUNC;
quads.push_back(quad);
printQuads();
}
void printQuads() {
for (int i = 0; i < quads.size(); i++) {
cout << "(" << getOpCode(quads[i].op) << ", " << quads[i].arg1 << ", " << quads[i].arg2 << ", " << quads[i].result << ")" << endl;
}
}
private:
vector
void parseS() {
if (tokens[pos].type != FUNC) {
throw "Syntax error";
}
pos++;
parseA();
if (tokens[pos].type != EQ) {
throw "Syntax error";
}
pos++;
string result = tokens[pos - 2].value;
parseE();
Quad quad;
quad.op = DEF_FUNC;
quad.result = tokens[pos - 4].value;
quads.push_back(quad);
}
void parseA() {
if (tokens[pos].type != LBRACKET) {
throw "Syntax error";
}
pos++;
parseArgList();
if (tokens[pos].type != RBRACKET) {
throw "Syntax error";
}
pos++;
}
void parseArgList() {
if (tokens[pos].type != VAR) {
throw "Syntax error";
}
Quad quad;
quad.op = DEF_ARG;
quad.arg1 = tokens[pos].value;
quads.push_back(quad);
pos++;
while (tokens[pos].type == COMMA) {
pos++;
if (tokens[pos].type != VAR) {
throw "Syntax error";
}
Quad quad;
quad.op = DEF_ARG;
quad.arg1 = tokens[pos].value;
quads.push_back(quad);
pos++;
}
}
void parseE() {
parseT();
while (tokens[pos].type == OP && (tokens[pos].value == "+" || tokens[pos].value == "-")) {
string op = tokens[pos].value;
pos++;
parseT();
Quad quad;
quad.op = (op == "+" ? ADD : SUB);
quad.arg1 = quads.back().result;
quad.arg2 = tokens[pos - 2].value;
quad.result = "T" + to_string(currTemp++);
quads.push_back(quad);
}
}
void parseT() {
parseF();
while (tokens[pos].type == OP && (tokens[pos].value == "*" || tokens[pos].value == "/")) {
string op = tokens[pos].value;
pos++;
parseF();
Quad quad;
quad.op = (op == "*" ? MUL : DIV);
quad.arg1 = quads.back().result;
quad.arg2 = tokens[pos - 2].value;
quad.result = "T" + to_string(currTemp++);
quads.push_back(quad);
}
}
void parseF() {
if (tokens[pos].type == LPAREN) {
pos++;
parseE();
if (tokens[pos].type != RPAREN) {
throw "Syntax error";
}
pos++;
} else if (tokens[pos].type == VAR || tokens[pos].type == NUM) {
Quad quad;
quad.op = ASSIGN;
quad.arg1 = tokens[pos].value;
quad.result = tokens[pos].value;
quads.push_back(quad);
pos++;
} else {
throw "Syntax error";
}
}
string getOpCode(OpCode op) {
switch (op) {
case DEF_FUNC: return "DEF_FUNC";
case DEF_ARG: return "DEF_ARG";
case ADD: return "ADD";
case SUB: return "SUB";
case MUL: return "MUL";
case DIV: return "DIV";
case ASSIGN: return "ASSIGN";
case END_FUNC: return "END_FUNC";
}
return "";
}
};
int main() { string input; cout << "Enter the program: "; getline(cin, input);
Lexer lexer(input);
vector<Token> tokens;
while (true) {
Token token = lexer.getNextToken();
if (token.type == END) {
break;
}
tokens.push_back(token);
}
try {
Parser parser(tokens);
parser.parse();
Translator translator(tokens);
translator.translate();
} catch (const char* error) {
cout << "Error: " << error << endl;
}
return 0;
}
原文地址: http://www.cveoy.top/t/topic/osDG 著作权归作者所有。请勿转载和采集!