#include #include #include #include #include using namespace std;

// 词法分析器 enum TokenType { FUNC, ARG, OP, NUM, VAR, EQ, COMMA, LPAREN, RPAREN, LBRACKET, RBRACKET, END }; struct Token { TokenType type; string value; };

class Lexer { public: Lexer(string input) { this->input = input; pos = 0; }

Token getNextToken() {
    while (pos < input.length()) {
        char ch = input[pos];
        if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
            pos++;
            continue;
        }
        if (ch == '+' || ch == '-' || ch == '*' || ch == '/') {
            pos++;
            return { OP, string(1, ch) };
        }
        if (ch >= '0' && ch <= '9') {
            string num = "";
            while (pos < input.length() && input[pos] >= '0' && input[pos] <= '9') {
                num += input[pos];
                pos++;
            }
            return { NUM, num };
        }
        if (ch >= 'a' && ch <= 'z') {
            string var = "";
            while (pos < input.length() && input[pos] >= 'a' && input[pos] <= 'z') {
                var += input[pos];
                pos++;
            }
            if (var == "f") {
                return { FUNC, var };
            } else {
                return { VAR, var };
            }
        }
        if (ch == '=') {
            pos++;
            return { EQ, "=" };
        }
        if (ch == ',') {
            pos++;
            return { COMMA, "," };
        }
        if (ch == '(') {
            pos++;
            return { LPAREN, "(" };
        }
        if (ch == ')') {
            pos++;
            return { RPAREN, ")" };
        }
        if (ch == '[') {
            pos++;
            return { LBRACKET, "[" };
        }
        if (ch == ']') {
            pos++;
            return { RBRACKET, "]" };
        }
        throw "Invalid character";
    }
    return { END, "" };
}

private: string input; int pos; };

// 语法分析器 enum NonTerminal { S, A, E, T, F }; enum Action { SHIFT, REDUCE, ACCEPT }; struct SLRAction { Action action; int nextState; int production; };

class Parser { public: Parser(vector tokens) { this->tokens = tokens; pos = 0; initActionTable(); initGotoTable(); }

void parse() {
    stack<int> stateStack;
    stack<NonTerminal> symbolStack;
    stateStack.push(0);
    symbolStack.push(S);

    while (true) {
        int state = stateStack.top();
        Token token = tokens[pos];
        int terminal = getTokenType(token);

        if (actionTable[state].count(terminal) == 0) {
            throw "Syntax error";
        }

        SLRAction slrAction = actionTable[state][terminal];
        if (slrAction.action == SHIFT) {
            stateStack.push(slrAction.nextState);
            symbolStack.push(getNonTerminal(token));
            pos++;
        } else if (slrAction.action == REDUCE) {
            int production = slrAction.production;
            int numSymbols = getNumSymbols(production);
            for (int i = 0; i < numSymbols; i++) {
                stateStack.pop();
                symbolStack.pop();
            }
            int newState = gotoTable[stateStack.top()][getLHS(production)];
            stateStack.push(newState);
            symbolStack.push(getLHS(production));
        } else {
            break;
        }
    }

    cout << "Accepted" << endl;
}

private: vector tokens; int pos; map<int, map<int, SLRAction>> actionTable; map<int, map<NonTerminal, int>> gotoTable; const int numProductions = 7; const int numSymbols[numProductions] = { 4, 3, 3, 3, 1, 1, 1 }; const int productions[numProductions][4] = { { S, LBRACKET, A, RBRACKET }, { A, VAR, COMMA }, { A, A, VAR, COMMA }, { A, A, VAR }, { E, E, OP, T }, { E, T }, { T, F } };

void initActionTable() {
    // state 0
    actionTable[0][FUNC] = { SHIFT, 1, -1 };
    // state 1
    actionTable[1][LBRACKET] = { SHIFT, 2, -1 };
    // state 2
    actionTable[2][VAR] = { SHIFT, 3, -1 };
    // state 3
    actionTable[3][COMMA] = { SHIFT, 4, -1 };
    actionTable[3][RBRACKET] = { REDUCE, -1, 3 };
    // state 4
    actionTable[4][VAR] = { SHIFT, 5, -1 };
    // state 5
    actionTable[5][COMMA] = { SHIFT, 6, -1 };
    actionTable[5][RBRACKET] = { REDUCE, -1, 3 };
    // state 6
    actionTable[6][VAR] = { SHIFT, 7, -1 };
    // state 7
    actionTable[7][RBRACKET] = { REDUCE, -1, 3 };
    // state 8
    actionTable[8][EQ] = { SHIFT, 9, -1 };
    // state 9
    actionTable[9][NUM] = { SHIFT, 10, -1 };
    actionTable[9][LPAREN] = { SHIFT, 11, -1 };
    actionTable[9][VAR] = { SHIFT, 12, -1 };
    // state 10
    actionTable[10][OP] = { SHIFT, 13, -1 };
    actionTable[10][COMMA] = { REDUCE, -1, 5 };
    actionTable[10][RPAREN] = { REDUCE, -1, 5 };
    // state 11
    actionTable[11][NUM] = { SHIFT, 10, -1 };
    actionTable[11][LPAREN] = { SHIFT, 11, -1 };
    actionTable[11][VAR] = { SHIFT, 12, -1 };
    // state 12
    actionTable[12][OP] = { REDUCE, -1, 7 };
    actionTable[12][COMMA] = { REDUCE, -1, 7 };
    actionTable[12][RPAREN] = { REDUCE, -1, 7 };
    // state 13
    actionTable[13][NUM] = { SHIFT, 14, -1 };
    actionTable[13][LPAREN] = { SHIFT, 15, -1 };
    actionTable[13][VAR] = { SHIFT, 16, -1 };
    // state 14
    actionTable[14][COMMA] = { REDUCE, -1, 4 };
    actionTable[14][RPAREN] = { REDUCE, -1, 4 };
    // state 15
    actionTable[15][NUM] = { SHIFT, 14, -1 };
    actionTable[15][LPAREN] = { SHIFT, 15, -1 };
    actionTable[15][VAR] = { SHIFT, 16, -1 };
    // state 16
    actionTable[16][OP] = { REDUCE, -1, 6 };
    actionTable[16][COMMA] = { REDUCE, -1, 6 };
    actionTable[16][RPAREN] = { REDUCE, -1, 6 };
    // state 17
    actionTable[17][OP] = { SHIFT, 18, -1 };
    // state 18
    actionTable[18][NUM] = { SHIFT, 19, -1 };
    actionTable[18][LPAREN] = { SHIFT, 20, -1 };
    actionTable[18][VAR] = { SHIFT, 21, -1 };
    // state 19
    actionTable[19][OP] = { REDUCE, -1, 1 };
    actionTable[19][COMMA] = { REDUCE, -1, 1 };
    actionTable[19][RPAREN] = { REDUCE, -1, 1 };
    // state 20
    actionTable[20][NUM] = { SHIFT, 19, -1 };
    actionTable[20][LPAREN] = { SHIFT, 20, -1 };
    actionTable[20][VAR] = { SHIFT, 21, -1 };
    // state 21
    actionTable[21][OP] = { REDUCE, -1, 2 };
    actionTable[21][COMMA] = { REDUCE, -1, 2 };
    actionTable[21][RPAREN] = { REDUCE, -1, 2 };
}

void initGotoTable() {
    gotoTable[0][S] = 8;
    gotoTable[2][A] = 17;
    gotoTable[5][A] = 22;
    gotoTable[6][A] = 23;
    gotoTable[7][A] = 24;
    gotoTable[9][E] = 25;
    gotoTable[11][E] = 26;
    gotoTable[13][T] = 27;
    gotoTable[15][T] = 28;
    gotoTable[16][F] = 29;
}

int getTokenType(Token token) {
    switch (token.type) {
    case FUNC: return FUNC;
    case ARG: return VAR;
    case OP: return (token.value == "+" ? 0 : token.value == "-" ? 1 : token.value == "*" ? 2 : 3);
    case NUM: return NUM;
    case VAR: return VAR;
    case EQ: return EQ;
    case COMMA: return COMMA;
    case LPAREN: return LPAREN;
    case RPAREN: return RPAREN;
    case LBRACKET: return LBRACKET;
    case RBRACKET: return RBRACKET;
    case END: return END;
    }
    return -1;
}

NonTerminal getNonTerminal(Token token) {
    switch (token.type) {
    case FUNC: return S;
    case ARG: return A;
    case OP: return E;
    case NUM: return F;
    case VAR: return F;
    case EQ: return -1;
    case COMMA: return -1;
    case LPAREN: return -1;
    case RPAREN: return -1;
    case LBRACKET: return -1;
    case RBRACKET: return -1;
    case END: return -1;
    }
    return -1;
}

int getLHS(int production) {
    return productions[production][0];
}

int getNumSymbols(int production) {
    return numSymbols[production];
}

};

// 语法制导翻译 enum OpCode { DEF_FUNC, DEF_ARG, ADD, SUB, MUL, DIV, ASSIGN, END_FUNC }; struct Quad { OpCode op; string arg1; string arg2; string result; };

class Translator { public: Translator(vector tokens) { this->tokens = tokens; pos = 0; currTemp = 1; }

void translate() {
    Quad quad;
    parseS();
    quad.op = END_FUNC;
    quads.push_back(quad);
    printQuads();
}

void printQuads() {
    for (int i = 0; i < quads.size(); i++) {
        cout << "(" << getOpCode(quads[i].op) << ", " << quads[i].arg1 << ", " << quads[i].arg2 << ", " << quads[i].result << ")" << endl;
    }
}

private: vector tokens; int pos; int currTemp; vector quads;

void parseS() {
    if (tokens[pos].type != FUNC) {
        throw "Syntax error";
    }
    pos++;
    parseA();
    if (tokens[pos].type != EQ) {
        throw "Syntax error";
    }
    pos++;
    string result = tokens[pos - 2].value;
    parseE();
    Quad quad;
    quad.op = DEF_FUNC;
    quad.result = tokens[pos - 4].value;
    quads.push_back(quad);
}

void parseA() {
    if (tokens[pos].type != LBRACKET) {
        throw "Syntax error";
    }
    pos++;
    parseArgList();
    if (tokens[pos].type != RBRACKET) {
        throw "Syntax error";
    }
    pos++;
}

void parseArgList() {
    if (tokens[pos].type != VAR) {
        throw "Syntax error";
    }
    Quad quad;
    quad.op = DEF_ARG;
    quad.arg1 = tokens[pos].value;
    quads.push_back(quad);
    pos++;
    while (tokens[pos].type == COMMA) {
        pos++;
        if (tokens[pos].type != VAR) {
            throw "Syntax error";
        }
        Quad quad;
        quad.op = DEF_ARG;
        quad.arg1 = tokens[pos].value;
        quads.push_back(quad);
        pos++;
    }
}

void parseE() {
    parseT();
    while (tokens[pos].type == OP && (tokens[pos].value == "+" || tokens[pos].value == "-")) {
        string op = tokens[pos].value;
        pos++;
        parseT();
        Quad quad;
        quad.op = (op == "+" ? ADD : SUB);
        quad.arg1 = quads.back().result;
        quad.arg2 = tokens[pos - 2].value;
        quad.result = "T" + to_string(currTemp++);
        quads.push_back(quad);
    }
}

void parseT() {
    parseF();
    while (tokens[pos].type == OP && (tokens[pos].value == "*" || tokens[pos].value == "/")) {
        string op = tokens[pos].value;
        pos++;
        parseF();
        Quad quad;
        quad.op = (op == "*" ? MUL : DIV);
        quad.arg1 = quads.back().result;
        quad.arg2 = tokens[pos - 2].value;
        quad.result = "T" + to_string(currTemp++);
        quads.push_back(quad);
    }
}

void parseF() {
    if (tokens[pos].type == LPAREN) {
        pos++;
        parseE();
        if (tokens[pos].type != RPAREN) {
            throw "Syntax error";
        }
        pos++;
    } else if (tokens[pos].type == VAR || tokens[pos].type == NUM) {
        Quad quad;
        quad.op = ASSIGN;
        quad.arg1 = tokens[pos].value;
        quad.result = tokens[pos].value;
        quads.push_back(quad);
        pos++;
    } else {
        throw "Syntax error";
    }
}

string getOpCode(OpCode op) {
    switch (op) {
    case DEF_FUNC: return "DEF_FUNC";
    case DEF_ARG: return "DEF_ARG";
    case ADD: return "ADD";
    case SUB: return "SUB";
    case MUL: return "MUL";
    case DIV: return "DIV";
    case ASSIGN: return "ASSIGN";
    case END_FUNC: return "END_FUNC";
    }
    return "";
}

};

int main() { string input; cout << "Enter the program: "; getline(cin, input);

Lexer lexer(input);
vector<Token> tokens;
while (true) {
    Token token = lexer.getNextToken();
    if (token.type == END) {
        break;
    }
    tokens.push_back(token);
}

try {
    Parser parser(tokens);
    parser.parse();
    Translator translator(tokens);
    translator.translate();
} catch (const char* error) {
    cout << "Error: " << error << endl;
}
return 0;

}

简单编程语言的语法制导翻译和中间代码生成

原文地址: http://www.cveoy.top/t/topic/osDG 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录