// PL/0 language lexical analyzer: outputs the token sequence of a source file.
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Lexical analyzer for PL/0 source text.
 *
 * <p>Reads a source file line by line, splits every line into lexemes and writes one
 * {@code ( type, lexeme )} entry per lexeme to the output file. Reserved words, operators and
 * delimiters are mapped to their symbol names; everything else is classified as {@code IDENT}
 * or {@code NUMBER}. A lexeme that fits none of these categories aborts the run with an
 * {@link IllegalArgumentException}.
 */
public class PL0Tokenizer {
    /** Reserved word -> symbol name. */
    private static final Map<String, String> RESERVED_WORDS = new HashMap<>();
    /** Operator lexeme -> symbol name. */
    private static final Map<String, String> OPERATORS = new HashMap<>();
    /** Delimiter lexeme -> symbol name. */
    private static final Map<String, String> DELIMITERS = new HashMap<>();

    /**
     * One lexeme: a maximal run of ASCII letters/digits, a two-character operator, or any other
     * single non-whitespace character. Alphanumeric runs are kept whole on purpose so that a
     * malformed word such as {@code 12ab} is reported as one unknown token instead of being
     * silently split into a number and an identifier.
     */
    private static final Pattern LEXEME = Pattern.compile("[A-Za-z0-9]+|:=|<=|>=|<>|\\S");
    /** A letter followed by letters or digits. */
    private static final Pattern IDENTIFIER = Pattern.compile("[A-Za-z][A-Za-z0-9]*");
    /** One or more decimal digits. */
    private static final Pattern NUMBER = Pattern.compile("\\d+");

    static {
        RESERVED_WORDS.put("begin", "beginsym");
        RESERVED_WORDS.put("call", "callsym");
        RESERVED_WORDS.put("const", "constsym");
        RESERVED_WORDS.put("do", "dosym");
        RESERVED_WORDS.put("end", "endsym");
        RESERVED_WORDS.put("if", "ifsym");
        RESERVED_WORDS.put("odd", "oddsym");
        RESERVED_WORDS.put("procedure", "proceduresym");
        RESERVED_WORDS.put("read", "readsym");
        RESERVED_WORDS.put("then", "thensym");
        RESERVED_WORDS.put("var", "varsym");
        RESERVED_WORDS.put("while", "whilesym");
        RESERVED_WORDS.put("write", "writesym");

        // "odd" is intentionally not repeated here: reserved words are looked up first,
        // so an operator entry for it could never be reached.
        OPERATORS.put("+", "plus");
        OPERATORS.put("-", "minus");
        OPERATORS.put("*", "times");
        OPERATORS.put("/", "slash");
        OPERATORS.put("=", "eql");
        OPERATORS.put("<>", "neq");
        OPERATORS.put("<", "lss");
        OPERATORS.put("<=", "leq");
        OPERATORS.put(">", "gtr");
        OPERATORS.put(">=", "geq");
        OPERATORS.put(":=", "becomes");

        DELIMITERS.put("(", "lparen");
        DELIMITERS.put(")", "rparen");
        DELIMITERS.put(",", "comma");
        DELIMITERS.put(";", "semicolon");
        DELIMITERS.put(".", "period");
    }

    /**
     * Tokenizes a PL/0 source file and writes the token list to an output file.
     *
     * <p>I/O failures are reported on standard error and do not propagate.
     *
     * @param args optional paths: {@code args[0]} is the input file (default {@code input.txt}),
     *     {@code args[1]} is the output file (default {@code output.txt})
     * @throws IllegalArgumentException if the input contains a lexeme that is not a reserved
     *     word, identifier, number, operator or delimiter; entries written before the offending
     *     lexeme are still flushed to the output file
     */
    public static void main(String[] args) {
        String inputPath = (args != null && args.length > 0) ? args[0] : "input.txt";
        String outputPath = (args != null && args.length > 1) ? args[1] : "output.txt";

        // try-with-resources closes (and therefore flushes) both streams even when an
        // unknown token aborts the scan part-way through the file.
        try (BufferedReader reader = new BufferedReader(new FileReader(inputPath));
                BufferedWriter writer = new BufferedWriter(new FileWriter(outputPath))) {
            String line;
            while ((line = reader.readLine()) != null) {
                writeTokens(line, writer);
            }
        } catch (IOException e) {
            System.err.println("PL0Tokenizer: I/O error: " + e);
        }
    }

    /** Scans one source line and writes a {@code ( type, lexeme )} entry for every lexeme. */
    private static void writeTokens(String line, BufferedWriter writer) throws IOException {
        Matcher lexemes = LEXEME.matcher(line);
        while (lexemes.find()) {
            String token = lexemes.group();
            String tokenType = getTokenType(token);
            writer.write("( " + tokenType + ", " + token + " )\n");
        }
    }

    /**
     * Classifies a single lexeme.
     *
     * @param token the lexeme to classify
     * @return the symbol name of a reserved word, operator or delimiter, or {@code IDENT} /
     *     {@code NUMBER}
     * @throws IllegalArgumentException if the lexeme belongs to none of the categories
     */
    private static String getTokenType(String token) {
        // Reserved words are checked first because they also match the identifier pattern.
        String reserved = RESERVED_WORDS.get(token);
        if (reserved != null) {
            return reserved;
        }
        if (isIdentifier(token)) {
            return "IDENT";
        }
        if (isNumber(token)) {
            return "NUMBER";
        }
        String operator = OPERATORS.get(token);
        if (operator != null) {
            return operator;
        }
        String delimiter = DELIMITERS.get(token);
        if (delimiter != null) {
            return delimiter;
        }
        throw new IllegalArgumentException("Unknown token: " + token);
    }

    /** Returns whether the lexeme is a letter followed by any number of letters or digits. */
    private static boolean isIdentifier(String token) {
        return IDENTIFIER.matcher(token).matches();
    }

    /** Returns whether the lexeme consists solely of decimal digits. */
    private static boolean isNumber(String token) {
        return NUMBER.matcher(token).matches();
    }
}
// Original source: https://www.cveoy.top/t/topic/cfYa (copyright belongs to the author; do not repost or scrape).