Pixiv - Miracle
1-编译原理
1702 字
9 分钟
1-编译原理
实现DFA
实现如下两个函数
bool frontend::DFA::next(char input, Token& buf) {#ifdef DEBUG_DFA#include<iostream> std::cout << "in state [" << toString(cur_state) << "], input = \'" << input << "\', str = " << cur_str << "\t";#endif TODO;#ifdef DEBUG_DFA std::cout << "next state is [" << toString(cur_state) << "], next str = " << cur_str << "\t, ret = " << ret << std::endl;#endif}void frontend::DFA::reset() { cur_state = State::Empty; cur_str = "";}5个状态并不够用,增加状态
enum class State{ Empty, // space, \n, \r ... Ident, // a keyword or identifier, like 'int' 'a0' 'else' ... IntLiteral, // int literal, like '1' '1900', only in decimal FloatLiteral, // float literal, like '0.1' op_ext, // 操作符<,>,=,! op_equ, // 操作符<=.>=,==.!= op_and, // 操作符& op_andand, // 操作符&& op_or, // 操作符| op_oror, // 操作符|| op // 其他操作符};设计具体的状态转移图

产出token后不一定消费了字符,故要设一个consumed变量
代码
#include"front/lexical.h"
#include<map>#include<cassert>#include<cctype>#include<stdexcept>#include<string>#include<unordered_set>
#define TODO assert(0 && "todo")
// #define DEBUG_DFA// #define DEBUG_SCANNER
namespace utils {bool isExtop(char& c) { std::unordered_set<char> ops = { '<', '>', '=', '!' }; return ops.count(c);}
bool isOp(char& c) { std::unordered_set<char> ops = { '+', '-', '*', '/', '%', ':', ';', ',', '(', ')', '[', ']', '{', '}' }; return ops.count(c);}
bool isAnd(char& c) { return c == '&';}
bool isOr(char& c) { return c == '|';}
bool isIdentStart(char c) { return std::isalpha(static_cast<unsigned char>(c)) || c == '_';}
bool isIdentChar(char c) { return std::isalnum(static_cast<unsigned char>(c)) || c == '_';}
bool isHexDigit(char c) { return std::isxdigit(static_cast<unsigned char>(c));}
bool isOctDigit(char c) { return c >= '0' && c <= '7';}
frontend::TokenType keywordOrIdentType(const std::string& str) { if (str == "const") return frontend::TokenType::CONSTTK; if (str == "void") return frontend::TokenType::VOIDTK; if (str == "int") return frontend::TokenType::INTTK; if (str == "float") return frontend::TokenType::FLOATTK; if (str == "if") return frontend::TokenType::IFTK; if (str == "else") return frontend::TokenType::ELSETK; if (str == "while") return frontend::TokenType::WHILETK; if (str == "continue") return frontend::TokenType::CONTINUETK; if (str == "break") return frontend::TokenType::BREAKTK; if (str == "return") return frontend::TokenType::RETURNTK; return frontend::TokenType::IDENFR;}
frontend::TokenType operatorType(const std::string& str) { if (str == "+") return frontend::TokenType::PLUS; if (str == "-") return frontend::TokenType::MINU; if (str == "*") return frontend::TokenType::MULT; if (str == "/") return frontend::TokenType::DIV; if (str == "%") return frontend::TokenType::MOD; if (str == "<") return frontend::TokenType::LSS; if (str == ">") return frontend::TokenType::GTR; if (str == ":") return frontend::TokenType::COLON; if (str == "=") return frontend::TokenType::ASSIGN; if (str == ";") return frontend::TokenType::SEMICN; if (str == ",") return frontend::TokenType::COMMA; if (str == "(") return frontend::TokenType::LPARENT; if (str == ")") return frontend::TokenType::RPARENT; if (str == "[") return frontend::TokenType::LBRACK; if (str == "]") return frontend::TokenType::RBRACK; if (str == "{") return frontend::TokenType::LBRACE; if (str == "}") return frontend::TokenType::RBRACE; if (str == "!") return frontend::TokenType::NOT; if (str == "<=") return frontend::TokenType::LEQ; if (str == ">=") return frontend::TokenType::GEQ; if (str == "==") return frontend::TokenType::EQL; if (str == "!=") return frontend::TokenType::NEQ; if (str == "&&") return frontend::TokenType::AND; if (str == "||") return frontend::TokenType::OR; assert(0 && "invalid operator"); return frontend::TokenType::PLUS;}
std::string preprocess(std::ifstream& fin) { enum class CommentState { Normal, Slash, LineComment, BlockComment, BlockCommentStar, };
std::string ret; CommentState state = CommentState::Normal; char c;
while (fin.get(c)) { switch (state) { case CommentState::Normal: if (c == '/') { state = CommentState::Slash; } else { ret += c; } break; case CommentState::Slash: if (c == '/') { state = CommentState::LineComment; } else if (c == '*') { state = CommentState::BlockComment; } else { ret += '/'; ret += c; state = CommentState::Normal; } break; case CommentState::LineComment: if (c == '\n') { ret += '\n'; state = CommentState::Normal; } break; case CommentState::BlockComment: if (c == '*') { state = CommentState::BlockCommentStar; } else if (c == '\n') { ret += '\n'; } break; case CommentState::BlockCommentStar: if (c == '/') { state = CommentState::Normal; } else if (c == '*') { state = CommentState::BlockCommentStar; } else { if (c == '\n') { ret += '\n'; } state = CommentState::BlockComment; } break; default: assert(0 && "invalid preprocess state"); } }
if (state == CommentState::Slash) { ret += '/'; } else if (state == CommentState::BlockComment || state == CommentState::BlockCommentStar) { throw std::runtime_error("lexical error: unterminated block comment"); }
ret += ' '; return ret;}} // namespace utils
std::string frontend::toString(State s) { switch (s) { case State::Empty: return "Empty"; case State::Ident: return "Ident"; case State::IntLiteral: return "IntLiteral"; case State::FloatLiteral: return "FloatLiteral"; case State::op_ext: return "op_ext"; case State::op_equ: return "op_equ"; case State::op_and: return "op_and"; case State::op_andand: return "op_andand"; case State::op_or: return "op_or"; case State::op_oror: return "op_oror"; case State::op: return "op"; default: assert(0 && "invalid State"); } return "";}
std::set<std::string> frontend::keywords= { "const", "int", "float", "if", "else", "while", "continue", "break", "return", "void"};
frontend::DFA::DFA(): cur_state(frontend::State::Empty), cur_str() {}
frontend::DFA::~DFA() {}
bool frontend::DFA::next(char input, Token& buf, bool& consumed) { bool ret = false;#ifdef DEBUG_DFA#include<iostream> std::cout << "in state [" << toString(cur_state) << "], input = \'" << input << "\', str = " << cur_str << "\t";#endif switch (cur_state) { case State::Empty : if (std::isspace(static_cast<unsigned char>(input))) break;
cur_str += input;
if (std::isdigit(static_cast<unsigned char>(input))) { cur_state = State::IntLiteral; } else if (input == '.') { cur_state = State::FloatLiteral; } else if (utils::isIdentStart(input)) { cur_state = State::Ident; } else if (utils::isExtop(input)) { cur_state = State::op_ext; } else if (utils::isAnd(input)) { cur_state = State::op_and; } else if (utils::isOr(input)) { cur_state = State::op_or; } else if (utils::isOp(input)) { cur_state = State::op; } else { throw std::runtime_error(std::string("lexical error: invalid character '") + input + "'"); } break; case State::IntLiteral : if (cur_str == "0") { if (input == 'x' || input == 'X') { cur_str += input; break; } if (input == '.') { cur_str += input; cur_state = State::FloatLiteral; break; } if (utils::isOctDigit(input)) { cur_str += input; break; } if (std::isdigit(static_cast<unsigned char>(input))) { throw std::runtime_error("lexical error: invalid octal literal"); } consumed = 0; buf = {TokenType::INTLTR, cur_str}; reset(); ret = true; break; } if (cur_str.size() >= 2 && cur_str[0] == '0' && (cur_str[1] == 'x' || cur_str[1] == 'X')) { if (utils::isHexDigit(input)) { cur_str += input; break; } if (cur_str.size() == 2) { throw std::runtime_error("lexical error: invalid hex literal"); } consumed = 0; buf = {TokenType::INTLTR, cur_str}; reset(); ret = true; break; } if (cur_str.size() >= 2 && cur_str[0] == '0') { if (input == '.') { cur_str += input; cur_state = State::FloatLiteral; break; } if (utils::isOctDigit(input)) { cur_str += input; break; } if (std::isdigit(static_cast<unsigned char>(input))) { throw std::runtime_error("lexical error: invalid octal literal"); } consumed = 0; buf = {TokenType::INTLTR, cur_str}; reset(); ret = true; break; } if (std::isdigit(static_cast<unsigned char>(input))) { cur_str += input; break; } if (input == '.') { cur_str += input; cur_state = State::FloatLiteral; break; } consumed = 0; buf = {TokenType::INTLTR, cur_str}; reset(); ret = true; break; case State::FloatLiteral: if (std::isdigit(static_cast<unsigned char>(input))) { cur_str += input; break; } if (cur_str == ".") { throw std::runtime_error("lexical error: invalid float literal"); } consumed = 0; buf = {TokenType::FLOATLTR, cur_str}; reset(); ret = true; break; case State::Ident: if (utils::isIdentChar(input)) { cur_str += input; break; } consumed = 0; buf = {utils::keywordOrIdentType(cur_str), cur_str}; reset(); ret = true; break; case State::op_ext: if (input == '=') { cur_str += input; cur_state = State::op_equ; break; } consumed = 0; buf = {utils::operatorType(cur_str), cur_str}; reset(); ret = true; break; case State::op_equ: consumed = 0; buf = {utils::operatorType(cur_str), cur_str}; reset(); ret = true; break; case State::op_and: if (input == '&') { cur_str += input; cur_state = State::op_andand; break; } throw std::runtime_error("lexical error: invalid operator '&'"); break; case State::op_andand: consumed = 0; buf = {utils::operatorType(cur_str), cur_str}; reset(); ret = true; break; case State::op_or: if (input == '|') { cur_str += input; cur_state = State::op_oror; break; } throw std::runtime_error("lexical error: invalid operator '|'"); break; case State::op_oror: consumed = 0; buf = {utils::operatorType(cur_str), cur_str}; reset(); ret = true; break; case State::op: consumed = 0; buf = {utils::operatorType(cur_str), cur_str}; reset(); ret = true; break; default: assert(0 && "invalid State"); }#ifdef DEBUG_DFA std::cout << "next state is [" << toString(cur_state) << "], next str = " << cur_str << "\t, ret = " << ret << std::endl;#endif return ret;}
void frontend::DFA::reset() { cur_state = State::Empty; cur_str = "";}
frontend::Scanner::Scanner(std::string filename): fin(filename) { if(!fin.is_open()) { throw std::runtime_error("lexical error: input file cannot open"); }}
frontend::Scanner::~Scanner() { fin.close();}
std::vector<frontend::Token> frontend::Scanner::run() { std::vector<frontend::Token> ret; frontend::DFA dfa; frontend::Token tk; std::string s = utils::preprocess(fin);
size_t i = 0; while (i < s.size()) { bool consumed = 1; if (dfa.next(s[i], tk, consumed)) {#ifdef DEBUG_SCANNER#include<iostream> std::cout << "token: " << toString(tk.type) << "\t" << tk.value << std::endl;#endif ret.push_back(tk); } if (consumed) ++i; } return ret;}实现扫描器
文件预处理,五个状态的状态机

std::string preprocess(std::ifstream& fin) { enum class CommentState { Normal, Slash, LineComment, BlockComment, BlockCommentStar, };
std::string ret; CommentState state = CommentState::Normal; char c;
while (fin.get(c)) { switch (state) { case CommentState::Normal: if (c == '/') { state = CommentState::Slash; } else { ret += c; } break; case CommentState::Slash: if (c == '/') { state = CommentState::LineComment; } else if (c == '*') { state = CommentState::BlockComment; } else { ret += '/'; ret += c; state = CommentState::Normal; } break; case CommentState::LineComment: if (c == '\n') { ret += '\n'; state = CommentState::Normal; } break; case CommentState::BlockComment: if (c == '*') { state = CommentState::BlockCommentStar; } else if (c == '\n') { ret += '\n'; } break; case CommentState::BlockCommentStar: if (c == '/') { state = CommentState::Normal; } else if (c == '*') { state = CommentState::BlockCommentStar; } else { if (c == '\n') { ret += '\n'; } state = CommentState::BlockComment; } break; default: assert(0 && "invalid preprocess state"); } }
if (state == CommentState::Slash) { ret += '/'; } else if (state == CommentState::BlockComment || state == CommentState::BlockCommentStar) { assert(0 && "unterminated block comment"); }
ret += ' '; return ret;}调试指令
cd /home/skaco2/Compiler/testpython3 build.pycd /home/skaco2/Compiler./bin/compiler ./test/SysY.cpp -s0 -o ./test/SysY.tk文章分享
如果这篇文章对你有帮助,欢迎分享给更多人!
相关文章 智能推荐
1
2-基于Selenium的Web应用自动化测试
课内 课内
2
17-数理统计
数学 数学
3
16-矩阵
数学 数学
4
15-行列式
数学 数学
5
14-对称
数学 数学
随机文章 随机推荐