MimIR 0.1
MimIR is my Intermediate Representation
Loading...
Searching...
No Matches
regex2nfa.cpp
Go to the documentation of this file.
2
3#include <numeric>
4
5#include <automaton/nfa.h>
6
7#include <mim/lam.h>
8#include <mim/world.h>
9
10using namespace mim;
11
12namespace mim::plug::regex {
13
14namespace {
15struct Regex2NfaConverter {
16 Regex2NfaConverter()
17 : nfa_(std::make_unique<automaton::NFA>()) {}
18
19 void add_range_transitions(automaton::NFANode* from, automaton::NFANode* to, std::uint16_t lb, std::uint16_t ub) {
20 for (auto i = lb; i <= ub; ++i) from->add_transition(to, i);
21 }
22
23 void add_range_transitions(automaton::NFANode* from, automaton::NFANode* to, const Def* lit0, const Def* lit1) {
24 auto lb = lit0->as<Lit>()->get();
25 auto ub = lit1->as<Lit>()->get();
26 add_range_transitions(from, to, lb, ub);
27 }
28
29 void
30 convert(const Def* regex, automaton::NFANode* start, automaton::NFANode* end, automaton::NFANode* error = nullptr) {
31 if (auto conj = mim::match<regex::conj>(regex)) {
32 auto middle = nfa_->add_state();
33 convert(conj->arg(0), start, middle, error);
34 convert(conj->arg(1), middle, end, error);
35 } else if (auto any = mim::match<regex::any>(regex)) {
36 add_range_transitions(start, end, 0, 255);
37 } else if (auto range = mim::match<regex::range>(regex)) {
38 add_range_transitions(start, end, range->arg(0), range->arg(1));
39 if (error) add_range_transitions(start, error, 0, 255);
40 } else if (auto not_ = mim::match<regex::not_>(regex)) {
41 auto first = nfa_->add_state();
42 auto error = nfa_->add_state();
43 error->set_erroring(true);
44
45 convert(not_->arg(), first, error, end);
47 } else if (auto disj = mim::match<regex::disj>(regex)) {
48 convert(disj->arg(0), start, end, error);
49 convert(disj->arg(1), start, end, error);
50 } else if (auto opt = mim::match(quant::optional, regex)) {
52 convert(opt->arg(), start, end);
53 } else if (auto star = mim::match(quant::star, regex)) {
54 auto m1 = nfa_->add_state();
55 auto m2 = nfa_->add_state();
58 m2->add_transition(m1, automaton::NFA::SpecialTransitons::EPSILON);
59 m2->add_transition(end, automaton::NFA::SpecialTransitons::EPSILON);
60 convert(star->arg(), m1, m2);
61 } else if (auto plus = mim::match(quant::plus, regex)) {
62 auto m0 = nfa_->add_state();
63 auto m1 = nfa_->add_state();
64 auto m2 = nfa_->add_state();
65 convert(plus->arg(), start, m0, error);
66 m0->add_transition(m1, automaton::NFA::SpecialTransitons::EPSILON);
67 m2->add_transition(m1, automaton::NFA::SpecialTransitons::EPSILON);
68 m2->add_transition(end, automaton::NFA::SpecialTransitons::EPSILON);
69 m0->add_transition(end, automaton::NFA::SpecialTransitons::EPSILON);
70 convert(plus->arg(), m1, m2);
71 }
72 }
73
74 void convert(const Def* regex) {
75 auto start = nfa_->add_state();
76 nfa_->set_start(start);
77 auto end = nfa_->add_state();
78 end->set_accepting(true);
79 convert(regex, start, end);
80 }
81
82 std::unique_ptr<automaton::NFA> nfa() { return std::move(nfa_); }
83
84private:
85 std::unique_ptr<automaton::NFA> nfa_;
86};
87
88} // namespace
89
90std::unique_ptr<automaton::NFA> regex2nfa(const Def* regex) {
91 Regex2NfaConverter converter;
92 converter.convert(regex);
93 return converter.nfa();
94}
95
96} // namespace mim::plug::regex
97
98extern "C" automaton::NFA* regex2nfa(const Def* regex) { return mim::plug::regex::regex2nfa(regex).release(); }
void set_accepting(bool accepting)
Definition nfa.h:36
void add_transition(const NFANode *to, std::uint16_t c)
Definition nfa.cpp:7
Base class for all Defs.
Definition def.h:198
const Sigma * convert(const TBound< up > *b)
Definition core.cpp:19
The regex Plugin
Definition lower_regex.h:5
std::unique_ptr< automaton::NFA > regex2nfa(const Def *regex)
Definition regex2nfa.cpp:90
Definition ast.h:14
void error(Loc loc, const char *f, Args &&... args)
Definition dbg.h:122
constexpr decltype(auto) get(Span< T, N > span) noexcept
Definition span.h:102
auto match(const Def *def)
Definition axiom.h:112
@ Lit
Definition def.h:85
automaton::NFA * regex2nfa(const Def *regex)
You can dl::get this function.
Definition regex2nfa.cpp:98