MimIR 0.1
MimIR is my Intermediate Representation
Loading...
Searching...
No Matches
regex2nfa.cpp
Go to the documentation of this file.
2
3#include <numeric>
4
5#include <automaton/nfa.h>
6
7#include <mim/lam.h>
8#include <mim/world.h>
9
10using namespace mim;
11
12namespace mim::plug::regex {
13
14namespace {
15struct Regex2NfaConverter {
16 Regex2NfaConverter()
17 : nfa_(std::make_unique<automaton::NFA>()) {}
18
19 void add_range_transitions(automaton::NFANode* from, automaton::NFANode* to, std::uint16_t lb, std::uint16_t ub) {
20 for (auto i = lb; i <= ub; ++i) from->add_transition(to, i);
21 }
22
23 void add_range_transitions(automaton::NFANode* from, automaton::NFANode* to, Ref lit0, Ref lit1) {
24 auto lb = lit0->as<Lit>()->get();
25 auto ub = lit1->as<Lit>()->get();
26 add_range_transitions(from, to, lb, ub);
27 }
28
29 void convert(Ref regex, automaton::NFANode* start, automaton::NFANode* end, automaton::NFANode* error = nullptr) {
30 if (auto conj = mim::match<regex::conj>(regex)) {
31 auto middle = nfa_->add_state();
32 convert(conj->arg(0), start, middle, error);
33 convert(conj->arg(1), middle, end, error);
34 } else if (auto any = mim::match<regex::any>(regex)) {
35 add_range_transitions(start, end, 0, 255);
36 } else if (auto range = mim::match<regex::range>(regex)) {
37 add_range_transitions(start, end, range->arg(0), range->arg(1));
38 if (error) add_range_transitions(start, error, 0, 255);
39 } else if (auto not_ = mim::match<regex::not_>(regex)) {
40 auto first = nfa_->add_state();
41 auto error = nfa_->add_state();
42 error->set_erroring(true);
43
44 convert(not_->arg(), first, error, end);
46 } else if (auto disj = mim::match<regex::disj>(regex)) {
47 convert(disj->arg(0), start, end, error);
48 convert(disj->arg(1), start, end, error);
49 } else if (auto opt = mim::match(quant::optional, regex)) {
51 convert(opt->arg(), start, end);
52 } else if (auto star = mim::match(quant::star, regex)) {
53 auto m1 = nfa_->add_state();
54 auto m2 = nfa_->add_state();
57 m2->add_transition(m1, automaton::NFA::SpecialTransitons::EPSILON);
58 m2->add_transition(end, automaton::NFA::SpecialTransitons::EPSILON);
59 convert(star->arg(), m1, m2);
60 } else if (auto plus = mim::match(quant::plus, regex)) {
61 auto m0 = nfa_->add_state();
62 auto m1 = nfa_->add_state();
63 auto m2 = nfa_->add_state();
64 convert(plus->arg(), start, m0, error);
65 m0->add_transition(m1, automaton::NFA::SpecialTransitons::EPSILON);
66 m2->add_transition(m1, automaton::NFA::SpecialTransitons::EPSILON);
67 m2->add_transition(end, automaton::NFA::SpecialTransitons::EPSILON);
68 m0->add_transition(end, automaton::NFA::SpecialTransitons::EPSILON);
69 convert(plus->arg(), m1, m2);
70 }
71 }
72
73 void convert(Ref regex) {
74 auto start = nfa_->add_state();
75 nfa_->set_start(start);
76 auto end = nfa_->add_state();
77 end->set_accepting(true);
78 convert(regex, start, end);
79 }
80
81 std::unique_ptr<automaton::NFA> nfa() { return std::move(nfa_); }
82
83private:
84 std::unique_ptr<automaton::NFA> nfa_;
85};
86
87} // namespace
88
89std::unique_ptr<automaton::NFA> regex2nfa(Ref regex) {
90 Regex2NfaConverter converter;
91 converter.convert(regex);
92 return converter.nfa();
93}
94
95} // namespace mim::plug::regex
96
97extern "C" automaton::NFA* regex2nfa(Ref regex) { return mim::plug::regex::regex2nfa(regex).release(); }
void set_accepting(bool accepting)
Definition nfa.h:34
void add_transition(const NFANode *to, std::uint16_t c)
Definition nfa.cpp:7
Helper class to retrieve Infer::arg if present.
Definition def.h:86
void * get(void *handle, const char *symbol_name)
Definition dl.cpp:36
const Sigma * convert(const TBound< up > *b)
Definition core.cpp:19
The regex Plugin
Definition lower_regex.h:5
std::unique_ptr< automaton::NFA > regex2nfa(Ref regex)
Definition regex2nfa.cpp:89
Definition cfg.h:11
auto match(Ref def)
Definition axiom.h:112
void error(Loc loc, const char *f, Args &&... args)
Definition dbg.h:122
Definition span.h:104
automaton::NFA * regex2nfa(Ref regex)
You can dl::get this function.
Definition regex2nfa.cpp:97