MimIR 0.1
MimIR is my Intermediate Representation
Loading...
Searching...
No Matches
regex2nfa.cpp
Go to the documentation of this file.
2
3#include <algorithm>
4
5#include <automaton/nfa.h>
6
7#include <mim/lam.h>
8#include <mim/world.h>
9
11
12using namespace mim;
13
14namespace mim::plug::regex {
15
16namespace {
17struct Regex2NfaConverter {
18 Regex2NfaConverter()
19 : nfa_(std::make_unique<automaton::NFA>()) {}
20
21 void add_range_transitions(automaton::NFANode* from, automaton::NFANode* to, std::uint16_t lb, std::uint16_t ub) {
22 for (auto i = lb; i <= ub; ++i)
23 from->add_transition(to, i);
24 }
25
26 void add_range_transitions(automaton::NFANode* from, automaton::NFANode* to, const Def* lit0, const Def* lit1) {
27 auto lb = lit0->as<Lit>()->get();
28 auto ub = lit1->as<Lit>()->get();
29 add_range_transitions(from, to, lb, ub);
30 }
31
32 void
33 convert(const Def* regex, automaton::NFANode* start, automaton::NFANode* end, automaton::NFANode* error = nullptr) {
34 if (auto conj = Axm::isa<regex::conj>(regex)) {
35 auto middle = nfa_->add_state();
36 convert(conj->arg(0), start, middle, error);
37 convert(conj->arg(1), middle, end, error);
38 } else if (auto any = Axm::isa<regex::any>(regex)) {
39 add_range_transitions(start, end, 0_u16, 255);
40 } else if (auto range = Axm::isa<regex::range>(regex)) {
41 add_range_transitions(start, end, range->arg(0), range->arg(1));
42 if (error) add_range_transitions(start, error, 0, 255);
43 } else if (auto not_ = Axm::isa<regex::not_>(regex)) {
44 auto first = nfa_->add_state();
45 auto error = nfa_->add_state();
46
48 auto args = detail::flatten_in_arg<regex::disj>(not_->arg());
49 std::vector<std::pair<std::uint8_t, std::uint8_t>> ranges;
50 for (auto& arg : args) {
51 assert(Axm::isa<regex::range>(arg)
52 && "as per normalizer, if we're in a 'not_' argument, we must only have disjs and ranges!");
53
54 auto rng_match = Axm::isa<regex::range, false>(arg);
55 ranges.emplace_back(Lit::as<std::uint8_t>(rng_match->arg(0)), Lit::as<std::uint8_t>(rng_match->arg(1)));
56 }
57 std::sort(ranges.begin(), ranges.end());
58 std::uint8_t last{0};
59 for (auto rng : ranges) {
60 if (rng.first > last) add_range_transitions(first, end, last, rng.first - 1);
61 add_range_transitions(first, error, rng.first, rng.second);
62 last = std::min(rng.second + 1_u16, 255);
63 }
64 if (last < 255) add_range_transitions(first, end, last, 255);
65 } else if (auto neg = Axm::isa<regex::neg_lookahead>(regex)) {
66 auto first = nfa_->add_state();
67 auto error = nfa_->add_state();
68 error->set_erroring(true);
69
71
72 convert(neg->arg(), first, error, end);
74 } else if (auto disj = Axm::isa<regex::disj>(regex)) {
75 convert(disj->arg(0), start, end, error);
76 convert(disj->arg(1), start, end, error);
77 } else if (auto opt = Axm::isa(quant::optional, regex)) {
79 convert(opt->arg(), start, end);
80 } else if (auto star = Axm::isa(quant::star, regex)) {
81 auto m1 = nfa_->add_state();
82 auto m2 = nfa_->add_state();
85 m2->add_transition(m1, automaton::NFA::SpecialTransitons::EPSILON);
86 m2->add_transition(end, automaton::NFA::SpecialTransitons::EPSILON);
87 convert(star->arg(), m1, m2);
88 } else if (auto plus = Axm::isa(quant::plus, regex)) {
89 auto m0 = nfa_->add_state();
90 auto m1 = nfa_->add_state();
91 auto m2 = nfa_->add_state();
92 convert(plus->arg(), start, m0, error);
93 m0->add_transition(m1, automaton::NFA::SpecialTransitons::EPSILON);
94 m2->add_transition(m1, automaton::NFA::SpecialTransitons::EPSILON);
95 m2->add_transition(end, automaton::NFA::SpecialTransitons::EPSILON);
96 m0->add_transition(end, automaton::NFA::SpecialTransitons::EPSILON);
97 convert(plus->arg(), m1, m2);
98 } else if (auto empty = Axm::isa<regex::empty>(regex)) {
100 }
101 }
102
103 void convert(const Def* regex) {
104 auto start = nfa_->add_state();
105 nfa_->set_start(start);
106 auto end = nfa_->add_state();
107 end->set_accepting(true);
108 convert(regex, start, end);
109 }
110
111 std::unique_ptr<automaton::NFA> nfa() { return std::move(nfa_); }
112
113private:
114 std::unique_ptr<automaton::NFA> nfa_;
115};
116
117} // namespace
118
119std::unique_ptr<automaton::NFA> regex2nfa(const Def* regex) {
120 Regex2NfaConverter converter;
121 converter.convert(regex);
122 return converter.nfa();
123}
124
125} // namespace mim::plug::regex
126
127extern "C" automaton::NFA* regex2nfa(const Def* regex) { return mim::plug::regex::regex2nfa(regex).release(); }
void set_accepting(bool accepting)
Definition nfa.h:36
void add_transition(const NFANode *to, std::uint16_t c)
Definition nfa.cpp:7
static auto isa(const Def *def)
Definition axm.h:107
Base class for all Defs.
Definition def.h:251
static T as(const Def *def)
Definition def.h:830
const Sigma * convert(const TBound< up > *b)
Definition core.cpp:18
The regex Plugin
Definition lower_regex.h:5
std::unique_ptr< automaton::NFA > regex2nfa(const Def *regex)
Definition ast.h:14
void error(Loc loc, const char *f, Args &&... args)
Definition dbg.h:125
constexpr decltype(auto) get(Span< T, N > span) noexcept
Definition span.h:115
@ Lit
Definition def.h:114
automaton::NFA * regex2nfa(const Def *regex)
You can dl::get this function.