MimIR 0.1
MimIR is my Intermediate Representation
Loading...
Searching...
No Matches
normalizers.cpp
Go to the documentation of this file.
1#include <algorithm>
2#include <iterator>
3#include <numeric>
4#include <ranges>
5#include <vector>
6
8#include <fe/assert.h>
9
10#include "mim/axm.h"
11#include "mim/def.h"
12#include "mim/tuple.h"
13#include "mim/world.h"
14
15#include "mim/util/log.h"
16
18
21
22namespace mim::plug::regex {
23
24template<quant id> const Def* normalize_quant(const Def* type, const Def* callee, const Def* arg) {
25 auto& world = type->world();
26
27 // quantifiers are idempotent
28 if (Axm::isa(id, arg)) return arg;
29
30 if constexpr (id == quant::plus) {
31 // (\d?)+ and (\d*)+ == \d*
32 if (auto optional_app = Axm::isa(quant::optional, arg))
33 return world.call(quant::star, optional_app->arg());
34 else if (auto star_app = Axm::isa(quant::star, arg))
35 return arg;
36 } else if constexpr (id == quant::star) {
37 // (\d?)* and (\d+)* == \d*
38 if (auto quant_app = Axm::isa<quant>(arg)) return world.app(callee, quant_app->arg());
39 } else if constexpr (id == quant::optional) {
40 // (\d*)? and (\d+)? == \d*
41 if (auto star_app = Axm::isa(quant::star, arg))
42 return arg;
43 else if (auto plus_app = Axm::isa(quant::plus, arg))
44 return world.call(quant::star, plus_app->arg());
45 }
46
47 return {};
48}
49
50template<class ConjOrDisj> void flatten_in_arg(const Def* arg, DefVec& new_args) {
51 for (const auto* proj : arg->projs()) {
52 // flatten conjs in conjs / disj in disjs
53 if (auto seq_app = Axm::isa<ConjOrDisj>(proj))
54 flatten_in_arg<ConjOrDisj>(seq_app->arg(), new_args);
55 else
56 new_args.push_back(proj);
57 }
58}
59
60template<class ConjOrDisj> DefVec flatten_in_arg(const Def* arg) {
61 DefVec new_args;
62 flatten_in_arg<ConjOrDisj>(arg, new_args);
63 return new_args;
64}
65
66template<class ConjOrDisj> const Def* make_binary_tree(Defs args) {
67 assert(!args.empty());
68 auto& world = args.front()->world();
69 return std::accumulate(args.begin() + 1, args.end(), args.front(), [&world](const Def* lhs, const Def* rhs) {
70 return world.call<ConjOrDisj, false>(Defs{lhs, rhs});
71 });
72}
73
74const Def* normalize_conj(const Def* type, const Def* callee, const Def* arg) {
75 auto& world = type->world();
76 world.DLOG("conj {}:{} ({})", type, callee, arg);
77 if (arg->as_lit_arity() > 2) {
78 auto flat_args = flatten_in_arg<conj>(arg);
79 return make_binary_tree<conj>(flat_args);
80 }
81
82 return arg->as_lit_arity() == 1 ? arg : nullptr;
83}
84
85bool compare_re(const Def* lhs, const Def* rhs) {
86 auto lhs_range = Axm::isa<range>(lhs);
87 auto rhs_range = Axm::isa<range>(rhs);
88 // sort ranges by increasing lower bound
89 if (lhs_range && rhs_range) return Lit::as(lhs_range->arg()->proj(0)) < Lit::as(rhs_range->arg()->proj(0));
90 // ranges to the end
91 if (lhs_range) return false;
92 if (rhs_range) return true;
93
94 return lhs->gid() < rhs->gid(); // make irreflexive
95}
96
98 std::stable_sort(args.begin(), args.end(), &compare_re);
99 {
100 auto new_end = std::unique(args.begin(), args.end());
101 args.erase(new_end, args.end());
102 }
103}
104
105bool is_in_range(Range range, nat_t needle) { return needle >= range.first && needle <= range.second; }
106
107auto get_range(const Def* rng) -> Range {
108 auto rng_match = Axm::isa<range, false>(rng);
109 return {Lit::as<std::uint8_t>(rng_match->arg(0)), Lit::as<std::uint8_t>(rng_match->arg(1))};
110}
111
112struct app_range {
114 const Def* operator()(Range rng) { return w.call<range>(Defs{w.lit_i8(rng.first), w.lit_i8(rng.second)}); }
115};
116
117void merge_ranges(DefVec& args) {
118 auto ranges_begin = args.begin();
119 while (ranges_begin != args.end() && !Axm::isa<range>(*ranges_begin)) ranges_begin++;
120 if (ranges_begin == args.end()) return;
121
122 std::set<const Def*> to_remove;
123 Ranges old_ranges;
124 auto& world = (*ranges_begin)->world();
125
126 std::transform(ranges_begin, args.end(), std::back_inserter(old_ranges), get_range);
127
128 auto new_ranges = automaton::merge_ranges(
129 old_ranges, [&world](auto&&... args) { world.DLOG(std::forward<decltype(args)>(args)...); });
130
131 // invalidates ranges_begin
132 args.erase(ranges_begin, args.end());
133 std::transform(new_ranges.begin(), new_ranges.end(), std::back_inserter(args), app_range{world});
134
135 make_vector_unique(args);
136}
137
138template<cls A, cls B> bool equals_any(const Def* cls0, const Def* cls1) {
139 return (Axm::isa(A, cls0) && Axm::isa(B, cls1)) || (Axm::isa(A, cls1) && Axm::isa(B, cls0));
140}
141
142bool equals_any(const Def* lhs, const Def* rhs) {
143 auto check_arg_equiv = [](const Def* lhs, const Def* rhs) {
144 if (auto rng_lhs = Axm::isa<range>(lhs))
145 if (auto not_rhs = Axm::isa<not_>(rhs)) {
146 if (auto rng_rhs = Axm::isa<range>(not_rhs->arg())) return rng_lhs == rng_rhs;
147 }
148 return false;
149 };
150
151 return check_arg_equiv(lhs, rhs) || check_arg_equiv(rhs, lhs);
152}
153
154bool equals_any(Defs lhs, Defs rhs) {
155 Ranges lhs_ranges, rhs_ranges;
156 auto only_ranges = std::ranges::views::filter([](auto d) { return Axm::isa<range>(d); });
157 std::ranges::transform(lhs | only_ranges, std::back_inserter(lhs_ranges), get_range);
158 std::ranges::transform(rhs | only_ranges, std::back_inserter(rhs_ranges), get_range);
159 return std::ranges::includes(lhs_ranges, rhs_ranges) || std::ranges::includes(rhs_ranges, lhs_ranges);
160}
161
162const Def* normalize_disj(const Def* type, const Def*, const Def* arg) {
163 auto& world = type->world();
164 if (arg->as_lit_arity() > 1) {
165 auto contains_any = [](auto args) {
166 return std::ranges::find_if(args, [](const Def* ax) -> bool { return Axm::isa<any>(ax); }) != args.end();
167 };
168
169 auto new_args = flatten_in_arg<disj>(arg);
170 if (contains_any(new_args)) return world.annex<any>();
171 make_vector_unique(new_args);
172 merge_ranges(new_args);
173
174 const Def* to_remove = nullptr;
175 for (const auto* cls0 : new_args) {
176 for (const auto* cls1 : new_args)
177 if (equals_any(cls0, cls1)) return world.annex<any>();
178
179 if (auto not_rhs = Axm::isa<not_>(cls0)) {
180 if (auto disj_rhs = Axm::isa<disj>(not_rhs->arg())) {
181 auto rngs = flatten_in_arg<disj>(disj_rhs->arg());
182 make_vector_unique(rngs);
183 if (equals_any(new_args, rngs)) return world.annex<any>();
184 }
185 }
186 }
187
188 erase(new_args, to_remove);
189 world.DLOG("final ranges {, }", new_args);
190
191 if (new_args.size() > 2) return make_binary_tree<disj>(new_args);
192 if (new_args.size() > 1) return world.call<disj, false>(new_args);
193 return new_args.back();
194 }
195 return arg;
196}
197
198const Def* normalize_range(const Def* type, const Def* callee, const Def* arg) {
199 auto& world = type->world();
200 auto [lhs, rhs] = arg->projs<2>();
201
202 if (!lhs->isa<Var>() && !rhs->isa<Var>()) // before first PE.
203 if (lhs->as<Lit>()->get() > rhs->as<Lit>()->get()) return world.raw_app(type, callee, {rhs, lhs});
204
205 return {};
206}
207
208const Def* normalize_not(const Def*, const Def*, const Def*) { return {}; }
209
211
212} // namespace mim::plug::regex
static auto isa(const Def *def)
Definition axm.h:104
Base class for all Defs.
Definition def.h:197
nat_t as_lit_arity() const
Definition def.h:251
World & world() const noexcept
Definition def.cpp:387
auto projs(F f) const
Splits this Def via Def::projections into an Array (if A == std::dynamic_extent) or std::array (other...
Definition def.h:344
constexpr u32 gid() const noexcept
Global id - unique number for this Def.
Definition def.h:216
T get() const
Definition def.h:700
static T as(const Def *def)
Definition def.h:717
This is a thin wrapper for absl::InlinedVector<T, N, A> which is a drop-in replacement for std::vecto...
Definition vector.h:17
The World represents the whole program and manages creation of MimIR nodes (Defs).
Definition world.h:33
automaton::Range Range
Vector< Range > Ranges
std::pair< std::uint64_t, std::uint64_t > Range
std::optional< Range > merge_ranges(Range a, Range b) noexcept
The regex Plugin
Definition lower_regex.h:5
void merge_ranges(DefVec &args)
void make_vector_unique(DefVec &args)
auto get_range(const Def *rng) -> Range
void flatten_in_arg(const Def *arg, DefVec &new_args)
bool is_in_range(Range range, nat_t needle)
const Def * normalize_conj(const Def *type, const Def *callee, const Def *arg)
const Def * normalize_range(const Def *type, const Def *callee, const Def *arg)
const Def * normalize_disj(const Def *type, const Def *, const Def *arg)
const Def * normalize_quant(const Def *type, const Def *callee, const Def *arg)
const Def * normalize_not(const Def *, const Def *, const Def *)
bool compare_re(const Def *lhs, const Def *rhs)
const Def * make_binary_tree(Defs args)
bool equals_any(const Def *cls0, const Def *cls1)
View< const Def * > Defs
Definition def.h:48
u64 nat_t
Definition types.h:43
Vector< const Def * > DefVec
Definition def.h:49
Vector< T, N, A >::size_type erase(Vector< T, N, A > &c, const U &value) noexcept
Definition vector.h:67
#define MIM_regex_NORMALIZER_IMPL
Definition autogen.h:93
const Def * operator()(Range rng)