MimIR 0.1
MimIR is my Intermediate Representation
Loading...
Searching...
No Matches
normalizers.cpp
Go to the documentation of this file.
1#include <algorithm>
2#include <iterator>
3#include <numeric>
4#include <ranges>
5#include <vector>
6
8#include <fe/assert.h>
9
10#include "mim/axiom.h"
11#include "mim/def.h"
12#include "mim/tuple.h"
13#include "mim/world.h"
14
15#include "mim/util/log.h"
16
18
21
22namespace mim::plug::regex {
23
24template<quant id> Ref normalize_quant(Ref type, Ref callee, Ref arg) {
25 auto& world = type->world();
26
27 // quantifiers are idempotent
28 if (mim::match(id, arg)) return arg;
29
30 if constexpr (id == quant::plus) {
31 // (\d?)+ and (\d*)+ == \d*
32 if (auto optional_app = mim::match(quant::optional, arg))
33 return world.call(quant::star, optional_app->arg());
34 else if (auto star_app = mim::match(quant::star, arg))
35 return arg;
36 } else if constexpr (id == quant::star) {
37 // (\d?)* and (\d+)* == \d*
38 if (auto quant_app = mim::match<quant>(arg)) return world.app(callee, quant_app->arg());
39 } else if constexpr (id == quant::optional) {
40 // (\d*)? and (\d+)? == \d*
41 if (auto star_app = mim::match(quant::star, arg))
42 return arg;
43 else if (auto plus_app = mim::match(quant::plus, arg))
44 return world.call(quant::star, plus_app->arg());
45 }
46
47 return world.raw_app(type, callee, arg);
48}
49
50template<class ConjOrDisj> void flatten_in_arg(Ref arg, DefVec& new_args) {
51 for (const auto* proj : arg->projs()) {
52 // flatten conjs in conjs / disj in disjs
53 if (auto seq_app = mim::match<ConjOrDisj>(proj))
54 flatten_in_arg<ConjOrDisj>(seq_app->arg(), new_args);
55 else
56 new_args.push_back(proj);
57 }
58}
59
60template<class ConjOrDisj> DefVec flatten_in_arg(Ref arg) {
61 DefVec new_args;
62 flatten_in_arg<ConjOrDisj>(arg, new_args);
63 return new_args;
64}
65
66template<class ConjOrDisj> Ref make_binary_tree(Ref type, Defs args) {
67 assert(!args.empty());
68 auto& world = args.front()->world();
69 return std::accumulate(args.begin() + 1, args.end(), args.front(), [&type, &world](const Def* lhs, const Def* rhs) {
70 return world.raw_app(type, world.call<ConjOrDisj>(world.lit_nat(2)), world.tuple({lhs, rhs}));
71 });
72}
73
74Ref normalize_conj(Ref type, Ref callee, Ref arg) {
75 auto& world = type->world();
76 world.DLOG("conj {}:{} ({})", type, callee, arg);
77 if (arg->as_lit_arity() > 2) {
78 auto flat_args = flatten_in_arg<conj>(arg);
79 return make_binary_tree<conj>(type, flat_args);
80 }
81 if (arg->as_lit_arity() > 1) return world.raw_app(type, callee, arg);
82 return arg;
83}
84
85bool compare_re(const Def* lhs, const Def* rhs) {
86 auto lhs_range = mim::match<range>(lhs);
87 auto rhs_range = mim::match<range>(rhs);
88 if (lhs_range) {
89 // sort ranges by increasing lower bound
90 if (rhs_range) return Lit::as(lhs_range->arg()->proj(0)) < Lit::as(rhs_range->arg()->proj(0));
91 // ranges to the end
92 return false;
93 } else if (rhs_range) {
94 // ranges to the end
95 return true;
96 }
97 // keep
98 return true;
99}
100
102 std::stable_sort(args.begin(), args.end(), &compare_re);
103 {
104 auto new_end = std::unique(args.begin(), args.end());
105 args.erase(new_end, args.end());
106 }
107}
108
109bool is_in_range(Range range, nat_t needle) { return needle >= range.first && needle <= range.second; }
110
111auto get_range(const Def* rng) -> Range {
112 auto rng_match = mim::match<range, false>(rng);
113 return {Lit::as<std::uint8_t>(rng_match->arg(0)), Lit::as<std::uint8_t>(rng_match->arg(1))};
114}
115
116struct app_range {
118 Ref operator()(Range rng) { return w.call<range>(Defs{w.lit_i8(rng.first), w.lit_i8(rng.second)}); }
119};
120
121void merge_ranges(DefVec& args) {
122 auto ranges_begin = args.begin();
123 while (ranges_begin != args.end() && !mim::match<range>(*ranges_begin)) ranges_begin++;
124 if (ranges_begin == args.end()) return;
125
126 std::set<const Def*> to_remove;
127 Ranges old_ranges;
128 auto& world = (*ranges_begin)->world();
129
130 std::transform(ranges_begin, args.end(), std::back_inserter(old_ranges), get_range);
131
132 auto new_ranges = automaton::merge_ranges(
133 old_ranges, [&world](auto&&... args) { world.DLOG(std::forward<decltype(args)>(args)...); });
134
135 // invalidates ranges_begin
136 args.erase(ranges_begin, args.end());
137 std::transform(new_ranges.begin(), new_ranges.end(), std::back_inserter(args), app_range{world});
138
139 make_vector_unique(args);
140}
141
142template<cls A, cls B> bool equals_any(const Def* cls0, const Def* cls1) {
143 return (mim::match(A, cls0) && mim::match(B, cls1)) || (mim::match(A, cls1) && mim::match(B, cls0));
144}
145
146bool equals_any(const Def* lhs, const Def* rhs) {
147 auto check_arg_equiv = [](const Def* lhs, const Def* rhs) {
148 if (auto rng_lhs = mim::match<range>(lhs))
149 if (auto not_rhs = mim::match<not_>(rhs)) {
150 if (auto rng_rhs = mim::match<range>(not_rhs->arg())) return rng_lhs == rng_rhs;
151 }
152 return false;
153 };
154
155 return check_arg_equiv(lhs, rhs) || check_arg_equiv(rhs, lhs);
156}
157
158bool equals_any(Defs lhs, Defs rhs) {
159 Ranges lhs_ranges, rhs_ranges;
160 auto only_ranges = std::ranges::views::filter([](auto d) { return match<range>(d); });
161 std::ranges::transform(lhs | only_ranges, std::back_inserter(lhs_ranges), get_range);
162 std::ranges::transform(rhs | only_ranges, std::back_inserter(rhs_ranges), get_range);
163 return std::ranges::includes(lhs_ranges, rhs_ranges) || std::ranges::includes(rhs_ranges, lhs_ranges);
164}
165
167 auto& world = type->world();
168 if (arg->as_lit_arity() > 1) {
169 auto contains_any = [](auto args) {
170 return std::ranges::find_if(args, [](const Def* ax) -> bool { return mim::match<any>(ax); }) != args.end();
171 };
172
173 auto new_args = flatten_in_arg<disj>(arg);
174 if (contains_any(new_args)) return world.annex<any>();
175 make_vector_unique(new_args);
176 merge_ranges(new_args);
177
178 const Def* to_remove = nullptr;
179 for (const auto* cls0 : new_args) {
180 for (const auto* cls1 : new_args)
181 if (equals_any(cls0, cls1)) return world.annex<any>();
182
183 if (auto not_rhs = mim::match<not_>(cls0)) {
184 if (auto disj_rhs = mim::match<disj>(not_rhs->arg())) {
185 auto rngs = flatten_in_arg<disj>(disj_rhs->arg());
186 make_vector_unique(rngs);
187 if (equals_any(new_args, rngs)) return world.annex<any>();
188 }
189 }
190 }
191
192 erase(new_args, to_remove);
193 world.DLOG("final ranges {, }", new_args);
194
195 const Def* retval = new_args.back();
196 if (new_args.size() > 2)
197 retval = make_binary_tree<disj>(type, new_args);
198 else if (new_args.size() > 1)
199 retval = world.raw_app(type, world.call<disj>(world.lit_nat(2)), new_args);
200 world.DLOG("disj app: {}", retval);
201 return retval;
202 }
203 return arg;
204}
205
206Ref normalize_range(Ref type, Ref callee, Ref arg) {
207 auto& world = type->world();
208 auto [lhs, rhs] = arg->projs<2>();
209
210 if (!lhs->isa<Var>() && !rhs->isa<Var>()) // before first PE.
211 if (lhs->as<Lit>()->get() > rhs->as<Lit>()->get()) return world.raw_app(type, callee, world.tuple({rhs, lhs}));
212
213 return world.raw_app(type, callee, arg);
214}
215
216Ref normalize_not(Ref type, Ref callee, Ref arg) {
217 auto& world = type->world();
218 return world.raw_app(type, callee, arg);
219}
220
222
223} // namespace mim::plug::regex
Base class for all Defs.
Definition def.h:223
nat_t as_lit_arity() const
Definition def.h:259
World & world() const
Definition def.cpp:415
auto projs(F f) const
Splits this Def via Def::projections into an Array (if A == -1_n) or std::array (otherwise).
Definition def.h:367
T get() const
Definition def.h:751
Helper class to retrieve Infer::arg if present.
Definition def.h:86
This is a thin wrapper for std::span<T, N> with the following additional features:
Definition span.h:28
This is a thin wrapper for absl::InlinedVector<T, N, / A> which in turn is a drop-in replacement for ...
Definition vector.h:16
The World represents the whole program and manages creation of MimIR nodes (Defs).
Definition world.h:33
Ref raw_app(Ref type, Ref callee, Ref arg)
Definition world.cpp:217
Vector< Range > Ranges
automaton::Range Range
std::pair< std::uint64_t, std::uint64_t > Range
std::optional< Range > merge_ranges(Range a, Range b) noexcept
The regex Plugin
Definition lower_regex.h:5
void make_vector_unique(DefVec &args)
auto get_range(const Def *rng) -> Range
Ref normalize_range(Ref type, Ref callee, Ref arg)
Ref normalize_not(Ref type, Ref callee, Ref arg)
bool is_in_range(Range range, nat_t needle)
Ref normalize_quant(Ref type, Ref callee, Ref arg)
Ref normalize_disj(Ref type, Ref, Ref arg)
Ref make_binary_tree(Ref type, Defs args)
void flatten_in_arg(Ref arg, DefVec &new_args)
bool compare_re(const Def *lhs, const Def *rhs)
Ref normalize_conj(Ref type, Ref callee, Ref arg)
bool equals_any(const Def *cls0, const Def *cls1)
u64 nat_t
Definition types.h:43
auto match(Ref def)
Definition axiom.h:112
constexpr Vector< T, N, A >::size_type erase(Vector< T, N, A > &c, const U &value)
Definition vector.h:61
#define MIM_regex_NORMALIZER_IMPL
Definition autogen.h:93