MimIR 0.1
MimIR is my Intermediate Representation
Loading...
Searching...
No Matches
normalizers.cpp
Go to the documentation of this file.
1#include <algorithm>
2#include <iterator>
3#include <numeric>
4#include <ranges>
5#include <vector>
6
8#include <fe/assert.h>
9
10#include "mim/axm.h"
11#include "mim/def.h"
12#include "mim/tuple.h"
13#include "mim/world.h"
14
15#include "mim/util/dbg.h"
16#include "mim/util/log.h"
17
20
23
24namespace mim::plug::regex {
25
26template<quant id>
27const Def* normalize_quant(const Def* type, const Def* callee, const Def* arg) {
28 auto& world = type->world();
29
30 // quantifiers are idempotent
31 if (Axm::isa(id, arg)) return arg;
32
33 if constexpr (id == quant::plus) {
34 // (\d?)+ and (\d*)+ == \d*
35 if (auto optional_app = Axm::isa(quant::optional, arg))
36 return world.call(quant::star, optional_app->arg());
37 else if (auto star_app = Axm::isa(quant::star, arg))
38 return arg;
39 } else if constexpr (id == quant::star) {
40 // (\d?)* and (\d+)* == \d*
41 if (auto quant_app = Axm::isa<quant>(arg)) return world.app(callee, quant_app->arg());
42 } else if constexpr (id == quant::optional) {
43 // (\d*)? and (\d+)? == \d*
44 if (auto star_app = Axm::isa(quant::star, arg))
45 return arg;
46 else if (auto plus_app = Axm::isa(quant::plus, arg))
47 return world.call(quant::star, plus_app->arg());
48 }
49
50 return {};
51}
52
53template<class ConjOrDisj>
55 assert(!args.empty());
56 auto& world = args.front()->world();
57 return std::accumulate(args.begin() + 1, args.end(), args.front(), [&world](const Def* lhs, const Def* rhs) {
58 return world.call<ConjOrDisj, false>(Defs{lhs, rhs});
59 });
60}
61
62const Def* normalize_conj(const Def* type, const Def* callee, const Def* arg) {
63 auto& world = type->world();
64 world.DLOG("conj {}:{} ({})", type, callee, arg);
65
66 if (auto a = Lit::isa(arg->arity())) {
67 switch (*a) {
68 case 0: return world.lit_tt();
69 case 1: return arg;
70 default: return make_binary_tree<conj>(detail::flatten_in_arg<conj>(arg));
71 }
72 }
73
74 return {};
75}
76
77bool compare_re(const Def* lhs, const Def* rhs) {
78 auto lhs_range = Axm::isa<range>(lhs);
79 auto rhs_range = Axm::isa<range>(rhs);
80 // sort ranges by increasing lower bound
81 if (lhs_range && rhs_range) return Lit::as(lhs_range->arg()->proj(0)) < Lit::as(rhs_range->arg()->proj(0));
82 // ranges to the end
83 if (lhs_range) return false;
84 if (rhs_range) return true;
85
86 return lhs->gid() < rhs->gid(); // make irreflexive
87}
88
90 std::stable_sort(args.begin(), args.end(), &compare_re);
91 {
92 auto new_end = std::unique(args.begin(), args.end());
93 args.erase(new_end, args.end());
94 }
95}
96
97bool is_in_range(Range range, nat_t needle) { return needle >= range.first && needle <= range.second; }
98
99auto get_range(const Def* rng) -> Range {
100 auto rng_match = Axm::isa<range, false>(rng);
101 return {Lit::as<std::uint8_t>(rng_match->arg(0)), Lit::as<std::uint8_t>(rng_match->arg(1))};
102}
103
104struct app_range {
106 const Def* operator()(Range rng) { return w.call<range>(Defs{w.lit_i8(rng.first), w.lit_i8(rng.second)}); }
107};
108
109void merge_ranges(DefVec& args) {
110 auto ranges_begin = args.begin();
111 while (ranges_begin != args.end() && !Axm::isa<range>(*ranges_begin))
112 ranges_begin++;
113 if (ranges_begin == args.end()) return;
114
115 std::set<const Def*> to_remove;
116 Ranges old_ranges;
117 auto& world = (*ranges_begin)->world();
118
119 std::transform(ranges_begin, args.end(), std::back_inserter(old_ranges), get_range);
120
121 auto new_ranges = automaton::merge_ranges(
122 old_ranges, [&world](auto&&... args) { world.DLOG(std::forward<decltype(args)>(args)...); });
123
124 // invalidates ranges_begin
125 args.erase(ranges_begin, args.end());
126 std::transform(new_ranges.begin(), new_ranges.end(), std::back_inserter(args), app_range{world});
127
128 make_vector_unique(args);
129}
130
131template<cls A, cls B>
132bool equals_any(const Def* cls0, const Def* cls1) {
133 return (Axm::isa(A, cls0) && Axm::isa(B, cls1)) || (Axm::isa(A, cls1) && Axm::isa(B, cls0));
134}
135
136bool equals_any(const Def* lhs, const Def* rhs) {
137 auto check_arg_equiv = [](const Def* lhs, const Def* rhs) {
138 if (auto rng_lhs = Axm::isa<range>(lhs))
139 if (auto not_rhs = Axm::isa<not_>(rhs)) {
140 if (auto rng_rhs = Axm::isa<range>(not_rhs->arg())) return rng_lhs == rng_rhs;
141 }
142 return false;
143 };
144
145 return check_arg_equiv(lhs, rhs) || check_arg_equiv(rhs, lhs);
146}
147
148bool equals_any(Defs lhs, Defs rhs) {
149 Ranges lhs_ranges, rhs_ranges;
150 auto only_ranges = std::ranges::views::filter([](auto d) { return Axm::isa<range>(d); });
151 std::ranges::transform(lhs | only_ranges, std::back_inserter(lhs_ranges), get_range);
152 std::ranges::transform(rhs | only_ranges, std::back_inserter(rhs_ranges), get_range);
153 return std::ranges::includes(lhs_ranges, rhs_ranges) || std::ranges::includes(rhs_ranges, lhs_ranges);
154}
155
156const Def* normalize_disj(const Def* type, const Def*, const Def* arg) {
157 auto& world = type->world();
158 if (auto a = Lit::isa(arg->arity())) {
159 switch (*a) {
160 case 0: return world.lit_ff();
161 case 1: return arg;
162 default:
163 auto contains_any = [](auto args) {
164 return std::ranges::find_if(args, [](const Def* ax) -> bool { return Axm::isa<any>(ax); })
165 != args.end();
166 };
167
168 auto new_args = detail::flatten_in_arg<disj>(arg);
169 if (contains_any(new_args)) return world.annex<any>();
170 make_vector_unique(new_args);
171 merge_ranges(new_args);
172
173 const Def* to_remove = nullptr;
174 for (const auto* cls0 : new_args) {
175 for (const auto* cls1 : new_args)
176 if (equals_any(cls0, cls1)) return world.annex<any>();
177
178 if (auto not_rhs = Axm::isa<not_>(cls0)) {
179 if (auto disj_rhs = Axm::isa<disj>(not_rhs->arg())) {
180 auto rngs = detail::flatten_in_arg<disj>(disj_rhs->arg());
181 make_vector_unique(rngs);
182 if (equals_any(new_args, rngs)) return world.annex<any>();
183 }
184 }
185 }
186
187 erase(new_args, to_remove);
188 world.DLOG("final ranges {, }", new_args);
189
190 if (new_args.size() > 2) return make_binary_tree<disj>(new_args);
191 if (new_args.size() > 1) return world.call<disj, false>(new_args);
192 return new_args.back();
193 }
194 }
195 return {};
196}
197
198const Def* normalize_range(const Def* type, const Def* callee, const Def* arg) {
199 auto& world = type->world();
200 auto [lhs, rhs] = arg->projs<2>();
201
202 if (!lhs->isa<Var>() && !rhs->isa<Var>()) // before first PE.
203 if (lhs->as<Lit>()->get() > rhs->as<Lit>()->get()) return world.raw_app(type, callee, {rhs, lhs});
204
205 return {};
206}
207
208const Def* any_unwanted(const Def* arg) {
209 if (auto ax = Axm::isa<regex::range>(arg)) return nullptr;
210 if (auto disj = Axm::isa<regex::disj>(arg)) {
211 for (const auto* disj_arg : disj->args())
212 if (auto ret = any_unwanted(disj_arg)) return ret;
213 return nullptr;
214 }
215 return arg;
216}
217
218const Def* normalize_not(const Def*, const Def* callee, const Def* arg) {
219 if (auto unwanted = any_unwanted(arg)) {
220 throw Error()
221 .error(arg->loc(), "regex.not_ must only be used with regex.disj and regex.range: {} {}", callee, arg)
222 .error(unwanted->loc(), "found unwanted: {}", unwanted);
223 }
224 return {};
225}
226
228
229} // namespace mim::plug::regex
static auto isa(const Def *def)
Definition axm.h:107
Base class for all Defs.
Definition def.h:251
auto projs(F f) const
Splits this Def via Def::projections into an Array (if A == std::dynamic_extent) or std::array (other...
Definition def.h:390
Loc loc() const
Definition def.h:506
constexpr u32 gid() const noexcept
Global id - unique number for this Def.
Definition def.h:270
virtual const Def * arity() const
Definition def.cpp:553
Error & error(Loc loc, const char *s, Args &&... args)
Definition dbg.h:72
static std::optional< T > isa(const Def *def)
Definition def.h:824
T get() const
Definition def.h:811
static T as(const Def *def)
Definition def.h:830
A variable introduced by a binder (mutable).
Definition def.h:700
This is a thin wrapper for absl::InlinedVector<T, N, A> which is a drop-in replacement for std::vecto...
Definition vector.h:18
The World represents the whole program and manages creation of MimIR nodes (Defs).
Definition world.h:32
automaton::Range Range
Vector< Range > Ranges
std::pair< std::uint64_t, std::uint64_t > Range
std::optional< Range > merge_ranges(Range a, Range b) noexcept
The regex Plugin
Definition lower_regex.h:5
void merge_ranges(DefVec &args)
void make_vector_unique(DefVec &args)
const Def * any_unwanted(const Def *arg)
auto get_range(const Def *rng) -> Range
bool is_in_range(Range range, nat_t needle)
const Def * normalize_conj(const Def *type, const Def *callee, const Def *arg)
const Def * normalize_range(const Def *type, const Def *callee, const Def *arg)
const Def * normalize_disj(const Def *type, const Def *, const Def *arg)
const Def * normalize_quant(const Def *type, const Def *callee, const Def *arg)
const Def * normalize_not(const Def *, const Def *callee, const Def *arg)
bool compare_re(const Def *lhs, const Def *rhs)
const Def * make_binary_tree(Defs args)
bool equals_any(const Def *cls0, const Def *cls1)
View< const Def * > Defs
Definition def.h:76
u64 nat_t
Definition types.h:43
Vector< const Def * > DefVec
Definition def.h:77
Vector< T, N, A >::size_type erase(Vector< T, N, A > &c, const U &value) noexcept
Definition vector.h:70
#define MIM_regex_NORMALIZER_IMPL
Definition autogen.h:107
const Def * operator()(Range rng)