5using namespace std::literals;
 
    9namespace utf8 = fe::utf8;
 
   13    : Super(istream, 
path)
 
   16#define CODE(t, str) keywords_[ast.sym(str)] = Tag::t; 
   21    if (Tag::t != Tag::Nil) keywords_[ast.sym(str)] = Tag::t; 
 
   35        if (accept(utf8::EoF)) 
return tok(Tag::EoF);
 
   36        if (accept(utf8::isspace)) 
continue;
 
   37        if (accept(utf8::Null)) {
 
   38            ast().
error(loc_, 
"invalid UTF-8 character");
 
   44        if (accept( 
'(')) 
return tok(Tag::D_paren_l);
 
   45        if (accept( 
')')) 
return tok(Tag::D_paren_r);
 
   46        if (accept( 
'[')) 
return tok(Tag::D_brckt_l);
 
   47        if (accept( 
']')) 
return tok(Tag::D_brckt_r);
 
   48        if (accept( 
'{')) 
return tok(Tag::D_brace_l);
 
   49        if (accept( 
'}')) 
return tok(Tag::D_brace_r);
 
   50        if (accept(U
'⦃')) 
return tok(Tag::D_curly_l);
 
   51        if (accept(U
'⦄')) 
return tok(Tag::D_curly_r);
 
   52        if (accept(U
'«')) 
return tok(Tag::D_quote_l);
 
   53        if (accept(U
'»')) 
return tok(Tag::D_quote_r);
 
   54        if (accept(U
'⟪')) 
return tok(Tag::D_quote_l);
 
   55        if (accept(U
'⟫')) 
return tok(Tag::D_quote_r);
 
   56        if (accept(U
'‹')) 
return tok(Tag::D_angle_l);
 
   57        if (accept(U
'›')) 
return tok(Tag::D_angle_r);
 
   58        if (accept(U
'⟨')) 
return tok(Tag::D_angle_l);
 
   59        if (accept(U
'⟩')) 
return tok(Tag::D_angle_r);
 
   61            if (accept( 
'<')) 
return tok(Tag::D_quote_l);
 
   62            return tok(Tag::D_angle_l);
 
   65            if (accept( 
'>')) 
return tok(Tag::D_quote_r);
 
   66            return tok(Tag::D_angle_r);
 
   69        if (accept(U
'→')) 
return tok(Tag::T_arrow);
 
   70        if (accept( 
'@')) 
return tok(Tag::T_at);
 
   72            if (accept(
'>')) 
return tok(Tag::T_fat_arrow);
 
   73            return tok(Tag::T_assign);
 
   75        if (accept(U
'⊥')) 
return tok(Tag::T_bot);
 
   76        if (accept(U
'⊤')) 
return tok(Tag::T_top);
 
   77        if (accept(U
'□')) 
return tok(Tag::T_box);
 
   78        if (accept( 
',')) 
return tok(Tag::T_comma);
 
   79        if (accept( 
'$')) 
return tok(Tag::T_dollar);
 
   80        if (accept( 
'#')) 
return tok(Tag::T_extract);
 
   81        if (accept(U
'λ')) 
return tok(Tag::T_lm);
 
   82        if (accept( 
';')) 
return tok(Tag::T_semicolon);
 
   83        if (accept(U
'★')) 
return tok(Tag::T_star);
 
   84        if (accept( 
'*')) 
return tok(Tag::T_star);
 
   85        if (accept( 
':')) 
return tok(Tag::T_colon);
 
   86        if (accept(U
'∪')) 
return tok(Tag::T_union);
 
   90            if (lex_id()) 
return {loc_, Tag::M_anx, sym()};
 
   91            ast().
error(loc_, 
"invalid axm name '{}'", str_);
 
   96            if (accept(utf8::isdigit)) {
 
   99                return {loc_, 
f64(std::strtod(str_.c_str(), 
nullptr))};
 
  102            return tok(Tag::T_dot);
 
  107            if (accept(
'\'')) 
return {loc_, c};
 
  108            ast().
error(loc_, 
"invalid character literal {}", str_);
 
  112        if (accept<Append::Off>(
'\"')) {
 
  113            while (lex_char() != 
'"') {}
 
  115            return {loc_, Tag::L_str, sym()};
 
  119            if (
auto i = keywords_.find(sym()); i != keywords_.end()) 
return tok(i->second);
 
  120            return {loc_, Tag::M_id, sym()};
 
  123        if (utf8::isdigit(ahead()) || utf8::any(
'+', 
'-')(ahead())) {
 
  124            if (
auto lit = parse_lit()) 
return *lit;
 
  140                while (ahead() != utf8::EoF && ahead() != 
'\n') next();
 
  144            ast().
error({loc_.path, peek_}, 
"invalid input char '/'; maybe you wanted to start a comment?");
 
  148        ast().
error({loc_.path, peek_}, 
"invalid input char '{}'", utf8::Char32(ahead()));
 
 
  153bool Lexer::lex_id() {
 
  154    if (accept([](
char32_t c) { 
return c == 
'_' || utf8::isalpha(c); })) {
 
  155        while (accept([](
char32_t c) { 
return c == 
'_' || c == 
'.' || utf8::isalnum(c); })) {}
 
  162std::optional<Tok> Lexer::parse_lit() {
 
  164    std::optional<bool> sign;
 
  166    if (accept<Append::Off>(
'+')) {
 
  168    } 
else if (accept<Append::Off>(
'-')) {
 
  169        if (accept(
'>')) 
return tok(Tag::T_arrow);
 
  174    if (accept<Append::Off>(
'0')) {
 
  175        if      (accept<Append::Off>(
'b')) base =  2;
 
  176        else if (accept<Append::Off>(
'B')) base =  2;
 
  177        else if (accept<Append::Off>(
'o')) base =  8;
 
  178        else if (accept<Append::Off>(
'O')) base =  8;
 
  179        else if (accept<Append::Off>(
'x')) base = 16;
 
  180        else if (accept<Append::Off>(
'X')) base = 16;
 
  185    if (accept(utf8::any(
'i', 
'I'))) {
 
  186        if (sign) str_.insert(0, 
"-"sv);
 
  187        auto val = std::strtoull(str_.c_str(), 
nullptr, base);
 
  190        auto width = std::strtoull(str_.c_str(), 
nullptr, 10);
 
  194    if (!sign && base == 10) {
 
  195        if (utf8::isrange(ahead(), U
'₀', U
'₉')) {
 
  196            auto i = std::strtoull(str_.c_str(), 
nullptr, 10);
 
  198            while (utf8::isrange(ahead(), U
'₀', U
'₉')) mod += next() - U
'₀' + 
'0';
 
  199            auto m = std::strtoull(mod.c_str(), 
nullptr, 10);
 
  201        } 
else if (accept<Append::Off>(
'_')) {
 
  202            auto i = std::strtoull(str_.c_str(), 
nullptr, 10);
 
  204            if (accept(utf8::isdigit)) {
 
  206                auto m = std::strtoull(str_.c_str(), 
nullptr, 10);
 
  209                ast().
error(loc_, 
"stray underscore in Idx literal; size is missing");
 
  210                auto i = std::strtoull(str_.c_str(), 
nullptr, 10);
 
  211                return Tok{loc_, 
u64(i)};
 
  216    bool is_float = 
false;
 
  217    if (base == 10 || base == 16) {
 
  224        bool has_exp = parse_exp(base);
 
  225        if (base == 16 && is_float && !has_exp) 
ast().
error(loc_, 
"hexadecimal floating constants require an exponent");
 
  229    if (sign && str_.empty()) {
 
  230        ast().
error(loc_, 
"stray '{}'", *sign ? 
"-" : 
"+");
 
  234    if (is_float && base == 16) str_.insert(0, 
"0x"sv);
 
  235    if (sign && *sign) str_.insert(0, 
"-"sv);
 
  237    if (is_float) 
return Tok{loc_, 
f64(std::strtod  (str_.c_str(), 
nullptr      ))};
 
  238    if (sign)     
return Tok{loc_, 
u64(std::strtoll (str_.c_str(), 
nullptr, base))};
 
  239    else          return Tok{loc_, 
u64(std::strtoull(str_.c_str(), 
nullptr, base))};
 
  242void Lexer::parse_digits(
int base ) {
 
  245        case  2: 
while (accept(utf8::isbdigit)) {} 
break;
 
  246        case  8: 
while (accept(utf8::isodigit)) {} 
break;
 
  247        case 10: 
while (accept(utf8::isdigit))  {} 
break;
 
  248        case 16: 
while (accept(utf8::isxdigit)) {} 
break;
 
  250        default: fe::unreachable();
 
  254bool Lexer::parse_exp(
int base ) {
 
  255    if (accept(base == 10 ? utf8::any(
'e', 
'E') : utf8::any(
'p', 
'P'))) {
 
  256        accept(utf8::any(
'+', 
'-'));
 
  257        if (!utf8::isdigit(ahead())) 
ast().
error(loc_, 
"exponent has no digits");
 
  265char8_t Lexer::lex_char() {
 
  266    if (accept<Append::Off>(
'\\')) {
 
  269        else if (accept<Append::Off>(
'\'')) str_ += 
'\'';
 
  270        else if (accept<Append::Off>(
'\\')) str_ += 
'\\';
 
  271        else if (accept<Append::Off>( 
'"')) str_ += 
'\"';
 
  272        else if (accept<Append::Off>( 
'0')) str_ += 
'\0';
 
  273        else if (accept<Append::Off>( 
'a')) str_ += 
'\a';
 
  274        else if (accept<Append::Off>( 
'b')) str_ += 
'\b';
 
  275        else if (accept<Append::Off>( 
'f')) str_ += 
'\f';
 
  276        else if (accept<Append::Off>( 
'n')) str_ += 
'\n';
 
  277        else if (accept<Append::Off>( 
'r')) str_ += 
'\r';
 
  278        else if (accept<Append::Off>( 
't')) str_ += 
'\t';
 
  279        else if (accept<Append::Off>( 
'v')) str_ += 
'\v';
 
  280        else ast().
error(loc_.anew_finis(), 
"invalid escape character '\\{}'", (
char)ahead());
 
  286    if (utf8::isascii(c)) 
return c;
 
  287    ast().
error(loc_, 
"invalid character '{}'", (
char)c);
 
  291void Lexer::eat_comments() {
 
  293        while (ahead() != utf8::EoF && ahead() != 
'*') next();
 
  294        if (accept(utf8::EoF)) {
 
  295            ast().
error(loc_, 
"non-terminated multiline comment");
 
  299        if (accept(
'/')) 
break;
 
  303void Lexer::emit_md(
bool start_of_file) {
 
  304    if (!start_of_file) md_fence();
 
  308        for (
int i = 0; i < 3; ++i) next();
 
  312        while (ahead() != utf8::EoF && ahead() != 
'\n') next();
 
  314    } 
while (start_md());
 
  316    if (ahead() == utf8::EoF)
 
  322Sym Lexer::sym() { 
return ast().
sym(str_); }
 
const Lit * lit_idx_mod(nat_t mod, u64 val)
Constructs a Lit of type Idx of size mod.
 
const Lit * lit_int(nat_t width, u64 val)
Constructs a Lit of type Idx of size 2^width.
 
Lexer(AST &, std::istream &, const fs::path *path=nullptr, std::ostream *md=nullptr)
Creates a lexer to read *.mim files (see Lexical Structure).
 
const fs::path * path() const