/*
 * Copyright 2023-2024 FalsePattern
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.falsepattern.zigbrains.zig.lexer;

import com.intellij.lexer.FlexLexer;
import com.intellij.psi.tree.IElementType;

import static com.intellij.psi.TokenType.WHITE_SPACE;
import static com.intellij.psi.TokenType.BAD_CHARACTER;
import static com.falsepattern.zigbrains.zig.psi.ZigTypes.*;

%%

// JFlex specification for the Zig lexer.
// The directives below make JFlex generate a class named ZigFlexLexer that
// implements FlexLexer and whose scanning method is `IElementType advance()`.
%class ZigFlexLexer
%implements FlexLexer
%function advance
%type IElementType

CRLF=\R
WHITE_SPACE=[\s]+

// Digit classes. The `x_` variants allow a single optional "_" separator in
// front of a digit, so separators can only appear between digits.
bin=[01]
bin_="_"? {bin}
oct=[0-7]
oct_="_"? {oct}
hex=[0-9a-fA-F]
hex_="_"? {hex}
dec=[0-9]
dec_="_"? {dec}

bin_int={bin} {bin_}*
oct_int={oct} {oct_}*
dec_int={dec} {dec_}*
hex_int={hex} {hex_}*

// Octal escapes spelling out the byte ranges of the UTF-8 table below
// (e.g. \200-\277 is 0x80-0xBF).
// NOTE(review): the generated scanner reads Java chars, not encoded bytes -
// confirm these byte-range patterns behave as intended for non-ASCII input.
ox80_oxBF=[\200-\277]
oxF4=\364
ox80_ox8F=[\200-\217]
oxF1_oxF3=[\361-\363]
oxF0=\360
ox90_oxBF=[\220-\277]
oxEE_oxEF=[\356-\357]
oxED=\355
ox80_ox9F=[\200-\237]
oxE1_oxEC=[\341-\354]
oxE0=\340
oxA0_oxBF=[\240-\277]
oxC2_oxDF=[\302-\337]

// From https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/
// First Byte      Second Byte     Third Byte      Fourth Byte
// [0x00,0x7F]
// [0xC2,0xDF]     [0x80,0xBF]
//    0xE0         [0xA0,0xBF]     [0x80,0xBF]
// [0xE1,0xEC]     [0x80,0xBF]     [0x80,0xBF]
//    0xED         [0x80,0x9F]     [0x80,0xBF]
// [0xEE,0xEF]     [0x80,0xBF]     [0x80,0xBF]
//    0xF0         [0x90,0xBF]     [0x80,0xBF]     [0x80,0xBF]
// [0xF1,0xF3]     [0x80,0xBF]     [0x80,0xBF]     [0x80,0xBF]
//    0xF4         [0x80,0x8F]     [0x80,0xBF]     [0x80,0xBF]

mb_utf8_literal= {oxF4}      {ox80_ox8F} {ox80_oxBF} {ox80_oxBF}
               | {oxF1_oxF3} {ox80_oxBF} {ox80_oxBF} {ox80_oxBF}
               | {oxF0}      {ox90_oxBF} {ox80_oxBF} {ox80_oxBF}
               | {oxEE_oxEF} {ox80_oxBF} {ox80_oxBF}
               | {oxED}      {ox80_ox9F} {ox80_oxBF}
               | {oxE1_oxEC} {ox80_oxBF} {ox80_oxBF}
               | {oxE0}      {oxA0_oxBF} {ox80_oxBF}
               | {oxC2_oxDF} {ox80_oxBF}

// Every 7-bit character except \012 (newline), \047 (single quote) and
// \134 (backslash).
ascii_char_not_nl_slash_squote=[\000-\011\013-\046\050-\133\135-\177]

char_escape= "\\x" {hex} {hex}
           | "\\u{" {hex}+ "}"
           | "\\" [nr\\t'\"]

char_char= {mb_utf8_literal}
         | {char_escape}
         | {ascii_char_not_nl_slash_squote}

string_char= \\ .
           | [^\"\n]

// all_nl_wrap:   rest of the current line plus any following spaces/newlines;
//                used in front of a repeated prefix to continue onto the next line.
// all_nl_nowrap: rest of the current line including its terminating newline.
all_nl_wrap=[^\n]* [ \n]*
all_nl_nowrap=[^\n]* \n

FLOAT= "0x" {hex_int} "." {hex_int} ([pP] [-+]? {dec_int})?
     |      {dec_int} "." {dec_int} ([eE] [-+]? {dec_int})?
     | "0x" {hex_int} [pP] [-+]? {dec_int}
     |      {dec_int} [eE] [-+]? {dec_int}

INTEGER= "0b" {bin_int}
       | "0o" {oct_int}
       | "0x" {hex_int}
       |      {dec_int}

IDENTIFIER_PLAIN=[A-Za-z_][A-Za-z0-9_]*
BUILTINIDENTIFIER="@"[A-Za-z_][A-Za-z0-9_]*

%state STR_LIT
%state STR_MULT_LINE
%state CHAR_LIT

%state ID_QUOT
%state UNT_QUOT

%state CDOC_CMT
%state DOC_CMT
%state LINE_CMT
%%

// Every rule is qualified with the lexical state it belongs to. An unqualified
// rule is active in all inclusive (%state) states, so without the qualifiers
// the identical {all_nl_nowrap}, {string_char}*"\"" and [^] patterns below
// would shadow each other and only the first of each could ever match.
//
// Actions that only call yybegin(...) (or are empty) return no token and let
// scanning continue in the new state.
// NOTE(review): the text consumed by such actions is presumably folded into the
// next returned token by the IntelliJ lexer adapter - confirm against the caller.

//Comments

<YYINITIAL>      "//!"                    { yybegin(CDOC_CMT); }
<CDOC_CMT>       {all_nl_wrap} "//!"      { }
<CDOC_CMT>       {all_nl_nowrap}          { yybegin(YYINITIAL); return CONTAINER_DOC_COMMENT; }

<YYINITIAL>      "///"                    { yybegin(DOC_CMT); }
<DOC_CMT>        {all_nl_wrap} "///"      { }
<DOC_CMT>        {all_nl_nowrap}          { yybegin(YYINITIAL); return DOC_COMMENT; }

<YYINITIAL>      "//"                     { yybegin(LINE_CMT); }
<LINE_CMT>       {all_nl_wrap} "//"       { }
<LINE_CMT>       {all_nl_nowrap}          { yybegin(YYINITIAL); return LINE_COMMENT; }

//Symbols
<YYINITIAL>      "&"                      { return AMPERSAND; }
<YYINITIAL>      "&="                     { return AMPERSANDEQUAL; }
<YYINITIAL>      "*"                      { return ASTERISK; }
<YYINITIAL>      "**"                     { return ASTERISK2; }
<YYINITIAL>      "*="                     { return ASTERISKEQUAL; }
<YYINITIAL>      "*%"                     { return ASTERISKPERCENT; }
<YYINITIAL>      "*%="                    { return ASTERISKPERCENTEQUAL; }
<YYINITIAL>      "*|"                     { return ASTERISKPIPE; }
<YYINITIAL>      "*|="                    { return ASTERISKPIPEEQUAL; }
<YYINITIAL>      "^"                      { return CARET; }
<YYINITIAL>      "^="                     { return CARETEQUAL; }
<YYINITIAL>      ":"                      { return COLON; }
<YYINITIAL>      ","                      { return COMMA; }
<YYINITIAL>      "."                      { return DOT; }
<YYINITIAL>      ".."                     { return DOT2; }
<YYINITIAL>      "..."                    { return DOT3; }
<YYINITIAL>      ".*"                     { return DOTASTERISK; }
<YYINITIAL>      ".?"                     { return DOTQUESTIONMARK; }
<YYINITIAL>      "="                      { return EQUAL; }
<YYINITIAL>      "=="                     { return EQUALEQUAL; }
<YYINITIAL>      "=>"                     { return EQUALRARROW; }
<YYINITIAL>      "!"                      { return EXCLAMATIONMARK; }
<YYINITIAL>      "!="                     { return EXCLAMATIONMARKEQUAL; }
<YYINITIAL>      "<"                      { return LARROW; }
<YYINITIAL>      "<<"                     { return LARROW2; }
<YYINITIAL>      "<<="                    { return LARROW2EQUAL; }
<YYINITIAL>      "<<|"                    { return LARROW2PIPE; }
<YYINITIAL>      "<<|="                   { return LARROW2PIPEEQUAL; }
<YYINITIAL>      "<="                     { return LARROWEQUAL; }
<YYINITIAL>      "{"                      { return LBRACE; }
<YYINITIAL>      "["                      { return LBRACKET; }
<YYINITIAL>      "("                      { return LPAREN; }
<YYINITIAL>      "-"                      { return MINUS; }
<YYINITIAL>      "-="                     { return MINUSEQUAL; }
<YYINITIAL>      "-%"                     { return MINUSPERCENT; }
<YYINITIAL>      "-%="                    { return MINUSPERCENTEQUAL; }
<YYINITIAL>      "-|"                     { return MINUSPIPE; }
<YYINITIAL>      "-|="                    { return MINUSPIPEEQUAL; }
<YYINITIAL>      "->"                     { return MINUSRARROW; }
<YYINITIAL>      "%"                      { return PERCENT; }
<YYINITIAL>      "%="                     { return PERCENTEQUAL; }
<YYINITIAL>      "|"                      { return PIPE; }
<YYINITIAL>      "||"                     { return PIPE2; }
<YYINITIAL>      "|="                     { return PIPEEQUAL; }
<YYINITIAL>      "+"                      { return PLUS; }
<YYINITIAL>      "++"                     { return PLUS2; }
<YYINITIAL>      "+="                     { return PLUSEQUAL; }
<YYINITIAL>      "+%"                     { return PLUSPERCENT; }
<YYINITIAL>      "+%="                    { return PLUSPERCENTEQUAL; }
<YYINITIAL>      "+|"                     { return PLUSPIPE; }
<YYINITIAL>      "+|="                    { return PLUSPIPEEQUAL; }
//This one is directly in the tokenizer, because it conflicts with identifiers without context
// "c" { return LETTERC; }
<YYINITIAL>      "?"                      { return QUESTIONMARK; }
<YYINITIAL>      ">"                      { return RARROW; }
<YYINITIAL>      ">>"                     { return RARROW2; }
<YYINITIAL>      ">>="                    { return RARROW2EQUAL; }
<YYINITIAL>      ">="                     { return RARROWEQUAL; }
<YYINITIAL>      "}"                      { return RBRACE; }
<YYINITIAL>      "]"                      { return RBRACKET; }
<YYINITIAL>      ")"                      { return RPAREN; }
<YYINITIAL>      ";"                      { return SEMICOLON; }
<YYINITIAL>      "/"                      { return SLASH; }
<YYINITIAL>      "/="                     { return SLASHEQUAL; }
<YYINITIAL>      "~"                      { return TILDE; }

// keywords
// These are listed before {IDENTIFIER_PLAIN}: on equal match length the
// earlier rule wins, so an exact keyword is never reported as IDENTIFIER.
<YYINITIAL>      "addrspace"              { return KEYWORD_ADDRSPACE; }
<YYINITIAL>      "align"                  { return KEYWORD_ALIGN; }
<YYINITIAL>      "allowzero"              { return KEYWORD_ALLOWZERO; }
<YYINITIAL>      "and"                    { return KEYWORD_AND; }
<YYINITIAL>      "anyframe"               { return KEYWORD_ANYFRAME; }
<YYINITIAL>      "anytype"                { return KEYWORD_ANYTYPE; }
<YYINITIAL>      "asm"                    { return KEYWORD_ASM; }
<YYINITIAL>      "async"                  { return KEYWORD_ASYNC; }
<YYINITIAL>      "await"                  { return KEYWORD_AWAIT; }
<YYINITIAL>      "break"                  { return KEYWORD_BREAK; }
<YYINITIAL>      "callconv"               { return KEYWORD_CALLCONV; }
<YYINITIAL>      "catch"                  { return KEYWORD_CATCH; }
<YYINITIAL>      "comptime"               { return KEYWORD_COMPTIME; }
<YYINITIAL>      "const"                  { return KEYWORD_CONST; }
<YYINITIAL>      "continue"               { return KEYWORD_CONTINUE; }
<YYINITIAL>      "defer"                  { return KEYWORD_DEFER; }
<YYINITIAL>      "else"                   { return KEYWORD_ELSE; }
<YYINITIAL>      "enum"                   { return KEYWORD_ENUM; }
<YYINITIAL>      "errdefer"               { return KEYWORD_ERRDEFER; }
<YYINITIAL>      "error"                  { return KEYWORD_ERROR; }
<YYINITIAL>      "export"                 { return KEYWORD_EXPORT; }
<YYINITIAL>      "extern"                 { return KEYWORD_EXTERN; }
<YYINITIAL>      "fn"                     { return KEYWORD_FN; }
<YYINITIAL>      "for"                    { return KEYWORD_FOR; }
<YYINITIAL>      "if"                     { return KEYWORD_IF; }
<YYINITIAL>      "inline"                 { return KEYWORD_INLINE; }
<YYINITIAL>      "noalias"                { return KEYWORD_NOALIAS; }
<YYINITIAL>      "nosuspend"              { return KEYWORD_NOSUSPEND; }
<YYINITIAL>      "noinline"               { return KEYWORD_NOINLINE; }
<YYINITIAL>      "opaque"                 { return KEYWORD_OPAQUE; }
<YYINITIAL>      "or"                     { return KEYWORD_OR; }
<YYINITIAL>      "orelse"                 { return KEYWORD_ORELSE; }
<YYINITIAL>      "packed"                 { return KEYWORD_PACKED; }
<YYINITIAL>      "pub"                    { return KEYWORD_PUB; }
<YYINITIAL>      "resume"                 { return KEYWORD_RESUME; }
<YYINITIAL>      "return"                 { return KEYWORD_RETURN; }
<YYINITIAL>      "linksection"            { return KEYWORD_LINKSECTION; }
<YYINITIAL>      "struct"                 { return KEYWORD_STRUCT; }
<YYINITIAL>      "suspend"                { return KEYWORD_SUSPEND; }
<YYINITIAL>      "switch"                 { return KEYWORD_SWITCH; }
<YYINITIAL>      "test"                   { return KEYWORD_TEST; }
<YYINITIAL>      "threadlocal"            { return KEYWORD_THREADLOCAL; }
<YYINITIAL>      "try"                    { return KEYWORD_TRY; }
<YYINITIAL>      "union"                  { return KEYWORD_UNION; }
<YYINITIAL>      "unreachable"            { return KEYWORD_UNREACHABLE; }
<YYINITIAL>      "usingnamespace"         { return KEYWORD_USINGNAMESPACE; }
<YYINITIAL>      "var"                    { return KEYWORD_VAR; }
<YYINITIAL>      "volatile"               { return KEYWORD_VOLATILE; }
<YYINITIAL>      "while"                  { return KEYWORD_WHILE; }

// Character literal: after the opening quote, either one char_char plus the
// closing quote completes the literal, or the offending character is pushed
// back and handling moves to UNT_QUOT.
<YYINITIAL>      "'"                      { yybegin(CHAR_LIT); }
<CHAR_LIT>       {char_char}"'"           { yybegin(YYINITIAL); return CHAR_LITERAL; }
<CHAR_LIT>       [^]                      { yypushback(1); yybegin(UNT_QUOT); }

<YYINITIAL>      {FLOAT}                  { return FLOAT; }
<YYINITIAL>      {INTEGER}                { return INTEGER; }

// Single-line string literal; same fallback to UNT_QUOT as character literals.
<YYINITIAL>      "\""                     { yybegin(STR_LIT); }
<STR_LIT>        {string_char}*"\""       { yybegin(YYINITIAL); return STRING_LITERAL_SINGLE; }
<STR_LIT>        [^]                      { yypushback(1); yybegin(UNT_QUOT); }

// Multi-line string literal: each line starts with two backslashes; consecutive
// such lines are joined into one STRING_LITERAL_MULTI token.
<YYINITIAL>      "\\\\"                   { yybegin(STR_MULT_LINE); }
<STR_MULT_LINE>  {all_nl_wrap} "\\\\"     { }
<STR_MULT_LINE>  {all_nl_nowrap}          { yybegin(YYINITIAL); return STRING_LITERAL_MULTI; }

<YYINITIAL>      {IDENTIFIER_PLAIN}       { return IDENTIFIER; }

// Quoted identifier of the form @"..."; reported as a plain IDENTIFIER.
<YYINITIAL>      "@\""                    { yybegin(ID_QUOT); }
<ID_QUOT>        {string_char}*"\""       { yybegin(YYINITIAL); return IDENTIFIER; }
<ID_QUOT>        [^]                      { yypushback(1); yybegin(UNT_QUOT); }

<YYINITIAL>      {BUILTINIDENTIFIER}      { return BUILTINIDENTIFIER; }

// Unterminated quote: the remainder of the line, up to and including its line
// terminator, is reported as BAD_CHARACTER and scanning resumes in YYINITIAL.
<UNT_QUOT>       [^\n]*{CRLF}             { yybegin(YYINITIAL); return BAD_CHARACTER; }

<YYINITIAL>      {WHITE_SPACE}            { return WHITE_SPACE; }

// Deliberately unqualified: last-resort rule for every state, reached only when
// no state-specific rule above matches.
[^] { return BAD_CHARACTER; }