fix: improve comment and multiline string tokenizer
This commit is contained in:
parent
e33c96d658
commit
868c37c567
3 changed files with 59 additions and 44 deletions
|
@ -17,6 +17,11 @@ Changelog structure reference:
|
|||
|
||||
## [Unreleased]
|
||||
|
||||
### Fixed
|
||||
|
||||
- Zig
|
||||
- Documentation comment after regular comment was being highlighted as regular comment
|
||||
|
||||
## [23.0.1]
|
||||
|
||||
### Fixed
|
||||
|
|
|
@ -36,8 +36,10 @@ import static com.falsepattern.zigbrains.zig.psi.ZigTypes.*;
|
|||
%type IElementType
|
||||
%unicode
|
||||
|
||||
CRLF=\R
|
||||
WHITE_SPACE=[\s]+
|
||||
WHITE_SPACE=\s+
|
||||
|
||||
// visual studio parity
|
||||
LF=\r\n?|[\n\u0085\u2028\u2029]
|
||||
|
||||
bin=[01]
|
||||
bin_="_"? {bin}
|
||||
|
@ -54,13 +56,13 @@ dec_int={dec} {dec_}*
|
|||
hex_int={hex} {hex_}*
|
||||
|
||||
char_char= \\ .
|
||||
| [^\'\n]
|
||||
| [^\'\r\n\u0085\u2028\u2029]
|
||||
|
||||
string_char= \\ .
|
||||
| [^\"\n]
|
||||
| [^\"\r\n\u0085\u2028\u2029]
|
||||
|
||||
all_nl_wrap=[^\n]* [ \n]*
|
||||
all_no_nl=[^\n]+
|
||||
nl_wrap={LF} (\s|{LF})*
|
||||
all_no_nl=[^\r\n\u0085\u2028\u2029]+
|
||||
|
||||
|
||||
FLOAT= "0x" {hex_int} "." {hex_int} ([pP] [-+]? {dec_int})?
|
||||
|
@ -84,30 +86,35 @@ BUILTINIDENTIFIER="@"[A-Za-z_][A-Za-z0-9_]*
|
|||
%state UNT_SQUOT
|
||||
%state UNT_DQUOT
|
||||
|
||||
%state CDOC_CMT
|
||||
%state DOC_CMT
|
||||
%state LINE_CMT
|
||||
%state CMT_LINE
|
||||
%state CMT_DOC
|
||||
%state CMT_CDOC
|
||||
%%
|
||||
|
||||
//Comments
|
||||
|
||||
<YYINITIAL> "//!" { yybegin(CDOC_CMT); }
|
||||
<CDOC_CMT> {all_nl_wrap} "//!" { }
|
||||
<CDOC_CMT> {all_no_nl} { }
|
||||
<CDOC_CMT> \n { yybegin(YYINITIAL); return CONTAINER_DOC_COMMENT; }
|
||||
<CDOC_CMT> <<EOF>> { yybegin(YYINITIAL); return CONTAINER_DOC_COMMENT; }
|
||||
<YYINITIAL> "//!" { yybegin(CMT_CDOC); }
|
||||
<YYINITIAL> "////" { yybegin(CMT_LINE); }
|
||||
<YYINITIAL> "///" { yybegin(CMT_DOC); }
|
||||
<YYINITIAL> "//" { yybegin(CMT_LINE); }
|
||||
|
||||
<YYINITIAL> "///" { yybegin(DOC_CMT); }
|
||||
<DOC_CMT> {all_nl_wrap} "///" { }
|
||||
<DOC_CMT> {all_no_nl} { }
|
||||
<DOC_CMT> \n { yybegin(YYINITIAL); return DOC_COMMENT; }
|
||||
<DOC_CMT> <<EOF>> { yybegin(YYINITIAL); return DOC_COMMENT; }
|
||||
<CMT_LINE> {all_no_nl} { }
|
||||
<CMT_LINE> {nl_wrap} "////" { }
|
||||
<CMT_LINE> {nl_wrap} "///" { yypushback(yylength()); yybegin(YYINITIAL); return LINE_COMMENT; }
|
||||
<CMT_LINE> {nl_wrap} "//" { }
|
||||
<CMT_LINE> {LF} { yybegin(YYINITIAL); return LINE_COMMENT; }
|
||||
<CMT_LINE> <<EOF>> { yybegin(YYINITIAL); return LINE_COMMENT; }
|
||||
|
||||
<YYINITIAL> "//" { yybegin(LINE_CMT); }
|
||||
<LINE_CMT> {all_nl_wrap} "//" { }
|
||||
<LINE_CMT> {all_no_nl} { }
|
||||
<LINE_CMT> \n { yybegin(YYINITIAL); return LINE_COMMENT; }
|
||||
<LINE_CMT> <<EOF>> { yybegin(YYINITIAL); return LINE_COMMENT; }
|
||||
<CMT_DOC> {all_no_nl} { }
|
||||
<CMT_DOC> {nl_wrap} "////" { yypushback(yylength()); yybegin(YYINITIAL); return DOC_COMMENT; }
|
||||
<CMT_DOC> {nl_wrap} "///" { }
|
||||
<CMT_DOC> {LF} { yybegin(YYINITIAL); return DOC_COMMENT; }
|
||||
<CMT_DOC> <<EOF>> { yybegin(YYINITIAL); return DOC_COMMENT; }
|
||||
|
||||
<CMT_CDOC> {all_no_nl} { }
|
||||
<CMT_CDOC> {nl_wrap} "//!" { }
|
||||
<CMT_CDOC> {LF} { yybegin(YYINITIAL); return CONTAINER_DOC_COMMENT; }
|
||||
<CMT_CDOC> <<EOF>> { yybegin(YYINITIAL); return CONTAINER_DOC_COMMENT; }
|
||||
|
||||
//Symbols
|
||||
<YYINITIAL> "&" { return AMPERSAND; }
|
||||
|
@ -239,10 +246,11 @@ BUILTINIDENTIFIER="@"[A-Za-z_][A-Za-z0-9_]*
|
|||
<STR_LIT> {string_char}*"\"" { yybegin(YYINITIAL); return STRING_LITERAL_SINGLE; }
|
||||
<STR_LIT> <<EOF>> { yybegin(YYINITIAL); return BAD_DQUOT; }
|
||||
<STR_LIT> [^] { yypushback(1); yybegin(UNT_DQUOT); }
|
||||
|
||||
<YYINITIAL> "\\\\" { yybegin(STR_MULT_LINE); }
|
||||
<STR_MULT_LINE> {all_nl_wrap} "\\\\" { }
|
||||
<STR_MULT_LINE> {all_no_nl} { }
|
||||
<STR_MULT_LINE> \n { yybegin(YYINITIAL); return STRING_LITERAL_MULTI; }
|
||||
<STR_MULT_LINE> {nl_wrap} "\\\\" { }
|
||||
<STR_MULT_LINE> {LF} { yybegin(YYINITIAL); return STRING_LITERAL_MULTI; }
|
||||
<STR_MULT_LINE> <<EOF>> { yybegin(YYINITIAL); return STRING_LITERAL_MULTI; }
|
||||
|
||||
<YYINITIAL> {IDENTIFIER_PLAIN} { return IDENTIFIER; }
|
||||
|
@ -254,10 +262,10 @@ BUILTINIDENTIFIER="@"[A-Za-z_][A-Za-z0-9_]*
|
|||
<YYINITIAL> {BUILTINIDENTIFIER} { return BUILTINIDENTIFIER; }
|
||||
|
||||
<UNT_SQUOT> <<EOF>> { yybegin(YYINITIAL); return BAD_SQUOT; }
|
||||
<UNT_SQUOT> {CRLF} { yybegin(YYINITIAL); return BAD_SQUOT; }
|
||||
<UNT_SQUOT> {LF} { yybegin(YYINITIAL); return BAD_SQUOT; }
|
||||
<UNT_SQUOT> {all_no_nl} { }
|
||||
<UNT_DQUOT> <<EOF>> { yybegin(YYINITIAL); return BAD_DQUOT; }
|
||||
<UNT_DQUOT> {CRLF} { yybegin(YYINITIAL); return BAD_DQUOT; }
|
||||
<UNT_DQUOT> {LF} { yybegin(YYINITIAL); return BAD_DQUOT; }
|
||||
<UNT_DQUOT> {all_no_nl} { }
|
||||
|
||||
<YYINITIAL> {WHITE_SPACE} { return WHITE_SPACE; }
|
||||
|
|
|
@ -36,8 +36,10 @@ import static com.falsepattern.zigbrains.zon.psi.ZonTypes.*;
|
|||
%type IElementType
|
||||
%unicode
|
||||
|
||||
CRLF=\R
|
||||
WHITE_SPACE=[\s]+
|
||||
WHITE_SPACE=\s+
|
||||
|
||||
// visual studio parity
|
||||
LF=\r\n?|[\n\u0085\u2028\u2029]
|
||||
|
||||
bin=[01]
|
||||
bin_="_"? {bin}
|
||||
|
@ -54,13 +56,13 @@ dec_int={dec} {dec_}*
|
|||
hex_int={hex} {hex_}*
|
||||
|
||||
char_char= \\ .
|
||||
| [^\'\n]
|
||||
| [^\'\r\n\u0085\u2028\u2029]
|
||||
|
||||
string_char= \\ .
|
||||
| [^\"\n]
|
||||
| [^\"\r\n\u0085\u2028\u2029]
|
||||
|
||||
all_nl_wrap=[^\n]* [ \n]*
|
||||
all_no_nl=[^\n]+
|
||||
nl_wrap={LF} (\s|{LF})*
|
||||
all_no_nl=[^\r\n\u0085\u2028\u2029]+
|
||||
|
||||
|
||||
FLOAT= "0x" {hex_int} "." {hex_int} ([pP] [-+]? {dec_int})?
|
||||
|
@ -83,16 +85,16 @@ IDENTIFIER_PLAIN=[A-Za-z_][A-Za-z0-9_]*
|
|||
%state UNT_SQUOT
|
||||
%state UNT_DQUOT
|
||||
|
||||
%state LINE_CMT
|
||||
%state CMT_LINE
|
||||
%%
|
||||
|
||||
//Comments
|
||||
|
||||
<YYINITIAL> "//" { yybegin(LINE_CMT); }
|
||||
<LINE_CMT> {all_nl_wrap} "//" { }
|
||||
<LINE_CMT> {all_no_nl} { }
|
||||
<LINE_CMT> \n { yybegin(YYINITIAL); return LINE_COMMENT; }
|
||||
<LINE_CMT> <<EOF>> { yybegin(YYINITIAL); return LINE_COMMENT; }
|
||||
<YYINITIAL> "//" { yybegin(CMT_LINE); }
|
||||
<CMT_LINE> {all_no_nl} { }
|
||||
<CMT_LINE> {nl_wrap} "//" { }
|
||||
<CMT_LINE> \R { yybegin(YYINITIAL); return LINE_COMMENT; }
|
||||
<CMT_LINE> <<EOF>> { yybegin(YYINITIAL); return LINE_COMMENT; }
|
||||
|
||||
//Symbols
|
||||
|
||||
|
@ -123,9 +125,9 @@ IDENTIFIER_PLAIN=[A-Za-z_][A-Za-z0-9_]*
|
|||
<STR_LIT> [^] { yypushback(1); yybegin(UNT_DQUOT); }
|
||||
|
||||
<YYINITIAL> "\\\\" { yybegin(STR_MULT_LINE); }
|
||||
<STR_MULT_LINE> {all_nl_wrap} "\\\\" { }
|
||||
<STR_MULT_LINE> {all_no_nl} { }
|
||||
<STR_MULT_LINE> \n { yybegin(YYINITIAL); return STRING_LITERAL_MULTI; }
|
||||
<STR_MULT_LINE> {nl_wrap} "\\\\" { }
|
||||
<STR_MULT_LINE> {LF} { yybegin(YYINITIAL); return STRING_LITERAL_MULTI; }
|
||||
<STR_MULT_LINE> <<EOF>> { yybegin(YYINITIAL); return STRING_LITERAL_MULTI; }
|
||||
|
||||
//Numbers
|
||||
|
@ -144,10 +146,10 @@ IDENTIFIER_PLAIN=[A-Za-z_][A-Za-z0-9_]*
|
|||
//Error handling
|
||||
|
||||
<UNT_SQUOT> <<EOF>> { yybegin(YYINITIAL); return BAD_SQUOT; }
|
||||
<UNT_SQUOT> {CRLF} { yybegin(YYINITIAL); return BAD_SQUOT; }
|
||||
<UNT_SQUOT> {LF} { yybegin(YYINITIAL); return BAD_SQUOT; }
|
||||
<UNT_SQUOT> {all_no_nl} { }
|
||||
<UNT_DQUOT> <<EOF>> { yybegin(YYINITIAL); return BAD_DQUOT; }
|
||||
<UNT_DQUOT> {CRLF} { yybegin(YYINITIAL); return BAD_DQUOT; }
|
||||
<UNT_DQUOT> {LF} { yybegin(YYINITIAL); return BAD_DQUOT; }
|
||||
<UNT_DQUOT> {all_no_nl} { }
|
||||
|
||||
//Misc
|
||||
|
|
Loading…
Add table
Reference in a new issue