fix: character literal escape sequences and unicode

This commit is contained in:
FalsePattern 2024-11-11 09:56:50 +01:00
parent c9a3388c57
commit f067ca647c
Signed by: falsepattern
GPG key ID: E930CDEC50C50E23
4 changed files with 37 additions and 47 deletions

View file

@ -17,6 +17,16 @@ Changelog structure reference:
## [Unreleased] ## [Unreleased]
### Added
- Zig
- Escape sequence highlighting in char literals
### Fixed
- Zig
- Unicode characters in char literals triggered an error
## [20.0.1] ## [20.0.1]
### Fixed ### Fixed

View file

@ -52,49 +52,8 @@ oct_int={oct} {oct_}*
dec_int={dec} {dec_}* dec_int={dec} {dec_}*
hex_int={hex} {hex_}* hex_int={hex} {hex_}*
ox80_oxBF=[\200-\277] char_char= \\ .
oxF4=\364 | [^\'\n]
ox80_ox8F=[\200-\217]
oxF1_oxF3=[\361-\363]
oxF0=\360
ox90_0xBF=[\220-\277]
oxEE_oxEF=[\356-\357]
oxED=\355
ox80_ox9F=[\200-\237]
oxE1_oxEC=[\341-\354]
oxE0=\340
oxA0_oxBF=[\240-\277]
oxC2_oxDF=[\302-\337]
// From https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/
// First Byte Second Byte Third Byte Fourth Byte
// [0x00,0x7F]
// [0xC2,0xDF] [0x80,0xBF]
// 0xE0 [0xA0,0xBF] [0x80,0xBF]
// [0xE1,0xEC] [0x80,0xBF] [0x80,0xBF]
// 0xED [0x80,0x9F] [0x80,0xBF]
// [0xEE,0xEF] [0x80,0xBF] [0x80,0xBF]
// 0xF0 [0x90,0xBF] [0x80,0xBF] [0x80,0xBF]
// [0xF1,0xF3] [0x80,0xBF] [0x80,0xBF] [0x80,0xBF]
// 0xF4 [0x80,0x8F] [0x80,0xBF] [0x80,0xBF]
mb_utf8_literal= {oxF4} {ox80_ox8F} {ox80_oxBF} {ox80_oxBF}
| {oxF1_oxF3} {ox80_oxBF} {ox80_oxBF} {ox80_oxBF}
| {oxF0} {ox90_0xBF} {ox80_oxBF} {ox80_oxBF}
| {oxEE_oxEF} {ox80_oxBF} {ox80_oxBF}
| {oxED} {ox80_ox9F} {ox80_oxBF}
| {oxE1_oxEC} {ox80_oxBF} {ox80_oxBF}
| {oxE0} {oxA0_oxBF} {ox80_oxBF}
| {oxC2_oxDF} {ox80_oxBF}
ascii_char_not_nl_slash_squote=[\000-\011\013-\046\050-\133\135-\177]
char_escape= "\\x" {hex} {hex}
| "\\u{" {hex}+ "}"
| "\\" [nr\\t'\"]
char_char= {mb_utf8_literal}
| {char_escape}
| {ascii_char_not_nl_slash_squote}
string_char= \\ . string_char= \\ .
| [^\"\n] | [^\"\n]
@ -261,7 +220,7 @@ BUILTINIDENTIFIER="@"[A-Za-z_][A-Za-z0-9_]*
<YYINITIAL> "while" { return KEYWORD_WHILE; } <YYINITIAL> "while" { return KEYWORD_WHILE; }
<YYINITIAL> "'" { yybegin(CHAR_LIT); } <YYINITIAL> "'" { yybegin(CHAR_LIT); }
<CHAR_LIT> {char_char}"'" { yybegin(YYINITIAL); return CHAR_LITERAL; } <CHAR_LIT> {char_char}*"'" { yybegin(YYINITIAL); return CHAR_LITERAL; }
<CHAR_LIT> [^] { yypushback(1); yybegin(UNT_QUOT); } <CHAR_LIT> [^] { yypushback(1); yybegin(UNT_QUOT); }
<YYINITIAL> {FLOAT} { return FLOAT; } <YYINITIAL> {FLOAT} { return FLOAT; }

View file

@ -40,17 +40,21 @@ import static com.intellij.psi.StringEscapesTokenTypes.*;
hex=[0-9a-fA-F] hex=[0-9a-fA-F]
char_escape_unicode= "\\x" {hex} {hex} | "\\u{" {hex}+ "}" char_escape_unicode= "\\x" {hex} {hex} | "\\u{" {hex}+ "}"
char_escape_unicode_invalid= "\\x" | "\\u" char_escape_unicode_invalid= "\\x" .? .? | "\\u" ("{" [^}]* "}"?)?
char_escape_single_valid= "\\" [nr\\t'\"] char_escape_single_valid= "\\" [nr\\t'\"]
char_escape_single_invalid= "\\" [^nr\\t'\"] char_escape_single_invalid= "\\" [^nr\\t'\"]
%state STR %state STR
%state CHAR
%state CHAR_END
%state CHAR_FINISH
%% %%
<YYINITIAL> { <YYINITIAL> {
"\"" { yybegin(STR); return STRING_LITERAL_SINGLE; } "\"" { yybegin(STR); return STRING_LITERAL_SINGLE; }
"'" { yybegin(CHAR); return CHAR_LITERAL; }
[^] { return STRING_LITERAL_SINGLE; } [^] { return STRING_LITERAL_SINGLE; }
} }
@ -61,3 +65,20 @@ char_escape_single_invalid= "\\" [^nr\\t'\"]
{char_escape_single_invalid} { return INVALID_CHARACTER_ESCAPE_TOKEN; } {char_escape_single_invalid} { return INVALID_CHARACTER_ESCAPE_TOKEN; }
[^] { return STRING_LITERAL_SINGLE; } [^] { return STRING_LITERAL_SINGLE; }
} }
<CHAR> {
{char_escape_unicode} { yybegin(CHAR_END); return VALID_STRING_ESCAPE_TOKEN; }
{char_escape_unicode_invalid} { yybegin(CHAR_END); return INVALID_UNICODE_ESCAPE_TOKEN; }
{char_escape_single_valid} { yybegin(CHAR_END); return VALID_STRING_ESCAPE_TOKEN; }
{char_escape_single_invalid} { yybegin(CHAR_END); return INVALID_CHARACTER_ESCAPE_TOKEN; }
[^] { yybegin(CHAR_END); return CHAR_LITERAL; }
}
<CHAR_END> {
"'" { yybegin(CHAR_FINISH); return CHAR_LITERAL; }
[^] { return BAD_CHARACTER; }
}
<CHAR_FINISH> {
[^] { return BAD_CHARACTER; }
}

View file

@ -34,9 +34,9 @@ class ZigHighlightingLexer: LayeredLexer(ZigLexerAdapter()) {
registerSelfStoppingLayer( registerSelfStoppingLayer(
MergingLexerAdapter( MergingLexerAdapter(
ZigLexerStringAdapter(), ZigLexerStringAdapter(),
TokenSet.create(ZigTypes.STRING_LITERAL_SINGLE) TokenSet.create(ZigTypes.STRING_LITERAL_SINGLE, ZigTypes.CHAR_LITERAL)
), ),
arrayOf(ZigTypes.STRING_LITERAL_SINGLE), arrayOf(ZigTypes.STRING_LITERAL_SINGLE, ZigTypes.CHAR_LITERAL),
IElementType.EMPTY_ARRAY IElementType.EMPTY_ARRAY
) )
} }