fix: character literal escape sequences and unicode
This commit is contained in:
parent
c9a3388c57
commit
f067ca647c
4 changed files with 37 additions and 47 deletions
10
CHANGELOG.md
10
CHANGELOG.md
|
@ -17,6 +17,16 @@ Changelog structure reference:
|
|||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
- Zig
|
||||
- Escape sequence highlighting in char literals
|
||||
|
||||
### Fixed
|
||||
|
||||
- Zig
|
||||
- Unicode characters in char literals triggered an error
|
||||
|
||||
## [20.0.1]
|
||||
|
||||
### Fixed
|
||||
|
|
|
@ -52,49 +52,8 @@ oct_int={oct} {oct_}*
|
|||
dec_int={dec} {dec_}*
|
||||
hex_int={hex} {hex_}*
|
||||
|
||||
ox80_oxBF=[\200-\277]
|
||||
oxF4=\364
|
||||
ox80_ox8F=[\200-\217]
|
||||
oxF1_oxF3=[\361-\363]
|
||||
oxF0=\360
|
||||
ox90_0xBF=[\220-\277]
|
||||
oxEE_oxEF=[\356-\357]
|
||||
oxED=\355
|
||||
ox80_ox9F=[\200-\237]
|
||||
oxE1_oxEC=[\341-\354]
|
||||
oxE0=\340
|
||||
oxA0_oxBF=[\240-\277]
|
||||
oxC2_oxDF=[\302-\337]
|
||||
|
||||
// From https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/
|
||||
// First Byte Second Byte Third Byte Fourth Byte
|
||||
// [0x00,0x7F]
|
||||
// [0xC2,0xDF] [0x80,0xBF]
|
||||
// 0xE0 [0xA0,0xBF] [0x80,0xBF]
|
||||
// [0xE1,0xEC] [0x80,0xBF] [0x80,0xBF]
|
||||
// 0xED [0x80,0x9F] [0x80,0xBF]
|
||||
// [0xEE,0xEF] [0x80,0xBF] [0x80,0xBF]
|
||||
// 0xF0 [0x90,0xBF] [0x80,0xBF] [0x80,0xBF]
|
||||
// [0xF1,0xF3] [0x80,0xBF] [0x80,0xBF] [0x80,0xBF]
|
||||
// 0xF4 [0x80,0x8F] [0x80,0xBF] [0x80,0xBF]
|
||||
|
||||
mb_utf8_literal= {oxF4} {ox80_ox8F} {ox80_oxBF} {ox80_oxBF}
|
||||
| {oxF1_oxF3} {ox80_oxBF} {ox80_oxBF} {ox80_oxBF}
|
||||
| {oxF0} {ox90_0xBF} {ox80_oxBF} {ox80_oxBF}
|
||||
| {oxEE_oxEF} {ox80_oxBF} {ox80_oxBF}
|
||||
| {oxED} {ox80_ox9F} {ox80_oxBF}
|
||||
| {oxE1_oxEC} {ox80_oxBF} {ox80_oxBF}
|
||||
| {oxE0} {oxA0_oxBF} {ox80_oxBF}
|
||||
| {oxC2_oxDF} {ox80_oxBF}
|
||||
|
||||
ascii_char_not_nl_slash_squote=[\000-\011\013-\046\050-\133\135-\177]
|
||||
|
||||
char_escape= "\\x" {hex} {hex}
|
||||
| "\\u{" {hex}+ "}"
|
||||
| "\\" [nr\\t'\"]
|
||||
char_char= {mb_utf8_literal}
|
||||
| {char_escape}
|
||||
| {ascii_char_not_nl_slash_squote}
|
||||
char_char= \\ .
|
||||
| [^\'\n]
|
||||
|
||||
string_char= \\ .
|
||||
| [^\"\n]
|
||||
|
@ -261,7 +220,7 @@ BUILTINIDENTIFIER="@"[A-Za-z_][A-Za-z0-9_]*
|
|||
<YYINITIAL> "while" { return KEYWORD_WHILE; }
|
||||
|
||||
<YYINITIAL> "'" { yybegin(CHAR_LIT); }
|
||||
<CHAR_LIT> {char_char}"'" { yybegin(YYINITIAL); return CHAR_LITERAL; }
|
||||
<CHAR_LIT> {char_char}*"'" { yybegin(YYINITIAL); return CHAR_LITERAL; }
|
||||
<CHAR_LIT> [^] { yypushback(1); yybegin(UNT_QUOT); }
|
||||
|
||||
<YYINITIAL> {FLOAT} { return FLOAT; }
|
||||
|
|
|
@ -40,17 +40,21 @@ import static com.intellij.psi.StringEscapesTokenTypes.*;
|
|||
hex=[0-9a-fA-F]
|
||||
|
||||
char_escape_unicode= "\\x" {hex} {hex} | "\\u{" {hex}+ "}"
|
||||
char_escape_unicode_invalid= "\\x" | "\\u"
|
||||
char_escape_unicode_invalid= "\\x" .? .? | "\\u" ("{" [^}]* "}"?)?
|
||||
|
||||
char_escape_single_valid= "\\" [nr\\t'\"]
|
||||
char_escape_single_invalid= "\\" [^nr\\t'\"]
|
||||
|
||||
%state STR
|
||||
%state CHAR
|
||||
%state CHAR_END
|
||||
%state CHAR_FINISH
|
||||
%%
|
||||
|
||||
|
||||
<YYINITIAL> {
|
||||
"\"" { yybegin(STR); return STRING_LITERAL_SINGLE; }
|
||||
"'" { yybegin(CHAR); return CHAR_LITERAL; }
|
||||
[^] { return STRING_LITERAL_SINGLE; }
|
||||
}
|
||||
|
||||
|
@ -61,3 +65,20 @@ char_escape_single_invalid= "\\" [^nr\\t'\"]
|
|||
{char_escape_single_invalid} { return INVALID_CHARACTER_ESCAPE_TOKEN; }
|
||||
[^] { return STRING_LITERAL_SINGLE; }
|
||||
}
|
||||
|
||||
<CHAR> {
|
||||
{char_escape_unicode} { yybegin(CHAR_END); return VALID_STRING_ESCAPE_TOKEN; }
|
||||
{char_escape_unicode_invalid} { yybegin(CHAR_END); return INVALID_UNICODE_ESCAPE_TOKEN; }
|
||||
{char_escape_single_valid} { yybegin(CHAR_END); return VALID_STRING_ESCAPE_TOKEN; }
|
||||
{char_escape_single_invalid} { yybegin(CHAR_END); return INVALID_CHARACTER_ESCAPE_TOKEN; }
|
||||
[^] { yybegin(CHAR_END); return CHAR_LITERAL; }
|
||||
}
|
||||
|
||||
<CHAR_END> {
|
||||
"'" { yybegin(CHAR_FINISH); return CHAR_LITERAL; }
|
||||
[^] { return BAD_CHARACTER; }
|
||||
}
|
||||
|
||||
<CHAR_FINISH> {
|
||||
[^] { return BAD_CHARACTER; }
|
||||
}
|
|
@ -34,9 +34,9 @@ class ZigHighlightingLexer: LayeredLexer(ZigLexerAdapter()) {
|
|||
registerSelfStoppingLayer(
|
||||
MergingLexerAdapter(
|
||||
ZigLexerStringAdapter(),
|
||||
TokenSet.create(ZigTypes.STRING_LITERAL_SINGLE)
|
||||
TokenSet.create(ZigTypes.STRING_LITERAL_SINGLE, ZigTypes.CHAR_LITERAL)
|
||||
),
|
||||
arrayOf(ZigTypes.STRING_LITERAL_SINGLE),
|
||||
arrayOf(ZigTypes.STRING_LITERAL_SINGLE, ZigTypes.CHAR_LITERAL),
|
||||
IElementType.EMPTY_ARRAY
|
||||
)
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue