From f067ca647ce6d2d6a7ddc641f64b59625045682c Mon Sep 17 00:00:00 2001 From: FalsePattern Date: Mon, 11 Nov 2024 09:56:50 +0100 Subject: [PATCH] fix: character literal escape sequences and unicode --- CHANGELOG.md | 10 ++++ core/src/main/grammar/Zig.flex | 47 ++----------------- core/src/main/grammar/ZigString.flex | 23 ++++++++- .../zig/lexer/ZigHighlightingLexer.kt | 4 +- 4 files changed, 37 insertions(+), 47 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb9d87d9..0c1289fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,16 @@ Changelog structure reference: ## [Unreleased] +### Added + +- Zig + - Escape sequence highlighting in char literals + +### Fixed + +- Zig + - Unicode characters in char literals triggered an error + ## [20.0.1] ### Fixed diff --git a/core/src/main/grammar/Zig.flex b/core/src/main/grammar/Zig.flex index 5ebc0e23..3a011010 100644 --- a/core/src/main/grammar/Zig.flex +++ b/core/src/main/grammar/Zig.flex @@ -52,49 +52,8 @@ oct_int={oct} {oct_}* dec_int={dec} {dec_}* hex_int={hex} {hex_}* -ox80_oxBF=[\200-\277] -oxF4=\364 -ox80_ox8F=[\200-\217] -oxF1_oxF3=[\361-\363] -oxF0=\360 -ox90_0xBF=[\220-\277] -oxEE_oxEF=[\356-\357] -oxED=\355 -ox80_ox9F=[\200-\237] -oxE1_oxEC=[\341-\354] -oxE0=\340 -oxA0_oxBF=[\240-\277] -oxC2_oxDF=[\302-\337] - -// From https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/ -// First Byte Second Byte Third Byte Fourth Byte -// [0x00,0x7F] -// [0xC2,0xDF] [0x80,0xBF] -// 0xE0 [0xA0,0xBF] [0x80,0xBF] -// [0xE1,0xEC] [0x80,0xBF] [0x80,0xBF] -// 0xED [0x80,0x9F] [0x80,0xBF] -// [0xEE,0xEF] [0x80,0xBF] [0x80,0xBF] -// 0xF0 [0x90,0xBF] [0x80,0xBF] [0x80,0xBF] -// [0xF1,0xF3] [0x80,0xBF] [0x80,0xBF] [0x80,0xBF] -// 0xF4 [0x80,0x8F] [0x80,0xBF] [0x80,0xBF] - -mb_utf8_literal= {oxF4} {ox80_ox8F} {ox80_oxBF} {ox80_oxBF} - | {oxF1_oxF3} {ox80_oxBF} {ox80_oxBF} {ox80_oxBF} - | {oxF0} {ox90_0xBF} {ox80_oxBF} {ox80_oxBF} - | {oxEE_oxEF} {ox80_oxBF} {ox80_oxBF} - | 
{oxED} {ox80_ox9F} {ox80_oxBF} - | {oxE1_oxEC} {ox80_oxBF} {ox80_oxBF} - | {oxE0} {oxA0_oxBF} {ox80_oxBF} - | {oxC2_oxDF} {ox80_oxBF} - -ascii_char_not_nl_slash_squote=[\000-\011\013-\046\050-\133\135-\177] - -char_escape= "\\x" {hex} {hex} - | "\\u{" {hex}+ "}" - | "\\" [nr\\t'\"] -char_char= {mb_utf8_literal} - | {char_escape} - | {ascii_char_not_nl_slash_squote} +char_char= \\ . + | [^\'\n] string_char= \\ . | [^\"\n] @@ -261,7 +220,7 @@ BUILTINIDENTIFIER="@"[A-Za-z_][A-Za-z0-9_]* <YYINITIAL> "while" { return KEYWORD_WHILE; } <YYINITIAL> "'" { yybegin(CHAR_LIT); } -<CHAR_LIT> {char_char}"'" { yybegin(YYINITIAL); return CHAR_LITERAL; } +<CHAR_LIT> {char_char}*"'" { yybegin(YYINITIAL); return CHAR_LITERAL; } <CHAR_LIT> [^] { yypushback(1); yybegin(UNT_QUOT); } <YYINITIAL> {FLOAT} { return FLOAT; } diff --git a/core/src/main/grammar/ZigString.flex b/core/src/main/grammar/ZigString.flex index 5b5b6a86..24fb085d 100644 --- a/core/src/main/grammar/ZigString.flex +++ b/core/src/main/grammar/ZigString.flex @@ -40,17 +40,21 @@ import static com.intellij.psi.StringEscapesTokenTypes.*; hex=[0-9a-fA-F] char_escape_unicode= "\\x" {hex} {hex} | "\\u{" {hex}+ "}" -char_escape_unicode_invalid= "\\x" | "\\u" +char_escape_unicode_invalid= "\\x" .? .? | "\\u" ("{" [^}]* "}"?)? 
char_escape_single_valid= "\\" [nr\\t'\"] char_escape_single_invalid= "\\" [^nr\\t'\"] %state STR +%state CHAR +%state CHAR_END +%state CHAR_FINISH %% <YYINITIAL> { "\"" { yybegin(STR); return STRING_LITERAL_SINGLE; } + "'" { yybegin(CHAR); return CHAR_LITERAL; } [^] { return STRING_LITERAL_SINGLE; } } @@ -61,3 +65,20 @@ char_escape_single_invalid= "\\" [^nr\\t'\"] {char_escape_single_invalid} { return INVALID_CHARACTER_ESCAPE_TOKEN; } [^] { return STRING_LITERAL_SINGLE; } } + +<CHAR> { + {char_escape_unicode} { yybegin(CHAR_END); return VALID_STRING_ESCAPE_TOKEN; } + {char_escape_unicode_invalid} { yybegin(CHAR_END); return INVALID_UNICODE_ESCAPE_TOKEN; } + {char_escape_single_valid} { yybegin(CHAR_END); return VALID_STRING_ESCAPE_TOKEN; } + {char_escape_single_invalid} { yybegin(CHAR_END); return INVALID_CHARACTER_ESCAPE_TOKEN; } + [^] { yybegin(CHAR_END); return CHAR_LITERAL; } +} + +<CHAR_END> { + "'" { yybegin(CHAR_FINISH); return CHAR_LITERAL; } + [^] { return BAD_CHARACTER; } +} + +<CHAR_FINISH> { + [^] { return BAD_CHARACTER; } +} \ No newline at end of file diff --git a/core/src/main/kotlin/com/falsepattern/zigbrains/zig/lexer/ZigHighlightingLexer.kt b/core/src/main/kotlin/com/falsepattern/zigbrains/zig/lexer/ZigHighlightingLexer.kt index 798f39d2..0a9006fc 100644 --- a/core/src/main/kotlin/com/falsepattern/zigbrains/zig/lexer/ZigHighlightingLexer.kt +++ b/core/src/main/kotlin/com/falsepattern/zigbrains/zig/lexer/ZigHighlightingLexer.kt @@ -34,9 +34,9 @@ class ZigHighlightingLexer: LayeredLexer(ZigLexerAdapter()) { registerSelfStoppingLayer( MergingLexerAdapter( ZigLexerStringAdapter(), - TokenSet.create(ZigTypes.STRING_LITERAL_SINGLE) + TokenSet.create(ZigTypes.STRING_LITERAL_SINGLE, ZigTypes.CHAR_LITERAL) ), - arrayOf(ZigTypes.STRING_LITERAL_SINGLE), + arrayOf(ZigTypes.STRING_LITERAL_SINGLE, ZigTypes.CHAR_LITERAL), IElementType.EMPTY_ARRAY ) }