From c2c45b771a983078e6e55457f734b1f9844c8526 Mon Sep 17 00:00:00 2001 From: Aaron Janse Date: Thu, 4 Jul 2019 19:32:47 +0000 Subject: [PATCH] implement char escapes (#20) --- README.md | 16 +++++--- src/compiler/Error.elm | 5 +++ src/compiler/Stage/Parse/Parser.elm | 63 ++++++++++++++++++++++++----- tests/ParserTest.elm | 31 ++++++++++++++ 4 files changed, 100 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 433b2022..c1f8f54e 100644 --- a/README.md +++ b/README.md @@ -72,8 +72,8 @@ Oh God please yes! :heart: Feel free to look around the [help wanted] | ----------------- | -------------------- | -------------------- | ------------------ | ------------------ | -------------------- | ------------------ | ------------------ | | integers | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :warning: [[2]](#f2) | :heavy_check_mark: | :heavy_check_mark: | | floats | :x: [[3]](#f3) | :x: [[4]](#f4) | :x: [[3]](#f3) | :x: [[3]](#f3) | :x: [[5]](#f5) | :x: [[3]](#f3) | :x: [[3]](#f3) | -| characters | :warning: [[6]](#f6) | :warning: [[7]](#f7) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | -| strings | :warning: [[8]](#f8) | :warning: [[9]](#f9) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | +| characters | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | +| strings | :warning: [[6]](#f6) | :warning: [[7]](#f7) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | booleans | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | variables | :warning: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :warning: | :heavy_check_mark: | :heavy_check_mark: | | lists | :x: | :x: | :x: | :x: | :x: | :x: | :x: | @@ -98,10 +98,8 @@ Oh God please yes! :heart: Feel free to look around the [help wanted] 3. Not implemented; tracked in [#17](https://github.com/elm-in-elm/compiler/issues/17) 4. Not implemented; not tracked yet 5. To be optimized the same way Ints are; not tracked yet -6. Comprehensive tests missing; will be fixed in [#15](https://github.com/elm-in-elm/compiler/pull/15) -7. Escape sequences not implemented; not tracked yet -8. Comprehensive tests missing; tracked in [#21](https://github.com/elm-in-elm/compiler/issues/21) -9. Multiline strings (and maybe more) missing; not tracked yet +6. Comprehensive tests missing; not tracked yet +7. Multiline strings (and maybe more) missing; not tracked yet ## Prerequisites @@ -234,6 +232,12 @@ Make sure to format code before submitting a pull request!
Maxime Dantec + + +
+ Aaron Janse + diff --git a/src/compiler/Error.elm b/src/compiler/Error.elm index 454fece2..49b73035 100644 --- a/src/compiler/Error.elm +++ b/src/compiler/Error.elm @@ -80,6 +80,11 @@ type ParseProblem | ExpectingInt | ExpectingSingleQuote | ExpectingChar + | ExpectingEscapeBackslash + | ExpectingEscapeCharacter Char + | ExpectingUnicodeEscapeLeftBrace + | ExpectingUnicodeEscapeRightBrace + | InvalidUnicodeCodePoint | ExpectingDoubleQuote | ExpectingPlusOperator | ExpectingModuleDot -- `import Foo>. P.inContext InLiteralInt -{-| TODO escapes -TODO Unicode escapes --} + +-- for literalChar and, in the future, literalString + + +character = + P.oneOf + [ P.succeed identity + |. P.token (P.Token "\\" ExpectingEscapeBackslash) + |= P.oneOf + [ P.map (\_ -> '"') (P.token (P.Token "\"" (ExpectingEscapeCharacter '"'))) -- " (elm-vscode workaround) + , P.map (\_ -> '\'') (P.token (P.Token "'" (ExpectingEscapeCharacter '\''))) + , P.map (\_ -> '\n') (P.token (P.Token "n" (ExpectingEscapeCharacter 'n'))) + , P.map (\_ -> '\t') (P.token (P.Token "t" (ExpectingEscapeCharacter 't'))) + , P.map (\_ -> '\u{000D}') (P.token (P.Token "r" (ExpectingEscapeCharacter 'r'))) + , P.succeed identity + |. P.token (P.Token "u" (ExpectingEscapeCharacter 'u')) + |. P.token (P.Token "{" ExpectingUnicodeEscapeLeftBrace) + |= unicode + |. P.token (P.Token "}" ExpectingUnicodeEscapeRightBrace) + ] + , P.succeed identity + |= P.getChompedString (P.chompIf (always True) ExpectingChar) + |> P.andThen + (\string -> + string + |> String.uncons + |> Maybe.map (Tuple.first >> P.succeed) + |> Maybe.withDefault (P.problem (CompilerBug "Multiple characters chomped in `character`")) + ) + ] + + literalChar : Parser_ Literal literalChar = (P.succeed identity |. P.symbol (P.Token "'" ExpectingSingleQuote) - |= P.getChompedString (P.chompIf (always True) ExpectingChar) + |= character |. P.symbol (P.Token "'" ExpectingSingleQuote) ) + |> P.map Char + + +unicode : Parser_ Char +unicode = + P.getChompedString (P.chompWhile Char.isHexDigit) |> P.andThen - (\string -> - string - |> String.uncons - |> Maybe.map (Tuple.first >> Char >> P.succeed) - |> Maybe.withDefault (P.problem (CompilerBug "Multiple characters chomped in `literalChar`")) + (\str -> + let + len = + String.length str + in + if len < 4 || len > 6 then + P.problem InvalidUnicodeCodePoint + + else + str + |> String.toLower + |> Hex.fromString + |> Result.map Char.fromCode + |> Result.map P.succeed + |> Result.withDefault (P.problem InvalidUnicodeCodePoint) ) diff --git a/tests/ParserTest.elm b/tests/ParserTest.elm index 0af55c17..efd1928a 100644 --- a/tests/ParserTest.elm +++ b/tests/ParserTest.elm @@ -555,6 +555,37 @@ expr = , "'A'" , Ok (Literal (Char 'A')) ) + + -- https://github.com/elm/compiler/blob/dcbe51fa22879f83b5d94642e117440cb5249bb1/compiler/src/Parse/String.hs#L279-L285 + , ( "escape n" + , "'\\n'" + , Ok (Literal (Char '\n')) + ) + , ( "escape r" + , "'\\r'" + , Ok (Literal (Char '\u{000D}')) + ) + , ( "escape t" + , "'\\t'" + , Ok (Literal (Char '\t')) + ) + , ( "double quote" + , "'\\\"'" + , Ok (Literal (Char '"')) + -- " (for vscode-elm bug) + ) + , ( "single quote" + , "'\\''" + , Ok (Literal (Char '\'')) + ) + , ( "emoji" + , "'😃'" + , Ok (Literal (Char '😃')) + ) + , ( "escaped unicode code point" + , "'\\u{1F648}'" + , Ok (Literal (Char '🙈')) + ) ] ) , ( "literal string"