diff --git a/Tomlet.Tests/QuotedKeyTests.cs b/Tomlet.Tests/QuotedKeyTests.cs
new file mode 100644
index 0000000..35acbfe
--- /dev/null
+++ b/Tomlet.Tests/QuotedKeyTests.cs
@@ -0,0 +1,97 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Tomlet.Exceptions;
+using Xunit;
+
+namespace Tomlet.Tests
+{
+    public class QuotedKeysTests
+    {
+        [Theory]
+        [InlineData("\"a.'b\"", "a.'b")] // a.'b
+        [InlineData("\"a.\\\"b\"", "a.\"b")] // a."b
+        [InlineData("\"\"", "")] //
+        [InlineData("\"\\\"\"", "\"")] // "
+        [InlineData("\"a.🐱b\"", "a.🐱b")] // a.🐱b
+        [InlineData("'a.\"b'", "a.\"b")] // a."b
+        [InlineData("'a.\\\"b'", "a.\\\"b")] // a.\"b
+        [InlineData("''", "")] //
+        [InlineData("'\"'", "\"")] // "
+        [InlineData("'\\\"'", "\\\"")] // \"
+        [InlineData("'a.🐱b'", "a.🐱b")] // a.🐱b
+        [InlineData("\"a.b\\\".c\"", "a.b\".c")] // a.b".c
+        public void NonDottedKeysWork(string inputKey, string expectedKey)
+        {
+            var inputString = $"{inputKey} = \"value\"";
+            var dict = TomletMain.To<Dictionary<string, string>>(inputString);
+            Assert.Contains(expectedKey, (IDictionary<string, string>)dict);
+        }
+
+        [Theory]
+        [InlineData("\"a\"b\"")]
+        [InlineData("'a'b'")]
+        [InlineData("'a\\'b'")]
+        //[InlineData("a\"b")] // Illegal in specs, but no harm in reading it
+        //[InlineData("a'b")] // Illegal in specs, but no harm in reading it
+        //[InlineData("a🐱b")] // Illegal in specs, but no harm in reading it
+        [InlineData("'ab\"")]
+        public void IllegalNonDottedKeysThrow(string inputKey)
+        {
+            var inputString = $"{inputKey} = \"value\"";
+            Assert.ThrowsAny<TomlException>(() => _ = TomletMain.To<Dictionary<string, string>>(inputString));
+        }
+
+        [Theory]
+        [InlineData("'a.b'.c", "a.b", "c")]
+        [InlineData("'a.b'.\"c\"", "a.b", "c")]
+        [InlineData("a.'b.c'", "a", "b.c")]
+        [InlineData("\"a\".'b.c'", "a", "b.c")]
+        [InlineData("\"a\".\"b.c\"", "a", "b.c")]
+        [InlineData("'a.\"b'.c", "a.\"b", "c")]
+        [InlineData("\"a.b\\\"c\".d", "a.b\"c", "d")]
+        public void DottedKeysWork(string inputKey, string expectedKey, string expectedSubkey)
+        {
+            var inputString = $"{inputKey} = \"value\"";
+            var dict = TomletMain.To<Dictionary<string, Dictionary<string, string>>>(inputString);
+            var subDict = Assert.Contains(expectedKey, (IDictionary<string, Dictionary<string, string>>)dict);
+            Assert.Contains(expectedSubkey, (IDictionary<string, string>)subDict);
+        }
+
+        [Theory]
+        [InlineData("'a.\"b'.c\"")]
+        [InlineData("\"a.bc\".d\"")]
+        [InlineData("\"a.b\"c\".d\"")]
+        [InlineData("\"a.b\"c\".d")]
+        [InlineData("\"a.b\\\"c\".d\"")]
+        [InlineData("'a.b'c'.d")]
+        [InlineData("'a.b\\'c'.d")]
+        [InlineData("'a.bc'.d'")]
+        public void IllegalDottedKeysThrow(string inputKey)
+        {
+            var inputString = $"{inputKey} = \"value\"";
+            Assert.ThrowsAny<TomlException>(() => _ = TomletMain.To<Dictionary<string, string>>(inputString));
+        }
+
+
+        [Theory]
+        [InlineData("\"a\"b\"", @"(?:'""a""b""')|(?:""\\""a\\""b\\"""")")] // Simple or Literal
+        [InlineData("'a'b'", @"""'a'b'""")] // Simple only
+        [InlineData("'a\\'b'", @"""'a\\'b'""")] // Simple only
+        [InlineData("a\"b", @"(?:'a""b')|(?:""a\\""b"")")] // Simple or Literal
+        [InlineData("a'b", @"""a'b""")] // Simple only
+        [InlineData("a🐱b", @"(?:'a🐱b')|(?:""a🐱b"")")] // Simple or Literal
+        [InlineData("'ab\"", @"""'ab\\""""")] // Simple only
+        public void SerializingIllegalKeysWorks(string inputKey, string expectedOutput)
+        {
+            var dict = new Dictionary<string, string>
+            {
+                { inputKey, "a" },
+            };
+            var document = TomletMain.DocumentFrom(dict);
+            Assert.NotEmpty(document.Keys);
+            var parsedKey = document.Keys.First();
+            Assert.Matches(expectedOutput, parsedKey);
+        }
+    }
+}
\ No newline at end of file
diff --git a/Tomlet/Exceptions/InvalidTomlKeyException.cs b/Tomlet/Exceptions/InvalidTomlKeyException.cs
index a300c69..d9186f7 100644
--- a/Tomlet/Exceptions/InvalidTomlKeyException.cs
+++ b/Tomlet/Exceptions/InvalidTomlKeyException.cs
@@ -9,6 +9,6 @@ public InvalidTomlKeyException(string key)
             _key = key;
         }
 
-        public override string Message => $"The string |{_key}| (between the two bars) contains at least one of both a double quote and a single quote, so it cannot be used for a TOML key.";
+        public override string Message => $"The string |{_key}| (between the two bars) contains invalid characters, so it cannot be used for a TOML key.";
     }
 }
\ No newline at end of file
diff --git a/Tomlet/TomlKeyUtils.cs b/Tomlet/TomlKeyUtils.cs
index ced8714..07a07b9 100644
--- a/Tomlet/TomlKeyUtils.cs
+++ b/Tomlet/TomlKeyUtils.cs
@@ -1,57 +1,164 @@
 using System;
+using System.Text.RegularExpressions;
+using Tomlet.Exceptions;
 
 namespace Tomlet
 {
     internal static class TomlKeyUtils
     {
+        // Bare (unquoted) keys must be non-empty and may only contain ASCII letters, digits, '_' and '-'.
+        // Anchored with \z rather than $, because $ also matches just before a trailing newline and
+        // would let a key such as "abc\n" be written out unquoted.
+        private static readonly Regex UnquotedKeyRegex = new Regex(@"^[a-zA-Z0-9_-]+\z");
+
+        /// <summary>
+        /// Splits a key into its first part and whatever follows the first separating dot. Quoted parts
+        /// keep their surrounding quotes; dots inside a quoted part do not separate.
+        /// </summary>
+        /// <param name="key">The raw key, for example <c>a.b</c>, <c>'a.b'.c</c> or <c>"a.b".c</c>.</param>
+        /// <param name="ourKeyName">Receives the first part of the key.</param>
+        /// <param name="restOfKey">Receives the rest of the key, or an empty string if there is none.</param>
+        /// <exception cref="InvalidTomlKeyException">
+        /// A quoted part is not closed, is followed by something other than a dot, or the key ends with a dot.
+        /// </exception>
         internal static void GetTopLevelAndSubKeys(string key, out string ourKeyName, out string restOfKey)
         {
-            var wholeKeyIsQuoted = key.StartsWith("\"") && key.EndsWith("\"") || key.StartsWith("'") && key.EndsWith("'");
-            var firstPartOfKeyIsQuoted = !wholeKeyIsQuoted && (key.StartsWith("\"") || key.StartsWith("'"));
+            var isBasicString = key.StartsWith("\"", StringComparison.Ordinal);
+            var isLiteralString = key.StartsWith("'", StringComparison.Ordinal);
 
-            if (!key.Contains(".") || wholeKeyIsQuoted)
+            if (isLiteralString)
             {
-                ourKeyName = key;
-                restOfKey = "";
+                // Literal strings can't be escaped
+                var literalEnd = key.IndexOf('\'', 1);
+                if (literalEnd == -1)
+                {
+                    // Literal string was never closed
+                    // TODO: Find better exception
+                    throw new InvalidTomlKeyException(key);
+                }
+
+                if (literalEnd + 1 == key.Length)
+                {
+                    // Full key, no splitting needed.
+                    ourKeyName = key;
+                    restOfKey = "";
+                    return;
+                }
+
+                if (key[literalEnd + 1] != '.')
+                {
+                    // Literal strings cannot contain '
+                    // TODO: Find better exception
+                    throw new InvalidTomlKeyException(key);
+                }
+
+                if (literalEnd + 2 == key.Length)
+                {
+                    // You cannot have an empty unquoted key
+                    // TODO: Find better exception
+                    throw new InvalidTomlKeyException(key);
+                }
+
+                ourKeyName = key.Substring(0, literalEnd + 1);
+                restOfKey = key.Substring(literalEnd + 2);
                 return;
             }
 
-            //Unquoted dotted key means we put this in a sub-table.
+            if (!isBasicString)
+            {
+                var firstDot = key.IndexOf(".", StringComparison.Ordinal);
+                if (firstDot == -1)
+                {
+                    // Key is undotted.
+                    // We could make a check for illegal characters here, but there isn't much point to it.
+                    ourKeyName = key;
+                    restOfKey = "";
+                    return;
+                }
+
+                if (firstDot + 1 == key.Length)
+                {
+                    // You cannot have an empty unquoted key
+                    // TODO: Find better exception
+                    throw new InvalidTomlKeyException(key);
+                }
+
+                ourKeyName = key.Substring(0, firstDot);
+                restOfKey = key.Substring(firstDot + 1);
+                return;
+            }
 
-            //First get the name of the key in *this* table.
-            if (!firstPartOfKeyIsQuoted)
+            var firstUnquote = FindNextUnescapedQuote(key, 1);
+            if (firstUnquote == -1)
             {
-                var split = key.Split('.');
-                ourKeyName = split[0];
+                // Quoted string was never closed
+                // TODO: Find better exception
+                throw new InvalidTomlKeyException(key);
             }
-            else
+
+            if (firstUnquote + 1 == key.Length)
             {
+                // Full key, no splitting needed.
                 ourKeyName = key;
-                var keyNameWithoutOpeningQuote = ourKeyName.Substring(1);
-                if (ourKeyName.Contains("\""))
-                    ourKeyName = ourKeyName.Substring(0, 2 + keyNameWithoutOpeningQuote.IndexOf("\"", StringComparison.Ordinal));
-                else
-                    ourKeyName = ourKeyName.Substring(0, 2 + keyNameWithoutOpeningQuote.IndexOf("'", StringComparison.Ordinal));
+                restOfKey = "";
+                return;
             }
 
-            //And get the remainder of the key, relative to the sub-table.
-            restOfKey = key.Substring(ourKeyName.Length + 1);
+            if (key[firstUnquote + 1] != '.')
+            {
+                // Quoted strings cannot contain unescaped "
+                // TODO: Find better exception
+                throw new InvalidTomlKeyException(key);
+            }
 
-            ourKeyName = ourKeyName.Trim();
+            if (firstUnquote + 2 == key.Length)
+            {
+                // You cannot have an empty unquoted key
+                // TODO: Find better exception
+                throw new InvalidTomlKeyException(key);
+            }
+
+            ourKeyName = key.Substring(0, firstUnquote + 1);
+            restOfKey = key.Substring(firstUnquote + 2);
         }
 
-        public static string FullStringToProperKey(string key)
-        {
-            GetTopLevelAndSubKeys(key, out var a, out var b);
 
-            var keyLooksQuoted = key.StartsWith("\"") || key.StartsWith("'");
-            var keyLooksDotted = key.Contains(".");
-            if (keyLooksQuoted || keyLooksDotted || !string.IsNullOrEmpty(b))
+        /// <summary>
+        /// Finds the first double quote at or after <paramref name="startingIndex"/> that is not escaped
+        /// by a preceding backslash.
+        /// </summary>
+        /// <returns>The index of that quote, or -1 if there is none.</returns>
+        private static int FindNextUnescapedQuote(string input, int startingIndex)
+        {
+            var i = startingIndex;
+            var isEscaped = false;
+            for (; i < input.Length; i++)
             {
-                return TomlUtils.AddCorrectQuotes(key);
+                if (input[i] == '\\')
+                {
+                    isEscaped = !isEscaped;
+                    continue;
+                }
+
+                if (input[i] != '"' || isEscaped)
+                {
+                    isEscaped = false;
+                    continue;
+                }
+
+                return i;
             }
-
-            return key;
+
+            return -1; // Return -1 if no unescaped quote is found
+        }
+
+        /// <summary>
+        /// Returns <paramref name="key"/> unchanged if it is a valid bare key; otherwise returns it quoted
+        /// via <see cref="TomlUtils.AddCorrectQuotes"/>.
+        /// </summary>
+        internal static string FullStringToProperKey(string key)
+        {
+            return UnquotedKeyRegex.IsMatch(key) ? key : TomlUtils.AddCorrectQuotes(key);
         }
     }
 }
\ No newline at end of file
diff --git a/Tomlet/TomlUtils.cs b/Tomlet/TomlUtils.cs
index 26c5f28..7453592 100644
--- a/Tomlet/TomlUtils.cs
+++ b/Tomlet/TomlUtils.cs
@@ -1,26 +1,65 @@
-using Tomlet.Exceptions;
+using System.Text.RegularExpressions;
+using Tomlet.Exceptions;
 
 namespace Tomlet
 {
     internal static class TomlUtils
     {
+        // Matches strings that can be written as a basic ("...") string once EscapeStringValue has been
+        // applied. Control characters without a short escape are rejected; *technically* they could be
+        // converted to \u escapes, but somebody else can implement this functionality.
+        // TOML defines non-ascii as %x80-D7FF / %xE000-10FFFF. .NET strings are UTF-16, so code points above
+        // U+FFFF show up as a high surrogate followed by a low surrogate; only well-formed pairs are accepted.
+        // \z is used instead of $ so that a trailing newline can never slip past the check.
+        private static readonly Regex CanBeBasicRegex =
+            new Regex(@"^(?:[\x08-\x0A\x0C-\x0D\x20-\x7E\x80-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF])*\z");
+
+        // Matches strings that can be written verbatim as a literal ('...') string: no single quote and no
+        // control character other than tab. The empty string is allowed (it becomes the empty key '').
+        private static readonly Regex CanBeLiteralRegex =
+            new Regex(@"^(?:[\x09\x20-\x26\x28-\x7E\x80-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF])*\z");
+
+        /// <summary>
+        /// Escapes <paramref name="key"/> so that it can be placed between the double quotes of a basic string.
+        /// </summary>
         public static string EscapeStringValue(string key)
         {
-            var escaped = key.Replace(@"\", @"\\")
-                .Replace("\n", @"\n")
-                .Replace("\r", "");
-
+            // Escaped characters allowed in simple strings:
+            // https://github.com/toml-lang/toml/blob/8eae5e1c005bc5836098505f85a7aa06568999dd/toml.abnf#L74
+            // Backslashes are doubled first, so a literal "\u" or "\U" in the input comes out as "\\u" / "\\U"
+            // and cannot be mistaken for a unicode escape. It must not be escaped a second time: that would
+            // produce "\\\u", which a parser reads as a backslash followed by a real unicode escape.
+            var escaped =
+                key.Replace(@"\", @"\\")
+                    .Replace("\n", @"\n")
+                    .Replace("\t", @"\t")
+                    .Replace("\"", @"\""")
+                    .Replace("\b", @"\b") // Backspace
+                    .Replace("\f", @"\f") // Form Feed
+                    .Replace("\r", @"\r"); // Carriage Return
             return escaped;
         }
 
+        /// <summary>
+        /// Wraps <paramref name="key"/> in quotes: single quotes (unescaped) when it can be a literal string,
+        /// otherwise double quotes with the content escaped.
+        /// </summary>
+        /// <exception cref="InvalidTomlKeyException">The key contains characters that cannot be written in either form.</exception>
         public static string AddCorrectQuotes(string key)
         {
-            if (key.Contains("'") && key.Contains("\""))
-                throw new InvalidTomlKeyException(key);
-
-            if (key.Contains("\""))
+            if (CanBeLiteralRegex.IsMatch(key))
+            {
+                // Literal strings aren't escaped
                 return $"'{key}'";
+            }
+
+            if (!CanBeBasicRegex.IsMatch(key))
+            {
+                throw new InvalidTomlKeyException(key);
+            }
+
+            key = EscapeStringValue(key);
             return $"\"{key}\"";
         }
     }
 }