Skip to content

Commit

Permalink
fix: Valid key-parsing with mixed quotes.
Browse files Browse the repository at this point in the history
Ideally fixes #37, but current implementation has some issues.
Will elaborate on issues with this "fix" in a comment.
  • Loading branch information
ITR13 committed Jan 14, 2024
1 parent 2f9b83f commit f02a662
Show file tree
Hide file tree
Showing 4 changed files with 243 additions and 38 deletions.
97 changes: 97 additions & 0 deletions Tomlet.Tests/QuotedKeyTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Tomlet.Exceptions;
using Xunit;

namespace Tomlet.Tests
{
    /// <summary>
    /// Exercises parsing and serialising of TOML keys that are wrapped in basic (double) or
    /// literal (single) quotes, with and without dots separating key segments.
    /// Each row's trailing comment shows the decoded key the row expects, where there is one.
    /// </summary>
    public class QuotedKeysTests
    {
        /// <summary>
        /// A fully quoted key is one segment, even when it contains dots or the other quote type.
        /// </summary>
        [Theory]
        [InlineData("\"a.'b\"", "a.'b")] // a.'b
        [InlineData("\"a.\\\"b\"", "a.\"b")] // a."b
        [InlineData("\"\"", "")] // (empty key)
        [InlineData("\"\\\"\"", "\"")] // "
        [InlineData("\"a.🐱b\"", "a.🐱b")] // a.🐱b
        [InlineData("'a.\"b'", "a.\"b")] // a."b
        [InlineData("'a.\\\"b'", "a.\\\"b")] // a.\"b
        [InlineData("''", "")] // (empty key)
        [InlineData("'\"'", "\"")] // "
        [InlineData("'\\\"'", "\\\"")] // \"
        [InlineData("'a.🐱b'", "a.🐱b")] // a.🐱b
        [InlineData("\"a.b\\\".c\"", "a.b\".c")] // a.b".c
        public void NonDottedKeysWork(string inputKey, string expectedKey)
        {
            var parsed = TomletMain.To<Dictionary<string, string>>($"{inputKey} = \"value\"");

            // Typed as IDictionary so the key-lookup overload of Assert.Contains is selected.
            IDictionary<string, string> entries = parsed;
            Assert.Contains(expectedKey, entries);
        }

        /// <summary>
        /// Keys whose quoting is unbalanced or contains an unescaped delimiter must be rejected.
        /// </summary>
        [Theory]
        [InlineData("\"a\"b\"")]
        [InlineData("'a'b'")]
        [InlineData("'a\\'b'")]
        //[InlineData("a\"b")] // Illegal in specs, but no harm in reading it
        //[InlineData("a'b")] // Illegal in specs, but no harm in reading it
        //[InlineData("a🐱b")] // Illegal in specs, but no harm in reading it
        [InlineData("'ab\"")]
        public void IllegalNonDottedKeysThrow(string inputKey)
        {
            var toml = $"{inputKey} = \"value\"";

            Assert.ThrowsAny<TomlException>(() => _ = TomletMain.To<Dictionary<string, string>>(toml));
        }

        /// <summary>
        /// A dot outside of quotes splits the key into a table name and a key inside that table.
        /// </summary>
        [Theory]
        [InlineData("'a.b'.c", "a.b", "c")]
        [InlineData("'a.b'.\"c\"", "a.b", "c")]
        [InlineData("a.'b.c'", "a", "b.c")]
        [InlineData("\"a\".'b.c'", "a", "b.c")]
        // NOTE(review): the next input decodes to "a\".b.c - the opening double quote is followed
        // only by an escaped quote and is never closed. Confirm this row is meant to be accepted.
        [InlineData("\"a\\\".b.c", "a", "b.c")]
        [InlineData("'a.\"b'.c", "a.\"b", "c")]
        [InlineData("\"a.b\\\"c\".d", "a.b\"c", "d")]
        public void DottedKeysWork(string inputKey, string expectedKey, string expectedSubkey)
        {
            var parsed = TomletMain.To<Dictionary<string, Dictionary<string, string>>>($"{inputKey} = \"value\"");

            IDictionary<string, Dictionary<string, string>> tables = parsed;
            IDictionary<string, string> inner = Assert.Contains(expectedKey, tables);
            Assert.Contains(expectedSubkey, inner);
        }

        /// <summary>
        /// Dotted keys with stray or unbalanced quotes in any segment must be rejected.
        /// </summary>
        [Theory]
        [InlineData("'a.\"b'.c\"")]
        [InlineData("\"a.bc\".d\"")]
        [InlineData("\"a.b\"c\".d\"")]
        [InlineData("\"a.b\"c\".d")]
        [InlineData("\"a.b\\\"c\".d\"")]
        [InlineData("'a.b'c'.d")]
        [InlineData("'a.b\\'c'.d")]
        [InlineData("'a.bc'.d'")]
        public void IllegalDottedKeysThrow(string inputKey)
        {
            var toml = $"{inputKey} = \"value\"";

            Assert.ThrowsAny<TomlException>(() => _ = TomletMain.To<Dictionary<string, string>>(toml));
        }

        /// <summary>
        /// Dictionary keys that would be illegal as written must be quoted/escaped on output.
        /// <paramref name="expectedOutput"/> is a regular expression the emitted key has to match.
        /// </summary>
        [Theory]
        [InlineData("\"a\"b\"", @"(?:'""a""b""')|(?:""\\""a\\""b\\"""")")] // Simple or Literal
        [InlineData("'a'b'", @"""'a'b'""")] // Simple only
        [InlineData("'a\\'b'", @"""'a\\'b'""")] // Simple only
        [InlineData("a\"b", @"(?:'a""b')|(?:""a\\""b"")")] // Simple or Literal
        [InlineData("a'b", @"""a'b""")] // Simple only
        [InlineData("a🐱b", @"(?:'a🐱b')|(?:""a🐱b"")")] // Simple or Literal
        [InlineData("'ab\"", @"""'ab\\""""")] // Simple only
        public void SerializingIllegalKeysWorks(string inputKey, string expectedOutput)
        {
            var source = new Dictionary<string, string> { [inputKey] = "a" };

            var document = TomletMain.DocumentFrom(source);

            Assert.NotEmpty(document.Keys);
            Assert.Matches(expectedOutput, document.Keys.First());
        }
    }
}
2 changes: 1 addition & 1 deletion Tomlet/Exceptions/InvalidTomlKeyException.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ public InvalidTomlKeyException(string key)
_key = key;
}

public override string Message => $"The string |{_key}| (between the two bars) contains at least one of both a double quote and a single quote, so it cannot be used for a TOML key.";
public override string Message => $"The string |{_key}| (between the two bars) contains invalid characters, so it cannot be used for a TOML key.";
}
}
135 changes: 107 additions & 28 deletions Tomlet/TomlKeyUtils.cs
Original file line number Diff line number Diff line change
@@ -1,57 +1,136 @@
using System;
using System.Text.RegularExpressions;
using Tomlet.Exceptions;

namespace Tomlet
{
    /// <summary>
    /// Helpers for splitting (possibly quoted) dotted TOML keys into segments and for turning an
    /// arbitrary string into a key that can be written to a TOML document.
    /// </summary>
    internal static class TomlKeyUtils
    {
        // Bare keys may only contain ASCII letters, digits, '-' and '_'.
        // Anchored with \z rather than $: in .NET, $ also matches just before a trailing '\n',
        // which would let "abc\n" through as a bare key.
        private static readonly Regex UnquotedKeyRegex = new Regex(@"^[a-zA-Z0-9_-]+\z");

        /// <summary>
        /// Splits <paramref name="key"/> into its first segment and whatever follows the first
        /// separating dot.
        /// </summary>
        /// <param name="key">The key as written, including any surrounding quotes.</param>
        /// <param name="ourKeyName">
        /// The first segment. A quoted segment keeps its surrounding quotes.
        /// </param>
        /// <param name="restOfKey">
        /// Everything after the dot that ends the first segment, or an empty string when
        /// <paramref name="key"/> is a single segment.
        /// </param>
        /// <exception cref="InvalidTomlKeyException">
        /// A quoted first segment is never closed, is followed by anything other than a dot,
        /// or the key ends with a separating dot.
        /// </exception>
        internal static void GetTopLevelAndSubKeys(string key, out string ourKeyName, out string restOfKey)
        {
            // Ordinal comparisons: the culture-sensitive StartsWith(string) overload may ignore
            // zero-width characters and so misreport what the first character really is.
            if (key.StartsWith("'", StringComparison.Ordinal))
            {
                // Literal strings have no escape mechanism, so the very next ' closes the segment.
                SplitAfterClosingQuote(key, key.IndexOf('\'', 1), out ourKeyName, out restOfKey);
                return;
            }

            if (key.StartsWith("\"", StringComparison.Ordinal))
            {
                // Basic strings may contain \" so the closing quote is the first unescaped one.
                SplitAfterClosingQuote(key, FindNextUnescapedQuote(key, 1), out ourKeyName, out restOfKey);
                return;
            }

            // Unquoted first segment: it runs up to the first dot, if there is one.
            var firstDot = key.IndexOf('.');
            if (firstDot == -1)
            {
                // Key is undotted.
                // We could make a check for illegal characters here, but there isn't much point to it.
                ourKeyName = key;
                restOfKey = "";
                return;
            }

            if (firstDot + 1 == key.Length)
            {
                // A trailing dot would leave an empty unquoted key behind it.
                // TODO: Find better exception
                throw new InvalidTomlKeyException(key);
            }

            ourKeyName = key.Substring(0, firstDot);
            restOfKey = key.Substring(firstDot + 1);
        }

        /// <summary>
        /// Finishes splitting a key whose first segment is quoted, given the index of the quote
        /// that closes that segment (or -1 when no closing quote was found).
        /// </summary>
        private static void SplitAfterClosingQuote(string key, int closingQuote, out string ourKeyName, out string restOfKey)
        {
            if (closingQuote == -1)
            {
                // Quoted string was never closed
                // TODO: Find better exception
                throw new InvalidTomlKeyException(key);
            }

            if (closingQuote + 1 == key.Length)
            {
                // The quoted segment is the whole key, no splitting needed.
                ourKeyName = key;
                restOfKey = "";
                return;
            }

            if (key[closingQuote + 1] != '.')
            {
                // Something other than a separator follows the closing quote, which means the
                // segment contained an unescaped quote character.
                // TODO: Find better exception
                throw new InvalidTomlKeyException(key);
            }

            if (closingQuote + 2 == key.Length)
            {
                // A trailing dot would leave an empty unquoted key behind it.
                // TODO: Find better exception
                throw new InvalidTomlKeyException(key);
            }

            ourKeyName = key.Substring(0, closingQuote + 1);
            restOfKey = key.Substring(closingQuote + 2);
        }

        /// <summary>
        /// Returns the index of the first double quote at or after <paramref name="startingIndex"/>
        /// that is not preceded by an odd number of backslashes, or -1 when there is none.
        /// </summary>
        private static int FindNextUnescapedQuote(string input, int startingIndex)
        {
            var isEscaped = false;
            for (var i = startingIndex; i < input.Length; i++)
            {
                var current = input[i];
                if (current == '\\')
                {
                    // A backslash escapes the next character unless it is itself escaped.
                    isEscaped = !isEscaped;
                    continue;
                }

                if (current == '"' && !isEscaped)
                {
                    return i;
                }

                // Any other character (or an escaped quote) consumes a pending escape.
                isEscaped = false;
            }

            return -1;
        }

        /// <summary>
        /// Converts a raw string into a form usable as a TOML key: returned unchanged when it is a
        /// valid bare key, otherwise quoted via <see cref="TomlUtils.AddCorrectQuotes"/>.
        /// </summary>
        internal static string FullStringToProperKey(string key)
        {
            return UnquotedKeyRegex.IsMatch(key) ? key : TomlUtils.AddCorrectQuotes(key);
        }
    }
}
47 changes: 38 additions & 9 deletions Tomlet/TomlUtils.cs
Original file line number Diff line number Diff line change
@@ -1,26 +1,55 @@
using Tomlet.Exceptions;
using System.Text.RegularExpressions;
using Tomlet.Exceptions;

namespace Tomlet
{
internal static class TomlUtils
{
// Whitelists of the characters a key may contain to be written as a basic ("...") or a
// literal ('...') string. Control characters that fall outside the basic whitelist could
// technically be emitted as \u escapes, but that is not implemented.
//
// TOML defines non-ascii as %x80-D7FF / %xE000-10FFFF. .NET strings are UTF-16, so code points
// above U+FFFF appear as a high surrogate followed by a low surrogate; the second alternative
// accepts such well-formed pairs while still rejecting lone surrogates.
//
// Both patterns end in \z rather than $ because $ also matches before a trailing '\n', which
// would let a key ending in a newline pass the literal check.
private static readonly Regex CanBeBasicRegex =
    new Regex(@"^(?:[\x08-\x0A\x0C-\x0D\x20-\x7E\x80-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF])+\z");

// Same as above minus the single quote (\x27) and every control character except tab, since
// literal strings cannot escape anything.
private static readonly Regex CanBeLiteralRegex =
    new Regex(@"^(?:[\x09\x20-\x26\x28-\x7E\x80-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF])+\z");

/// <summary>
/// Escapes <paramref name="key"/> so that it can be placed between the double quotes of a
/// TOML basic string.
/// </summary>
/// <param name="key">The raw, unescaped text.</param>
/// <returns>
/// The text with backslashes, double quotes, and the backspace, tab, line feed, form feed and
/// carriage return characters replaced by their backslash escapes.
/// </returns>
public static string EscapeStringValue(string key)
{
    // Escaped characters allowed in simple strings:
    // https://github.com/toml-lang/toml/blob/8eae5e1c005bc5836098505f85a7aa06568999dd/toml.abnf#L74
    //
    // Backslashes are doubled first so the backslashes introduced by the later replacements
    // are not doubled again. That first step already neutralises text that merely looks like
    // a unicode escape: a literal \u becomes \\u, which a parser reads as a backslash followed
    // by 'u'. Replacing "\u" again afterwards would produce \\\u - an escaped backslash
    // followed by a live unicode escape - so no further handling is done here.
    return key.Replace("\\", "\\\\")
        .Replace("\n", "\\n")
        .Replace("\t", "\\t")
        .Replace("\"", "\\\"")
        .Replace("\b", "\\b") // Backspace
        .Replace("\f", "\\f") // Form Feed
        .Replace("\r", "\\r"); // Carriage Return
}

/// <summary>
/// Wraps <paramref name="key"/> in the kind of quotes that can represent it: a literal
/// ('...') string when no escaping is needed, otherwise an escaped basic ("...") string.
/// </summary>
/// <param name="key">The raw key text, without surrounding quotes.</param>
/// <returns>The quoted key.</returns>
/// <exception cref="InvalidTomlKeyException">
/// <paramref name="key"/> contains a character that neither string form can represent.
/// </exception>
public static string AddCorrectQuotes(string key)
{
    if (key.Length == 0)
    {
        // An empty key is legal in TOML as long as it is quoted. Both whitelist patterns
        // require at least one character, so handle it before consulting them.
        return "''";
    }

    if (CanBeLiteralRegex.IsMatch(key))
    {
        // Literal strings aren't escaped
        return $"'{key}'";
    }

    if (!CanBeBasicRegex.IsMatch(key))
    {
        throw new InvalidTomlKeyException(key);
    }

    return $"\"{EscapeStringValue(key)}\"";
}
}
Expand Down

0 comments on commit f02a662

Please sign in to comment.