Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add C3ID implementation #21

Merged
merged 12 commits into from
Jan 15, 2025
96 changes: 96 additions & 0 deletions src/Cask/CrossCompanyCorrelatingId.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using System.Diagnostics;
using System.Security.Cryptography;
using System.Text;

using static CommonAnnotatedSecurityKeys.Limits;

namespace CommonAnnotatedSecurityKeys;

internal static class CrossCompanyCorrelatingId
nguerrera marked this conversation as resolved.
Show resolved Hide resolved
nguerrera marked this conversation as resolved.
Show resolved Hide resolved
{
/// <summary>
/// The size of a Cross-Company Correlating ID (aka C3ID) in bytes.
/// </summary>
public const int SizeInBytes = 15;

// UTF-8 literal written at the very start of the hash input that Compute builds.
private static ReadOnlySpan<byte> CompanyPrefix => "Cross"u8;
// UTF-8 literal written immediately after the company name in that hash input.
private static ReadOnlySpan<byte> CompanySuffix => "CorrelatingId:"u8;
// Lookup table of uppercase hex digits as UTF-8 bytes, indexed by nibble value (0-15).
private static ReadOnlySpan<byte> Hex => "0123456789ABCDEF"u8;
// Each input byte is rendered as two hex digits (high nibble, then low nibble).
private const int HexCharsPerByte = 2;

/// <summary>
/// Computes the Cross-Company Correlating Id (aka C3ID) bytes for the given
nguerrera marked this conversation as resolved.
Show resolved Hide resolved
/// company and text and writes them to the destination span.
/// </summary>
/// <param name="company">Company name that is embedded, UTF-8 encoded, between the fixed prefix and suffix of the hash input.</param>
/// <param name="text">Text whose SHA256 (as uppercase hex) forms the tail of the hash input.</param>
/// <param name="destination">Receives the first <see cref="SizeInBytes"/> bytes of the final SHA256. Its length is only checked by a debug assertion.</param>
public static void Compute(string company, string text, Span<byte> destination)
nguerrera marked this conversation as resolved.
Show resolved Hide resolved
nguerrera marked this conversation as resolved.
Show resolved Hide resolved
{
Debug.Assert(destination.Length >= SizeInBytes);
nguerrera marked this conversation as resolved.
Show resolved Hide resolved

// Input: $"Cross{company}CorrelatingId:{SHA256Hex(text)}" encoded in UTF-8
nguerrera marked this conversation as resolved.
Show resolved Hide resolved
nguerrera marked this conversation as resolved.
Show resolved Hide resolved
// Size the input buffer up front: prefix + company + suffix + two hex digits per SHA256 byte.
int companyByteCount = Encoding.UTF8.GetByteCount(company);
int inputByteCount =
CompanyPrefix.Length +
companyByteCount +
CompanySuffix.Length +
(SHA256.HashSizeInBytes * HexCharsPerByte);

// Inputs no larger than MaxStackAlloc are built on the stack; larger ones use a heap array.
// 'inputDestination' is a cursor over 'input' that is advanced past each segment as it is written.
Span<byte> input = inputByteCount <= MaxStackAlloc ? stackalloc byte[inputByteCount] : new byte[inputByteCount];
Span<byte> inputDestination = input;

// 'Cross'
CompanyPrefix.CopyTo(inputDestination);
inputDestination = inputDestination[CompanyPrefix.Length..];

// {company}
Encoding.UTF8.GetBytes(company.AsSpan(), inputDestination);
inputDestination = inputDestination[companyByteCount..];

// 'CorrelatingId:'
CompanySuffix.CopyTo(inputDestination);
inputDestination = inputDestination[CompanySuffix.Length..];

// SHA256 hash of UTF-8 encoded text, converted to uppercase UTF-8 encoded hex
Sha256Hex(text, inputDestination);

// Compute second SHA256 of above input, truncate, and copy to destination
// (only the leading SizeInBytes bytes of the 'sha' buffer are kept).
Span<byte> sha = stackalloc byte[SHA256.HashSizeInBytes];
SHA256.HashData(input, sha);
sha[..SizeInBytes].CopyTo(destination);
}

/// <summary>
/// Hashes the UTF-8 encoding of <paramref name="text"/> with SHA256 and emits
/// the digest into <paramref name="destination"/> as uppercase hex digits,
/// themselves UTF-8 encoded.
/// </summary>
/// <param name="text">The text to hash.</param>
/// <param name="destination">
/// Receives the hex digits; must hold at least two bytes per SHA256 digest byte
/// (checked by a debug assertion only).
/// </param>
private static void Sha256Hex(string text, Span<byte> destination)
{
    Debug.Assert(destination.Length >= SHA256.HashSizeInBytes * HexCharsPerByte);

    // Encode the text as UTF-8: on the stack when it fits within MaxStackAlloc,
    // otherwise in a heap-allocated array.
    int utf8Length = Encoding.UTF8.GetByteCount(text);
    Span<byte> utf8 = utf8Length <= MaxStackAlloc
        ? stackalloc byte[utf8Length]
        : new byte[utf8Length];
    _ = Encoding.UTF8.GetBytes(text.AsSpan(), utf8);

    // Hash the encoded text, then render the raw digest as hex into the caller's buffer.
    Span<byte> digest = stackalloc byte[SHA256.HashSizeInBytes];
    _ = SHA256.HashData(utf8, digest);
    ConvertToHex(digest, destination);
}

/// <summary>
/// Renders each input byte as two uppercase hex digits (UTF-8 encoded) in
/// <paramref name="destination"/>, writing the digits directly from a lookup
/// table so that no string or intermediate buffer is created.
/// </summary>
/// <param name="bytes">The bytes to render.</param>
/// <param name="destination">
/// Receives two output bytes per input byte; its length is checked by a debug
/// assertion only.
/// </param>
private static void ConvertToHex(ReadOnlySpan<byte> bytes, Span<byte> destination)
{
    Debug.Assert(destination.Length >= bytes.Length * HexCharsPerByte);

    int offset = 0;
    foreach (byte value in bytes)
    {
        // High nibble first, then low nibble.
        destination[offset] = Hex[value >> 4];
        destination[offset + 1] = Hex[value & 0xF];
        offset += HexCharsPerByte;
    }
}
}
15 changes: 15 additions & 0 deletions src/Cask/Polyfill.cs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ internal static class Extensions
{
public static unsafe string GetString(this Encoding encoding, ReadOnlySpan<byte> bytes)
{
if (bytes.Length == 0)
{
return string.Empty;
}

fixed (byte* ptr = bytes)
{
return encoding.GetString(ptr, bytes.Length);
Expand All @@ -77,6 +82,11 @@ public static unsafe string GetString(this Encoding encoding, ReadOnlySpan<byte>

public static unsafe int GetByteCount(this Encoding encoding, ReadOnlySpan<char> chars)
{
if (chars.Length == 0)
{
return 0;
}

fixed (char* ptr = chars)
{
return encoding.GetByteCount(ptr, chars.Length);
Expand All @@ -85,6 +95,11 @@ public static unsafe int GetByteCount(this Encoding encoding, ReadOnlySpan<char>

public static unsafe int GetBytes(this Encoding encoding, ReadOnlySpan<char> chars, Span<byte> bytes)
{
if (chars.Length == 0)
{
return 0;
}

fixed (char* charPtr = chars)
fixed (byte* bytePtr = bytes)
{
Expand Down
40 changes: 40 additions & 0 deletions src/Tests/Cask.Tests/CrossCompanyCorrelatingIdTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using Xunit;

namespace CommonAnnotatedSecurityKeys.Tests;

public class CrossCompanyCorrelatingIdTests
{
/// <summary>
/// Computes the C3ID for company "Microsoft" and each inline <paramref name="text"/> value
/// (empty, ASCII, and an emoji) and checks its Base64 form against the expected string.
/// </summary>
[Theory]
[InlineData("", "EZ3GxRsKq+Dp21GvyCpQ")]
[InlineData("Hello world", "R8ogeP7QfTFvL5qAATry")]
[InlineData("😁", "f/BTV0j6A8km4KDw7aJz")]
public void Test_Basic(string text, string expected)
{
string actual = ComputeC3IDBase64(company: "Microsoft", text);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey is the proper rendering C3ID or C3id?

Haha. It is a true fact that I treat this as a valid question despite the obvious triviality of the concern.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I prefer C3id but need I mention I have no desire to die on this hill? :)

Copy link
Member

@rwoll rwoll Jan 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tehe. I think I prefer C3Id or C3ID over C3id. Id is a word so if the convention in C# is CreateUrl, C3Id would be the most consistent.

Copy link
Contributor Author

@nguerrera nguerrera Jan 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does not appear in any identifier other than in tests. As such, my care factor is vanishingly small. I'm just as picky about these things in public API, so no judgment. 😁 I'll happily change it, but I still don't know which to use...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I went with C3Id now. I need it in non-test code in my next change. Still not public, but I think it's worth settling this right now and not having noise of renaming it mixed in with other changes.

My reason for C3Id over C3id is a nod to my much younger self that rewrote the FxCop casing tokenizer ~20 years ago. That would parse this as two tokens C3 and id. And only C3Id respects the guidelines with that tokenization. 😁

I'd ask to get rid of the number in the middle of an acronym, but I think I've caused enough trouble asking for algo changes.

Assert.Equal(expected, actual);
}

/// <summary>
/// Checks the C3ID produced for a 300-character text against a known Base64 value.
/// NOTE(review): presumably 300 is chosen to exceed Limits.MaxStackAlloc and exercise
/// the heap-allocation path — confirm against the Limits definition.
/// </summary>
[Fact]
public void Test_LargeText()
{
    const string Expected = "QjHXB4Bu8voB3eJcJagI";
    string largeText = new string('x', 300);

    string c3id = ComputeC3IDBase64(company: "Microsoft", text: largeText);

    Assert.Equal(Expected, c3id);
}

/// <summary>
/// Checks the C3ID produced for a 300-character company name against a known Base64 value.
/// NOTE(review): presumably 300 is chosen to exceed Limits.MaxStackAlloc and exercise
/// the heap-allocation path — confirm against the Limits definition.
/// </summary>
[Fact]
public void Test_LargeCompany()
{
    const string Expected = "rG1CONo8M3lcBqzxyIpf";
    string largeCompany = new string('x', 300);

    string c3id = ComputeC3IDBase64(company: largeCompany, text: "test");

    Assert.Equal(Expected, c3id);
}

/// <summary>
/// Test helper: computes the C3ID for the given company and text into a buffer of
/// exactly <c>CrossCompanyCorrelatingId.SizeInBytes</c> bytes and returns it Base64-encoded.
/// </summary>
private static string ComputeC3IDBase64(string company, string text)
{
    var c3id = new byte[CrossCompanyCorrelatingId.SizeInBytes];

    CrossCompanyCorrelatingId.Compute(company, text, destination: c3id);

    return Convert.ToBase64String(c3id);
}
}
25 changes: 25 additions & 0 deletions src/Tests/Cask.Tests/PolyfillTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,31 @@ public void Random_NotDeterministic()
Assert.False(random1.SequenceEqual(random2), "RandomNumberGenerator produced two identical 32-byte sequences.");
}

/// <summary>
/// Decoding an empty byte span as UTF-8 must yield an empty string.
/// </summary>
[Fact]
public void Encoding_GetString_Empty()
{
    ReadOnlySpan<byte> noBytes = [];

    string decoded = Encoding.UTF8.GetString(noBytes);

    Assert.Equal("", decoded);
}

/// <summary>
/// The UTF-8 byte count of an empty character span must be zero.
/// </summary>
[Fact]
public void Encoding_GetByteCount_Empty()
{
    ReadOnlySpan<char> noChars = "".AsSpan();

    int count = Encoding.UTF8.GetByteCount(noChars);

    Assert.Equal(0, count);
}

/// <summary>
/// Encoding an empty character span into an empty byte span must report zero bytes written.
/// </summary>
[Fact]
public void Encoding_GetBytes_Empty()
{
    ReadOnlySpan<char> noChars = "".AsSpan();
    Span<byte> noBytes = [];

    int written = Encoding.UTF8.GetBytes(noChars, noBytes);

    Assert.Equal(0, written);
}

#if NETFRAMEWORK // We don't need to stress test the modern BCL :)
[Fact]
public async Task Polyfill_ThreadingStress()
Expand Down
Loading