Skip to content

Commit

Permalink
Backported some performance improvements from .NET 9. Fix for multi u…
Browse files Browse the repository at this point in the history
…tf16 unicode code points.
  • Loading branch information
CptMoore committed Dec 22, 2024
1 parent 510ad74 commit 5e89f90
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 53 deletions.
72 changes: 19 additions & 53 deletions ModTek/Features/Logging/FastBuffer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -55,31 +55,11 @@ internal void Append(byte[] value)
}
}

// mainly used for ThreadId
internal void Append(int value)
{
if (value < 10)
{
var position = GetPointerAndIncrementLength(1);
position[0] = (byte)(value % 10 + AsciiZero);
}
else if (value < 100)
{
var position = GetPointerAndIncrementLength(2);
position[0] = (byte)(value / 10 % 10 + AsciiZero);
position[1] = (byte)(value % 10 + AsciiZero);
}
else if (value < 1000)
{
var position = GetPointerAndIncrementLength(3);
position[0] = (byte)(value / 100 % 10 + AsciiZero);
position[1] = (byte)(value / 10 % 10 + AsciiZero);
position[2] = (byte)(value % 10 + AsciiZero);
}
else
{
Append(value.ToString(CultureInfo.InvariantCulture));
}
var digits = FormattingHelpers.CountDigits((uint)value);
var position = GetPointerAndIncrementLength(digits);
FormattingHelpers.WriteDigits(position, (uint)value, digits);
}

internal void Append(decimal value)
Expand All @@ -99,20 +79,14 @@ internal void Append(string value)

// assume one byte per char, enlarge through AppendUsingEncoding if necessary
EnsureCapacity(_length + processingCount);
void AppendUsingEncoding(int iterSize)
{
const int Utf8MaxBytesPerChar = 4;
EnsureCapacity(_length + (processingCount - iterSize) + (iterSize * Utf8MaxBytesPerChar));
var charIndex = value.Length - processingCount;
_length += Encoding.UTF8.GetBytes(value, charIndex, iterSize, _buffer, _length);
}

fixed (char* chars = value)
{
var positionIterPtr = _bufferPtr + _length;
var charsIterPtr = chars;

// loop unrolling similar to Buffer.memcpy1
// parallelism isn't what makes it particularly fast, it's the batching that is helpful (fewer ops overall)

{
const int IterSize = 8;
Expand All @@ -132,7 +106,7 @@ void AppendUsingEncoding(int iterSize)
}
else
{
AppendUsingEncoding(IterSize);
goto Utf8Fallback;
}
positionIterPtr = _bufferPtr + _length;
charsIterPtr += IterSize;
Expand All @@ -151,7 +125,7 @@ void AppendUsingEncoding(int iterSize)
}
else
{
AppendUsingEncoding(IterSize);
goto Utf8Fallback;
}
positionIterPtr = _bufferPtr + _length;
charsIterPtr += IterSize;
Expand All @@ -168,9 +142,17 @@ void AppendUsingEncoding(int iterSize)
}
else
{
AppendUsingEncoding(IterSize);
goto Utf8Fallback;
}
}

return;

Utf8Fallback: // this is 10x slower or more (GetBytes has no fast ASCII path and no SIMD)
const int Utf8MaxBytesPerChar = 4;
EnsureCapacity(_length + processingCount * Utf8MaxBytesPerChar);
var charIndex = value.Length - processingCount;
_length += Encoding.UTF8.GetBytes(value, charIndex, processingCount, _buffer, _length);
}
}

Expand All @@ -184,33 +166,17 @@ private bool SetAscii(byte* positionIterPtr, char* charsIterPtr, int offset)

internal void Append(DateTime value)
{
var hour = value.Hour;
var minute = value.Minute;
var second = value.Second;
var ticks = value.Ticks;

var position = GetPointerAndIncrementLength(17);
position[0] = (byte)(hour / 10 % 10 + AsciiZero);
position[1] = (byte)(hour % 10 + AsciiZero);
FormattingHelpers.WriteDigits(position, value.Hour, 2);
position[2] = (byte)':';
position[3] = (byte)(minute / 10 % 10 + AsciiZero);
position[4] = (byte)(minute % 10 + AsciiZero);
FormattingHelpers.WriteDigits(position + 3, value.Minute, 2);
position[5] = (byte)':';
position[6] = (byte)(second / 10 % 10 + AsciiZero);
position[7] = (byte)(second % 10 + AsciiZero);
FormattingHelpers.WriteDigits(position + 6, value.Second, 2);
position[8] = (byte)'.';
position[9] = (byte)(ticks / 1_000_000 % 10 + AsciiZero);
position[10] = (byte)(ticks / 100_000 % 10 + AsciiZero);
position[11] = (byte)(ticks / 10_000 % 10 + AsciiZero);
position[12] = (byte)(ticks / 1_000 % 10 + AsciiZero);
position[13] = (byte)(ticks / 100 % 10 + AsciiZero);
position[14] = (byte)(ticks / 10 % 10 + AsciiZero);
position[15] = (byte)(ticks % 10 + AsciiZero);
FormattingHelpers.WriteDigits(position + 9, value.Ticks, 7);
position[16] = (byte)' ';
}
const byte AsciiZero = (byte)'0';

//[MethodImpl(MethodImplOptions.AggressiveInlining)]
private byte* GetPointerAndIncrementLength(int increment)
{
var length = _length;
Expand Down
1 change: 1 addition & 0 deletions ModTek/Features/Logging/Formatter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ private static string GetLocationString(IStackTrace st)
}

// avoid heap allocations during logging
// switch to inlined " "u8 once https://github.com/MonoMod/MonoMod/issues/194 is fixed
private static readonly byte[] s_threadIdPrefix = Encoding.UTF8.GetBytes("[ThreadId=");
private static readonly byte[] s_threadIdSuffix = Encoding.UTF8.GetBytes("] ");
private static readonly byte[] s_whitespaceBytes = Encoding.UTF8.GetBytes(" ");
Expand Down
64 changes: 64 additions & 0 deletions ModTek/Features/Logging/FormattingHelpers.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
using System.Numerics;
using System.Runtime.CompilerServices;

namespace ModTek.Features.Logging;

// copied from .NET 9
// Allocation-free helpers for rendering unsigned decimal numbers as ASCII bytes.
internal static class FormattingHelpers
{
    // Lookup table for CountDigits, indexed by Log2(value) (0..31).
    // Algorithm based on https://lemire.me/blog/2021/06/03/computing-the-number-of-digits-of-an-integer-even-faster.
    // Each entry equals (maxDigits << 32) - 10^(maxDigits - 1) for its bucket, so adding the value
    // carries into bit 32 exactly when the value reaches the bucket's power of ten.
    private static readonly long[] s_countDigitsTable =
    [
        4294967296,                                         // Log2 = 0      -> always 1 digit
        8589934582, 8589934582, 8589934582,                 // Log2 = 1..3   -> 2 digits from 10
        12884901788, 12884901788, 12884901788,              // Log2 = 4..6   -> 3 digits from 100
        17179868184, 17179868184, 17179868184,              // Log2 = 7..9   -> 4 digits from 1_000
        21474826480, 21474826480, 21474826480, 21474826480, // Log2 = 10..13 -> 5 digits from 10_000
        25769703776, 25769703776, 25769703776,              // Log2 = 14..16 -> 6 digits from 100_000
        30063771072, 30063771072, 30063771072,              // Log2 = 17..19 -> 7 digits from 1_000_000
        34349738368, 34349738368, 34349738368, 34349738368, // Log2 = 20..23 -> 8 digits from 10_000_000
        38554705664, 38554705664, 38554705664,              // Log2 = 24..26 -> 9 digits from 100_000_000
        41949672960, 41949672960, 41949672960,              // Log2 = 27..29 -> 10 digits from 1_000_000_000
        42949672960, 42949672960,                           // Log2 = 30..31 -> always 10 digits
    ];

    // Returns how many decimal digits are needed to print the given value (1 for zero, up to 10).
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static int CountDigits(uint value)
    {
        var bucket = BitOperations.Log2(value);
        var biased = value + s_countDigitsTable[bucket];

        // the digit count lives in the upper 32 bits of the sum
        return (int)(biased >> 32);
    }

    // Writes the lowest `digits` decimal digits of `value` as ASCII into the bytes
    // positionPtr[0] .. positionPtr[digits - 1], most significant digit first.
    // Shorter numbers are left-padded with '0'; higher-order digits that do not fit are dropped.
    // The caller must guarantee that `digits` bytes are writable at positionPtr.
    // Only meaningful for non-negative values: a negative value yields non-digit bytes.
    internal static unsafe void WriteDigits(byte* positionPtr, long value, int digits)
    {
        const long DigitBase = 10;
        const byte ZeroGlyph = (byte)'0';

        var remaining = value;

        // fill from the least significant digit (rightmost slot) towards the front
        for (var slot = digits - 1; slot >= 0; slot--)
        {
            var shifted = remaining + ZeroGlyph;
            remaining /= DigitBase;

            // shifted - remaining * 10 == '0' + (previous remaining % 10), without a second division
            positionPtr[slot] = (byte)(shifted - (remaining * DigitBase));
        }
    }
}

0 comments on commit 5e89f90

Please sign in to comment.