diff --git a/src/McProtoNet/McProtoNet.Benchmark/BinaryPrimitivesTest.cs b/src/McProtoNet/McProtoNet.Benchmark/BinaryPrimitivesTest.cs
new file mode 100644
index 00000000..8476c543
--- /dev/null
+++ b/src/McProtoNet/McProtoNet.Benchmark/BinaryPrimitivesTest.cs
@@ -0,0 +1,207 @@
+using System;
+using System.Buffers.Binary;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+
+namespace McProtoNet.Benchmark;
+
+/// <summary>Span-based endianness reversal helpers whose 32-bit and 64-bit vector paths go through the <see cref="Impl"/> shuffles.</summary>
+public static class BinaryPrimitivesTest
+{
+    /// <summary>Copies every element of <paramref name="source"/> into <paramref name="destination"/> with its byte order reversed; throws <see cref="ArgumentException"/> when the destination is shorter than the source.</summary>
+    [CLSCompliant(false)]
+    public static void ReverseEndianness(ReadOnlySpan<ushort> source, Span<ushort> destination) =>
+        ReverseEndianness<short, Int16EndiannessReverser>(MemoryMarshal.Cast<ushort, short>(source),
+            MemoryMarshal.Cast<ushort, short>(destination));
+
+    /// <inheritdoc cref="ReverseEndianness(ReadOnlySpan{ushort}, Span{ushort})" />
+    public static void ReverseEndianness(ReadOnlySpan<short> source, Span<short> destination) =>
+        ReverseEndianness<short, Int16EndiannessReverser>(source, destination);
+
+    /// <inheritdoc cref="ReverseEndianness(ReadOnlySpan{ushort}, Span{ushort})" />
+    [CLSCompliant(false)]
+    public static void ReverseEndianness(ReadOnlySpan<uint> source, Span<uint> destination) =>
+        ReverseEndianness<int, Int32EndiannessReverser>(MemoryMarshal.Cast<uint, int>(source),
+            MemoryMarshal.Cast<uint, int>(destination));
+
+    /// <inheritdoc cref="ReverseEndianness(ReadOnlySpan{ushort}, Span{ushort})" />
+    public static void ReverseEndianness(ReadOnlySpan<int> source, Span<int> destination) =>
+        ReverseEndianness<int, Int32EndiannessReverser>(source, destination);
+
+    /// <inheritdoc cref="ReverseEndianness(ReadOnlySpan{ushort}, Span{ushort})" />
+    [CLSCompliant(false)]
+    public static void ReverseEndianness(ReadOnlySpan<ulong> source, Span<ulong> destination) =>
+        ReverseEndianness<long, Int64EndiannessReverser>(MemoryMarshal.Cast<ulong, long>(source),
+            MemoryMarshal.Cast<ulong, long>(destination));
+
+    /// <inheritdoc cref="ReverseEndianness(ReadOnlySpan{ushort}, Span{ushort})" />
+    public static void ReverseEndianness(ReadOnlySpan<long> source, Span<long> destination) =>
+        ReverseEndianness<long, Int64EndiannessReverser>(source, destination);
+
+    /// <inheritdoc cref="ReverseEndianness(ReadOnlySpan{ushort}, Span{ushort})" />
+    [CLSCompliant(false)]
+    public static void ReverseEndianness(ReadOnlySpan<nuint> source, Span<nuint> destination)
+    {
+        if (IntPtr.Size == sizeof(long)) // element width chosen from the real pointer size, not a TARGET_64BIT compile symbol
+            ReverseEndianness<long, Int64EndiannessReverser>(MemoryMarshal.Cast<nuint, long>(source), MemoryMarshal.Cast<nuint, long>(destination));
+        else
+            ReverseEndianness<int, Int32EndiannessReverser>(MemoryMarshal.Cast<nuint, int>(source), MemoryMarshal.Cast<nuint, int>(destination));
+    }
+
+    /// <inheritdoc cref="ReverseEndianness(ReadOnlySpan{ushort}, Span{ushort})" />
+    public static void ReverseEndianness(ReadOnlySpan<nint> source, Span<nint> destination)
+    {
+        if (IntPtr.Size == sizeof(long)) // element width chosen from the real pointer size, not a TARGET_64BIT compile symbol
+            ReverseEndianness<long, Int64EndiannessReverser>(MemoryMarshal.Cast<nint, long>(source), MemoryMarshal.Cast<nint, long>(destination));
+        else
+            ReverseEndianness<int, Int32EndiannessReverser>(MemoryMarshal.Cast<nint, int>(source), MemoryMarshal.Cast<nint, int>(destination));
+    }
+
+    /// <summary>Reverses 16-bit elements; the vector forms swap the two bytes of each lane with a shift-left/shift-right-logical pair.</summary>
+    private readonly struct Int16EndiannessReverser : IEndiannessReverser<short>
+    {
+        public static short Reverse(short value) =>
+            BinaryPrimitives.ReverseEndianness(value);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<short> Reverse(Vector128<short> vector) =>
+            Vector128.ShiftLeft(vector, 8) | Vector128.ShiftRightLogical(vector, 8);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<short> Reverse(Vector256<short> vector) =>
+            Vector256.ShiftLeft(vector, 8) | Vector256.ShiftRightLogical(vector, 8);
+    }
+
+    /// <summary>Reverses 32-bit elements; the vector forms use a byte shuffle that mirrors every group of four bytes.</summary>
+    private readonly struct Int32EndiannessReverser : IEndiannessReverser<int>
+    {
+        public static int Reverse(int value) =>
+            BinaryPrimitives.ReverseEndianness(value);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<int> Reverse(Vector128<int> vector) =>
+            Impl.ShuffleUnsafe(vector.AsByte(),
+                Vector128.Create((byte)3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12)).AsInt32();
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<int> Reverse(Vector256<int> vector) =>
+            Impl.ShuffleUnsafe(vector.AsByte(),
+                Vector256.Create((byte)3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12, 19, 18, 17, 16, 23, 22, 21,
+                    20, 27, 26, 25, 24, 31, 30, 29, 28)).AsInt32();
+    }
+
+    /// <summary>Reverses 64-bit elements; the vector forms use a byte shuffle that mirrors every group of eight bytes.</summary>
+    private readonly struct Int64EndiannessReverser : IEndiannessReverser<long>
+    {
+        public static long Reverse(long value) =>
+            BinaryPrimitives.ReverseEndianness(value);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<long> Reverse(Vector128<long> vector) =>
+            Impl.ShuffleUnsafe(vector.AsByte(),
+                    Vector128.Create((byte)7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8))
+                .AsInt64();
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<long> Reverse(Vector256<long> vector) =>
+            Impl.ShuffleUnsafe(vector.AsByte(),
+                Vector256.Create((byte)7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8, 23, 22, 21, 20, 19, 18, 17,
+                    16, 31, 30, 29, 28, 27, 26, 25, 24)).AsInt64();
+    }
+
+    /// <summary>Shared implementation: 256-bit vectors first, then 128-bit vectors, then a scalar loop for the tail; iterates backwards when the destination overlaps and starts after the source.</summary>
+    private static void ReverseEndianness<T, TReverser>(ReadOnlySpan<T> source, Span<T> destination)
+        where T : struct
+        where TReverser : IEndiannessReverser<T>
+    {
+        if (destination.Length < source.Length)
+        {
+            // The destination must be able to hold every source element.
+            throw new ArgumentException("destination is small", nameof(destination));
+        }
+
+        ref T sourceRef = ref MemoryMarshal.GetReference(source);
+        ref T destRef = ref MemoryMarshal.GetReference(destination);
+
+        if (Unsafe.AreSame(ref sourceRef, ref destRef) ||
+            !source.Overlaps(destination, out int elementOffset) ||
+            elementOffset < 0)
+        {
+            // Either there's no overlap between the source and the destination, or there's overlap but the
+            // destination starts at or before the source. That means we can safely iterate from beginning
+            // to end of the source and not have to worry about writing into the destination and clobbering
+            // source data we haven't yet read.
+
+            int i = 0;
+
+            if (Vector256.IsHardwareAccelerated)
+            {
+                while (i <= source.Length - Vector256<T>.Count)
+                {
+                    Vector256.StoreUnsafe(TReverser.Reverse(Vector256.LoadUnsafe(ref sourceRef, (uint)i)), ref destRef,
+                        (uint)i);
+                    i += Vector256<T>.Count;
+                }
+            }
+
+            if (Vector128.IsHardwareAccelerated)
+            {
+                while (i <= source.Length - Vector128<T>.Count)
+                {
+                    Vector128.StoreUnsafe(TReverser.Reverse(Vector128.LoadUnsafe(ref sourceRef, (uint)i)), ref destRef,
+                        (uint)i);
+                    i += Vector128<T>.Count;
+                }
+            }
+
+            while (i < source.Length)
+            {
+                Unsafe.Add(ref destRef, i) = TReverser.Reverse(Unsafe.Add(ref sourceRef, i));
+                i++;
+            }
+        }
+        else
+        {
+            // There's overlap between the source and the destination, and the source starts before the destination.
+            // That means if we were to iterate from beginning to end, reading from the source and writing to the
+            // destination, we'd overwrite source elements not yet read. To avoid that, we iterate from end to beginning.
+
+            int i = source.Length;
+
+            if (Vector256.IsHardwareAccelerated)
+            {
+                while (i >= Vector256<T>.Count)
+                {
+                    i -= Vector256<T>.Count;
+                    Vector256.StoreUnsafe(TReverser.Reverse(Vector256.LoadUnsafe(ref sourceRef, (uint)i)), ref destRef,
+                        (uint)i);
+                }
+            }
+
+            if (Vector128.IsHardwareAccelerated)
+            {
+                while (i >= Vector128<T>.Count)
+                {
+                    i -= Vector128<T>.Count;
+                    Vector128.StoreUnsafe(TReverser.Reverse(Vector128.LoadUnsafe(ref sourceRef, (uint)i)), ref destRef,
+                        (uint)i);
+                }
+            }
+
+            while (i > 0)
+            {
+                i--;
+                Unsafe.Add(ref destRef, i) = TReverser.Reverse(Unsafe.Add(ref sourceRef, i));
+            }
+        }
+    }
+
+    /// <summary>Static-abstract strategy supplying the scalar, 128-bit and 256-bit byte-order reversal for element type <typeparamref name="T"/>.</summary>
+    private interface IEndiannessReverser<T> where T : struct
+    {
+        static abstract T Reverse(T value);
+        static abstract Vector128<T> Reverse(Vector128<T> vector);
+        static abstract Vector256<T> Reverse(Vector256<T> vector);
+    }
+}
\ No newline at end of file
diff --git a/src/McProtoNet/McProtoNet.Benchmark/Impl.cs b/src/McProtoNet/McProtoNet.Benchmark/Impl.cs
new file mode 100644
index 00000000..6f4bd5e3
--- /dev/null
+++ b/src/McProtoNet/McProtoNet.Benchmark/Impl.cs
@@ -0,0 +1,29 @@
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace McProtoNet.Benchmark;
+
+public static class Impl
+{
+    /// <summary>Byte shuffle of <paramref name="values"/> by <paramref name="indices"/>: <c>Ssse3.Shuffle</c> when supported, otherwise <c>Vector128.Shuffle</c>. Out-of-range indices are not normalized between the two paths.</summary>
+    public static Vector128<byte> ShuffleUnsafe(Vector128<byte> values, Vector128<byte> indices)
+    {
+        if (Ssse3.IsSupported) return Ssse3.Shuffle(values, indices);
+        return Vector128.Shuffle(values, indices);
+    }
+
+    /// <summary>Cross-lane byte shuffle: with AVX2 it shuffles both the vector and its lane-swapped copy, then blends per byte according to whether the index targets the other 128-bit lane; otherwise falls back to <c>Vector256.Shuffle</c>.</summary>
+    public static Vector256<byte> ShuffleUnsafe(this Vector256<byte> values, Vector256<byte> indices)
+    {
+        if (Avx2.IsSupported)
+        {
+            var indicesXord = Avx2.And(Avx2.Xor(indices, Vector256.Create(Vector128.Create((byte)0), Vector128.Create((byte)0x10))), Vector256.Create((byte)0x9F));
+            var swap = Avx2.Permute2x128(values, values, 0b00000001);
+            var shuf1 = Avx2.Shuffle(values, indices);
+            var shuf2 = Avx2.Shuffle(swap, indices);
+            var selection = Avx2.CompareGreaterThan(indicesXord.AsSByte(), Vector256.Create((sbyte)0x0F)).AsByte();
+            return Avx2.BlendVariable(shuf1, shuf2, selection);
+        }
+        return Vector256.Shuffle(values, indices);
+    }
+}
\ No newline at end of file
diff --git a/src/McProtoNet/McProtoNet.Benchmark/Program.cs b/src/McProtoNet/McProtoNet.Benchmark/Program.cs
index 143c519b..74452182 100644
--- a/src/McProtoNet/McProtoNet.Benchmark/Program.cs
+++ b/src/McProtoNet/McProtoNet.Benchmark/Program.cs
@@ -1,4 +1,4 @@
-using System;
+using System.Diagnostics.CodeAnalysis;
 using System.Net.Sockets;
 using System.Threading.Tasks;
 using BenchmarkDotNet.Running;
diff --git a/src/McProtoNet/McProtoNet.Benchmark/ReadBigEndianBenchmarks.cs b/src/McProtoNet/McProtoNet.Benchmark/ReadBigEndianBenchmarks.cs
index e3e51178..33ef0697 100644
--- a/src/McProtoNet/McProtoNet.Benchmark/ReadBigEndianBenchmarks.cs
+++ b/src/McProtoNet/McProtoNet.Benchmark/ReadBigEndianBenchmarks.cs
@@ -2,6 +2,7 @@
 using System.Buffers.Binary;
 using System.IO;
 using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
 using BenchmarkDotNet.Attributes;
 using DotNext.Buffers;
 
@@ -13,12 +14,19 @@ public class ReadBigEndianBenchmarks
     public byte[] TestArr;
     private Random r = new();
     [Params(10, 100, 100000)] public int Count { get; set; }
-    
+    //public int Count = 10;
 
     [GlobalSetup]
     public void Setup()
     {
-        TestArr = new byte[sizeof(long) * Count];
+        Random r = new(71);
+        r.NextBytes(MemoryMarshal.AsBytes(new Span<Vector128<byte>>(ref A128)));
+        r.NextBytes(MemoryMarshal.AsBytes(new Span<Vector128<byte>>(ref B128)));
+        r.NextBytes(MemoryMarshal.AsBytes(new Span<Vector256<byte>>(ref A256)));
+        r.NextBytes(MemoryMarshal.AsBytes(new Span<Vector256<byte>>(ref B256)));
+        // int Count = 10;
+        TestArr = new byte[sizeof(long) * Count];
+
         scoped SpanWriter<byte> writer = new SpanWriter<byte>(TestArr);
         for (int i = 0; i < Count; i++)
         {
@@ -28,7 +36,7 @@ public void Setup()
     }
 
 
-    [Benchmark]
+     [Benchmark]
     public long[] SpanReader()
     {
         scoped SpanReader<byte> reader = new SpanReader<byte>(TestArr);
@@ -50,4 +58,31 @@ public long[] SimdRead()
         BinaryPrimitives.ReverseEndianness(numbers, source);
         return source;
     }
+
+    [Benchmark]
+    public long[] SimdReadUnsafe()
+    {
+        Span<long> numbers = MemoryMarshal.Cast<byte, long>(TestArr);
+        long[] source = new long[Count];
+        if (BitConverter.IsLittleEndian)
+            BinaryPrimitivesTest.ReverseEndianness(numbers, source);
+        else
+            numbers.CopyTo(source);
+        return source;
+    }
+    public Vector128<byte> A128;
+    public Vector128<byte> B128;
+
+    public Vector256<byte> A256;
+    public Vector256<byte> B256;
+    //[Benchmark]
+    public Vector128<byte> Shuffle128()
+    {
+        return Vector128.Shuffle(A128, B128);
+    }
+    //[Benchmark]
+    public Vector128<byte> ShuffleUnsafe128()
+    {
+        return Impl.ShuffleUnsafe(A128, B128);
+    }
 }
\ No newline at end of file
diff --git a/src/Sandbox/SandBoxMcProtoNet/Program.cs b/src/Sandbox/SandBoxMcProtoNet/Program.cs
index fe981e03..5875afc1 100644
--- a/src/Sandbox/SandBoxMcProtoNet/Program.cs
+++ b/src/Sandbox/SandBoxMcProtoNet/Program.cs
@@ -3,6 +3,7 @@
 using System.Collections.Concurrent;
 using System.Diagnostics;
 using System.Reactive.Linq;
+using System.Runtime.Intrinsics.X86;
 using System.Text;
 using DotNext.Collections.Generic;
 using McProtoNet.Client;
@@ -17,6 +18,7 @@ public static async Task Main(string[] args)
         Console.WriteLine("Start");
         try
         {
+
             var list = new List();
             var listProtocols = new List();
             for (int i = 0; i < 200; i++)