From 4e3a34412ef00e2f8b6c0255176719c6ecdb1a95 Mon Sep 17 00:00:00 2001 From: Alex Barney Date: Thu, 7 Apr 2022 11:18:14 -0700 Subject: [PATCH 1/8] Update to LibHac 0.16.1 (#3263) --- Ryujinx.HLE/Ryujinx.HLE.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Ryujinx.HLE/Ryujinx.HLE.csproj b/Ryujinx.HLE/Ryujinx.HLE.csproj index cf1c734fc2ec..4713c9b31f9e 100644 --- a/Ryujinx.HLE/Ryujinx.HLE.csproj +++ b/Ryujinx.HLE/Ryujinx.HLE.csproj @@ -19,7 +19,7 @@ - + From 55ee26136363338d1cfffabd4cca07c9ca9b7847 Mon Sep 17 00:00:00 2001 From: Narr the Reg Date: Thu, 7 Apr 2022 13:43:14 -0500 Subject: [PATCH 2/8] service: hid: Signal event on AcquireNpadStyleSetUpdateEventHandle (#3247) --- Ryujinx.HLE/HOS/Services/Hid/IHidServer.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Ryujinx.HLE/HOS/Services/Hid/IHidServer.cs b/Ryujinx.HLE/HOS/Services/Hid/IHidServer.cs index db093dad06e7..abc76c62f7b0 100644 --- a/Ryujinx.HLE/HOS/Services/Hid/IHidServer.cs +++ b/Ryujinx.HLE/HOS/Services/Hid/IHidServer.cs @@ -767,6 +767,9 @@ public ResultCode AcquireNpadStyleSetUpdateEventHandle(ServiceCtx context) throw new InvalidOperationException("Out of handles!"); } + // Games expect this event to be signaled after calling this function + evnt.ReadableEvent.Signal(); + context.Response.HandleDesc = IpcHandleDesc.MakeCopy(handle); Logger.Stub?.PrintStub(LogClass.ServiceHid, new { appletResourceUserId, npadId, npadStyleSet }); From d04ba51bb0456ee8f778fe86fc224665b0fb20c8 Mon Sep 17 00:00:00 2001 From: Mary Date: Fri, 8 Apr 2022 10:52:18 +0200 Subject: [PATCH 3/8] amadeus: Improve and fix delay effect processing (#3205) * amadeus: Improve and fix delay effect processing This rework the delay effect processing by representing calculation with the appropriate matrix and by unrolling some loop in the code. This allows better optimization by the JIT while making it more readeable. Also fix a bug in the Surround code path found while looking back at my notes. * Remove useless GetHashCode * Address gdkchan's comments --- .../Renderer/Dsp/Command/DelayCommand.cs | 157 ++++++++++-------- .../Renderer/Dsp/State/DelayState.cs | 13 +- .../Renderer/Utils/Math/Matrix2x2.cs | 71 ++++++++ .../Renderer/Utils/Math/Matrix6x6.cs | 97 +++++++++++ .../Renderer/Utils/Math/MatrixHelper.cs | 45 +++++ Ryujinx.Audio/Renderer/Utils/Math/Vector6.cs | 56 +++++++ 6 files changed, 368 insertions(+), 71 deletions(-) create mode 100644 Ryujinx.Audio/Renderer/Utils/Math/Matrix2x2.cs create mode 100644 Ryujinx.Audio/Renderer/Utils/Math/Matrix6x6.cs create mode 100644 Ryujinx.Audio/Renderer/Utils/Math/MatrixHelper.cs create mode 100644 Ryujinx.Audio/Renderer/Utils/Math/Vector6.cs diff --git a/Ryujinx.Audio/Renderer/Dsp/Command/DelayCommand.cs b/Ryujinx.Audio/Renderer/Dsp/Command/DelayCommand.cs index 8d416cc328d2..186efb6a38d1 100644 --- a/Ryujinx.Audio/Renderer/Dsp/Command/DelayCommand.cs +++ b/Ryujinx.Audio/Renderer/Dsp/Command/DelayCommand.cs @@ -18,8 +18,10 @@ using Ryujinx.Audio.Renderer.Dsp.State; using Ryujinx.Audio.Renderer.Parameter.Effect; using Ryujinx.Audio.Renderer.Server.Effect; +using Ryujinx.Audio.Renderer.Utils.Math; using System; using System.Diagnostics; +using System.Numerics; using System.Runtime.CompilerServices; namespace Ryujinx.Audio.Renderer.Dsp.Command @@ -70,7 +72,7 @@ public DelayCommand(uint bufferOffset, DelayParameter parameter, Memory outputBuffers, ReadOnlySpan inputBuffers, uint sampleCount) { const ushort channelCount = 2; - Span channelInput = stackalloc float[channelCount]; - Span delayLineValues = stackalloc float[channelCount]; - Span temp = stackalloc float[channelCount]; - float delayFeedbackBaseGain = state.DelayFeedbackBaseGain; float delayFeedbackCrossGain = state.DelayFeedbackCrossGain; float inGain = FixedPointHelper.ToFloat(Parameter.InGain, FixedPointPrecision); float dryGain = FixedPointHelper.ToFloat(Parameter.DryGain, FixedPointPrecision); float outGain = FixedPointHelper.ToFloat(Parameter.OutGain, FixedPointPrecision); + Matrix2x2 delayFeedback = new Matrix2x2(delayFeedbackBaseGain , delayFeedbackCrossGain, + delayFeedbackCrossGain, delayFeedbackBaseGain); + for (int i = 0; i < sampleCount; i++) { - for (int j = 0; j < channelCount; j++) + Vector2 channelInput = new Vector2 { - channelInput[j] = *((float*)inputBuffers[j] + i) * 64; - delayLineValues[j] = state.DelayLines[j].Read(); - } - - temp[0] = channelInput[0] * inGain + delayLineValues[1] * delayFeedbackCrossGain + delayLineValues[0] * delayFeedbackBaseGain; - temp[1] = channelInput[1] * inGain + delayLineValues[0] * delayFeedbackCrossGain + delayLineValues[1] * delayFeedbackBaseGain; + X = *((float*)inputBuffers[0] + i) * 64, + Y = *((float*)inputBuffers[1] + i) * 64, + }; - for (int j = 0; j < channelCount; j++) + Vector2 delayLineValues = new Vector2() { - float lowPassResult = state.LowPassFeedbackGain * state.LowPassZ[j] + temp[j] * state.LowPassBaseGain; + X = state.DelayLines[0].Read(), + Y = state.DelayLines[1].Read(), + }; - state.LowPassZ[j] = lowPassResult; - state.DelayLines[j].Update(lowPassResult); + Vector2 temp = MatrixHelper.Transform(ref channelInput, ref delayFeedback) + channelInput * inGain; - *((float*)outputBuffers[j] + i) = (channelInput[j] * dryGain + delayLineValues[j] * outGain) / 64; - } + state.UpdateLowPassFilter(ref Unsafe.As(ref temp), channelCount); + + *((float*)outputBuffers[0] + i) = (channelInput.X * dryGain + delayLineValues.X * outGain) / 64; + *((float*)outputBuffers[1] + i) = (channelInput.Y * dryGain + delayLineValues.Y * outGain) / 64; } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] private unsafe void ProcessDelayQuadraphonic(ref DelayState state, Span outputBuffers, ReadOnlySpan inputBuffers, uint sampleCount) { const ushort channelCount = 4; - Span channelInput = stackalloc float[channelCount]; - Span delayLineValues = stackalloc float[channelCount]; - Span temp = stackalloc float[channelCount]; - float delayFeedbackBaseGain = state.DelayFeedbackBaseGain; float delayFeedbackCrossGain = state.DelayFeedbackCrossGain; float inGain = FixedPointHelper.ToFloat(Parameter.InGain, FixedPointPrecision); float dryGain = FixedPointHelper.ToFloat(Parameter.DryGain, FixedPointPrecision); float outGain = FixedPointHelper.ToFloat(Parameter.OutGain, FixedPointPrecision); + Matrix4x4 delayFeedback = new Matrix4x4(delayFeedbackBaseGain , delayFeedbackCrossGain, delayFeedbackCrossGain, 0.0f, + delayFeedbackCrossGain, delayFeedbackBaseGain , 0.0f , delayFeedbackCrossGain, + delayFeedbackCrossGain, 0.0f , delayFeedbackBaseGain , delayFeedbackCrossGain, + 0.0f , delayFeedbackCrossGain, delayFeedbackCrossGain, delayFeedbackBaseGain); + + for (int i = 0; i < sampleCount; i++) { - for (int j = 0; j < channelCount; j++) + Vector4 channelInput = new Vector4 { - channelInput[j] = *((float*)inputBuffers[j] + i) * 64; - delayLineValues[j] = state.DelayLines[j].Read(); - } - - temp[0] = channelInput[0] * inGain + (delayLineValues[2] + delayLineValues[1]) * delayFeedbackCrossGain + delayLineValues[0] * delayFeedbackBaseGain; - temp[1] = channelInput[1] * inGain + (delayLineValues[0] + delayLineValues[3]) * delayFeedbackCrossGain + delayLineValues[1] * delayFeedbackBaseGain; - temp[2] = channelInput[2] * inGain + (delayLineValues[3] + delayLineValues[0]) * delayFeedbackCrossGain + delayLineValues[2] * delayFeedbackBaseGain; - temp[3] = channelInput[3] * inGain + (delayLineValues[1] + delayLineValues[2]) * delayFeedbackCrossGain + delayLineValues[3] * delayFeedbackBaseGain; + X = *((float*)inputBuffers[0] + i) * 64, + Y = *((float*)inputBuffers[1] + i) * 64, + Z = *((float*)inputBuffers[2] + i) * 64, + W = *((float*)inputBuffers[3] + i) * 64 + }; - for (int j = 0; j < channelCount; j++) + Vector4 delayLineValues = new Vector4() { - float lowPassResult = state.LowPassFeedbackGain * state.LowPassZ[j] + temp[j] * state.LowPassBaseGain; - - state.LowPassZ[j] = lowPassResult; - state.DelayLines[j].Update(lowPassResult); - - *((float*)outputBuffers[j] + i) = (channelInput[j] * dryGain + delayLineValues[j] * outGain) / 64; - } + X = state.DelayLines[0].Read(), + Y = state.DelayLines[1].Read(), + Z = state.DelayLines[2].Read(), + W = state.DelayLines[3].Read() + }; + + Vector4 temp = MatrixHelper.Transform(ref channelInput, ref delayFeedback) + channelInput * inGain; + + state.UpdateLowPassFilter(ref Unsafe.As(ref temp), channelCount); + + *((float*)outputBuffers[0] + i) = (channelInput.X * dryGain + delayLineValues.X * outGain) / 64; + *((float*)outputBuffers[1] + i) = (channelInput.Y * dryGain + delayLineValues.Y * outGain) / 64; + *((float*)outputBuffers[2] + i) = (channelInput.Z * dryGain + delayLineValues.Z * outGain) / 64; + *((float*)outputBuffers[3] + i) = (channelInput.W * dryGain + delayLineValues.W * outGain) / 64; } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] private unsafe void ProcessDelaySurround(ref DelayState state, Span outputBuffers, ReadOnlySpan inputBuffers, uint sampleCount) { const ushort channelCount = 6; - Span channelInput = stackalloc float[channelCount]; - Span delayLineValues = stackalloc float[channelCount]; - Span temp = stackalloc float[channelCount]; - + float feedbackGain = FixedPointHelper.ToFloat(Parameter.FeedbackGain, FixedPointPrecision); float delayFeedbackBaseGain = state.DelayFeedbackBaseGain; float delayFeedbackCrossGain = state.DelayFeedbackCrossGain; float inGain = FixedPointHelper.ToFloat(Parameter.InGain, FixedPointPrecision); float dryGain = FixedPointHelper.ToFloat(Parameter.DryGain, FixedPointPrecision); float outGain = FixedPointHelper.ToFloat(Parameter.OutGain, FixedPointPrecision); + Matrix6x6 delayFeedback = new Matrix6x6(delayFeedbackBaseGain , 0.0f , 0.0f , 0.0f , delayFeedbackCrossGain, delayFeedbackCrossGain, + 0.0f , delayFeedbackBaseGain , 0.0f , delayFeedbackCrossGain, delayFeedbackCrossGain, 0.0f , + delayFeedbackCrossGain, 0.0f , delayFeedbackBaseGain , delayFeedbackCrossGain, 0.0f , 0.0f , + 0.0f , delayFeedbackCrossGain, delayFeedbackCrossGain, delayFeedbackBaseGain , 0.0f , 0.0f , + delayFeedbackCrossGain, delayFeedbackCrossGain, 0.0f , 0.0f , delayFeedbackBaseGain , 0.0f , + 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , feedbackGain); + for (int i = 0; i < sampleCount; i++) { - for (int j = 0; j < channelCount; j++) + Vector6 channelInput = new Vector6 { - channelInput[j] = *((float*)inputBuffers[j] + i) * 64; - delayLineValues[j] = state.DelayLines[j].Read(); - } - - temp[0] = channelInput[0] * inGain + (delayLineValues[2] + delayLineValues[4]) * delayFeedbackCrossGain + delayLineValues[0] * delayFeedbackBaseGain; - temp[1] = channelInput[1] * inGain + (delayLineValues[4] + delayLineValues[3]) * delayFeedbackCrossGain + delayLineValues[1] * delayFeedbackBaseGain; - temp[2] = channelInput[2] * inGain + (delayLineValues[3] + delayLineValues[0]) * delayFeedbackCrossGain + delayLineValues[2] * delayFeedbackBaseGain; - temp[3] = channelInput[3] * inGain + (delayLineValues[1] + delayLineValues[2]) * delayFeedbackCrossGain + delayLineValues[3] * delayFeedbackBaseGain; - temp[4] = channelInput[4] * inGain + (delayLineValues[0] + delayLineValues[1]) * delayFeedbackCrossGain + delayLineValues[4] * delayFeedbackBaseGain; - temp[5] = channelInput[5] * inGain + delayLineValues[5] * delayFeedbackBaseGain; - - for (int j = 0; j < channelCount; j++) + X = *((float*)inputBuffers[0] + i) * 64, + Y = *((float*)inputBuffers[1] + i) * 64, + Z = *((float*)inputBuffers[2] + i) * 64, + W = *((float*)inputBuffers[3] + i) * 64, + V = *((float*)inputBuffers[4] + i) * 64, + U = *((float*)inputBuffers[5] + i) * 64 + }; + + Vector6 delayLineValues = new Vector6 { - float lowPassResult = state.LowPassFeedbackGain * state.LowPassZ[j] + temp[j] * state.LowPassBaseGain; - - state.LowPassZ[j] = lowPassResult; - state.DelayLines[j].Update(lowPassResult); - - *((float*)outputBuffers[j] + i) = (channelInput[j] * dryGain + delayLineValues[j] * outGain) / 64; - } + X = state.DelayLines[0].Read(), + Y = state.DelayLines[1].Read(), + Z = state.DelayLines[2].Read(), + W = state.DelayLines[3].Read(), + V = state.DelayLines[4].Read(), + U = state.DelayLines[5].Read() + }; + + Vector6 temp = MatrixHelper.Transform(ref channelInput, ref delayFeedback) + channelInput * inGain; + + state.UpdateLowPassFilter(ref Unsafe.As(ref temp), channelCount); + + *((float*)outputBuffers[0] + i) = (channelInput.X * dryGain + delayLineValues.X * outGain) / 64; + *((float*)outputBuffers[1] + i) = (channelInput.Y * dryGain + delayLineValues.Y * outGain) / 64; + *((float*)outputBuffers[2] + i) = (channelInput.Z * dryGain + delayLineValues.Z * outGain) / 64; + *((float*)outputBuffers[3] + i) = (channelInput.W * dryGain + delayLineValues.W * outGain) / 64; + *((float*)outputBuffers[4] + i) = (channelInput.V * dryGain + delayLineValues.V * outGain) / 64; + *((float*)outputBuffers[5] + i) = (channelInput.U * dryGain + delayLineValues.U * outGain) / 64; } } diff --git a/Ryujinx.Audio/Renderer/Dsp/State/DelayState.cs b/Ryujinx.Audio/Renderer/Dsp/State/DelayState.cs index 7b694fb0ae77..21ffbebd638a 100644 --- a/Ryujinx.Audio/Renderer/Dsp/State/DelayState.cs +++ b/Ryujinx.Audio/Renderer/Dsp/State/DelayState.cs @@ -17,6 +17,7 @@ using Ryujinx.Audio.Renderer.Dsp.Effect; using Ryujinx.Audio.Renderer.Parameter.Effect; +using System.Runtime.CompilerServices; namespace Ryujinx.Audio.Renderer.Dsp.State { @@ -43,7 +44,6 @@ public DelayState(ref DelayParameter parameter, ulong workBuffer) { DelayLines[i] = new DelayLine(sampleRate, parameter.DelayTimeMax); DelayLines[i].SetDelay(parameter.DelayTime); - LowPassZ[0] = 0; } UpdateParameter(ref parameter); @@ -69,5 +69,16 @@ public void UpdateParameter(ref DelayParameter parameter) LowPassFeedbackGain = 0.95f * FixedPointHelper.ToFloat(parameter.LowPassAmount, FixedPointPrecision); LowPassBaseGain = 1.0f - LowPassFeedbackGain; } + + public void UpdateLowPassFilter(ref float tempRawRef, uint channelCount) + { + for (int i = 0; i < channelCount; i++) + { + float lowPassResult = LowPassFeedbackGain * LowPassZ[i] + Unsafe.Add(ref tempRawRef, i) * LowPassBaseGain; + + LowPassZ[i] = lowPassResult; + DelayLines[i].Update(lowPassResult); + } + } } } diff --git a/Ryujinx.Audio/Renderer/Utils/Math/Matrix2x2.cs b/Ryujinx.Audio/Renderer/Utils/Math/Matrix2x2.cs new file mode 100644 index 000000000000..f25b15379e9d --- /dev/null +++ b/Ryujinx.Audio/Renderer/Utils/Math/Matrix2x2.cs @@ -0,0 +1,71 @@ +namespace Ryujinx.Audio.Renderer.Utils.Math +{ + record struct Matrix2x2 + { + public float M11; + public float M12; + public float M21; + public float M22; + + public Matrix2x2(float m11, float m12, + float m21, float m22) + { + M11 = m11; + M12 = m12; + + M21 = m21; + M22 = m22; + } + + public static Matrix2x2 operator +(Matrix2x2 value1, Matrix2x2 value2) + { + Matrix2x2 m; + + m.M11 = value1.M11 + value2.M11; + m.M12 = value1.M12 + value2.M12; + m.M21 = value1.M21 + value2.M21; + m.M22 = value1.M22 + value2.M22; + + return m; + } + + public static Matrix2x2 operator -(Matrix2x2 value1, float value2) + { + Matrix2x2 m; + + m.M11 = value1.M11 - value2; + m.M12 = value1.M12 - value2; + m.M21 = value1.M21 - value2; + m.M22 = value1.M22 - value2; + + return m; + } + + public static Matrix2x2 operator *(Matrix2x2 value1, float value2) + { + Matrix2x2 m; + + m.M11 = value1.M11 * value2; + m.M12 = value1.M12 * value2; + m.M21 = value1.M21 * value2; + m.M22 = value1.M22 * value2; + + return m; + } + + public static Matrix2x2 operator *(Matrix2x2 value1, Matrix2x2 value2) + { + Matrix2x2 m; + + // First row + m.M11 = value1.M11 * value2.M11 + value1.M12 * value2.M21; + m.M12 = value1.M11 * value2.M12 + value1.M12 * value2.M22; + + // Second row + m.M21 = value1.M21 * value2.M11 + value1.M22 * value2.M21; + m.M22 = value1.M21 * value2.M12 + value1.M22 * value2.M22; + + return m; + } + } +} diff --git a/Ryujinx.Audio/Renderer/Utils/Math/Matrix6x6.cs b/Ryujinx.Audio/Renderer/Utils/Math/Matrix6x6.cs new file mode 100644 index 000000000000..41e2fc843ada --- /dev/null +++ b/Ryujinx.Audio/Renderer/Utils/Math/Matrix6x6.cs @@ -0,0 +1,97 @@ +namespace Ryujinx.Audio.Renderer.Utils.Math +{ + record struct Matrix6x6 + { + public float M11; + public float M12; + public float M13; + public float M14; + public float M15; + public float M16; + + public float M21; + public float M22; + public float M23; + public float M24; + public float M25; + public float M26; + + public float M31; + public float M32; + public float M33; + public float M34; + public float M35; + public float M36; + + public float M41; + public float M42; + public float M43; + public float M44; + public float M45; + public float M46; + + public float M51; + public float M52; + public float M53; + public float M54; + public float M55; + public float M56; + + public float M61; + public float M62; + public float M63; + public float M64; + public float M65; + public float M66; + + public Matrix6x6(float m11, float m12, float m13, float m14, float m15, float m16, + float m21, float m22, float m23, float m24, float m25, float m26, + float m31, float m32, float m33, float m34, float m35, float m36, + float m41, float m42, float m43, float m44, float m45, float m46, + float m51, float m52, float m53, float m54, float m55, float m56, + float m61, float m62, float m63, float m64, float m65, float m66) + { + M11 = m11; + M12 = m12; + M13 = m13; + M14 = m14; + M15 = m15; + M16 = m16; + + M21 = m21; + M22 = m22; + M23 = m23; + M24 = m24; + M25 = m25; + M26 = m26; + + M31 = m31; + M32 = m32; + M33 = m33; + M34 = m34; + M35 = m35; + M36 = m36; + + M41 = m41; + M42 = m42; + M43 = m43; + M44 = m44; + M45 = m45; + M46 = m46; + + M51 = m51; + M52 = m52; + M53 = m53; + M54 = m54; + M55 = m55; + M56 = m56; + + M61 = m61; + M62 = m62; + M63 = m63; + M64 = m64; + M65 = m65; + M66 = m66; + } + } +} diff --git a/Ryujinx.Audio/Renderer/Utils/Math/MatrixHelper.cs b/Ryujinx.Audio/Renderer/Utils/Math/MatrixHelper.cs new file mode 100644 index 000000000000..3fbffa94e6e3 --- /dev/null +++ b/Ryujinx.Audio/Renderer/Utils/Math/MatrixHelper.cs @@ -0,0 +1,45 @@ +using Ryujinx.Audio.Renderer.Utils.Math; +using System.Numerics; +using System.Runtime.CompilerServices; + +namespace Ryujinx.Audio.Renderer.Dsp +{ + static class MatrixHelper + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector6 Transform(ref Vector6 value1, ref Matrix6x6 value2) + { + return new Vector6 + { + X = value2.M11 * value1.X + value2.M12 * value1.Y + value2.M13 * value1.Z + value2.M14 * value1.W + value2.M15 * value1.V + value2.M16 * value1.U, + Y = value2.M21 * value1.X + value2.M22 * value1.Y + value2.M23 * value1.Z + value2.M24 * value1.W + value2.M25 * value1.V + value2.M26 * value1.U, + Z = value2.M31 * value1.X + value2.M32 * value1.Y + value2.M33 * value1.Z + value2.M34 * value1.W + value2.M35 * value1.V + value2.M36 * value1.U, + W = value2.M41 * value1.X + value2.M42 * value1.Y + value2.M43 * value1.Z + value2.M44 * value1.W + value2.M45 * value1.V + value2.M46 * value1.U, + V = value2.M51 * value1.X + value2.M52 * value1.Y + value2.M53 * value1.Z + value2.M54 * value1.W + value2.M55 * value1.V + value2.M56 * value1.U, + U = value2.M61 * value1.X + value2.M62 * value1.Y + value2.M63 * value1.Z + value2.M64 * value1.W + value2.M65 * value1.V + value2.M66 * value1.U, + }; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector4 Transform(ref Vector4 value1, ref Matrix4x4 value2) + { + return new Vector4 + { + X = value2.M11 * value1.X + value2.M12 * value1.Y + value2.M13 * value1.Z + value2.M14 * value1.W, + Y = value2.M21 * value1.X + value2.M22 * value1.Y + value2.M23 * value1.Z + value2.M24 * value1.W, + Z = value2.M31 * value1.X + value2.M32 * value1.Y + value2.M33 * value1.Z + value2.M34 * value1.W, + W = value2.M41 * value1.X + value2.M42 * value1.Y + value2.M43 * value1.Z + value2.M44 * value1.W + }; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector2 Transform(ref Vector2 value1, ref Matrix2x2 value2) + { + return new Vector2 + { + X = value2.M11 * value1.X + value2.M12 * value1.Y, + Y = value2.M21 * value1.X + value2.M22 * value1.Y, + }; + } + } +} diff --git a/Ryujinx.Audio/Renderer/Utils/Math/Vector6.cs b/Ryujinx.Audio/Renderer/Utils/Math/Vector6.cs new file mode 100644 index 000000000000..b2cd481b6d1c --- /dev/null +++ b/Ryujinx.Audio/Renderer/Utils/Math/Vector6.cs @@ -0,0 +1,56 @@ +using System.Runtime.CompilerServices; + +namespace Ryujinx.Audio.Renderer.Utils.Math +{ + record struct Vector6 + { + public float X; + public float Y; + public float Z; + public float W; + public float V; + public float U; + + public Vector6(float value) : this(value, value, value, value, value, value) + { + } + + public Vector6(float x, float y, float z, float w, float v, float u) + { + X = x; + Y = y; + Z = z; + W = w; + V = v; + U = u; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector6 operator +(Vector6 left, Vector6 right) + { + return new Vector6(left.X + right.X, + left.Y + right.Y, + left.Z + right.Z, + left.W + right.W, + left.V + right.V, + left.U + right.U); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector6 operator *(Vector6 left, Vector6 right) + { + return new Vector6(left.X * right.X, + left.Y * right.Y, + left.Z * right.Z, + left.W * right.W, + left.V * right.V, + left.U * right.U); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector6 operator *(Vector6 left, float right) + { + return left * new Vector6(right); + } + } +} From 952f6f8a65ca72ab5e53fb03f3b4bd0194c4a34f Mon Sep 17 00:00:00 2001 From: gdkchan Date: Fri, 8 Apr 2022 06:02:06 -0300 Subject: [PATCH 4/8] Calculate vertex buffer size from index buffer type (#3253) * Calculate vertex buffer size from index buffer type * We also need to update the size if first vertex changes --- .../Engine/Threed/StateUpdater.cs | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs b/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs index f44089598087..8d67d0fd737a 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs @@ -35,6 +35,8 @@ class StateUpdater private byte _vsClipDistancesWritten; private bool _prevDrawIndexed; + private IndexType _prevIndexType; + private uint _prevFirstVertex; private bool _prevTfEnable; /// @@ -214,6 +216,17 @@ public void Update() _prevDrawIndexed = _drawState.DrawIndexed; } + // In some cases, the index type is also used to guess the + // vertex buffer size, so we must update it if the type changed too. + if (_drawState.DrawIndexed && + (_prevIndexType != _state.State.IndexBufferState.Type || + _prevFirstVertex != _state.State.FirstVertex)) + { + _updateTracker.ForceDirty(VertexBufferStateIndex); + _prevIndexType = _state.State.IndexBufferState.Type; + _prevFirstVertex = _state.State.FirstVertex; + } + bool tfEnable = _state.State.TfEnable; if (!tfEnable && _prevTfEnable) @@ -867,6 +880,9 @@ private void UpdateIndexBufferState() /// private void UpdateVertexBufferState() { + IndexType indexType = _state.State.IndexBufferState.Type; + bool indexTypeSmall = indexType == IndexType.UByte || indexType == IndexType.UShort; + _drawState.IsAnyVbInstanced = false; for (int index = 0; index < Constants.TotalVertexBuffers; index++) @@ -898,12 +914,27 @@ private void UpdateVertexBufferState() { // This size may be (much) larger than the real vertex buffer size. // Avoid calculating it this way, unless we don't have any other option. + size = endAddress.Pack() - address + 1; + + if (stride > 0 && indexTypeSmall) + { + // If the index type is a small integer type, then we might be still able + // to reduce the vertex buffer size based on the maximum possible index value. + + ulong maxVertexBufferSize = indexType == IndexType.UByte ? 0x100UL : 0x10000UL; + + maxVertexBufferSize += _state.State.FirstVertex; + maxVertexBufferSize *= (uint)stride; + + size = Math.Min(size, maxVertexBufferSize); + } } else { // For non-indexed draws, we can guess the size from the vertex count // and stride. + int firstInstance = (int)_state.State.FirstInstance; var drawState = _state.State.VertexBufferDrawState; From 6a9e9b536086e5f50388be215826188627a06ba4 Mon Sep 17 00:00:00 2001 From: MutantAura <44103205+MutantAura@users.noreply.github.com> Date: Fri, 8 Apr 2022 10:09:35 +0100 Subject: [PATCH 5/8] Remove save data creation prompt (#3252) * begone * review * mods directory update --- Ryujinx.HLE/HOS/ModLoader.cs | 2 +- Ryujinx/Ui/Widgets/GameTableContextMenu.cs | 17 ++--------------- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/Ryujinx.HLE/HOS/ModLoader.cs b/Ryujinx.HLE/HOS/ModLoader.cs index 4f37903366a0..c24f0f745a2c 100644 --- a/Ryujinx.HLE/HOS/ModLoader.cs +++ b/Ryujinx.HLE/HOS/ModLoader.cs @@ -161,7 +161,7 @@ public string GetTitleDir(string modsBasePath, string titleId) if (titleModsPath == null) { - Logger.Info?.Print(LogClass.ModLoader, $"Creating mods dir for Title {titleId.ToUpper()}"); + Logger.Info?.Print(LogClass.ModLoader, $"Creating mods directory for Title {titleId.ToUpper()}"); titleModsPath = contentsDir.CreateSubdirectory(titleId); } diff --git a/Ryujinx/Ui/Widgets/GameTableContextMenu.cs b/Ryujinx/Ui/Widgets/GameTableContextMenu.cs index 306033d2d207..0e3b4892f098 100644 --- a/Ryujinx/Ui/Widgets/GameTableContextMenu.cs +++ b/Ryujinx/Ui/Widgets/GameTableContextMenu.cs @@ -86,23 +86,10 @@ private bool TryFindSaveData(string titleName, ulong titleId, BlitStruct Date: Fri, 8 Apr 2022 10:17:38 +0100 Subject: [PATCH 6/8] Lop3Expression: Optimize expressions (#3184) * lut3 * bugfixes * TruthTable * false/true -> 0/-1 * add or to expressions * fix inversions * increment cache version --- Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs | 2 +- .../Instructions/Lop3Expression.cs | 197 +++++++++--------- 2 files changed, 98 insertions(+), 101 deletions(-) diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index e6d46884a701..5e0e6bedb612 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -40,7 +40,7 @@ class ShaderCache : IDisposable /// /// Version of the codegen (to be changed when codegen or guest format change). /// - private const ulong ShaderCodeGenVersion = 3054; + private const ulong ShaderCodeGenVersion = 3184; // Progress reporting helpers private volatile int _shaderCount; diff --git a/Ryujinx.Graphics.Shader/Instructions/Lop3Expression.cs b/Ryujinx.Graphics.Shader/Instructions/Lop3Expression.cs index 76a520075f2f..6217ce530a15 100644 --- a/Ryujinx.Graphics.Shader/Instructions/Lop3Expression.cs +++ b/Ryujinx.Graphics.Shader/Instructions/Lop3Expression.cs @@ -7,138 +7,135 @@ namespace Ryujinx.Graphics.Shader.Instructions { static class Lop3Expression { - public static Operand GetFromTruthTable(EmitterContext context, Operand srcA, Operand srcB, Operand srcC, int imm) + private enum TruthTable : byte { - Operand expr = null; - - // Handle some simple cases, or cases where - // the KMap would yield poor results (like XORs). - if (imm == 0x96 || imm == 0x69) - { - // XOR (0x96) and XNOR (0x69). - if (imm == 0x69) - { - srcA = context.BitwiseNot(srcA); - } - - expr = context.BitwiseExclusiveOr(srcA, srcB); - expr = context.BitwiseExclusiveOr(expr, srcC); - - return expr; - } - else if (imm == 0) - { - // Always false. - return Const(IrConsts.False); - } - else if (imm == 0xff) - { - // Always true. - return Const(IrConsts.True); - } - - int map; - - // Encode into gray code. - map = ((imm >> 0) & 1) << 0; - map |= ((imm >> 1) & 1) << 4; - map |= ((imm >> 2) & 1) << 1; - map |= ((imm >> 3) & 1) << 5; - map |= ((imm >> 4) & 1) << 3; - map |= ((imm >> 5) & 1) << 7; - map |= ((imm >> 6) & 1) << 2; - map |= ((imm >> 7) & 1) << 6; - - // Solve KMap, get sum of products. - int visited = 0; + False = 0x00, // false + True = 0xff, // true + In = 0xf0, // a + And2 = 0xc0, // a & b + Or2 = 0xfc, // a | b + Xor2 = 0x3c, // a ^ b + And3 = 0x80, // a & b & c + Or3 = 0xfe, // a | b | c + XorAnd = 0x60, // a & (b ^ c) + XorOr = 0xf6, // a | (b ^ c) + OrAnd = 0xe0, // a & (b | c) + AndOr = 0xf8, // a | (b & c) + Onehot = 0x16, // (a & !b & !c) | (!a & b & !c) | (!a & !b & c) - Only one value is true. + Majority = 0xe8, // Popcount(a, b, c) >= 2 + Gamble = 0x81, // (a & b & c) | (!a & !b & !c) - All on or all off + InverseGamble = 0x7e, // Inverse of Gamble + Dot = 0x1a, // a ^ (c | (a & b)) + Mux = 0xca, // a ? b : c + AndXor = 0x78, // a ^ (b & c) + OrXor = 0x1e, // a ^ (b | c) + Xor3 = 0x96, // a ^ b ^ c + } - for (int index = 0; index < 8 && visited != 0xff; index++) + public static Operand GetFromTruthTable(EmitterContext context, Operand srcA, Operand srcB, Operand srcC, int imm) + { + for (int i = 0; i < 0x40; i++) { - if ((map & (1 << index)) == 0) - { - continue; - } - - int mask = 0; + TruthTable currImm = (TruthTable)imm; - for (int mSize = 4; mSize != 0; mSize >>= 1) + Operand x = srcA; + Operand y = srcB; + Operand z = srcC; + + if ((i & 0x01) != 0) { - mask = RotateLeft4((1 << mSize) - 1, index & 3) << (index & 4); - - if ((map & mask) == mask) - { - break; - } + (x, y) = (y, x); + currImm = PermuteTable(currImm, 7, 6, 3, 2, 5, 4, 1, 0); } - // The mask should wrap, if we are on the high row, shift to low etc. - int mask2 = (index & 4) != 0 ? mask >> 4 : mask << 4; - - if ((map & mask2) == mask2) + if ((i & 0x02) != 0) { - mask |= mask2; + (x, z) = (z, x); + currImm = PermuteTable(currImm, 7, 3, 5, 1, 6, 2, 4, 0); } - if ((mask & visited) == mask) + if ((i & 0x04) != 0) { - continue; + (y, z) = (z, y); + currImm = PermuteTable(currImm, 7, 5, 6, 4, 3, 1, 2, 0); } - bool notA = (mask & 0x33) != 0; - bool notB = (mask & 0x99) != 0; - bool notC = (mask & 0x0f) != 0; - - bool aChanges = (mask & 0xcc) != 0 && notA; - bool bChanges = (mask & 0x66) != 0 && notB; - bool cChanges = (mask & 0xf0) != 0 && notC; - - Operand localExpr = null; - - void And(Operand source) + if ((i & 0x08) != 0) { - if (localExpr != null) - { - localExpr = context.BitwiseAnd(localExpr, source); - } - else - { - localExpr = source; - } + x = context.BitwiseNot(x); + currImm = PermuteTable(currImm, 3, 2, 1, 0, 7, 6, 5, 4); } - if (!aChanges) + if ((i & 0x10) != 0) { - And(context.BitwiseNot(srcA, notA)); + y = context.BitwiseNot(y); + currImm = PermuteTable(currImm, 5, 4, 7, 6, 1, 0, 3, 2); } - if (!bChanges) + if ((i & 0x20) != 0) { - And(context.BitwiseNot(srcB, notB)); + z = context.BitwiseNot(z); + currImm = PermuteTable(currImm, 6, 7, 4, 5, 2, 3, 0, 1); } - if (!cChanges) + Operand result = GetExpr(currImm, context, x, y, z); + if (result != null) { - And(context.BitwiseNot(srcC, notC)); + return result; } - if (expr != null) - { - expr = context.BitwiseOr(expr, localExpr); - } - else + Operand notResult = GetExpr((TruthTable)((~(int)currImm) & 0xff), context, x, y, z); + if (notResult != null) { - expr = localExpr; + return context.BitwiseNot(notResult); } - - visited |= mask; } - return expr; + return null; + } + + private static Operand GetExpr(TruthTable imm, EmitterContext context, Operand x, Operand y, Operand z) + { + return imm switch + { + TruthTable.False => Const(0), + TruthTable.True => Const(-1), + TruthTable.In => x, + TruthTable.And2 => context.BitwiseAnd(x, y), + TruthTable.Or2 => context.BitwiseOr(x, y), + TruthTable.Xor2 => context.BitwiseExclusiveOr(x, y), + TruthTable.And3 => context.BitwiseAnd(x, context.BitwiseAnd(y, z)), + TruthTable.Or3 => context.BitwiseOr(x, context.BitwiseOr(y, z)), + TruthTable.XorAnd => context.BitwiseAnd(x, context.BitwiseExclusiveOr(y, z)), + TruthTable.XorOr => context.BitwiseOr(x, context.BitwiseExclusiveOr(y, z)), + TruthTable.OrAnd => context.BitwiseAnd(x, context.BitwiseOr(y, z)), + TruthTable.AndOr => context.BitwiseOr(x, context.BitwiseAnd(y, z)), + TruthTable.Onehot => context.BitwiseExclusiveOr(context.BitwiseOr(x, y), context.BitwiseOr(z, context.BitwiseAnd(x, y))), + TruthTable.Majority => context.BitwiseAnd(context.BitwiseOr(x, y), context.BitwiseOr(z, context.BitwiseAnd(x, y))), + TruthTable.InverseGamble => context.BitwiseOr(context.BitwiseExclusiveOr(x, y), context.BitwiseExclusiveOr(x, z)), + TruthTable.Dot => context.BitwiseAnd(context.BitwiseExclusiveOr(x, z), context.BitwiseOr(context.BitwiseNot(y), z)), + TruthTable.Mux => context.BitwiseOr(context.BitwiseAnd(x, y), context.BitwiseAnd(context.BitwiseNot(x), z)), + TruthTable.AndXor => context.BitwiseExclusiveOr(x, context.BitwiseAnd(y, z)), + TruthTable.OrXor => context.BitwiseExclusiveOr(x, context.BitwiseOr(y, z)), + TruthTable.Xor3 => context.BitwiseExclusiveOr(x, context.BitwiseExclusiveOr(y, z)), + _ => null + }; } - private static int RotateLeft4(int value, int shift) + private static TruthTable PermuteTable(TruthTable imm, int bit7, int bit6, int bit5, int bit4, int bit3, int bit2, int bit1, int bit0) { - return ((value << shift) | (value >> (4 - shift))) & 0xf; + int result = 0; + + result |= (((int)imm >> 0) & 1) << bit0; + result |= (((int)imm >> 1) & 1) << bit1; + result |= (((int)imm >> 2) & 1) << bit2; + result |= (((int)imm >> 3) & 1) << bit3; + result |= (((int)imm >> 4) & 1) << bit4; + result |= (((int)imm >> 5) & 1) << bit5; + result |= (((int)imm >> 6) & 1) << bit6; + result |= (((int)imm >> 7) & 1) << bit7; + + return (TruthTable)result; } } } \ No newline at end of file From 3139a85a2b8e83aa6babfbc683bd46ca1d75e448 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Fri, 8 Apr 2022 06:26:48 -0300 Subject: [PATCH 7/8] Allow copy texture views to have mismatching multisample state (#3152) --- Ryujinx.Graphics.Gpu/Image/Texture.cs | 25 ++++++++++---- Ryujinx.Graphics.Gpu/Image/TextureCache.cs | 38 +++++++++++++++++++--- 2 files changed, 51 insertions(+), 12 deletions(-) diff --git a/Ryujinx.Graphics.Gpu/Image/Texture.cs b/Ryujinx.Graphics.Gpu/Image/Texture.cs index e1f00606feef..cfb7a3b76a8d 100644 --- a/Ryujinx.Graphics.Gpu/Image/Texture.cs +++ b/Ryujinx.Graphics.Gpu/Image/Texture.cs @@ -1136,17 +1136,33 @@ public TextureMatchQuality IsExactMatch(TextureInfo info, TextureSearchFlags fla /// Texture view physical memory ranges /// Layer size on the given texture /// Host GPU capabilities + /// Indicates that multisample textures are allowed to match non-multisample requested textures /// Texture view initial layer on this texture /// Texture view first mipmap level on this texture /// The level of compatiblilty a view with the given parameters created from this texture has - public TextureViewCompatibility IsViewCompatible(TextureInfo info, MultiRange range, int layerSize, Capabilities caps, out int firstLayer, out int firstLevel) + public TextureViewCompatibility IsViewCompatible(TextureInfo info, MultiRange range, int layerSize, Capabilities caps, bool allowMs, out int firstLayer, out int firstLevel) { TextureViewCompatibility result = TextureViewCompatibility.Full; result = TextureCompatibility.PropagateViewCompatibility(result, TextureCompatibility.ViewFormatCompatible(Info, info, caps)); if (result != TextureViewCompatibility.Incompatible) { - result = TextureCompatibility.PropagateViewCompatibility(result, TextureCompatibility.ViewTargetCompatible(Info, info)); + bool msTargetCompatible = false; + + if (allowMs) + { + msTargetCompatible = Info.Target == Target.Texture2DMultisample && info.Target == Target.Texture2D; + } + + if (!msTargetCompatible) + { + result = TextureCompatibility.PropagateViewCompatibility(result, TextureCompatibility.ViewTargetCompatible(Info, info)); + + if (Info.SamplesInX != info.SamplesInX || Info.SamplesInY != info.SamplesInY) + { + result = TextureViewCompatibility.Incompatible; + } + } if (result == TextureViewCompatibility.Full && Info.FormatInfo.Format != info.FormatInfo.Format && !_context.Capabilities.SupportsMismatchingViewFormat) { @@ -1156,11 +1172,6 @@ public TextureViewCompatibility IsViewCompatible(TextureInfo info, MultiRange ra result = TextureViewCompatibility.CopyOnly; } - - if (Info.SamplesInX != info.SamplesInX || Info.SamplesInY != info.SamplesInY) - { - result = TextureViewCompatibility.Incompatible; - } } firstLayer = 0; diff --git a/Ryujinx.Graphics.Gpu/Image/TextureCache.cs b/Ryujinx.Graphics.Gpu/Image/TextureCache.cs index 16248308123f..4fa80c95d82d 100644 --- a/Ryujinx.Graphics.Gpu/Image/TextureCache.cs +++ b/Ryujinx.Graphics.Gpu/Image/TextureCache.cs @@ -542,7 +542,14 @@ public Texture FindOrCreateTexture( for (int index = 0; index < overlapsCount; index++) { Texture overlap = _textureOverlaps[index]; - TextureViewCompatibility overlapCompatibility = overlap.IsViewCompatible(info, range.Value, sizeInfo.LayerSize, _context.Capabilities, out int firstLayer, out int firstLevel); + TextureViewCompatibility overlapCompatibility = overlap.IsViewCompatible( + info, + range.Value, + sizeInfo.LayerSize, + _context.Capabilities, + flags.HasFlag(TextureSearchFlags.ForCopy), + out int firstLayer, + out int firstLevel); if (overlapCompatibility == TextureViewCompatibility.Full) { @@ -650,7 +657,14 @@ public Texture FindOrCreateTexture( Texture overlap = _textureOverlaps[index]; bool overlapInCache = overlap.CacheNode != null; - TextureViewCompatibility compatibility = texture.IsViewCompatible(overlap.Info, overlap.Range, overlap.LayerSize, _context.Capabilities, out int firstLayer, out int firstLevel); + TextureViewCompatibility compatibility = texture.IsViewCompatible( + overlap.Info, + overlap.Range, + overlap.LayerSize, + _context.Capabilities, + false, + out int firstLayer, + out int firstLevel); if (overlap.IsView && compatibility == TextureViewCompatibility.Full) { @@ -1000,20 +1014,34 @@ private static TextureInfo AdjustSizes(Texture parent, TextureInfo info, int fir depthOrLayers = info.DepthOrLayers; } + // 2D and 2D multisample textures are not considered compatible. + // This specific case is required for copies, where the source texture might be multisample. + // In this case, we inherit the parent texture multisample state. + Target target = info.Target; + int samplesInX = info.SamplesInX; + int samplesInY = info.SamplesInY; + + if (target == Target.Texture2D && parent.Target == Target.Texture2DMultisample) + { + target = Target.Texture2DMultisample; + samplesInX = parent.Info.SamplesInX; + samplesInY = parent.Info.SamplesInY; + } + return new TextureInfo( info.GpuAddress, width, height, depthOrLayers, info.Levels, - info.SamplesInX, - info.SamplesInY, + samplesInX, + samplesInY, info.Stride, info.IsLinear, info.GobBlocksInY, info.GobBlocksInZ, info.GobBlocksInTileX, - info.Target, + target, info.FormatInfo, info.DepthStencilMode, info.SwizzleR, From e44a43c7e1ee0b25ef93a9419dbd6ac2eb7665b5 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Fri, 8 Apr 2022 07:42:39 -0300 Subject: [PATCH 8/8] Implement VMAD shader instruction and improve InvocationInfo and ISBERD handling (#3251) * Implement VMAD shader instruction and improve InvocationInfo and ISBERD handling * Shader cache version bump * Fix typo --- Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs | 2 +- .../CodeGen/Glsl/OperandManager.cs | 2 +- .../Decoders/InstDecoders.cs | 2 + Ryujinx.Graphics.Shader/InputTopology.cs | 28 +++-- .../Instructions/InstEmitAluHelper.cs | 29 +++++ .../Instructions/InstEmitAttribute.cs | 9 +- .../Instructions/InstEmitMove.cs | 26 ++--- .../Instructions/InstEmitVideoArithmetic.cs | 104 +++++++++++++++++- .../Instructions/InstEmitVideoMinMax.cs | 30 +---- 9 files changed, 171 insertions(+), 61 deletions(-) diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index 5e0e6bedb612..f38709001bda 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -40,7 +40,7 @@ class ShaderCache : IDisposable /// /// Version of the codegen (to be changed when codegen or guest format change). /// - private const ulong ShaderCodeGenVersion = 3184; + private const ulong ShaderCodeGenVersion = 3251; // Progress reporting helpers private volatile int _shaderCount; diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs index b1bd8188099b..2d6607ad00fe 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs @@ -250,9 +250,9 @@ public static string GetAttributeName(int value, ShaderConfig config, bool perPa : "gl_SubgroupInvocationID"; } - // TODO: There must be a better way to handle this... if (config.Stage == ShaderStage.Fragment) { + // TODO: There must be a better way to handle this... switch (value) { case AttributeConsts.PositionX: return $"(gl_FragCoord.x / {DefaultNames.SupportBlockRenderScaleName}[0])"; diff --git a/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs b/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs index b7a0caf10db2..98a43640709c 100644 --- a/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs +++ b/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs @@ -5144,6 +5144,7 @@ struct InstVadd public int SrcC => (int)((_opcode >> 39) & 0xFF); public int Pred => (int)((_opcode >> 16) & 0x7); public bool PredInv => (_opcode & 0x80000) != 0; + public int Imm16 => (int)((_opcode >> 20) & 0xFFFF); public bool WriteCC => (_opcode & 0x800000000000) != 0; public AvgMode AvgMode => (AvgMode)((_opcode >> 56) & 0x3); public bool DFormat => (_opcode & 0x40000000000000) != 0; @@ -5164,6 +5165,7 @@ struct InstVmad public int SrcC => (int)((_opcode >> 39) & 0xFF); public int Pred => (int)((_opcode >> 16) & 0x7); public bool PredInv => (_opcode & 0x80000) != 0; + public int Imm16 => (int)((_opcode >> 20) & 0xFFFF); public bool WriteCC => (_opcode & 0x800000000000) != 0; public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7)); public VectorSelect BSelect => (VectorSelect)((int)((_opcode >> 46) & 0x8) | (int)((_opcode >> 28) & 0x7)); diff --git a/Ryujinx.Graphics.Shader/InputTopology.cs b/Ryujinx.Graphics.Shader/InputTopology.cs index 429aa2119419..da3329090e01 100644 --- a/Ryujinx.Graphics.Shader/InputTopology.cs +++ b/Ryujinx.Graphics.Shader/InputTopology.cs @@ -13,16 +13,28 @@ static class InputTopologyExtensions { public static string ToGlslString(this InputTopology topology) { - switch (topology) + return topology switch { - case InputTopology.Points: return "points"; - case InputTopology.Lines: return "lines"; - case InputTopology.LinesAdjacency: return "lines_adjacency"; - case InputTopology.Triangles: return "triangles"; - case InputTopology.TrianglesAdjacency: return "triangles_adjacency"; - } + InputTopology.Points => "points", + InputTopology.Lines => "lines", + InputTopology.LinesAdjacency => "lines_adjacency", + InputTopology.Triangles => "triangles", + InputTopology.TrianglesAdjacency => "triangles_adjacency", + _ => "points" + }; + } - return "points"; + public static int ToInputVertices(this InputTopology topology) + { + return topology switch + { + InputTopology.Points => 1, + InputTopology.Lines or + InputTopology.LinesAdjacency => 2, + InputTopology.Triangles or + InputTopology.TrianglesAdjacency => 3, + _ => 1 + }; } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs index 3fbd0aeb2329..879075bae4e2 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs @@ -73,6 +73,26 @@ public static Operand GetPredLogicalOp(EmitterContext context, BoolOp logicOp, O }; } + public static Operand Extend(EmitterContext context, Operand src, VectorSelect type) + { + return type switch + { + VectorSelect.U8B0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8), + VectorSelect.U8B1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8), + VectorSelect.U8B2 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8), + VectorSelect.U8B3 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8), + VectorSelect.U16H0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16), + VectorSelect.U16H1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16), + VectorSelect.S8B0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8), + VectorSelect.S8B1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8), + VectorSelect.S8B2 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8), + VectorSelect.S8B3 => SignExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8), + VectorSelect.S16H0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16), + VectorSelect.S16H1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16), + _ => src + }; + } + public static void SetZnFlags(EmitterContext context, Operand dest, bool setCC, bool extended = false) { if (!setCC) @@ -118,6 +138,15 @@ public static void SetFPZnFlags(EmitterContext context, Operand dest, bool setCC } } + public static (Operand, Operand) NegateLong(EmitterContext context, Operand low, Operand high) + { + low = context.BitwiseNot(low); + high = context.BitwiseNot(high); + low = AddWithCarry(context, low, Const(1), out Operand carryOut); + high = context.IAdd(high, carryOut); + return (low, high); + } + public static Operand AddWithCarry(EmitterContext context, Operand lhs, Operand rhs, out Operand carryOut) { Operand result = context.IAdd(lhs, rhs); diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs index e2131602a0de..1cdb384224f4 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs @@ -168,10 +168,11 @@ public static void Isberd(EmitterContext context) { InstIsberd op = context.GetOp(); - // This instruction performs a load from ISBE memory, - // however it seems to be only used to get some vertex - // input data, so we instead propagate the offset so that - // it can be used on the attribute load. + // This instruction performs a load from ISBE (Internal Stage Buffer Entry) memory. + // Here, we just propagate the offset, as the result from this instruction is usually + // used with ALD to perform vertex load on geometry or tessellation shaders. + // The offset is calculated as (PrimitiveIndex * VerticesPerPrimitive) + VertexIndex. + // Since we hardcode PrimitiveIndex to zero, then the offset will be just VertexIndex. context.Copy(GetDest(op.Dest), GetSrcReg(context, op.SrcA)); } diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs index 51b706011ca6..16b02f978939 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs @@ -94,31 +94,19 @@ public static void S2r(EmitterContext context) case SReg.InvocationInfo: if (context.Config.Stage != ShaderStage.Compute && context.Config.Stage != ShaderStage.Fragment) { - Operand primitiveId = Attribute(AttributeConsts.PrimitiveId); - Operand patchVerticesIn; + // Note: Lowest 8-bits seems to contain some primitive index, + // but it seems to be NVIDIA implementation specific as it's only used + // to calculate ISBE offsets, so we can just keep it as zero. - if (context.Config.Stage == ShaderStage.TessellationEvaluation) + if (context.Config.Stage == ShaderStage.TessellationControl || + context.Config.Stage == ShaderStage.TessellationEvaluation) { - patchVerticesIn = context.ShiftLeft(Attribute(AttributeConsts.PatchVerticesIn), Const(16)); + src = context.ShiftLeft(Attribute(AttributeConsts.PatchVerticesIn), Const(16)); } else { - InputTopology inputTopology = context.Config.GpuAccessor.QueryPrimitiveTopology(); - - int inputVertices = inputTopology switch - { - InputTopology.Points => 1, - InputTopology.Lines or - InputTopology.LinesAdjacency => 2, - InputTopology.Triangles or - InputTopology.TrianglesAdjacency => 3, - _ => 1 - }; - - patchVerticesIn = Const(inputVertices << 16); + src = Const(context.Config.GpuAccessor.QueryPrimitiveTopology().ToInputVertices() << 16); } - - src = context.BitwiseOr(primitiveId, patchVerticesIn); } else { diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs index 43c0035a32a9..2d84c5bdb58f 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs @@ -1,7 +1,9 @@ using Ryujinx.Graphics.Shader.Decoders; +using Ryujinx.Graphics.Shader.IntermediateRepresentation; using Ryujinx.Graphics.Shader.Translation; using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper; +using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; namespace Ryujinx.Graphics.Shader.Instructions { @@ -11,8 +13,106 @@ public static void Vmad(EmitterContext context) { InstVmad op = context.GetOp(); - // TODO: Implement properly. - context.Copy(GetDest(op.Dest), GetSrcReg(context, op.SrcC)); + bool aSigned = (op.ASelect & VectorSelect.S8B0) != 0; + bool bSigned = (op.BSelect & VectorSelect.S8B0) != 0; + + Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect); + Operand srcC = context.INegate(GetSrcReg(context, op.SrcC), op.AvgMode == AvgMode.NegB); + Operand srcB; + + if (op.BVideo) + { + srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect); + } + else + { + int imm = op.Imm16; + + if (bSigned) + { + imm = (imm << 16) >> 16; + } + + srcB = Const(imm); + } + + Operand productLow = context.IMultiply(srcA, srcB); + Operand productHigh; + + if (aSigned == bSigned) + { + productHigh = aSigned + ? context.MultiplyHighS32(srcA, srcB) + : context.MultiplyHighU32(srcA, srcB); + } + else + { + Operand temp = aSigned + ? context.IMultiply(srcB, context.ShiftRightS32(srcA, Const(31))) + : context.IMultiply(srcA, context.ShiftRightS32(srcB, Const(31))); + + productHigh = context.IAdd(temp, context.MultiplyHighU32(srcA, srcB)); + } + + if (op.AvgMode == AvgMode.NegA) + { + (productLow, productHigh) = InstEmitAluHelper.NegateLong(context, productLow, productHigh); + } + + Operand resLow = InstEmitAluHelper.AddWithCarry(context, productLow, srcC, out Operand sumCarry); + Operand resHigh = context.IAdd(productHigh, sumCarry); + + if (op.AvgMode == AvgMode.PlusOne) + { + resLow = InstEmitAluHelper.AddWithCarry(context, resLow, Const(1), out Operand poCarry); + resHigh = context.IAdd(resHigh, poCarry); + } + + bool resSigned = op.ASelect == VectorSelect.S32 || + op.BSelect == VectorSelect.S32 || + op.AvgMode == AvgMode.NegB || + op.AvgMode == AvgMode.NegA; + + int shift = op.VideoScale switch + { + VideoScale.Shr7 => 7, + VideoScale.Shr15 => 15, + _ => 0 + }; + + if (shift != 0) + { + // Low = (Low >> Shift) | (High << (32 - Shift)) + // High >>= Shift + resLow = context.ShiftRightU32(resLow, Const(shift)); + resLow = context.BitwiseOr(resLow, context.ShiftLeft(resHigh, Const(32 - shift))); + resHigh = resSigned + ? context.ShiftRightS32(resHigh, Const(shift)) + : context.ShiftRightU32(resHigh, Const(shift)); + } + + Operand res = resLow; + + if (op.Sat) + { + Operand sign = context.ShiftRightS32(resHigh, Const(31)); + + if (resSigned) + { + Operand overflow = context.ICompareNotEqual(resHigh, context.ShiftRightS32(resLow, Const(31))); + Operand clampValue = context.ConditionalSelect(sign, Const(int.MinValue), Const(int.MaxValue)); + res = context.ConditionalSelect(overflow, clampValue, resLow); + } + else + { + Operand overflow = context.ICompareNotEqual(resHigh, Const(0)); + res = context.ConditionalSelect(overflow, context.BitwiseNot(sign), resLow); + } + } + + context.Copy(GetDest(op.Dest), res); + + // TODO: CC. } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs index 120d6f2273f4..67b185ab5f95 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs @@ -13,14 +13,13 @@ public static void Vmnmx(EmitterContext context) { InstVmnmx op = context.GetOp(); - Operand srcA = Extend(context, GetSrcReg(context, op.SrcA), op.ASelect); + Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect); Operand srcC = GetSrcReg(context, op.SrcC); - Operand srcB; if (op.BVideo) { - srcB = Extend(context, GetSrcReg(context, op.SrcB), op.BSelect); + srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect); } else { @@ -124,13 +123,12 @@ public static void Vsetp(EmitterContext context) { InstVsetp op = context.GetOp(); - Operand srcA = Extend(context, GetSrcReg(context, op.SrcA), op.ASelect); - + Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect); Operand srcB; if (op.BVideo) { - srcB = Extend(context, GetSrcReg(context, op.SrcB), op.BSelect); + srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect); } else { @@ -181,25 +179,5 @@ public static void Vsetp(EmitterContext context) context.Copy(Register(op.DestPred, RegisterType.Predicate), p0Res); context.Copy(Register(op.DestPredInv, RegisterType.Predicate), p1Res); } - - private static Operand Extend(EmitterContext context, Operand src, VectorSelect type) - { - return type switch - { - VectorSelect.U8B0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8), - VectorSelect.U8B1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8), - VectorSelect.U8B2 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8), - VectorSelect.U8B3 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8), - VectorSelect.U16H0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16), - VectorSelect.U16H1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16), - VectorSelect.S8B0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8), - VectorSelect.S8B1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8), - VectorSelect.S8B2 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8), - VectorSelect.S8B3 => SignExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8), - VectorSelect.S16H0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16), - VectorSelect.S16H1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16), - _ => src - }; - } } } \ No newline at end of file