diff --git a/Ryujinx.Audio/Renderer/Dsp/Command/DelayCommand.cs b/Ryujinx.Audio/Renderer/Dsp/Command/DelayCommand.cs index 8d416cc328d2..186efb6a38d1 100644 --- a/Ryujinx.Audio/Renderer/Dsp/Command/DelayCommand.cs +++ b/Ryujinx.Audio/Renderer/Dsp/Command/DelayCommand.cs @@ -18,8 +18,10 @@ using Ryujinx.Audio.Renderer.Dsp.State; using Ryujinx.Audio.Renderer.Parameter.Effect; using Ryujinx.Audio.Renderer.Server.Effect; +using Ryujinx.Audio.Renderer.Utils.Math; using System; using System.Diagnostics; +using System.Numerics; using System.Runtime.CompilerServices; namespace Ryujinx.Audio.Renderer.Dsp.Command @@ -70,7 +72,7 @@ public DelayCommand(uint bufferOffset, DelayParameter parameter, Memory outputBuffers, ReadOnlySpan inputBuffers, uint sampleCount) { const ushort channelCount = 2; - Span channelInput = stackalloc float[channelCount]; - Span delayLineValues = stackalloc float[channelCount]; - Span temp = stackalloc float[channelCount]; - float delayFeedbackBaseGain = state.DelayFeedbackBaseGain; float delayFeedbackCrossGain = state.DelayFeedbackCrossGain; float inGain = FixedPointHelper.ToFloat(Parameter.InGain, FixedPointPrecision); float dryGain = FixedPointHelper.ToFloat(Parameter.DryGain, FixedPointPrecision); float outGain = FixedPointHelper.ToFloat(Parameter.OutGain, FixedPointPrecision); + Matrix2x2 delayFeedback = new Matrix2x2(delayFeedbackBaseGain , delayFeedbackCrossGain, + delayFeedbackCrossGain, delayFeedbackBaseGain); + for (int i = 0; i < sampleCount; i++) { - for (int j = 0; j < channelCount; j++) + Vector2 channelInput = new Vector2 { - channelInput[j] = *((float*)inputBuffers[j] + i) * 64; - delayLineValues[j] = state.DelayLines[j].Read(); - } - - temp[0] = channelInput[0] * inGain + delayLineValues[1] * delayFeedbackCrossGain + delayLineValues[0] * delayFeedbackBaseGain; - temp[1] = channelInput[1] * inGain + delayLineValues[0] * delayFeedbackCrossGain + delayLineValues[1] * delayFeedbackBaseGain; + X = *((float*)inputBuffers[0] + i) * 64, + Y = 
*((float*)inputBuffers[1] + i) * 64, + }; - for (int j = 0; j < channelCount; j++) + Vector2 delayLineValues = new Vector2() { - float lowPassResult = state.LowPassFeedbackGain * state.LowPassZ[j] + temp[j] * state.LowPassBaseGain; + X = state.DelayLines[0].Read(), + Y = state.DelayLines[1].Read(), + }; - state.LowPassZ[j] = lowPassResult; - state.DelayLines[j].Update(lowPassResult); + Vector2 temp = MatrixHelper.Transform(ref delayLineValues, ref delayFeedback) + channelInput * inGain; /* feedback matrix mixes the delay-line outputs (as in the scalar code being removed); the dry input is only scaled by inGain */ - *((float*)outputBuffers[j] + i) = (channelInput[j] * dryGain + delayLineValues[j] * outGain) / 64; - } + state.UpdateLowPassFilter(ref Unsafe.As<Vector2, float>(ref temp), channelCount); + + *((float*)outputBuffers[0] + i) = (channelInput.X * dryGain + delayLineValues.X * outGain) / 64; + *((float*)outputBuffers[1] + i) = (channelInput.Y * dryGain + delayLineValues.Y * outGain) / 64; } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] private unsafe void ProcessDelayQuadraphonic(ref DelayState state, Span outputBuffers, ReadOnlySpan inputBuffers, uint sampleCount) { const ushort channelCount = 4; - Span channelInput = stackalloc float[channelCount]; - Span delayLineValues = stackalloc float[channelCount]; - Span temp = stackalloc float[channelCount]; - float delayFeedbackBaseGain = state.DelayFeedbackBaseGain; float delayFeedbackCrossGain = state.DelayFeedbackCrossGain; float inGain = FixedPointHelper.ToFloat(Parameter.InGain, FixedPointPrecision); float dryGain = FixedPointHelper.ToFloat(Parameter.DryGain, FixedPointPrecision); float outGain = FixedPointHelper.ToFloat(Parameter.OutGain, FixedPointPrecision); + Matrix4x4 delayFeedback = new Matrix4x4(delayFeedbackBaseGain , delayFeedbackCrossGain, delayFeedbackCrossGain, 0.0f, + delayFeedbackCrossGain, delayFeedbackBaseGain , 0.0f , delayFeedbackCrossGain, + delayFeedbackCrossGain, 0.0f , delayFeedbackBaseGain , delayFeedbackCrossGain, + 0.0f ,
delayFeedbackCrossGain, delayFeedbackCrossGain, delayFeedbackBaseGain); + + for (int i = 0; i < sampleCount; i++) { - for (int j = 0; j < channelCount; j++) + Vector4 channelInput = new Vector4 { - channelInput[j] = *((float*)inputBuffers[j] + i) * 64; - delayLineValues[j] = state.DelayLines[j].Read(); - } - - temp[0] = channelInput[0] * inGain + (delayLineValues[2] + delayLineValues[1]) * delayFeedbackCrossGain + delayLineValues[0] * delayFeedbackBaseGain; - temp[1] = channelInput[1] * inGain + (delayLineValues[0] + delayLineValues[3]) * delayFeedbackCrossGain + delayLineValues[1] * delayFeedbackBaseGain; - temp[2] = channelInput[2] * inGain + (delayLineValues[3] + delayLineValues[0]) * delayFeedbackCrossGain + delayLineValues[2] * delayFeedbackBaseGain; - temp[3] = channelInput[3] * inGain + (delayLineValues[1] + delayLineValues[2]) * delayFeedbackCrossGain + delayLineValues[3] * delayFeedbackBaseGain; + X = *((float*)inputBuffers[0] + i) * 64, + Y = *((float*)inputBuffers[1] + i) * 64, + Z = *((float*)inputBuffers[2] + i) * 64, + W = *((float*)inputBuffers[3] + i) * 64 + }; - for (int j = 0; j < channelCount; j++) + Vector4 delayLineValues = new Vector4() { - float lowPassResult = state.LowPassFeedbackGain * state.LowPassZ[j] + temp[j] * state.LowPassBaseGain; - - state.LowPassZ[j] = lowPassResult; - state.DelayLines[j].Update(lowPassResult); - - *((float*)outputBuffers[j] + i) = (channelInput[j] * dryGain + delayLineValues[j] * outGain) / 64; - } + X = state.DelayLines[0].Read(), + Y = state.DelayLines[1].Read(), + Z = state.DelayLines[2].Read(), + W = state.DelayLines[3].Read() + }; + + Vector4 temp = MatrixHelper.Transform(ref delayLineValues, ref delayFeedback) + channelInput * inGain; /* feedback matrix mixes the delay-line outputs (as in the scalar code being removed); the dry input is only scaled by inGain */ + + state.UpdateLowPassFilter(ref Unsafe.As<Vector4, float>(ref temp), channelCount); + + *((float*)outputBuffers[0] + i) = (channelInput.X * dryGain + delayLineValues.X * outGain) / 64; + *((float*)outputBuffers[1] + i) = (channelInput.Y * dryGain + delayLineValues.Y * outGain) / 64; +
*((float*)outputBuffers[2] + i) = (channelInput.Z * dryGain + delayLineValues.Z * outGain) / 64; + *((float*)outputBuffers[3] + i) = (channelInput.W * dryGain + delayLineValues.W * outGain) / 64; } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] private unsafe void ProcessDelaySurround(ref DelayState state, Span outputBuffers, ReadOnlySpan inputBuffers, uint sampleCount) { const ushort channelCount = 6; - Span channelInput = stackalloc float[channelCount]; - Span delayLineValues = stackalloc float[channelCount]; - Span temp = stackalloc float[channelCount]; - + float feedbackGain = FixedPointHelper.ToFloat(Parameter.FeedbackGain, FixedPointPrecision); float delayFeedbackBaseGain = state.DelayFeedbackBaseGain; float delayFeedbackCrossGain = state.DelayFeedbackCrossGain; float inGain = FixedPointHelper.ToFloat(Parameter.InGain, FixedPointPrecision); float dryGain = FixedPointHelper.ToFloat(Parameter.DryGain, FixedPointPrecision); float outGain = FixedPointHelper.ToFloat(Parameter.OutGain, FixedPointPrecision); + Matrix6x6 delayFeedback = new Matrix6x6(delayFeedbackBaseGain , 0.0f , 0.0f , 0.0f , delayFeedbackCrossGain, delayFeedbackCrossGain, + 0.0f , delayFeedbackBaseGain , 0.0f , delayFeedbackCrossGain, delayFeedbackCrossGain, 0.0f , + delayFeedbackCrossGain, 0.0f , delayFeedbackBaseGain , delayFeedbackCrossGain, 0.0f , 0.0f , + 0.0f , delayFeedbackCrossGain, delayFeedbackCrossGain, delayFeedbackBaseGain , 0.0f , 0.0f , + delayFeedbackCrossGain, delayFeedbackCrossGain, 0.0f , 0.0f , delayFeedbackBaseGain , 0.0f , + 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , feedbackGain); + for (int i = 0; i < sampleCount; i++) { - for (int j = 0; j < channelCount; j++) + Vector6 channelInput = new Vector6 { - channelInput[j] = *((float*)inputBuffers[j] + i) * 64; - delayLineValues[j] = state.DelayLines[j].Read(); - } - - temp[0] = channelInput[0] * inGain + (delayLineValues[2] + 
delayLineValues[4]) * delayFeedbackCrossGain + delayLineValues[0] * delayFeedbackBaseGain; - temp[1] = channelInput[1] * inGain + (delayLineValues[4] + delayLineValues[3]) * delayFeedbackCrossGain + delayLineValues[1] * delayFeedbackBaseGain; - temp[2] = channelInput[2] * inGain + (delayLineValues[3] + delayLineValues[0]) * delayFeedbackCrossGain + delayLineValues[2] * delayFeedbackBaseGain; - temp[3] = channelInput[3] * inGain + (delayLineValues[1] + delayLineValues[2]) * delayFeedbackCrossGain + delayLineValues[3] * delayFeedbackBaseGain; - temp[4] = channelInput[4] * inGain + (delayLineValues[0] + delayLineValues[1]) * delayFeedbackCrossGain + delayLineValues[4] * delayFeedbackBaseGain; - temp[5] = channelInput[5] * inGain + delayLineValues[5] * delayFeedbackBaseGain; - - for (int j = 0; j < channelCount; j++) + X = *((float*)inputBuffers[0] + i) * 64, + Y = *((float*)inputBuffers[1] + i) * 64, + Z = *((float*)inputBuffers[2] + i) * 64, + W = *((float*)inputBuffers[3] + i) * 64, + V = *((float*)inputBuffers[4] + i) * 64, + U = *((float*)inputBuffers[5] + i) * 64 + }; + + Vector6 delayLineValues = new Vector6 { - float lowPassResult = state.LowPassFeedbackGain * state.LowPassZ[j] + temp[j] * state.LowPassBaseGain; - - state.LowPassZ[j] = lowPassResult; - state.DelayLines[j].Update(lowPassResult); - - *((float*)outputBuffers[j] + i) = (channelInput[j] * dryGain + delayLineValues[j] * outGain) / 64; - } + X = state.DelayLines[0].Read(), + Y = state.DelayLines[1].Read(), + Z = state.DelayLines[2].Read(), + W = state.DelayLines[3].Read(), + V = state.DelayLines[4].Read(), + U = state.DelayLines[5].Read() + }; + + Vector6 temp = MatrixHelper.Transform(ref delayLineValues, ref delayFeedback) + channelInput * inGain; /* feedback matrix mixes the delay-line outputs (as in the scalar code being removed); the dry input is only scaled by inGain */ + + state.UpdateLowPassFilter(ref Unsafe.As<Vector6, float>(ref temp), channelCount); + + *((float*)outputBuffers[0] + i) = (channelInput.X * dryGain + delayLineValues.X * outGain) / 64; + *((float*)outputBuffers[1] + i) = (channelInput.Y * dryGain + delayLineValues.Y *
outGain) / 64; + *((float*)outputBuffers[2] + i) = (channelInput.Z * dryGain + delayLineValues.Z * outGain) / 64; + *((float*)outputBuffers[3] + i) = (channelInput.W * dryGain + delayLineValues.W * outGain) / 64; + *((float*)outputBuffers[4] + i) = (channelInput.V * dryGain + delayLineValues.V * outGain) / 64; + *((float*)outputBuffers[5] + i) = (channelInput.U * dryGain + delayLineValues.U * outGain) / 64; } } diff --git a/Ryujinx.Audio/Renderer/Dsp/State/DelayState.cs b/Ryujinx.Audio/Renderer/Dsp/State/DelayState.cs index 7b694fb0ae77..21ffbebd638a 100644 --- a/Ryujinx.Audio/Renderer/Dsp/State/DelayState.cs +++ b/Ryujinx.Audio/Renderer/Dsp/State/DelayState.cs @@ -17,6 +17,7 @@ using Ryujinx.Audio.Renderer.Dsp.Effect; using Ryujinx.Audio.Renderer.Parameter.Effect; +using System.Runtime.CompilerServices; namespace Ryujinx.Audio.Renderer.Dsp.State { @@ -43,7 +44,6 @@ public DelayState(ref DelayParameter parameter, ulong workBuffer) { DelayLines[i] = new DelayLine(sampleRate, parameter.DelayTimeMax); DelayLines[i].SetDelay(parameter.DelayTime); - LowPassZ[0] = 0; } UpdateParameter(ref parameter); @@ -69,5 +69,16 @@ public void UpdateParameter(ref DelayParameter parameter) LowPassFeedbackGain = 0.95f * FixedPointHelper.ToFloat(parameter.LowPassAmount, FixedPointPrecision); LowPassBaseGain = 1.0f - LowPassFeedbackGain; } + + public void UpdateLowPassFilter(ref float tempRawRef, uint channelCount) + { + for (int i = 0; i < channelCount; i++) + { + float lowPassResult = LowPassFeedbackGain * LowPassZ[i] + Unsafe.Add(ref tempRawRef, i) * LowPassBaseGain; + + LowPassZ[i] = lowPassResult; + DelayLines[i].Update(lowPassResult); + } + } } } diff --git a/Ryujinx.Audio/Renderer/Utils/Math/Matrix2x2.cs b/Ryujinx.Audio/Renderer/Utils/Math/Matrix2x2.cs new file mode 100644 index 000000000000..f25b15379e9d --- /dev/null +++ b/Ryujinx.Audio/Renderer/Utils/Math/Matrix2x2.cs @@ -0,0 +1,71 @@ +namespace Ryujinx.Audio.Renderer.Utils.Math +{ + record struct Matrix2x2 + { + public float 
M11; + public float M12; + public float M21; + public float M22; + + public Matrix2x2(float m11, float m12, + float m21, float m22) + { + M11 = m11; + M12 = m12; + + M21 = m21; + M22 = m22; + } + + public static Matrix2x2 operator +(Matrix2x2 value1, Matrix2x2 value2) + { + Matrix2x2 m; + + m.M11 = value1.M11 + value2.M11; + m.M12 = value1.M12 + value2.M12; + m.M21 = value1.M21 + value2.M21; + m.M22 = value1.M22 + value2.M22; + + return m; + } + + public static Matrix2x2 operator -(Matrix2x2 value1, float value2) + { + Matrix2x2 m; + + m.M11 = value1.M11 - value2; + m.M12 = value1.M12 - value2; + m.M21 = value1.M21 - value2; + m.M22 = value1.M22 - value2; + + return m; + } + + public static Matrix2x2 operator *(Matrix2x2 value1, float value2) + { + Matrix2x2 m; + + m.M11 = value1.M11 * value2; + m.M12 = value1.M12 * value2; + m.M21 = value1.M21 * value2; + m.M22 = value1.M22 * value2; + + return m; + } + + public static Matrix2x2 operator *(Matrix2x2 value1, Matrix2x2 value2) + { + Matrix2x2 m; + + // First row + m.M11 = value1.M11 * value2.M11 + value1.M12 * value2.M21; + m.M12 = value1.M11 * value2.M12 + value1.M12 * value2.M22; + + // Second row + m.M21 = value1.M21 * value2.M11 + value1.M22 * value2.M21; + m.M22 = value1.M21 * value2.M12 + value1.M22 * value2.M22; + + return m; + } + } +} diff --git a/Ryujinx.Audio/Renderer/Utils/Math/Matrix6x6.cs b/Ryujinx.Audio/Renderer/Utils/Math/Matrix6x6.cs new file mode 100644 index 000000000000..41e2fc843ada --- /dev/null +++ b/Ryujinx.Audio/Renderer/Utils/Math/Matrix6x6.cs @@ -0,0 +1,97 @@ +namespace Ryujinx.Audio.Renderer.Utils.Math +{ + record struct Matrix6x6 + { + public float M11; + public float M12; + public float M13; + public float M14; + public float M15; + public float M16; + + public float M21; + public float M22; + public float M23; + public float M24; + public float M25; + public float M26; + + public float M31; + public float M32; + public float M33; + public float M34; + public float M35; + public float 
M36; + + public float M41; + public float M42; + public float M43; + public float M44; + public float M45; + public float M46; + + public float M51; + public float M52; + public float M53; + public float M54; + public float M55; + public float M56; + + public float M61; + public float M62; + public float M63; + public float M64; + public float M65; + public float M66; + + public Matrix6x6(float m11, float m12, float m13, float m14, float m15, float m16, + float m21, float m22, float m23, float m24, float m25, float m26, + float m31, float m32, float m33, float m34, float m35, float m36, + float m41, float m42, float m43, float m44, float m45, float m46, + float m51, float m52, float m53, float m54, float m55, float m56, + float m61, float m62, float m63, float m64, float m65, float m66) + { + M11 = m11; + M12 = m12; + M13 = m13; + M14 = m14; + M15 = m15; + M16 = m16; + + M21 = m21; + M22 = m22; + M23 = m23; + M24 = m24; + M25 = m25; + M26 = m26; + + M31 = m31; + M32 = m32; + M33 = m33; + M34 = m34; + M35 = m35; + M36 = m36; + + M41 = m41; + M42 = m42; + M43 = m43; + M44 = m44; + M45 = m45; + M46 = m46; + + M51 = m51; + M52 = m52; + M53 = m53; + M54 = m54; + M55 = m55; + M56 = m56; + + M61 = m61; + M62 = m62; + M63 = m63; + M64 = m64; + M65 = m65; + M66 = m66; + } + } +} diff --git a/Ryujinx.Audio/Renderer/Utils/Math/MatrixHelper.cs b/Ryujinx.Audio/Renderer/Utils/Math/MatrixHelper.cs new file mode 100644 index 000000000000..3fbffa94e6e3 --- /dev/null +++ b/Ryujinx.Audio/Renderer/Utils/Math/MatrixHelper.cs @@ -0,0 +1,45 @@ +using Ryujinx.Audio.Renderer.Utils.Math; +using System.Numerics; +using System.Runtime.CompilerServices; + +namespace Ryujinx.Audio.Renderer.Dsp +{ + static class MatrixHelper + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector6 Transform(ref Vector6 value1, ref Matrix6x6 value2) + { + return new Vector6 + { + X = value2.M11 * value1.X + value2.M12 * value1.Y + value2.M13 * value1.Z + value2.M14 * value1.W + value2.M15 
* value1.V + value2.M16 * value1.U, + Y = value2.M21 * value1.X + value2.M22 * value1.Y + value2.M23 * value1.Z + value2.M24 * value1.W + value2.M25 * value1.V + value2.M26 * value1.U, + Z = value2.M31 * value1.X + value2.M32 * value1.Y + value2.M33 * value1.Z + value2.M34 * value1.W + value2.M35 * value1.V + value2.M36 * value1.U, + W = value2.M41 * value1.X + value2.M42 * value1.Y + value2.M43 * value1.Z + value2.M44 * value1.W + value2.M45 * value1.V + value2.M46 * value1.U, + V = value2.M51 * value1.X + value2.M52 * value1.Y + value2.M53 * value1.Z + value2.M54 * value1.W + value2.M55 * value1.V + value2.M56 * value1.U, + U = value2.M61 * value1.X + value2.M62 * value1.Y + value2.M63 * value1.Z + value2.M64 * value1.W + value2.M65 * value1.V + value2.M66 * value1.U, + }; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector4 Transform(ref Vector4 value1, ref Matrix4x4 value2) + { + return new Vector4 + { + X = value2.M11 * value1.X + value2.M12 * value1.Y + value2.M13 * value1.Z + value2.M14 * value1.W, + Y = value2.M21 * value1.X + value2.M22 * value1.Y + value2.M23 * value1.Z + value2.M24 * value1.W, + Z = value2.M31 * value1.X + value2.M32 * value1.Y + value2.M33 * value1.Z + value2.M34 * value1.W, + W = value2.M41 * value1.X + value2.M42 * value1.Y + value2.M43 * value1.Z + value2.M44 * value1.W + }; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector2 Transform(ref Vector2 value1, ref Matrix2x2 value2) + { + return new Vector2 + { + X = value2.M11 * value1.X + value2.M12 * value1.Y, + Y = value2.M21 * value1.X + value2.M22 * value1.Y, + }; + } + } +} diff --git a/Ryujinx.Audio/Renderer/Utils/Math/Vector6.cs b/Ryujinx.Audio/Renderer/Utils/Math/Vector6.cs new file mode 100644 index 000000000000..b2cd481b6d1c --- /dev/null +++ b/Ryujinx.Audio/Renderer/Utils/Math/Vector6.cs @@ -0,0 +1,56 @@ +using System.Runtime.CompilerServices; + +namespace Ryujinx.Audio.Renderer.Utils.Math +{ + record struct Vector6 
+ { + public float X; + public float Y; + public float Z; + public float W; + public float V; + public float U; + + public Vector6(float value) : this(value, value, value, value, value, value) + { + } + + public Vector6(float x, float y, float z, float w, float v, float u) + { + X = x; + Y = y; + Z = z; + W = w; + V = v; + U = u; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector6 operator +(Vector6 left, Vector6 right) + { + return new Vector6(left.X + right.X, + left.Y + right.Y, + left.Z + right.Z, + left.W + right.W, + left.V + right.V, + left.U + right.U); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector6 operator *(Vector6 left, Vector6 right) + { + return new Vector6(left.X * right.X, + left.Y * right.Y, + left.Z * right.Z, + left.W * right.W, + left.V * right.V, + left.U * right.U); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector6 operator *(Vector6 left, float right) + { + return left * new Vector6(right); + } + } +} diff --git a/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs b/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs index f44089598087..8d67d0fd737a 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs @@ -35,6 +35,8 @@ class StateUpdater private byte _vsClipDistancesWritten; private bool _prevDrawIndexed; + private IndexType _prevIndexType; + private uint _prevFirstVertex; private bool _prevTfEnable; /// @@ -214,6 +216,17 @@ public void Update() _prevDrawIndexed = _drawState.DrawIndexed; } + // In some cases, the index type is also used to guess the + // vertex buffer size, so we must update it if the type changed too. 
+ if (_drawState.DrawIndexed && + (_prevIndexType != _state.State.IndexBufferState.Type || + _prevFirstVertex != _state.State.FirstVertex)) + { + _updateTracker.ForceDirty(VertexBufferStateIndex); + _prevIndexType = _state.State.IndexBufferState.Type; + _prevFirstVertex = _state.State.FirstVertex; + } + bool tfEnable = _state.State.TfEnable; if (!tfEnable && _prevTfEnable) @@ -867,6 +880,9 @@ private void UpdateIndexBufferState() /// private void UpdateVertexBufferState() { + IndexType indexType = _state.State.IndexBufferState.Type; + bool indexTypeSmall = indexType == IndexType.UByte || indexType == IndexType.UShort; + _drawState.IsAnyVbInstanced = false; for (int index = 0; index < Constants.TotalVertexBuffers; index++) @@ -898,12 +914,27 @@ private void UpdateVertexBufferState() { // This size may be (much) larger than the real vertex buffer size. // Avoid calculating it this way, unless we don't have any other option. + size = endAddress.Pack() - address + 1; + + if (stride > 0 && indexTypeSmall) + { + // If the index type is a small integer type, then we might be still able + // to reduce the vertex buffer size based on the maximum possible index value. + + ulong maxVertexBufferSize = indexType == IndexType.UByte ? 0x100UL : 0x10000UL; + + maxVertexBufferSize += _state.State.FirstVertex; + maxVertexBufferSize *= (uint)stride; + + size = Math.Min(size, maxVertexBufferSize); + } } else { // For non-indexed draws, we can guess the size from the vertex count // and stride. 
+ int firstInstance = (int)_state.State.FirstInstance; var drawState = _state.State.VertexBufferDrawState; diff --git a/Ryujinx.Graphics.Gpu/Image/Texture.cs b/Ryujinx.Graphics.Gpu/Image/Texture.cs index e1f00606feef..cfb7a3b76a8d 100644 --- a/Ryujinx.Graphics.Gpu/Image/Texture.cs +++ b/Ryujinx.Graphics.Gpu/Image/Texture.cs @@ -1136,17 +1136,33 @@ public TextureMatchQuality IsExactMatch(TextureInfo info, TextureSearchFlags fla /// Texture view physical memory ranges /// Layer size on the given texture /// Host GPU capabilities + /// Indicates that multisample textures are allowed to match non-multisample requested textures /// Texture view initial layer on this texture /// Texture view first mipmap level on this texture /// The level of compatiblilty a view with the given parameters created from this texture has - public TextureViewCompatibility IsViewCompatible(TextureInfo info, MultiRange range, int layerSize, Capabilities caps, out int firstLayer, out int firstLevel) + public TextureViewCompatibility IsViewCompatible(TextureInfo info, MultiRange range, int layerSize, Capabilities caps, bool allowMs, out int firstLayer, out int firstLevel) { TextureViewCompatibility result = TextureViewCompatibility.Full; result = TextureCompatibility.PropagateViewCompatibility(result, TextureCompatibility.ViewFormatCompatible(Info, info, caps)); if (result != TextureViewCompatibility.Incompatible) { - result = TextureCompatibility.PropagateViewCompatibility(result, TextureCompatibility.ViewTargetCompatible(Info, info)); + bool msTargetCompatible = false; + + if (allowMs) + { + msTargetCompatible = Info.Target == Target.Texture2DMultisample && info.Target == Target.Texture2D; + } + + if (!msTargetCompatible) + { + result = TextureCompatibility.PropagateViewCompatibility(result, TextureCompatibility.ViewTargetCompatible(Info, info)); + + if (Info.SamplesInX != info.SamplesInX || Info.SamplesInY != info.SamplesInY) + { + result = TextureViewCompatibility.Incompatible; + } + } if 
(result == TextureViewCompatibility.Full && Info.FormatInfo.Format != info.FormatInfo.Format && !_context.Capabilities.SupportsMismatchingViewFormat) { @@ -1156,11 +1172,6 @@ public TextureViewCompatibility IsViewCompatible(TextureInfo info, MultiRange ra result = TextureViewCompatibility.CopyOnly; } - - if (Info.SamplesInX != info.SamplesInX || Info.SamplesInY != info.SamplesInY) - { - result = TextureViewCompatibility.Incompatible; - } } firstLayer = 0; diff --git a/Ryujinx.Graphics.Gpu/Image/TextureCache.cs b/Ryujinx.Graphics.Gpu/Image/TextureCache.cs index 16248308123f..4fa80c95d82d 100644 --- a/Ryujinx.Graphics.Gpu/Image/TextureCache.cs +++ b/Ryujinx.Graphics.Gpu/Image/TextureCache.cs @@ -542,7 +542,14 @@ public Texture FindOrCreateTexture( for (int index = 0; index < overlapsCount; index++) { Texture overlap = _textureOverlaps[index]; - TextureViewCompatibility overlapCompatibility = overlap.IsViewCompatible(info, range.Value, sizeInfo.LayerSize, _context.Capabilities, out int firstLayer, out int firstLevel); + TextureViewCompatibility overlapCompatibility = overlap.IsViewCompatible( + info, + range.Value, + sizeInfo.LayerSize, + _context.Capabilities, + flags.HasFlag(TextureSearchFlags.ForCopy), + out int firstLayer, + out int firstLevel); if (overlapCompatibility == TextureViewCompatibility.Full) { @@ -650,7 +657,14 @@ public Texture FindOrCreateTexture( Texture overlap = _textureOverlaps[index]; bool overlapInCache = overlap.CacheNode != null; - TextureViewCompatibility compatibility = texture.IsViewCompatible(overlap.Info, overlap.Range, overlap.LayerSize, _context.Capabilities, out int firstLayer, out int firstLevel); + TextureViewCompatibility compatibility = texture.IsViewCompatible( + overlap.Info, + overlap.Range, + overlap.LayerSize, + _context.Capabilities, + false, + out int firstLayer, + out int firstLevel); if (overlap.IsView && compatibility == TextureViewCompatibility.Full) { @@ -1000,20 +1014,34 @@ private static TextureInfo 
AdjustSizes(Texture parent, TextureInfo info, int fir depthOrLayers = info.DepthOrLayers; } + // 2D and 2D multisample textures are not considered compatible. + // This specific case is required for copies, where the source texture might be multisample. + // In this case, we inherit the parent texture multisample state. + Target target = info.Target; + int samplesInX = info.SamplesInX; + int samplesInY = info.SamplesInY; + + if (target == Target.Texture2D && parent.Target == Target.Texture2DMultisample) + { + target = Target.Texture2DMultisample; + samplesInX = parent.Info.SamplesInX; + samplesInY = parent.Info.SamplesInY; + } + return new TextureInfo( info.GpuAddress, width, height, depthOrLayers, info.Levels, - info.SamplesInX, - info.SamplesInY, + samplesInX, + samplesInY, info.Stride, info.IsLinear, info.GobBlocksInY, info.GobBlocksInZ, info.GobBlocksInTileX, - info.Target, + target, info.FormatInfo, info.DepthStencilMode, info.SwizzleR, diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index e6d46884a701..f38709001bda 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -40,7 +40,7 @@ class ShaderCache : IDisposable /// /// Version of the codegen (to be changed when codegen or guest format change). /// - private const ulong ShaderCodeGenVersion = 3054; + private const ulong ShaderCodeGenVersion = 3251; // Progress reporting helpers private volatile int _shaderCount; diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs index b1bd8188099b..2d6607ad00fe 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs @@ -250,9 +250,9 @@ public static string GetAttributeName(int value, ShaderConfig config, bool perPa : "gl_SubgroupInvocationID"; } - // TODO: There must be a better way to handle this... 
if (config.Stage == ShaderStage.Fragment) { + // TODO: There must be a better way to handle this... switch (value) { case AttributeConsts.PositionX: return $"(gl_FragCoord.x / {DefaultNames.SupportBlockRenderScaleName}[0])"; diff --git a/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs b/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs index b7a0caf10db2..98a43640709c 100644 --- a/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs +++ b/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs @@ -5144,6 +5144,7 @@ struct InstVadd public int SrcC => (int)((_opcode >> 39) & 0xFF); public int Pred => (int)((_opcode >> 16) & 0x7); public bool PredInv => (_opcode & 0x80000) != 0; + public int Imm16 => (int)((_opcode >> 20) & 0xFFFF); public bool WriteCC => (_opcode & 0x800000000000) != 0; public AvgMode AvgMode => (AvgMode)((_opcode >> 56) & 0x3); public bool DFormat => (_opcode & 0x40000000000000) != 0; @@ -5164,6 +5165,7 @@ struct InstVmad public int SrcC => (int)((_opcode >> 39) & 0xFF); public int Pred => (int)((_opcode >> 16) & 0x7); public bool PredInv => (_opcode & 0x80000) != 0; + public int Imm16 => (int)((_opcode >> 20) & 0xFFFF); public bool WriteCC => (_opcode & 0x800000000000) != 0; public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7)); public VectorSelect BSelect => (VectorSelect)((int)((_opcode >> 46) & 0x8) | (int)((_opcode >> 28) & 0x7)); diff --git a/Ryujinx.Graphics.Shader/InputTopology.cs b/Ryujinx.Graphics.Shader/InputTopology.cs index 429aa2119419..da3329090e01 100644 --- a/Ryujinx.Graphics.Shader/InputTopology.cs +++ b/Ryujinx.Graphics.Shader/InputTopology.cs @@ -13,16 +13,28 @@ static class InputTopologyExtensions { public static string ToGlslString(this InputTopology topology) { - switch (topology) + return topology switch { - case InputTopology.Points: return "points"; - case InputTopology.Lines: return "lines"; - case InputTopology.LinesAdjacency: return "lines_adjacency"; - case 
InputTopology.Triangles: return "triangles"; - case InputTopology.TrianglesAdjacency: return "triangles_adjacency"; - } + InputTopology.Points => "points", + InputTopology.Lines => "lines", + InputTopology.LinesAdjacency => "lines_adjacency", + InputTopology.Triangles => "triangles", + InputTopology.TrianglesAdjacency => "triangles_adjacency", + _ => "points" + }; + } - return "points"; + public static int ToInputVertices(this InputTopology topology) + { + return topology switch + { + InputTopology.Points => 1, + InputTopology.Lines or + InputTopology.LinesAdjacency => 2, + InputTopology.Triangles or + InputTopology.TrianglesAdjacency => 3, + _ => 1 + }; } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs index 3fbd0aeb2329..879075bae4e2 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs @@ -73,6 +73,26 @@ public static Operand GetPredLogicalOp(EmitterContext context, BoolOp logicOp, O }; } + public static Operand Extend(EmitterContext context, Operand src, VectorSelect type) + { + return type switch + { + VectorSelect.U8B0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8), + VectorSelect.U8B1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8), + VectorSelect.U8B2 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8), + VectorSelect.U8B3 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8), + VectorSelect.U16H0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16), + VectorSelect.U16H1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16), + VectorSelect.S8B0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8), + VectorSelect.S8B1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8), + VectorSelect.S8B2 => SignExtendTo32(context, 
context.ShiftRightU32(src, Const(16)), 8), + VectorSelect.S8B3 => SignExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8), + VectorSelect.S16H0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16), + VectorSelect.S16H1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16), + _ => src + }; + } + public static void SetZnFlags(EmitterContext context, Operand dest, bool setCC, bool extended = false) { if (!setCC) @@ -118,6 +138,15 @@ public static void SetFPZnFlags(EmitterContext context, Operand dest, bool setCC } } + public static (Operand, Operand) NegateLong(EmitterContext context, Operand low, Operand high) + { + low = context.BitwiseNot(low); + high = context.BitwiseNot(high); + low = AddWithCarry(context, low, Const(1), out Operand carryOut); + high = context.IAdd(high, carryOut); + return (low, high); + } + public static Operand AddWithCarry(EmitterContext context, Operand lhs, Operand rhs, out Operand carryOut) { Operand result = context.IAdd(lhs, rhs); diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs index e2131602a0de..1cdb384224f4 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs @@ -168,10 +168,11 @@ public static void Isberd(EmitterContext context) { InstIsberd op = context.GetOp(); - // This instruction performs a load from ISBE memory, - // however it seems to be only used to get some vertex - // input data, so we instead propagate the offset so that - // it can be used on the attribute load. + // This instruction performs a load from ISBE (Internal Stage Buffer Entry) memory. + // Here, we just propagate the offset, as the result from this instruction is usually + // used with ALD to perform vertex load on geometry or tessellation shaders. + // The offset is calculated as (PrimitiveIndex * VerticesPerPrimitive) + VertexIndex. 
+ // Since we hardcode PrimitiveIndex to zero, then the offset will be just VertexIndex. context.Copy(GetDest(op.Dest), GetSrcReg(context, op.SrcA)); } diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs index 51b706011ca6..16b02f978939 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs @@ -94,31 +94,19 @@ public static void S2r(EmitterContext context) case SReg.InvocationInfo: if (context.Config.Stage != ShaderStage.Compute && context.Config.Stage != ShaderStage.Fragment) { - Operand primitiveId = Attribute(AttributeConsts.PrimitiveId); - Operand patchVerticesIn; + // Note: Lowest 8-bits seems to contain some primitive index, + // but it seems to be NVIDIA implementation specific as it's only used + // to calculate ISBE offsets, so we can just keep it as zero. - if (context.Config.Stage == ShaderStage.TessellationEvaluation) + if (context.Config.Stage == ShaderStage.TessellationControl || + context.Config.Stage == ShaderStage.TessellationEvaluation) { - patchVerticesIn = context.ShiftLeft(Attribute(AttributeConsts.PatchVerticesIn), Const(16)); + src = context.ShiftLeft(Attribute(AttributeConsts.PatchVerticesIn), Const(16)); } else { - InputTopology inputTopology = context.Config.GpuAccessor.QueryPrimitiveTopology(); - - int inputVertices = inputTopology switch - { - InputTopology.Points => 1, - InputTopology.Lines or - InputTopology.LinesAdjacency => 2, - InputTopology.Triangles or - InputTopology.TrianglesAdjacency => 3, - _ => 1 - }; - - patchVerticesIn = Const(inputVertices << 16); + src = Const(context.Config.GpuAccessor.QueryPrimitiveTopology().ToInputVertices() << 16); } - - src = context.BitwiseOr(primitiveId, patchVerticesIn); } else { diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs index 43c0035a32a9..2d84c5bdb58f 100644 
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs @@ -1,7 +1,9 @@ using Ryujinx.Graphics.Shader.Decoders; +using Ryujinx.Graphics.Shader.IntermediateRepresentation; using Ryujinx.Graphics.Shader.Translation; using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper; +using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; namespace Ryujinx.Graphics.Shader.Instructions { @@ -11,8 +13,106 @@ public static void Vmad(EmitterContext context) { InstVmad op = context.GetOp(); - // TODO: Implement properly. - context.Copy(GetDest(op.Dest), GetSrcReg(context, op.SrcC)); + bool aSigned = (op.ASelect & VectorSelect.S8B0) != 0; + bool bSigned = (op.BSelect & VectorSelect.S8B0) != 0; + + Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect); + Operand srcC = context.INegate(GetSrcReg(context, op.SrcC), op.AvgMode == AvgMode.NegB); + Operand srcB; + + if (op.BVideo) + { + srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect); + } + else + { + int imm = op.Imm16; + + if (bSigned) + { + imm = (imm << 16) >> 16; + } + + srcB = Const(imm); + } + + Operand productLow = context.IMultiply(srcA, srcB); + Operand productHigh; + + if (aSigned == bSigned) + { + productHigh = aSigned + ? context.MultiplyHighS32(srcA, srcB) + : context.MultiplyHighU32(srcA, srcB); + } + else + { + Operand temp = aSigned + ? 
context.IMultiply(srcB, context.ShiftRightS32(srcA, Const(31))) + : context.IMultiply(srcA, context.ShiftRightS32(srcB, Const(31))); + + productHigh = context.IAdd(temp, context.MultiplyHighU32(srcA, srcB)); + } + + if (op.AvgMode == AvgMode.NegA) + { + (productLow, productHigh) = InstEmitAluHelper.NegateLong(context, productLow, productHigh); + } + + Operand resLow = InstEmitAluHelper.AddWithCarry(context, productLow, srcC, out Operand sumCarry); + Operand resHigh = context.IAdd(productHigh, sumCarry); + + if (op.AvgMode == AvgMode.PlusOne) + { + resLow = InstEmitAluHelper.AddWithCarry(context, resLow, Const(1), out Operand poCarry); + resHigh = context.IAdd(resHigh, poCarry); + } + + bool resSigned = op.ASelect == VectorSelect.S32 || + op.BSelect == VectorSelect.S32 || + op.AvgMode == AvgMode.NegB || + op.AvgMode == AvgMode.NegA; + + int shift = op.VideoScale switch + { + VideoScale.Shr7 => 7, + VideoScale.Shr15 => 15, + _ => 0 + }; + + if (shift != 0) + { + // Low = (Low >> Shift) | (High << (32 - Shift)) + // High >>= Shift + resLow = context.ShiftRightU32(resLow, Const(shift)); + resLow = context.BitwiseOr(resLow, context.ShiftLeft(resHigh, Const(32 - shift))); + resHigh = resSigned + ? context.ShiftRightS32(resHigh, Const(shift)) + : context.ShiftRightU32(resHigh, Const(shift)); + } + + Operand res = resLow; + + if (op.Sat) + { + Operand sign = context.ShiftRightS32(resHigh, Const(31)); + + if (resSigned) + { + Operand overflow = context.ICompareNotEqual(resHigh, context.ShiftRightS32(resLow, Const(31))); + Operand clampValue = context.ConditionalSelect(sign, Const(int.MinValue), Const(int.MaxValue)); + res = context.ConditionalSelect(overflow, clampValue, resLow); + } + else + { + Operand overflow = context.ICompareNotEqual(resHigh, Const(0)); + res = context.ConditionalSelect(overflow, context.BitwiseNot(sign), resLow); + } + } + + context.Copy(GetDest(op.Dest), res); + + // TODO: CC. 
} } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs index 120d6f2273f4..67b185ab5f95 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs @@ -13,14 +13,13 @@ public static void Vmnmx(EmitterContext context) { InstVmnmx op = context.GetOp(); - Operand srcA = Extend(context, GetSrcReg(context, op.SrcA), op.ASelect); + Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect); Operand srcC = GetSrcReg(context, op.SrcC); - Operand srcB; if (op.BVideo) { - srcB = Extend(context, GetSrcReg(context, op.SrcB), op.BSelect); + srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect); } else { @@ -124,13 +123,12 @@ public static void Vsetp(EmitterContext context) { InstVsetp op = context.GetOp(); - Operand srcA = Extend(context, GetSrcReg(context, op.SrcA), op.ASelect); - + Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect); Operand srcB; if (op.BVideo) { - srcB = Extend(context, GetSrcReg(context, op.SrcB), op.BSelect); + srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect); } else { @@ -181,25 +179,5 @@ public static void Vsetp(EmitterContext context) context.Copy(Register(op.DestPred, RegisterType.Predicate), p0Res); context.Copy(Register(op.DestPredInv, RegisterType.Predicate), p1Res); } - - private static Operand Extend(EmitterContext context, Operand src, VectorSelect type) - { - return type switch - { - VectorSelect.U8B0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8), - VectorSelect.U8B1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8), - VectorSelect.U8B2 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8), - VectorSelect.U8B3 => ZeroExtendTo32(context, context.ShiftRightU32(src, 
Const(24)), 8), - VectorSelect.U16H0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16), - VectorSelect.U16H1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16), - VectorSelect.S8B0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8), - VectorSelect.S8B1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8), - VectorSelect.S8B2 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8), - VectorSelect.S8B3 => SignExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8), - VectorSelect.S16H0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16), - VectorSelect.S16H1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16), - _ => src - }; - } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Instructions/Lop3Expression.cs b/Ryujinx.Graphics.Shader/Instructions/Lop3Expression.cs index 76a520075f2f..6217ce530a15 100644 --- a/Ryujinx.Graphics.Shader/Instructions/Lop3Expression.cs +++ b/Ryujinx.Graphics.Shader/Instructions/Lop3Expression.cs @@ -7,138 +7,135 @@ namespace Ryujinx.Graphics.Shader.Instructions { static class Lop3Expression { - public static Operand GetFromTruthTable(EmitterContext context, Operand srcA, Operand srcB, Operand srcC, int imm) + private enum TruthTable : byte { - Operand expr = null; - - // Handle some simple cases, or cases where - // the KMap would yield poor results (like XORs). - if (imm == 0x96 || imm == 0x69) - { - // XOR (0x96) and XNOR (0x69). - if (imm == 0x69) - { - srcA = context.BitwiseNot(srcA); - } - - expr = context.BitwiseExclusiveOr(srcA, srcB); - expr = context.BitwiseExclusiveOr(expr, srcC); - - return expr; - } - else if (imm == 0) - { - // Always false. - return Const(IrConsts.False); - } - else if (imm == 0xff) - { - // Always true. - return Const(IrConsts.True); - } - - int map; - - // Encode into gray code. 
- map = ((imm >> 0) & 1) << 0; - map |= ((imm >> 1) & 1) << 4; - map |= ((imm >> 2) & 1) << 1; - map |= ((imm >> 3) & 1) << 5; - map |= ((imm >> 4) & 1) << 3; - map |= ((imm >> 5) & 1) << 7; - map |= ((imm >> 6) & 1) << 2; - map |= ((imm >> 7) & 1) << 6; - - // Solve KMap, get sum of products. - int visited = 0; + False = 0x00, // false + True = 0xff, // true + In = 0xf0, // a + And2 = 0xc0, // a & b + Or2 = 0xfc, // a | b + Xor2 = 0x3c, // a ^ b + And3 = 0x80, // a & b & c + Or3 = 0xfe, // a | b | c + XorAnd = 0x60, // a & (b ^ c) + XorOr = 0xf6, // a | (b ^ c) + OrAnd = 0xe0, // a & (b | c) + AndOr = 0xf8, // a | (b & c) + Onehot = 0x16, // (a & !b & !c) | (!a & b & !c) | (!a & !b & c) - Only one value is true. + Majority = 0xe8, // Popcount(a, b, c) >= 2 + Gamble = 0x81, // (a & b & c) | (!a & !b & !c) - All on or all off + InverseGamble = 0x7e, // Inverse of Gamble + Dot = 0x1a, // a ^ (c | (a & b)) + Mux = 0xca, // a ? b : c + AndXor = 0x78, // a ^ (b & c) + OrXor = 0x1e, // a ^ (b | c) + Xor3 = 0x96, // a ^ b ^ c + } - for (int index = 0; index < 8 && visited != 0xff; index++) + public static Operand GetFromTruthTable(EmitterContext context, Operand srcA, Operand srcB, Operand srcC, int imm) + { + for (int i = 0; i < 0x40; i++) { - if ((map & (1 << index)) == 0) - { - continue; - } - - int mask = 0; + TruthTable currImm = (TruthTable)imm; - for (int mSize = 4; mSize != 0; mSize >>= 1) + Operand x = srcA; + Operand y = srcB; + Operand z = srcC; + + if ((i & 0x01) != 0) { - mask = RotateLeft4((1 << mSize) - 1, index & 3) << (index & 4); - - if ((map & mask) == mask) - { - break; - } + (x, y) = (y, x); + currImm = PermuteTable(currImm, 7, 6, 3, 2, 5, 4, 1, 0); } - // The mask should wrap, if we are on the high row, shift to low etc. - int mask2 = (index & 4) != 0 ? 
mask >> 4 : mask << 4; - - if ((map & mask2) == mask2) + if ((i & 0x02) != 0) { - mask |= mask2; + (x, z) = (z, x); + currImm = PermuteTable(currImm, 7, 3, 5, 1, 6, 2, 4, 0); } - if ((mask & visited) == mask) + if ((i & 0x04) != 0) { - continue; + (y, z) = (z, y); + currImm = PermuteTable(currImm, 7, 5, 6, 4, 3, 1, 2, 0); } - bool notA = (mask & 0x33) != 0; - bool notB = (mask & 0x99) != 0; - bool notC = (mask & 0x0f) != 0; - - bool aChanges = (mask & 0xcc) != 0 && notA; - bool bChanges = (mask & 0x66) != 0 && notB; - bool cChanges = (mask & 0xf0) != 0 && notC; - - Operand localExpr = null; - - void And(Operand source) + if ((i & 0x08) != 0) { - if (localExpr != null) - { - localExpr = context.BitwiseAnd(localExpr, source); - } - else - { - localExpr = source; - } + x = context.BitwiseNot(x); + currImm = PermuteTable(currImm, 3, 2, 1, 0, 7, 6, 5, 4); } - if (!aChanges) + if ((i & 0x10) != 0) { - And(context.BitwiseNot(srcA, notA)); + y = context.BitwiseNot(y); + currImm = PermuteTable(currImm, 5, 4, 7, 6, 1, 0, 3, 2); } - if (!bChanges) + if ((i & 0x20) != 0) { - And(context.BitwiseNot(srcB, notB)); + z = context.BitwiseNot(z); + currImm = PermuteTable(currImm, 6, 7, 4, 5, 2, 3, 0, 1); } - if (!cChanges) + Operand result = GetExpr(currImm, context, x, y, z); + if (result != null) { - And(context.BitwiseNot(srcC, notC)); + return result; } - if (expr != null) - { - expr = context.BitwiseOr(expr, localExpr); - } - else + Operand notResult = GetExpr((TruthTable)((~(int)currImm) & 0xff), context, x, y, z); + if (notResult != null) { - expr = localExpr; + return context.BitwiseNot(notResult); } - - visited |= mask; } - return expr; + return null; + } + + private static Operand GetExpr(TruthTable imm, EmitterContext context, Operand x, Operand y, Operand z) + { + return imm switch + { + TruthTable.False => Const(0), + TruthTable.True => Const(-1), + TruthTable.In => x, + TruthTable.And2 => context.BitwiseAnd(x, y), + TruthTable.Or2 => context.BitwiseOr(x, y), + 
TruthTable.Xor2 => context.BitwiseExclusiveOr(x, y), + TruthTable.And3 => context.BitwiseAnd(x, context.BitwiseAnd(y, z)), + TruthTable.Or3 => context.BitwiseOr(x, context.BitwiseOr(y, z)), + TruthTable.XorAnd => context.BitwiseAnd(x, context.BitwiseExclusiveOr(y, z)), + TruthTable.XorOr => context.BitwiseOr(x, context.BitwiseExclusiveOr(y, z)), + TruthTable.OrAnd => context.BitwiseAnd(x, context.BitwiseOr(y, z)), + TruthTable.AndOr => context.BitwiseOr(x, context.BitwiseAnd(y, z)), + TruthTable.Onehot => context.BitwiseExclusiveOr(context.BitwiseOr(x, y), context.BitwiseOr(z, context.BitwiseAnd(x, y))), + TruthTable.Majority => context.BitwiseAnd(context.BitwiseOr(x, y), context.BitwiseOr(z, context.BitwiseAnd(x, y))), + TruthTable.InverseGamble => context.BitwiseOr(context.BitwiseExclusiveOr(x, y), context.BitwiseExclusiveOr(x, z)), + TruthTable.Dot => context.BitwiseAnd(context.BitwiseExclusiveOr(x, z), context.BitwiseOr(context.BitwiseNot(y), z)), + TruthTable.Mux => context.BitwiseOr(context.BitwiseAnd(x, y), context.BitwiseAnd(context.BitwiseNot(x), z)), + TruthTable.AndXor => context.BitwiseExclusiveOr(x, context.BitwiseAnd(y, z)), + TruthTable.OrXor => context.BitwiseExclusiveOr(x, context.BitwiseOr(y, z)), + TruthTable.Xor3 => context.BitwiseExclusiveOr(x, context.BitwiseExclusiveOr(y, z)), + _ => null + }; } - private static int RotateLeft4(int value, int shift) + private static TruthTable PermuteTable(TruthTable imm, int bit7, int bit6, int bit5, int bit4, int bit3, int bit2, int bit1, int bit0) { - return ((value << shift) | (value >> (4 - shift))) & 0xf; + int result = 0; + + result |= (((int)imm >> 0) & 1) << bit0; + result |= (((int)imm >> 1) & 1) << bit1; + result |= (((int)imm >> 2) & 1) << bit2; + result |= (((int)imm >> 3) & 1) << bit3; + result |= (((int)imm >> 4) & 1) << bit4; + result |= (((int)imm >> 5) & 1) << bit5; + result |= (((int)imm >> 6) & 1) << bit6; + result |= (((int)imm >> 7) & 1) << bit7; + + return (TruthTable)result; } } } \ No 
newline at end of file diff --git a/Ryujinx.HLE/HOS/ModLoader.cs b/Ryujinx.HLE/HOS/ModLoader.cs index 4f37903366a0..c24f0f745a2c 100644 --- a/Ryujinx.HLE/HOS/ModLoader.cs +++ b/Ryujinx.HLE/HOS/ModLoader.cs @@ -161,7 +161,7 @@ public string GetTitleDir(string modsBasePath, string titleId) if (titleModsPath == null) { - Logger.Info?.Print(LogClass.ModLoader, $"Creating mods dir for Title {titleId.ToUpper()}"); + Logger.Info?.Print(LogClass.ModLoader, $"Creating mods directory for Title {titleId.ToUpper()}"); titleModsPath = contentsDir.CreateSubdirectory(titleId); } diff --git a/Ryujinx.HLE/HOS/Services/Hid/IHidServer.cs b/Ryujinx.HLE/HOS/Services/Hid/IHidServer.cs index db093dad06e7..abc76c62f7b0 100644 --- a/Ryujinx.HLE/HOS/Services/Hid/IHidServer.cs +++ b/Ryujinx.HLE/HOS/Services/Hid/IHidServer.cs @@ -767,6 +767,9 @@ public ResultCode AcquireNpadStyleSetUpdateEventHandle(ServiceCtx context) throw new InvalidOperationException("Out of handles!"); } + // Games expect this event to be signaled after calling this function + evnt.ReadableEvent.Signal(); + context.Response.HandleDesc = IpcHandleDesc.MakeCopy(handle); Logger.Stub?.PrintStub(LogClass.ServiceHid, new { appletResourceUserId, npadId, npadStyleSet }); diff --git a/Ryujinx.HLE/Ryujinx.HLE.csproj b/Ryujinx.HLE/Ryujinx.HLE.csproj index cf1c734fc2ec..4713c9b31f9e 100644 --- a/Ryujinx.HLE/Ryujinx.HLE.csproj +++ b/Ryujinx.HLE/Ryujinx.HLE.csproj @@ -19,7 +19,7 @@ - + diff --git a/Ryujinx/Ui/Widgets/GameTableContextMenu.cs b/Ryujinx/Ui/Widgets/GameTableContextMenu.cs index 306033d2d207..0e3b4892f098 100644 --- a/Ryujinx/Ui/Widgets/GameTableContextMenu.cs +++ b/Ryujinx/Ui/Widgets/GameTableContextMenu.cs @@ -86,23 +86,10 @@ private bool TryFindSaveData(string titleName, ulong titleId, BlitStruct