diff --git a/Directory.Packages.props b/Directory.Packages.props index 34655164e2..07fc8cc283 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -44,6 +44,7 @@ + diff --git a/Ryujinx.sln b/Ryujinx.sln index c3cb5a2b0d..71d5f6dd95 100644 --- a/Ryujinx.sln +++ b/Ryujinx.sln @@ -80,6 +80,10 @@ EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Horizon.Kernel.Generators", "src\Ryujinx.Horizon.Kernel.Generators\Ryujinx.Horizon.Kernel.Generators.csproj", "{7F55A45D-4E1D-4A36-ADD3-87F29A285AA2}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.HLE.Generators", "src\Ryujinx.HLE.Generators\Ryujinx.HLE.Generators.csproj", "{B575BCDE-2FD8-4A5D-8756-31CDD7FE81F0}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Ryujinx.Graphics.Metal", "src\Ryujinx.Graphics.Metal\Ryujinx.Graphics.Metal.csproj", "{C08931FA-1191-417A-864F-3882D93E683B}" + ProjectSection(ProjectDependencies) = postProject + {A602AE97-91A5-4608-8DF1-EBF4ED7A0B9E} = {A602AE97-91A5-4608-8DF1-EBF4ED7A0B9E} + EndProjectSection EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{36F870C1-3E5F-485F-B426-F0645AF78751}" ProjectSection(SolutionItems) = preProject @@ -257,6 +261,10 @@ Global {4A89A234-4F19-497D-A576-DDE8CDFC5B22}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {4A89A234-4F19-497D-A576-DDE8CDFC5B22}.Debug|Any CPU.Build.0 = Debug|Any CPU {4A89A234-4F19-497D-A576-DDE8CDFC5B22}.Release|Any CPU.ActiveCfg = Release|Any CPU + {C08931FA-1191-417A-864F-3882D93E683B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {C08931FA-1191-417A-864F-3882D93E683B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {C08931FA-1191-417A-864F-3882D93E683B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {C08931FA-1191-417A-864F-3882D93E683B}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/src/Ryujinx.Common/Configuration/GraphicsBackend.cs b/src/Ryujinx.Common/Configuration/GraphicsBackend.cs index e3b4f91b01..9d399d560b 100644 --- a/src/Ryujinx.Common/Configuration/GraphicsBackend.cs +++ b/src/Ryujinx.Common/Configuration/GraphicsBackend.cs @@ -6,7 +6,9 @@ namespace Ryujinx.Common.Configuration [JsonConverter(typeof(TypedStringEnumConverter))] public enum GraphicsBackend { + Auto, Vulkan, OpenGl, + Metal } } diff --git a/src/Ryujinx.Graphics.GAL/ComputeSize.cs b/src/Ryujinx.Graphics.GAL/ComputeSize.cs new file mode 100644 index 0000000000..ce9c2531c8 --- /dev/null +++ b/src/Ryujinx.Graphics.GAL/ComputeSize.cs @@ -0,0 +1,18 @@ +namespace Ryujinx.Graphics.GAL +{ + public readonly struct ComputeSize + { + public readonly static ComputeSize VtgAsCompute = new ComputeSize(32, 32, 1); + + public readonly int X; + public readonly int Y; + public readonly int Z; + + public ComputeSize(int x, int y, int z) + { + X = x; + Y = y; + Z = z; + } + } +} diff --git a/src/Ryujinx.Graphics.GAL/Format.cs b/src/Ryujinx.Graphics.GAL/Format.cs index 17c42d2d41..b1eb68f722 100644 --- a/src/Ryujinx.Graphics.GAL/Format.cs +++ b/src/Ryujinx.Graphics.GAL/Format.cs @@ -339,6 +339,84 @@ public static int GetScalarSize(this Format format) return 1; } + /// + /// Get bytes per element for this format. + /// + /// Texture format + /// Byte size for an element of this format (pixel, vertex attribute, etc) + public static int GetBytesPerElement(this Format format) + { + int scalarSize = format.GetScalarSize(); + + switch (format) + { + case Format.R8G8Unorm: + case Format.R8G8Snorm: + case Format.R8G8Uint: + case Format.R8G8Sint: + case Format.R8G8Uscaled: + case Format.R8G8Sscaled: + case Format.R16G16Float: + case Format.R16G16Unorm: + case Format.R16G16Snorm: + case Format.R16G16Uint: + case Format.R16G16Sint: + case Format.R16G16Uscaled: + case Format.R16G16Sscaled: + case Format.R32G32Float: + case Format.R32G32Uint: + case Format.R32G32Sint: + case Format.R32G32Uscaled: + case Format.R32G32Sscaled: + return 2 * scalarSize; + + case Format.R8G8B8Unorm: + case Format.R8G8B8Snorm: + case Format.R8G8B8Uint: + case Format.R8G8B8Sint: + case Format.R8G8B8Uscaled: + case Format.R8G8B8Sscaled: + case Format.R16G16B16Float: + case Format.R16G16B16Unorm: + case Format.R16G16B16Snorm: + case Format.R16G16B16Uint: + case Format.R16G16B16Sint: + case Format.R16G16B16Uscaled: + case Format.R16G16B16Sscaled: + case Format.R32G32B32Float: + case Format.R32G32B32Uint: + case Format.R32G32B32Sint: + case Format.R32G32B32Uscaled: + case Format.R32G32B32Sscaled: + return 3 * scalarSize; + + case Format.R8G8B8A8Unorm: + case Format.R8G8B8A8Snorm: + case Format.R8G8B8A8Uint: + case Format.R8G8B8A8Sint: + case Format.R8G8B8A8Srgb: + case Format.R8G8B8A8Uscaled: + case Format.R8G8B8A8Sscaled: + case Format.B8G8R8A8Unorm: + case Format.B8G8R8A8Srgb: + case Format.R16G16B16A16Float: + case Format.R16G16B16A16Unorm: + case Format.R16G16B16A16Snorm: + case Format.R16G16B16A16Uint: + case Format.R16G16B16A16Sint: + case Format.R16G16B16A16Uscaled: + case Format.R16G16B16A16Sscaled: + case Format.R32G32B32A32Float: + case Format.R32G32B32A32Uint: + case Format.R32G32B32A32Sint: + case Format.R32G32B32A32Uscaled: + case Format.R32G32B32A32Sscaled: + return 4 * scalarSize; + } + + return scalarSize; + } + /// /// Checks if the texture format is a depth or depth-stencil format. /// diff --git a/src/Ryujinx.Graphics.GAL/ShaderInfo.cs b/src/Ryujinx.Graphics.GAL/ShaderInfo.cs index 2fd3227dc1..c7965a03df 100644 --- a/src/Ryujinx.Graphics.GAL/ShaderInfo.cs +++ b/src/Ryujinx.Graphics.GAL/ShaderInfo.cs @@ -4,23 +4,22 @@ public struct ShaderInfo { public int FragmentOutputMap { get; } public ResourceLayout ResourceLayout { get; } + public ComputeSize ComputeLocalSize { get; } public ProgramPipelineState? State { get; } public bool FromCache { get; set; } - public ShaderInfo(int fragmentOutputMap, ResourceLayout resourceLayout, ProgramPipelineState state, bool fromCache = false) + public ShaderInfo( + int fragmentOutputMap, + ResourceLayout resourceLayout, + ComputeSize computeLocalSize, + ProgramPipelineState? state, + bool fromCache = false) { FragmentOutputMap = fragmentOutputMap; ResourceLayout = resourceLayout; + ComputeLocalSize = computeLocalSize; State = state; FromCache = fromCache; } - - public ShaderInfo(int fragmentOutputMap, ResourceLayout resourceLayout, bool fromCache = false) - { - FragmentOutputMap = fragmentOutputMap; - ResourceLayout = resourceLayout; - State = null; - FromCache = fromCache; - } } } diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeContext.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeContext.cs index 6de50fb2e7..34f2cfcadc 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeContext.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeContext.cs @@ -11,8 +11,6 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw /// class VtgAsComputeContext : IDisposable { - private const int DummyBufferSize = 16; - private readonly GpuContext _context; /// @@ -48,7 +46,7 @@ public ITexture Get(IRenderer renderer, Format format) 1, 1, 1, - 1, + format.GetBytesPerElement(), format, DepthStencilMode.Depth, Target.TextureBuffer, @@ -521,21 +519,6 @@ public BufferRange GetGeometryIndexDataBufferRange(int offset, int size, bool wr return new BufferRange(_geometryIndexDataBuffer.Handle, offset, size, write); } - /// - /// Gets the range for a dummy 16 bytes buffer, filled with zeros. - /// - /// Dummy buffer range - public BufferRange GetDummyBufferRange() - { - if (_dummyBuffer == BufferHandle.Null) - { - _dummyBuffer = _context.Renderer.CreateBuffer(DummyBufferSize, BufferAccess.DeviceMemory); - _context.Renderer.Pipeline.ClearBuffer(_dummyBuffer, 0, DummyBufferSize, 0); - } - - return new BufferRange(_dummyBuffer, 0, DummyBufferSize); - } - /// /// Gets the range for a sequential index buffer, with ever incrementing index values. /// diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs index 73682866ba..2de3243924 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs @@ -147,7 +147,6 @@ public readonly void RunVertex() { _vacContext.VertexInfoBufferUpdater.SetVertexStride(index, 0, componentsCount); _vacContext.VertexInfoBufferUpdater.SetVertexOffset(index, 0, 0); - SetDummyBufferTexture(_vertexAsCompute.Reservations, index, format); continue; } @@ -163,15 +162,12 @@ public readonly void RunVertex() { _vacContext.VertexInfoBufferUpdater.SetVertexStride(index, 0, componentsCount); _vacContext.VertexInfoBufferUpdater.SetVertexOffset(index, 0, 0); - SetDummyBufferTexture(_vertexAsCompute.Reservations, index, format); continue; } int vbStride = vertexBuffer.UnpackStride(); ulong vbSize = GetVertexBufferSize(address, endAddress.Pack(), vbStride, _indexed, instanced, _firstVertex, _count); - ulong oldVbSize = vbSize; - ulong attributeOffset = (ulong)vertexAttrib.UnpackOffset(); int componentSize = format.GetScalarSize(); @@ -345,20 +341,6 @@ private static int GetMaxCompleteStrips(int verticesPerPrimitive, int maxOutputV return maxOutputVertices / verticesPerPrimitive; } - /// - /// Binds a dummy buffer as vertex buffer into a buffer texture. - /// - /// Shader resource binding reservations - /// Buffer texture index - /// Buffer texture format - private readonly void SetDummyBufferTexture(ResourceReservations reservations, int index, Format format) - { - ITexture bufferTexture = _vacContext.EnsureBufferTexture(index + 2, format); - bufferTexture.SetStorage(_vacContext.GetDummyBufferRange()); - - _context.Renderer.Pipeline.SetTextureAndSampler(ShaderStage.Compute, reservations.GetVertexBufferTextureBinding(index), bufferTexture, null); - } - /// /// Binds a vertex buffer into a buffer texture. /// diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs index c36fc0ada2..b6b8113892 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs @@ -324,6 +324,11 @@ public void LoadShaders(GpuContext context, ParallelDiskCacheLoader loader) bool loadHostCache = header.CodeGenVersion == CodeGenVersion; + if (context.Capabilities.Api == TargetApi.Metal) + { + loadHostCache = false; + } + int programIndex = 0; DataEntry entry = new(); @@ -392,7 +397,8 @@ public void LoadShaders(GpuContext context, ParallelDiskCacheLoader loader) context, shaders, specState.PipelineState, - specState.TransformFeedbackDescriptors != null); + specState.TransformFeedbackDescriptors != null, + specState.ComputeState.GetLocalSize()); IProgram hostProgram; @@ -629,7 +635,10 @@ public void AddShader(GpuContext context, CachedShaderProgram program, ReadOnlyS return; } - WriteHostCode(context, hostCode, program.Shaders, streams, timestamp); + if (context.Capabilities.Api != TargetApi.Metal) + { + WriteHostCode(context, hostCode, program.Shaders, streams, timestamp); + } } /// diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs index 20f96462ea..74922d1e3d 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs @@ -490,7 +490,12 @@ private void ProcessCompilationQueue() { ShaderSource[] shaderSources = new ShaderSource[compilation.TranslatedStages.Length]; - ShaderInfoBuilder shaderInfoBuilder = new(_context, compilation.SpecializationState.TransformFeedbackDescriptors != null); + ref GpuChannelComputeState computeState = ref compilation.SpecializationState.ComputeState; + + ShaderInfoBuilder shaderInfoBuilder = new( + _context, + compilation.SpecializationState.TransformFeedbackDescriptors != null, + computeLocalSize: computeState.GetLocalSize()); for (int index = 0; index < compilation.TranslatedStages.Length; index++) { diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs index 1be75f242b..4e91342917 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs @@ -16,7 +16,7 @@ class GpuAccessor : GpuAccessorBase, IGpuAccessor private readonly GpuAccessorState _state; private readonly int _stageIndex; private readonly bool _compute; - private readonly bool _isVulkan; + private readonly bool _isOpenGL; private readonly bool _hasGeometryShader; private readonly bool _supportsQuads; @@ -38,7 +38,7 @@ public GpuAccessor( _channel = channel; _state = state; _stageIndex = stageIndex; - _isVulkan = context.Capabilities.Api == TargetApi.Vulkan; + _isOpenGL = context.Capabilities.Api == TargetApi.OpenGL; _hasGeometryShader = hasGeometryShader; _supportsQuads = context.Capabilities.SupportsQuads; @@ -116,10 +116,10 @@ public uint QueryConstantBufferUse() public GpuGraphicsState QueryGraphicsState() { return _state.GraphicsState.CreateShaderGraphicsState( - !_isVulkan, + _isOpenGL, _supportsQuads, _hasGeometryShader, - _isVulkan || _state.GraphicsState.YNegateEnabled); + !_isOpenGL || _state.GraphicsState.YNegateEnabled); } /// diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs index d89eebabfd..701ff764a5 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs @@ -55,7 +55,7 @@ public SetBindingPair CreateConstantBufferBinding(int index) { int binding; - if (_context.Capabilities.Api == TargetApi.Vulkan) + if (_context.Capabilities.Api != TargetApi.OpenGL) { binding = GetBindingFromIndex(index, _context.Capabilities.MaximumUniformBuffersPerStage, "Uniform buffer"); } @@ -71,7 +71,7 @@ public SetBindingPair CreateImageBinding(int count, bool isBuffer) { int binding; - if (_context.Capabilities.Api == TargetApi.Vulkan) + if (_context.Capabilities.Api != TargetApi.OpenGL) { if (count == 1) { @@ -103,7 +103,7 @@ public SetBindingPair CreateStorageBufferBinding(int index) { int binding; - if (_context.Capabilities.Api == TargetApi.Vulkan) + if (_context.Capabilities.Api != TargetApi.OpenGL) { binding = GetBindingFromIndex(index, _context.Capabilities.MaximumStorageBuffersPerStage, "Storage buffer"); } @@ -119,7 +119,7 @@ public SetBindingPair CreateTextureBinding(int count, bool isBuffer) { int binding; - if (_context.Capabilities.Api == TargetApi.Vulkan) + if (_context.Capabilities.Api != TargetApi.OpenGL) { if (count == 1) { diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs index d8cdbc3485..720f7e796f 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs @@ -1,3 +1,5 @@ +using Ryujinx.Graphics.GAL; + namespace Ryujinx.Graphics.Gpu.Shader { /// @@ -61,5 +63,14 @@ public GpuChannelComputeState( SharedMemorySize = sharedMemorySize; HasUnalignedStorageBuffer = hasUnalignedStorageBuffer; } + + /// + /// Gets the local group size of the shader in a GAL compatible struct. + /// + /// Local group size + public ComputeSize GetLocalSize() + { + return new ComputeSize(LocalSizeX, LocalSizeY, LocalSizeZ); + } } } diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index 4fc66c4c06..4473e8cb3e 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -224,7 +224,10 @@ public CachedShaderProgram GetComputeShader( TranslatedShader translatedShader = TranslateShader(_dumper, channel, translatorContext, cachedGuestCode, asCompute: false); ShaderSource[] shaderSourcesArray = new ShaderSource[] { CreateShaderSource(translatedShader.Program) }; - ShaderInfo info = ShaderInfoBuilder.BuildForCompute(_context, translatedShader.Program.Info); + ShaderInfo info = ShaderInfoBuilder.BuildForCompute( + _context, + translatedShader.Program.Info, + computeState.GetLocalSize()); IProgram hostProgram = _context.Renderer.CreateProgram(shaderSourcesArray, info); cpShader = new CachedShaderProgram(hostProgram, specState, translatedShader.Shader); @@ -425,7 +428,8 @@ public CachedShaderProgram GetGraphicsShader( TranslatorContext lastInVertexPipeline = geometryToCompute ? translatorContexts[4] ?? currentStage : currentStage; - program = lastInVertexPipeline.GenerateVertexPassthroughForCompute(); + (program, ShaderProgramInfo vacInfo) = lastInVertexPipeline.GenerateVertexPassthroughForCompute(); + infoBuilder.AddStageInfoVac(vacInfo); } else { @@ -530,7 +534,7 @@ public static bool MayConvertVtgToCompute(ref Capabilities capabilities) private ShaderAsCompute CreateHostVertexAsComputeProgram(ShaderProgram program, TranslatorContext context, bool tfEnabled) { ShaderSource source = new(program.Code, program.BinaryCode, ShaderStage.Compute, program.Language); - ShaderInfo info = ShaderInfoBuilder.BuildForVertexAsCompute(_context, program.Info, tfEnabled); + ShaderInfo info = ShaderInfoBuilder.BuildForVertexAsCompute(_context, program.Info, context.GetVertexAsComputeInfo(), tfEnabled); return new(_context.Renderer.CreateProgram(new[] { source }, info), program.Info, context.GetResourceReservations()); } @@ -822,16 +826,19 @@ private static int StageToStageIndex(ShaderStage stage) /// /// Creates shader translation options with the requested graphics API and flags. - /// The shader language is choosen based on the current configuration and graphics API. + /// The shader language is chosen based on the current configuration and graphics API. /// /// Target graphics API /// Translation flags /// Translation options private static TranslationOptions CreateTranslationOptions(TargetApi api, TranslationFlags flags) { - TargetLanguage lang = GraphicsConfig.EnableSpirvCompilationOnVulkan && api == TargetApi.Vulkan - ? TargetLanguage.Spirv - : TargetLanguage.Glsl; + TargetLanguage lang = api switch + { + TargetApi.OpenGL => TargetLanguage.Glsl, + TargetApi.Vulkan => GraphicsConfig.EnableSpirvCompilationOnVulkan ? TargetLanguage.Spirv : TargetLanguage.Glsl, + TargetApi.Metal => TargetLanguage.Msl, + }; return new TranslationOptions(lang, api, flags); } diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs index 49823562f2..e283d08322 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs @@ -22,6 +22,7 @@ class ShaderInfoBuilder ResourceStages.Geometry; private readonly GpuContext _context; + private readonly ComputeSize _computeLocalSize; private int _fragmentOutputMap; @@ -39,9 +40,11 @@ class ShaderInfoBuilder /// GPU context that owns the shaders that will be added to the builder /// Indicates if the graphics shader is used with transform feedback enabled /// Indicates that the vertex shader will be emulated on a compute shader - public ShaderInfoBuilder(GpuContext context, bool tfEnabled, bool vertexAsCompute = false) + /// Indicates the local thread size for a compute shader + public ShaderInfoBuilder(GpuContext context, bool tfEnabled, bool vertexAsCompute = false, ComputeSize computeLocalSize = default) { _context = context; + _computeLocalSize = computeLocalSize; _fragmentOutputMap = -1; @@ -95,7 +98,7 @@ public ShaderInfoBuilder(GpuContext context, bool tfEnabled, bool vertexAsComput private void PopulateDescriptorAndUsages(ResourceStages stages, ResourceType type, int setIndex, int start, int count, bool write = false) { AddDescriptor(stages, type, setIndex, start, count); - AddUsage(stages, type, setIndex, start, count, write); + // AddUsage(stages, type, setIndex, start, count, write); } /// @@ -159,6 +162,25 @@ public void AddStageInfo(ShaderProgramInfo info, bool vertexAsCompute = false) AddUsage(info.Images, stages, isImage: true); } + public void AddStageInfoVac(ShaderProgramInfo info) + { + ResourceStages stages = info.Stage switch + { + ShaderStage.Compute => ResourceStages.Compute, + ShaderStage.Vertex => ResourceStages.Vertex, + ShaderStage.TessellationControl => ResourceStages.TessellationControl, + ShaderStage.TessellationEvaluation => ResourceStages.TessellationEvaluation, + ShaderStage.Geometry => ResourceStages.Geometry, + ShaderStage.Fragment => ResourceStages.Fragment, + _ => ResourceStages.None, + }; + + AddUsage(info.CBuffers, stages, isStorage: false); + AddUsage(info.SBuffers, stages, isStorage: true); + AddUsage(info.Textures, stages, isImage: false); + AddUsage(info.Images, stages, isImage: true); + } + /// /// Adds a resource descriptor to the list of descriptors. /// @@ -361,14 +383,7 @@ public ShaderInfo Build(ProgramPipelineState? pipeline, bool fromCache = false) ResourceLayout resourceLayout = new(descriptors.AsReadOnly(), usages.AsReadOnly()); - if (pipeline.HasValue) - { - return new ShaderInfo(_fragmentOutputMap, resourceLayout, pipeline.Value, fromCache); - } - else - { - return new ShaderInfo(_fragmentOutputMap, resourceLayout, fromCache); - } + return new ShaderInfo(_fragmentOutputMap, resourceLayout, _computeLocalSize, pipeline, fromCache); } /// @@ -378,14 +393,16 @@ public ShaderInfo Build(ProgramPipelineState? pipeline, bool fromCache = false) /// Shaders from the disk cache /// Optional pipeline for background compilation /// Indicates if the graphics shader is used with transform feedback enabled + /// Compute local thread size /// Shader information public static ShaderInfo BuildForCache( GpuContext context, IEnumerable programs, ProgramPipelineState? pipeline, - bool tfEnabled) + bool tfEnabled, + ComputeSize computeLocalSize) { - ShaderInfoBuilder builder = new(context, tfEnabled); + ShaderInfoBuilder builder = new(context, tfEnabled, computeLocalSize: computeLocalSize); foreach (CachedShaderStage program in programs) { @@ -403,11 +420,12 @@ public static ShaderInfo BuildForCache( /// /// GPU context that owns the shader /// Compute shader information + /// Compute local thread size /// True if the compute shader comes from a disk cache, false otherwise /// Shader information - public static ShaderInfo BuildForCompute(GpuContext context, ShaderProgramInfo info, bool fromCache = false) + public static ShaderInfo BuildForCompute(GpuContext context, ShaderProgramInfo info, ComputeSize computeLocalSize, bool fromCache = false) { - ShaderInfoBuilder builder = new(context, tfEnabled: false, vertexAsCompute: false); + ShaderInfoBuilder builder = new(context, tfEnabled: false, vertexAsCompute: false, computeLocalSize: computeLocalSize); builder.AddStageInfo(info); @@ -422,10 +440,11 @@ public static ShaderInfo BuildForCompute(GpuContext context, ShaderProgramInfo i /// Indicates if the graphics shader is used with transform feedback enabled /// True if the compute shader comes from a disk cache, false otherwise /// Shader information - public static ShaderInfo BuildForVertexAsCompute(GpuContext context, ShaderProgramInfo info, bool tfEnabled, bool fromCache = false) + public static ShaderInfo BuildForVertexAsCompute(GpuContext context, ShaderProgramInfo info, ShaderProgramInfo info2, bool tfEnabled, bool fromCache = false) { - ShaderInfoBuilder builder = new(context, tfEnabled, vertexAsCompute: true); + ShaderInfoBuilder builder = new(context, tfEnabled, vertexAsCompute: true, computeLocalSize: ComputeSize.VtgAsCompute); + builder.AddStageInfoVac(info2); builder.AddStageInfo(info, vertexAsCompute: true); return builder.Build(null, fromCache); diff --git a/src/Ryujinx.Graphics.Metal/Auto.cs b/src/Ryujinx.Graphics.Metal/Auto.cs new file mode 100644 index 0000000000..7e79ecbc37 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Auto.cs @@ -0,0 +1,146 @@ +using System; +using System.Diagnostics; +using System.Runtime.Versioning; +using System.Threading; + +namespace Ryujinx.Graphics.Metal +{ + interface IAuto + { + bool HasCommandBufferDependency(CommandBufferScoped cbs); + + void IncrementReferenceCount(); + void DecrementReferenceCount(int cbIndex); + void DecrementReferenceCount(); + } + + interface IAutoPrivate : IAuto + { + void AddCommandBufferDependencies(CommandBufferScoped cbs); + } + + [SupportedOSPlatform("macos")] + class Auto : IAutoPrivate, IDisposable where T : IDisposable + { + private int _referenceCount; + private T _value; + + private readonly BitMap _cbOwnership; + private readonly MultiFenceHolder _waitable; + + private bool _disposed; + private bool _destroyed; + + public Auto(T value) + { + _referenceCount = 1; + _value = value; + _cbOwnership = new BitMap(CommandBufferPool.MaxCommandBuffers); + } + + public Auto(T value, MultiFenceHolder waitable) : this(value) + { + _waitable = waitable; + } + + public T Get(CommandBufferScoped cbs, int offset, int size, bool write = false) + { + _waitable?.AddBufferUse(cbs.CommandBufferIndex, offset, size, write); + return Get(cbs); + } + + public T GetUnsafe() + { + return _value; + } + + public T Get(CommandBufferScoped cbs) + { + if (!_destroyed) + { + AddCommandBufferDependencies(cbs); + } + + return _value; + } + + public bool HasCommandBufferDependency(CommandBufferScoped cbs) + { + return _cbOwnership.IsSet(cbs.CommandBufferIndex); + } + + public bool HasRentedCommandBufferDependency(CommandBufferPool cbp) + { + return _cbOwnership.AnySet(); + } + + public void AddCommandBufferDependencies(CommandBufferScoped cbs) + { + // We don't want to add a reference to this object to the command buffer + // more than once, so if we detect that the command buffer already has ownership + // of this object, then we can just return without doing anything else. + if (_cbOwnership.Set(cbs.CommandBufferIndex)) + { + if (_waitable != null) + { + cbs.AddWaitable(_waitable); + } + + cbs.AddDependant(this); + } + } + + public bool TryIncrementReferenceCount() + { + int lastValue; + do + { + lastValue = _referenceCount; + + if (lastValue == 0) + { + return false; + } + } + while (Interlocked.CompareExchange(ref _referenceCount, lastValue + 1, lastValue) != lastValue); + + return true; + } + + public void IncrementReferenceCount() + { + if (Interlocked.Increment(ref _referenceCount) == 1) + { + Interlocked.Decrement(ref _referenceCount); + throw new InvalidOperationException("Attempted to increment the reference count of an object that was already destroyed."); + } + } + + public void DecrementReferenceCount(int cbIndex) + { + _cbOwnership.Clear(cbIndex); + DecrementReferenceCount(); + } + + public void DecrementReferenceCount() + { + if (Interlocked.Decrement(ref _referenceCount) == 0) + { + _value.Dispose(); + _value = default; + _destroyed = true; + } + + Debug.Assert(_referenceCount >= 0); + } + + public void Dispose() + { + if (!_disposed) + { + DecrementReferenceCount(); + _disposed = true; + } + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/BackgroundResources.cs b/src/Ryujinx.Graphics.Metal/BackgroundResources.cs new file mode 100644 index 0000000000..8bf6b92bd1 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/BackgroundResources.cs @@ -0,0 +1,107 @@ +using SharpMetal.Metal; +using System; +using System.Collections.Generic; +using System.Runtime.Versioning; +using System.Threading; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + class BackgroundResource : IDisposable + { + private readonly MetalRenderer _renderer; + + private CommandBufferPool _pool; + private PersistentFlushBuffer _flushBuffer; + + public BackgroundResource(MetalRenderer renderer) + { + _renderer = renderer; + } + + public CommandBufferPool GetPool() + { + if (_pool == null) + { + MTLCommandQueue queue = _renderer.BackgroundQueue; + _pool = new CommandBufferPool(queue, true); + _pool.Initialize(null); // TODO: Proper encoder factory for background render/compute + } + + return _pool; + } + + public PersistentFlushBuffer GetFlushBuffer() + { + _flushBuffer ??= new PersistentFlushBuffer(_renderer); + + return _flushBuffer; + } + + public void Dispose() + { + _pool?.Dispose(); + _flushBuffer?.Dispose(); + } + } + + [SupportedOSPlatform("macos")] + class BackgroundResources : IDisposable + { + private readonly MetalRenderer _renderer; + + private readonly Dictionary _resources; + + public BackgroundResources(MetalRenderer renderer) + { + _renderer = renderer; + + _resources = new Dictionary(); + } + + private void Cleanup() + { + lock (_resources) + { + foreach (KeyValuePair tuple in _resources) + { + if (!tuple.Key.IsAlive) + { + tuple.Value.Dispose(); + _resources.Remove(tuple.Key); + } + } + } + } + + public BackgroundResource Get() + { + Thread thread = Thread.CurrentThread; + + lock (_resources) + { + if (!_resources.TryGetValue(thread, out BackgroundResource resource)) + { + Cleanup(); + + resource = new BackgroundResource(_renderer); + + _resources[thread] = resource; + } + + return resource; + } + } + + public void Dispose() + { + lock (_resources) + { + foreach (var resource in _resources.Values) + { + resource.Dispose(); + } + } + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/BitMap.cs b/src/Ryujinx.Graphics.Metal/BitMap.cs new file mode 100644 index 0000000000..4ddc438c17 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/BitMap.cs @@ -0,0 +1,157 @@ +namespace Ryujinx.Graphics.Metal +{ + readonly struct BitMap + { + public const int IntSize = 64; + + private const int IntShift = 6; + private const int IntMask = IntSize - 1; + + private readonly long[] _masks; + + public BitMap(int count) + { + _masks = new long[(count + IntMask) / IntSize]; + } + + public bool AnySet() + { + for (int i = 0; i < _masks.Length; i++) + { + if (_masks[i] != 0) + { + return true; + } + } + + return false; + } + + public bool IsSet(int bit) + { + int wordIndex = bit >> IntShift; + int wordBit = bit & IntMask; + + long wordMask = 1L << wordBit; + + return (_masks[wordIndex] & wordMask) != 0; + } + + public bool IsSet(int start, int end) + { + if (start == end) + { + return IsSet(start); + } + + int startIndex = start >> IntShift; + int startBit = start & IntMask; + long startMask = -1L << startBit; + + int endIndex = end >> IntShift; + int endBit = end & IntMask; + long endMask = (long)(ulong.MaxValue >> (IntMask - endBit)); + + if (startIndex == endIndex) + { + return (_masks[startIndex] & startMask & endMask) != 0; + } + + if ((_masks[startIndex] & startMask) != 0) + { + return true; + } + + for (int i = startIndex + 1; i < endIndex; i++) + { + if (_masks[i] != 0) + { + return true; + } + } + + if ((_masks[endIndex] & endMask) != 0) + { + return true; + } + + return false; + } + + public bool Set(int bit) + { + int wordIndex = bit >> IntShift; + int wordBit = bit & IntMask; + + long wordMask = 1L << wordBit; + + if ((_masks[wordIndex] & wordMask) != 0) + { + return false; + } + + _masks[wordIndex] |= wordMask; + + return true; + } + + public void SetRange(int start, int end) + { + if (start == end) + { + Set(start); + return; + } + + int startIndex = start >> IntShift; + int startBit = start & IntMask; + long startMask = -1L << startBit; + + int endIndex = end >> IntShift; + int endBit = end & IntMask; + long endMask = (long)(ulong.MaxValue >> (IntMask - endBit)); + + if (startIndex == endIndex) + { + _masks[startIndex] |= startMask & endMask; + } + else + { + _masks[startIndex] |= startMask; + + for (int i = startIndex + 1; i < endIndex; i++) + { + _masks[i] |= -1; + } + + _masks[endIndex] |= endMask; + } + } + + public void Clear(int bit) + { + int wordIndex = bit >> IntShift; + int wordBit = bit & IntMask; + + long wordMask = 1L << wordBit; + + _masks[wordIndex] &= ~wordMask; + } + + public void Clear() + { + for (int i = 0; i < _masks.Length; i++) + { + _masks[i] = 0; + } + } + + public void ClearInt(int start, int end) + { + for (int i = start; i <= end; i++) + { + _masks[i] = 0; + } + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/BufferHolder.cs b/src/Ryujinx.Graphics.Metal/BufferHolder.cs new file mode 100644 index 0000000000..cc86a403fd --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/BufferHolder.cs @@ -0,0 +1,385 @@ +using Ryujinx.Graphics.GAL; +using SharpMetal.Metal; +using System; +using System.Runtime.InteropServices; +using System.Runtime.Versioning; +using System.Threading; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + class BufferHolder : IDisposable + { + private CacheByRange _cachedConvertedBuffers; + + public int Size { get; } + + private readonly IntPtr _map; + private readonly MetalRenderer _renderer; + private readonly Pipeline _pipeline; + + private readonly MultiFenceHolder _waitable; + private readonly Auto _buffer; + + private readonly ReaderWriterLockSlim _flushLock; + private FenceHolder _flushFence; + private int _flushWaiting; + + public BufferHolder(MetalRenderer renderer, Pipeline pipeline, MTLBuffer buffer, int size) + { + _renderer = renderer; + _pipeline = pipeline; + _map = buffer.Contents; + _waitable = new MultiFenceHolder(size); + _buffer = new Auto(new(buffer), _waitable); + + _flushLock = new ReaderWriterLockSlim(); + + Size = size; + } + + public Auto GetBuffer() + { + return _buffer; + } + + public Auto GetBuffer(bool isWrite) + { + if (isWrite) + { + SignalWrite(0, Size); + } + + return _buffer; + } + + public Auto GetBuffer(int offset, int size, bool isWrite) + { + if (isWrite) + { + SignalWrite(offset, size); + } + + return _buffer; + } + + public void SignalWrite(int offset, int size) + { + if (offset == 0 && size == Size) + { + _cachedConvertedBuffers.Clear(); + } + else + { + _cachedConvertedBuffers.ClearRange(offset, size); + } + } + + private void ClearFlushFence() + { + // Assumes _flushLock is held as writer. + + if (_flushFence != null) + { + if (_flushWaiting == 0) + { + _flushFence.Put(); + } + + _flushFence = null; + } + } + + private void WaitForFlushFence() + { + if (_flushFence == null) + { + return; + } + + // If storage has changed, make sure the fence has been reached so that the data is in place. + _flushLock.ExitReadLock(); + _flushLock.EnterWriteLock(); + + if (_flushFence != null) + { + var fence = _flushFence; + Interlocked.Increment(ref _flushWaiting); + + // Don't wait in the lock. + + _flushLock.ExitWriteLock(); + + fence.Wait(); + + _flushLock.EnterWriteLock(); + + if (Interlocked.Decrement(ref _flushWaiting) == 0) + { + fence.Put(); + } + + _flushFence = null; + } + + // Assumes the _flushLock is held as reader, returns in same state. + _flushLock.ExitWriteLock(); + _flushLock.EnterReadLock(); + } + + public PinnedSpan GetData(int offset, int size) + { + _flushLock.EnterReadLock(); + + WaitForFlushFence(); + + Span result; + + if (_map != IntPtr.Zero) + { + result = GetDataStorage(offset, size); + + // Need to be careful here, the buffer can't be unmapped while the data is being used. + _buffer.IncrementReferenceCount(); + + _flushLock.ExitReadLock(); + + return PinnedSpan.UnsafeFromSpan(result, _buffer.DecrementReferenceCount); + } + + throw new InvalidOperationException("The buffer is not mapped"); + } + + public unsafe Span GetDataStorage(int offset, int size) + { + int mappingSize = Math.Min(size, Size - offset); + + if (_map != IntPtr.Zero) + { + return new Span((void*)(_map + offset), mappingSize); + } + + throw new InvalidOperationException("The buffer is not mapped."); + } + + public unsafe void SetData(int offset, ReadOnlySpan data, CommandBufferScoped? cbs = null, bool allowCbsWait = true) + { + int dataSize = Math.Min(data.Length, Size - offset); + if (dataSize == 0) + { + return; + } + + if (_map != IntPtr.Zero) + { + // If persistently mapped, set the data directly if the buffer is not currently in use. + bool isRented = _buffer.HasRentedCommandBufferDependency(_renderer.CommandBufferPool); + + // If the buffer is rented, take a little more time and check if the use overlaps this handle. + bool needsFlush = isRented && _waitable.IsBufferRangeInUse(offset, dataSize, false); + + if (!needsFlush) + { + WaitForFences(offset, dataSize); + + data[..dataSize].CopyTo(new Span((void*)(_map + offset), dataSize)); + + SignalWrite(offset, dataSize); + + return; + } + } + + if (cbs != null && + cbs.Value.Encoders.CurrentEncoderType == EncoderType.Render && + !(_buffer.HasCommandBufferDependency(cbs.Value) && + _waitable.IsBufferRangeInUse(cbs.Value.CommandBufferIndex, offset, dataSize))) + { + // If the buffer hasn't been used on the command buffer yet, try to preload the data. + // This avoids ending and beginning render passes on each buffer data upload. + + cbs = _pipeline.GetPreloadCommandBuffer(); + } + + if (allowCbsWait) + { + _renderer.BufferManager.StagingBuffer.PushData(_renderer.CommandBufferPool, cbs, this, offset, data); + } + else + { + bool rentCbs = cbs == null; + if (rentCbs) + { + cbs = _renderer.CommandBufferPool.Rent(); + } + + if (!_renderer.BufferManager.StagingBuffer.TryPushData(cbs.Value, this, offset, data)) + { + // Need to do a slow upload. + BufferHolder srcHolder = _renderer.BufferManager.Create(dataSize); + srcHolder.SetDataUnchecked(0, data); + + var srcBuffer = srcHolder.GetBuffer(); + var dstBuffer = this.GetBuffer(true); + + Copy(cbs.Value, srcBuffer, dstBuffer, 0, offset, dataSize); + + srcHolder.Dispose(); + } + + if (rentCbs) + { + cbs.Value.Dispose(); + } + } + } + + public unsafe void SetDataUnchecked(int offset, ReadOnlySpan data) + { + int dataSize = Math.Min(data.Length, Size - offset); + if (dataSize == 0) + { + return; + } + + if (_map != IntPtr.Zero) + { + data[..dataSize].CopyTo(new Span((void*)(_map + offset), dataSize)); + } + } + + public void SetDataUnchecked(int offset, ReadOnlySpan data) where T : unmanaged + { + SetDataUnchecked(offset, MemoryMarshal.AsBytes(data)); + } + + public static void Copy( + CommandBufferScoped cbs, + Auto src, + Auto dst, + int srcOffset, + int dstOffset, + int size, + bool registerSrcUsage = true) + { + var srcBuffer = registerSrcUsage ? src.Get(cbs, srcOffset, size).Value : src.GetUnsafe().Value; + var dstbuffer = dst.Get(cbs, dstOffset, size, true).Value; + + cbs.Encoders.EnsureBlitEncoder().CopyFromBuffer( + srcBuffer, + (ulong)srcOffset, + dstbuffer, + (ulong)dstOffset, + (ulong)size); + } + + public void WaitForFences() + { + _waitable.WaitForFences(); + } + + public void WaitForFences(int offset, int size) + { + _waitable.WaitForFences(offset, size); + } + + private bool BoundToRange(int offset, ref int size) + { + if (offset >= Size) + { + return false; + } + + size = Math.Min(Size - offset, size); + + return true; + } + + public Auto GetBufferI8ToI16(CommandBufferScoped cbs, int offset, int size) + { + if (!BoundToRange(offset, ref size)) + { + return null; + } + + var key = new I8ToI16CacheKey(_renderer); + + if (!_cachedConvertedBuffers.TryGetValue(offset, size, key, out var holder)) + { + holder = _renderer.BufferManager.Create((size * 2 + 3) & ~3); + + _renderer.HelperShader.ConvertI8ToI16(cbs, this, holder, offset, size); + + key.SetBuffer(holder.GetBuffer()); + + _cachedConvertedBuffers.Add(offset, size, key, holder); + } + + return holder.GetBuffer(); + } + + public Auto GetBufferTopologyConversion(CommandBufferScoped cbs, int offset, int size, IndexBufferPattern pattern, int indexSize) + { + if (!BoundToRange(offset, ref size)) + { + return null; + } + + var key = new TopologyConversionCacheKey(_renderer, pattern, indexSize); + + if (!_cachedConvertedBuffers.TryGetValue(offset, size, key, out var holder)) + { + // The destination index size is always I32. + + int indexCount = size / indexSize; + + int convertedCount = pattern.GetConvertedCount(indexCount); + + holder = _renderer.BufferManager.Create(convertedCount * 4); + + _renderer.HelperShader.ConvertIndexBuffer(cbs, this, holder, pattern, indexSize, offset, indexCount); + + key.SetBuffer(holder.GetBuffer()); + + _cachedConvertedBuffers.Add(offset, size, key, holder); + } + + return holder.GetBuffer(); + } + + public bool TryGetCachedConvertedBuffer(int offset, int size, ICacheKey key, out BufferHolder holder) + { + return _cachedConvertedBuffers.TryGetValue(offset, size, key, out holder); + } + + public void AddCachedConvertedBuffer(int offset, int size, ICacheKey key, BufferHolder holder) + { + _cachedConvertedBuffers.Add(offset, size, key, holder); + } + + public void AddCachedConvertedBufferDependency(int offset, int size, ICacheKey key, Dependency dependency) + { + _cachedConvertedBuffers.AddDependency(offset, size, key, dependency); + } + + public void RemoveCachedConvertedBuffer(int offset, int size, ICacheKey key) + { + _cachedConvertedBuffers.Remove(offset, size, key); + } + + + public void Dispose() + { + _pipeline.FlushCommandsIfWeightExceeding(_buffer, (ulong)Size); + + _buffer.Dispose(); + _cachedConvertedBuffers.Dispose(); + + _flushLock.EnterWriteLock(); + + ClearFlushFence(); + + _flushLock.ExitWriteLock(); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/BufferManager.cs b/src/Ryujinx.Graphics.Metal/BufferManager.cs new file mode 100644 index 0000000000..07a6862230 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/BufferManager.cs @@ -0,0 +1,237 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.GAL; +using SharpMetal.Metal; +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + readonly struct ScopedTemporaryBuffer : IDisposable + { + private readonly BufferManager _bufferManager; + private readonly bool _isReserved; + + public readonly BufferRange Range; + public readonly BufferHolder Holder; + + public BufferHandle Handle => Range.Handle; + public int Offset => Range.Offset; + + public ScopedTemporaryBuffer(BufferManager bufferManager, BufferHolder holder, BufferHandle handle, int offset, int size, bool isReserved) + { + _bufferManager = bufferManager; + + Range = new BufferRange(handle, offset, size); + Holder = holder; + + _isReserved = isReserved; + } + + public void Dispose() + { + if (!_isReserved) + { + _bufferManager.Delete(Range.Handle); + } + } + } + + [SupportedOSPlatform("macos")] + class BufferManager : IDisposable + { + private readonly IdList _buffers; + + private readonly MTLDevice _device; + private readonly MetalRenderer _renderer; + private readonly Pipeline _pipeline; + + public int BufferCount { get; private set; } + + public StagingBuffer StagingBuffer { get; } + + public BufferManager(MTLDevice device, MetalRenderer renderer, Pipeline pipeline) + { + _device = device; + _renderer = renderer; + _pipeline = pipeline; + _buffers = new IdList(); + + StagingBuffer = new StagingBuffer(_renderer, this); + } + + public BufferHandle Create(nint pointer, int size) + { + // TODO: This is the wrong Metal method, we need no-copy which SharpMetal isn't giving us. + var buffer = _device.NewBuffer(pointer, (ulong)size, MTLResourceOptions.ResourceStorageModeShared); + + if (buffer == IntPtr.Zero) + { + Logger.Error?.PrintMsg(LogClass.Gpu, $"Failed to create buffer with size 0x{size:X}, and pointer 0x{pointer:X}."); + + return BufferHandle.Null; + } + + var holder = new BufferHolder(_renderer, _pipeline, buffer, size); + + BufferCount++; + + ulong handle64 = (uint)_buffers.Add(holder); + + return Unsafe.As(ref handle64); + } + + public BufferHandle CreateWithHandle(int size) + { + return CreateWithHandle(size, out _); + } + + public BufferHandle CreateWithHandle(int size, out BufferHolder holder) + { + holder = Create(size); + + if (holder == null) + { + return BufferHandle.Null; + } + + BufferCount++; + + ulong handle64 = (uint)_buffers.Add(holder); + + return Unsafe.As(ref handle64); + } + + public ScopedTemporaryBuffer ReserveOrCreate(CommandBufferScoped cbs, int size) + { + StagingBufferReserved? result = StagingBuffer.TryReserveData(cbs, size); + + if (result.HasValue) + { + return new ScopedTemporaryBuffer(this, result.Value.Buffer, StagingBuffer.Handle, result.Value.Offset, result.Value.Size, true); + } + else + { + // Create a temporary buffer. + BufferHandle handle = CreateWithHandle(size, out BufferHolder holder); + + return new ScopedTemporaryBuffer(this, holder, handle, 0, size, false); + } + } + + public BufferHolder Create(int size) + { + var buffer = _device.NewBuffer((ulong)size, MTLResourceOptions.ResourceStorageModeShared); + + if (buffer != IntPtr.Zero) + { + return new BufferHolder(_renderer, _pipeline, buffer, size); + } + + Logger.Error?.PrintMsg(LogClass.Gpu, $"Failed to create buffer with size 0x{size:X}."); + + return null; + } + + public Auto GetBuffer(BufferHandle handle, bool isWrite, out int size) + { + if (TryGetBuffer(handle, out var holder)) + { + size = holder.Size; + return holder.GetBuffer(isWrite); + } + + size = 0; + return null; + } + + public Auto GetBuffer(BufferHandle handle, int offset, int size, bool isWrite) + { + if (TryGetBuffer(handle, out var holder)) + { + return holder.GetBuffer(offset, size, isWrite); + } + + return null; + } + + public Auto GetBuffer(BufferHandle handle, bool isWrite) + { + if (TryGetBuffer(handle, out var holder)) + { + return holder.GetBuffer(isWrite); + } + + return null; + } + + public Auto GetBufferI8ToI16(CommandBufferScoped cbs, BufferHandle handle, int offset, int size) + { + if (TryGetBuffer(handle, out var holder)) + { + return holder.GetBufferI8ToI16(cbs, offset, size); + } + + return null; + } + + public Auto GetBufferTopologyConversion(CommandBufferScoped cbs, BufferHandle handle, int offset, int size, IndexBufferPattern pattern, int indexSize) + { + if (TryGetBuffer(handle, out var holder)) + { + return holder.GetBufferTopologyConversion(cbs, offset, size, pattern, indexSize); + } + + return null; + } + + public PinnedSpan GetData(BufferHandle handle, int offset, int size) + { + if (TryGetBuffer(handle, out var holder)) + { + return holder.GetData(offset, size); + } + + return new PinnedSpan(); + } + + public void SetData(BufferHandle handle, int offset, ReadOnlySpan data) where T : unmanaged + { + SetData(handle, offset, MemoryMarshal.Cast(data), null); + } + + public void SetData(BufferHandle handle, int offset, ReadOnlySpan data, CommandBufferScoped? cbs) + { + if (TryGetBuffer(handle, out var holder)) + { + holder.SetData(offset, data, cbs); + } + } + + public void Delete(BufferHandle handle) + { + if (TryGetBuffer(handle, out var holder)) + { + holder.Dispose(); + _buffers.Remove((int)Unsafe.As(ref handle)); + } + } + + private bool TryGetBuffer(BufferHandle handle, out BufferHolder holder) + { + return _buffers.TryGetValue((int)Unsafe.As(ref handle), out holder); + } + + public void Dispose() + { + StagingBuffer.Dispose(); + + foreach (var buffer in _buffers) + { + buffer.Dispose(); + } + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/BufferUsageBitmap.cs b/src/Ryujinx.Graphics.Metal/BufferUsageBitmap.cs new file mode 100644 index 0000000000..379e274074 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/BufferUsageBitmap.cs @@ -0,0 +1,85 @@ +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + internal class BufferUsageBitmap + { + private readonly BitMap _bitmap; + private readonly int _size; + private readonly int _granularity; + private readonly int _bits; + private readonly int _writeBitOffset; + + private readonly int _intsPerCb; + private readonly int _bitsPerCb; + + public BufferUsageBitmap(int size, int granularity) + { + _size = size; + _granularity = granularity; + + // There are two sets of bits - one for read tracking, and the other for write. + int bits = (size + (granularity - 1)) / granularity; + _writeBitOffset = bits; + _bits = bits << 1; + + _intsPerCb = (_bits + (BitMap.IntSize - 1)) / BitMap.IntSize; + _bitsPerCb = _intsPerCb * BitMap.IntSize; + + _bitmap = new BitMap(_bitsPerCb * CommandBufferPool.MaxCommandBuffers); + } + + public void Add(int cbIndex, int offset, int size, bool write) + { + if (size == 0) + { + return; + } + + // Some usages can be out of bounds (vertex buffer on amd), so bound if necessary. + if (offset + size > _size) + { + size = _size - offset; + } + + int cbBase = cbIndex * _bitsPerCb + (write ? _writeBitOffset : 0); + int start = cbBase + offset / _granularity; + int end = cbBase + (offset + size - 1) / _granularity; + + _bitmap.SetRange(start, end); + } + + public bool OverlapsWith(int cbIndex, int offset, int size, bool write = false) + { + if (size == 0) + { + return false; + } + + int cbBase = cbIndex * _bitsPerCb + (write ? _writeBitOffset : 0); + int start = cbBase + offset / _granularity; + int end = cbBase + (offset + size - 1) / _granularity; + + return _bitmap.IsSet(start, end); + } + + public bool OverlapsWith(int offset, int size, bool write) + { + for (int i = 0; i < CommandBufferPool.MaxCommandBuffers; i++) + { + if (OverlapsWith(i, offset, size, write)) + { + return true; + } + } + + return false; + } + + public void Clear(int cbIndex) + { + _bitmap.ClearInt(cbIndex * _intsPerCb, (cbIndex + 1) * _intsPerCb - 1); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/CacheByRange.cs b/src/Ryujinx.Graphics.Metal/CacheByRange.cs new file mode 100644 index 0000000000..76515808f6 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/CacheByRange.cs @@ -0,0 +1,294 @@ +using System; +using System.Collections.Generic; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + interface ICacheKey : IDisposable + { + bool KeyEqual(ICacheKey other); + } + + [SupportedOSPlatform("macos")] + struct I8ToI16CacheKey : ICacheKey + { + // Used to notify the pipeline that bindings have invalidated on dispose. + // private readonly MetalRenderer _renderer; + // private Auto _buffer; + + public I8ToI16CacheKey(MetalRenderer renderer) + { + // _renderer = renderer; + // _buffer = null; + } + + public readonly bool KeyEqual(ICacheKey other) + { + return other is I8ToI16CacheKey; + } + + public readonly void SetBuffer(Auto buffer) + { + // _buffer = buffer; + } + + public readonly void Dispose() + { + // TODO: Tell pipeline buffer is dirty! + // _renderer.PipelineInternal.DirtyIndexBuffer(_buffer); + } + } + + [SupportedOSPlatform("macos")] + readonly struct TopologyConversionCacheKey : ICacheKey + { + private readonly IndexBufferPattern _pattern; + private readonly int _indexSize; + + // Used to notify the pipeline that bindings have invalidated on dispose. + // private readonly MetalRenderer _renderer; + // private Auto _buffer; + + public TopologyConversionCacheKey(MetalRenderer renderer, IndexBufferPattern pattern, int indexSize) + { + // _renderer = renderer; + // _buffer = null; + _pattern = pattern; + _indexSize = indexSize; + } + + public readonly bool KeyEqual(ICacheKey other) + { + return other is TopologyConversionCacheKey entry && + entry._pattern == _pattern && + entry._indexSize == _indexSize; + } + + public void SetBuffer(Auto buffer) + { + // _buffer = buffer; + } + + public readonly void Dispose() + { + // TODO: Tell pipeline buffer is dirty! + // _renderer.PipelineInternal.DirtyVertexBuffer(_buffer); + } + } + + [SupportedOSPlatform("macos")] + readonly struct Dependency + { + private readonly BufferHolder _buffer; + private readonly int _offset; + private readonly int _size; + private readonly ICacheKey _key; + + public Dependency(BufferHolder buffer, int offset, int size, ICacheKey key) + { + _buffer = buffer; + _offset = offset; + _size = size; + _key = key; + } + + public void RemoveFromOwner() + { + _buffer.RemoveCachedConvertedBuffer(_offset, _size, _key); + } + } + + [SupportedOSPlatform("macos")] + struct CacheByRange where T : IDisposable + { + private struct Entry + { + public readonly ICacheKey Key; + public readonly T Value; + public List DependencyList; + + public Entry(ICacheKey key, T value) + { + Key = key; + Value = value; + DependencyList = null; + } + + public readonly void InvalidateDependencies() + { + if (DependencyList != null) + { + foreach (Dependency dependency in DependencyList) + { + dependency.RemoveFromOwner(); + } + + DependencyList.Clear(); + } + } + } + + private Dictionary> _ranges; + + public void Add(int offset, int size, ICacheKey key, T value) + { + List entries = GetEntries(offset, size); + + entries.Add(new Entry(key, value)); + } + + public void AddDependency(int offset, int size, ICacheKey key, Dependency dependency) + { + List entries = GetEntries(offset, size); + + for (int i = 0; i < entries.Count; i++) + { + Entry entry = entries[i]; + + if (entry.Key.KeyEqual(key)) + { + if (entry.DependencyList == null) + { + entry.DependencyList = new List(); + entries[i] = entry; + } + + entry.DependencyList.Add(dependency); + + break; + } + } + } + + public void Remove(int offset, int size, ICacheKey key) + { + List entries = GetEntries(offset, size); + + for (int i = 0; i < entries.Count; i++) + { + Entry entry = entries[i]; + + if (entry.Key.KeyEqual(key)) + { + entries.RemoveAt(i--); + + DestroyEntry(entry); + } + } + + if (entries.Count == 0) + { + _ranges.Remove(PackRange(offset, size)); + } + } + + public bool TryGetValue(int offset, int size, ICacheKey key, out T value) + { + List entries = GetEntries(offset, size); + + foreach (Entry entry in entries) + { + if (entry.Key.KeyEqual(key)) + { + value = entry.Value; + + return true; + } + } + + value = default; + return false; + } + + public void Clear() + { + if (_ranges != null) + { + foreach (List entries in _ranges.Values) + { + foreach (Entry entry in entries) + { + DestroyEntry(entry); + } + } + + _ranges.Clear(); + _ranges = null; + } + } + + public readonly void ClearRange(int offset, int size) + { + if (_ranges != null && _ranges.Count > 0) + { + int end = offset + size; + + List toRemove = null; + + foreach (KeyValuePair> range in _ranges) + { + (int rOffset, int rSize) = UnpackRange(range.Key); + + int rEnd = rOffset + rSize; + + if (rEnd > offset && rOffset < end) + { + List entries = range.Value; + + foreach (Entry entry in entries) + { + DestroyEntry(entry); + } + + (toRemove ??= new List()).Add(range.Key); + } + } + + if (toRemove != null) + { + foreach (ulong range in toRemove) + { + _ranges.Remove(range); + } + } + } + } + + private List GetEntries(int offset, int size) + { + _ranges ??= new Dictionary>(); + + ulong key = PackRange(offset, size); + + if (!_ranges.TryGetValue(key, out List value)) + { + value = new List(); + _ranges.Add(key, value); + } + + return value; + } + + private static void DestroyEntry(Entry entry) + { + entry.Key.Dispose(); + entry.Value?.Dispose(); + entry.InvalidateDependencies(); + } + + private static ulong PackRange(int offset, int size) + { + return (uint)offset | ((ulong)size << 32); + } + + private static (int offset, int size) UnpackRange(ulong range) + { + return ((int)range, (int)(range >> 32)); + } + + public void Dispose() + { + Clear(); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/CommandBufferEncoder.cs b/src/Ryujinx.Graphics.Metal/CommandBufferEncoder.cs new file mode 100644 index 0000000000..ec41500304 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/CommandBufferEncoder.cs @@ -0,0 +1,170 @@ +using Ryujinx.Graphics.Metal; +using SharpMetal.Metal; +using System; +using System.Runtime.CompilerServices; +using System.Runtime.Versioning; + +interface IEncoderFactory +{ + MTLRenderCommandEncoder CreateRenderCommandEncoder(); + MTLComputeCommandEncoder CreateComputeCommandEncoder(); +} + +/// +/// Tracks active encoder object for a command buffer. +/// +[SupportedOSPlatform("macos")] +class CommandBufferEncoder +{ + public EncoderType CurrentEncoderType { get; private set; } = EncoderType.None; + + public MTLBlitCommandEncoder BlitEncoder => new(CurrentEncoder.Value); + + public MTLComputeCommandEncoder ComputeEncoder => new(CurrentEncoder.Value); + + public MTLRenderCommandEncoder RenderEncoder => new(CurrentEncoder.Value); + + internal MTLCommandEncoder? CurrentEncoder { get; private set; } + + private MTLCommandBuffer _commandBuffer; + private IEncoderFactory _encoderFactory; + + public void Initialize(MTLCommandBuffer commandBuffer, IEncoderFactory encoderFactory) + { + _commandBuffer = commandBuffer; + _encoderFactory = encoderFactory; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public MTLRenderCommandEncoder EnsureRenderEncoder() + { + if (CurrentEncoderType != EncoderType.Render) + { + return BeginRenderPass(); + } + + return RenderEncoder; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public MTLBlitCommandEncoder EnsureBlitEncoder() + { + if (CurrentEncoderType != EncoderType.Blit) + { + return BeginBlitPass(); + } + + return BlitEncoder; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public MTLComputeCommandEncoder EnsureComputeEncoder() + { + if (CurrentEncoderType != EncoderType.Compute) + { + return BeginComputePass(); + } + + return ComputeEncoder; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool TryGetRenderEncoder(out MTLRenderCommandEncoder encoder) + { + if (CurrentEncoderType != EncoderType.Render) + { + encoder = default; + return false; + } + + encoder = RenderEncoder; + return true; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool TryGetBlitEncoder(out MTLBlitCommandEncoder encoder) + { + if (CurrentEncoderType != EncoderType.Blit) + { + encoder = default; + return false; + } + + encoder = BlitEncoder; + return true; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool TryGetComputeEncoder(out MTLComputeCommandEncoder encoder) + { + if (CurrentEncoderType != EncoderType.Compute) + { + encoder = default; + return false; + } + + encoder = ComputeEncoder; + return true; + } + + public void EndCurrentPass() + { + if (CurrentEncoder != null) + { + switch (CurrentEncoderType) + { + case EncoderType.Blit: + BlitEncoder.EndEncoding(); + CurrentEncoder = null; + break; + case EncoderType.Compute: + ComputeEncoder.EndEncoding(); + CurrentEncoder = null; + break; + case EncoderType.Render: + RenderEncoder.EndEncoding(); + CurrentEncoder = null; + break; + default: + throw new InvalidOperationException(); + } + + CurrentEncoderType = EncoderType.None; + } + } + + private MTLRenderCommandEncoder BeginRenderPass() + { + EndCurrentPass(); + + var renderCommandEncoder = _encoderFactory.CreateRenderCommandEncoder(); + + CurrentEncoder = renderCommandEncoder; + CurrentEncoderType = EncoderType.Render; + + return renderCommandEncoder; + } + + private MTLBlitCommandEncoder BeginBlitPass() + { + EndCurrentPass(); + + using var descriptor = new MTLBlitPassDescriptor(); + var blitCommandEncoder = _commandBuffer.BlitCommandEncoder(descriptor); + + CurrentEncoder = blitCommandEncoder; + CurrentEncoderType = EncoderType.Blit; + return blitCommandEncoder; + } + + private MTLComputeCommandEncoder BeginComputePass() + { + EndCurrentPass(); + + var computeCommandEncoder = _encoderFactory.CreateComputeCommandEncoder(); + + CurrentEncoder = computeCommandEncoder; + CurrentEncoderType = EncoderType.Compute; + return computeCommandEncoder; + } +} diff --git a/src/Ryujinx.Graphics.Metal/CommandBufferPool.cs b/src/Ryujinx.Graphics.Metal/CommandBufferPool.cs new file mode 100644 index 0000000000..5e7576b37b --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/CommandBufferPool.cs @@ -0,0 +1,289 @@ +using SharpMetal.Metal; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Runtime.Versioning; +using System.Threading; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + class CommandBufferPool : IDisposable + { + public const int MaxCommandBuffers = 16; + + private readonly int _totalCommandBuffers; + private readonly int _totalCommandBuffersMask; + private readonly MTLCommandQueue _queue; + private readonly Thread _owner; + private IEncoderFactory _defaultEncoderFactory; + + public bool OwnedByCurrentThread => _owner == Thread.CurrentThread; + + [SupportedOSPlatform("macos")] + private struct ReservedCommandBuffer + { + public bool InUse; + public bool InConsumption; + public int SubmissionCount; + public MTLCommandBuffer CommandBuffer; + public CommandBufferEncoder Encoders; + public FenceHolder Fence; + + public List Dependants; + public List Waitables; + + public void Use(MTLCommandQueue queue, IEncoderFactory stateManager) + { + MTLCommandBufferDescriptor descriptor = new(); +#if DEBUG + descriptor.ErrorOptions = MTLCommandBufferErrorOption.EncoderExecutionStatus; +#endif + + CommandBuffer = queue.CommandBuffer(descriptor); + Fence = new FenceHolder(CommandBuffer); + + Encoders.Initialize(CommandBuffer, stateManager); + + InUse = true; + } + + public void Initialize() + { + Dependants = new List(); + Waitables = new List(); + Encoders = new CommandBufferEncoder(); + } + } + + private readonly ReservedCommandBuffer[] _commandBuffers; + + private readonly int[] _queuedIndexes; + private int _queuedIndexesPtr; + private int _queuedCount; + private int _inUseCount; + + public CommandBufferPool(MTLCommandQueue queue, bool isLight = false) + { + _queue = queue; + _owner = Thread.CurrentThread; + + _totalCommandBuffers = isLight ? 2 : MaxCommandBuffers; + _totalCommandBuffersMask = _totalCommandBuffers - 1; + + _commandBuffers = new ReservedCommandBuffer[_totalCommandBuffers]; + + _queuedIndexes = new int[_totalCommandBuffers]; + _queuedIndexesPtr = 0; + _queuedCount = 0; + } + + public void Initialize(IEncoderFactory encoderFactory) + { + _defaultEncoderFactory = encoderFactory; + + for (int i = 0; i < _totalCommandBuffers; i++) + { + _commandBuffers[i].Initialize(); + WaitAndDecrementRef(i); + } + } + + public void AddDependant(int cbIndex, IAuto dependant) + { + dependant.IncrementReferenceCount(); + _commandBuffers[cbIndex].Dependants.Add(dependant); + } + + public void AddWaitable(MultiFenceHolder waitable) + { + lock (_commandBuffers) + { + for (int i = 0; i < _totalCommandBuffers; i++) + { + ref var entry = ref _commandBuffers[i]; + + if (entry.InConsumption) + { + AddWaitable(i, waitable); + } + } + } + } + + public void AddInUseWaitable(MultiFenceHolder waitable) + { + lock (_commandBuffers) + { + for (int i = 0; i < _totalCommandBuffers; i++) + { + ref var entry = ref _commandBuffers[i]; + + if (entry.InUse) + { + AddWaitable(i, waitable); + } + } + } + } + + public void AddWaitable(int cbIndex, MultiFenceHolder waitable) + { + ref var entry = ref _commandBuffers[cbIndex]; + if (waitable.AddFence(cbIndex, entry.Fence)) + { + entry.Waitables.Add(waitable); + } + } + + public bool IsFenceOnRentedCommandBuffer(FenceHolder fence) + { + lock (_commandBuffers) + { + for (int i = 0; i < _totalCommandBuffers; i++) + { + ref var entry = ref _commandBuffers[i]; + + if (entry.InUse && entry.Fence == fence) + { + return true; + } + } + } + + return false; + } + + public FenceHolder GetFence(int cbIndex) + { + return _commandBuffers[cbIndex].Fence; + } + + public int GetSubmissionCount(int cbIndex) + { + return _commandBuffers[cbIndex].SubmissionCount; + } + + private int FreeConsumed(bool wait) + { + int freeEntry = 0; + + while (_queuedCount > 0) + { + int index = _queuedIndexes[_queuedIndexesPtr]; + + ref var entry = ref _commandBuffers[index]; + + if (wait || !entry.InConsumption || entry.Fence.IsSignaled()) + { + WaitAndDecrementRef(index); + + wait = false; + freeEntry = index; + + _queuedCount--; + _queuedIndexesPtr = (_queuedIndexesPtr + 1) % _totalCommandBuffers; + } + else + { + break; + } + } + + return freeEntry; + } + + public CommandBufferScoped ReturnAndRent(CommandBufferScoped cbs) + { + Return(cbs); + return Rent(); + } + + public CommandBufferScoped Rent() + { + lock (_commandBuffers) + { + int cursor = FreeConsumed(_inUseCount + _queuedCount == _totalCommandBuffers); + + for (int i = 0; i < _totalCommandBuffers; i++) + { + ref var entry = ref _commandBuffers[cursor]; + + if (!entry.InUse && !entry.InConsumption) + { + entry.Use(_queue, _defaultEncoderFactory); + + _inUseCount++; + + return new CommandBufferScoped(this, entry.CommandBuffer, entry.Encoders, cursor); + } + + cursor = (cursor + 1) & _totalCommandBuffersMask; + } + } + + throw new InvalidOperationException($"Out of command buffers (In use: {_inUseCount}, queued: {_queuedCount}, total: {_totalCommandBuffers})"); + } + + public void Return(CommandBufferScoped cbs) + { + // Ensure the encoder is committed. + cbs.Encoders.EndCurrentPass(); + + lock (_commandBuffers) + { + int cbIndex = cbs.CommandBufferIndex; + + ref var entry = ref _commandBuffers[cbIndex]; + + Debug.Assert(entry.InUse); + Debug.Assert(entry.CommandBuffer.NativePtr == cbs.CommandBuffer.NativePtr); + entry.InUse = false; + entry.InConsumption = true; + entry.SubmissionCount++; + _inUseCount--; + + var commandBuffer = entry.CommandBuffer; + commandBuffer.Commit(); + + int ptr = (_queuedIndexesPtr + _queuedCount) % _totalCommandBuffers; + _queuedIndexes[ptr] = cbIndex; + _queuedCount++; + } + } + + private void WaitAndDecrementRef(int cbIndex) + { + ref var entry = ref _commandBuffers[cbIndex]; + + if (entry.InConsumption) + { + entry.Fence.Wait(); + entry.InConsumption = false; + } + + foreach (var dependant in entry.Dependants) + { + dependant.DecrementReferenceCount(cbIndex); + } + + foreach (var waitable in entry.Waitables) + { + waitable.RemoveFence(cbIndex); + waitable.RemoveBufferUses(cbIndex); + } + + entry.Dependants.Clear(); + entry.Waitables.Clear(); + entry.Fence?.Dispose(); + } + + public void Dispose() + { + for (int i = 0; i < _totalCommandBuffers; i++) + { + WaitAndDecrementRef(i); + } + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/CommandBufferScoped.cs b/src/Ryujinx.Graphics.Metal/CommandBufferScoped.cs new file mode 100644 index 0000000000..822f69b460 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/CommandBufferScoped.cs @@ -0,0 +1,43 @@ +using SharpMetal.Metal; +using System; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + readonly struct CommandBufferScoped : IDisposable + { + private readonly CommandBufferPool _pool; + public MTLCommandBuffer CommandBuffer { get; } + public CommandBufferEncoder Encoders { get; } + public int CommandBufferIndex { get; } + + public CommandBufferScoped(CommandBufferPool pool, MTLCommandBuffer commandBuffer, CommandBufferEncoder encoders, int commandBufferIndex) + { + _pool = pool; + CommandBuffer = commandBuffer; + Encoders = encoders; + CommandBufferIndex = commandBufferIndex; + } + + public void AddDependant(IAuto dependant) + { + _pool.AddDependant(CommandBufferIndex, dependant); + } + + public void AddWaitable(MultiFenceHolder waitable) + { + _pool.AddWaitable(CommandBufferIndex, waitable); + } + + public FenceHolder GetFence() + { + return _pool.GetFence(CommandBufferIndex); + } + + public void Dispose() + { + _pool?.Return(this); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/Constants.cs b/src/Ryujinx.Graphics.Metal/Constants.cs new file mode 100644 index 0000000000..43baf722ae --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Constants.cs @@ -0,0 +1,41 @@ +namespace Ryujinx.Graphics.Metal +{ + static class Constants + { + public const int MaxShaderStages = 5; + public const int MaxVertexBuffers = 16; + public const int MaxUniformBuffersPerStage = 18; + public const int MaxStorageBuffersPerStage = 16; + public const int MaxTexturesPerStage = 64; + public const int MaxImagesPerStage = 16; + + public const int MaxUniformBufferBindings = MaxUniformBuffersPerStage * MaxShaderStages; + public const int MaxStorageBufferBindings = MaxStorageBuffersPerStage * MaxShaderStages; + public const int MaxTextureBindings = MaxTexturesPerStage * MaxShaderStages; + public const int MaxImageBindings = MaxImagesPerStage * MaxShaderStages; + public const int MaxColorAttachments = 8; + public const int MaxViewports = 16; + // TODO: Check this value + public const int MaxVertexAttributes = 31; + + public const int MinResourceAlignment = 16; + + // Must match constants set in shader generation + public const uint ZeroBufferIndex = MaxVertexBuffers; + public const uint BaseSetIndex = MaxVertexBuffers + 1; + + public const uint ConstantBuffersIndex = BaseSetIndex; + public const uint StorageBuffersIndex = BaseSetIndex + 1; + public const uint TexturesIndex = BaseSetIndex + 2; + public const uint ImagesIndex = BaseSetIndex + 3; + + public const uint ConstantBuffersSetIndex = 0; + public const uint StorageBuffersSetIndex = 1; + public const uint TexturesSetIndex = 2; + public const uint ImagesSetIndex = 3; + + public const uint MaximumBufferArgumentTableEntries = 31; + + public const uint MaximumExtraSets = MaximumBufferArgumentTableEntries - ImagesIndex; + } +} diff --git a/src/Ryujinx.Graphics.Metal/CounterEvent.cs b/src/Ryujinx.Graphics.Metal/CounterEvent.cs new file mode 100644 index 0000000000..46b04997e4 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/CounterEvent.cs @@ -0,0 +1,22 @@ +using Ryujinx.Graphics.GAL; + +namespace Ryujinx.Graphics.Metal +{ + class CounterEvent : ICounterEvent + { + public CounterEvent() + { + Invalid = false; + } + + public bool Invalid { get; set; } + public bool ReserveForHostAccess() + { + return true; + } + + public void Flush() { } + + public void Dispose() { } + } +} diff --git a/src/Ryujinx.Graphics.Metal/DepthStencilCache.cs b/src/Ryujinx.Graphics.Metal/DepthStencilCache.cs new file mode 100644 index 0000000000..bb6e4c180d --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/DepthStencilCache.cs @@ -0,0 +1,68 @@ +using Ryujinx.Graphics.Metal.State; +using SharpMetal.Metal; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + class DepthStencilCache : StateCache + { + private readonly MTLDevice _device; + + public DepthStencilCache(MTLDevice device) + { + _device = device; + } + + protected override DepthStencilUid GetHash(DepthStencilUid descriptor) + { + return descriptor; + } + + protected override MTLDepthStencilState CreateValue(DepthStencilUid descriptor) + { + // Create descriptors + + ref StencilUid frontUid = ref descriptor.FrontFace; + + using var frontFaceStencil = new MTLStencilDescriptor + { + StencilFailureOperation = frontUid.StencilFailureOperation, + DepthFailureOperation = frontUid.DepthFailureOperation, + DepthStencilPassOperation = frontUid.DepthStencilPassOperation, + StencilCompareFunction = frontUid.StencilCompareFunction, + ReadMask = frontUid.ReadMask, + WriteMask = frontUid.WriteMask + }; + + ref StencilUid backUid = ref descriptor.BackFace; + + using var backFaceStencil = new MTLStencilDescriptor + { + StencilFailureOperation = backUid.StencilFailureOperation, + DepthFailureOperation = backUid.DepthFailureOperation, + DepthStencilPassOperation = backUid.DepthStencilPassOperation, + StencilCompareFunction = backUid.StencilCompareFunction, + ReadMask = backUid.ReadMask, + WriteMask = backUid.WriteMask + }; + + var mtlDescriptor = new MTLDepthStencilDescriptor + { + DepthCompareFunction = descriptor.DepthCompareFunction, + DepthWriteEnabled = descriptor.DepthWriteEnabled + }; + + if (descriptor.StencilTestEnabled) + { + mtlDescriptor.BackFaceStencil = backFaceStencil; + mtlDescriptor.FrontFaceStencil = frontFaceStencil; + } + + using (mtlDescriptor) + { + return _device.NewDepthStencilState(mtlDescriptor); + } + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/DisposableBuffer.cs b/src/Ryujinx.Graphics.Metal/DisposableBuffer.cs new file mode 100644 index 0000000000..a2d2247c49 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/DisposableBuffer.cs @@ -0,0 +1,26 @@ +using SharpMetal.Metal; +using System; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + readonly struct DisposableBuffer : IDisposable + { + public MTLBuffer Value { get; } + + public DisposableBuffer(MTLBuffer buffer) + { + Value = buffer; + } + + public void Dispose() + { + if (Value != IntPtr.Zero) + { + Value.SetPurgeableState(MTLPurgeableState.Empty); + Value.Dispose(); + } + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/DisposableSampler.cs b/src/Ryujinx.Graphics.Metal/DisposableSampler.cs new file mode 100644 index 0000000000..ba041be894 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/DisposableSampler.cs @@ -0,0 +1,22 @@ +using SharpMetal.Metal; +using System; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + readonly struct DisposableSampler : IDisposable + { + public MTLSamplerState Value { get; } + + public DisposableSampler(MTLSamplerState sampler) + { + Value = sampler; + } + + public void Dispose() + { + Value.Dispose(); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/Effects/IPostProcessingEffect.cs b/src/Ryujinx.Graphics.Metal/Effects/IPostProcessingEffect.cs new file mode 100644 index 0000000000..d575d521fb --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Effects/IPostProcessingEffect.cs @@ -0,0 +1,10 @@ +using System; + +namespace Ryujinx.Graphics.Metal.Effects +{ + internal interface IPostProcessingEffect : IDisposable + { + const int LocalGroupSize = 64; + Texture Run(Texture view, int width, int height); + } +} diff --git a/src/Ryujinx.Graphics.Metal/Effects/IScalingFilter.cs b/src/Ryujinx.Graphics.Metal/Effects/IScalingFilter.cs new file mode 100644 index 0000000000..19f1a3c3d6 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Effects/IScalingFilter.cs @@ -0,0 +1,18 @@ +using Ryujinx.Graphics.GAL; +using System; + +namespace Ryujinx.Graphics.Metal.Effects +{ + internal interface IScalingFilter : IDisposable + { + float Level { get; set; } + void Run( + Texture view, + Texture destinationTexture, + Format format, + int width, + int height, + Extents2D source, + Extents2D destination); + } +} diff --git a/src/Ryujinx.Graphics.Metal/EncoderResources.cs b/src/Ryujinx.Graphics.Metal/EncoderResources.cs new file mode 100644 index 0000000000..562500d767 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/EncoderResources.cs @@ -0,0 +1,63 @@ +using SharpMetal.Metal; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Metal +{ + public struct RenderEncoderBindings + { + public List Resources = new(); + public List VertexBuffers = new(); + public List FragmentBuffers = new(); + + public RenderEncoderBindings() { } + + public readonly void Clear() + { + Resources.Clear(); + VertexBuffers.Clear(); + FragmentBuffers.Clear(); + } + } + + public struct ComputeEncoderBindings + { + public List Resources = new(); + public List Buffers = new(); + + public ComputeEncoderBindings() { } + + public readonly void Clear() + { + Resources.Clear(); + Buffers.Clear(); + } + } + + public struct BufferResource + { + public MTLBuffer Buffer; + public ulong Offset; + public ulong Binding; + + public BufferResource(MTLBuffer buffer, ulong offset, ulong binding) + { + Buffer = buffer; + Offset = offset; + Binding = binding; + } + } + + public struct Resource + { + public MTLResource MtlResource; + public MTLResourceUsage ResourceUsage; + public MTLRenderStages Stages; + + public Resource(MTLResource resource, MTLResourceUsage resourceUsage, MTLRenderStages stages) + { + MtlResource = resource; + ResourceUsage = resourceUsage; + Stages = stages; + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/EncoderState.cs b/src/Ryujinx.Graphics.Metal/EncoderState.cs new file mode 100644 index 0000000000..34de168a67 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/EncoderState.cs @@ -0,0 +1,206 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Metal.State; +using Ryujinx.Graphics.Shader; +using SharpMetal.Metal; +using System; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [Flags] + enum DirtyFlags + { + None = 0, + RenderPipeline = 1 << 0, + ComputePipeline = 1 << 1, + DepthStencil = 1 << 2, + DepthClamp = 1 << 3, + DepthBias = 1 << 4, + CullMode = 1 << 5, + FrontFace = 1 << 6, + StencilRef = 1 << 7, + Viewports = 1 << 8, + Scissors = 1 << 9, + Uniforms = 1 << 10, + Storages = 1 << 11, + Textures = 1 << 12, + Images = 1 << 13, + + ArgBuffers = Uniforms | Storages | Textures | Images, + + RenderAll = RenderPipeline | DepthStencil | DepthClamp | DepthBias | CullMode | FrontFace | StencilRef | Viewports | Scissors | ArgBuffers, + ComputeAll = ComputePipeline | ArgBuffers, + All = RenderAll | ComputeAll, + } + + record struct BufferRef + { + public Auto Buffer; + public BufferRange? Range; + + public BufferRef(Auto buffer) + { + Buffer = buffer; + } + + public BufferRef(Auto buffer, ref BufferRange range) + { + Buffer = buffer; + Range = range; + } + } + + record struct TextureRef + { + public ShaderStage Stage; + public TextureBase Storage; + public Auto Sampler; + public Format ImageFormat; + + public TextureRef(ShaderStage stage, TextureBase storage, Auto sampler) + { + Stage = stage; + Storage = storage; + Sampler = sampler; + } + } + + record struct ImageRef + { + public ShaderStage Stage; + public Texture Storage; + + public ImageRef(ShaderStage stage, Texture storage) + { + Stage = stage; + Storage = storage; + } + } + + struct PredrawState + { + public MTLCullMode CullMode; + public DepthStencilUid DepthStencilUid; + public PrimitiveTopology Topology; + public MTLViewport[] Viewports; + } + + struct RenderTargetCopy + { + public MTLScissorRect[] Scissors; + public Texture DepthStencil; + public Texture[] RenderTargets; + } + + [SupportedOSPlatform("macos")] + class EncoderState + { + public Program RenderProgram = null; + public Program ComputeProgram = null; + + public PipelineState Pipeline; + public DepthStencilUid DepthStencilUid; + + public readonly record struct ArrayRef(ShaderStage Stage, T Array); + + public readonly BufferRef[] UniformBufferRefs = new BufferRef[Constants.MaxUniformBufferBindings]; + public readonly BufferRef[] StorageBufferRefs = new BufferRef[Constants.MaxStorageBufferBindings]; + public readonly TextureRef[] TextureRefs = new TextureRef[Constants.MaxTextureBindings * 2]; + public readonly ImageRef[] ImageRefs = new ImageRef[Constants.MaxImageBindings * 2]; + + public ArrayRef[] TextureArrayRefs = []; + public ArrayRef[] ImageArrayRefs = []; + + public ArrayRef[] TextureArrayExtraRefs = []; + public ArrayRef[] ImageArrayExtraRefs = []; + + public IndexBufferState IndexBuffer = default; + + public MTLDepthClipMode DepthClipMode = MTLDepthClipMode.Clip; + + public float DepthBias; + public float SlopeScale; + public float Clamp; + + public int BackRefValue = 0; + public int FrontRefValue = 0; + + public PrimitiveTopology Topology = PrimitiveTopology.Triangles; + public MTLCullMode CullMode = MTLCullMode.None; + public MTLWinding Winding = MTLWinding.CounterClockwise; + public bool CullBoth = false; + + public MTLViewport[] Viewports = new MTLViewport[Constants.MaxViewports]; + public MTLScissorRect[] Scissors = new MTLScissorRect[Constants.MaxViewports]; + + // Changes to attachments take recreation! + public Texture DepthStencil; + public Texture[] RenderTargets = new Texture[Constants.MaxColorAttachments]; + public ITexture PreMaskDepthStencil = default; + public ITexture[] PreMaskRenderTargets; + public bool FramebufferUsingColorWriteMask; + + public Array8 StoredBlend; + public ColorF BlendColor = new(); + + public readonly VertexBufferState[] VertexBuffers = new VertexBufferState[Constants.MaxVertexBuffers]; + public readonly VertexAttribDescriptor[] VertexAttribs = new VertexAttribDescriptor[Constants.MaxVertexAttributes]; + // Dirty flags + public DirtyFlags Dirty = DirtyFlags.None; + + // Only to be used for present + public bool ClearLoadAction = false; + + public RenderEncoderBindings RenderEncoderBindings = new(); + public ComputeEncoderBindings ComputeEncoderBindings = new(); + + public EncoderState() + { + Pipeline.Initialize(); + DepthStencilUid.DepthCompareFunction = MTLCompareFunction.Always; + } + + public RenderTargetCopy InheritForClear(EncoderState other, bool depth, int singleIndex = -1) + { + // Inherit render target related information without causing a render encoder split. + + var oldState = new RenderTargetCopy + { + Scissors = other.Scissors, + RenderTargets = other.RenderTargets, + DepthStencil = other.DepthStencil + }; + + Scissors = other.Scissors; + RenderTargets = other.RenderTargets; + DepthStencil = other.DepthStencil; + + Pipeline.ColorBlendAttachmentStateCount = other.Pipeline.ColorBlendAttachmentStateCount; + Pipeline.Internal.ColorBlendState = other.Pipeline.Internal.ColorBlendState; + Pipeline.DepthStencilFormat = other.Pipeline.DepthStencilFormat; + + ref var blendStates = ref Pipeline.Internal.ColorBlendState; + + // Mask out irrelevant attachments. + for (int i = 0; i < blendStates.Length; i++) + { + if (depth || (singleIndex != -1 && singleIndex != i)) + { + blendStates[i].WriteMask = MTLColorWriteMask.None; + } + } + + return oldState; + } + + public void Restore(RenderTargetCopy copy) + { + Scissors = copy.Scissors; + RenderTargets = copy.RenderTargets; + DepthStencil = copy.DepthStencil; + + Pipeline.Internal.ResetColorState(); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/EncoderStateManager.cs b/src/Ryujinx.Graphics.Metal/EncoderStateManager.cs new file mode 100644 index 0000000000..0093ac1486 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/EncoderStateManager.cs @@ -0,0 +1,1788 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Metal.State; +using Ryujinx.Graphics.Shader; +using SharpMetal.Metal; +using System; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Versioning; +using BufferAssignment = Ryujinx.Graphics.GAL.BufferAssignment; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + struct EncoderStateManager : IDisposable + { + private const int ArrayGrowthSize = 16; + + private readonly MTLDevice _device; + private readonly Pipeline _pipeline; + private readonly BufferManager _bufferManager; + + private readonly DepthStencilCache _depthStencilCache; + private readonly MTLDepthStencilState _defaultState; + + private readonly EncoderState _mainState = new(); + private EncoderState _currentState; + + public readonly IndexBufferState IndexBuffer => _currentState.IndexBuffer; + public readonly PrimitiveTopology Topology => _currentState.Topology; + public readonly Texture[] RenderTargets => _currentState.RenderTargets; + public readonly Texture DepthStencil => _currentState.DepthStencil; + public readonly ComputeSize ComputeLocalSize => _currentState.ComputeProgram.ComputeLocalSize; + + // RGBA32F is the biggest format + private const int ZeroBufferSize = 4 * 4; + private readonly BufferHandle _zeroBuffer; + + public unsafe EncoderStateManager(MTLDevice device, BufferManager bufferManager, Pipeline pipeline) + { + _device = device; + _pipeline = pipeline; + _bufferManager = bufferManager; + + _depthStencilCache = new(device); + _currentState = _mainState; + + _defaultState = _depthStencilCache.GetOrCreate(_currentState.DepthStencilUid); + + // Zero buffer + byte[] zeros = new byte[ZeroBufferSize]; + fixed (byte* ptr = zeros) + { + _zeroBuffer = _bufferManager.Create((IntPtr)ptr, ZeroBufferSize); + } + } + + public readonly void Dispose() + { + _depthStencilCache.Dispose(); + } + + private readonly void SignalDirty(DirtyFlags flags) + { + _currentState.Dirty |= flags; + } + + public readonly void SignalRenderDirty() + { + SignalDirty(DirtyFlags.RenderAll); + } + + public readonly void SignalComputeDirty() + { + SignalDirty(DirtyFlags.ComputeAll); + } + + public EncoderState SwapState(EncoderState state, DirtyFlags flags = DirtyFlags.All) + { + _currentState = state ?? _mainState; + + SignalDirty(flags); + + return _mainState; + } + + public PredrawState SavePredrawState() + { + return new PredrawState + { + CullMode = _currentState.CullMode, + DepthStencilUid = _currentState.DepthStencilUid, + Topology = _currentState.Topology, + Viewports = _currentState.Viewports.ToArray(), + }; + } + + public readonly void RestorePredrawState(PredrawState state) + { + _currentState.CullMode = state.CullMode; + _currentState.DepthStencilUid = state.DepthStencilUid; + _currentState.Topology = state.Topology; + _currentState.Viewports = state.Viewports; + + SignalDirty(DirtyFlags.CullMode | DirtyFlags.DepthStencil | DirtyFlags.Viewports); + } + + public readonly void SetClearLoadAction(bool clear) + { + _currentState.ClearLoadAction = clear; + } + + public readonly void DirtyTextures() + { + SignalDirty(DirtyFlags.Textures); + } + + public readonly void DirtyImages() + { + SignalDirty(DirtyFlags.Images); + } + + public readonly MTLRenderCommandEncoder CreateRenderCommandEncoder() + { + // Initialise Pass & State + using var renderPassDescriptor = new MTLRenderPassDescriptor(); + + for (int i = 0; i < Constants.MaxColorAttachments; i++) + { + if (_currentState.RenderTargets[i] is Texture tex) + { + var passAttachment = renderPassDescriptor.ColorAttachments.Object((ulong)i); + tex.PopulateRenderPassAttachment(passAttachment); + passAttachment.LoadAction = _currentState.ClearLoadAction ? MTLLoadAction.Clear : MTLLoadAction.Load; + passAttachment.StoreAction = MTLStoreAction.Store; + } + } + + var depthAttachment = renderPassDescriptor.DepthAttachment; + var stencilAttachment = renderPassDescriptor.StencilAttachment; + + if (_currentState.DepthStencil != null) + { + switch (_currentState.DepthStencil.GetHandle().PixelFormat) + { + // Depth Only Attachment + case MTLPixelFormat.Depth16Unorm: + case MTLPixelFormat.Depth32Float: + depthAttachment.Texture = _currentState.DepthStencil.GetHandle(); + depthAttachment.LoadAction = MTLLoadAction.Load; + depthAttachment.StoreAction = MTLStoreAction.Store; + break; + + // Stencil Only Attachment + case MTLPixelFormat.Stencil8: + stencilAttachment.Texture = _currentState.DepthStencil.GetHandle(); + stencilAttachment.LoadAction = MTLLoadAction.Load; + stencilAttachment.StoreAction = MTLStoreAction.Store; + break; + + // Combined Attachment + case MTLPixelFormat.Depth24UnormStencil8: + case MTLPixelFormat.Depth32FloatStencil8: + depthAttachment.Texture = _currentState.DepthStencil.GetHandle(); + depthAttachment.LoadAction = MTLLoadAction.Load; + depthAttachment.StoreAction = MTLStoreAction.Store; + + stencilAttachment.Texture = _currentState.DepthStencil.GetHandle(); + stencilAttachment.LoadAction = MTLLoadAction.Load; + stencilAttachment.StoreAction = MTLStoreAction.Store; + break; + default: + Logger.Error?.PrintMsg(LogClass.Gpu, $"Unsupported Depth/Stencil Format: {_currentState.DepthStencil.GetHandle().PixelFormat}!"); + break; + } + } + + // Initialise Encoder + var renderCommandEncoder = _pipeline.CommandBuffer.RenderCommandEncoder(renderPassDescriptor); + + return renderCommandEncoder; + } + + public readonly MTLComputeCommandEncoder CreateComputeCommandEncoder() + { + using var descriptor = new MTLComputePassDescriptor(); + var computeCommandEncoder = _pipeline.CommandBuffer.ComputeCommandEncoder(descriptor); + + return computeCommandEncoder; + } + + public readonly void RenderResourcesPrepass() + { + _currentState.RenderEncoderBindings.Clear(); + + if ((_currentState.Dirty & DirtyFlags.RenderPipeline) != 0) + { + SetVertexBuffers(_currentState.VertexBuffers, ref _currentState.RenderEncoderBindings); + } + + if ((_currentState.Dirty & DirtyFlags.Uniforms) != 0) + { + UpdateAndBind(_currentState.RenderProgram, Constants.ConstantBuffersSetIndex, ref _currentState.RenderEncoderBindings); + } + + if ((_currentState.Dirty & DirtyFlags.Storages) != 0) + { + UpdateAndBind(_currentState.RenderProgram, Constants.StorageBuffersSetIndex, ref _currentState.RenderEncoderBindings); + } + + if ((_currentState.Dirty & DirtyFlags.Textures) != 0) + { + UpdateAndBind(_currentState.RenderProgram, Constants.TexturesSetIndex, ref _currentState.RenderEncoderBindings); + } + + if ((_currentState.Dirty & DirtyFlags.Images) != 0) + { + UpdateAndBind(_currentState.RenderProgram, Constants.ImagesSetIndex, ref _currentState.RenderEncoderBindings); + } + } + + public readonly void ComputeResourcesPrepass() + { + _currentState.ComputeEncoderBindings.Clear(); + + if ((_currentState.Dirty & DirtyFlags.Uniforms) != 0) + { + UpdateAndBind(_currentState.ComputeProgram, Constants.ConstantBuffersSetIndex, ref _currentState.ComputeEncoderBindings); + } + + if ((_currentState.Dirty & DirtyFlags.Storages) != 0) + { + UpdateAndBind(_currentState.ComputeProgram, Constants.StorageBuffersSetIndex, ref _currentState.ComputeEncoderBindings); + } + + if ((_currentState.Dirty & DirtyFlags.Textures) != 0) + { + UpdateAndBind(_currentState.ComputeProgram, Constants.TexturesSetIndex, ref _currentState.ComputeEncoderBindings); + } + + if ((_currentState.Dirty & DirtyFlags.Images) != 0) + { + UpdateAndBind(_currentState.ComputeProgram, Constants.ImagesSetIndex, ref _currentState.ComputeEncoderBindings); + } + } + + public void RebindRenderState(MTLRenderCommandEncoder renderCommandEncoder) + { + if ((_currentState.Dirty & DirtyFlags.RenderPipeline) != 0) + { + SetRenderPipelineState(renderCommandEncoder); + } + + if ((_currentState.Dirty & DirtyFlags.DepthStencil) != 0) + { + SetDepthStencilState(renderCommandEncoder); + } + + if ((_currentState.Dirty & DirtyFlags.DepthClamp) != 0) + { + SetDepthClamp(renderCommandEncoder); + } + + if ((_currentState.Dirty & DirtyFlags.DepthBias) != 0) + { + SetDepthBias(renderCommandEncoder); + } + + if ((_currentState.Dirty & DirtyFlags.CullMode) != 0) + { + SetCullMode(renderCommandEncoder); + } + + if ((_currentState.Dirty & DirtyFlags.FrontFace) != 0) + { + SetFrontFace(renderCommandEncoder); + } + + if ((_currentState.Dirty & DirtyFlags.StencilRef) != 0) + { + SetStencilRefValue(renderCommandEncoder); + } + + if ((_currentState.Dirty & DirtyFlags.Viewports) != 0) + { + SetViewports(renderCommandEncoder); + } + + if ((_currentState.Dirty & DirtyFlags.Scissors) != 0) + { + SetScissors(renderCommandEncoder); + } + + foreach (var resource in _currentState.RenderEncoderBindings.Resources) + { + renderCommandEncoder.UseResource(resource.MtlResource, resource.ResourceUsage, resource.Stages); + } + + foreach (var buffer in _currentState.RenderEncoderBindings.VertexBuffers) + { + renderCommandEncoder.SetVertexBuffer(buffer.Buffer, buffer.Offset, buffer.Binding); + } + + foreach (var buffer in _currentState.RenderEncoderBindings.FragmentBuffers) + { + renderCommandEncoder.SetFragmentBuffer(buffer.Buffer, buffer.Offset, buffer.Binding); + } + + _currentState.Dirty &= ~DirtyFlags.RenderAll; + } + + public readonly void RebindComputeState(MTLComputeCommandEncoder computeCommandEncoder) + { + if ((_currentState.Dirty & DirtyFlags.ComputePipeline) != 0) + { + SetComputePipelineState(computeCommandEncoder); + } + + foreach (var resource in _currentState.ComputeEncoderBindings.Resources) + { + computeCommandEncoder.UseResource(resource.MtlResource, resource.ResourceUsage); + } + + foreach (var buffer in _currentState.ComputeEncoderBindings.Buffers) + { + computeCommandEncoder.SetBuffer(buffer.Buffer, buffer.Offset, buffer.Binding); + } + + _currentState.Dirty &= ~DirtyFlags.ComputeAll; + } + + private readonly void SetRenderPipelineState(MTLRenderCommandEncoder renderCommandEncoder) + { + MTLRenderPipelineState pipelineState = _currentState.Pipeline.CreateRenderPipeline(_device, _currentState.RenderProgram); + + renderCommandEncoder.SetRenderPipelineState(pipelineState); + + renderCommandEncoder.SetBlendColor( + _currentState.BlendColor.Red, + _currentState.BlendColor.Green, + _currentState.BlendColor.Blue, + _currentState.BlendColor.Alpha); + } + + private readonly void SetComputePipelineState(MTLComputeCommandEncoder computeCommandEncoder) + { + if (_currentState.ComputeProgram == null) + { + return; + } + + var pipelineState = PipelineState.CreateComputePipeline(_device, _currentState.ComputeProgram); + + computeCommandEncoder.SetComputePipelineState(pipelineState); + } + + public readonly void UpdateIndexBuffer(BufferRange buffer, IndexType type) + { + if (buffer.Handle != BufferHandle.Null) + { + _currentState.IndexBuffer = new IndexBufferState(buffer.Handle, buffer.Offset, buffer.Size, type); + } + else + { + _currentState.IndexBuffer = IndexBufferState.Null; + } + } + + public readonly void UpdatePrimitiveTopology(PrimitiveTopology topology) + { + _currentState.Topology = topology; + } + + public readonly void UpdateProgram(IProgram program) + { + Program prg = (Program)program; + + if (prg.VertexFunction == IntPtr.Zero && prg.ComputeFunction == IntPtr.Zero) + { + if (prg.FragmentFunction == IntPtr.Zero) + { + Logger.Error?.PrintMsg(LogClass.Gpu, "No compute function"); + } + else + { + Logger.Error?.PrintMsg(LogClass.Gpu, "No vertex function"); + } + return; + } + + if (prg.VertexFunction != IntPtr.Zero) + { + _currentState.RenderProgram = prg; + + SignalDirty(DirtyFlags.RenderPipeline | DirtyFlags.ArgBuffers); + } + else if (prg.ComputeFunction != IntPtr.Zero) + { + _currentState.ComputeProgram = prg; + + SignalDirty(DirtyFlags.ComputePipeline | DirtyFlags.ArgBuffers); + } + } + + public readonly void UpdateRasterizerDiscard(bool discard) + { + _currentState.Pipeline.RasterizerDiscardEnable = discard; + + SignalDirty(DirtyFlags.RenderPipeline); + } + + public readonly void UpdateRenderTargets(ITexture[] colors, ITexture depthStencil) + { + _currentState.FramebufferUsingColorWriteMask = false; + UpdateRenderTargetsInternal(colors, depthStencil); + } + + public readonly void UpdateRenderTargetColorMasks(ReadOnlySpan componentMask) + { + ref var blendState = ref _currentState.Pipeline.Internal.ColorBlendState; + + for (int i = 0; i < componentMask.Length; i++) + { + bool red = (componentMask[i] & (0x1 << 0)) != 0; + bool green = (componentMask[i] & (0x1 << 1)) != 0; + bool blue = (componentMask[i] & (0x1 << 2)) != 0; + bool alpha = (componentMask[i] & (0x1 << 3)) != 0; + + var mask = MTLColorWriteMask.None; + + mask |= red ? MTLColorWriteMask.Red : 0; + mask |= green ? MTLColorWriteMask.Green : 0; + mask |= blue ? MTLColorWriteMask.Blue : 0; + mask |= alpha ? MTLColorWriteMask.Alpha : 0; + + ref ColorBlendStateUid mtlBlend = ref blendState[i]; + + // When color write mask is 0, remove all blend state to help the pipeline cache. + // Restore it when the mask becomes non-zero. + if (mtlBlend.WriteMask != mask) + { + if (mask == 0) + { + _currentState.StoredBlend[i] = mtlBlend; + + mtlBlend.Swap(new ColorBlendStateUid()); + } + else if (mtlBlend.WriteMask == 0) + { + mtlBlend.Swap(_currentState.StoredBlend[i]); + } + } + + blendState[i].WriteMask = mask; + } + + if (_currentState.FramebufferUsingColorWriteMask) + { + UpdateRenderTargetsInternal(_currentState.PreMaskRenderTargets, _currentState.PreMaskDepthStencil); + } + else + { + // Requires recreating pipeline + if (_pipeline.CurrentEncoderType == EncoderType.Render) + { + _pipeline.EndCurrentPass(); + } + } + } + + private readonly void UpdateRenderTargetsInternal(ITexture[] colors, ITexture depthStencil) + { + // TBDR GPUs don't work properly if the same attachment is bound to multiple targets, + // due to each attachment being a copy of the real attachment, rather than a direct write. + // + // Just try to remove duplicate attachments. + // Save a copy of the array to rebind when mask changes. + + // Look for textures that are masked out. + + ref PipelineState pipeline = ref _currentState.Pipeline; + ref var blendState = ref pipeline.Internal.ColorBlendState; + + pipeline.ColorBlendAttachmentStateCount = (uint)colors.Length; + + for (int i = 0; i < colors.Length; i++) + { + if (colors[i] == null) + { + continue; + } + + var mtlMask = blendState[i].WriteMask; + + for (int j = 0; j < i; j++) + { + // Check each binding for a duplicate binding before it. + + if (colors[i] == colors[j]) + { + // Prefer the binding with no write mask. + + var mtlMask2 = blendState[j].WriteMask; + + if (mtlMask == 0) + { + colors[i] = null; + MaskOut(colors, depthStencil); + } + else if (mtlMask2 == 0) + { + colors[j] = null; + MaskOut(colors, depthStencil); + } + } + } + } + + _currentState.RenderTargets = new Texture[Constants.MaxColorAttachments]; + + for (int i = 0; i < colors.Length; i++) + { + if (colors[i] is not Texture tex) + { + blendState[i].PixelFormat = MTLPixelFormat.Invalid; + + continue; + } + + blendState[i].PixelFormat = tex.GetHandle().PixelFormat; // TODO: cache this + _currentState.RenderTargets[i] = tex; + } + + if (depthStencil is Texture depthTexture) + { + pipeline.DepthStencilFormat = depthTexture.GetHandle().PixelFormat; // TODO: cache this + _currentState.DepthStencil = depthTexture; + } + else if (depthStencil == null) + { + pipeline.DepthStencilFormat = MTLPixelFormat.Invalid; + _currentState.DepthStencil = null; + } + + // Requires recreating pipeline + if (_pipeline.CurrentEncoderType == EncoderType.Render) + { + _pipeline.EndCurrentPass(); + } + } + + private readonly void MaskOut(ITexture[] colors, ITexture depthStencil) + { + if (!_currentState.FramebufferUsingColorWriteMask) + { + _currentState.PreMaskRenderTargets = colors; + _currentState.PreMaskDepthStencil = depthStencil; + } + + // If true, then the framebuffer must be recreated when the mask changes. + _currentState.FramebufferUsingColorWriteMask = true; + } + + public readonly void UpdateVertexAttribs(ReadOnlySpan vertexAttribs) + { + vertexAttribs.CopyTo(_currentState.VertexAttribs); + + // Update the buffers on the pipeline + UpdatePipelineVertexState(_currentState.VertexBuffers, _currentState.VertexAttribs); + + SignalDirty(DirtyFlags.RenderPipeline); + } + + public readonly void UpdateBlendDescriptors(int index, BlendDescriptor blend) + { + ref var blendState = ref _currentState.Pipeline.Internal.ColorBlendState[index]; + + blendState.Enable = blend.Enable; + blendState.AlphaBlendOperation = blend.AlphaOp.Convert(); + blendState.RgbBlendOperation = blend.ColorOp.Convert(); + blendState.SourceAlphaBlendFactor = blend.AlphaSrcFactor.Convert(); + blendState.DestinationAlphaBlendFactor = blend.AlphaDstFactor.Convert(); + blendState.SourceRGBBlendFactor = blend.ColorSrcFactor.Convert(); + blendState.DestinationRGBBlendFactor = blend.ColorDstFactor.Convert(); + + if (blendState.WriteMask == 0) + { + _currentState.StoredBlend[index] = blendState; + + blendState.Swap(new ColorBlendStateUid()); + } + + _currentState.BlendColor = blend.BlendConstant; + + SignalDirty(DirtyFlags.RenderPipeline); + } + + public void UpdateStencilState(StencilTestDescriptor stencilTest) + { + ref DepthStencilUid uid = ref _currentState.DepthStencilUid; + + uid.FrontFace = new StencilUid + { + StencilFailureOperation = stencilTest.FrontSFail.Convert(), + DepthFailureOperation = stencilTest.FrontDpFail.Convert(), + DepthStencilPassOperation = stencilTest.FrontDpPass.Convert(), + StencilCompareFunction = stencilTest.FrontFunc.Convert(), + ReadMask = (uint)stencilTest.FrontFuncMask, + WriteMask = (uint)stencilTest.FrontMask + }; + + uid.BackFace = new StencilUid + { + StencilFailureOperation = stencilTest.BackSFail.Convert(), + DepthFailureOperation = stencilTest.BackDpFail.Convert(), + DepthStencilPassOperation = stencilTest.BackDpPass.Convert(), + StencilCompareFunction = stencilTest.BackFunc.Convert(), + ReadMask = (uint)stencilTest.BackFuncMask, + WriteMask = (uint)stencilTest.BackMask + }; + + uid.StencilTestEnabled = stencilTest.TestEnable; + + UpdateStencilRefValue(stencilTest.FrontFuncRef, stencilTest.BackFuncRef); + + SignalDirty(DirtyFlags.DepthStencil); + } + + public readonly void UpdateDepthState(DepthTestDescriptor depthTest) + { + ref DepthStencilUid uid = ref _currentState.DepthStencilUid; + + uid.DepthCompareFunction = depthTest.TestEnable ? depthTest.Func.Convert() : MTLCompareFunction.Always; + uid.DepthWriteEnabled = depthTest.TestEnable && depthTest.WriteEnable; + + SignalDirty(DirtyFlags.DepthStencil); + } + + public readonly void UpdateDepthClamp(bool clamp) + { + _currentState.DepthClipMode = clamp ? MTLDepthClipMode.Clamp : MTLDepthClipMode.Clip; + + // Inline update + if (_pipeline.Encoders.TryGetRenderEncoder(out MTLRenderCommandEncoder renderCommandEncoder)) + { + SetDepthClamp(renderCommandEncoder); + return; + } + + SignalDirty(DirtyFlags.DepthClamp); + } + + public readonly void UpdateDepthBias(float depthBias, float slopeScale, float clamp) + { + _currentState.DepthBias = depthBias; + _currentState.SlopeScale = slopeScale; + _currentState.Clamp = clamp; + + // Inline update + if (_pipeline.Encoders.TryGetRenderEncoder(out MTLRenderCommandEncoder renderCommandEncoder)) + { + SetDepthBias(renderCommandEncoder); + return; + } + + SignalDirty(DirtyFlags.DepthBias); + } + + public readonly void UpdateLogicOpState(bool enable, LogicalOp op) + { + _currentState.Pipeline.LogicOpEnable = enable; + _currentState.Pipeline.LogicOp = op.Convert(); + + SignalDirty(DirtyFlags.RenderPipeline); + } + + public readonly void UpdateMultisampleState(MultisampleDescriptor multisample) + { + _currentState.Pipeline.AlphaToCoverageEnable = multisample.AlphaToCoverageEnable; + _currentState.Pipeline.AlphaToOneEnable = multisample.AlphaToOneEnable; + + SignalDirty(DirtyFlags.RenderPipeline); + } + + public void UpdateScissors(ReadOnlySpan> regions) + { + for (int i = 0; i < regions.Length; i++) + { + var region = regions[i]; + + _currentState.Scissors[i] = new MTLScissorRect + { + height = (ulong)region.Height, + width = (ulong)region.Width, + x = (ulong)region.X, + y = (ulong)region.Y + }; + } + + // Inline update + if (_pipeline.Encoders.TryGetRenderEncoder(out MTLRenderCommandEncoder renderCommandEncoder)) + { + SetScissors(renderCommandEncoder); + return; + } + + SignalDirty(DirtyFlags.Scissors); + } + + public void UpdateViewports(ReadOnlySpan viewports) + { + static float Clamp(float value) + { + return Math.Clamp(value, 0f, 1f); + } + + for (int i = 0; i < viewports.Length; i++) + { + var viewport = viewports[i]; + // Y coordinate is inverted + _currentState.Viewports[i] = new MTLViewport + { + originX = viewport.Region.X, + originY = viewport.Region.Y + viewport.Region.Height, + width = viewport.Region.Width, + height = -viewport.Region.Height, + znear = Clamp(viewport.DepthNear), + zfar = Clamp(viewport.DepthFar) + }; + } + + // Inline update + if (_pipeline.Encoders.TryGetRenderEncoder(out MTLRenderCommandEncoder renderCommandEncoder)) + { + SetViewports(renderCommandEncoder); + return; + } + + SignalDirty(DirtyFlags.Viewports); + } + + public readonly void UpdateVertexBuffers(ReadOnlySpan vertexBuffers) + { + for (int i = 0; i < Constants.MaxVertexBuffers; i++) + { + if (i < vertexBuffers.Length) + { + var vertexBuffer = vertexBuffers[i]; + + _currentState.VertexBuffers[i] = new VertexBufferState( + vertexBuffer.Buffer.Handle, + vertexBuffer.Buffer.Offset, + vertexBuffer.Buffer.Size, + vertexBuffer.Divisor, + vertexBuffer.Stride); + } + else + { + _currentState.VertexBuffers[i] = VertexBufferState.Null; + } + } + + // Update the buffers on the pipeline + UpdatePipelineVertexState(_currentState.VertexBuffers, _currentState.VertexAttribs); + + SignalDirty(DirtyFlags.RenderPipeline); + } + + public readonly void UpdateUniformBuffers(ReadOnlySpan buffers) + { + foreach (BufferAssignment assignment in buffers) + { + var buffer = assignment.Range; + int index = assignment.Binding; + + Auto mtlBuffer = buffer.Handle == BufferHandle.Null + ? null + : _bufferManager.GetBuffer(buffer.Handle, buffer.Write); + + _currentState.UniformBufferRefs[index] = new BufferRef(mtlBuffer, ref buffer); + } + + SignalDirty(DirtyFlags.Uniforms); + } + + public readonly void UpdateStorageBuffers(ReadOnlySpan buffers) + { + foreach (BufferAssignment assignment in buffers) + { + var buffer = assignment.Range; + int index = assignment.Binding; + + Auto mtlBuffer = buffer.Handle == BufferHandle.Null + ? null + : _bufferManager.GetBuffer(buffer.Handle, buffer.Write); + + _currentState.StorageBufferRefs[index] = new BufferRef(mtlBuffer, ref buffer); + } + + SignalDirty(DirtyFlags.Storages); + } + + public readonly void UpdateStorageBuffers(int first, ReadOnlySpan> buffers) + { + for (int i = 0; i < buffers.Length; i++) + { + var mtlBuffer = buffers[i]; + int index = first + i; + + _currentState.StorageBufferRefs[index] = new BufferRef(mtlBuffer); + } + + SignalDirty(DirtyFlags.Storages); + } + + public void UpdateCullMode(bool enable, Face face) + { + var dirtyScissor = (face == Face.FrontAndBack) != _currentState.CullBoth; + + _currentState.CullMode = enable ? face.Convert() : MTLCullMode.None; + _currentState.CullBoth = face == Face.FrontAndBack; + + // Inline update + if (_pipeline.Encoders.TryGetRenderEncoder(out MTLRenderCommandEncoder renderCommandEncoder)) + { + SetCullMode(renderCommandEncoder); + SetScissors(renderCommandEncoder); + return; + } + + // Mark dirty + SignalDirty(DirtyFlags.CullMode); + + if (dirtyScissor) + { + SignalDirty(DirtyFlags.Scissors); + } + } + + public readonly void UpdateFrontFace(FrontFace frontFace) + { + _currentState.Winding = frontFace.Convert(); + + // Inline update + if (_pipeline.Encoders.TryGetRenderEncoder(out MTLRenderCommandEncoder renderCommandEncoder)) + { + SetFrontFace(renderCommandEncoder); + return; + } + + SignalDirty(DirtyFlags.FrontFace); + } + + private readonly void UpdateStencilRefValue(int frontRef, int backRef) + { + _currentState.FrontRefValue = frontRef; + _currentState.BackRefValue = backRef; + + // Inline update + if (_pipeline.Encoders.TryGetRenderEncoder(out MTLRenderCommandEncoder renderCommandEncoder)) + { + SetStencilRefValue(renderCommandEncoder); + } + + SignalDirty(DirtyFlags.StencilRef); + } + + public readonly void UpdateTextureAndSampler(ShaderStage stage, int binding, TextureBase texture, SamplerHolder samplerHolder) + { + if (texture != null) + { + _currentState.TextureRefs[binding] = new(stage, texture, samplerHolder?.GetSampler()); + } + else + { + _currentState.TextureRefs[binding] = default; + } + + SignalDirty(DirtyFlags.Textures); + } + + public readonly void UpdateImage(ShaderStage stage, int binding, TextureBase image) + { + if (image is Texture view) + { + _currentState.ImageRefs[binding] = new(stage, view); + } + else + { + _currentState.ImageRefs[binding] = default; + } + + SignalDirty(DirtyFlags.Images); + } + + public readonly void UpdateTextureArray(ShaderStage stage, int binding, TextureArray array) + { + ref EncoderState.ArrayRef arrayRef = ref GetArrayRef(ref _currentState.TextureArrayRefs, binding, ArrayGrowthSize); + + if (arrayRef.Stage != stage || arrayRef.Array != array) + { + arrayRef = new EncoderState.ArrayRef(stage, array); + + SignalDirty(DirtyFlags.Textures); + } + } + + public readonly void UpdateTextureArraySeparate(ShaderStage stage, int setIndex, TextureArray array) + { + ref EncoderState.ArrayRef arrayRef = ref GetArrayRef(ref _currentState.TextureArrayExtraRefs, setIndex - MetalRenderer.TotalSets); + + if (arrayRef.Stage != stage || arrayRef.Array != array) + { + arrayRef = new EncoderState.ArrayRef(stage, array); + + SignalDirty(DirtyFlags.Textures); + } + } + + public readonly void UpdateImageArray(ShaderStage stage, int binding, ImageArray array) + { + ref EncoderState.ArrayRef arrayRef = ref GetArrayRef(ref _currentState.ImageArrayRefs, binding, ArrayGrowthSize); + + if (arrayRef.Stage != stage || arrayRef.Array != array) + { + arrayRef = new EncoderState.ArrayRef(stage, array); + + SignalDirty(DirtyFlags.Images); + } + } + + public readonly void UpdateImageArraySeparate(ShaderStage stage, int setIndex, ImageArray array) + { + ref EncoderState.ArrayRef arrayRef = ref GetArrayRef(ref _currentState.ImageArrayExtraRefs, setIndex - MetalRenderer.TotalSets); + + if (arrayRef.Stage != stage || arrayRef.Array != array) + { + arrayRef = new EncoderState.ArrayRef(stage, array); + + SignalDirty(DirtyFlags.Images); + } + } + + private static ref EncoderState.ArrayRef GetArrayRef(ref EncoderState.ArrayRef[] array, int index, int growthSize = 1) + { + ArgumentOutOfRangeException.ThrowIfNegative(index); + + if (array.Length <= index) + { + Array.Resize(ref array, index + growthSize); + } + + return ref array[index]; + } + + private readonly void SetDepthStencilState(MTLRenderCommandEncoder renderCommandEncoder) + { + if (DepthStencil != null) + { + MTLDepthStencilState state = _depthStencilCache.GetOrCreate(_currentState.DepthStencilUid); + + renderCommandEncoder.SetDepthStencilState(state); + } + else + { + renderCommandEncoder.SetDepthStencilState(_defaultState); + } + } + + private readonly void SetDepthClamp(MTLRenderCommandEncoder renderCommandEncoder) + { + renderCommandEncoder.SetDepthClipMode(_currentState.DepthClipMode); + } + + private readonly void SetDepthBias(MTLRenderCommandEncoder renderCommandEncoder) + { + renderCommandEncoder.SetDepthBias(_currentState.DepthBias, _currentState.SlopeScale, _currentState.Clamp); + } + + private unsafe void SetScissors(MTLRenderCommandEncoder renderCommandEncoder) + { + var isTriangles = (_currentState.Topology == PrimitiveTopology.Triangles) || + (_currentState.Topology == PrimitiveTopology.TriangleStrip); + + if (_currentState.CullBoth && isTriangles) + { + renderCommandEncoder.SetScissorRect(new MTLScissorRect { x = 0, y = 0, width = 0, height = 0 }); + } + else + { + if (_currentState.Scissors.Length > 0) + { + fixed (MTLScissorRect* pMtlScissors = _currentState.Scissors) + { + renderCommandEncoder.SetScissorRects((IntPtr)pMtlScissors, (ulong)_currentState.Scissors.Length); + } + } + } + } + + private readonly unsafe void SetViewports(MTLRenderCommandEncoder renderCommandEncoder) + { + if (_currentState.Viewports.Length > 0) + { + fixed (MTLViewport* pMtlViewports = _currentState.Viewports) + { + renderCommandEncoder.SetViewports((IntPtr)pMtlViewports, (ulong)_currentState.Viewports.Length); + } + } + } + + private readonly void UpdatePipelineVertexState(VertexBufferState[] bufferDescriptors, VertexAttribDescriptor[] attribDescriptors) + { + ref PipelineState pipeline = ref _currentState.Pipeline; + uint indexMask = 0; + + for (int i = 0; i < attribDescriptors.Length; i++) + { + ref var attrib = ref pipeline.Internal.VertexAttributes[i]; + + if (attribDescriptors[i].IsZero) + { + attrib.Format = attribDescriptors[i].Format.Convert(); + indexMask |= 1u << (int)Constants.ZeroBufferIndex; + attrib.BufferIndex = Constants.ZeroBufferIndex; + attrib.Offset = 0; + } + else + { + attrib.Format = attribDescriptors[i].Format.Convert(); + indexMask |= 1u << attribDescriptors[i].BufferIndex; + attrib.BufferIndex = (ulong)attribDescriptors[i].BufferIndex; + attrib.Offset = (ulong)attribDescriptors[i].Offset; + } + } + + for (int i = 0; i < bufferDescriptors.Length; i++) + { + ref var layout = ref pipeline.Internal.VertexBindings[i]; + + if ((indexMask & (1u << i)) != 0) + { + layout.Stride = (uint)bufferDescriptors[i].Stride; + + if (layout.Stride == 0) + { + layout.Stride = 1; + layout.StepFunction = MTLVertexStepFunction.Constant; + layout.StepRate = 0; + } + else + { + if (bufferDescriptors[i].Divisor > 0) + { + layout.StepFunction = MTLVertexStepFunction.PerInstance; + layout.StepRate = (uint)bufferDescriptors[i].Divisor; + } + else + { + layout.StepFunction = MTLVertexStepFunction.PerVertex; + layout.StepRate = 1; + } + } + } + else + { + layout = new(); + } + } + + ref var zeroBufLayout = ref pipeline.Internal.VertexBindings[(int)Constants.ZeroBufferIndex]; + + // Zero buffer + if ((indexMask & (1u << (int)Constants.ZeroBufferIndex)) != 0) + { + zeroBufLayout.Stride = 1; + zeroBufLayout.StepFunction = MTLVertexStepFunction.Constant; + zeroBufLayout.StepRate = 0; + } + else + { + zeroBufLayout = new(); + } + + pipeline.VertexAttributeDescriptionsCount = (uint)attribDescriptors.Length; + pipeline.VertexBindingDescriptionsCount = Constants.ZeroBufferIndex + 1; // TODO: move this out? + } + + private readonly void SetVertexBuffers(VertexBufferState[] bufferStates, ref readonly RenderEncoderBindings bindings) + { + for (int i = 0; i < bufferStates.Length; i++) + { + (MTLBuffer mtlBuffer, int offset) = bufferStates[i].GetVertexBuffer(_bufferManager, _pipeline.Cbs); + + if (mtlBuffer.NativePtr != IntPtr.Zero) + { + bindings.VertexBuffers.Add(new BufferResource(mtlBuffer, (ulong)offset, (ulong)i)); + } + } + + Auto autoZeroBuffer = _zeroBuffer == BufferHandle.Null + ? null + : _bufferManager.GetBuffer(_zeroBuffer, false); + + if (autoZeroBuffer == null) + { + return; + } + + var zeroMtlBuffer = autoZeroBuffer.Get(_pipeline.Cbs).Value; + bindings.VertexBuffers.Add(new BufferResource(zeroMtlBuffer, 0, Constants.ZeroBufferIndex)); + } + + private readonly (ulong gpuAddress, IntPtr nativePtr) AddressForBuffer(ref BufferRef buffer) + { + ulong gpuAddress = 0; + IntPtr nativePtr = IntPtr.Zero; + + var range = buffer.Range; + var autoBuffer = buffer.Buffer; + + if (autoBuffer != null) + { + var offset = 0; + MTLBuffer mtlBuffer; + + if (range.HasValue) + { + offset = range.Value.Offset; + mtlBuffer = autoBuffer.Get(_pipeline.Cbs, offset, range.Value.Size, range.Value.Write).Value; + } + else + { + mtlBuffer = autoBuffer.Get(_pipeline.Cbs).Value; + } + + gpuAddress = mtlBuffer.GpuAddress + (ulong)offset; + nativePtr = mtlBuffer.NativePtr; + } + + return (gpuAddress, nativePtr); + } + + private readonly (ulong gpuAddress, IntPtr nativePtr) AddressForTexture(ref TextureRef texture) + { + var storage = texture.Storage; + + ulong gpuAddress = 0; + IntPtr nativePtr = IntPtr.Zero; + + if (storage != null) + { + if (storage is TextureBuffer textureBuffer) + { + textureBuffer.RebuildStorage(false); + } + + var mtlTexture = storage.GetHandle(); + + gpuAddress = mtlTexture.GpuResourceID._impl; + nativePtr = mtlTexture.NativePtr; + } + + return (gpuAddress, nativePtr); + } + + private readonly (ulong gpuAddress, IntPtr nativePtr) AddressForImage(ref ImageRef image) + { + var storage = image.Storage; + + ulong gpuAddress = 0; + IntPtr nativePtr = IntPtr.Zero; + + if (storage != null) + { + var mtlTexture = storage.GetHandle(); + + gpuAddress = mtlTexture.GpuResourceID._impl; + nativePtr = mtlTexture.NativePtr; + } + + return (gpuAddress, nativePtr); + } + + private readonly (ulong gpuAddress, IntPtr nativePtr) AddressForTextureBuffer(ref TextureBuffer bufferTexture) + { + ulong gpuAddress = 0; + IntPtr nativePtr = IntPtr.Zero; + + if (bufferTexture != null) + { + bufferTexture.RebuildStorage(false); + + var mtlTexture = bufferTexture.GetHandle(); + + gpuAddress = mtlTexture.GpuResourceID._impl; + nativePtr = mtlTexture.NativePtr; + } + + return (gpuAddress, nativePtr); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void AddResource(IntPtr resourcePointer, MTLResourceUsage usage, MTLRenderStages stages, ref readonly RenderEncoderBindings bindings) + { + if (resourcePointer != IntPtr.Zero) + { + bindings.Resources.Add(new Resource(new MTLResource(resourcePointer), usage, stages)); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void AddResource(IntPtr resourcePointer, MTLResourceUsage usage, ref readonly ComputeEncoderBindings bindings) + { + if (resourcePointer != IntPtr.Zero) + { + bindings.Resources.Add(new Resource(new MTLResource(resourcePointer), usage, 0)); + } + } + + private readonly void UpdateAndBind(Program program, uint setIndex, ref readonly RenderEncoderBindings bindings) + { + var bindingSegments = program.BindingSegments[setIndex]; + + if (bindingSegments.Length == 0) + { + return; + } + + ScopedTemporaryBuffer vertArgBuffer = default; + ScopedTemporaryBuffer fragArgBuffer = default; + + if (program.ArgumentBufferSizes[setIndex] > 0) + { + vertArgBuffer = _bufferManager.ReserveOrCreate(_pipeline.Cbs, program.ArgumentBufferSizes[setIndex] * sizeof(ulong)); + } + + if (program.FragArgumentBufferSizes[setIndex] > 0) + { + fragArgBuffer = _bufferManager.ReserveOrCreate(_pipeline.Cbs, program.FragArgumentBufferSizes[setIndex] * sizeof(ulong)); + } + + Span vertResourceIds = stackalloc ulong[program.ArgumentBufferSizes[setIndex]]; + Span fragResourceIds = stackalloc ulong[program.FragArgumentBufferSizes[setIndex]]; + + var vertResourceIdIndex = 0; + var fragResourceIdIndex = 0; + + foreach (ResourceBindingSegment segment in bindingSegments) + { + int binding = segment.Binding; + int count = segment.Count; + + switch (setIndex) + { + case Constants.ConstantBuffersSetIndex: + for (int i = 0; i < count; i++) + { + int index = binding + i; + + ref BufferRef buffer = ref _currentState.UniformBufferRefs[index]; + var (gpuAddress, nativePtr) = AddressForBuffer(ref buffer); + + MTLRenderStages renderStages = 0; + + if ((segment.Stages & ResourceStages.Vertex) != 0) + { + vertResourceIds[vertResourceIdIndex] = gpuAddress; + vertResourceIdIndex++; + + renderStages |= MTLRenderStages.RenderStageVertex; + } + + if ((segment.Stages & ResourceStages.Fragment) != 0) + { + fragResourceIds[fragResourceIdIndex] = gpuAddress; + fragResourceIdIndex++; + + renderStages |= MTLRenderStages.RenderStageFragment; + } + + AddResource(nativePtr, MTLResourceUsage.Read, renderStages, in bindings); + } + break; + case Constants.StorageBuffersSetIndex: + for (int i = 0; i < count; i++) + { + int index = binding + i; + + ref BufferRef buffer = ref _currentState.StorageBufferRefs[index]; + var (gpuAddress, nativePtr) = AddressForBuffer(ref buffer); + + MTLRenderStages renderStages = 0; + + if ((segment.Stages & ResourceStages.Vertex) != 0) + { + vertResourceIds[vertResourceIdIndex] = gpuAddress; + vertResourceIdIndex++; + + renderStages |= MTLRenderStages.RenderStageVertex; + } + + if ((segment.Stages & ResourceStages.Fragment) != 0) + { + fragResourceIds[fragResourceIdIndex] = gpuAddress; + fragResourceIdIndex++; + + renderStages |= MTLRenderStages.RenderStageFragment; + } + + AddResource(nativePtr, MTLResourceUsage.Read, renderStages, in bindings); + } + break; + case Constants.TexturesSetIndex: + if (!segment.IsArray) + { + for (int i = 0; i < count; i++) + { + int index = binding + i; + + ref var texture = ref _currentState.TextureRefs[index]; + var (gpuAddress, nativePtr) = AddressForTexture(ref texture); + + MTLRenderStages renderStages = 0; + + if ((segment.Stages & ResourceStages.Vertex) != 0) + { + vertResourceIds[vertResourceIdIndex] = gpuAddress; + vertResourceIdIndex++; + + if (texture.Sampler != null) + { + vertResourceIds[vertResourceIdIndex] = texture.Sampler.Get(_pipeline.Cbs).Value.GpuResourceID._impl; + vertResourceIdIndex++; + } + + renderStages |= MTLRenderStages.RenderStageVertex; + } + + if ((segment.Stages & ResourceStages.Fragment) != 0) + { + fragResourceIds[fragResourceIdIndex] = gpuAddress; + fragResourceIdIndex++; + + if (texture.Sampler != null) + { + fragResourceIds[fragResourceIdIndex] = texture.Sampler.Get(_pipeline.Cbs).Value.GpuResourceID._impl; + fragResourceIdIndex++; + } + + renderStages |= MTLRenderStages.RenderStageFragment; + } + + AddResource(nativePtr, MTLResourceUsage.Read, renderStages, in bindings); + } + } + else + { + var textureArray = _currentState.TextureArrayRefs[binding].Array; + + if (segment.Type != ResourceType.BufferTexture) + { + var textures = textureArray.GetTextureRefs(); + var samplers = new Auto[textures.Length]; + + for (int i = 0; i < textures.Length; i++) + { + TextureRef texture = textures[i]; + var (gpuAddress, nativePtr) = AddressForTexture(ref texture); + + samplers[i] = texture.Sampler; + + MTLRenderStages renderStages = 0; + + if ((segment.Stages & ResourceStages.Vertex) != 0) + { + vertResourceIds[vertResourceIdIndex] = gpuAddress; + vertResourceIdIndex++; + + renderStages |= MTLRenderStages.RenderStageVertex; + } + + if ((segment.Stages & ResourceStages.Fragment) != 0) + { + fragResourceIds[fragResourceIdIndex] = gpuAddress; + fragResourceIdIndex++; + + renderStages |= MTLRenderStages.RenderStageFragment; + } + + AddResource(nativePtr, MTLResourceUsage.Read, renderStages, in bindings); + } + + foreach (var sampler in samplers) + { + ulong gpuAddress = 0; + + if (sampler != null) + { + gpuAddress = sampler.Get(_pipeline.Cbs).Value.GpuResourceID._impl; + } + + if ((segment.Stages & ResourceStages.Vertex) != 0) + { + vertResourceIds[vertResourceIdIndex] = gpuAddress; + vertResourceIdIndex++; + } + + if ((segment.Stages & ResourceStages.Fragment) != 0) + { + fragResourceIds[fragResourceIdIndex] = gpuAddress; + fragResourceIdIndex++; + } + } + } + else + { + var bufferTextures = textureArray.GetBufferTextureRefs(); + + for (int i = 0; i < bufferTextures.Length; i++) + { + TextureBuffer bufferTexture = bufferTextures[i]; + var (gpuAddress, nativePtr) = AddressForTextureBuffer(ref bufferTexture); + + MTLRenderStages renderStages = 0; + + if ((segment.Stages & ResourceStages.Vertex) != 0) + { + vertResourceIds[vertResourceIdIndex] = gpuAddress; + vertResourceIdIndex++; + + renderStages |= MTLRenderStages.RenderStageVertex; + } + + if ((segment.Stages & ResourceStages.Fragment) != 0) + { + fragResourceIds[fragResourceIdIndex] = gpuAddress; + fragResourceIdIndex++; + + renderStages |= MTLRenderStages.RenderStageFragment; + } + + AddResource(nativePtr, MTLResourceUsage.Read, renderStages, in bindings); + } + } + } + break; + case Constants.ImagesSetIndex: + if (!segment.IsArray) + { + for (int i = 0; i < count; i++) + { + int index = binding + i; + + ref var image = ref _currentState.ImageRefs[index]; + var (gpuAddress, nativePtr) = AddressForImage(ref image); + + MTLRenderStages renderStages = 0; + + if ((segment.Stages & ResourceStages.Vertex) != 0) + { + vertResourceIds[vertResourceIdIndex] = gpuAddress; + vertResourceIdIndex++; + renderStages |= MTLRenderStages.RenderStageVertex; + } + + if ((segment.Stages & ResourceStages.Fragment) != 0) + { + fragResourceIds[fragResourceIdIndex] = gpuAddress; + fragResourceIdIndex++; + renderStages |= MTLRenderStages.RenderStageFragment; + } + + AddResource(nativePtr, MTLResourceUsage.Read | MTLResourceUsage.Write, renderStages, in bindings); + } + } + else + { + var imageArray = _currentState.ImageArrayRefs[binding].Array; + + if (segment.Type != ResourceType.BufferImage) + { + var images = imageArray.GetTextureRefs(); + + for (int i = 0; i < images.Length; i++) + { + TextureRef image = images[i]; + var (gpuAddress, nativePtr) = AddressForTexture(ref image); + + MTLRenderStages renderStages = 0; + + if ((segment.Stages & ResourceStages.Vertex) != 0) + { + vertResourceIds[vertResourceIdIndex] = gpuAddress; + vertResourceIdIndex++; + renderStages |= MTLRenderStages.RenderStageVertex; + } + + if ((segment.Stages & ResourceStages.Fragment) != 0) + { + fragResourceIds[fragResourceIdIndex] = gpuAddress; + fragResourceIdIndex++; + renderStages |= MTLRenderStages.RenderStageFragment; + } + + AddResource(nativePtr, MTLResourceUsage.Read | MTLResourceUsage.Write, renderStages, in bindings); + } + } + else + { + var bufferImages = imageArray.GetBufferTextureRefs(); + + for (int i = 0; i < bufferImages.Length; i++) + { + TextureBuffer image = bufferImages[i]; + var (gpuAddress, nativePtr) = AddressForTextureBuffer(ref image); + + MTLRenderStages renderStages = 0; + + if ((segment.Stages & ResourceStages.Vertex) != 0) + { + vertResourceIds[vertResourceIdIndex] = gpuAddress; + vertResourceIdIndex++; + renderStages |= MTLRenderStages.RenderStageVertex; + } + + if ((segment.Stages & ResourceStages.Fragment) != 0) + { + fragResourceIds[fragResourceIdIndex] = gpuAddress; + fragResourceIdIndex++; + renderStages |= MTLRenderStages.RenderStageFragment; + } + + AddResource(nativePtr, MTLResourceUsage.Read | MTLResourceUsage.Write, renderStages, in bindings); + } + } + } + break; + } + } + + if (program.ArgumentBufferSizes[setIndex] > 0) + { + vertArgBuffer.Holder.SetDataUnchecked(vertArgBuffer.Offset, MemoryMarshal.AsBytes(vertResourceIds)); + var mtlVertArgBuffer = _bufferManager.GetBuffer(vertArgBuffer.Handle, false).Get(_pipeline.Cbs).Value; + bindings.VertexBuffers.Add(new BufferResource(mtlVertArgBuffer, (uint)vertArgBuffer.Range.Offset, SetIndexToBindingIndex(setIndex))); + } + + if (program.FragArgumentBufferSizes[setIndex] > 0) + { + fragArgBuffer.Holder.SetDataUnchecked(fragArgBuffer.Offset, MemoryMarshal.AsBytes(fragResourceIds)); + var mtlFragArgBuffer = _bufferManager.GetBuffer(fragArgBuffer.Handle, false).Get(_pipeline.Cbs).Value; + bindings.FragmentBuffers.Add(new BufferResource(mtlFragArgBuffer, (uint)fragArgBuffer.Range.Offset, SetIndexToBindingIndex(setIndex))); + } + } + + private readonly void UpdateAndBind(Program program, uint setIndex, ref readonly ComputeEncoderBindings bindings) + { + var bindingSegments = program.BindingSegments[setIndex]; + + if (bindingSegments.Length == 0) + { + return; + } + + ScopedTemporaryBuffer argBuffer = default; + + if (program.ArgumentBufferSizes[setIndex] > 0) + { + argBuffer = _bufferManager.ReserveOrCreate(_pipeline.Cbs, program.ArgumentBufferSizes[setIndex] * sizeof(ulong)); + } + + Span resourceIds = stackalloc ulong[program.ArgumentBufferSizes[setIndex]]; + var resourceIdIndex = 0; + + foreach (ResourceBindingSegment segment in bindingSegments) + { + int binding = segment.Binding; + int count = segment.Count; + + switch (setIndex) + { + case Constants.ConstantBuffersSetIndex: + for (int i = 0; i < count; i++) + { + int index = binding + i; + + ref BufferRef buffer = ref _currentState.UniformBufferRefs[index]; + var (gpuAddress, nativePtr) = AddressForBuffer(ref buffer); + + if ((segment.Stages & ResourceStages.Compute) != 0) + { + AddResource(nativePtr, MTLResourceUsage.Read, in bindings); + bindings.Resources.Add(new Resource(new MTLResource(nativePtr), MTLResourceUsage.Read, 0)); + resourceIds[resourceIdIndex] = gpuAddress; + resourceIdIndex++; + } + } + break; + case Constants.StorageBuffersSetIndex: + for (int i = 0; i < count; i++) + { + int index = binding + i; + + ref BufferRef buffer = ref _currentState.StorageBufferRefs[index]; + var (gpuAddress, nativePtr) = AddressForBuffer(ref buffer); + + if ((segment.Stages & ResourceStages.Compute) != 0) + { + AddResource(nativePtr, MTLResourceUsage.Read | MTLResourceUsage.Write, in bindings); + resourceIds[resourceIdIndex] = gpuAddress; + resourceIdIndex++; + } + } + break; + case Constants.TexturesSetIndex: + if (!segment.IsArray) + { + for (int i = 0; i < count; i++) + { + int index = binding + i; + + ref var texture = ref _currentState.TextureRefs[index]; + var (gpuAddress, nativePtr) = AddressForTexture(ref texture); + + if ((segment.Stages & ResourceStages.Compute) != 0) + { + AddResource(nativePtr, MTLResourceUsage.Read, in bindings); + resourceIds[resourceIdIndex] = gpuAddress; + resourceIdIndex++; + + if (texture.Sampler != null) + { + resourceIds[resourceIdIndex] = texture.Sampler.Get(_pipeline.Cbs).Value.GpuResourceID._impl; + resourceIdIndex++; + } + } + } + } + else + { + var textureArray = _currentState.TextureArrayRefs[binding].Array; + + if (segment.Type != ResourceType.BufferTexture) + { + var textures = textureArray.GetTextureRefs(); + var samplers = new Auto[textures.Length]; + + for (int i = 0; i < textures.Length; i++) + { + TextureRef texture = textures[i]; + var (gpuAddress, nativePtr) = AddressForTexture(ref texture); + + if ((segment.Stages & ResourceStages.Compute) != 0) + { + AddResource(nativePtr, MTLResourceUsage.Read, in bindings); + resourceIds[resourceIdIndex] = gpuAddress; + resourceIdIndex++; + + samplers[i] = texture.Sampler; + } + } + + foreach (var sampler in samplers) + { + if (sampler != null) + { + resourceIds[resourceIdIndex] = sampler.Get(_pipeline.Cbs).Value.GpuResourceID._impl; + resourceIdIndex++; + } + } + } + else + { + var bufferTextures = textureArray.GetBufferTextureRefs(); + + for (int i = 0; i < bufferTextures.Length; i++) + { + TextureBuffer bufferTexture = bufferTextures[i]; + var (gpuAddress, nativePtr) = AddressForTextureBuffer(ref bufferTexture); + + if ((segment.Stages & ResourceStages.Compute) != 0) + { + AddResource(nativePtr, MTLResourceUsage.Read, in bindings); + resourceIds[resourceIdIndex] = gpuAddress; + resourceIdIndex++; + } + } + } + } + break; + case Constants.ImagesSetIndex: + if (!segment.IsArray) + { + for (int i = 0; i < count; i++) + { + int index = binding + i; + + ref var image = ref _currentState.ImageRefs[index]; + var (gpuAddress, nativePtr) = AddressForImage(ref image); + + if ((segment.Stages & ResourceStages.Compute) != 0) + { + AddResource(nativePtr, MTLResourceUsage.Read | MTLResourceUsage.Write, in bindings); + resourceIds[resourceIdIndex] = gpuAddress; + resourceIdIndex++; + } + } + } + else + { + var imageArray = _currentState.ImageArrayRefs[binding].Array; + + if (segment.Type != ResourceType.BufferImage) + { + var images = imageArray.GetTextureRefs(); + + for (int i = 0; i < images.Length; i++) + { + TextureRef image = images[i]; + var (gpuAddress, nativePtr) = AddressForTexture(ref image); + + if ((segment.Stages & ResourceStages.Compute) != 0) + { + AddResource(nativePtr, MTLResourceUsage.Read | MTLResourceUsage.Write, in bindings); + resourceIds[resourceIdIndex] = gpuAddress; + resourceIdIndex++; + } + } + } + else + { + var bufferImages = imageArray.GetBufferTextureRefs(); + + for (int i = 0; i < bufferImages.Length; i++) + { + TextureBuffer image = bufferImages[i]; + var (gpuAddress, nativePtr) = AddressForTextureBuffer(ref image); + + if ((segment.Stages & ResourceStages.Compute) != 0) + { + AddResource(nativePtr, MTLResourceUsage.Read | MTLResourceUsage.Write, in bindings); + resourceIds[resourceIdIndex] = gpuAddress; + resourceIdIndex++; + } + } + } + } + break; + } + } + + if (program.ArgumentBufferSizes[setIndex] > 0) + { + argBuffer.Holder.SetDataUnchecked(argBuffer.Offset, MemoryMarshal.AsBytes(resourceIds)); + var mtlArgBuffer = _bufferManager.GetBuffer(argBuffer.Handle, false).Get(_pipeline.Cbs).Value; + bindings.Buffers.Add(new BufferResource(mtlArgBuffer, (uint)argBuffer.Range.Offset, SetIndexToBindingIndex(setIndex))); + } + } + + private static uint SetIndexToBindingIndex(uint setIndex) + { + return setIndex switch + { + Constants.ConstantBuffersSetIndex => Constants.ConstantBuffersIndex, + Constants.StorageBuffersSetIndex => Constants.StorageBuffersIndex, + Constants.TexturesSetIndex => Constants.TexturesIndex, + Constants.ImagesSetIndex => Constants.ImagesIndex, + }; + } + + private readonly void SetCullMode(MTLRenderCommandEncoder renderCommandEncoder) + { + renderCommandEncoder.SetCullMode(_currentState.CullMode); + } + + private readonly void SetFrontFace(MTLRenderCommandEncoder renderCommandEncoder) + { + renderCommandEncoder.SetFrontFacingWinding(_currentState.Winding); + } + + private readonly void SetStencilRefValue(MTLRenderCommandEncoder renderCommandEncoder) + { + renderCommandEncoder.SetStencilReferenceValues((uint)_currentState.FrontRefValue, (uint)_currentState.BackRefValue); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/EnumConversion.cs b/src/Ryujinx.Graphics.Metal/EnumConversion.cs new file mode 100644 index 0000000000..e498546e8f --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/EnumConversion.cs @@ -0,0 +1,293 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.GAL; +using SharpMetal.Metal; +using System; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + static class EnumConversion + { + public static MTLSamplerAddressMode Convert(this AddressMode mode) + { + return mode switch + { + AddressMode.Clamp => MTLSamplerAddressMode.ClampToEdge, // TODO: Should be clamp. + AddressMode.Repeat => MTLSamplerAddressMode.Repeat, + AddressMode.MirrorClamp => MTLSamplerAddressMode.MirrorClampToEdge, // TODO: Should be mirror clamp. + AddressMode.MirroredRepeat => MTLSamplerAddressMode.MirrorRepeat, + AddressMode.ClampToBorder => MTLSamplerAddressMode.ClampToBorderColor, + AddressMode.ClampToEdge => MTLSamplerAddressMode.ClampToEdge, + AddressMode.MirrorClampToEdge => MTLSamplerAddressMode.MirrorClampToEdge, + AddressMode.MirrorClampToBorder => MTLSamplerAddressMode.ClampToBorderColor, // TODO: Should be mirror clamp to border. + _ => LogInvalidAndReturn(mode, nameof(AddressMode), MTLSamplerAddressMode.ClampToEdge) // TODO: Should be clamp. + }; + } + + public static MTLBlendFactor Convert(this BlendFactor factor) + { + return factor switch + { + BlendFactor.Zero or BlendFactor.ZeroGl => MTLBlendFactor.Zero, + BlendFactor.One or BlendFactor.OneGl => MTLBlendFactor.One, + BlendFactor.SrcColor or BlendFactor.SrcColorGl => MTLBlendFactor.SourceColor, + BlendFactor.OneMinusSrcColor or BlendFactor.OneMinusSrcColorGl => MTLBlendFactor.OneMinusSourceColor, + BlendFactor.SrcAlpha or BlendFactor.SrcAlphaGl => MTLBlendFactor.SourceAlpha, + BlendFactor.OneMinusSrcAlpha or BlendFactor.OneMinusSrcAlphaGl => MTLBlendFactor.OneMinusSourceAlpha, + BlendFactor.DstAlpha or BlendFactor.DstAlphaGl => MTLBlendFactor.DestinationAlpha, + BlendFactor.OneMinusDstAlpha or BlendFactor.OneMinusDstAlphaGl => MTLBlendFactor.OneMinusDestinationAlpha, + BlendFactor.DstColor or BlendFactor.DstColorGl => MTLBlendFactor.DestinationColor, + BlendFactor.OneMinusDstColor or BlendFactor.OneMinusDstColorGl => MTLBlendFactor.OneMinusDestinationColor, + BlendFactor.SrcAlphaSaturate or BlendFactor.SrcAlphaSaturateGl => MTLBlendFactor.SourceAlphaSaturated, + BlendFactor.Src1Color or BlendFactor.Src1ColorGl => MTLBlendFactor.Source1Color, + BlendFactor.OneMinusSrc1Color or BlendFactor.OneMinusSrc1ColorGl => MTLBlendFactor.OneMinusSource1Color, + BlendFactor.Src1Alpha or BlendFactor.Src1AlphaGl => MTLBlendFactor.Source1Alpha, + BlendFactor.OneMinusSrc1Alpha or BlendFactor.OneMinusSrc1AlphaGl => MTLBlendFactor.OneMinusSource1Alpha, + BlendFactor.ConstantColor => MTLBlendFactor.BlendColor, + BlendFactor.OneMinusConstantColor => MTLBlendFactor.OneMinusBlendColor, + BlendFactor.ConstantAlpha => MTLBlendFactor.BlendAlpha, + BlendFactor.OneMinusConstantAlpha => MTLBlendFactor.OneMinusBlendAlpha, + _ => LogInvalidAndReturn(factor, nameof(BlendFactor), MTLBlendFactor.Zero) + }; + } + + public static MTLBlendOperation Convert(this BlendOp op) + { + return op switch + { + BlendOp.Add or BlendOp.AddGl => MTLBlendOperation.Add, + BlendOp.Subtract or BlendOp.SubtractGl => MTLBlendOperation.Subtract, + BlendOp.ReverseSubtract or BlendOp.ReverseSubtractGl => MTLBlendOperation.ReverseSubtract, + BlendOp.Minimum => MTLBlendOperation.Min, + BlendOp.Maximum => MTLBlendOperation.Max, + _ => LogInvalidAndReturn(op, nameof(BlendOp), MTLBlendOperation.Add) + }; + } + + public static MTLCompareFunction Convert(this CompareOp op) + { + return op switch + { + CompareOp.Never or CompareOp.NeverGl => MTLCompareFunction.Never, + CompareOp.Less or CompareOp.LessGl => MTLCompareFunction.Less, + CompareOp.Equal or CompareOp.EqualGl => MTLCompareFunction.Equal, + CompareOp.LessOrEqual or CompareOp.LessOrEqualGl => MTLCompareFunction.LessEqual, + CompareOp.Greater or CompareOp.GreaterGl => MTLCompareFunction.Greater, + CompareOp.NotEqual or CompareOp.NotEqualGl => MTLCompareFunction.NotEqual, + CompareOp.GreaterOrEqual or CompareOp.GreaterOrEqualGl => MTLCompareFunction.GreaterEqual, + CompareOp.Always or CompareOp.AlwaysGl => MTLCompareFunction.Always, + _ => LogInvalidAndReturn(op, nameof(CompareOp), MTLCompareFunction.Never) + }; + } + + public static MTLCullMode Convert(this Face face) + { + return face switch + { + Face.Back => MTLCullMode.Back, + Face.Front => MTLCullMode.Front, + Face.FrontAndBack => MTLCullMode.None, + _ => LogInvalidAndReturn(face, nameof(Face), MTLCullMode.Back) + }; + } + + public static MTLWinding Convert(this FrontFace frontFace) + { + // The viewport is flipped vertically, therefore we need to switch the winding order as well + return frontFace switch + { + FrontFace.Clockwise => MTLWinding.CounterClockwise, + FrontFace.CounterClockwise => MTLWinding.Clockwise, + _ => LogInvalidAndReturn(frontFace, nameof(FrontFace), MTLWinding.Clockwise) + }; + } + + public static MTLIndexType Convert(this IndexType type) + { + return type switch + { + IndexType.UShort => MTLIndexType.UInt16, + IndexType.UInt => MTLIndexType.UInt32, + _ => LogInvalidAndReturn(type, nameof(IndexType), MTLIndexType.UInt16) + }; + } + + public static MTLLogicOperation Convert(this LogicalOp op) + { + return op switch + { + LogicalOp.Clear => MTLLogicOperation.Clear, + LogicalOp.And => MTLLogicOperation.And, + LogicalOp.AndReverse => MTLLogicOperation.AndReverse, + LogicalOp.Copy => MTLLogicOperation.Copy, + LogicalOp.AndInverted => MTLLogicOperation.AndInverted, + LogicalOp.Noop => MTLLogicOperation.Noop, + LogicalOp.Xor => MTLLogicOperation.Xor, + LogicalOp.Or => MTLLogicOperation.Or, + LogicalOp.Nor => MTLLogicOperation.Nor, + LogicalOp.Equiv => MTLLogicOperation.Equivalence, + LogicalOp.Invert => MTLLogicOperation.Invert, + LogicalOp.OrReverse => MTLLogicOperation.OrReverse, + LogicalOp.CopyInverted => MTLLogicOperation.CopyInverted, + LogicalOp.OrInverted => MTLLogicOperation.OrInverted, + LogicalOp.Nand => MTLLogicOperation.Nand, + LogicalOp.Set => MTLLogicOperation.Set, + _ => LogInvalidAndReturn(op, nameof(LogicalOp), MTLLogicOperation.And) + }; + } + + public static MTLSamplerMinMagFilter Convert(this MagFilter filter) + { + return filter switch + { + MagFilter.Nearest => MTLSamplerMinMagFilter.Nearest, + MagFilter.Linear => MTLSamplerMinMagFilter.Linear, + _ => LogInvalidAndReturn(filter, nameof(MagFilter), MTLSamplerMinMagFilter.Nearest) + }; + } + + public static (MTLSamplerMinMagFilter, MTLSamplerMipFilter) Convert(this MinFilter filter) + { + return filter switch + { + MinFilter.Nearest => (MTLSamplerMinMagFilter.Nearest, MTLSamplerMipFilter.Nearest), + MinFilter.Linear => (MTLSamplerMinMagFilter.Linear, MTLSamplerMipFilter.Linear), + MinFilter.NearestMipmapNearest => (MTLSamplerMinMagFilter.Nearest, MTLSamplerMipFilter.Nearest), + MinFilter.LinearMipmapNearest => (MTLSamplerMinMagFilter.Linear, MTLSamplerMipFilter.Nearest), + MinFilter.NearestMipmapLinear => (MTLSamplerMinMagFilter.Nearest, MTLSamplerMipFilter.Linear), + MinFilter.LinearMipmapLinear => (MTLSamplerMinMagFilter.Linear, MTLSamplerMipFilter.Linear), + _ => LogInvalidAndReturn(filter, nameof(MinFilter), (MTLSamplerMinMagFilter.Nearest, MTLSamplerMipFilter.Nearest)) + + }; + } + + public static MTLPrimitiveType Convert(this PrimitiveTopology topology) + { + return topology switch + { + PrimitiveTopology.Points => MTLPrimitiveType.Point, + PrimitiveTopology.Lines => MTLPrimitiveType.Line, + PrimitiveTopology.LineStrip => MTLPrimitiveType.LineStrip, + PrimitiveTopology.Triangles => MTLPrimitiveType.Triangle, + PrimitiveTopology.TriangleStrip => MTLPrimitiveType.TriangleStrip, + _ => LogInvalidAndReturn(topology, nameof(PrimitiveTopology), MTLPrimitiveType.Triangle) + }; + } + + public static MTLStencilOperation Convert(this StencilOp op) + { + return op switch + { + StencilOp.Keep or StencilOp.KeepGl => MTLStencilOperation.Keep, + StencilOp.Zero or StencilOp.ZeroGl => MTLStencilOperation.Zero, + StencilOp.Replace or StencilOp.ReplaceGl => MTLStencilOperation.Replace, + StencilOp.IncrementAndClamp or StencilOp.IncrementAndClampGl => MTLStencilOperation.IncrementClamp, + StencilOp.DecrementAndClamp or StencilOp.DecrementAndClampGl => MTLStencilOperation.DecrementClamp, + StencilOp.Invert or StencilOp.InvertGl => MTLStencilOperation.Invert, + StencilOp.IncrementAndWrap or StencilOp.IncrementAndWrapGl => MTLStencilOperation.IncrementWrap, + StencilOp.DecrementAndWrap or StencilOp.DecrementAndWrapGl => MTLStencilOperation.DecrementWrap, + _ => LogInvalidAndReturn(op, nameof(StencilOp), MTLStencilOperation.Keep) + }; + } + + public static MTLTextureType Convert(this Target target) + { + return target switch + { + Target.TextureBuffer => MTLTextureType.TextureBuffer, + Target.Texture1D => MTLTextureType.Type1D, + Target.Texture1DArray => MTLTextureType.Type1DArray, + Target.Texture2D => MTLTextureType.Type2D, + Target.Texture2DArray => MTLTextureType.Type2DArray, + Target.Texture2DMultisample => MTLTextureType.Type2DMultisample, + Target.Texture2DMultisampleArray => MTLTextureType.Type2DMultisampleArray, + Target.Texture3D => MTLTextureType.Type3D, + Target.Cubemap => MTLTextureType.Cube, + Target.CubemapArray => MTLTextureType.CubeArray, + _ => LogInvalidAndReturn(target, nameof(Target), MTLTextureType.Type2D) + }; + } + + public static MTLTextureSwizzle Convert(this SwizzleComponent swizzleComponent) + { + return swizzleComponent switch + { + SwizzleComponent.Zero => MTLTextureSwizzle.Zero, + SwizzleComponent.One => MTLTextureSwizzle.One, + SwizzleComponent.Red => MTLTextureSwizzle.Red, + SwizzleComponent.Green => MTLTextureSwizzle.Green, + SwizzleComponent.Blue => MTLTextureSwizzle.Blue, + SwizzleComponent.Alpha => MTLTextureSwizzle.Alpha, + _ => LogInvalidAndReturn(swizzleComponent, nameof(SwizzleComponent), MTLTextureSwizzle.Zero) + }; + } + + public static MTLVertexFormat Convert(this Format format) + { + return format switch + { + Format.R16Float => MTLVertexFormat.Half, + Format.R16G16Float => MTLVertexFormat.Half2, + Format.R16G16B16Float => MTLVertexFormat.Half3, + Format.R16G16B16A16Float => MTLVertexFormat.Half4, + Format.R32Float => MTLVertexFormat.Float, + Format.R32G32Float => MTLVertexFormat.Float2, + Format.R32G32B32Float => MTLVertexFormat.Float3, + Format.R11G11B10Float => MTLVertexFormat.FloatRG11B10, + Format.R32G32B32A32Float => MTLVertexFormat.Float4, + Format.R8Uint => MTLVertexFormat.UChar, + Format.R8G8Uint => MTLVertexFormat.UChar2, + Format.R8G8B8Uint => MTLVertexFormat.UChar3, + Format.R8G8B8A8Uint => MTLVertexFormat.UChar4, + Format.R16Uint => MTLVertexFormat.UShort, + Format.R16G16Uint => MTLVertexFormat.UShort2, + Format.R16G16B16Uint => MTLVertexFormat.UShort3, + Format.R16G16B16A16Uint => MTLVertexFormat.UShort4, + Format.R32Uint => MTLVertexFormat.UInt, + Format.R32G32Uint => MTLVertexFormat.UInt2, + Format.R32G32B32Uint => MTLVertexFormat.UInt3, + Format.R32G32B32A32Uint => MTLVertexFormat.UInt4, + Format.R8Sint => MTLVertexFormat.Char, + Format.R8G8Sint => MTLVertexFormat.Char2, + Format.R8G8B8Sint => MTLVertexFormat.Char3, + Format.R8G8B8A8Sint => MTLVertexFormat.Char4, + Format.R16Sint => MTLVertexFormat.Short, + Format.R16G16Sint => MTLVertexFormat.Short2, + Format.R16G16B16Sint => MTLVertexFormat.Short3, + Format.R16G16B16A16Sint => MTLVertexFormat.Short4, + Format.R32Sint => MTLVertexFormat.Int, + Format.R32G32Sint => MTLVertexFormat.Int2, + Format.R32G32B32Sint => MTLVertexFormat.Int3, + Format.R32G32B32A32Sint => MTLVertexFormat.Int4, + Format.R8Unorm => MTLVertexFormat.UCharNormalized, + Format.R8G8Unorm => MTLVertexFormat.UChar2Normalized, + Format.R8G8B8Unorm => MTLVertexFormat.UChar3Normalized, + Format.R8G8B8A8Unorm => MTLVertexFormat.UChar4Normalized, + Format.R16Unorm => MTLVertexFormat.UShortNormalized, + Format.R16G16Unorm => MTLVertexFormat.UShort2Normalized, + Format.R16G16B16Unorm => MTLVertexFormat.UShort3Normalized, + Format.R16G16B16A16Unorm => MTLVertexFormat.UShort4Normalized, + Format.R10G10B10A2Unorm => MTLVertexFormat.UInt1010102Normalized, + Format.R8Snorm => MTLVertexFormat.CharNormalized, + Format.R8G8Snorm => MTLVertexFormat.Char2Normalized, + Format.R8G8B8Snorm => MTLVertexFormat.Char3Normalized, + Format.R8G8B8A8Snorm => MTLVertexFormat.Char4Normalized, + Format.R16Snorm => MTLVertexFormat.ShortNormalized, + Format.R16G16Snorm => MTLVertexFormat.Short2Normalized, + Format.R16G16B16Snorm => MTLVertexFormat.Short3Normalized, + Format.R16G16B16A16Snorm => MTLVertexFormat.Short4Normalized, + Format.R10G10B10A2Snorm => MTLVertexFormat.Int1010102Normalized, + + _ => LogInvalidAndReturn(format, nameof(Format), MTLVertexFormat.Float4) + }; + } + + private static T2 LogInvalidAndReturn(T1 value, string name, T2 defaultValue = default) + { + Logger.Debug?.Print(LogClass.Gpu, $"Invalid {name} enum value: {value}."); + + return defaultValue; + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/FenceHolder.cs b/src/Ryujinx.Graphics.Metal/FenceHolder.cs new file mode 100644 index 0000000000..a8dd28c0da --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/FenceHolder.cs @@ -0,0 +1,77 @@ +using SharpMetal.Metal; +using System; +using System.Runtime.Versioning; +using System.Threading; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + class FenceHolder : IDisposable + { + private MTLCommandBuffer _fence; + private int _referenceCount; + private bool _disposed; + + public FenceHolder(MTLCommandBuffer fence) + { + _fence = fence; + _referenceCount = 1; + } + + public MTLCommandBuffer GetUnsafe() + { + return _fence; + } + + public bool TryGet(out MTLCommandBuffer fence) + { + int lastValue; + do + { + lastValue = _referenceCount; + + if (lastValue == 0) + { + fence = default; + return false; + } + } while (Interlocked.CompareExchange(ref _referenceCount, lastValue + 1, lastValue) != lastValue); + + fence = _fence; + return true; + } + + public MTLCommandBuffer Get() + { + Interlocked.Increment(ref _referenceCount); + return _fence; + } + + public void Put() + { + if (Interlocked.Decrement(ref _referenceCount) == 0) + { + _fence = default; + } + } + + public void Wait() + { + _fence.WaitUntilCompleted(); + } + + public bool IsSignaled() + { + return _fence.Status == MTLCommandBufferStatus.Completed; + } + + public void Dispose() + { + if (!_disposed) + { + Put(); + _disposed = true; + } + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/FormatConverter.cs b/src/Ryujinx.Graphics.Metal/FormatConverter.cs new file mode 100644 index 0000000000..e099187b8f --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/FormatConverter.cs @@ -0,0 +1,49 @@ +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Metal +{ + class FormatConverter + { + public static void ConvertD24S8ToD32FS8(Span output, ReadOnlySpan input) + { + const float UnormToFloat = 1f / 0xffffff; + + Span outputUint = MemoryMarshal.Cast(output); + ReadOnlySpan inputUint = MemoryMarshal.Cast(input); + + int i = 0; + + for (; i < inputUint.Length; i++) + { + uint depthStencil = inputUint[i]; + uint depth = depthStencil >> 8; + uint stencil = depthStencil & 0xff; + + int j = i * 2; + + outputUint[j] = (uint)BitConverter.SingleToInt32Bits(depth * UnormToFloat); + outputUint[j + 1] = stencil; + } + } + + public static void ConvertD32FS8ToD24S8(Span output, ReadOnlySpan input) + { + Span outputUint = MemoryMarshal.Cast(output); + ReadOnlySpan inputUint = MemoryMarshal.Cast(input); + + int i = 0; + + for (; i < inputUint.Length; i += 2) + { + float depth = BitConverter.Int32BitsToSingle((int)inputUint[i]); + uint stencil = inputUint[i + 1]; + uint depthStencil = (Math.Clamp((uint)(depth * 0xffffff), 0, 0xffffff) << 8) | (stencil & 0xff); + + int j = i >> 1; + + outputUint[j] = depthStencil; + } + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/FormatTable.cs b/src/Ryujinx.Graphics.Metal/FormatTable.cs new file mode 100644 index 0000000000..c1f8923f93 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/FormatTable.cs @@ -0,0 +1,196 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.GAL; +using SharpMetal.Metal; +using System; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + static class FormatTable + { + private static readonly MTLPixelFormat[] _table; + + static FormatTable() + { + _table = new MTLPixelFormat[Enum.GetNames(typeof(Format)).Length]; + + Add(Format.R8Unorm, MTLPixelFormat.R8Unorm); + Add(Format.R8Snorm, MTLPixelFormat.R8Snorm); + Add(Format.R8Uint, MTLPixelFormat.R8Uint); + Add(Format.R8Sint, MTLPixelFormat.R8Sint); + Add(Format.R16Float, MTLPixelFormat.R16Float); + Add(Format.R16Unorm, MTLPixelFormat.R16Unorm); + Add(Format.R16Snorm, MTLPixelFormat.R16Snorm); + Add(Format.R16Uint, MTLPixelFormat.R16Uint); + Add(Format.R16Sint, MTLPixelFormat.R16Sint); + Add(Format.R32Float, MTLPixelFormat.R32Float); + Add(Format.R32Uint, MTLPixelFormat.R32Uint); + Add(Format.R32Sint, MTLPixelFormat.R32Sint); + Add(Format.R8G8Unorm, MTLPixelFormat.RG8Unorm); + Add(Format.R8G8Snorm, MTLPixelFormat.RG8Snorm); + Add(Format.R8G8Uint, MTLPixelFormat.RG8Uint); + Add(Format.R8G8Sint, MTLPixelFormat.RG8Sint); + Add(Format.R16G16Float, MTLPixelFormat.RG16Float); + Add(Format.R16G16Unorm, MTLPixelFormat.RG16Unorm); + Add(Format.R16G16Snorm, MTLPixelFormat.RG16Snorm); + Add(Format.R16G16Uint, MTLPixelFormat.RG16Uint); + Add(Format.R16G16Sint, MTLPixelFormat.RG16Sint); + Add(Format.R32G32Float, MTLPixelFormat.RG32Float); + Add(Format.R32G32Uint, MTLPixelFormat.RG32Uint); + Add(Format.R32G32Sint, MTLPixelFormat.RG32Sint); + // Add(Format.R8G8B8Unorm, MTLPixelFormat.R8G8B8Unorm); + // Add(Format.R8G8B8Snorm, MTLPixelFormat.R8G8B8Snorm); + // Add(Format.R8G8B8Uint, MTLPixelFormat.R8G8B8Uint); + // Add(Format.R8G8B8Sint, MTLPixelFormat.R8G8B8Sint); + // Add(Format.R16G16B16Float, MTLPixelFormat.R16G16B16Float); + // Add(Format.R16G16B16Unorm, MTLPixelFormat.R16G16B16Unorm); + // Add(Format.R16G16B16Snorm, MTLPixelFormat.R16G16B16SNorm); + // Add(Format.R16G16B16Uint, MTLPixelFormat.R16G16B16Uint); + // Add(Format.R16G16B16Sint, MTLPixelFormat.R16G16B16Sint); + // Add(Format.R32G32B32Float, MTLPixelFormat.R32G32B32Sfloat); + // Add(Format.R32G32B32Uint, MTLPixelFormat.R32G32B32Uint); + // Add(Format.R32G32B32Sint, MTLPixelFormat.R32G32B32Sint); + Add(Format.R8G8B8A8Unorm, MTLPixelFormat.RGBA8Unorm); + Add(Format.R8G8B8A8Snorm, MTLPixelFormat.RGBA8Snorm); + Add(Format.R8G8B8A8Uint, MTLPixelFormat.RGBA8Uint); + Add(Format.R8G8B8A8Sint, MTLPixelFormat.RGBA8Sint); + Add(Format.R16G16B16A16Float, MTLPixelFormat.RGBA16Float); + Add(Format.R16G16B16A16Unorm, MTLPixelFormat.RGBA16Unorm); + Add(Format.R16G16B16A16Snorm, MTLPixelFormat.RGBA16Snorm); + Add(Format.R16G16B16A16Uint, MTLPixelFormat.RGBA16Uint); + Add(Format.R16G16B16A16Sint, MTLPixelFormat.RGBA16Sint); + Add(Format.R32G32B32A32Float, MTLPixelFormat.RGBA32Float); + Add(Format.R32G32B32A32Uint, MTLPixelFormat.RGBA32Uint); + Add(Format.R32G32B32A32Sint, MTLPixelFormat.RGBA32Sint); + Add(Format.S8Uint, MTLPixelFormat.Stencil8); + Add(Format.D16Unorm, MTLPixelFormat.Depth16Unorm); + Add(Format.S8UintD24Unorm, MTLPixelFormat.Depth24UnormStencil8); + Add(Format.X8UintD24Unorm, MTLPixelFormat.Depth24UnormStencil8); + Add(Format.D32Float, MTLPixelFormat.Depth32Float); + Add(Format.D24UnormS8Uint, MTLPixelFormat.Depth24UnormStencil8); + Add(Format.D32FloatS8Uint, MTLPixelFormat.Depth32FloatStencil8); + Add(Format.R8G8B8A8Srgb, MTLPixelFormat.RGBA8UnormsRGB); + // Add(Format.R4G4Unorm, MTLPixelFormat.R4G4Unorm); + Add(Format.R4G4B4A4Unorm, MTLPixelFormat.RGBA8Unorm); + // Add(Format.R5G5B5X1Unorm, MTLPixelFormat.R5G5B5X1Unorm); + Add(Format.R5G5B5A1Unorm, MTLPixelFormat.BGR5A1Unorm); + Add(Format.R5G6B5Unorm, MTLPixelFormat.B5G6R5Unorm); + Add(Format.R10G10B10A2Unorm, MTLPixelFormat.RGB10A2Unorm); + Add(Format.R10G10B10A2Uint, MTLPixelFormat.RGB10A2Uint); + Add(Format.R11G11B10Float, MTLPixelFormat.RG11B10Float); + Add(Format.R9G9B9E5Float, MTLPixelFormat.RGB9E5Float); + Add(Format.Bc1RgbaUnorm, MTLPixelFormat.BC1RGBA); + Add(Format.Bc2Unorm, MTLPixelFormat.BC2RGBA); + Add(Format.Bc3Unorm, MTLPixelFormat.BC3RGBA); + Add(Format.Bc1RgbaSrgb, MTLPixelFormat.BC1RGBAsRGB); + Add(Format.Bc2Srgb, MTLPixelFormat.BC2RGBAsRGB); + Add(Format.Bc3Srgb, MTLPixelFormat.BC3RGBAsRGB); + Add(Format.Bc4Unorm, MTLPixelFormat.BC4RUnorm); + Add(Format.Bc4Snorm, MTLPixelFormat.BC4RSnorm); + Add(Format.Bc5Unorm, MTLPixelFormat.BC5RGUnorm); + Add(Format.Bc5Snorm, MTLPixelFormat.BC5RGSnorm); + Add(Format.Bc7Unorm, MTLPixelFormat.BC7RGBAUnorm); + Add(Format.Bc7Srgb, MTLPixelFormat.BC7RGBAUnormsRGB); + Add(Format.Bc6HSfloat, MTLPixelFormat.BC6HRGBFloat); + Add(Format.Bc6HUfloat, MTLPixelFormat.BC6HRGBUfloat); + Add(Format.Etc2RgbUnorm, MTLPixelFormat.ETC2RGB8); + // Add(Format.Etc2RgbaUnorm, MTLPixelFormat.ETC2RGBA8); + Add(Format.Etc2RgbPtaUnorm, MTLPixelFormat.ETC2RGB8A1); + Add(Format.Etc2RgbSrgb, MTLPixelFormat.ETC2RGB8sRGB); + // Add(Format.Etc2RgbaSrgb, MTLPixelFormat.ETC2RGBA8sRGB); + Add(Format.Etc2RgbPtaSrgb, MTLPixelFormat.ETC2RGB8A1sRGB); + // Add(Format.R8Uscaled, MTLPixelFormat.R8Uscaled); + // Add(Format.R8Sscaled, MTLPixelFormat.R8Sscaled); + // Add(Format.R16Uscaled, MTLPixelFormat.R16Uscaled); + // Add(Format.R16Sscaled, MTLPixelFormat.R16Sscaled); + // Add(Format.R32Uscaled, MTLPixelFormat.R32Uscaled); + // Add(Format.R32Sscaled, MTLPixelFormat.R32Sscaled); + // Add(Format.R8G8Uscaled, MTLPixelFormat.R8G8Uscaled); + // Add(Format.R8G8Sscaled, MTLPixelFormat.R8G8Sscaled); + // Add(Format.R16G16Uscaled, MTLPixelFormat.R16G16Uscaled); + // Add(Format.R16G16Sscaled, MTLPixelFormat.R16G16Sscaled); + // Add(Format.R32G32Uscaled, MTLPixelFormat.R32G32Uscaled); + // Add(Format.R32G32Sscaled, MTLPixelFormat.R32G32Sscaled); + // Add(Format.R8G8B8Uscaled, MTLPixelFormat.R8G8B8Uscaled); + // Add(Format.R8G8B8Sscaled, MTLPixelFormat.R8G8B8Sscaled); + // Add(Format.R16G16B16Uscaled, MTLPixelFormat.R16G16B16Uscaled); + // Add(Format.R16G16B16Sscaled, MTLPixelFormat.R16G16B16Sscaled); + // Add(Format.R32G32B32Uscaled, MTLPixelFormat.R32G32B32Uscaled); + // Add(Format.R32G32B32Sscaled, MTLPixelFormat.R32G32B32Sscaled); + // Add(Format.R8G8B8A8Uscaled, MTLPixelFormat.R8G8B8A8Uscaled); + // Add(Format.R8G8B8A8Sscaled, MTLPixelFormat.R8G8B8A8Sscaled); + // Add(Format.R16G16B16A16Uscaled, MTLPixelFormat.R16G16B16A16Uscaled); + // Add(Format.R16G16B16A16Sscaled, MTLPixelFormat.R16G16B16A16Sscaled); + // Add(Format.R32G32B32A32Uscaled, MTLPixelFormat.R32G32B32A32Uscaled); + // Add(Format.R32G32B32A32Sscaled, MTLPixelFormat.R32G32B32A32Sscaled); + // Add(Format.R10G10B10A2Snorm, MTLPixelFormat.A2B10G10R10SNormPack32); + // Add(Format.R10G10B10A2Sint, MTLPixelFormat.A2B10G10R10SintPack32); + // Add(Format.R10G10B10A2Uscaled, MTLPixelFormat.A2B10G10R10UscaledPack32); + // Add(Format.R10G10B10A2Sscaled, MTLPixelFormat.A2B10G10R10SscaledPack32); + Add(Format.Astc4x4Unorm, MTLPixelFormat.ASTC4x4LDR); + Add(Format.Astc5x4Unorm, MTLPixelFormat.ASTC5x4LDR); + Add(Format.Astc5x5Unorm, MTLPixelFormat.ASTC5x5LDR); + Add(Format.Astc6x5Unorm, MTLPixelFormat.ASTC6x5LDR); + Add(Format.Astc6x6Unorm, MTLPixelFormat.ASTC6x6LDR); + Add(Format.Astc8x5Unorm, MTLPixelFormat.ASTC8x5LDR); + Add(Format.Astc8x6Unorm, MTLPixelFormat.ASTC8x6LDR); + Add(Format.Astc8x8Unorm, MTLPixelFormat.ASTC8x8LDR); + Add(Format.Astc10x5Unorm, MTLPixelFormat.ASTC10x5LDR); + Add(Format.Astc10x6Unorm, MTLPixelFormat.ASTC10x6LDR); + Add(Format.Astc10x8Unorm, MTLPixelFormat.ASTC10x8LDR); + Add(Format.Astc10x10Unorm, MTLPixelFormat.ASTC10x10LDR); + Add(Format.Astc12x10Unorm, MTLPixelFormat.ASTC12x10LDR); + Add(Format.Astc12x12Unorm, MTLPixelFormat.ASTC12x12LDR); + Add(Format.Astc4x4Srgb, MTLPixelFormat.ASTC4x4sRGB); + Add(Format.Astc5x4Srgb, MTLPixelFormat.ASTC5x4sRGB); + Add(Format.Astc5x5Srgb, MTLPixelFormat.ASTC5x5sRGB); + Add(Format.Astc6x5Srgb, MTLPixelFormat.ASTC6x5sRGB); + Add(Format.Astc6x6Srgb, MTLPixelFormat.ASTC6x6sRGB); + Add(Format.Astc8x5Srgb, MTLPixelFormat.ASTC8x5sRGB); + Add(Format.Astc8x6Srgb, MTLPixelFormat.ASTC8x6sRGB); + Add(Format.Astc8x8Srgb, MTLPixelFormat.ASTC8x8sRGB); + Add(Format.Astc10x5Srgb, MTLPixelFormat.ASTC10x5sRGB); + Add(Format.Astc10x6Srgb, MTLPixelFormat.ASTC10x6sRGB); + Add(Format.Astc10x8Srgb, MTLPixelFormat.ASTC10x8sRGB); + Add(Format.Astc10x10Srgb, MTLPixelFormat.ASTC10x10sRGB); + Add(Format.Astc12x10Srgb, MTLPixelFormat.ASTC12x10sRGB); + Add(Format.Astc12x12Srgb, MTLPixelFormat.ASTC12x12sRGB); + Add(Format.B5G6R5Unorm, MTLPixelFormat.B5G6R5Unorm); + Add(Format.B5G5R5A1Unorm, MTLPixelFormat.BGR5A1Unorm); + Add(Format.A1B5G5R5Unorm, MTLPixelFormat.A1BGR5Unorm); + Add(Format.B8G8R8A8Unorm, MTLPixelFormat.BGRA8Unorm); + Add(Format.B8G8R8A8Srgb, MTLPixelFormat.BGRA8UnormsRGB); + } + + private static void Add(Format format, MTLPixelFormat mtlFormat) + { + _table[(int)format] = mtlFormat; + } + + public static MTLPixelFormat GetFormat(Format format) + { + var mtlFormat = _table[(int)format]; + + if (IsD24S8(format)) + { + if (!MTLDevice.CreateSystemDefaultDevice().Depth24Stencil8PixelFormatSupported) + { + mtlFormat = MTLPixelFormat.Depth32FloatStencil8; + } + } + + if (mtlFormat == MTLPixelFormat.Invalid) + { + Logger.Error?.PrintMsg(LogClass.Gpu, $"Format {format} is not supported by the host."); + } + + return mtlFormat; + } + + public static bool IsD24S8(Format format) + { + return format == Format.D24UnormS8Uint || format == Format.S8UintD24Unorm || format == Format.X8UintD24Unorm; + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/HardwareInfo.cs b/src/Ryujinx.Graphics.Metal/HardwareInfo.cs new file mode 100644 index 0000000000..4b3b710f8a --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/HardwareInfo.cs @@ -0,0 +1,82 @@ +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Metal +{ + static partial class HardwareInfoTools + { + + private readonly static IntPtr _kCFAllocatorDefault = IntPtr.Zero; + private readonly static UInt32 _kCFStringEncodingASCII = 0x0600; + private const string IOKit = "/System/Library/Frameworks/IOKit.framework/IOKit"; + private const string CoreFoundation = "/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation"; + + [LibraryImport(IOKit, StringMarshalling = StringMarshalling.Utf8)] + private static partial IntPtr IOServiceMatching(string name); + + [LibraryImport(IOKit)] + private static partial IntPtr IOServiceGetMatchingService(IntPtr mainPort, IntPtr matching); + + [LibraryImport(IOKit)] + private static partial IntPtr IORegistryEntryCreateCFProperty(IntPtr entry, IntPtr key, IntPtr allocator, UInt32 options); + + [LibraryImport(CoreFoundation, StringMarshalling = StringMarshalling.Utf8)] + private static partial IntPtr CFStringCreateWithCString(IntPtr allocator, string cString, UInt32 encoding); + + [LibraryImport(CoreFoundation)] + [return: MarshalAs(UnmanagedType.U1)] + public static partial bool CFStringGetCString(IntPtr theString, IntPtr buffer, long bufferSizes, UInt32 encoding); + + [LibraryImport(CoreFoundation)] + public static partial IntPtr CFDataGetBytePtr(IntPtr theData); + + static string GetNameFromId(uint id) + { + return id switch + { + 0x1002 => "AMD", + 0x106B => "Apple", + 0x10DE => "NVIDIA", + 0x13B5 => "ARM", + 0x8086 => "Intel", + _ => $"0x{id:X}" + }; + } + + public static string GetVendor() + { + var serviceDict = IOServiceMatching("IOGPU"); + var service = IOServiceGetMatchingService(IntPtr.Zero, serviceDict); + var cfString = CFStringCreateWithCString(_kCFAllocatorDefault, "vendor-id", _kCFStringEncodingASCII); + var cfProperty = IORegistryEntryCreateCFProperty(service, cfString, _kCFAllocatorDefault, 0); + + byte[] buffer = new byte[4]; + var bufferPtr = CFDataGetBytePtr(cfProperty); + Marshal.Copy(bufferPtr, buffer, 0, buffer.Length); + + var vendorId = BitConverter.ToUInt32(buffer); + + return GetNameFromId(vendorId); + } + + public static string GetModel() + { + var serviceDict = IOServiceMatching("IOGPU"); + var service = IOServiceGetMatchingService(IntPtr.Zero, serviceDict); + var cfString = CFStringCreateWithCString(_kCFAllocatorDefault, "model", _kCFStringEncodingASCII); + var cfProperty = IORegistryEntryCreateCFProperty(service, cfString, _kCFAllocatorDefault, 0); + + char[] buffer = new char[64]; + IntPtr bufferPtr = Marshal.AllocHGlobal(buffer.Length); + + if (CFStringGetCString(cfProperty, bufferPtr, buffer.Length, _kCFStringEncodingASCII)) + { + var model = Marshal.PtrToStringUTF8(bufferPtr); + Marshal.FreeHGlobal(bufferPtr); + return model; + } + + return ""; + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/HashTableSlim.cs b/src/Ryujinx.Graphics.Metal/HashTableSlim.cs new file mode 100644 index 0000000000..a27a53d47e --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/HashTableSlim.cs @@ -0,0 +1,143 @@ +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; + +namespace Ryujinx.Graphics.Metal +{ + interface IRefEquatable + { + bool Equals(ref T other); + } + + class HashTableSlim where TKey : IRefEquatable + { + private const int TotalBuckets = 16; // Must be power of 2 + private const int TotalBucketsMask = TotalBuckets - 1; + + private struct Entry + { + public int Hash; + public TKey Key; + public TValue Value; + } + + private struct Bucket + { + public int Length; + public Entry[] Entries; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public readonly Span AsSpan() + { + return Entries == null ? Span.Empty : Entries.AsSpan(0, Length); + } + } + + private readonly Bucket[] _hashTable = new Bucket[TotalBuckets]; + + public IEnumerable Keys + { + get + { + foreach (Bucket bucket in _hashTable) + { + for (int i = 0; i < bucket.Length; i++) + { + yield return bucket.Entries[i].Key; + } + } + } + } + + public IEnumerable Values + { + get + { + foreach (Bucket bucket in _hashTable) + { + for (int i = 0; i < bucket.Length; i++) + { + yield return bucket.Entries[i].Value; + } + } + } + } + + public void Add(ref TKey key, TValue value) + { + var entry = new Entry + { + Hash = key.GetHashCode(), + Key = key, + Value = value, + }; + + int hashCode = key.GetHashCode(); + int bucketIndex = hashCode & TotalBucketsMask; + + ref var bucket = ref _hashTable[bucketIndex]; + if (bucket.Entries != null) + { + int index = bucket.Length; + + if (index >= bucket.Entries.Length) + { + Array.Resize(ref bucket.Entries, index + 1); + } + + bucket.Entries[index] = entry; + } + else + { + bucket.Entries = new[] + { + entry, + }; + } + + bucket.Length++; + } + + public bool Remove(ref TKey key) + { + int hashCode = key.GetHashCode(); + + ref var bucket = ref _hashTable[hashCode & TotalBucketsMask]; + var entries = bucket.AsSpan(); + for (int i = 0; i < entries.Length; i++) + { + ref var entry = ref entries[i]; + + if (entry.Hash == hashCode && entry.Key.Equals(ref key)) + { + entries[(i + 1)..].CopyTo(entries[i..]); + bucket.Length--; + + return true; + } + } + + return false; + } + + public bool TryGetValue(ref TKey key, out TValue value) + { + int hashCode = key.GetHashCode(); + + var entries = _hashTable[hashCode & TotalBucketsMask].AsSpan(); + for (int i = 0; i < entries.Length; i++) + { + ref var entry = ref entries[i]; + + if (entry.Hash == hashCode && entry.Key.Equals(ref key)) + { + value = entry.Value; + return true; + } + } + + value = default; + return false; + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/HelperShader.cs b/src/Ryujinx.Graphics.Metal/HelperShader.cs new file mode 100644 index 0000000000..53f503207a --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/HelperShader.cs @@ -0,0 +1,868 @@ +using Ryujinx.Common; +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Shader; +using Ryujinx.Graphics.Shader.Translation; +using SharpMetal.Metal; +using System; +using System.Collections.Generic; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + class HelperShader : IDisposable + { + private const int ConvertElementsPerWorkgroup = 32 * 100; // Work group size of 32 times 100 elements. + private const string ShadersSourcePath = "/Ryujinx.Graphics.Metal/Shaders"; + private readonly MetalRenderer _renderer; + private readonly Pipeline _pipeline; + private MTLDevice _device; + + private readonly ISampler _samplerLinear; + private readonly ISampler _samplerNearest; + private readonly IProgram _programColorBlitF; + private readonly IProgram _programColorBlitI; + private readonly IProgram _programColorBlitU; + private readonly IProgram _programColorBlitMsF; + private readonly IProgram _programColorBlitMsI; + private readonly IProgram _programColorBlitMsU; + private readonly List _programsColorClearF = new(); + private readonly List _programsColorClearI = new(); + private readonly List _programsColorClearU = new(); + private readonly IProgram _programDepthStencilClear; + private readonly IProgram _programStrideChange; + private readonly IProgram _programConvertD32S8ToD24S8; + private readonly IProgram _programConvertIndexBuffer; + private readonly IProgram _programDepthBlit; + private readonly IProgram _programDepthBlitMs; + private readonly IProgram _programStencilBlit; + private readonly IProgram _programStencilBlitMs; + + private readonly EncoderState _helperShaderState = new(); + + public HelperShader(MTLDevice device, MetalRenderer renderer, Pipeline pipeline) + { + _device = device; + _renderer = renderer; + _pipeline = pipeline; + + _samplerNearest = new SamplerHolder(renderer, _device, SamplerCreateInfo.Create(MinFilter.Nearest, MagFilter.Nearest)); + _samplerLinear = new SamplerHolder(renderer, _device, SamplerCreateInfo.Create(MinFilter.Linear, MagFilter.Linear)); + + var blitResourceLayout = new ResourceLayoutBuilder() + .Add(ResourceStages.Vertex, ResourceType.UniformBuffer, 0) + .Add(ResourceStages.Fragment, ResourceType.TextureAndSampler, 0).Build(); + + var blitSource = ReadMsl("Blit.metal"); + + var blitSourceF = blitSource.Replace("FORMAT", "float", StringComparison.Ordinal); + _programColorBlitF = new Program(renderer, device, [ + new ShaderSource(blitSourceF, ShaderStage.Fragment, TargetLanguage.Msl), + new ShaderSource(blitSourceF, ShaderStage.Vertex, TargetLanguage.Msl) + ], blitResourceLayout); + + var blitSourceI = blitSource.Replace("FORMAT", "int"); + _programColorBlitI = new Program(renderer, device, [ + new ShaderSource(blitSourceI, ShaderStage.Fragment, TargetLanguage.Msl), + new ShaderSource(blitSourceI, ShaderStage.Vertex, TargetLanguage.Msl) + ], blitResourceLayout); + + var blitSourceU = blitSource.Replace("FORMAT", "uint"); + _programColorBlitU = new Program(renderer, device, [ + new ShaderSource(blitSourceU, ShaderStage.Fragment, TargetLanguage.Msl), + new ShaderSource(blitSourceU, ShaderStage.Vertex, TargetLanguage.Msl) + ], blitResourceLayout); + + var blitMsSource = ReadMsl("BlitMs.metal"); + + var blitMsSourceF = blitMsSource.Replace("FORMAT", "float"); + _programColorBlitMsF = new Program(renderer, device, [ + new ShaderSource(blitMsSourceF, ShaderStage.Fragment, TargetLanguage.Msl), + new ShaderSource(blitMsSourceF, ShaderStage.Vertex, TargetLanguage.Msl) + ], blitResourceLayout); + + var blitMsSourceI = blitMsSource.Replace("FORMAT", "int"); + _programColorBlitMsI = new Program(renderer, device, [ + new ShaderSource(blitMsSourceI, ShaderStage.Fragment, TargetLanguage.Msl), + new ShaderSource(blitMsSourceI, ShaderStage.Vertex, TargetLanguage.Msl) + ], blitResourceLayout); + + var blitMsSourceU = blitMsSource.Replace("FORMAT", "uint"); + _programColorBlitMsU = new Program(renderer, device, [ + new ShaderSource(blitMsSourceU, ShaderStage.Fragment, TargetLanguage.Msl), + new ShaderSource(blitMsSourceU, ShaderStage.Vertex, TargetLanguage.Msl) + ], blitResourceLayout); + + var colorClearResourceLayout = new ResourceLayoutBuilder() + .Add(ResourceStages.Fragment, ResourceType.UniformBuffer, 0).Build(); + + var colorClearSource = ReadMsl("ColorClear.metal"); + + for (int i = 0; i < Constants.MaxColorAttachments; i++) + { + var crntSource = colorClearSource.Replace("COLOR_ATTACHMENT_INDEX", i.ToString()).Replace("FORMAT", "float"); + _programsColorClearF.Add(new Program(renderer, device, [ + new ShaderSource(crntSource, ShaderStage.Fragment, TargetLanguage.Msl), + new ShaderSource(crntSource, ShaderStage.Vertex, TargetLanguage.Msl) + ], colorClearResourceLayout)); + } + + for (int i = 0; i < Constants.MaxColorAttachments; i++) + { + var crntSource = colorClearSource.Replace("COLOR_ATTACHMENT_INDEX", i.ToString()).Replace("FORMAT", "int"); + _programsColorClearI.Add(new Program(renderer, device, [ + new ShaderSource(crntSource, ShaderStage.Fragment, TargetLanguage.Msl), + new ShaderSource(crntSource, ShaderStage.Vertex, TargetLanguage.Msl) + ], colorClearResourceLayout)); + } + + for (int i = 0; i < Constants.MaxColorAttachments; i++) + { + var crntSource = colorClearSource.Replace("COLOR_ATTACHMENT_INDEX", i.ToString()).Replace("FORMAT", "uint"); + _programsColorClearU.Add(new Program(renderer, device, [ + new ShaderSource(crntSource, ShaderStage.Fragment, TargetLanguage.Msl), + new ShaderSource(crntSource, ShaderStage.Vertex, TargetLanguage.Msl) + ], colorClearResourceLayout)); + } + + var depthStencilClearSource = ReadMsl("DepthStencilClear.metal"); + _programDepthStencilClear = new Program(renderer, device, [ + new ShaderSource(depthStencilClearSource, ShaderStage.Fragment, TargetLanguage.Msl), + new ShaderSource(depthStencilClearSource, ShaderStage.Vertex, TargetLanguage.Msl) + ], colorClearResourceLayout); + + var strideChangeResourceLayout = new ResourceLayoutBuilder() + .Add(ResourceStages.Compute, ResourceType.UniformBuffer, 0) + .Add(ResourceStages.Compute, ResourceType.StorageBuffer, 1) + .Add(ResourceStages.Compute, ResourceType.StorageBuffer, 2, true).Build(); + + var strideChangeSource = ReadMsl("ChangeBufferStride.metal"); + _programStrideChange = new Program(renderer, device, [ + new ShaderSource(strideChangeSource, ShaderStage.Compute, TargetLanguage.Msl) + ], strideChangeResourceLayout, new ComputeSize(64, 1, 1)); + + var convertD32S8ToD24S8ResourceLayout = new ResourceLayoutBuilder() + .Add(ResourceStages.Compute, ResourceType.UniformBuffer, 0) + .Add(ResourceStages.Compute, ResourceType.StorageBuffer, 1) + .Add(ResourceStages.Compute, ResourceType.StorageBuffer, 2, true).Build(); + + var convertD32S8ToD24S8Source = ReadMsl("ConvertD32S8ToD24S8.metal"); + _programConvertD32S8ToD24S8 = new Program(renderer, device, [ + new ShaderSource(convertD32S8ToD24S8Source, ShaderStage.Compute, TargetLanguage.Msl) + ], convertD32S8ToD24S8ResourceLayout, new ComputeSize(64, 1, 1)); + + var convertIndexBufferLayout = new ResourceLayoutBuilder() + .Add(ResourceStages.Compute, ResourceType.StorageBuffer, 1) + .Add(ResourceStages.Compute, ResourceType.StorageBuffer, 2, true) + .Add(ResourceStages.Compute, ResourceType.StorageBuffer, 3).Build(); + + var convertIndexBufferSource = ReadMsl("ConvertIndexBuffer.metal"); + _programConvertIndexBuffer = new Program(renderer, device, [ + new ShaderSource(convertIndexBufferSource, ShaderStage.Compute, TargetLanguage.Msl) + ], convertIndexBufferLayout, new ComputeSize(16, 1, 1)); + + var depthBlitSource = ReadMsl("DepthBlit.metal"); + _programDepthBlit = new Program(renderer, device, [ + new ShaderSource(depthBlitSource, ShaderStage.Fragment, TargetLanguage.Msl), + new ShaderSource(blitSourceF, ShaderStage.Vertex, TargetLanguage.Msl) + ], blitResourceLayout); + + var depthBlitMsSource = ReadMsl("DepthBlitMs.metal"); + _programDepthBlitMs = new Program(renderer, device, [ + new ShaderSource(depthBlitMsSource, ShaderStage.Fragment, TargetLanguage.Msl), + new ShaderSource(blitSourceF, ShaderStage.Vertex, TargetLanguage.Msl) + ], blitResourceLayout); + + var stencilBlitSource = ReadMsl("StencilBlit.metal"); + _programStencilBlit = new Program(renderer, device, [ + new ShaderSource(stencilBlitSource, ShaderStage.Fragment, TargetLanguage.Msl), + new ShaderSource(blitSourceF, ShaderStage.Vertex, TargetLanguage.Msl) + ], blitResourceLayout); + + var stencilBlitMsSource = ReadMsl("StencilBlitMs.metal"); + _programStencilBlitMs = new Program(renderer, device, [ + new ShaderSource(stencilBlitMsSource, ShaderStage.Fragment, TargetLanguage.Msl), + new ShaderSource(blitSourceF, ShaderStage.Vertex, TargetLanguage.Msl) + ], blitResourceLayout); + } + + private static string ReadMsl(string fileName) + { + var msl = EmbeddedResources.ReadAllText(string.Join('/', ShadersSourcePath, fileName)); + +#pragma warning disable IDE0055 // Disable formatting + msl = msl.Replace("CONSTANT_BUFFERS_INDEX", $"{Constants.ConstantBuffersIndex}") + .Replace("STORAGE_BUFFERS_INDEX", $"{Constants.StorageBuffersIndex}") + .Replace("TEXTURES_INDEX", $"{Constants.TexturesIndex}") + .Replace("IMAGES_INDEX", $"{Constants.ImagesIndex}"); +#pragma warning restore IDE0055 + + return msl; + } + + public unsafe void BlitColor( + CommandBufferScoped cbs, + Texture src, + Texture dst, + Extents2D srcRegion, + Extents2D dstRegion, + bool linearFilter, + bool clear = false) + { + _pipeline.SwapState(_helperShaderState); + + const int RegionBufferSize = 16; + + var sampler = linearFilter ? _samplerLinear : _samplerNearest; + + _pipeline.SetTextureAndSampler(ShaderStage.Fragment, 0, src, sampler); + + Span region = stackalloc float[RegionBufferSize / sizeof(float)]; + + region[0] = srcRegion.X1 / (float)src.Width; + region[1] = srcRegion.X2 / (float)src.Width; + region[2] = srcRegion.Y1 / (float)src.Height; + region[3] = srcRegion.Y2 / (float)src.Height; + + if (dstRegion.X1 > dstRegion.X2) + { + (region[0], region[1]) = (region[1], region[0]); + } + + if (dstRegion.Y1 > dstRegion.Y2) + { + (region[2], region[3]) = (region[3], region[2]); + } + + using var buffer = _renderer.BufferManager.ReserveOrCreate(cbs, RegionBufferSize); + buffer.Holder.SetDataUnchecked(buffer.Offset, region); + _pipeline.SetUniformBuffers([new BufferAssignment(0, buffer.Range)]); + + var rect = new Rectangle( + MathF.Min(dstRegion.X1, dstRegion.X2), + MathF.Min(dstRegion.Y1, dstRegion.Y2), + MathF.Abs(dstRegion.X2 - dstRegion.X1), + MathF.Abs(dstRegion.Y2 - dstRegion.Y1)); + + Span viewports = stackalloc Viewport[16]; + + viewports[0] = new Viewport( + rect, + ViewportSwizzle.PositiveX, + ViewportSwizzle.PositiveY, + ViewportSwizzle.PositiveZ, + ViewportSwizzle.PositiveW, + 0f, + 1f); + + bool dstIsDepthOrStencil = dst.Info.Format.IsDepthOrStencil(); + + if (dstIsDepthOrStencil) + { + // TODO: Depth & stencil blit! + Logger.Warning?.PrintMsg(LogClass.Gpu, "Requested a depth or stencil blit!"); + _pipeline.SwapState(null); + return; + } + + var debugGroupName = "Blit Color "; + + if (src.Info.Target.IsMultisample()) + { + if (dst.Info.Format.IsSint()) + { + debugGroupName += "MS Int"; + _pipeline.SetProgram(_programColorBlitMsI); + } + else if (dst.Info.Format.IsUint()) + { + debugGroupName += "MS UInt"; + _pipeline.SetProgram(_programColorBlitMsU); + } + else + { + debugGroupName += "MS Float"; + _pipeline.SetProgram(_programColorBlitMsF); + } + } + else + { + if (dst.Info.Format.IsSint()) + { + debugGroupName += "Int"; + _pipeline.SetProgram(_programColorBlitI); + } + else if (dst.Info.Format.IsUint()) + { + debugGroupName += "UInt"; + _pipeline.SetProgram(_programColorBlitU); + } + else + { + debugGroupName += "Float"; + _pipeline.SetProgram(_programColorBlitF); + } + } + + int dstWidth = dst.Width; + int dstHeight = dst.Height; + + Span> scissors = stackalloc Rectangle[16]; + + scissors[0] = new Rectangle(0, 0, dstWidth, dstHeight); + + _pipeline.SetRenderTargets([dst], null); + _pipeline.SetScissors(scissors); + + _pipeline.SetClearLoadAction(clear); + + _pipeline.SetViewports(viewports); + _pipeline.SetPrimitiveTopology(PrimitiveTopology.TriangleStrip); + _pipeline.Draw(4, 1, 0, 0, debugGroupName); + + // Cleanup + if (clear) + { + _pipeline.SetClearLoadAction(false); + } + + // Restore previous state + _pipeline.SwapState(null); + } + + public unsafe void BlitDepthStencil( + CommandBufferScoped cbs, + Texture src, + Texture dst, + Extents2D srcRegion, + Extents2D dstRegion) + { + _pipeline.SwapState(_helperShaderState); + + const int RegionBufferSize = 16; + + Span region = stackalloc float[RegionBufferSize / sizeof(float)]; + + region[0] = srcRegion.X1 / (float)src.Width; + region[1] = srcRegion.X2 / (float)src.Width; + region[2] = srcRegion.Y1 / (float)src.Height; + region[3] = srcRegion.Y2 / (float)src.Height; + + if (dstRegion.X1 > dstRegion.X2) + { + (region[0], region[1]) = (region[1], region[0]); + } + + if (dstRegion.Y1 > dstRegion.Y2) + { + (region[2], region[3]) = (region[3], region[2]); + } + + using var buffer = _renderer.BufferManager.ReserveOrCreate(cbs, RegionBufferSize); + buffer.Holder.SetDataUnchecked(buffer.Offset, region); + _pipeline.SetUniformBuffers([new BufferAssignment(0, buffer.Range)]); + + Span viewports = stackalloc Viewport[16]; + + var rect = new Rectangle( + MathF.Min(dstRegion.X1, dstRegion.X2), + MathF.Min(dstRegion.Y1, dstRegion.Y2), + MathF.Abs(dstRegion.X2 - dstRegion.X1), + MathF.Abs(dstRegion.Y2 - dstRegion.Y1)); + + viewports[0] = new Viewport( + rect, + ViewportSwizzle.PositiveX, + ViewportSwizzle.PositiveY, + ViewportSwizzle.PositiveZ, + ViewportSwizzle.PositiveW, + 0f, + 1f); + + int dstWidth = dst.Width; + int dstHeight = dst.Height; + + Span> scissors = stackalloc Rectangle[16]; + + scissors[0] = new Rectangle(0, 0, dstWidth, dstHeight); + + _pipeline.SetRenderTargets([], dst); + _pipeline.SetScissors(scissors); + _pipeline.SetViewports(viewports); + _pipeline.SetPrimitiveTopology(PrimitiveTopology.TriangleStrip); + + if (src.Info.Format is + Format.D16Unorm or + Format.D32Float or + Format.X8UintD24Unorm or + Format.D24UnormS8Uint or + Format.D32FloatS8Uint or + Format.S8UintD24Unorm) + { + var depthTexture = CreateDepthOrStencilView(src, DepthStencilMode.Depth); + + BlitDepthStencilDraw(depthTexture, isDepth: true); + + if (depthTexture != src) + { + depthTexture.Release(); + } + } + + if (src.Info.Format is + Format.S8Uint or + Format.D24UnormS8Uint or + Format.D32FloatS8Uint or + Format.S8UintD24Unorm) + { + var stencilTexture = CreateDepthOrStencilView(src, DepthStencilMode.Stencil); + + BlitDepthStencilDraw(stencilTexture, isDepth: false); + + if (stencilTexture != src) + { + stencilTexture.Release(); + } + } + + // Restore previous state + _pipeline.SwapState(null); + } + + private static Texture CreateDepthOrStencilView(Texture depthStencilTexture, DepthStencilMode depthStencilMode) + { + if (depthStencilTexture.Info.DepthStencilMode == depthStencilMode) + { + return depthStencilTexture; + } + + return (Texture)depthStencilTexture.CreateView(new TextureCreateInfo( + depthStencilTexture.Info.Width, + depthStencilTexture.Info.Height, + depthStencilTexture.Info.Depth, + depthStencilTexture.Info.Levels, + depthStencilTexture.Info.Samples, + depthStencilTexture.Info.BlockWidth, + depthStencilTexture.Info.BlockHeight, + depthStencilTexture.Info.BytesPerPixel, + depthStencilTexture.Info.Format, + depthStencilMode, + depthStencilTexture.Info.Target, + SwizzleComponent.Red, + SwizzleComponent.Green, + SwizzleComponent.Blue, + SwizzleComponent.Alpha), 0, 0); + } + + private void BlitDepthStencilDraw(Texture src, bool isDepth) + { + // TODO: Check this https://github.com/Ryujinx/Ryujinx/pull/5003/ + _pipeline.SetTextureAndSampler(ShaderStage.Fragment, 0, src, _samplerNearest); + + string debugGroupName; + + if (isDepth) + { + debugGroupName = "Depth Blit"; + _pipeline.SetProgram(src.Info.Target.IsMultisample() ? _programDepthBlitMs : _programDepthBlit); + _pipeline.SetDepthTest(new DepthTestDescriptor(true, true, CompareOp.Always)); + } + else + { + debugGroupName = "Stencil Blit"; + _pipeline.SetProgram(src.Info.Target.IsMultisample() ? _programStencilBlitMs : _programStencilBlit); + _pipeline.SetStencilTest(CreateStencilTestDescriptor(true)); + } + + _pipeline.Draw(4, 1, 0, 0, debugGroupName); + + if (isDepth) + { + _pipeline.SetDepthTest(new DepthTestDescriptor(false, false, CompareOp.Always)); + } + else + { + _pipeline.SetStencilTest(CreateStencilTestDescriptor(false)); + } + } + + public unsafe void DrawTexture( + ITexture src, + ISampler srcSampler, + Extents2DF srcRegion, + Extents2DF dstRegion) + { + // Save current state + var state = _pipeline.SavePredrawState(); + + _pipeline.SetFaceCulling(false, Face.Front); + _pipeline.SetStencilTest(new StencilTestDescriptor()); + _pipeline.SetDepthTest(new DepthTestDescriptor()); + + const int RegionBufferSize = 16; + + _pipeline.SetTextureAndSampler(ShaderStage.Fragment, 0, src, srcSampler); + + Span region = stackalloc float[RegionBufferSize / sizeof(float)]; + + region[0] = srcRegion.X1 / src.Width; + region[1] = srcRegion.X2 / src.Width; + region[2] = srcRegion.Y1 / src.Height; + region[3] = srcRegion.Y2 / src.Height; + + if (dstRegion.X1 > dstRegion.X2) + { + (region[0], region[1]) = (region[1], region[0]); + } + + if (dstRegion.Y1 > dstRegion.Y2) + { + (region[2], region[3]) = (region[3], region[2]); + } + + var bufferHandle = _renderer.BufferManager.CreateWithHandle(RegionBufferSize); + _renderer.BufferManager.SetData(bufferHandle, 0, region); + _pipeline.SetUniformBuffers([new BufferAssignment(0, new BufferRange(bufferHandle, 0, RegionBufferSize))]); + + Span viewports = stackalloc Viewport[16]; + + var rect = new Rectangle( + MathF.Min(dstRegion.X1, dstRegion.X2), + MathF.Min(dstRegion.Y1, dstRegion.Y2), + MathF.Abs(dstRegion.X2 - dstRegion.X1), + MathF.Abs(dstRegion.Y2 - dstRegion.Y1)); + + viewports[0] = new Viewport( + rect, + ViewportSwizzle.PositiveX, + ViewportSwizzle.PositiveY, + ViewportSwizzle.PositiveZ, + ViewportSwizzle.PositiveW, + 0f, + 1f); + + _pipeline.SetProgram(_programColorBlitF); + _pipeline.SetViewports(viewports); + _pipeline.SetPrimitiveTopology(PrimitiveTopology.TriangleStrip); + _pipeline.Draw(4, 1, 0, 0, "Draw Texture"); + + _renderer.BufferManager.Delete(bufferHandle); + + // Restore previous state + _pipeline.RestorePredrawState(state); + } + + public void ConvertI8ToI16(CommandBufferScoped cbs, BufferHolder src, BufferHolder dst, int srcOffset, int size) + { + ChangeStride(cbs, src, dst, srcOffset, size, 1, 2); + } + + public unsafe void ChangeStride( + CommandBufferScoped cbs, + BufferHolder src, + BufferHolder dst, + int srcOffset, + int size, + int stride, + int newStride) + { + int elems = size / stride; + + var srcBuffer = src.GetBuffer(); + var dstBuffer = dst.GetBuffer(); + + const int ParamsBufferSize = 4 * sizeof(int); + + // Save current state + _pipeline.SwapState(_helperShaderState); + + Span shaderParams = stackalloc int[ParamsBufferSize / sizeof(int)]; + + shaderParams[0] = stride; + shaderParams[1] = newStride; + shaderParams[2] = size; + shaderParams[3] = srcOffset; + + using var buffer = _renderer.BufferManager.ReserveOrCreate(cbs, ParamsBufferSize); + buffer.Holder.SetDataUnchecked(buffer.Offset, shaderParams); + _pipeline.SetUniformBuffers([new BufferAssignment(0, buffer.Range)]); + + Span> sbRanges = new Auto[2]; + + sbRanges[0] = srcBuffer; + sbRanges[1] = dstBuffer; + _pipeline.SetStorageBuffers(1, sbRanges); + + _pipeline.SetProgram(_programStrideChange); + _pipeline.DispatchCompute(1 + elems / ConvertElementsPerWorkgroup, 1, 1, "Change Stride"); + + // Restore previous state + _pipeline.SwapState(null); + } + + public unsafe void ConvertD32S8ToD24S8(CommandBufferScoped cbs, BufferHolder src, Auto dstBuffer, int pixelCount, int dstOffset) + { + int inSize = pixelCount * 2 * sizeof(int); + + var srcBuffer = src.GetBuffer(); + + const int ParamsBufferSize = sizeof(int) * 2; + + // Save current state + _pipeline.SwapState(_helperShaderState); + + Span shaderParams = stackalloc int[2]; + + shaderParams[0] = pixelCount; + shaderParams[1] = dstOffset; + + using var buffer = _renderer.BufferManager.ReserveOrCreate(cbs, ParamsBufferSize); + buffer.Holder.SetDataUnchecked(buffer.Offset, shaderParams); + _pipeline.SetUniformBuffers([new BufferAssignment(0, buffer.Range)]); + + Span> sbRanges = new Auto[2]; + + sbRanges[0] = srcBuffer; + sbRanges[1] = dstBuffer; + _pipeline.SetStorageBuffers(1, sbRanges); + + _pipeline.SetProgram(_programConvertD32S8ToD24S8); + _pipeline.DispatchCompute(1 + inSize / ConvertElementsPerWorkgroup, 1, 1, "D32S8 to D24S8 Conversion"); + + // Restore previous state + _pipeline.SwapState(null); + } + + public void ConvertIndexBuffer( + CommandBufferScoped cbs, + BufferHolder src, + BufferHolder dst, + IndexBufferPattern pattern, + int indexSize, + int srcOffset, + int indexCount) + { + // TODO: Support conversion with primitive restart enabled. + + int primitiveCount = pattern.GetPrimitiveCount(indexCount); + int outputIndexSize = 4; + + var srcBuffer = src.GetBuffer(); + var dstBuffer = dst.GetBuffer(); + + const int ParamsBufferSize = 16 * sizeof(int); + + // Save current state + _pipeline.SwapState(_helperShaderState); + + Span shaderParams = stackalloc int[ParamsBufferSize / sizeof(int)]; + + shaderParams[8] = pattern.PrimitiveVertices; + shaderParams[9] = pattern.PrimitiveVerticesOut; + shaderParams[10] = indexSize; + shaderParams[11] = outputIndexSize; + shaderParams[12] = pattern.BaseIndex; + shaderParams[13] = pattern.IndexStride; + shaderParams[14] = srcOffset; + shaderParams[15] = primitiveCount; + + pattern.OffsetIndex.CopyTo(shaderParams[..pattern.OffsetIndex.Length]); + + using var patternScoped = _renderer.BufferManager.ReserveOrCreate(cbs, ParamsBufferSize); + patternScoped.Holder.SetDataUnchecked(patternScoped.Offset, shaderParams); + + Span> sbRanges = new Auto[2]; + + sbRanges[0] = srcBuffer; + sbRanges[1] = dstBuffer; + _pipeline.SetStorageBuffers(1, sbRanges); + _pipeline.SetStorageBuffers([new BufferAssignment(3, patternScoped.Range)]); + + _pipeline.SetProgram(_programConvertIndexBuffer); + _pipeline.DispatchCompute(BitUtils.DivRoundUp(primitiveCount, 16), 1, 1, "Convert Index Buffer"); + + // Restore previous state + _pipeline.SwapState(null); + } + + public unsafe void ClearColor( + int index, + ReadOnlySpan clearColor, + uint componentMask, + int dstWidth, + int dstHeight, + Format format) + { + // Keep original scissor + DirtyFlags clearFlags = DirtyFlags.All & (~DirtyFlags.Scissors); + + // Save current state + EncoderState originalState = _pipeline.SwapState(_helperShaderState, clearFlags, false); + + // Inherit some state without fully recreating render pipeline. + RenderTargetCopy save = _helperShaderState.InheritForClear(originalState, false, index); + + const int ClearColorBufferSize = 16; + + // TODO: Flush + + using var buffer = _renderer.BufferManager.ReserveOrCreate(_pipeline.Cbs, ClearColorBufferSize); + buffer.Holder.SetDataUnchecked(buffer.Offset, clearColor); + _pipeline.SetUniformBuffers([new BufferAssignment(0, buffer.Range)]); + + Span viewports = stackalloc Viewport[16]; + + // TODO: Set exact viewport! + viewports[0] = new Viewport( + new Rectangle(0, 0, dstWidth, dstHeight), + ViewportSwizzle.PositiveX, + ViewportSwizzle.PositiveY, + ViewportSwizzle.PositiveZ, + ViewportSwizzle.PositiveW, + 0f, + 1f); + + Span componentMasks = stackalloc uint[index + 1]; + componentMasks[index] = componentMask; + + var debugGroupName = "Clear Color "; + + if (format.IsSint()) + { + debugGroupName += "Int"; + _pipeline.SetProgram(_programsColorClearI[index]); + } + else if (format.IsUint()) + { + debugGroupName += "UInt"; + _pipeline.SetProgram(_programsColorClearU[index]); + } + else + { + debugGroupName += "Float"; + _pipeline.SetProgram(_programsColorClearF[index]); + } + + _pipeline.SetBlendState(index, new BlendDescriptor()); + _pipeline.SetFaceCulling(false, Face.Front); + _pipeline.SetDepthTest(new DepthTestDescriptor(false, false, CompareOp.Always)); + _pipeline.SetRenderTargetColorMasks(componentMasks); + _pipeline.SetViewports(viewports); + _pipeline.SetPrimitiveTopology(PrimitiveTopology.TriangleStrip); + _pipeline.Draw(4, 1, 0, 0, debugGroupName); + + // Restore previous state + _pipeline.SwapState(null, clearFlags, false); + + _helperShaderState.Restore(save); + } + + public unsafe void ClearDepthStencil( + float depthValue, + bool depthMask, + int stencilValue, + int stencilMask, + int dstWidth, + int dstHeight) + { + // Keep original scissor + DirtyFlags clearFlags = DirtyFlags.All & (~DirtyFlags.Scissors); + var helperScissors = _helperShaderState.Scissors; + + // Save current state + EncoderState originalState = _pipeline.SwapState(_helperShaderState, clearFlags, false); + + // Inherit some state without fully recreating render pipeline. + RenderTargetCopy save = _helperShaderState.InheritForClear(originalState, true); + + const int ClearDepthBufferSize = 16; + + using var buffer = _renderer.BufferManager.ReserveOrCreate(_pipeline.Cbs, ClearDepthBufferSize); + buffer.Holder.SetDataUnchecked(buffer.Offset, new ReadOnlySpan(ref depthValue)); + _pipeline.SetUniformBuffers([new BufferAssignment(0, buffer.Range)]); + + Span viewports = stackalloc Viewport[1]; + + viewports[0] = new Viewport( + new Rectangle(0, 0, dstWidth, dstHeight), + ViewportSwizzle.PositiveX, + ViewportSwizzle.PositiveY, + ViewportSwizzle.PositiveZ, + ViewportSwizzle.PositiveW, + 0f, + 1f); + + _pipeline.SetProgram(_programDepthStencilClear); + _pipeline.SetFaceCulling(false, Face.Front); + _pipeline.SetPrimitiveTopology(PrimitiveTopology.TriangleStrip); + _pipeline.SetViewports(viewports); + _pipeline.SetDepthTest(new DepthTestDescriptor(true, depthMask, CompareOp.Always)); + _pipeline.SetStencilTest(CreateStencilTestDescriptor(stencilMask != 0, stencilValue, 0xFF, stencilMask)); + _pipeline.Draw(4, 1, 0, 0, "Clear Depth Stencil"); + + // Cleanup + _pipeline.SetDepthTest(new DepthTestDescriptor(false, false, CompareOp.Always)); + _pipeline.SetStencilTest(CreateStencilTestDescriptor(false)); + + // Restore previous state + _pipeline.SwapState(null, clearFlags, false); + + _helperShaderState.Restore(save); + } + + private static StencilTestDescriptor CreateStencilTestDescriptor( + bool enabled, + int refValue = 0, + int compareMask = 0xff, + int writeMask = 0xff) + { + return new StencilTestDescriptor( + enabled, + CompareOp.Always, + StencilOp.Replace, + StencilOp.Replace, + StencilOp.Replace, + refValue, + compareMask, + writeMask, + CompareOp.Always, + StencilOp.Replace, + StencilOp.Replace, + StencilOp.Replace, + refValue, + compareMask, + writeMask); + } + + public void Dispose() + { + _programColorBlitF.Dispose(); + _programColorBlitI.Dispose(); + _programColorBlitU.Dispose(); + _programColorBlitMsF.Dispose(); + _programColorBlitMsI.Dispose(); + _programColorBlitMsU.Dispose(); + + foreach (var programColorClear in _programsColorClearF) + { + programColorClear.Dispose(); + } + + foreach (var programColorClear in _programsColorClearU) + { + programColorClear.Dispose(); + } + + foreach (var programColorClear in _programsColorClearI) + { + programColorClear.Dispose(); + } + + _programDepthStencilClear.Dispose(); + _pipeline.Dispose(); + _samplerLinear.Dispose(); + _samplerNearest.Dispose(); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/IdList.cs b/src/Ryujinx.Graphics.Metal/IdList.cs new file mode 100644 index 0000000000..2c15a80ef7 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/IdList.cs @@ -0,0 +1,121 @@ +using System; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Metal +{ + class IdList where T : class + { + private readonly List _list; + private int _freeMin; + + public IdList() + { + _list = new List(); + _freeMin = 0; + } + + public int Add(T value) + { + int id; + int count = _list.Count; + id = _list.IndexOf(null, _freeMin); + + if ((uint)id < (uint)count) + { + _list[id] = value; + } + else + { + id = count; + _freeMin = id + 1; + + _list.Add(value); + } + + return id + 1; + } + + public void Remove(int id) + { + id--; + + int count = _list.Count; + + if ((uint)id >= (uint)count) + { + return; + } + + if (id + 1 == count) + { + // Trim unused items. + int removeIndex = id; + + while (removeIndex > 0 && _list[removeIndex - 1] == null) + { + removeIndex--; + } + + _list.RemoveRange(removeIndex, count - removeIndex); + + if (_freeMin > removeIndex) + { + _freeMin = removeIndex; + } + } + else + { + _list[id] = null; + + if (_freeMin > id) + { + _freeMin = id; + } + } + } + + public bool TryGetValue(int id, out T value) + { + id--; + + try + { + if ((uint)id < (uint)_list.Count) + { + value = _list[id]; + return value != null; + } + + value = null; + return false; + } + catch (ArgumentOutOfRangeException) + { + value = null; + return false; + } + catch (IndexOutOfRangeException) + { + value = null; + return false; + } + } + + public void Clear() + { + _list.Clear(); + _freeMin = 0; + } + + public IEnumerator GetEnumerator() + { + for (int i = 0; i < _list.Count; i++) + { + if (_list[i] != null) + { + yield return _list[i]; + } + } + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/ImageArray.cs b/src/Ryujinx.Graphics.Metal/ImageArray.cs new file mode 100644 index 0000000000..9fa0df09d2 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/ImageArray.cs @@ -0,0 +1,74 @@ +using Ryujinx.Graphics.GAL; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + internal class ImageArray : IImageArray + { + private readonly TextureRef[] _textureRefs; + private readonly TextureBuffer[] _bufferTextureRefs; + + private readonly bool _isBuffer; + private readonly Pipeline _pipeline; + + public ImageArray(int size, bool isBuffer, Pipeline pipeline) + { + if (isBuffer) + { + _bufferTextureRefs = new TextureBuffer[size]; + } + else + { + _textureRefs = new TextureRef[size]; + } + + _isBuffer = isBuffer; + _pipeline = pipeline; + } + + public void SetImages(int index, ITexture[] images) + { + for (int i = 0; i < images.Length; i++) + { + ITexture image = images[i]; + + if (image is TextureBuffer textureBuffer) + { + _bufferTextureRefs[index + i] = textureBuffer; + } + else if (image is Texture texture) + { + _textureRefs[index + i].Storage = texture; + } + else if (!_isBuffer) + { + _textureRefs[index + i].Storage = null; + } + else + { + _bufferTextureRefs[index + i] = null; + } + } + + SetDirty(); + } + + public TextureRef[] GetTextureRefs() + { + return _textureRefs; + } + + public TextureBuffer[] GetBufferTextureRefs() + { + return _bufferTextureRefs; + } + + private void SetDirty() + { + _pipeline.DirtyImages(); + } + + public void Dispose() { } + } +} diff --git a/src/Ryujinx.Graphics.Metal/IndexBufferPattern.cs b/src/Ryujinx.Graphics.Metal/IndexBufferPattern.cs new file mode 100644 index 0000000000..24e3222fee --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/IndexBufferPattern.cs @@ -0,0 +1,118 @@ +using Ryujinx.Graphics.GAL; +using System; +using System.Runtime.InteropServices; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + internal class IndexBufferPattern : IDisposable + { + public int PrimitiveVertices { get; } + public int PrimitiveVerticesOut { get; } + public int BaseIndex { get; } + public int[] OffsetIndex { get; } + public int IndexStride { get; } + public bool RepeatStart { get; } + + private readonly MetalRenderer _renderer; + private int _currentSize; + private BufferHandle _repeatingBuffer; + + public IndexBufferPattern(MetalRenderer renderer, + int primitiveVertices, + int primitiveVerticesOut, + int baseIndex, + int[] offsetIndex, + int indexStride, + bool repeatStart) + { + PrimitiveVertices = primitiveVertices; + PrimitiveVerticesOut = primitiveVerticesOut; + BaseIndex = baseIndex; + OffsetIndex = offsetIndex; + IndexStride = indexStride; + RepeatStart = repeatStart; + + _renderer = renderer; + } + + public int GetPrimitiveCount(int vertexCount) + { + return Math.Max(0, (vertexCount - BaseIndex) / IndexStride); + } + + public int GetConvertedCount(int indexCount) + { + int primitiveCount = GetPrimitiveCount(indexCount); + return primitiveCount * OffsetIndex.Length; + } + + public BufferHandle GetRepeatingBuffer(int vertexCount, out int indexCount) + { + int primitiveCount = GetPrimitiveCount(vertexCount); + indexCount = primitiveCount * PrimitiveVerticesOut; + + int expectedSize = primitiveCount * OffsetIndex.Length; + + if (expectedSize <= _currentSize && _repeatingBuffer != BufferHandle.Null) + { + return _repeatingBuffer; + } + + // Expand the repeating pattern to the number of requested primitives. + BufferHandle newBuffer = _renderer.BufferManager.CreateWithHandle(expectedSize * sizeof(int)); + + // Copy the old data to the new one. + if (_repeatingBuffer != BufferHandle.Null) + { + _renderer.Pipeline.CopyBuffer(_repeatingBuffer, newBuffer, 0, 0, _currentSize * sizeof(int)); + _renderer.BufferManager.Delete(_repeatingBuffer); + } + + _repeatingBuffer = newBuffer; + + // Add the additional repeats on top. + int newPrimitives = primitiveCount; + int oldPrimitives = (_currentSize) / OffsetIndex.Length; + + int[] newData; + + newPrimitives -= oldPrimitives; + newData = new int[expectedSize - _currentSize]; + + int outOffset = 0; + int index = oldPrimitives * IndexStride + BaseIndex; + + for (int i = 0; i < newPrimitives; i++) + { + if (RepeatStart) + { + // Used for triangle fan + newData[outOffset++] = 0; + } + + for (int j = RepeatStart ? 1 : 0; j < OffsetIndex.Length; j++) + { + newData[outOffset++] = index + OffsetIndex[j]; + } + + index += IndexStride; + } + + _renderer.SetBufferData(newBuffer, _currentSize * sizeof(int), MemoryMarshal.Cast(newData)); + _currentSize = expectedSize; + + return newBuffer; + } + + public void Dispose() + { + if (_repeatingBuffer != BufferHandle.Null) + { + _renderer.BufferManager.Delete(_repeatingBuffer); + _repeatingBuffer = BufferHandle.Null; + } + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/IndexBufferState.cs b/src/Ryujinx.Graphics.Metal/IndexBufferState.cs new file mode 100644 index 0000000000..411df96852 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/IndexBufferState.cs @@ -0,0 +1,103 @@ +using Ryujinx.Graphics.GAL; +using SharpMetal.Metal; +using System; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + readonly internal struct IndexBufferState + { + public static IndexBufferState Null => new(BufferHandle.Null, 0, 0); + + private readonly int _offset; + private readonly int _size; + private readonly IndexType _type; + + private readonly BufferHandle _handle; + + public IndexBufferState(BufferHandle handle, int offset, int size, IndexType type = IndexType.UInt) + { + _handle = handle; + _offset = offset; + _size = size; + _type = type; + } + + public (MTLBuffer, int, MTLIndexType) GetIndexBuffer(MetalRenderer renderer, CommandBufferScoped cbs) + { + Auto autoBuffer; + int offset, size; + MTLIndexType type; + + if (_type == IndexType.UByte) + { + // Index type is not supported. Convert to I16. + autoBuffer = renderer.BufferManager.GetBufferI8ToI16(cbs, _handle, _offset, _size); + + type = MTLIndexType.UInt16; + offset = 0; + size = _size * 2; + } + else + { + autoBuffer = renderer.BufferManager.GetBuffer(_handle, false, out int bufferSize); + + if (_offset >= bufferSize) + { + autoBuffer = null; + } + + type = _type.Convert(); + offset = _offset; + size = _size; + } + + if (autoBuffer != null) + { + DisposableBuffer buffer = autoBuffer.Get(cbs, offset, size); + + return (buffer.Value, offset, type); + } + + return (new MTLBuffer(IntPtr.Zero), 0, MTLIndexType.UInt16); + } + + public (MTLBuffer, int, MTLIndexType) GetConvertedIndexBuffer( + MetalRenderer renderer, + CommandBufferScoped cbs, + int firstIndex, + int indexCount, + int convertedCount, + IndexBufferPattern pattern) + { + // Convert the index buffer using the given pattern. + int indexSize = GetIndexSize(); + + int firstIndexOffset = firstIndex * indexSize; + + var autoBuffer = renderer.BufferManager.GetBufferTopologyConversion(cbs, _handle, _offset + firstIndexOffset, indexCount * indexSize, pattern, indexSize); + + int size = convertedCount * 4; + + if (autoBuffer != null) + { + DisposableBuffer buffer = autoBuffer.Get(cbs, 0, size); + + return (buffer.Value, 0, MTLIndexType.UInt32); + } + + return (new MTLBuffer(IntPtr.Zero), 0, MTLIndexType.UInt32); + } + + private int GetIndexSize() + { + return _type switch + { + IndexType.UInt => 4, + IndexType.UShort => 2, + _ => 1, + }; + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/MetalRenderer.cs b/src/Ryujinx.Graphics.Metal/MetalRenderer.cs new file mode 100644 index 0000000000..935a5b3b12 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/MetalRenderer.cs @@ -0,0 +1,309 @@ +using Ryujinx.Common.Configuration; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Shader.Translation; +using SharpMetal.Metal; +using SharpMetal.QuartzCore; +using System; +using System.Collections.Generic; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + public sealed class MetalRenderer : IRenderer + { + public const int TotalSets = 4; + + private readonly MTLDevice _device; + private readonly MTLCommandQueue _queue; + private readonly Func _getMetalLayer; + + private Pipeline _pipeline; + private Window _window; + + public uint ProgramCount { get; set; } = 0; + + public event EventHandler ScreenCaptured; + public bool PreferThreading => true; + public IPipeline Pipeline => _pipeline; + public IWindow Window => _window; + + internal MTLCommandQueue BackgroundQueue { get; private set; } + internal HelperShader HelperShader { get; private set; } + internal BufferManager BufferManager { get; private set; } + internal CommandBufferPool CommandBufferPool { get; private set; } + internal BackgroundResources BackgroundResources { get; private set; } + internal Action InterruptAction { get; private set; } + internal SyncManager SyncManager { get; private set; } + + internal HashSet Programs { get; } + internal HashSet Samplers { get; } + + public MetalRenderer(Func metalLayer) + { + _device = MTLDevice.CreateSystemDefaultDevice(); + Programs = new HashSet(); + Samplers = new HashSet(); + + if (_device.ArgumentBuffersSupport != MTLArgumentBuffersTier.Tier2) + { + throw new NotSupportedException("Metal backend requires Tier 2 Argument Buffer support."); + } + + _queue = _device.NewCommandQueue(CommandBufferPool.MaxCommandBuffers + 1); + BackgroundQueue = _device.NewCommandQueue(CommandBufferPool.MaxCommandBuffers); + + _getMetalLayer = metalLayer; + } + + public void Initialize(GraphicsDebugLevel logLevel) + { + var layer = _getMetalLayer(); + layer.Device = _device; + layer.FramebufferOnly = false; + + CommandBufferPool = new CommandBufferPool(_queue); + _window = new Window(this, layer); + _pipeline = new Pipeline(_device, this); + BufferManager = new BufferManager(_device, this, _pipeline); + + _pipeline.InitEncoderStateManager(BufferManager); + + BackgroundResources = new BackgroundResources(this); + HelperShader = new HelperShader(_device, this, _pipeline); + SyncManager = new SyncManager(this); + } + + public void BackgroundContextAction(Action action, bool alwaysBackground = false) + { + // GetData methods should be thread safe, so we can call this directly. + // Texture copy (scaled) may also happen in here, so that should also be thread safe. + + action(); + } + + public BufferHandle CreateBuffer(int size, BufferAccess access) + { + return BufferManager.CreateWithHandle(size); + } + + public BufferHandle CreateBuffer(IntPtr pointer, int size) + { + return BufferManager.Create(pointer, size); + } + + public BufferHandle CreateBufferSparse(ReadOnlySpan storageBuffers) + { + throw new NotImplementedException(); + } + + public IImageArray CreateImageArray(int size, bool isBuffer) + { + return new ImageArray(size, isBuffer, _pipeline); + } + + public IProgram CreateProgram(ShaderSource[] shaders, ShaderInfo info) + { + ProgramCount++; + return new Program(this, _device, shaders, info.ResourceLayout, info.ComputeLocalSize); + } + + public ISampler CreateSampler(SamplerCreateInfo info) + { + return new SamplerHolder(this, _device, info); + } + + public ITexture CreateTexture(TextureCreateInfo info) + { + if (info.Target == Target.TextureBuffer) + { + return new TextureBuffer(_device, this, _pipeline, info); + } + + return new Texture(_device, this, _pipeline, info); + } + + public ITextureArray CreateTextureArray(int size, bool isBuffer) + { + return new TextureArray(size, isBuffer, _pipeline); + } + + public bool PrepareHostMapping(IntPtr address, ulong size) + { + // TODO: Metal Host Mapping + return false; + } + + public void CreateSync(ulong id, bool strict) + { + SyncManager.Create(id, strict); + } + + public void DeleteBuffer(BufferHandle buffer) + { + BufferManager.Delete(buffer); + } + + public PinnedSpan GetBufferData(BufferHandle buffer, int offset, int size) + { + return BufferManager.GetData(buffer, offset, size); + } + + public Capabilities GetCapabilities() + { + // TODO: Finalize these values + return new Capabilities( + api: TargetApi.Metal, + vendorName: HardwareInfoTools.GetVendor(), + SystemMemoryType.UnifiedMemory, + hasFrontFacingBug: false, + hasVectorIndexingBug: false, + needsFragmentOutputSpecialization: true, + reduceShaderPrecision: true, + supportsAstcCompression: true, + supportsBc123Compression: true, + supportsBc45Compression: true, + supportsBc67Compression: true, + supportsEtc2Compression: true, + supports3DTextureCompression: true, + supportsBgraFormat: true, + supportsR4G4Format: false, + supportsR4G4B4A4Format: true, + supportsScaledVertexFormats: false, + supportsSnormBufferTextureFormat: true, + supportsSparseBuffer: false, + supports5BitComponentFormat: true, + supportsBlendEquationAdvanced: false, + supportsFragmentShaderInterlock: true, + supportsFragmentShaderOrderingIntel: false, + supportsGeometryShader: false, + supportsGeometryShaderPassthrough: false, + supportsTransformFeedback: false, + supportsImageLoadFormatted: false, + supportsLayerVertexTessellation: false, + supportsMismatchingViewFormat: true, + supportsCubemapView: true, + supportsNonConstantTextureOffset: false, + supportsQuads: false, + supportsSeparateSampler: true, + supportsShaderBallot: false, + supportsShaderBarrierDivergence: false, + supportsShaderFloat64: false, + supportsTextureGatherOffsets: false, + supportsTextureShadowLod: false, + supportsVertexStoreAndAtomics: false, + supportsViewportIndexVertexTessellation: false, + supportsViewportMask: false, + supportsViewportSwizzle: false, + supportsIndirectParameters: true, + supportsDepthClipControl: false, + uniformBufferSetIndex: (int)Constants.ConstantBuffersSetIndex, + storageBufferSetIndex: (int)Constants.StorageBuffersSetIndex, + textureSetIndex: (int)Constants.TexturesSetIndex, + imageSetIndex: (int)Constants.ImagesSetIndex, + extraSetBaseIndex: TotalSets, + maximumExtraSets: (int)Constants.MaximumExtraSets, + maximumUniformBuffersPerStage: Constants.MaxUniformBuffersPerStage, + maximumStorageBuffersPerStage: Constants.MaxStorageBuffersPerStage, + maximumTexturesPerStage: Constants.MaxTexturesPerStage, + maximumImagesPerStage: Constants.MaxImagesPerStage, + maximumComputeSharedMemorySize: (int)_device.MaxThreadgroupMemoryLength, + maximumSupportedAnisotropy: 16, + shaderSubgroupSize: 256, + storageBufferOffsetAlignment: 16, + textureBufferOffsetAlignment: 16, + gatherBiasPrecision: 0, + maximumGpuMemory: 0 + ); + } + + public ulong GetCurrentSync() + { + return SyncManager.GetCurrent(); + } + + public HardwareInfo GetHardwareInfo() + { + return new HardwareInfo(HardwareInfoTools.GetVendor(), HardwareInfoTools.GetModel(), "Apple"); + } + + public IProgram LoadProgramBinary(byte[] programBinary, bool hasFragmentShader, ShaderInfo info) + { + throw new NotImplementedException(); + } + + public void SetBufferData(BufferHandle buffer, int offset, ReadOnlySpan data) + { + BufferManager.SetData(buffer, offset, data, _pipeline.Cbs); + } + + public void UpdateCounters() + { + // https://developer.apple.com/documentation/metal/gpu_counters_and_counter_sample_buffers/creating_a_counter_sample_buffer_to_store_a_gpu_s_counter_data_during_a_pass?language=objc + } + + public void PreFrame() + { + SyncManager.Cleanup(); + } + + public ICounterEvent ReportCounter(CounterType type, EventHandler resultHandler, float divisor, bool hostReserved) + { + // https://developer.apple.com/documentation/metal/gpu_counters_and_counter_sample_buffers/creating_a_counter_sample_buffer_to_store_a_gpu_s_counter_data_during_a_pass?language=objc + var counterEvent = new CounterEvent(); + resultHandler?.Invoke(counterEvent, type == CounterType.SamplesPassed ? (ulong)1 : 0); + return counterEvent; + } + + public void ResetCounter(CounterType type) + { + // https://developer.apple.com/documentation/metal/gpu_counters_and_counter_sample_buffers/creating_a_counter_sample_buffer_to_store_a_gpu_s_counter_data_during_a_pass?language=objc + } + + public void WaitSync(ulong id) + { + SyncManager.Wait(id); + } + + public void FlushAllCommands() + { + _pipeline.FlushCommandsImpl(); + } + + public void RegisterFlush() + { + SyncManager.RegisterFlush(); + + // Periodically free unused regions of the staging buffer to avoid doing it all at once. + BufferManager.StagingBuffer.FreeCompleted(); + } + + public void SetInterruptAction(Action interruptAction) + { + InterruptAction = interruptAction; + } + + public void Screenshot() + { + // TODO: Screenshots + } + + public void Dispose() + { + BackgroundResources.Dispose(); + + foreach (var program in Programs) + { + program.Dispose(); + } + + foreach (var sampler in Samplers) + { + sampler.Dispose(); + } + + _pipeline.Dispose(); + _window.Dispose(); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/MultiFenceHolder.cs b/src/Ryujinx.Graphics.Metal/MultiFenceHolder.cs new file mode 100644 index 0000000000..cd5ad08ba1 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/MultiFenceHolder.cs @@ -0,0 +1,262 @@ +using SharpMetal.Metal; +using System; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + /// + /// Holder for multiple host GPU fences. + /// + [SupportedOSPlatform("macos")] + class MultiFenceHolder + { + private const int BufferUsageTrackingGranularity = 4096; + + private readonly FenceHolder[] _fences; + private readonly BufferUsageBitmap _bufferUsageBitmap; + + /// + /// Creates a new instance of the multiple fence holder. + /// + public MultiFenceHolder() + { + _fences = new FenceHolder[CommandBufferPool.MaxCommandBuffers]; + } + + /// + /// Creates a new instance of the multiple fence holder, with a given buffer size in mind. + /// + /// Size of the buffer + public MultiFenceHolder(int size) + { + _fences = new FenceHolder[CommandBufferPool.MaxCommandBuffers]; + _bufferUsageBitmap = new BufferUsageBitmap(size, BufferUsageTrackingGranularity); + } + + /// + /// Adds read/write buffer usage information to the uses list. + /// + /// Index of the command buffer where the buffer is used + /// Offset of the buffer being used + /// Size of the buffer region being used, in bytes + /// Whether the access is a write or not + public void AddBufferUse(int cbIndex, int offset, int size, bool write) + { + _bufferUsageBitmap.Add(cbIndex, offset, size, false); + + if (write) + { + _bufferUsageBitmap.Add(cbIndex, offset, size, true); + } + } + + /// + /// Removes all buffer usage information for a given command buffer. + /// + /// Index of the command buffer where the buffer is used + public void RemoveBufferUses(int cbIndex) + { + _bufferUsageBitmap?.Clear(cbIndex); + } + + /// + /// Checks if a given range of a buffer is being used by a command buffer still being processed by the GPU. + /// + /// Index of the command buffer where the buffer is used + /// Offset of the buffer being used + /// Size of the buffer region being used, in bytes + /// True if in use, false otherwise + public bool IsBufferRangeInUse(int cbIndex, int offset, int size) + { + return _bufferUsageBitmap.OverlapsWith(cbIndex, offset, size); + } + + /// + /// Checks if a given range of a buffer is being used by any command buffer still being processed by the GPU. + /// + /// Offset of the buffer being used + /// Size of the buffer region being used, in bytes + /// True if only write usages should count + /// True if in use, false otherwise + public bool IsBufferRangeInUse(int offset, int size, bool write) + { + return _bufferUsageBitmap.OverlapsWith(offset, size, write); + } + + /// + /// Adds a fence to the holder. + /// + /// Command buffer index of the command buffer that owns the fence + /// Fence to be added + /// True if the command buffer's previous fence value was null + public bool AddFence(int cbIndex, FenceHolder fence) + { + ref FenceHolder fenceRef = ref _fences[cbIndex]; + + if (fenceRef == null) + { + fenceRef = fence; + return true; + } + + return false; + } + + /// + /// Removes a fence from the holder. + /// + /// Command buffer index of the command buffer that owns the fence + public void RemoveFence(int cbIndex) + { + _fences[cbIndex] = null; + } + + /// + /// Determines if a fence referenced on the given command buffer. + /// + /// Index of the command buffer to check if it's used + /// True if referenced, false otherwise + public bool HasFence(int cbIndex) + { + return _fences[cbIndex] != null; + } + + /// + /// Wait until all the fences on the holder are signaled. + /// + public void WaitForFences() + { + WaitForFencesImpl(0, 0, true); + } + + /// + /// Wait until all the fences on the holder with buffer uses overlapping the specified range are signaled. + /// + /// Start offset of the buffer range + /// Size of the buffer range in bytes + public void WaitForFences(int offset, int size) + { + WaitForFencesImpl(offset, size, true); + } + + /// + /// Wait until all the fences on the holder with buffer uses overlapping the specified range are signaled. + /// + + // TODO: Add a proper timeout! + public bool WaitForFences(bool indefinite) + { + return WaitForFencesImpl(0, 0, indefinite); + } + + /// + /// Wait until all the fences on the holder with buffer uses overlapping the specified range are signaled. + /// + /// Start offset of the buffer range + /// Size of the buffer range in bytes + /// Indicates if this should wait indefinitely + /// True if all fences were signaled before the timeout expired, false otherwise + private bool WaitForFencesImpl(int offset, int size, bool indefinite) + { + Span fenceHolders = new FenceHolder[CommandBufferPool.MaxCommandBuffers]; + + int count = size != 0 ? GetOverlappingFences(fenceHolders, offset, size) : GetFences(fenceHolders); + Span fences = stackalloc MTLCommandBuffer[count]; + + int fenceCount = 0; + + for (int i = 0; i < count; i++) + { + if (fenceHolders[i].TryGet(out MTLCommandBuffer fence)) + { + fences[fenceCount] = fence; + + if (fenceCount < i) + { + fenceHolders[fenceCount] = fenceHolders[i]; + } + + fenceCount++; + } + } + + if (fenceCount == 0) + { + return true; + } + + bool signaled = true; + + if (indefinite) + { + foreach (var fence in fences) + { + fence.WaitUntilCompleted(); + } + } + else + { + foreach (var fence in fences) + { + if (fence.Status != MTLCommandBufferStatus.Completed) + { + signaled = false; + } + } + } + + for (int i = 0; i < fenceCount; i++) + { + fenceHolders[i].Put(); + } + + return signaled; + } + + /// + /// Gets fences to wait for. + /// + /// Span to store fences in + /// Number of fences placed in storage + private int GetFences(Span storage) + { + int count = 0; + + for (int i = 0; i < _fences.Length; i++) + { + var fence = _fences[i]; + + if (fence != null) + { + storage[count++] = fence; + } + } + + return count; + } + + /// + /// Gets fences to wait for use of a given buffer region. + /// + /// Span to store overlapping fences in + /// Offset of the range + /// Size of the range in bytes + /// Number of fences for the specified region placed in storage + private int GetOverlappingFences(Span storage, int offset, int size) + { + int count = 0; + + for (int i = 0; i < _fences.Length; i++) + { + var fence = _fences[i]; + + if (fence != null && _bufferUsageBitmap.OverlapsWith(i, offset, size)) + { + storage[count++] = fence; + } + } + + return count; + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/PersistentFlushBuffer.cs b/src/Ryujinx.Graphics.Metal/PersistentFlushBuffer.cs new file mode 100644 index 0000000000..fa3df47dba --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/PersistentFlushBuffer.cs @@ -0,0 +1,99 @@ +using Ryujinx.Graphics.GAL; +using System; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + internal class PersistentFlushBuffer : IDisposable + { + private readonly MetalRenderer _renderer; + + private BufferHolder _flushStorage; + + public PersistentFlushBuffer(MetalRenderer renderer) + { + _renderer = renderer; + } + + private BufferHolder ResizeIfNeeded(int size) + { + var flushStorage = _flushStorage; + + if (flushStorage == null || size > _flushStorage.Size) + { + flushStorage?.Dispose(); + + flushStorage = _renderer.BufferManager.Create(size); + _flushStorage = flushStorage; + } + + return flushStorage; + } + + public Span GetBufferData(CommandBufferPool cbp, BufferHolder buffer, int offset, int size) + { + var flushStorage = ResizeIfNeeded(size); + Auto srcBuffer; + + using (var cbs = cbp.Rent()) + { + srcBuffer = buffer.GetBuffer(); + var dstBuffer = flushStorage.GetBuffer(); + + if (srcBuffer.TryIncrementReferenceCount()) + { + BufferHolder.Copy(cbs, srcBuffer, dstBuffer, offset, 0, size, registerSrcUsage: false); + } + else + { + // Source buffer is no longer alive, don't copy anything to flush storage. + srcBuffer = null; + } + } + + flushStorage.WaitForFences(); + srcBuffer?.DecrementReferenceCount(); + return flushStorage.GetDataStorage(0, size); + } + + public Span GetTextureData(CommandBufferPool cbp, Texture view, int size) + { + TextureCreateInfo info = view.Info; + + var flushStorage = ResizeIfNeeded(size); + + using (var cbs = cbp.Rent()) + { + var buffer = flushStorage.GetBuffer().Get(cbs).Value; + var image = view.GetHandle(); + + view.CopyFromOrToBuffer(cbs, buffer, image, size, true, 0, 0, info.GetLayers(), info.Levels, singleSlice: false); + } + + flushStorage.WaitForFences(); + return flushStorage.GetDataStorage(0, size); + } + + public Span GetTextureData(CommandBufferPool cbp, Texture view, int size, int layer, int level) + { + var flushStorage = ResizeIfNeeded(size); + + using (var cbs = cbp.Rent()) + { + var buffer = flushStorage.GetBuffer().Get(cbs).Value; + var image = view.GetHandle(); + + view.CopyFromOrToBuffer(cbs, buffer, image, size, true, layer, level, 1, 1, singleSlice: true); + } + + flushStorage.WaitForFences(); + return flushStorage.GetDataStorage(0, size); + } + + public void Dispose() + { + _flushStorage.Dispose(); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/Pipeline.cs b/src/Ryujinx.Graphics.Metal/Pipeline.cs new file mode 100644 index 0000000000..1139740619 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Pipeline.cs @@ -0,0 +1,877 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Shader; +using SharpMetal.Foundation; +using SharpMetal.Metal; +using SharpMetal.QuartzCore; +using System; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + public enum EncoderType + { + Blit, + Compute, + Render, + None + } + + [SupportedOSPlatform("macos")] + class Pipeline : IPipeline, IEncoderFactory, IDisposable + { + private const ulong MinByteWeightForFlush = 256 * 1024 * 1024; // MiB + + private readonly MTLDevice _device; + private readonly MetalRenderer _renderer; + private EncoderStateManager _encoderStateManager; + private ulong _byteWeight; + + public MTLCommandBuffer CommandBuffer; + + public IndexBufferPattern QuadsToTrisPattern; + public IndexBufferPattern TriFanToTrisPattern; + + internal CommandBufferScoped? PreloadCbs { get; private set; } + internal CommandBufferScoped Cbs { get; private set; } + internal CommandBufferEncoder Encoders => Cbs.Encoders; + internal EncoderType CurrentEncoderType => Encoders.CurrentEncoderType; + + public Pipeline(MTLDevice device, MetalRenderer renderer) + { + _device = device; + _renderer = renderer; + + renderer.CommandBufferPool.Initialize(this); + + CommandBuffer = (Cbs = _renderer.CommandBufferPool.Rent()).CommandBuffer; + } + + internal void InitEncoderStateManager(BufferManager bufferManager) + { + _encoderStateManager = new EncoderStateManager(_device, bufferManager, this); + + QuadsToTrisPattern = new IndexBufferPattern(_renderer, 4, 6, 0, [0, 1, 2, 0, 2, 3], 4, false); + TriFanToTrisPattern = new IndexBufferPattern(_renderer, 3, 3, 2, [int.MinValue, -1, 0], 1, true); + } + + public EncoderState SwapState(EncoderState state, DirtyFlags flags = DirtyFlags.All, bool endRenderPass = true) + { + if (endRenderPass && CurrentEncoderType == EncoderType.Render) + { + EndCurrentPass(); + } + + return _encoderStateManager.SwapState(state, flags); + } + + public PredrawState SavePredrawState() + { + return _encoderStateManager.SavePredrawState(); + } + + public void RestorePredrawState(PredrawState state) + { + _encoderStateManager.RestorePredrawState(state); + } + + public void SetClearLoadAction(bool clear) + { + _encoderStateManager.SetClearLoadAction(clear); + } + + public MTLRenderCommandEncoder GetOrCreateRenderEncoder(bool forDraw = false) + { + // Mark all state as dirty to ensure it is set on the new encoder + if (Cbs.Encoders.CurrentEncoderType != EncoderType.Render) + { + _encoderStateManager.SignalRenderDirty(); + } + + if (forDraw) + { + _encoderStateManager.RenderResourcesPrepass(); + } + + MTLRenderCommandEncoder renderCommandEncoder = Cbs.Encoders.EnsureRenderEncoder(); + + if (forDraw) + { + _encoderStateManager.RebindRenderState(renderCommandEncoder); + } + + return renderCommandEncoder; + } + + public MTLBlitCommandEncoder GetOrCreateBlitEncoder() + { + return Cbs.Encoders.EnsureBlitEncoder(); + } + + public MTLComputeCommandEncoder GetOrCreateComputeEncoder(bool forDispatch = false) + { + // Mark all state as dirty to ensure it is set on the new encoder + if (Cbs.Encoders.CurrentEncoderType != EncoderType.Compute) + { + _encoderStateManager.SignalComputeDirty(); + } + + if (forDispatch) + { + _encoderStateManager.ComputeResourcesPrepass(); + } + + MTLComputeCommandEncoder computeCommandEncoder = Cbs.Encoders.EnsureComputeEncoder(); + + if (forDispatch) + { + _encoderStateManager.RebindComputeState(computeCommandEncoder); + } + + return computeCommandEncoder; + } + + public void EndCurrentPass() + { + Cbs.Encoders.EndCurrentPass(); + } + + public MTLRenderCommandEncoder CreateRenderCommandEncoder() + { + return _encoderStateManager.CreateRenderCommandEncoder(); + } + + public MTLComputeCommandEncoder CreateComputeCommandEncoder() + { + return _encoderStateManager.CreateComputeCommandEncoder(); + } + + public void Present(CAMetalDrawable drawable, Texture src, Extents2D srcRegion, Extents2D dstRegion, bool isLinear) + { + // TODO: Clean this up + var textureInfo = new TextureCreateInfo((int)drawable.Texture.Width, (int)drawable.Texture.Height, (int)drawable.Texture.Depth, (int)drawable.Texture.MipmapLevelCount, (int)drawable.Texture.SampleCount, 0, 0, 0, Format.B8G8R8A8Unorm, 0, Target.Texture2D, SwizzleComponent.Red, SwizzleComponent.Green, SwizzleComponent.Blue, SwizzleComponent.Alpha); + var dst = new Texture(_device, _renderer, this, textureInfo, drawable.Texture, 0, 0); + + _renderer.HelperShader.BlitColor(Cbs, src, dst, srcRegion, dstRegion, isLinear, true); + + EndCurrentPass(); + + Cbs.CommandBuffer.PresentDrawable(drawable); + + FlushCommandsImpl(); + + // TODO: Auto flush counting + _renderer.SyncManager.GetAndResetWaitTicks(); + + // Cleanup + dst.Dispose(); + } + + public CommandBufferScoped GetPreloadCommandBuffer() + { + PreloadCbs ??= _renderer.CommandBufferPool.Rent(); + + return PreloadCbs.Value; + } + + public void FlushCommandsIfWeightExceeding(IAuto disposedResource, ulong byteWeight) + { + bool usedByCurrentCb = disposedResource.HasCommandBufferDependency(Cbs); + + if (PreloadCbs != null && !usedByCurrentCb) + { + usedByCurrentCb = disposedResource.HasCommandBufferDependency(PreloadCbs.Value); + } + + if (usedByCurrentCb) + { + // Since we can only free memory after the command buffer that uses a given resource was executed, + // keeping the command buffer might cause a high amount of memory to be in use. + // To prevent that, we force submit command buffers if the memory usage by resources + // in use by the current command buffer is above a given limit, and those resources were disposed. + _byteWeight += byteWeight; + + if (_byteWeight >= MinByteWeightForFlush) + { + FlushCommandsImpl(); + } + } + } + + public void FlushCommandsImpl() + { + EndCurrentPass(); + + _byteWeight = 0; + + if (PreloadCbs != null) + { + PreloadCbs.Value.Dispose(); + PreloadCbs = null; + } + + CommandBuffer = (Cbs = _renderer.CommandBufferPool.ReturnAndRent(Cbs)).CommandBuffer; + _renderer.RegisterFlush(); + } + + public void DirtyTextures() + { + _encoderStateManager.DirtyTextures(); + } + + public void DirtyImages() + { + _encoderStateManager.DirtyImages(); + } + + public void Blit( + Texture src, + Texture dst, + Extents2D srcRegion, + Extents2D dstRegion, + bool isDepthOrStencil, + bool linearFilter) + { + if (isDepthOrStencil) + { + _renderer.HelperShader.BlitDepthStencil(Cbs, src, dst, srcRegion, dstRegion); + } + else + { + _renderer.HelperShader.BlitColor(Cbs, src, dst, srcRegion, dstRegion, linearFilter); + } + } + + public void Barrier() + { + switch (CurrentEncoderType) + { + case EncoderType.Render: + { + var scope = MTLBarrierScope.Buffers | MTLBarrierScope.Textures | MTLBarrierScope.RenderTargets; + MTLRenderStages stages = MTLRenderStages.RenderStageVertex | MTLRenderStages.RenderStageFragment; + Encoders.RenderEncoder.MemoryBarrier(scope, stages, stages); + break; + } + case EncoderType.Compute: + { + var scope = MTLBarrierScope.Buffers | MTLBarrierScope.Textures | MTLBarrierScope.RenderTargets; + Encoders.ComputeEncoder.MemoryBarrier(scope); + break; + } + } + } + + public void ClearBuffer(BufferHandle destination, int offset, int size, uint value) + { + var blitCommandEncoder = GetOrCreateBlitEncoder(); + + var mtlBuffer = _renderer.BufferManager.GetBuffer(destination, offset, size, true).Get(Cbs, offset, size, true).Value; + + // Might need a closer look, range's count, lower, and upper bound + // must be a multiple of 4 + blitCommandEncoder.FillBuffer(mtlBuffer, + new NSRange + { + location = (ulong)offset, + length = (ulong)size + }, + (byte)value); + } + + public void ClearRenderTargetColor(int index, int layer, int layerCount, uint componentMask, ColorF color) + { + float[] colors = [color.Red, color.Green, color.Blue, color.Alpha]; + var dst = _encoderStateManager.RenderTargets[index]; + + // TODO: Remove workaround for Wonder which has an invalid texture due to unsupported format + if (dst == null) + { + Logger.Warning?.PrintMsg(LogClass.Gpu, "Attempted to clear invalid render target!"); + return; + } + + _renderer.HelperShader.ClearColor(index, colors, componentMask, dst.Width, dst.Height, dst.Info.Format); + } + + public void ClearRenderTargetDepthStencil(int layer, int layerCount, float depthValue, bool depthMask, int stencilValue, int stencilMask) + { + var depthStencil = _encoderStateManager.DepthStencil; + + if (depthStencil == null) + { + return; + } + + _renderer.HelperShader.ClearDepthStencil(depthValue, depthMask, stencilValue, stencilMask, depthStencil.Width, depthStencil.Height); + } + + public void CommandBufferBarrier() + { + Barrier(); + } + + public void CopyBuffer(BufferHandle src, BufferHandle dst, int srcOffset, int dstOffset, int size) + { + var srcBuffer = _renderer.BufferManager.GetBuffer(src, srcOffset, size, false); + var dstBuffer = _renderer.BufferManager.GetBuffer(dst, dstOffset, size, true); + + BufferHolder.Copy(Cbs, srcBuffer, dstBuffer, srcOffset, dstOffset, size); + } + + public void PushDebugGroup(string name) + { + var encoder = Encoders.CurrentEncoder; + var debugGroupName = StringHelper.NSString(name); + + if (encoder == null) + { + return; + } + + switch (Encoders.CurrentEncoderType) + { + case EncoderType.Render: + encoder.Value.PushDebugGroup(debugGroupName); + break; + case EncoderType.Blit: + encoder.Value.PushDebugGroup(debugGroupName); + break; + case EncoderType.Compute: + encoder.Value.PushDebugGroup(debugGroupName); + break; + } + } + + public void PopDebugGroup() + { + var encoder = Encoders.CurrentEncoder; + + if (encoder == null) + { + return; + } + + switch (Encoders.CurrentEncoderType) + { + case EncoderType.Render: + encoder.Value.PopDebugGroup(); + break; + case EncoderType.Blit: + encoder.Value.PopDebugGroup(); + break; + case EncoderType.Compute: + encoder.Value.PopDebugGroup(); + break; + } + } + + public void DispatchCompute(int groupsX, int groupsY, int groupsZ) + { + DispatchCompute(groupsX, groupsY, groupsZ, String.Empty); + } + + public void DispatchCompute(int groupsX, int groupsY, int groupsZ, string debugGroupName) + { + var computeCommandEncoder = GetOrCreateComputeEncoder(true); + + ComputeSize localSize = _encoderStateManager.ComputeLocalSize; + + if (debugGroupName != String.Empty) + { + PushDebugGroup(debugGroupName); + } + + computeCommandEncoder.DispatchThreadgroups( + new MTLSize { width = (ulong)groupsX, height = (ulong)groupsY, depth = (ulong)groupsZ }, + new MTLSize { width = (ulong)localSize.X, height = (ulong)localSize.Y, depth = (ulong)localSize.Z }); + + if (debugGroupName != String.Empty) + { + PopDebugGroup(); + } + } + + public void Draw(int vertexCount, int instanceCount, int firstVertex, int firstInstance) + { + Draw(vertexCount, instanceCount, firstVertex, firstInstance, String.Empty); + } + + public void Draw(int vertexCount, int instanceCount, int firstVertex, int firstInstance, string debugGroupName) + { + if (vertexCount == 0) + { + return; + } + + var primitiveType = TopologyRemap(_encoderStateManager.Topology).Convert(); + + if (TopologyUnsupported(_encoderStateManager.Topology)) + { + var pattern = GetIndexBufferPattern(); + + BufferHandle handle = pattern.GetRepeatingBuffer(vertexCount, out int indexCount); + var buffer = _renderer.BufferManager.GetBuffer(handle, false); + var mtlBuffer = buffer.Get(Cbs, 0, indexCount * sizeof(int)).Value; + + var renderCommandEncoder = GetOrCreateRenderEncoder(true); + + renderCommandEncoder.DrawIndexedPrimitives( + primitiveType, + (ulong)indexCount, + MTLIndexType.UInt32, + mtlBuffer, + 0); + } + else + { + var renderCommandEncoder = GetOrCreateRenderEncoder(true); + + if (debugGroupName != String.Empty) + { + PushDebugGroup(debugGroupName); + } + + renderCommandEncoder.DrawPrimitives( + primitiveType, + (ulong)firstVertex, + (ulong)vertexCount, + (ulong)instanceCount, + (ulong)firstInstance); + + if (debugGroupName != String.Empty) + { + PopDebugGroup(); + } + } + } + + private IndexBufferPattern GetIndexBufferPattern() + { + return _encoderStateManager.Topology switch + { + PrimitiveTopology.Quads => QuadsToTrisPattern, + PrimitiveTopology.TriangleFan or PrimitiveTopology.Polygon => TriFanToTrisPattern, + _ => throw new NotSupportedException($"Unsupported topology: {_encoderStateManager.Topology}"), + }; + } + + private PrimitiveTopology TopologyRemap(PrimitiveTopology topology) + { + return topology switch + { + PrimitiveTopology.Quads => PrimitiveTopology.Triangles, + PrimitiveTopology.QuadStrip => PrimitiveTopology.TriangleStrip, + PrimitiveTopology.TriangleFan or PrimitiveTopology.Polygon => PrimitiveTopology.Triangles, + _ => topology, + }; + } + + private bool TopologyUnsupported(PrimitiveTopology topology) + { + return topology switch + { + PrimitiveTopology.Quads or PrimitiveTopology.TriangleFan or PrimitiveTopology.Polygon => true, + _ => false, + }; + } + + public void DrawIndexed(int indexCount, int instanceCount, int firstIndex, int firstVertex, int firstInstance) + { + if (indexCount == 0) + { + return; + } + + MTLBuffer mtlBuffer; + int offset; + MTLIndexType type; + int finalIndexCount = indexCount; + + var primitiveType = TopologyRemap(_encoderStateManager.Topology).Convert(); + + if (TopologyUnsupported(_encoderStateManager.Topology)) + { + var pattern = GetIndexBufferPattern(); + int convertedCount = pattern.GetConvertedCount(indexCount); + + finalIndexCount = convertedCount; + + (mtlBuffer, offset, type) = _encoderStateManager.IndexBuffer.GetConvertedIndexBuffer(_renderer, Cbs, firstIndex, indexCount, convertedCount, pattern); + } + else + { + (mtlBuffer, offset, type) = _encoderStateManager.IndexBuffer.GetIndexBuffer(_renderer, Cbs); + } + + if (mtlBuffer.NativePtr != IntPtr.Zero) + { + var renderCommandEncoder = GetOrCreateRenderEncoder(true); + + renderCommandEncoder.DrawIndexedPrimitives( + primitiveType, + (ulong)finalIndexCount, + type, + mtlBuffer, + (ulong)offset, + (ulong)instanceCount, + firstVertex, + (ulong)firstInstance); + } + } + + public void DrawIndexedIndirect(BufferRange indirectBuffer) + { + DrawIndexedIndirectOffset(indirectBuffer); + } + + public void DrawIndexedIndirectOffset(BufferRange indirectBuffer, int offset = 0) + { + // TODO: Reindex unsupported topologies + if (TopologyUnsupported(_encoderStateManager.Topology)) + { + Logger.Warning?.Print(LogClass.Gpu, $"Drawing indexed with unsupported topology: {_encoderStateManager.Topology}"); + } + + var buffer = _renderer.BufferManager + .GetBuffer(indirectBuffer.Handle, indirectBuffer.Offset, indirectBuffer.Size, false) + .Get(Cbs, indirectBuffer.Offset, indirectBuffer.Size).Value; + + var primitiveType = TopologyRemap(_encoderStateManager.Topology).Convert(); + + (MTLBuffer indexBuffer, int indexOffset, MTLIndexType type) = _encoderStateManager.IndexBuffer.GetIndexBuffer(_renderer, Cbs); + + if (indexBuffer.NativePtr != IntPtr.Zero && buffer.NativePtr != IntPtr.Zero) + { + var renderCommandEncoder = GetOrCreateRenderEncoder(true); + + renderCommandEncoder.DrawIndexedPrimitives( + primitiveType, + type, + indexBuffer, + (ulong)indexOffset, + buffer, + (ulong)(indirectBuffer.Offset + offset)); + } + } + + public void DrawIndexedIndirectCount(BufferRange indirectBuffer, BufferRange parameterBuffer, int maxDrawCount, int stride) + { + for (int i = 0; i < maxDrawCount; i++) + { + DrawIndexedIndirectOffset(indirectBuffer, stride * i); + } + } + + public void DrawIndirect(BufferRange indirectBuffer) + { + DrawIndirectOffset(indirectBuffer); + } + + public void DrawIndirectOffset(BufferRange indirectBuffer, int offset = 0) + { + if (TopologyUnsupported(_encoderStateManager.Topology)) + { + // TODO: Reindex unsupported topologies + Logger.Warning?.Print(LogClass.Gpu, $"Drawing indirect with unsupported topology: {_encoderStateManager.Topology}"); + } + + var buffer = _renderer.BufferManager + .GetBuffer(indirectBuffer.Handle, indirectBuffer.Offset, indirectBuffer.Size, false) + .Get(Cbs, indirectBuffer.Offset, indirectBuffer.Size).Value; + + var primitiveType = TopologyRemap(_encoderStateManager.Topology).Convert(); + var renderCommandEncoder = GetOrCreateRenderEncoder(true); + + renderCommandEncoder.DrawPrimitives( + primitiveType, + buffer, + (ulong)(indirectBuffer.Offset + offset)); + } + + public void DrawIndirectCount(BufferRange indirectBuffer, BufferRange parameterBuffer, int maxDrawCount, int stride) + { + for (int i = 0; i < maxDrawCount; i++) + { + DrawIndirectOffset(indirectBuffer, stride * i); + } + } + + public void DrawTexture(ITexture texture, ISampler sampler, Extents2DF srcRegion, Extents2DF dstRegion) + { + _renderer.HelperShader.DrawTexture(texture, sampler, srcRegion, dstRegion); + } + + public void SetAlphaTest(bool enable, float reference, CompareOp op) + { + // This is currently handled using shader specialization, as Metal does not support alpha test. + // In the future, we may want to use this to write the reference value into the support buffer, + // to avoid creating one version of the shader per reference value used. + } + + public void SetBlendState(AdvancedBlendDescriptor blend) + { + // Metal does not support advanced blend. + } + + public void SetBlendState(int index, BlendDescriptor blend) + { + _encoderStateManager.UpdateBlendDescriptors(index, blend); + } + + public void SetDepthBias(PolygonModeMask enables, float factor, float units, float clamp) + { + if (enables == 0) + { + _encoderStateManager.UpdateDepthBias(0, 0, 0); + } + else + { + _encoderStateManager.UpdateDepthBias(units, factor, clamp); + } + } + + public void SetDepthClamp(bool clamp) + { + _encoderStateManager.UpdateDepthClamp(clamp); + } + + public void SetDepthMode(DepthMode mode) + { + // Metal does not support depth clip control. + } + + public void SetDepthTest(DepthTestDescriptor depthTest) + { + _encoderStateManager.UpdateDepthState(depthTest); + } + + public void SetFaceCulling(bool enable, Face face) + { + _encoderStateManager.UpdateCullMode(enable, face); + } + + public void SetFrontFace(FrontFace frontFace) + { + _encoderStateManager.UpdateFrontFace(frontFace); + } + + public void SetIndexBuffer(BufferRange buffer, IndexType type) + { + _encoderStateManager.UpdateIndexBuffer(buffer, type); + } + + public void SetImage(ShaderStage stage, int binding, ITexture image) + { + if (image is TextureBase img) + { + _encoderStateManager.UpdateImage(stage, binding, img); + } + } + + public void SetImageArray(ShaderStage stage, int binding, IImageArray array) + { + if (array is ImageArray imageArray) + { + _encoderStateManager.UpdateImageArray(stage, binding, imageArray); + } + } + + public void SetImageArraySeparate(ShaderStage stage, int setIndex, IImageArray array) + { + if (array is ImageArray imageArray) + { + _encoderStateManager.UpdateImageArraySeparate(stage, setIndex, imageArray); + } + } + + public void SetLineParameters(float width, bool smooth) + { + // Metal does not support wide-lines. + } + + public void SetLogicOpState(bool enable, LogicalOp op) + { + _encoderStateManager.UpdateLogicOpState(enable, op); + } + + public void SetMultisampleState(MultisampleDescriptor multisample) + { + _encoderStateManager.UpdateMultisampleState(multisample); + } + + public void SetPatchParameters(int vertices, ReadOnlySpan defaultOuterLevel, ReadOnlySpan defaultInnerLevel) + { + Logger.Warning?.Print(LogClass.Gpu, "Not Implemented!"); + } + + public void SetPointParameters(float size, bool isProgramPointSize, bool enablePointSprite, Origin origin) + { + Logger.Warning?.Print(LogClass.Gpu, "Not Implemented!"); + } + + public void SetPolygonMode(PolygonMode frontMode, PolygonMode backMode) + { + // Metal does not support polygon mode. + } + + public void SetPrimitiveRestart(bool enable, int index) + { + // Always active for LineStrip and TriangleStrip + // https://github.com/gpuweb/gpuweb/issues/1220#issuecomment-732483263 + // https://developer.apple.com/documentation/metal/mtlrendercommandencoder/1515520-drawindexedprimitives + // https://stackoverflow.com/questions/70813665/how-to-render-multiple-trianglestrips-using-metal + + // Emulating disabling this is very difficult. It's unlikely for an index buffer to use the largest possible index, + // so it's fine nearly all of the time. + } + + public void SetPrimitiveTopology(PrimitiveTopology topology) + { + _encoderStateManager.UpdatePrimitiveTopology(topology); + } + + public void SetProgram(IProgram program) + { + _encoderStateManager.UpdateProgram(program); + } + + public void SetRasterizerDiscard(bool discard) + { + _encoderStateManager.UpdateRasterizerDiscard(discard); + } + + public void SetRenderTargetColorMasks(ReadOnlySpan componentMask) + { + _encoderStateManager.UpdateRenderTargetColorMasks(componentMask); + } + + public void SetRenderTargets(ITexture[] colors, ITexture depthStencil) + { + _encoderStateManager.UpdateRenderTargets(colors, depthStencil); + } + + public void SetScissors(ReadOnlySpan> regions) + { + _encoderStateManager.UpdateScissors(regions); + } + + public void SetStencilTest(StencilTestDescriptor stencilTest) + { + _encoderStateManager.UpdateStencilState(stencilTest); + } + + public void SetUniformBuffers(ReadOnlySpan buffers) + { + _encoderStateManager.UpdateUniformBuffers(buffers); + } + + public void SetStorageBuffers(ReadOnlySpan buffers) + { + _encoderStateManager.UpdateStorageBuffers(buffers); + } + + internal void SetStorageBuffers(int first, ReadOnlySpan> buffers) + { + _encoderStateManager.UpdateStorageBuffers(first, buffers); + } + + public void SetTextureAndSampler(ShaderStage stage, int binding, ITexture texture, ISampler sampler) + { + if (texture is TextureBase tex) + { + if (sampler == null || sampler is SamplerHolder) + { + _encoderStateManager.UpdateTextureAndSampler(stage, binding, tex, (SamplerHolder)sampler); + } + } + } + + public void SetTextureArray(ShaderStage stage, int binding, ITextureArray array) + { + if (array is TextureArray textureArray) + { + _encoderStateManager.UpdateTextureArray(stage, binding, textureArray); + } + } + + public void SetTextureArraySeparate(ShaderStage stage, int setIndex, ITextureArray array) + { + if (array is TextureArray textureArray) + { + _encoderStateManager.UpdateTextureArraySeparate(stage, setIndex, textureArray); + } + } + + public void SetUserClipDistance(int index, bool enableClip) + { + // TODO. Same as Vulkan + } + + public void SetVertexAttribs(ReadOnlySpan vertexAttribs) + { + _encoderStateManager.UpdateVertexAttribs(vertexAttribs); + } + + public void SetVertexBuffers(ReadOnlySpan vertexBuffers) + { + _encoderStateManager.UpdateVertexBuffers(vertexBuffers); + } + + public void SetViewports(ReadOnlySpan viewports) + { + _encoderStateManager.UpdateViewports(viewports); + } + + public void TextureBarrier() + { + if (CurrentEncoderType == EncoderType.Render) + { + Encoders.RenderEncoder.MemoryBarrier(MTLBarrierScope.Textures, MTLRenderStages.RenderStageFragment, MTLRenderStages.RenderStageFragment); + } + } + + public void TextureBarrierTiled() + { + TextureBarrier(); + } + + public bool TryHostConditionalRendering(ICounterEvent value, ulong compare, bool isEqual) + { + // TODO: Implementable via indirect draw commands + return false; + } + + public bool TryHostConditionalRendering(ICounterEvent value, ICounterEvent compare, bool isEqual) + { + // TODO: Implementable via indirect draw commands + return false; + } + + public void EndHostConditionalRendering() + { + // TODO: Implementable via indirect draw commands + } + + public void BeginTransformFeedback(PrimitiveTopology topology) + { + // Metal does not support transform feedback. + } + + public void EndTransformFeedback() + { + // Metal does not support transform feedback. + } + + public void SetTransformFeedbackBuffers(ReadOnlySpan buffers) + { + // Metal does not support transform feedback. + } + + public void Dispose() + { + EndCurrentPass(); + _encoderStateManager.Dispose(); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/Program.cs b/src/Ryujinx.Graphics.Metal/Program.cs new file mode 100644 index 0000000000..37bae5817f --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Program.cs @@ -0,0 +1,286 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Shader; +using SharpMetal.Foundation; +using SharpMetal.Metal; +using System; +using System.Collections.Generic; +using System.Collections.ObjectModel; +using System.Runtime.InteropServices; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + class Program : IProgram + { + private ProgramLinkStatus _status; + private readonly ShaderSource[] _shaders; + private readonly GCHandle[] _handles; + private int _successCount; + + private readonly MetalRenderer _renderer; + + public MTLFunction VertexFunction; + public MTLFunction FragmentFunction; + public MTLFunction ComputeFunction; + public ComputeSize ComputeLocalSize { get; } + + private HashTableSlim _graphicsPipelineCache; + private MTLComputePipelineState? _computePipelineCache; + private bool _firstBackgroundUse; + + public ResourceBindingSegment[][] BindingSegments { get; } + // Argument buffer sizes for Vertex or Compute stages + public int[] ArgumentBufferSizes { get; } + // Argument buffer sizes for Fragment stage + public int[] FragArgumentBufferSizes { get; } + + public Program( + MetalRenderer renderer, + MTLDevice device, + ShaderSource[] shaders, + ResourceLayout resourceLayout, + ComputeSize computeLocalSize = default) + { + _renderer = renderer; + renderer.Programs.Add(this); + + ComputeLocalSize = computeLocalSize; + _shaders = shaders; + _handles = new GCHandle[_shaders.Length]; + + _status = ProgramLinkStatus.Incomplete; + + for (int i = 0; i < _shaders.Length; i++) + { + ShaderSource shader = _shaders[i]; + + using var compileOptions = new MTLCompileOptions + { + PreserveInvariance = true, + LanguageVersion = MTLLanguageVersion.Version31, + }; + var index = i; + + _handles[i] = device.NewLibrary(StringHelper.NSString(shader.Code), compileOptions, (library, error) => CompilationResultHandler(library, error, index)); + } + + (BindingSegments, ArgumentBufferSizes, FragArgumentBufferSizes) = BuildBindingSegments(resourceLayout.SetUsages); + } + + public void CompilationResultHandler(MTLLibrary library, NSError error, int index) + { + var shader = _shaders[index]; + + if (_handles[index].IsAllocated) + { + _handles[index].Free(); + } + + if (error != IntPtr.Zero) + { + Logger.Warning?.PrintMsg(LogClass.Gpu, shader.Code); + Logger.Warning?.Print(LogClass.Gpu, $"{shader.Stage} shader linking failed: \n{StringHelper.String(error.LocalizedDescription)}"); + _status = ProgramLinkStatus.Failure; + return; + } + + switch (shader.Stage) + { + case ShaderStage.Compute: + ComputeFunction = library.NewFunction(StringHelper.NSString("kernelMain")); + break; + case ShaderStage.Vertex: + VertexFunction = library.NewFunction(StringHelper.NSString("vertexMain")); + break; + case ShaderStage.Fragment: + FragmentFunction = library.NewFunction(StringHelper.NSString("fragmentMain")); + break; + default: + Logger.Warning?.Print(LogClass.Gpu, $"Cannot handle stage {shader.Stage}!"); + break; + } + + _successCount++; + + if (_successCount >= _shaders.Length && _status != ProgramLinkStatus.Failure) + { + _status = ProgramLinkStatus.Success; + } + } + + private static (ResourceBindingSegment[][], int[], int[]) BuildBindingSegments(ReadOnlyCollection setUsages) + { + ResourceBindingSegment[][] segments = new ResourceBindingSegment[setUsages.Count][]; + int[] argBufferSizes = new int[setUsages.Count]; + int[] fragArgBufferSizes = new int[setUsages.Count]; + + for (int setIndex = 0; setIndex < setUsages.Count; setIndex++) + { + List currentSegments = new(); + + ResourceUsage currentUsage = default; + int currentCount = 0; + + for (int index = 0; index < setUsages[setIndex].Usages.Count; index++) + { + ResourceUsage usage = setUsages[setIndex].Usages[index]; + + if (currentUsage.Binding + currentCount != usage.Binding || + currentUsage.Type != usage.Type || + currentUsage.Stages != usage.Stages || + currentUsage.ArrayLength > 1 || + usage.ArrayLength > 1) + { + if (currentCount != 0) + { + currentSegments.Add(new ResourceBindingSegment( + currentUsage.Binding, + currentCount, + currentUsage.Type, + currentUsage.Stages, + currentUsage.ArrayLength > 1)); + + var size = currentCount * ResourcePointerSize(currentUsage.Type); + if (currentUsage.Stages.HasFlag(ResourceStages.Fragment)) + { + fragArgBufferSizes[setIndex] += size; + } + + if (currentUsage.Stages.HasFlag(ResourceStages.Vertex) || + currentUsage.Stages.HasFlag(ResourceStages.Compute)) + { + argBufferSizes[setIndex] += size; + } + } + + currentUsage = usage; + currentCount = usage.ArrayLength; + } + else + { + currentCount++; + } + } + + if (currentCount != 0) + { + currentSegments.Add(new ResourceBindingSegment( + currentUsage.Binding, + currentCount, + currentUsage.Type, + currentUsage.Stages, + currentUsage.ArrayLength > 1)); + + var size = currentCount * ResourcePointerSize(currentUsage.Type); + if (currentUsage.Stages.HasFlag(ResourceStages.Fragment)) + { + fragArgBufferSizes[setIndex] += size; + } + + if (currentUsage.Stages.HasFlag(ResourceStages.Vertex) || + currentUsage.Stages.HasFlag(ResourceStages.Compute)) + { + argBufferSizes[setIndex] += size; + } + } + + segments[setIndex] = currentSegments.ToArray(); + } + + return (segments, argBufferSizes, fragArgBufferSizes); + } + + private static int ResourcePointerSize(ResourceType type) + { + return (type == ResourceType.TextureAndSampler ? 2 : 1); + } + + public ProgramLinkStatus CheckProgramLink(bool blocking) + { + if (blocking) + { + while (_status == ProgramLinkStatus.Incomplete) + { } + + return _status; + } + + return _status; + } + + public byte[] GetBinary() + { + return []; + } + + public void AddGraphicsPipeline(ref PipelineUid key, MTLRenderPipelineState pipeline) + { + (_graphicsPipelineCache ??= new()).Add(ref key, pipeline); + } + + public void AddComputePipeline(MTLComputePipelineState pipeline) + { + _computePipelineCache = pipeline; + } + + public bool TryGetGraphicsPipeline(ref PipelineUid key, out MTLRenderPipelineState pipeline) + { + if (_graphicsPipelineCache == null) + { + pipeline = default; + return false; + } + + if (!_graphicsPipelineCache.TryGetValue(ref key, out pipeline)) + { + if (_firstBackgroundUse) + { + Logger.Warning?.Print(LogClass.Gpu, "Background pipeline compile missed on draw - incorrect pipeline state?"); + _firstBackgroundUse = false; + } + + return false; + } + + _firstBackgroundUse = false; + + return true; + } + + public bool TryGetComputePipeline(out MTLComputePipelineState pipeline) + { + if (_computePipelineCache.HasValue) + { + pipeline = _computePipelineCache.Value; + return true; + } + + pipeline = default; + return false; + } + + public void Dispose() + { + if (!_renderer.Programs.Remove(this)) + { + return; + } + + if (_graphicsPipelineCache != null) + { + foreach (MTLRenderPipelineState pipeline in _graphicsPipelineCache.Values) + { + pipeline.Dispose(); + } + } + + _computePipelineCache?.Dispose(); + + VertexFunction.Dispose(); + FragmentFunction.Dispose(); + ComputeFunction.Dispose(); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/ResourceBindingSegment.cs b/src/Ryujinx.Graphics.Metal/ResourceBindingSegment.cs new file mode 100644 index 0000000000..8e6d88c4bf --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/ResourceBindingSegment.cs @@ -0,0 +1,22 @@ +using Ryujinx.Graphics.GAL; + +namespace Ryujinx.Graphics.Metal +{ + readonly struct ResourceBindingSegment + { + public readonly int Binding; + public readonly int Count; + public readonly ResourceType Type; + public readonly ResourceStages Stages; + public readonly bool IsArray; + + public ResourceBindingSegment(int binding, int count, ResourceType type, ResourceStages stages, bool isArray) + { + Binding = binding; + Count = count; + Type = type; + Stages = stages; + IsArray = isArray; + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/ResourceLayoutBuilder.cs b/src/Ryujinx.Graphics.Metal/ResourceLayoutBuilder.cs new file mode 100644 index 0000000000..36ae9bac6e --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/ResourceLayoutBuilder.cs @@ -0,0 +1,59 @@ +using Ryujinx.Graphics.GAL; +using System; +using System.Collections.Generic; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + class ResourceLayoutBuilder + { + private const int TotalSets = MetalRenderer.TotalSets; + + private readonly List[] _resourceDescriptors; + private readonly List[] _resourceUsages; + + public ResourceLayoutBuilder() + { + _resourceDescriptors = new List[TotalSets]; + _resourceUsages = new List[TotalSets]; + + for (int index = 0; index < TotalSets; index++) + { + _resourceDescriptors[index] = new(); + _resourceUsages[index] = new(); + } + } + + public ResourceLayoutBuilder Add(ResourceStages stages, ResourceType type, int binding, bool write = false) + { + uint setIndex = type switch + { + ResourceType.UniformBuffer => Constants.ConstantBuffersSetIndex, + ResourceType.StorageBuffer => Constants.StorageBuffersSetIndex, + ResourceType.TextureAndSampler or ResourceType.BufferTexture => Constants.TexturesSetIndex, + ResourceType.Image or ResourceType.BufferImage => Constants.ImagesSetIndex, + _ => throw new ArgumentException($"Invalid resource type \"{type}\"."), + }; + + _resourceDescriptors[setIndex].Add(new ResourceDescriptor(binding, 1, type, stages)); + _resourceUsages[setIndex].Add(new ResourceUsage(binding, 1, type, stages, write)); + + return this; + } + + public ResourceLayout Build() + { + var descriptors = new ResourceDescriptorCollection[TotalSets]; + var usages = new ResourceUsageCollection[TotalSets]; + + for (int index = 0; index < TotalSets; index++) + { + descriptors[index] = new ResourceDescriptorCollection(_resourceDescriptors[index].ToArray().AsReadOnly()); + usages[index] = new ResourceUsageCollection(_resourceUsages[index].ToArray().AsReadOnly()); + } + + return new ResourceLayout(descriptors.AsReadOnly(), usages.AsReadOnly()); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/Ryujinx.Graphics.Metal.csproj b/src/Ryujinx.Graphics.Metal/Ryujinx.Graphics.Metal.csproj new file mode 100644 index 0000000000..02afb150a3 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Ryujinx.Graphics.Metal.csproj @@ -0,0 +1,30 @@ + + + + true + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/Ryujinx.Graphics.Metal/SamplerHolder.cs b/src/Ryujinx.Graphics.Metal/SamplerHolder.cs new file mode 100644 index 0000000000..3241efa6d9 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/SamplerHolder.cs @@ -0,0 +1,90 @@ +using Ryujinx.Graphics.GAL; +using SharpMetal.Metal; +using System; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + class SamplerHolder : ISampler + { + private readonly MetalRenderer _renderer; + private readonly Auto _sampler; + + public SamplerHolder(MetalRenderer renderer, MTLDevice device, SamplerCreateInfo info) + { + _renderer = renderer; + + renderer.Samplers.Add(this); + + (MTLSamplerMinMagFilter minFilter, MTLSamplerMipFilter mipFilter) = info.MinFilter.Convert(); + + MTLSamplerBorderColor borderColor = GetConstrainedBorderColor(info.BorderColor, out _); + + using var descriptor = new MTLSamplerDescriptor + { + BorderColor = borderColor, + MinFilter = minFilter, + MagFilter = info.MagFilter.Convert(), + MipFilter = mipFilter, + CompareFunction = info.CompareOp.Convert(), + LodMinClamp = info.MinLod, + LodMaxClamp = info.MaxLod, + LodAverage = false, + MaxAnisotropy = Math.Max((uint)info.MaxAnisotropy, 1), + SAddressMode = info.AddressU.Convert(), + TAddressMode = info.AddressV.Convert(), + RAddressMode = info.AddressP.Convert(), + SupportArgumentBuffers = true + }; + + var sampler = device.NewSamplerState(descriptor); + + _sampler = new Auto(new DisposableSampler(sampler)); + } + + private static MTLSamplerBorderColor GetConstrainedBorderColor(ColorF arbitraryBorderColor, out bool cantConstrain) + { + float r = arbitraryBorderColor.Red; + float g = arbitraryBorderColor.Green; + float b = arbitraryBorderColor.Blue; + float a = arbitraryBorderColor.Alpha; + + if (r == 0f && g == 0f && b == 0f) + { + if (a == 1f) + { + cantConstrain = false; + return MTLSamplerBorderColor.OpaqueBlack; + } + + if (a == 0f) + { + cantConstrain = false; + return MTLSamplerBorderColor.TransparentBlack; + } + } + else if (r == 1f && g == 1f && b == 1f && a == 1f) + { + cantConstrain = false; + return MTLSamplerBorderColor.OpaqueWhite; + } + + cantConstrain = true; + return MTLSamplerBorderColor.OpaqueBlack; + } + + public Auto GetSampler() + { + return _sampler; + } + + public void Dispose() + { + if (_renderer.Samplers.Remove(this)) + { + _sampler.Dispose(); + } + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/Shaders/Blit.metal b/src/Ryujinx.Graphics.Metal/Shaders/Blit.metal new file mode 100644 index 0000000000..887878499c --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Shaders/Blit.metal @@ -0,0 +1,43 @@ +#include + +using namespace metal; + +struct CopyVertexOut { + float4 position [[position]]; + float2 uv; +}; + +struct TexCoords { + float data[4]; +}; + +struct ConstantBuffers { + constant TexCoords* tex_coord; +}; + +struct Textures +{ + texture2d texture; + sampler sampler; +}; + +vertex CopyVertexOut vertexMain(uint vid [[vertex_id]], + constant ConstantBuffers &constant_buffers [[buffer(CONSTANT_BUFFERS_INDEX)]]) { + CopyVertexOut out; + + int low = vid & 1; + int high = vid >> 1; + out.uv.x = constant_buffers.tex_coord->data[low]; + out.uv.y = constant_buffers.tex_coord->data[2 + high]; + out.position.x = (float(low) - 0.5f) * 2.0f; + out.position.y = (float(high) - 0.5f) * 2.0f; + out.position.z = 0.0f; + out.position.w = 1.0f; + + return out; +} + +fragment FORMAT4 fragmentMain(CopyVertexOut in [[stage_in]], + constant Textures &textures [[buffer(TEXTURES_INDEX)]]) { + return textures.texture.sample(textures.sampler, in.uv); +} diff --git a/src/Ryujinx.Graphics.Metal/Shaders/BlitMs.metal b/src/Ryujinx.Graphics.Metal/Shaders/BlitMs.metal new file mode 100644 index 0000000000..1077b6ceac --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Shaders/BlitMs.metal @@ -0,0 +1,45 @@ +#include + +using namespace metal; + +struct CopyVertexOut { + float4 position [[position]]; + float2 uv; +}; + +struct TexCoords { + float data[4]; +}; + +struct ConstantBuffers { + constant TexCoords* tex_coord; +}; + +struct Textures +{ + texture2d_ms texture; +}; + +vertex CopyVertexOut vertexMain(uint vid [[vertex_id]], + constant ConstantBuffers &constant_buffers [[buffer(CONSTANT_BUFFERS_INDEX)]]) { + CopyVertexOut out; + + int low = vid & 1; + int high = vid >> 1; + out.uv.x = constant_buffers.tex_coord->data[low]; + out.uv.y = constant_buffers.tex_coord->data[2 + high]; + out.position.x = (float(low) - 0.5f) * 2.0f; + out.position.y = (float(high) - 0.5f) * 2.0f; + out.position.z = 0.0f; + out.position.w = 1.0f; + + return out; +} + +fragment FORMAT4 fragmentMain(CopyVertexOut in [[stage_in]], + constant Textures &textures [[buffer(TEXTURES_INDEX)]], + uint sample_id [[sample_id]]) { + uint2 tex_size = uint2(textures.texture.get_width(), textures.texture.get_height()); + uint2 tex_coord = uint2(in.uv * float2(tex_size)); + return textures.texture.read(tex_coord, sample_id); +} diff --git a/src/Ryujinx.Graphics.Metal/Shaders/ChangeBufferStride.metal b/src/Ryujinx.Graphics.Metal/Shaders/ChangeBufferStride.metal new file mode 100644 index 0000000000..1a7d2c5749 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Shaders/ChangeBufferStride.metal @@ -0,0 +1,72 @@ +#include + +using namespace metal; + +struct StrideArguments { + int4 data; +}; + +struct InData { + uint8_t data[1]; +}; + +struct OutData { + uint8_t data[1]; +}; + +struct ConstantBuffers { + constant StrideArguments* stride_arguments; +}; + +struct StorageBuffers { + device InData* in_data; + device OutData* out_data; +}; + +kernel void kernelMain(constant ConstantBuffers &constant_buffers [[buffer(CONSTANT_BUFFERS_INDEX)]], + device StorageBuffers &storage_buffers [[buffer(STORAGE_BUFFERS_INDEX)]], + uint3 thread_position_in_grid [[thread_position_in_grid]], + uint3 threads_per_threadgroup [[threads_per_threadgroup]], + uint3 threadgroups_per_grid [[threadgroups_per_grid]]) +{ + // Determine what slice of the stride copies this invocation will perform. + + int sourceStride = constant_buffers.stride_arguments->data.x; + int targetStride = constant_buffers.stride_arguments->data.y; + int bufferSize = constant_buffers.stride_arguments->data.z; + int sourceOffset = constant_buffers.stride_arguments->data.w; + + int strideRemainder = targetStride - sourceStride; + int invocations = int(threads_per_threadgroup.x * threadgroups_per_grid.x); + + int copiesRequired = bufferSize / sourceStride; + + // Find the copies that this invocation should perform. + + // - Copies that all invocations perform. + int allInvocationCopies = copiesRequired / invocations; + + // - Extra remainder copy that this invocation performs. + int index = int(thread_position_in_grid.x); + int extra = (index < (copiesRequired % invocations)) ? 1 : 0; + + int copyCount = allInvocationCopies + extra; + + // Finally, get the starting offset. Make sure to count extra copies. + + int startCopy = allInvocationCopies * index + min(copiesRequired % invocations, index); + + int srcOffset = sourceOffset + startCopy * sourceStride; + int dstOffset = startCopy * targetStride; + + // Perform the copies for this region + for (int i = 0; i < copyCount; i++) { + for (int j = 0; j < sourceStride; j++) { + storage_buffers.out_data->data[dstOffset++] = storage_buffers.in_data->data[srcOffset++]; + } + + for (int j = 0; j < strideRemainder; j++) { + storage_buffers.out_data->data[dstOffset++] = uint8_t(0); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/Shaders/ColorClear.metal b/src/Ryujinx.Graphics.Metal/Shaders/ColorClear.metal new file mode 100644 index 0000000000..46a57e035d --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Shaders/ColorClear.metal @@ -0,0 +1,38 @@ +#include + +using namespace metal; + +struct VertexOut { + float4 position [[position]]; +}; + +struct ClearColor { + FORMAT4 data; +}; + +struct ConstantBuffers { + constant ClearColor* clear_color; +}; + +vertex VertexOut vertexMain(ushort vid [[vertex_id]]) { + int low = vid & 1; + int high = vid >> 1; + + VertexOut out; + + out.position.x = (float(low) - 0.5f) * 2.0f; + out.position.y = (float(high) - 0.5f) * 2.0f; + out.position.z = 0.0f; + out.position.w = 1.0f; + + return out; +} + +struct FragmentOut { + FORMAT4 color [[color(COLOR_ATTACHMENT_INDEX)]]; +}; + +fragment FragmentOut fragmentMain(VertexOut in [[stage_in]], + constant ConstantBuffers &constant_buffers [[buffer(CONSTANT_BUFFERS_INDEX)]]) { + return {constant_buffers.clear_color->data}; +} diff --git a/src/Ryujinx.Graphics.Metal/Shaders/ConvertD32S8ToD24S8.metal b/src/Ryujinx.Graphics.Metal/Shaders/ConvertD32S8ToD24S8.metal new file mode 100644 index 0000000000..870ac3d78d --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Shaders/ConvertD32S8ToD24S8.metal @@ -0,0 +1,66 @@ +#include + +using namespace metal; + +struct StrideArguments { + int pixelCount; + int dstStartOffset; +}; + +struct InData { + uint data[1]; +}; + +struct OutData { + uint data[1]; +}; + +struct ConstantBuffers { + constant StrideArguments* stride_arguments; +}; + +struct StorageBuffers { + device InData* in_data; + device OutData* out_data; +}; + +kernel void kernelMain(constant ConstantBuffers &constant_buffers [[buffer(CONSTANT_BUFFERS_INDEX)]], + device StorageBuffers &storage_buffers [[buffer(STORAGE_BUFFERS_INDEX)]], + uint3 thread_position_in_grid [[thread_position_in_grid]], + uint3 threads_per_threadgroup [[threads_per_threadgroup]], + uint3 threadgroups_per_grid [[threadgroups_per_grid]]) +{ + // Determine what slice of the stride copies this invocation will perform. + int invocations = int(threads_per_threadgroup.x * threadgroups_per_grid.x); + + int copiesRequired = constant_buffers.stride_arguments->pixelCount; + + // Find the copies that this invocation should perform. + + // - Copies that all invocations perform. + int allInvocationCopies = copiesRequired / invocations; + + // - Extra remainder copy that this invocation performs. + int index = int(thread_position_in_grid.x); + int extra = (index < (copiesRequired % invocations)) ? 1 : 0; + + int copyCount = allInvocationCopies + extra; + + // Finally, get the starting offset. Make sure to count extra copies. + + int startCopy = allInvocationCopies * index + min(copiesRequired % invocations, index); + + int srcOffset = startCopy * 2; + int dstOffset = constant_buffers.stride_arguments->dstStartOffset + startCopy; + + // Perform the conversion for this region. + for (int i = 0; i < copyCount; i++) + { + float depth = as_type(storage_buffers.in_data->data[srcOffset++]); + uint stencil = storage_buffers.in_data->data[srcOffset++]; + + uint rescaledDepth = uint(clamp(depth, 0.0, 1.0) * 16777215.0); + + storage_buffers.out_data->data[dstOffset++] = (rescaledDepth << 8) | (stencil & 0xff); + } +} diff --git a/src/Ryujinx.Graphics.Metal/Shaders/ConvertIndexBuffer.metal b/src/Ryujinx.Graphics.Metal/Shaders/ConvertIndexBuffer.metal new file mode 100644 index 0000000000..c8fee58184 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Shaders/ConvertIndexBuffer.metal @@ -0,0 +1,59 @@ +#include + +using namespace metal; + +struct IndexBufferPattern { + int pattern[8]; + int primitiveVertices; + int primitiveVerticesOut; + int indexSize; + int indexSizeOut; + int baseIndex; + int indexStride; + int srcOffset; + int totalPrimitives; +}; + +struct InData { + uint8_t data[1]; +}; + +struct OutData { + uint8_t data[1]; +}; + +struct StorageBuffers { + device InData* in_data; + device OutData* out_data; + constant IndexBufferPattern* index_buffer_pattern; +}; + +kernel void kernelMain(device StorageBuffers &storage_buffers [[buffer(STORAGE_BUFFERS_INDEX)]], + uint3 thread_position_in_grid [[thread_position_in_grid]]) +{ + int primitiveIndex = int(thread_position_in_grid.x); + if (primitiveIndex >= storage_buffers.index_buffer_pattern->totalPrimitives) + { + return; + } + + int inOffset = primitiveIndex * storage_buffers.index_buffer_pattern->indexStride; + int outOffset = primitiveIndex * storage_buffers.index_buffer_pattern->primitiveVerticesOut; + + for (int i = 0; i < storage_buffers.index_buffer_pattern->primitiveVerticesOut; i++) + { + int j; + int io = max(0, inOffset + storage_buffers.index_buffer_pattern->baseIndex + storage_buffers.index_buffer_pattern->pattern[i]) * storage_buffers.index_buffer_pattern->indexSize; + int oo = (outOffset + i) * storage_buffers.index_buffer_pattern->indexSizeOut; + + for (j = 0; j < storage_buffers.index_buffer_pattern->indexSize; j++) + { + storage_buffers.out_data->data[oo + j] = storage_buffers.in_data->data[storage_buffers.index_buffer_pattern->srcOffset + io + j]; + } + + for(; j < storage_buffers.index_buffer_pattern->indexSizeOut; j++) + { + storage_buffers.out_data->data[oo + j] = uint8_t(0); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/Shaders/DepthBlit.metal b/src/Ryujinx.Graphics.Metal/Shaders/DepthBlit.metal new file mode 100644 index 0000000000..8b8467c2ff --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Shaders/DepthBlit.metal @@ -0,0 +1,27 @@ +#include + +using namespace metal; + +struct CopyVertexOut { + float4 position [[position]]; + float2 uv; +}; + +struct Textures +{ + texture2d texture; + sampler sampler; +}; + +struct FragmentOut { + float depth [[depth(any)]]; +}; + +fragment FragmentOut fragmentMain(CopyVertexOut in [[stage_in]], + constant Textures &textures [[buffer(TEXTURES_INDEX)]]) { + FragmentOut out; + + out.depth = textures.texture.sample(textures.sampler, in.uv).r; + + return out; +} diff --git a/src/Ryujinx.Graphics.Metal/Shaders/DepthBlitMs.metal b/src/Ryujinx.Graphics.Metal/Shaders/DepthBlitMs.metal new file mode 100644 index 0000000000..10791f6363 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Shaders/DepthBlitMs.metal @@ -0,0 +1,29 @@ +#include + +using namespace metal; + +struct CopyVertexOut { + float4 position [[position]]; + float2 uv; +}; + +struct Textures +{ + texture2d_ms texture; +}; + +struct FragmentOut { + float depth [[depth(any)]]; +}; + +fragment FragmentOut fragmentMain(CopyVertexOut in [[stage_in]], + constant Textures &textures [[buffer(TEXTURES_INDEX)]], + uint sample_id [[sample_id]]) { + FragmentOut out; + + uint2 tex_size = uint2(textures.texture.get_width(), textures.texture.get_height()); + uint2 tex_coord = uint2(in.uv * float2(tex_size)); + out.depth = textures.texture.read(tex_coord, sample_id).r; + + return out; +} diff --git a/src/Ryujinx.Graphics.Metal/Shaders/DepthStencilClear.metal b/src/Ryujinx.Graphics.Metal/Shaders/DepthStencilClear.metal new file mode 100644 index 0000000000..7e50f2ce79 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Shaders/DepthStencilClear.metal @@ -0,0 +1,42 @@ +#include + +using namespace metal; + +struct VertexOut { + float4 position [[position]]; +}; + +struct FragmentOut { + float depth [[depth(any)]]; +}; + +struct ClearDepth { + float data; +}; + +struct ConstantBuffers { + constant ClearDepth* clear_depth; +}; + +vertex VertexOut vertexMain(ushort vid [[vertex_id]]) { + int low = vid & 1; + int high = vid >> 1; + + VertexOut out; + + out.position.x = (float(low) - 0.5f) * 2.0f; + out.position.y = (float(high) - 0.5f) * 2.0f; + out.position.z = 0.0f; + out.position.w = 1.0f; + + return out; +} + +fragment FragmentOut fragmentMain(VertexOut in [[stage_in]], + constant ConstantBuffers &constant_buffers [[buffer(CONSTANT_BUFFERS_INDEX)]]) { + FragmentOut out; + + out.depth = constant_buffers.clear_depth->data; + + return out; +} diff --git a/src/Ryujinx.Graphics.Metal/Shaders/StencilBlit.metal b/src/Ryujinx.Graphics.Metal/Shaders/StencilBlit.metal new file mode 100644 index 0000000000..0b25f322d9 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Shaders/StencilBlit.metal @@ -0,0 +1,27 @@ +#include + +using namespace metal; + +struct CopyVertexOut { + float4 position [[position]]; + float2 uv; +}; + +struct Textures +{ + texture2d texture; + sampler sampler; +}; + +struct FragmentOut { + uint stencil [[stencil]]; +}; + +fragment FragmentOut fragmentMain(CopyVertexOut in [[stage_in]], + constant Textures &textures [[buffer(TEXTURES_INDEX)]]) { + FragmentOut out; + + out.stencil = textures.texture.sample(textures.sampler, in.uv).r; + + return out; +} diff --git a/src/Ryujinx.Graphics.Metal/Shaders/StencilBlitMs.metal b/src/Ryujinx.Graphics.Metal/Shaders/StencilBlitMs.metal new file mode 100644 index 0000000000..e7f2d20b7e --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Shaders/StencilBlitMs.metal @@ -0,0 +1,29 @@ +#include + +using namespace metal; + +struct CopyVertexOut { + float4 position [[position]]; + float2 uv; +}; + +struct Textures +{ + texture2d_ms texture; +}; + +struct FragmentOut { + uint stencil [[stencil]]; +}; + +fragment FragmentOut fragmentMain(CopyVertexOut in [[stage_in]], + constant Textures &textures [[buffer(TEXTURES_INDEX)]], + uint sample_id [[sample_id]]) { + FragmentOut out; + + uint2 tex_size = uint2(textures.texture.get_width(), textures.texture.get_height()); + uint2 tex_coord = uint2(in.uv * float2(tex_size)); + out.stencil = textures.texture.read(tex_coord, sample_id).r; + + return out; +} diff --git a/src/Ryujinx.Graphics.Metal/StagingBuffer.cs b/src/Ryujinx.Graphics.Metal/StagingBuffer.cs new file mode 100644 index 0000000000..b250b87f21 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/StagingBuffer.cs @@ -0,0 +1,288 @@ +using Ryujinx.Common; +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.GAL; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + readonly struct StagingBufferReserved + { + public readonly BufferHolder Buffer; + public readonly int Offset; + public readonly int Size; + + public StagingBufferReserved(BufferHolder buffer, int offset, int size) + { + Buffer = buffer; + Offset = offset; + Size = size; + } + } + + [SupportedOSPlatform("macos")] + class StagingBuffer : IDisposable + { + private const int BufferSize = 32 * 1024 * 1024; + + private int _freeOffset; + private int _freeSize; + + private readonly MetalRenderer _renderer; + private readonly BufferHolder _buffer; + private readonly int _resourceAlignment; + + public readonly BufferHandle Handle; + + private readonly struct PendingCopy + { + public FenceHolder Fence { get; } + public int Size { get; } + + public PendingCopy(FenceHolder fence, int size) + { + Fence = fence; + Size = size; + fence.Get(); + } + } + + private readonly Queue _pendingCopies; + + public StagingBuffer(MetalRenderer renderer, BufferManager bufferManager) + { + _renderer = renderer; + + Handle = bufferManager.CreateWithHandle(BufferSize, out _buffer); + _pendingCopies = new Queue(); + _freeSize = BufferSize; + _resourceAlignment = Constants.MinResourceAlignment; + } + + public void PushData(CommandBufferPool cbp, CommandBufferScoped? cbs, BufferHolder dst, int dstOffset, ReadOnlySpan data) + { + bool isRender = cbs != null; + CommandBufferScoped scoped = cbs ?? cbp.Rent(); + + // Must push all data to the buffer. If it can't fit, split it up. + + while (data.Length > 0) + { + if (_freeSize < data.Length) + { + FreeCompleted(); + } + + while (_freeSize == 0) + { + if (!WaitFreeCompleted(cbp)) + { + if (isRender) + { + _renderer.FlushAllCommands(); + scoped = cbp.Rent(); + isRender = false; + } + else + { + scoped = cbp.ReturnAndRent(scoped); + } + } + } + + int chunkSize = Math.Min(_freeSize, data.Length); + + PushDataImpl(scoped, dst, dstOffset, data[..chunkSize]); + + dstOffset += chunkSize; + data = data[chunkSize..]; + } + + if (!isRender) + { + scoped.Dispose(); + } + } + + private void PushDataImpl(CommandBufferScoped cbs, BufferHolder dst, int dstOffset, ReadOnlySpan data) + { + var srcBuffer = _buffer.GetBuffer(); + var dstBuffer = dst.GetBuffer(dstOffset, data.Length, true); + + int offset = _freeOffset; + int capacity = BufferSize - offset; + if (capacity < data.Length) + { + _buffer.SetDataUnchecked(offset, data[..capacity]); + _buffer.SetDataUnchecked(0, data[capacity..]); + + BufferHolder.Copy(cbs, srcBuffer, dstBuffer, offset, dstOffset, capacity); + BufferHolder.Copy(cbs, srcBuffer, dstBuffer, 0, dstOffset + capacity, data.Length - capacity); + } + else + { + _buffer.SetDataUnchecked(offset, data); + + BufferHolder.Copy(cbs, srcBuffer, dstBuffer, offset, dstOffset, data.Length); + } + + _freeOffset = (offset + data.Length) & (BufferSize - 1); + _freeSize -= data.Length; + Debug.Assert(_freeSize >= 0); + + _pendingCopies.Enqueue(new PendingCopy(cbs.GetFence(), data.Length)); + } + + public bool TryPushData(CommandBufferScoped cbs, BufferHolder dst, int dstOffset, ReadOnlySpan data) + { + if (data.Length > BufferSize) + { + return false; + } + + if (_freeSize < data.Length) + { + FreeCompleted(); + + if (_freeSize < data.Length) + { + return false; + } + } + + PushDataImpl(cbs, dst, dstOffset, data); + + return true; + } + + private StagingBufferReserved ReserveDataImpl(CommandBufferScoped cbs, int size, int alignment) + { + // Assumes the caller has already determined that there is enough space. + int offset = BitUtils.AlignUp(_freeOffset, alignment); + int padding = offset - _freeOffset; + + int capacity = Math.Min(_freeSize, BufferSize - offset); + int reservedLength = size + padding; + if (capacity < size) + { + offset = 0; // Place at start. + reservedLength += capacity; + } + + _freeOffset = (_freeOffset + reservedLength) & (BufferSize - 1); + _freeSize -= reservedLength; + Debug.Assert(_freeSize >= 0); + + _pendingCopies.Enqueue(new PendingCopy(cbs.GetFence(), reservedLength)); + + return new StagingBufferReserved(_buffer, offset, size); + } + + private int GetContiguousFreeSize(int alignment) + { + int alignedFreeOffset = BitUtils.AlignUp(_freeOffset, alignment); + int padding = alignedFreeOffset - _freeOffset; + + // Free regions: + // - Aligned free offset to end (minimum free size - padding) + // - 0 to _freeOffset + freeSize wrapped (only if free area contains 0) + + int endOffset = (_freeOffset + _freeSize) & (BufferSize - 1); + + return Math.Max( + Math.Min(_freeSize - padding, BufferSize - alignedFreeOffset), + endOffset <= _freeOffset ? Math.Min(_freeSize, endOffset) : 0 + ); + } + + /// + /// Reserve a range on the staging buffer for the current command buffer and upload data to it. + /// + /// Command buffer to reserve the data on + /// The minimum size the reserved data requires + /// The required alignment for the buffer offset + /// The reserved range of the staging buffer + public StagingBufferReserved? TryReserveData(CommandBufferScoped cbs, int size, int alignment) + { + if (size > BufferSize) + { + return null; + } + + // Temporary reserved data cannot be fragmented. + + if (GetContiguousFreeSize(alignment) < size) + { + FreeCompleted(); + + if (GetContiguousFreeSize(alignment) < size) + { + Logger.Debug?.PrintMsg(LogClass.Gpu, $"Staging buffer out of space to reserve data of size {size}."); + return null; + } + } + + return ReserveDataImpl(cbs, size, alignment); + } + + /// + /// Reserve a range on the staging buffer for the current command buffer and upload data to it. + /// Uses the most permissive byte alignment. + /// + /// Command buffer to reserve the data on + /// The minimum size the reserved data requires + /// The reserved range of the staging buffer + public StagingBufferReserved? TryReserveData(CommandBufferScoped cbs, int size) + { + return TryReserveData(cbs, size, _resourceAlignment); + } + + private bool WaitFreeCompleted(CommandBufferPool cbp) + { + if (_pendingCopies.TryPeek(out var pc)) + { + if (!pc.Fence.IsSignaled()) + { + if (cbp.IsFenceOnRentedCommandBuffer(pc.Fence)) + { + return false; + } + + pc.Fence.Wait(); + } + + var dequeued = _pendingCopies.Dequeue(); + Debug.Assert(dequeued.Fence == pc.Fence); + _freeSize += pc.Size; + pc.Fence.Put(); + } + + return true; + } + + public void FreeCompleted() + { + FenceHolder signalledFence = null; + while (_pendingCopies.TryPeek(out var pc) && (pc.Fence == signalledFence || pc.Fence.IsSignaled())) + { + signalledFence = pc.Fence; // Already checked - don't need to do it again. + var dequeued = _pendingCopies.Dequeue(); + Debug.Assert(dequeued.Fence == pc.Fence); + _freeSize += pc.Size; + pc.Fence.Put(); + } + } + + public void Dispose() + { + _renderer.BufferManager.Delete(Handle); + + while (_pendingCopies.TryDequeue(out var pc)) + { + pc.Fence.Put(); + } + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/State/DepthStencilUid.cs b/src/Ryujinx.Graphics.Metal/State/DepthStencilUid.cs new file mode 100644 index 0000000000..63b1d8ef4b --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/State/DepthStencilUid.cs @@ -0,0 +1,110 @@ +using SharpMetal.Metal; +using System; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; + +namespace Ryujinx.Graphics.Metal.State +{ + [StructLayout(LayoutKind.Sequential, Pack = 1)] + public struct StencilUid + { + public uint ReadMask; + public uint WriteMask; + public ushort Operations; + + public MTLStencilOperation StencilFailureOperation + { + readonly get => (MTLStencilOperation)((Operations >> 0) & 0xF); + set => Operations = (ushort)((Operations & 0xFFF0) | ((int)value << 0)); + } + + public MTLStencilOperation DepthFailureOperation + { + readonly get => (MTLStencilOperation)((Operations >> 4) & 0xF); + set => Operations = (ushort)((Operations & 0xFF0F) | ((int)value << 4)); + } + + public MTLStencilOperation DepthStencilPassOperation + { + readonly get => (MTLStencilOperation)((Operations >> 8) & 0xF); + set => Operations = (ushort)((Operations & 0xF0FF) | ((int)value << 8)); + } + + public MTLCompareFunction StencilCompareFunction + { + readonly get => (MTLCompareFunction)((Operations >> 12) & 0xF); + set => Operations = (ushort)((Operations & 0x0FFF) | ((int)value << 12)); + } + } + + + [StructLayout(LayoutKind.Explicit, Size = 24)] + internal struct DepthStencilUid : IEquatable + { + [FieldOffset(0)] + public StencilUid FrontFace; + + [FieldOffset(10)] + public ushort DepthState; + + [FieldOffset(12)] + public StencilUid BackFace; + + [FieldOffset(22)] + private readonly ushort _padding; + + // Quick access aliases +#pragma warning disable IDE0044 // Add readonly modifier + [FieldOffset(0)] + private ulong _id0; + [FieldOffset(8)] + private ulong _id1; + [FieldOffset(0)] + private Vector128 _id01; + [FieldOffset(16)] + private ulong _id2; +#pragma warning restore IDE0044 // Add readonly modifier + + public MTLCompareFunction DepthCompareFunction + { + readonly get => (MTLCompareFunction)((DepthState >> 0) & 0xF); + set => DepthState = (ushort)((DepthState & 0xFFF0) | ((int)value << 0)); + } + + public bool StencilTestEnabled + { + readonly get => ((DepthState >> 4) & 0x1) != 0; + set => DepthState = (ushort)((DepthState & 0xFFEF) | ((value ? 1 : 0) << 4)); + } + + public bool DepthWriteEnabled + { + readonly get => ((DepthState >> 15) & 0x1) != 0; + set => DepthState = (ushort)((DepthState & 0x7FFF) | ((value ? 1 : 0) << 15)); + } + + public readonly override bool Equals(object obj) + { + return obj is DepthStencilUid other && EqualsRef(ref other); + } + + public readonly bool EqualsRef(ref DepthStencilUid other) + { + return _id01.Equals(other._id01) && _id2 == other._id2; + } + + public readonly bool Equals(DepthStencilUid other) + { + return EqualsRef(ref other); + } + + public readonly override int GetHashCode() + { + ulong hash64 = _id0 * 23 ^ + _id1 * 23 ^ + _id2 * 23; + + return (int)hash64 ^ ((int)(hash64 >> 32) * 17); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/State/PipelineState.cs b/src/Ryujinx.Graphics.Metal/State/PipelineState.cs new file mode 100644 index 0000000000..9f88f30612 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/State/PipelineState.cs @@ -0,0 +1,341 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.GAL; +using SharpMetal.Foundation; +using SharpMetal.Metal; +using System; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + struct PipelineState + { + public PipelineUid Internal; + + public uint StagesCount + { + readonly get => (byte)((Internal.Id0 >> 0) & 0xFF); + set => Internal.Id0 = (Internal.Id0 & 0xFFFFFFFFFFFFFF00) | ((ulong)value << 0); + } + + public uint VertexAttributeDescriptionsCount + { + readonly get => (byte)((Internal.Id0 >> 8) & 0xFF); + set => Internal.Id0 = (Internal.Id0 & 0xFFFFFFFFFFFF00FF) | ((ulong)value << 8); + } + + public uint VertexBindingDescriptionsCount + { + readonly get => (byte)((Internal.Id0 >> 16) & 0xFF); + set => Internal.Id0 = (Internal.Id0 & 0xFFFFFFFFFF00FFFF) | ((ulong)value << 16); + } + + public uint ColorBlendAttachmentStateCount + { + readonly get => (byte)((Internal.Id0 >> 24) & 0xFF); + set => Internal.Id0 = (Internal.Id0 & 0xFFFFFFFF00FFFFFF) | ((ulong)value << 24); + } + + /* + * Can be an input to a pipeline, but not sure what the situation for that is. + public PrimitiveTopology Topology + { + readonly get => (PrimitiveTopology)((Internal.Id6 >> 16) & 0xF); + set => Internal.Id6 = (Internal.Id6 & 0xFFFFFFFFFFF0FFFF) | ((ulong)value << 16); + } + */ + + public MTLLogicOperation LogicOp + { + readonly get => (MTLLogicOperation)((Internal.Id0 >> 32) & 0xF); + set => Internal.Id0 = (Internal.Id0 & 0xFFFFFFF0FFFFFFFF) | ((ulong)value << 32); + } + + //? + public bool PrimitiveRestartEnable + { + readonly get => ((Internal.Id0 >> 36) & 0x1) != 0UL; + set => Internal.Id0 = (Internal.Id0 & 0xFFFFFFEFFFFFFFFF) | ((value ? 1UL : 0UL) << 36); + } + + public bool RasterizerDiscardEnable + { + readonly get => ((Internal.Id0 >> 37) & 0x1) != 0UL; + set => Internal.Id0 = (Internal.Id0 & 0xFFFFFFDFFFFFFFFF) | ((value ? 1UL : 0UL) << 37); + } + + public bool LogicOpEnable + { + readonly get => ((Internal.Id0 >> 38) & 0x1) != 0UL; + set => Internal.Id0 = (Internal.Id0 & 0xFFFFFFBFFFFFFFFF) | ((value ? 1UL : 0UL) << 38); + } + + public bool AlphaToCoverageEnable + { + readonly get => ((Internal.Id0 >> 40) & 0x1) != 0UL; + set => Internal.Id0 = (Internal.Id0 & 0xFFFFFEFFFFFFFFFF) | ((value ? 1UL : 0UL) << 40); + } + + public bool AlphaToOneEnable + { + readonly get => ((Internal.Id0 >> 41) & 0x1) != 0UL; + set => Internal.Id0 = (Internal.Id0 & 0xFFFFFDFFFFFFFFFF) | ((value ? 1UL : 0UL) << 41); + } + + public MTLPixelFormat DepthStencilFormat + { + readonly get => (MTLPixelFormat)(Internal.Id0 >> 48); + set => Internal.Id0 = (Internal.Id0 & 0x0000FFFFFFFFFFFF) | ((ulong)value << 48); + } + + // Not sure how to appropriately use this, but it does need to be passed for tess. + public uint PatchControlPoints + { + readonly get => (uint)((Internal.Id1 >> 0) & 0xFFFFFFFF); + set => Internal.Id1 = (Internal.Id1 & 0xFFFFFFFF00000000) | ((ulong)value << 0); + } + + public uint SamplesCount + { + readonly get => (uint)((Internal.Id1 >> 32) & 0xFFFFFFFF); + set => Internal.Id1 = (Internal.Id1 & 0xFFFFFFFF) | ((ulong)value << 32); + } + + // Advanced blend not supported + + private readonly void BuildColorAttachment(MTLRenderPipelineColorAttachmentDescriptor descriptor, ColorBlendStateUid blendState) + { + descriptor.PixelFormat = blendState.PixelFormat; + descriptor.SetBlendingEnabled(blendState.Enable); + descriptor.AlphaBlendOperation = blendState.AlphaBlendOperation; + descriptor.RgbBlendOperation = blendState.RgbBlendOperation; + descriptor.SourceAlphaBlendFactor = blendState.SourceAlphaBlendFactor; + descriptor.DestinationAlphaBlendFactor = blendState.DestinationAlphaBlendFactor; + descriptor.SourceRGBBlendFactor = blendState.SourceRGBBlendFactor; + descriptor.DestinationRGBBlendFactor = blendState.DestinationRGBBlendFactor; + descriptor.WriteMask = blendState.WriteMask; + } + + private readonly MTLVertexDescriptor BuildVertexDescriptor() + { + var vertexDescriptor = new MTLVertexDescriptor(); + + for (int i = 0; i < VertexAttributeDescriptionsCount; i++) + { + VertexInputAttributeUid uid = Internal.VertexAttributes[i]; + + var attrib = vertexDescriptor.Attributes.Object((ulong)i); + attrib.Format = uid.Format; + attrib.Offset = uid.Offset; + attrib.BufferIndex = uid.BufferIndex; + } + + for (int i = 0; i < VertexBindingDescriptionsCount; i++) + { + VertexInputLayoutUid uid = Internal.VertexBindings[i]; + + var layout = vertexDescriptor.Layouts.Object((ulong)i); + + layout.StepFunction = uid.StepFunction; + layout.StepRate = uid.StepRate; + layout.Stride = uid.Stride; + } + + return vertexDescriptor; + } + + private MTLRenderPipelineDescriptor CreateRenderDescriptor(Program program) + { + var renderPipelineDescriptor = new MTLRenderPipelineDescriptor(); + + for (int i = 0; i < Constants.MaxColorAttachments; i++) + { + var blendState = Internal.ColorBlendState[i]; + + if (blendState.PixelFormat != MTLPixelFormat.Invalid) + { + var pipelineAttachment = renderPipelineDescriptor.ColorAttachments.Object((ulong)i); + + BuildColorAttachment(pipelineAttachment, blendState); + } + } + + MTLPixelFormat dsFormat = DepthStencilFormat; + if (dsFormat != MTLPixelFormat.Invalid) + { + switch (dsFormat) + { + // Depth Only Attachment + case MTLPixelFormat.Depth16Unorm: + case MTLPixelFormat.Depth32Float: + renderPipelineDescriptor.DepthAttachmentPixelFormat = dsFormat; + break; + + // Stencil Only Attachment + case MTLPixelFormat.Stencil8: + renderPipelineDescriptor.StencilAttachmentPixelFormat = dsFormat; + break; + + // Combined Attachment + case MTLPixelFormat.Depth24UnormStencil8: + case MTLPixelFormat.Depth32FloatStencil8: + renderPipelineDescriptor.DepthAttachmentPixelFormat = dsFormat; + renderPipelineDescriptor.StencilAttachmentPixelFormat = dsFormat; + break; + default: + Logger.Error?.PrintMsg(LogClass.Gpu, $"Unsupported Depth/Stencil Format: {dsFormat}!"); + break; + } + } + + renderPipelineDescriptor.LogicOperationEnabled = LogicOpEnable; + renderPipelineDescriptor.LogicOperation = LogicOp; + renderPipelineDescriptor.AlphaToCoverageEnabled = AlphaToCoverageEnable; + renderPipelineDescriptor.AlphaToOneEnabled = AlphaToOneEnable; + renderPipelineDescriptor.RasterizationEnabled = !RasterizerDiscardEnable; + renderPipelineDescriptor.SampleCount = Math.Max(1, SamplesCount); + + var vertexDescriptor = BuildVertexDescriptor(); + renderPipelineDescriptor.VertexDescriptor = vertexDescriptor; + + renderPipelineDescriptor.VertexFunction = program.VertexFunction; + + if (program.FragmentFunction.NativePtr != 0) + { + renderPipelineDescriptor.FragmentFunction = program.FragmentFunction; + } + + return renderPipelineDescriptor; + } + + public MTLRenderPipelineState CreateRenderPipeline(MTLDevice device, Program program) + { + if (program.TryGetGraphicsPipeline(ref Internal, out var pipelineState)) + { + return pipelineState; + } + + using var descriptor = CreateRenderDescriptor(program); + + var error = new NSError(IntPtr.Zero); + pipelineState = device.NewRenderPipelineState(descriptor, ref error); + if (error != IntPtr.Zero) + { + Logger.Error?.PrintMsg(LogClass.Gpu, $"Failed to create Render Pipeline State: {StringHelper.String(error.LocalizedDescription)}"); + } + + program.AddGraphicsPipeline(ref Internal, pipelineState); + + return pipelineState; + } + + public static MTLComputePipelineDescriptor CreateComputeDescriptor(Program program) + { + ComputeSize localSize = program.ComputeLocalSize; + + uint maxThreads = (uint)(localSize.X * localSize.Y * localSize.Z); + + if (maxThreads == 0) + { + throw new InvalidOperationException($"Local thread size for compute cannot be 0 in any dimension."); + } + + var descriptor = new MTLComputePipelineDescriptor + { + ComputeFunction = program.ComputeFunction, + MaxTotalThreadsPerThreadgroup = maxThreads, + ThreadGroupSizeIsMultipleOfThreadExecutionWidth = true, + }; + + return descriptor; + } + + public static MTLComputePipelineState CreateComputePipeline(MTLDevice device, Program program) + { + if (program.TryGetComputePipeline(out var pipelineState)) + { + return pipelineState; + } + + using MTLComputePipelineDescriptor descriptor = CreateComputeDescriptor(program); + + var error = new NSError(IntPtr.Zero); + pipelineState = device.NewComputePipelineState(descriptor, MTLPipelineOption.None, 0, ref error); + if (error != IntPtr.Zero) + { + Logger.Error?.PrintMsg(LogClass.Gpu, $"Failed to create Compute Pipeline State: {StringHelper.String(error.LocalizedDescription)}"); + } + + program.AddComputePipeline(pipelineState); + + return pipelineState; + } + + public void Initialize() + { + SamplesCount = 1; + + Internal.ResetColorState(); + } + + /* + * TODO, this is from vulkan. + + private void UpdateVertexAttributeDescriptions(VulkanRenderer gd) + { + // Vertex attributes exceeding the stride are invalid. + // In metal, they cause glitches with the vertex shader fetching incorrect values. + // To work around this, we reduce the format to something that doesn't exceed the stride if possible. + // The assumption is that the exceeding components are not actually accessed on the shader. + + for (int index = 0; index < VertexAttributeDescriptionsCount; index++) + { + var attribute = Internal.VertexAttributeDescriptions[index]; + int vbIndex = GetVertexBufferIndex(attribute.Binding); + + if (vbIndex >= 0) + { + ref var vb = ref Internal.VertexBindingDescriptions[vbIndex]; + + Format format = attribute.Format; + + while (vb.Stride != 0 && attribute.Offset + FormatTable.GetAttributeFormatSize(format) > vb.Stride) + { + Format newFormat = FormatTable.DropLastComponent(format); + + if (newFormat == format) + { + // That case means we failed to find a format that fits within the stride, + // so just restore the original format and give up. + format = attribute.Format; + break; + } + + format = newFormat; + } + + if (attribute.Format != format && gd.FormatCapabilities.BufferFormatSupports(FormatFeatureFlags.VertexBufferBit, format)) + { + attribute.Format = format; + } + } + + _vertexAttributeDescriptions2[index] = attribute; + } + } + + private int GetVertexBufferIndex(uint binding) + { + for (int index = 0; index < VertexBindingDescriptionsCount; index++) + { + if (Internal.VertexBindingDescriptions[index].Binding == binding) + { + return index; + } + } + + return -1; + } + */ + } +} diff --git a/src/Ryujinx.Graphics.Metal/State/PipelineUid.cs b/src/Ryujinx.Graphics.Metal/State/PipelineUid.cs new file mode 100644 index 0000000000..c986a7e237 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/State/PipelineUid.cs @@ -0,0 +1,208 @@ +using Ryujinx.Common.Memory; +using SharpMetal.Metal; +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + struct VertexInputAttributeUid + { + public ulong Id0; + + public ulong Offset + { + readonly get => (uint)((Id0 >> 0) & 0xFFFFFFFF); + set => Id0 = (Id0 & 0xFFFFFFFF00000000) | ((ulong)value << 0); + } + + public MTLVertexFormat Format + { + readonly get => (MTLVertexFormat)((Id0 >> 32) & 0xFFFF); + set => Id0 = (Id0 & 0xFFFF0000FFFFFFFF) | ((ulong)value << 32); + } + + public ulong BufferIndex + { + readonly get => ((Id0 >> 48) & 0xFFFF); + set => Id0 = (Id0 & 0x0000FFFFFFFFFFFF) | ((ulong)value << 48); + } + } + + struct VertexInputLayoutUid + { + public ulong Id0; + + public uint Stride + { + readonly get => (uint)((Id0 >> 0) & 0xFFFFFFFF); + set => Id0 = (Id0 & 0xFFFFFFFF00000000) | ((ulong)value << 0); + } + + public uint StepRate + { + readonly get => (uint)((Id0 >> 32) & 0x1FFFFFFF); + set => Id0 = (Id0 & 0xE0000000FFFFFFFF) | ((ulong)value << 32); + } + + public MTLVertexStepFunction StepFunction + { + readonly get => (MTLVertexStepFunction)((Id0 >> 61) & 0x7); + set => Id0 = (Id0 & 0x1FFFFFFFFFFFFFFF) | ((ulong)value << 61); + } + } + + struct ColorBlendStateUid + { + public ulong Id0; + + public MTLPixelFormat PixelFormat + { + readonly get => (MTLPixelFormat)((Id0 >> 0) & 0xFFFF); + set => Id0 = (Id0 & 0xFFFFFFFFFFFF0000) | ((ulong)value << 0); + } + + public MTLBlendFactor SourceRGBBlendFactor + { + readonly get => (MTLBlendFactor)((Id0 >> 16) & 0xFF); + set => Id0 = (Id0 & 0xFFFFFFFFFF00FFFF) | ((ulong)value << 16); + } + + public MTLBlendFactor DestinationRGBBlendFactor + { + readonly get => (MTLBlendFactor)((Id0 >> 24) & 0xFF); + set => Id0 = (Id0 & 0xFFFFFFFF00FFFFFF) | ((ulong)value << 24); + } + + public MTLBlendOperation RgbBlendOperation + { + readonly get => (MTLBlendOperation)((Id0 >> 32) & 0xF); + set => Id0 = (Id0 & 0xFFFFFFF0FFFFFFFF) | ((ulong)value << 32); + } + + public MTLBlendOperation AlphaBlendOperation + { + readonly get => (MTLBlendOperation)((Id0 >> 36) & 0xF); + set => Id0 = (Id0 & 0xFFFFFF0FFFFFFFFF) | ((ulong)value << 36); + } + + public MTLBlendFactor SourceAlphaBlendFactor + { + readonly get => (MTLBlendFactor)((Id0 >> 40) & 0xFF); + set => Id0 = (Id0 & 0xFFFF00FFFFFFFFFF) | ((ulong)value << 40); + } + + public MTLBlendFactor DestinationAlphaBlendFactor + { + readonly get => (MTLBlendFactor)((Id0 >> 48) & 0xFF); + set => Id0 = (Id0 & 0xFF00FFFFFFFFFFFF) | ((ulong)value << 48); + } + + public MTLColorWriteMask WriteMask + { + readonly get => (MTLColorWriteMask)((Id0 >> 56) & 0xF); + set => Id0 = (Id0 & 0xF0FFFFFFFFFFFFFF) | ((ulong)value << 56); + } + + public bool Enable + { + readonly get => ((Id0 >> 63) & 0x1) != 0UL; + set => Id0 = (Id0 & 0x7FFFFFFFFFFFFFFF) | ((value ? 1UL : 0UL) << 63); + } + + public void Swap(ColorBlendStateUid uid) + { + var format = PixelFormat; + + this = uid; + PixelFormat = format; + } + } + + [SupportedOSPlatform("macos")] + struct PipelineUid : IRefEquatable + { + public ulong Id0; + public ulong Id1; + + private readonly uint VertexAttributeDescriptionsCount => (byte)((Id0 >> 8) & 0xFF); + private readonly uint VertexBindingDescriptionsCount => (byte)((Id0 >> 16) & 0xFF); + private readonly uint ColorBlendAttachmentStateCount => (byte)((Id0 >> 24) & 0xFF); + + public Array32 VertexAttributes; + public Array33 VertexBindings; + public Array8 ColorBlendState; + public uint AttachmentIntegerFormatMask; + public bool LogicOpsAllowed; + + public void ResetColorState() + { + ColorBlendState = new(); + + for (int i = 0; i < ColorBlendState.Length; i++) + { + ColorBlendState[i].WriteMask = MTLColorWriteMask.All; + } + } + + public readonly override bool Equals(object obj) + { + return obj is PipelineUid other && Equals(other); + } + + public bool Equals(ref PipelineUid other) + { + if (!Unsafe.As>(ref Id0).Equals(Unsafe.As>(ref other.Id0))) + { + return false; + } + + if (!SequenceEqual(VertexAttributes.AsSpan(), other.VertexAttributes.AsSpan(), VertexAttributeDescriptionsCount)) + { + return false; + } + + if (!SequenceEqual(VertexBindings.AsSpan(), other.VertexBindings.AsSpan(), VertexBindingDescriptionsCount)) + { + return false; + } + + if (!SequenceEqual(ColorBlendState.AsSpan(), other.ColorBlendState.AsSpan(), ColorBlendAttachmentStateCount)) + { + return false; + } + + return true; + } + + private static bool SequenceEqual(ReadOnlySpan x, ReadOnlySpan y, uint count) where T : unmanaged + { + return MemoryMarshal.Cast(x[..(int)count]).SequenceEqual(MemoryMarshal.Cast(y[..(int)count])); + } + + public override int GetHashCode() + { + ulong hash64 = Id0 * 23 ^ + Id1 * 23; + + for (int i = 0; i < (int)VertexAttributeDescriptionsCount; i++) + { + hash64 ^= VertexAttributes[i].Id0 * 23; + } + + for (int i = 0; i < (int)VertexBindingDescriptionsCount; i++) + { + hash64 ^= VertexBindings[i].Id0 * 23; + } + + for (int i = 0; i < (int)ColorBlendAttachmentStateCount; i++) + { + hash64 ^= ColorBlendState[i].Id0 * 23; + } + + return (int)hash64 ^ ((int)(hash64 >> 32) * 17); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/StateCache.cs b/src/Ryujinx.Graphics.Metal/StateCache.cs new file mode 100644 index 0000000000..9b8391ffcb --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/StateCache.cs @@ -0,0 +1,42 @@ +using System; +using System.Collections.Generic; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + abstract class StateCache : IDisposable where T : IDisposable + { + private readonly Dictionary _cache = new(); + + protected abstract THash GetHash(TDescriptor descriptor); + + protected abstract T CreateValue(TDescriptor descriptor); + + public void Dispose() + { + foreach (T value in _cache.Values) + { + value.Dispose(); + } + + GC.SuppressFinalize(this); + } + + public T GetOrCreate(TDescriptor descriptor) + { + var hash = GetHash(descriptor); + if (_cache.TryGetValue(hash, out T value)) + { + return value; + } + else + { + var newValue = CreateValue(descriptor); + _cache.Add(hash, newValue); + + return newValue; + } + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/StringHelper.cs b/src/Ryujinx.Graphics.Metal/StringHelper.cs new file mode 100644 index 0000000000..46e8ad2e93 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/StringHelper.cs @@ -0,0 +1,30 @@ +using SharpMetal.Foundation; +using SharpMetal.ObjectiveCCore; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + class StringHelper + { + public static NSString NSString(string source) + { + return new(ObjectiveC.IntPtr_objc_msgSend(new ObjectiveCClass("NSString"), "stringWithUTF8String:", source)); + } + + public static unsafe string String(NSString source) + { + char[] sourceBuffer = new char[source.Length]; + fixed (char* pSourceBuffer = sourceBuffer) + { + ObjectiveC.bool_objc_msgSend(source, + "getCString:maxLength:encoding:", + pSourceBuffer, + source.MaximumLengthOfBytes(NSStringEncoding.UTF16) + 1, + (ulong)NSStringEncoding.UTF16); + } + + return new string(sourceBuffer); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/SyncManager.cs b/src/Ryujinx.Graphics.Metal/SyncManager.cs new file mode 100644 index 0000000000..ca49fe2636 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/SyncManager.cs @@ -0,0 +1,214 @@ +using Ryujinx.Common.Logging; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + class SyncManager + { + private class SyncHandle + { + public ulong ID; + public MultiFenceHolder Waitable; + public ulong FlushId; + public bool Signalled; + + public bool NeedsFlush(ulong currentFlushId) + { + return (long)(FlushId - currentFlushId) >= 0; + } + } + + private ulong _firstHandle; + + private readonly MetalRenderer _renderer; + private readonly List _handles; + private ulong _flushId; + private long _waitTicks; + + public SyncManager(MetalRenderer renderer) + { + _renderer = renderer; + _handles = new List(); + } + + public void RegisterFlush() + { + _flushId++; + } + + public void Create(ulong id, bool strict) + { + ulong flushId = _flushId; + MultiFenceHolder waitable = new(); + if (strict || _renderer.InterruptAction == null) + { + _renderer.FlushAllCommands(); + _renderer.CommandBufferPool.AddWaitable(waitable); + } + else + { + // Don't flush commands, instead wait for the current command buffer to finish. + // If this sync is waited on before the command buffer is submitted, interrupt the gpu thread and flush it manually. + + _renderer.CommandBufferPool.AddInUseWaitable(waitable); + } + + SyncHandle handle = new() + { + ID = id, + Waitable = waitable, + FlushId = flushId, + }; + + lock (_handles) + { + _handles.Add(handle); + } + } + + public ulong GetCurrent() + { + lock (_handles) + { + ulong lastHandle = _firstHandle; + + foreach (SyncHandle handle in _handles) + { + lock (handle) + { + if (handle.Waitable == null) + { + continue; + } + + if (handle.ID > lastHandle) + { + bool signaled = handle.Signalled || handle.Waitable.WaitForFences(false); + if (signaled) + { + lastHandle = handle.ID; + handle.Signalled = true; + } + } + } + } + + return lastHandle; + } + } + + public void Wait(ulong id) + { + SyncHandle result = null; + + lock (_handles) + { + if ((long)(_firstHandle - id) > 0) + { + return; // The handle has already been signalled or deleted. + } + + foreach (SyncHandle handle in _handles) + { + if (handle.ID == id) + { + result = handle; + break; + } + } + } + + if (result != null) + { + if (result.Waitable == null) + { + return; + } + + long beforeTicks = Stopwatch.GetTimestamp(); + + if (result.NeedsFlush(_flushId)) + { + _renderer.InterruptAction(() => + { + if (result.NeedsFlush(_flushId)) + { + _renderer.FlushAllCommands(); + } + }); + } + + lock (result) + { + if (result.Waitable == null) + { + return; + } + + bool signaled = result.Signalled || result.Waitable.WaitForFences(true); + + if (!signaled) + { + Logger.Error?.PrintMsg(LogClass.Gpu, $"Metal Sync Object {result.ID} failed to signal within 1000ms. Continuing..."); + } + else + { + _waitTicks += Stopwatch.GetTimestamp() - beforeTicks; + result.Signalled = true; + } + } + } + } + + public void Cleanup() + { + // Iterate through handles and remove any that have already been signalled. + + while (true) + { + SyncHandle first = null; + lock (_handles) + { + first = _handles.FirstOrDefault(); + } + + if (first == null || first.NeedsFlush(_flushId)) + { + break; + } + + bool signaled = first.Waitable.WaitForFences(false); + if (signaled) + { + // Delete the sync object. + lock (_handles) + { + lock (first) + { + _firstHandle = first.ID + 1; + _handles.RemoveAt(0); + first.Waitable = null; + } + } + } + else + { + // This sync handle and any following have not been reached yet. + break; + } + } + } + + public long GetAndResetWaitTicks() + { + long result = _waitTicks; + _waitTicks = 0; + + return result; + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/Texture.cs b/src/Ryujinx.Graphics.Metal/Texture.cs new file mode 100644 index 0000000000..4566d65d89 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Texture.cs @@ -0,0 +1,654 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.GAL; +using SharpMetal.Foundation; +using SharpMetal.Metal; +using System; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + class Texture : TextureBase, ITexture + { + private MTLTexture _identitySwizzleHandle; + private readonly bool _identityIsDifferent; + + public Texture(MTLDevice device, MetalRenderer renderer, Pipeline pipeline, TextureCreateInfo info) : base(device, renderer, pipeline, info) + { + MTLPixelFormat pixelFormat = FormatTable.GetFormat(Info.Format); + + var descriptor = new MTLTextureDescriptor + { + PixelFormat = pixelFormat, + Usage = MTLTextureUsage.Unknown, + SampleCount = (ulong)Info.Samples, + TextureType = Info.Target.Convert(), + Width = (ulong)Info.Width, + Height = (ulong)Info.Height, + MipmapLevelCount = (ulong)Info.Levels + }; + + if (info.Target == Target.Texture3D) + { + descriptor.Depth = (ulong)Info.Depth; + } + else if (info.Target != Target.Cubemap) + { + if (info.Target == Target.CubemapArray) + { + descriptor.ArrayLength = (ulong)(Info.Depth / 6); + } + else + { + descriptor.ArrayLength = (ulong)Info.Depth; + } + } + + MTLTextureSwizzleChannels swizzle = GetSwizzle(info, descriptor.PixelFormat); + + _identitySwizzleHandle = Device.NewTexture(descriptor); + + if (SwizzleIsIdentity(swizzle)) + { + MtlTexture = _identitySwizzleHandle; + } + else + { + MtlTexture = CreateDefaultView(_identitySwizzleHandle, swizzle, descriptor); + _identityIsDifferent = true; + } + + MtlFormat = pixelFormat; + descriptor.Dispose(); + } + + public Texture(MTLDevice device, MetalRenderer renderer, Pipeline pipeline, TextureCreateInfo info, MTLTexture sourceTexture, int firstLayer, int firstLevel) : base(device, renderer, pipeline, info) + { + var pixelFormat = FormatTable.GetFormat(Info.Format); + + if (info.DepthStencilMode == DepthStencilMode.Stencil) + { + pixelFormat = pixelFormat switch + { + MTLPixelFormat.Depth32FloatStencil8 => MTLPixelFormat.X32Stencil8, + MTLPixelFormat.Depth24UnormStencil8 => MTLPixelFormat.X24Stencil8, + _ => pixelFormat + }; + } + + var textureType = Info.Target.Convert(); + NSRange levels; + levels.location = (ulong)firstLevel; + levels.length = (ulong)Info.Levels; + NSRange slices; + slices.location = (ulong)firstLayer; + slices.length = textureType == MTLTextureType.Type3D ? 1 : (ulong)info.GetDepthOrLayers(); + + var swizzle = GetSwizzle(info, pixelFormat); + + _identitySwizzleHandle = sourceTexture.NewTextureView(pixelFormat, textureType, levels, slices); + + if (SwizzleIsIdentity(swizzle)) + { + MtlTexture = _identitySwizzleHandle; + } + else + { + MtlTexture = sourceTexture.NewTextureView(pixelFormat, textureType, levels, slices, swizzle); + _identityIsDifferent = true; + } + + MtlFormat = pixelFormat; + FirstLayer = firstLayer; + FirstLevel = firstLevel; + } + + public void PopulateRenderPassAttachment(MTLRenderPassColorAttachmentDescriptor descriptor) + { + descriptor.Texture = _identitySwizzleHandle; + } + + private MTLTexture CreateDefaultView(MTLTexture texture, MTLTextureSwizzleChannels swizzle, MTLTextureDescriptor descriptor) + { + NSRange levels; + levels.location = 0; + levels.length = (ulong)Info.Levels; + NSRange slices; + slices.location = 0; + slices.length = Info.Target == Target.Texture3D ? 1 : (ulong)Info.GetDepthOrLayers(); + + return texture.NewTextureView(descriptor.PixelFormat, descriptor.TextureType, levels, slices, swizzle); + } + + private bool SwizzleIsIdentity(MTLTextureSwizzleChannels swizzle) + { + return swizzle.red == MTLTextureSwizzle.Red && + swizzle.green == MTLTextureSwizzle.Green && + swizzle.blue == MTLTextureSwizzle.Blue && + swizzle.alpha == MTLTextureSwizzle.Alpha; + } + + private MTLTextureSwizzleChannels GetSwizzle(TextureCreateInfo info, MTLPixelFormat pixelFormat) + { + var swizzleR = Info.SwizzleR.Convert(); + var swizzleG = Info.SwizzleG.Convert(); + var swizzleB = Info.SwizzleB.Convert(); + var swizzleA = Info.SwizzleA.Convert(); + + if (info.Format == Format.R5G5B5A1Unorm || + info.Format == Format.R5G5B5X1Unorm || + info.Format == Format.R5G6B5Unorm) + { + (swizzleB, swizzleR) = (swizzleR, swizzleB); + } + else if (pixelFormat == MTLPixelFormat.ABGR4Unorm || info.Format == Format.A1B5G5R5Unorm) + { + var tempB = swizzleB; + var tempA = swizzleA; + + swizzleB = swizzleG; + swizzleA = swizzleR; + swizzleR = tempA; + swizzleG = tempB; + } + + return new MTLTextureSwizzleChannels + { + red = swizzleR, + green = swizzleG, + blue = swizzleB, + alpha = swizzleA + }; + } + + public void CopyTo(ITexture destination, int firstLayer, int firstLevel) + { + CommandBufferScoped cbs = Pipeline.Cbs; + + TextureBase src = this; + TextureBase dst = (TextureBase)destination; + + if (!Valid || !dst.Valid) + { + return; + } + + var srcImage = GetHandle(); + var dstImage = dst.GetHandle(); + + if (!dst.Info.Target.IsMultisample() && Info.Target.IsMultisample()) + { + // int layers = Math.Min(Info.GetLayers(), dst.Info.GetLayers() - firstLayer); + + // _gd.HelperShader.CopyMSToNonMS(_gd, cbs, src, dst, 0, firstLayer, layers); + } + else if (dst.Info.Target.IsMultisample() && !Info.Target.IsMultisample()) + { + // int layers = Math.Min(Info.GetLayers(), dst.Info.GetLayers() - firstLayer); + + // _gd.HelperShader.CopyNonMSToMS(_gd, cbs, src, dst, 0, firstLayer, layers); + } + else if (dst.Info.BytesPerPixel != Info.BytesPerPixel) + { + // int layers = Math.Min(Info.GetLayers(), dst.Info.GetLayers() - firstLayer); + // int levels = Math.Min(Info.Levels, dst.Info.Levels - firstLevel); + + // _gd.HelperShader.CopyIncompatibleFormats(_gd, cbs, src, dst, 0, firstLayer, 0, firstLevel, layers, levels); + } + else if (src.Info.Format.IsDepthOrStencil() != dst.Info.Format.IsDepthOrStencil()) + { + // int layers = Math.Min(Info.GetLayers(), dst.Info.GetLayers() - firstLayer); + // int levels = Math.Min(Info.Levels, dst.Info.Levels - firstLevel); + + // TODO: depth copy? + // _gd.HelperShader.CopyColor(_gd, cbs, src, dst, 0, firstLayer, 0, FirstLevel, layers, levels); + } + else + { + TextureCopy.Copy( + cbs, + srcImage, + dstImage, + src.Info, + dst.Info, + 0, + firstLayer, + 0, + firstLevel); + } + } + + public void CopyTo(ITexture destination, int srcLayer, int dstLayer, int srcLevel, int dstLevel) + { + CommandBufferScoped cbs = Pipeline.Cbs; + + TextureBase src = this; + TextureBase dst = (TextureBase)destination; + + if (!Valid || !dst.Valid) + { + return; + } + + var srcImage = GetHandle(); + var dstImage = dst.GetHandle(); + + if (!dst.Info.Target.IsMultisample() && Info.Target.IsMultisample()) + { + // _gd.HelperShader.CopyMSToNonMS(_gd, cbs, src, dst, srcLayer, dstLayer, 1); + } + else if (dst.Info.Target.IsMultisample() && !Info.Target.IsMultisample()) + { + // _gd.HelperShader.CopyNonMSToMS(_gd, cbs, src, dst, srcLayer, dstLayer, 1); + } + else if (dst.Info.BytesPerPixel != Info.BytesPerPixel) + { + // _gd.HelperShader.CopyIncompatibleFormats(_gd, cbs, src, dst, srcLayer, dstLayer, srcLevel, dstLevel, 1, 1); + } + else if (src.Info.Format.IsDepthOrStencil() != dst.Info.Format.IsDepthOrStencil()) + { + // _gd.HelperShader.CopyColor(_gd, cbs, src, dst, srcLayer, dstLayer, srcLevel, dstLevel, 1, 1); + } + else + { + TextureCopy.Copy( + cbs, + srcImage, + dstImage, + src.Info, + dst.Info, + srcLayer, + dstLayer, + srcLevel, + dstLevel, + 1, + 1); + } + } + + public void CopyTo(ITexture destination, Extents2D srcRegion, Extents2D dstRegion, bool linearFilter) + { + if (!Renderer.CommandBufferPool.OwnedByCurrentThread) + { + Logger.Warning?.PrintMsg(LogClass.Gpu, "Metal doesn't currently support scaled blit on background thread."); + + return; + } + + var dst = (Texture)destination; + + bool isDepthOrStencil = dst.Info.Format.IsDepthOrStencil(); + + Pipeline.Blit(this, dst, srcRegion, dstRegion, isDepthOrStencil, linearFilter); + } + + public void CopyTo(BufferRange range, int layer, int level, int stride) + { + var cbs = Pipeline.Cbs; + + int outSize = Info.GetMipSize(level); + int hostSize = GetBufferDataLength(outSize); + + int offset = range.Offset; + + var autoBuffer = Renderer.BufferManager.GetBuffer(range.Handle, true); + var mtlBuffer = autoBuffer.Get(cbs, range.Offset, outSize).Value; + + if (PrepareOutputBuffer(cbs, hostSize, mtlBuffer, out MTLBuffer copyToBuffer, out BufferHolder tempCopyHolder)) + { + offset = 0; + } + + CopyFromOrToBuffer(cbs, copyToBuffer, MtlTexture, hostSize, true, layer, level, 1, 1, singleSlice: true, offset, stride); + + if (tempCopyHolder != null) + { + CopyDataToOutputBuffer(cbs, tempCopyHolder, autoBuffer, hostSize, range.Offset); + tempCopyHolder.Dispose(); + } + } + + public ITexture CreateView(TextureCreateInfo info, int firstLayer, int firstLevel) + { + return new Texture(Device, Renderer, Pipeline, info, _identitySwizzleHandle, firstLayer, firstLevel); + } + + private void CopyDataToBuffer(Span storage, ReadOnlySpan input) + { + if (NeedsD24S8Conversion()) + { + FormatConverter.ConvertD24S8ToD32FS8(storage, input); + return; + } + + input.CopyTo(storage); + } + + private ReadOnlySpan GetDataFromBuffer(ReadOnlySpan storage, int size, Span output) + { + if (NeedsD24S8Conversion()) + { + if (output.IsEmpty) + { + output = new byte[GetBufferDataLength(size)]; + } + + FormatConverter.ConvertD32FS8ToD24S8(output, storage); + return output; + } + + return storage; + } + + private bool PrepareOutputBuffer(CommandBufferScoped cbs, int hostSize, MTLBuffer target, out MTLBuffer copyTarget, out BufferHolder copyTargetHolder) + { + if (NeedsD24S8Conversion()) + { + copyTargetHolder = Renderer.BufferManager.Create(hostSize); + copyTarget = copyTargetHolder.GetBuffer().Get(cbs, 0, hostSize).Value; + + return true; + } + + copyTarget = target; + copyTargetHolder = null; + + return false; + } + + private void CopyDataToOutputBuffer(CommandBufferScoped cbs, BufferHolder hostData, Auto copyTarget, int hostSize, int dstOffset) + { + if (NeedsD24S8Conversion()) + { + Renderer.HelperShader.ConvertD32S8ToD24S8(cbs, hostData, copyTarget, hostSize / (2 * sizeof(int)), dstOffset); + } + } + + private bool NeedsD24S8Conversion() + { + return FormatTable.IsD24S8(Info.Format) && MtlFormat == MTLPixelFormat.Depth32FloatStencil8; + } + + public void CopyFromOrToBuffer( + CommandBufferScoped cbs, + MTLBuffer buffer, + MTLTexture image, + int size, + bool to, + int dstLayer, + int dstLevel, + int dstLayers, + int dstLevels, + bool singleSlice, + int offset = 0, + int stride = 0) + { + MTLBlitCommandEncoder blitCommandEncoder = cbs.Encoders.EnsureBlitEncoder(); + + bool is3D = Info.Target == Target.Texture3D; + int width = Math.Max(1, Info.Width >> dstLevel); + int height = Math.Max(1, Info.Height >> dstLevel); + int depth = is3D && !singleSlice ? Math.Max(1, Info.Depth >> dstLevel) : 1; + int layers = dstLayers; + int levels = dstLevels; + + for (int oLevel = 0; oLevel < levels; oLevel++) + { + int level = oLevel + dstLevel; + int mipSize = Info.GetMipSize2D(level); + + int mipSizeLevel = GetBufferDataLength(is3D && !singleSlice + ? Info.GetMipSize(level) + : mipSize * dstLayers); + + int endOffset = offset + mipSizeLevel; + + if ((uint)endOffset > (uint)size) + { + break; + } + + for (int oLayer = 0; oLayer < layers; oLayer++) + { + int layer = !is3D ? dstLayer + oLayer : 0; + int z = is3D ? dstLayer + oLayer : 0; + + if (to) + { + blitCommandEncoder.CopyFromTexture( + image, + (ulong)layer, + (ulong)level, + new MTLOrigin { z = (ulong)z }, + new MTLSize { width = (ulong)width, height = (ulong)height, depth = 1 }, + buffer, + (ulong)offset, + (ulong)Info.GetMipStride(level), + (ulong)mipSize + ); + } + else + { + blitCommandEncoder.CopyFromBuffer( + buffer, + (ulong)offset, + (ulong)Info.GetMipStride(level), + (ulong)mipSize, + new MTLSize { width = (ulong)width, height = (ulong)height, depth = 1 }, + image, + (ulong)(layer + oLayer), + (ulong)level, + new MTLOrigin { z = (ulong)z } + ); + } + + offset += mipSize; + } + + width = Math.Max(1, width >> 1); + height = Math.Max(1, height >> 1); + + if (Info.Target == Target.Texture3D) + { + depth = Math.Max(1, depth >> 1); + } + } + } + + private ReadOnlySpan GetData(CommandBufferPool cbp, PersistentFlushBuffer flushBuffer) + { + int size = 0; + + for (int level = 0; level < Info.Levels; level++) + { + size += Info.GetMipSize(level); + } + + size = GetBufferDataLength(size); + + Span result = flushBuffer.GetTextureData(cbp, this, size); + + return GetDataFromBuffer(result, size, result); + } + + private ReadOnlySpan GetData(CommandBufferPool cbp, PersistentFlushBuffer flushBuffer, int layer, int level) + { + int size = GetBufferDataLength(Info.GetMipSize(level)); + + Span result = flushBuffer.GetTextureData(cbp, this, size, layer, level); + + return GetDataFromBuffer(result, size, result); + } + + public PinnedSpan GetData() + { + BackgroundResource resources = Renderer.BackgroundResources.Get(); + + if (Renderer.CommandBufferPool.OwnedByCurrentThread) + { + Renderer.FlushAllCommands(); + + return PinnedSpan.UnsafeFromSpan(GetData(Renderer.CommandBufferPool, resources.GetFlushBuffer())); + } + + return PinnedSpan.UnsafeFromSpan(GetData(resources.GetPool(), resources.GetFlushBuffer())); + } + + public PinnedSpan GetData(int layer, int level) + { + BackgroundResource resources = Renderer.BackgroundResources.Get(); + + if (Renderer.CommandBufferPool.OwnedByCurrentThread) + { + Renderer.FlushAllCommands(); + + return PinnedSpan.UnsafeFromSpan(GetData(Renderer.CommandBufferPool, resources.GetFlushBuffer(), layer, level)); + } + + return PinnedSpan.UnsafeFromSpan(GetData(resources.GetPool(), resources.GetFlushBuffer(), layer, level)); + } + + public void SetData(MemoryOwner data) + { + var blitCommandEncoder = Pipeline.GetOrCreateBlitEncoder(); + + var dataSpan = data.Memory.Span; + + var buffer = Renderer.BufferManager.Create(dataSpan.Length); + buffer.SetDataUnchecked(0, dataSpan); + var mtlBuffer = buffer.GetBuffer(false).Get(Pipeline.Cbs).Value; + + int width = Info.Width; + int height = Info.Height; + int depth = Info.Depth; + int levels = Info.Levels; + int layers = Info.GetLayers(); + bool is3D = Info.Target == Target.Texture3D; + + int offset = 0; + + for (int level = 0; level < levels; level++) + { + int mipSize = Info.GetMipSize2D(level); + int endOffset = offset + mipSize; + + if ((uint)endOffset > (uint)dataSpan.Length) + { + return; + } + + for (int layer = 0; layer < layers; layer++) + { + blitCommandEncoder.CopyFromBuffer( + mtlBuffer, + (ulong)offset, + (ulong)Info.GetMipStride(level), + (ulong)mipSize, + new MTLSize { width = (ulong)width, height = (ulong)height, depth = is3D ? (ulong)depth : 1 }, + MtlTexture, + (ulong)layer, + (ulong)level, + new MTLOrigin() + ); + + offset += mipSize; + } + + width = Math.Max(1, width >> 1); + height = Math.Max(1, height >> 1); + + if (is3D) + { + depth = Math.Max(1, depth >> 1); + } + } + + // Cleanup + buffer.Dispose(); + } + + private void SetData(ReadOnlySpan data, int layer, int level, int layers, int levels, bool singleSlice) + { + int bufferDataLength = GetBufferDataLength(data.Length); + + using var bufferHolder = Renderer.BufferManager.Create(bufferDataLength); + + // TODO: loadInline logic + + var cbs = Pipeline.Cbs; + + CopyDataToBuffer(bufferHolder.GetDataStorage(0, bufferDataLength), data); + + var buffer = bufferHolder.GetBuffer().Get(cbs).Value; + var image = GetHandle(); + + CopyFromOrToBuffer(cbs, buffer, image, bufferDataLength, false, layer, level, layers, levels, singleSlice); + } + + public void SetData(MemoryOwner data, int layer, int level) + { + SetData(data.Memory.Span, layer, level, 1, 1, singleSlice: true); + + data.Dispose(); + } + + public void SetData(MemoryOwner data, int layer, int level, Rectangle region) + { + var blitCommandEncoder = Pipeline.GetOrCreateBlitEncoder(); + + ulong bytesPerRow = (ulong)Info.GetMipStride(level); + ulong bytesPerImage = 0; + if (MtlTexture.TextureType == MTLTextureType.Type3D) + { + bytesPerImage = bytesPerRow * (ulong)Info.Height; + } + + var dataSpan = data.Memory.Span; + + var buffer = Renderer.BufferManager.Create(dataSpan.Length); + buffer.SetDataUnchecked(0, dataSpan); + var mtlBuffer = buffer.GetBuffer(false).Get(Pipeline.Cbs).Value; + + blitCommandEncoder.CopyFromBuffer( + mtlBuffer, + 0, + bytesPerRow, + bytesPerImage, + new MTLSize { width = (ulong)region.Width, height = (ulong)region.Height, depth = 1 }, + MtlTexture, + (ulong)layer, + (ulong)level, + new MTLOrigin { x = (ulong)region.X, y = (ulong)region.Y } + ); + + // Cleanup + buffer.Dispose(); + } + + private int GetBufferDataLength(int length) + { + if (NeedsD24S8Conversion()) + { + return length * 2; + } + + return length; + } + + public void SetStorage(BufferRange buffer) + { + throw new NotImplementedException(); + } + + public override void Release() + { + if (_identityIsDifferent) + { + _identitySwizzleHandle.Dispose(); + } + + base.Release(); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/TextureArray.cs b/src/Ryujinx.Graphics.Metal/TextureArray.cs new file mode 100644 index 0000000000..ea2c744200 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/TextureArray.cs @@ -0,0 +1,93 @@ +using Ryujinx.Graphics.GAL; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + internal class TextureArray : ITextureArray + { + private readonly TextureRef[] _textureRefs; + private readonly TextureBuffer[] _bufferTextureRefs; + + private readonly bool _isBuffer; + private readonly Pipeline _pipeline; + + public TextureArray(int size, bool isBuffer, Pipeline pipeline) + { + if (isBuffer) + { + _bufferTextureRefs = new TextureBuffer[size]; + } + else + { + _textureRefs = new TextureRef[size]; + } + + _isBuffer = isBuffer; + _pipeline = pipeline; + } + + public void SetSamplers(int index, ISampler[] samplers) + { + for (int i = 0; i < samplers.Length; i++) + { + ISampler sampler = samplers[i]; + + if (sampler is SamplerHolder samp) + { + _textureRefs[index + i].Sampler = samp.GetSampler(); + } + else + { + _textureRefs[index + i].Sampler = default; + } + } + + SetDirty(); + } + + public void SetTextures(int index, ITexture[] textures) + { + for (int i = 0; i < textures.Length; i++) + { + ITexture texture = textures[i]; + + if (texture is TextureBuffer textureBuffer) + { + _bufferTextureRefs[index + i] = textureBuffer; + } + else if (texture is Texture tex) + { + _textureRefs[index + i].Storage = tex; + } + else if (!_isBuffer) + { + _textureRefs[index + i].Storage = null; + } + else + { + _bufferTextureRefs[index + i] = null; + } + } + + SetDirty(); + } + + public TextureRef[] GetTextureRefs() + { + return _textureRefs; + } + + public TextureBuffer[] GetBufferTextureRefs() + { + return _bufferTextureRefs; + } + + private void SetDirty() + { + _pipeline.DirtyTextures(); + } + + public void Dispose() { } + } +} diff --git a/src/Ryujinx.Graphics.Metal/TextureBase.cs b/src/Ryujinx.Graphics.Metal/TextureBase.cs new file mode 100644 index 0000000000..fecd64958b --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/TextureBase.cs @@ -0,0 +1,67 @@ +using Ryujinx.Graphics.GAL; +using SharpMetal.Metal; +using System; +using System.Runtime.Versioning; +using System.Threading; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + abstract class TextureBase : IDisposable + { + private int _isValid = 1; + + public bool Valid => Volatile.Read(ref _isValid) != 0; + + protected readonly Pipeline Pipeline; + protected readonly MTLDevice Device; + protected readonly MetalRenderer Renderer; + + protected MTLTexture MtlTexture; + + public readonly TextureCreateInfo Info; + public int Width => Info.Width; + public int Height => Info.Height; + public int Depth => Info.Depth; + + public MTLPixelFormat MtlFormat { get; protected set; } + public int FirstLayer { get; protected set; } + public int FirstLevel { get; protected set; } + + public TextureBase(MTLDevice device, MetalRenderer renderer, Pipeline pipeline, TextureCreateInfo info) + { + Device = device; + Renderer = renderer; + Pipeline = pipeline; + Info = info; + } + + public MTLTexture GetHandle() + { + if (_isValid == 0) + { + return new MTLTexture(IntPtr.Zero); + } + + return MtlTexture; + } + + public virtual void Release() + { + Dispose(); + } + + public void Dispose() + { + bool wasValid = Interlocked.Exchange(ref _isValid, 0) != 0; + + if (wasValid) + { + if (MtlTexture != IntPtr.Zero) + { + MtlTexture.Dispose(); + } + } + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/TextureBuffer.cs b/src/Ryujinx.Graphics.Metal/TextureBuffer.cs new file mode 100644 index 0000000000..483cef28be --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/TextureBuffer.cs @@ -0,0 +1,132 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.GAL; +using SharpMetal.Metal; +using System; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + class TextureBuffer : TextureBase, ITexture + { + private MTLTextureDescriptor _descriptor; + private BufferHandle _bufferHandle; + private int _offset; + private int _size; + + private int _bufferCount; + private Auto _buffer; + + public TextureBuffer(MTLDevice device, MetalRenderer renderer, Pipeline pipeline, TextureCreateInfo info) : base(device, renderer, pipeline, info) + { + MTLPixelFormat pixelFormat = FormatTable.GetFormat(Info.Format); + + _descriptor = new MTLTextureDescriptor + { + PixelFormat = pixelFormat, + Usage = MTLTextureUsage.Unknown, + TextureType = MTLTextureType.TextureBuffer, + Width = (ulong)Info.Width, + Height = (ulong)Info.Height, + }; + + MtlFormat = pixelFormat; + } + + public void RebuildStorage(bool write) + { + if (MtlTexture != IntPtr.Zero) + { + MtlTexture.Dispose(); + } + + if (_buffer == null) + { + MtlTexture = default; + } + else + { + DisposableBuffer buffer = _buffer.Get(Pipeline.Cbs, _offset, _size, write); + + _descriptor.Width = (uint)(_size / Info.BytesPerPixel); + MtlTexture = buffer.Value.NewTexture(_descriptor, (ulong)_offset, (ulong)_size); + } + } + + public void CopyTo(ITexture destination, int firstLayer, int firstLevel) + { + throw new NotSupportedException(); + } + + public void CopyTo(ITexture destination, int srcLayer, int dstLayer, int srcLevel, int dstLevel) + { + throw new NotSupportedException(); + } + + public void CopyTo(ITexture destination, Extents2D srcRegion, Extents2D dstRegion, bool linearFilter) + { + throw new NotSupportedException(); + } + + public ITexture CreateView(TextureCreateInfo info, int firstLayer, int firstLevel) + { + throw new NotSupportedException(); + } + + public PinnedSpan GetData() + { + return Renderer.GetBufferData(_bufferHandle, _offset, _size); + } + + public PinnedSpan GetData(int layer, int level) + { + return GetData(); + } + + public void CopyTo(BufferRange range, int layer, int level, int stride) + { + throw new NotImplementedException(); + } + + public void SetData(MemoryOwner data) + { + Renderer.SetBufferData(_bufferHandle, _offset, data.Memory.Span); + data.Dispose(); + } + + public void SetData(MemoryOwner data, int layer, int level) + { + throw new NotSupportedException(); + } + + public void SetData(MemoryOwner data, int layer, int level, Rectangle region) + { + throw new NotSupportedException(); + } + + public void SetStorage(BufferRange buffer) + { + if (_bufferHandle == buffer.Handle && + _offset == buffer.Offset && + _size == buffer.Size && + _bufferCount == Renderer.BufferManager.BufferCount) + { + return; + } + + _bufferHandle = buffer.Handle; + _offset = buffer.Offset; + _size = buffer.Size; + _bufferCount = Renderer.BufferManager.BufferCount; + + _buffer = Renderer.BufferManager.GetBuffer(_bufferHandle, false); + } + + public override void Release() + { + _descriptor.Dispose(); + + base.Release(); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/TextureCopy.cs b/src/Ryujinx.Graphics.Metal/TextureCopy.cs new file mode 100644 index 0000000000..b91a3cd898 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/TextureCopy.cs @@ -0,0 +1,265 @@ +using Ryujinx.Common; +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.GAL; +using SharpMetal.Metal; +using System; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + static class TextureCopy + { + public static ulong CopyFromOrToBuffer( + CommandBufferScoped cbs, + MTLBuffer buffer, + MTLTexture image, + TextureCreateInfo info, + bool to, + int dstLayer, + int dstLevel, + int x, + int y, + int width, + int height, + ulong offset = 0) + { + MTLBlitCommandEncoder blitCommandEncoder = cbs.Encoders.EnsureBlitEncoder(); + + bool is3D = info.Target == Target.Texture3D; + + int blockWidth = BitUtils.DivRoundUp(width, info.BlockWidth); + int blockHeight = BitUtils.DivRoundUp(height, info.BlockHeight); + ulong bytesPerRow = (ulong)BitUtils.AlignUp(blockWidth * info.BytesPerPixel, 4); + ulong bytesPerImage = bytesPerRow * (ulong)blockHeight; + + MTLOrigin origin = new MTLOrigin { x = (ulong)x, y = (ulong)y, z = is3D ? (ulong)dstLayer : 0 }; + MTLSize region = new MTLSize { width = (ulong)width, height = (ulong)height, depth = 1 }; + + uint layer = is3D ? 0 : (uint)dstLayer; + + if (to) + { + blitCommandEncoder.CopyFromTexture( + image, + layer, + (ulong)dstLevel, + origin, + region, + buffer, + offset, + bytesPerRow, + bytesPerImage); + } + else + { + blitCommandEncoder.CopyFromBuffer(buffer, offset, bytesPerRow, bytesPerImage, region, image, layer, (ulong)dstLevel, origin); + } + + return offset + bytesPerImage; + } + + public static void Copy( + CommandBufferScoped cbs, + MTLTexture srcImage, + MTLTexture dstImage, + TextureCreateInfo srcInfo, + TextureCreateInfo dstInfo, + int srcLayer, + int dstLayer, + int srcLevel, + int dstLevel) + { + int srcDepth = srcInfo.GetDepthOrLayers(); + int srcLevels = srcInfo.Levels; + + int dstDepth = dstInfo.GetDepthOrLayers(); + int dstLevels = dstInfo.Levels; + + if (dstInfo.Target == Target.Texture3D) + { + dstDepth = Math.Max(1, dstDepth >> dstLevel); + } + + int depth = Math.Min(srcDepth, dstDepth); + int levels = Math.Min(srcLevels, dstLevels); + + Copy( + cbs, + srcImage, + dstImage, + srcInfo, + dstInfo, + srcLayer, + dstLayer, + srcLevel, + dstLevel, + depth, + levels); + } + + public static void Copy( + CommandBufferScoped cbs, + MTLTexture srcImage, + MTLTexture dstImage, + TextureCreateInfo srcInfo, + TextureCreateInfo dstInfo, + int srcDepthOrLayer, + int dstDepthOrLayer, + int srcLevel, + int dstLevel, + int depthOrLayers, + int levels) + { + MTLBlitCommandEncoder blitCommandEncoder = cbs.Encoders.EnsureBlitEncoder(); + + int srcZ; + int srcLayer; + int srcDepth; + int srcLayers; + + if (srcInfo.Target == Target.Texture3D) + { + srcZ = srcDepthOrLayer; + srcLayer = 0; + srcDepth = depthOrLayers; + srcLayers = 1; + } + else + { + srcZ = 0; + srcLayer = srcDepthOrLayer; + srcDepth = 1; + srcLayers = depthOrLayers; + } + + int dstZ; + int dstLayer; + int dstLayers; + + if (dstInfo.Target == Target.Texture3D) + { + dstZ = dstDepthOrLayer; + dstLayer = 0; + dstLayers = 1; + } + else + { + dstZ = 0; + dstLayer = dstDepthOrLayer; + dstLayers = depthOrLayers; + } + + int srcWidth = srcInfo.Width; + int srcHeight = srcInfo.Height; + + int dstWidth = dstInfo.Width; + int dstHeight = dstInfo.Height; + + srcWidth = Math.Max(1, srcWidth >> srcLevel); + srcHeight = Math.Max(1, srcHeight >> srcLevel); + + dstWidth = Math.Max(1, dstWidth >> dstLevel); + dstHeight = Math.Max(1, dstHeight >> dstLevel); + + int blockWidth = 1; + int blockHeight = 1; + bool sizeInBlocks = false; + + MTLBuffer tempBuffer = default; + + if (srcInfo.Format != dstInfo.Format && (srcInfo.IsCompressed || dstInfo.IsCompressed)) + { + // Compressed alias copies need to happen through a temporary buffer. + // The data is copied from the source to the buffer, then the buffer to the destination. + // The length of the buffer should be the maximum slice size for the destination. + + tempBuffer = blitCommandEncoder.Device.NewBuffer((ulong)dstInfo.GetMipSize2D(0), MTLResourceOptions.ResourceStorageModePrivate); + } + + // When copying from a compressed to a non-compressed format, + // the non-compressed texture will have the size of the texture + // in blocks (not in texels), so we must adjust that size to + // match the size in texels of the compressed texture. + if (!srcInfo.IsCompressed && dstInfo.IsCompressed) + { + srcWidth *= dstInfo.BlockWidth; + srcHeight *= dstInfo.BlockHeight; + blockWidth = dstInfo.BlockWidth; + blockHeight = dstInfo.BlockHeight; + + sizeInBlocks = true; + } + else if (srcInfo.IsCompressed && !dstInfo.IsCompressed) + { + dstWidth *= srcInfo.BlockWidth; + dstHeight *= srcInfo.BlockHeight; + blockWidth = srcInfo.BlockWidth; + blockHeight = srcInfo.BlockHeight; + } + + int width = Math.Min(srcWidth, dstWidth); + int height = Math.Min(srcHeight, dstHeight); + + for (int level = 0; level < levels; level++) + { + // Stop copy if we are already out of the levels range. + if (level >= srcInfo.Levels || dstLevel + level >= dstInfo.Levels) + { + break; + } + + int copyWidth = sizeInBlocks ? BitUtils.DivRoundUp(width, blockWidth) : width; + int copyHeight = sizeInBlocks ? BitUtils.DivRoundUp(height, blockHeight) : height; + + int layers = Math.Max(dstLayers - dstLayer, srcLayers); + + for (int layer = 0; layer < layers; layer++) + { + if (tempBuffer.NativePtr != 0) + { + // Copy through the temp buffer + CopyFromOrToBuffer(cbs, tempBuffer, srcImage, srcInfo, true, srcLayer + layer, srcLevel + level, 0, 0, copyWidth, copyHeight); + + int dstBufferWidth = sizeInBlocks ? copyWidth * blockWidth : BitUtils.DivRoundUp(copyWidth, blockWidth); + int dstBufferHeight = sizeInBlocks ? copyHeight * blockHeight : BitUtils.DivRoundUp(copyHeight, blockHeight); + + CopyFromOrToBuffer(cbs, tempBuffer, dstImage, dstInfo, false, dstLayer + layer, dstLevel + level, 0, 0, dstBufferWidth, dstBufferHeight); + } + else if (srcInfo.Samples > 1 && srcInfo.Samples != dstInfo.Samples) + { + // TODO + + Logger.Warning?.PrintMsg(LogClass.Gpu, "Unsupported mismatching sample count copy"); + } + else + { + blitCommandEncoder.CopyFromTexture( + srcImage, + (ulong)(srcLayer + layer), + (ulong)(srcLevel + level), + new MTLOrigin { z = (ulong)srcZ }, + new MTLSize { width = (ulong)copyWidth, height = (ulong)copyHeight, depth = (ulong)srcDepth }, + dstImage, + (ulong)(dstLayer + layer), + (ulong)(dstLevel + level), + new MTLOrigin { z = (ulong)dstZ }); + } + } + + width = Math.Max(1, width >> 1); + height = Math.Max(1, height >> 1); + + if (srcInfo.Target == Target.Texture3D) + { + srcDepth = Math.Max(1, srcDepth >> 1); + } + } + + if (tempBuffer.NativePtr != 0) + { + tempBuffer.Dispose(); + } + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/VertexBufferState.cs b/src/Ryujinx.Graphics.Metal/VertexBufferState.cs new file mode 100644 index 0000000000..6591fe6d65 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/VertexBufferState.cs @@ -0,0 +1,60 @@ +using Ryujinx.Graphics.GAL; +using SharpMetal.Metal; +using System; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + readonly internal struct VertexBufferState + { + public static VertexBufferState Null => new(BufferHandle.Null, 0, 0, 0); + + private readonly BufferHandle _handle; + private readonly int _offset; + private readonly int _size; + + public readonly int Stride; + public readonly int Divisor; + + public VertexBufferState(BufferHandle handle, int offset, int size, int divisor, int stride = 0) + { + _handle = handle; + _offset = offset; + _size = size; + + Stride = stride; + Divisor = divisor; + } + + public (MTLBuffer, int) GetVertexBuffer(BufferManager bufferManager, CommandBufferScoped cbs) + { + Auto autoBuffer = null; + + if (_handle != BufferHandle.Null) + { + // TODO: Handle restride if necessary + + autoBuffer = bufferManager.GetBuffer(_handle, false, out int size); + + // The original stride must be reapplied in case it was rewritten. + // TODO: Handle restride if necessary + + if (_offset >= size) + { + autoBuffer = null; + } + } + + if (autoBuffer != null) + { + int offset = _offset; + var buffer = autoBuffer.Get(cbs, offset, _size).Value; + + return (buffer, offset); + } + + return (new MTLBuffer(IntPtr.Zero), 0); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/Window.cs b/src/Ryujinx.Graphics.Metal/Window.cs new file mode 100644 index 0000000000..65a47d2173 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Window.cs @@ -0,0 +1,231 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Metal.Effects; +using SharpMetal.ObjectiveCCore; +using SharpMetal.QuartzCore; +using System; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + [SupportedOSPlatform("macos")] + class Window : IWindow, IDisposable + { + public bool ScreenCaptureRequested { get; set; } + + private readonly MetalRenderer _renderer; + private readonly CAMetalLayer _metalLayer; + + private int _width; + private int _height; + + private int _requestedWidth; + private int _requestedHeight; + + // private bool _vsyncEnabled; + private AntiAliasing _currentAntiAliasing; + private bool _updateEffect; + private IPostProcessingEffect _effect; + private IScalingFilter _scalingFilter; + private bool _isLinear; + // private float _scalingFilterLevel; + private bool _updateScalingFilter; + private ScalingFilter _currentScalingFilter; + // private bool _colorSpacePassthroughEnabled; + + public Window(MetalRenderer renderer, CAMetalLayer metalLayer) + { + _renderer = renderer; + _metalLayer = metalLayer; + } + + private unsafe void ResizeIfNeeded() + { + if (_requestedWidth != 0 && _requestedHeight != 0) + { + // TODO: This is actually a CGSize, but there is no overload for that, so fill the first two fields of rect with the size. + var rect = new NSRect(_requestedWidth, _requestedHeight, 0, 0); + + ObjectiveC.objc_msgSend(_metalLayer, "setDrawableSize:", rect); + + _requestedWidth = 0; + _requestedHeight = 0; + } + } + + public unsafe void Present(ITexture texture, ImageCrop crop, Action swapBuffersCallback) + { + if (_renderer.Pipeline is Pipeline pipeline && texture is Texture tex) + { + ResizeIfNeeded(); + + var drawable = new CAMetalDrawable(ObjectiveC.IntPtr_objc_msgSend(_metalLayer, "nextDrawable")); + + _width = (int)drawable.Texture.Width; + _height = (int)drawable.Texture.Height; + + UpdateEffect(); + + if (_effect != null) + { + // TODO: Run Effects + // view = _effect.Run() + } + + int srcX0, srcX1, srcY0, srcY1; + + if (crop.Left == 0 && crop.Right == 0) + { + srcX0 = 0; + srcX1 = tex.Width; + } + else + { + srcX0 = crop.Left; + srcX1 = crop.Right; + } + + if (crop.Top == 0 && crop.Bottom == 0) + { + srcY0 = 0; + srcY1 = tex.Height; + } + else + { + srcY0 = crop.Top; + srcY1 = crop.Bottom; + } + + if (ScreenCaptureRequested) + { + // TODO: Support screen captures + + ScreenCaptureRequested = false; + } + + float ratioX = crop.IsStretched ? 1.0f : MathF.Min(1.0f, _height * crop.AspectRatioX / (_width * crop.AspectRatioY)); + float ratioY = crop.IsStretched ? 1.0f : MathF.Min(1.0f, _width * crop.AspectRatioY / (_height * crop.AspectRatioX)); + + int dstWidth = (int)(_width * ratioX); + int dstHeight = (int)(_height * ratioY); + + int dstPaddingX = (_width - dstWidth) / 2; + int dstPaddingY = (_height - dstHeight) / 2; + + int dstX0 = crop.FlipX ? _width - dstPaddingX : dstPaddingX; + int dstX1 = crop.FlipX ? dstPaddingX : _width - dstPaddingX; + + int dstY0 = crop.FlipY ? _height - dstPaddingY : dstPaddingY; + int dstY1 = crop.FlipY ? dstPaddingY : _height - dstPaddingY; + + if (_scalingFilter != null) + { + // TODO: Run scaling filter + } + + pipeline.Present( + drawable, + tex, + new Extents2D(srcX0, srcY0, srcX1, srcY1), + new Extents2D(dstX0, dstY0, dstX1, dstY1), + _isLinear); + } + } + + public void SetSize(int width, int height) + { + _requestedWidth = width; + _requestedHeight = height; + } + + public void ChangeVSyncMode(VSyncMode vSyncMode) + { + //_vSyncMode = vSyncMode; + } + + public void SetAntiAliasing(AntiAliasing effect) + { + if (_currentAntiAliasing == effect && _effect != null) + { + return; + } + + _currentAntiAliasing = effect; + + _updateEffect = true; + } + + public void SetScalingFilter(ScalingFilter type) + { + if (_currentScalingFilter == type && _effect != null) + { + return; + } + + _currentScalingFilter = type; + + _updateScalingFilter = true; + } + + public void SetScalingFilterLevel(float level) + { + // _scalingFilterLevel = level; + _updateScalingFilter = true; + } + + public void SetColorSpacePassthrough(bool colorSpacePassThroughEnabled) + { + // _colorSpacePassthroughEnabled = colorSpacePassThroughEnabled; + } + + private void UpdateEffect() + { + if (_updateEffect) + { + _updateEffect = false; + + switch (_currentAntiAliasing) + { + case AntiAliasing.Fxaa: + _effect?.Dispose(); + Logger.Warning?.PrintMsg(LogClass.Gpu, "FXAA not implemented for Metal backend!"); + break; + case AntiAliasing.None: + _effect?.Dispose(); + _effect = null; + break; + case AntiAliasing.SmaaLow: + case AntiAliasing.SmaaMedium: + case AntiAliasing.SmaaHigh: + case AntiAliasing.SmaaUltra: + // var quality = _currentAntiAliasing - AntiAliasing.SmaaLow; + Logger.Warning?.PrintMsg(LogClass.Gpu, "SMAA not implemented for Metal backend!"); + break; + } + } + + if (_updateScalingFilter) + { + _updateScalingFilter = false; + + switch (_currentScalingFilter) + { + case ScalingFilter.Bilinear: + case ScalingFilter.Nearest: + _scalingFilter?.Dispose(); + _scalingFilter = null; + _isLinear = _currentScalingFilter == ScalingFilter.Bilinear; + break; + case ScalingFilter.Fsr: + Logger.Warning?.PrintMsg(LogClass.Gpu, "FSR not implemented for Metal backend!"); + break; + } + } + } + + public void Dispose() + { + _metalLayer.Dispose(); + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/CodeGenContext.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/CodeGenContext.cs new file mode 100644 index 0000000000..2fa188ea95 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/CodeGenContext.cs @@ -0,0 +1,108 @@ +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using System.Text; + +namespace Ryujinx.Graphics.Shader.CodeGen.Msl +{ + class CodeGenContext + { + public const string Tab = " "; + + // The number of additional arguments that every function (except for the main one) must have (for instance support_buffer) + public const int AdditionalArgCount = 2; + + public StructuredFunction CurrentFunction { get; set; } + + public StructuredProgramInfo Info { get; } + + public AttributeUsage AttributeUsage { get; } + public ShaderDefinitions Definitions { get; } + public ShaderProperties Properties { get; } + public HostCapabilities HostCapabilities { get; } + public ILogger Logger { get; } + public TargetApi TargetApi { get; } + + public OperandManager OperandManager { get; } + + private readonly StringBuilder _sb; + + private int _level; + + private string _indentation; + + public CodeGenContext(StructuredProgramInfo info, CodeGenParameters parameters) + { + Info = info; + AttributeUsage = parameters.AttributeUsage; + Definitions = parameters.Definitions; + Properties = parameters.Properties; + HostCapabilities = parameters.HostCapabilities; + Logger = parameters.Logger; + TargetApi = parameters.TargetApi; + + OperandManager = new OperandManager(); + + _sb = new StringBuilder(); + } + + public void AppendLine() + { + _sb.AppendLine(); + } + + public void AppendLine(string str) + { + _sb.AppendLine(_indentation + str); + } + + public string GetCode() + { + return _sb.ToString(); + } + + public void EnterScope(string prefix = "") + { + AppendLine(prefix + "{"); + + _level++; + + UpdateIndentation(); + } + + public void LeaveScope(string suffix = "") + { + if (_level == 0) + { + return; + } + + _level--; + + UpdateIndentation(); + + AppendLine("}" + suffix); + } + + public StructuredFunction GetFunction(int id) + { + return Info.Functions[id]; + } + + private void UpdateIndentation() + { + _indentation = GetIndentation(_level); + } + + private static string GetIndentation(int level) + { + string indentation = string.Empty; + + for (int index = 0; index < level; index++) + { + indentation += Tab; + } + + return indentation; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Declarations.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Declarations.cs new file mode 100644 index 0000000000..912b162d26 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Declarations.cs @@ -0,0 +1,578 @@ +using Ryujinx.Common; +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using System; +using System.Collections.Generic; +using System.Linq; + +namespace Ryujinx.Graphics.Shader.CodeGen.Msl +{ + static class Declarations + { + /* + * Description of MSL Binding Model + * + * There are a few fundamental differences between how GLSL and MSL handle I/O. + * This comment will set out to describe the reasons why things are done certain ways + * and to describe the overall binding model that we're striving for here. + * + * Main I/O Structs + * + * Each stage has a main input and output struct (if applicable) labeled as [Stage][In/Out], i.e VertexIn. + * Every field within these structs is labeled with an [[attribute(n)]] property, + * and the overall struct is labeled with [[stage_in]] for input structs, and defined as the + * output type of the main shader function for the output struct. This struct also contains special + * attribute-based properties like [[position]] that would be "built-ins" in a GLSL context. + * + * These structs are passed as inputs to all inline functions due to containing "built-ins" + * that inline functions assume access to. + * + * Vertex & Zero Buffers + * + * Binding indices 0-16 are reserved for vertex buffers, and binding 18 is reserved for the zero buffer. + * + * Uniforms & Storage Buffers + * + * Uniforms and storage buffers are tightly packed into their respective argument buffers + * (effectively ignoring binding indices at shader level), with each pointer to the corresponding + * struct that defines the layout and fields of these buffers (usually just a single data array), laid + * out one after the other in ascending order of their binding index. + * + * The uniforms argument buffer is always bound at a fixed index of 20. + * The storage buffers argument buffer is always bound at a fixed index of 21. + * + * These structs are passed as inputs to all inline functions as in GLSL or SPIRV, + * uniforms and storage buffers would be globals, and inline functions assume access to these buffers. + * + * Samplers & Textures + * + * Metal does not have a combined image sampler like sampler2D in GLSL, as a result we need to bind + * an individual texture and a sampler object for each instance of a combined image sampler. + * Samplers and textures are bound in a shared argument buffer. This argument buffer is tightly packed + * (effectively ignoring binding indices at shader level), with texture and their samplers (if present) + * laid out one after the other in ascending order of their binding index. + * + * The samplers and textures argument buffer is always bound at a fixed index of 22. + * + */ + + public static int[] Declare(CodeGenContext context, StructuredProgramInfo info) + { + // TODO: Re-enable this warning + context.AppendLine("#pragma clang diagnostic ignored \"-Wunused-variable\""); + context.AppendLine(); + context.AppendLine("#include "); + context.AppendLine("#include "); + context.AppendLine(); + context.AppendLine("using namespace metal;"); + context.AppendLine(); + + var fsi = (info.HelperFunctionsMask & HelperFunctionsMask.FSI) != 0; + + DeclareInputAttributes(context, info.IoDefinitions.Where(x => IsUserDefined(x, StorageKind.Input))); + context.AppendLine(); + DeclareOutputAttributes(context, info.IoDefinitions.Where(x => x.StorageKind == StorageKind.Output)); + context.AppendLine(); + DeclareBufferStructures(context, context.Properties.ConstantBuffers.Values.OrderBy(x => x.Binding).ToArray(), true, fsi); + DeclareBufferStructures(context, context.Properties.StorageBuffers.Values.OrderBy(x => x.Binding).ToArray(), false, fsi); + + // We need to declare each set as a new struct + var textureDefinitions = context.Properties.Textures.Values + .GroupBy(x => x.Set) + .ToDictionary(x => x.Key, x => x.OrderBy(y => y.Binding).ToArray()); + + var imageDefinitions = context.Properties.Images.Values + .GroupBy(x => x.Set) + .ToDictionary(x => x.Key, x => x.OrderBy(y => y.Binding).ToArray()); + + var textureSets = textureDefinitions.Keys.ToArray(); + var imageSets = imageDefinitions.Keys.ToArray(); + + var sets = textureSets.Union(imageSets).ToArray(); + + foreach (var set in textureDefinitions) + { + DeclareTextures(context, set.Value, set.Key); + } + + foreach (var set in imageDefinitions) + { + DeclareImages(context, set.Value, set.Key, fsi); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.FindLSB) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/FindLSB.metal"); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.FindMSBS32) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/FindMSBS32.metal"); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.FindMSBU32) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/FindMSBU32.metal"); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.SwizzleAdd) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/SwizzleAdd.metal"); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.Precise) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/Precise.metal"); + } + + return sets; + } + + static bool IsUserDefined(IoDefinition ioDefinition, StorageKind storageKind) + { + return ioDefinition.StorageKind == storageKind && ioDefinition.IoVariable == IoVariable.UserDefined; + } + + public static void DeclareLocals(CodeGenContext context, StructuredFunction function, ShaderStage stage, bool isMainFunc = false) + { + if (isMainFunc) + { + // TODO: Support OaIndexing + if (context.Definitions.IaIndexing) + { + context.EnterScope($"array {Defaults.IAttributePrefix} = "); + + for (int i = 0; i < Constants.MaxAttributes; i++) + { + context.AppendLine($"in.{Defaults.IAttributePrefix}{i},"); + } + + context.LeaveScope(";"); + } + + DeclareMemories(context, context.Properties.LocalMemories.Values, isShared: false); + DeclareMemories(context, context.Properties.SharedMemories.Values, isShared: true); + + switch (stage) + { + case ShaderStage.Vertex: + context.AppendLine("VertexOut out = {};"); + // TODO: Only add if necessary + context.AppendLine("uint instance_index = instance_id + base_instance;"); + break; + case ShaderStage.Fragment: + context.AppendLine("FragmentOut out = {};"); + break; + } + + // TODO: Only add if necessary + if (stage != ShaderStage.Compute) + { + // MSL does not give us access to [[thread_index_in_simdgroup]] + // outside compute. But we may still need to provide this value in frag/vert. + context.AppendLine("uint thread_index_in_simdgroup = simd_prefix_exclusive_sum(1);"); + } + } + + foreach (AstOperand decl in function.Locals) + { + string name = context.OperandManager.DeclareLocal(decl); + + context.AppendLine(GetVarTypeName(decl.VarType) + " " + name + ";"); + } + } + + public static string GetVarTypeName(AggregateType type, bool atomic = false) + { + var s32 = atomic ? "atomic_int" : "int"; + var u32 = atomic ? "atomic_uint" : "uint"; + + return type switch + { + AggregateType.Void => "void", + AggregateType.Bool => "bool", + AggregateType.FP32 => "float", + AggregateType.S32 => s32, + AggregateType.U32 => u32, + AggregateType.Vector2 | AggregateType.Bool => "bool2", + AggregateType.Vector2 | AggregateType.FP32 => "float2", + AggregateType.Vector2 | AggregateType.S32 => "int2", + AggregateType.Vector2 | AggregateType.U32 => "uint2", + AggregateType.Vector3 | AggregateType.Bool => "bool3", + AggregateType.Vector3 | AggregateType.FP32 => "float3", + AggregateType.Vector3 | AggregateType.S32 => "int3", + AggregateType.Vector3 | AggregateType.U32 => "uint3", + AggregateType.Vector4 | AggregateType.Bool => "bool4", + AggregateType.Vector4 | AggregateType.FP32 => "float4", + AggregateType.Vector4 | AggregateType.S32 => "int4", + AggregateType.Vector4 | AggregateType.U32 => "uint4", + _ => throw new ArgumentException($"Invalid variable type \"{type}\"."), + }; + } + + private static void DeclareMemories(CodeGenContext context, IEnumerable memories, bool isShared) + { + string prefix = isShared ? "threadgroup " : string.Empty; + + foreach (var memory in memories) + { + string arraySize = ""; + if ((memory.Type & AggregateType.Array) != 0) + { + arraySize = $"[{memory.ArrayLength}]"; + } + var typeName = GetVarTypeName(memory.Type & ~AggregateType.Array); + context.AppendLine($"{prefix}{typeName} {memory.Name}{arraySize};"); + } + } + + private static void DeclareBufferStructures(CodeGenContext context, BufferDefinition[] buffers, bool constant, bool fsi) + { + var name = constant ? "ConstantBuffers" : "StorageBuffers"; + var addressSpace = constant ? "constant" : "device"; + + string[] bufferDec = new string[buffers.Length]; + + for (int i = 0; i < buffers.Length; i++) + { + BufferDefinition buffer = buffers[i]; + + var needsPadding = buffer.Layout == BufferLayout.Std140; + string fsiSuffix = !constant && fsi ? " [[raster_order_group(0)]]" : ""; + + bufferDec[i] = $"{addressSpace} {Defaults.StructPrefix}_{buffer.Name}* {buffer.Name}{fsiSuffix};"; + + context.AppendLine($"struct {Defaults.StructPrefix}_{buffer.Name}"); + context.EnterScope(); + + foreach (StructureField field in buffer.Type.Fields) + { + var type = field.Type; + type |= (needsPadding && (field.Type & AggregateType.Array) != 0) + ? AggregateType.Vector4 + : AggregateType.Invalid; + + type &= ~AggregateType.Array; + + string typeName = GetVarTypeName(type); + string arraySuffix = ""; + + if (field.Type.HasFlag(AggregateType.Array)) + { + if (field.ArrayLength > 0) + { + arraySuffix = $"[{field.ArrayLength}]"; + } + else + { + // Probably UB, but this is the approach that MVK takes + arraySuffix = "[1]"; + } + } + + context.AppendLine($"{typeName} {field.Name}{arraySuffix};"); + } + + context.LeaveScope(";"); + context.AppendLine(); + } + + context.AppendLine($"struct {name}"); + context.EnterScope(); + + foreach (var declaration in bufferDec) + { + context.AppendLine(declaration); + } + + context.LeaveScope(";"); + context.AppendLine(); + } + + private static void DeclareTextures(CodeGenContext context, TextureDefinition[] textures, int set) + { + var setName = GetNameForSet(set); + context.AppendLine($"struct {setName}"); + context.EnterScope(); + + List textureDec = []; + + foreach (TextureDefinition texture in textures) + { + if (texture.Type != SamplerType.None) + { + var textureTypeName = texture.Type.ToMslTextureType(texture.Format.GetComponentType()); + + if (texture.ArrayLength > 1) + { + textureTypeName = $"array<{textureTypeName}, {texture.ArrayLength}>"; + } + + textureDec.Add($"{textureTypeName} tex_{texture.Name};"); + } + + if (!texture.Separate && texture.Type != SamplerType.TextureBuffer) + { + var samplerType = "sampler"; + + if (texture.ArrayLength > 1) + { + samplerType = $"array<{samplerType}, {texture.ArrayLength}>"; + } + + textureDec.Add($"{samplerType} samp_{texture.Name};"); + } + } + + foreach (var declaration in textureDec) + { + context.AppendLine(declaration); + } + + context.LeaveScope(";"); + context.AppendLine(); + } + + private static void DeclareImages(CodeGenContext context, TextureDefinition[] images, int set, bool fsi) + { + var setName = GetNameForSet(set); + context.AppendLine($"struct {setName}"); + context.EnterScope(); + + string[] imageDec = new string[images.Length]; + + for (int i = 0; i < images.Length; i++) + { + TextureDefinition image = images[i]; + + var imageTypeName = image.Type.ToMslTextureType(image.Format.GetComponentType(), true); + if (image.ArrayLength > 1) + { + imageTypeName = $"array<{imageTypeName}, {image.ArrayLength}>"; + } + + string fsiSuffix = fsi ? " [[raster_order_group(0)]]" : ""; + + imageDec[i] = $"{imageTypeName} {image.Name}{fsiSuffix};"; + } + + foreach (var declaration in imageDec) + { + context.AppendLine(declaration); + } + + context.LeaveScope(";"); + context.AppendLine(); + } + + private static void DeclareInputAttributes(CodeGenContext context, IEnumerable inputs) + { + if (context.Definitions.Stage == ShaderStage.Compute) + { + return; + } + + switch (context.Definitions.Stage) + { + case ShaderStage.Vertex: + context.AppendLine("struct VertexIn"); + break; + case ShaderStage.Fragment: + context.AppendLine("struct FragmentIn"); + break; + } + + context.EnterScope(); + + if (context.Definitions.Stage == ShaderStage.Fragment) + { + // TODO: check if it's needed + context.AppendLine("float4 position [[position, invariant]];"); + context.AppendLine("bool front_facing [[front_facing]];"); + context.AppendLine("float2 point_coord [[point_coord]];"); + context.AppendLine("uint primitive_id [[primitive_id]];"); + } + + if (context.Definitions.IaIndexing) + { + // MSL does not support arrays in stage I/O + // We need to use the SPIRV-Cross workaround + for (int i = 0; i < Constants.MaxAttributes; i++) + { + var suffix = context.Definitions.Stage == ShaderStage.Fragment ? $"[[user(loc{i})]]" : $"[[attribute({i})]]"; + context.AppendLine($"float4 {Defaults.IAttributePrefix}{i} {suffix};"); + } + } + + if (inputs.Any()) + { + foreach (var ioDefinition in inputs.OrderBy(x => x.Location)) + { + if (context.Definitions.IaIndexing && ioDefinition.IoVariable == IoVariable.UserDefined) + { + continue; + } + + string iq = string.Empty; + + if (context.Definitions.Stage == ShaderStage.Fragment) + { + iq = context.Definitions.ImapTypes[ioDefinition.Location].GetFirstUsedType() switch + { + PixelImap.Constant => "[[flat]] ", + PixelImap.ScreenLinear => "[[center_no_perspective]] ", + _ => string.Empty, + }; + } + + string type = ioDefinition.IoVariable switch + { + // IoVariable.Position => "float4", + IoVariable.GlobalId => "uint3", + IoVariable.VertexId => "uint", + IoVariable.VertexIndex => "uint", + // IoVariable.PointCoord => "float2", + _ => GetVarTypeName(context.Definitions.GetUserDefinedType(ioDefinition.Location, isOutput: false)) + }; + string name = ioDefinition.IoVariable switch + { + // IoVariable.Position => "position", + IoVariable.GlobalId => "global_id", + IoVariable.VertexId => "vertex_id", + IoVariable.VertexIndex => "vertex_index", + // IoVariable.PointCoord => "point_coord", + _ => $"{Defaults.IAttributePrefix}{ioDefinition.Location}" + }; + string suffix = ioDefinition.IoVariable switch + { + // IoVariable.Position => "[[position, invariant]]", + IoVariable.GlobalId => "[[thread_position_in_grid]]", + IoVariable.VertexId => "[[vertex_id]]", + // TODO: Avoid potential redeclaration + IoVariable.VertexIndex => "[[vertex_id]]", + // IoVariable.PointCoord => "[[point_coord]]", + IoVariable.UserDefined => context.Definitions.Stage == ShaderStage.Fragment ? $"[[user(loc{ioDefinition.Location})]]" : $"[[attribute({ioDefinition.Location})]]", + _ => "" + }; + + context.AppendLine($"{type} {name} {iq}{suffix};"); + } + } + + context.LeaveScope(";"); + } + + private static void DeclareOutputAttributes(CodeGenContext context, IEnumerable outputs) + { + switch (context.Definitions.Stage) + { + case ShaderStage.Vertex: + context.AppendLine("struct VertexOut"); + break; + case ShaderStage.Fragment: + context.AppendLine("struct FragmentOut"); + break; + case ShaderStage.Compute: + context.AppendLine("struct KernelOut"); + break; + } + + context.EnterScope(); + + if (context.Definitions.OaIndexing) + { + // MSL does not support arrays in stage I/O + // We need to use the SPIRV-Cross workaround + for (int i = 0; i < Constants.MaxAttributes; i++) + { + context.AppendLine($"float4 {Defaults.OAttributePrefix}{i} [[user(loc{i})]];"); + } + } + + if (outputs.Any()) + { + outputs = outputs.OrderBy(x => x.Location); + + if (context.Definitions.Stage == ShaderStage.Fragment && context.Definitions.DualSourceBlend) + { + IoDefinition firstOutput = outputs.ElementAtOrDefault(0); + IoDefinition secondOutput = outputs.ElementAtOrDefault(1); + + var type1 = GetVarTypeName(context.Definitions.GetFragmentOutputColorType(firstOutput.Location)); + var type2 = GetVarTypeName(context.Definitions.GetFragmentOutputColorType(secondOutput.Location)); + + var name1 = $"color{firstOutput.Location}"; + var name2 = $"color{firstOutput.Location + 1}"; + + context.AppendLine($"{type1} {name1} [[color({firstOutput.Location}), index(0)]];"); + context.AppendLine($"{type2} {name2} [[color({firstOutput.Location}), index(1)]];"); + + outputs = outputs.Skip(2); + } + + foreach (var ioDefinition in outputs) + { + if (context.Definitions.OaIndexing && ioDefinition.IoVariable == IoVariable.UserDefined) + { + continue; + } + + string type = ioDefinition.IoVariable switch + { + IoVariable.Position => "float4", + IoVariable.PointSize => "float", + IoVariable.FragmentOutputColor => GetVarTypeName(context.Definitions.GetFragmentOutputColorType(ioDefinition.Location)), + IoVariable.FragmentOutputDepth => "float", + IoVariable.ClipDistance => "float", + _ => GetVarTypeName(context.Definitions.GetUserDefinedType(ioDefinition.Location, isOutput: true)) + }; + string name = ioDefinition.IoVariable switch + { + IoVariable.Position => "position", + IoVariable.PointSize => "point_size", + IoVariable.FragmentOutputColor => $"color{ioDefinition.Location}", + IoVariable.FragmentOutputDepth => "depth", + IoVariable.ClipDistance => "clip_distance", + _ => $"{Defaults.OAttributePrefix}{ioDefinition.Location}" + }; + string suffix = ioDefinition.IoVariable switch + { + IoVariable.Position => "[[position, invariant]]", + IoVariable.PointSize => "[[point_size]]", + IoVariable.UserDefined => $"[[user(loc{ioDefinition.Location})]]", + IoVariable.FragmentOutputColor => $"[[color({ioDefinition.Location})]]", + IoVariable.FragmentOutputDepth => "[[depth(any)]]", + IoVariable.ClipDistance => $"[[clip_distance]][{Defaults.TotalClipDistances}]", + _ => "" + }; + + context.AppendLine($"{type} {name} {suffix};"); + } + } + + context.LeaveScope(";"); + } + + private static void AppendHelperFunction(CodeGenContext context, string filename) + { + string code = EmbeddedResources.ReadAllText(filename); + + code = code.Replace("\t", CodeGenContext.Tab); + + context.AppendLine(code); + context.AppendLine(); + } + + public static string GetNameForSet(int set, bool forVar = false) + { + return (uint)set switch + { + Defaults.TexturesSetIndex => forVar ? "textures" : "Textures", + Defaults.ImagesSetIndex => forVar ? "images" : "Images", + _ => $"{(forVar ? "set" : "Set")}{set}" + }; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Defaults.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Defaults.cs new file mode 100644 index 0000000000..511a2f606c --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Defaults.cs @@ -0,0 +1,34 @@ +namespace Ryujinx.Graphics.Shader.CodeGen.Msl +{ + static class Defaults + { + public const string LocalNamePrefix = "temp"; + + public const string PerPatchAttributePrefix = "patchAttr"; + public const string IAttributePrefix = "inAttr"; + public const string OAttributePrefix = "outAttr"; + + public const string StructPrefix = "struct"; + + public const string ArgumentNamePrefix = "a"; + + public const string UndefinedName = "0"; + + public const int MaxVertexBuffers = 16; + + public const uint ZeroBufferIndex = MaxVertexBuffers; + public const uint BaseSetIndex = MaxVertexBuffers + 1; + + public const uint ConstantBuffersIndex = BaseSetIndex; + public const uint StorageBuffersIndex = BaseSetIndex + 1; + public const uint TexturesIndex = BaseSetIndex + 2; + public const uint ImagesIndex = BaseSetIndex + 3; + + public const uint ConstantBuffersSetIndex = 0; + public const uint StorageBuffersSetIndex = 1; + public const uint TexturesSetIndex = 2; + public const uint ImagesSetIndex = 3; + + public const int TotalClipDistances = 8; + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/FindLSB.metal b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/FindLSB.metal new file mode 100644 index 0000000000..ad786adb36 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/FindLSB.metal @@ -0,0 +1,5 @@ +template +inline T findLSB(T x) +{ + return select(ctz(x), T(-1), x == T(0)); +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/FindMSBS32.metal b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/FindMSBS32.metal new file mode 100644 index 0000000000..af4eb6cbdf --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/FindMSBS32.metal @@ -0,0 +1,5 @@ +template +inline T findMSBS32(T x) +{ + return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0)); +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/FindMSBU32.metal b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/FindMSBU32.metal new file mode 100644 index 0000000000..6d97c41a94 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/FindMSBU32.metal @@ -0,0 +1,6 @@ +template +inline T findMSBU32(T x) +{ + T v = select(x, T(-1) - x, x < T(0)); + return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0)); +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/HelperFunctionNames.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/HelperFunctionNames.cs new file mode 100644 index 0000000000..370159a0e6 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/HelperFunctionNames.cs @@ -0,0 +1,10 @@ +namespace Ryujinx.Graphics.Shader.CodeGen.Msl +{ + static class HelperFunctionNames + { + public static string FindLSB = "findLSB"; + public static string FindMSBS32 = "findMSBS32"; + public static string FindMSBU32 = "findMSBU32"; + public static string SwizzleAdd = "swizzleAdd"; + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/Precise.metal b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/Precise.metal new file mode 100644 index 0000000000..366bea1ac0 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/Precise.metal @@ -0,0 +1,14 @@ +template +[[clang::optnone]] T PreciseFAdd(T l, T r) { + return fma(T(1), l, r); +} + +template +[[clang::optnone]] T PreciseFSub(T l, T r) { + return fma(T(-1), r, l); +} + +template +[[clang::optnone]] T PreciseFMul(T l, T r) { + return fma(l, r, T(0)); +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/SwizzleAdd.metal b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/SwizzleAdd.metal new file mode 100644 index 0000000000..22a079b01c --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/SwizzleAdd.metal @@ -0,0 +1,7 @@ +float swizzleAdd(float x, float y, int mask, uint thread_index_in_simdgroup) +{ + float4 xLut = float4(1.0, -1.0, 1.0, 0.0); + float4 yLut = float4(1.0, 1.0, -1.0, 1.0); + int lutIdx = (mask >> (int(thread_index_in_simdgroup & 3u) * 2)) & 3; + return x * xLut[lutIdx] + y * yLut[lutIdx]; +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGen.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGen.cs new file mode 100644 index 0000000000..7156889875 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGen.cs @@ -0,0 +1,185 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using System; +using System.Text; +using static Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions.InstGenBallot; +using static Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions.InstGenBarrier; +using static Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions.InstGenCall; +using static Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions.InstGenHelper; +using static Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions.InstGenMemory; +using static Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions.InstGenVector; +using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo; + +namespace Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions +{ + static class InstGen + { + public static string GetExpression(CodeGenContext context, IAstNode node) + { + if (node is AstOperation operation) + { + return GetExpression(context, operation); + } + else if (node is AstOperand operand) + { + return context.OperandManager.GetExpression(context, operand); + } + + throw new ArgumentException($"Invalid node type \"{node?.GetType().Name ?? "null"}\"."); + } + + private static string GetExpression(CodeGenContext context, AstOperation operation) + { + Instruction inst = operation.Inst; + + InstInfo info = GetInstructionInfo(inst); + + if ((info.Type & InstType.Call) != 0) + { + bool atomic = (info.Type & InstType.Atomic) != 0; + + int arity = (int)(info.Type & InstType.ArityMask); + + StringBuilder builder = new(); + + if (atomic && (operation.StorageKind == StorageKind.StorageBuffer || operation.StorageKind == StorageKind.SharedMemory)) + { + AggregateType dstType = operation.Inst == Instruction.AtomicMaxS32 || operation.Inst == Instruction.AtomicMinS32 + ? AggregateType.S32 + : AggregateType.U32; + + var shared = operation.StorageKind == StorageKind.SharedMemory; + + builder.Append($"({(shared ? "threadgroup" : "device")} {Declarations.GetVarTypeName(dstType, true)}*)&{GenerateLoadOrStore(context, operation, isStore: false)}"); + + for (int argIndex = operation.SourcesCount - arity + 2; argIndex < operation.SourcesCount; argIndex++) + { + builder.Append($", {GetSourceExpr(context, operation.GetSource(argIndex), dstType)}, memory_order_relaxed"); + } + } + else + { + for (int argIndex = 0; argIndex < arity; argIndex++) + { + if (argIndex != 0) + { + builder.Append(", "); + } + + AggregateType dstType = GetSrcVarType(inst, argIndex); + + builder.Append(GetSourceExpr(context, operation.GetSource(argIndex), dstType)); + } + + if ((operation.Inst & Instruction.Mask) == Instruction.SwizzleAdd) + { + // SwizzleAdd takes one last argument, the thread_index_in_simdgroup + builder.Append(", thread_index_in_simdgroup"); + } + } + + return $"{info.OpName}({builder})"; + } + else if ((info.Type & InstType.Op) != 0) + { + string op = info.OpName; + + if (inst == Instruction.Return && operation.SourcesCount != 0) + { + return $"{op} {GetSourceExpr(context, operation.GetSource(0), context.CurrentFunction.ReturnType)}"; + } + if (inst == Instruction.Return && context.Definitions.Stage is ShaderStage.Vertex or ShaderStage.Fragment) + { + return $"{op} out"; + } + + int arity = (int)(info.Type & InstType.ArityMask); + + string[] expr = new string[arity]; + + for (int index = 0; index < arity; index++) + { + IAstNode src = operation.GetSource(index); + + string srcExpr = GetSourceExpr(context, src, GetSrcVarType(inst, index)); + + bool isLhs = arity == 2 && index == 0; + + expr[index] = Enclose(srcExpr, src, inst, info, isLhs); + } + + switch (arity) + { + case 0: + return op; + + case 1: + return op + expr[0]; + + case 2: + if (operation.ForcePrecise) + { + var func = (inst & Instruction.Mask) switch + { + Instruction.Add => "PreciseFAdd", + Instruction.Subtract => "PreciseFSub", + Instruction.Multiply => "PreciseFMul", + }; + + return $"{func}({expr[0]}, {expr[1]})"; + } + + return $"{expr[0]} {op} {expr[1]}"; + + case 3: + return $"{expr[0]} {op[0]} {expr[1]} {op[1]} {expr[2]}"; + } + } + else if ((info.Type & InstType.Special) != 0) + { + switch (inst & Instruction.Mask) + { + case Instruction.Ballot: + return Ballot(context, operation); + case Instruction.Call: + return Call(context, operation); + case Instruction.FSIBegin: + case Instruction.FSIEnd: + return "// FSI implemented with raster order groups in MSL"; + case Instruction.GroupMemoryBarrier: + case Instruction.MemoryBarrier: + case Instruction.Barrier: + return Barrier(context, operation); + case Instruction.ImageLoad: + case Instruction.ImageStore: + case Instruction.ImageAtomic: + return ImageLoadOrStore(context, operation); + case Instruction.Load: + return Load(context, operation); + case Instruction.Lod: + return Lod(context, operation); + case Instruction.Store: + return Store(context, operation); + case Instruction.TextureSample: + return TextureSample(context, operation); + case Instruction.TextureQuerySamples: + return TextureQuerySamples(context, operation); + case Instruction.TextureQuerySize: + return TextureQuerySize(context, operation); + case Instruction.PackHalf2x16: + return PackHalf2x16(context, operation); + case Instruction.UnpackHalf2x16: + return UnpackHalf2x16(context, operation); + case Instruction.VectorExtract: + return VectorExtract(context, operation); + case Instruction.VoteAllEqual: + return VoteAllEqual(context, operation); + } + } + + // TODO: Return this to being an error + return $"Unexpected instruction type \"{info.Type}\"."; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenBallot.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenBallot.cs new file mode 100644 index 0000000000..19a065d77b --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenBallot.cs @@ -0,0 +1,30 @@ +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; + +using static Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions.InstGenHelper; +using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo; + +namespace Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions +{ + static class InstGenBallot + { + public static string Ballot(CodeGenContext context, AstOperation operation) + { + AggregateType dstType = GetSrcVarType(operation.Inst, 0); + + string arg = GetSourceExpr(context, operation.GetSource(0), dstType); + char component = "xyzw"[operation.Index]; + + return $"uint4(as_type((simd_vote::vote_t)simd_ballot({arg})), 0, 0).{component}"; + } + + public static string VoteAllEqual(CodeGenContext context, AstOperation operation) + { + AggregateType dstType = GetSrcVarType(operation.Inst, 0); + + string arg = GetSourceExpr(context, operation.GetSource(0), dstType); + + return $"simd_all({arg}) || !simd_any({arg})"; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenBarrier.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenBarrier.cs new file mode 100644 index 0000000000..77f05defc4 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenBarrier.cs @@ -0,0 +1,15 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; + +namespace Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions +{ + static class InstGenBarrier + { + public static string Barrier(CodeGenContext context, AstOperation operation) + { + var device = (operation.Inst & Instruction.Mask) == Instruction.MemoryBarrier; + + return $"threadgroup_barrier(mem_flags::mem_{(device ? "device" : "threadgroup")})"; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenCall.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenCall.cs new file mode 100644 index 0000000000..44881deee9 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenCall.cs @@ -0,0 +1,60 @@ +using Ryujinx.Graphics.Shader.StructuredIr; + +using static Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions.InstGenHelper; + +namespace Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions +{ + static class InstGenCall + { + public static string Call(CodeGenContext context, AstOperation operation) + { + AstOperand funcId = (AstOperand)operation.GetSource(0); + + var function = context.GetFunction(funcId.Value); + + int argCount = operation.SourcesCount - 1; + int additionalArgCount = CodeGenContext.AdditionalArgCount + (context.Definitions.Stage != ShaderStage.Compute ? 1 : 0); + bool needsThreadIndex = false; + + // TODO: Replace this with a proper flag + if (function.Name.Contains("Shuffle")) + { + needsThreadIndex = true; + additionalArgCount++; + } + + string[] args = new string[argCount + additionalArgCount]; + + // Additional arguments + if (context.Definitions.Stage != ShaderStage.Compute) + { + args[0] = "in"; + args[1] = "constant_buffers"; + args[2] = "storage_buffers"; + + if (needsThreadIndex) + { + args[3] = "thread_index_in_simdgroup"; + } + } + else + { + args[0] = "constant_buffers"; + args[1] = "storage_buffers"; + + if (needsThreadIndex) + { + args[2] = "thread_index_in_simdgroup"; + } + } + + int argIndex = additionalArgCount; + for (int i = 0; i < argCount; i++) + { + args[argIndex++] = GetSourceExpr(context, operation.GetSource(i + 1), function.GetArgumentType(i)); + } + + return $"{function.Name}({string.Join(", ", args)})"; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenHelper.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenHelper.cs new file mode 100644 index 0000000000..49f3c63aa4 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenHelper.cs @@ -0,0 +1,222 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; + +using static Ryujinx.Graphics.Shader.CodeGen.Msl.TypeConversion; + +namespace Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions +{ + static class InstGenHelper + { + private static readonly InstInfo[] _infoTable; + + static InstGenHelper() + { + _infoTable = new InstInfo[(int)Instruction.Count]; + +#pragma warning disable IDE0055 // Disable formatting + Add(Instruction.AtomicAdd, InstType.AtomicBinary, "atomic_fetch_add_explicit"); + Add(Instruction.AtomicAnd, InstType.AtomicBinary, "atomic_fetch_and_explicit"); + Add(Instruction.AtomicCompareAndSwap, InstType.AtomicBinary, "atomic_compare_exchange_weak_explicit"); + Add(Instruction.AtomicMaxU32, InstType.AtomicBinary, "atomic_fetch_max_explicit"); + Add(Instruction.AtomicMinU32, InstType.AtomicBinary, "atomic_fetch_min_explicit"); + Add(Instruction.AtomicOr, InstType.AtomicBinary, "atomic_fetch_or_explicit"); + Add(Instruction.AtomicSwap, InstType.AtomicBinary, "atomic_exchange_explicit"); + Add(Instruction.AtomicXor, InstType.AtomicBinary, "atomic_fetch_xor_explicit"); + Add(Instruction.Absolute, InstType.CallUnary, "abs"); + Add(Instruction.Add, InstType.OpBinaryCom, "+", 2); + Add(Instruction.Ballot, InstType.Special); + Add(Instruction.Barrier, InstType.Special); + Add(Instruction.BitCount, InstType.CallUnary, "popcount"); + Add(Instruction.BitfieldExtractS32, InstType.CallTernary, "extract_bits"); + Add(Instruction.BitfieldExtractU32, InstType.CallTernary, "extract_bits"); + Add(Instruction.BitfieldInsert, InstType.CallQuaternary, "insert_bits"); + Add(Instruction.BitfieldReverse, InstType.CallUnary, "reverse_bits"); + Add(Instruction.BitwiseAnd, InstType.OpBinaryCom, "&", 6); + Add(Instruction.BitwiseExclusiveOr, InstType.OpBinaryCom, "^", 7); + Add(Instruction.BitwiseNot, InstType.OpUnary, "~", 0); + Add(Instruction.BitwiseOr, InstType.OpBinaryCom, "|", 8); + Add(Instruction.Call, InstType.Special); + Add(Instruction.Ceiling, InstType.CallUnary, "ceil"); + Add(Instruction.Clamp, InstType.CallTernary, "clamp"); + Add(Instruction.ClampU32, InstType.CallTernary, "clamp"); + Add(Instruction.CompareEqual, InstType.OpBinaryCom, "==", 5); + Add(Instruction.CompareGreater, InstType.OpBinary, ">", 4); + Add(Instruction.CompareGreaterOrEqual, InstType.OpBinary, ">=", 4); + Add(Instruction.CompareGreaterOrEqualU32, InstType.OpBinary, ">=", 4); + Add(Instruction.CompareGreaterU32, InstType.OpBinary, ">", 4); + Add(Instruction.CompareLess, InstType.OpBinary, "<", 4); + Add(Instruction.CompareLessOrEqual, InstType.OpBinary, "<=", 4); + Add(Instruction.CompareLessOrEqualU32, InstType.OpBinary, "<=", 4); + Add(Instruction.CompareLessU32, InstType.OpBinary, "<", 4); + Add(Instruction.CompareNotEqual, InstType.OpBinaryCom, "!=", 5); + Add(Instruction.ConditionalSelect, InstType.OpTernary, "?:", 12); + Add(Instruction.ConvertFP32ToFP64, 0); // MSL does not have a 64-bit FP + Add(Instruction.ConvertFP64ToFP32, 0); // MSL does not have a 64-bit FP + Add(Instruction.ConvertFP32ToS32, InstType.CallUnary, "int"); + Add(Instruction.ConvertFP32ToU32, InstType.CallUnary, "uint"); + Add(Instruction.ConvertFP64ToS32, 0); // MSL does not have a 64-bit FP + Add(Instruction.ConvertFP64ToU32, 0); // MSL does not have a 64-bit FP + Add(Instruction.ConvertS32ToFP32, InstType.CallUnary, "float"); + Add(Instruction.ConvertS32ToFP64, 0); // MSL does not have a 64-bit FP + Add(Instruction.ConvertU32ToFP32, InstType.CallUnary, "float"); + Add(Instruction.ConvertU32ToFP64, 0); // MSL does not have a 64-bit FP + Add(Instruction.Cosine, InstType.CallUnary, "cos"); + Add(Instruction.Ddx, InstType.CallUnary, "dfdx"); + Add(Instruction.Ddy, InstType.CallUnary, "dfdy"); + Add(Instruction.Discard, InstType.CallNullary, "discard_fragment"); + Add(Instruction.Divide, InstType.OpBinary, "/", 1); + Add(Instruction.EmitVertex, 0); // MSL does not have geometry shaders + Add(Instruction.EndPrimitive, 0); // MSL does not have geometry shaders + Add(Instruction.ExponentB2, InstType.CallUnary, "exp2"); + Add(Instruction.FSIBegin, InstType.Special); + Add(Instruction.FSIEnd, InstType.Special); + Add(Instruction.FindLSB, InstType.CallUnary, HelperFunctionNames.FindLSB); + Add(Instruction.FindMSBS32, InstType.CallUnary, HelperFunctionNames.FindMSBS32); + Add(Instruction.FindMSBU32, InstType.CallUnary, HelperFunctionNames.FindMSBU32); + Add(Instruction.Floor, InstType.CallUnary, "floor"); + Add(Instruction.FusedMultiplyAdd, InstType.CallTernary, "fma"); + Add(Instruction.GroupMemoryBarrier, InstType.Special); + Add(Instruction.ImageLoad, InstType.Special); + Add(Instruction.ImageStore, InstType.Special); + Add(Instruction.ImageAtomic, InstType.Special); // Metal 3.1+ + Add(Instruction.IsNan, InstType.CallUnary, "isnan"); + Add(Instruction.Load, InstType.Special); + Add(Instruction.Lod, InstType.Special); + Add(Instruction.LogarithmB2, InstType.CallUnary, "log2"); + Add(Instruction.LogicalAnd, InstType.OpBinaryCom, "&&", 9); + Add(Instruction.LogicalExclusiveOr, InstType.OpBinaryCom, "^", 10); + Add(Instruction.LogicalNot, InstType.OpUnary, "!", 0); + Add(Instruction.LogicalOr, InstType.OpBinaryCom, "||", 11); + Add(Instruction.LoopBreak, InstType.OpNullary, "break"); + Add(Instruction.LoopContinue, InstType.OpNullary, "continue"); + Add(Instruction.PackDouble2x32, 0); // MSL does not have a 64-bit FP + Add(Instruction.PackHalf2x16, InstType.Special); + Add(Instruction.Maximum, InstType.CallBinary, "max"); + Add(Instruction.MaximumU32, InstType.CallBinary, "max"); + Add(Instruction.MemoryBarrier, InstType.Special); + Add(Instruction.Minimum, InstType.CallBinary, "min"); + Add(Instruction.MinimumU32, InstType.CallBinary, "min"); + Add(Instruction.Modulo, InstType.CallBinary, "fmod"); + Add(Instruction.Multiply, InstType.OpBinaryCom, "*", 1); + Add(Instruction.MultiplyHighS32, InstType.CallBinary, "mulhi"); + Add(Instruction.MultiplyHighU32, InstType.CallBinary, "mulhi"); + Add(Instruction.Negate, InstType.OpUnary, "-"); + Add(Instruction.ReciprocalSquareRoot, InstType.CallUnary, "rsqrt"); + Add(Instruction.Return, InstType.OpNullary, "return"); + Add(Instruction.Round, InstType.CallUnary, "round"); + Add(Instruction.ShiftLeft, InstType.OpBinary, "<<", 3); + Add(Instruction.ShiftRightS32, InstType.OpBinary, ">>", 3); + Add(Instruction.ShiftRightU32, InstType.OpBinary, ">>", 3); + Add(Instruction.Shuffle, InstType.CallBinary, "simd_shuffle"); + Add(Instruction.ShuffleDown, InstType.CallBinary, "simd_shuffle_down"); + Add(Instruction.ShuffleUp, InstType.CallBinary, "simd_shuffle_up"); + Add(Instruction.ShuffleXor, InstType.CallBinary, "simd_shuffle_xor"); + Add(Instruction.Sine, InstType.CallUnary, "sin"); + Add(Instruction.SquareRoot, InstType.CallUnary, "sqrt"); + Add(Instruction.Store, InstType.Special); + Add(Instruction.Subtract, InstType.OpBinary, "-", 2); + Add(Instruction.SwizzleAdd, InstType.CallTernary, HelperFunctionNames.SwizzleAdd); + Add(Instruction.TextureSample, InstType.Special); + Add(Instruction.TextureQuerySamples, InstType.Special); + Add(Instruction.TextureQuerySize, InstType.Special); + Add(Instruction.Truncate, InstType.CallUnary, "trunc"); + Add(Instruction.UnpackDouble2x32, 0); // MSL does not have a 64-bit FP + Add(Instruction.UnpackHalf2x16, InstType.Special); + Add(Instruction.VectorExtract, InstType.Special); + Add(Instruction.VoteAll, InstType.CallUnary, "simd_all"); + Add(Instruction.VoteAllEqual, InstType.Special); + Add(Instruction.VoteAny, InstType.CallUnary, "simd_any"); +#pragma warning restore IDE0055 + } + + private static void Add(Instruction inst, InstType flags, string opName = null, int precedence = 0) + { + _infoTable[(int)inst] = new InstInfo(flags, opName, precedence); + } + + public static InstInfo GetInstructionInfo(Instruction inst) + { + return _infoTable[(int)(inst & Instruction.Mask)]; + } + + public static string GetSourceExpr(CodeGenContext context, IAstNode node, AggregateType dstType) + { + return ReinterpretCast(context, node, OperandManager.GetNodeDestType(context, node), dstType); + } + + public static string Enclose(string expr, IAstNode node, Instruction pInst, bool isLhs) + { + InstInfo pInfo = GetInstructionInfo(pInst); + + return Enclose(expr, node, pInst, pInfo, isLhs); + } + + public static string Enclose(string expr, IAstNode node, Instruction pInst, InstInfo pInfo, bool isLhs = false) + { + if (NeedsParenthesis(node, pInst, pInfo, isLhs)) + { + expr = "(" + expr + ")"; + } + + return expr; + } + + public static bool NeedsParenthesis(IAstNode node, Instruction pInst, InstInfo pInfo, bool isLhs) + { + // If the node isn't an operation, then it can only be an operand, + // and those never needs to be surrounded in parentheses. + if (node is not AstOperation operation) + { + // This is sort of a special case, if this is a negative constant, + // and it is consumed by a unary operation, we need to put on the parenthesis, + // as in MSL, while a sequence like ~-1 is valid, --2 is not. + if (IsNegativeConst(node) && pInfo.Type == InstType.OpUnary) + { + return true; + } + + return false; + } + + if ((pInfo.Type & (InstType.Call | InstType.Special)) != 0) + { + return false; + } + + InstInfo info = _infoTable[(int)(operation.Inst & Instruction.Mask)]; + + if ((info.Type & (InstType.Call | InstType.Special)) != 0) + { + return false; + } + + if (info.Precedence < pInfo.Precedence) + { + return false; + } + + if (info.Precedence == pInfo.Precedence && isLhs) + { + return false; + } + + if (pInst == operation.Inst && info.Type == InstType.OpBinaryCom) + { + return false; + } + + return true; + } + + private static bool IsNegativeConst(IAstNode node) + { + if (node is not AstOperand operand) + { + return false; + } + + return operand.Type == OperandType.Constant && operand.Value < 0; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenMemory.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenMemory.cs new file mode 100644 index 0000000000..6ccacc1c4b --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenMemory.cs @@ -0,0 +1,672 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using System; +using System.Text; +using static Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions.InstGenHelper; +using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo; + +namespace Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions +{ + static class InstGenMemory + { + public static string GenerateLoadOrStore(CodeGenContext context, AstOperation operation, bool isStore) + { + StorageKind storageKind = operation.StorageKind; + + string varName; + AggregateType varType; + int srcIndex = 0; + bool isStoreOrAtomic = operation.Inst == Instruction.Store || operation.Inst.IsAtomic(); + int inputsCount = isStoreOrAtomic ? operation.SourcesCount - 1 : operation.SourcesCount; + bool fieldHasPadding = false; + + if (operation.Inst == Instruction.AtomicCompareAndSwap) + { + inputsCount--; + } + + string fieldName = ""; + switch (storageKind) + { + case StorageKind.ConstantBuffer: + case StorageKind.StorageBuffer: + if (operation.GetSource(srcIndex++) is not AstOperand bindingIndex || bindingIndex.Type != OperandType.Constant) + { + throw new InvalidOperationException($"First input of {operation.Inst} with {storageKind} storage must be a constant operand."); + } + + int binding = bindingIndex.Value; + BufferDefinition buffer = storageKind == StorageKind.ConstantBuffer + ? context.Properties.ConstantBuffers[binding] + : context.Properties.StorageBuffers[binding]; + + if (operation.GetSource(srcIndex++) is not AstOperand fieldIndex || fieldIndex.Type != OperandType.Constant) + { + throw new InvalidOperationException($"Second input of {operation.Inst} with {storageKind} storage must be a constant operand."); + } + + StructureField field = buffer.Type.Fields[fieldIndex.Value]; + + fieldHasPadding = buffer.Layout == BufferLayout.Std140 + && ((field.Type & AggregateType.Vector4) == 0) + && ((field.Type & AggregateType.Array) != 0); + + varName = storageKind == StorageKind.ConstantBuffer + ? "constant_buffers" + : "storage_buffers"; + varName += "." + buffer.Name; + varName += "->" + field.Name; + varType = field.Type; + break; + + case StorageKind.LocalMemory: + case StorageKind.SharedMemory: + if (operation.GetSource(srcIndex++) is not AstOperand { Type: OperandType.Constant } bindingId) + { + throw new InvalidOperationException($"First input of {operation.Inst} with {storageKind} storage must be a constant operand."); + } + + MemoryDefinition memory = storageKind == StorageKind.LocalMemory + ? context.Properties.LocalMemories[bindingId.Value] + : context.Properties.SharedMemories[bindingId.Value]; + + varName = memory.Name; + varType = memory.Type; + break; + + case StorageKind.Input: + case StorageKind.InputPerPatch: + case StorageKind.Output: + case StorageKind.OutputPerPatch: + if (operation.GetSource(srcIndex++) is not AstOperand varId || varId.Type != OperandType.Constant) + { + throw new InvalidOperationException($"First input of {operation.Inst} with {storageKind} storage must be a constant operand."); + } + + IoVariable ioVariable = (IoVariable)varId.Value; + bool isOutput = storageKind.IsOutput(); + bool isPerPatch = storageKind.IsPerPatch(); + int location = -1; + int component = 0; + + if (context.Definitions.HasPerLocationInputOrOutput(ioVariable, isOutput)) + { + if (operation.GetSource(srcIndex++) is not AstOperand vecIndex || vecIndex.Type != OperandType.Constant) + { + throw new InvalidOperationException($"Second input of {operation.Inst} with {storageKind} storage must be a constant operand."); + } + + location = vecIndex.Value; + + if (operation.SourcesCount > srcIndex && + operation.GetSource(srcIndex) is AstOperand elemIndex && + elemIndex.Type == OperandType.Constant && + context.Definitions.HasPerLocationInputOrOutputComponent(ioVariable, vecIndex.Value, elemIndex.Value, isOutput)) + { + component = elemIndex.Value; + srcIndex++; + } + } + + (varName, varType) = IoMap.GetMslBuiltIn( + context.Definitions, + ioVariable, + location, + component, + isOutput, + isPerPatch); + break; + + default: + throw new InvalidOperationException($"Invalid storage kind {storageKind}."); + } + + for (; srcIndex < inputsCount; srcIndex++) + { + IAstNode src = operation.GetSource(srcIndex); + + if ((varType & AggregateType.ElementCountMask) != 0 && + srcIndex == inputsCount - 1 && + src is AstOperand elementIndex && + elementIndex.Type == OperandType.Constant) + { + varName += "." + "xyzw"[elementIndex.Value & 3]; + } + else + { + varName += $"[{GetSourceExpr(context, src, AggregateType.S32)}]"; + } + } + varName += fieldName; + varName += fieldHasPadding ? ".x" : ""; + + if (isStore) + { + varType &= AggregateType.ElementTypeMask; + varName = $"{varName} = {GetSourceExpr(context, operation.GetSource(srcIndex), varType)}"; + } + + return varName; + } + + public static string ImageLoadOrStore(CodeGenContext context, AstOperation operation) + { + AstTextureOperation texOp = (AstTextureOperation)operation; + + bool isArray = (texOp.Type & SamplerType.Array) != 0; + + var texCallBuilder = new StringBuilder(); + + int srcIndex = 0; + + string Src(AggregateType type) + { + return GetSourceExpr(context, texOp.GetSource(srcIndex++), type); + } + + string imageName = GetImageName(context, texOp, ref srcIndex); + texCallBuilder.Append(imageName); + texCallBuilder.Append('.'); + + if (texOp.Inst == Instruction.ImageAtomic) + { + texCallBuilder.Append((texOp.Flags & TextureFlags.AtomicMask) switch + { + TextureFlags.Add => "atomic_fetch_add", + TextureFlags.Minimum => "atomic_min", + TextureFlags.Maximum => "atomic_max", + TextureFlags.Increment => "atomic_fetch_add", + TextureFlags.Decrement => "atomic_fetch_sub", + TextureFlags.BitwiseAnd => "atomic_fetch_and", + TextureFlags.BitwiseOr => "atomic_fetch_or", + TextureFlags.BitwiseXor => "atomic_fetch_xor", + TextureFlags.Swap => "atomic_exchange", + TextureFlags.CAS => "atomic_compare_exchange_weak", + _ => "atomic_fetch_add", + }); + } + else + { + texCallBuilder.Append(texOp.Inst == Instruction.ImageLoad ? "read" : "write"); + } + + texCallBuilder.Append('('); + + var coordsBuilder = new StringBuilder(); + + int coordsCount = texOp.Type.GetDimensions(); + + if (coordsCount > 1) + { + string[] elems = new string[coordsCount]; + + for (int index = 0; index < coordsCount; index++) + { + elems[index] = Src(AggregateType.S32); + } + + coordsBuilder.Append($"uint{coordsCount}({string.Join(", ", elems)})"); + } + else + { + coordsBuilder.Append($"uint({Src(AggregateType.S32)})"); + } + + if (isArray) + { + coordsBuilder.Append(", "); + coordsBuilder.Append(Src(AggregateType.S32)); + } + + if (texOp.Inst == Instruction.ImageStore) + { + AggregateType type = texOp.Format.GetComponentType(); + + string[] cElems = new string[4]; + + for (int index = 0; index < 4; index++) + { + if (srcIndex < texOp.SourcesCount) + { + cElems[index] = Src(type); + } + else + { + cElems[index] = type switch + { + AggregateType.S32 => NumberFormatter.FormatInt(0), + AggregateType.U32 => NumberFormatter.FormatUint(0), + _ => NumberFormatter.FormatFloat(0), + }; + } + } + + string prefix = type switch + { + AggregateType.S32 => "int", + AggregateType.U32 => "uint", + AggregateType.FP32 => "float", + _ => string.Empty, + }; + + texCallBuilder.Append($"{prefix}4({string.Join(", ", cElems)})"); + texCallBuilder.Append(", "); + } + + texCallBuilder.Append(coordsBuilder); + + if (texOp.Inst == Instruction.ImageAtomic) + { + texCallBuilder.Append(", "); + + AggregateType type = texOp.Format.GetComponentType(); + + if ((texOp.Flags & TextureFlags.AtomicMask) == TextureFlags.CAS) + { + texCallBuilder.Append(Src(type)); // Compare value. + } + + string value = (texOp.Flags & TextureFlags.AtomicMask) switch + { + TextureFlags.Increment => NumberFormatter.FormatInt(1, type), // TODO: Clamp value + TextureFlags.Decrement => NumberFormatter.FormatInt(-1, type), // TODO: Clamp value + _ => Src(type), + }; + + texCallBuilder.Append(value); + // This doesn't match what the MSL spec document says so either + // it is wrong or the MSL compiler has a bug. + texCallBuilder.Append(")[0]"); + } + else + { + texCallBuilder.Append(')'); + + if (texOp.Inst == Instruction.ImageLoad) + { + texCallBuilder.Append(GetMaskMultiDest(texOp.Index)); + } + } + + return texCallBuilder.ToString(); + } + + public static string Load(CodeGenContext context, AstOperation operation) + { + return GenerateLoadOrStore(context, operation, isStore: false); + } + + public static string Lod(CodeGenContext context, AstOperation operation) + { + AstTextureOperation texOp = (AstTextureOperation)operation; + + int coordsCount = texOp.Type.GetDimensions(); + int coordsIndex = 0; + + string textureName = GetTextureName(context, texOp, ref coordsIndex); + string samplerName = GetSamplerName(context, texOp, ref coordsIndex); + + string coordsExpr; + + if (coordsCount > 1) + { + string[] elems = new string[coordsCount]; + + for (int index = 0; index < coordsCount; index++) + { + elems[index] = GetSourceExpr(context, texOp.GetSource(coordsIndex + index), AggregateType.FP32); + } + + coordsExpr = "float" + coordsCount + "(" + string.Join(", ", elems) + ")"; + } + else + { + coordsExpr = GetSourceExpr(context, texOp.GetSource(coordsIndex), AggregateType.FP32); + } + + var clamped = $"{textureName}.calculate_clamped_lod({samplerName}, {coordsExpr})"; + var unclamped = $"{textureName}.calculate_unclamped_lod({samplerName}, {coordsExpr})"; + + return $"float2({clamped}, {unclamped}){GetMask(texOp.Index)}"; + } + + public static string Store(CodeGenContext context, AstOperation operation) + { + return GenerateLoadOrStore(context, operation, isStore: true); + } + + public static string TextureSample(CodeGenContext context, AstOperation operation) + { + AstTextureOperation texOp = (AstTextureOperation)operation; + + bool isGather = (texOp.Flags & TextureFlags.Gather) != 0; + bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0; + bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0; + bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0; + bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0; + bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0; + bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0; + + bool isArray = (texOp.Type & SamplerType.Array) != 0; + bool isShadow = (texOp.Type & SamplerType.Shadow) != 0; + + var texCallBuilder = new StringBuilder(); + + bool colorIsVector = isGather || !isShadow; + + int srcIndex = 0; + + string Src(AggregateType type) + { + return GetSourceExpr(context, texOp.GetSource(srcIndex++), type); + } + + string textureName = GetTextureName(context, texOp, ref srcIndex); + string samplerName = GetSamplerName(context, texOp, ref srcIndex); + + texCallBuilder.Append(textureName); + texCallBuilder.Append('.'); + + if (intCoords) + { + texCallBuilder.Append("read("); + } + else + { + if (isGather) + { + texCallBuilder.Append("gather"); + } + else + { + texCallBuilder.Append("sample"); + } + + if (isShadow) + { + texCallBuilder.Append("_compare"); + } + + texCallBuilder.Append($"({samplerName}, "); + } + + int coordsCount = texOp.Type.GetDimensions(); + + int pCount = coordsCount; + + bool appended = false; + void Append(string str) + { + if (appended) + { + texCallBuilder.Append(", "); + } + else + { + appended = true; + } + + texCallBuilder.Append(str); + } + + AggregateType coordType = intCoords ? AggregateType.S32 : AggregateType.FP32; + + string AssemblePVector(int count) + { + string coords; + if (count > 1) + { + string[] elems = new string[count]; + + for (int index = 0; index < count; index++) + { + elems[index] = Src(coordType); + } + + coords = string.Join(", ", elems); + } + else + { + coords = Src(coordType); + } + + string prefix = intCoords ? "uint" : "float"; + + return prefix + (count > 1 ? count : "") + "(" + coords + ")"; + } + + Append(AssemblePVector(pCount)); + + if (isArray) + { + Append(Src(AggregateType.S32)); + } + + if (isShadow) + { + Append(Src(AggregateType.FP32)); + } + + if (hasDerivatives) + { + Logger.Warning?.PrintMsg(LogClass.Gpu, "Unused sampler derivatives!"); + } + + if (hasLodBias) + { + Logger.Warning?.PrintMsg(LogClass.Gpu, "Unused sample LOD bias!"); + } + + if (hasLodLevel) + { + if (intCoords) + { + Append(Src(coordType)); + } + else + { + Append($"level({Src(coordType)})"); + } + } + + string AssembleOffsetVector(int count) + { + if (count > 1) + { + string[] elems = new string[count]; + + for (int index = 0; index < count; index++) + { + elems[index] = Src(AggregateType.S32); + } + + return "int" + count + "(" + string.Join(", ", elems) + ")"; + } + else + { + return Src(AggregateType.S32); + } + } + + // TODO: Support reads with offsets + if (!intCoords) + { + if (hasOffset) + { + Append(AssembleOffsetVector(coordsCount)); + } + else if (hasOffsets) + { + Logger.Warning?.PrintMsg(LogClass.Gpu, "Multiple offsets on gathers are not yet supported!"); + } + } + + texCallBuilder.Append(')'); + texCallBuilder.Append(colorIsVector ? GetMaskMultiDest(texOp.Index) : ""); + + return texCallBuilder.ToString(); + } + + private static string GetTextureName(CodeGenContext context, AstTextureOperation texOp, ref int srcIndex) + { + TextureDefinition textureDefinition = context.Properties.Textures[texOp.GetTextureSetAndBinding()]; + string name = textureDefinition.Name; + string setName = Declarations.GetNameForSet(textureDefinition.Set, true); + + if (textureDefinition.ArrayLength != 1) + { + name = $"{name}[{GetSourceExpr(context, texOp.GetSource(srcIndex++), AggregateType.S32)}]"; + } + + return $"{setName}.tex_{name}"; + } + + private static string GetSamplerName(CodeGenContext context, AstTextureOperation texOp, ref int srcIndex) + { + var index = texOp.IsSeparate ? texOp.GetSamplerSetAndBinding() : texOp.GetTextureSetAndBinding(); + var sourceIndex = texOp.IsSeparate ? srcIndex++ : srcIndex + 1; + + TextureDefinition samplerDefinition = context.Properties.Textures[index]; + string name = samplerDefinition.Name; + string setName = Declarations.GetNameForSet(samplerDefinition.Set, true); + + if (samplerDefinition.ArrayLength != 1) + { + name = $"{name}[{GetSourceExpr(context, texOp.GetSource(sourceIndex), AggregateType.S32)}]"; + } + + return $"{setName}.samp_{name}"; + } + + private static string GetImageName(CodeGenContext context, AstTextureOperation texOp, ref int srcIndex) + { + TextureDefinition imageDefinition = context.Properties.Images[texOp.GetTextureSetAndBinding()]; + string name = imageDefinition.Name; + string setName = Declarations.GetNameForSet(imageDefinition.Set, true); + + if (imageDefinition.ArrayLength != 1) + { + name = $"{name}[{GetSourceExpr(context, texOp.GetSource(srcIndex++), AggregateType.S32)}]"; + } + + return $"{setName}.{name}"; + } + + private static string GetMaskMultiDest(int mask) + { + if (mask == 0x0) + { + return ""; + } + + string swizzle = "."; + + for (int i = 0; i < 4; i++) + { + if ((mask & (1 << i)) != 0) + { + swizzle += "xyzw"[i]; + } + } + + return swizzle; + } + + public static string TextureQuerySamples(CodeGenContext context, AstOperation operation) + { + AstTextureOperation texOp = (AstTextureOperation)operation; + + int srcIndex = 0; + + string textureName = GetTextureName(context, texOp, ref srcIndex); + + return $"{textureName}.get_num_samples()"; + } + + public static string TextureQuerySize(CodeGenContext context, AstOperation operation) + { + AstTextureOperation texOp = (AstTextureOperation)operation; + + var texCallBuilder = new StringBuilder(); + + int srcIndex = 0; + + string textureName = GetTextureName(context, texOp, ref srcIndex); + texCallBuilder.Append(textureName); + texCallBuilder.Append('.'); + + if (texOp.Index == 3) + { + texCallBuilder.Append("get_num_mip_levels()"); + } + else + { + context.Properties.Textures.TryGetValue(texOp.GetTextureSetAndBinding(), out TextureDefinition definition); + bool hasLod = !definition.Type.HasFlag(SamplerType.Multisample) && (definition.Type & SamplerType.Mask) != SamplerType.TextureBuffer; + bool isArray = definition.Type.HasFlag(SamplerType.Array); + texCallBuilder.Append("get_"); + + if (texOp.Index == 0) + { + texCallBuilder.Append("width"); + } + else if (texOp.Index == 1) + { + texCallBuilder.Append("height"); + } + else + { + if (isArray) + { + texCallBuilder.Append("array_size"); + } + else + { + texCallBuilder.Append("depth"); + } + } + + texCallBuilder.Append('('); + + if (hasLod && !isArray) + { + IAstNode lod = operation.GetSource(0); + string lodExpr = GetSourceExpr(context, lod, GetSrcVarType(operation.Inst, 0)); + + texCallBuilder.Append(lodExpr); + } + + texCallBuilder.Append(')'); + } + + return texCallBuilder.ToString(); + } + + public static string PackHalf2x16(CodeGenContext context, AstOperation operation) + { + IAstNode src0 = operation.GetSource(0); + IAstNode src1 = operation.GetSource(1); + + string src0Expr = GetSourceExpr(context, src0, GetSrcVarType(operation.Inst, 0)); + string src1Expr = GetSourceExpr(context, src1, GetSrcVarType(operation.Inst, 1)); + + return $"as_type(half2({src0Expr}, {src1Expr}))"; + } + + public static string UnpackHalf2x16(CodeGenContext context, AstOperation operation) + { + IAstNode src = operation.GetSource(0); + + string srcExpr = GetSourceExpr(context, src, GetSrcVarType(operation.Inst, 0)); + + return $"float2(as_type({srcExpr})){GetMask(operation.Index)}"; + } + + private static string GetMask(int index) + { + return $".{"xy".AsSpan(index, 1)}"; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenVector.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenVector.cs new file mode 100644 index 0000000000..9d8dae543b --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenVector.cs @@ -0,0 +1,32 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; + +using static Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions.InstGenHelper; +using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo; + +namespace Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions +{ + static class InstGenVector + { + public static string VectorExtract(CodeGenContext context, AstOperation operation) + { + IAstNode vector = operation.GetSource(0); + IAstNode index = operation.GetSource(1); + + string vectorExpr = GetSourceExpr(context, vector, OperandManager.GetNodeDestType(context, vector)); + + if (index is AstOperand indexOperand && indexOperand.Type == OperandType.Constant) + { + char elem = "xyzw"[indexOperand.Value]; + + return $"{vectorExpr}.{elem}"; + } + else + { + string indexExpr = GetSourceExpr(context, index, GetSrcVarType(operation.Inst, 1)); + + return $"{vectorExpr}[{indexExpr}]"; + } + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstInfo.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstInfo.cs new file mode 100644 index 0000000000..5e5d04d6b4 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstInfo.cs @@ -0,0 +1,18 @@ +namespace Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions +{ + readonly struct InstInfo + { + public InstType Type { get; } + + public string OpName { get; } + + public int Precedence { get; } + + public InstInfo(InstType type, string opName, int precedence) + { + Type = type; + OpName = opName; + Precedence = precedence; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstType.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstType.cs new file mode 100644 index 0000000000..d8f6bfed18 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstType.cs @@ -0,0 +1,35 @@ +using System; +using System.Diagnostics.CodeAnalysis; + +namespace Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions +{ + [Flags] + [SuppressMessage("Design", "CA1069: Enums values should not be duplicated")] + public enum InstType + { + OpNullary = Op | 0, + OpUnary = Op | 1, + OpBinary = Op | 2, + OpBinaryCom = Op | 2 | Commutative, + OpTernary = Op | 3, + + CallNullary = Call | 0, + CallUnary = Call | 1, + CallBinary = Call | 2, + CallTernary = Call | 3, + CallQuaternary = Call | 4, + + // The atomic instructions have one extra operand, + // for the storage slot and offset pair. + AtomicBinary = Call | Atomic | 3, + AtomicTernary = Call | Atomic | 4, + + Commutative = 1 << 8, + Op = 1 << 9, + Call = 1 << 10, + Atomic = 1 << 11, + Special = 1 << 12, + + ArityMask = 0xff, + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/IoMap.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/IoMap.cs new file mode 100644 index 0000000000..e02d0a61f8 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/IoMap.cs @@ -0,0 +1,83 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.Translation; +using System.Globalization; + +namespace Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions +{ + static class IoMap + { + public static (string, AggregateType) GetMslBuiltIn( + ShaderDefinitions definitions, + IoVariable ioVariable, + int location, + int component, + bool isOutput, + bool isPerPatch) + { + var returnValue = ioVariable switch + { + IoVariable.BaseInstance => ("base_instance", AggregateType.U32), + IoVariable.BaseVertex => ("base_vertex", AggregateType.U32), + IoVariable.CtaId => ("threadgroup_position_in_grid", AggregateType.Vector3 | AggregateType.U32), + IoVariable.ClipDistance => ("out.clip_distance", AggregateType.Array | AggregateType.FP32), + IoVariable.FragmentOutputColor => ($"out.color{location}", definitions.GetFragmentOutputColorType(location)), + IoVariable.FragmentOutputDepth => ("out.depth", AggregateType.FP32), + IoVariable.FrontFacing => ("in.front_facing", AggregateType.Bool), + IoVariable.GlobalId => ("thread_position_in_grid", AggregateType.Vector3 | AggregateType.U32), + IoVariable.InstanceId => ("instance_id", AggregateType.U32), + IoVariable.InstanceIndex => ("instance_index", AggregateType.U32), + IoVariable.InvocationId => ("INVOCATION_ID", AggregateType.S32), + IoVariable.PointCoord => ("in.point_coord", AggregateType.Vector2 | AggregateType.FP32), + IoVariable.PointSize => ("out.point_size", AggregateType.FP32), + IoVariable.Position => ("out.position", AggregateType.Vector4 | AggregateType.FP32), + IoVariable.PrimitiveId => ("in.primitive_id", AggregateType.U32), + IoVariable.SubgroupEqMask => ("thread_index_in_simdgroup >= 32 ? uint4(0, (1 << (thread_index_in_simdgroup - 32)), uint2(0)) : uint4(1 << thread_index_in_simdgroup, uint3(0))", AggregateType.Vector4 | AggregateType.U32), + IoVariable.SubgroupGeMask => ("uint4(insert_bits(0u, 0xFFFFFFFF, thread_index_in_simdgroup, 32 - thread_index_in_simdgroup), uint3(0)) & (uint4((uint)((simd_vote::vote_t)simd_ballot(true) & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)simd_ballot(true) >> 32) & 0xFFFFFFFF), 0, 0))", AggregateType.Vector4 | AggregateType.U32), + IoVariable.SubgroupGtMask => ("uint4(insert_bits(0u, 0xFFFFFFFF, thread_index_in_simdgroup + 1, 32 - thread_index_in_simdgroup - 1), uint3(0)) & (uint4((uint)((simd_vote::vote_t)simd_ballot(true) & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)simd_ballot(true) >> 32) & 0xFFFFFFFF), 0, 0))", AggregateType.Vector4 | AggregateType.U32), + IoVariable.SubgroupLaneId => ("thread_index_in_simdgroup", AggregateType.U32), + IoVariable.SubgroupLeMask => ("uint4(extract_bits(0xFFFFFFFF, 0, min(thread_index_in_simdgroup + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)thread_index_in_simdgroup + 1 - 32, 0)), uint2(0))", AggregateType.Vector4 | AggregateType.U32), + IoVariable.SubgroupLtMask => ("uint4(extract_bits(0xFFFFFFFF, 0, min(thread_index_in_simdgroup, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)thread_index_in_simdgroup - 32, 0)), uint2(0))", AggregateType.Vector4 | AggregateType.U32), + IoVariable.ThreadKill => ("simd_is_helper_thread()", AggregateType.Bool), + IoVariable.UserDefined => GetUserDefinedVariableName(definitions, location, component, isOutput, isPerPatch), + IoVariable.ThreadId => ("thread_position_in_threadgroup", AggregateType.Vector3 | AggregateType.U32), + IoVariable.VertexId => ("vertex_id", AggregateType.S32), + // gl_VertexIndex does not have a direct equivalent in MSL + IoVariable.VertexIndex => ("vertex_id", AggregateType.U32), + IoVariable.ViewportIndex => ("viewport_array_index", AggregateType.S32), + IoVariable.FragmentCoord => ("in.position", AggregateType.Vector4 | AggregateType.FP32), + _ => (null, AggregateType.Invalid), + }; + + if (returnValue.Item2 == AggregateType.Invalid) + { + Logger.Warning?.PrintMsg(LogClass.Gpu, $"Unable to find type for IoVariable {ioVariable}!"); + } + + return returnValue; + } + + private static (string, AggregateType) GetUserDefinedVariableName(ShaderDefinitions definitions, int location, int component, bool isOutput, bool isPerPatch) + { + string name = isPerPatch + ? Defaults.PerPatchAttributePrefix + : (isOutput ? Defaults.OAttributePrefix : Defaults.IAttributePrefix); + + if (location < 0) + { + return (name, definitions.GetUserDefinedType(0, isOutput)); + } + + name += location.ToString(CultureInfo.InvariantCulture); + + if (definitions.HasPerLocationInputOrOutputComponent(IoVariable.UserDefined, location, component, isOutput)) + { + name += "_" + "xyzw"[component & 3]; + } + + string prefix = isOutput ? "out" : "in"; + + return (prefix + "." + name, definitions.GetUserDefinedType(location, isOutput)); + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/MslGenerator.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/MslGenerator.cs new file mode 100644 index 0000000000..7de6ee5dd9 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/MslGenerator.cs @@ -0,0 +1,286 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using System; +using System.Linq; +using static Ryujinx.Graphics.Shader.CodeGen.Msl.TypeConversion; + +namespace Ryujinx.Graphics.Shader.CodeGen.Msl +{ + static class MslGenerator + { + public static string Generate(StructuredProgramInfo info, CodeGenParameters parameters) + { + if (parameters.Definitions.Stage is not (ShaderStage.Vertex or ShaderStage.Fragment or ShaderStage.Compute)) + { + Logger.Warning?.Print(LogClass.Gpu, $"Attempted to generate unsupported shader type {parameters.Definitions.Stage}!"); + return ""; + } + + CodeGenContext context = new(info, parameters); + + var sets = Declarations.Declare(context, info); + + if (info.Functions.Count != 0) + { + for (int i = 1; i < info.Functions.Count; i++) + { + PrintFunction(context, info.Functions[i], parameters.Definitions.Stage, sets); + + context.AppendLine(); + } + } + + PrintFunction(context, info.Functions[0], parameters.Definitions.Stage, sets, true); + + return context.GetCode(); + } + + private static void PrintFunction(CodeGenContext context, StructuredFunction function, ShaderStage stage, int[] sets, bool isMainFunc = false) + { + context.CurrentFunction = function; + + context.AppendLine(GetFunctionSignature(context, function, stage, sets, isMainFunc)); + context.EnterScope(); + + Declarations.DeclareLocals(context, function, stage, isMainFunc); + + PrintBlock(context, function.MainBlock, isMainFunc); + + // In case the shader hasn't returned, return + if (isMainFunc && stage != ShaderStage.Compute) + { + context.AppendLine("return out;"); + } + + context.LeaveScope(); + } + + private static string GetFunctionSignature( + CodeGenContext context, + StructuredFunction function, + ShaderStage stage, + int[] sets, + bool isMainFunc = false) + { + int additionalArgCount = isMainFunc ? 0 : CodeGenContext.AdditionalArgCount + (context.Definitions.Stage != ShaderStage.Compute ? 1 : 0); + bool needsThreadIndex = false; + + // TODO: Replace this with a proper flag + if (function.Name.Contains("Shuffle")) + { + needsThreadIndex = true; + additionalArgCount++; + } + + string[] args = new string[additionalArgCount + function.InArguments.Length + function.OutArguments.Length]; + + // All non-main functions need to be able to access the support_buffer as well + if (!isMainFunc) + { + if (stage != ShaderStage.Compute) + { + args[0] = stage == ShaderStage.Vertex ? "VertexIn in" : "FragmentIn in"; + args[1] = "constant ConstantBuffers &constant_buffers"; + args[2] = "device StorageBuffers &storage_buffers"; + + if (needsThreadIndex) + { + args[3] = "uint thread_index_in_simdgroup"; + } + } + else + { + args[0] = "constant ConstantBuffers &constant_buffers"; + args[1] = "device StorageBuffers &storage_buffers"; + + if (needsThreadIndex) + { + args[2] = "uint thread_index_in_simdgroup"; + } + } + } + + int argIndex = additionalArgCount; + for (int i = 0; i < function.InArguments.Length; i++) + { + args[argIndex++] = $"{Declarations.GetVarTypeName(function.InArguments[i])} {OperandManager.GetArgumentName(i)}"; + } + + for (int i = 0; i < function.OutArguments.Length; i++) + { + int j = i + function.InArguments.Length; + + args[argIndex++] = $"thread {Declarations.GetVarTypeName(function.OutArguments[i])} &{OperandManager.GetArgumentName(j)}"; + } + + string funcKeyword = "inline"; + string funcName = null; + string returnType = Declarations.GetVarTypeName(function.ReturnType); + + if (isMainFunc) + { + if (stage == ShaderStage.Vertex) + { + funcKeyword = "vertex"; + funcName = "vertexMain"; + returnType = "VertexOut"; + } + else if (stage == ShaderStage.Fragment) + { + funcKeyword = "fragment"; + funcName = "fragmentMain"; + returnType = "FragmentOut"; + } + else if (stage == ShaderStage.Compute) + { + funcKeyword = "kernel"; + funcName = "kernelMain"; + returnType = "void"; + } + + if (stage == ShaderStage.Vertex) + { + args = args.Prepend("VertexIn in [[stage_in]]").ToArray(); + } + else if (stage == ShaderStage.Fragment) + { + args = args.Prepend("FragmentIn in [[stage_in]]").ToArray(); + } + + // TODO: add these only if they are used + if (stage == ShaderStage.Vertex) + { + args = args.Append("uint vertex_id [[vertex_id]]").ToArray(); + args = args.Append("uint instance_id [[instance_id]]").ToArray(); + args = args.Append("uint base_instance [[base_instance]]").ToArray(); + args = args.Append("uint base_vertex [[base_vertex]]").ToArray(); + } + else if (stage == ShaderStage.Compute) + { + args = args.Append("uint3 threadgroup_position_in_grid [[threadgroup_position_in_grid]]").ToArray(); + args = args.Append("uint3 thread_position_in_grid [[thread_position_in_grid]]").ToArray(); + args = args.Append("uint3 thread_position_in_threadgroup [[thread_position_in_threadgroup]]").ToArray(); + args = args.Append("uint thread_index_in_simdgroup [[thread_index_in_simdgroup]]").ToArray(); + } + + args = args.Append($"constant ConstantBuffers &constant_buffers [[buffer({Defaults.ConstantBuffersIndex})]]").ToArray(); + args = args.Append($"device StorageBuffers &storage_buffers [[buffer({Defaults.StorageBuffersIndex})]]").ToArray(); + + foreach (var set in sets) + { + var bindingIndex = set + Defaults.BaseSetIndex; + args = args.Append($"constant {Declarations.GetNameForSet(set)} &{Declarations.GetNameForSet(set, true)} [[buffer({bindingIndex})]]").ToArray(); + } + } + + var funcPrefix = $"{funcKeyword} {returnType} {funcName ?? function.Name}("; + var indent = new string(' ', funcPrefix.Length); + + return $"{funcPrefix}{string.Join($", \n{indent}", args)})"; + } + + private static void PrintBlock(CodeGenContext context, AstBlock block, bool isMainFunction) + { + AstBlockVisitor visitor = new(block); + + visitor.BlockEntered += (sender, e) => + { + switch (e.Block.Type) + { + case AstBlockType.DoWhile: + context.AppendLine("do"); + break; + + case AstBlockType.Else: + context.AppendLine("else"); + break; + + case AstBlockType.ElseIf: + context.AppendLine($"else if ({GetCondExpr(context, e.Block.Condition)})"); + break; + + case AstBlockType.If: + context.AppendLine($"if ({GetCondExpr(context, e.Block.Condition)})"); + break; + + default: + throw new InvalidOperationException($"Found unexpected block type \"{e.Block.Type}\"."); + } + + context.EnterScope(); + }; + + visitor.BlockLeft += (sender, e) => + { + context.LeaveScope(); + + if (e.Block.Type == AstBlockType.DoWhile) + { + context.AppendLine($"while ({GetCondExpr(context, e.Block.Condition)});"); + } + }; + + bool supportsBarrierDivergence = context.HostCapabilities.SupportsShaderBarrierDivergence; + bool mayHaveReturned = false; + + foreach (IAstNode node in visitor.Visit()) + { + if (node is AstOperation operation) + { + if (!supportsBarrierDivergence) + { + if (operation.Inst == IntermediateRepresentation.Instruction.Barrier) + { + // Barrier on divergent control flow paths may cause the GPU to hang, + // so skip emitting the barrier for those cases. + if (visitor.Block.Type != AstBlockType.Main || mayHaveReturned || !isMainFunction) + { + context.Logger.Log($"Shader has barrier on potentially divergent block, the barrier will be removed."); + + continue; + } + } + else if (operation.Inst == IntermediateRepresentation.Instruction.Return) + { + mayHaveReturned = true; + } + } + + string expr = InstGen.GetExpression(context, operation); + + if (expr != null) + { + context.AppendLine(expr + ";"); + } + } + else if (node is AstAssignment assignment) + { + AggregateType dstType = OperandManager.GetNodeDestType(context, assignment.Destination); + AggregateType srcType = OperandManager.GetNodeDestType(context, assignment.Source); + + string dest = InstGen.GetExpression(context, assignment.Destination); + string src = ReinterpretCast(context, assignment.Source, srcType, dstType); + + context.AppendLine(dest + " = " + src + ";"); + } + else if (node is AstComment comment) + { + context.AppendLine("// " + comment.Comment); + } + else + { + throw new InvalidOperationException($"Found unexpected node type \"{node?.GetType().Name ?? "null"}\"."); + } + } + } + + private static string GetCondExpr(CodeGenContext context, IAstNode cond) + { + AggregateType srcType = OperandManager.GetNodeDestType(context, cond); + + return ReinterpretCast(context, cond, srcType, AggregateType.Bool); + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/NumberFormatter.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/NumberFormatter.cs new file mode 100644 index 0000000000..86cdfc0e6d --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/NumberFormatter.cs @@ -0,0 +1,94 @@ +using Ryujinx.Graphics.Shader.Translation; +using System; +using System.Globalization; + +namespace Ryujinx.Graphics.Shader.CodeGen.Msl +{ + static class NumberFormatter + { + private const int MaxDecimal = 256; + + public static bool TryFormat(int value, AggregateType dstType, out string formatted) + { + switch (dstType) + { + case AggregateType.FP32: + return TryFormatFloat(BitConverter.Int32BitsToSingle(value), out formatted); + case AggregateType.S32: + formatted = FormatInt(value); + break; + case AggregateType.U32: + formatted = FormatUint((uint)value); + break; + case AggregateType.Bool: + formatted = value != 0 ? "true" : "false"; + break; + default: + throw new ArgumentException($"Invalid variable type \"{dstType}\"."); + } + + return true; + } + + public static string FormatFloat(float value) + { + if (!TryFormatFloat(value, out string formatted)) + { + throw new ArgumentException("Failed to convert float value to string."); + } + + return formatted; + } + + public static bool TryFormatFloat(float value, out string formatted) + { + if (float.IsNaN(value) || float.IsInfinity(value)) + { + formatted = null; + + return false; + } + + formatted = value.ToString("G9", CultureInfo.InvariantCulture); + + if (!(formatted.Contains('.') || + formatted.Contains('e') || + formatted.Contains('E'))) + { + formatted += ".0f"; + } + + return true; + } + + public static string FormatInt(int value, AggregateType dstType) + { + return dstType switch + { + AggregateType.S32 => FormatInt(value), + AggregateType.U32 => FormatUint((uint)value), + _ => throw new ArgumentException($"Invalid variable type \"{dstType}\".") + }; + } + + public static string FormatInt(int value) + { + if (value <= MaxDecimal && value >= -MaxDecimal) + { + return value.ToString(CultureInfo.InvariantCulture); + } + + return $"as_type(0x{value.ToString("X", CultureInfo.InvariantCulture)})"; + } + + public static string FormatUint(uint value) + { + if (value <= MaxDecimal && value >= 0) + { + return value.ToString(CultureInfo.InvariantCulture) + "u"; + } + + return $"as_type(0x{value.ToString("X", CultureInfo.InvariantCulture)})"; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/OperandManager.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/OperandManager.cs new file mode 100644 index 0000000000..e131a645e9 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/OperandManager.cs @@ -0,0 +1,176 @@ +using Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions; +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo; + +namespace Ryujinx.Graphics.Shader.CodeGen.Msl +{ + class OperandManager + { + private readonly Dictionary _locals; + + public OperandManager() + { + _locals = new Dictionary(); + } + + public string DeclareLocal(AstOperand operand) + { + string name = $"{Defaults.LocalNamePrefix}_{_locals.Count}"; + + _locals.Add(operand, name); + + return name; + } + + public string GetExpression(CodeGenContext context, AstOperand operand) + { + return operand.Type switch + { + OperandType.Argument => GetArgumentName(operand.Value), + OperandType.Constant => NumberFormatter.FormatInt(operand.Value), + OperandType.LocalVariable => _locals[operand], + OperandType.Undefined => Defaults.UndefinedName, + _ => throw new ArgumentException($"Invalid operand type \"{operand.Type}\"."), + }; + } + + public static string GetArgumentName(int argIndex) + { + return $"{Defaults.ArgumentNamePrefix}{argIndex}"; + } + + public static AggregateType GetNodeDestType(CodeGenContext context, IAstNode node) + { + if (node is AstOperation operation) + { + if (operation.Inst == Instruction.Load || operation.Inst.IsAtomic()) + { + switch (operation.StorageKind) + { + case StorageKind.ConstantBuffer: + case StorageKind.StorageBuffer: + if (operation.GetSource(0) is not AstOperand bindingIndex || bindingIndex.Type != OperandType.Constant) + { + throw new InvalidOperationException($"First input of {operation.Inst} with {operation.StorageKind} storage must be a constant operand."); + } + + if (operation.GetSource(1) is not AstOperand fieldIndex || fieldIndex.Type != OperandType.Constant) + { + throw new InvalidOperationException($"Second input of {operation.Inst} with {operation.StorageKind} storage must be a constant operand."); + } + + BufferDefinition buffer = operation.StorageKind == StorageKind.ConstantBuffer + ? context.Properties.ConstantBuffers[bindingIndex.Value] + : context.Properties.StorageBuffers[bindingIndex.Value]; + StructureField field = buffer.Type.Fields[fieldIndex.Value]; + + return field.Type & AggregateType.ElementTypeMask; + + case StorageKind.LocalMemory: + case StorageKind.SharedMemory: + if (operation.GetSource(0) is not AstOperand { Type: OperandType.Constant } bindingId) + { + throw new InvalidOperationException($"First input of {operation.Inst} with {operation.StorageKind} storage must be a constant operand."); + } + + MemoryDefinition memory = operation.StorageKind == StorageKind.LocalMemory + ? context.Properties.LocalMemories[bindingId.Value] + : context.Properties.SharedMemories[bindingId.Value]; + + return memory.Type & AggregateType.ElementTypeMask; + + case StorageKind.Input: + case StorageKind.InputPerPatch: + case StorageKind.Output: + case StorageKind.OutputPerPatch: + if (operation.GetSource(0) is not AstOperand varId || varId.Type != OperandType.Constant) + { + throw new InvalidOperationException($"First input of {operation.Inst} with {operation.StorageKind} storage must be a constant operand."); + } + + IoVariable ioVariable = (IoVariable)varId.Value; + bool isOutput = operation.StorageKind == StorageKind.Output || operation.StorageKind == StorageKind.OutputPerPatch; + bool isPerPatch = operation.StorageKind == StorageKind.InputPerPatch || operation.StorageKind == StorageKind.OutputPerPatch; + int location = 0; + int component = 0; + + if (context.Definitions.HasPerLocationInputOrOutput(ioVariable, isOutput)) + { + if (operation.GetSource(1) is not AstOperand vecIndex || vecIndex.Type != OperandType.Constant) + { + throw new InvalidOperationException($"Second input of {operation.Inst} with {operation.StorageKind} storage must be a constant operand."); + } + + location = vecIndex.Value; + + if (operation.SourcesCount > 2 && + operation.GetSource(2) is AstOperand elemIndex && + elemIndex.Type == OperandType.Constant && + context.Definitions.HasPerLocationInputOrOutputComponent(ioVariable, location, elemIndex.Value, isOutput)) + { + component = elemIndex.Value; + } + } + + (_, AggregateType varType) = IoMap.GetMslBuiltIn( + context.Definitions, + ioVariable, + location, + component, + isOutput, + isPerPatch); + + return varType & AggregateType.ElementTypeMask; + } + } + else if (operation.Inst == Instruction.Call) + { + AstOperand funcId = (AstOperand)operation.GetSource(0); + + Debug.Assert(funcId.Type == OperandType.Constant); + + return context.GetFunction(funcId.Value).ReturnType; + } + else if (operation.Inst == Instruction.VectorExtract) + { + return GetNodeDestType(context, operation.GetSource(0)) & ~AggregateType.ElementCountMask; + } + else if (operation is AstTextureOperation texOp) + { + if (texOp.Inst == Instruction.ImageLoad || + texOp.Inst == Instruction.ImageStore || + texOp.Inst == Instruction.ImageAtomic) + { + return texOp.GetVectorType(texOp.Format.GetComponentType()); + } + else if (texOp.Inst == Instruction.TextureSample) + { + return texOp.GetVectorType(GetDestVarType(operation.Inst)); + } + } + + return GetDestVarType(operation.Inst); + } + else if (node is AstOperand operand) + { + if (operand.Type == OperandType.Argument) + { + int argIndex = operand.Value; + + return context.CurrentFunction.GetArgumentType(argIndex); + } + + return OperandInfo.GetVarType(operand); + } + else + { + throw new ArgumentException($"Invalid node type \"{node?.GetType().Name ?? "null"}\"."); + } + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Msl/TypeConversion.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/TypeConversion.cs new file mode 100644 index 0000000000..e145bb8b08 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Msl/TypeConversion.cs @@ -0,0 +1,93 @@ +using Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions; +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using System; + +namespace Ryujinx.Graphics.Shader.CodeGen.Msl +{ + static class TypeConversion + { + public static string ReinterpretCast( + CodeGenContext context, + IAstNode node, + AggregateType srcType, + AggregateType dstType) + { + if (node is AstOperand operand && operand.Type == OperandType.Constant) + { + if (NumberFormatter.TryFormat(operand.Value, dstType, out string formatted)) + { + return formatted; + } + } + + string expr = InstGen.GetExpression(context, node); + + return ReinterpretCast(expr, node, srcType, dstType); + } + + private static string ReinterpretCast(string expr, IAstNode node, AggregateType srcType, AggregateType dstType) + { + if (srcType == dstType) + { + return expr; + } + + if (srcType == AggregateType.FP32) + { + switch (dstType) + { + case AggregateType.Bool: + return $"(as_type({expr}) != 0)"; + case AggregateType.S32: + return $"as_type({expr})"; + case AggregateType.U32: + return $"as_type({expr})"; + } + } + else if (dstType == AggregateType.FP32) + { + switch (srcType) + { + case AggregateType.Bool: + return $"as_type({ReinterpretBoolToInt(expr, node, AggregateType.S32)})"; + case AggregateType.S32: + return $"as_type({expr})"; + case AggregateType.U32: + return $"as_type({expr})"; + } + } + else if (srcType == AggregateType.Bool) + { + return ReinterpretBoolToInt(expr, node, dstType); + } + else if (dstType == AggregateType.Bool) + { + expr = InstGenHelper.Enclose(expr, node, Instruction.CompareNotEqual, isLhs: true); + + return $"({expr} != 0)"; + } + else if (dstType == AggregateType.S32) + { + return $"int({expr})"; + } + else if (dstType == AggregateType.U32) + { + return $"uint({expr})"; + } + + throw new ArgumentException($"Invalid reinterpret cast from \"{srcType}\" to \"{dstType}\"."); + } + + private static string ReinterpretBoolToInt(string expr, IAstNode node, AggregateType dstType) + { + string trueExpr = NumberFormatter.FormatInt(IrConsts.True, dstType); + string falseExpr = NumberFormatter.FormatInt(IrConsts.False, dstType); + + expr = InstGenHelper.Enclose(expr, node, Instruction.ConditionalSelect, isLhs: false); + + return $"({expr} ? {trueExpr} : {falseExpr})"; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj b/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj index cfb188dafa..8b05d88293 100644 --- a/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj +++ b/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj @@ -15,4 +15,11 @@ + + + + + + + diff --git a/src/Ryujinx.Graphics.Shader/SamplerType.cs b/src/Ryujinx.Graphics.Shader/SamplerType.cs index a693495fa5..18285cd70a 100644 --- a/src/Ryujinx.Graphics.Shader/SamplerType.cs +++ b/src/Ryujinx.Graphics.Shader/SamplerType.cs @@ -155,5 +155,51 @@ public static string ToGlslImageType(this SamplerType type, AggregateType compon return typeName; } + + public static string ToMslTextureType(this SamplerType type, AggregateType aggregateType, bool image = false) + { + string typeName; + + if ((type & SamplerType.Shadow) != 0) + { + typeName = (type & SamplerType.Mask) switch + { + SamplerType.Texture2D => "depth2d", + SamplerType.TextureCube => "depthcube", + _ => throw new ArgumentException($"Invalid shadow texture type \"{type}\"."), + }; + } + else + { + typeName = (type & SamplerType.Mask) switch + { + SamplerType.Texture1D => "texture1d", + SamplerType.TextureBuffer => "texture_buffer", + SamplerType.Texture2D => "texture2d", + SamplerType.Texture3D => "texture3d", + SamplerType.TextureCube => "texturecube", + _ => throw new ArgumentException($"Invalid texture type \"{type}\"."), + }; + } + + if ((type & SamplerType.Multisample) != 0) + { + typeName += "_ms"; + } + + if ((type & SamplerType.Array) != 0) + { + typeName += "_array"; + } + + var format = aggregateType switch + { + AggregateType.S32 => "int", + AggregateType.U32 => "uint", + _ => "float" + }; + + return $"{typeName}<{format}{(image ? ", access::read_write" : "")}>"; + } } } diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs index 2a3d65e75e..b70def78cc 100644 --- a/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs +++ b/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs @@ -7,7 +7,14 @@ enum HelperFunctionsMask { MultiplyHighS32 = 1 << 2, MultiplyHighU32 = 1 << 3, + + FindLSB = 1 << 5, + FindMSBS32 = 1 << 6, + FindMSBU32 = 1 << 7, + SwizzleAdd = 1 << 10, FSI = 1 << 11, + + Precise = 1 << 13 } } diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs index 88053658dd..a1aef7f970 100644 --- a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs +++ b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs @@ -18,9 +18,10 @@ public static StructuredProgramInfo MakeStructuredProgram( ShaderDefinitions definitions, ResourceManager resourceManager, TargetLanguage targetLanguage, + bool precise, bool debugMode) { - StructuredProgramContext context = new(attributeUsage, definitions, resourceManager, debugMode); + StructuredProgramContext context = new(attributeUsage, definitions, resourceManager, precise, debugMode); for (int funcIndex = 0; funcIndex < functions.Count; funcIndex++) { @@ -321,8 +322,9 @@ AstTextureOperation GetAstTextureOperation(TextureOperation texOp) } // Those instructions needs to be emulated by using helper functions, - // because they are NVIDIA specific. Those flags helps the backend to - // decide which helper functions are needed on the final generated code. + // because they are NVIDIA specific or because the target language has + // no direct equivalent. Those flags helps the backend to decide which + // helper functions are needed on the final generated code. switch (operation.Inst) { case Instruction.MultiplyHighS32: @@ -331,6 +333,15 @@ AstTextureOperation GetAstTextureOperation(TextureOperation texOp) case Instruction.MultiplyHighU32: context.Info.HelperFunctionsMask |= HelperFunctionsMask.MultiplyHighU32; break; + case Instruction.FindLSB: + context.Info.HelperFunctionsMask |= HelperFunctionsMask.FindLSB; + break; + case Instruction.FindMSBS32: + context.Info.HelperFunctionsMask |= HelperFunctionsMask.FindMSBS32; + break; + case Instruction.FindMSBU32: + context.Info.HelperFunctionsMask |= HelperFunctionsMask.FindMSBU32; + break; case Instruction.SwizzleAdd: context.Info.HelperFunctionsMask |= HelperFunctionsMask.SwizzleAdd; break; diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramContext.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramContext.cs index 045662a1e5..c26086c72a 100644 --- a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramContext.cs +++ b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramContext.cs @@ -36,9 +36,10 @@ public StructuredProgramContext( AttributeUsage attributeUsage, ShaderDefinitions definitions, ResourceManager resourceManager, + bool precise, bool debugMode) { - Info = new StructuredProgramInfo(); + Info = new StructuredProgramInfo(precise); Definitions = definitions; ResourceManager = resourceManager; diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs index ded2f2a892..585497ed3f 100644 --- a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs +++ b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs @@ -10,11 +10,16 @@ class StructuredProgramInfo public HelperFunctionsMask HelperFunctionsMask { get; set; } - public StructuredProgramInfo() + public StructuredProgramInfo(bool precise) { Functions = new List(); IoDefinitions = new HashSet(); + + if (precise) + { + HelperFunctionsMask |= HelperFunctionsMask.Precise; + } } } } diff --git a/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs b/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs index 82a54db835..26c924e898 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs @@ -26,5 +26,6 @@ public enum FeatureFlags SharedMemory = 1 << 11, Store = 1 << 12, VtgAsCompute = 1 << 13, + Precise = 1 << 14, } } diff --git a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs index 94691a5b4c..83e4dc0ac2 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs @@ -43,6 +43,11 @@ private struct TextureMeta private readonly Dictionary _usedTextures; private readonly Dictionary _usedImages; + private readonly List _vacConstantBuffers; + private readonly List _vacStorageBuffers; + private readonly List _vacTextures; + private readonly List _vacImages; + public int LocalMemoryId { get; private set; } public int SharedMemoryId { get; private set; } @@ -78,6 +83,11 @@ public ResourceManager(ShaderStage stage, IGpuAccessor gpuAccessor, ResourceRese _usedTextures = new(); _usedImages = new(); + _vacConstantBuffers = new(); + _vacStorageBuffers = new(); + _vacTextures = new(); + _vacImages = new(); + Properties.AddOrUpdateConstantBuffer(new(BufferLayout.Std140, 0, SupportBuffer.Binding, "support_buffer", SupportBuffer.GetStructureType())); LocalMemoryId = -1; @@ -563,6 +573,75 @@ private static TextureDescriptor[] GetDescriptors(IReadOnlyDictionary RunPass(TransformContext context, LinkedList addOp.Inst == (Instruction.FP32 | Instruction.Add) && addOp.GetSource(1).Type == OperandType.Constant) { + context.UsedFeatures |= FeatureFlags.Precise; + addOp.ForcePrecise = true; } diff --git a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs index a579433f92..bec20bc2cb 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs @@ -1,5 +1,6 @@ using Ryujinx.Graphics.Shader.CodeGen; using Ryujinx.Graphics.Shader.CodeGen.Glsl; +using Ryujinx.Graphics.Shader.CodeGen.Msl; using Ryujinx.Graphics.Shader.CodeGen.Spirv; using Ryujinx.Graphics.Shader.Decoders; using Ryujinx.Graphics.Shader.IntermediateRepresentation; @@ -331,6 +332,7 @@ private ShaderProgram Generate( definitions, resourceManager, Options.TargetLanguage, + usedFeatures.HasFlag(FeatureFlags.Precise), Options.Flags.HasFlag(TranslationFlags.DebugMode)); int geometryVerticesPerPrimitive = Definitions.OutputTopology switch @@ -373,6 +375,7 @@ private ShaderProgram Generate( { TargetLanguage.Glsl => new ShaderProgram(info, TargetLanguage.Glsl, GlslGenerator.Generate(sInfo, parameters)), TargetLanguage.Spirv => new ShaderProgram(info, TargetLanguage.Spirv, SpirvGenerator.Generate(sInfo, parameters)), + TargetLanguage.Msl => new ShaderProgram(info, TargetLanguage.Msl, MslGenerator.Generate(sInfo, parameters)), _ => throw new NotImplementedException(Options.TargetLanguage.ToString()), }; } @@ -392,7 +395,7 @@ private ResourceManager CreateResourceManager(bool vertexAsCompute) { int binding = resourceManager.Reservations.GetTfeBufferStorageBufferBinding(i); BufferDefinition tfeDataBuffer = new(BufferLayout.Std430, 1, binding, $"tfe_data{i}", tfeDataStruct); - resourceManager.Properties.AddOrUpdateStorageBuffer(tfeDataBuffer); + resourceManager.AddVertexAsComputeStorageBuffer(tfeDataBuffer); } } @@ -400,7 +403,7 @@ private ResourceManager CreateResourceManager(bool vertexAsCompute) { int vertexInfoCbBinding = resourceManager.Reservations.VertexInfoConstantBufferBinding; BufferDefinition vertexInfoBuffer = new(BufferLayout.Std140, 0, vertexInfoCbBinding, "vb_info", VertexInfoBuffer.GetStructureType()); - resourceManager.Properties.AddOrUpdateConstantBuffer(vertexInfoBuffer); + resourceManager.AddVertexAsComputeConstantBuffer(vertexInfoBuffer); StructureType vertexOutputStruct = new(new StructureField[] { @@ -409,13 +412,13 @@ private ResourceManager CreateResourceManager(bool vertexAsCompute) int vertexOutputSbBinding = resourceManager.Reservations.VertexOutputStorageBufferBinding; BufferDefinition vertexOutputBuffer = new(BufferLayout.Std430, 1, vertexOutputSbBinding, "vertex_output", vertexOutputStruct); - resourceManager.Properties.AddOrUpdateStorageBuffer(vertexOutputBuffer); + resourceManager.AddVertexAsComputeStorageBuffer(vertexOutputBuffer); if (Stage == ShaderStage.Vertex) { SetBindingPair ibSetAndBinding = resourceManager.Reservations.GetIndexBufferTextureSetAndBinding(); TextureDefinition indexBuffer = new(ibSetAndBinding.SetIndex, ibSetAndBinding.Binding, "ib_data", SamplerType.TextureBuffer); - resourceManager.Properties.AddOrUpdateTexture(indexBuffer); + resourceManager.AddVertexAsComputeTexture(indexBuffer); int inputMap = _program.AttributeUsage.UsedInputAttributes; @@ -424,7 +427,7 @@ private ResourceManager CreateResourceManager(bool vertexAsCompute) int location = BitOperations.TrailingZeroCount(inputMap); SetBindingPair setAndBinding = resourceManager.Reservations.GetVertexBufferTextureSetAndBinding(location); TextureDefinition vaBuffer = new(setAndBinding.SetIndex, setAndBinding.Binding, $"vb_data{location}", SamplerType.TextureBuffer); - resourceManager.Properties.AddOrUpdateTexture(vaBuffer); + resourceManager.AddVertexAsComputeTexture(vaBuffer); inputMap &= ~(1 << location); } @@ -433,11 +436,11 @@ private ResourceManager CreateResourceManager(bool vertexAsCompute) { SetBindingPair trbSetAndBinding = resourceManager.Reservations.GetTopologyRemapBufferTextureSetAndBinding(); TextureDefinition remapBuffer = new(trbSetAndBinding.SetIndex, trbSetAndBinding.Binding, "trb_data", SamplerType.TextureBuffer); - resourceManager.Properties.AddOrUpdateTexture(remapBuffer); + resourceManager.AddVertexAsComputeTexture(remapBuffer); int geometryVbOutputSbBinding = resourceManager.Reservations.GeometryVertexOutputStorageBufferBinding; BufferDefinition geometryVbOutputBuffer = new(BufferLayout.Std430, 1, geometryVbOutputSbBinding, "geometry_vb_output", vertexOutputStruct); - resourceManager.Properties.AddOrUpdateStorageBuffer(geometryVbOutputBuffer); + resourceManager.AddVertexAsComputeStorageBuffer(geometryVbOutputBuffer); StructureType geometryIbOutputStruct = new(new StructureField[] { @@ -446,7 +449,7 @@ private ResourceManager CreateResourceManager(bool vertexAsCompute) int geometryIbOutputSbBinding = resourceManager.Reservations.GeometryIndexOutputStorageBufferBinding; BufferDefinition geometryIbOutputBuffer = new(BufferLayout.Std430, 1, geometryIbOutputSbBinding, "geometry_ib_output", geometryIbOutputStruct); - resourceManager.Properties.AddOrUpdateStorageBuffer(geometryIbOutputBuffer); + resourceManager.AddVertexAsComputeStorageBuffer(geometryIbOutputBuffer); } resourceManager.SetVertexAsComputeLocalMemories(Definitions.Stage, Definitions.InputTopology); @@ -479,12 +482,17 @@ public ResourceReservations GetResourceReservations() return new ResourceReservations(GpuAccessor, IsTransformFeedbackEmulated, vertexAsCompute: true, _vertexOutput, ioUsage); } + public ShaderProgramInfo GetVertexAsComputeInfo() + { + return CreateResourceManager(true).GetVertexAsComputeInfo(); + } + public void SetVertexOutputMapForGeometryAsCompute(TranslatorContext vertexContext) { _vertexOutput = vertexContext._program.GetIoUsage(); } - public ShaderProgram GenerateVertexPassthroughForCompute() + public (ShaderProgram, ShaderProgramInfo) GenerateVertexPassthroughForCompute() { var attributeUsage = new AttributeUsage(GpuAccessor); var resourceManager = new ResourceManager(ShaderStage.Vertex, GpuAccessor); @@ -496,7 +504,7 @@ public ShaderProgram GenerateVertexPassthroughForCompute() if (Stage == ShaderStage.Vertex) { BufferDefinition vertexInfoBuffer = new(BufferLayout.Std140, 0, vertexInfoCbBinding, "vb_info", VertexInfoBuffer.GetStructureType()); - resourceManager.Properties.AddOrUpdateConstantBuffer(vertexInfoBuffer); + resourceManager.AddVertexAsComputeConstantBuffer(vertexInfoBuffer); } StructureType vertexInputStruct = new(new StructureField[] @@ -506,7 +514,7 @@ public ShaderProgram GenerateVertexPassthroughForCompute() int vertexDataSbBinding = reservations.VertexOutputStorageBufferBinding; BufferDefinition vertexOutputBuffer = new(BufferLayout.Std430, 1, vertexDataSbBinding, "vb_input", vertexInputStruct); - resourceManager.Properties.AddOrUpdateStorageBuffer(vertexOutputBuffer); + resourceManager.AddVertexAsComputeStorageBuffer(vertexOutputBuffer); var context = new EmitterContext(); @@ -564,14 +572,14 @@ public ShaderProgram GenerateVertexPassthroughForCompute() LastInVertexPipeline = true }; - return Generate( + return (Generate( new[] { function }, attributeUsage, definitions, definitions, resourceManager, FeatureFlags.None, - 0); + 0), resourceManager.GetVertexAsComputeInfo(isVertex: true)); } public ShaderProgram GenerateGeometryPassthrough() diff --git a/src/Ryujinx.Headless.SDL2/Metal/MetalWindow.cs b/src/Ryujinx.Headless.SDL2/Metal/MetalWindow.cs new file mode 100644 index 0000000000..5140d639b3 --- /dev/null +++ b/src/Ryujinx.Headless.SDL2/Metal/MetalWindow.cs @@ -0,0 +1,47 @@ +using Ryujinx.Common.Configuration; +using Ryujinx.Input.HLE; +using Ryujinx.SDL2.Common; +using SharpMetal.QuartzCore; +using System.Runtime.Versioning; +using static SDL2.SDL; + +namespace Ryujinx.Headless.SDL2.Metal +{ + [SupportedOSPlatform("macos")] + class MetalWindow : WindowBase + { + private CAMetalLayer _caMetalLayer; + + public CAMetalLayer GetLayer() + { + return _caMetalLayer; + } + + public MetalWindow( + InputManager inputManager, + GraphicsDebugLevel glLogLevel, + AspectRatio aspectRatio, + bool enableMouse, + HideCursorMode hideCursorMode, + bool ignoreControllerApplet) + : base(inputManager, glLogLevel, aspectRatio, enableMouse, hideCursorMode, ignoreControllerApplet) { } + + public override SDL_WindowFlags GetWindowFlags() => SDL_WindowFlags.SDL_WINDOW_METAL; + + protected override void InitializeWindowRenderer() + { + void CreateLayer() + { + _caMetalLayer = new CAMetalLayer(SDL_Metal_GetLayer(SDL_Metal_CreateView(WindowHandle))); + } + + SDL2Driver.MainThreadDispatcher?.Invoke(CreateLayer); + } + + protected override void InitializeRenderer() { } + + protected override void FinalizeWindowRenderer() { } + + protected override void SwapBuffers() { } + } +} diff --git a/src/Ryujinx.Headless.SDL2/Program.cs b/src/Ryujinx.Headless.SDL2/Program.cs index 12158176a5..ea789095e4 100644 --- a/src/Ryujinx.Headless.SDL2/Program.cs +++ b/src/Ryujinx.Headless.SDL2/Program.cs @@ -18,9 +18,11 @@ using Ryujinx.Graphics.GAL.Multithreading; using Ryujinx.Graphics.Gpu; using Ryujinx.Graphics.Gpu.Shader; +using Ryujinx.Graphics.Metal; using Ryujinx.Graphics.OpenGL; using Ryujinx.Graphics.Vulkan; using Ryujinx.Graphics.Vulkan.MoltenVK; +using Ryujinx.Headless.SDL2.Metal; using Ryujinx.Headless.SDL2.OpenGL; using Ryujinx.Headless.SDL2.Vulkan; using Ryujinx.HLE; @@ -510,9 +512,14 @@ private static void ProgressHandler(T state, int current, int total) where T private static WindowBase CreateWindow(Options options) { - return options.GraphicsBackend == GraphicsBackend.Vulkan - ? new VulkanWindow(_inputManager, options.LoggingGraphicsDebugLevel, options.AspectRatio, options.EnableMouse, options.HideCursorMode, options.IgnoreControllerApplet) - : new OpenGLWindow(_inputManager, options.LoggingGraphicsDebugLevel, options.AspectRatio, options.EnableMouse, options.HideCursorMode, options.IgnoreControllerApplet); + return options.GraphicsBackend switch + { + GraphicsBackend.Vulkan => new VulkanWindow(_inputManager, options.LoggingGraphicsDebugLevel, options.AspectRatio, options.EnableMouse, options.HideCursorMode, options.IgnoreControllerApplet), + GraphicsBackend.Metal => OperatingSystem.IsMacOS() ? + new MetalWindow(_inputManager, options.LoggingGraphicsDebugLevel, options.AspectRatio, options.EnableKeyboard, options.HideCursorMode, options.IgnoreControllerApplet) : + throw new Exception("Attempted to use Metal renderer on non-macOS platform!"), + _ => new OpenGLWindow(_inputManager, options.LoggingGraphicsDebugLevel, options.AspectRatio, options.EnableMouse, options.HideCursorMode, options.IgnoreControllerApplet) + }; } private static IRenderer CreateRenderer(Options options, WindowBase window) @@ -544,6 +551,11 @@ private static IRenderer CreateRenderer(Options options, WindowBase window) preferredGpuId); } + if (options.GraphicsBackend == GraphicsBackend.Metal && window is MetalWindow metalWindow && OperatingSystem.IsMacOS()) + { + return new MetalRenderer(metalWindow.GetLayer); + } + return new OpenGLRenderer(); } diff --git a/src/Ryujinx.Headless.SDL2/Ryujinx.Headless.SDL2.csproj b/src/Ryujinx.Headless.SDL2/Ryujinx.Headless.SDL2.csproj index fe535e6d50..d39f2b4812 100644 --- a/src/Ryujinx.Headless.SDL2/Ryujinx.Headless.SDL2.csproj +++ b/src/Ryujinx.Headless.SDL2/Ryujinx.Headless.SDL2.csproj @@ -29,6 +29,7 @@ + diff --git a/src/Ryujinx.ShaderTools/Program.cs b/src/Ryujinx.ShaderTools/Program.cs index a84d7b4664..564960c6f1 100644 --- a/src/Ryujinx.ShaderTools/Program.cs +++ b/src/Ryujinx.ShaderTools/Program.cs @@ -116,7 +116,7 @@ static void HandleArguments(Options options) if (options.VertexPassthrough) { - program = translatorContext.GenerateVertexPassthroughForCompute(); + (program, _) = translatorContext.GenerateVertexPassthroughForCompute(); } else { diff --git a/src/Ryujinx.UI.Common/Configuration/ConfigurationState.cs b/src/Ryujinx.UI.Common/Configuration/ConfigurationState.cs index 04ddd442f0..90bdc34091 100644 --- a/src/Ryujinx.UI.Common/Configuration/ConfigurationState.cs +++ b/src/Ryujinx.UI.Common/Configuration/ConfigurationState.cs @@ -305,14 +305,15 @@ public void LoadDefault() private static GraphicsBackend DefaultGraphicsBackend() { - // Any system running macOS or returning any amount of valid Vulkan devices should default to Vulkan. - // Checks for if the Vulkan version and featureset is compatible should be performed within VulkanRenderer. - if (OperatingSystem.IsMacOS() || VulkanRenderer.GetPhysicalDevices().Length > 0) - { - return GraphicsBackend.Vulkan; - } + // Any system running macOS should default to auto, so it uses Vulkan everywhere and Metal in games where it works well. + if (OperatingSystem.IsMacOS()) + return GraphicsBackend.Auto; - return GraphicsBackend.OpenGl; - } - } + // Any system returning any amount of valid Vulkan devices should default to Vulkan. + // Checks for if the Vulkan version and featureset is compatible should be performed within VulkanRenderer. + return VulkanRenderer.GetPhysicalDevices().Length > 0 + ? GraphicsBackend.Vulkan + : GraphicsBackend.OpenGl; } + } +} diff --git a/src/Ryujinx/AppHost.cs b/src/Ryujinx/AppHost.cs index 5872b278f6..abcc357a6e 100644 --- a/src/Ryujinx/AppHost.cs +++ b/src/Ryujinx/AppHost.cs @@ -3,6 +3,7 @@ using Avalonia.Controls.ApplicationLifetimes; using Avalonia.Input; using Avalonia.Threading; +using Gommon; using LibHac.Common; using LibHac.Ns; using LibHac.Tools.FsSystem; @@ -28,6 +29,7 @@ using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.GAL.Multithreading; using Ryujinx.Graphics.Gpu; +using Ryujinx.Graphics.Metal; using Ryujinx.Graphics.OpenGL; using Ryujinx.Graphics.Vulkan; using Ryujinx.HLE; @@ -141,6 +143,23 @@ private enum CursorStates public ulong ApplicationId { get; private set; } public bool ScreenshotRequested { get; set; } + public bool ShouldInitMetal + { + get + { + return OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64 && + ( + ( + ( + ConfigurationState.Instance.Graphics.GraphicsBackend.Value == GraphicsBackend.Auto && + RendererHost.KnownGreatMetalTitles.ContainsIgnoreCase(ApplicationId.ToString("X16")) + ) || + ConfigurationState.Instance.Graphics.GraphicsBackend.Value == GraphicsBackend.Metal + ) + ); + } + } + public AppHost( RendererHost renderer, InputManager inputManager, @@ -893,12 +912,27 @@ private void InitializeSwitchInstance() VirtualFileSystem.ReloadKeySet(); // Initialize Renderer. - IRenderer renderer = ConfigurationState.Instance.Graphics.GraphicsBackend.Value == GraphicsBackend.OpenGl - ? new OpenGLRenderer() - : VulkanRenderer.Create( + IRenderer renderer; + GraphicsBackend backend = ConfigurationState.Instance.Graphics.GraphicsBackend; + + if (ShouldInitMetal) + { +#pragma warning disable CA1416 This call site is reachable on all platforms + // The condition does a check for Mac, on top of checking if it's an ARM Mac. This isn't a problem. + renderer = new MetalRenderer((RendererHost.EmbeddedWindow as EmbeddedWindowMetal)!.CreateSurface); +#pragma warning restore CA1416 + } + else if (backend == GraphicsBackend.Vulkan || (backend == GraphicsBackend.Auto && !ShouldInitMetal)) + { + renderer = VulkanRenderer.Create( ConfigurationState.Instance.Graphics.PreferredGpu, (RendererHost.EmbeddedWindow as EmbeddedWindowVulkan)!.CreateSurface, VulkanHelper.GetRequiredInstanceExtensions); + } + else + { + renderer = new OpenGLRenderer(); + } BackendThreading threadingMode = ConfigurationState.Instance.Graphics.BackendThreading; @@ -1111,10 +1145,11 @@ private void RenderLoop() public void InitStatus() { - _viewModel.BackendText = ConfigurationState.Instance.Graphics.GraphicsBackend.Value switch + _viewModel.BackendText = RendererHost.Backend switch { GraphicsBackend.Vulkan => "Vulkan", GraphicsBackend.OpenGl => "OpenGL", + GraphicsBackend.Metal => "Metal", _ => throw new NotImplementedException() }; diff --git a/src/Ryujinx/Assets/locales.json b/src/Ryujinx/Assets/locales.json index 2e86df0aa7..cdf43f474e 100644 --- a/src/Ryujinx/Assets/locales.json +++ b/src/Ryujinx/Assets/locales.json @@ -19677,6 +19677,54 @@ "zh_TW": "選擇模擬器將使用的圖形後端。\n\n只要驅動程式是最新的,Vulkan 對所有現代顯示卡來說都更好用。Vulkan 還能在所有 GPU 廠商上實現更快的著色器編譯 (減少卡頓)。\n\nOpenGL 在舊式 Nvidia GPU、Linux 上的舊式 AMD GPU 或 VRAM 較低的 GPU 上可能會取得更好的效果,不過著色器編譯的卡頓會更嚴重。\n\n如果不確定,請設定為 Vulkan。如果您的 GPU 使用最新的圖形驅動程式也不支援 Vulkan,請設定為 OpenGL。" } }, + { + "ID": "SettingsTabGraphicsBackendAuto", + "Translations": { + "ar_SA": "", + "de_DE": "", + "el_GR": "", + "en_US": "Auto", + "es_ES": "", + "fr_FR": "", + "he_IL": "", + "it_IT": "", + "ja_JP": "", + "ko_KR": "", + "no_NO": "", + "pl_PL": "", + "pt_BR": "", + "ru_RU": "", + "th_TH": "", + "tr_TR": "", + "uk_UA": "", + "zh_CN": "", + "zh_TW": "" + } + }, + { + "ID": "SettingsTabGraphicsBackendAutoTooltip", + "Translations": { + "ar_SA": "", + "de_DE": "", + "el_GR": "", + "en_US": "Uses Vulkan.\nOn an ARM Mac, and when playing a game that runs well under it, uses the Metal backend.", + "es_ES": "", + "fr_FR": "", + "he_IL": "", + "it_IT": "", + "ja_JP": "", + "ko_KR": "", + "no_NO": "", + "pl_PL": "", + "pt_BR": "", + "ru_RU": "", + "th_TH": "", + "tr_TR": "", + "uk_UA": "", + "zh_CN": "", + "zh_TW": "" + } + }, { "ID": "SettingsEnableTextureRecompression", "Translations": { diff --git a/src/Ryujinx/Program.cs b/src/Ryujinx/Program.cs index 05fd66b90f..3c24b8e271 100644 --- a/src/Ryujinx/Program.cs +++ b/src/Ryujinx/Program.cs @@ -198,6 +198,7 @@ public static void ReloadConfig() { "opengl" => GraphicsBackend.OpenGl, "vulkan" => GraphicsBackend.Vulkan, + "metal" => GraphicsBackend.Metal, _ => ConfigurationState.Instance.Graphics.GraphicsBackend }; diff --git a/src/Ryujinx/Ryujinx.csproj b/src/Ryujinx/Ryujinx.csproj index ce75b1d878..d5bad2ee6a 100644 --- a/src/Ryujinx/Ryujinx.csproj +++ b/src/Ryujinx/Ryujinx.csproj @@ -66,6 +66,7 @@ + diff --git a/src/Ryujinx/UI/Renderer/EmbeddedWindowMetal.cs b/src/Ryujinx/UI/Renderer/EmbeddedWindowMetal.cs new file mode 100644 index 0000000000..eaf6f7bdf1 --- /dev/null +++ b/src/Ryujinx/UI/Renderer/EmbeddedWindowMetal.cs @@ -0,0 +1,20 @@ +using SharpMetal.QuartzCore; +using System; + +namespace Ryujinx.Ava.UI.Renderer +{ + public class EmbeddedWindowMetal : EmbeddedWindow + { + public CAMetalLayer CreateSurface() + { + if (OperatingSystem.IsMacOS()) + { + return new CAMetalLayer(MetalLayer); + } + else + { + throw new NotSupportedException(); + } + } + } +} diff --git a/src/Ryujinx/UI/Renderer/RendererHost.axaml.cs b/src/Ryujinx/UI/Renderer/RendererHost.axaml.cs index 4bf10d0d7c..d191a11ed0 100644 --- a/src/Ryujinx/UI/Renderer/RendererHost.axaml.cs +++ b/src/Ryujinx/UI/Renderer/RendererHost.axaml.cs @@ -1,8 +1,10 @@ using Avalonia; using Avalonia.Controls; +using Gommon; using Ryujinx.Common.Configuration; using Ryujinx.UI.Common.Configuration; using System; +using System.Runtime.InteropServices; namespace Ryujinx.Ava.UI.Renderer { @@ -17,18 +19,64 @@ public RendererHost() { InitializeComponent(); - if (ConfigurationState.Instance.Graphics.GraphicsBackend.Value == GraphicsBackend.OpenGl) + EmbeddedWindow = ConfigurationState.Instance.Graphics.GraphicsBackend.Value switch { - EmbeddedWindow = new EmbeddedWindowOpenGL(); - } - else + GraphicsBackend.OpenGl => new EmbeddedWindowOpenGL(), + GraphicsBackend.Metal => new EmbeddedWindowMetal(), + GraphicsBackend.Vulkan or GraphicsBackend.Auto => new EmbeddedWindowVulkan(), + _ => throw new NotSupportedException() + }; + + Initialize(); + } + + public static readonly string[] KnownGreatMetalTitles = + [ + "01006A800016E000", // Smash Ultimate + "0100000000010000", // Super Mario Odyessy + "01008C0016544000", // Sea of Stars + "01005CA01580E000", // Persona 5 + "010028600EBDA000", // Mario 3D World + ]; + + public GraphicsBackend Backend => + EmbeddedWindow switch { - EmbeddedWindow = new EmbeddedWindowVulkan(); + EmbeddedWindowVulkan => GraphicsBackend.Vulkan, + EmbeddedWindowOpenGL => GraphicsBackend.OpenGl, + EmbeddedWindowMetal => GraphicsBackend.Metal, + _ => throw new NotImplementedException() + }; + + public RendererHost(string titleId) + { + InitializeComponent(); + + switch (ConfigurationState.Instance.Graphics.GraphicsBackend.Value) + { + case GraphicsBackend.Auto: + EmbeddedWindow = + OperatingSystem.IsMacOS() && + RuntimeInformation.ProcessArchitecture == Architecture.Arm64 && + KnownGreatMetalTitles.ContainsIgnoreCase(titleId) + ? new EmbeddedWindowMetal() + : new EmbeddedWindowVulkan(); + break; + case GraphicsBackend.OpenGl: + EmbeddedWindow = new EmbeddedWindowOpenGL(); + break; + case GraphicsBackend.Metal: + EmbeddedWindow = new EmbeddedWindowMetal(); + break; + case GraphicsBackend.Vulkan: + EmbeddedWindow = new EmbeddedWindowVulkan(); + break; } Initialize(); } - + + private void Initialize() { EmbeddedWindow.WindowCreated += CurrentWindow_WindowCreated; diff --git a/src/Ryujinx/UI/ViewModels/MainWindowViewModel.cs b/src/Ryujinx/UI/ViewModels/MainWindowViewModel.cs index ae373c2675..f7cd83ed6b 100644 --- a/src/Ryujinx/UI/ViewModels/MainWindowViewModel.cs +++ b/src/Ryujinx/UI/ViewModels/MainWindowViewModel.cs @@ -1938,7 +1938,7 @@ await ContentDialogHelper.CreateInfoDialog( PrepareLoadScreen(); - RendererHostControl = new RendererHost(); + RendererHostControl = new RendererHost(application.Id.ToString("X16")); AppHost = new AppHost( RendererHostControl, diff --git a/src/Ryujinx/UI/ViewModels/SettingsViewModel.cs b/src/Ryujinx/UI/ViewModels/SettingsViewModel.cs index 0824e3f868..85ba203f99 100644 --- a/src/Ryujinx/UI/ViewModels/SettingsViewModel.cs +++ b/src/Ryujinx/UI/ViewModels/SettingsViewModel.cs @@ -122,7 +122,7 @@ public bool IsVulkanAvailable public bool IsOpenGLAvailable => !OperatingSystem.IsMacOS(); - public bool IsHypervisorAvailable => OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64; + public bool IsAppleSiliconMac => OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64; public bool GameDirectoryChanged { @@ -252,7 +252,8 @@ public int CustomVSyncInterval public bool IsCustomResolutionScaleActive => _resolutionScale == 4; public bool IsScalingFilterActive => _scalingFilter == (int)Ryujinx.Common.Configuration.ScalingFilter.Fsr; - public bool IsVulkanSelected => GraphicsBackendIndex == 0; + public bool IsVulkanSelected => + GraphicsBackendIndex == 1 || (GraphicsBackendIndex == 0 && !OperatingSystem.IsMacOS()); public bool UseHypervisor { get; set; } public bool DisableP2P { get; set; } @@ -432,7 +433,7 @@ private async Task LoadAvailableGpus() if (devices.Length == 0) { IsVulkanAvailable = false; - GraphicsBackendIndex = 1; + GraphicsBackendIndex = 2; } else { diff --git a/src/Ryujinx/UI/Views/Settings/SettingsCPUView.axaml b/src/Ryujinx/UI/Views/Settings/SettingsCPUView.axaml index c7f03a45db..83f908a9c4 100644 --- a/src/Ryujinx/UI/Views/Settings/SettingsCPUView.axaml +++ b/src/Ryujinx/UI/Views/Settings/SettingsCPUView.axaml @@ -69,7 +69,7 @@ diff --git a/src/Ryujinx/UI/Views/Settings/SettingsGraphicsView.axaml b/src/Ryujinx/UI/Views/Settings/SettingsGraphicsView.axaml index 219efcef8c..a2559f3934 100644 --- a/src/Ryujinx/UI/Views/Settings/SettingsGraphicsView.axaml +++ b/src/Ryujinx/UI/Views/Settings/SettingsGraphicsView.axaml @@ -33,16 +33,24 @@ ToolTip.Tip="{ext:Locale SettingsTabGraphicsBackendTooltip}" Text="{ext:Locale SettingsTabGraphicsBackend}" Width="250" /> - + + + + + + + diff --git a/src/Ryujinx/UI/Windows/SettingsWindow.axaml.cs b/src/Ryujinx/UI/Windows/SettingsWindow.axaml.cs index d8c88bed8c..b004d9fba9 100644 --- a/src/Ryujinx/UI/Windows/SettingsWindow.axaml.cs +++ b/src/Ryujinx/UI/Windows/SettingsWindow.axaml.cs @@ -56,34 +56,34 @@ private void NavPanelOnSelectionChanged(object sender, NavigationViewSelectionCh { switch (navItem.Tag.ToString()) { - case "UiPage": + case nameof(UiPage): UiPage.ViewModel = ViewModel; NavPanel.Content = UiPage; break; - case "InputPage": + case nameof(InputPage): NavPanel.Content = InputPage; break; - case "HotkeysPage": + case nameof(HotkeysPage): NavPanel.Content = HotkeysPage; break; - case "SystemPage": + case nameof(SystemPage): SystemPage.ViewModel = ViewModel; NavPanel.Content = SystemPage; break; - case "CpuPage": + case nameof(CpuPage): NavPanel.Content = CpuPage; break; - case "GraphicsPage": + case nameof(GraphicsPage): NavPanel.Content = GraphicsPage; break; - case "AudioPage": + case nameof(AudioPage): NavPanel.Content = AudioPage; break; - case "NetworkPage": + case nameof(NetworkPage): NetworkPage.ViewModel = ViewModel; NavPanel.Content = NetworkPage; break; - case "LoggingPage": + case nameof(LoggingPage): NavPanel.Content = LoggingPage; break; default: