From 43ebd7a9bbba0c1290a9e98b9224f0752627c400 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sun, 10 Apr 2022 10:49:44 -0300 Subject: [PATCH 1/2] New shader cache implementation (#3194) * New shader cache implementation * Remove some debug code * Take transform feedback varying count into account * Create shader cache directory if it does not exist + fragment output map related fixes * Remove debug code * Only check texture descriptors if the constant buffer is bound * Also check CPU VA on GetSpanMapped * Remove more unused code and move cache related code * XML docs + remove more unused methods * Better codegen for TransformFeedbackDescriptor.AsSpan * Support migration from old cache format, remove more unused code Shader cache rebuild now also rewrites the shared toc and data files * Fix migration error with BRX shaders * Add a limit to the async translation queue Avoid async translation threads not being able to keep up and the queue growing very large * Re-create specialization state on recompile This might be required if a new version of the shader translator requires more or less state, or if there is a bug related to the GPU state access * Make shader cache more error resilient * Add some missing XML docs and move GpuAccessor docs to the interface/use inheritdoc * Address early PR feedback * Fix rebase * Remove IRenderer.CompileShader and IShader interface, replace with new ShaderSource struct passed to CreateProgram directly * Handle some missing exceptions * Make shader cache purge delete both old and new shader caches * Register textures on new specialization state * Translate and compile shaders in forward order (eliminates diffs due to different binding numbers) * Limit in-flight shader compilation to the maximum number of compilation threads * Replace ParallelDiskCacheLoader state changed event with a callback function * Better handling for invalid constant buffer 1 data length * Do not create the old cache directory structure if the old cache does not 
exist * Constant buffer use should be per-stage. This change will invalidate existing new caches (file format version was incremented) * Replace rectangle texture with just coordinate normalization * Skip incompatible shaders that are missing texture information, instead of crashing This is required if we, for example, support new texture instruction to the shader translator, and then they allow access to textures that were not accessed before. In this scenario, the old cache entry is no longer usable * Fix coordinates normalization on cubemap textures * Check if title ID is null before combining shader cache path * More robust constant buffer address validation on spec state * More robust constant buffer address validation on spec state (2) * Regenerate shader cache with one stream, rather than one per shader. * Only create shader cache directory during initialization * Logging improvements * Proper shader program disposal * PR feedback, and add a comment on serialized structs * XML docs for RegisterTexture Co-authored-by: riperiperi --- Ryujinx.Graphics.GAL/Capabilities.cs | 9 + Ryujinx.Graphics.GAL/IRenderer.cs | 4 +- .../Multithreading/CommandHelper.cs | 6 - .../Multithreading/CommandType.cs | 3 - .../Commands/Renderer/CompileShaderCommand.cs | 22 - .../Commands/Renderer/CreateBufferCommand.cs | 5 +- .../Commands/Renderer/PreFrameCommand.cs | 4 +- .../Commands/Shader/ShaderDisposeCommand.cs | 21 - .../Programs/SourceProgramRequest.cs | 13 +- .../Resources/ThreadedShader.cs | 38 - .../Multithreading/ThreadedPipeline.cs | 1 - .../Multithreading/ThreadedRenderer.cs | 11 +- Ryujinx.Graphics.GAL/ShaderSource.cs | 29 + .../Engine/Compute/ComputeClass.cs | 16 +- .../Engine/Threed/StateUpdater.cs | 156 ++- Ryujinx.Graphics.Gpu/GpuContext.cs | 4 +- Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs | 67 + Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs | 4 +- .../Shader/Cache/CacheHelper.cs | 331 +---- .../Shader/Cache/CacheManager.cs | 2 - .../Shader/Cache/CacheMigration.cs | 
175 --- .../Cache/Definition/HostShaderCacheEntry.cs | 3 +- .../Shader/Cache/Migration.cs | 255 ++++ .../Cache/TransformFeedbackDescriptorOld.cs | 19 + .../Shader/CachedGpuAccessor.cs | 222 ---- ...ShaderBundle.cs => CachedShaderProgram.cs} | 18 +- .../Shader/CachedShaderStage.cs | 38 + .../Shader/ComputeShaderCacheHashTable.cs | 68 + .../DiskCache/BackgroundDiskCacheWriter.cs | 138 ++ .../Shader/DiskCache/BinarySerializer.cs | 216 ++++ .../Shader/DiskCache/CompressionAlgorithm.cs | 18 + .../Shader/DiskCache/DiskCacheCommon.cs | 57 + .../Shader/DiskCache/DiskCacheGpuAccessor.cs | 202 +++ .../Shader/DiskCache/DiskCacheGuestStorage.cs | 459 +++++++ .../Shader/DiskCache/DiskCacheHostStorage.cs | 763 +++++++++++ .../DiskCache/DiskCacheLoadException.cs | 48 + .../Shader/DiskCache/DiskCacheLoadResult.cs | 72 ++ .../DiskCache/DiskCacheOutputStreams.cs | 57 + .../DiskCache/ParallelDiskCacheLoader.cs | 672 ++++++++++ Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs | 249 ++-- ...pableGpuAccessor.cs => GpuAccessorBase.cs} | 84 +- .../Shader/GpuAccessorState.cs | 71 +- .../Shader/GpuChannelComputeState.cs | 57 + .../Shader/GpuChannelGraphicsState.cs | 41 + .../Shader/GpuChannelPoolState.cs | 36 + .../Shader/HashTable/HashState.cs | 113 ++ .../Shader/HashTable/IDataAccessor.cs | 27 + .../Shader/HashTable/PartitionHashTable.cs | 452 +++++++ .../Shader/HashTable/PartitionedHashTable.cs | 244 ++++ .../Shader/HashTable/SmartDataAccessor.cs | 96 ++ Ryujinx.Graphics.Gpu/Shader/ResourceCounts.cs | 36 + .../Shader/ShaderAddresses.cs | 17 +- Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs | 1133 +++++------------ .../Shader/ShaderCacheHashTable.cs | 280 ++++ .../Shader/ShaderCodeAccessor.cs | 32 + .../Shader/ShaderCodeHolder.cs | 52 - .../Shader/ShaderCompileTask.cs | 95 -- .../Shader/ShaderSpecializationList.cs | 76 ++ .../Shader/ShaderSpecializationState.cs | 615 +++++++++ .../Shader/TransformFeedbackDescriptor.cs | 53 +- Ryujinx.Graphics.OpenGL/EnumConversion.cs | 15 + 
Ryujinx.Graphics.OpenGL/Program.cs | 72 +- Ryujinx.Graphics.OpenGL/Renderer.cs | 14 +- Ryujinx.Graphics.OpenGL/Shader.cs | 42 - Ryujinx.Graphics.Shader/BufferDescriptor.cs | 2 + Ryujinx.Graphics.Shader/Decoders/Decoder.cs | 2 +- Ryujinx.Graphics.Shader/IGpuAccessor.cs | 190 ++- Ryujinx.Graphics.Shader/ShaderProgram.cs | 13 +- Ryujinx.Graphics.Shader/ShaderProgramInfo.cs | 3 + .../StructuredIr/StructuredProgram.cs | 2 +- .../StructuredIr/StructuredProgramInfo.cs | 2 +- Ryujinx.Graphics.Shader/TextureDescriptor.cs | 2 + .../Translation/Rewriter.cs | 16 +- .../Translation/ShaderConfig.cs | 80 +- .../Translation/TranslationCounts.cs | 36 - .../Translation/Translator.cs | 52 +- .../Translation/TranslatorContext.cs | 20 +- Ryujinx.Headless.SDL2/WindowBase.cs | 6 +- Ryujinx.ShaderTools/Program.cs | 2 +- Ryujinx/Ui/RendererWidgetBase.cs | 10 +- Ryujinx/Ui/Widgets/GameTableContextMenu.cs | 41 +- 81 files changed, 6371 insertions(+), 2356 deletions(-) delete mode 100644 Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CompileShaderCommand.cs delete mode 100644 Ryujinx.Graphics.GAL/Multithreading/Commands/Shader/ShaderDisposeCommand.cs delete mode 100644 Ryujinx.Graphics.GAL/Multithreading/Resources/ThreadedShader.cs create mode 100644 Ryujinx.Graphics.GAL/ShaderSource.cs delete mode 100644 Ryujinx.Graphics.Gpu/Shader/Cache/CacheMigration.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/Cache/Migration.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/Cache/TransformFeedbackDescriptorOld.cs delete mode 100644 Ryujinx.Graphics.Gpu/Shader/CachedGpuAccessor.cs rename Ryujinx.Graphics.Gpu/Shader/{ShaderBundle.cs => CachedShaderProgram.cs} (63%) create mode 100644 Ryujinx.Graphics.Gpu/Shader/CachedShaderStage.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/ComputeShaderCacheHashTable.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/DiskCache/BinarySerializer.cs create mode 100644 
Ryujinx.Graphics.Gpu/Shader/DiskCache/CompressionAlgorithm.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheCommon.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadException.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadResult.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheOutputStreams.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs rename Ryujinx.Graphics.Gpu/Shader/{TextureDescriptorCapableGpuAccessor.cs => GpuAccessorBase.cs} (74%) create mode 100644 Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/GpuChannelPoolState.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/HashTable/HashState.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/HashTable/IDataAccessor.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionHashTable.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionedHashTable.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/HashTable/SmartDataAccessor.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/ResourceCounts.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/ShaderCacheHashTable.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/ShaderCodeAccessor.cs delete mode 100644 Ryujinx.Graphics.Gpu/Shader/ShaderCodeHolder.cs delete mode 100644 Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs delete mode 100644 Ryujinx.Graphics.OpenGL/Shader.cs delete mode 100644 
Ryujinx.Graphics.Shader/Translation/TranslationCounts.cs diff --git a/Ryujinx.Graphics.GAL/Capabilities.cs b/Ryujinx.Graphics.GAL/Capabilities.cs index 4e5dff596cc4..af8541fb8d44 100644 --- a/Ryujinx.Graphics.GAL/Capabilities.cs +++ b/Ryujinx.Graphics.GAL/Capabilities.cs @@ -1,7 +1,12 @@ +using Ryujinx.Graphics.Shader.Translation; + namespace Ryujinx.Graphics.GAL { public struct Capabilities { + public readonly TargetApi Api; + public readonly string VendorName; + public readonly bool HasFrontFacingBug; public readonly bool HasVectorIndexingBug; @@ -24,6 +29,8 @@ public struct Capabilities public readonly int StorageBufferOffsetAlignment; public Capabilities( + TargetApi api, + string vendorName, bool hasFrontFacingBug, bool hasVectorIndexingBug, bool supportsAstcCompression, @@ -43,6 +50,8 @@ public Capabilities( float maximumSupportedAnisotropy, int storageBufferOffsetAlignment) { + Api = api; + VendorName = vendorName; HasFrontFacingBug = hasFrontFacingBug; HasVectorIndexingBug = hasVectorIndexingBug; SupportsAstcCompression = supportsAstcCompression; diff --git a/Ryujinx.Graphics.GAL/IRenderer.cs b/Ryujinx.Graphics.GAL/IRenderer.cs index a36d999d6017..b051e9dc8b46 100644 --- a/Ryujinx.Graphics.GAL/IRenderer.cs +++ b/Ryujinx.Graphics.GAL/IRenderer.cs @@ -16,11 +16,9 @@ public interface IRenderer : IDisposable void BackgroundContextAction(Action action, bool alwaysBackground = false); - IShader CompileShader(ShaderStage stage, string code); - BufferHandle CreateBuffer(int size); - IProgram CreateProgram(IShader[] shaders, ShaderInfo info); + IProgram CreateProgram(ShaderSource[] shaders, ShaderInfo info); ISampler CreateSampler(SamplerCreateInfo info); ITexture CreateTexture(TextureCreateInfo info, float scale); diff --git a/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs b/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs index 67e8315b4e19..442a9045968c 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs +++ 
b/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs @@ -4,7 +4,6 @@ using Ryujinx.Graphics.GAL.Multithreading.Commands.Program; using Ryujinx.Graphics.GAL.Multithreading.Commands.Renderer; using Ryujinx.Graphics.GAL.Multithreading.Commands.Sampler; -using Ryujinx.Graphics.GAL.Multithreading.Commands.Shader; using Ryujinx.Graphics.GAL.Multithreading.Commands.Texture; using Ryujinx.Graphics.GAL.Multithreading.Commands.Window; using System; @@ -53,8 +52,6 @@ private static void InitLookup() { _lookup[(int)CommandType.Action] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => ActionCommand.Run(ref GetCommand(memory), threaded, renderer); - _lookup[(int)CommandType.CompileShader] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => - CompileShaderCommand.Run(ref GetCommand(memory), threaded, renderer); _lookup[(int)CommandType.CreateBuffer] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => CreateBufferCommand.Run(ref GetCommand(memory), threaded, renderer); _lookup[(int)CommandType.CreateProgram] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => @@ -98,9 +95,6 @@ private static void InitLookup() _lookup[(int)CommandType.SamplerDispose] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => SamplerDisposeCommand.Run(ref GetCommand(memory), threaded, renderer); - _lookup[(int)CommandType.ShaderDispose] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => - ShaderDisposeCommand.Run(ref GetCommand(memory), threaded, renderer); - _lookup[(int)CommandType.TextureCopyTo] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => TextureCopyToCommand.Run(ref GetCommand(memory), threaded, renderer); _lookup[(int)CommandType.TextureCopyToScaled] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => diff --git a/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs b/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs index e0a03ce7f8b0..5c42abd12c2e 100644 --- 
a/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs @@ -3,7 +3,6 @@ enum CommandType : byte { Action, - CompileShader, CreateBuffer, CreateProgram, CreateSampler, @@ -29,8 +28,6 @@ enum CommandType : byte SamplerDispose, - ShaderDispose, - TextureCopyTo, TextureCopyToScaled, TextureCopyToSlice, diff --git a/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CompileShaderCommand.cs b/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CompileShaderCommand.cs deleted file mode 100644 index 2bd9725d2be7..000000000000 --- a/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CompileShaderCommand.cs +++ /dev/null @@ -1,22 +0,0 @@ -using Ryujinx.Graphics.GAL.Multithreading.Model; -using Ryujinx.Graphics.GAL.Multithreading.Resources; - -namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Renderer -{ - struct CompileShaderCommand : IGALCommand - { - public CommandType CommandType => CommandType.CompileShader; - private TableRef _shader; - - public void Set(TableRef shader) - { - _shader = shader; - } - - public static void Run(ref CompileShaderCommand command, ThreadedRenderer threaded, IRenderer renderer) - { - ThreadedShader shader = command._shader.Get(threaded); - shader.EnsureCreated(); - } - } -} diff --git a/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CreateBufferCommand.cs b/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CreateBufferCommand.cs index 4d1cbb28462a..a96b3cef45e5 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CreateBufferCommand.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CreateBufferCommand.cs @@ -1,7 +1,4 @@ -using Ryujinx.Graphics.GAL.Multithreading.Resources; -using Ryujinx.Graphics.Shader; - -namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Renderer +namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Renderer { struct CreateBufferCommand : IGALCommand { diff --git 
a/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/PreFrameCommand.cs b/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/PreFrameCommand.cs index 67cafd188aa7..1048dc9e6ac7 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/PreFrameCommand.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/PreFrameCommand.cs @@ -1,6 +1,4 @@ -using System; - -namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Renderer +namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Renderer { struct PreFrameCommand : IGALCommand { diff --git a/Ryujinx.Graphics.GAL/Multithreading/Commands/Shader/ShaderDisposeCommand.cs b/Ryujinx.Graphics.GAL/Multithreading/Commands/Shader/ShaderDisposeCommand.cs deleted file mode 100644 index ebb2c927fb68..000000000000 --- a/Ryujinx.Graphics.GAL/Multithreading/Commands/Shader/ShaderDisposeCommand.cs +++ /dev/null @@ -1,21 +0,0 @@ -using Ryujinx.Graphics.GAL.Multithreading.Model; -using Ryujinx.Graphics.GAL.Multithreading.Resources; - -namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Shader -{ - struct ShaderDisposeCommand : IGALCommand - { - public CommandType CommandType => CommandType.ShaderDispose; - private TableRef _shader; - - public void Set(TableRef shader) - { - _shader = shader; - } - - public static void Run(ref ShaderDisposeCommand command, ThreadedRenderer threaded, IRenderer renderer) - { - command._shader.Get(threaded).Base.Dispose(); - } - } -} diff --git a/Ryujinx.Graphics.GAL/Multithreading/Resources/Programs/SourceProgramRequest.cs b/Ryujinx.Graphics.GAL/Multithreading/Resources/Programs/SourceProgramRequest.cs index d808fe221660..7c5f03631414 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/Resources/Programs/SourceProgramRequest.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/Resources/Programs/SourceProgramRequest.cs @@ -6,10 +6,10 @@ class SourceProgramRequest : IProgramRequest { public ThreadedProgram Threaded { get; set; } - private IShader[] _shaders; + private ShaderSource[] _shaders; 
private ShaderInfo _info; - public SourceProgramRequest(ThreadedProgram program, IShader[] shaders, ShaderInfo info) + public SourceProgramRequest(ThreadedProgram program, ShaderSource[] shaders, ShaderInfo info) { Threaded = program; @@ -19,14 +19,7 @@ public SourceProgramRequest(ThreadedProgram program, IShader[] shaders, ShaderIn public IProgram Create(IRenderer renderer) { - IShader[] shaders = _shaders.Select(shader => - { - var threaded = (ThreadedShader)shader; - threaded?.EnsureCreated(); - return threaded?.Base; - }).ToArray(); - - return renderer.CreateProgram(shaders, _info); + return renderer.CreateProgram(_shaders, _info); } } } diff --git a/Ryujinx.Graphics.GAL/Multithreading/Resources/ThreadedShader.cs b/Ryujinx.Graphics.GAL/Multithreading/Resources/ThreadedShader.cs deleted file mode 100644 index dcbecf389188..000000000000 --- a/Ryujinx.Graphics.GAL/Multithreading/Resources/ThreadedShader.cs +++ /dev/null @@ -1,38 +0,0 @@ -using Ryujinx.Graphics.GAL.Multithreading.Commands.Shader; -using Ryujinx.Graphics.GAL.Multithreading.Model; -using Ryujinx.Graphics.Shader; - -namespace Ryujinx.Graphics.GAL.Multithreading.Resources -{ - class ThreadedShader : IShader - { - private ThreadedRenderer _renderer; - private ShaderStage _stage; - private string _code; - - public IShader Base; - - public ThreadedShader(ThreadedRenderer renderer, ShaderStage stage, string code) - { - _renderer = renderer; - - _stage = stage; - _code = code; - } - - internal void EnsureCreated() - { - if (_code != null && Base == null) - { - Base = _renderer.BaseRenderer.CompileShader(_stage, _code); - _code = null; - } - } - - public void Dispose() - { - _renderer.New().Set(new TableRef(_renderer, this)); - _renderer.QueueCommand(); - } - } -} diff --git a/Ryujinx.Graphics.GAL/Multithreading/ThreadedPipeline.cs b/Ryujinx.Graphics.GAL/Multithreading/ThreadedPipeline.cs index 6dc8ef38671c..b6acfaa8374b 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/ThreadedPipeline.cs +++ 
b/Ryujinx.Graphics.GAL/Multithreading/ThreadedPipeline.cs @@ -1,7 +1,6 @@ using Ryujinx.Graphics.GAL.Multithreading.Commands; using Ryujinx.Graphics.GAL.Multithreading.Model; using Ryujinx.Graphics.GAL.Multithreading.Resources; -using Ryujinx.Graphics.Shader; using System; using System.Linq; diff --git a/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs b/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs index 5030fee623b7..63b668baccf5 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs @@ -250,15 +250,6 @@ public void BackgroundContextAction(Action action, bool alwaysBackground = false } } - public IShader CompileShader(ShaderStage stage, string code) - { - var shader = new ThreadedShader(this, stage, code); - New().Set(Ref(shader)); - QueueCommand(); - - return shader; - } - public BufferHandle CreateBuffer(int size) { BufferHandle handle = Buffers.CreateBufferHandle(); @@ -268,7 +259,7 @@ public BufferHandle CreateBuffer(int size) return handle; } - public IProgram CreateProgram(IShader[] shaders, ShaderInfo info) + public IProgram CreateProgram(ShaderSource[] shaders, ShaderInfo info) { var program = new ThreadedProgram(this); SourceProgramRequest request = new SourceProgramRequest(program, shaders, info); diff --git a/Ryujinx.Graphics.GAL/ShaderSource.cs b/Ryujinx.Graphics.GAL/ShaderSource.cs new file mode 100644 index 000000000000..13b92f20a48f --- /dev/null +++ b/Ryujinx.Graphics.GAL/ShaderSource.cs @@ -0,0 +1,29 @@ +using Ryujinx.Graphics.Shader; +using Ryujinx.Graphics.Shader.Translation; + +namespace Ryujinx.Graphics.GAL +{ + public struct ShaderSource + { + public string Code { get; } + public byte[] BinaryCode { get; } + public ShaderStage Stage { get; } + public TargetLanguage Language { get; } + + public ShaderSource(string code, byte[] binaryCode, ShaderStage stage, TargetLanguage language) + { + Code = code; + BinaryCode = binaryCode; + Stage = stage; + 
Language = language; + } + + public ShaderSource(string code, ShaderStage stage, TargetLanguage language) : this(code, null, stage, language) + { + } + + public ShaderSource(byte[] binaryCode, ShaderStage stage, TargetLanguage language) : this(null, binaryCode, stage, language) + { + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs b/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs index 00015c4046bc..87c14da8fdee 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs @@ -124,24 +124,20 @@ private void SendSignalingPcasB(int argument) ulong samplerPoolGpuVa = ((ulong)_state.State.SetTexSamplerPoolAOffsetUpper << 32) | _state.State.SetTexSamplerPoolB; ulong texturePoolGpuVa = ((ulong)_state.State.SetTexHeaderPoolAOffsetUpper << 32) | _state.State.SetTexHeaderPoolB; - GpuAccessorState gas = new GpuAccessorState( + GpuChannelPoolState poolState = new GpuChannelPoolState( texturePoolGpuVa, _state.State.SetTexHeaderPoolCMaximumIndex, - _state.State.SetBindlessTextureConstantBufferSlotSelect, - false, - PrimitiveTopology.Points, - default); - - ShaderBundle cs = memoryManager.Physical.ShaderCache.GetComputeShader( - _channel, - gas, - shaderGpuVa, + _state.State.SetBindlessTextureConstantBufferSlotSelect); + + GpuChannelComputeState computeState = new GpuChannelComputeState( qmd.CtaThreadDimension0, qmd.CtaThreadDimension1, qmd.CtaThreadDimension2, localMemorySize, sharedMemorySize); + CachedShaderProgram cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa); + _context.Renderer.Pipeline.SetProgram(cs.HostProgram); _channel.TextureManager.SetComputeSamplerPool(samplerPoolGpuVa, _state.State.SetTexSamplerPoolCMaximumIndex, qmd.SamplerIndex); diff --git a/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs b/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs index 8d67d0fd737a..c9a18f144045 100644 --- 
a/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs @@ -7,7 +7,6 @@ using Ryujinx.Graphics.Texture; using System; using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; namespace Ryujinx.Graphics.Gpu.Engine.Threed { @@ -30,6 +29,7 @@ class StateUpdater private readonly StateUpdateTracker _updateTracker; private readonly ShaderProgramInfo[] _currentProgramInfo; + private ShaderSpecializationState _shaderSpecState; private bool _vtgWritesRtLayer; private byte _vsClipDistancesWritten; @@ -195,6 +195,17 @@ public void SetAllDirty() [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Update() { + // If any state that the shader depends on changed, + // then we may need to compile/bind a different version + // of the shader for the new state. + if (_shaderSpecState != null) + { + if (!_shaderSpecState.MatchesGraphics(_channel, GetPoolState())) + { + ForceShaderUpdate(); + } + } + // The vertex buffer size is calculated using a different // method when doing indexed draws, so we need to make sure // to update the vertex buffers if we are doing a regular @@ -1065,106 +1076,125 @@ private void UpdateLogicOpState() /// private void UpdateShaderState() { - ShaderAddresses addresses = new ShaderAddresses(); + var shaderCache = _channel.MemoryManager.Physical.ShaderCache; - Span addressesSpan = MemoryMarshal.CreateSpan(ref addresses, 1); + _vtgWritesRtLayer = false; - Span addressesArray = MemoryMarshal.Cast(addressesSpan); + ShaderAddresses addresses = new ShaderAddresses(); + Span addressesSpan = addresses.AsSpan(); ulong baseAddress = _state.State.ShaderBaseAddress.Pack(); for (int index = 0; index < 6; index++) { var shader = _state.State.ShaderState[index]; - if (!shader.UnpackEnable() && index != 1) { continue; } - addressesArray[index] = baseAddress + shader.Offset; + addressesSpan[index] = baseAddress + shader.Offset; } - GpuAccessorState gas = new GpuAccessorState( - 
_state.State.TexturePoolState.Address.Pack(), - _state.State.TexturePoolState.MaximumId, - (int)_state.State.TextureBufferIndex, - _state.State.EarlyZForce, - _drawState.Topology, - _state.State.TessMode); + GpuChannelPoolState poolState = GetPoolState(); + GpuChannelGraphicsState graphicsState = GetGraphicsState(); - ShaderBundle gs = _channel.MemoryManager.Physical.ShaderCache.GetGraphicsShader(ref _state.State, _channel, gas, addresses); + CachedShaderProgram gs = shaderCache.GetGraphicsShader(ref _state.State, _channel, poolState, graphicsState, addresses); + + _shaderSpecState = gs.SpecializationState; byte oldVsClipDistancesWritten = _vsClipDistancesWritten; - _drawState.VsUsesInstanceId = gs.Shaders[0]?.Info.UsesInstanceId ?? false; - _vsClipDistancesWritten = gs.Shaders[0]?.Info.ClipDistancesWritten ?? 0; - _vtgWritesRtLayer = false; + _drawState.VsUsesInstanceId = gs.Shaders[1]?.Info.UsesInstanceId ?? false; + _vsClipDistancesWritten = gs.Shaders[1]?.Info.ClipDistancesWritten ?? 
0; if (oldVsClipDistancesWritten != _vsClipDistancesWritten) { UpdateUserClipState(); } - for (int stage = 0; stage < Constants.ShaderStages; stage++) + for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++) { - ShaderProgramInfo info = gs.Shaders[stage]?.Info; + UpdateStageBindings(stageIndex, gs.Shaders[stageIndex + 1]?.Info); + } - _currentProgramInfo[stage] = info; + _context.Renderer.Pipeline.SetProgram(gs.HostProgram); + } - if (info == null) - { - _channel.TextureManager.RentGraphicsTextureBindings(stage, 0); - _channel.TextureManager.RentGraphicsImageBindings(stage, 0); - _channel.BufferManager.SetGraphicsStorageBufferBindings(stage, null); - _channel.BufferManager.SetGraphicsUniformBufferBindings(stage, null); - continue; - } + private void UpdateStageBindings(int stage, ShaderProgramInfo info) + { + _currentProgramInfo[stage] = info; - Span textureBindings = _channel.TextureManager.RentGraphicsTextureBindings(stage, info.Textures.Count); + if (info == null) + { + _channel.TextureManager.RentGraphicsTextureBindings(stage, 0); + _channel.TextureManager.RentGraphicsImageBindings(stage, 0); + _channel.BufferManager.SetGraphicsStorageBufferBindings(stage, null); + _channel.BufferManager.SetGraphicsUniformBufferBindings(stage, null); + return; + } - if (info.UsesRtLayer) - { - _vtgWritesRtLayer = true; - } + Span textureBindings = _channel.TextureManager.RentGraphicsTextureBindings(stage, info.Textures.Count); - for (int index = 0; index < info.Textures.Count; index++) - { - var descriptor = info.Textures[index]; + if (info.UsesRtLayer) + { + _vtgWritesRtLayer = true; + } - Target target = ShaderTexture.GetTarget(descriptor.Type); + for (int index = 0; index < info.Textures.Count; index++) + { + var descriptor = info.Textures[index]; - textureBindings[index] = new TextureBindingInfo( - target, - descriptor.Binding, - descriptor.CbufSlot, - descriptor.HandleIndex, - descriptor.Flags); - } + Target target = 
ShaderTexture.GetTarget(descriptor.Type); - TextureBindingInfo[] imageBindings = _channel.TextureManager.RentGraphicsImageBindings(stage, info.Images.Count); + textureBindings[index] = new TextureBindingInfo( + target, + descriptor.Binding, + descriptor.CbufSlot, + descriptor.HandleIndex, + descriptor.Flags); + } - for (int index = 0; index < info.Images.Count; index++) - { - var descriptor = info.Images[index]; - - Target target = ShaderTexture.GetTarget(descriptor.Type); - Format format = ShaderTexture.GetFormat(descriptor.Format); - - imageBindings[index] = new TextureBindingInfo( - target, - format, - descriptor.Binding, - descriptor.CbufSlot, - descriptor.HandleIndex, - descriptor.Flags); - } + TextureBindingInfo[] imageBindings = _channel.TextureManager.RentGraphicsImageBindings(stage, info.Images.Count); - _channel.BufferManager.SetGraphicsStorageBufferBindings(stage, info.SBuffers); - _channel.BufferManager.SetGraphicsUniformBufferBindings(stage, info.CBuffers); + for (int index = 0; index < info.Images.Count; index++) + { + var descriptor = info.Images[index]; + + Target target = ShaderTexture.GetTarget(descriptor.Type); + Format format = ShaderTexture.GetFormat(descriptor.Format); + + imageBindings[index] = new TextureBindingInfo( + target, + format, + descriptor.Binding, + descriptor.CbufSlot, + descriptor.HandleIndex, + descriptor.Flags); } - _context.Renderer.Pipeline.SetProgram(gs.HostProgram); + _channel.BufferManager.SetGraphicsStorageBufferBindings(stage, info.SBuffers); + _channel.BufferManager.SetGraphicsUniformBufferBindings(stage, info.CBuffers); + } + + private GpuChannelPoolState GetPoolState() + { + return new GpuChannelPoolState( + _state.State.TexturePoolState.Address.Pack(), + _state.State.TexturePoolState.MaximumId, + (int)_state.State.TextureBufferIndex); + } + + /// + /// Gets the current GPU channel state for shader creation or compatibility verification. 
+ /// + /// Current GPU channel state + private GpuChannelGraphicsState GetGraphicsState() + { + return new GpuChannelGraphicsState( + _state.State.EarlyZForce, + _drawState.Topology, + _state.State.TessMode); } /// diff --git a/Ryujinx.Graphics.Gpu/GpuContext.cs b/Ryujinx.Graphics.Gpu/GpuContext.cs index e6697b3aedc3..66077c3bf6b5 100644 --- a/Ryujinx.Graphics.Gpu/GpuContext.cs +++ b/Ryujinx.Graphics.Gpu/GpuContext.cs @@ -238,13 +238,13 @@ private void ShaderCacheStateUpdate(ShaderCacheState state, int current, int tot /// /// Initialize the GPU shader cache. /// - public void InitializeShaderCache() + public void InitializeShaderCache(CancellationToken cancellationToken) { HostInitalized.WaitOne(); foreach (var physicalMemory in PhysicalMemoryRegistry.Values) { - physicalMemory.ShaderCache.Initialize(); + physicalMemory.ShaderCache.Initialize(cancellationToken); } } diff --git a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs index ae27c71293c8..0ac6160d9583 100644 --- a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs +++ b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs @@ -115,6 +115,73 @@ public ReadOnlySpan GetSpan(ulong va, int size, bool tracked = false) } } + /// + /// Gets a read-only span of data from GPU mapped memory, up to the entire range specified, + /// or the last mapped page if the range is not fully mapped. 
+ /// + /// GPU virtual address where the data is located + /// Size of the data + /// True if read tracking is triggered on the span + /// The span of the data at the specified memory location + public ReadOnlySpan GetSpanMapped(ulong va, int size, bool tracked = false) + { + bool isContiguous = true; + int mappedSize; + + if (ValidateAddress(va) && GetPte(va) != PteUnmapped && Physical.IsMapped(Translate(va))) + { + ulong endVa = va + (ulong)size; + ulong endVaAligned = (endVa + PageMask) & ~PageMask; + ulong currentVa = va & ~PageMask; + + int pages = (int)((endVaAligned - currentVa) / PageSize); + + for (int page = 0; page < pages - 1; page++) + { + ulong nextVa = currentVa + PageSize; + ulong nextPa = Translate(nextVa); + + if (!ValidateAddress(nextVa) || GetPte(nextVa) == PteUnmapped || !Physical.IsMapped(nextPa)) + { + break; + } + + if (Translate(currentVa) + PageSize != nextPa) + { + isContiguous = false; + } + + currentVa += PageSize; + } + + currentVa += PageSize; + + if (currentVa > endVa) + { + currentVa = endVa; + } + + mappedSize = (int)(currentVa - va); + } + else + { + return ReadOnlySpan.Empty; + } + + if (isContiguous) + { + return Physical.GetSpan(Translate(va), mappedSize, tracked); + } + else + { + Span data = new byte[mappedSize]; + + ReadImpl(va, data, tracked); + + return data; + } + } + /// /// Reads data from a possibly non-contiguous region of GPU mapped memory. /// diff --git a/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs b/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs index 57590fb38bfd..155cba0f55db 100644 --- a/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs +++ b/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs @@ -341,9 +341,9 @@ public CpuSmartMultiRegionHandle BeginSmartGranularTracking(ulong address, ulong } /// - /// Checks if the page at a given address is mapped on CPU memory. + /// Checks if a given memory page is mapped. 
/// - /// CPU virtual address of the page to check + /// CPU virtual address of the page /// True if mapped, false otherwise public bool IsMapped(ulong address) { diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/CacheHelper.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/CacheHelper.cs index 464436ea0540..d16afb650392 100644 --- a/Ryujinx.Graphics.Gpu/Shader/Cache/CacheHelper.cs +++ b/Ryujinx.Graphics.Gpu/Shader/Cache/CacheHelper.cs @@ -2,11 +2,8 @@ using Ryujinx.Common; using Ryujinx.Common.Configuration; using Ryujinx.Common.Logging; -using Ryujinx.Graphics.GAL; -using Ryujinx.Graphics.Gpu.Memory; using Ryujinx.Graphics.Gpu.Shader.Cache.Definition; using Ryujinx.Graphics.Shader; -using Ryujinx.Graphics.Shader.Translation; using System; using System.Collections.Generic; using System.IO; @@ -20,70 +17,6 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache /// static class CacheHelper { - /// - /// Try to read the manifest header from a given file path. - /// - /// The path to the manifest file - /// The manifest header read - /// Return true if the manifest header was read - public static bool TryReadManifestHeader(string manifestPath, out CacheManifestHeader header) - { - header = default; - - if (File.Exists(manifestPath)) - { - Memory rawManifest = File.ReadAllBytes(manifestPath); - - if (MemoryMarshal.TryRead(rawManifest.Span, out header)) - { - return true; - } - } - - return false; - } - - /// - /// Try to read the manifest from a given file path. 
- /// - /// The path to the manifest file - /// The graphics api used by the cache - /// The hash type of the cache - /// The manifest header read - /// The entries read from the cache manifest - /// Return true if the manifest was read - public static bool TryReadManifestFile(string manifestPath, CacheGraphicsApi graphicsApi, CacheHashType hashType, out CacheManifestHeader header, out HashSet entries) - { - header = default; - entries = new HashSet(); - - if (File.Exists(manifestPath)) - { - Memory rawManifest = File.ReadAllBytes(manifestPath); - - if (MemoryMarshal.TryRead(rawManifest.Span, out header)) - { - Memory hashTableRaw = rawManifest.Slice(Unsafe.SizeOf()); - - bool isValid = header.IsValid(graphicsApi, hashType, hashTableRaw.Span); - - if (isValid) - { - ReadOnlySpan hashTable = MemoryMarshal.Cast(hashTableRaw.Span); - - foreach (Hash128 hash in hashTable) - { - entries.Add(hash); - } - } - - return isValid; - } - } - - return false; - } - /// /// Compute a cache manifest from runtime data. /// @@ -246,82 +179,23 @@ public static byte[] ReadFromFile(string cacheDirectory, Hash128 entry) return null; } - /// - /// Compute the guest program code for usage while dumping to disk or hash. 
- /// - /// The guest shader entries to use - /// The transform feedback descriptors - /// Used to determine if the guest program code is generated for hashing - /// The guest program code for usage while dumping to disk or hash - private static byte[] ComputeGuestProgramCode(ReadOnlySpan cachedShaderEntries, TransformFeedbackDescriptor[] tfd, bool forHashCompute = false) - { - using (MemoryStream stream = new MemoryStream()) - { - BinaryWriter writer = new BinaryWriter(stream); - - foreach (GuestShaderCacheEntry cachedShaderEntry in cachedShaderEntries) - { - if (cachedShaderEntry != null) - { - // Code (and Code A if present) - stream.Write(cachedShaderEntry.Code); - - if (forHashCompute) - { - // Guest GPU accessor header (only write this for hashes, already present in the header for dumps) - writer.WriteStruct(cachedShaderEntry.Header.GpuAccessorHeader); - } - - // Texture descriptors - foreach (GuestTextureDescriptor textureDescriptor in cachedShaderEntry.TextureDescriptors.Values) - { - writer.WriteStruct(textureDescriptor); - } - } - } - - // Transform feedback - if (tfd != null) - { - foreach (TransformFeedbackDescriptor transform in tfd) - { - writer.WriteStruct(new GuestShaderCacheTransformFeedbackHeader(transform.BufferIndex, transform.Stride, transform.VaryingLocations.Length)); - writer.Write(transform.VaryingLocations); - } - } - - return stream.ToArray(); - } - } - - /// - /// Compute a guest hash from shader entries. - /// - /// The guest shader entries to use - /// The optional transform feedback descriptors - /// A guest hash from shader entries - public static Hash128 ComputeGuestHashFromCache(ReadOnlySpan cachedShaderEntries, TransformFeedbackDescriptor[] tfd = null) - { - return XXHash128.ComputeHash(ComputeGuestProgramCode(cachedShaderEntries, tfd, true)); - } - /// /// Read transform feedback descriptors from guest. 
/// /// The raw guest transform feedback descriptors /// The guest shader program header /// The transform feedback descriptors read from guest - public static TransformFeedbackDescriptor[] ReadTransformFeedbackInformation(ref ReadOnlySpan data, GuestShaderCacheHeader header) + public static TransformFeedbackDescriptorOld[] ReadTransformFeedbackInformation(ref ReadOnlySpan data, GuestShaderCacheHeader header) { if (header.TransformFeedbackCount != 0) { - TransformFeedbackDescriptor[] result = new TransformFeedbackDescriptor[header.TransformFeedbackCount]; + TransformFeedbackDescriptorOld[] result = new TransformFeedbackDescriptorOld[header.TransformFeedbackCount]; for (int i = 0; i < result.Length; i++) { GuestShaderCacheTransformFeedbackHeader feedbackHeader = MemoryMarshal.Read(data); - result[i] = new TransformFeedbackDescriptor(feedbackHeader.BufferIndex, feedbackHeader.Stride, data.Slice(Unsafe.SizeOf(), feedbackHeader.VaryingLocationsLength).ToArray()); + result[i] = new TransformFeedbackDescriptorOld(feedbackHeader.BufferIndex, feedbackHeader.Stride, data.Slice(Unsafe.SizeOf(), feedbackHeader.VaryingLocationsLength).ToArray()); data = data.Slice(Unsafe.SizeOf() + feedbackHeader.VaryingLocationsLength); } @@ -332,205 +206,6 @@ public static TransformFeedbackDescriptor[] ReadTransformFeedbackInformation(ref return null; } - /// - /// Builds gpu state flags using information from the given gpu accessor. - /// - /// The gpu accessor - /// The gpu state flags - private static GuestGpuStateFlags GetGpuStateFlags(IGpuAccessor gpuAccessor) - { - GuestGpuStateFlags flags = 0; - - if (gpuAccessor.QueryEarlyZForce()) - { - flags |= GuestGpuStateFlags.EarlyZForce; - } - - return flags; - } - - /// - /// Packs the tessellation parameters from the gpu accessor. 
- /// - /// The gpu accessor - /// The packed tessellation parameters - private static byte GetTessellationModePacked(IGpuAccessor gpuAccessor) - { - byte value; - - value = (byte)((int)gpuAccessor.QueryTessPatchType() & 3); - value |= (byte)(((int)gpuAccessor.QueryTessSpacing() & 3) << 2); - - if (gpuAccessor.QueryTessCw()) - { - value |= 0x10; - } - - return value; - } - - /// - /// Create a new instance of from an gpu accessor. - /// - /// The gpu accessor - /// A new instance of - public static GuestGpuAccessorHeader CreateGuestGpuAccessorCache(IGpuAccessor gpuAccessor) - { - return new GuestGpuAccessorHeader - { - ComputeLocalSizeX = gpuAccessor.QueryComputeLocalSizeX(), - ComputeLocalSizeY = gpuAccessor.QueryComputeLocalSizeY(), - ComputeLocalSizeZ = gpuAccessor.QueryComputeLocalSizeZ(), - ComputeLocalMemorySize = gpuAccessor.QueryComputeLocalMemorySize(), - ComputeSharedMemorySize = gpuAccessor.QueryComputeSharedMemorySize(), - PrimitiveTopology = gpuAccessor.QueryPrimitiveTopology(), - TessellationModePacked = GetTessellationModePacked(gpuAccessor), - StateFlags = GetGpuStateFlags(gpuAccessor) - }; - } - - /// - /// Create guest shader cache entries from the runtime contexts. - /// - /// The GPU channel in use - /// The runtime contexts - /// Guest shader cahe entries from the runtime contexts - public static GuestShaderCacheEntry[] CreateShaderCacheEntries(GpuChannel channel, ReadOnlySpan shaderContexts) - { - MemoryManager memoryManager = channel.MemoryManager; - - int startIndex = shaderContexts.Length > 1 ? 1 : 0; - - GuestShaderCacheEntry[] entries = new GuestShaderCacheEntry[shaderContexts.Length - startIndex]; - - for (int i = startIndex; i < shaderContexts.Length; i++) - { - TranslatorContext context = shaderContexts[i]; - - if (context == null) - { - continue; - } - - GpuAccessor gpuAccessor = context.GpuAccessor as GpuAccessor; - - ulong cb1DataAddress; - int cb1DataSize = gpuAccessor?.Cb1DataSize ?? 
0; - - if (context.Stage == ShaderStage.Compute) - { - cb1DataAddress = channel.BufferManager.GetComputeUniformBufferAddress(1); - } - else - { - int stageIndex = context.Stage switch - { - ShaderStage.TessellationControl => 1, - ShaderStage.TessellationEvaluation => 2, - ShaderStage.Geometry => 3, - ShaderStage.Fragment => 4, - _ => 0 - }; - - cb1DataAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(stageIndex, 1); - } - - int size = context.Size; - - TranslatorContext translatorContext2 = i == 1 ? shaderContexts[0] : null; - - int sizeA = translatorContext2 != null ? translatorContext2.Size : 0; - - byte[] code = new byte[size + cb1DataSize + sizeA]; - - memoryManager.GetSpan(context.Address, size).CopyTo(code); - - if (cb1DataAddress != 0 && cb1DataSize != 0) - { - memoryManager.Physical.GetSpan(cb1DataAddress, cb1DataSize).CopyTo(code.AsSpan(size, cb1DataSize)); - } - - if (translatorContext2 != null) - { - memoryManager.GetSpan(translatorContext2.Address, sizeA).CopyTo(code.AsSpan(size + cb1DataSize, sizeA)); - } - - GuestGpuAccessorHeader gpuAccessorHeader = CreateGuestGpuAccessorCache(context.GpuAccessor); - - if (gpuAccessor != null) - { - gpuAccessorHeader.TextureDescriptorCount = context.TextureHandlesForCache.Count; - } - - GuestShaderCacheEntryHeader header = new GuestShaderCacheEntryHeader( - context.Stage, - size + cb1DataSize, - sizeA, - cb1DataSize, - gpuAccessorHeader); - - GuestShaderCacheEntry entry = new GuestShaderCacheEntry(header, code); - - if (gpuAccessor != null) - { - foreach (int textureHandle in context.TextureHandlesForCache) - { - GuestTextureDescriptor textureDescriptor = ((Image.TextureDescriptor)gpuAccessor.GetTextureDescriptor(textureHandle, -1)).ToCache(); - - textureDescriptor.Handle = (uint)textureHandle; - - entry.TextureDescriptors.Add(textureHandle, textureDescriptor); - } - } - - entries[i - startIndex] = entry; - } - - return entries; - } - - /// - /// Create a guest shader program. 
- /// - /// The entries composing the guest program dump - /// The transform feedback descriptors in use - /// The resulting guest shader program - public static byte[] CreateGuestProgramDump(GuestShaderCacheEntry[] shaderCacheEntries, TransformFeedbackDescriptor[] tfd = null) - { - using (MemoryStream resultStream = new MemoryStream()) - { - BinaryWriter resultStreamWriter = new BinaryWriter(resultStream); - - byte transformFeedbackCount = 0; - - if (tfd != null) - { - transformFeedbackCount = (byte)tfd.Length; - } - - // Header - resultStreamWriter.WriteStruct(new GuestShaderCacheHeader((byte)shaderCacheEntries.Length, transformFeedbackCount)); - - // Write all entries header - foreach (GuestShaderCacheEntry entry in shaderCacheEntries) - { - if (entry == null) - { - resultStreamWriter.WriteStruct(new GuestShaderCacheEntryHeader()); - } - else - { - resultStreamWriter.WriteStruct(entry.Header); - } - } - - // Finally, write all program code and all transform feedback information. - resultStreamWriter.Write(ComputeGuestProgramCode(shaderCacheEntries, tfd)); - - return resultStream.ToArray(); - } - } - /// /// Save temporary files not in archive. 
/// diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/CacheManager.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/CacheManager.cs index 3fc11e822ec6..e67221e7918b 100644 --- a/Ryujinx.Graphics.Gpu/Shader/Cache/CacheManager.cs +++ b/Ryujinx.Graphics.Gpu/Shader/Cache/CacheManager.cs @@ -47,8 +47,6 @@ public CacheManager(CacheGraphicsApi graphicsApi, CacheHashType hashType, string string baseCacheDirectory = CacheHelper.GetBaseCacheDirectory(titleId); - CacheMigration.Run(baseCacheDirectory, graphicsApi, hashType, shaderProvider); - _guestProgramCache = new CacheCollection(baseCacheDirectory, _hashType, CacheGraphicsApi.Guest, "", "program", GuestCacheVersion); _hostProgramCache = new CacheCollection(baseCacheDirectory, _hashType, _graphicsApi, _shaderProvider, "host", shaderCodeGenVersion); } diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/CacheMigration.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/CacheMigration.cs deleted file mode 100644 index 5b4a171354ce..000000000000 --- a/Ryujinx.Graphics.Gpu/Shader/Cache/CacheMigration.cs +++ /dev/null @@ -1,175 +0,0 @@ -using ICSharpCode.SharpZipLib.Zip; -using Ryujinx.Common; -using Ryujinx.Common.Logging; -using Ryujinx.Graphics.GAL; -using Ryujinx.Graphics.Gpu.Shader.Cache.Definition; -using System; -using System.Collections.Generic; -using System.IO; - -namespace Ryujinx.Graphics.Gpu.Shader.Cache -{ - /// - /// Class handling shader cache migrations. - /// - static class CacheMigration - { - /// - /// Check if the given cache version need to recompute its hash. 
- /// - /// The version in use - /// The new version after migration - /// True if a hash recompute is needed - public static bool NeedHashRecompute(ulong version, out ulong newVersion) - { - const ulong TargetBrokenVersion = 1717; - const ulong TargetFixedVersion = 1759; - - newVersion = TargetFixedVersion; - - if (version == TargetBrokenVersion) - { - return true; - } - - return false; - } - - private class StreamZipEntryDataSource : IStaticDataSource - { - private readonly ZipFile Archive; - private readonly ZipEntry Entry; - public StreamZipEntryDataSource(ZipFile archive, ZipEntry entry) - { - Archive = archive; - Entry = entry; - } - - public Stream GetSource() - { - return Archive.GetInputStream(Entry); - } - } - - /// - /// Move a file with the name of a given hash to another in the cache archive. - /// - /// The archive in use - /// The old key - /// The new key - private static void MoveEntry(ZipFile archive, Hash128 oldKey, Hash128 newKey) - { - ZipEntry oldGuestEntry = archive.GetEntry($"{oldKey}"); - - if (oldGuestEntry != null) - { - archive.Add(new StreamZipEntryDataSource(archive, oldGuestEntry), $"{newKey}", CompressionMethod.Deflated); - archive.Delete(oldGuestEntry); - } - } - - /// - /// Recompute all the hashes of a given cache. 
- /// - /// The guest cache directory path - /// The host cache directory path - /// The graphics api in use - /// The hash type in use - /// The version to write in the host and guest manifest after migration - private static void RecomputeHashes(string guestBaseCacheDirectory, string hostBaseCacheDirectory, CacheGraphicsApi graphicsApi, CacheHashType hashType, ulong newVersion) - { - string guestManifestPath = CacheHelper.GetManifestPath(guestBaseCacheDirectory); - string hostManifestPath = CacheHelper.GetManifestPath(hostBaseCacheDirectory); - - if (CacheHelper.TryReadManifestFile(guestManifestPath, CacheGraphicsApi.Guest, hashType, out _, out HashSet guestEntries)) - { - CacheHelper.TryReadManifestFile(hostManifestPath, graphicsApi, hashType, out _, out HashSet hostEntries); - - Logger.Info?.Print(LogClass.Gpu, "Shader cache hashes need to be recomputed, performing migration..."); - - string guestArchivePath = CacheHelper.GetArchivePath(guestBaseCacheDirectory); - string hostArchivePath = CacheHelper.GetArchivePath(hostBaseCacheDirectory); - - ZipFile guestArchive = new ZipFile(File.Open(guestArchivePath, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.None)); - ZipFile hostArchive = new ZipFile(File.Open(hostArchivePath, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.None)); - - CacheHelper.EnsureArchiveUpToDate(guestBaseCacheDirectory, guestArchive, guestEntries); - CacheHelper.EnsureArchiveUpToDate(hostBaseCacheDirectory, hostArchive, hostEntries); - - int programIndex = 0; - - HashSet newEntries = new HashSet(); - - foreach (Hash128 oldHash in guestEntries) - { - byte[] guestProgram = CacheHelper.ReadFromArchive(guestArchive, oldHash); - - Logger.Info?.Print(LogClass.Gpu, $"Migrating shader {oldHash} ({programIndex + 1} / {guestEntries.Count})"); - - if (guestProgram != null) - { - ReadOnlySpan guestProgramReadOnlySpan = guestProgram; - - ReadOnlySpan cachedShaderEntries = GuestShaderCacheEntry.Parse(ref guestProgramReadOnlySpan, out 
GuestShaderCacheHeader fileHeader); - - TransformFeedbackDescriptor[] tfd = CacheHelper.ReadTransformFeedbackInformation(ref guestProgramReadOnlySpan, fileHeader); - - Hash128 newHash = CacheHelper.ComputeGuestHashFromCache(cachedShaderEntries, tfd); - - if (newHash != oldHash) - { - MoveEntry(guestArchive, oldHash, newHash); - MoveEntry(hostArchive, oldHash, newHash); - } - else - { - Logger.Warning?.Print(LogClass.Gpu, $"Same hashes for shader {oldHash}"); - } - - newEntries.Add(newHash); - } - - programIndex++; - } - - byte[] newGuestManifestContent = CacheHelper.ComputeManifest(newVersion, CacheGraphicsApi.Guest, hashType, newEntries); - byte[] newHostManifestContent = CacheHelper.ComputeManifest(newVersion, graphicsApi, hashType, newEntries); - - File.WriteAllBytes(guestManifestPath, newGuestManifestContent); - File.WriteAllBytes(hostManifestPath, newHostManifestContent); - - guestArchive.CommitUpdate(); - hostArchive.CommitUpdate(); - - guestArchive.Close(); - hostArchive.Close(); - } - } - - /// - /// Check and run cache migration if needed. 
- /// - /// The base path of the cache - /// The graphics api in use - /// The hash type in use - /// The shader provider name of the cache - public static void Run(string baseCacheDirectory, CacheGraphicsApi graphicsApi, CacheHashType hashType, string shaderProvider) - { - string guestBaseCacheDirectory = CacheHelper.GenerateCachePath(baseCacheDirectory, CacheGraphicsApi.Guest, "", "program"); - string hostBaseCacheDirectory = CacheHelper.GenerateCachePath(baseCacheDirectory, graphicsApi, shaderProvider, "host"); - - string guestArchivePath = CacheHelper.GetArchivePath(guestBaseCacheDirectory); - string hostArchivePath = CacheHelper.GetArchivePath(hostBaseCacheDirectory); - - bool isReadOnly = CacheHelper.IsArchiveReadOnly(guestArchivePath) || CacheHelper.IsArchiveReadOnly(hostArchivePath); - - if (!isReadOnly && CacheHelper.TryReadManifestHeader(CacheHelper.GetManifestPath(guestBaseCacheDirectory), out CacheManifestHeader header)) - { - if (NeedHashRecompute(header.Version, out ulong newVersion)) - { - RecomputeHashes(guestBaseCacheDirectory, hostBaseCacheDirectory, graphicsApi, hashType, newVersion); - } - } - } - } -} diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/HostShaderCacheEntry.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/HostShaderCacheEntry.cs index 819c6bcc9021..fe79acb34cc5 100644 --- a/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/HostShaderCacheEntry.cs +++ b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/HostShaderCacheEntry.cs @@ -96,6 +96,7 @@ internal ShaderProgramInfo ToShaderProgramInfo() SBuffers, Textures, Images, + default, Header.UseFlags.HasFlag(UseFlags.InstanceId), Header.UseFlags.HasFlag(UseFlags.RtLayer), Header.ClipDistancesWritten, @@ -160,7 +161,7 @@ internal static HostShaderCacheEntry[] Parse(ReadOnlySpan data, out ReadOn /// The host shader program /// The shaders code holder /// Raw data of a new host shader cache file - internal static byte[] Create(ReadOnlySpan programCode, ShaderCodeHolder[] codeHolders) + 
internal static byte[] Create(ReadOnlySpan programCode, CachedShaderStage[] codeHolders) { HostShaderCacheHeader header = new HostShaderCacheHeader((byte)codeHolders.Length, programCode.Length); diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/Migration.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/Migration.cs new file mode 100644 index 000000000000..27fac8f37998 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/Cache/Migration.cs @@ -0,0 +1,255 @@ +using Ryujinx.Common; +using Ryujinx.Common.Logging; +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Engine.Threed; +using Ryujinx.Graphics.Gpu.Shader.Cache.Definition; +using Ryujinx.Graphics.Gpu.Shader.DiskCache; +using Ryujinx.Graphics.Shader; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Gpu.Shader.Cache +{ + /// + /// Class handling shader cache migrations. + /// + static class Migration + { + // Last codegen version before the migration to the new cache. + private const ulong ShaderCodeGenVersion = 3054; + + /// + /// Migrates from the old cache format to the new one. + /// + /// GPU context + /// Disk cache host storage (used to create the new shader files) + /// Number of migrated shaders + public static int MigrateFromLegacyCache(GpuContext context, DiskCacheHostStorage hostStorage) + { + string baseCacheDirectory = CacheHelper.GetBaseCacheDirectory(GraphicsConfig.TitleId); + string cacheDirectory = CacheHelper.GenerateCachePath(baseCacheDirectory, CacheGraphicsApi.Guest, "", "program"); + + // If the directory does not exist, we have no old cache. + // Exit early as the CacheManager constructor will create the directories. 
+ if (!Directory.Exists(cacheDirectory)) + { + return 0; + } + + if (GraphicsConfig.EnableShaderCache && GraphicsConfig.TitleId != null) + { + CacheManager cacheManager = new CacheManager(CacheGraphicsApi.OpenGL, CacheHashType.XxHash128, "glsl", GraphicsConfig.TitleId, ShaderCodeGenVersion); + + bool isReadOnly = cacheManager.IsReadOnly; + + HashSet invalidEntries = null; + + if (isReadOnly) + { + Logger.Warning?.Print(LogClass.Gpu, "Loading shader cache in read-only mode (cache in use by another program!)"); + } + else + { + invalidEntries = new HashSet(); + } + + ReadOnlySpan guestProgramList = cacheManager.GetGuestProgramList(); + + for (int programIndex = 0; programIndex < guestProgramList.Length; programIndex++) + { + Hash128 key = guestProgramList[programIndex]; + + byte[] guestProgram = cacheManager.GetGuestProgramByHash(ref key); + + if (guestProgram == null) + { + Logger.Error?.Print(LogClass.Gpu, $"Ignoring orphan shader hash {key} in cache (is the cache incomplete?)"); + + continue; + } + + ReadOnlySpan guestProgramReadOnlySpan = guestProgram; + + ReadOnlySpan cachedShaderEntries = GuestShaderCacheEntry.Parse(ref guestProgramReadOnlySpan, out GuestShaderCacheHeader fileHeader); + + if (cachedShaderEntries[0].Header.Stage == ShaderStage.Compute) + { + Debug.Assert(cachedShaderEntries.Length == 1); + + GuestShaderCacheEntry entry = cachedShaderEntries[0]; + + byte[] code = entry.Code.AsSpan(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray(); + + Span codeSpan = entry.Code; + byte[] cb1Data = codeSpan.Slice(codeSpan.Length - entry.Header.Cb1DataSize).ToArray(); + + ShaderProgramInfo info = new ShaderProgramInfo( + Array.Empty(), + Array.Empty(), + Array.Empty(), + Array.Empty(), + ShaderStage.Compute, + false, + false, + 0, + 0); + + GpuChannelComputeState computeState = new GpuChannelComputeState( + entry.Header.GpuAccessorHeader.ComputeLocalSizeX, + entry.Header.GpuAccessorHeader.ComputeLocalSizeY, + 
entry.Header.GpuAccessorHeader.ComputeLocalSizeZ, + entry.Header.GpuAccessorHeader.ComputeLocalMemorySize, + entry.Header.GpuAccessorHeader.ComputeSharedMemorySize); + + ShaderSpecializationState specState = new ShaderSpecializationState(computeState); + + foreach (var td in entry.TextureDescriptors) + { + var handle = td.Key; + var data = td.Value; + + specState.RegisterTexture( + 0, + handle, + -1, + data.UnpackFormat(), + data.UnpackSrgb(), + data.UnpackTextureTarget(), + data.UnpackTextureCoordNormalized()); + } + + CachedShaderStage shader = new CachedShaderStage(info, code, cb1Data); + CachedShaderProgram program = new CachedShaderProgram(null, specState, shader); + + hostStorage.AddShader(context, program, ReadOnlySpan.Empty); + } + else + { + Debug.Assert(cachedShaderEntries.Length == Constants.ShaderStages); + + CachedShaderStage[] shaders = new CachedShaderStage[Constants.ShaderStages + 1]; + List shaderPrograms = new List(); + + TransformFeedbackDescriptorOld[] tfd = CacheHelper.ReadTransformFeedbackInformation(ref guestProgramReadOnlySpan, fileHeader); + + GuestShaderCacheEntry[] entries = cachedShaderEntries.ToArray(); + + GuestGpuAccessorHeader accessorHeader = entries[0].Header.GpuAccessorHeader; + + TessMode tessMode = new TessMode(); + + int tessPatchType = accessorHeader.TessellationModePacked & 3; + int tessSpacing = (accessorHeader.TessellationModePacked >> 2) & 3; + bool tessCw = (accessorHeader.TessellationModePacked & 0x10) != 0; + + tessMode.Packed = (uint)tessPatchType; + tessMode.Packed |= (uint)(tessSpacing << 4); + + if (tessCw) + { + tessMode.Packed |= 0x100; + } + + PrimitiveTopology topology = accessorHeader.PrimitiveTopology switch + { + InputTopology.Lines => PrimitiveTopology.Lines, + InputTopology.LinesAdjacency => PrimitiveTopology.LinesAdjacency, + InputTopology.Triangles => PrimitiveTopology.Triangles, + InputTopology.TrianglesAdjacency => PrimitiveTopology.TrianglesAdjacency, + _ => PrimitiveTopology.Points + }; + + 
GpuChannelGraphicsState graphicsState = new GpuChannelGraphicsState( + accessorHeader.StateFlags.HasFlag(GuestGpuStateFlags.EarlyZForce), + topology, + tessMode); + + TransformFeedbackDescriptor[] tfdNew = null; + + if (tfd != null) + { + tfdNew = new TransformFeedbackDescriptor[tfd.Length]; + + for (int tfIndex = 0; tfIndex < tfd.Length; tfIndex++) + { + Array32 varyingLocations = new Array32(); + Span varyingLocationsSpan = MemoryMarshal.Cast(varyingLocations.ToSpan()); + tfd[tfIndex].VaryingLocations.CopyTo(varyingLocationsSpan.Slice(0, tfd[tfIndex].VaryingLocations.Length)); + + tfdNew[tfIndex] = new TransformFeedbackDescriptor( + tfd[tfIndex].BufferIndex, + tfd[tfIndex].Stride, + tfd[tfIndex].VaryingLocations.Length, + ref varyingLocations); + } + } + + ShaderSpecializationState specState = new ShaderSpecializationState(graphicsState, tfdNew); + + for (int i = 0; i < entries.Length; i++) + { + GuestShaderCacheEntry entry = entries[i]; + + if (entry == null) + { + continue; + } + + ShaderProgramInfo info = new ShaderProgramInfo( + Array.Empty(), + Array.Empty(), + Array.Empty(), + Array.Empty(), + (ShaderStage)(i + 1), + false, + false, + 0, + 0); + + // NOTE: Vertex B comes first in the shader cache. + byte[] code = entry.Code.AsSpan(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray(); + byte[] code2 = entry.Header.SizeA != 0 ? 
entry.Code.AsSpan(entry.Header.Size, entry.Header.SizeA).ToArray() : null; + + Span codeSpan = entry.Code; + byte[] cb1Data = codeSpan.Slice(codeSpan.Length - entry.Header.Cb1DataSize).ToArray(); + + shaders[i + 1] = new CachedShaderStage(info, code, cb1Data); + + if (code2 != null) + { + shaders[0] = new CachedShaderStage(null, code2, cb1Data); + } + + foreach (var td in entry.TextureDescriptors) + { + var handle = td.Key; + var data = td.Value; + + specState.RegisterTexture( + i, + handle, + -1, + data.UnpackFormat(), + data.UnpackSrgb(), + data.UnpackTextureTarget(), + data.UnpackTextureCoordNormalized()); + } + } + + CachedShaderProgram program = new CachedShaderProgram(null, specState, shaders); + + hostStorage.AddShader(context, program, ReadOnlySpan.Empty); + } + } + + return guestProgramList.Length; + } + + return 0; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/TransformFeedbackDescriptorOld.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/TransformFeedbackDescriptorOld.cs new file mode 100644 index 000000000000..5e9c6711de89 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/Cache/TransformFeedbackDescriptorOld.cs @@ -0,0 +1,19 @@ +using System; + +namespace Ryujinx.Graphics.Gpu.Shader.Cache +{ + struct TransformFeedbackDescriptorOld + { + public int BufferIndex { get; } + public int Stride { get; } + + public byte[] VaryingLocations { get; } + + public TransformFeedbackDescriptorOld(int bufferIndex, int stride, byte[] varyingLocations) + { + BufferIndex = bufferIndex; + Stride = stride; + VaryingLocations = varyingLocations ?? 
throw new ArgumentNullException(nameof(varyingLocations)); + } + } +} diff --git a/Ryujinx.Graphics.Gpu/Shader/CachedGpuAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/CachedGpuAccessor.cs deleted file mode 100644 index d65349a52b64..000000000000 --- a/Ryujinx.Graphics.Gpu/Shader/CachedGpuAccessor.cs +++ /dev/null @@ -1,222 +0,0 @@ -using Ryujinx.Common.Logging; -using Ryujinx.Graphics.Gpu.Shader.Cache.Definition; -using Ryujinx.Graphics.Shader; -using System; -using System.Collections.Generic; -using System.Runtime.InteropServices; - -namespace Ryujinx.Graphics.Gpu.Shader -{ - class CachedGpuAccessor : TextureDescriptorCapableGpuAccessor, IGpuAccessor - { - private readonly ReadOnlyMemory _data; - private readonly ReadOnlyMemory _cb1Data; - private readonly GuestGpuAccessorHeader _header; - private readonly Dictionary _textureDescriptors; - private readonly TransformFeedbackDescriptor[] _tfd; - - /// - /// Creates a new instance of the cached GPU state accessor for shader translation. - /// - /// GPU context - /// The data of the shader - /// The constant buffer 1 data of the shader - /// The cache of the GPU accessor - /// The cache of the texture descriptors - public CachedGpuAccessor( - GpuContext context, - ReadOnlyMemory data, - ReadOnlyMemory cb1Data, - GuestGpuAccessorHeader header, - IReadOnlyDictionary guestTextureDescriptors, - TransformFeedbackDescriptor[] tfd) : base(context) - { - _data = data; - _cb1Data = cb1Data; - _header = header; - _textureDescriptors = new Dictionary(); - - foreach (KeyValuePair guestTextureDescriptor in guestTextureDescriptors) - { - _textureDescriptors.Add(guestTextureDescriptor.Key, guestTextureDescriptor.Value); - } - - _tfd = tfd; - } - - /// - /// Reads data from the constant buffer 1. - /// - /// Offset in bytes to read from - /// Value at the given offset - public uint ConstantBuffer1Read(int offset) - { - return MemoryMarshal.Cast(_cb1Data.Span.Slice(offset))[0]; - } - - /// - /// Prints a log message. 
- /// - /// Message to print - public void Log(string message) - { - Logger.Warning?.Print(LogClass.Gpu, $"Shader translator: {message}"); - } - - /// - /// Gets a span of the specified memory location, containing shader code. - /// - /// GPU virtual address of the data - /// Minimum size that the returned span may have - /// Span of the memory location - public override ReadOnlySpan GetCode(ulong address, int minimumSize) - { - return MemoryMarshal.Cast(_data.Span.Slice((int)address)); - } - - /// - /// Checks if a given memory address is mapped. - /// - /// GPU virtual address to be checked - /// True if the address is mapped, false otherwise - public bool MemoryMapped(ulong address) - { - return address < (ulong)_data.Length; - } - - /// - /// Queries Local Size X for compute shaders. - /// - /// Local Size X - public int QueryComputeLocalSizeX() - { - return _header.ComputeLocalSizeX; - } - - /// - /// Queries Local Size Y for compute shaders. - /// - /// Local Size Y - public int QueryComputeLocalSizeY() - { - return _header.ComputeLocalSizeY; - } - - /// - /// Queries Local Size Z for compute shaders. - /// - /// Local Size Z - public int QueryComputeLocalSizeZ() - { - return _header.ComputeLocalSizeZ; - } - - /// - /// Queries Local Memory size in bytes for compute shaders. - /// - /// Local Memory size in bytes - public int QueryComputeLocalMemorySize() - { - return _header.ComputeLocalMemorySize; - } - - /// - /// Queries Shared Memory size in bytes for compute shaders. - /// - /// Shared Memory size in bytes - public int QueryComputeSharedMemorySize() - { - return _header.ComputeSharedMemorySize; - } - - /// - /// Queries current primitive topology for geometry shaders. - /// - /// Current primitive topology - public InputTopology QueryPrimitiveTopology() - { - return _header.PrimitiveTopology; - } - - /// - /// Queries the tessellation evaluation shader primitive winding order. 
- /// - /// True if the primitive winding order is clockwise, false if counter-clockwise - public bool QueryTessCw() - { - return (_header.TessellationModePacked & 0x10) != 0; - } - - /// - /// Queries the tessellation evaluation shader abstract patch type. - /// - /// Abstract patch type - public TessPatchType QueryTessPatchType() - { - return (TessPatchType)(_header.TessellationModePacked & 3); - } - - /// - /// Queries the tessellation evaluation shader spacing between tessellated vertices of the patch. - /// - /// Spacing between tessellated vertices of the patch - public TessSpacing QueryTessSpacing() - { - return (TessSpacing)((_header.TessellationModePacked >> 2) & 3); - } - - /// - /// Gets the texture descriptor for a given texture on the pool. - /// - /// Index of the texture (this is the word offset of the handle in the constant buffer) - /// Constant buffer slot for the texture handle - /// Texture descriptor - public override Image.ITextureDescriptor GetTextureDescriptor(int handle, int cbufSlot) - { - if (!_textureDescriptors.TryGetValue(handle, out GuestTextureDescriptor textureDescriptor)) - { - throw new ArgumentException(); - } - - return textureDescriptor; - } - - /// - /// Queries transform feedback enable state. - /// - /// True if the shader uses transform feedback, false otherwise - public bool QueryTransformFeedbackEnabled() - { - return _tfd != null; - } - - /// - /// Queries the varying locations that should be written to the transform feedback buffer. - /// - /// Index of the transform feedback buffer - /// Varying locations for the specified buffer - public ReadOnlySpan QueryTransformFeedbackVaryingLocations(int bufferIndex) - { - return _tfd[bufferIndex].VaryingLocations; - } - - /// - /// Queries the stride (in bytes) of the per vertex data written into the transform feedback buffer. 
- /// - /// Index of the transform feedback buffer - /// Stride for the specified buffer - public int QueryTransformFeedbackStride(int bufferIndex) - { - return _tfd[bufferIndex].Stride; - } - - /// - /// Queries if host state forces early depth testing. - /// - /// True if early depth testing is forced - public bool QueryEarlyZForce() - { - return (_header.StateFlags & GuestGpuStateFlags.EarlyZForce) != 0; - } - } -} diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderBundle.cs b/Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs similarity index 63% rename from Ryujinx.Graphics.Gpu/Shader/ShaderBundle.cs rename to Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs index efdbc3ebe09d..3b4c65f3dc2e 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderBundle.cs +++ b/Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs @@ -7,26 +7,33 @@ namespace Ryujinx.Graphics.Gpu.Shader /// Represents a program composed of one or more shader stages (for graphics shaders), /// or a single shader (for compute shaders). /// - class ShaderBundle : IDisposable + class CachedShaderProgram : IDisposable { /// /// Host shader program object. /// public IProgram HostProgram { get; } + /// + /// GPU state used to create this version of the shader. + /// + public ShaderSpecializationState SpecializationState { get; } + /// /// Compiled shader for each shader stage. /// - public ShaderCodeHolder[] Shaders { get; } + public CachedShaderStage[] Shaders { get; } /// /// Creates a new instance of the shader bundle. 
/// /// Host program with all the shader stages + /// GPU state used to create this version of the shader /// Shaders - public ShaderBundle(IProgram hostProgram, params ShaderCodeHolder[] shaders) + public CachedShaderProgram(IProgram hostProgram, ShaderSpecializationState specializationState, params CachedShaderStage[] shaders) { HostProgram = hostProgram; + SpecializationState = specializationState; Shaders = shaders; } @@ -36,11 +43,6 @@ public ShaderBundle(IProgram hostProgram, params ShaderCodeHolder[] shaders) public void Dispose() { HostProgram.Dispose(); - - foreach (ShaderCodeHolder holder in Shaders) - { - holder?.HostShader?.Dispose(); - } } } } diff --git a/Ryujinx.Graphics.Gpu/Shader/CachedShaderStage.cs b/Ryujinx.Graphics.Gpu/Shader/CachedShaderStage.cs new file mode 100644 index 000000000000..22b08dd5aeb7 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/CachedShaderStage.cs @@ -0,0 +1,38 @@ +using Ryujinx.Graphics.Shader; + +namespace Ryujinx.Graphics.Gpu.Shader +{ + /// + /// Cached shader code for a single shader stage. + /// + class CachedShaderStage + { + /// + /// Shader program information. + /// + public ShaderProgramInfo Info { get; } + + /// + /// Maxwell binary shader code. + /// + public byte[] Code { get; } + + /// + /// Constant buffer 1 data accessed by the shader. + /// + public byte[] Cb1Data { get; } + + /// + /// Creates a new instance of the shader code holder. 
+ /// + /// Shader program information + /// Maxwell binary shader code + /// Constant buffer 1 data accessed by the shader + public CachedShaderStage(ShaderProgramInfo info, byte[] code, byte[] cb1Data) + { + Info = info; + Code = code; + Cb1Data = cb1Data; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/ComputeShaderCacheHashTable.cs b/Ryujinx.Graphics.Gpu/Shader/ComputeShaderCacheHashTable.cs new file mode 100644 index 000000000000..08154df32e7b --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/ComputeShaderCacheHashTable.cs @@ -0,0 +1,68 @@ +using Ryujinx.Graphics.Gpu.Shader.HashTable; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Gpu.Shader +{ + /// + /// Compute shader cache hash table. + /// + class ComputeShaderCacheHashTable + { + private readonly PartitionedHashTable _cache; + private readonly List _shaderPrograms; + + /// + /// Creates a new compute shader cache hash table. + /// + public ComputeShaderCacheHashTable() + { + _cache = new PartitionedHashTable(); + _shaderPrograms = new List(); + } + + /// + /// Adds a program to the cache. + /// + /// Program to be added + public void Add(CachedShaderProgram program) + { + var specList = _cache.GetOrAdd(program.Shaders[0].Code, new ShaderSpecializationList()); + specList.Add(program); + _shaderPrograms.Add(program); + } + + /// + /// Tries to find a cached program. 
+ /// + /// GPU channel + /// Texture pool state + /// GPU virtual address of the compute shader + /// Cached host program for the given state, if found + /// Cached guest code, if any found + /// True if a cached host program was found, false otherwise + public bool TryFind( + GpuChannel channel, + GpuChannelPoolState poolState, + ulong gpuVa, + out CachedShaderProgram program, + out byte[] cachedGuestCode) + { + program = null; + ShaderCodeAccessor codeAccessor = new ShaderCodeAccessor(channel.MemoryManager, gpuVa); + bool hasSpecList = _cache.TryFindItem(codeAccessor, out var specList, out cachedGuestCode); + return hasSpecList && specList.TryFindForCompute(channel, poolState, out program); + } + + /// + /// Gets all programs that have been added to the table. + /// + /// Programs added to the table + public IEnumerable GetPrograms() + { + foreach (var program in _shaderPrograms) + { + yield return program; + } + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs new file mode 100644 index 000000000000..5c5e41c693fc --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs @@ -0,0 +1,138 @@ +using Ryujinx.Common; +using Ryujinx.Common.Logging; +using System; +using System.IO; + +namespace Ryujinx.Graphics.Gpu.Shader.DiskCache +{ + /// + /// Represents a background disk cache writer. + /// + class BackgroundDiskCacheWriter : IDisposable + { + /// + /// Possible operation to do on the disk cache. + /// + private enum CacheFileOperation + { + /// + /// Operation to add a shader to the cache. + /// + AddShader + } + + /// + /// Represents an operation to perform on the disk cache. + /// + private struct CacheFileOperationTask + { + /// + /// The type of operation to perform. + /// + public readonly CacheFileOperation Type; + + /// + /// The data associated to this operation or null. 
+ /// + public readonly object Data; + + public CacheFileOperationTask(CacheFileOperation type, object data) + { + Type = type; + Data = data; + } + } + + /// + /// Background shader cache write information. + /// + private struct AddShaderData + { + /// + /// Cached shader program. + /// + public readonly CachedShaderProgram Program; + + /// + /// Binary host code. + /// + public readonly byte[] HostCode; + + /// + /// Creates a new background shader cache write information. + /// + /// Cached shader program + /// Binary host code + public AddShaderData(CachedShaderProgram program, byte[] hostCode) + { + Program = program; + HostCode = hostCode; + } + } + + private readonly GpuContext _context; + private readonly DiskCacheHostStorage _hostStorage; + private readonly AsyncWorkQueue _fileWriterWorkerQueue; + + /// + /// Creates a new background disk cache writer. + /// + /// GPU context + /// Disk cache host storage + public BackgroundDiskCacheWriter(GpuContext context, DiskCacheHostStorage hostStorage) + { + _context = context; + _hostStorage = hostStorage; + _fileWriterWorkerQueue = new AsyncWorkQueue(ProcessTask, "Gpu.BackgroundDiskCacheWriter"); + } + + /// + /// Processes a shader cache background operation. + /// + /// Task to process + private void ProcessTask(CacheFileOperationTask task) + { + switch (task.Type) + { + case CacheFileOperation.AddShader: + AddShaderData data = (AddShaderData)task.Data; + try + { + _hostStorage.AddShader(_context, data.Program, data.HostCode); + } + catch (DiskCacheLoadException diskCacheLoadException) + { + Logger.Error?.Print(LogClass.Gpu, $"Error writing shader to disk cache. {diskCacheLoadException.Message}"); + } + catch (IOException ioException) + { + Logger.Error?.Print(LogClass.Gpu, $"Error writing shader to disk cache. {ioException.Message}"); + } + break; + } + } + + /// + /// Adds a shader program to be cached in the background. 
+ /// + /// Shader program to cache + /// Host binary code of the program + public void AddShader(CachedShaderProgram program, byte[] hostCode) + { + _fileWriterWorkerQueue.Add(new CacheFileOperationTask(CacheFileOperation.AddShader, new AddShaderData(program, hostCode))); + } + + public void Dispose() + { + Dispose(true); + } + + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + _fileWriterWorkerQueue.Dispose(); + } + } + } +} diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/BinarySerializer.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/BinarySerializer.cs new file mode 100644 index 000000000000..50e37033edd4 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/BinarySerializer.cs @@ -0,0 +1,216 @@ +using System; +using System.IO; +using System.IO.Compression; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Gpu.Shader.DiskCache +{ + /// + /// Binary data serializer. + /// + struct BinarySerializer + { + private readonly Stream _stream; + private Stream _activeStream; + + /// + /// Creates a new binary serializer. + /// + /// Stream to read from or write into + public BinarySerializer(Stream stream) + { + _stream = stream; + _activeStream = stream; + } + + /// + /// Reads data from the stream. + /// + /// Type of the data + /// Data read + public void Read(ref T data) where T : unmanaged + { + Span buffer = MemoryMarshal.Cast(MemoryMarshal.CreateSpan(ref data, 1)); + for (int offset = 0; offset < buffer.Length;) + { + offset += _activeStream.Read(buffer.Slice(offset)); + } + } + + /// + /// Tries to read data from the stream. + /// + /// Type of the data + /// Data read + /// True if the read was successful, false otherwise + public bool TryRead(ref T data) where T : unmanaged + { + // Length is unknown on compressed streams. 
+ if (_activeStream == _stream) + { + int size = Unsafe.SizeOf(); + if (_activeStream.Length - _activeStream.Position < size) + { + return false; + } + } + + Read(ref data); + return true; + } + + /// + /// Reads data prefixed with a magic and size from the stream. + /// + /// Type of the data + /// Data read + /// Expected magic value, for validation + public void ReadWithMagicAndSize(ref T data, uint magic) where T : unmanaged + { + uint actualMagic = 0; + int size = 0; + Read(ref actualMagic); + Read(ref size); + + if (actualMagic != magic) + { + throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedInvalidMagic); + } + + // Structs are expected to expand but not shrink between versions. + if (size > Unsafe.SizeOf()) + { + throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedInvalidLength); + } + + Span buffer = MemoryMarshal.Cast(MemoryMarshal.CreateSpan(ref data, 1)).Slice(0, size); + for (int offset = 0; offset < buffer.Length;) + { + offset += _activeStream.Read(buffer.Slice(offset)); + } + } + + /// + /// Writes data into the stream. + /// + /// Type of the data + /// Data to be written + public void Write(ref T data) where T : unmanaged + { + Span buffer = MemoryMarshal.Cast(MemoryMarshal.CreateSpan(ref data, 1)); + _activeStream.Write(buffer); + } + + /// + /// Writes data prefixed with a magic and size into the stream. + /// + /// Type of the data + /// Data to write + /// Magic value to write + public void WriteWithMagicAndSize(ref T data, uint magic) where T : unmanaged + { + int size = Unsafe.SizeOf(); + Write(ref magic); + Write(ref size); + Span buffer = MemoryMarshal.Cast(MemoryMarshal.CreateSpan(ref data, 1)); + _activeStream.Write(buffer); + } + + /// + /// Indicates that all data that will be read from the stream has been compressed. 
+ /// + public void BeginCompression() + { + CompressionAlgorithm algorithm = CompressionAlgorithm.None; + Read(ref algorithm); + + if (algorithm == CompressionAlgorithm.Deflate) + { + _activeStream = new DeflateStream(_stream, CompressionMode.Decompress, true); + } + } + + /// + /// Indicates that all data that will be written into the stream should be compressed. + /// + /// Compression algorithm that should be used + public void BeginCompression(CompressionAlgorithm algorithm) + { + Write(ref algorithm); + + if (algorithm == CompressionAlgorithm.Deflate) + { + _activeStream = new DeflateStream(_stream, CompressionLevel.SmallestSize, true); + } + } + + /// + /// Indicates the end of a compressed chunk. + /// + /// + /// Any data written after this will not be compressed unless BeginCompression(CompressionAlgorithm) is called again. + /// Any data read after this will be assumed to be uncompressed unless BeginCompression() is called again. + /// + public void EndCompression() + { + if (_activeStream != _stream) + { + _activeStream.Dispose(); + _activeStream = _stream; + } + } + + /// + /// Reads compressed data from the stream. + /// + /// + /// The data buffer must have the exact length of the uncompressed data, + /// otherwise decompression will fail. + /// + /// Stream to read from + /// Buffer to write the uncompressed data into + public static void ReadCompressed(Stream stream, Span data) + { + CompressionAlgorithm algorithm = (CompressionAlgorithm)stream.ReadByte(); + + switch (algorithm) + { + case CompressionAlgorithm.None: + stream.Read(data); + break; + case CompressionAlgorithm.Deflate: + stream = new DeflateStream(stream, CompressionMode.Decompress, true); + for (int offset = 0; offset < data.Length;) + { + offset += stream.Read(data.Slice(offset)); + } + stream.Dispose(); + break; + } + } + + /// + /// Compresses and writes the compressed data into the stream. 
+ /// + /// Stream to write into + /// Data to compress + /// Compression algorithm to be used + public static void WriteCompressed(Stream stream, ReadOnlySpan data, CompressionAlgorithm algorithm) + { + stream.WriteByte((byte)algorithm); + + switch (algorithm) + { + case CompressionAlgorithm.None: + stream.Write(data); + break; + case CompressionAlgorithm.Deflate: + stream = new DeflateStream(stream, CompressionLevel.SmallestSize, true); + stream.Write(data); + stream.Dispose(); + break; + } + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/CompressionAlgorithm.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/CompressionAlgorithm.cs new file mode 100644 index 000000000000..a46e1ef7650a --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/CompressionAlgorithm.cs @@ -0,0 +1,18 @@ +namespace Ryujinx.Graphics.Gpu.Shader.DiskCache +{ + /// + /// Algorithm used to compress the cache. + /// + enum CompressionAlgorithm : byte + { + /// + /// No compression, the data is stored as-is. + /// + None, + + /// + /// Deflate compression (RFC 1951). + /// + Deflate + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheCommon.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheCommon.cs new file mode 100644 index 000000000000..c8a9f7ff2215 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheCommon.cs @@ -0,0 +1,57 @@ +using Ryujinx.Common.Logging; +using System.IO; + +namespace Ryujinx.Graphics.Gpu.Shader.DiskCache +{ + /// + /// Common disk cache utility methods. + /// + static class DiskCacheCommon + { + /// + /// Opens a file for read or write. 
+ /// + /// Base path of the file (should not include the file name) + /// Name of the file + /// Indicates if the file will be read or written + /// File stream + public static FileStream OpenFile(string basePath, string fileName, bool writable) + { + string fullPath = Path.Combine(basePath, fileName); + + FileMode mode; + FileAccess access; + + if (writable) + { + mode = FileMode.OpenOrCreate; + access = FileAccess.ReadWrite; + } + else + { + mode = FileMode.Open; + access = FileAccess.Read; + } + + try + { + return new FileStream(fullPath, mode, access, FileShare.Read); + } + catch (IOException ioException) + { + Logger.Error?.Print(LogClass.Gpu, $"Could not access file \"{fullPath}\". {ioException.Message}"); + + throw new DiskCacheLoadException(DiskCacheLoadResult.NoAccess); + } + } + + /// + /// Gets the compression algorithm that should be used when writing the disk cache. + /// + /// Compression algorithm + public static CompressionAlgorithm GetCompressionAlgorithm() + { + return CompressionAlgorithm.Deflate; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs new file mode 100644 index 000000000000..b1c04eac0c89 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs @@ -0,0 +1,202 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.Gpu.Image; +using Ryujinx.Graphics.Shader; +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Gpu.Shader.DiskCache +{ + /// + /// Represents a GPU state and memory accessor. 
+ /// + class DiskCacheGpuAccessor : GpuAccessorBase, IGpuAccessor + { + private readonly ReadOnlyMemory _data; + private readonly ReadOnlyMemory _cb1Data; + private readonly ShaderSpecializationState _oldSpecState; + private readonly ShaderSpecializationState _newSpecState; + private readonly int _stageIndex; + private ResourceCounts _resourceCounts; + + /// + /// Creates a new instance of the cached GPU state accessor for shader translation. + /// + /// GPU context + /// The data of the shader + /// The constant buffer 1 data of the shader + /// Shader specialization state of the cached shader + /// Shader specialization state of the recompiled shader + /// Shader stage index + public DiskCacheGpuAccessor( + GpuContext context, + ReadOnlyMemory data, + ReadOnlyMemory cb1Data, + ShaderSpecializationState oldSpecState, + ShaderSpecializationState newSpecState, + ResourceCounts counts, + int stageIndex) : base(context) + { + _data = data; + _cb1Data = cb1Data; + _oldSpecState = oldSpecState; + _newSpecState = newSpecState; + _stageIndex = stageIndex; + _resourceCounts = counts; + } + + /// + public uint ConstantBuffer1Read(int offset) + { + if (offset + sizeof(uint) > _cb1Data.Length) + { + throw new DiskCacheLoadException(DiskCacheLoadResult.InvalidCb1DataLength); + } + + return MemoryMarshal.Cast(_cb1Data.Span.Slice(offset))[0]; + } + + /// + public void Log(string message) + { + Logger.Warning?.Print(LogClass.Gpu, $"Shader translator: {message}"); + } + + /// + public ReadOnlySpan GetCode(ulong address, int minimumSize) + { + return MemoryMarshal.Cast(_data.Span.Slice((int)address)); + } + + /// + public int QueryBindingConstantBuffer(int index) + { + return _resourceCounts.UniformBuffersCount++; + } + + /// + public int QueryBindingStorageBuffer(int index) + { + return _resourceCounts.StorageBuffersCount++; + } + + /// + public int QueryBindingTexture(int index) + { + return _resourceCounts.TexturesCount++; + } + + /// + public int QueryBindingImage(int index) + 
{ + return _resourceCounts.ImagesCount++; + } + + /// + public int QueryComputeLocalSizeX() => _oldSpecState.ComputeState.LocalSizeX; + + /// + public int QueryComputeLocalSizeY() => _oldSpecState.ComputeState.LocalSizeY; + + /// + public int QueryComputeLocalSizeZ() => _oldSpecState.ComputeState.LocalSizeZ; + + /// + public int QueryComputeLocalMemorySize() => _oldSpecState.ComputeState.LocalMemorySize; + + /// + public int QueryComputeSharedMemorySize() => _oldSpecState.ComputeState.SharedMemorySize; + + /// + public uint QueryConstantBufferUse() + { + _newSpecState.RecordConstantBufferUse(_stageIndex, _oldSpecState.ConstantBufferUse[_stageIndex]); + return _oldSpecState.ConstantBufferUse[_stageIndex]; + } + + /// + public InputTopology QueryPrimitiveTopology() + { + _newSpecState.RecordPrimitiveTopology(); + return ConvertToInputTopology(_oldSpecState.GraphicsState.Topology, _oldSpecState.GraphicsState.TessellationMode); + } + + /// + public bool QueryTessCw() + { + return _oldSpecState.GraphicsState.TessellationMode.UnpackCw(); + } + + /// + public TessPatchType QueryTessPatchType() + { + return _oldSpecState.GraphicsState.TessellationMode.UnpackPatchType(); + } + + /// + public TessSpacing QueryTessSpacing() + { + return _oldSpecState.GraphicsState.TessellationMode.UnpackSpacing(); + } + + /// + public TextureFormat QueryTextureFormat(int handle, int cbufSlot) + { + _newSpecState.RecordTextureFormat(_stageIndex, handle, cbufSlot); + (uint format, bool formatSrgb) = _oldSpecState.GetFormat(_stageIndex, handle, cbufSlot); + return ConvertToTextureFormat(format, formatSrgb); + } + + /// + public SamplerType QuerySamplerType(int handle, int cbufSlot) + { + _newSpecState.RecordTextureSamplerType(_stageIndex, handle, cbufSlot); + return _oldSpecState.GetTextureTarget(_stageIndex, handle, cbufSlot).ConvertSamplerType(); + } + + /// + public bool QueryTextureCoordNormalized(int handle, int cbufSlot) + { + _newSpecState.RecordTextureCoordNormalized(_stageIndex, handle, 
cbufSlot); + return _oldSpecState.GetCoordNormalized(_stageIndex, handle, cbufSlot); + } + + /// + public bool QueryTransformFeedbackEnabled() + { + return _oldSpecState.TransformFeedbackDescriptors != null; + } + + /// + public ReadOnlySpan QueryTransformFeedbackVaryingLocations(int bufferIndex) + { + return _oldSpecState.TransformFeedbackDescriptors[bufferIndex].AsSpan(); + } + + /// + public int QueryTransformFeedbackStride(int bufferIndex) + { + return _oldSpecState.TransformFeedbackDescriptors[bufferIndex].Stride; + } + + /// + public bool QueryEarlyZForce() + { + _newSpecState.RecordEarlyZForce(); + return _oldSpecState.GraphicsState.EarlyZForce; + } + + /// + public void RegisterTexture(int handle, int cbufSlot) + { + if (!_oldSpecState.TextureRegistered(_stageIndex, handle, cbufSlot)) + { + throw new DiskCacheLoadException(DiskCacheLoadResult.MissingTextureDescriptor); + } + + (uint format, bool formatSrgb) = _oldSpecState.GetFormat(_stageIndex, handle, cbufSlot); + TextureTarget target = _oldSpecState.GetTextureTarget(_stageIndex, handle, cbufSlot); + bool coordNormalized = _oldSpecState.GetCoordNormalized(_stageIndex, handle, cbufSlot); + _newSpecState.RegisterTexture(_stageIndex, handle, cbufSlot, format, formatSrgb, target, coordNormalized); + } + } +} diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs new file mode 100644 index 000000000000..4e338094f15e --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs @@ -0,0 +1,459 @@ +using Ryujinx.Common; +using System; +using System.Collections.Generic; +using System.IO; +using System.Runtime.CompilerServices; + +namespace Ryujinx.Graphics.Gpu.Shader.DiskCache +{ + /// + /// On-disk shader cache storage for guest code. 
+ /// + class DiskCacheGuestStorage + { + private const uint TocMagic = (byte)'T' | ((byte)'O' << 8) | ((byte)'C' << 16) | ((byte)'G' << 24); + + private const ushort VersionMajor = 1; + private const ushort VersionMinor = 0; + private const uint VersionPacked = ((uint)VersionMajor << 16) | VersionMinor; + + private const string TocFileName = "guest.toc"; + private const string DataFileName = "guest.data"; + + private readonly string _basePath; + + /// + /// TOC (Table of contents) file header. + /// + private struct TocHeader + { + /// + /// Magic value, for validation and identification purposes. + /// + public uint Magic; + + /// + /// File format version. + /// + public uint Version; + + /// + /// Header padding. + /// + public uint Padding; + + /// + /// Number of modifications to the file, also the shaders count. + /// + public uint ModificationsCount; + + /// + /// Reserved space, to be used in the future. Write as zero. + /// + public ulong Reserved; + + /// + /// Reserved space, to be used in the future. Write as zero. + /// + public ulong Reserved2; + } + + /// + /// TOC (Table of contents) file entry. + /// + private struct TocEntry + { + /// + /// Offset of the data on the data file. + /// + public uint Offset; + + /// + /// Code size. + /// + public uint CodeSize; + + /// + /// Constant buffer 1 data size. + /// + public uint Cb1DataSize; + + /// + /// Hash of the code and constant buffer data. + /// + public uint Hash; + } + + /// + /// TOC (Table of contents) memory cache entry. + /// + private struct TocMemoryEntry + { + /// + /// Offset of the data on the data file. + /// + public uint Offset; + + /// + /// Code size. + /// + public uint CodeSize; + + /// + /// Constant buffer 1 data size. + /// + public uint Cb1DataSize; + + /// + /// Index of the shader on the cache. + /// + public readonly int Index; + + /// + /// Creates a new TOC memory entry. 
+ /// + /// Offset of the data on the data file + /// Code size + /// Constant buffer 1 data size + /// Index of the shader on the cache + public TocMemoryEntry(uint offset, uint codeSize, uint cb1DataSize, int index) + { + Offset = offset; + CodeSize = codeSize; + Cb1DataSize = cb1DataSize; + Index = index; + } + } + + private Dictionary> _toc; + private uint _tocModificationsCount; + + private (byte[], byte[])[] _cache; + + /// + /// Creates a new disk cache guest storage. + /// + /// Base path of the disk shader cache + public DiskCacheGuestStorage(string basePath) + { + _basePath = basePath; + } + + /// + /// Checks if the TOC (table of contents) file for the guest cache exists. + /// + /// True if the file exists, false otherwise + public bool TocFileExists() + { + return File.Exists(Path.Combine(_basePath, TocFileName)); + } + + /// + /// Checks if the data file for the guest cache exists. + /// + /// True if the file exists, false otherwise + public bool DataFileExists() + { + return File.Exists(Path.Combine(_basePath, DataFileName)); + } + + /// + /// Opens the guest cache TOC (table of contents) file. + /// + /// File stream + public Stream OpenTocFileStream() + { + return DiskCacheCommon.OpenFile(_basePath, TocFileName, writable: false); + } + + /// + /// Opens the guest cache data file. + /// + /// File stream + public Stream OpenDataFileStream() + { + return DiskCacheCommon.OpenFile(_basePath, DataFileName, writable: false); + } + + /// + /// Clear all content from the guest cache files. + /// + public void ClearCache() + { + using var tocFileStream = DiskCacheCommon.OpenFile(_basePath, TocFileName, writable: true); + using var dataFileStream = DiskCacheCommon.OpenFile(_basePath, DataFileName, writable: true); + + tocFileStream.SetLength(0); + dataFileStream.SetLength(0); + } + + /// + /// Loads the guest cache from file or memory cache. 
+ /// + /// Guest TOC file stream + /// Guest data file stream + /// Guest shader index + /// Tuple with the guest code and constant buffer 1 data, respectively + public (byte[], byte[]) LoadShader(Stream tocFileStream, Stream dataFileStream, int index) + { + if (_cache == null || index >= _cache.Length) + { + _cache = new (byte[], byte[])[Math.Max(index + 1, GetShadersCountFromLength(tocFileStream.Length))]; + } + + (byte[] guestCode, byte[] cb1Data) = _cache[index]; + + if (guestCode == null || cb1Data == null) + { + BinarySerializer tocReader = new BinarySerializer(tocFileStream); + tocFileStream.Seek(Unsafe.SizeOf() + index * Unsafe.SizeOf(), SeekOrigin.Begin); + + TocEntry entry = new TocEntry(); + tocReader.Read(ref entry); + + guestCode = new byte[entry.CodeSize]; + cb1Data = new byte[entry.Cb1DataSize]; + + if (entry.Offset >= (ulong)dataFileStream.Length) + { + throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric); + } + + dataFileStream.Seek((long)entry.Offset, SeekOrigin.Begin); + dataFileStream.Read(cb1Data); + BinarySerializer.ReadCompressed(dataFileStream, guestCode); + + _cache[index] = (guestCode, cb1Data); + } + + return (guestCode, cb1Data); + } + + /// + /// Clears guest code memory cache, forcing future loads to be from file. + /// + public void ClearMemoryCache() + { + _cache = null; + } + + /// + /// Calculates the guest shaders count from the TOC file length. + /// + /// TOC file length + /// Shaders count + private static int GetShadersCountFromLength(long length) + { + return (int)((length - Unsafe.SizeOf()) / Unsafe.SizeOf()); + } + + /// + /// Adds a guest shader to the cache. + /// + /// + /// If the shader is already on the cache, the existing index will be returned and nothing will be written. 
+ /// + /// Guest code + /// Constant buffer 1 data accessed by the code + /// Index of the shader on the cache + public int AddShader(ReadOnlySpan data, ReadOnlySpan cb1Data) + { + using var tocFileStream = DiskCacheCommon.OpenFile(_basePath, TocFileName, writable: true); + using var dataFileStream = DiskCacheCommon.OpenFile(_basePath, DataFileName, writable: true); + + TocHeader header = new TocHeader(); + + LoadOrCreateToc(tocFileStream, ref header); + + uint hash = CalcHash(data, cb1Data); + + if (_toc.TryGetValue(hash, out var list)) + { + foreach (var entry in list) + { + if (data.Length != entry.CodeSize || cb1Data.Length != entry.Cb1DataSize) + { + continue; + } + + dataFileStream.Seek((long)entry.Offset, SeekOrigin.Begin); + byte[] cachedCode = new byte[entry.CodeSize]; + byte[] cachedCb1Data = new byte[entry.Cb1DataSize]; + dataFileStream.Read(cachedCb1Data); + BinarySerializer.ReadCompressed(dataFileStream, cachedCode); + + if (data.SequenceEqual(cachedCode) && cb1Data.SequenceEqual(cachedCb1Data)) + { + return entry.Index; + } + } + } + + return WriteNewEntry(tocFileStream, dataFileStream, ref header, data, cb1Data, hash); + } + + /// + /// Loads the guest cache TOC file, or create a new one if not present. + /// + /// Guest TOC file stream + /// Set to the TOC file header + private void LoadOrCreateToc(Stream tocFileStream, ref TocHeader header) + { + BinarySerializer reader = new BinarySerializer(tocFileStream); + + if (!reader.TryRead(ref header) || header.Magic != TocMagic || header.Version != VersionPacked) + { + CreateToc(tocFileStream, ref header); + } + + if (_toc == null || header.ModificationsCount != _tocModificationsCount) + { + if (!LoadTocEntries(tocFileStream, ref reader)) + { + CreateToc(tocFileStream, ref header); + } + + _tocModificationsCount = header.ModificationsCount; + } + } + + /// + /// Creates a new guest cache TOC file. 
+ /// + /// Guest TOC file stream + /// Set to the TOC header + private void CreateToc(Stream tocFileStream, ref TocHeader header) + { + BinarySerializer writer = new BinarySerializer(tocFileStream); + + header.Magic = TocMagic; + header.Version = VersionPacked; + header.Padding = 0; + header.ModificationsCount = 0; + header.Reserved = 0; + header.Reserved2 = 0; + + if (tocFileStream.Length > 0) + { + tocFileStream.Seek(0, SeekOrigin.Begin); + tocFileStream.SetLength(0); + } + + writer.Write(ref header); + } + + /// + /// Reads all the entries on the guest TOC file. + /// + /// Guest TOC file stream + /// TOC file reader + /// True if the operation was successful, false otherwise + private bool LoadTocEntries(Stream tocFileStream, ref BinarySerializer reader) + { + _toc = new Dictionary>(); + + TocEntry entry = new TocEntry(); + int index = 0; + + while (tocFileStream.Position < tocFileStream.Length) + { + if (!reader.TryRead(ref entry)) + { + return false; + } + + AddTocMemoryEntry(entry.Offset, entry.CodeSize, entry.Cb1DataSize, entry.Hash, index++); + } + + return true; + } + + /// + /// Writes a new guest code entry into the file. 
+ /// + /// TOC file stream + /// Data file stream + /// TOC header, to be updated with the new count + /// Guest code + /// Constant buffer 1 data accessed by the guest code + /// Code and constant buffer data hash + /// Entry index + private int WriteNewEntry( + Stream tocFileStream, + Stream dataFileStream, + ref TocHeader header, + ReadOnlySpan data, + ReadOnlySpan cb1Data, + uint hash) + { + BinarySerializer tocWriter = new BinarySerializer(tocFileStream); + + dataFileStream.Seek(0, SeekOrigin.End); + uint dataOffset = checked((uint)dataFileStream.Position); + uint codeSize = (uint)data.Length; + uint cb1DataSize = (uint)cb1Data.Length; + dataFileStream.Write(cb1Data); + BinarySerializer.WriteCompressed(dataFileStream, data, DiskCacheCommon.GetCompressionAlgorithm()); + + _tocModificationsCount = ++header.ModificationsCount; + tocFileStream.Seek(0, SeekOrigin.Begin); + tocWriter.Write(ref header); + + TocEntry entry = new TocEntry() + { + Offset = dataOffset, + CodeSize = codeSize, + Cb1DataSize = cb1DataSize, + Hash = hash + }; + + tocFileStream.Seek(0, SeekOrigin.End); + int index = (int)((tocFileStream.Position - Unsafe.SizeOf()) / Unsafe.SizeOf()); + + tocWriter.Write(ref entry); + + AddTocMemoryEntry(dataOffset, codeSize, cb1DataSize, hash, index); + + return index; + } + + /// + /// Adds an entry to the memory TOC cache. This can be used to avoid reading the TOC file all the time. + /// + /// Offset of the code and constant buffer data in the data file + /// Code size + /// Constant buffer 1 data size + /// Code and constant buffer data hash + /// Index of the data on the cache + private void AddTocMemoryEntry(uint dataOffset, uint codeSize, uint cb1DataSize, uint hash, int index) + { + if (!_toc.TryGetValue(hash, out var list)) + { + _toc.Add(hash, list = new List()); + } + + list.Add(new TocMemoryEntry(dataOffset, codeSize, cb1DataSize, index)); + } + + /// + /// Calculates the hash for a data pair. 
+        ///
+        /// <param name="data">Data 1</param>
+        /// <param name="data2">Data 2</param>
+        /// <returns>Hash of both data</returns>
+        private static uint CalcHash(ReadOnlySpan<byte> data, ReadOnlySpan<byte> data2)
+        {
+            return CalcHash(data2) * 23 ^ CalcHash(data);
+        }
+
+        /// <summary>
+        /// Calculates the hash for data.
+        /// </summary>
+        /// <param name="data">Data to be hashed</param>
+        /// <returns>Hash of the data</returns>
+        private static uint CalcHash(ReadOnlySpan<byte> data)
+        {
+            return (uint)XXHash128.ComputeHash(data).Low;
+        }
+    }
+}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
new file mode 100644
index 000000000000..0028e87961c3
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
@@ -0,0 +1,763 @@
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Shader;
+using System;
+using System.IO;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
+{
+    /// <summary>
+    /// On-disk shader cache storage for host code.
+    /// </summary>
+    class DiskCacheHostStorage
+    {
+        private const uint TocsMagic = (byte)'T' | ((byte)'O' << 8) | ((byte)'C' << 16) | ((byte)'S' << 24);
+        private const uint TochMagic = (byte)'T' | ((byte)'O' << 8) | ((byte)'C' << 16) | ((byte)'H' << 24);
+        private const uint ShdiMagic = (byte)'S' | ((byte)'H' << 8) | ((byte)'D' << 16) | ((byte)'I' << 24);
+        private const uint BufdMagic = (byte)'B' | ((byte)'U' << 8) | ((byte)'F' << 16) | ((byte)'D' << 24);
+        private const uint TexdMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'D' << 24);
+
+        private const ushort FileFormatVersionMajor = 1;
+        private const ushort FileFormatVersionMinor = 1;
+        private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
+        private const uint CodeGenVersion = 0;
+
+        private const string SharedTocFileName = "shared.toc";
+        private const string SharedDataFileName = "shared.data";
+
+        private readonly string _basePath;
+
+        /// <summary>
+        /// Indicates if the cache is enabled, which is the case when a non-empty base path was supplied.
+        /// </summary>
+        public bool CacheEnabled => !string.IsNullOrEmpty(_basePath);
+
+        /// <summary>
+        /// TOC (Table of contents) file header.
+        /// </summary>
+        private struct TocHeader
+        {
+            /// <summary>
+            /// Magic value, for validation and identification.
+            /// </summary>
+            public uint Magic;
+
+            /// <summary>
+            /// File format version.
+            /// </summary>
+            public uint FormatVersion;
+
+            /// <summary>
+            /// Generated shader code version.
+            /// </summary>
+            public uint CodeGenVersion;
+
+            /// <summary>
+            /// Header padding.
+            /// </summary>
+            public uint Padding;
+
+            /// <summary>
+            /// Reserved space, to be used in the future. Write as zero.
+            /// </summary>
+            public ulong Reserved;
+
+            /// <summary>
+            /// Reserved space, to be used in the future. Write as zero.
+            /// </summary>
+            public ulong Reserved2;
+        }
+
+        /// <summary>
+        /// Offset and size pair.
+        /// </summary>
+        private struct OffsetAndSize
+        {
+            /// <summary>
+            /// Offset.
+            /// </summary>
+            public ulong Offset;
+
+            /// <summary>
+            /// Size.
+            /// </summary>
+            public uint Size;
+        }
+
+        /// <summary>
+        /// Per-stage data entry.
+        /// </summary>
+        private struct DataEntryPerStage
+        {
+            /// <summary>
+            /// Index of the guest code on the guest code cache TOC file.
+            /// </summary>
+            public int GuestCodeIndex;
+        }
+
+        /// <summary>
+        /// Per-program data entry.
+        /// </summary>
+        private struct DataEntry
+        {
+            /// <summary>
+            /// Bit mask where each bit set is a used shader stage. Should be zero for compute shaders.
+            /// </summary>
+            public uint StagesBitMask;
+        }
+
+        /// <summary>
+        /// Per-stage shader information, returned by the translator.
+        /// </summary>
+        private struct DataShaderInfo
+        {
+            /// <summary>
+            /// Total constant buffers used.
+            /// </summary>
+            public ushort CBuffersCount;
+
+            /// <summary>
+            /// Total storage buffers used.
+            /// </summary>
+            public ushort SBuffersCount;
+
+            /// <summary>
+            /// Total textures used.
+            /// </summary>
+            public ushort TexturesCount;
+
+            /// <summary>
+            /// Total images used.
+            /// </summary>
+            public ushort ImagesCount;
+
+            /// <summary>
+            /// Shader stage.
+            /// </summary>
+            public ShaderStage Stage;
+
+            /// <summary>
+            /// Indicates if the shader accesses the Instance ID built-in variable.
+            /// </summary>
+            public bool UsesInstanceId;
+
+            /// <summary>
+            /// Indicates if the shader modifies the Layer built-in variable.
+            /// </summary>
+            public bool UsesRtLayer;
+
+            /// <summary>
+            /// Bit mask with the clip distances written on the vertex stage.
+            /// </summary>
+            public byte ClipDistancesWritten;
+
+            /// <summary>
+            /// Bit mask of the render target components written by the fragment stage.
+            /// </summary>
+            public int FragmentOutputMap;
+        }
+
+        private readonly DiskCacheGuestStorage _guestStorage;
+
+        /// <summary>
+        /// Creates a disk cache host storage.
+        /// </summary>
+        /// <param name="basePath">Base path of the shader cache</param>
+        public DiskCacheHostStorage(string basePath)
+        {
+            _basePath = basePath;
+            _guestStorage = new DiskCacheGuestStorage(basePath);
+
+            if (CacheEnabled)
+            {
+                Directory.CreateDirectory(basePath);
+            }
+        }
+
+        /// <summary>
+        /// Gets the total of host programs on the cache.
+        /// </summary>
+        /// <returns>Host programs count</returns>
+        public int GetProgramCount()
+        {
+            string tocFilePath = Path.Combine(_basePath, SharedTocFileName);
+
+            if (!File.Exists(tocFilePath))
+            {
+                return 0;
+            }
+
+            // Each program takes one 64-bit data offset on the shared TOC, after the header.
+            // A truncated file (shorter than the header) must report zero rather than a negative count.
+            long entriesLength = new FileInfo(tocFilePath).Length - Unsafe.SizeOf<TocHeader>();
+
+            return (int)(Math.Max(0L, entriesLength) / sizeof(ulong));
+        }
+
+        /// <summary>
+        /// Gets the name of the host program cache file, without extension.
+        /// </summary>
+        /// <param name="context">GPU context</param>
+        /// <returns>Name of the file, without extension</returns>
+        private static string GetHostFileName(GpuContext context)
+        {
+            string apiName = context.Capabilities.Api.ToString().ToLowerInvariant();
+            string vendorName = RemoveInvalidCharacters(context.Capabilities.VendorName.ToLowerInvariant());
+            return $"{apiName}_{vendorName}";
+        }
+
+        /// <summary>
+        /// Removes invalid path characters and spaces from a file name.
+        /// </summary>
+        /// <remarks>
+        /// Everything from the first space onwards is dropped, not just the space itself.
+        /// </remarks>
+        /// <param name="fileName">File name</param>
+        /// <returns>Filtered file name</returns>
+        private static string RemoveInvalidCharacters(string fileName)
+        {
+            int indexOfSpace = fileName.IndexOf(' ');
+            if (indexOfSpace >= 0)
+            {
+                fileName = fileName.Substring(0, indexOfSpace);
+            }
+
+            return string.Concat(fileName.Split(Path.GetInvalidFileNameChars(), StringSplitOptions.RemoveEmptyEntries));
+        }
+
+        /// <summary>
+        /// Gets the name of the TOC host file.
+        /// </summary>
+        /// <param name="context">GPU context</param>
+        /// <returns>File name</returns>
+        private static string GetHostTocFileName(GpuContext context)
+        {
+            return GetHostFileName(context) + ".toc";
+        }
+
+        ///
+        /// Gets the name of the data host file.
+        ///
+        /// <param name="context">GPU context</param>
+        /// <returns>File name</returns>
+        private static string GetHostDataFileName(GpuContext context)
+        {
+            return GetHostFileName(context) + ".data";
+        }
+
+        /// <summary>
+        /// Checks if a disk cache exists for the current application.
+        /// </summary>
+        /// <returns>True if a disk cache exists, false otherwise</returns>
+        public bool CacheExists()
+        {
+            string tocFilePath = Path.Combine(_basePath, SharedTocFileName);
+            string dataFilePath = Path.Combine(_basePath, SharedDataFileName);
+
+            if (!File.Exists(tocFilePath) || !File.Exists(dataFilePath) || !_guestStorage.TocFileExists() || !_guestStorage.DataFileExists())
+            {
+                return false;
+            }
+
+            return true;
+        }
+
+        /// <summary>
+        /// Loads all shaders from the cache.
+        /// </summary>
+        /// <param name="context">GPU context</param>
+        /// <param name="loader">Parallel disk cache loader</param>
+        /// <exception cref="DiskCacheLoadException">Thrown when the cache files are corrupted or have an incompatible version</exception>
+        public void LoadShaders(GpuContext context, ParallelDiskCacheLoader loader)
+        {
+            if (!CacheExists())
+            {
+                return;
+            }
+
+            // The host streams are opened lazily by ReadHostCode, and disposed on the finally block below.
+            Stream hostTocFileStream = null;
+            Stream hostDataFileStream = null;
+
+            try
+            {
+                using var tocFileStream = DiskCacheCommon.OpenFile(_basePath, SharedTocFileName, writable: false);
+                using var dataFileStream = DiskCacheCommon.OpenFile(_basePath, SharedDataFileName, writable: false);
+
+                using var guestTocFileStream = _guestStorage.OpenTocFileStream();
+                using var guestDataFileStream = _guestStorage.OpenDataFileStream();
+
+                BinarySerializer tocReader = new BinarySerializer(tocFileStream);
+                BinarySerializer dataReader = new BinarySerializer(dataFileStream);
+
+                TocHeader header = new TocHeader();
+
+                if (!tocReader.TryRead(ref header) || header.Magic != TocsMagic)
+                {
+                    throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
+                }
+
+                if (header.FormatVersion != FileFormatVersionPacked)
+                {
+                    throw new DiskCacheLoadException(DiskCacheLoadResult.IncompatibleVersion);
+                }
+
+                // Host binaries are only used if they were generated by the current code generator version.
+                bool loadHostCache = header.CodeGenVersion == CodeGenVersion;
+
+                int programIndex = 0;
+
+                DataEntry entry = new DataEntry();
+
+                while (tocFileStream.Position < tocFileStream.Length && loader.Active)
+                {
+                    ulong dataOffset = 0;
+                    tocReader.Read(ref dataOffset);
+
+                    if ((ulong)dataOffset >= (ulong)dataFileStream.Length)
+                    {
+                        throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
+                    }
+
+                    dataFileStream.Seek((long)dataOffset, SeekOrigin.Begin);
+
+                    dataReader.BeginCompression();
+                    dataReader.Read(ref entry);
+                    uint stagesBitMask = entry.StagesBitMask;
+
+                    // Only the 6 lower bits can be set on a valid mask.
+                    if ((stagesBitMask & ~0x3fu) != 0)
+                    {
+                        throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
+                    }
+
+                    // Compute programs are stored with an empty mask, and have a single stage at index 0.
+                    bool isCompute = stagesBitMask == 0;
+                    if (isCompute)
+                    {
+                        stagesBitMask = 1;
+                    }
+
+                    CachedShaderStage[] shaders = new CachedShaderStage[isCompute ? 1 : Constants.ShaderStages + 1];
+
+                    DataEntryPerStage stageEntry = new DataEntryPerStage();
+
+                    while (stagesBitMask != 0)
+                    {
+                        int stageIndex = BitOperations.TrailingZeroCount(stagesBitMask);
+
+                        dataReader.Read(ref stageEntry);
+
+                        // For graphics programs, no shader info is read for the stage at index 0.
+                        ShaderProgramInfo info = stageIndex != 0 || isCompute ? ReadShaderProgramInfo(ref dataReader) : null;
+
+                        (byte[] guestCode, byte[] cb1Data) = _guestStorage.LoadShader(
+                            guestTocFileStream,
+                            guestDataFileStream,
+                            stageEntry.GuestCodeIndex);
+
+                        shaders[stageIndex] = new CachedShaderStage(info, guestCode, cb1Data);
+
+                        stagesBitMask &= ~(1u << stageIndex);
+                    }
+
+                    ShaderSpecializationState specState = ShaderSpecializationState.Read(ref dataReader);
+                    dataReader.EndCompression();
+
+                    if (loadHostCache)
+                    {
+                        byte[] hostCode = ReadHostCode(context, ref hostTocFileStream, ref hostDataFileStream, programIndex);
+
+                        if (hostCode != null)
+                        {
+                            bool hasFragmentShader = shaders.Length > 5 && shaders[5] != null;
+                            int fragmentOutputMap = hasFragmentShader ? shaders[5].Info.FragmentOutputMap : -1;
+                            IProgram hostProgram = context.Renderer.LoadProgramBinary(hostCode, hasFragmentShader, new ShaderInfo(fragmentOutputMap));
+
+                            CachedShaderProgram program = new CachedShaderProgram(hostProgram, specState, shaders);
+
+                            loader.QueueHostProgram(program, hostProgram, programIndex, isCompute);
+                        }
+                        else
+                        {
+                            // Once a host binary is missing, all the remaining programs are loaded from guest code.
+                            loadHostCache = false;
+                        }
+                    }
+
+                    if (!loadHostCache)
+                    {
+                        loader.QueueGuestProgram(shaders, specState, programIndex, isCompute);
+                    }
+
+                    loader.CheckCompilation();
+                    programIndex++;
+                }
+            }
+            finally
+            {
+                _guestStorage.ClearMemoryCache();
+
+                hostTocFileStream?.Dispose();
+                hostDataFileStream?.Dispose();
+            }
+        }
+
+        /// <summary>
+        /// Reads the host code for a given shader, if existent.
+        /// </summary>
+        /// <param name="context">GPU context</param>
+        /// <param name="tocFileStream">Host TOC file stream, initialized if needed</param>
+        /// <param name="dataFileStream">Host data file stream, initialized if needed</param>
+        /// <param name="programIndex">Index of the program on the cache</param>
+        /// <returns>Host binary code, or null if not found</returns>
+        /// <exception cref="DiskCacheLoadException">Thrown when the TOC entry points outside of the data file</exception>
+        private byte[] ReadHostCode(GpuContext context, ref Stream tocFileStream, ref Stream dataFileStream, int programIndex)
+        {
+            if (tocFileStream == null && dataFileStream == null)
+            {
+                string tocFilePath = Path.Combine(_basePath, GetHostTocFileName(context));
+                string dataFilePath = Path.Combine(_basePath, GetHostDataFileName(context));
+
+                if (!File.Exists(tocFilePath) || !File.Exists(dataFilePath))
+                {
+                    return null;
+                }
+
+                tocFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: false);
+                dataFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: false);
+            }
+
+            int offset = Unsafe.SizeOf<TocHeader>() + programIndex * Unsafe.SizeOf<OffsetAndSize>();
+            if (offset + Unsafe.SizeOf<OffsetAndSize>() > tocFileStream.Length)
+            {
+                return null;
+            }
+
+            // Note: "offset" is a position on the TOC file, so it is only validated against the TOC length above.
+            // It must not be compared with the data file length, as that would reject valid caches where the
+            // data file is smaller than the TOC offset. The data offset itself is validated below.
+            tocFileStream.Seek(offset, SeekOrigin.Begin);
+
+            BinarySerializer tocReader = new BinarySerializer(tocFileStream);
+
+            OffsetAndSize offsetAndSize = new OffsetAndSize();
+            tocReader.Read(ref offsetAndSize);
+
+            if (offsetAndSize.Offset >= (ulong)dataFileStream.Length)
+            {
+                throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
+            }
+
+            dataFileStream.Seek((long)offsetAndSize.Offset, SeekOrigin.Begin);
+
+            // The size stored on the TOC is the size of the uncompressed host code.
+            byte[] hostCode = new byte[offsetAndSize.Size];
+
+            BinarySerializer.ReadCompressed(dataFileStream, hostCode);
+
+            return hostCode;
+        }
+
+        /// <summary>
+        /// Gets output streams for the disk cache, for faster batch writing.
+        /// </summary>
+        /// <param name="context">The GPU context, used to determine the host disk cache</param>
+        /// <returns>A collection of disk cache output streams</returns>
+        public DiskCacheOutputStreams GetOutputStreams(GpuContext context)
+        {
+            FileStream tocFileStream = null;
+            FileStream dataFileStream = null;
+            FileStream hostTocFileStream = null;
+            FileStream hostDataFileStream = null;
+
+            try
+            {
+                tocFileStream = DiskCacheCommon.OpenFile(_basePath, SharedTocFileName, writable: true);
+                dataFileStream = DiskCacheCommon.OpenFile(_basePath, SharedDataFileName, writable: true);
+
+                hostTocFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: true);
+                hostDataFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: true);
+
+                return new DiskCacheOutputStreams(tocFileStream, dataFileStream, hostTocFileStream, hostDataFileStream);
+            }
+            catch
+            {
+                // If any of the files fails to open, the streams that were already open must not leak.
+                tocFileStream?.Dispose();
+                dataFileStream?.Dispose();
+                hostTocFileStream?.Dispose();
+                hostDataFileStream?.Dispose();
+
+                throw;
+            }
+        }
+
+        /// <summary>
+        /// Adds a shader to the cache.
+        /// </summary>
+        /// <param name="context">GPU context</param>
+        /// <param name="program">Cached program</param>
+        /// <param name="hostCode">Optional host binary code</param>
+        /// <param name="streams">Output streams to use</param>
+        public void AddShader(GpuContext context, CachedShaderProgram program, ReadOnlySpan<byte> hostCode, DiskCacheOutputStreams streams = null)
+        {
+            uint stagesBitMask = 0;
+
+            // Compute shaders are left out of the mask, so compute programs are stored with a mask of zero.
+            for (int index = 0; index < program.Shaders.Length; index++)
+            {
+                var shader = program.Shaders[index];
+                if (shader == null || (shader.Info != null && shader.Info.Stage == ShaderStage.Compute))
+                {
+                    continue;
+                }
+
+                stagesBitMask |= 1u << index;
+            }
+
+            // When no output streams are supplied, the files are opened here and must also be closed here.
+            bool ownsStreams = streams == null;
+
+            Stream tocFileStream = null;
+            Stream dataFileStream = null;
+
+            try
+            {
+                tocFileStream = streams != null ? streams.TocFileStream : DiskCacheCommon.OpenFile(_basePath, SharedTocFileName, writable: true);
+                dataFileStream = streams != null ? streams.DataFileStream : DiskCacheCommon.OpenFile(_basePath, SharedDataFileName, writable: true);
+
+                if (tocFileStream.Length == 0)
+                {
+                    TocHeader header = new TocHeader();
+                    CreateToc(tocFileStream, ref header, TocsMagic, CodeGenVersion);
+                }
+
+                tocFileStream.Seek(0, SeekOrigin.End);
+                dataFileStream.Seek(0, SeekOrigin.End);
+
+                BinarySerializer tocWriter = new BinarySerializer(tocFileStream);
+                BinarySerializer dataWriter = new BinarySerializer(dataFileStream);
+
+                ulong dataOffset = (ulong)dataFileStream.Position;
+                tocWriter.Write(ref dataOffset);
+
+                DataEntry entry = new DataEntry();
+
+                entry.StagesBitMask = stagesBitMask;
+
+                dataWriter.BeginCompression(DiskCacheCommon.GetCompressionAlgorithm());
+                dataWriter.Write(ref entry);
+
+                DataEntryPerStage stageEntry = new DataEntryPerStage();
+
+                for (int index = 0; index < program.Shaders.Length; index++)
+                {
+                    var shader = program.Shaders[index];
+                    if (shader == null)
+                    {
+                        continue;
+                    }
+
+                    stageEntry.GuestCodeIndex = _guestStorage.AddShader(shader.Code, shader.Cb1Data);
+
+                    dataWriter.Write(ref stageEntry);
+
+                    WriteShaderProgramInfo(ref dataWriter, shader.Info);
+                }
+
+                program.SpecializationState.Write(ref dataWriter);
+                dataWriter.EndCompression();
+            }
+            finally
+            {
+                // Ensures that the files are closed even if writing fails midway.
+                if (ownsStreams)
+                {
+                    tocFileStream?.Dispose();
+                    dataFileStream?.Dispose();
+                }
+            }
+
+            if (hostCode.IsEmpty)
+            {
+                return;
+            }
+
+            WriteHostCode(context, hostCode, -1, streams);
+        }
+
+        /// <summary>
+        /// Clears all content from the guest cache files.
+        /// </summary>
+        public void ClearGuestCache()
+        {
+            _guestStorage.ClearCache();
+        }
+
+        ///
+        /// Clears all content from the shared cache files.
+        ///
+        public void ClearSharedCache()
+        {
+            using var tocFileStream = DiskCacheCommon.OpenFile(_basePath, SharedTocFileName, writable: true);
+            using var dataFileStream = DiskCacheCommon.OpenFile(_basePath, SharedDataFileName, writable: true);
+
+            tocFileStream.SetLength(0);
+            dataFileStream.SetLength(0);
+        }
+
+        /// <summary>
+        /// Deletes all content from the host cache files.
+        /// </summary>
+        /// <param name="context">GPU context</param>
+        public void ClearHostCache(GpuContext context)
+        {
+            using var tocFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: true);
+            using var dataFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: true);
+
+            tocFileStream.SetLength(0);
+            dataFileStream.SetLength(0);
+        }
+
+        /// <summary>
+        /// Adds a host binary shader to the host cache.
+        /// </summary>
+        /// <remarks>
+        /// This only modifies the host cache. The shader must already exist in the other caches.
+        /// This method should only be used for rebuilding the host cache after a clear.
+        /// </remarks>
+        /// <param name="context">GPU context</param>
+        /// <param name="hostCode">Host binary code</param>
+        /// <param name="programIndex">Index of the program in the cache</param>
+        public void AddHostShader(GpuContext context, ReadOnlySpan<byte> hostCode, int programIndex)
+        {
+            WriteHostCode(context, hostCode, programIndex);
+        }
+
+        /// <summary>
+        /// Writes the host binary code on the host cache.
+        /// </summary>
+        /// <param name="context">GPU context</param>
+        /// <param name="hostCode">Host binary code</param>
+        /// <param name="programIndex">Index of the program in the cache, or -1 to append a new TOC entry at the end</param>
+        /// <param name="streams">Output streams to use</param>
+        private void WriteHostCode(GpuContext context, ReadOnlySpan<byte> hostCode, int programIndex, DiskCacheOutputStreams streams = null)
+        {
+            // When no output streams are supplied, the files are opened here and must also be closed here.
+            bool ownsStreams = streams == null;
+
+            Stream tocFileStream = null;
+            Stream dataFileStream = null;
+
+            try
+            {
+                tocFileStream = streams != null ? streams.HostTocFileStream : DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: true);
+                dataFileStream = streams != null ? streams.HostDataFileStream : DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: true);
+
+                if (tocFileStream.Length == 0)
+                {
+                    TocHeader header = new TocHeader();
+                    CreateToc(tocFileStream, ref header, TochMagic, 0);
+                }
+
+                if (programIndex == -1)
+                {
+                    tocFileStream.Seek(0, SeekOrigin.End);
+                }
+                else
+                {
+                    tocFileStream.Seek(Unsafe.SizeOf<TocHeader>() + (programIndex * Unsafe.SizeOf<OffsetAndSize>()), SeekOrigin.Begin);
+                }
+
+                // The binary itself is always appended, only the TOC entry position depends on the program index.
+                dataFileStream.Seek(0, SeekOrigin.End);
+
+                BinarySerializer tocWriter = new BinarySerializer(tocFileStream);
+
+                OffsetAndSize offsetAndSize = new OffsetAndSize();
+                offsetAndSize.Offset = (ulong)dataFileStream.Position;
+                offsetAndSize.Size = (uint)hostCode.Length;
+                tocWriter.Write(ref offsetAndSize);
+
+                BinarySerializer.WriteCompressed(dataFileStream, hostCode, DiskCacheCommon.GetCompressionAlgorithm());
+            }
+            finally
+            {
+                // Ensures that the files are closed even if writing fails midway.
+                if (ownsStreams)
+                {
+                    tocFileStream?.Dispose();
+                    dataFileStream?.Dispose();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Creates a TOC file for the host or shared cache.
+        /// </summary>
+        /// <remarks>
+        /// Any existing content of the file is discarded before the header is written.
+        /// </remarks>
+        /// <param name="tocFileStream">TOC file stream</param>
+        /// <param name="header">Set to the TOC file header</param>
+        /// <param name="magic">Magic value to be written</param>
+        /// <param name="codegenVersion">Shader codegen version. In this class, the shared TOC is created with the current version, and the host TOC with zero</param>
+        private void CreateToc(Stream tocFileStream, ref TocHeader header, uint magic, uint codegenVersion)
+        {
+            BinarySerializer writer = new BinarySerializer(tocFileStream);
+
+            header.Magic = magic;
+            header.FormatVersion = FileFormatVersionPacked;
+            header.CodeGenVersion = codegenVersion;
+            header.Padding = 0;
+            header.Reserved = 0;
+            header.Reserved2 = 0;
+
+            if (tocFileStream.Length > 0)
+            {
+                tocFileStream.Seek(0, SeekOrigin.Begin);
+                tocFileStream.SetLength(0);
+            }
+
+            writer.Write(ref header);
+        }
+
+        ///
+        /// Reads the shader program info from the cache.
+        ///
+        /// <param name="dataReader">Cache data reader</param>
+        /// <returns>Shader program info</returns>
+        private static ShaderProgramInfo ReadShaderProgramInfo(ref BinarySerializer dataReader)
+        {
+            // The fixed size part comes first, and tells how many descriptors of each type follow.
+            DataShaderInfo stored = new DataShaderInfo();
+            dataReader.ReadWithMagicAndSize(ref stored, ShdiMagic);
+
+            var constantBuffers = new BufferDescriptor[stored.CBuffersCount];
+            var storageBuffers = new BufferDescriptor[stored.SBuffersCount];
+            var textureDescriptors = new TextureDescriptor[stored.TexturesCount];
+            var imageDescriptors = new TextureDescriptor[stored.ImagesCount];
+
+            // Descriptors are stored in this exact order: constant buffers, storage buffers, textures and images.
+            for (int i = 0; i < constantBuffers.Length; i++)
+            {
+                dataReader.ReadWithMagicAndSize(ref constantBuffers[i], BufdMagic);
+            }
+
+            for (int i = 0; i < storageBuffers.Length; i++)
+            {
+                dataReader.ReadWithMagicAndSize(ref storageBuffers[i], BufdMagic);
+            }
+
+            for (int i = 0; i < textureDescriptors.Length; i++)
+            {
+                dataReader.ReadWithMagicAndSize(ref textureDescriptors[i], TexdMagic);
+            }
+
+            for (int i = 0; i < imageDescriptors.Length; i++)
+            {
+                dataReader.ReadWithMagicAndSize(ref imageDescriptors[i], TexdMagic);
+            }
+
+            ShaderProgramInfo programInfo = new ShaderProgramInfo(
+                constantBuffers,
+                storageBuffers,
+                textureDescriptors,
+                imageDescriptors,
+                stored.Stage,
+                stored.UsesInstanceId,
+                stored.UsesRtLayer,
+                stored.ClipDistancesWritten,
+                stored.FragmentOutputMap);
+
+            return programInfo;
+        }
+
+        ///
+        /// Writes the shader program info into the cache.
+        ///
+        /// <param name="dataWriter">Cache data writer</param>
+        /// <param name="info">Program info</param>
+        private static void WriteShaderProgramInfo(ref BinarySerializer dataWriter, ShaderProgramInfo info)
+        {
+            // Nothing is written for stages without shader info.
+            if (info == null)
+            {
+                return;
+            }
+
+            DataShaderInfo dataInfo = new DataShaderInfo();
+
+            dataInfo.CBuffersCount = (ushort)info.CBuffers.Count;
+            dataInfo.SBuffersCount = (ushort)info.SBuffers.Count;
+            dataInfo.TexturesCount = (ushort)info.Textures.Count;
+            dataInfo.ImagesCount = (ushort)info.Images.Count;
+            dataInfo.Stage = info.Stage;
+            dataInfo.UsesInstanceId = info.UsesInstanceId;
+            dataInfo.UsesRtLayer = info.UsesRtLayer;
+            dataInfo.ClipDistancesWritten = info.ClipDistancesWritten;
+            dataInfo.FragmentOutputMap = info.FragmentOutputMap;
+
+            dataWriter.WriteWithMagicAndSize(ref dataInfo, ShdiMagic);
+
+            // Descriptors are written in this exact order: constant buffers, storage buffers, textures and images.
+            for (int index = 0; index < info.CBuffers.Count; index++)
+            {
+                var entry = info.CBuffers[index];
+                dataWriter.WriteWithMagicAndSize(ref entry, BufdMagic);
+            }
+
+            for (int index = 0; index < info.SBuffers.Count; index++)
+            {
+                var entry = info.SBuffers[index];
+                dataWriter.WriteWithMagicAndSize(ref entry, BufdMagic);
+            }
+
+            for (int index = 0; index < info.Textures.Count; index++)
+            {
+                var entry = info.Textures[index];
+                dataWriter.WriteWithMagicAndSize(ref entry, TexdMagic);
+            }
+
+            for (int index = 0; index < info.Images.Count; index++)
+            {
+                var entry = info.Images[index];
+                dataWriter.WriteWithMagicAndSize(ref entry, TexdMagic);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadException.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadException.cs
new file mode 100644
index 000000000000..d6e23302c5d2
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadException.cs
@@ -0,0 +1,48 @@
+using System;
+
+namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
+{
+    /// <summary>
+    /// Disk cache load exception.
+    /// </summary>
+    class DiskCacheLoadException : Exception
+    {
+        /// <summary>
+        /// Result of the cache load operation.
+        /// </summary>
+        public DiskCacheLoadResult Result { get; }
+
+        /// <summary>
+        /// Creates a new instance of the disk cache load exception.
+        /// </summary>
+        public DiskCacheLoadException()
+        {
+        }
+
+        /// <summary>
+        /// Creates a new instance of the disk cache load exception.
+        /// </summary>
+        /// <param name="message">Exception message</param>
+        public DiskCacheLoadException(string message) : base(message)
+        {
+        }
+
+        /// <summary>
+        /// Creates a new instance of the disk cache load exception.
+        /// </summary>
+        /// <param name="message">Exception message</param>
+        /// <param name="inner">Inner exception</param>
+        public DiskCacheLoadException(string message, Exception inner) : base(message, inner)
+        {
+        }
+
+        /// <summary>
+        /// Creates a new instance of the disk cache load exception.
+        /// </summary>
+        /// <param name="result">Result code</param>
+        public DiskCacheLoadException(DiskCacheLoadResult result) : base(result.GetMessage())
+        {
+            Result = result;
+        }
+    }
+}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadResult.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadResult.cs
new file mode 100644
index 000000000000..b3ffa4a73786
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadResult.cs
@@ -0,0 +1,72 @@
+namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
+{
+    /// <summary>
+    /// Result of a shader cache load operation.
+    /// </summary>
+    enum DiskCacheLoadResult
+    {
+        /// <summary>
+        /// No error.
+        /// </summary>
+        Success,
+
+        /// <summary>
+        /// File can't be accessed.
+        /// </summary>
+        NoAccess,
+
+        /// <summary>
+        /// The constant buffer 1 data length is too low for the translation of the guest shader.
+        /// </summary>
+        InvalidCb1DataLength,
+
+        /// <summary>
+        /// The cache is missing the descriptor of a texture used by the shader.
+        /// </summary>
+        MissingTextureDescriptor,
+
+        /// <summary>
+        /// File is corrupted.
+        /// </summary>
+        FileCorruptedGeneric,
+
+        /// <summary>
+        /// File is corrupted, detected by magic value check.
+        /// </summary>
+        FileCorruptedInvalidMagic,
+
+        /// <summary>
+        /// File is corrupted, detected by length check.
+        /// </summary>
+        FileCorruptedInvalidLength,
+
+        /// <summary>
+        /// File might be valid, but is incompatible with the current emulator version.
+        /// </summary>
+        IncompatibleVersion
+    }
+
+    static class DiskCacheLoadResultExtensions
+    {
+        /// <summary>
+        /// Gets an error message from a result code.
+        /// </summary>
+        /// <param name="result">Result code</param>
+        /// <returns>Error message</returns>
+        public static string GetMessage(this DiskCacheLoadResult result)
+        {
+            return result switch
+            {
+                DiskCacheLoadResult.Success => "No error.",
+                DiskCacheLoadResult.NoAccess => "Could not access the cache file.",
+                DiskCacheLoadResult.InvalidCb1DataLength => "Constant buffer 1 data length is too low.",
+                DiskCacheLoadResult.MissingTextureDescriptor => "Texture descriptor missing from the cache file.",
+                DiskCacheLoadResult.FileCorruptedGeneric => "The cache file is corrupted.",
+                DiskCacheLoadResult.FileCorruptedInvalidMagic => "Magic check failed, the cache file is corrupted.",
+                DiskCacheLoadResult.FileCorruptedInvalidLength => "Length check failed, the cache file is corrupted.",
+                DiskCacheLoadResult.IncompatibleVersion => "The version of the disk cache is not compatible with this version of the emulator.",
+                _ => "Unknown error."
+            };
+        }
+    }
+}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheOutputStreams.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheOutputStreams.cs
new file mode 100644
index 000000000000..1e0df2647313
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheOutputStreams.cs
@@ -0,0 +1,57 @@
+using System;
+using System.IO;
+
+namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
+{
+    /// <summary>
+    /// Output streams for the disk shader cache.
+    /// </summary>
+    class DiskCacheOutputStreams : IDisposable
+    {
+        /// <summary>
+        /// Shared table of contents (TOC) file stream.
+        /// </summary>
+        public readonly FileStream TocFileStream;
+
+        /// <summary>
+        /// Shared data file stream.
+        /// </summary>
+        public readonly FileStream DataFileStream;
+
+        /// <summary>
+        /// Host table of contents (TOC) file stream.
+        /// </summary>
+        public readonly FileStream HostTocFileStream;
+
+        /// <summary>
+        /// Host data file stream.
+        /// </summary>
+        public readonly FileStream HostDataFileStream;
+
+        /// <summary>
+        /// Creates a new instance of a disk cache output stream container.
+        /// </summary>
+        /// <param name="tocFileStream">Stream for the shared table of contents file</param>
+        /// <param name="dataFileStream">Stream for the shared data file</param>
+        /// <param name="hostTocFileStream">Stream for the host table of contents file</param>
+        /// <param name="hostDataFileStream">Stream for the host data file</param>
+        public DiskCacheOutputStreams(FileStream tocFileStream, FileStream dataFileStream, FileStream hostTocFileStream, FileStream hostDataFileStream)
+        {
+            TocFileStream = tocFileStream;
+            DataFileStream = dataFileStream;
+            HostTocFileStream = hostTocFileStream;
+            HostDataFileStream = hostDataFileStream;
+        }
+
+        /// <summary>
+        /// Disposes the output file streams.
+        /// </summary>
+        public void Dispose()
+        {
+            TocFileStream.Dispose();
+            DataFileStream.Dispose();
+            HostTocFileStream.Dispose();
+            HostDataFileStream.Dispose();
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs
new file mode 100644
index 000000000000..af7579d5de10
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs
@@ -0,0 +1,672 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Shader;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.IO;
+using System.Threading;
+using static Ryujinx.Graphics.Gpu.Shader.ShaderCache;
+
+namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
+{
+    class ParallelDiskCacheLoader
+    {
+        private const int ThreadCount = 8;
+
+        private readonly GpuContext _context;
+        private readonly ShaderCacheHashTable _graphicsCache;
+        private readonly ComputeShaderCacheHashTable _computeCache;
+        private readonly DiskCacheHostStorage _hostStorage;
+        private readonly CancellationToken _cancellationToken;
+        private readonly Action<ShaderCacheState, int, int> _stateChangeCallback;
+
+        ///
+        /// Indicates if the cache should be loaded.
+        ///
+        public bool Active => !_cancellationToken.IsCancellationRequested;
+
+        private bool _needsHostRegen;
+
+        /// <summary>
+        /// Number of shaders that failed to compile from the cache.
+        /// </summary>
+        public int ErrorCount { get; private set; }
+
+        /// <summary>
+        /// Program validation entry.
+        /// </summary>
+        private struct ProgramEntry
+        {
+            /// <summary>
+            /// Cached shader program.
+            /// </summary>
+            public readonly CachedShaderProgram CachedProgram;
+
+            /// <summary>
+            /// Host program.
+            /// </summary>
+            public readonly IProgram HostProgram;
+
+            /// <summary>
+            /// Program index.
+            /// </summary>
+            public readonly int ProgramIndex;
+
+            /// <summary>
+            /// Indicates if the program is a compute shader.
+            /// </summary>
+            public readonly bool IsCompute;
+
+            /// <summary>
+            /// Indicates if the program is a host binary shader.
+            /// </summary>
+            public readonly bool IsBinary;
+
+            /// <summary>
+            /// Creates a new program validation entry.
+            /// </summary>
+            /// <param name="cachedProgram">Cached shader program</param>
+            /// <param name="hostProgram">Host program</param>
+            /// <param name="programIndex">Program index</param>
+            /// <param name="isCompute">Indicates if the program is a compute shader</param>
+            /// <param name="isBinary">Indicates if the program is a host binary shader</param>
+            public ProgramEntry(
+                CachedShaderProgram cachedProgram,
+                IProgram hostProgram,
+                int programIndex,
+                bool isCompute,
+                bool isBinary)
+            {
+                CachedProgram = cachedProgram;
+                HostProgram = hostProgram;
+                ProgramIndex = programIndex;
+                IsCompute = isCompute;
+                IsBinary = isBinary;
+            }
+        }
+
+        /// <summary>
+        /// Translated shader compilation entry.
+        /// </summary>
+        private struct ProgramCompilation
+        {
+            /// <summary>
+            /// Translated shader stages.
+            /// </summary>
+            public readonly ShaderProgram[] TranslatedStages;
+
+            /// <summary>
+            /// Cached shaders.
+            /// </summary>
+            public readonly CachedShaderStage[] Shaders;
+
+            /// <summary>
+            /// Specialization state.
+            /// </summary>
+            public readonly ShaderSpecializationState SpecializationState;
+
+            /// <summary>
+            /// Program index.
+            /// </summary>
+            public readonly int ProgramIndex;
+
+            /// <summary>
+            /// Indicates if the program is a compute shader.
+            /// </summary>
+            public readonly bool IsCompute;
+
+            /// <summary>
+            /// Creates a new translated shader compilation entry.
+            /// </summary>
+            /// <param name="translatedStages">Translated shader stages</param>
+            /// <param name="shaders">Cached shaders</param>
+            /// <param name="specState">Specialization state</param>
+            /// <param name="programIndex">Program index</param>
+            /// <param name="isCompute">Indicates if the program is a compute shader</param>
+            public ProgramCompilation(
+                ShaderProgram[] translatedStages,
+                CachedShaderStage[] shaders,
+                ShaderSpecializationState specState,
+                int programIndex,
+                bool isCompute)
+            {
+                TranslatedStages = translatedStages;
+                Shaders = shaders;
+                SpecializationState = specState;
+                ProgramIndex = programIndex;
+                IsCompute = isCompute;
+            }
+        }
+
+        /// <summary>
+        /// Program translation entry.
+        /// </summary>
+        private struct AsyncProgramTranslation
+        {
+            /// <summary>
+            /// Cached shader stages.
+            /// </summary>
+            public readonly CachedShaderStage[] Shaders;
+
+            /// <summary>
+            /// Specialization state.
+            /// </summary>
+            public readonly ShaderSpecializationState SpecializationState;
+
+            /// <summary>
+            /// Program index.
+            /// </summary>
+            public readonly int ProgramIndex;
+
+            /// <summary>
+            /// Indicates if the program is a compute shader.
+            /// </summary>
+            public readonly bool IsCompute;
+
+            /// <summary>
+            /// Creates a new program translation entry.
+            /// </summary>
+            /// <param name="shaders">Cached shader stages</param>
+            /// <param name="specState">Specialization state</param>
+            /// <param name="programIndex">Program index</param>
+            /// <param name="isCompute">Indicates if the program is a compute shader</param>
+            public AsyncProgramTranslation(
+                CachedShaderStage[] shaders,
+                ShaderSpecializationState specState,
+                int programIndex,
+                bool isCompute)
+            {
+                Shaders = shaders;
+                SpecializationState = specState;
+                ProgramIndex = programIndex;
+                IsCompute = isCompute;
+            }
+        }
+
+        // Host programs waiting for their link status to be checked.
+        private readonly Queue<ProgramEntry> _validationQueue;
+        private readonly ConcurrentQueue<ProgramCompilation> _compilationQueue;
+        // Guest programs waiting to be translated by the worker threads.
+        private readonly BlockingCollection<AsyncProgramTranslation> _asyncTranslationQueue;
+        // Successfully compiled programs, keyed by program index.
+        private readonly SortedList<int, CachedShaderProgram> _programList;
+
+        private int _backendParallelCompileThreads;
+        private int _compiledCount;
+        private int _totalCount;
+
+        ///
+        /// Creates a new parallel disk cache loader.
+ /// + /// GPU context + /// Graphics shader cache + /// Compute shader cache + /// Disk cache host storage + /// Cancellation token + /// Function to be called when there is a state change, reporting state, compiled and total shaders count + public ParallelDiskCacheLoader( + GpuContext context, + ShaderCacheHashTable graphicsCache, + ComputeShaderCacheHashTable computeCache, + DiskCacheHostStorage hostStorage, + CancellationToken cancellationToken, + Action stateChangeCallback) + { + _context = context; + _graphicsCache = graphicsCache; + _computeCache = computeCache; + _hostStorage = hostStorage; + _cancellationToken = cancellationToken; + _stateChangeCallback = stateChangeCallback; + _validationQueue = new Queue(); + _compilationQueue = new ConcurrentQueue(); + _asyncTranslationQueue = new BlockingCollection(ThreadCount); + _programList = new SortedList(); + _backendParallelCompileThreads = Math.Min(Environment.ProcessorCount, 8); // Must be kept in sync with the backend code. + } + + /// + /// Loads all shaders from the cache. + /// + public void LoadShaders() + { + Thread[] workThreads = new Thread[ThreadCount]; + + for (int index = 0; index < ThreadCount; index++) + { + workThreads[index] = new Thread(ProcessAsyncQueue) + { + Name = $"Gpu.AsyncTranslationThread.{index}" + }; + } + + int programCount = _hostStorage.GetProgramCount(); + + _compiledCount = 0; + _totalCount = programCount; + + _stateChangeCallback(ShaderCacheState.Start, 0, programCount); + + Logger.Info?.Print(LogClass.Gpu, $"Loading {programCount} shaders from the cache..."); + + for (int index = 0; index < ThreadCount; index++) + { + workThreads[index].Start(_cancellationToken); + } + + try + { + _hostStorage.LoadShaders(_context, this); + } + catch (DiskCacheLoadException diskCacheLoadException) + { + Logger.Warning?.Print(LogClass.Gpu, $"Error loading the shader cache. {diskCacheLoadException.Message}"); + + // If we can't even access the file, then we also can't rebuild. 
+ if (diskCacheLoadException.Result != DiskCacheLoadResult.NoAccess) + { + _needsHostRegen = true; + } + } + catch (InvalidDataException invalidDataException) + { + Logger.Warning?.Print(LogClass.Gpu, $"Error decompressing the shader cache file. {invalidDataException.Message}"); + _needsHostRegen = true; + } + catch (IOException ioException) + { + Logger.Warning?.Print(LogClass.Gpu, $"Error reading the shader cache file. {ioException.Message}"); + _needsHostRegen = true; + } + + _asyncTranslationQueue.CompleteAdding(); + + for (int index = 0; index < ThreadCount; index++) + { + workThreads[index].Join(); + } + + CheckCompilationBlocking(); + + if (_needsHostRegen) + { + // Rebuild both shared and host cache files. + // Rebuilding shared is required because the shader information returned by the translator + // might have changed, and so we have to reconstruct the file with the new information. + try + { + _hostStorage.ClearSharedCache(); + _hostStorage.ClearHostCache(_context); + + if (_programList.Count != 0) + { + Logger.Info?.Print(LogClass.Gpu, $"Rebuilding {_programList.Count} shaders..."); + + using var streams = _hostStorage.GetOutputStreams(_context); + + foreach (var kv in _programList) + { + if (!Active) + { + break; + } + + CachedShaderProgram program = kv.Value; + _hostStorage.AddShader(_context, program, program.HostProgram.GetBinary(), streams); + } + + Logger.Info?.Print(LogClass.Gpu, $"Rebuilt {_programList.Count} shaders successfully."); + } + else + { + _hostStorage.ClearGuestCache(); + + Logger.Info?.Print(LogClass.Gpu, "Shader cache deleted due to corruption."); + } + } + catch (DiskCacheLoadException diskCacheLoadException) + { + Logger.Warning?.Print(LogClass.Gpu, $"Error deleting the shader cache. {diskCacheLoadException.Message}"); + } + catch (IOException ioException) + { + Logger.Warning?.Print(LogClass.Gpu, $"Error deleting the shader cache file. 
{ioException.Message}"); + } + } + + Logger.Info?.Print(LogClass.Gpu, "Shader cache loaded."); + + _stateChangeCallback(ShaderCacheState.Loaded, programCount, programCount); + } + + /// + /// Enqueues a host program for compilation. + /// + /// Cached program + /// Host program to be compiled + /// Program index + /// Indicates if the program is a compute shader + public void QueueHostProgram(CachedShaderProgram cachedProgram, IProgram hostProgram, int programIndex, bool isCompute) + { + EnqueueForValidation(new ProgramEntry(cachedProgram, hostProgram, programIndex, isCompute, isBinary: true)); + } + + /// + /// Enqueues a guest program for compilation. + /// + /// Cached shader stages + /// Specialization state + /// Program index + /// Indicates if the program is a compute shader + public void QueueGuestProgram(CachedShaderStage[] shaders, ShaderSpecializationState specState, int programIndex, bool isCompute) + { + _asyncTranslationQueue.Add(new AsyncProgramTranslation(shaders, specState, programIndex, isCompute)); + } + + /// + /// Checks the state of programs that have already been compiled, + /// and adds them to the cache if the compilation was successful. + /// + public void CheckCompilation() + { + ProcessCompilationQueue(); + + // Process programs that already finished compiling. + // If not yet compiled, do nothing. This avoids blocking to wait for shader compilation. + while (_validationQueue.TryPeek(out ProgramEntry entry)) + { + ProgramLinkStatus result = entry.HostProgram.CheckProgramLink(false); + + if (result != ProgramLinkStatus.Incomplete) + { + ProcessCompiledProgram(ref entry, result); + _validationQueue.Dequeue(); + } + else + { + break; + } + } + } + + /// + /// Waits until all programs finish compiling, then adds the ones + /// with successful compilation to the cache.
+ /// + private void CheckCompilationBlocking() + { + ProcessCompilationQueue(); + + while (_validationQueue.TryDequeue(out ProgramEntry entry) && Active) + { + ProcessCompiledProgram(ref entry, entry.HostProgram.CheckProgramLink(true), asyncCompile: false); + } + } + + /// + /// Process a compiled program result. + /// + /// Compiled program entry + /// Compilation result + /// For failed host compilations, indicates if a guest compilation should be done asynchronously + private void ProcessCompiledProgram(ref ProgramEntry entry, ProgramLinkStatus result, bool asyncCompile = true) + { + if (result == ProgramLinkStatus.Success) + { + // Compilation successful, add to memory cache. + if (entry.IsCompute) + { + _computeCache.Add(entry.CachedProgram); + } + else + { + _graphicsCache.Add(entry.CachedProgram); + } + + if (!entry.IsBinary) + { + _needsHostRegen = true; + } + + _programList.Add(entry.ProgramIndex, entry.CachedProgram); + SignalCompiled(); + } + else if (entry.IsBinary) + { + // If this is a host binary and compilation failed, + // we still have a chance to recompile from the guest binary. + CachedShaderProgram program = entry.CachedProgram; + + if (asyncCompile) + { + QueueGuestProgram(program.Shaders, program.SpecializationState, entry.ProgramIndex, entry.IsCompute); + } + else + { + RecompileFromGuestCode(program.Shaders, program.SpecializationState, entry.ProgramIndex, entry.IsCompute); + ProcessCompilationQueue(); + } + } + else + { + // Failed to compile from both host and guest binary. + ErrorCount++; + SignalCompiled(); + } + } + + /// + /// Processes the queue of translated guest programs that should be compiled on the host. 
+ /// + private void ProcessCompilationQueue() + { + while (_compilationQueue.TryDequeue(out ProgramCompilation compilation) && Active) + { + ShaderSource[] shaderSources = new ShaderSource[compilation.TranslatedStages.Length]; + + int fragmentOutputMap = -1; + + for (int index = 0; index < compilation.TranslatedStages.Length; index++) + { + ShaderProgram shader = compilation.TranslatedStages[index]; + shaderSources[index] = CreateShaderSource(shader); + + if (shader.Info.Stage == ShaderStage.Fragment) + { + fragmentOutputMap = shader.Info.FragmentOutputMap; + } + } + + IProgram hostProgram = _context.Renderer.CreateProgram(shaderSources, new ShaderInfo(fragmentOutputMap)); + CachedShaderProgram program = new CachedShaderProgram(hostProgram, compilation.SpecializationState, compilation.Shaders); + + EnqueueForValidation(new ProgramEntry(program, hostProgram, compilation.ProgramIndex, compilation.IsCompute, isBinary: false)); + } + } + + /// + /// Enqueues a program for validation, which will check if the program was compiled successfully. + /// + /// Program entry to be validated + private void EnqueueForValidation(ProgramEntry newEntry) + { + _validationQueue.Enqueue(newEntry); + + // Do not allow more than N shader compilations in flight, where N is the maximum number of threads + // the driver will be using for parallel compilation. + // Submitting more seems to cause the NVIDIA OpenGL driver to crash. + if (_validationQueue.Count >= _backendParallelCompileThreads && _validationQueue.TryDequeue(out ProgramEntry entry)) + { + ProcessCompiledProgram(ref entry, entry.HostProgram.CheckProgramLink(true), asyncCompile: false); + } + } + + /// + /// Processes the queue of programs that should be translated from guest code.
+ /// + /// Cancellation token + private void ProcessAsyncQueue(object state) + { + CancellationToken ct = (CancellationToken)state; + + try + { + foreach (AsyncProgramTranslation asyncCompilation in _asyncTranslationQueue.GetConsumingEnumerable(ct)) + { + RecompileFromGuestCode( + asyncCompilation.Shaders, + asyncCompilation.SpecializationState, + asyncCompilation.ProgramIndex, + asyncCompilation.IsCompute); + } + } + catch (OperationCanceledException) + { + } + } + + /// + /// Recompiles a program from guest code. + /// + /// Shader stages + /// Specialization state + /// Program index + /// Indicates if the program is a compute shader + private void RecompileFromGuestCode(CachedShaderStage[] shaders, ShaderSpecializationState specState, int programIndex, bool isCompute) + { + try + { + if (isCompute) + { + RecompileComputeFromGuestCode(shaders, specState, programIndex); + } + else + { + RecompileGraphicsFromGuestCode(shaders, specState, programIndex); + } + } + catch (DiskCacheLoadException diskCacheLoadException) + { + Logger.Error?.Print(LogClass.Gpu, $"Error translating guest shader. {diskCacheLoadException.Message}"); + + ErrorCount++; + SignalCompiled(); + } + } + + /// + /// Recompiles a graphics program from guest code. 
+ /// + /// Shader stages + /// Specialization state + /// Program index + private void RecompileGraphicsFromGuestCode(CachedShaderStage[] shaders, ShaderSpecializationState specState, int programIndex) + { + ShaderSpecializationState newSpecState = new ShaderSpecializationState(specState.GraphicsState, specState.TransformFeedbackDescriptors); + ResourceCounts counts = new ResourceCounts(); + + TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1]; + TranslatorContext nextStage = null; + + for (int stageIndex = Constants.ShaderStages - 1; stageIndex >= 0; stageIndex--) + { + CachedShaderStage shader = shaders[stageIndex + 1]; + + if (shader != null) + { + byte[] guestCode = shader.Code; + byte[] cb1Data = shader.Cb1Data; + + DiskCacheGpuAccessor gpuAccessor = new DiskCacheGpuAccessor(_context, guestCode, cb1Data, specState, newSpecState, counts, stageIndex); + TranslatorContext currentStage = DecodeGraphicsShader(gpuAccessor, DefaultFlags, 0); + + if (nextStage != null) + { + currentStage.SetNextStage(nextStage); + } + + if (stageIndex == 0 && shaders[0] != null) + { + byte[] guestCodeA = shaders[0].Code; + byte[] cb1DataA = shaders[0].Cb1Data; + + DiskCacheGpuAccessor gpuAccessorA = new DiskCacheGpuAccessor(_context, guestCodeA, cb1DataA, specState, newSpecState, counts, 0); + translatorContexts[0] = DecodeGraphicsShader(gpuAccessorA, DefaultFlags | TranslationFlags.VertexA, 0); + } + + translatorContexts[stageIndex + 1] = currentStage; + nextStage = currentStage; + } + } + + List translatedStages = new List(); + + for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++) + { + TranslatorContext currentStage = translatorContexts[stageIndex + 1]; + + if (currentStage != null) + { + ShaderProgram program; + + byte[] guestCode = shaders[stageIndex + 1].Code; + byte[] cb1Data = shaders[stageIndex + 1].Cb1Data; + + if (stageIndex == 0 && shaders[0] != null) + { + program = 
currentStage.Translate(translatorContexts[0]); + + byte[] guestCodeA = shaders[0].Code; + byte[] cb1DataA = shaders[0].Cb1Data; + + shaders[0] = new CachedShaderStage(null, guestCodeA, cb1DataA); + shaders[1] = new CachedShaderStage(program.Info, guestCode, cb1Data); + } + else + { + program = currentStage.Translate(); + + shaders[stageIndex + 1] = new CachedShaderStage(program.Info, guestCode, cb1Data); + } + + if (program != null) + { + translatedStages.Add(program); + } + } + } + + _compilationQueue.Enqueue(new ProgramCompilation(translatedStages.ToArray(), shaders, newSpecState, programIndex, isCompute: false)); + } + + /// + /// Recompiles a compute program from guest code. + /// + /// Shader stages + /// Specialization state + /// Program index + private void RecompileComputeFromGuestCode(CachedShaderStage[] shaders, ShaderSpecializationState specState, int programIndex) + { + CachedShaderStage shader = shaders[0]; + ResourceCounts counts = new ResourceCounts(); + ShaderSpecializationState newSpecState = new ShaderSpecializationState(specState.ComputeState); + DiskCacheGpuAccessor gpuAccessor = new DiskCacheGpuAccessor(_context, shader.Code, shader.Cb1Data, specState, newSpecState, counts, 0); + + TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, 0); + + ShaderProgram program = translatorContext.Translate(); + + shaders[0] = new CachedShaderStage(program.Info, shader.Code, shader.Cb1Data); + + _compilationQueue.Enqueue(new ProgramCompilation(new[] { program }, shaders, newSpecState, programIndex, isCompute: true)); + } + + /// + /// Signals that compilation of a program has been finished successfully, + /// or that it failed and guest recompilation has also been attempted. 
+ /// + private void SignalCompiled() + { + _stateChangeCallback(ShaderCacheState.Loading, ++_compiledCount, _totalCount); + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs index a5c7575f2cba..192467b75064 100644 --- a/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs +++ b/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs @@ -1,5 +1,5 @@ using Ryujinx.Common.Logging; -using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Image; using Ryujinx.Graphics.Shader; using System; using System.Runtime.InteropServices; @@ -9,19 +9,12 @@ namespace Ryujinx.Graphics.Gpu.Shader /// /// Represents a GPU state and memory accessor. /// - class GpuAccessor : TextureDescriptorCapableGpuAccessor, IGpuAccessor + class GpuAccessor : GpuAccessorBase, IGpuAccessor { private readonly GpuChannel _channel; private readonly GpuAccessorState _state; private readonly int _stageIndex; private readonly bool _compute; - private readonly int _localSizeX; - private readonly int _localSizeY; - private readonly int _localSizeZ; - private readonly int _localMemorySize; - private readonly int _sharedMemorySize; - - public int Cb1DataSize { get; private set; } /// /// Creates a new instance of the GPU state accessor for graphics shader translation. 
@@ -43,43 +36,16 @@ public GpuAccessor(GpuContext context, GpuChannel channel, GpuAccessorState stat /// GPU context /// GPU channel /// Current GPU state - /// Local group size X of the compute shader - /// Local group size Y of the compute shader - /// Local group size Z of the compute shader - /// Local memory size of the compute shader - /// Shared memory size of the compute shader - public GpuAccessor( - GpuContext context, - GpuChannel channel, - GpuAccessorState state, - int localSizeX, - int localSizeY, - int localSizeZ, - int localMemorySize, - int sharedMemorySize) : base(context) + public GpuAccessor(GpuContext context, GpuChannel channel, GpuAccessorState state) : base(context) { _channel = channel; _state = state; _compute = true; - _localSizeX = localSizeX; - _localSizeY = localSizeY; - _localSizeZ = localSizeZ; - _localMemorySize = localMemorySize; - _sharedMemorySize = sharedMemorySize; } - /// - /// Reads data from the constant buffer 1. - /// - /// Offset in bytes to read from - /// Value at the given offset + /// public uint ConstantBuffer1Read(int offset) { - if (Cb1DataSize < offset + 4) - { - Cb1DataSize = offset + 4; - } - ulong baseAddress = _compute ? _channel.BufferManager.GetComputeUniformBufferAddress(1) : _channel.BufferManager.GetGraphicsUniformBufferAddress(_stageIndex, 1); @@ -87,111 +53,115 @@ public uint ConstantBuffer1Read(int offset) return _channel.MemoryManager.Physical.Read(baseAddress + (ulong)offset); } - /// - /// Prints a log message. - /// - /// Message to print + /// public void Log(string message) { Logger.Warning?.Print(LogClass.Gpu, $"Shader translator: {message}"); } - /// - /// Gets a span of the specified memory location, containing shader code. 
- /// - /// GPU virtual address of the data - /// Minimum size that the returned span may have - /// Span of the memory location - public override ReadOnlySpan GetCode(ulong address, int minimumSize) + /// + public ReadOnlySpan GetCode(ulong address, int minimumSize) { int size = Math.Max(minimumSize, 0x1000 - (int)(address & 0xfff)); return MemoryMarshal.Cast(_channel.MemoryManager.GetSpan(address, size)); } - /// - /// Queries Local Size X for compute shaders. - /// - /// Local Size X - public int QueryComputeLocalSizeX() => _localSizeX; + /// + public int QueryBindingConstantBuffer(int index) + { + return _state.ResourceCounts.UniformBuffersCount++; + } - /// - /// Queries Local Size Y for compute shaders. - /// - /// Local Size Y - public int QueryComputeLocalSizeY() => _localSizeY; + /// + public int QueryBindingStorageBuffer(int index) + { + return _state.ResourceCounts.StorageBuffersCount++; + } - /// - /// Queries Local Size Z for compute shaders. - /// - /// Local Size Z - public int QueryComputeLocalSizeZ() => _localSizeZ; + /// + public int QueryBindingTexture(int index) + { + return _state.ResourceCounts.TexturesCount++; + } - /// - /// Queries Local Memory size in bytes for compute shaders. - /// - /// Local Memory size in bytes - public int QueryComputeLocalMemorySize() => _localMemorySize; + /// + public int QueryBindingImage(int index) + { + return _state.ResourceCounts.ImagesCount++; + } - /// - /// Queries Shared Memory size in bytes for compute shaders. - /// - /// Shared Memory size in bytes - public int QueryComputeSharedMemorySize() => _sharedMemorySize; + /// + public int QueryComputeLocalSizeX() => _state.ComputeState.LocalSizeX; - /// - /// Queries Constant Buffer usage information. 
- /// - /// A mask where each bit set indicates a bound constant buffer + /// + public int QueryComputeLocalSizeY() => _state.ComputeState.LocalSizeY; + + /// + public int QueryComputeLocalSizeZ() => _state.ComputeState.LocalSizeZ; + + /// + public int QueryComputeLocalMemorySize() => _state.ComputeState.LocalMemorySize; + + /// + public int QueryComputeSharedMemorySize() => _state.ComputeState.SharedMemorySize; + + /// public uint QueryConstantBufferUse() { - return _compute + uint useMask = _compute ? _channel.BufferManager.GetComputeUniformBufferUseMask() : _channel.BufferManager.GetGraphicsUniformBufferUseMask(_stageIndex); + + _state.SpecializationState?.RecordConstantBufferUse(_stageIndex, useMask); + return useMask; } - /// - /// Queries current primitive topology for geometry shaders. - /// - /// Current primitive topology + /// public InputTopology QueryPrimitiveTopology() { - return _state.Topology switch - { - PrimitiveTopology.Points => InputTopology.Points, - PrimitiveTopology.Lines or - PrimitiveTopology.LineLoop or - PrimitiveTopology.LineStrip => InputTopology.Lines, - PrimitiveTopology.LinesAdjacency or - PrimitiveTopology.LineStripAdjacency => InputTopology.LinesAdjacency, - PrimitiveTopology.Triangles or - PrimitiveTopology.TriangleStrip or - PrimitiveTopology.TriangleFan => InputTopology.Triangles, - PrimitiveTopology.TrianglesAdjacency or - PrimitiveTopology.TriangleStripAdjacency => InputTopology.TrianglesAdjacency, - PrimitiveTopology.Patches => _state.TessellationMode.UnpackPatchType() == TessPatchType.Isolines - ? InputTopology.Lines - : InputTopology.Triangles, - _ => InputTopology.Points - }; + _state.SpecializationState?.RecordPrimitiveTopology(); + return ConvertToInputTopology(_state.GraphicsState.Topology, _state.GraphicsState.TessellationMode); } - /// - /// Queries the tessellation evaluation shader primitive winding order. 
- /// - True if the primitive winding order is clockwise, false if counter-clockwise - public bool QueryTessCw() => _state.TessellationMode.UnpackCw(); + /// + public bool QueryTessCw() + { + return _state.GraphicsState.TessellationMode.UnpackCw(); + } - /// - /// Queries the tessellation evaluation shader abstract patch type. - /// - /// Abstract patch type - public TessPatchType QueryTessPatchType() => _state.TessellationMode.UnpackPatchType(); + /// + public TessPatchType QueryTessPatchType() + { + return _state.GraphicsState.TessellationMode.UnpackPatchType(); + } - /// - /// Queries the tessellation evaluation shader spacing between tessellated vertices of the patch. - /// - /// Spacing between tessellated vertices of the patch - public TessSpacing QueryTessSpacing() => _state.TessellationMode.UnpackSpacing(); + /// + public TessSpacing QueryTessSpacing() + { + return _state.GraphicsState.TessellationMode.UnpackSpacing(); + } + + /// + public TextureFormat QueryTextureFormat(int handle, int cbufSlot) + { + _state.SpecializationState?.RecordTextureFormat(_stageIndex, handle, cbufSlot); + var descriptor = GetTextureDescriptor(handle, cbufSlot); + return ConvertToTextureFormat(descriptor.UnpackFormat(), descriptor.UnpackSrgb()); + } + + /// + public SamplerType QuerySamplerType(int handle, int cbufSlot) + { + _state.SpecializationState?.RecordTextureSamplerType(_stageIndex, handle, cbufSlot); + return GetTextureDescriptor(handle, cbufSlot).UnpackTextureTarget().ConvertSamplerType(); + } + + /// + public bool QueryTextureCoordNormalized(int handle, int cbufSlot) + { + _state.SpecializationState?.RecordTextureCoordNormalized(_stageIndex, handle, cbufSlot); + return GetTextureDescriptor(handle, cbufSlot).UnpackTextureCoordNormalized(); + } /// /// Gets the texture descriptor for a given texture on the pool.
@@ -199,65 +169,58 @@ PrimitiveTopology.TrianglesAdjacency or /// Index of the texture (this is the word offset of the handle in the constant buffer) /// Constant buffer slot for the texture handle /// Texture descriptor - public override Image.ITextureDescriptor GetTextureDescriptor(int handle, int cbufSlot) + private Image.TextureDescriptor GetTextureDescriptor(int handle, int cbufSlot) { if (_compute) { return _channel.TextureManager.GetComputeTextureDescriptor( - _state.TexturePoolGpuVa, - _state.TextureBufferIndex, - _state.TexturePoolMaximumId, + _state.PoolState.TexturePoolGpuVa, + _state.PoolState.TextureBufferIndex, + _state.PoolState.TexturePoolMaximumId, handle, cbufSlot); } else { return _channel.TextureManager.GetGraphicsTextureDescriptor( - _state.TexturePoolGpuVa, - _state.TextureBufferIndex, - _state.TexturePoolMaximumId, + _state.PoolState.TexturePoolGpuVa, + _state.PoolState.TextureBufferIndex, + _state.PoolState.TexturePoolMaximumId, _stageIndex, handle, cbufSlot); } } - /// - /// Queries transform feedback enable state. - /// - /// True if the shader uses transform feedback, false otherwise + /// public bool QueryTransformFeedbackEnabled() { return _state.TransformFeedbackDescriptors != null; } - /// - /// Queries the varying locations that should be written to the transform feedback buffer. - /// - /// Index of the transform feedback buffer - /// Varying locations for the specified buffer + /// public ReadOnlySpan QueryTransformFeedbackVaryingLocations(int bufferIndex) { - return _state.TransformFeedbackDescriptors[bufferIndex].VaryingLocations; + return _state.TransformFeedbackDescriptors[bufferIndex].AsSpan(); } - /// - /// Queries the stride (in bytes) of the per vertex data written into the transform feedback buffer. 
- /// - /// Index of the transform feedback buffer - /// Stride for the specified buffer + /// public int QueryTransformFeedbackStride(int bufferIndex) { return _state.TransformFeedbackDescriptors[bufferIndex].Stride; } - /// - /// Queries if host state forces early depth testing. - /// - /// True if early depth testing is forced + /// public bool QueryEarlyZForce() { - return _state.EarlyZForce; + _state.SpecializationState?.RecordEarlyZForce(); + return _state.GraphicsState.EarlyZForce; + } + + /// + public void RegisterTexture(int handle, int cbufSlot) + { + _state.SpecializationState?.RegisterTexture(_stageIndex, handle, cbufSlot, GetTextureDescriptor(handle, cbufSlot)); } } } diff --git a/Ryujinx.Graphics.Gpu/Shader/TextureDescriptorCapableGpuAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs similarity index 74% rename from Ryujinx.Graphics.Gpu/Shader/TextureDescriptorCapableGpuAccessor.cs rename to Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs index 3a8ee67a940a..5f9dd5880ee0 100644 --- a/Ryujinx.Graphics.Gpu/Shader/TextureDescriptorCapableGpuAccessor.cs +++ b/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs @@ -1,23 +1,26 @@ -using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Engine.Threed; using Ryujinx.Graphics.Gpu.Image; using Ryujinx.Graphics.Shader; -using System; namespace Ryujinx.Graphics.Gpu.Shader { - abstract class TextureDescriptorCapableGpuAccessor : IGpuAccessor + /// + /// GPU accessor. + /// + class GpuAccessorBase { private readonly GpuContext _context; - public TextureDescriptorCapableGpuAccessor(GpuContext context) + /// + /// Creates a new GPU accessor. + /// + /// GPU context + public GpuAccessorBase(GpuContext context) { _context = context; } - public abstract ReadOnlySpan GetCode(ulong address, int minimumSize); - - public abstract ITextureDescriptor GetTextureDescriptor(int handle, int cbufSlot); - /// /// Queries host about the presence of the FrontFacing built-in variable bug. 
/// @@ -79,20 +82,14 @@ public TextureDescriptorCapableGpuAccessor(GpuContext context) public bool QueryHostSupportsTextureShadowLod() => _context.Capabilities.SupportsTextureShadowLod; /// - /// Queries texture format information, for shaders using image load or store. + /// Converts a packed Maxwell texture format to the shader translator texture format. /// - /// - /// This only returns non-compressed color formats. - /// If the format of the texture is a compressed, depth or unsupported format, then a default value is returned. - /// - /// Texture handle - /// Constant buffer slot for the texture handle - /// Color format of the non-compressed texture - public TextureFormat QueryTextureFormat(int handle, int cbufSlot = -1) + /// Packed Maxwell format + /// Indicates if the format is sRGB + /// Shader translator texture format + protected static TextureFormat ConvertToTextureFormat(uint format, bool formatSrgb) { - var descriptor = GetTextureDescriptor(handle, cbufSlot); - - if (!FormatTable.TryGetTextureFormat(descriptor.UnpackFormat(), descriptor.UnpackSrgb(), out FormatInfo formatInfo)) + if (!FormatTable.TryGetTextureFormat(format, formatSrgb, out FormatInfo formatInfo)) { return TextureFormat.Unknown; } @@ -144,32 +141,31 @@ public TextureFormat QueryTextureFormat(int handle, int cbufSlot = -1) } /// - /// Queries sampler type information. + /// Converts the Maxwell primitive topology to the shader translator topology.
/// - /// Texture handle - /// Constant buffer slot for the texture handle - /// The sampler type value for the given handle - public SamplerType QuerySamplerType(int handle, int cbufSlot = -1) + /// Maxwell primitive topology + /// Maxwell tessellation mode + /// Shader translator topology + protected static InputTopology ConvertToInputTopology(PrimitiveTopology topology, TessMode tessellationMode) { - return GetTextureDescriptor(handle, cbufSlot).UnpackTextureTarget().ConvertSamplerType(); - } - - /// - /// Queries texture target information. - /// - /// Texture handle - /// Constant buffer slot for the texture handle - /// True if the texture is a rectangle texture, false otherwise - public bool QueryIsTextureRectangle(int handle, int cbufSlot = -1) - { - var descriptor = GetTextureDescriptor(handle, cbufSlot); - - TextureTarget target = descriptor.UnpackTextureTarget(); - - bool is2DTexture = target == TextureTarget.Texture2D || - target == TextureTarget.Texture2DRect; - - return !descriptor.UnpackTextureCoordNormalized() && is2DTexture; + return topology switch + { + PrimitiveTopology.Points => InputTopology.Points, + PrimitiveTopology.Lines or + PrimitiveTopology.LineLoop or + PrimitiveTopology.LineStrip => InputTopology.Lines, + PrimitiveTopology.LinesAdjacency or + PrimitiveTopology.LineStripAdjacency => InputTopology.LinesAdjacency, + PrimitiveTopology.Triangles or + PrimitiveTopology.TriangleStrip or + PrimitiveTopology.TriangleFan => InputTopology.Triangles, + PrimitiveTopology.TrianglesAdjacency or + PrimitiveTopology.TriangleStripAdjacency => InputTopology.TrianglesAdjacency, + PrimitiveTopology.Patches => tessellationMode.UnpackPatchType() == TessPatchType.Isolines + ? 
InputTopology.Lines + : InputTopology.Triangles, + _ => InputTopology.Points + }; } } } diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuAccessorState.cs b/Ryujinx.Graphics.Gpu/Shader/GpuAccessorState.cs index 6818072b4514..0e8e979c88de 100644 --- a/Ryujinx.Graphics.Gpu/Shader/GpuAccessorState.cs +++ b/Ryujinx.Graphics.Gpu/Shader/GpuAccessorState.cs @@ -1,72 +1,61 @@ -using Ryujinx.Graphics.GAL; -using Ryujinx.Graphics.Gpu.Engine.Threed; - namespace Ryujinx.Graphics.Gpu.Shader { /// /// State used by the . /// - struct GpuAccessorState + class GpuAccessorState { /// - /// GPU virtual address of the texture pool. - /// - public ulong TexturePoolGpuVa { get; } - - /// - /// Maximum ID of the texture pool. + /// GPU texture pool state. /// - public int TexturePoolMaximumId { get; } + public readonly GpuChannelPoolState PoolState; /// - /// Constant buffer slot where the texture handles are located. + /// GPU compute state, for compute shaders. /// - public int TextureBufferIndex { get; } + public readonly GpuChannelComputeState ComputeState; /// - /// Early Z force enable. + /// GPU graphics state, for vertex, tessellation, geometry and fragment shaders. /// - public bool EarlyZForce { get; } + public readonly GpuChannelGraphicsState GraphicsState; /// - /// Primitive topology of current draw. + /// Shader specialization state (shared by all stages). /// - public PrimitiveTopology Topology { get; } + public readonly ShaderSpecializationState SpecializationState; /// - /// Tessellation mode. + /// Transform feedback information, if the shader uses transform feedback. Otherwise, should be null. /// - public TessMode TessellationMode { get; } + public readonly TransformFeedbackDescriptor[] TransformFeedbackDescriptors; /// - /// Transform feedback information, if the shader uses transform feedback. Otherwise, should be null. + /// Shader resource counts (shared by all stages). 
/// - public TransformFeedbackDescriptor[] TransformFeedbackDescriptors { get; set; } + public readonly ResourceCounts ResourceCounts; /// - /// Creates a new instance of the GPU accessor state. + /// Creates a new GPU accessor state. /// - /// GPU virtual address of the texture pool - /// Maximum ID of the texture pool - /// Constant buffer slot where the texture handles are located - /// Early Z force enable - /// Primitive topology - /// Tessellation mode + /// GPU texture pool state + /// GPU compute state, for compute shaders + /// GPU graphics state, for vertex, tessellation, geometry and fragment shaders + /// Shader specialization state (shared by all stages) + /// Transform feedback information, if the shader uses transform feedback. Otherwise, should be null public GpuAccessorState( - ulong texturePoolGpuVa, - int texturePoolMaximumId, - int textureBufferIndex, - bool earlyZForce, - PrimitiveTopology topology, - TessMode tessellationMode) + GpuChannelPoolState poolState, + GpuChannelComputeState computeState, + GpuChannelGraphicsState graphicsState, + ShaderSpecializationState specializationState, + TransformFeedbackDescriptor[] transformFeedbackDescriptors = null) { - TexturePoolGpuVa = texturePoolGpuVa; - TexturePoolMaximumId = texturePoolMaximumId; - TextureBufferIndex = textureBufferIndex; - EarlyZForce = earlyZForce; - Topology = topology; - TessellationMode = tessellationMode; - TransformFeedbackDescriptors = null; + PoolState = poolState; + GraphicsState = graphicsState; + ComputeState = computeState; + SpecializationState = specializationState; + TransformFeedbackDescriptors = transformFeedbackDescriptors; + ResourceCounts = new ResourceCounts(); } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs b/Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs new file mode 100644 index 000000000000..89a3db7126f8 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs @@ -0,0 +1,57 @@ 
+namespace Ryujinx.Graphics.Gpu.Shader +{ + /// + /// State used by the . + /// + struct GpuChannelComputeState + { + // New fields should be added to the end of the struct to keep disk shader cache compatibility. + + /// + /// Local group size X of the compute shader. + /// + public readonly int LocalSizeX; + + /// + /// Local group size Y of the compute shader. + /// + public readonly int LocalSizeY; + + /// + /// Local group size Z of the compute shader. + /// + public readonly int LocalSizeZ; + + /// + /// Local memory size of the compute shader. + /// + public readonly int LocalMemorySize; + + /// + /// Shared memory size of the compute shader. + /// + public readonly int SharedMemorySize; + + /// + /// Creates a new GPU compute state. + /// + /// Local group size X of the compute shader + /// Local group size Y of the compute shader + /// Local group size Z of the compute shader + /// Local memory size of the compute shader + /// Shared memory size of the compute shader + public GpuChannelComputeState( + int localSizeX, + int localSizeY, + int localSizeZ, + int localMemorySize, + int sharedMemorySize) + { + LocalSizeX = localSizeX; + LocalSizeY = localSizeY; + LocalSizeZ = localSizeZ; + LocalMemorySize = localMemorySize; + SharedMemorySize = sharedMemorySize; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs b/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs new file mode 100644 index 000000000000..5eb31db69989 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs @@ -0,0 +1,41 @@ +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Engine.Threed; + +namespace Ryujinx.Graphics.Gpu.Shader +{ + /// + /// State used by the . + /// + struct GpuChannelGraphicsState + { + // New fields should be added to the end of the struct to keep disk shader cache compatibility. + + /// + /// Early Z force enable. 
+ /// + public readonly bool EarlyZForce; + + /// + /// Primitive topology of current draw. + /// + public readonly PrimitiveTopology Topology; + + /// + /// Tessellation mode. + /// + public readonly TessMode TessellationMode; + + /// + /// Creates a new GPU graphics state. + /// + /// Early Z force enable + /// Primitive topology + /// Tessellation mode + public GpuChannelGraphicsState(bool earlyZForce, PrimitiveTopology topology, TessMode tessellationMode) + { + EarlyZForce = earlyZForce; + Topology = topology; + TessellationMode = tessellationMode; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuChannelPoolState.cs b/Ryujinx.Graphics.Gpu/Shader/GpuChannelPoolState.cs new file mode 100644 index 000000000000..0b36227ac9f4 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/GpuChannelPoolState.cs @@ -0,0 +1,36 @@ +namespace Ryujinx.Graphics.Gpu.Shader +{ + /// + /// State used by the . + /// + struct GpuChannelPoolState + { + /// + /// GPU virtual address of the texture pool. + /// + public readonly ulong TexturePoolGpuVa; + + /// + /// Maximum ID of the texture pool. + /// + public readonly int TexturePoolMaximumId; + + /// + /// Constant buffer slot where the texture handles are located. + /// + public readonly int TextureBufferIndex; + + /// + /// Creates a new GPU texture pool state. 
+ /// + /// GPU virtual address of the texture pool + /// Maximum ID of the texture pool + /// Constant buffer slot where the texture handles are located + public GpuChannelPoolState(ulong texturePoolGpuVa, int texturePoolMaximumId, int textureBufferIndex) + { + TexturePoolGpuVa = texturePoolGpuVa; + TexturePoolMaximumId = texturePoolMaximumId; + TextureBufferIndex = textureBufferIndex; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/HashTable/HashState.cs b/Ryujinx.Graphics.Gpu/Shader/HashTable/HashState.cs new file mode 100644 index 000000000000..584eefdc6986 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/HashTable/HashState.cs @@ -0,0 +1,113 @@ +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Gpu.Shader.HashTable +{ + /// + /// State of a hash calculation. + /// + struct HashState + { + // This is using a slightly modified implementation of FastHash64. + // Reference: https://github.com/ztanml/fast-hash/blob/master/fasthash.c + private const ulong M = 0x880355f21e6d1965UL; + private ulong _hash; + private int _start; + + /// + /// One shot hash calculation for a given data. + /// + /// Data to be hashed + /// Hash of the given data + public static uint CalcHash(ReadOnlySpan data) + { + HashState state = new HashState(); + + state.Initialize(); + state.Continue(data); + return state.Finalize(data); + } + + /// + /// Initializes the hash state. + /// + public void Initialize() + { + _hash = 23; + } + + /// + /// Calculates the hash of the given data. + /// + /// + /// The full data must be passed on . + /// If this is not the first time the method is called, then must start with the data passed on the last call. + /// If a smaller slice of the data was already hashed before, only the additional data will be hashed. + /// This can be used for additive hashing of data in chunks.
+ /// + /// Data to be hashed + public void Continue(ReadOnlySpan data) + { + ulong h = _hash; + + ReadOnlySpan dataAsUlong = MemoryMarshal.Cast(data.Slice(_start)); + + for (int i = 0; i < dataAsUlong.Length; i++) + { + ulong value = dataAsUlong[i]; + + h ^= Mix(value); + h *= M; + } + + _hash = h; + _start = data.Length & ~7; + } + + /// + /// Performs the hash finalization step, and returns the calculated hash. + /// + /// + /// The full data must be passed on . + /// must start with the data passed on the last call to . + /// No internal state is changed, so one can still continue hashing data with + /// after calling this method. + /// + /// Data to be hashed + /// Hash of all the data hashed with this + public uint Finalize(ReadOnlySpan data) + { + ulong h = _hash; + + int remainder = data.Length & 7; + if (remainder != 0) + { + ulong v = 0; + + for (int i = data.Length - remainder; i < data.Length; i++) + { + v |= (ulong)data[i] << ((i - remainder) * 8); + } + + h ^= Mix(v); + h *= M; + } + + h = Mix(h); + return (uint)(h - (h >> 32)); + } + + /// + /// Hash mix function. + /// + /// Hash to mix + /// Mixed hash + private static ulong Mix(ulong h) + { + h ^= h >> 23; + h *= 0x2127599bf4325c37UL; + h ^= h >> 47; + return h; + } + } +} diff --git a/Ryujinx.Graphics.Gpu/Shader/HashTable/IDataAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/HashTable/IDataAccessor.cs new file mode 100644 index 000000000000..c982cd9f621e --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/HashTable/IDataAccessor.cs @@ -0,0 +1,27 @@ +using System; + +namespace Ryujinx.Graphics.Gpu.Shader.HashTable +{ + /// + /// Data accessor, used by to access data of unknown length. + /// + /// + /// This will be used to access chunks of data and try finding a match on the table. + /// This is necessary because the data size is assumed to be unknown, and so the + /// hash table must try to "guess" the size of the data based on the entries on the table.
+ /// + public interface IDataAccessor + { + /// + /// Gets a span of shader code at the specified offset, with at most the specified size. + /// + /// + /// This might return a span smaller than the requested if there's + /// no more code available. + /// + /// Offset in shader code + /// Size in bytes + /// Shader code span + ReadOnlySpan GetSpan(int offset, int length); + } +} diff --git a/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionHashTable.cs b/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionHashTable.cs new file mode 100644 index 000000000000..6a563c16fea1 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionHashTable.cs @@ -0,0 +1,452 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Numerics; + +namespace Ryujinx.Graphics.Gpu.Shader.HashTable +{ + /// + /// Partitioned hash table. + /// + /// Hash table entry type + class PartitionHashTable + { + /// + /// Hash table entry. + /// + private struct Entry + { + /// + /// Hash bytes of . + /// + public readonly uint Hash; + + /// + /// If this entry is only a sub-region of , this indicates the size in bytes + /// of that region. Otherwise, it should be zero. + /// + public readonly int OwnSize; + + /// + /// Data used to compute the hash for this entry. + /// + /// + /// To avoid additional allocations, this might be an instance of the full entry data, + /// and only a sub-region of it might be actually used by this entry. Such sub-region + /// has its size indicated by in this case. + /// + public readonly byte[] Data; + + /// + /// Item associated with this entry. + /// + public T Item; + + /// + /// Indicates if the entry is partial, which means that this entry is only for a sub-region of the data. + /// + /// + /// Partial entries have no items associated with them. They just indicate that the data might be present on + /// the table, and one must keep looking for the full entry on other tables of larger data size.
+ /// + public bool IsPartial => OwnSize != 0; + + /// + /// Creates a new partial hash table entry. + /// + /// Hash of the data + /// Full data + /// Size of the sub-region of data that belongs to this entry + public Entry(uint hash, byte[] ownerData, int ownSize) + { + Hash = hash; + OwnSize = ownSize; + Data = ownerData; + Item = default; + } + + /// + /// Creates a new full hash table entry. + /// + /// Hash of the data + /// Data + /// Item associated with this entry + public Entry(uint hash, byte[] data, T item) + { + Hash = hash; + OwnSize = 0; + Data = data; + Item = item; + } + + /// + /// Gets the data for this entry, either full or partial. + /// + /// Data sub-region + public ReadOnlySpan GetData() + { + if (OwnSize != 0) + { + return new ReadOnlySpan(Data).Slice(0, OwnSize); + } + + return Data; + } + } + + /// + /// Hash table bucket. + /// + private struct Bucket + { + /// + /// Inline entry, to avoid allocations for the common single entry case. + /// + public Entry InlineEntry; + + /// + /// List of additional entries for the not-so-common multiple entries case. + /// + public List MoreEntries; + } + + private Bucket[] _buckets; + private int _count; + + /// + /// Total amount of entries on the hash table. + /// + public int Count => _count; + + /// + /// Creates a new instance of the partitioned hash table. + /// + public PartitionHashTable() + { + _buckets = Array.Empty(); + } + + /// + /// Gets an item on the table, or adds a new one if not present. + /// + /// Data + /// Hash of the data + /// Item to be added if not found + /// Existing item if found, or if not found + public T GetOrAdd(byte[] data, uint dataHash, T item) + { + if (TryFindItem(dataHash, data, out T existingItem)) + { + return existingItem; + } + + Entry entry = new Entry(dataHash, data, item); + + AddToBucket(dataHash, ref entry); + + return item; + } + + /// + /// Adds an item to the hash table. 
+ /// + /// Data + /// Hash of the data + /// Item to be added + /// True if the item was added, false due to an item associated with the data already being on the table + public bool Add(byte[] data, uint dataHash, T item) + { + if (TryFindItem(dataHash, data, out _)) + { + return false; + } + + Entry entry = new Entry(dataHash, data, item); + + AddToBucket(dataHash, ref entry); + + return true; + } + + /// + /// Adds a partial entry to the hash table. + /// + /// Full data + /// Size of the sub-region of used by the partial entry + /// True if added, false otherwise + public bool AddPartial(byte[] ownerData, int ownSize) + { + ReadOnlySpan data = new ReadOnlySpan(ownerData).Slice(0, ownSize); + + return AddPartial(ownerData, HashState.CalcHash(data), ownSize); + } + + /// + /// Adds a partial entry to the hash table. + /// + /// Full data + /// Hash of the data sub-region + /// Size of the sub-region of used by the partial entry + /// True if added, false otherwise + public bool AddPartial(byte[] ownerData, uint dataHash, int ownSize) + { + ReadOnlySpan data = new ReadOnlySpan(ownerData).Slice(0, ownSize); + + if (TryFindItem(dataHash, data, out _)) + { + return false; + } + + Entry entry = new Entry(dataHash, ownerData, ownSize); + + AddToBucket(dataHash, ref entry); + + return true; + } + + /// + /// Adds entry with a given hash to the table. + /// + /// Hash of the entry + /// Entry + private void AddToBucket(uint dataHash, ref Entry entry) + { + int pow2Count = GetPow2Count(++_count); + if (pow2Count != _buckets.Length) + { + Rebuild(pow2Count); + } + + ref Bucket bucket = ref GetBucketForHash(dataHash); + + AddToBucket(ref bucket, ref entry); + } + + /// + /// Adds an entry to a bucket. 
+ /// + /// Bucket to add the entry into + /// Entry to be added + private void AddToBucket(ref Bucket bucket, ref Entry entry) + { + if (bucket.InlineEntry.Data == null) + { + bucket.InlineEntry = entry; + } + else + { + (bucket.MoreEntries ??= new List()).Add(entry); + } + } + + /// + /// Creates partial entries on a new hash table for all existing full entries. + /// + /// + /// This should be called every time a new hash table is created, and there are hash + /// tables with data sizes that are higher than that of the new table. + /// This will then fill the new hash table with "partial" entries of full entries + /// on the hash tables with higher size. + /// + /// New hash table + /// Size of the data on the new hash table + public void FillPartials(PartitionHashTable newTable, int newEntrySize) + { + for (int i = 0; i < _buckets.Length; i++) + { + ref Bucket bucket = ref _buckets[i]; + ref Entry inlineEntry = ref bucket.InlineEntry; + + if (inlineEntry.Data != null) + { + if (!inlineEntry.IsPartial) + { + newTable.AddPartial(inlineEntry.Data, newEntrySize); + } + + if (bucket.MoreEntries != null) + { + foreach (Entry entry in bucket.MoreEntries) + { + if (entry.IsPartial) + { + continue; + } + + newTable.AddPartial(entry.Data, newEntrySize); + } + } + } + } + } + + /// + /// Tries to find an item on the table. 
+ /// + /// Hash of + /// Data to find + /// Item associated with the data + /// True if an item was found, false otherwise + private bool TryFindItem(uint dataHash, ReadOnlySpan data, out T item) + { + if (_count == 0) + { + item = default; + return false; + } + + ref Bucket bucket = ref GetBucketForHash(dataHash); + + if (bucket.InlineEntry.Data != null) + { + if (bucket.InlineEntry.Hash == dataHash && bucket.InlineEntry.GetData().SequenceEqual(data)) + { + item = bucket.InlineEntry.Item; + return true; + } + + if (bucket.MoreEntries != null) + { + foreach (Entry entry in bucket.MoreEntries) + { + if (entry.Hash == dataHash && entry.GetData().SequenceEqual(data)) + { + item = entry.Item; + return true; + } + } + } + } + + item = default; + return false; + } + + /// + /// Indicates the result of a hash table lookup. + /// + public enum SearchResult + { + /// + /// No entry was found, the search must continue on hash tables of lower size. + /// + NotFound, + + /// + /// A partial entry was found, the search must continue on hash tables of higher size. + /// + FoundPartial, + + /// + /// A full entry was found, the search was concluded and the item can be retrieved. + /// + FoundFull + } + + /// + /// Tries to find an item on the table. 
+ /// + /// Data accessor + /// Size of the hash table data + /// The item on the table, if found, otherwise unmodified + /// The data on the table, if found, otherwise unmodified + /// Table lookup result + public SearchResult TryFindItem(ref SmartDataAccessor dataAccessor, int size, ref T item, ref byte[] data) + { + if (_count == 0) + { + return SearchResult.NotFound; + } + + ReadOnlySpan dataSpan = dataAccessor.GetSpanAndHash(size, out uint dataHash); + + if (dataSpan.Length != size) + { + return SearchResult.NotFound; + } + + ref Bucket bucket = ref GetBucketForHash(dataHash); + + if (bucket.InlineEntry.Data != null) + { + if (bucket.InlineEntry.Hash == dataHash && bucket.InlineEntry.GetData().SequenceEqual(dataSpan)) + { + item = bucket.InlineEntry.Item; + data = bucket.InlineEntry.Data; + return bucket.InlineEntry.IsPartial ? SearchResult.FoundPartial : SearchResult.FoundFull; + } + + if (bucket.MoreEntries != null) + { + foreach (Entry entry in bucket.MoreEntries) + { + if (entry.Hash == dataHash && entry.GetData().SequenceEqual(dataSpan)) + { + item = entry.Item; + data = entry.Data; + return entry.IsPartial ? SearchResult.FoundPartial : SearchResult.FoundFull; + } + } + } + } + + return SearchResult.NotFound; + } + + /// + /// Rebuilds the table for a new count. + /// + /// New power of two count of the table + private void Rebuild(int newPow2Count) + { + Bucket[] newBuckets = new Bucket[newPow2Count]; + + uint mask = (uint)newPow2Count - 1; + + for (int i = 0; i < _buckets.Length; i++) + { + ref Bucket bucket = ref _buckets[i]; + + if (bucket.InlineEntry.Data != null) + { + AddToBucket(ref newBuckets[(int)(bucket.InlineEntry.Hash & mask)], ref bucket.InlineEntry); + + if (bucket.MoreEntries != null) + { + foreach (Entry entry in bucket.MoreEntries) + { + Entry entryCopy = entry; + AddToBucket(ref newBuckets[(int)(entry.Hash & mask)], ref entryCopy); + } + } + } + } + + _buckets = newBuckets; + } + + /// + /// Gets the bucket for a given hash. 
+ /// + /// Data hash + /// Bucket for the hash + private ref Bucket GetBucketForHash(uint hash) + { + int index = (int)(hash & (_buckets.Length - 1)); + + return ref _buckets[index]; + } + + /// + /// Gets a power of two count from a regular count. + /// + /// Count + /// Power of two count + private static int GetPow2Count(int count) + { + // This returns the nearest power of two that is lower than or equal to count. + // This was done to optimize memory usage rather than performance. + return 1 << BitOperations.Log2((uint)count); + } + } +} diff --git a/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionedHashTable.cs b/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionedHashTable.cs new file mode 100644 index 000000000000..4c9cc4d40713 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionedHashTable.cs @@ -0,0 +1,244 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; + +namespace Ryujinx.Graphics.Gpu.Shader.HashTable +{ + /// + /// Partitioned hash table. + /// + /// + public class PartitionedHashTable + { + /// + /// Entry for a given data size. + /// + private struct SizeEntry + { + /// + /// Size for the data that will be stored on the hash table on this entry. + /// + public int Size { get; } + + /// + /// Number of entries on the hash table. + /// + public int TableCount => _table.Count; + + private readonly PartitionHashTable _table; + + /// + /// Creates an entry for a given size. + /// + /// Size of the data to be stored on this entry + public SizeEntry(int size) + { + Size = size; + _table = new PartitionHashTable(); + } + + /// + /// Gets an item for existing data, or adds a new one. + /// + /// Data associated with the item + /// Hash of + /// Item to be added + /// Existing item, or if not present + public T GetOrAdd(byte[] data, uint dataHash, T item) + { + Debug.Assert(data.Length == Size); + return _table.GetOrAdd(data, dataHash, item); + } + + /// + /// Adds a new item.
+ /// + /// Data associated with the item + /// Hash of + /// Item to be added + /// True if added, false otherwise + public bool Add(byte[] data, uint dataHash, T item) + { + Debug.Assert(data.Length == Size); + return _table.Add(data, dataHash, item); + } + + /// + /// Adds a partial entry. + /// + /// Full entry data + /// Hash of the sub-region of the data that belongs to this entry + /// True if added, false otherwise + public bool AddPartial(byte[] ownerData, uint dataHash) + { + return _table.AddPartial(ownerData, dataHash, Size); + } + + /// + /// Fills a new hash table with "partials" of existing full entries of higher size. + /// + /// Entry with the new hash table + public void FillPartials(SizeEntry newEntry) + { + Debug.Assert(newEntry.Size < Size); + _table.FillPartials(newEntry._table, newEntry.Size); + } + + /// + /// Tries to find an item on the hash table. + /// + /// Data accessor + /// The item on the table, if found, otherwise unmodified + /// The data on the table, if found, otherwise unmodified + /// Table lookup result + public PartitionHashTable.SearchResult TryFindItem(ref SmartDataAccessor dataAccessor, ref T item, ref byte[] data) + { + return _table.TryFindItem(ref dataAccessor, Size, ref item, ref data); + } + } + + private readonly List _sizeTable; + + /// + /// Creates a new partitioned hash table. + /// + public PartitionedHashTable() + { + _sizeTable = new List(); + } + + /// + /// Adds a new item to the table. + /// + /// Data + /// Item associated with the data + public void Add(byte[] data, T item) + { + GetOrAdd(data, item); + } + + /// + /// Gets an existing item from the table, or adds a new one if not present. 
+ /// + /// Data + /// Item associated with the data + /// Existing item, or if not present + public T GetOrAdd(byte[] data, T item) + { + SizeEntry sizeEntry; + + int index = BinarySearch(_sizeTable, data.Length); + if (index < _sizeTable.Count && _sizeTable[index].Size == data.Length) + { + sizeEntry = _sizeTable[index]; + } + else + { + if (index < _sizeTable.Count && _sizeTable[index].Size < data.Length) + { + index++; + } + + sizeEntry = new SizeEntry(data.Length); + + _sizeTable.Insert(index, sizeEntry); + + for (int i = index + 1; i < _sizeTable.Count; i++) + { + _sizeTable[i].FillPartials(sizeEntry); + } + } + + HashState hashState = new HashState(); + hashState.Initialize(); + + for (int i = 0; i < index; i++) + { + ReadOnlySpan dataSlice = new ReadOnlySpan(data).Slice(0, _sizeTable[i].Size); + hashState.Continue(dataSlice); + _sizeTable[i].AddPartial(data, hashState.Finalize(dataSlice)); + } + + hashState.Continue(data); + return sizeEntry.GetOrAdd(data, hashState.Finalize(data), item); + } + + /// + /// Performs binary search on a list of hash tables, each one with a fixed data size. + /// + /// List of hash tables + /// Size to search for + /// Index of the hash table with the given size, or nearest one otherwise + private static int BinarySearch(List entries, int size) + { + int left = 0; + int middle = 0; + int right = entries.Count - 1; + + while (left <= right) + { + middle = left + ((right - left) >> 1); + + SizeEntry entry = entries[middle]; + + if (size == entry.Size) + { + break; + } + + if (size < entry.Size) + { + right = middle - 1; + } + else + { + left = middle + 1; + } + } + + return middle; + } + + /// + /// Tries to find an item on the table. 
+ /// + /// Data accessor + /// Item, if found + /// Data, if found + /// True if the item was found on the table, false otherwise + public bool TryFindItem(IDataAccessor dataAccessor, out T item, out byte[] data) + { + SmartDataAccessor sda = new SmartDataAccessor(dataAccessor); + + item = default; + data = null; + + int left = 0; + int right = _sizeTable.Count; + + while (left != right) + { + int index = left + ((right - left) >> 1); + + PartitionHashTable.SearchResult result = _sizeTable[index].TryFindItem(ref sda, ref item, ref data); + + if (result == PartitionHashTable.SearchResult.FoundFull) + { + return true; + } + + if (result == PartitionHashTable.SearchResult.NotFound) + { + right = index; + } + else /* if (result == PartitionHashTable.SearchResult.FoundPartial) */ + { + left = index + 1; + } + } + + data = null; + return false; + } + } +} diff --git a/Ryujinx.Graphics.Gpu/Shader/HashTable/SmartDataAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/HashTable/SmartDataAccessor.cs new file mode 100644 index 000000000000..0632add6c624 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/HashTable/SmartDataAccessor.cs @@ -0,0 +1,96 @@ +using System; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Gpu.Shader.HashTable +{ + /// + /// Smart data accessor that can cache data and hashes to avoid reading and re-hashing the same memory regions. + /// + ref struct SmartDataAccessor + { + private readonly IDataAccessor _dataAccessor; + private ReadOnlySpan _data; + private readonly SortedList _cachedHashes; + + /// + /// Creates a new smart data accessor. + /// + /// Data accessor + public SmartDataAccessor(IDataAccessor dataAccessor) + { + _dataAccessor = dataAccessor; + _data = ReadOnlySpan.Empty; + _cachedHashes = new SortedList(); + } + + /// + /// Gets a span of a given size. + /// + /// + /// The actual length of the span returned depends on the + /// and might be less than requested.
+ /// + /// Size in bytes + /// Span with the requested size + public ReadOnlySpan GetSpan(int length) + { + if (_data.Length < length) + { + _data = _dataAccessor.GetSpan(0, length); + } + else if (_data.Length > length) + { + return _data.Slice(0, length); + } + + return _data; + } + + /// + /// Gets a span of the requested size, and a hash of its data. + /// + /// Length of the span + /// Hash of the span data + /// Span of data + public ReadOnlySpan GetSpanAndHash(int length, out uint hash) + { + ReadOnlySpan data = GetSpan(length); + hash = data.Length == length ? CalcHashCached(data) : 0; + return data; + } + + /// + /// Calculates the hash for a requested span. + /// This will try to use a cached hash if the data was already accessed before, to avoid re-hashing. + /// + /// Data to be hashed + /// Hash of the data + private uint CalcHashCached(ReadOnlySpan data) + { + HashState state = default; + bool found = false; + + for (int i = _cachedHashes.Count - 1; i >= 0; i--) + { + int cachedHashSize = _cachedHashes.Keys[i]; + + if (cachedHashSize < data.Length) + { + state = _cachedHashes.Values[i]; + found = true; + break; + } + } + + if (!found) + { + state = new HashState(); + state.Initialize(); + } + + state.Continue(data); + _cachedHashes[data.Length & ~7] = state; + return state.Finalize(data); + } + } +} diff --git a/Ryujinx.Graphics.Gpu/Shader/ResourceCounts.cs b/Ryujinx.Graphics.Gpu/Shader/ResourceCounts.cs new file mode 100644 index 000000000000..b85423cb333b --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/ResourceCounts.cs @@ -0,0 +1,36 @@ +namespace Ryujinx.Graphics.Gpu.Shader +{ + /// + /// Holds counts for the resources used by a shader. + /// + class ResourceCounts + { + /// + /// Total of uniform buffers used by the shaders. + /// + public int UniformBuffersCount; + + /// + /// Total of storage buffers used by the shaders. + /// + public int StorageBuffersCount; + + /// + /// Total of textures used by the shaders. 
+ /// + public int TexturesCount; + + /// + /// Total of images used by the shaders. + /// + public int ImagesCount; + + /// + /// Creates a new instance of the shader resource counts class. + /// + public ResourceCounts() + { + UniformBuffersCount = 1; // The first binding is reserved for the support buffer. + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderAddresses.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderAddresses.cs index 39bf10fab6e2..651dfd263e1b 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderAddresses.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderAddresses.cs @@ -1,4 +1,6 @@ using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; namespace Ryujinx.Graphics.Gpu.Shader { @@ -9,7 +11,7 @@ struct ShaderAddresses : IEquatable { #pragma warning disable CS0649 public ulong VertexA; - public ulong Vertex; + public ulong VertexB; public ulong TessControl; public ulong TessEvaluation; public ulong Geometry; @@ -34,7 +36,7 @@ public override bool Equals(object other) public bool Equals(ShaderAddresses other) { return VertexA == other.VertexA && - Vertex == other.Vertex && + VertexB == other.VertexB && TessControl == other.TessControl && TessEvaluation == other.TessEvaluation && Geometry == other.Geometry && @@ -47,7 +49,16 @@ public bool Equals(ShaderAddresses other) /// Hash code public override int GetHashCode() { - return HashCode.Combine(VertexA, Vertex, TessControl, TessEvaluation, Geometry, Fragment); + return HashCode.Combine(VertexA, VertexB, TessControl, TessEvaluation, Geometry, Fragment); + } + + /// + /// Gets a view of the structure as a span of addresses. 
+ /// + /// Span of addresses + public Span AsSpan() + { + return MemoryMarshal.CreateSpan(ref VertexA, Unsafe.SizeOf() / sizeof(ulong)); } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index f38709001bda..03d5ecade2fa 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -1,18 +1,14 @@ -using Ryujinx.Common; using Ryujinx.Common.Logging; using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Engine.Threed; using Ryujinx.Graphics.Gpu.Memory; using Ryujinx.Graphics.Gpu.Shader.Cache; -using Ryujinx.Graphics.Gpu.Shader.Cache.Definition; +using Ryujinx.Graphics.Gpu.Shader.DiskCache; using Ryujinx.Graphics.Shader; using Ryujinx.Graphics.Shader.Translation; using System; using System.Collections.Generic; -using System.Diagnostics; -using System.Runtime.InteropServices; using System.Threading; -using System.Threading.Tasks; namespace Ryujinx.Graphics.Gpu.Shader { @@ -21,30 +17,66 @@ namespace Ryujinx.Graphics.Gpu.Shader /// class ShaderCache : IDisposable { - private const TranslationFlags DefaultFlags = TranslationFlags.DebugMode; + /// + /// Default flags used on the shader translation process. 
+ /// + public const TranslationFlags DefaultFlags = TranslationFlags.DebugMode; + + private struct TranslatedShader + { + public readonly CachedShaderStage Shader; + public readonly ShaderProgram Program; + + public TranslatedShader(CachedShaderStage shader, ShaderProgram program) + { + Shader = shader; + Program = program; + } + } + + private struct TranslatedShaderVertexPair + { + public readonly CachedShaderStage VertexA; + public readonly CachedShaderStage VertexB; + public readonly ShaderProgram Program; + + public TranslatedShaderVertexPair(CachedShaderStage vertexA, CachedShaderStage vertexB, ShaderProgram program) + { + VertexA = vertexA; + VertexB = vertexB; + Program = program; + } + } private readonly GpuContext _context; private readonly ShaderDumper _dumper; - private readonly Dictionary> _cpPrograms; - private readonly Dictionary> _gpPrograms; + private readonly Dictionary _cpPrograms; + private readonly Dictionary _gpPrograms; - private CacheManager _cacheManager; + private struct ProgramToSave + { + public readonly CachedShaderProgram CachedProgram; + public readonly IProgram HostProgram; + + public ProgramToSave(CachedShaderProgram cachedProgram, IProgram hostProgram) + { + CachedProgram = cachedProgram; + HostProgram = hostProgram; + } + } - private Dictionary _gpProgramsDiskCache; - private Dictionary _cpProgramsDiskCache; + private Queue _programsToSaveQueue; - private Queue<(IProgram, Action)> _programsToSaveQueue; + private readonly ComputeShaderCacheHashTable _computeShaderCache; + private readonly ShaderCacheHashTable _graphicsShaderCache; + private readonly DiskCacheHostStorage _diskCacheHostStorage; + private readonly BackgroundDiskCacheWriter _cacheWriter; /// - /// Version of the codegen (to be changed when codegen or guest format change). + /// Event for signalling shader cache loading progress. 
/// - private const ulong ShaderCodeGenVersion = 3251; - - // Progress reporting helpers - private volatile int _shaderCount; - private volatile int _totalShaderCount; public event Action ShaderCacheStateChanged; /// @@ -57,12 +89,23 @@ public ShaderCache(GpuContext context) _dumper = new ShaderDumper(); - _cpPrograms = new Dictionary>(); - _gpPrograms = new Dictionary>(); - _gpProgramsDiskCache = new Dictionary(); - _cpProgramsDiskCache = new Dictionary(); + _cpPrograms = new Dictionary(); + _gpPrograms = new Dictionary(); + + _programsToSaveQueue = new Queue(); + + string diskCacheTitleId = GraphicsConfig.EnableShaderCache && GraphicsConfig.TitleId != null + ? CacheHelper.GetBaseCacheDirectory(GraphicsConfig.TitleId) + : null; + + _computeShaderCache = new ComputeShaderCacheHashTable(); + _graphicsShaderCache = new ShaderCacheHashTable(); + _diskCacheHostStorage = new DiskCacheHostStorage(diskCacheTitleId); - _programsToSaveQueue = new Queue<(IProgram, Action)>(); + if (_diskCacheHostStorage.CacheEnabled) + { + _cacheWriter = new BackgroundDiskCacheWriter(context, _diskCacheHostStorage); + } } /// @@ -72,13 +115,17 @@ public void ProcessShaderCacheQueue() { // Check to see if the binaries for previously compiled shaders are ready, and save them out. - while (_programsToSaveQueue.Count > 0) + while (_programsToSaveQueue.TryPeek(out ProgramToSave programToSave)) { - (IProgram program, Action dataAction) = _programsToSaveQueue.Peek(); + ProgramLinkStatus result = programToSave.HostProgram.CheckProgramLink(false); - if (program.CheckProgramLink(false) != ProgramLinkStatus.Incomplete) + if (result != ProgramLinkStatus.Incomplete) { - dataAction(program.GetBinary()); + if (result == ProgramLinkStatus.Success) + { + _cacheWriter.AddShader(programToSave.CachedProgram, programToSave.HostProgram.GetBinary()); + } + _programsToSaveQueue.Dequeue(); } else @@ -91,463 +138,48 @@ public void ProcessShaderCacheQueue() /// /// Initialize the cache. 
/// - internal void Initialize() + /// Cancellation token to cancel the shader cache initialization process + internal void Initialize(CancellationToken cancellationToken) { - if (GraphicsConfig.EnableShaderCache && GraphicsConfig.TitleId != null) + if (_diskCacheHostStorage.CacheEnabled) { - _cacheManager = new CacheManager(CacheGraphicsApi.OpenGL, CacheHashType.XxHash128, "glsl", GraphicsConfig.TitleId, ShaderCodeGenVersion); - - bool isReadOnly = _cacheManager.IsReadOnly; - - HashSet invalidEntries = null; - - if (isReadOnly) + if (!_diskCacheHostStorage.CacheExists()) { - Logger.Warning?.Print(LogClass.Gpu, "Loading shader cache in read-only mode (cache in use by another program!)"); - } - else - { - invalidEntries = new HashSet(); - } - - ReadOnlySpan guestProgramList = _cacheManager.GetGuestProgramList(); + // If we don't have a shader cache on the new format, try to perform migration from the old shader cache. + Logger.Info?.Print(LogClass.Gpu, "No shader cache found, trying to migrate from legacy shader cache..."); - using AutoResetEvent progressReportEvent = new AutoResetEvent(false); + int migrationCount = Migration.MigrateFromLegacyCache(_context, _diskCacheHostStorage); - _shaderCount = 0; - _totalShaderCount = guestProgramList.Length; - - ShaderCacheStateChanged?.Invoke(ShaderCacheState.Start, _shaderCount, _totalShaderCount); - Thread progressReportThread = null; - - if (guestProgramList.Length > 0) - { - progressReportThread = new Thread(ReportProgress) - { - Name = "ShaderCache.ProgressReporter", - Priority = ThreadPriority.Lowest, - IsBackground = true - }; - - progressReportThread.Start(progressReportEvent); + Logger.Info?.Print(LogClass.Gpu, $"Migrated {migrationCount} shaders."); } - // Make sure these are initialized before doing compilation. 
- Capabilities caps = _context.Capabilities; - - int maxTaskCount = Math.Min(Environment.ProcessorCount, 8); - int programIndex = 0; - List activeTasks = new List(); - - using AutoResetEvent taskDoneEvent = new AutoResetEvent(false); - - // This thread dispatches tasks to do shader translation, and creates programs that OpenGL will link in the background. - // The program link status is checked in a non-blocking manner so that multiple shaders can be compiled at once. - - while (programIndex < guestProgramList.Length || activeTasks.Count > 0) - { - if (activeTasks.Count < maxTaskCount && programIndex < guestProgramList.Length) - { - // Begin a new shader compilation. - Hash128 key = guestProgramList[programIndex]; - - byte[] hostProgramBinary = _cacheManager.GetHostProgramByHash(ref key); - bool hasHostCache = hostProgramBinary != null; - - IProgram hostProgram = null; - - // If the program sources aren't in the cache, compile from saved guest program. - byte[] guestProgram = _cacheManager.GetGuestProgramByHash(ref key); - - if (guestProgram == null) - { - Logger.Error?.Print(LogClass.Gpu, $"Ignoring orphan shader hash {key} in cache (is the cache incomplete?)"); - - // Should not happen, but if someone messed with the cache it's better to catch it. - invalidEntries?.Add(key); - - _shaderCount = ++programIndex; - - continue; - } - - ReadOnlySpan guestProgramReadOnlySpan = guestProgram; - - ReadOnlySpan cachedShaderEntries = GuestShaderCacheEntry.Parse(ref guestProgramReadOnlySpan, out GuestShaderCacheHeader fileHeader); - - if (cachedShaderEntries[0].Header.Stage == ShaderStage.Compute) - { - Debug.Assert(cachedShaderEntries.Length == 1); - - GuestShaderCacheEntry entry = cachedShaderEntries[0]; - - HostShaderCacheEntry[] hostShaderEntries = null; - - // Try loading host shader binary. 
- if (hasHostCache) - { - hostShaderEntries = HostShaderCacheEntry.Parse(hostProgramBinary, out ReadOnlySpan hostProgramBinarySpan); - hostProgramBinary = hostProgramBinarySpan.ToArray(); - hostProgram = _context.Renderer.LoadProgramBinary(hostProgramBinary, false, new ShaderInfo(-1)); - } - - ShaderCompileTask task = new ShaderCompileTask(taskDoneEvent); - activeTasks.Add(task); - - task.OnCompiled(hostProgram, (bool isHostProgramValid, ShaderCompileTask task) => - { - ShaderProgram program = null; - ShaderProgramInfo shaderProgramInfo = null; - - if (isHostProgramValid) - { - // Reconstruct code holder. - - program = new ShaderProgram(entry.Header.Stage, ""); - shaderProgramInfo = hostShaderEntries[0].ToShaderProgramInfo(); - - byte[] code = entry.Code.AsSpan(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray(); - - ShaderCodeHolder shader = new ShaderCodeHolder(program, shaderProgramInfo, code); - - _cpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shader)); - - return true; - } - else - { - // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again. 
- - Task compileTask = Task.Run(() => - { - var binaryCode = new Memory(entry.Code); - - var gpuAccessor = new CachedGpuAccessor( - _context, - binaryCode, - binaryCode.Slice(binaryCode.Length - entry.Header.Cb1DataSize), - entry.Header.GpuAccessorHeader, - entry.TextureDescriptors, - null); - - var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, DefaultFlags | TranslationFlags.Compute); - program = Translator.CreateContext(0, gpuAccessor, options).Translate(out shaderProgramInfo); - }); - - task.OnTask(compileTask, (bool _, ShaderCompileTask task) => - { - if (task.IsFaulted) - { - Logger.Warning?.Print(LogClass.Gpu, $"Host shader {key} is corrupted or incompatible, discarding..."); + ParallelDiskCacheLoader loader = new ParallelDiskCacheLoader( + _context, + _graphicsShaderCache, + _computeShaderCache, + _diskCacheHostStorage, + cancellationToken, + ShaderCacheStateUpdate); - _cacheManager.RemoveProgram(ref key); - return true; // Exit early, the decoding step failed. - } + loader.LoadShaders(); - byte[] code = entry.Code.AsSpan(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray(); - - ShaderCodeHolder shader = new ShaderCodeHolder(program, shaderProgramInfo, code); - - Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest..."); - - // Compile shader and create program as the shader program binary got invalidated. - shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, program.Code); - hostProgram = _context.Renderer.CreateProgram(new IShader[] { shader.HostShader }, new ShaderInfo(-1)); - - task.OnCompiled(hostProgram, (bool isNewProgramValid, ShaderCompileTask task) => - { - // As the host program was invalidated, save the new entry in the cache. 
- hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), new ShaderCodeHolder[] { shader }); - - if (!isReadOnly) - { - if (hasHostCache) - { - _cacheManager.ReplaceHostProgram(ref key, hostProgramBinary); - } - else - { - Logger.Warning?.Print(LogClass.Gpu, $"Add missing host shader {key} in cache (is the cache incomplete?)"); - - _cacheManager.AddHostProgram(ref key, hostProgramBinary); - } - } - - _cpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shader)); - - return true; - }); - - return false; // Not finished: still need to compile the host program. - }); - - return false; // Not finished: translating the program. - } - }); - } - else - { - Debug.Assert(cachedShaderEntries.Length == Constants.ShaderStages); - - ShaderCodeHolder[] shaders = new ShaderCodeHolder[cachedShaderEntries.Length]; - List shaderPrograms = new List(); - - TransformFeedbackDescriptor[] tfd = CacheHelper.ReadTransformFeedbackInformation(ref guestProgramReadOnlySpan, fileHeader); - - TranslationCounts counts = new TranslationCounts(); - - HostShaderCacheEntry[] hostShaderEntries = null; - - // Try loading host shader binary. 
- if (hasHostCache) - { - hostShaderEntries = HostShaderCacheEntry.Parse(hostProgramBinary, out ReadOnlySpan hostProgramBinarySpan); - hostProgramBinary = hostProgramBinarySpan.ToArray(); - - bool hasFragmentShader = false; - int fragmentOutputMap = -1; - int fragmentIndex = (int)ShaderStage.Fragment - 1; - - if (hostShaderEntries[fragmentIndex] != null && hostShaderEntries[fragmentIndex].Header.InUse) - { - hasFragmentShader = true; - fragmentOutputMap = hostShaderEntries[fragmentIndex].Header.FragmentOutputMap; - } - - hostProgram = _context.Renderer.LoadProgramBinary(hostProgramBinary, hasFragmentShader, new ShaderInfo(fragmentOutputMap)); - } - - ShaderCompileTask task = new ShaderCompileTask(taskDoneEvent); - activeTasks.Add(task); - - GuestShaderCacheEntry[] entries = cachedShaderEntries.ToArray(); - - task.OnCompiled(hostProgram, (bool isHostProgramValid, ShaderCompileTask task) => - { - Task compileTask = Task.Run(() => - { - TranslatorContext[] shaderContexts = null; - - if (!isHostProgramValid) - { - shaderContexts = new TranslatorContext[1 + entries.Length]; - - for (int i = 0; i < entries.Length; i++) - { - GuestShaderCacheEntry entry = entries[i]; - - if (entry == null) - { - continue; - } - - var binaryCode = new Memory(entry.Code); - - var gpuAccessor = new CachedGpuAccessor( - _context, - binaryCode, - binaryCode.Slice(binaryCode.Length - entry.Header.Cb1DataSize), - entry.Header.GpuAccessorHeader, - entry.TextureDescriptors, - tfd); - - var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, DefaultFlags); - - shaderContexts[i + 1] = Translator.CreateContext(0, gpuAccessor, options, counts); - - if (entry.Header.SizeA != 0) - { - var options2 = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, DefaultFlags | TranslationFlags.VertexA); - - shaderContexts[0] = Translator.CreateContext((ulong)entry.Header.Size, gpuAccessor, options2, counts); - } - } - } - - // Reconstruct code holder. 
- for (int i = 0; i < entries.Length; i++) - { - GuestShaderCacheEntry entry = entries[i]; - - if (entry == null) - { - continue; - } - - ShaderProgram program; - ShaderProgramInfo shaderProgramInfo; - - if (isHostProgramValid) - { - program = new ShaderProgram(entry.Header.Stage, ""); - shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo(); - } - else - { - int stageIndex = i + 1; - - TranslatorContext currentStage = shaderContexts[stageIndex]; - TranslatorContext nextStage = GetNextStageContext(shaderContexts, stageIndex); - TranslatorContext vertexA = stageIndex == 1 ? shaderContexts[0] : null; - - program = currentStage.Translate(out shaderProgramInfo, nextStage, vertexA); - } - - // NOTE: Vertex B comes first in the shader cache. - byte[] code = entry.Code.AsSpan(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray(); - byte[] code2 = entry.Header.SizeA != 0 ? entry.Code.AsSpan(entry.Header.Size, entry.Header.SizeA).ToArray() : null; - - shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, code, code2); - - shaderPrograms.Add(program); - } - }); - - task.OnTask(compileTask, (bool _, ShaderCompileTask task) => - { - if (task.IsFaulted) - { - Logger.Warning?.Print(LogClass.Gpu, $"Host shader {key} is corrupted or incompatible, discarding..."); - - _cacheManager.RemoveProgram(ref key); - return true; // Exit early, the decoding step failed. - } - - // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again. - if (!isHostProgramValid) - { - Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest..."); - - List hostShaders = new List(); - - // Compile shaders and create program as the shader program binary got invalidated. 
- for (int stage = 0; stage < Constants.ShaderStages; stage++) - { - ShaderProgram program = shaders[stage]?.Program; - - if (program == null) - { - continue; - } - - IShader hostShader = _context.Renderer.CompileShader(program.Stage, program.Code); - - shaders[stage].HostShader = hostShader; - - hostShaders.Add(hostShader); - } - - int fragmentIndex = (int)ShaderStage.Fragment - 1; - int fragmentOutputMap = -1; - - if (shaders[fragmentIndex] != null) - { - fragmentOutputMap = shaders[fragmentIndex].Info.FragmentOutputMap; - } - - hostProgram = _context.Renderer.CreateProgram(hostShaders.ToArray(), new ShaderInfo(fragmentOutputMap)); - - task.OnCompiled(hostProgram, (bool isNewProgramValid, ShaderCompileTask task) => - { - // As the host program was invalidated, save the new entry in the cache. - hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), shaders); - - if (!isReadOnly) - { - if (hasHostCache) - { - _cacheManager.ReplaceHostProgram(ref key, hostProgramBinary); - } - else - { - Logger.Warning?.Print(LogClass.Gpu, $"Add missing host shader {key} in cache (is the cache incomplete?)"); - - _cacheManager.AddHostProgram(ref key, hostProgramBinary); - } - } - - _gpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shaders)); - - return true; - }); - - return false; // Not finished: still need to compile the host program. - } - else - { - _gpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shaders)); - - return true; - } - }); - - return false; // Not finished: translating the program. - }); - } - - _shaderCount = ++programIndex; - } - - // Process the queue. - for (int i = 0; i < activeTasks.Count; i++) - { - ShaderCompileTask task = activeTasks[i]; - - if (task.IsDone()) - { - activeTasks.RemoveAt(i--); - } - } - - if (activeTasks.Count == maxTaskCount) - { - // Wait for a task to be done, or for 1ms. - // Host shader compilation cannot signal when it is done, - // so the 1ms timeout is required to poll status. 
- - taskDoneEvent.WaitOne(1); - } - } - - if (!isReadOnly) + int errorCount = loader.ErrorCount; + if (errorCount != 0) { - // Remove entries that are broken in the cache - _cacheManager.RemoveManifestEntries(invalidEntries); - _cacheManager.FlushToArchive(); - _cacheManager.Synchronize(); + Logger.Warning?.Print(LogClass.Gpu, $"Failed to load {errorCount} shaders from the disk cache."); } - - progressReportEvent.Set(); - progressReportThread?.Join(); - - ShaderCacheStateChanged?.Invoke(ShaderCacheState.Loaded, _shaderCount, _totalShaderCount); - - Logger.Info?.Print(LogClass.Gpu, $"Shader cache loaded {_shaderCount} entries."); } } /// - /// Raises ShaderCacheStateChanged events periodically. + /// Shader cache state update handler. /// - private void ReportProgress(object state) + /// Current state of the shader cache load process + /// Number of the current shader being processed + /// Total number of shaders to process + private void ShaderCacheStateUpdate(ShaderCacheState state, int current, int total) { - const int refreshRate = 50; // ms - - AutoResetEvent endEvent = (AutoResetEvent)state; - - int count = 0; - - do - { - int newCount = _shaderCount; - - if (count != newCount) - { - ShaderCacheStateChanged?.Invoke(ShaderCacheState.Loading, newCount, _totalShaderCount); - count = newCount; - } - } - while (!endEvent.WaitOne(refreshRate)); + ShaderCacheStateChanged?.Invoke(state, current, total); } /// @@ -557,112 +189,42 @@ private void ReportProgress(object state) /// This automatically translates, compiles and adds the code to the cache if not present. 
/// /// GPU channel - /// GPU accessor state + /// Texture pool state + /// Compute engine state /// GPU virtual address of the binary shader code - /// Local group size X of the computer shader - /// Local group size Y of the computer shader - /// Local group size Z of the computer shader - /// Local memory size of the compute shader - /// Shared memory size of the compute shader /// Compiled compute shader code - public ShaderBundle GetComputeShader( + public CachedShaderProgram GetComputeShader( GpuChannel channel, - GpuAccessorState gas, - ulong gpuVa, - int localSizeX, - int localSizeY, - int localSizeZ, - int localMemorySize, - int sharedMemorySize) + GpuChannelPoolState poolState, + GpuChannelComputeState computeState, + ulong gpuVa) { - bool isCached = _cpPrograms.TryGetValue(gpuVa, out List list); - - if (isCached) - { - foreach (ShaderBundle cachedCpShader in list) - { - if (IsShaderEqual(channel.MemoryManager, cachedCpShader, gpuVa)) - { - return cachedCpShader; - } - } - } - - TranslatorContext[] shaderContexts = new TranslatorContext[1]; - - shaderContexts[0] = DecodeComputeShader( - channel, - gas, - gpuVa, - localSizeX, - localSizeY, - localSizeZ, - localMemorySize, - sharedMemorySize); - - bool isShaderCacheEnabled = _cacheManager != null; - bool isShaderCacheReadOnly = false; - - Hash128 programCodeHash = default; - GuestShaderCacheEntry[] shaderCacheEntries = null; - - // Current shader cache doesn't support bindless textures - if (shaderContexts[0].UsedFeatures.HasFlag(FeatureFlags.Bindless)) + if (_cpPrograms.TryGetValue(gpuVa, out var cpShader) && IsShaderEqual(channel, poolState, cpShader, gpuVa)) { - isShaderCacheEnabled = false; + return cpShader; } - if (isShaderCacheEnabled) + if (_computeShaderCache.TryFind(channel, poolState, gpuVa, out cpShader, out byte[] cachedGuestCode)) { - isShaderCacheReadOnly = _cacheManager.IsReadOnly; - - // Compute hash and prepare data for shader disk cache comparison. 
- shaderCacheEntries = CacheHelper.CreateShaderCacheEntries(channel, shaderContexts); - programCodeHash = CacheHelper.ComputeGuestHashFromCache(shaderCacheEntries); + _cpPrograms[gpuVa] = cpShader; + return cpShader; } - ShaderBundle cpShader; - - // Search for the program hash in loaded shaders. - if (!isShaderCacheEnabled || !_cpProgramsDiskCache.TryGetValue(programCodeHash, out cpShader)) - { - if (isShaderCacheEnabled) - { - Logger.Debug?.Print(LogClass.Gpu, $"Shader {programCodeHash} not in cache, compiling!"); - } - - // The shader isn't currently cached, translate it and compile it. - ShaderCodeHolder shader = TranslateShader(_dumper, channel.MemoryManager, shaderContexts[0], null, null); - - shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, shader.Program.Code); + ShaderSpecializationState specState = new ShaderSpecializationState(computeState); + GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, computeState, default, specState); + GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gpuAccessorState); - IProgram hostProgram = _context.Renderer.CreateProgram(new IShader[] { shader.HostShader }, new ShaderInfo(-1)); + TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, gpuVa); - cpShader = new ShaderBundle(hostProgram, shader); - - if (isShaderCacheEnabled) - { - _cpProgramsDiskCache.Add(programCodeHash, cpShader); - - if (!isShaderCacheReadOnly) - { - byte[] guestProgramDump = CacheHelper.CreateGuestProgramDump(shaderCacheEntries); - _programsToSaveQueue.Enqueue((hostProgram, (byte[] hostProgramBinary) => - { - _cacheManager.SaveProgram(ref programCodeHash, guestProgramDump, HostShaderCacheEntry.Create(hostProgramBinary, new ShaderCodeHolder[] { shader })); - })); - } - } - } + TranslatedShader translatedShader = TranslateShader(_dumper, channel, translatorContext, cachedGuestCode); - if (!isCached) - { - list = new List(); + IProgram hostProgram = _context.Renderer.CreateProgram(new 
ShaderSource[] { CreateShaderSource(translatedShader.Program) }, new ShaderInfo(-1)); - _cpPrograms.Add(gpuVa, list); - } + cpShader = new CachedShaderProgram(hostProgram, specState, translatedShader.Shader); - list.Add(cpShader); + _computeShaderCache.Add(cpShader); + EnqueueProgramToSave(new ProgramToSave(cpShader, hostProgram)); + _cpPrograms[gpuVa] = cpShader; return cpShader; } @@ -676,143 +238,141 @@ public ShaderBundle GetComputeShader( /// /// GPU state /// GPU channel - /// GPU accessor state + /// Texture pool state + /// 3D engine state /// Addresses of the shaders for each stage /// Compiled graphics shader code - public ShaderBundle GetGraphicsShader(ref ThreedClassState state, GpuChannel channel, GpuAccessorState gas, ShaderAddresses addresses) + public CachedShaderProgram GetGraphicsShader( + ref ThreedClassState state, + GpuChannel channel, + GpuChannelPoolState poolState, + GpuChannelGraphicsState graphicsState, + ShaderAddresses addresses) { - bool isCached = _gpPrograms.TryGetValue(addresses, out List list); - - if (isCached) + if (_gpPrograms.TryGetValue(addresses, out var gpShaders) && IsShaderEqual(channel, poolState, gpShaders, addresses)) { - foreach (ShaderBundle cachedGpShaders in list) - { - if (IsShaderEqual(channel.MemoryManager, cachedGpShaders, addresses)) - { - return cachedGpShaders; - } - } + return gpShaders; } - TranslatorContext[] shaderContexts = new TranslatorContext[Constants.ShaderStages + 1]; - - TransformFeedbackDescriptor[] tfd = GetTransformFeedbackDescriptors(ref state); - - gas.TransformFeedbackDescriptors = tfd; - - TranslationCounts counts = new TranslationCounts(); - - if (addresses.VertexA != 0) + if (_graphicsShaderCache.TryFind(channel, poolState, addresses, out gpShaders, out var cachedGuestCode)) { - shaderContexts[0] = DecodeGraphicsShader(channel, gas, counts, DefaultFlags | TranslationFlags.VertexA, ShaderStage.Vertex, addresses.VertexA); + _gpPrograms[addresses] = gpShaders; + return gpShaders; } - 
shaderContexts[1] = DecodeGraphicsShader(channel, gas, counts, DefaultFlags, ShaderStage.Vertex, addresses.Vertex); - shaderContexts[2] = DecodeGraphicsShader(channel, gas, counts, DefaultFlags, ShaderStage.TessellationControl, addresses.TessControl); - shaderContexts[3] = DecodeGraphicsShader(channel, gas, counts, DefaultFlags, ShaderStage.TessellationEvaluation, addresses.TessEvaluation); - shaderContexts[4] = DecodeGraphicsShader(channel, gas, counts, DefaultFlags, ShaderStage.Geometry, addresses.Geometry); - shaderContexts[5] = DecodeGraphicsShader(channel, gas, counts, DefaultFlags, ShaderStage.Fragment, addresses.Fragment); - - bool isShaderCacheEnabled = _cacheManager != null; - bool isShaderCacheReadOnly = false; - - Hash128 programCodeHash = default; - GuestShaderCacheEntry[] shaderCacheEntries = null; - - // Current shader cache doesn't support bindless textures - for (int i = 0; i < shaderContexts.Length; i++) - { - if (shaderContexts[i] != null && shaderContexts[i].UsedFeatures.HasFlag(FeatureFlags.Bindless)) - { - isShaderCacheEnabled = false; - break; - } - } + TransformFeedbackDescriptor[] transformFeedbackDescriptors = GetTransformFeedbackDescriptors(ref state); - if (isShaderCacheEnabled) - { - isShaderCacheReadOnly = _cacheManager.IsReadOnly; + ShaderSpecializationState specState = new ShaderSpecializationState(graphicsState, transformFeedbackDescriptors); + GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, default, graphicsState, specState, transformFeedbackDescriptors); - // Compute hash and prepare data for shader disk cache comparison. 
- shaderCacheEntries = CacheHelper.CreateShaderCacheEntries(channel, shaderContexts); - programCodeHash = CacheHelper.ComputeGuestHashFromCache(shaderCacheEntries, tfd); - } + ReadOnlySpan addressesSpan = addresses.AsSpan(); - ShaderBundle gpShaders; + TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1]; + TranslatorContext nextStage = null; - // Search for the program hash in loaded shaders. - if (!isShaderCacheEnabled || !_gpProgramsDiskCache.TryGetValue(programCodeHash, out gpShaders)) + for (int stageIndex = Constants.ShaderStages - 1; stageIndex >= 0; stageIndex--) { - if (isShaderCacheEnabled) - { - Logger.Debug?.Print(LogClass.Gpu, $"Shader {programCodeHash} not in cache, compiling!"); - } - - // The shader isn't currently cached, translate it and compile it. - ShaderCodeHolder[] shaders = new ShaderCodeHolder[Constants.ShaderStages]; - - for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++) - { - shaders[stageIndex] = TranslateShader(_dumper, channel.MemoryManager, shaderContexts, stageIndex + 1); - } - - List hostShaders = new List(); + ulong gpuVa = addressesSpan[stageIndex + 1]; - for (int stage = 0; stage < Constants.ShaderStages; stage++) + if (gpuVa != 0) { - ShaderProgram program = shaders[stage]?.Program; + GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gpuAccessorState, stageIndex); + TranslatorContext currentStage = DecodeGraphicsShader(gpuAccessor, DefaultFlags, gpuVa); - if (program == null) + if (nextStage != null) { - continue; + currentStage.SetNextStage(nextStage); } - IShader hostShader = _context.Renderer.CompileShader(program.Stage, program.Code); - - shaders[stage].HostShader = hostShader; + if (stageIndex == 0 && addresses.VertexA != 0) + { + translatorContexts[0] = DecodeGraphicsShader(gpuAccessor, DefaultFlags | TranslationFlags.VertexA, addresses.VertexA); + } - hostShaders.Add(hostShader); + translatorContexts[stageIndex + 1] = currentStage; + nextStage = 
currentStage; } + } - int fragmentIndex = (int)ShaderStage.Fragment - 1; - int fragmentOutputMap = -1; + CachedShaderStage[] shaders = new CachedShaderStage[Constants.ShaderStages + 1]; + List shaderSources = new List(); + + for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++) + { + TranslatorContext currentStage = translatorContexts[stageIndex + 1]; - if (shaders[fragmentIndex] != null) + if (currentStage != null) { - fragmentOutputMap = shaders[fragmentIndex].Info.FragmentOutputMap; - } + ShaderProgram program; - IProgram hostProgram = _context.Renderer.CreateProgram(hostShaders.ToArray(), new ShaderInfo(fragmentOutputMap)); + if (stageIndex == 0 && translatorContexts[0] != null) + { + TranslatedShaderVertexPair translatedShader = TranslateShader( + _dumper, + channel, + currentStage, + translatorContexts[0], + cachedGuestCode.VertexACode, + cachedGuestCode.VertexBCode); + + shaders[0] = translatedShader.VertexA; + shaders[1] = translatedShader.VertexB; + program = translatedShader.Program; + } + else + { + byte[] code = cachedGuestCode.GetByIndex(stageIndex); - gpShaders = new ShaderBundle(hostProgram, shaders); + TranslatedShader translatedShader = TranslateShader(_dumper, channel, currentStage, code); - if (isShaderCacheEnabled) - { - _gpProgramsDiskCache.Add(programCodeHash, gpShaders); + shaders[stageIndex + 1] = translatedShader.Shader; + program = translatedShader.Program; + } - if (!isShaderCacheReadOnly) + if (program != null) { - byte[] guestProgramDump = CacheHelper.CreateGuestProgramDump(shaderCacheEntries, tfd); - _programsToSaveQueue.Enqueue((hostProgram, (byte[] hostProgramBinary) => - { - _cacheManager.SaveProgram(ref programCodeHash, guestProgramDump, HostShaderCacheEntry.Create(hostProgramBinary, shaders)); - })); + shaderSources.Add(CreateShaderSource(program)); } } } - if (!isCached) - { - list = new List(); + int fragmentOutputMap = shaders[5]?.Info.FragmentOutputMap ?? 
-1; + IProgram hostProgram = _context.Renderer.CreateProgram(shaderSources.ToArray(), new ShaderInfo(fragmentOutputMap)); - _gpPrograms.Add(addresses, list); - } + gpShaders = new CachedShaderProgram(hostProgram, specState, shaders); - list.Add(gpShaders); + _graphicsShaderCache.Add(gpShaders); + EnqueueProgramToSave(new ProgramToSave(gpShaders, hostProgram)); + _gpPrograms[addresses] = gpShaders; return gpShaders; } + /// + /// Creates a shader source for use with the backend from a translated shader program. + /// + /// Translated shader program + /// Shader source + public static ShaderSource CreateShaderSource(ShaderProgram program) + { + return new ShaderSource(program.Code, program.BinaryCode, program.Info.Stage, program.Language); + } + + /// + /// Puts a program on the queue of programs to be saved on the disk cache. + /// + /// + /// This will not do anything if disk shader cache is disabled. + /// + /// Program to be saved on disk + private void EnqueueProgramToSave(ProgramToSave programToSave) + { + if (_diskCacheHostStorage.CacheEnabled) + { + _programsToSaveQueue.Enqueue(programToSave); + } + } + /// /// Gets transform feedback state from the current GPU state. 
/// @@ -821,7 +381,6 @@ public ShaderBundle GetGraphicsShader(ref ThreedClassState state, GpuChannel cha private static TransformFeedbackDescriptor[] GetTransformFeedbackDescriptors(ref ThreedClassState state) { bool tfEnable = state.TfEnable; - if (!tfEnable) { return null; @@ -833,11 +392,11 @@ private static TransformFeedbackDescriptor[] GetTransformFeedbackDescriptors(ref { var tf = state.TfState[i]; - int length = (int)Math.Min((uint)tf.VaryingsCount, 0x80); - - var varyingLocations = MemoryMarshal.Cast(state.TfVaryingLocations[i].ToSpan()).Slice(0, length); - - descs[i] = new TransformFeedbackDescriptor(tf.BufferIndex, tf.Stride, varyingLocations.ToArray()); + descs[i] = new TransformFeedbackDescriptor( + tf.BufferIndex, + tf.Stride, + tf.VaryingsCount, + ref state.TfVaryingLocations[i]); } return descs; @@ -846,46 +405,54 @@ private static TransformFeedbackDescriptor[] GetTransformFeedbackDescriptors(ref /// /// Checks if compute shader code in memory is equal to the cached shader. /// - /// Memory manager used to access the GPU memory where the shader is located + /// GPU channel using the shader + /// GPU channel state to verify shader compatibility /// Cached compute shader /// GPU virtual address of the shader code in memory /// True if the code is different, false otherwise - private static bool IsShaderEqual(MemoryManager memoryManager, ShaderBundle cpShader, ulong gpuVa) + private static bool IsShaderEqual( + GpuChannel channel, + GpuChannelPoolState poolState, + CachedShaderProgram cpShader, + ulong gpuVa) { - return IsShaderEqual(memoryManager, cpShader.Shaders[0], gpuVa); + if (IsShaderEqual(channel.MemoryManager, cpShader.Shaders[0], gpuVa)) + { + return cpShader.SpecializationState.MatchesCompute(channel, poolState); + } + + return false; } /// /// Checks if graphics shader code from all stages in memory are equal to the cached shaders. 
/// - /// Memory manager used to access the GPU memory where the shader is located + /// GPU channel using the shader + /// GPU channel state to verify shader compatibility /// Cached graphics shaders /// GPU virtual addresses of all enabled shader stages /// True if the code is different, false otherwise - private static bool IsShaderEqual(MemoryManager memoryManager, ShaderBundle gpShaders, ShaderAddresses addresses) + private static bool IsShaderEqual( + GpuChannel channel, + GpuChannelPoolState poolState, + CachedShaderProgram gpShaders, + ShaderAddresses addresses) { - for (int stage = 0; stage < gpShaders.Shaders.Length; stage++) - { - ShaderCodeHolder shader = gpShaders.Shaders[stage]; + ReadOnlySpan addressesSpan = addresses.AsSpan(); - ulong gpuVa = 0; + for (int stageIndex = 0; stageIndex < gpShaders.Shaders.Length; stageIndex++) + { + CachedShaderStage shader = gpShaders.Shaders[stageIndex]; - switch (stage) - { - case 0: gpuVa = addresses.Vertex; break; - case 1: gpuVa = addresses.TessControl; break; - case 2: gpuVa = addresses.TessEvaluation; break; - case 3: gpuVa = addresses.Geometry; break; - case 4: gpuVa = addresses.Fragment; break; - } + ulong gpuVa = addressesSpan[stageIndex]; - if (!IsShaderEqual(memoryManager, shader, gpuVa, addresses.VertexA)) + if (!IsShaderEqual(channel.MemoryManager, shader, gpuVa)) { return false; } } - return true; + return gpShaders.SpecializationState.MatchesGraphics(channel, poolState); } /// @@ -894,9 +461,8 @@ private static bool IsShaderEqual(MemoryManager memoryManager, ShaderBundle gpSh /// Memory manager used to access the GPU memory where the shader is located /// Cached shader to compare with /// GPU virtual address of the binary shader code - /// Optional GPU virtual address of the "Vertex A" binary shader code /// True if the code is different, false otherwise - private static bool IsShaderEqual(MemoryManager memoryManager, ShaderCodeHolder shader, ulong gpuVa, ulong gpuVaA = 0) + private static bool 
IsShaderEqual(MemoryManager memoryManager, CachedShaderStage shader, ulong gpuVa) { if (shader == null) { @@ -905,47 +471,17 @@ private static bool IsShaderEqual(MemoryManager memoryManager, ShaderCodeHolder ReadOnlySpan memoryCode = memoryManager.GetSpan(gpuVa, shader.Code.Length); - bool equals = memoryCode.SequenceEqual(shader.Code); - - if (equals && shader.Code2 != null) - { - memoryCode = memoryManager.GetSpan(gpuVaA, shader.Code2.Length); - - equals = memoryCode.SequenceEqual(shader.Code2); - } - - return equals; + return memoryCode.SequenceEqual(shader.Code); } /// /// Decode the binary Maxwell shader code to a translator context. /// - /// GPU channel - /// GPU accessor state + /// GPU state accessor /// GPU virtual address of the binary shader code - /// Local group size X of the computer shader - /// Local group size Y of the computer shader - /// Local group size Z of the computer shader - /// Local memory size of the compute shader - /// Shared memory size of the compute shader /// The generated translator context - private TranslatorContext DecodeComputeShader( - GpuChannel channel, - GpuAccessorState gas, - ulong gpuVa, - int localSizeX, - int localSizeY, - int localSizeZ, - int localMemorySize, - int sharedMemorySize) + public static TranslatorContext DecodeComputeShader(IGpuAccessor gpuAccessor, ulong gpuVa) { - if (gpuVa == 0) - { - return null; - } - - GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gas, localSizeX, localSizeY, localSizeZ, localMemorySize, sharedMemorySize); - var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, DefaultFlags | TranslationFlags.Compute); return Translator.CreateContext(gpuVa, gpuAccessor, options); } @@ -956,126 +492,105 @@ private TranslatorContext DecodeComputeShader( /// /// This will combine the "Vertex A" and "Vertex B" shader stages, if specified, into one shader. 
/// - /// GPU channel - /// GPU accessor state - /// Cumulative shader resource counts + /// GPU state accessor /// Flags that controls shader translation - /// Shader stage /// GPU virtual address of the shader code /// The generated translator context - private TranslatorContext DecodeGraphicsShader( - GpuChannel channel, - GpuAccessorState gas, - TranslationCounts counts, - TranslationFlags flags, - ShaderStage stage, - ulong gpuVa) + public static TranslatorContext DecodeGraphicsShader(IGpuAccessor gpuAccessor, TranslationFlags flags, ulong gpuVa) { - if (gpuVa == 0) - { - return null; - } - - GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gas, (int)stage - 1); - var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags); - return Translator.CreateContext(gpuVa, gpuAccessor, options, counts); + return Translator.CreateContext(gpuVa, gpuAccessor, options); } /// /// Translates a previously generated translator context to something that the host API accepts. 
/// /// Optional shader code dumper - /// Memory manager used to access the GPU memory where the shader is located - /// Translator context of all available shader stages - /// Index on the stages array to translate + /// GPU channel using the shader + /// Translator context of the stage to be translated + /// Optional translator context of the shader that should be combined + /// Optional Maxwell binary code of the Vertex A shader, if present + /// Optional Maxwell binary code of the Vertex B or current stage shader, if present on cache /// Compiled graphics shader code - private static ShaderCodeHolder TranslateShader( + private static TranslatedShaderVertexPair TranslateShader( ShaderDumper dumper, - MemoryManager memoryManager, - TranslatorContext[] stages, - int stageIndex) + GpuChannel channel, + TranslatorContext currentStage, + TranslatorContext vertexA, + byte[] codeA, + byte[] codeB) { - TranslatorContext currentStage = stages[stageIndex]; - TranslatorContext nextStage = GetNextStageContext(stages, stageIndex); - TranslatorContext vertexA = stageIndex == 1 ? stages[0] : null; + ulong cb1DataAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(0, 1); - return TranslateShader(dumper, memoryManager, currentStage, nextStage, vertexA); - } + var memoryManager = channel.MemoryManager; - /// - /// Gets the next shader stage context, from an array of contexts and index of the current stage. 
- /// - /// Translator context of all available shader stages - /// Index on the stages array to translate - /// The translator context of the next stage, or null if inexistent - private static TranslatorContext GetNextStageContext(TranslatorContext[] stages, int stageIndex) - { - for (int nextStageIndex = stageIndex + 1; nextStageIndex < stages.Length; nextStageIndex++) + codeA ??= memoryManager.GetSpan(vertexA.Address, vertexA.Size).ToArray(); + codeB ??= memoryManager.GetSpan(currentStage.Address, currentStage.Size).ToArray(); + byte[] cb1DataA = memoryManager.Physical.GetSpan(cb1DataAddress, vertexA.Cb1DataSize).ToArray(); + byte[] cb1DataB = memoryManager.Physical.GetSpan(cb1DataAddress, currentStage.Cb1DataSize).ToArray(); + + ShaderDumpPaths pathsA = default; + ShaderDumpPaths pathsB = default; + + if (dumper != null) { - if (stages[nextStageIndex] != null) - { - return stages[nextStageIndex]; - } + pathsA = dumper.Dump(codeA, compute: false); + pathsB = dumper.Dump(codeB, compute: false); } - return null; + ShaderProgram program = currentStage.Translate(vertexA); + + pathsB.Prepend(program); + pathsA.Prepend(program); + + CachedShaderStage vertexAStage = new CachedShaderStage(null, codeA, cb1DataA); + CachedShaderStage vertexBStage = new CachedShaderStage(program.Info, codeB, cb1DataB); + + return new TranslatedShaderVertexPair(vertexAStage, vertexBStage, program); } /// /// Translates a previously generated translator context to something that the host API accepts. 
/// /// Optional shader code dumper - /// Memory manager used to access the GPU memory where the shader is located - /// Translator context of the stage to be translated - /// Translator context of the next active stage, if existent - /// Optional translator context of the shader that should be combined + /// GPU channel using the shader + /// Translator context of the stage to be translated + /// Optional Maxwell binary code of the current stage shader, if present on cache /// Compiled graphics shader code - private static ShaderCodeHolder TranslateShader( - ShaderDumper dumper, - MemoryManager memoryManager, - TranslatorContext currentStage, - TranslatorContext nextStage, - TranslatorContext vertexA) + private static TranslatedShader TranslateShader(ShaderDumper dumper, GpuChannel channel, TranslatorContext context, byte[] code) { - if (currentStage == null) - { - return null; - } + var memoryManager = channel.MemoryManager; - if (vertexA != null) - { - byte[] codeA = memoryManager.GetSpan(vertexA.Address, vertexA.Size).ToArray(); - byte[] codeB = memoryManager.GetSpan(currentStage.Address, currentStage.Size).ToArray(); + ulong cb1DataAddress = context.Stage == ShaderStage.Compute + ? channel.BufferManager.GetComputeUniformBufferAddress(1) + : channel.BufferManager.GetGraphicsUniformBufferAddress(StageToStageIndex(context.Stage), 1); - ShaderDumpPaths pathsA = default; - ShaderDumpPaths pathsB = default; + byte[] cb1Data = memoryManager.Physical.GetSpan(cb1DataAddress, context.Cb1DataSize).ToArray(); + code ??= memoryManager.GetSpan(context.Address, context.Size).ToArray(); - if (dumper != null) - { - pathsA = dumper.Dump(codeA, compute: false); - pathsB = dumper.Dump(codeB, compute: false); - } + ShaderDumpPaths paths = dumper?.Dump(code, context.Stage == ShaderStage.Compute) ?? 
default; + ShaderProgram program = context.Translate(); - ShaderProgram program = currentStage.Translate(out ShaderProgramInfo shaderProgramInfo, nextStage, vertexA); + paths.Prepend(program); - pathsB.Prepend(program); - pathsA.Prepend(program); + return new TranslatedShader(new CachedShaderStage(program.Info, code, cb1Data), program); + } - return new ShaderCodeHolder(program, shaderProgramInfo, codeB, codeA); - } - else + /// + /// Gets the index of a stage from a . + /// + /// Stage to get the index from + /// Stage index + private static int StageToStageIndex(ShaderStage stage) + { + return stage switch { - byte[] code = memoryManager.GetSpan(currentStage.Address, currentStage.Size).ToArray(); - - ShaderDumpPaths paths = dumper?.Dump(code, currentStage.Stage == ShaderStage.Compute) ?? default; - - ShaderProgram program = currentStage.Translate(out ShaderProgramInfo shaderProgramInfo, nextStage); - - paths.Prepend(program); - - return new ShaderCodeHolder(program, shaderProgramInfo, code); - } + ShaderStage.TessellationControl => 1, + ShaderStage.TessellationEvaluation => 2, + ShaderStage.Geometry => 3, + ShaderStage.Fragment => 4, + _ => 0 + }; } /// @@ -1084,23 +599,17 @@ private static ShaderCodeHolder TranslateShader( /// public void Dispose() { - foreach (List list in _cpPrograms.Values) + foreach (CachedShaderProgram program in _graphicsShaderCache.GetPrograms()) { - foreach (ShaderBundle bundle in list) - { - bundle.Dispose(); - } + program.Dispose(); } - foreach (List list in _gpPrograms.Values) + foreach (CachedShaderProgram program in _computeShaderCache.GetPrograms()) { - foreach (ShaderBundle bundle in list) - { - bundle.Dispose(); - } + program.Dispose(); } - _cacheManager?.Dispose(); + _cacheWriter?.Dispose(); } } } diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCacheHashTable.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCacheHashTable.cs new file mode 100644 index 000000000000..065f9ba90aa1 --- /dev/null +++ 
b/Ryujinx.Graphics.Gpu/Shader/ShaderCacheHashTable.cs @@ -0,0 +1,280 @@ +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Gpu.Shader.HashTable; +using Ryujinx.Graphics.Shader; +using System; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Gpu.Shader +{ + /// + /// Holds already cached code for a guest shader. + /// + struct CachedGraphicsGuestCode + { + public byte[] VertexACode; + public byte[] VertexBCode; + public byte[] TessControlCode; + public byte[] TessEvaluationCode; + public byte[] GeometryCode; + public byte[] FragmentCode; + + /// + /// Gets the guest code of a shader stage by its index. + /// + /// Index of the shader stage + /// Guest code, or null if not present + public byte[] GetByIndex(int stageIndex) + { + return stageIndex switch + { + 1 => TessControlCode, + 2 => TessEvaluationCode, + 3 => GeometryCode, + 4 => FragmentCode, + _ => VertexBCode + }; + } + } + + /// + /// Graphics shader cache hash table. + /// + class ShaderCacheHashTable + { + /// + /// Shader ID cache. + /// + private struct IdCache + { + private PartitionedHashTable _cache; + private int _id; + + /// + /// Initializes the state. + /// + public void Initialize() + { + _cache = new PartitionedHashTable(); + _id = 0; + } + + /// + /// Adds guest code to the cache. + /// + /// + /// If the code was already cached, it will just return the existing ID. + /// + /// Code to add + /// Unique ID for the guest code + public int Add(byte[] code) + { + int id = ++_id; + int cachedId = _cache.GetOrAdd(code, id); + if (cachedId != id) + { + --_id; + } + + return cachedId; + } + + /// + /// Tries to find cached guest code. 
+ /// + /// Code accessor used to read guest code to find a match on the hash table + /// ID of the guest code, if found + /// Cached guest code, if found + /// True if found, false otherwise + public bool TryFind(IDataAccessor dataAccessor, out int id, out byte[] data) + { + return _cache.TryFindItem(dataAccessor, out id, out data); + } + } + + /// + /// Guest code IDs of the guest shaders that when combined forms a single host program. + /// + private struct IdTable : IEquatable + { + public int VertexAId; + public int VertexBId; + public int TessControlId; + public int TessEvaluationId; + public int GeometryId; + public int FragmentId; + + public override bool Equals(object obj) + { + return obj is IdTable other && Equals(other); + } + + public bool Equals(IdTable other) + { + return other.VertexAId == VertexAId && + other.VertexBId == VertexBId && + other.TessControlId == TessControlId && + other.TessEvaluationId == TessEvaluationId && + other.GeometryId == GeometryId && + other.FragmentId == FragmentId; + } + + public override int GetHashCode() + { + return HashCode.Combine(VertexAId, VertexBId, TessControlId, TessEvaluationId, GeometryId, FragmentId); + } + } + + private IdCache _vertexACache; + private IdCache _vertexBCache; + private IdCache _tessControlCache; + private IdCache _tessEvaluationCache; + private IdCache _geometryCache; + private IdCache _fragmentCache; + + private readonly Dictionary _shaderPrograms; + + /// + /// Creates a new graphics shader cache hash table. + /// + public ShaderCacheHashTable() + { + _vertexACache.Initialize(); + _vertexBCache.Initialize(); + _tessControlCache.Initialize(); + _tessEvaluationCache.Initialize(); + _geometryCache.Initialize(); + _fragmentCache.Initialize(); + + _shaderPrograms = new Dictionary(); + } + + /// + /// Adds a program to the cache. 
+ /// + /// Program to be added + public void Add(CachedShaderProgram program) + { + IdTable idTable = new IdTable(); + + foreach (var shader in program.Shaders) + { + if (shader == null) + { + continue; + } + + if (shader.Info != null) + { + switch (shader.Info.Stage) + { + case ShaderStage.Vertex: + idTable.VertexBId = _vertexBCache.Add(shader.Code); + break; + case ShaderStage.TessellationControl: + idTable.TessControlId = _tessControlCache.Add(shader.Code); + break; + case ShaderStage.TessellationEvaluation: + idTable.TessEvaluationId = _tessEvaluationCache.Add(shader.Code); + break; + case ShaderStage.Geometry: + idTable.GeometryId = _geometryCache.Add(shader.Code); + break; + case ShaderStage.Fragment: + idTable.FragmentId = _fragmentCache.Add(shader.Code); + break; + } + } + else + { + idTable.VertexAId = _vertexACache.Add(shader.Code); + } + } + + if (!_shaderPrograms.TryGetValue(idTable, out ShaderSpecializationList specList)) + { + specList = new ShaderSpecializationList(); + _shaderPrograms.Add(idTable, specList); + } + + specList.Add(program); + } + + /// + /// Tries to find a cached program. + /// + /// + /// Even if false is returned, might still contain cached guest code. + /// This can be used to avoid additional allocations for guest code that was already cached. 
+ /// + /// GPU channel + /// Texture pool state + /// Guest addresses of the shaders to find + /// Cached host program for the given state, if found + /// Cached guest code, if any found + /// True if a cached host program was found, false otherwise + public bool TryFind( + GpuChannel channel, + GpuChannelPoolState poolState, + ShaderAddresses addresses, + out CachedShaderProgram program, + out CachedGraphicsGuestCode guestCode) + { + var memoryManager = channel.MemoryManager; + IdTable idTable = new IdTable(); + guestCode = new CachedGraphicsGuestCode(); + + program = null; + + bool found = TryGetId(_vertexACache, memoryManager, addresses.VertexA, out idTable.VertexAId, out guestCode.VertexACode); + found &= TryGetId(_vertexBCache, memoryManager, addresses.VertexB, out idTable.VertexBId, out guestCode.VertexBCode); + found &= TryGetId(_tessControlCache, memoryManager, addresses.TessControl, out idTable.TessControlId, out guestCode.TessControlCode); + found &= TryGetId(_tessEvaluationCache, memoryManager, addresses.TessEvaluation, out idTable.TessEvaluationId, out guestCode.TessEvaluationCode); + found &= TryGetId(_geometryCache, memoryManager, addresses.Geometry, out idTable.GeometryId, out guestCode.GeometryCode); + found &= TryGetId(_fragmentCache, memoryManager, addresses.Fragment, out idTable.FragmentId, out guestCode.FragmentCode); + + if (found && _shaderPrograms.TryGetValue(idTable, out ShaderSpecializationList specList)) + { + return specList.TryFindForGraphics(channel, poolState, out program); + } + + return false; + } + + /// + /// Tries to get the ID of a single cached shader stage. 
+ /// + /// ID cache of the stage + /// GPU memory manager + /// Base address of the shader + /// ID, if found + /// Cached guest code, if found + /// True if a cached shader is found, false otherwise + private static bool TryGetId(IdCache idCache, MemoryManager memoryManager, ulong baseAddress, out int id, out byte[] data) + { + if (baseAddress == 0) + { + id = 0; + data = null; + return true; + } + + ShaderCodeAccessor codeAccessor = new ShaderCodeAccessor(memoryManager, baseAddress); + return idCache.TryFind(codeAccessor, out id, out data); + } + + /// + /// Gets all programs that have been added to the table. + /// + /// Programs added to the table + public IEnumerable GetPrograms() + { + foreach (var specList in _shaderPrograms.Values) + { + foreach (var program in specList) + { + yield return program; + } + } + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCodeAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCodeAccessor.cs new file mode 100644 index 000000000000..dbb33d224120 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCodeAccessor.cs @@ -0,0 +1,32 @@ +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Gpu.Shader.HashTable; +using System; + +namespace Ryujinx.Graphics.Gpu.Shader +{ + /// + /// Shader code accessor. + /// + struct ShaderCodeAccessor : IDataAccessor + { + private readonly MemoryManager _memoryManager; + private readonly ulong _baseAddress; + + /// + /// Creates a new shader code accessor. 
+ /// + /// Memory manager used to access the shader code + /// Base address of the shader in memory + public ShaderCodeAccessor(MemoryManager memoryManager, ulong baseAddress) + { + _memoryManager = memoryManager; + _baseAddress = baseAddress; + } + + /// + public ReadOnlySpan GetSpan(int offset, int length) + { + return _memoryManager.GetSpanMapped(_baseAddress + (ulong)offset, length); + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCodeHolder.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCodeHolder.cs deleted file mode 100644 index dbf2d6f591c1..000000000000 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCodeHolder.cs +++ /dev/null @@ -1,52 +0,0 @@ -using Ryujinx.Graphics.GAL; -using Ryujinx.Graphics.Shader; - -namespace Ryujinx.Graphics.Gpu.Shader -{ - /// - /// Cached shader code for a single shader stage. - /// - class ShaderCodeHolder - { - /// - /// Shader program containing translated code. - /// - public ShaderProgram Program { get; } - - /// - /// Shader program information. - /// - public ShaderProgramInfo Info { get; } - - /// - /// Host shader object. - /// - /// Null if the host shader program cache is in use. - public IShader HostShader { get; set; } - - /// - /// Maxwell binary shader code. - /// - public byte[] Code { get; } - - /// - /// Optional maxwell binary shader code for "Vertex A" shader. - /// - public byte[] Code2 { get; } - - /// - /// Creates a new instace of the shader code holder. 
- /// - /// Shader program - /// Shader program information - /// Maxwell binary shader code - /// Optional binary shader code of the "Vertex A" shader, when combined with "Vertex B" - public ShaderCodeHolder(ShaderProgram program, ShaderProgramInfo info, byte[] code, byte[] code2 = null) - { - Program = program; - Info = info; - Code = code; - Code2 = code2; - } - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs deleted file mode 100644 index a9283de23d46..000000000000 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs +++ /dev/null @@ -1,95 +0,0 @@ -using Ryujinx.Graphics.GAL; -using System.Threading; -using System.Threading.Tasks; - -namespace Ryujinx.Graphics.Gpu.Shader -{ - delegate bool ShaderCompileTaskCallback(bool success, ShaderCompileTask task); - - /// - /// A class that represents a shader compilation. - /// - class ShaderCompileTask - { - private bool _compiling; - - private Task _programsTask; - private IProgram _program; - - private ShaderCompileTaskCallback _action; - private AutoResetEvent _taskDoneEvent; - - public bool IsFaulted => _programsTask.IsFaulted; - - /// - /// Create a new shader compile task, with an event to signal whenever a subtask completes. - /// - /// Event to signal when a subtask completes - public ShaderCompileTask(AutoResetEvent taskDoneEvent) - { - _taskDoneEvent = taskDoneEvent; - } - - /// - /// Check the completion status of the shader compile task, and run callbacks on step completion. - /// Calling this periodically is required to progress through steps of the compilation. - /// - /// True if the task is complete, false if it is in progress - public bool IsDone() - { - if (_compiling) - { - ProgramLinkStatus status = _program.CheckProgramLink(false); - - if (status != ProgramLinkStatus.Incomplete) - { - return _action(status == ProgramLinkStatus.Success, this); - } - } - else - { - // Waiting on the task. 
- - if (_programsTask.IsCompleted) - { - return _action(true, this); - } - } - - return false; - } - - /// - /// Run a callback when the specified task has completed. - /// - /// The task object that needs to complete - /// The action to perform when it is complete - public void OnTask(Task task, ShaderCompileTaskCallback action) - { - _compiling = false; - - _programsTask = task; - _action = action; - - task.ContinueWith(task => _taskDoneEvent.Set()); - } - - /// - /// Run a callback when the specified program has been linked. - /// - /// The program that needs to be linked - /// The action to perform when linking is complete - public void OnCompiled(IProgram program, ShaderCompileTaskCallback action) - { - _compiling = true; - - _program = program; - _action = action; - - if (program == null) - { - action(false, this); - } - } - } -} diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs new file mode 100644 index 000000000000..87e087544e73 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs @@ -0,0 +1,76 @@ +using System.Collections; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Gpu.Shader +{ + /// + /// List of cached shader programs that differs only by specialization state. + /// + class ShaderSpecializationList : IEnumerable + { + private readonly List _entries = new List(); + + /// + /// Adds a program to the list. + /// + /// Program to be added + public void Add(CachedShaderProgram program) + { + _entries.Add(program); + } + + /// + /// Tries to find an existing 3D program on the cache. 
+ /// + /// GPU channel + /// Texture pool state + /// Cached program, if found + /// True if a compatible program is found, false otherwise + public bool TryFindForGraphics(GpuChannel channel, GpuChannelPoolState poolState, out CachedShaderProgram program) + { + foreach (var entry in _entries) + { + if (entry.SpecializationState.MatchesGraphics(channel, poolState)) + { + program = entry; + return true; + } + } + + program = default; + return false; + } + + /// + /// Tries to find an existing compute program on the cache. + /// + /// GPU channel + /// Texture pool state + /// Cached program, if found + /// True if a compatible program is found, false otherwise + public bool TryFindForCompute(GpuChannel channel, GpuChannelPoolState poolState, out CachedShaderProgram program) + { + foreach (var entry in _entries) + { + if (entry.SpecializationState.MatchesCompute(channel, poolState)) + { + program = entry; + return true; + } + } + + program = default; + return false; + } + + public IEnumerator GetEnumerator() + { + return _entries.GetEnumerator(); + } + + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs new file mode 100644 index 000000000000..2bbc3d2c1861 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs @@ -0,0 +1,615 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Gpu.Shader.DiskCache; +using Ryujinx.Graphics.Shader; +using System; +using System.Collections.Generic; +using System.Numerics; + +namespace Ryujinx.Graphics.Gpu.Shader +{ + class ShaderSpecializationState + { + private const uint ComsMagic = (byte)'C' | ((byte)'O' << 8) | ((byte)'M' << 16) | ((byte)'S' << 24); + private const uint GfxsMagic = (byte)'G' | ((byte)'F' << 8) | ((byte)'X' << 16) | ((byte)'S' << 24); + private const uint TfbdMagic = (byte)'T' | ((byte)'F' << 8) | 
((byte)'B' << 16) | ((byte)'D' << 24); + private const uint TexkMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'K' << 24); + private const uint TexsMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'S' << 24); + + /// + /// Flags indicating GPU state that is used by the shader. + /// + [Flags] + private enum QueriedStateFlags + { + EarlyZForce = 1 << 0, + PrimitiveTopology = 1 << 1, + TessellationMode = 1 << 2, + TransformFeedback = 1 << 3 + } + + private QueriedStateFlags _queriedState; + private bool _compute; + private byte _constantBufferUsePerStage; + + /// + /// Compute engine state. + /// + public GpuChannelComputeState ComputeState; + + /// + /// 3D engine state. + /// + public GpuChannelGraphicsState GraphicsState; + + /// + /// Contant buffers bound at the time the shader was compiled, per stage. + /// + public Array5 ConstantBufferUse; + + /// + /// Transform feedback buffers active at the time the shader was compiled. + /// + public TransformFeedbackDescriptor[] TransformFeedbackDescriptors; + + /// + /// Flags indicating texture state that is used by the shader. + /// + [Flags] + private enum QueriedTextureStateFlags + { + TextureFormat = 1 << 0, + SamplerType = 1 << 1, + CoordNormalized = 1 << 2 + } + + /// + /// Reference type wrapping a value. + /// + private class Box + { + /// + /// Wrapped value. + /// + public T Value; + } + + /// + /// State of a texture or image that is accessed by the shader. + /// + private struct TextureSpecializationState + { + // New fields should be added to the end of the struct to keep disk shader cache compatibility. + + /// + /// Flags indicating which state of the texture the shader depends on. + /// + public QueriedTextureStateFlags QueriedFlags; + + /// + /// Encoded texture format value. + /// + public uint Format; + + /// + /// True if the texture format is sRGB, false otherwise. + /// + public bool FormatSrgb; + + /// + /// Texture target. 
+ /// + public Image.TextureTarget TextureTarget; + + /// + /// Indicates if the coordinates used to sample the texture are normalized or not (0.0..1.0 or 0..Width/Height). + /// + public bool CoordNormalized; + } + + /// + /// Texture binding information, used to identify each texture accessed by the shader. + /// + private struct TextureKey : IEquatable + { + // New fields should be added to the end of the struct to keep disk shader cache compatibility. + + /// + /// Shader stage where the texture is used. + /// + public readonly int StageIndex; + + /// + /// Texture handle offset in words on the texture buffer. + /// + public readonly int Handle; + + /// + /// Constant buffer slot of the texture buffer (-1 to use the texture buffer index GPU register). + /// + public readonly int CbufSlot; + + /// + /// Creates a new texture key. + /// + /// Shader stage where the texture is used + /// Texture handle offset in words on the texture buffer + /// Constant buffer slot of the texture buffer (-1 to use the texture buffer index GPU register) + public TextureKey(int stageIndex, int handle, int cbufSlot) + { + StageIndex = stageIndex; + Handle = handle; + CbufSlot = cbufSlot; + } + + public override bool Equals(object obj) + { + return obj is TextureKey textureKey && Equals(textureKey); + } + + public bool Equals(TextureKey other) + { + return StageIndex == other.StageIndex && Handle == other.Handle && CbufSlot == other.CbufSlot; + } + + public override int GetHashCode() + { + return HashCode.Combine(StageIndex, Handle, CbufSlot); + } + } + + private readonly Dictionary> _textureSpecialization; + + /// + /// Creates a new instance of the shader specialization state. + /// + private ShaderSpecializationState() + { + _textureSpecialization = new Dictionary>(); + } + + /// + /// Creates a new instance of the shader specialization state. 
+ /// + /// Current compute engine state + public ShaderSpecializationState(GpuChannelComputeState state) : this() + { + ComputeState = state; + _compute = true; + } + + /// + /// Creates a new instance of the shader specialization state. + /// + /// Current 3D engine state + /// Optional transform feedback buffers in use, if any + public ShaderSpecializationState(GpuChannelGraphicsState state, TransformFeedbackDescriptor[] descriptors) : this() + { + GraphicsState = state; + _compute = false; + + if (descriptors != null) + { + TransformFeedbackDescriptors = descriptors; + _queriedState |= QueriedStateFlags.TransformFeedback; + } + } + + /// + /// Indicates that the shader accesses the early Z force state. + /// + public void RecordEarlyZForce() + { + _queriedState |= QueriedStateFlags.EarlyZForce; + } + + /// + /// Indicates that the shader accesses the primitive topology state. + /// + public void RecordPrimitiveTopology() + { + _queriedState |= QueriedStateFlags.PrimitiveTopology; + } + + /// + /// Indicates that the shader accesses the tessellation mode state. + /// + public void RecordTessellationMode() + { + _queriedState |= QueriedStateFlags.TessellationMode; + } + + /// + /// Indicates that the shader accesses the constant buffer use state. + /// + /// Shader stage index + /// Mask indicating the constant buffers bound at the time of the shader compilation + public void RecordConstantBufferUse(int stageIndex, uint useMask) + { + ConstantBufferUse[stageIndex] = useMask; + _constantBufferUsePerStage |= (byte)(1 << stageIndex); + } + + /// + /// Indicates that a given texture is accessed by the shader. 
+ /// + /// Shader stage where the texture is used + /// Offset in words of the texture handle on the texture buffer + /// Slot of the texture buffer constant buffer + /// Descriptor of the texture + public void RegisterTexture(int stageIndex, int handle, int cbufSlot, Image.TextureDescriptor descriptor) + { + Box state = GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot); + state.Value.Format = descriptor.UnpackFormat(); + state.Value.FormatSrgb = descriptor.UnpackSrgb(); + state.Value.TextureTarget = descriptor.UnpackTextureTarget(); + state.Value.CoordNormalized = descriptor.UnpackTextureCoordNormalized(); + } + + /// + /// Indicates that a given texture is accessed by the shader. + /// + /// Shader stage where the texture is used + /// Offset in words of the texture handle on the texture buffer + /// Slot of the texture buffer constant buffer + /// Maxwell texture format value + /// Whether the texture format is a sRGB format + /// Texture target type + /// Whether the texture coordinates used on the shader are considered normalized + public void RegisterTexture( + int stageIndex, + int handle, + int cbufSlot, + uint format, + bool formatSrgb, + Image.TextureTarget target, + bool coordNormalized) + { + Box state = GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot); + state.Value.Format = format; + state.Value.FormatSrgb = formatSrgb; + state.Value.TextureTarget = target; + state.Value.CoordNormalized = coordNormalized; + } + + /// + /// Indicates that the format of a given texture was used during the shader translation process. 
+ /// + /// Shader stage where the texture is used + /// Offset in words of the texture handle on the texture buffer + /// Slot of the texture buffer constant buffer + public void RecordTextureFormat(int stageIndex, int handle, int cbufSlot) + { + Box state = GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot); + state.Value.QueriedFlags |= QueriedTextureStateFlags.TextureFormat; + } + + /// + /// Indicates that the target of a given texture was used during the shader translation process. + /// + /// Shader stage where the texture is used + /// Offset in words of the texture handle on the texture buffer + /// Slot of the texture buffer constant buffer + public void RecordTextureSamplerType(int stageIndex, int handle, int cbufSlot) + { + Box state = GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot); + state.Value.QueriedFlags |= QueriedTextureStateFlags.SamplerType; + } + + /// + /// Indicates that the coordinate normalization state of a given texture was used during the shader translation process. + /// + /// Shader stage where the texture is used + /// Offset in words of the texture handle on the texture buffer + /// Slot of the texture buffer constant buffer + public void RecordTextureCoordNormalized(int stageIndex, int handle, int cbufSlot) + { + Box state = GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot); + state.Value.QueriedFlags |= QueriedTextureStateFlags.CoordNormalized; + } + + /// + /// Checks if a given texture was registered on this specialization state. + /// + /// Shader stage where the texture is used + /// Offset in words of the texture handle on the texture buffer + /// Slot of the texture buffer constant buffer + public bool TextureRegistered(int stageIndex, int handle, int cbufSlot) + { + return GetTextureSpecState(stageIndex, handle, cbufSlot) != null; + } + + /// + /// Gets the recorded format of a given texture. 
+ /// + /// Shader stage where the texture is used + /// Offset in words of the texture handle on the texture buffer + /// Slot of the texture buffer constant buffer + public (uint, bool) GetFormat(int stageIndex, int handle, int cbufSlot) + { + TextureSpecializationState state = GetTextureSpecState(stageIndex, handle, cbufSlot).Value; + return (state.Format, state.FormatSrgb); + } + + /// + /// Gets the recorded target of a given texture. + /// + /// Shader stage where the texture is used + /// Offset in words of the texture handle on the texture buffer + /// Slot of the texture buffer constant buffer + public Image.TextureTarget GetTextureTarget(int stageIndex, int handle, int cbufSlot) + { + return GetTextureSpecState(stageIndex, handle, cbufSlot).Value.TextureTarget; + } + + /// + /// Gets the recorded coordinate normalization state of a given texture. + /// + /// Shader stage where the texture is used + /// Offset in words of the texture handle on the texture buffer + /// Slot of the texture buffer constant buffer + public bool GetCoordNormalized(int stageIndex, int handle, int cbufSlot) + { + return GetTextureSpecState(stageIndex, handle, cbufSlot).Value.CoordNormalized; + } + + /// + /// Gets texture specialization state for a given texture, or create a new one if not present. + /// + /// Shader stage where the texture is used + /// Offset in words of the texture handle on the texture buffer + /// Slot of the texture buffer constant buffer + /// Texture specialization state + private Box GetOrCreateTextureSpecState(int stageIndex, int handle, int cbufSlot) + { + TextureKey key = new TextureKey(stageIndex, handle, cbufSlot); + + if (!_textureSpecialization.TryGetValue(key, out Box state)) + { + _textureSpecialization.Add(key, state = new Box()); + } + + return state; + } + + /// + /// Gets texture specialization state for a given texture. 
+ /// + /// Shader stage where the texture is used + /// Offset in words of the texture handle on the texture buffer + /// Slot of the texture buffer constant buffer + /// Texture specialization state + private Box GetTextureSpecState(int stageIndex, int handle, int cbufSlot) + { + TextureKey key = new TextureKey(stageIndex, handle, cbufSlot); + + if (_textureSpecialization.TryGetValue(key, out Box state)) + { + return state; + } + + return null; + } + + /// + /// Checks if the recorded state matches the current GPU 3D engine state. + /// + /// GPU channel + /// Texture pool state + /// True if the state matches, false otherwise + public bool MatchesGraphics(GpuChannel channel, GpuChannelPoolState poolState) + { + return Matches(channel, poolState, isCompute: false); + } + + /// + /// Checks if the recorded state matches the current GPU compute engine state. + /// + /// GPU channel + /// Texture pool state + /// True if the state matches, false otherwise + public bool MatchesCompute(GpuChannel channel, GpuChannelPoolState poolState) + { + return Matches(channel, poolState, isCompute: true); + } + + /// + /// Checks if the recorded state matches the current GPU state. + /// + /// GPU channel + /// Texture pool state + /// Indicates whether the check is requested by the 3D or compute engine + /// True if the state matches, false otherwise + private bool Matches(GpuChannel channel, GpuChannelPoolState poolState, bool isCompute) + { + int constantBufferUsePerStageMask = _constantBufferUsePerStage; + + while (constantBufferUsePerStageMask != 0) + { + int index = BitOperations.TrailingZeroCount(constantBufferUsePerStageMask); + + uint useMask = isCompute + ? 
channel.BufferManager.GetComputeUniformBufferUseMask() + : channel.BufferManager.GetGraphicsUniformBufferUseMask(index); + + if (ConstantBufferUse[index] != useMask) + { + return false; + } + + constantBufferUsePerStageMask &= ~(1 << index); + } + + foreach (var kv in _textureSpecialization) + { + TextureKey textureKey = kv.Key; + + (int textureBufferIndex, int samplerBufferIndex) = TextureHandle.UnpackSlots(textureKey.CbufSlot, poolState.TextureBufferIndex); + + ulong textureCbAddress; + ulong samplerCbAddress; + + if (isCompute) + { + textureCbAddress = channel.BufferManager.GetComputeUniformBufferAddress(textureBufferIndex); + samplerCbAddress = channel.BufferManager.GetComputeUniformBufferAddress(samplerBufferIndex); + } + else + { + textureCbAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(textureKey.StageIndex, textureBufferIndex); + samplerCbAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(textureKey.StageIndex, samplerBufferIndex); + } + + if (!channel.MemoryManager.Physical.IsMapped(textureCbAddress) || !channel.MemoryManager.Physical.IsMapped(samplerCbAddress)) + { + continue; + } + + Image.TextureDescriptor descriptor; + + if (isCompute) + { + descriptor = channel.TextureManager.GetComputeTextureDescriptor( + poolState.TexturePoolGpuVa, + poolState.TextureBufferIndex, + poolState.TexturePoolMaximumId, + textureKey.Handle, + textureKey.CbufSlot); + } + else + { + descriptor = channel.TextureManager.GetGraphicsTextureDescriptor( + poolState.TexturePoolGpuVa, + poolState.TextureBufferIndex, + poolState.TexturePoolMaximumId, + textureKey.StageIndex, + textureKey.Handle, + textureKey.CbufSlot); + } + + Box specializationState = kv.Value; + + if (specializationState.Value.QueriedFlags.HasFlag(QueriedTextureStateFlags.CoordNormalized) && + specializationState.Value.CoordNormalized != descriptor.UnpackTextureCoordNormalized()) + { + return false; + } + } + + return true; + } + + /// + /// Reads shader specialization state that has 
been serialized. + /// + /// Data reader + /// Shader specialization state + public static ShaderSpecializationState Read(ref BinarySerializer dataReader) + { + ShaderSpecializationState specState = new ShaderSpecializationState(); + + dataReader.Read(ref specState._queriedState); + dataReader.Read(ref specState._compute); + + if (specState._compute) + { + dataReader.ReadWithMagicAndSize(ref specState.ComputeState, ComsMagic); + } + else + { + dataReader.ReadWithMagicAndSize(ref specState.GraphicsState, GfxsMagic); + } + + dataReader.Read(ref specState._constantBufferUsePerStage); + + int constantBufferUsePerStageMask = specState._constantBufferUsePerStage; + + while (constantBufferUsePerStageMask != 0) + { + int index = BitOperations.TrailingZeroCount(constantBufferUsePerStageMask); + dataReader.Read(ref specState.ConstantBufferUse[index]); + constantBufferUsePerStageMask &= ~(1 << index); + } + + if (specState._queriedState.HasFlag(QueriedStateFlags.TransformFeedback)) + { + ushort tfCount = 0; + dataReader.Read(ref tfCount); + specState.TransformFeedbackDescriptors = new TransformFeedbackDescriptor[tfCount]; + + for (int index = 0; index < tfCount; index++) + { + dataReader.ReadWithMagicAndSize(ref specState.TransformFeedbackDescriptors[index], TfbdMagic); + } + } + + ushort count = 0; + dataReader.Read(ref count); + + for (int index = 0; index < count; index++) + { + TextureKey textureKey = default; + Box textureState = new Box(); + + dataReader.ReadWithMagicAndSize(ref textureKey, TexkMagic); + dataReader.ReadWithMagicAndSize(ref textureState.Value, TexsMagic); + + specState._textureSpecialization[textureKey] = textureState; + } + + return specState; + } + + /// + /// Serializes the shader specialization state. 
+ /// + /// Data writer + public void Write(ref BinarySerializer dataWriter) + { + dataWriter.Write(ref _queriedState); + dataWriter.Write(ref _compute); + + if (_compute) + { + dataWriter.WriteWithMagicAndSize(ref ComputeState, ComsMagic); + } + else + { + dataWriter.WriteWithMagicAndSize(ref GraphicsState, GfxsMagic); + } + + dataWriter.Write(ref _constantBufferUsePerStage); + + int constantBufferUsePerStageMask = _constantBufferUsePerStage; + + while (constantBufferUsePerStageMask != 0) + { + int index = BitOperations.TrailingZeroCount(constantBufferUsePerStageMask); + dataWriter.Write(ref ConstantBufferUse[index]); + constantBufferUsePerStageMask &= ~(1 << index); + } + + if (_queriedState.HasFlag(QueriedStateFlags.TransformFeedback)) + { + ushort tfCount = (ushort)TransformFeedbackDescriptors.Length; + dataWriter.Write(ref tfCount); + + for (int index = 0; index < TransformFeedbackDescriptors.Length; index++) + { + dataWriter.WriteWithMagicAndSize(ref TransformFeedbackDescriptors[index], TfbdMagic); + } + } + + ushort count = (ushort)_textureSpecialization.Count; + dataWriter.Write(ref count); + + foreach (var kv in _textureSpecialization) + { + var textureKey = kv.Key; + var textureState = kv.Value; + + dataWriter.WriteWithMagicAndSize(ref textureKey, TexkMagic); + dataWriter.WriteWithMagicAndSize(ref textureState.Value, TexsMagic); + } + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/TransformFeedbackDescriptor.cs b/Ryujinx.Graphics.Gpu/Shader/TransformFeedbackDescriptor.cs index eaa889cc5a41..09f1df760921 100644 --- a/Ryujinx.Graphics.Gpu/Shader/TransformFeedbackDescriptor.cs +++ b/Ryujinx.Graphics.Gpu/Shader/TransformFeedbackDescriptor.cs @@ -1,19 +1,58 @@ +using Ryujinx.Common.Memory; using System; +using System.Runtime.InteropServices; namespace Ryujinx.Graphics.Gpu.Shader { + /// + /// Transform feedback descriptor. 
+ /// struct TransformFeedbackDescriptor { - public int BufferIndex { get; } - public int Stride { get; } + // New fields should be added to the end of the struct to keep disk shader cache compatibility. - public byte[] VaryingLocations { get; } + /// + /// Index of the transform feedback. + /// + public readonly int BufferIndex; - public TransformFeedbackDescriptor(int bufferIndex, int stride, byte[] varyingLocations) + /// + /// Amount of bytes consumed per vertex. + /// + public readonly int Stride; + + /// + /// Number of varyings written into the buffer. + /// + public readonly int VaryingCount; + + /// + /// Location of varyings to be written into the buffer. Each byte is one location. + /// + public Array32 VaryingLocations; // Making this readonly breaks AsSpan + + /// + /// Creates a new transform feedback descriptor. + /// + /// Index of the transform feedback + /// Amount of bytes consumed per vertex + /// Number of varyings written into the buffer. Indicates size in bytes of varyingLocations + /// Location of varyings to be written into the buffer. Each byte is one location + public TransformFeedbackDescriptor(int bufferIndex, int stride, int varyingCount, ref Array32 varyingLocations) + { + BufferIndex = bufferIndex; + Stride = stride; + VaryingCount = varyingCount; + VaryingLocations = varyingLocations; + } + + /// + /// Gets a span of the VaryingLocations. + /// + /// Span of varying locations + public ReadOnlySpan AsSpan() { - BufferIndex = bufferIndex; - Stride = stride; - VaryingLocations = varyingLocations ?? 
throw new ArgumentNullException(nameof(varyingLocations)); + return MemoryMarshal.Cast(VaryingLocations.ToSpan()).Slice(0, Math.Min(128, VaryingCount)); } } } diff --git a/Ryujinx.Graphics.OpenGL/EnumConversion.cs b/Ryujinx.Graphics.OpenGL/EnumConversion.cs index 22e816056e6b..24cf1fc4d6f6 100644 --- a/Ryujinx.Graphics.OpenGL/EnumConversion.cs +++ b/Ryujinx.Graphics.OpenGL/EnumConversion.cs @@ -1,6 +1,7 @@ using OpenTK.Graphics.OpenGL; using Ryujinx.Common.Logging; using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Shader; namespace Ryujinx.Graphics.OpenGL { @@ -528,5 +529,19 @@ public static All Convert(this LogicalOp op) return All.Never; } + + public static ShaderType Convert(this ShaderStage stage) + { + return stage switch + { + ShaderStage.Compute => ShaderType.ComputeShader, + ShaderStage.Vertex => ShaderType.VertexShader, + ShaderStage.TessellationControl => ShaderType.TessControlShader, + ShaderStage.TessellationEvaluation => ShaderType.TessEvaluationShader, + ShaderStage.Geometry => ShaderType.GeometryShader, + ShaderStage.Fragment => ShaderType.FragmentShader, + _ => ShaderType.VertexShader + }; + } } } diff --git a/Ryujinx.Graphics.OpenGL/Program.cs b/Ryujinx.Graphics.OpenGL/Program.cs index d2b559a8caaf..0cc722e6c20d 100644 --- a/Ryujinx.Graphics.OpenGL/Program.cs +++ b/Ryujinx.Graphics.OpenGL/Program.cs @@ -1,6 +1,8 @@ using OpenTK.Graphics.OpenGL; using Ryujinx.Common.Logging; using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Shader; +using Ryujinx.Graphics.Shader.Translation; using System; using System.Buffers.Binary; @@ -24,46 +26,66 @@ public bool IsLinked } private ProgramLinkStatus _status = ProgramLinkStatus.Incomplete; - private IShader[] _shaders; + private int[] _shaderHandles; public bool HasFragmentShader; public int FragmentOutputMap { get; } - public Program(IShader[] shaders, int fragmentOutputMap) + public Program(ShaderSource[] shaders, int fragmentOutputMap) { Handle = GL.CreateProgram(); GL.ProgramParameter(Handle, 
ProgramParameterName.ProgramBinaryRetrievableHint, 1); + _shaderHandles = new int[shaders.Length]; + for (int index = 0; index < shaders.Length; index++) { - Shader shader = (Shader)shaders[index]; + ShaderSource shader = shaders[index]; - if (shader.IsFragment) + if (shader.Stage == ShaderStage.Fragment) { HasFragmentShader = true; } - GL.AttachShader(Handle, shader.Handle); + int shaderHandle = GL.CreateShader(shader.Stage.Convert()); + + switch (shader.Language) + { + case TargetLanguage.Glsl: + GL.ShaderSource(shaderHandle, shader.Code); + GL.CompileShader(shaderHandle); + break; + case TargetLanguage.Spirv: + GL.ShaderBinary(1, ref shaderHandle, (BinaryFormat)All.ShaderBinaryFormatSpirVArb, shader.BinaryCode, shader.BinaryCode.Length); + GL.SpecializeShader(shaderHandle, "main", 0, (int[])null, (int[])null); + break; + } + + GL.AttachShader(Handle, shaderHandle); + + _shaderHandles[index] = shaderHandle; } GL.LinkProgram(Handle); - _shaders = shaders; FragmentOutputMap = fragmentOutputMap; } public Program(ReadOnlySpan code, bool hasFragmentShader, int fragmentOutputMap) { - BinaryFormat binaryFormat = (BinaryFormat)BinaryPrimitives.ReadInt32LittleEndian(code.Slice(code.Length - 4, 4)); - Handle = GL.CreateProgram(); - unsafe + if (code.Length >= 4) { - fixed (byte* ptr = code) + BinaryFormat binaryFormat = (BinaryFormat)BinaryPrimitives.ReadInt32LittleEndian(code.Slice(code.Length - 4, 4)); + + unsafe { - GL.ProgramBinary(Handle, binaryFormat, (IntPtr)ptr, code.Length - 4); + fixed (byte* ptr = code) + { + GL.ProgramBinary(Handle, binaryFormat, (IntPtr)ptr, code.Length - 4); + } } } @@ -89,18 +111,7 @@ public ProgramLinkStatus CheckProgramLink(bool blocking) } GL.GetProgram(Handle, GetProgramParameterName.LinkStatus, out int status); - - if (_shaders != null) - { - for (int index = 0; index < _shaders.Length; index++) - { - int shaderHandle = ((Shader)_shaders[index]).Handle; - - GL.DetachShader(Handle, shaderHandle); - } - - _shaders = null; - } + 
DeleteShaders(); if (status == 0) { @@ -129,10 +140,25 @@ public byte[] GetBinary() return data; } + private void DeleteShaders() + { + if (_shaderHandles != null) + { + foreach (int shaderHandle in _shaderHandles) + { + GL.DetachShader(Handle, shaderHandle); + GL.DeleteShader(shaderHandle); + } + + _shaderHandles = null; + } + } + public void Dispose() { if (Handle != 0) { + DeleteShaders(); GL.DeleteProgram(Handle); Handle = 0; diff --git a/Ryujinx.Graphics.OpenGL/Renderer.cs b/Ryujinx.Graphics.OpenGL/Renderer.cs index 8a6b4689f6da..2a9ab42232fd 100644 --- a/Ryujinx.Graphics.OpenGL/Renderer.cs +++ b/Ryujinx.Graphics.OpenGL/Renderer.cs @@ -1,11 +1,10 @@ -using OpenTK.Graphics; -using OpenTK.Graphics.OpenGL; +using OpenTK.Graphics.OpenGL; using Ryujinx.Common.Configuration; using Ryujinx.Common.Logging; using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.OpenGL.Image; using Ryujinx.Graphics.OpenGL.Queries; -using Ryujinx.Graphics.Shader; +using Ryujinx.Graphics.Shader.Translation; using System; namespace Ryujinx.Graphics.OpenGL @@ -54,11 +53,6 @@ public Renderer() ResourcePool = new ResourcePool(); } - public IShader CompileShader(ShaderStage stage, string code) - { - return new Shader(stage, code); - } - public BufferHandle CreateBuffer(int size) { BufferCount++; @@ -66,7 +60,7 @@ public BufferHandle CreateBuffer(int size) return Buffer.Create(size); } - public IProgram CreateProgram(IShader[] shaders, ShaderInfo info) + public IProgram CreateProgram(ShaderSource[] shaders, ShaderInfo info) { return new Program(shaders, info.FragmentOutputMap); } @@ -101,6 +95,8 @@ public ReadOnlySpan GetBufferData(BufferHandle buffer, int offset, int siz public Capabilities GetCapabilities() { return new Capabilities( + api: TargetApi.OpenGL, + vendorName: GpuVendor, hasFrontFacingBug: HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows, hasVectorIndexingBug: HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows, supportsAstcCompression: 
HwCapabilities.SupportsAstcCompression, diff --git a/Ryujinx.Graphics.OpenGL/Shader.cs b/Ryujinx.Graphics.OpenGL/Shader.cs deleted file mode 100644 index 8374fa6269f8..000000000000 --- a/Ryujinx.Graphics.OpenGL/Shader.cs +++ /dev/null @@ -1,42 +0,0 @@ -using OpenTK.Graphics.OpenGL; -using Ryujinx.Graphics.GAL; -using Ryujinx.Graphics.Shader; - -namespace Ryujinx.Graphics.OpenGL -{ - class Shader : IShader - { - public int Handle { get; private set; } - public bool IsFragment { get; } - - public Shader(ShaderStage stage, string code) - { - ShaderType type = stage switch - { - ShaderStage.Compute => ShaderType.ComputeShader, - ShaderStage.Vertex => ShaderType.VertexShader, - ShaderStage.TessellationControl => ShaderType.TessControlShader, - ShaderStage.TessellationEvaluation => ShaderType.TessEvaluationShader, - ShaderStage.Geometry => ShaderType.GeometryShader, - ShaderStage.Fragment => ShaderType.FragmentShader, - _ => ShaderType.VertexShader - }; - - Handle = GL.CreateShader(type); - IsFragment = stage == ShaderStage.Fragment; - - GL.ShaderSource(Handle, code); - GL.CompileShader(Handle); - } - - public void Dispose() - { - if (Handle != 0) - { - GL.DeleteShader(Handle); - - Handle = 0; - } - } - } -} diff --git a/Ryujinx.Graphics.Shader/BufferDescriptor.cs b/Ryujinx.Graphics.Shader/BufferDescriptor.cs index a3af6e41f931..4ce8a896df7c 100644 --- a/Ryujinx.Graphics.Shader/BufferDescriptor.cs +++ b/Ryujinx.Graphics.Shader/BufferDescriptor.cs @@ -2,6 +2,8 @@ namespace Ryujinx.Graphics.Shader { public struct BufferDescriptor { + // New fields should be added to the end of the struct to keep disk shader cache compatibility. 
+ public readonly int Binding; public readonly int Slot; public BufferUsageFlags Flags; diff --git a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs index 6fa4055aa384..60ad540cbfd4 100644 --- a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs +++ b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs @@ -373,7 +373,7 @@ private static bool FindBrxTargets(ShaderConfig config, IEnumerable block for (int i = 0; i < cbOffsetsCount; i++) { - uint targetOffset = config.GpuAccessor.ConstantBuffer1Read(cbBaseOffset + i * 4); + uint targetOffset = config.ConstantBuffer1Read(cbBaseOffset + i * 4); Block target = getBlock(baseOffset + targetOffset); target.Predecessors.Add(block); block.Successors.Add(target); diff --git a/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/Ryujinx.Graphics.Shader/IGpuAccessor.cs index b2512868ea47..9c624d90d865 100644 --- a/Ryujinx.Graphics.Shader/IGpuAccessor.cs +++ b/Ryujinx.Graphics.Shader/IGpuAccessor.cs @@ -2,153 +2,341 @@ namespace Ryujinx.Graphics.Shader { + /// + /// GPU state access interface. + /// public interface IGpuAccessor { + /// + /// Prints a log message. + /// + /// Message to print void Log(string message) { // No default log output. } + /// + /// Reads data from the constant buffer 1. + /// + /// Offset in bytes to read from + /// Value at the given offset uint ConstantBuffer1Read(int offset) { return 0; } + /// + /// Gets a span of the specified memory location, containing shader code. + /// + /// GPU virtual address of the data + /// Minimum size that the returned span may have + /// Span of the memory location ReadOnlySpan GetCode(ulong address, int minimumSize); + /// + /// Queries the binding number of a constant buffer. + /// + /// Constant buffer index + /// Binding number + int QueryBindingConstantBuffer(int index) + { + return index; + } + + /// + /// Queries the binding number of a storage buffer. 
+ /// + /// Storage buffer index + /// Binding number + int QueryBindingStorageBuffer(int index) + { + return index; + } + + /// + /// Queries the binding number of a texture. + /// + /// Texture index + /// Binding number + int QueryBindingTexture(int index) + { + return index; + } + + /// + /// Queries the binding number of an image. + /// + /// Image index + /// Binding number + int QueryBindingImage(int index) + { + return index; + } + + /// + /// Queries Local Size X for compute shaders. + /// + /// Local Size X int QueryComputeLocalSizeX() { return 1; } + /// + /// Queries Local Size Y for compute shaders. + /// + /// Local Size Y int QueryComputeLocalSizeY() { return 1; } + /// + /// Queries Local Size Z for compute shaders. + /// + /// Local Size Z int QueryComputeLocalSizeZ() { return 1; } + /// + /// Queries Local Memory size in bytes for compute shaders. + /// + /// Local Memory size in bytes int QueryComputeLocalMemorySize() { return 0x1000; } + /// + /// Queries Shared Memory size in bytes for compute shaders. + /// + /// Shared Memory size in bytes int QueryComputeSharedMemorySize() { return 0xc000; } + /// + /// Queries Constant Buffer usage information. + /// + /// A mask where each bit set indicates a bound constant buffer uint QueryConstantBufferUse() { return 0; } + /// + /// Queries host about the presence of the FrontFacing built-in variable bug. + /// + /// True if the bug is present on the host device used, false otherwise bool QueryHostHasFrontFacingBug() { return false; } + /// + /// Queries host about the presence of the vector indexing bug. + /// + /// True if the bug is present on the host device used, false otherwise bool QueryHostHasVectorIndexingBug() { return false; } + /// + /// Queries host storage buffer alignment required. + /// + /// Host storage buffer alignment in bytes int QueryHostStorageBufferOffsetAlignment() { return 16; } + /// + /// Queries host support for texture formats with BGRA component order (such as BGRA8). 
+ /// + /// True if BGRA formats are supported, false otherwise bool QueryHostSupportsBgraFormat() { return true; } + /// + /// Queries host support for fragment shader ordering critical sections on the shader code. + /// + /// True if fragment shader interlock is supported, false otherwise bool QueryHostSupportsFragmentShaderInterlock() { return true; } + /// + /// Queries host support for fragment shader ordering scoped critical sections on the shader code. + /// + /// True if fragment shader ordering is supported, false otherwise bool QueryHostSupportsFragmentShaderOrderingIntel() { return false; } + /// + /// Queries host support for readable images without an explicit format declaration on the shader. + /// + /// True if formatted image load is supported, false otherwise bool QueryHostSupportsImageLoadFormatted() { return true; } + /// + /// Queries host GPU non-constant texture offset support. + /// + /// True if the GPU and driver support non-constant texture offsets, false otherwise bool QueryHostSupportsNonConstantTextureOffset() { return true; } + /// + /// Queries host GPU shader ballot support. + /// + /// True if the GPU and driver support shader ballot, false otherwise bool QueryHostSupportsShaderBallot() { return true; } + /// + /// Queries host GPU texture shadow LOD support. + /// + /// True if the GPU and driver support texture shadow LOD, false otherwise bool QueryHostSupportsTextureShadowLod() { return true; } + /// + /// Queries sampler type information. + /// + /// Texture handle + /// Constant buffer slot for the texture handle + /// The sampler type value for the given handle SamplerType QuerySamplerType(int handle, int cbufSlot = -1) { return SamplerType.Texture2D; } - bool QueryIsTextureRectangle(int handle, int cbufSlot = -1) + /// + /// Queries texture coordinate normalization information. 
+ /// + /// Texture handle + /// Constant buffer slot for the texture handle + /// True if the coordinates are normalized, false otherwise + bool QueryTextureCoordNormalized(int handle, int cbufSlot = -1) { return false; } + /// + /// Queries current primitive topology for geometry shaders. + /// + /// Current primitive topology InputTopology QueryPrimitiveTopology() { return InputTopology.Points; } + /// + /// Queries the tessellation evaluation shader primitive winding order. + /// + /// True if the primitive winding order is clockwise, false if counter-clockwise bool QueryTessCw() { return false; } + /// + /// Queries the tessellation evaluation shader abstract patch type. + /// + /// Abstract patch type TessPatchType QueryTessPatchType() { return TessPatchType.Triangles; } + /// + /// Queries the tessellation evaluation shader spacing between tessellated vertices of the patch. + /// + /// Spacing between tessellated vertices of the patch TessSpacing QueryTessSpacing() { return TessSpacing.EqualSpacing; } + /// + /// Queries texture format information, for shaders using image load or store. + /// + /// + /// This only returns non-compressed color formats. + /// If the format of the texture is a compressed, depth or unsupported format, then a default value is returned. + /// + /// Texture handle + /// Constant buffer slot for the texture handle + /// Color format of the non-compressed texture TextureFormat QueryTextureFormat(int handle, int cbufSlot = -1) { return TextureFormat.R8G8B8A8Unorm; } + /// + /// Queries transform feedback enable state. + /// + /// True if the shader uses transform feedback, false otherwise bool QueryTransformFeedbackEnabled() { return false; } + /// + /// Queries the varying locations that should be written to the transform feedback buffer. 
+ /// + /// Index of the transform feedback buffer + /// Varying locations for the specified buffer ReadOnlySpan QueryTransformFeedbackVaryingLocations(int bufferIndex) { return ReadOnlySpan.Empty; } + /// + /// Queries the stride (in bytes) of the per vertex data written into the transform feedback buffer. + /// + /// Index of the transform feedback buffer + /// Stride for the specified buffer int QueryTransformFeedbackStride(int bufferIndex) { return 0; } + /// + /// Queries if host state forces early depth testing. + /// + /// True if early depth testing is forced bool QueryEarlyZForce() { return false; } + + /// + /// Registers a texture used by the shader. + /// + /// Texture handle word offset + /// Constant buffer slot where the texture handle is located + void RegisterTexture(int handle, int cbufSlot) + { + // Only useful when recording information for a disk shader cache. + } } } diff --git a/Ryujinx.Graphics.Shader/ShaderProgram.cs b/Ryujinx.Graphics.Shader/ShaderProgram.cs index dd87b67d49a1..29fff21e62c1 100644 --- a/Ryujinx.Graphics.Shader/ShaderProgram.cs +++ b/Ryujinx.Graphics.Shader/ShaderProgram.cs @@ -1,25 +1,28 @@ +using Ryujinx.Graphics.Shader.Translation; using System; namespace Ryujinx.Graphics.Shader { public class ShaderProgram { - public ShaderStage Stage { get; } + public ShaderProgramInfo Info { get; } + public TargetLanguage Language { get; } public string Code { get; private set; } public byte[] BinaryCode { get; } - private ShaderProgram(ShaderStage stage) + private ShaderProgram(ShaderProgramInfo info, TargetLanguage language) { - Stage = stage; + Info = info; + Language = language; } - public ShaderProgram(ShaderStage stage, string code) : this(stage) + public ShaderProgram(ShaderProgramInfo info, TargetLanguage language, string code) : this(info, language) { Code = code; } - public ShaderProgram(ShaderStage stage, byte[] binaryCode) : this(stage) + public ShaderProgram(ShaderProgramInfo info, TargetLanguage language, byte[] 
binaryCode) : this(info, language) { BinaryCode = binaryCode; } diff --git a/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs b/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs index d1c1b94571ba..659f6167e8f8 100644 --- a/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs +++ b/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs @@ -10,6 +10,7 @@ public class ShaderProgramInfo public ReadOnlyCollection Textures { get; } public ReadOnlyCollection Images { get; } + public ShaderStage Stage { get; } public bool UsesInstanceId { get; } public bool UsesRtLayer { get; } public byte ClipDistancesWritten { get; } @@ -20,6 +21,7 @@ public ShaderProgramInfo( BufferDescriptor[] sBuffers, TextureDescriptor[] textures, TextureDescriptor[] images, + ShaderStage stage, bool usesInstanceId, bool usesRtLayer, byte clipDistancesWritten, @@ -30,6 +32,7 @@ public ShaderProgramInfo( Textures = Array.AsReadOnly(textures); Images = Array.AsReadOnly(images); + Stage = stage; UsesInstanceId = usesInstanceId; UsesRtLayer = usesRtLayer; ClipDistancesWritten = clipDistancesWritten; diff --git a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs index 31c71f20fec7..ce79f3b8e33f 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs @@ -74,7 +74,7 @@ public static StructuredProgramInfo MakeStructuredProgram(Function[] functions, for (int j = 0; j < locations.Length; j++) { byte location = locations[j]; - if (location < 0x80) + if (location < 0xc0) { context.Info.TransformFeedbackOutputs[location] = new TransformFeedbackOutput(tfbIndex, j * 4, stride); } diff --git a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs index 933f265f9900..2dc2396433aa 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs @@ 
-30,7 +30,7 @@ public StructuredProgramInfo() { Functions = new List(); - TransformFeedbackOutputs = new TransformFeedbackOutput[0x80]; + TransformFeedbackOutputs = new TransformFeedbackOutput[0xc0]; } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/TextureDescriptor.cs b/Ryujinx.Graphics.Shader/TextureDescriptor.cs index b7b0ae12c5a4..85ea9adbe093 100644 --- a/Ryujinx.Graphics.Shader/TextureDescriptor.cs +++ b/Ryujinx.Graphics.Shader/TextureDescriptor.cs @@ -2,6 +2,8 @@ namespace Ryujinx.Graphics.Shader { public struct TextureDescriptor { + // New fields should be added to the end of the struct to keep disk shader cache compatibility. + public readonly int Binding; public readonly SamplerType Type; diff --git a/Ryujinx.Graphics.Shader/Translation/Rewriter.cs b/Ryujinx.Graphics.Shader/Translation/Rewriter.cs index 910faf1ca078..e9b073ab26ee 100644 --- a/Ryujinx.Graphics.Shader/Translation/Rewriter.cs +++ b/Ryujinx.Graphics.Shader/Translation/Rewriter.cs @@ -164,9 +164,9 @@ private static LinkedListNode RewriteTextureSample(LinkedListNode bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; - bool isRect = !isBindless && config.GpuAccessor.QueryIsTextureRectangle(texOp.Handle, texOp.CbufSlot); + bool isCoordNormalized = !isBindless && config.GpuAccessor.QueryTextureCoordNormalized(texOp.Handle, texOp.CbufSlot); - if (!(hasInvalidOffset || isRect)) + if (!hasInvalidOffset && isCoordNormalized) { return node; } @@ -263,7 +263,7 @@ private static LinkedListNode RewriteTextureSample(LinkedListNode hasInvalidOffset &= !areAllOffsetsConstant; - if (!(hasInvalidOffset || isRect)) + if (!hasInvalidOffset && isCoordNormalized) { return node; } @@ -300,15 +300,17 @@ Operand Float(Operand value) return res; } - // Emulate texture rectangle by normalizing the coordinates on the shader. 
- // When sampler*Rect is used, the coords are expected to the in the [0, W or H] range, + // Emulate non-normalized coordinates by normalizing the coordinates on the shader. + // Without normalization, the coordinates are expected to the in the [0, W or H] range, // and otherwise, it is expected to be in the [0, 1] range. // We normalize by dividing the coords by the texture size. - if (isRect && !intCoords) + if (!isCoordNormalized && !intCoords) { config.SetUsedFeature(FeatureFlags.IntegerSampling); - for (int index = 0; index < coordsCount; index++) + int normCoordsCount = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : coordsCount; + + for (int index = 0; index < normCoordsCount; index++) { Operand coordSize = Local(); diff --git a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs index 6bb045ec1dd1..23b8b9510466 100644 --- a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs +++ b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs @@ -41,9 +41,7 @@ class ShaderConfig public FeatureFlags UsedFeatures { get; private set; } - public HashSet TextureHandlesForCache { get; } - - private readonly TranslationCounts _counts; + public int Cb1DataSize { get; private set; } public bool NextUsesFixedFuncAttributes { get; private set; } public int UsedInputAttributes { get; private set; } @@ -109,21 +107,22 @@ private struct TextureMeta private TextureDescriptor[] _cachedTextureDescriptors; private TextureDescriptor[] _cachedImageDescriptors; - public int FirstConstantBufferBinding { get; private set; } - public int FirstStorageBufferBinding { get; private set; } + private int _firstConstantBufferBinding; + private int _firstStorageBufferBinding; + + public int FirstConstantBufferBinding => _firstConstantBufferBinding; + public int FirstStorageBufferBinding => _firstStorageBufferBinding; - public ShaderConfig(IGpuAccessor gpuAccessor, TranslationOptions options, TranslationCounts counts) + public 
ShaderConfig(IGpuAccessor gpuAccessor, TranslationOptions options) { - Stage = ShaderStage.Compute; - GpuAccessor = gpuAccessor; - Options = options; - _counts = counts; - TextureHandlesForCache = new HashSet(); - _usedTextures = new Dictionary(); - _usedImages = new Dictionary(); + Stage = ShaderStage.Compute; + GpuAccessor = gpuAccessor; + Options = options; + _usedTextures = new Dictionary(); + _usedImages = new Dictionary(); } - public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options, TranslationCounts counts) : this(gpuAccessor, options, counts) + public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options) : this(gpuAccessor, options) { Stage = header.Stage; GpPassthrough = header.Stage == ShaderStage.Geometry && header.GpPassthrough; @@ -144,6 +143,16 @@ public int GetDepthRegister() return BitOperations.PopCount((uint)OmapTargets) + 1; } + public uint ConstantBuffer1Read(int offset) + { + if (Cb1DataSize < offset + 4) + { + Cb1DataSize = offset + 4; + } + + return GpuAccessor.ConstantBuffer1Read(offset); + } + public TextureFormat GetTextureFormat(int handle, int cbufSlot = -1) { // When the formatted load extension is supported, we don't need to @@ -197,8 +206,6 @@ public void InheritFrom(ShaderConfig other) ClipDistancesWritten |= other.ClipDistancesWritten; UsedFeatures |= other.UsedFeatures; - TextureHandlesForCache.UnionWith(other.TextureHandlesForCache); - UsedInputAttributes |= other.UsedInputAttributes; UsedOutputAttributes |= other.UsedOutputAttributes; _usedConstantBuffers |= other._usedConstantBuffers; @@ -391,6 +398,8 @@ public void SetUsedTexture( bool intCoords = flags.HasFlag(TextureFlags.IntCoords) || inst == Instruction.TextureSize; SetUsedTextureOrImage(_usedTextures, cbufSlot, handle, type, TextureFormat.Unknown, intCoords, false, accurateType, coherent); } + + GpuAccessor.RegisterTexture(handle, cbufSlot); } private void SetUsedTextureOrImage( @@ -485,13 +494,12 @@ 
public BufferDescriptor[] GetConstantBufferDescriptors() usedMask |= (int)GpuAccessor.QueryConstantBufferUse(); } - FirstConstantBufferBinding = _counts.UniformBuffersCount; - return _cachedConstantBufferDescriptors = GetBufferDescriptors( usedMask, 0, UsedFeatures.HasFlag(FeatureFlags.CbIndexing), - _counts.IncrementUniformBuffersCount); + out _firstConstantBufferBinding, + GpuAccessor.QueryBindingConstantBuffer); } public BufferDescriptor[] GetStorageBufferDescriptors() @@ -501,21 +509,23 @@ public BufferDescriptor[] GetStorageBufferDescriptors() return _cachedStorageBufferDescriptors; } - FirstStorageBufferBinding = _counts.StorageBuffersCount; - return _cachedStorageBufferDescriptors = GetBufferDescriptors( _usedStorageBuffers, _usedStorageBuffersWrite, true, - _counts.IncrementStorageBuffersCount); + out _firstStorageBufferBinding, + GpuAccessor.QueryBindingStorageBuffer); } private static BufferDescriptor[] GetBufferDescriptors( int usedMask, int writtenMask, bool isArray, - Func getBindingCallback) + out int firstBinding, + Func getBindingCallback) { + firstBinding = 0; + bool hasFirstBinding = false; var descriptors = new BufferDescriptor[BitOperations.PopCount((uint)usedMask)]; int lastSlot = -1; @@ -529,13 +539,25 @@ private static BufferDescriptor[] GetBufferDescriptors( // The next array entries also consumes bindings, even if they are unused. 
for (int j = lastSlot + 1; j < slot; j++) { - getBindingCallback(); + int binding = getBindingCallback(j); + + if (!hasFirstBinding) + { + firstBinding = binding; + hasFirstBinding = true; + } } } lastSlot = slot; - descriptors[i] = new BufferDescriptor(getBindingCallback(), slot); + descriptors[i] = new BufferDescriptor(getBindingCallback(slot), slot); + + if (!hasFirstBinding) + { + firstBinding = descriptors[i].Binding; + hasFirstBinding = true; + } if ((writtenMask & (1 << slot)) != 0) { @@ -550,15 +572,15 @@ private static BufferDescriptor[] GetBufferDescriptors( public TextureDescriptor[] GetTextureDescriptors() { - return _cachedTextureDescriptors ??= GetTextureOrImageDescriptors(_usedTextures, _counts.IncrementTexturesCount); + return _cachedTextureDescriptors ??= GetTextureOrImageDescriptors(_usedTextures, GpuAccessor.QueryBindingTexture); } public TextureDescriptor[] GetImageDescriptors() { - return _cachedImageDescriptors ??= GetTextureOrImageDescriptors(_usedImages, _counts.IncrementImagesCount); + return _cachedImageDescriptors ??= GetTextureOrImageDescriptors(_usedImages, GpuAccessor.QueryBindingImage); } - private static TextureDescriptor[] GetTextureOrImageDescriptors(Dictionary dict, Func getBindingCallback) + private static TextureDescriptor[] GetTextureOrImageDescriptors(Dictionary dict, Func getBindingCallback) { var descriptors = new TextureDescriptor[dict.Count]; @@ -568,7 +590,7 @@ private static TextureDescriptor[] GetTextureOrImageDescriptors(Dictionary new ShaderProgram(info, TargetLanguage.Glsl, GlslGenerator.Generate(sInfo, config)), + _ => throw new NotImplementedException(config.Options.TargetLanguage.ToString()) + }; } - private static TranslatorContext DecodeShader(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options, TranslationCounts counts) + private static TranslatorContext DecodeShader(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options) { ShaderConfig config; DecodedProgram program; @@ -119,13 
+107,13 @@ private static TranslatorContext DecodeShader(ulong address, IGpuAccessor gpuAcc if ((options.Flags & TranslationFlags.Compute) != 0) { - config = new ShaderConfig(gpuAccessor, options, counts); + config = new ShaderConfig(gpuAccessor, options); program = Decoder.Decode(config, address); } else { - config = new ShaderConfig(new ShaderHeader(gpuAccessor, address), gpuAccessor, options, counts); + config = new ShaderConfig(new ShaderHeader(gpuAccessor, address), gpuAccessor, options); program = Decoder.Decode(config, address + HeaderSize); } @@ -138,20 +126,6 @@ private static TranslatorContext DecodeShader(ulong address, IGpuAccessor gpuAcc { maxEndAddress = block.EndAddress; } - - if (!config.UsedFeatures.HasFlag(FeatureFlags.Bindless)) - { - for (int index = 0; index < block.OpCodes.Count; index++) - { - InstOp op = block.OpCodes[index]; - - if (op.Props.HasFlag(InstProps.Tex)) - { - int tidB = (int)((op.RawOpCode >> 36) & 0x1fff); - config.TextureHandlesForCache.Add(tidB); - } - } - } } } diff --git a/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs b/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs index b4e61cb63c4b..8900f9fe6d9b 100644 --- a/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs +++ b/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs @@ -16,10 +16,7 @@ public class TranslatorContext public ShaderStage Stage => _config.Stage; public int Size => _config.Size; - - public FeatureFlags UsedFeatures => _config.UsedFeatures; - - public HashSet TextureHandlesForCache => _config.TextureHandlesForCache; + public int Cb1DataSize => _config.Cb1DataSize; public IGpuAccessor GpuAccessor => _config.GpuAccessor; @@ -129,16 +126,13 @@ private static FunctionCode[] Combine(FunctionCode[] a, FunctionCode[] b, int aS return output; } - public ShaderProgram Translate( - out ShaderProgramInfo shaderProgramInfo, - TranslatorContext nextStage = null, - TranslatorContext other = null) + public void SetNextStage(TranslatorContext 
nextStage) { - if (nextStage != null) - { - _config.MergeFromtNextStage(nextStage._config); - } + _config.MergeFromtNextStage(nextStage._config); + } + public ShaderProgram Translate(TranslatorContext other = null) + { FunctionCode[] code = EmitShader(_program, _config, initializeOutputs: other == null, out _); if (other != null) @@ -152,7 +146,7 @@ public ShaderProgram Translate( _config.InheritFrom(other._config); } - return Translator.Translate(code, _config, out shaderProgramInfo); + return Translator.Translate(code, _config); } } } diff --git a/Ryujinx.Headless.SDL2/WindowBase.cs b/Ryujinx.Headless.SDL2/WindowBase.cs index 3fbd9bc3d210..74eb0d31ae23 100644 --- a/Ryujinx.Headless.SDL2/WindowBase.cs +++ b/Ryujinx.Headless.SDL2/WindowBase.cs @@ -43,6 +43,7 @@ abstract class WindowBase : IHostUiHandler, IDisposable private GraphicsDebugLevel _glLogLevel; private readonly Stopwatch _chrono; private readonly long _ticksPerFrame; + private readonly CancellationTokenSource _gpuCancellationTokenSource; private readonly ManualResetEvent _exitEvent; private long _ticks; @@ -66,6 +67,7 @@ public WindowBase(InputManager inputManager, GraphicsDebugLevel glLogLevel, Aspe _glLogLevel = glLogLevel; _chrono = new Stopwatch(); _ticksPerFrame = Stopwatch.Frequency / TargetFps; + _gpuCancellationTokenSource = new CancellationTokenSource(); _exitEvent = new ManualResetEvent(false); _aspectRatio = aspectRatio; _enableMouse = enableMouse; @@ -162,7 +164,7 @@ public void Render() Device.Gpu.Renderer.RunLoop(() => { - Device.Gpu.InitializeShaderCache(); + Device.Gpu.InitializeShaderCache(_gpuCancellationTokenSource.Token); Translator.IsReadyForTranslation.Set(); while (_isActive) @@ -223,6 +225,8 @@ public void Exit() return; } + _gpuCancellationTokenSource.Cancel(); + _isStopped = true; _isActive = false; diff --git a/Ryujinx.ShaderTools/Program.cs b/Ryujinx.ShaderTools/Program.cs index 43b9494e706b..746b780c0d94 100644 --- a/Ryujinx.ShaderTools/Program.cs +++ 
b/Ryujinx.ShaderTools/Program.cs @@ -55,7 +55,7 @@ static void HandleArguments(Options options) TranslationOptions translationOptions = new TranslationOptions(options.TargetLanguage, options.TargetApi, flags); - ShaderProgram program = Translator.CreateContext(0, new GpuAccessor(data), translationOptions).Translate(out _); + ShaderProgram program = Translator.CreateContext(0, new GpuAccessor(data), translationOptions).Translate(); if (options.OutputPath == null) { diff --git a/Ryujinx/Ui/RendererWidgetBase.cs b/Ryujinx/Ui/RendererWidgetBase.cs index cdbf5d6c5d01..123403088100 100644 --- a/Ryujinx/Ui/RendererWidgetBase.cs +++ b/Ryujinx/Ui/RendererWidgetBase.cs @@ -60,6 +60,8 @@ public abstract class RendererWidgetBase : DrawingArea private readonly ManualResetEvent _exitEvent; + private readonly CancellationTokenSource _gpuCancellationTokenSource; + // Hide Cursor const int CursorHideIdleTime = 8; // seconds private static readonly Cursor _invisibleCursor = new Cursor(Display.Default, CursorType.BlankCursor); @@ -105,6 +107,8 @@ public RendererWidgetBase(InputManager inputManager, GraphicsDebugLevel glLogLev _exitEvent = new ManualResetEvent(false); + _gpuCancellationTokenSource = new CancellationTokenSource(); + _hideCursorOnIdle = ConfigurationState.Instance.HideCursorOnIdle; _lastCursorMoveTime = Stopwatch.GetTimestamp(); @@ -387,7 +391,7 @@ public void Render() Device.Gpu.Renderer.RunLoop(() => { Device.Gpu.SetGpuThread(); - Device.Gpu.InitializeShaderCache(); + Device.Gpu.InitializeShaderCache(_gpuCancellationTokenSource.Token); Translator.IsReadyForTranslation.Set(); (Toplevel as MainWindow)?.ActivatePauseMenu(); @@ -499,6 +503,8 @@ public void Exit() return; } + _gpuCancellationTokenSource.Cancel(); + _isStopped = true; _isActive = false; @@ -603,7 +609,7 @@ private bool UpdateFrame() if (currentHotkeyState.HasFlag(KeyboardHotkeyState.ToggleMute) && !_prevHotkeyState.HasFlag(KeyboardHotkeyState.ToggleMute)) { - if (Device.IsAudioMuted()) + if 
(Device.IsAudioMuted()) { Device.SetVolume(ConfigurationState.Instance.System.AudioVolume); } diff --git a/Ryujinx/Ui/Widgets/GameTableContextMenu.cs b/Ryujinx/Ui/Widgets/GameTableContextMenu.cs index 0e3b4892f098..8bf8af36c1e3 100644 --- a/Ryujinx/Ui/Widgets/GameTableContextMenu.cs +++ b/Ryujinx/Ui/Widgets/GameTableContextMenu.cs @@ -172,7 +172,7 @@ private void ExtractSection(NcaSectionType ncaSectionType, int programIndex = 0) ResponseType response = (ResponseType)fileChooser.Run(); string destination = fileChooser.Filename; - + fileChooser.Dispose(); if (response == ResponseType.Accept) @@ -490,7 +490,7 @@ private void ExtractLogo_Clicked(object sender, EventArgs args) private void OpenPtcDir_Clicked(object sender, EventArgs args) { string ptcDir = System.IO.Path.Combine(AppDataManager.GamesDirPath, _titleIdText, "cache", "cpu"); - + string mainPath = System.IO.Path.Combine(ptcDir, "0"); string backupPath = System.IO.Path.Combine(ptcDir, "1"); @@ -515,7 +515,7 @@ private void OpenShaderCacheDir_Clicked(object sender, EventArgs args) OpenHelper.OpenFolder(shaderCacheDir); } - + private void PurgePtcCache_Clicked(object sender, EventArgs args) { DirectoryInfo mainDir = new DirectoryInfo(System.IO.Path.Combine(AppDataManager.GamesDirPath, _titleIdText, "cache", "cpu", "0")); @@ -526,7 +526,7 @@ private void PurgePtcCache_Clicked(object sender, EventArgs args) List cacheFiles = new List(); if (mainDir.Exists) - { + { cacheFiles.AddRange(mainDir.EnumerateFiles("*.cache")); } @@ -539,9 +539,9 @@ private void PurgePtcCache_Clicked(object sender, EventArgs args) { foreach (FileInfo file in cacheFiles) { - try - { - file.Delete(); + try + { + file.Delete(); } catch(Exception e) { @@ -557,18 +557,21 @@ private void PurgeShaderCache_Clicked(object sender, EventArgs args) { DirectoryInfo shaderCacheDir = new DirectoryInfo(System.IO.Path.Combine(AppDataManager.GamesDirPath, _titleIdText, "cache", "shader")); - MessageDialog warningDialog = 
GtkDialog.CreateConfirmationDialog("Warning", $"You are about to delete the shader cache for :\n\n{_titleName}\n\nAre you sure you want to proceed?"); + using MessageDialog warningDialog = GtkDialog.CreateConfirmationDialog("Warning", $"You are about to delete the shader cache for :\n\n{_titleName}\n\nAre you sure you want to proceed?"); - List cacheDirectory = new List(); + List oldCacheDirectories = new List(); + List newCacheFiles = new List(); if (shaderCacheDir.Exists) { - cacheDirectory.AddRange(shaderCacheDir.EnumerateDirectories("*")); + oldCacheDirectories.AddRange(shaderCacheDir.EnumerateDirectories("*")); + newCacheFiles.AddRange(shaderCacheDir.GetFiles("*.toc")); + newCacheFiles.AddRange(shaderCacheDir.GetFiles("*.data")); } - if (cacheDirectory.Count > 0 && warningDialog.Run() == (int)ResponseType.Yes) + if ((oldCacheDirectories.Count > 0 || newCacheFiles.Count > 0) && warningDialog.Run() == (int)ResponseType.Yes) { - foreach (DirectoryInfo directory in cacheDirectory) + foreach (DirectoryInfo directory in oldCacheDirectories) { try { @@ -579,9 +582,19 @@ private void PurgeShaderCache_Clicked(object sender, EventArgs args) GtkDialog.CreateErrorDialog($"Error purging shader cache at {directory.Name}: {e}"); } } - } - warningDialog.Dispose(); + foreach (FileInfo file in newCacheFiles) + { + try + { + file.Delete(); + } + catch (Exception e) + { + GtkDialog.CreateErrorDialog($"Error purging shader cache at {file.Name}: {e}"); + } + } + } } } } From 247d26b4b50df8a068b9bfbb412a29f0c9f6a97b Mon Sep 17 00:00:00 2001 From: merry Date: Sun, 10 Apr 2022 18:04:22 +0100 Subject: [PATCH 2/2] ForceDpiAware: X11 implementation (#3269) * ForceDpiAware: X11 implementation * address comments --- Ryujinx.Common/System/ForceDpiAware.cs | 42 ++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/Ryujinx.Common/System/ForceDpiAware.cs b/Ryujinx.Common/System/ForceDpiAware.cs index dc513307a4cd..f29630a62810 100644 --- 
a/Ryujinx.Common/System/ForceDpiAware.cs +++ b/Ryujinx.Common/System/ForceDpiAware.cs @@ -1,6 +1,7 @@ using Ryujinx.Common.Logging; using System; using System.Drawing; +using System.Globalization; using System.Runtime.InteropServices; using System.Runtime.Versioning; @@ -11,6 +12,23 @@ public static class ForceDpiAware [DllImport("user32.dll")] private static extern bool SetProcessDPIAware(); + private const string X11LibraryName = "libX11.so.6"; + + [DllImport(X11LibraryName)] + private static extern IntPtr XOpenDisplay(string display); + + [DllImport(X11LibraryName)] + private static extern IntPtr XGetDefault(IntPtr display, string program, string option); + + [DllImport(X11LibraryName)] + private static extern int XDisplayWidth(IntPtr display, int screenNumber); + + [DllImport(X11LibraryName)] + private static extern int XDisplayWidthMM(IntPtr display, int screenNumber); + + [DllImport(X11LibraryName)] + private static extern int XCloseDisplay(IntPtr display); + private static readonly double _standardDpiScale = 96.0; private static readonly double _maxScaleFactor = 1.25; @@ -36,9 +54,29 @@ public static double GetWindowScaleFactor() { userDpiScale = Graphics.FromHwnd(IntPtr.Zero).DpiX; } - else + else if (OperatingSystem.IsLinux()) { - // TODO: Linux support + string xdgSessionType = Environment.GetEnvironmentVariable("XDG_SESSION_TYPE")?.ToLower(); + + if (xdgSessionType == null || xdgSessionType == "x11") + { + IntPtr display = XOpenDisplay(null); + string dpiString = Marshal.PtrToStringAnsi(XGetDefault(display, "Xft", "dpi")); + if (dpiString == null || !double.TryParse(dpiString, NumberStyles.Any, CultureInfo.InvariantCulture, out userDpiScale)) + { + userDpiScale = (double)XDisplayWidth(display, 0) * 25.4 / (double)XDisplayWidthMM(display, 0); + } + XCloseDisplay(display); + } + else if (xdgSessionType == "wayland") + { + // TODO + Logger.Warning?.Print(LogClass.Application, $"Couldn't determine monitor DPI: Wayland not yet supported"); + } + else + { + 
Logger.Warning?.Print(LogClass.Application, $"Couldn't determine monitor DPI: Unrecognised XDG_SESSION_TYPE: {xdgSessionType}"); + } } } catch (Exception e)