Mipmaps (#66)

Mipmap generation and basic usage.
Senryoku · Nov 3, 2024 · 8985e68 · 8985e68
1 parent c8d588a
commit 8985e68
Show file tree

Hide file tree

Showing 6 changed files with 188 additions and 12 deletions.
diff --git a/Readme.md b/Readme.md
@@ -53,7 +53,7 @@ sudo apt install libgtk-3-dev
     -   Fog LUT Mode 2.
     -   Secondary accumulate buffer (very low priority, not sure if many games use this feature).
     -   Bump mapping.
-    -   Handle and generate Mipmaps.
+    -   Mipmaps for palette textures?
     -   Sort-DMA?
     -   User Tile Clip, only the simplest version is supported.
 -   MMU: Only supported for store queue writes using the pref intruction (used by Ikaruga for example)  

diff --git a/src/holly.zig b/src/holly.zig
@@ -477,7 +477,7 @@ pub const TSPInstructionWord = packed struct(u32) {
     texture_shading_instruction: TextureShadingInstruction,
     mipmap_d_adjust: u4,
     supersample_texture: u1,
-    filter_mode: u2,
+    filter_mode: enum(u2) { Point = 0, Bilinear = 1, TrilinearPassA = 2, TrilinearPassB = 3 },
     clamp_uv: u2,
     flip_uv: u2,
     ignore_texture_alpha: u1,

diff --git a/src/mipmap.zig b/src/mipmap.zig
@@ -0,0 +1,121 @@
+const std = @import("std");
+const zgpu = @import("zgpu");
+
+const ShaderSource = @embedFile("shaders/generate_mipmaps.wgsl");
+
+bind_group_layout: zgpu.BindGroupLayoutHandle,
+pipeline: zgpu.ComputePipelineHandle,
+
+pub fn init(gctx: *zgpu.GraphicsContext) @This() {
+    const bind_group_layout = gctx.createBindGroupLayout(&.{
+        zgpu.textureEntry(0, .{ .compute = true }, .unfilterable_float, .tvdim_2d, false),
+        zgpu.storageTextureEntry(1, .{ .compute = true }, .write_only, .bgra8_unorm, .tvdim_2d),
+        zgpu.storageTextureEntry(2, .{ .compute = true }, .write_only, .bgra8_unorm, .tvdim_2d),
+        zgpu.storageTextureEntry(3, .{ .compute = true }, .write_only, .bgra8_unorm, .tvdim_2d),
+    });
+    const pipeline_layout = gctx.createPipelineLayout(&.{
+        bind_group_layout,
+    });
+    defer gctx.releaseResource(pipeline_layout);
+
+    const cs_module = zgpu.createWgslShaderModule(gctx.device, ShaderSource, "generate_mipmaps");
+    defer cs_module.release();
+
+    return .{
+        .bind_group_layout = bind_group_layout,
+        .pipeline = gctx.createComputePipeline(pipeline_layout, .{
+            .compute = .{
+                .module = cs_module,
+                .entry_point = "main",
+            },
+        }),
+    };
+}
+
+pub fn deinit(self: *@This(), gctx: *zgpu.GraphicsContext) void {
+    gctx.releaseResource(self.bind_group_layout);
+    gctx.releaseResource(self.pipeline);
+}
+
+pub fn generate_mipmaps(self: @This(), gctx: *zgpu.GraphicsContext, texture: zgpu.TextureHandle, layer: u32) void {
+    const texture_info = gctx.lookupResourceInfo(texture) orelse return;
+
+    std.debug.assert(texture_info.usage.copy_dst == true);
+    std.debug.assert(texture_info.dimension == .tdim_2d);
+    std.debug.assert(texture_info.size.width == texture_info.size.height);
+    std.debug.assert(texture_info.size.width >= 8 and texture_info.size.width <= 1024);
+    std.debug.assert(std.math.isPowerOfTwo(texture_info.size.width));
+
+    const commands = commands: {
+        const encoder = gctx.device.createCommandEncoder(null);
+        defer encoder.release();
+
+        const pass = encoder.beginComputePass(null);
+        pass.setPipeline(gctx.lookupResource(self.pipeline).?);
+
+        const last_mip_level = texture_info.mip_level_count - 1;
+        var base_mip_level: u32 = 0;
+        while (base_mip_level < last_mip_level) {
+            const src_texture_view = gctx.createTextureView(texture, .{
+                .dimension = .tvdim_2d,
+                .base_array_layer = layer,
+                .array_layer_count = 1,
+                .base_mip_level = base_mip_level,
+                .mip_level_count = 1,
+            });
+            defer gctx.releaseResource(src_texture_view);
+            const dst_texture_views = [3]zgpu.TextureViewHandle{
+                gctx.createTextureView(texture, .{
+                    .dimension = .tvdim_2d,
+                    .base_array_layer = layer,
+                    .array_layer_count = 1,
+                    .base_mip_level = base_mip_level + 1,
+                    .mip_level_count = 1,
+                }),
+                gctx.createTextureView(texture, .{
+                    .dimension = .tvdim_2d,
+                    .base_array_layer = layer,
+                    .array_layer_count = 1,
+                    .base_mip_level = base_mip_level + 2,
+                    .mip_level_count = 1,
+                }),
+                gctx.createTextureView(texture, .{
+                    .dimension = .tvdim_2d,
+                    .base_array_layer = layer,
+                    .array_layer_count = 1,
+                    .base_mip_level = base_mip_level + 3,
+                    .mip_level_count = 1,
+                }),
+            };
+            defer {
+                for (dst_texture_views) |dst_texture_view|
+                    gctx.releaseResource(dst_texture_view);
+            }
+
+            const bind_group = gctx.createBindGroup(self.bind_group_layout, &.{
+                .{ .binding = 0, .texture_view_handle = src_texture_view },
+                .{ .binding = 1, .texture_view_handle = dst_texture_views[0] },
+                .{ .binding = 2, .texture_view_handle = dst_texture_views[1] },
+                .{ .binding = 3, .texture_view_handle = dst_texture_views[2] },
+            });
+            defer gctx.releaseResource(bind_group);
+
+            pass.setBindGroup(0, gctx.lookupResource(bind_group).?, &.{});
+            const num_groups = [2]u32{ @divExact(texture_info.size.width, std.math.pow(u32, 2, 1 + base_mip_level) * 4), @divExact(texture_info.size.height, std.math.pow(u32, 2, 1 + base_mip_level) * 4) };
+            pass.dispatchWorkgroups(num_groups[0], num_groups[1], 1);
+
+            // We want the source to always be at least 4 * 4 (a single workgroup). We'll compute some small (<=8x8) mipmaps twice, but that's ok.
+            if (last_mip_level - base_mip_level > 3 and last_mip_level - base_mip_level < 6) {
+                base_mip_level = last_mip_level - 3;
+            } else base_mip_level += 3;
+        }
+
+        pass.end();
+        pass.release();
+
+        break :commands encoder.finish(null);
+    };
+    defer commands.release();
+
+    gctx.submit(&.{commands});
+}
diff --git a/src/renderer.zig b/src/renderer.zig
@@ -18,6 +18,8 @@ const YUV422 = Colors.YUV422;
 const Dreamcast = @import("dreamcast.zig").Dreamcast;
 const HollyModule = @import("holly.zig");
 
+const MipMap = @import("mipmap.zig");
+
 pub const ExperimentalFBWriteBack = false;
 
 // First 1024 values of the Moser de Bruijin sequence, Textures on the dreamcast are limited to 1024*1024 pixels.
@@ -81,7 +83,8 @@ const ShadingInstructions = packed struct(u32) {
     shadow_bit: u1,
     gouraud_bit: u1,
     volume_bit: u1,
-    _: u7 = 0,
+    mipmap_bit: u1,
+    _: u6 = 0,
 };
 
 fn sampler_index(mag_filter: wgpu.FilterMode, min_filter: wgpu.FilterMode, mipmap_filter: wgpu.MipmapFilterMode, address_mode_u: wgpu.AddressMode, address_mode_v: wgpu.AddressMode) u8 {
@@ -544,6 +547,8 @@ pub const Renderer = struct {
     strips_metadata: std.ArrayList(StripMetadata) = undefined, // Just here to avoid repeated allocations.
     modifier_volume_vertices: std.ArrayList([4]f32) = undefined,
 
+    mipmap_gen_pipeline: MipMap,
+
     _scratch_pad: []u8 align(4), // Used to avoid temporary allocations before GPU uploads for example. 4 * 1024 * 1024, since this is the maximum texture size supported by the DC.
 
     _gctx: *zgpu.GraphicsContext,
@@ -706,14 +711,14 @@ pub const Renderer = struct {
         var texture_array_views: [8]zgpu.TextureViewHandle = undefined;
         for (0..8) |i| {
             texture_arrays[i] = gctx.createTexture(.{
-                .usage = .{ .texture_binding = true, .copy_dst = true },
+                .usage = .{ .texture_binding = true, .storage_binding = true, .copy_dst = true },
                 .size = .{
                     .width = @as(u32, 8) << @intCast(i),
                     .height = @as(u32, 8) << @intCast(i),
                     .depth_or_array_layers = Renderer.MaxTextures[i],
                 },
                 .format = .bgra8_unorm,
-                .mip_level_count = 1, // std.math.log2_int(u32, @as(u32, 8))) + 1,
+                .mip_level_count = @intCast(4 + i),
             });
             texture_array_views[i] = gctx.createTextureView(texture_arrays[i], .{});
         }
@@ -1163,6 +1168,8 @@ pub const Renderer = struct {
 
             .ta_lists = HollyModule.TALists.init(allocator),
 
+            .mipmap_gen_pipeline = MipMap.init(gctx),
+
             ._scratch_pad = try allocator.allocWithOptions(u8, 4 * 1024 * 1024, 4, null),
 
             ._gctx = gctx,
@@ -1179,6 +1186,8 @@ pub const Renderer = struct {
 
         self._allocator.free(self._scratch_pad);
 
+        self.mipmap_gen_pipeline.deinit(self._gctx);
+
         self.translucent_pass.deinit();
         self.punchthrough_pass.deinit();
         self.opaque_pass.deinit();
@@ -1692,8 +1701,7 @@ pub const Renderer = struct {
         }
 
         if (texture_control_word.mip_mapped == 1) {
-            // TODO: Here we'd want to generate mipmaps.
-            //       See zgpu.generateMipmaps, maybe?
+            self.mipmap_gen_pipeline.generate_mipmaps(self._gctx, self.texture_arrays[size_index], texture_index);
         }
 
         return texture_index;
@@ -1853,7 +1861,7 @@ pub const Renderer = struct {
             .index = tex_idx,
             .palette = .{
                 .palette = texture_control.pixel_format == .Palette4BPP or texture_control.pixel_format == .Palette8BPP,
-                .filtered = tsp_instruction.filter_mode != 0,
+                .filtered = tsp_instruction.filter_mode != .Point,
                 .selector = @truncate(texture_control.palette_selector() >> 4),
             },
             .shading = .{
@@ -1868,6 +1876,7 @@ pub const Renderer = struct {
                 .shadow_bit = 0,
                 .gouraud_bit = isp_tsp_instruction.gouraud,
                 .volume_bit = 0,
+                .mipmap_bit = 0,
             },
         };
 
@@ -2127,15 +2136,15 @@ pub const Renderer = struct {
 
                 // TODO: Add support for mipmapping (Tri-linear filtering) (And figure out what Pass A and Pass B means!).
                 // Force nearest filtering when using palette textures (we'll be sampling indices into the palette). Filtering will have to be done in the shader.
-                const filter_mode: wgpu.FilterMode = if (texture_control.pixel_format == .Palette4BPP or texture_control.pixel_format == .Palette8BPP) .nearest else if (tsp_instruction.filter_mode == 0) .nearest else .linear;
+                const filter_mode: wgpu.FilterMode = if (texture_control.pixel_format == .Palette4BPP or texture_control.pixel_format == .Palette8BPP) .nearest else if (tsp_instruction.filter_mode == .Point) .nearest else .linear;
 
                 const sampler = if (textured) sampler_index(filter_mode, filter_mode, .linear, u_addr_mode, v_addr_mode) else sampler_index(.linear, .linear, .linear, .clamp_to_edge, .clamp_to_edge);
 
                 const area0_instructions: VertexTextureInfo = .{
                     .index = tex_idx,
                     .palette = .{
                         .palette = texture_control.pixel_format == .Palette4BPP or texture_control.pixel_format == .Palette8BPP,
-                        .filtered = tsp_instruction.filter_mode != 0,
+                        .filtered = tsp_instruction.filter_mode != .Point,
                         .selector = @truncate(texture_control.palette_selector() >> 4),
                     },
                     .shading = .{
@@ -2152,14 +2161,15 @@ pub const Renderer = struct {
                         .shadow_bit = parameter_control_word.obj_control.shadow,
                         .gouraud_bit = isp_tsp_instruction.gouraud,
                         .volume_bit = parameter_control_word.obj_control.volume,
+                        .mipmap_bit = texture_control.mip_mapped,
                     },
                 };
 
                 const area1_instructions: VertexTextureInfo = if (area1_tsp_instruction) |atspi| .{
                     .index = tex_idx_area_1,
                     .palette = .{
                         .palette = if (area1_texture_control) |a| a.pixel_format == .Palette4BPP or a.pixel_format == .Palette8BPP else false,
-                        .filtered = atspi.filter_mode != 0,
+                        .filtered = atspi.filter_mode != .Point,
                         .selector = if (area1_texture_control) |a| @truncate(a.palette_selector() >> 4) else 0,
                     },
                     .shading = .{
@@ -2176,6 +2186,7 @@ pub const Renderer = struct {
                         .shadow_bit = parameter_control_word.obj_control.shadow,
                         .gouraud_bit = isp_tsp_instruction.gouraud,
                         .volume_bit = parameter_control_word.obj_control.volume,
+                        .mipmap_bit = if (area1_texture_control) |a| a.mip_mapped else 0,
                     },
                 } else VertexTextureInfo.invalid();
 

diff --git a/src/shaders/fragment_color.wgsl b/src/shaders/fragment_color.wgsl
@@ -20,10 +20,14 @@ fn bilinear_interpolation(u_min_v_min: vec4<f32>, u_min_v_max: vec4<f32>, u_max_
 fn tex_array_sample(tex_array: texture_2d_array<f32>, uv: vec2<f32>, duvdx: vec2<f32>, duvdy: vec2<f32>, palette_instructions: u32, control: u32, index: u32) -> vec4<f32> {
     if index >= textureNumLayers(tex_array) { return vec4<f32>(1.0, 0.0, 0.0, 1.0); }
 
+    let mipmap: bool = ((control >> 25) & 1) == 1;
+
     if (palette_instructions & 1) == 1 {
         // Palette Texture
         let palette_selector = ((palette_instructions >> 2) & 0x3F) << 4;
 
+        // TODO: Handle mipmaps?
+
         if ((palette_instructions >> 1) & 1) == 1 {
             // with Bilinear filtering
             let texel_coord = vec2<f32>(textureDimensions(tex_array)) * uv;
@@ -67,7 +71,11 @@ fn tex_array_sample(tex_array: texture_2d_array<f32>, uv: vec2<f32>, duvdx: vec2
             return unpack4x8unorm(palette[palette_selector + palette_index]).zyxw;
         }
     } else {
-        return textureSampleGrad(tex_array, image_sampler, uv, index, duvdx, duvdy);
+        if mipmap {
+            return textureSampleGrad(tex_array, image_sampler, uv, index, duvdx, duvdy);
+        } else {
+            return textureSampleLevel(tex_array, image_sampler, uv, index, 0);
+        }
     }
 }
 

diff --git a/src/shaders/generate_mipmaps.wgsl b/src/shaders/generate_mipmaps.wgsl
@@ -0,0 +1,36 @@
+@group(0) @binding(0) var src: texture_2d<f32>;
+@group(0) @binding(1) var dst0: texture_storage_2d<bgra8unorm, write>;
+@group(0) @binding(2) var dst1: texture_storage_2d<bgra8unorm, write>;
+@group(0) @binding(3) var dst2: texture_storage_2d<bgra8unorm, write>;
+
+var<workgroup> cache : array<vec4<f32>, 16>;
+
+@compute @workgroup_size(4, 4)
+fn main(@builtin(global_invocation_id) id: vec3<u32>, @builtin(local_invocation_id) local_id: vec3<u32>, @builtin(local_invocation_index) local_idx: u32) {
+    var color = (textureLoad(src, 2 * id.xy + vec2<u32>(0, 0), 0) + //
+		textureLoad(src, 2 * id.xy + vec2<u32>(0, 1), 0) + // 
+		textureLoad(src, 2 * id.xy + vec2<u32>(1, 0), 0) + //
+		textureLoad(src, 2 * id.xy + vec2<u32>(1, 1), 0)) * 0.25;
+    textureStore(dst0, id.xy, color);
+    cache[local_idx] = color;
+
+    workgroupBarrier();
+
+    if local_id.x % 2 == 0 && local_id.y % 2 == 0 {
+        color = (cache[local_idx / 2 + 0] + cache[local_idx / 2 + 1] + cache[local_idx / 2 + 4] + cache[local_idx / 2 + 5]) * 0.25;
+        textureStore(dst1, id.xy / 2u, color);
+    }
+
+    workgroupBarrier();
+
+    if local_id.x % 2 == 0 && local_id.y % 2 == 0 {
+        cache[local_idx / 2] = color;
+    }
+
+    workgroupBarrier();
+
+    if local_idx == 0 {
+        color = (cache[0] + cache[1] + cache[2] + cache[3]) * 0.25;
+        textureStore(dst2, id.xy / 4u, color);
+    }
+}