Skip to content

Commit

Permalink
Mipmaps (#66)
Browse files Browse the repository at this point in the history
Mipmap generation and basic usage.
  • Loading branch information
Senryoku authored Nov 3, 2024
1 parent c8d588a commit 8985e68
Show file tree
Hide file tree
Showing 6 changed files with 188 additions and 12 deletions.
2 changes: 1 addition & 1 deletion Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ sudo apt install libgtk-3-dev
- Fog LUT Mode 2.
- Secondary accumulate buffer (very low priority, not sure if many games use this feature).
- Bump mapping.
- Handle and generate Mipmaps.
- Mipmaps for palette textures?
- Sort-DMA?
- User Tile Clip, only the simplest version is supported.
- MMU: Only supported for store queue writes using the pref intruction (used by Ikaruga for example)
Expand Down
2 changes: 1 addition & 1 deletion src/holly.zig
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ pub const TSPInstructionWord = packed struct(u32) {
texture_shading_instruction: TextureShadingInstruction,
mipmap_d_adjust: u4,
supersample_texture: u1,
filter_mode: u2,
filter_mode: enum(u2) { Point = 0, Bilinear = 1, TrilinearPassA = 2, TrilinearPassB = 3 },
clamp_uv: u2,
flip_uv: u2,
ignore_texture_alpha: u1,
Expand Down
121 changes: 121 additions & 0 deletions src/mipmap.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
const std = @import("std");
const zgpu = @import("zgpu");

const ShaderSource = @embedFile("shaders/generate_mipmaps.wgsl");

bind_group_layout: zgpu.BindGroupLayoutHandle,
pipeline: zgpu.ComputePipelineHandle,

pub fn init(gctx: *zgpu.GraphicsContext) @This() {
const bind_group_layout = gctx.createBindGroupLayout(&.{
zgpu.textureEntry(0, .{ .compute = true }, .unfilterable_float, .tvdim_2d, false),
zgpu.storageTextureEntry(1, .{ .compute = true }, .write_only, .bgra8_unorm, .tvdim_2d),
zgpu.storageTextureEntry(2, .{ .compute = true }, .write_only, .bgra8_unorm, .tvdim_2d),
zgpu.storageTextureEntry(3, .{ .compute = true }, .write_only, .bgra8_unorm, .tvdim_2d),
});
const pipeline_layout = gctx.createPipelineLayout(&.{
bind_group_layout,
});
defer gctx.releaseResource(pipeline_layout);

const cs_module = zgpu.createWgslShaderModule(gctx.device, ShaderSource, "generate_mipmaps");
defer cs_module.release();

return .{
.bind_group_layout = bind_group_layout,
.pipeline = gctx.createComputePipeline(pipeline_layout, .{
.compute = .{
.module = cs_module,
.entry_point = "main",
},
}),
};
}

pub fn deinit(self: *@This(), gctx: *zgpu.GraphicsContext) void {
gctx.releaseResource(self.bind_group_layout);
gctx.releaseResource(self.pipeline);
}

pub fn generate_mipmaps(self: @This(), gctx: *zgpu.GraphicsContext, texture: zgpu.TextureHandle, layer: u32) void {
const texture_info = gctx.lookupResourceInfo(texture) orelse return;

std.debug.assert(texture_info.usage.copy_dst == true);
std.debug.assert(texture_info.dimension == .tdim_2d);
std.debug.assert(texture_info.size.width == texture_info.size.height);
std.debug.assert(texture_info.size.width >= 8 and texture_info.size.width <= 1024);
std.debug.assert(std.math.isPowerOfTwo(texture_info.size.width));

const commands = commands: {
const encoder = gctx.device.createCommandEncoder(null);
defer encoder.release();

const pass = encoder.beginComputePass(null);
pass.setPipeline(gctx.lookupResource(self.pipeline).?);

const last_mip_level = texture_info.mip_level_count - 1;
var base_mip_level: u32 = 0;
while (base_mip_level < last_mip_level) {
const src_texture_view = gctx.createTextureView(texture, .{
.dimension = .tvdim_2d,
.base_array_layer = layer,
.array_layer_count = 1,
.base_mip_level = base_mip_level,
.mip_level_count = 1,
});
defer gctx.releaseResource(src_texture_view);
const dst_texture_views = [3]zgpu.TextureViewHandle{
gctx.createTextureView(texture, .{
.dimension = .tvdim_2d,
.base_array_layer = layer,
.array_layer_count = 1,
.base_mip_level = base_mip_level + 1,
.mip_level_count = 1,
}),
gctx.createTextureView(texture, .{
.dimension = .tvdim_2d,
.base_array_layer = layer,
.array_layer_count = 1,
.base_mip_level = base_mip_level + 2,
.mip_level_count = 1,
}),
gctx.createTextureView(texture, .{
.dimension = .tvdim_2d,
.base_array_layer = layer,
.array_layer_count = 1,
.base_mip_level = base_mip_level + 3,
.mip_level_count = 1,
}),
};
defer {
for (dst_texture_views) |dst_texture_view|
gctx.releaseResource(dst_texture_view);
}

const bind_group = gctx.createBindGroup(self.bind_group_layout, &.{
.{ .binding = 0, .texture_view_handle = src_texture_view },
.{ .binding = 1, .texture_view_handle = dst_texture_views[0] },
.{ .binding = 2, .texture_view_handle = dst_texture_views[1] },
.{ .binding = 3, .texture_view_handle = dst_texture_views[2] },
});
defer gctx.releaseResource(bind_group);

pass.setBindGroup(0, gctx.lookupResource(bind_group).?, &.{});
const num_groups = [2]u32{ @divExact(texture_info.size.width, std.math.pow(u32, 2, 1 + base_mip_level) * 4), @divExact(texture_info.size.height, std.math.pow(u32, 2, 1 + base_mip_level) * 4) };
pass.dispatchWorkgroups(num_groups[0], num_groups[1], 1);

// We want the source to always be at least 4 * 4 (a single workgroup). We'll compute some small (<=8x8) mipmaps twice, but that's ok.
if (last_mip_level - base_mip_level > 3 and last_mip_level - base_mip_level < 6) {
base_mip_level = last_mip_level - 3;
} else base_mip_level += 3;
}

pass.end();
pass.release();

break :commands encoder.finish(null);
};
defer commands.release();

gctx.submit(&.{commands});
}
29 changes: 20 additions & 9 deletions src/renderer.zig
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ const YUV422 = Colors.YUV422;
const Dreamcast = @import("dreamcast.zig").Dreamcast;
const HollyModule = @import("holly.zig");

const MipMap = @import("mipmap.zig");

pub const ExperimentalFBWriteBack = false;

// First 1024 values of the Moser de Bruijin sequence, Textures on the dreamcast are limited to 1024*1024 pixels.
Expand Down Expand Up @@ -81,7 +83,8 @@ const ShadingInstructions = packed struct(u32) {
shadow_bit: u1,
gouraud_bit: u1,
volume_bit: u1,
_: u7 = 0,
mipmap_bit: u1,
_: u6 = 0,
};

fn sampler_index(mag_filter: wgpu.FilterMode, min_filter: wgpu.FilterMode, mipmap_filter: wgpu.MipmapFilterMode, address_mode_u: wgpu.AddressMode, address_mode_v: wgpu.AddressMode) u8 {
Expand Down Expand Up @@ -544,6 +547,8 @@ pub const Renderer = struct {
strips_metadata: std.ArrayList(StripMetadata) = undefined, // Just here to avoid repeated allocations.
modifier_volume_vertices: std.ArrayList([4]f32) = undefined,

mipmap_gen_pipeline: MipMap,

_scratch_pad: []u8 align(4), // Used to avoid temporary allocations before GPU uploads for example. 4 * 1024 * 1024, since this is the maximum texture size supported by the DC.

_gctx: *zgpu.GraphicsContext,
Expand Down Expand Up @@ -706,14 +711,14 @@ pub const Renderer = struct {
var texture_array_views: [8]zgpu.TextureViewHandle = undefined;
for (0..8) |i| {
texture_arrays[i] = gctx.createTexture(.{
.usage = .{ .texture_binding = true, .copy_dst = true },
.usage = .{ .texture_binding = true, .storage_binding = true, .copy_dst = true },
.size = .{
.width = @as(u32, 8) << @intCast(i),
.height = @as(u32, 8) << @intCast(i),
.depth_or_array_layers = Renderer.MaxTextures[i],
},
.format = .bgra8_unorm,
.mip_level_count = 1, // std.math.log2_int(u32, @as(u32, 8))) + 1,
.mip_level_count = @intCast(4 + i),
});
texture_array_views[i] = gctx.createTextureView(texture_arrays[i], .{});
}
Expand Down Expand Up @@ -1163,6 +1168,8 @@ pub const Renderer = struct {

.ta_lists = HollyModule.TALists.init(allocator),

.mipmap_gen_pipeline = MipMap.init(gctx),

._scratch_pad = try allocator.allocWithOptions(u8, 4 * 1024 * 1024, 4, null),

._gctx = gctx,
Expand All @@ -1179,6 +1186,8 @@ pub const Renderer = struct {

self._allocator.free(self._scratch_pad);

self.mipmap_gen_pipeline.deinit(self._gctx);

self.translucent_pass.deinit();
self.punchthrough_pass.deinit();
self.opaque_pass.deinit();
Expand Down Expand Up @@ -1692,8 +1701,7 @@ pub const Renderer = struct {
}

if (texture_control_word.mip_mapped == 1) {
// TODO: Here we'd want to generate mipmaps.
// See zgpu.generateMipmaps, maybe?
self.mipmap_gen_pipeline.generate_mipmaps(self._gctx, self.texture_arrays[size_index], texture_index);
}

return texture_index;
Expand Down Expand Up @@ -1853,7 +1861,7 @@ pub const Renderer = struct {
.index = tex_idx,
.palette = .{
.palette = texture_control.pixel_format == .Palette4BPP or texture_control.pixel_format == .Palette8BPP,
.filtered = tsp_instruction.filter_mode != 0,
.filtered = tsp_instruction.filter_mode != .Point,
.selector = @truncate(texture_control.palette_selector() >> 4),
},
.shading = .{
Expand All @@ -1868,6 +1876,7 @@ pub const Renderer = struct {
.shadow_bit = 0,
.gouraud_bit = isp_tsp_instruction.gouraud,
.volume_bit = 0,
.mipmap_bit = 0,
},
};

Expand Down Expand Up @@ -2127,15 +2136,15 @@ pub const Renderer = struct {

// TODO: Add support for mipmapping (Tri-linear filtering) (And figure out what Pass A and Pass B means!).
// Force nearest filtering when using palette textures (we'll be sampling indices into the palette). Filtering will have to be done in the shader.
const filter_mode: wgpu.FilterMode = if (texture_control.pixel_format == .Palette4BPP or texture_control.pixel_format == .Palette8BPP) .nearest else if (tsp_instruction.filter_mode == 0) .nearest else .linear;
const filter_mode: wgpu.FilterMode = if (texture_control.pixel_format == .Palette4BPP or texture_control.pixel_format == .Palette8BPP) .nearest else if (tsp_instruction.filter_mode == .Point) .nearest else .linear;

const sampler = if (textured) sampler_index(filter_mode, filter_mode, .linear, u_addr_mode, v_addr_mode) else sampler_index(.linear, .linear, .linear, .clamp_to_edge, .clamp_to_edge);

const area0_instructions: VertexTextureInfo = .{
.index = tex_idx,
.palette = .{
.palette = texture_control.pixel_format == .Palette4BPP or texture_control.pixel_format == .Palette8BPP,
.filtered = tsp_instruction.filter_mode != 0,
.filtered = tsp_instruction.filter_mode != .Point,
.selector = @truncate(texture_control.palette_selector() >> 4),
},
.shading = .{
Expand All @@ -2152,14 +2161,15 @@ pub const Renderer = struct {
.shadow_bit = parameter_control_word.obj_control.shadow,
.gouraud_bit = isp_tsp_instruction.gouraud,
.volume_bit = parameter_control_word.obj_control.volume,
.mipmap_bit = texture_control.mip_mapped,
},
};

const area1_instructions: VertexTextureInfo = if (area1_tsp_instruction) |atspi| .{
.index = tex_idx_area_1,
.palette = .{
.palette = if (area1_texture_control) |a| a.pixel_format == .Palette4BPP or a.pixel_format == .Palette8BPP else false,
.filtered = atspi.filter_mode != 0,
.filtered = atspi.filter_mode != .Point,
.selector = if (area1_texture_control) |a| @truncate(a.palette_selector() >> 4) else 0,
},
.shading = .{
Expand All @@ -2176,6 +2186,7 @@ pub const Renderer = struct {
.shadow_bit = parameter_control_word.obj_control.shadow,
.gouraud_bit = isp_tsp_instruction.gouraud,
.volume_bit = parameter_control_word.obj_control.volume,
.mipmap_bit = if (area1_texture_control) |a| a.mip_mapped else 0,
},
} else VertexTextureInfo.invalid();

Expand Down
10 changes: 9 additions & 1 deletion src/shaders/fragment_color.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,14 @@ fn bilinear_interpolation(u_min_v_min: vec4<f32>, u_min_v_max: vec4<f32>, u_max_
fn tex_array_sample(tex_array: texture_2d_array<f32>, uv: vec2<f32>, duvdx: vec2<f32>, duvdy: vec2<f32>, palette_instructions: u32, control: u32, index: u32) -> vec4<f32> {
if index >= textureNumLayers(tex_array) { return vec4<f32>(1.0, 0.0, 0.0, 1.0); }

let mipmap: bool = ((control >> 25) & 1) == 1;

if (palette_instructions & 1) == 1 {
// Palette Texture
let palette_selector = ((palette_instructions >> 2) & 0x3F) << 4;

// TODO: Handle mipmaps?

if ((palette_instructions >> 1) & 1) == 1 {
// with Bilinear filtering
let texel_coord = vec2<f32>(textureDimensions(tex_array)) * uv;
Expand Down Expand Up @@ -67,7 +71,11 @@ fn tex_array_sample(tex_array: texture_2d_array<f32>, uv: vec2<f32>, duvdx: vec2
return unpack4x8unorm(palette[palette_selector + palette_index]).zyxw;
}
} else {
return textureSampleGrad(tex_array, image_sampler, uv, index, duvdx, duvdy);
if mipmap {
return textureSampleGrad(tex_array, image_sampler, uv, index, duvdx, duvdy);
} else {
return textureSampleLevel(tex_array, image_sampler, uv, index, 0);
}
}
}

Expand Down
36 changes: 36 additions & 0 deletions src/shaders/generate_mipmaps.wgsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
@group(0) @binding(0) var src: texture_2d<f32>;
@group(0) @binding(1) var dst0: texture_storage_2d<bgra8unorm, write>;
@group(0) @binding(2) var dst1: texture_storage_2d<bgra8unorm, write>;
@group(0) @binding(3) var dst2: texture_storage_2d<bgra8unorm, write>;

var<workgroup> cache : array<vec4<f32>, 16>;

@compute @workgroup_size(4, 4)
fn main(@builtin(global_invocation_id) id: vec3<u32>, @builtin(local_invocation_id) local_id: vec3<u32>, @builtin(local_invocation_index) local_idx: u32) {
var color = (textureLoad(src, 2 * id.xy + vec2<u32>(0, 0), 0) + //
textureLoad(src, 2 * id.xy + vec2<u32>(0, 1), 0) + //
textureLoad(src, 2 * id.xy + vec2<u32>(1, 0), 0) + //
textureLoad(src, 2 * id.xy + vec2<u32>(1, 1), 0)) * 0.25;
textureStore(dst0, id.xy, color);
cache[local_idx] = color;

workgroupBarrier();

if local_id.x % 2 == 0 && local_id.y % 2 == 0 {
color = (cache[local_idx / 2 + 0] + cache[local_idx / 2 + 1] + cache[local_idx / 2 + 4] + cache[local_idx / 2 + 5]) * 0.25;
textureStore(dst1, id.xy / 2u, color);
}

workgroupBarrier();

if local_id.x % 2 == 0 && local_id.y % 2 == 0 {
cache[local_idx / 2] = color;
}

workgroupBarrier();

if local_idx == 0 {
color = (cache[0] + cache[1] + cache[2] + cache[3]) * 0.25;
textureStore(dst2, id.xy / 4u, color);
}
}

0 comments on commit 8985e68

Please sign in to comment.