From ba610f48dcdcaa0eb797c9d3fbe3cbd100c91418 Mon Sep 17 00:00:00 2001 From: tolelom <98kimsungmin@naver.com> Date: Wed, 25 Mar 2026 13:25:11 +0900 Subject: [PATCH] docs: add Phase 7-1 through 7-3 specs and plans Co-Authored-By: Claude Opus 4.6 (1M context) --- .../2026-03-25-phase7-1-deferred-rendering.md | 961 ++++++++++++++++++ .../plans/2026-03-25-phase7-2-ssgi.md | 661 ++++++++++++ .../plans/2026-03-25-phase7-3-rt-shadows.md | 631 ++++++++++++ .../2026-03-25-phase7-1-deferred-rendering.md | 199 ++++ .../specs/2026-03-25-phase7-2-ssgi.md | 202 ++++ .../specs/2026-03-25-phase7-3-rt-shadows.md | 197 ++++ 6 files changed, 2851 insertions(+) create mode 100644 docs/superpowers/plans/2026-03-25-phase7-1-deferred-rendering.md create mode 100644 docs/superpowers/plans/2026-03-25-phase7-2-ssgi.md create mode 100644 docs/superpowers/plans/2026-03-25-phase7-3-rt-shadows.md create mode 100644 docs/superpowers/specs/2026-03-25-phase7-1-deferred-rendering.md create mode 100644 docs/superpowers/specs/2026-03-25-phase7-2-ssgi.md create mode 100644 docs/superpowers/specs/2026-03-25-phase7-3-rt-shadows.md diff --git a/docs/superpowers/plans/2026-03-25-phase7-1-deferred-rendering.md b/docs/superpowers/plans/2026-03-25-phase7-1-deferred-rendering.md new file mode 100644 index 0000000..c03ac36 --- /dev/null +++ b/docs/superpowers/plans/2026-03-25-phase7-1-deferred-rendering.md @@ -0,0 +1,961 @@ +# Phase 7-1: Deferred Rendering Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** G-Buffer + Lighting Pass 디퍼드 렌더링 파이프라인으로 다수의 라이트를 효율적으로 처리 + +**Architecture:** voltex_renderer에 새 모듈 추가. G-Buffer pass(MRT 4개)가 기하 데이터를 기록하고, Lighting pass(풀스크린 삼각형)가 G-Buffer를 읽어 Cook-Torrance BRDF + 섀도우 + IBL 라이팅을 수행. 기존 포워드 PBR은 유지. 
+ +**Tech Stack:** Rust, wgpu 28.0, WGSL + +**Spec:** `docs/superpowers/specs/2026-03-25-phase7-1-deferred-rendering.md` + +--- + +## File Structure + +### voltex_renderer (추가) +- `crates/voltex_renderer/src/gbuffer.rs` — GBuffer 텍스처 생성/리사이즈 (Create) +- `crates/voltex_renderer/src/fullscreen_quad.rs` — 풀스크린 삼각형 (Create) +- `crates/voltex_renderer/src/deferred_gbuffer.wgsl` — G-Buffer pass 셰이더 (Create) +- `crates/voltex_renderer/src/deferred_lighting.wgsl` — Lighting pass 셰이더 (Create) +- `crates/voltex_renderer/src/deferred_pipeline.rs` — 파이프라인 생성 함수들 (Create) +- `crates/voltex_renderer/src/lib.rs` — 새 모듈 등록 (Modify) + +### Example (추가) +- `examples/deferred_demo/Cargo.toml` (Create) +- `examples/deferred_demo/src/main.rs` (Create) +- `Cargo.toml` — workspace members (Modify) + +--- + +## Task 1: GBuffer + Fullscreen Triangle + +**Files:** +- Create: `crates/voltex_renderer/src/gbuffer.rs` +- Create: `crates/voltex_renderer/src/fullscreen_quad.rs` +- Modify: `crates/voltex_renderer/src/lib.rs` + +- [ ] **Step 1: gbuffer.rs 작성** + +```rust +// crates/voltex_renderer/src/gbuffer.rs + +pub const GBUFFER_POSITION_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba32Float; +pub const GBUFFER_NORMAL_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba16Float; +pub const GBUFFER_ALBEDO_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba8UnormSrgb; +pub const GBUFFER_MATERIAL_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba8Unorm; + +pub struct GBuffer { + pub position_view: wgpu::TextureView, + pub normal_view: wgpu::TextureView, + pub albedo_view: wgpu::TextureView, + pub material_view: wgpu::TextureView, + pub depth_view: wgpu::TextureView, + pub width: u32, + pub height: u32, +} + +impl GBuffer { + pub fn new(device: &wgpu::Device, width: u32, height: u32) -> Self { + let position_view = create_rt(device, width, height, GBUFFER_POSITION_FORMAT, "GBuffer Position"); + let normal_view = create_rt(device, width, height, GBUFFER_NORMAL_FORMAT, "GBuffer 
Normal"); + let albedo_view = create_rt(device, width, height, GBUFFER_ALBEDO_FORMAT, "GBuffer Albedo"); + let material_view = create_rt(device, width, height, GBUFFER_MATERIAL_FORMAT, "GBuffer Material"); + let depth_view = create_depth(device, width, height); + Self { position_view, normal_view, albedo_view, material_view, depth_view, width, height } + } + + pub fn resize(&mut self, device: &wgpu::Device, width: u32, height: u32) { + *self = Self::new(device, width, height); + } +} + +fn create_rt(device: &wgpu::Device, w: u32, h: u32, format: wgpu::TextureFormat, label: &str) -> wgpu::TextureView { + let tex = device.create_texture(&wgpu::TextureDescriptor { + label: Some(label), + size: wgpu::Extent3d { width: w, height: h, depth_or_array_layers: 1 }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format, + usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }); + tex.create_view(&wgpu::TextureViewDescriptor::default()) +} + +fn create_depth(device: &wgpu::Device, w: u32, h: u32) -> wgpu::TextureView { + let tex = device.create_texture(&wgpu::TextureDescriptor { + label: Some("GBuffer Depth"), + size: wgpu::Extent3d { width: w, height: h, depth_or_array_layers: 1 }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: crate::gpu::DEPTH_FORMAT, + usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }); + tex.create_view(&wgpu::TextureViewDescriptor::default()) +} +``` + +- [ ] **Step 2: fullscreen_quad.rs 작성** + +```rust +// crates/voltex_renderer/src/fullscreen_quad.rs +use bytemuck::{Pod, Zeroable}; + +#[repr(C)] +#[derive(Copy, Clone, Debug, Pod, Zeroable)] +pub struct FullscreenVertex { + pub position: [f32; 2], +} + +impl FullscreenVertex { + pub const LAYOUT: wgpu::VertexBufferLayout<'static> = wgpu::VertexBufferLayout { + array_stride: std::mem::size_of::() as 
wgpu::BufferAddress,
+        step_mode: wgpu::VertexStepMode::Vertex,
+        attributes: &[
+            wgpu::VertexAttribute {
+                offset: 0,
+                shader_location: 0,
+                format: wgpu::VertexFormat::Float32x2,
+            },
+        ],
+    };
+}
+
+/// Oversized triangle that covers the entire screen after clipping.
+pub const FULLSCREEN_VERTICES: [FullscreenVertex; 3] = [
+    FullscreenVertex { position: [-1.0, -1.0] },
+    FullscreenVertex { position: [ 3.0, -1.0] },
+    FullscreenVertex { position: [-1.0, 3.0] },
+];
+
+pub fn create_fullscreen_vertex_buffer(device: &wgpu::Device) -> wgpu::Buffer {
+    use wgpu::util::DeviceExt;
+    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
+        label: Some("Fullscreen Vertex Buffer"),
+        contents: bytemuck::cast_slice(&FULLSCREEN_VERTICES),
+        usage: wgpu::BufferUsages::VERTEX,
+    })
+}
+```
+
+- [ ] **Step 3: lib.rs에 모듈 등록**
+
+```rust
+pub mod gbuffer;
+pub mod fullscreen_quad;
+```
+
+And add re-exports:
+```rust
+pub use gbuffer::GBuffer;
+pub use fullscreen_quad::{create_fullscreen_vertex_buffer, FullscreenVertex};
+```
+
+- [ ] **Step 4: 빌드 확인**
+
+Run: `cargo build -p voltex_renderer`
+Expected: 컴파일 성공
+
+- [ ] **Step 5: 커밋**
+
+```bash
+git add crates/voltex_renderer/src/gbuffer.rs crates/voltex_renderer/src/fullscreen_quad.rs crates/voltex_renderer/src/lib.rs
+git commit -m "feat(renderer): add GBuffer and fullscreen triangle for deferred rendering"
+```
+
+---
+
+## Task 2: G-Buffer Pass 셰이더
+
+**Files:**
+- Create: `crates/voltex_renderer/src/deferred_gbuffer.wgsl`
+
+- [ ] **Step 1: deferred_gbuffer.wgsl 작성**
+
+```wgsl
+// G-Buffer pass: writes geometry data to multiple render targets
+
+struct CameraUniform {
+    view_proj: mat4x4<f32>,
+    model: mat4x4<f32>,
+    camera_pos: vec3<f32>,
+};
+
+struct MaterialUniform {
+    base_color: vec4<f32>,
+    metallic: f32,
+    roughness: f32,
+    ao: f32,
+};
+
+@group(0) @binding(0) var<uniform> camera: CameraUniform;
+
+@group(1) @binding(0) var t_diffuse: texture_2d<f32>;
+@group(1) @binding(1) var s_diffuse: sampler;
+@group(1) @binding(2) var t_normal: 
texture_2d<f32>;
+@group(1) @binding(3) var s_normal: sampler;
+
+@group(2) @binding(0) var<uniform> material: MaterialUniform;
+
+struct VertexInput {
+    @location(0) position: vec3<f32>,
+    @location(1) normal: vec3<f32>,
+    @location(2) uv: vec2<f32>,
+    @location(3) tangent: vec4<f32>,
+};
+
+struct VertexOutput {
+    @builtin(position) clip_position: vec4<f32>,
+    @location(0) world_pos: vec3<f32>,
+    @location(1) world_normal: vec3<f32>,
+    @location(2) uv: vec2<f32>,
+    @location(3) world_tangent: vec3<f32>,
+    @location(4) world_bitangent: vec3<f32>,
+};
+
+struct GBufferOutput {
+    @location(0) position: vec4<f32>,
+    @location(1) normal: vec4<f32>,
+    @location(2) albedo: vec4<f32>,
+    @location(3) material_out: vec4<f32>,
+};
+
+@vertex
+fn vs_main(in: VertexInput) -> VertexOutput {
+    var out: VertexOutput;
+    let world_pos = camera.model * vec4<f32>(in.position, 1.0);
+    out.world_pos = world_pos.xyz;
+    out.clip_position = camera.view_proj * world_pos;
+    out.world_normal = normalize((camera.model * vec4<f32>(in.normal, 0.0)).xyz);
+    out.uv = in.uv;
+
+    let T = normalize((camera.model * vec4<f32>(in.tangent.xyz, 0.0)).xyz);
+    let N = out.world_normal;
+    let B = cross(N, T) * in.tangent.w;
+    out.world_tangent = T;
+    out.world_bitangent = B;
+
+    return out;
+}
+
+@fragment
+fn fs_main(in: VertexOutput) -> GBufferOutput {
+    var out: GBufferOutput;
+
+    // World position
+    out.position = vec4<f32>(in.world_pos, 1.0);
+
+    // Normal mapping
+    let T = normalize(in.world_tangent);
+    let B = normalize(in.world_bitangent);
+    let N_geom = normalize(in.world_normal);
+    let normal_sample = textureSample(t_normal, s_normal, in.uv).rgb;
+    let tangent_normal = normal_sample * 2.0 - 1.0;
+    let TBN = mat3x3<f32>(T, B, N_geom);
+    let N = normalize(TBN * tangent_normal);
+    out.normal = vec4<f32>(N, 0.0);
+
+    // Albedo
+    let tex_color = textureSample(t_diffuse, s_diffuse, in.uv);
+    out.albedo = vec4<f32>(material.base_color.rgb * tex_color.rgb, 1.0);
+
+    // Material: R=metallic, G=roughness, B=ao
+    out.material_out = vec4<f32>(material.metallic, material.roughness, material.ao, 1.0);
+
+    return out;
+}
+```
+
+- [ 
] **Step 2: 커밋**
+
+```bash
+git add crates/voltex_renderer/src/deferred_gbuffer.wgsl
+git commit -m "feat(renderer): add G-Buffer pass shader for deferred rendering"
+```
+
+---
+
+## Task 3: Lighting Pass 셰이더
+
+**Files:**
+- Create: `crates/voltex_renderer/src/deferred_lighting.wgsl`
+
+- [ ] **Step 1: deferred_lighting.wgsl 작성**
+
+This shader reuses the Cook-Torrance BRDF functions from pbr_shader.wgsl but reads from G-Buffer instead of vertex attributes.
+
+```wgsl
+// Deferred Lighting Pass: reads G-Buffer, applies full PBR lighting
+
+// Group 0: G-Buffer textures
+@group(0) @binding(0) var t_position: texture_2d<f32>;
+@group(0) @binding(1) var t_normal: texture_2d<f32>;
+@group(0) @binding(2) var t_albedo: texture_2d<f32>;
+@group(0) @binding(3) var t_material: texture_2d<f32>;
+@group(0) @binding(4) var s_gbuffer: sampler;
+
+// Group 1: Lights + Camera
+struct LightData {
+    position: vec3<f32>,
+    light_type: u32,
+    direction: vec3<f32>,
+    range: f32,
+    color: vec3<f32>,
+    intensity: f32,
+    inner_cone: f32,
+    outer_cone: f32,
+    _padding: vec2<f32>,
+};
+
+struct LightsUniform {
+    lights: array<LightData, 16>,
+    count: u32,
+    ambient_color: vec3<f32>,
+};
+
+struct CameraPositionUniform {
+    camera_pos: vec3<f32>,
+};
+
+@group(1) @binding(0) var<uniform> lights_uniform: LightsUniform;
+@group(1) @binding(1) var<uniform> camera_data: CameraPositionUniform;
+
+// Group 2: Shadow + IBL
+struct ShadowUniform {
+    light_view_proj: mat4x4<f32>,
+    shadow_map_size: f32,
+    shadow_bias: f32,
+};
+
+@group(2) @binding(0) var t_shadow: texture_depth_2d;
+@group(2) @binding(1) var s_shadow: sampler_comparison;
+@group(2) @binding(2) var<uniform> shadow: ShadowUniform;
+@group(2) @binding(3) var t_brdf_lut: texture_2d<f32>;
+@group(2) @binding(4) var s_brdf_lut: sampler;
+
+// Fullscreen vertex
+struct VertexOutput {
+    @builtin(position) clip_position: vec4<f32>,
+    @location(0) uv: vec2<f32>,
+};
+
+@vertex
+fn vs_main(@location(0) position: vec2<f32>) -> VertexOutput {
+    var out: VertexOutput;
+    out.clip_position = vec4<f32>(position, 0.0, 1.0);
+    // Convert clip space [-1,1] to UV [0,1]
+ out.uv = vec2(position.x * 0.5 + 0.5, 1.0 - (position.y * 0.5 + 0.5)); + return out; +} + +// === BRDF functions (same as pbr_shader.wgsl) === + +fn distribution_ggx(N: vec3, H: vec3, roughness: f32) -> f32 { + let a = roughness * roughness; + let a2 = a * a; + let NdotH = max(dot(N, H), 0.0); + let NdotH2 = NdotH * NdotH; + let denom_inner = NdotH2 * (a2 - 1.0) + 1.0; + let denom = 3.14159265358979 * denom_inner * denom_inner; + return a2 / denom; +} + +fn geometry_schlick_ggx(NdotV: f32, roughness: f32) -> f32 { + let r = roughness + 1.0; + let k = (r * r) / 8.0; + return NdotV / (NdotV * (1.0 - k) + k); +} + +fn geometry_smith(N: vec3, V: vec3, L: vec3, roughness: f32) -> f32 { + let NdotV = max(dot(N, V), 0.0); + let NdotL = max(dot(N, L), 0.0); + return geometry_schlick_ggx(NdotV, roughness) * geometry_schlick_ggx(NdotL, roughness); +} + +fn fresnel_schlick(cosTheta: f32, F0: vec3) -> vec3 { + return F0 + (1.0 - F0) * pow(clamp(1.0 - cosTheta, 0.0, 1.0), 5.0); +} + +fn attenuation_point(distance: f32, range: f32) -> f32 { + let d_over_r = distance / range; + let d_over_r4 = d_over_r * d_over_r * d_over_r * d_over_r; + let falloff = clamp(1.0 - d_over_r4, 0.0, 1.0); + return (falloff * falloff) / (distance * distance + 0.0001); +} + +fn attenuation_spot(light: LightData, L: vec3) -> f32 { + let spot_dir = normalize(light.direction); + let theta = dot(spot_dir, -L); + return clamp( + (theta - light.outer_cone) / (light.inner_cone - light.outer_cone + 0.0001), + 0.0, 1.0, + ); +} + +fn compute_light_contribution( + light: LightData, N: vec3, V: vec3, world_pos: vec3, + F0: vec3, albedo: vec3, metallic: f32, roughness: f32, +) -> vec3 { + var L: vec3; + var radiance: vec3; + + if light.light_type == 0u { + L = normalize(-light.direction); + radiance = light.color * light.intensity; + } else if light.light_type == 1u { + let to_light = light.position - world_pos; + let dist = length(to_light); + L = normalize(to_light); + radiance = light.color * light.intensity 
* attenuation_point(dist, light.range); + } else { + let to_light = light.position - world_pos; + let dist = length(to_light); + L = normalize(to_light); + radiance = light.color * light.intensity * attenuation_point(dist, light.range) * attenuation_spot(light, L); + } + + let H = normalize(V + L); + let NDF = distribution_ggx(N, H, roughness); + let G = geometry_smith(N, V, L, roughness); + let F = fresnel_schlick(max(dot(H, V), 0.0), F0); + let ks = F; + let kd = (vec3(1.0) - ks) * (1.0 - metallic); + let numerator = NDF * G * F; + let NdotL = max(dot(N, L), 0.0); + let NdotV = max(dot(N, V), 0.0); + let denominator = 4.0 * NdotV * NdotL + 0.0001; + let specular = numerator / denominator; + + return (kd * albedo / 3.14159265358979 + specular) * radiance * NdotL; +} + +fn calculate_shadow(world_pos: vec3) -> f32 { + if shadow.shadow_map_size == 0.0 { return 1.0; } + let light_space_pos = shadow.light_view_proj * vec4(world_pos, 1.0); + let proj_coords = light_space_pos.xyz / light_space_pos.w; + let shadow_uv = vec2(proj_coords.x * 0.5 + 0.5, -proj_coords.y * 0.5 + 0.5); + let current_depth = proj_coords.z; + if shadow_uv.x < 0.0 || shadow_uv.x > 1.0 || shadow_uv.y < 0.0 || shadow_uv.y > 1.0 { return 1.0; } + if current_depth > 1.0 || current_depth < 0.0 { return 1.0; } + let texel_size = 1.0 / shadow.shadow_map_size; + var shadow_val = 0.0; + for (var x = -1; x <= 1; x++) { + for (var y = -1; y <= 1; y++) { + shadow_val += textureSampleCompare(t_shadow, s_shadow, shadow_uv + vec2(f32(x), f32(y)) * texel_size, current_depth - shadow.shadow_bias); + } + } + return shadow_val / 9.0; +} + +fn sample_environment(direction: vec3, roughness: f32) -> vec3 { + var env: vec3; + if direction.y > 0.0 { + env = mix(vec3(0.6, 0.6, 0.5), vec3(0.3, 0.5, 0.9), pow(direction.y, 0.4)); + } else { + env = mix(vec3(0.6, 0.6, 0.5), vec3(0.1, 0.08, 0.06), pow(-direction.y, 0.4)); + } + return mix(env, vec3(0.3, 0.35, 0.4), roughness * roughness); +} + +@fragment +fn fs_main(in: 
VertexOutput) -> @location(0) vec4 { + let world_pos = textureSample(t_position, s_gbuffer, in.uv).xyz; + let N = normalize(textureSample(t_normal, s_gbuffer, in.uv).xyz); + let albedo = textureSample(t_albedo, s_gbuffer, in.uv).rgb; + let mat_data = textureSample(t_material, s_gbuffer, in.uv); + let metallic = mat_data.r; + let roughness = mat_data.g; + let ao = mat_data.b; + + // Skip background pixels (position = 0,0,0 means no geometry) + if length(textureSample(t_position, s_gbuffer, in.uv).xyz) < 0.001 { + return vec4(0.05, 0.05, 0.08, 1.0); // background color + } + + let V = normalize(camera_data.camera_pos - world_pos); + let F0 = mix(vec3(0.04), albedo, metallic); + + let shadow_factor = calculate_shadow(world_pos); + var Lo = vec3(0.0); + let light_count = min(lights_uniform.count, 16u); + for (var i = 0u; i < light_count; i++) { + var contribution = compute_light_contribution( + lights_uniform.lights[i], N, V, world_pos, F0, albedo, metallic, roughness, + ); + if lights_uniform.lights[i].light_type == 0u { + contribution = contribution * shadow_factor; + } + Lo += contribution; + } + + // IBL + let NdotV_ibl = max(dot(N, V), 0.0); + let R = reflect(-V, N); + let irradiance = sample_environment(N, 1.0); + let F_env = fresnel_schlick(NdotV_ibl, F0); + let kd_ibl = (vec3(1.0) - F_env) * (1.0 - metallic); + let diffuse_ibl = kd_ibl * albedo * irradiance; + let prefiltered = sample_environment(R, roughness); + let brdf_val = textureSample(t_brdf_lut, s_brdf_lut, vec2(NdotV_ibl, roughness)); + let specular_ibl = prefiltered * (F0 * brdf_val.r + vec3(brdf_val.g)); + let ambient = (diffuse_ibl + specular_ibl) * ao; + + var color = ambient + Lo; + color = color / (color + vec3(1.0)); // Reinhard + color = pow(color, vec3(1.0 / 2.2)); // Gamma + + return vec4(color, 1.0); +} +``` + +- [ ] **Step 2: 커밋** + +```bash +git add crates/voltex_renderer/src/deferred_lighting.wgsl +git commit -m "feat(renderer): add deferred lighting pass shader with Cook-Torrance BRDF" 
+``` + +--- + +## Task 4: Deferred Pipeline (Rust) + +**Files:** +- Create: `crates/voltex_renderer/src/deferred_pipeline.rs` +- Modify: `crates/voltex_renderer/src/lib.rs` + +- [ ] **Step 1: deferred_pipeline.rs 작성** + +This file creates both G-Buffer pass and Lighting pass pipelines, plus their bind group layouts. + +```rust +// crates/voltex_renderer/src/deferred_pipeline.rs +use crate::vertex::MeshVertex; +use crate::fullscreen_quad::FullscreenVertex; +use crate::gbuffer::*; +use crate::gpu::DEPTH_FORMAT; + +// === G-Buffer Pass === + +pub fn gbuffer_camera_bind_group_layout(device: &wgpu::Device) -> wgpu::BindGroupLayout { + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("GBuffer Camera BGL"), + entries: &[ + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: true, + min_binding_size: None, + }, + count: None, + }, + ], + }) +} + +pub fn create_gbuffer_pipeline( + device: &wgpu::Device, + camera_layout: &wgpu::BindGroupLayout, + texture_layout: &wgpu::BindGroupLayout, + material_layout: &wgpu::BindGroupLayout, +) -> wgpu::RenderPipeline { + let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: Some("Deferred GBuffer Shader"), + source: wgpu::ShaderSource::Wgsl(include_str!("deferred_gbuffer.wgsl").into()), + }); + + let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("GBuffer Pipeline Layout"), + bind_group_layouts: &[camera_layout, texture_layout, material_layout], + immediate_size: 0, + }); + + device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: Some("GBuffer Pipeline"), + layout: Some(&layout), + vertex: wgpu::VertexState { + module: &shader, + entry_point: Some("vs_main"), + buffers: &[MeshVertex::LAYOUT], + compilation_options: wgpu::PipelineCompilationOptions::default(), + }, + 
fragment: Some(wgpu::FragmentState { + module: &shader, + entry_point: Some("fs_main"), + targets: &[ + Some(wgpu::ColorTargetState { + format: GBUFFER_POSITION_FORMAT, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + }), + Some(wgpu::ColorTargetState { + format: GBUFFER_NORMAL_FORMAT, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + }), + Some(wgpu::ColorTargetState { + format: GBUFFER_ALBEDO_FORMAT, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + }), + Some(wgpu::ColorTargetState { + format: GBUFFER_MATERIAL_FORMAT, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + }), + ], + compilation_options: wgpu::PipelineCompilationOptions::default(), + }), + primitive: wgpu::PrimitiveState { + topology: wgpu::PrimitiveTopology::TriangleList, + front_face: wgpu::FrontFace::Ccw, + cull_mode: Some(wgpu::Face::Back), + ..Default::default() + }, + depth_stencil: Some(wgpu::DepthStencilState { + format: DEPTH_FORMAT, + depth_write_enabled: true, + depth_compare: wgpu::CompareFunction::Less, + stencil: wgpu::StencilState::default(), + bias: wgpu::DepthBiasState::default(), + }), + multisample: wgpu::MultisampleState::default(), + multiview_mask: None, + cache: None, + }) +} + +// === Lighting Pass === + +pub fn lighting_gbuffer_bind_group_layout(device: &wgpu::Device) -> wgpu::BindGroupLayout { + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("Lighting GBuffer BGL"), + entries: &[ + // position texture + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: false }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // normal texture + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: 
wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // albedo texture + wgpu::BindGroupLayoutEntry { + binding: 2, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // material texture + wgpu::BindGroupLayoutEntry { + binding: 3, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // sampler + wgpu::BindGroupLayoutEntry { + binding: 4, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering), + count: None, + }, + ], + }) +} + +pub fn lighting_lights_bind_group_layout(device: &wgpu::Device) -> wgpu::BindGroupLayout { + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("Lighting Lights BGL"), + entries: &[ + // LightsUniform + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + // CameraPositionUniform + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + ], + }) +} + +pub fn lighting_shadow_bind_group_layout(device: &wgpu::Device) -> wgpu::BindGroupLayout { + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("Lighting Shadow+IBL BGL"), + entries: &[ + // shadow depth texture + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: 
wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Depth, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // shadow comparison sampler + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Comparison), + count: None, + }, + // ShadowUniform + wgpu::BindGroupLayoutEntry { + binding: 2, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + // BRDF LUT texture + wgpu::BindGroupLayoutEntry { + binding: 3, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // BRDF LUT sampler + wgpu::BindGroupLayoutEntry { + binding: 4, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering), + count: None, + }, + ], + }) +} + +pub fn create_lighting_pipeline( + device: &wgpu::Device, + surface_format: wgpu::TextureFormat, + gbuffer_layout: &wgpu::BindGroupLayout, + lights_layout: &wgpu::BindGroupLayout, + shadow_layout: &wgpu::BindGroupLayout, +) -> wgpu::RenderPipeline { + let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: Some("Deferred Lighting Shader"), + source: wgpu::ShaderSource::Wgsl(include_str!("deferred_lighting.wgsl").into()), + }); + + let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("Lighting Pipeline Layout"), + bind_group_layouts: &[gbuffer_layout, lights_layout, shadow_layout], + immediate_size: 0, + }); + + device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: Some("Lighting Pipeline"), + 
layout: Some(&layout), + vertex: wgpu::VertexState { + module: &shader, + entry_point: Some("vs_main"), + buffers: &[FullscreenVertex::LAYOUT], + compilation_options: wgpu::PipelineCompilationOptions::default(), + }, + fragment: Some(wgpu::FragmentState { + module: &shader, + entry_point: Some("fs_main"), + targets: &[Some(wgpu::ColorTargetState { + format: surface_format, + blend: Some(wgpu::BlendState::REPLACE), + write_mask: wgpu::ColorWrites::ALL, + })], + compilation_options: wgpu::PipelineCompilationOptions::default(), + }), + primitive: wgpu::PrimitiveState { + topology: wgpu::PrimitiveTopology::TriangleList, + ..Default::default() + }, + depth_stencil: None, // No depth for fullscreen pass + multisample: wgpu::MultisampleState::default(), + multiview_mask: None, + cache: None, + }) +} +``` + +- [ ] **Step 2: lib.rs에 deferred_pipeline 등록** + +```rust +pub mod deferred_pipeline; +``` + +And re-exports: +```rust +pub use deferred_pipeline::{ + create_gbuffer_pipeline, create_lighting_pipeline, + gbuffer_camera_bind_group_layout, + lighting_gbuffer_bind_group_layout, lighting_lights_bind_group_layout, lighting_shadow_bind_group_layout, +}; +``` + +- [ ] **Step 3: 빌드 확인** + +Run: `cargo build -p voltex_renderer` +Expected: 컴파일 성공 + +Run: `cargo test --workspace` +Expected: all pass (기존 200개) + +- [ ] **Step 4: 커밋** + +```bash +git add crates/voltex_renderer/src/deferred_pipeline.rs crates/voltex_renderer/src/lib.rs +git commit -m "feat(renderer): add deferred rendering pipeline (G-Buffer + Lighting pass)" +``` + +--- + +## Task 5: deferred_demo 예제 + +**Files:** +- Create: `examples/deferred_demo/Cargo.toml` +- Create: `examples/deferred_demo/src/main.rs` +- Modify: `Cargo.toml` (workspace members) + +NOTE: 이 예제는 복잡합니다 (GPU 리소스 설정, 바인드 그룹 생성, 2-pass 렌더). 기존 pbr_demo 패턴을 따르되 디퍼드로 변경. 구체 그리드 + 다수 포인트 라이트 씬. + +이 태스크는 가장 큰 구현이며, 더 capable한 모델로 실행해야 합니다. 
+ +- [ ] **Step 1: Cargo.toml** + +```toml +[package] +name = "deferred_demo" +version = "0.1.0" +edition = "2021" + +[dependencies] +voltex_math.workspace = true +voltex_platform.workspace = true +voltex_renderer.workspace = true +bytemuck.workspace = true +pollster.workspace = true +wgpu.workspace = true +``` + +- [ ] **Step 2: main.rs 작성** + +The example should: +1. Create window + GpuContext +2. Create GBuffer +3. Create G-Buffer pipeline + Lighting pipeline with proper bind group layouts +4. Generate sphere meshes (5x5 grid of metallic/roughness variations) +5. Set up 8 point lights orbiting the scene (to show deferred advantage) +6. Create all uniform buffers, textures, bind groups +7. Main loop: + - Update camera (FPS controller) + - Update light positions (orbit animation) + - Pass 1: G-Buffer pass (render all objects to MRT) + - Pass 2: Lighting pass (fullscreen quad, reads G-Buffer) + - Present + +Key: must create CameraPositionUniform buffer (vec3 + padding = 16 bytes) for the lighting pass. + +- [ ] **Step 3: workspace에 추가** + +`Cargo.toml` members에 `"examples/deferred_demo"` 추가. 
+ +- [ ] **Step 4: 빌드 + 실행 확인** + +Run: `cargo build --bin deferred_demo` +Run: `cargo run --bin deferred_demo` (수동 확인) + +- [ ] **Step 5: 커밋** + +```bash +git add examples/deferred_demo/ Cargo.toml +git commit -m "feat(renderer): add deferred_demo example with multi-light deferred rendering" +``` + +--- + +## Task 6: 문서 업데이트 + +**Files:** +- Modify: `docs/STATUS.md` +- Modify: `docs/DEFERRED.md` + +- [ ] **Step 1: STATUS.md에 Phase 7-1 추가** + +Phase 6-3 아래에: +```markdown +### Phase 7-1: Deferred Rendering +- voltex_renderer: GBuffer (4 MRT: Position/Normal/Albedo/Material + Depth) +- voltex_renderer: G-Buffer pass shader (MRT output, TBN normal mapping) +- voltex_renderer: Lighting pass shader (fullscreen quad, Cook-Torrance BRDF, multi-light, shadow, IBL) +- voltex_renderer: Deferred pipeline (gbuffer + lighting bind group layouts) +- examples/deferred_demo (5x5 sphere grid + 8 orbiting point lights) +``` + +예제 수 11로 업데이트. + +- [ ] **Step 2: DEFERRED.md에 Phase 7-1 미뤄진 항목 추가** + +```markdown +## Phase 7-1 + +- **투명 오브젝트** — 디퍼드에서 처리 불가. 별도 포워드 패스 필요. +- **G-Buffer 압축** — Position을 depth에서 복원, Normal을 octahedral 인코딩 등 미적용. +- **Light Volumes** — 풀스크린 라이팅만. 라이트별 sphere/cone 렌더 미구현. +- **Stencil 최적화** — 미구현. +``` + +- [ ] **Step 3: 커밋** + +```bash +git add docs/STATUS.md docs/DEFERRED.md +git commit -m "docs: add Phase 7-1 deferred rendering status and deferred items" +``` diff --git a/docs/superpowers/plans/2026-03-25-phase7-2-ssgi.md b/docs/superpowers/plans/2026-03-25-phase7-2-ssgi.md new file mode 100644 index 0000000..5156b5c --- /dev/null +++ b/docs/superpowers/plans/2026-03-25-phase7-2-ssgi.md @@ -0,0 +1,661 @@ +# Phase 7-2: SSGI Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** SSAO + Color Bleeding 기반 SSGI로 간접광과 앰비언트 오클루전 추가 + +**Architecture:** `voltex_renderer`에 ssgi.rs(리소스+커널 생성) + ssgi_shader.wgsl(SSGI 풀스크린 패스) 추가. 기존 deferred_lighting.wgsl의 Shadow+IBL 바인드 그룹에 SSGI 출력 텍스처를 추가하여 ambient에 적용. + +**Tech Stack:** Rust, wgpu 28.0, WGSL + +**Spec:** `docs/superpowers/specs/2026-03-25-phase7-2-ssgi.md` + +--- + +## File Structure + +- `crates/voltex_renderer/src/ssgi.rs` — SsgiResources, SsgiUniform, 커널/노이즈 생성 (Create) +- `crates/voltex_renderer/src/ssgi_shader.wgsl` — SSGI 풀스크린 셰이더 (Create) +- `crates/voltex_renderer/src/deferred_pipeline.rs` — SSGI 파이프라인 + 바인드 그룹 레이아웃 추가 (Modify) +- `crates/voltex_renderer/src/deferred_lighting.wgsl` — SSGI 텍스처 읽어서 ambient 적용 (Modify) +- `crates/voltex_renderer/src/lib.rs` — ssgi 모듈 등록 (Modify) +- `examples/deferred_demo/src/main.rs` — SSGI 패스 통합 (Modify) + +--- + +## Task 1: SsgiResources + 커널/노이즈 생성 + +**Files:** +- Create: `crates/voltex_renderer/src/ssgi.rs` +- Modify: `crates/voltex_renderer/src/lib.rs` + +- [ ] **Step 1: ssgi.rs 작성** + +```rust +// crates/voltex_renderer/src/ssgi.rs +use bytemuck::{Pod, Zeroable}; +use wgpu::util::DeviceExt; + +pub const SSGI_OUTPUT_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba16Float; +pub const SSGI_KERNEL_SIZE: usize = 64; + +#[repr(C)] +#[derive(Copy, Clone, Debug, Pod, Zeroable)] +pub struct SsgiUniform { + pub projection: [f32; 16], + pub view: [f32; 16], + pub radius: f32, + pub bias: f32, + pub intensity: f32, + pub indirect_strength: f32, +} + +impl Default for SsgiUniform { + fn default() -> Self { + Self { + projection: [0.0; 16], + view: [0.0; 16], + radius: 0.5, + bias: 0.025, + intensity: 1.5, + indirect_strength: 0.5, + } + } +} + +pub struct SsgiResources { + pub output_view: wgpu::TextureView, + pub kernel_buffer: wgpu::Buffer, + pub noise_view: wgpu::TextureView, + pub noise_sampler: wgpu::Sampler, + pub uniform_buffer: wgpu::Buffer, + pub width: u32, + pub height: u32, +} + +impl SsgiResources { + pub fn new(device: 
&wgpu::Device, queue: &wgpu::Queue, width: u32, height: u32) -> Self { + let output_view = create_ssgi_output(device, width, height); + let kernel_data = generate_kernel(SSGI_KERNEL_SIZE); + let kernel_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: Some("SSGI Kernel"), + contents: bytemuck::cast_slice(&kernel_data), + usage: wgpu::BufferUsages::UNIFORM, + }); + let noise_view = create_noise_texture(device, queue); + let noise_sampler = device.create_sampler(&wgpu::SamplerDescriptor { + label: Some("SSGI Noise Sampler"), + address_mode_u: wgpu::AddressMode::Repeat, + address_mode_v: wgpu::AddressMode::Repeat, + mag_filter: wgpu::FilterMode::Nearest, + min_filter: wgpu::FilterMode::Nearest, + ..Default::default() + }); + let uniform_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: Some("SSGI Uniform"), + contents: bytemuck::bytes_of(&SsgiUniform::default()), + usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, + }); + Self { output_view, kernel_buffer, noise_view, noise_sampler, uniform_buffer, width, height } + } + + pub fn resize(&mut self, device: &wgpu::Device, width: u32, height: u32) { + self.output_view = create_ssgi_output(device, width, height); + self.width = width; + self.height = height; + } +} + +fn create_ssgi_output(device: &wgpu::Device, w: u32, h: u32) -> wgpu::TextureView { + let tex = device.create_texture(&wgpu::TextureDescriptor { + label: Some("SSGI Output"), + size: wgpu::Extent3d { width: w, height: h, depth_or_array_layers: 1 }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: SSGI_OUTPUT_FORMAT, + usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }); + tex.create_view(&wgpu::TextureViewDescriptor::default()) +} + +/// Generate hemisphere sample kernel for SSAO/SSGI. +/// Samples are distributed in a hemisphere (z >= 0) with more samples near center. 
+pub fn generate_kernel(count: usize) -> Vec<[f32; 4]> { + let mut kernel = Vec::with_capacity(count); + for i in 0..count { + // Pseudo-random using simple hash + let fi = i as f32; + let x = pseudo_random(i * 2) * 2.0 - 1.0; + let y = pseudo_random(i * 2 + 1) * 2.0 - 1.0; + let z = pseudo_random(i * 3 + 7).max(0.05); // hemisphere, z > 0 + + let len = (x * x + y * y + z * z).sqrt(); + let (nx, ny, nz) = (x / len, y / len, z / len); + + // Scale: more samples near center + let mut scale = fi / count as f32; + scale = 0.1 + scale * scale * 0.9; // lerp(0.1, 1.0, scale^2) + + kernel.push([nx * scale, ny * scale, nz * scale, 0.0]); + } + kernel +} + +/// Generate 4x4 noise texture data (random tangent-space rotation vectors). +pub fn generate_noise_data() -> Vec<[f32; 4]> { + let mut noise = Vec::with_capacity(16); + for i in 0..16 { + let x = pseudo_random(i * 5 + 13) * 2.0 - 1.0; + let y = pseudo_random(i * 7 + 17) * 2.0 - 1.0; + let len = (x * x + y * y).sqrt().max(0.001); + noise.push([x / len, y / len, 0.0, 0.0]); + } + noise +} + +fn create_noise_texture(device: &wgpu::Device, queue: &wgpu::Queue) -> wgpu::TextureView { + let data = generate_noise_data(); + let bytes: Vec = data.iter().flat_map(|v| { + v.iter().flat_map(|f| f.to_le_bytes()) + }).collect(); + + let tex = device.create_texture(&wgpu::TextureDescriptor { + label: Some("SSGI Noise"), + size: wgpu::Extent3d { width: 4, height: 4, depth_or_array_layers: 1 }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba32Float, + usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST, + view_formats: &[], + }); + queue.write_texture( + wgpu::TexelCopyTextureInfo { texture: &tex, mip_level: 0, origin: wgpu::Origin3d::ZERO, aspect: wgpu::TextureAspect::All }, + &bytes, + wgpu::TexelCopyBufferLayout { offset: 0, bytes_per_row: Some(4 * 16), rows_per_image: None }, + wgpu::Extent3d { width: 4, height: 4, depth_or_array_layers: 1 
}, + ); + tex.create_view(&wgpu::TextureViewDescriptor::default()) +} + +/// Simple deterministic pseudo-random [0, 1) from integer seed. +fn pseudo_random(seed: usize) -> f32 { + let n = seed.wrapping_mul(0x5DEECE66D).wrapping_add(0xB) & 0xFFFFFF; + n as f32 / 0xFFFFFF as f32 +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_kernel_hemisphere() { + let kernel = generate_kernel(64); + assert_eq!(kernel.len(), 64); + for k in &kernel { + assert!(k[2] >= 0.0, "kernel z must be >= 0 (hemisphere), got {}", k[2]); + let len = (k[0] * k[0] + k[1] * k[1] + k[2] * k[2]).sqrt(); + assert!(len <= 1.01, "kernel sample must be within unit hemisphere, len={}", len); + } + } + + #[test] + fn test_noise_data() { + let noise = generate_noise_data(); + assert_eq!(noise.len(), 16); + for n in &noise { + assert!((n[2]).abs() < 1e-5, "noise z should be 0"); + let len = (n[0] * n[0] + n[1] * n[1]).sqrt(); + assert!((len - 1.0).abs() < 0.1, "noise vector should be roughly unit length, got {}", len); + } + } + + #[test] + fn test_ssgi_uniform_default() { + let u = SsgiUniform::default(); + assert!((u.radius - 0.5).abs() < 1e-5); + assert!((u.bias - 0.025).abs() < 1e-5); + } +} +``` + +- [ ] **Step 2: lib.rs에 ssgi 모듈 등록** + +```rust +pub mod ssgi; +pub use ssgi::{SsgiResources, SsgiUniform, SSGI_OUTPUT_FORMAT}; +``` + +- [ ] **Step 3: 빌드 + 테스트** + +Run: `cargo test -p voltex_renderer` +Expected: 기존 20 + 3 = 23 PASS + +- [ ] **Step 4: 커밋** + +```bash +git add crates/voltex_renderer/src/ssgi.rs crates/voltex_renderer/src/lib.rs +git commit -m "feat(renderer): add SSGI resources with hemisphere kernel and noise texture" +``` + +--- + +## Task 2: SSGI 셰이더 + 파이프라인 + +**Files:** +- Create: `crates/voltex_renderer/src/ssgi_shader.wgsl` +- Modify: `crates/voltex_renderer/src/deferred_pipeline.rs` + +- [ ] **Step 1: ssgi_shader.wgsl 작성** + +```wgsl +// SSGI pass: screen-space ambient occlusion + color bleeding +// Reads G-Buffer position/normal/albedo, outputs AO + indirect 
color + +// Group 0: G-Buffer (same layout as lighting pass) +@group(0) @binding(0) var t_position: texture_2d; +@group(0) @binding(1) var t_normal: texture_2d; +@group(0) @binding(2) var t_albedo: texture_2d; +@group(0) @binding(3) var s_gbuffer: sampler; + +// Group 1: SSGI data +struct SsgiUniform { + projection: mat4x4, + view: mat4x4, + radius: f32, + bias: f32, + intensity: f32, + indirect_strength: f32, +}; + +struct SsgiKernel { + samples: array, 64>, +}; + +@group(1) @binding(0) var ssgi: SsgiUniform; +@group(1) @binding(1) var kernel: SsgiKernel; +@group(1) @binding(2) var t_noise: texture_2d; +@group(1) @binding(3) var s_noise: sampler; + +struct VertexOutput { + @builtin(position) clip_position: vec4, + @location(0) uv: vec2, +}; + +@vertex +fn vs_main(@location(0) position: vec2) -> VertexOutput { + var out: VertexOutput; + out.clip_position = vec4(position, 0.0, 1.0); + out.uv = vec2(position.x * 0.5 + 0.5, 1.0 - (position.y * 0.5 + 0.5)); + return out; +} + +@fragment +fn fs_main(in: VertexOutput) -> @location(0) vec4 { + let uv = in.uv; + + let world_pos = textureSample(t_position, s_gbuffer, uv).xyz; + + // Skip background + if dot(world_pos, world_pos) < 0.001 { + return vec4(1.0, 0.0, 0.0, 0.0); // AO=1 (no occlusion), indirect=0 + } + + let world_normal = normalize(textureSample(t_normal, s_gbuffer, uv).xyz * 2.0 - 1.0); + + // Transform to view space + let view_pos = (ssgi.view * vec4(world_pos, 1.0)).xyz; + let view_normal = normalize((ssgi.view * vec4(world_normal, 0.0)).xyz); + + // Random rotation from noise texture (4x4 tiling) + let tex_dims = textureDimensions(t_position); + let noise_scale = vec2(f32(tex_dims.x) / 4.0, f32(tex_dims.y) / 4.0); + let random_vec = textureSample(t_noise, s_noise, uv * noise_scale).xyz; + + // Construct TBN in view space using Gram-Schmidt + let tangent = normalize(random_vec - view_normal * dot(random_vec, view_normal)); + let bitangent = cross(view_normal, tangent); + let TBN = mat3x3(tangent, bitangent, 
view_normal); + + var occlusion = 0.0; + var indirect = vec3(0.0); + + for (var i = 0u; i < 64u; i++) { + // Sample position in view space + let sample_offset = TBN * kernel.samples[i].xyz; + let sample_view_pos = view_pos + sample_offset * ssgi.radius; + + // Project to screen UV + let clip = ssgi.projection * vec4(sample_view_pos, 1.0); + var screen_uv = clip.xy / clip.w * 0.5 + 0.5; + screen_uv.y = 1.0 - screen_uv.y; + + // Clamp to valid range + screen_uv = clamp(screen_uv, vec2(0.001), vec2(0.999)); + + // Read actual position at that screen location + let actual_world_pos = textureSample(t_position, s_gbuffer, screen_uv).xyz; + let actual_view_pos = (ssgi.view * vec4(actual_world_pos, 1.0)).xyz; + + // Occlusion: is the actual geometry closer to camera than our sample? + let depth_diff = sample_view_pos.z - actual_view_pos.z; + let range_check = smoothstep(0.0, 1.0, ssgi.radius / (abs(view_pos.z - actual_view_pos.z) + 0.001)); + + if depth_diff > ssgi.bias && depth_diff < ssgi.radius { + occlusion += range_check; + // Color bleeding: sample albedo at occluder position + let sample_albedo = textureSample(t_albedo, s_gbuffer, screen_uv).rgb; + indirect += sample_albedo * range_check; + } + } + + let ao = clamp(1.0 - (occlusion / 64.0) * ssgi.intensity, 0.0, 1.0); + indirect = indirect / 64.0 * ssgi.indirect_strength; + + return vec4(ao, indirect); +} +``` + +- [ ] **Step 2: deferred_pipeline.rs에 SSGI 파이프라인 함수 추가** + +Add to deferred_pipeline.rs: + +```rust +use crate::ssgi::SSGI_OUTPUT_FORMAT; + +/// SSGI pass: reads G-Buffer (group 0) + SSGI data (group 1) +pub fn ssgi_gbuffer_bind_group_layout(device: &wgpu::Device) -> wgpu::BindGroupLayout { + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("SSGI GBuffer BGL"), + entries: &[ + // position (non-filterable) + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { 
filterable: false }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // normal (filterable) + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // albedo (filterable) + wgpu::BindGroupLayoutEntry { + binding: 2, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // sampler (non-filtering for position) + wgpu::BindGroupLayoutEntry { + binding: 3, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering), + count: None, + }, + ], + }) +} + +pub fn ssgi_data_bind_group_layout(device: &wgpu::Device) -> wgpu::BindGroupLayout { + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("SSGI Data BGL"), + entries: &[ + // SsgiUniform + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + // kernel (uniform buffer, 64 * vec4 = 1024 bytes) + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + // noise texture (non-filterable, Rgba32Float) + wgpu::BindGroupLayoutEntry { + binding: 2, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: false }, + 
view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // noise sampler + wgpu::BindGroupLayoutEntry { + binding: 3, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering), + count: None, + }, + ], + }) +} + +pub fn create_ssgi_pipeline( + device: &wgpu::Device, + gbuffer_layout: &wgpu::BindGroupLayout, + data_layout: &wgpu::BindGroupLayout, +) -> wgpu::RenderPipeline { + let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: Some("SSGI Shader"), + source: wgpu::ShaderSource::Wgsl(include_str!("ssgi_shader.wgsl").into()), + }); + + let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("SSGI Pipeline Layout"), + bind_group_layouts: &[gbuffer_layout, data_layout], + immediate_size: 0, + }); + + device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: Some("SSGI Pipeline"), + layout: Some(&layout), + vertex: wgpu::VertexState { + module: &shader, + entry_point: Some("vs_main"), + buffers: &[FullscreenVertex::LAYOUT], + compilation_options: wgpu::PipelineCompilationOptions::default(), + }, + fragment: Some(wgpu::FragmentState { + module: &shader, + entry_point: Some("fs_main"), + targets: &[Some(wgpu::ColorTargetState { + format: SSGI_OUTPUT_FORMAT, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + })], + compilation_options: wgpu::PipelineCompilationOptions::default(), + }), + primitive: wgpu::PrimitiveState { + topology: wgpu::PrimitiveTopology::TriangleList, + ..Default::default() + }, + depth_stencil: None, + multisample: wgpu::MultisampleState::default(), + multiview_mask: None, + cache: None, + }) +} +``` + +- [ ] **Step 3: 빌드 확인** + +Run: `cargo build -p voltex_renderer` +Expected: 컴파일 성공 + +- [ ] **Step 4: 커밋** + +```bash +git add crates/voltex_renderer/src/ssgi_shader.wgsl crates/voltex_renderer/src/deferred_pipeline.rs +git commit -m "feat(renderer): add SSGI shader and 
pipeline for screen-space GI" +``` + +--- + +## Task 3: Lighting Pass에 SSGI 통합 + +**Files:** +- Modify: `crates/voltex_renderer/src/deferred_lighting.wgsl` +- Modify: `crates/voltex_renderer/src/deferred_pipeline.rs` + +- [ ] **Step 1: lighting_shadow_bind_group_layout에 SSGI binding 추가** + +현재 `lighting_shadow_bind_group_layout`에 binding 0-4 (shadow+IBL). 여기에 추가: + +```rust +// binding 5: SSGI output texture +wgpu::BindGroupLayoutEntry { + binding: 5, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, +}, +// binding 6: SSGI sampler +wgpu::BindGroupLayoutEntry { + binding: 6, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering), + count: None, +}, +``` + +- [ ] **Step 2: deferred_lighting.wgsl에 SSGI 바인딩 + 적용 추가** + +Group 2에 추가: +```wgsl +@group(2) @binding(5) var t_ssgi: texture_2d; +@group(2) @binding(6) var s_ssgi: sampler; +``` + +Fragment shader에서 ambient 계산 부분 변경: +```wgsl +// 기존: let ambient = (diffuse_ibl + specular_ibl) * ao; +// 변경: +let ssgi_data = textureSample(t_ssgi, s_ssgi, uv); +let ssgi_ao = ssgi_data.r; +let ssgi_indirect = ssgi_data.gba; +let ambient = (diffuse_ibl + specular_ibl) * ao * ssgi_ao + ssgi_indirect; +``` + +- [ ] **Step 3: 빌드 확인** + +Run: `cargo build -p voltex_renderer` +Expected: 컴파일 성공 + +- [ ] **Step 4: 커밋** + +```bash +git add crates/voltex_renderer/src/deferred_lighting.wgsl crates/voltex_renderer/src/deferred_pipeline.rs +git commit -m "feat(renderer): integrate SSGI output into deferred lighting pass" +``` + +--- + +## Task 4: deferred_demo에 SSGI 패스 통합 + +**Files:** +- Modify: `examples/deferred_demo/src/main.rs` + +NOTE: 이 태스크는 기존 deferred_demo를 확장하여 3-pass 렌더링으로 변경합니다. + +변경사항: +1. `SsgiResources::new()` 호출하여 SSGI 리소스 생성 +2. SSGI 파이프라인 + 바인드 그룹 레이아웃 생성 +3. 
SSGI 바인드 그룹 2개 생성 (G-Buffer + SSGI data) +4. 기존 Shadow+IBL 바인드 그룹에 SSGI output texture + sampler 추가 (binding 5,6) +5. 렌더 루프에 SSGI 패스 삽입 (Pass 2: SSGI, 기존 Lighting은 Pass 3으로) +6. 매 프레임 SsgiUniform 업데이트 (view, projection 행렬) +7. 리사이즈 시 SSGI 리소스 + 바인드 그룹 재생성 + +이 태스크는 deferred_demo의 전체 구조를 이해해야 하므로 opus 모델로 실행. + +- [ ] **Step 1: deferred_demo 수정** + +Read the current `examples/deferred_demo/src/main.rs` first, then add SSGI integration. + +- [ ] **Step 2: 빌드 확인** + +Run: `cargo build --bin deferred_demo` +Expected: 컴파일 성공 + +- [ ] **Step 3: 커밋** + +```bash +git add examples/deferred_demo/src/main.rs +git commit -m "feat(renderer): add SSGI pass to deferred_demo (AO + color bleeding)" +``` + +--- + +## Task 5: 문서 업데이트 + +**Files:** +- Modify: `docs/STATUS.md` +- Modify: `docs/DEFERRED.md` + +- [ ] **Step 1: STATUS.md에 Phase 7-2 추가** + +Phase 7-1 아래에: +```markdown +### Phase 7-2: SSGI (Screen-Space Global Illumination) +- voltex_renderer: SsgiResources (hemisphere kernel, 4x4 noise, output texture) +- voltex_renderer: SSGI shader (SSAO + color bleeding in one pass) +- voltex_renderer: SSGI pipeline + bind group layouts +- voltex_renderer: Lighting pass SSGI integration (ambient * ssgi_ao + indirect) +- deferred_demo updated with 3-pass rendering (GBuffer → SSGI → Lighting) +``` + +테스트 수 업데이트 (voltex_renderer: 23). + +- [ ] **Step 2: DEFERRED.md에 Phase 7-2 미뤄진 항목** + +```markdown +## Phase 7-2 + +- **Bilateral Blur** — SSGI 노이즈 제거 블러 미구현. 4x4 노이즈 타일링만. +- **반해상도 렌더링** — 풀 해상도에서 SSGI 실행. 성능 최적화 미적용. +- **Temporal Accumulation** — 프레임 간 누적 미구현. 매 프레임 독립 계산. +- **Light Probes** — 베이크 기반 GI 미구현. 
+``` + +- [ ] **Step 3: 커밋** + +```bash +git add docs/STATUS.md docs/DEFERRED.md +git commit -m "docs: add Phase 7-2 SSGI status and deferred items" +``` diff --git a/docs/superpowers/plans/2026-03-25-phase7-3-rt-shadows.md b/docs/superpowers/plans/2026-03-25-phase7-3-rt-shadows.md new file mode 100644 index 0000000..c43a247 --- /dev/null +++ b/docs/superpowers/plans/2026-03-25-phase7-3-rt-shadows.md @@ -0,0 +1,631 @@ +# Phase 7-3: RT Shadows Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** wgpu ray query로 하드웨어 레이트레이싱 기반 그림자 구현 — 정확한 픽셀-퍼펙트 그림자 + +**Architecture:** BLAS/TLAS acceleration structure를 구축하고, 컴퓨트 셰이더에서 G-Buffer position을 읽어 light 방향으로 ray query를 수행. 차폐 여부를 R8Unorm shadow 텍스처에 기록. Lighting Pass에서 이 텍스처를 읽어 기존 PCF shadow map 대체. + +**Tech Stack:** Rust, wgpu 28.0 (EXPERIMENTAL_RAY_QUERY), WGSL (ray_query) + +**Spec:** `docs/superpowers/specs/2026-03-25-phase7-3-rt-shadows.md` + +--- + +## File Structure + +### 새 파일 +- `crates/voltex_renderer/src/rt_accel.rs` — BLAS/TLAS 생성 관리 (Create) +- `crates/voltex_renderer/src/rt_shadow.rs` — RT Shadow 리소스 + uniform (Create) +- `crates/voltex_renderer/src/rt_shadow_shader.wgsl` — RT shadow 컴퓨트 셰이더 (Create) + +### 수정 파일 +- `crates/voltex_renderer/src/deferred_pipeline.rs` — RT shadow 컴퓨트 파이프라인, lighting group에 RT shadow binding 추가 (Modify) +- `crates/voltex_renderer/src/deferred_lighting.wgsl` — RT shadow 텍스처 사용 (Modify) +- `crates/voltex_renderer/src/lib.rs` — 새 모듈 등록 (Modify) +- `examples/deferred_demo/src/main.rs` — RT shadow 통합 (Modify) + +--- + +## Task 1: rt_accel.rs — BLAS/TLAS 관리 + +**Files:** +- Create: `crates/voltex_renderer/src/rt_accel.rs` +- Modify: `crates/voltex_renderer/src/lib.rs` + +- [ ] **Step 1: rt_accel.rs 작성** + +This module wraps wgpu's acceleration structure API. 
+ +```rust +// crates/voltex_renderer/src/rt_accel.rs +use crate::vertex::MeshVertex; + +/// Mesh data needed to build a BLAS. +pub struct BlasMeshData<'a> { + pub vertex_buffer: &'a wgpu::Buffer, + pub index_buffer: &'a wgpu::Buffer, + pub vertex_count: u32, + pub index_count: u32, +} + +/// Manages BLAS/TLAS for ray tracing. +pub struct RtAccel { + pub blas_list: Vec, + pub tlas: wgpu::Tlas, +} + +impl RtAccel { + /// Create acceleration structures. + /// `meshes` — one BLAS per unique mesh. + /// `instances` — (mesh_index, transform [3x4 row-major f32; 12]). + pub fn new( + device: &wgpu::Device, + encoder: &mut wgpu::CommandEncoder, + meshes: &[BlasMeshData], + instances: &[(usize, [f32; 12])], + ) -> Self { + // 1. Create BLAS for each mesh + let mut blas_list = Vec::new(); + let mut blas_sizes = Vec::new(); + + for mesh in meshes { + let size_desc = wgpu::BlasTriangleGeometrySizeDescriptor { + vertex_format: wgpu::VertexFormat::Float32x3, + vertex_count: mesh.vertex_count, + index_format: Some(wgpu::IndexFormat::Uint16), + index_count: Some(mesh.index_count), + flags: wgpu::AccelerationStructureGeometryFlags::OPAQUE, + }; + blas_sizes.push(size_desc); + } + + for (i, mesh) in meshes.iter().enumerate() { + let blas = device.create_blas( + &wgpu::CreateBlasDescriptor { + label: Some(&format!("BLAS {}", i)), + flags: wgpu::AccelerationStructureFlags::PREFER_FAST_TRACE, + update_mode: wgpu::AccelerationStructureUpdateMode::Build, + }, + wgpu::BlasGeometrySizeDescriptors::Triangles { + descriptors: vec![blas_sizes[i].clone()], + }, + ); + blas_list.push(blas); + } + + // Build all BLAS + let blas_entries: Vec = meshes.iter().enumerate().map(|(i, mesh)| { + wgpu::BlasBuildEntry { + blas: &blas_list[i], + geometry: wgpu::BlasGeometries::TriangleGeometries(vec![ + wgpu::BlasTriangleGeometry { + size: &blas_sizes[i], + vertex_buffer: mesh.vertex_buffer, + first_vertex: 0, + vertex_stride: std::mem::size_of::() as u64, + index_buffer: Some(mesh.index_buffer), + 
first_index: Some(0), + transform_buffer: None, + transform_buffer_offset: None, + }, + ]), + } + }).collect(); + + // 2. Create TLAS + let max_instances = instances.len().max(1) as u32; + let mut tlas = device.create_tlas(&wgpu::CreateTlasDescriptor { + label: Some("TLAS"), + max_instances, + flags: wgpu::AccelerationStructureFlags::PREFER_FAST_TRACE, + update_mode: wgpu::AccelerationStructureUpdateMode::Build, + }); + + // Fill TLAS instances + for (i, (mesh_idx, transform)) in instances.iter().enumerate() { + tlas[i] = Some(wgpu::TlasInstance::new( + &blas_list[*mesh_idx], + *transform, + 0, // custom_data + 0xFF, // mask + )); + } + + // 3. Build + encoder.build_acceleration_structures( + blas_entries.iter(), + [&tlas], + ); + + RtAccel { blas_list, tlas } + } + + /// Update TLAS instance transforms (BLAS stays the same). + pub fn update_instances( + &mut self, + encoder: &mut wgpu::CommandEncoder, + instances: &[(usize, [f32; 12])], + ) { + for (i, (mesh_idx, transform)) in instances.iter().enumerate() { + self.tlas[i] = Some(wgpu::TlasInstance::new( + &self.blas_list[*mesh_idx], + *transform, + 0, + 0xFF, + )); + } + + // Rebuild TLAS only (no BLAS rebuild) + encoder.build_acceleration_structures( + std::iter::empty(), + [&self.tlas], + ); + } +} + +/// Convert a 4x4 column-major matrix to 3x4 row-major transform for TLAS instance. +pub fn mat4_to_tlas_transform(m: &[f32; 16]) -> [f32; 12] { + // Column-major [c0r0, c0r1, c0r2, c0r3, c1r0, ...] to + // Row-major 3x4 [r0c0, r0c1, r0c2, r0c3, r1c0, ...] 
+ [ + m[0], m[4], m[8], m[12], // row 0 + m[1], m[5], m[9], m[13], // row 1 + m[2], m[6], m[10], m[14], // row 2 + ] +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_mat4_to_tlas_transform_identity() { + let identity: [f32; 16] = [ + 1.0, 0.0, 0.0, 0.0, + 0.0, 1.0, 0.0, 0.0, + 0.0, 0.0, 1.0, 0.0, + 0.0, 0.0, 0.0, 1.0, + ]; + let t = mat4_to_tlas_transform(&identity); + assert_eq!(t, [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]); + } + + #[test] + fn test_mat4_to_tlas_transform_translation() { + // Column-major translation (5, 10, 15) + let m: [f32; 16] = [ + 1.0, 0.0, 0.0, 0.0, + 0.0, 1.0, 0.0, 0.0, + 0.0, 0.0, 1.0, 0.0, + 5.0, 10.0, 15.0, 1.0, + ]; + let t = mat4_to_tlas_transform(&m); + // Row 0: [1, 0, 0, 5] + assert_eq!(t[3], 5.0); + assert_eq!(t[7], 10.0); + assert_eq!(t[11], 15.0); + } +} +``` + +- [ ] **Step 2: lib.rs에 모듈 등록** + +```rust +pub mod rt_accel; +pub use rt_accel::{RtAccel, BlasMeshData, mat4_to_tlas_transform}; +``` + +- [ ] **Step 3: 빌드 + 테스트** + +Run: `cargo test -p voltex_renderer` +Expected: 기존 23 + 2 = 25 PASS + +- [ ] **Step 4: 커밋** + +```bash +git add crates/voltex_renderer/src/rt_accel.rs crates/voltex_renderer/src/lib.rs +git commit -m "feat(renderer): add BLAS/TLAS acceleration structure management for RT" +``` + +--- + +## Task 2: RT Shadow 리소스 + 컴퓨트 셰이더 + +**Files:** +- Create: `crates/voltex_renderer/src/rt_shadow.rs` +- Create: `crates/voltex_renderer/src/rt_shadow_shader.wgsl` +- Modify: `crates/voltex_renderer/src/lib.rs` + +- [ ] **Step 1: rt_shadow.rs 작성** + +```rust +// crates/voltex_renderer/src/rt_shadow.rs +use bytemuck::{Pod, Zeroable}; +use wgpu::util::DeviceExt; + +pub const RT_SHADOW_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::R32Float; + +#[repr(C)] +#[derive(Copy, Clone, Debug, Pod, Zeroable)] +pub struct RtShadowUniform { + pub light_direction: [f32; 3], + pub _pad0: f32, + pub width: u32, + pub height: u32, + pub _pad1: [u32; 2], +} + +pub struct RtShadowResources { + pub 
shadow_texture: wgpu::Texture, + pub shadow_view: wgpu::TextureView, + pub uniform_buffer: wgpu::Buffer, + pub width: u32, + pub height: u32, +} + +impl RtShadowResources { + pub fn new(device: &wgpu::Device, width: u32, height: u32) -> Self { + let (shadow_texture, shadow_view) = create_shadow_texture(device, width, height); + let uniform = RtShadowUniform { + light_direction: [0.0, -1.0, 0.0], + _pad0: 0.0, + width, + height, + _pad1: [0; 2], + }; + let uniform_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: Some("RT Shadow Uniform"), + contents: bytemuck::bytes_of(&uniform), + usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, + }); + Self { shadow_texture, shadow_view, uniform_buffer, width, height } + } + + pub fn resize(&mut self, device: &wgpu::Device, width: u32, height: u32) { + let (tex, view) = create_shadow_texture(device, width, height); + self.shadow_texture = tex; + self.shadow_view = view; + self.width = width; + self.height = height; + } +} + +fn create_shadow_texture(device: &wgpu::Device, w: u32, h: u32) -> (wgpu::Texture, wgpu::TextureView) { + let tex = device.create_texture(&wgpu::TextureDescriptor { + label: Some("RT Shadow Texture"), + size: wgpu::Extent3d { width: w, height: h, depth_or_array_layers: 1 }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: RT_SHADOW_FORMAT, + usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }); + let view = tex.create_view(&wgpu::TextureViewDescriptor::default()); + (tex, view) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_rt_shadow_uniform_size() { + assert_eq!(std::mem::size_of::(), 32); + } +} +``` + +- [ ] **Step 2: rt_shadow_shader.wgsl 작성** + +```wgsl +// RT Shadow compute shader +// Traces shadow rays from G-Buffer world positions toward the light + +@group(0) @binding(0) var t_position: texture_2d; +@group(0) @binding(1) var 
t_normal: texture_2d<f32>;
+
+struct RtShadowUniform {
+    light_direction: vec3<f32>,
+    _pad0: f32,
+    width: u32,
+    height: u32,
+    _pad1: vec2<u32>,
+};
+
+@group(1) @binding(0) var tlas: acceleration_structure;
+@group(1) @binding(1) var t_shadow_out: texture_storage_2d<r32float, write>;
+@group(1) @binding(2) var<uniform> uniforms: RtShadowUniform;
+
+@compute @workgroup_size(8, 8)
+fn main(@builtin(global_invocation_id) id: vec3<u32>) {
+    if id.x >= uniforms.width || id.y >= uniforms.height {
+        return;
+    }
+
+    let world_pos = textureLoad(t_position, vec2<i32>(id.xy), 0).xyz;
+
+    // Skip background pixels
+    if dot(world_pos, world_pos) < 0.001 {
+        textureStore(t_shadow_out, vec2<i32>(id.xy), vec4(1.0, 0.0, 0.0, 0.0));
+        return;
+    }
+
+    let normal = normalize(textureLoad(t_normal, vec2<i32>(id.xy), 0).xyz * 2.0 - 1.0);
+
+    // Ray from surface toward light (opposite of light direction)
+    let ray_origin = world_pos + normal * 0.01; // bias off surface
+    let ray_dir = normalize(-uniforms.light_direction);
+
+    // Trace shadow ray
+    var rq: ray_query;
+    rayQueryInitialize(&rq, tlas,
+        RAY_FLAG_TERMINATE_ON_FIRST_HIT | RAY_FLAG_SKIP_CLOSEST_HIT_SHADER,
+        0xFFu, ray_origin, 0.001, ray_dir, 1000.0);
+    rayQueryProceed(&rq);
+
+    var shadow_val = 1.0; // lit by default
+    if rayQueryGetCommittedIntersectionType(&rq) != RAY_QUERY_COMMITTED_INTERSECTION_NONE {
+        shadow_val = 0.0; // in shadow
+    }
+
+    textureStore(t_shadow_out, vec2<i32>(id.xy), vec4(shadow_val, 0.0, 0.0, 0.0));
+}
`crates/voltex_renderer/src/deferred_pipeline.rs` +- Modify: `crates/voltex_renderer/src/deferred_lighting.wgsl` + +- [ ] **Step 1: deferred_pipeline.rs에 RT shadow 파이프라인 함수 추가** + +Add import: `use crate::rt_shadow::RT_SHADOW_FORMAT;` + +Add these functions: + +```rust +/// Compute pipeline bind group layout for RT shadow G-Buffer input (group 0). +pub fn rt_shadow_gbuffer_bind_group_layout(device: &wgpu::Device) -> wgpu::BindGroupLayout { + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("RT Shadow GBuffer BGL"), + entries: &[ + // position texture + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: false }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // normal texture + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + ], + }) +} + +/// Compute pipeline bind group layout for RT shadow data (group 1). 
+pub fn rt_shadow_data_bind_group_layout(device: &wgpu::Device) -> wgpu::BindGroupLayout { + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("RT Shadow Data BGL"), + entries: &[ + // TLAS + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::AccelerationStructure, + count: None, + }, + // shadow output (storage texture, write) + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::StorageTexture { + access: wgpu::StorageTextureAccess::WriteOnly, + format: RT_SHADOW_FORMAT, + view_dimension: wgpu::TextureViewDimension::D2, + }, + count: None, + }, + // uniform + wgpu::BindGroupLayoutEntry { + binding: 2, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + ], + }) +} + +/// Create the RT shadow compute pipeline. 
+pub fn create_rt_shadow_pipeline( + device: &wgpu::Device, + gbuffer_layout: &wgpu::BindGroupLayout, + data_layout: &wgpu::BindGroupLayout, +) -> wgpu::ComputePipeline { + let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: Some("RT Shadow Shader"), + source: wgpu::ShaderSource::Wgsl(include_str!("rt_shadow_shader.wgsl").into()), + }); + + let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("RT Shadow Pipeline Layout"), + bind_group_layouts: &[gbuffer_layout, data_layout], + immediate_size: 0, + }); + + device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: Some("RT Shadow Compute Pipeline"), + layout: Some(&layout), + module: &shader, + entry_point: Some("main"), + compilation_options: wgpu::PipelineCompilationOptions::default(), + cache: None, + }) +} +``` + +- [ ] **Step 2: lighting_shadow_bind_group_layout에 RT shadow binding 추가** + +기존 8 bindings (0-6 shadow+IBL+SSGI) + 추가: +```rust +// binding 7: RT shadow texture (Float, filterable) +wgpu::BindGroupLayoutEntry { + binding: 7, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, +}, +// binding 8: RT shadow sampler +wgpu::BindGroupLayoutEntry { + binding: 8, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering), + count: None, +}, +``` + +- [ ] **Step 3: deferred_lighting.wgsl 수정** + +Add bindings: +```wgsl +@group(2) @binding(7) var t_rt_shadow: texture_2d; +@group(2) @binding(8) var s_rt_shadow: sampler; +``` + +Replace shadow usage in fs_main: +```wgsl +// OLD: let shadow_factor = calculate_shadow(world_pos); +// NEW: Use RT shadow +let rt_shadow_val = textureSample(t_rt_shadow, s_rt_shadow, uv).r; +let shadow_factor = rt_shadow_val; +``` + +- [ ] **Step 4: 빌드 확인** + +Run: 
`cargo build -p voltex_renderer` +Expected: 컴파일 성공 + +- [ ] **Step 5: 커밋** + +```bash +git add crates/voltex_renderer/src/deferred_pipeline.rs crates/voltex_renderer/src/deferred_lighting.wgsl +git commit -m "feat(renderer): add RT shadow compute pipeline and integrate into lighting pass" +``` + +--- + +## Task 4: deferred_demo에 RT Shadow 통합 + +**Files:** +- Modify: `examples/deferred_demo/src/main.rs` + +NOTE: 이 태스크가 가장 복잡합니다. GpuContext 대신 직접 device를 생성하여 EXPERIMENTAL_RAY_QUERY feature를 요청해야 합니다. + +변경사항: +1. Device 생성 시 `Features::EXPERIMENTAL_RAY_QUERY` 요청 +2. `RtAccel::new()` — 구체 메시의 BLAS 빌드, 25개 인스턴스의 TLAS 빌드 +3. `RtShadowResources::new()` — RT shadow 텍스처 + uniform +4. RT shadow 컴퓨트 파이프라인 + 바인드 그룹 생성 +5. 렌더 루프에 RT shadow 컴퓨트 디스패치 추가 (Pass 3) +6. Lighting shadow 바인드 그룹에 RT shadow 텍스처 추가 (binding 7, 8) +7. 매 프레임 RtShadowUniform 업데이트 (light direction) +8. 리사이즈 시 RT shadow 리소스 재생성 + +이 태스크는 opus 모델로 실행. + +- [ ] **Step 1: deferred_demo 수정** + +- [ ] **Step 2: 빌드 확인** + +Run: `cargo build --bin deferred_demo` + +- [ ] **Step 3: 커밋** + +```bash +git add examples/deferred_demo/src/main.rs +git commit -m "feat(renderer): add hardware RT shadows to deferred_demo" +``` + +--- + +## Task 5: 문서 업데이트 + +**Files:** +- Modify: `docs/STATUS.md` +- Modify: `docs/DEFERRED.md` + +- [ ] **Step 1: STATUS.md에 Phase 7-3 추가** + +```markdown +### Phase 7-3: RT Shadows (Hardware Ray Tracing) +- voltex_renderer: RtAccel (BLAS/TLAS acceleration structure management) +- voltex_renderer: RT Shadow compute shader (ray query, directional light) +- voltex_renderer: RT shadow pipeline + bind group layouts +- voltex_renderer: Lighting pass RT shadow integration +- deferred_demo updated with hardware RT shadows (requires RTX/RDNA2+) +``` + +- [ ] **Step 2: DEFERRED.md에 Phase 7-3 미뤄진 항목** + +```markdown +## Phase 7-3 + +- **RT Reflections** — 미구현. BLAS/TLAS 인프라 재사용 가능. +- **RT AO** — 미구현. +- **Point/Spot Light RT shadows** — Directional만 구현. +- **Soft RT shadows** — 단일 ray만. 
Multi-ray soft shadow 미구현. +- **BLAS 업데이트** — 정적 지오메트리만. 동적 메시 변경 시 BLAS 재빌드 필요. +- **Fallback** — RT 미지원 GPU에서 자동 PCF 폴백 미구현. +``` + +- [ ] **Step 3: 커밋** + +```bash +git add docs/STATUS.md docs/DEFERRED.md +git commit -m "docs: add Phase 7-3 RT shadows status and deferred items" +``` diff --git a/docs/superpowers/specs/2026-03-25-phase7-1-deferred-rendering.md b/docs/superpowers/specs/2026-03-25-phase7-1-deferred-rendering.md new file mode 100644 index 0000000..a5e658f --- /dev/null +++ b/docs/superpowers/specs/2026-03-25-phase7-1-deferred-rendering.md @@ -0,0 +1,199 @@ +# Phase 7-1: Deferred Rendering — Design Spec + +## Overview + +`voltex_renderer`에 디퍼드 렌더링 파이프라인을 추가한다. 기존 포워드 PBR은 유지하고, G-Buffer + Lighting Pass 구조의 디퍼드 파이프라인을 새 모듈로 구현한다. + +## Scope + +- G-Buffer (4 MRT: Position, Normal, Albedo, Material + Depth) +- G-Buffer Pass 셰이더 (기하 데이터 기록) +- Lighting Pass 셰이더 (풀스크린 쿼드, Cook-Torrance BRDF, 멀티 라이트, 섀도우, IBL) +- 풀스크린 삼각형 +- deferred_demo 예제 + +## Out of Scope + +- 포워드 파이프라인 제거/변경 +- 투명 오브젝트 (디퍼드에서 처리 어려움, 별도 포워드 패스 필요) +- G-Buffer 압축/최적화 (octahedral normal, depth-position 복원 등) +- Light volumes (sphere/cone 렌더링으로 라이트 컬링) +- Stencil 기반 최적화 + +## Render Pass Architecture + +### Pass 1: G-Buffer Pass + +MRT(Multiple Render Targets)로 기하 데이터 기록. + +| RT | Format | Content | +|----|--------|---------| +| RT0 | Rgba32Float | World Position (xyz) | +| RT1 | Rgba16Float | World Normal (xyz, normalized) | +| RT2 | Rgba8UnormSrgb | Albedo (rgb) | +| RT3 | Rgba8Unorm | R=metallic, G=roughness, B=ao | +| Depth | Depth32Float | Depth (기존 공유) | + +**Bind Groups:** +- Group 0 (dynamic): CameraUniform (view_proj, model) +- Group 1: PBR Textures (albedo + normal map) +- Group 2 (dynamic): MaterialUniform + +**Shader:** 버텍스 → 월드 변환, 프래그먼트 → G-Buffer 기록. TBN 노멀맵 적용. + +### Pass 2: Lighting Pass + +풀스크린 삼각형 렌더, G-Buffer를 텍스처로 읽어 라이팅 계산. 
+ +**Bind Groups:** +- Group 0: G-Buffer textures (4개) + sampler +- Group 1: LightsUniform + CameraPosition +- Group 2: Shadow map + shadow sampler + ShadowUniform + BRDF LUT + BRDF sampler + +**Shader:** 기존 pbr_shader.wgsl의 Cook-Torrance BRDF 로직을 재사용. +- G-Buffer에서 position, normal, albedo, metallic/roughness/ao 읽기 +- 멀티 라이트 루프 (directional, point, spot) +- PCF 섀도우 +- IBL ambient (procedural sky + BRDF LUT) +- Reinhard 톤매핑 + 감마 보정 + +## Module Structure + +### 새 파일 +- `crates/voltex_renderer/src/gbuffer.rs` — GBuffer 타입 (텍스처 생성/리사이즈) +- `crates/voltex_renderer/src/fullscreen_quad.rs` — 풀스크린 삼각형 정점 +- `crates/voltex_renderer/src/deferred_pipeline.rs` — 파이프라인 생성 (gbuffer pass + lighting pass) +- `crates/voltex_renderer/src/deferred_gbuffer.wgsl` — G-Buffer pass 셰이더 +- `crates/voltex_renderer/src/deferred_lighting.wgsl` — Lighting pass 셰이더 + +### 수정 파일 +- `crates/voltex_renderer/src/lib.rs` — 새 모듈 등록 + +## Types + +### GBuffer + +```rust +pub struct GBuffer { + pub position_view: TextureView, // Rgba32Float + pub normal_view: TextureView, // Rgba16Float + pub albedo_view: TextureView, // Rgba8UnormSrgb + pub material_view: TextureView, // Rgba8Unorm + pub depth_view: TextureView, // Depth32Float + pub width: u32, + pub height: u32, +} +``` + +- `new(device, width, height) -> Self` +- `resize(device, width, height)` — 윈도우 리사이즈 시 재생성 + +### DeferredPipeline + +```rust +pub struct DeferredPipeline { + pub gbuffer_pipeline: RenderPipeline, + pub lighting_pipeline: RenderPipeline, + pub gbuffer_bind_group_layouts: [BindGroupLayout; 3], // camera, texture, material + pub lighting_bind_group_layouts: [BindGroupLayout; 3], // gbuffer, lights, shadow+ibl +} +``` + +- `new(device, surface_format) -> Self` + +### Fullscreen Triangle + +```rust +pub struct FullscreenTriangle { + pub vertex_buffer: Buffer, +} +``` + +3 정점: (-1,-1), (3,-1), (-1,3) — 클리핑으로 화면 커버. UV는 셰이더에서 position으로 계산. 
+ +## Bind Group Details + +### G-Buffer Pass + +**Group 0 — Camera (dynamic offset):** +- binding 0: CameraUniform (view_proj, model, camera_pos) + +**Group 1 — Textures:** +- binding 0: albedo texture +- binding 1: albedo sampler +- binding 2: normal map texture +- binding 3: normal map sampler + +**Group 2 — Material (dynamic offset):** +- binding 0: MaterialUniform (base_color, metallic, roughness, ao) + +### Lighting Pass + +**Group 0 — G-Buffer:** +- binding 0: position texture +- binding 1: normal texture +- binding 2: albedo texture +- binding 3: material texture +- binding 4: sampler (shared, nearest) + +**Group 1 — Lights:** +- binding 0: LightsUniform +- binding 1: CameraPositionUniform (vec3 + padding) + +**Group 2 — Shadow + IBL:** +- binding 0: shadow depth texture +- binding 1: shadow comparison sampler +- binding 2: ShadowUniform +- binding 3: BRDF LUT texture +- binding 4: BRDF LUT sampler + +## Shader Summary + +### deferred_gbuffer.wgsl + +Vertex: position → world (model * pos), normal → world (model * normal), TBN 계산, UV 전달. + +Fragment outputs (4 targets): +```wgsl +struct GBufferOutput { + @location(0) position: vec4, + @location(1) normal: vec4, + @location(2) albedo: vec4, + @location(3) material: vec4, +} +``` +- position.xyz = world position +- normal.xyz = TBN-mapped world normal +- albedo.rgb = texture sample * base_color +- material = vec4(metallic, roughness, ao, 1.0) + +### deferred_lighting.wgsl + +Vertex: 풀스크린 삼각형, UV 계산. + +Fragment: +1. G-Buffer 샘플링 +2. Cook-Torrance BRDF (기존 pbr_shader.wgsl 로직) +3. 멀티 라이트 루프 +4. PCF 섀도우 +5. IBL ambient +6. Reinhard 톤매핑 + 감마 + +## Test Plan + +### gbuffer.rs +- GBuffer 생성: 텍스처 크기 확인 +- 리사이즈: 새 크기로 재생성 + +### fullscreen_quad.rs +- 정점 데이터: 3개 정점, 올바른 좌표 + +### 통합 (수동) +- deferred_demo 예제: 다수 포인트 라이트 + 디퍼드 렌더링 +- G-Buffer 시각화 (디버그용: position/normal/albedo 각각 출력) + +## Constraints + +- max_bind_groups=4: G-Buffer pass 3개, Lighting pass 3개 사용 → 제약 내 +- MRT: wgpu는 최대 8개 color attachment 지원. 4개 사용. 
+- Rgba32Float: Position에 32-bit float 사용 (정밀도 우선, 최적화는 추후)
diff --git a/docs/superpowers/specs/2026-03-25-phase7-2-ssgi.md b/docs/superpowers/specs/2026-03-25-phase7-2-ssgi.md
new file mode 100644
index 0000000..b7086cc
--- /dev/null
+++ b/docs/superpowers/specs/2026-03-25-phase7-2-ssgi.md
@@ -0,0 +1,202 @@
+# Phase 7-2: SSGI (Screen-Space Global Illumination) — Design Spec
+
+## Overview
+
+디퍼드 파이프라인에 SSGI 포스트 프로세싱 패스를 추가한다. SSAO 확장형으로, 반구 샘플링을 통해 Ambient Occlusion과 Color Bleeding(간접광)을 동시에 계산한다.
+
+## Scope
+
+- SSGI 리소스 (반구 커널, 4x4 노이즈 텍스처, 출력 텍스처)
+- SSGI 풀스크린 셰이더 (AO + indirect color 계산)
+- SSGI 파이프라인 + 바인드 그룹 레이아웃
+- Lighting Pass 수정 (SSGI 결과를 ambient에 적용)
+- deferred_demo에 SSGI 통합
+
+## Out of Scope
+
+- 블러 패스 (노이즈 제거용 bilateral blur — 추후 추가)
+- 반해상도 렌더링 (성능 최적화)
+- 시간적 누적 (temporal accumulation)
+- Light Probes
+
+## Render Pass Flow (디퍼드 확장)
+
+```
+Pass 1: G-Buffer (기존, 변경 없음)
+Pass 2: SSGI Pass (NEW) → Rgba16Float 출력
+Pass 3: Lighting Pass (수정) → SSGI 텍스처 읽어서 ambient에 적용
+```
+
+## Module Structure
+
+### 새 파일
+- `crates/voltex_renderer/src/ssgi.rs` — SsgiResources, SsgiUniform, 커널/노이즈 생성
+- `crates/voltex_renderer/src/ssgi_shader.wgsl` — SSGI 풀스크린 셰이더
+
+### 수정 파일
+- `crates/voltex_renderer/src/deferred_pipeline.rs` — SSGI 파이프라인 + 바인드 그룹 레이아웃 추가
+- `crates/voltex_renderer/src/deferred_lighting.wgsl` — SSGI 결과 적용
+- `crates/voltex_renderer/src/lib.rs` — ssgi 모듈 등록
+- `examples/deferred_demo/src/main.rs` — SSGI 패스 추가
+
+## Types
+
+### SsgiUniform (144 bytes)
+
+```rust
+#[repr(C)]
+#[derive(Copy, Clone, Pod, Zeroable)]
+pub struct SsgiUniform {
+    pub projection: [f32; 16], // view → clip
+    pub view: [f32; 16], // world → view
+    pub radius: f32, // 샘플링 반경 (기본 0.5)
+    pub bias: f32, // depth 바이어스 (기본 0.025)
+    pub intensity: f32, // AO 강도 (기본 1.0)
+    pub indirect_strength: f32, // color bleeding 강도 (기본 0.5)
+}
+```
+
+### SsgiResources
+
+```rust
+pub struct SsgiResources {
+    pub output_view: TextureView, // Rgba16Float — R=AO, G=indirect_r, B=indirect_g, 
A=indirect_b + pub kernel_buffer: Buffer, // 64 * vec4 = 1024 bytes (반구 샘플) + pub noise_view: TextureView, // 4x4 Rgba16Float (랜덤 회전 벡터) + pub uniform_buffer: Buffer, // SsgiUniform + pub width: u32, + pub height: u32, +} +``` + +- `new(device, width, height)` — 리소스 생성, 커널/노이즈 초기화 +- `resize(device, width, height)` — 출력 텍스처 재생성 + +### 반구 커널 생성 + +64개 샘플, 반구(+z 방향) 내 랜덤 분포. 중심 가까이에 더 많은 샘플 (코사인 가중): +```rust +fn generate_kernel(count: usize) -> Vec<[f32; 4]> { + // 의사 랜덤 (시드 고정) + // 각 샘플: normalize(random_in_hemisphere) * lerp(0.1, 1.0, scale^2) + // scale = i / count +} +``` + +### 4x4 노이즈 텍스처 + +16개 랜덤 회전 벡터 (xy 평면). TBN 구성 시 tangent 방향을 랜덤화하여 밴딩 방지. +```rust +fn generate_noise() -> Vec<[f32; 4]> { + // 16개 vec4(random_x, random_y, 0.0, 0.0) +} +``` + +## SSGI Shader (ssgi_shader.wgsl) + +### 바인드 그룹 + +**Group 0: G-Buffer** +- binding 0: position texture (Float, non-filterable) +- binding 1: normal texture (Float, filterable) +- binding 2: albedo texture (Float, filterable) +- binding 3: sampler (NonFiltering) + +**Group 1: SSGI Data** +- binding 0: SsgiUniform +- binding 1: kernel buffer (storage or uniform, 64 * vec4) +- binding 2: noise texture +- binding 3: noise sampler + +### 알고리즘 + +``` +@fragment +fn fs_main(uv): + world_pos = sample(t_position, uv) + if length(world_pos) < 0.001: discard (background) + + normal = sample(t_normal, uv) + + // View space conversion + view_pos = (ssgi.view * vec4(world_pos, 1.0)).xyz + view_normal = normalize((ssgi.view * vec4(normal, 0.0)).xyz) + + // Random rotation from noise (4x4 tiling) + noise_uv = uv * vec2(width/4.0, height/4.0) + random_vec = sample(t_noise, noise_uv).xyz + + // Construct TBN in view space + tangent = normalize(random_vec - view_normal * dot(random_vec, view_normal)) + bitangent = cross(view_normal, tangent) + TBN = mat3x3(tangent, bitangent, view_normal) + + occlusion = 0.0 + indirect = vec3(0.0) + + for i in 0..64: + // Sample position in view space + sample_offset = TBN * kernel[i].xyz * 
ssgi.radius + sample_pos = view_pos + sample_offset + + // Project to screen + clip = ssgi.projection * vec4(sample_pos, 1.0) + screen_uv = clip.xy / clip.w * 0.5 + 0.5 + screen_uv.y = 1.0 - screen_uv.y + + // Read actual depth at that screen position + sample_world_pos = sample(t_position, screen_uv).xyz + sample_view_pos = (ssgi.view * vec4(sample_world_pos, 1.0)).xyz + + // Occlusion check + range_check = smoothstep(0.0, 1.0, ssgi.radius / abs(view_pos.z - sample_view_pos.z)) + if sample_view_pos.z >= sample_pos.z + ssgi.bias: + occlusion += range_check + // Color bleeding: read albedo at occluder position + sample_albedo = sample(t_albedo, screen_uv).rgb + indirect += sample_albedo * range_check + + ao = 1.0 - (occlusion / 64.0) * ssgi.intensity + indirect = indirect / 64.0 * ssgi.indirect_strength + + return vec4(ao, indirect) +``` + +## Lighting Pass 수정 + +### 바인드 그룹 변경 + +기존 Group 2 (Shadow+IBL, 5 bindings)에 SSGI 출력 추가: +- binding 5: SSGI output texture (Float, filterable) +- binding 6: SSGI sampler + +### 셰이더 변경 + +```wgsl +// 기존 +let ambient = (diffuse_ibl + specular_ibl) * ao; + +// 변경 +let ssgi_data = textureSample(t_ssgi, s_ssgi, in.uv); +let ssgi_ao = ssgi_data.r; +let indirect_light = ssgi_data.gba; +let ambient = (diffuse_ibl + specular_ibl) * ao * ssgi_ao + indirect_light; +``` + +## Bind Group Constraint (max 4) + +**SSGI Pass:** 2 groups (0: G-Buffer, 1: SSGI data) — OK + +**Lighting Pass:** 기존 3 groups. Group 2에 SSGI binding 추가 (5,6) — 같은 그룹 내 binding 추가이므로 group 수 변화 없음. OK. 
+
+## Test Plan
+
+### ssgi.rs
+- generate_kernel: 64개 샘플, 모두 반구 내 (z >= 0), 정규화됨
+- generate_noise: 16개 벡터
+- SsgiResources 생성/리사이즈
+
+### 통합 (수동)
+- deferred_demo에서 SSGI ON/OFF 비교
+- 구석/틈에서 AO 어두워짐 확인
+- 밝은 물체 근처에서 color bleeding 확인
diff --git a/docs/superpowers/specs/2026-03-25-phase7-3-rt-shadows.md b/docs/superpowers/specs/2026-03-25-phase7-3-rt-shadows.md
new file mode 100644
index 0000000..2356b83
--- /dev/null
+++ b/docs/superpowers/specs/2026-03-25-phase7-3-rt-shadows.md
@@ -0,0 +1,197 @@
+# Phase 7-3: RT Shadows — Design Spec
+
+## Overview
+
+wgpu의 EXPERIMENTAL_RAY_QUERY를 활용하여 하드웨어 레이트레이싱 기반 그림자를 구현한다. 기존 PCF shadow map을 대체하는 정확한 그림자.
+
+## Hardware Requirements
+
+- GPU: RTX 20xx+ / RDNA2+ (ray query 지원)
+- wgpu Features: EXPERIMENTAL_RAY_QUERY
+- 검증 완료: RTX 4050 Laptop GPU, Vulkan backend
+
+## Scope
+
+- BLAS/TLAS acceleration structure 생성 관리
+- RT Shadow 컴퓨트 셰이더 (ray query로 directional light shadow)
+- RT Shadow 출력 텍스처 (R8Unorm)
+- Lighting Pass에 RT shadow 통합
+- deferred_demo에 RT shadow 적용
+
+## Out of Scope
+
+- RT Reflections
+- RT AO
+- Point/Spot light RT shadows
+- Soft RT shadows (multi-ray)
+- BLAS 재빌드 (정적 지오메트리만)
+
+## Render Pass Flow (디퍼드 확장)
+
+```
+Pass 1: G-Buffer (변경 없음)
+Pass 2: SSGI (변경 없음)
+Pass 3: RT Shadow (NEW) — 컴퓨트 셰이더, ray query로 shadow 텍스처 출력
+Pass 4: Lighting (수정) — RT shadow 텍스처 사용
+```
+
+## Module Structure
+
+### 새 파일
+- `crates/voltex_renderer/src/rt_accel.rs` — RtAccel (BLAS/TLAS 관리)
+- `crates/voltex_renderer/src/rt_shadow.rs` — RtShadowResources + 컴퓨트 파이프라인
+- `crates/voltex_renderer/src/rt_shadow_shader.wgsl` — RT shadow 컴퓨트 셰이더
+
+### 수정 파일
+- `crates/voltex_renderer/src/deferred_pipeline.rs` — lighting shadow bind group에 RT shadow 텍스처 추가
+- `crates/voltex_renderer/src/deferred_lighting.wgsl` — RT shadow 사용
+- `crates/voltex_renderer/src/lib.rs` — 새 모듈 등록
+- `examples/deferred_demo/src/main.rs` — RT shadow 통합
+
+## Types
+
+### RtAccel
+
+```rust
+pub struct RtAccel {
+    pub blas_list: Vec<wgpu::Blas>,
+    pub tlas_package: 
wgpu::TlasPackage,
+}
+```
+
+**Methods:**
+- `new(device, meshes: &[(vertex_buffer, index_buffer, vertex_count, index_count)], transforms: &[[f32; 12]])` — BLAS 빌드, TLAS 구성
+- BLAS: 메시별 삼각형 지오메트리 (BlasTriangleGeometry)
+- TLAS: 인스턴스 배열 (TlasInstance with transform, blas index)
+
+**BLAS 생성:**
+1. BlasTriangleGeometrySizeDescriptor (vertex_count, index_count, vertex_format: Float32x3)
+2. device.create_blas(size, flags: PREFER_FAST_TRACE)
+3. encoder.build_acceleration_structures with BlasBuildEntry (vertex_buffer, index_buffer, geometry)
+
+**TLAS 생성:**
+1. device.create_tlas(max_instances: transform_count)
+2. TlasPackage에 TlasInstance 채움 (transform [3x4 row-major], blas_index, mask: 0xFF)
+3. encoder.build_acceleration_structures with tlas_package
+
+### RtShadowResources
+
+```rust
+pub struct RtShadowResources {
+    pub shadow_view: TextureView, // R32Float, STORAGE_BINDING — NOTE(review): R8Unorm is not a valid WebGPU storage texture format; r32float matches the bind group below
+    pub shadow_texture: Texture,
+    pub uniform_buffer: Buffer, // RtShadowUniform
+    pub width: u32,
+    pub height: u32,
+}
+```
+
+### RtShadowUniform
+
+```rust
+#[repr(C)]
+pub struct RtShadowUniform {
+    pub light_direction: [f32; 3],
+    pub _pad0: f32,
+    pub width: u32,
+    pub height: u32,
+    pub _pad1: [u32; 2],
+}
+```
+
+## RT Shadow Compute Shader
+
+### 바인드 그룹
+
+**Group 0: G-Buffer**
+- binding 0: position texture (Float, non-filterable)
+- binding 1: normal texture (Float, filterable)
+
+**Group 1: RT Data**
+- binding 0: TLAS (acceleration_structure)
+- binding 1: RT shadow output (storage texture, r32float, write)
+- binding 2: RtShadowUniform
+
+### 셰이더 로직
+
+```wgsl
+@compute @workgroup_size(8, 8)
+fn main(@builtin(global_invocation_id) id: vec3<u32>) {
+    if id.x >= uniforms.width || id.y >= uniforms.height { return; }
+
+    let world_pos = textureLoad(t_position, id.xy, 0).xyz;
+
+    // Skip background
+    if dot(world_pos, world_pos) < 0.001 {
+        textureStore(t_shadow_out, id.xy, vec4(1.0));
+        return;
+    }
+
+    let normal = normalize(textureLoad(t_normal, id.xy, 0).xyz); // Rgba16Float stores signed world normals directly — no *2-1 decode
+    let 
ray_origin = world_pos + normal * 0.01; // bias off surface
+    let ray_dir = normalize(-uniforms.light_direction);
+
+    var rq: ray_query;
+    rayQueryInitialize(&rq, tlas, RayDesc(RAY_FLAG_TERMINATE_ON_FIRST_HIT, 0xFFu,
+        0.001, 1000.0, ray_origin, ray_dir)); // wgpu WGSL: RayDesc(flags, cull_mask, t_min, t_max, origin, dir)
+    rayQueryProceed(&rq);
+
+    var shadow = 1.0; // lit by default
+    if rayQueryGetCommittedIntersection(&rq).kind != RAY_QUERY_INTERSECTION_NONE {
+        shadow = 0.0; // occluded
+    }
+
+    textureStore(t_shadow_out, id.xy, vec4(shadow, 0.0, 0.0, 0.0));
+}
+```
+
+## Lighting Pass 수정
+
+RT shadow 텍스처를 기존 shadow_factor 대신 사용:
+
+```wgsl
+// 기존: let shadow_factor = calculate_shadow(world_pos);
+// 변경: RT shadow map에서 직접 읽기
+let rt_shadow = textureSample(t_rt_shadow, s_rt_shadow, uv).r;
+let shadow_factor = rt_shadow;
+```
+
+기존 PCF shadow map 관련 바인딩은 유지하되 사용하지 않음 (호환성).
+RT shadow 텍스처를 Group 2의 추가 바인딩(7, 8)으로 추가.
+
+## Device Creation 변경
+
+RT feature를 요청해야 함:
+```rust
+let (device, queue) = adapter.request_device(&DeviceDescriptor {
+    required_features: Features::EXPERIMENTAL_RAY_QUERY,
+    ..
+}).await;
+```
+
+기존 GpuContext::new()는 features를 요청하지 않으므로, deferred_demo에서 직접 device를 생성하거나 GpuContext에 optional features 파라미터를 추가.
+
+## Bind Group Details
+
+### RT Shadow Compute
+
+**Group 0:**
+- binding 0: position texture (texture_2d<f32>)
+- binding 1: normal texture (texture_2d<f32>)
+
+**Group 1:**
+- binding 0: acceleration_structure (TLAS)
+- binding 1: storage texture (r32float, write)
+- binding 2: uniform buffer (RtShadowUniform)
+
+### Lighting Pass Group 2 (확장)
+
+기존 7 bindings (0-6: shadow+IBL+SSGI) + 추가:
+- binding 7: RT shadow texture (Float, filterable — NOTE(review): sampling r32float with a filtering sampler requires Features::FLOAT32_FILTERABLE; otherwise bind as non-filterable with a NonFiltering sampler)
+- binding 8: RT shadow sampler (Filtering)
+
+## Test Plan
+
+- rt_accel.rs: 빌드 확인만 (GPU 의존)
+- rt_shadow.rs: RtShadowUniform 크기, 리소스 생성
+- 통합: deferred_demo에서 RT shadow ON, 기존 PCF OFF → 날카로운 그림자 확인