From ba610f48dcdcaa0eb797c9d3fbe3cbd100c91418 Mon Sep 17 00:00:00 2001 From: tolelom <98kimsungmin@naver.com> Date: Wed, 25 Mar 2026 13:25:11 +0900 Subject: [PATCH] docs: add Phase 7-1 through 7-3 specs and plans Co-Authored-By: Claude Opus 4.6 (1M context) --- .../2026-03-25-phase7-1-deferred-rendering.md | 961 ++++++++++++++++++ .../plans/2026-03-25-phase7-2-ssgi.md | 661 ++++++++++++ .../plans/2026-03-25-phase7-3-rt-shadows.md | 631 ++++++++++++ .../2026-03-25-phase7-1-deferred-rendering.md | 199 ++++ .../specs/2026-03-25-phase7-2-ssgi.md | 202 ++++ .../specs/2026-03-25-phase7-3-rt-shadows.md | 197 ++++ 6 files changed, 2851 insertions(+) create mode 100644 docs/superpowers/plans/2026-03-25-phase7-1-deferred-rendering.md create mode 100644 docs/superpowers/plans/2026-03-25-phase7-2-ssgi.md create mode 100644 docs/superpowers/plans/2026-03-25-phase7-3-rt-shadows.md create mode 100644 docs/superpowers/specs/2026-03-25-phase7-1-deferred-rendering.md create mode 100644 docs/superpowers/specs/2026-03-25-phase7-2-ssgi.md create mode 100644 docs/superpowers/specs/2026-03-25-phase7-3-rt-shadows.md diff --git a/docs/superpowers/plans/2026-03-25-phase7-1-deferred-rendering.md b/docs/superpowers/plans/2026-03-25-phase7-1-deferred-rendering.md new file mode 100644 index 0000000..c03ac36 --- /dev/null +++ b/docs/superpowers/plans/2026-03-25-phase7-1-deferred-rendering.md @@ -0,0 +1,961 @@ +# Phase 7-1: Deferred Rendering Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** G-Buffer + Lighting Pass 디퍼드 렌더링 파이프라인으로 다수의 라이트를 효율적으로 처리 + +**Architecture:** voltex_renderer에 새 모듈 추가. G-Buffer pass(MRT 4개)가 기하 데이터를 기록하고, Lighting pass(풀스크린 삼각형)가 G-Buffer를 읽어 Cook-Torrance BRDF + 섀도우 + IBL 라이팅을 수행. 기존 포워드 PBR은 유지. 
+ +**Tech Stack:** Rust, wgpu 28.0, WGSL + +**Spec:** `docs/superpowers/specs/2026-03-25-phase7-1-deferred-rendering.md` + +--- + +## File Structure + +### voltex_renderer (추가) +- `crates/voltex_renderer/src/gbuffer.rs` — GBuffer 텍스처 생성/리사이즈 (Create) +- `crates/voltex_renderer/src/fullscreen_quad.rs` — 풀스크린 삼각형 (Create) +- `crates/voltex_renderer/src/deferred_gbuffer.wgsl` — G-Buffer pass 셰이더 (Create) +- `crates/voltex_renderer/src/deferred_lighting.wgsl` — Lighting pass 셰이더 (Create) +- `crates/voltex_renderer/src/deferred_pipeline.rs` — 파이프라인 생성 함수들 (Create) +- `crates/voltex_renderer/src/lib.rs` — 새 모듈 등록 (Modify) + +### Example (추가) +- `examples/deferred_demo/Cargo.toml` (Create) +- `examples/deferred_demo/src/main.rs` (Create) +- `Cargo.toml` — workspace members (Modify) + +--- + +## Task 1: GBuffer + Fullscreen Triangle + +**Files:** +- Create: `crates/voltex_renderer/src/gbuffer.rs` +- Create: `crates/voltex_renderer/src/fullscreen_quad.rs` +- Modify: `crates/voltex_renderer/src/lib.rs` + +- [ ] **Step 1: gbuffer.rs 작성** + +```rust +// crates/voltex_renderer/src/gbuffer.rs + +pub const GBUFFER_POSITION_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba32Float; +pub const GBUFFER_NORMAL_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba16Float; +pub const GBUFFER_ALBEDO_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba8UnormSrgb; +pub const GBUFFER_MATERIAL_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba8Unorm; + +pub struct GBuffer { + pub position_view: wgpu::TextureView, + pub normal_view: wgpu::TextureView, + pub albedo_view: wgpu::TextureView, + pub material_view: wgpu::TextureView, + pub depth_view: wgpu::TextureView, + pub width: u32, + pub height: u32, +} + +impl GBuffer { + pub fn new(device: &wgpu::Device, width: u32, height: u32) -> Self { + let position_view = create_rt(device, width, height, GBUFFER_POSITION_FORMAT, "GBuffer Position"); + let normal_view = create_rt(device, width, height, GBUFFER_NORMAL_FORMAT, "GBuffer 
Normal"); + let albedo_view = create_rt(device, width, height, GBUFFER_ALBEDO_FORMAT, "GBuffer Albedo"); + let material_view = create_rt(device, width, height, GBUFFER_MATERIAL_FORMAT, "GBuffer Material"); + let depth_view = create_depth(device, width, height); + Self { position_view, normal_view, albedo_view, material_view, depth_view, width, height } + } + + pub fn resize(&mut self, device: &wgpu::Device, width: u32, height: u32) { + *self = Self::new(device, width, height); + } +} + +fn create_rt(device: &wgpu::Device, w: u32, h: u32, format: wgpu::TextureFormat, label: &str) -> wgpu::TextureView { + let tex = device.create_texture(&wgpu::TextureDescriptor { + label: Some(label), + size: wgpu::Extent3d { width: w, height: h, depth_or_array_layers: 1 }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format, + usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }); + tex.create_view(&wgpu::TextureViewDescriptor::default()) +} + +fn create_depth(device: &wgpu::Device, w: u32, h: u32) -> wgpu::TextureView { + let tex = device.create_texture(&wgpu::TextureDescriptor { + label: Some("GBuffer Depth"), + size: wgpu::Extent3d { width: w, height: h, depth_or_array_layers: 1 }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: crate::gpu::DEPTH_FORMAT, + usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }); + tex.create_view(&wgpu::TextureViewDescriptor::default()) +} +``` + +- [ ] **Step 2: fullscreen_quad.rs 작성** + +```rust +// crates/voltex_renderer/src/fullscreen_quad.rs +use bytemuck::{Pod, Zeroable}; + +#[repr(C)] +#[derive(Copy, Clone, Debug, Pod, Zeroable)] +pub struct FullscreenVertex { + pub position: [f32; 2], +} + +impl FullscreenVertex { + pub const LAYOUT: wgpu::VertexBufferLayout<'static> = wgpu::VertexBufferLayout { + array_stride: std::mem::size_of::() as 
wgpu::BufferAddress,
+        step_mode: wgpu::VertexStepMode::Vertex,
+        attributes: &[
+            wgpu::VertexAttribute {
+                offset: 0,
+                shader_location: 0,
+                format: wgpu::VertexFormat::Float32x2,
+            },
+        ],
+    };
+}
+
+/// Oversized triangle that covers the entire screen after clipping.
+pub const FULLSCREEN_VERTICES: [FullscreenVertex; 3] = [
+    FullscreenVertex { position: [-1.0, -1.0] },
+    FullscreenVertex { position: [ 3.0, -1.0] },
+    FullscreenVertex { position: [-1.0, 3.0] },
+];
+
+pub fn create_fullscreen_vertex_buffer(device: &wgpu::Device) -> wgpu::Buffer {
+    use wgpu::util::DeviceExt;
+    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
+        label: Some("Fullscreen Vertex Buffer"),
+        contents: bytemuck::cast_slice(&FULLSCREEN_VERTICES),
+        usage: wgpu::BufferUsages::VERTEX,
+    })
+}
+```
+
+- [ ] **Step 3: lib.rs에 모듈 등록**
+
+```rust
+pub mod gbuffer;
+pub mod fullscreen_quad;
+```
+
+And add re-exports:
+```rust
+pub use gbuffer::GBuffer;
+pub use fullscreen_quad::{create_fullscreen_vertex_buffer, FullscreenVertex};
+```
+
+- [ ] **Step 4: 빌드 확인**
+
+Run: `cargo build -p voltex_renderer`
+Expected: 컴파일 성공
+
+- [ ] **Step 5: 커밋**
+
+```bash
+git add crates/voltex_renderer/src/gbuffer.rs crates/voltex_renderer/src/fullscreen_quad.rs crates/voltex_renderer/src/lib.rs
+git commit -m "feat(renderer): add GBuffer and fullscreen triangle for deferred rendering"
+```
+
+---
+
+## Task 2: G-Buffer Pass 셰이더
+
+**Files:**
+- Create: `crates/voltex_renderer/src/deferred_gbuffer.wgsl`
+
+- [ ] **Step 1: deferred_gbuffer.wgsl 작성**
+
+```wgsl
+// G-Buffer pass: writes geometry data to multiple render targets
+
+struct CameraUniform {
+    view_proj: mat4x4<f32>,
+    model: mat4x4<f32>,
+    camera_pos: vec3<f32>,
+};
+
+struct MaterialUniform {
+    base_color: vec4<f32>,
+    metallic: f32,
+    roughness: f32,
+    ao: f32,
+};
+
+@group(0) @binding(0) var<uniform> camera: CameraUniform;
+
+@group(1) @binding(0) var t_diffuse: texture_2d<f32>;
+@group(1) @binding(1) var s_diffuse: sampler;
+@group(1) @binding(2) var t_normal: 
texture_2d<f32>;
+@group(1) @binding(3) var s_normal: sampler;
+
+@group(2) @binding(0) var<uniform> material: MaterialUniform;
+
+struct VertexInput {
+    @location(0) position: vec3<f32>,
+    @location(1) normal: vec3<f32>,
+    @location(2) uv: vec2<f32>,
+    @location(3) tangent: vec4<f32>,
+};
+
+struct VertexOutput {
+    @builtin(position) clip_position: vec4<f32>,
+    @location(0) world_pos: vec3<f32>,
+    @location(1) world_normal: vec3<f32>,
+    @location(2) uv: vec2<f32>,
+    @location(3) world_tangent: vec3<f32>,
+    @location(4) world_bitangent: vec3<f32>,
+};
+
+struct GBufferOutput {
+    @location(0) position: vec4<f32>,
+    @location(1) normal: vec4<f32>,
+    @location(2) albedo: vec4<f32>,
+    @location(3) material_out: vec4<f32>,
+};
+
+@vertex
+fn vs_main(in: VertexInput) -> VertexOutput {
+    var out: VertexOutput;
+    let world_pos = camera.model * vec4<f32>(in.position, 1.0);
+    out.world_pos = world_pos.xyz;
+    out.clip_position = camera.view_proj * world_pos;
+    out.world_normal = normalize((camera.model * vec4<f32>(in.normal, 0.0)).xyz);
+    out.uv = in.uv;
+
+    let T = normalize((camera.model * vec4<f32>(in.tangent.xyz, 0.0)).xyz);
+    let N = out.world_normal;
+    let B = cross(N, T) * in.tangent.w;
+    out.world_tangent = T;
+    out.world_bitangent = B;
+
+    return out;
+}
+
+@fragment
+fn fs_main(in: VertexOutput) -> GBufferOutput {
+    var out: GBufferOutput;
+
+    // World position
+    out.position = vec4<f32>(in.world_pos, 1.0);
+
+    // Normal mapping
+    let T = normalize(in.world_tangent);
+    let B = normalize(in.world_bitangent);
+    let N_geom = normalize(in.world_normal);
+    let normal_sample = textureSample(t_normal, s_normal, in.uv).rgb;
+    let tangent_normal = normal_sample * 2.0 - 1.0;
+    let TBN = mat3x3<f32>(T, B, N_geom);
+    let N = normalize(TBN * tangent_normal);
+    out.normal = vec4<f32>(N, 0.0);
+
+    // Albedo
+    let tex_color = textureSample(t_diffuse, s_diffuse, in.uv);
+    out.albedo = vec4<f32>(material.base_color.rgb * tex_color.rgb, 1.0);
+
+    // Material: R=metallic, G=roughness, B=ao
+    out.material_out = vec4<f32>(material.metallic, material.roughness, material.ao, 1.0);
+
+    return out;
+}
+```
+
+- [ 
] **Step 2: 커밋**
+
+```bash
+git add crates/voltex_renderer/src/deferred_gbuffer.wgsl
+git commit -m "feat(renderer): add G-Buffer pass shader for deferred rendering"
+```
+
+---
+
+## Task 3: Lighting Pass 셰이더
+
+**Files:**
+- Create: `crates/voltex_renderer/src/deferred_lighting.wgsl`
+
+- [ ] **Step 1: deferred_lighting.wgsl 작성**
+
+This shader reuses the Cook-Torrance BRDF functions from pbr_shader.wgsl but reads from G-Buffer instead of vertex attributes.
+
+```wgsl
+// Deferred Lighting Pass: reads G-Buffer, applies full PBR lighting
+
+// Group 0: G-Buffer textures
+@group(0) @binding(0) var t_position: texture_2d<f32>;
+@group(0) @binding(1) var t_normal: texture_2d<f32>;
+@group(0) @binding(2) var t_albedo: texture_2d<f32>;
+@group(0) @binding(3) var t_material: texture_2d<f32>;
+@group(0) @binding(4) var s_gbuffer: sampler;
+
+// Group 1: Lights + Camera
+struct LightData {
+    position: vec3<f32>,
+    light_type: u32,
+    direction: vec3<f32>,
+    range: f32,
+    color: vec3<f32>,
+    intensity: f32,
+    inner_cone: f32,
+    outer_cone: f32,
+    _padding: vec2<f32>,
+};
+
+struct LightsUniform {
+    lights: array<LightData, 16>,
+    count: u32,
+    ambient_color: vec3<f32>,
+};
+
+struct CameraPositionUniform {
+    camera_pos: vec3<f32>,
+};
+
+@group(1) @binding(0) var<uniform> lights_uniform: LightsUniform;
+@group(1) @binding(1) var<uniform> camera_data: CameraPositionUniform;
+
+// Group 2: Shadow + IBL
+struct ShadowUniform {
+    light_view_proj: mat4x4<f32>,
+    shadow_map_size: f32,
+    shadow_bias: f32,
+};
+
+@group(2) @binding(0) var t_shadow: texture_depth_2d;
+@group(2) @binding(1) var s_shadow: sampler_comparison;
+@group(2) @binding(2) var<uniform> shadow: ShadowUniform;
+@group(2) @binding(3) var t_brdf_lut: texture_2d<f32>;
+@group(2) @binding(4) var s_brdf_lut: sampler;
+
+// Fullscreen vertex
+struct VertexOutput {
+    @builtin(position) clip_position: vec4<f32>,
+    @location(0) uv: vec2<f32>,
+};
+
+@vertex
+fn vs_main(@location(0) position: vec2<f32>) -> VertexOutput {
+    var out: VertexOutput;
+    out.clip_position = vec4<f32>(position, 0.0, 1.0);
+    // Convert clip space [-1,1] to UV [0,1]
+ out.uv = vec2(position.x * 0.5 + 0.5, 1.0 - (position.y * 0.5 + 0.5)); + return out; +} + +// === BRDF functions (same as pbr_shader.wgsl) === + +fn distribution_ggx(N: vec3, H: vec3, roughness: f32) -> f32 { + let a = roughness * roughness; + let a2 = a * a; + let NdotH = max(dot(N, H), 0.0); + let NdotH2 = NdotH * NdotH; + let denom_inner = NdotH2 * (a2 - 1.0) + 1.0; + let denom = 3.14159265358979 * denom_inner * denom_inner; + return a2 / denom; +} + +fn geometry_schlick_ggx(NdotV: f32, roughness: f32) -> f32 { + let r = roughness + 1.0; + let k = (r * r) / 8.0; + return NdotV / (NdotV * (1.0 - k) + k); +} + +fn geometry_smith(N: vec3, V: vec3, L: vec3, roughness: f32) -> f32 { + let NdotV = max(dot(N, V), 0.0); + let NdotL = max(dot(N, L), 0.0); + return geometry_schlick_ggx(NdotV, roughness) * geometry_schlick_ggx(NdotL, roughness); +} + +fn fresnel_schlick(cosTheta: f32, F0: vec3) -> vec3 { + return F0 + (1.0 - F0) * pow(clamp(1.0 - cosTheta, 0.0, 1.0), 5.0); +} + +fn attenuation_point(distance: f32, range: f32) -> f32 { + let d_over_r = distance / range; + let d_over_r4 = d_over_r * d_over_r * d_over_r * d_over_r; + let falloff = clamp(1.0 - d_over_r4, 0.0, 1.0); + return (falloff * falloff) / (distance * distance + 0.0001); +} + +fn attenuation_spot(light: LightData, L: vec3) -> f32 { + let spot_dir = normalize(light.direction); + let theta = dot(spot_dir, -L); + return clamp( + (theta - light.outer_cone) / (light.inner_cone - light.outer_cone + 0.0001), + 0.0, 1.0, + ); +} + +fn compute_light_contribution( + light: LightData, N: vec3, V: vec3, world_pos: vec3, + F0: vec3, albedo: vec3, metallic: f32, roughness: f32, +) -> vec3 { + var L: vec3; + var radiance: vec3; + + if light.light_type == 0u { + L = normalize(-light.direction); + radiance = light.color * light.intensity; + } else if light.light_type == 1u { + let to_light = light.position - world_pos; + let dist = length(to_light); + L = normalize(to_light); + radiance = light.color * light.intensity 
* attenuation_point(dist, light.range); + } else { + let to_light = light.position - world_pos; + let dist = length(to_light); + L = normalize(to_light); + radiance = light.color * light.intensity * attenuation_point(dist, light.range) * attenuation_spot(light, L); + } + + let H = normalize(V + L); + let NDF = distribution_ggx(N, H, roughness); + let G = geometry_smith(N, V, L, roughness); + let F = fresnel_schlick(max(dot(H, V), 0.0), F0); + let ks = F; + let kd = (vec3(1.0) - ks) * (1.0 - metallic); + let numerator = NDF * G * F; + let NdotL = max(dot(N, L), 0.0); + let NdotV = max(dot(N, V), 0.0); + let denominator = 4.0 * NdotV * NdotL + 0.0001; + let specular = numerator / denominator; + + return (kd * albedo / 3.14159265358979 + specular) * radiance * NdotL; +} + +fn calculate_shadow(world_pos: vec3) -> f32 { + if shadow.shadow_map_size == 0.0 { return 1.0; } + let light_space_pos = shadow.light_view_proj * vec4(world_pos, 1.0); + let proj_coords = light_space_pos.xyz / light_space_pos.w; + let shadow_uv = vec2(proj_coords.x * 0.5 + 0.5, -proj_coords.y * 0.5 + 0.5); + let current_depth = proj_coords.z; + if shadow_uv.x < 0.0 || shadow_uv.x > 1.0 || shadow_uv.y < 0.0 || shadow_uv.y > 1.0 { return 1.0; } + if current_depth > 1.0 || current_depth < 0.0 { return 1.0; } + let texel_size = 1.0 / shadow.shadow_map_size; + var shadow_val = 0.0; + for (var x = -1; x <= 1; x++) { + for (var y = -1; y <= 1; y++) { + shadow_val += textureSampleCompare(t_shadow, s_shadow, shadow_uv + vec2(f32(x), f32(y)) * texel_size, current_depth - shadow.shadow_bias); + } + } + return shadow_val / 9.0; +} + +fn sample_environment(direction: vec3, roughness: f32) -> vec3 { + var env: vec3; + if direction.y > 0.0 { + env = mix(vec3(0.6, 0.6, 0.5), vec3(0.3, 0.5, 0.9), pow(direction.y, 0.4)); + } else { + env = mix(vec3(0.6, 0.6, 0.5), vec3(0.1, 0.08, 0.06), pow(-direction.y, 0.4)); + } + return mix(env, vec3(0.3, 0.35, 0.4), roughness * roughness); +} + +@fragment +fn fs_main(in: 
VertexOutput) -> @location(0) vec4 { + let world_pos = textureSample(t_position, s_gbuffer, in.uv).xyz; + let N = normalize(textureSample(t_normal, s_gbuffer, in.uv).xyz); + let albedo = textureSample(t_albedo, s_gbuffer, in.uv).rgb; + let mat_data = textureSample(t_material, s_gbuffer, in.uv); + let metallic = mat_data.r; + let roughness = mat_data.g; + let ao = mat_data.b; + + // Skip background pixels (position = 0,0,0 means no geometry) + if length(textureSample(t_position, s_gbuffer, in.uv).xyz) < 0.001 { + return vec4(0.05, 0.05, 0.08, 1.0); // background color + } + + let V = normalize(camera_data.camera_pos - world_pos); + let F0 = mix(vec3(0.04), albedo, metallic); + + let shadow_factor = calculate_shadow(world_pos); + var Lo = vec3(0.0); + let light_count = min(lights_uniform.count, 16u); + for (var i = 0u; i < light_count; i++) { + var contribution = compute_light_contribution( + lights_uniform.lights[i], N, V, world_pos, F0, albedo, metallic, roughness, + ); + if lights_uniform.lights[i].light_type == 0u { + contribution = contribution * shadow_factor; + } + Lo += contribution; + } + + // IBL + let NdotV_ibl = max(dot(N, V), 0.0); + let R = reflect(-V, N); + let irradiance = sample_environment(N, 1.0); + let F_env = fresnel_schlick(NdotV_ibl, F0); + let kd_ibl = (vec3(1.0) - F_env) * (1.0 - metallic); + let diffuse_ibl = kd_ibl * albedo * irradiance; + let prefiltered = sample_environment(R, roughness); + let brdf_val = textureSample(t_brdf_lut, s_brdf_lut, vec2(NdotV_ibl, roughness)); + let specular_ibl = prefiltered * (F0 * brdf_val.r + vec3(brdf_val.g)); + let ambient = (diffuse_ibl + specular_ibl) * ao; + + var color = ambient + Lo; + color = color / (color + vec3(1.0)); // Reinhard + color = pow(color, vec3(1.0 / 2.2)); // Gamma + + return vec4(color, 1.0); +} +``` + +- [ ] **Step 2: 커밋** + +```bash +git add crates/voltex_renderer/src/deferred_lighting.wgsl +git commit -m "feat(renderer): add deferred lighting pass shader with Cook-Torrance BRDF" 
+``` + +--- + +## Task 4: Deferred Pipeline (Rust) + +**Files:** +- Create: `crates/voltex_renderer/src/deferred_pipeline.rs` +- Modify: `crates/voltex_renderer/src/lib.rs` + +- [ ] **Step 1: deferred_pipeline.rs 작성** + +This file creates both G-Buffer pass and Lighting pass pipelines, plus their bind group layouts. + +```rust +// crates/voltex_renderer/src/deferred_pipeline.rs +use crate::vertex::MeshVertex; +use crate::fullscreen_quad::FullscreenVertex; +use crate::gbuffer::*; +use crate::gpu::DEPTH_FORMAT; + +// === G-Buffer Pass === + +pub fn gbuffer_camera_bind_group_layout(device: &wgpu::Device) -> wgpu::BindGroupLayout { + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("GBuffer Camera BGL"), + entries: &[ + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: true, + min_binding_size: None, + }, + count: None, + }, + ], + }) +} + +pub fn create_gbuffer_pipeline( + device: &wgpu::Device, + camera_layout: &wgpu::BindGroupLayout, + texture_layout: &wgpu::BindGroupLayout, + material_layout: &wgpu::BindGroupLayout, +) -> wgpu::RenderPipeline { + let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: Some("Deferred GBuffer Shader"), + source: wgpu::ShaderSource::Wgsl(include_str!("deferred_gbuffer.wgsl").into()), + }); + + let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("GBuffer Pipeline Layout"), + bind_group_layouts: &[camera_layout, texture_layout, material_layout], + immediate_size: 0, + }); + + device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: Some("GBuffer Pipeline"), + layout: Some(&layout), + vertex: wgpu::VertexState { + module: &shader, + entry_point: Some("vs_main"), + buffers: &[MeshVertex::LAYOUT], + compilation_options: wgpu::PipelineCompilationOptions::default(), + }, + 
fragment: Some(wgpu::FragmentState { + module: &shader, + entry_point: Some("fs_main"), + targets: &[ + Some(wgpu::ColorTargetState { + format: GBUFFER_POSITION_FORMAT, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + }), + Some(wgpu::ColorTargetState { + format: GBUFFER_NORMAL_FORMAT, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + }), + Some(wgpu::ColorTargetState { + format: GBUFFER_ALBEDO_FORMAT, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + }), + Some(wgpu::ColorTargetState { + format: GBUFFER_MATERIAL_FORMAT, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + }), + ], + compilation_options: wgpu::PipelineCompilationOptions::default(), + }), + primitive: wgpu::PrimitiveState { + topology: wgpu::PrimitiveTopology::TriangleList, + front_face: wgpu::FrontFace::Ccw, + cull_mode: Some(wgpu::Face::Back), + ..Default::default() + }, + depth_stencil: Some(wgpu::DepthStencilState { + format: DEPTH_FORMAT, + depth_write_enabled: true, + depth_compare: wgpu::CompareFunction::Less, + stencil: wgpu::StencilState::default(), + bias: wgpu::DepthBiasState::default(), + }), + multisample: wgpu::MultisampleState::default(), + multiview_mask: None, + cache: None, + }) +} + +// === Lighting Pass === + +pub fn lighting_gbuffer_bind_group_layout(device: &wgpu::Device) -> wgpu::BindGroupLayout { + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("Lighting GBuffer BGL"), + entries: &[ + // position texture + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: false }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // normal texture + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: 
wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // albedo texture + wgpu::BindGroupLayoutEntry { + binding: 2, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // material texture + wgpu::BindGroupLayoutEntry { + binding: 3, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // sampler + wgpu::BindGroupLayoutEntry { + binding: 4, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering), + count: None, + }, + ], + }) +} + +pub fn lighting_lights_bind_group_layout(device: &wgpu::Device) -> wgpu::BindGroupLayout { + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("Lighting Lights BGL"), + entries: &[ + // LightsUniform + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + // CameraPositionUniform + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + ], + }) +} + +pub fn lighting_shadow_bind_group_layout(device: &wgpu::Device) -> wgpu::BindGroupLayout { + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("Lighting Shadow+IBL BGL"), + entries: &[ + // shadow depth texture + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: 
wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Depth, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // shadow comparison sampler + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Comparison), + count: None, + }, + // ShadowUniform + wgpu::BindGroupLayoutEntry { + binding: 2, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + // BRDF LUT texture + wgpu::BindGroupLayoutEntry { + binding: 3, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // BRDF LUT sampler + wgpu::BindGroupLayoutEntry { + binding: 4, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering), + count: None, + }, + ], + }) +} + +pub fn create_lighting_pipeline( + device: &wgpu::Device, + surface_format: wgpu::TextureFormat, + gbuffer_layout: &wgpu::BindGroupLayout, + lights_layout: &wgpu::BindGroupLayout, + shadow_layout: &wgpu::BindGroupLayout, +) -> wgpu::RenderPipeline { + let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: Some("Deferred Lighting Shader"), + source: wgpu::ShaderSource::Wgsl(include_str!("deferred_lighting.wgsl").into()), + }); + + let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("Lighting Pipeline Layout"), + bind_group_layouts: &[gbuffer_layout, lights_layout, shadow_layout], + immediate_size: 0, + }); + + device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: Some("Lighting Pipeline"), + 
layout: Some(&layout), + vertex: wgpu::VertexState { + module: &shader, + entry_point: Some("vs_main"), + buffers: &[FullscreenVertex::LAYOUT], + compilation_options: wgpu::PipelineCompilationOptions::default(), + }, + fragment: Some(wgpu::FragmentState { + module: &shader, + entry_point: Some("fs_main"), + targets: &[Some(wgpu::ColorTargetState { + format: surface_format, + blend: Some(wgpu::BlendState::REPLACE), + write_mask: wgpu::ColorWrites::ALL, + })], + compilation_options: wgpu::PipelineCompilationOptions::default(), + }), + primitive: wgpu::PrimitiveState { + topology: wgpu::PrimitiveTopology::TriangleList, + ..Default::default() + }, + depth_stencil: None, // No depth for fullscreen pass + multisample: wgpu::MultisampleState::default(), + multiview_mask: None, + cache: None, + }) +} +``` + +- [ ] **Step 2: lib.rs에 deferred_pipeline 등록** + +```rust +pub mod deferred_pipeline; +``` + +And re-exports: +```rust +pub use deferred_pipeline::{ + create_gbuffer_pipeline, create_lighting_pipeline, + gbuffer_camera_bind_group_layout, + lighting_gbuffer_bind_group_layout, lighting_lights_bind_group_layout, lighting_shadow_bind_group_layout, +}; +``` + +- [ ] **Step 3: 빌드 확인** + +Run: `cargo build -p voltex_renderer` +Expected: 컴파일 성공 + +Run: `cargo test --workspace` +Expected: all pass (기존 200개) + +- [ ] **Step 4: 커밋** + +```bash +git add crates/voltex_renderer/src/deferred_pipeline.rs crates/voltex_renderer/src/lib.rs +git commit -m "feat(renderer): add deferred rendering pipeline (G-Buffer + Lighting pass)" +``` + +--- + +## Task 5: deferred_demo 예제 + +**Files:** +- Create: `examples/deferred_demo/Cargo.toml` +- Create: `examples/deferred_demo/src/main.rs` +- Modify: `Cargo.toml` (workspace members) + +NOTE: 이 예제는 복잡합니다 (GPU 리소스 설정, 바인드 그룹 생성, 2-pass 렌더). 기존 pbr_demo 패턴을 따르되 디퍼드로 변경. 구체 그리드 + 다수 포인트 라이트 씬. + +이 태스크는 가장 큰 구현이며, 더 capable한 모델로 실행해야 합니다. 
+ +- [ ] **Step 1: Cargo.toml** + +```toml +[package] +name = "deferred_demo" +version = "0.1.0" +edition = "2021" + +[dependencies] +voltex_math.workspace = true +voltex_platform.workspace = true +voltex_renderer.workspace = true +bytemuck.workspace = true +pollster.workspace = true +wgpu.workspace = true +``` + +- [ ] **Step 2: main.rs 작성** + +The example should: +1. Create window + GpuContext +2. Create GBuffer +3. Create G-Buffer pipeline + Lighting pipeline with proper bind group layouts +4. Generate sphere meshes (5x5 grid of metallic/roughness variations) +5. Set up 8 point lights orbiting the scene (to show deferred advantage) +6. Create all uniform buffers, textures, bind groups +7. Main loop: + - Update camera (FPS controller) + - Update light positions (orbit animation) + - Pass 1: G-Buffer pass (render all objects to MRT) + - Pass 2: Lighting pass (fullscreen quad, reads G-Buffer) + - Present + +Key: must create CameraPositionUniform buffer (vec3 + padding = 16 bytes) for the lighting pass. + +- [ ] **Step 3: workspace에 추가** + +`Cargo.toml` members에 `"examples/deferred_demo"` 추가. 
+ +- [ ] **Step 4: 빌드 + 실행 확인** + +Run: `cargo build --bin deferred_demo` +Run: `cargo run --bin deferred_demo` (수동 확인) + +- [ ] **Step 5: 커밋** + +```bash +git add examples/deferred_demo/ Cargo.toml +git commit -m "feat(renderer): add deferred_demo example with multi-light deferred rendering" +``` + +--- + +## Task 6: 문서 업데이트 + +**Files:** +- Modify: `docs/STATUS.md` +- Modify: `docs/DEFERRED.md` + +- [ ] **Step 1: STATUS.md에 Phase 7-1 추가** + +Phase 6-3 아래에: +```markdown +### Phase 7-1: Deferred Rendering +- voltex_renderer: GBuffer (4 MRT: Position/Normal/Albedo/Material + Depth) +- voltex_renderer: G-Buffer pass shader (MRT output, TBN normal mapping) +- voltex_renderer: Lighting pass shader (fullscreen quad, Cook-Torrance BRDF, multi-light, shadow, IBL) +- voltex_renderer: Deferred pipeline (gbuffer + lighting bind group layouts) +- examples/deferred_demo (5x5 sphere grid + 8 orbiting point lights) +``` + +예제 수 11로 업데이트. + +- [ ] **Step 2: DEFERRED.md에 Phase 7-1 미뤄진 항목 추가** + +```markdown +## Phase 7-1 + +- **투명 오브젝트** — 디퍼드에서 처리 불가. 별도 포워드 패스 필요. +- **G-Buffer 압축** — Position을 depth에서 복원, Normal을 octahedral 인코딩 등 미적용. +- **Light Volumes** — 풀스크린 라이팅만. 라이트별 sphere/cone 렌더 미구현. +- **Stencil 최적화** — 미구현. +``` + +- [ ] **Step 3: 커밋** + +```bash +git add docs/STATUS.md docs/DEFERRED.md +git commit -m "docs: add Phase 7-1 deferred rendering status and deferred items" +``` diff --git a/docs/superpowers/plans/2026-03-25-phase7-2-ssgi.md b/docs/superpowers/plans/2026-03-25-phase7-2-ssgi.md new file mode 100644 index 0000000..5156b5c --- /dev/null +++ b/docs/superpowers/plans/2026-03-25-phase7-2-ssgi.md @@ -0,0 +1,661 @@ +# Phase 7-2: SSGI Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** SSAO + Color Bleeding 기반 SSGI로 간접광과 앰비언트 오클루전 추가 + +**Architecture:** `voltex_renderer`에 ssgi.rs(리소스+커널 생성) + ssgi_shader.wgsl(SSGI 풀스크린 패스) 추가. 기존 deferred_lighting.wgsl의 Shadow+IBL 바인드 그룹에 SSGI 출력 텍스처를 추가하여 ambient에 적용. + +**Tech Stack:** Rust, wgpu 28.0, WGSL + +**Spec:** `docs/superpowers/specs/2026-03-25-phase7-2-ssgi.md` + +--- + +## File Structure + +- `crates/voltex_renderer/src/ssgi.rs` — SsgiResources, SsgiUniform, 커널/노이즈 생성 (Create) +- `crates/voltex_renderer/src/ssgi_shader.wgsl` — SSGI 풀스크린 셰이더 (Create) +- `crates/voltex_renderer/src/deferred_pipeline.rs` — SSGI 파이프라인 + 바인드 그룹 레이아웃 추가 (Modify) +- `crates/voltex_renderer/src/deferred_lighting.wgsl` — SSGI 텍스처 읽어서 ambient 적용 (Modify) +- `crates/voltex_renderer/src/lib.rs` — ssgi 모듈 등록 (Modify) +- `examples/deferred_demo/src/main.rs` — SSGI 패스 통합 (Modify) + +--- + +## Task 1: SsgiResources + 커널/노이즈 생성 + +**Files:** +- Create: `crates/voltex_renderer/src/ssgi.rs` +- Modify: `crates/voltex_renderer/src/lib.rs` + +- [ ] **Step 1: ssgi.rs 작성** + +```rust +// crates/voltex_renderer/src/ssgi.rs +use bytemuck::{Pod, Zeroable}; +use wgpu::util::DeviceExt; + +pub const SSGI_OUTPUT_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba16Float; +pub const SSGI_KERNEL_SIZE: usize = 64; + +#[repr(C)] +#[derive(Copy, Clone, Debug, Pod, Zeroable)] +pub struct SsgiUniform { + pub projection: [f32; 16], + pub view: [f32; 16], + pub radius: f32, + pub bias: f32, + pub intensity: f32, + pub indirect_strength: f32, +} + +impl Default for SsgiUniform { + fn default() -> Self { + Self { + projection: [0.0; 16], + view: [0.0; 16], + radius: 0.5, + bias: 0.025, + intensity: 1.5, + indirect_strength: 0.5, + } + } +} + +pub struct SsgiResources { + pub output_view: wgpu::TextureView, + pub kernel_buffer: wgpu::Buffer, + pub noise_view: wgpu::TextureView, + pub noise_sampler: wgpu::Sampler, + pub uniform_buffer: wgpu::Buffer, + pub width: u32, + pub height: u32, +} + +impl SsgiResources { + pub fn new(device: 
&wgpu::Device, queue: &wgpu::Queue, width: u32, height: u32) -> Self { + let output_view = create_ssgi_output(device, width, height); + let kernel_data = generate_kernel(SSGI_KERNEL_SIZE); + let kernel_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: Some("SSGI Kernel"), + contents: bytemuck::cast_slice(&kernel_data), + usage: wgpu::BufferUsages::UNIFORM, + }); + let noise_view = create_noise_texture(device, queue); + let noise_sampler = device.create_sampler(&wgpu::SamplerDescriptor { + label: Some("SSGI Noise Sampler"), + address_mode_u: wgpu::AddressMode::Repeat, + address_mode_v: wgpu::AddressMode::Repeat, + mag_filter: wgpu::FilterMode::Nearest, + min_filter: wgpu::FilterMode::Nearest, + ..Default::default() + }); + let uniform_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: Some("SSGI Uniform"), + contents: bytemuck::bytes_of(&SsgiUniform::default()), + usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, + }); + Self { output_view, kernel_buffer, noise_view, noise_sampler, uniform_buffer, width, height } + } + + pub fn resize(&mut self, device: &wgpu::Device, width: u32, height: u32) { + self.output_view = create_ssgi_output(device, width, height); + self.width = width; + self.height = height; + } +} + +fn create_ssgi_output(device: &wgpu::Device, w: u32, h: u32) -> wgpu::TextureView { + let tex = device.create_texture(&wgpu::TextureDescriptor { + label: Some("SSGI Output"), + size: wgpu::Extent3d { width: w, height: h, depth_or_array_layers: 1 }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: SSGI_OUTPUT_FORMAT, + usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }); + tex.create_view(&wgpu::TextureViewDescriptor::default()) +} + +/// Generate hemisphere sample kernel for SSAO/SSGI. +/// Samples are distributed in a hemisphere (z >= 0) with more samples near center. 
+pub fn generate_kernel(count: usize) -> Vec<[f32; 4]> { + let mut kernel = Vec::with_capacity(count); + for i in 0..count { + // Pseudo-random using simple hash + let fi = i as f32; + let x = pseudo_random(i * 2) * 2.0 - 1.0; + let y = pseudo_random(i * 2 + 1) * 2.0 - 1.0; + let z = pseudo_random(i * 3 + 7).max(0.05); // hemisphere, z > 0 + + let len = (x * x + y * y + z * z).sqrt(); + let (nx, ny, nz) = (x / len, y / len, z / len); + + // Scale: more samples near center + let mut scale = fi / count as f32; + scale = 0.1 + scale * scale * 0.9; // lerp(0.1, 1.0, scale^2) + + kernel.push([nx * scale, ny * scale, nz * scale, 0.0]); + } + kernel +} + +/// Generate 4x4 noise texture data (random tangent-space rotation vectors). +pub fn generate_noise_data() -> Vec<[f32; 4]> { + let mut noise = Vec::with_capacity(16); + for i in 0..16 { + let x = pseudo_random(i * 5 + 13) * 2.0 - 1.0; + let y = pseudo_random(i * 7 + 17) * 2.0 - 1.0; + let len = (x * x + y * y).sqrt().max(0.001); + noise.push([x / len, y / len, 0.0, 0.0]); + } + noise +} + +fn create_noise_texture(device: &wgpu::Device, queue: &wgpu::Queue) -> wgpu::TextureView { + let data = generate_noise_data(); + let bytes: Vec = data.iter().flat_map(|v| { + v.iter().flat_map(|f| f.to_le_bytes()) + }).collect(); + + let tex = device.create_texture(&wgpu::TextureDescriptor { + label: Some("SSGI Noise"), + size: wgpu::Extent3d { width: 4, height: 4, depth_or_array_layers: 1 }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba32Float, + usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST, + view_formats: &[], + }); + queue.write_texture( + wgpu::TexelCopyTextureInfo { texture: &tex, mip_level: 0, origin: wgpu::Origin3d::ZERO, aspect: wgpu::TextureAspect::All }, + &bytes, + wgpu::TexelCopyBufferLayout { offset: 0, bytes_per_row: Some(4 * 16), rows_per_image: None }, + wgpu::Extent3d { width: 4, height: 4, depth_or_array_layers: 1 
}, + ); + tex.create_view(&wgpu::TextureViewDescriptor::default()) +} + +/// Simple deterministic pseudo-random [0, 1) from integer seed. +fn pseudo_random(seed: usize) -> f32 { + let n = seed.wrapping_mul(0x5DEECE66D).wrapping_add(0xB) & 0xFFFFFF; + n as f32 / 0xFFFFFF as f32 +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_kernel_hemisphere() { + let kernel = generate_kernel(64); + assert_eq!(kernel.len(), 64); + for k in &kernel { + assert!(k[2] >= 0.0, "kernel z must be >= 0 (hemisphere), got {}", k[2]); + let len = (k[0] * k[0] + k[1] * k[1] + k[2] * k[2]).sqrt(); + assert!(len <= 1.01, "kernel sample must be within unit hemisphere, len={}", len); + } + } + + #[test] + fn test_noise_data() { + let noise = generate_noise_data(); + assert_eq!(noise.len(), 16); + for n in &noise { + assert!((n[2]).abs() < 1e-5, "noise z should be 0"); + let len = (n[0] * n[0] + n[1] * n[1]).sqrt(); + assert!((len - 1.0).abs() < 0.1, "noise vector should be roughly unit length, got {}", len); + } + } + + #[test] + fn test_ssgi_uniform_default() { + let u = SsgiUniform::default(); + assert!((u.radius - 0.5).abs() < 1e-5); + assert!((u.bias - 0.025).abs() < 1e-5); + } +} +``` + +- [ ] **Step 2: lib.rs에 ssgi 모듈 등록** + +```rust +pub mod ssgi; +pub use ssgi::{SsgiResources, SsgiUniform, SSGI_OUTPUT_FORMAT}; +``` + +- [ ] **Step 3: 빌드 + 테스트** + +Run: `cargo test -p voltex_renderer` +Expected: 기존 20 + 3 = 23 PASS + +- [ ] **Step 4: 커밋** + +```bash +git add crates/voltex_renderer/src/ssgi.rs crates/voltex_renderer/src/lib.rs +git commit -m "feat(renderer): add SSGI resources with hemisphere kernel and noise texture" +``` + +--- + +## Task 2: SSGI 셰이더 + 파이프라인 + +**Files:** +- Create: `crates/voltex_renderer/src/ssgi_shader.wgsl` +- Modify: `crates/voltex_renderer/src/deferred_pipeline.rs` + +- [ ] **Step 1: ssgi_shader.wgsl 작성** + +```wgsl +// SSGI pass: screen-space ambient occlusion + color bleeding +// Reads G-Buffer position/normal/albedo, outputs AO + indirect 
color + +// Group 0: G-Buffer (same layout as lighting pass) +@group(0) @binding(0) var t_position: texture_2d; +@group(0) @binding(1) var t_normal: texture_2d; +@group(0) @binding(2) var t_albedo: texture_2d; +@group(0) @binding(3) var s_gbuffer: sampler; + +// Group 1: SSGI data +struct SsgiUniform { + projection: mat4x4, + view: mat4x4, + radius: f32, + bias: f32, + intensity: f32, + indirect_strength: f32, +}; + +struct SsgiKernel { + samples: array, 64>, +}; + +@group(1) @binding(0) var ssgi: SsgiUniform; +@group(1) @binding(1) var kernel: SsgiKernel; +@group(1) @binding(2) var t_noise: texture_2d; +@group(1) @binding(3) var s_noise: sampler; + +struct VertexOutput { + @builtin(position) clip_position: vec4, + @location(0) uv: vec2, +}; + +@vertex +fn vs_main(@location(0) position: vec2) -> VertexOutput { + var out: VertexOutput; + out.clip_position = vec4(position, 0.0, 1.0); + out.uv = vec2(position.x * 0.5 + 0.5, 1.0 - (position.y * 0.5 + 0.5)); + return out; +} + +@fragment +fn fs_main(in: VertexOutput) -> @location(0) vec4 { + let uv = in.uv; + + let world_pos = textureSample(t_position, s_gbuffer, uv).xyz; + + // Skip background + if dot(world_pos, world_pos) < 0.001 { + return vec4(1.0, 0.0, 0.0, 0.0); // AO=1 (no occlusion), indirect=0 + } + + let world_normal = normalize(textureSample(t_normal, s_gbuffer, uv).xyz * 2.0 - 1.0); + + // Transform to view space + let view_pos = (ssgi.view * vec4(world_pos, 1.0)).xyz; + let view_normal = normalize((ssgi.view * vec4(world_normal, 0.0)).xyz); + + // Random rotation from noise texture (4x4 tiling) + let tex_dims = textureDimensions(t_position); + let noise_scale = vec2(f32(tex_dims.x) / 4.0, f32(tex_dims.y) / 4.0); + let random_vec = textureSample(t_noise, s_noise, uv * noise_scale).xyz; + + // Construct TBN in view space using Gram-Schmidt + let tangent = normalize(random_vec - view_normal * dot(random_vec, view_normal)); + let bitangent = cross(view_normal, tangent); + let TBN = mat3x3(tangent, bitangent, 
view_normal); + + var occlusion = 0.0; + var indirect = vec3(0.0); + + for (var i = 0u; i < 64u; i++) { + // Sample position in view space + let sample_offset = TBN * kernel.samples[i].xyz; + let sample_view_pos = view_pos + sample_offset * ssgi.radius; + + // Project to screen UV + let clip = ssgi.projection * vec4(sample_view_pos, 1.0); + var screen_uv = clip.xy / clip.w * 0.5 + 0.5; + screen_uv.y = 1.0 - screen_uv.y; + + // Clamp to valid range + screen_uv = clamp(screen_uv, vec2(0.001), vec2(0.999)); + + // Read actual position at that screen location + let actual_world_pos = textureSample(t_position, s_gbuffer, screen_uv).xyz; + let actual_view_pos = (ssgi.view * vec4(actual_world_pos, 1.0)).xyz; + + // Occlusion: is the actual geometry closer to camera than our sample? + let depth_diff = sample_view_pos.z - actual_view_pos.z; + let range_check = smoothstep(0.0, 1.0, ssgi.radius / (abs(view_pos.z - actual_view_pos.z) + 0.001)); + + if depth_diff > ssgi.bias && depth_diff < ssgi.radius { + occlusion += range_check; + // Color bleeding: sample albedo at occluder position + let sample_albedo = textureSample(t_albedo, s_gbuffer, screen_uv).rgb; + indirect += sample_albedo * range_check; + } + } + + let ao = clamp(1.0 - (occlusion / 64.0) * ssgi.intensity, 0.0, 1.0); + indirect = indirect / 64.0 * ssgi.indirect_strength; + + return vec4(ao, indirect); +} +``` + +- [ ] **Step 2: deferred_pipeline.rs에 SSGI 파이프라인 함수 추가** + +Add to deferred_pipeline.rs: + +```rust +use crate::ssgi::SSGI_OUTPUT_FORMAT; + +/// SSGI pass: reads G-Buffer (group 0) + SSGI data (group 1) +pub fn ssgi_gbuffer_bind_group_layout(device: &wgpu::Device) -> wgpu::BindGroupLayout { + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("SSGI GBuffer BGL"), + entries: &[ + // position (non-filterable) + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { 
filterable: false }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // normal (filterable) + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // albedo (filterable) + wgpu::BindGroupLayoutEntry { + binding: 2, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // sampler (non-filtering for position) + wgpu::BindGroupLayoutEntry { + binding: 3, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering), + count: None, + }, + ], + }) +} + +pub fn ssgi_data_bind_group_layout(device: &wgpu::Device) -> wgpu::BindGroupLayout { + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("SSGI Data BGL"), + entries: &[ + // SsgiUniform + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + // kernel (uniform buffer, 64 * vec4 = 1024 bytes) + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + // noise texture (non-filterable, Rgba32Float) + wgpu::BindGroupLayoutEntry { + binding: 2, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: false }, + 
view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // noise sampler + wgpu::BindGroupLayoutEntry { + binding: 3, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering), + count: None, + }, + ], + }) +} + +pub fn create_ssgi_pipeline( + device: &wgpu::Device, + gbuffer_layout: &wgpu::BindGroupLayout, + data_layout: &wgpu::BindGroupLayout, +) -> wgpu::RenderPipeline { + let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: Some("SSGI Shader"), + source: wgpu::ShaderSource::Wgsl(include_str!("ssgi_shader.wgsl").into()), + }); + + let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("SSGI Pipeline Layout"), + bind_group_layouts: &[gbuffer_layout, data_layout], + immediate_size: 0, + }); + + device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: Some("SSGI Pipeline"), + layout: Some(&layout), + vertex: wgpu::VertexState { + module: &shader, + entry_point: Some("vs_main"), + buffers: &[FullscreenVertex::LAYOUT], + compilation_options: wgpu::PipelineCompilationOptions::default(), + }, + fragment: Some(wgpu::FragmentState { + module: &shader, + entry_point: Some("fs_main"), + targets: &[Some(wgpu::ColorTargetState { + format: SSGI_OUTPUT_FORMAT, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + })], + compilation_options: wgpu::PipelineCompilationOptions::default(), + }), + primitive: wgpu::PrimitiveState { + topology: wgpu::PrimitiveTopology::TriangleList, + ..Default::default() + }, + depth_stencil: None, + multisample: wgpu::MultisampleState::default(), + multiview_mask: None, + cache: None, + }) +} +``` + +- [ ] **Step 3: 빌드 확인** + +Run: `cargo build -p voltex_renderer` +Expected: 컴파일 성공 + +- [ ] **Step 4: 커밋** + +```bash +git add crates/voltex_renderer/src/ssgi_shader.wgsl crates/voltex_renderer/src/deferred_pipeline.rs +git commit -m "feat(renderer): add SSGI shader and 
pipeline for screen-space GI" +``` + +--- + +## Task 3: Lighting Pass에 SSGI 통합 + +**Files:** +- Modify: `crates/voltex_renderer/src/deferred_lighting.wgsl` +- Modify: `crates/voltex_renderer/src/deferred_pipeline.rs` + +- [ ] **Step 1: lighting_shadow_bind_group_layout에 SSGI binding 추가** + +현재 `lighting_shadow_bind_group_layout`에 binding 0-4 (shadow+IBL). 여기에 추가: + +```rust +// binding 5: SSGI output texture +wgpu::BindGroupLayoutEntry { + binding: 5, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, +}, +// binding 6: SSGI sampler +wgpu::BindGroupLayoutEntry { + binding: 6, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering), + count: None, +}, +``` + +- [ ] **Step 2: deferred_lighting.wgsl에 SSGI 바인딩 + 적용 추가** + +Group 2에 추가: +```wgsl +@group(2) @binding(5) var t_ssgi: texture_2d; +@group(2) @binding(6) var s_ssgi: sampler; +``` + +Fragment shader에서 ambient 계산 부분 변경: +```wgsl +// 기존: let ambient = (diffuse_ibl + specular_ibl) * ao; +// 변경: +let ssgi_data = textureSample(t_ssgi, s_ssgi, uv); +let ssgi_ao = ssgi_data.r; +let ssgi_indirect = ssgi_data.gba; +let ambient = (diffuse_ibl + specular_ibl) * ao * ssgi_ao + ssgi_indirect; +``` + +- [ ] **Step 3: 빌드 확인** + +Run: `cargo build -p voltex_renderer` +Expected: 컴파일 성공 + +- [ ] **Step 4: 커밋** + +```bash +git add crates/voltex_renderer/src/deferred_lighting.wgsl crates/voltex_renderer/src/deferred_pipeline.rs +git commit -m "feat(renderer): integrate SSGI output into deferred lighting pass" +``` + +--- + +## Task 4: deferred_demo에 SSGI 패스 통합 + +**Files:** +- Modify: `examples/deferred_demo/src/main.rs` + +NOTE: 이 태스크는 기존 deferred_demo를 확장하여 3-pass 렌더링으로 변경합니다. + +변경사항: +1. `SsgiResources::new()` 호출하여 SSGI 리소스 생성 +2. SSGI 파이프라인 + 바인드 그룹 레이아웃 생성 +3. 
SSGI 바인드 그룹 2개 생성 (G-Buffer + SSGI data) +4. 기존 Shadow+IBL 바인드 그룹에 SSGI output texture + sampler 추가 (binding 5,6) +5. 렌더 루프에 SSGI 패스 삽입 (Pass 2: SSGI, 기존 Lighting은 Pass 3으로) +6. 매 프레임 SsgiUniform 업데이트 (view, projection 행렬) +7. 리사이즈 시 SSGI 리소스 + 바인드 그룹 재생성 + +이 태스크는 deferred_demo의 전체 구조를 이해해야 하므로 opus 모델로 실행. + +- [ ] **Step 1: deferred_demo 수정** + +Read the current `examples/deferred_demo/src/main.rs` first, then add SSGI integration. + +- [ ] **Step 2: 빌드 확인** + +Run: `cargo build --bin deferred_demo` +Expected: 컴파일 성공 + +- [ ] **Step 3: 커밋** + +```bash +git add examples/deferred_demo/src/main.rs +git commit -m "feat(renderer): add SSGI pass to deferred_demo (AO + color bleeding)" +``` + +--- + +## Task 5: 문서 업데이트 + +**Files:** +- Modify: `docs/STATUS.md` +- Modify: `docs/DEFERRED.md` + +- [ ] **Step 1: STATUS.md에 Phase 7-2 추가** + +Phase 7-1 아래에: +```markdown +### Phase 7-2: SSGI (Screen-Space Global Illumination) +- voltex_renderer: SsgiResources (hemisphere kernel, 4x4 noise, output texture) +- voltex_renderer: SSGI shader (SSAO + color bleeding in one pass) +- voltex_renderer: SSGI pipeline + bind group layouts +- voltex_renderer: Lighting pass SSGI integration (ambient * ssgi_ao + indirect) +- deferred_demo updated with 3-pass rendering (GBuffer → SSGI → Lighting) +``` + +테스트 수 업데이트 (voltex_renderer: 23). + +- [ ] **Step 2: DEFERRED.md에 Phase 7-2 미뤄진 항목** + +```markdown +## Phase 7-2 + +- **Bilateral Blur** — SSGI 노이즈 제거 블러 미구현. 4x4 노이즈 타일링만. +- **반해상도 렌더링** — 풀 해상도에서 SSGI 실행. 성능 최적화 미적용. +- **Temporal Accumulation** — 프레임 간 누적 미구현. 매 프레임 독립 계산. +- **Light Probes** — 베이크 기반 GI 미구현. 
+``` + +- [ ] **Step 3: 커밋** + +```bash +git add docs/STATUS.md docs/DEFERRED.md +git commit -m "docs: add Phase 7-2 SSGI status and deferred items" +``` diff --git a/docs/superpowers/plans/2026-03-25-phase7-3-rt-shadows.md b/docs/superpowers/plans/2026-03-25-phase7-3-rt-shadows.md new file mode 100644 index 0000000..c43a247 --- /dev/null +++ b/docs/superpowers/plans/2026-03-25-phase7-3-rt-shadows.md @@ -0,0 +1,631 @@ +# Phase 7-3: RT Shadows Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** wgpu ray query로 하드웨어 레이트레이싱 기반 그림자 구현 — 정확한 픽셀-퍼펙트 그림자 + +**Architecture:** BLAS/TLAS acceleration structure를 구축하고, 컴퓨트 셰이더에서 G-Buffer position을 읽어 light 방향으로 ray query를 수행. 차폐 여부를 R8Unorm shadow 텍스처에 기록. Lighting Pass에서 이 텍스처를 읽어 기존 PCF shadow map 대체. + +**Tech Stack:** Rust, wgpu 28.0 (EXPERIMENTAL_RAY_QUERY), WGSL (ray_query) + +**Spec:** `docs/superpowers/specs/2026-03-25-phase7-3-rt-shadows.md` + +--- + +## File Structure + +### 새 파일 +- `crates/voltex_renderer/src/rt_accel.rs` — BLAS/TLAS 생성 관리 (Create) +- `crates/voltex_renderer/src/rt_shadow.rs` — RT Shadow 리소스 + uniform (Create) +- `crates/voltex_renderer/src/rt_shadow_shader.wgsl` — RT shadow 컴퓨트 셰이더 (Create) + +### 수정 파일 +- `crates/voltex_renderer/src/deferred_pipeline.rs` — RT shadow 컴퓨트 파이프라인, lighting group에 RT shadow binding 추가 (Modify) +- `crates/voltex_renderer/src/deferred_lighting.wgsl` — RT shadow 텍스처 사용 (Modify) +- `crates/voltex_renderer/src/lib.rs` — 새 모듈 등록 (Modify) +- `examples/deferred_demo/src/main.rs` — RT shadow 통합 (Modify) + +--- + +## Task 1: rt_accel.rs — BLAS/TLAS 관리 + +**Files:** +- Create: `crates/voltex_renderer/src/rt_accel.rs` +- Modify: `crates/voltex_renderer/src/lib.rs` + +- [ ] **Step 1: rt_accel.rs 작성** + +This module wraps wgpu's acceleration structure API. 
+ +```rust +// crates/voltex_renderer/src/rt_accel.rs +use crate::vertex::MeshVertex; + +/// Mesh data needed to build a BLAS. +pub struct BlasMeshData<'a> { + pub vertex_buffer: &'a wgpu::Buffer, + pub index_buffer: &'a wgpu::Buffer, + pub vertex_count: u32, + pub index_count: u32, +} + +/// Manages BLAS/TLAS for ray tracing. +pub struct RtAccel { + pub blas_list: Vec, + pub tlas: wgpu::Tlas, +} + +impl RtAccel { + /// Create acceleration structures. + /// `meshes` — one BLAS per unique mesh. + /// `instances` — (mesh_index, transform [3x4 row-major f32; 12]). + pub fn new( + device: &wgpu::Device, + encoder: &mut wgpu::CommandEncoder, + meshes: &[BlasMeshData], + instances: &[(usize, [f32; 12])], + ) -> Self { + // 1. Create BLAS for each mesh + let mut blas_list = Vec::new(); + let mut blas_sizes = Vec::new(); + + for mesh in meshes { + let size_desc = wgpu::BlasTriangleGeometrySizeDescriptor { + vertex_format: wgpu::VertexFormat::Float32x3, + vertex_count: mesh.vertex_count, + index_format: Some(wgpu::IndexFormat::Uint16), + index_count: Some(mesh.index_count), + flags: wgpu::AccelerationStructureGeometryFlags::OPAQUE, + }; + blas_sizes.push(size_desc); + } + + for (i, mesh) in meshes.iter().enumerate() { + let blas = device.create_blas( + &wgpu::CreateBlasDescriptor { + label: Some(&format!("BLAS {}", i)), + flags: wgpu::AccelerationStructureFlags::PREFER_FAST_TRACE, + update_mode: wgpu::AccelerationStructureUpdateMode::Build, + }, + wgpu::BlasGeometrySizeDescriptors::Triangles { + descriptors: vec![blas_sizes[i].clone()], + }, + ); + blas_list.push(blas); + } + + // Build all BLAS + let blas_entries: Vec = meshes.iter().enumerate().map(|(i, mesh)| { + wgpu::BlasBuildEntry { + blas: &blas_list[i], + geometry: wgpu::BlasGeometries::TriangleGeometries(vec![ + wgpu::BlasTriangleGeometry { + size: &blas_sizes[i], + vertex_buffer: mesh.vertex_buffer, + first_vertex: 0, + vertex_stride: std::mem::size_of::() as u64, + index_buffer: Some(mesh.index_buffer), + 
first_index: Some(0), + transform_buffer: None, + transform_buffer_offset: None, + }, + ]), + } + }).collect(); + + // 2. Create TLAS + let max_instances = instances.len().max(1) as u32; + let mut tlas = device.create_tlas(&wgpu::CreateTlasDescriptor { + label: Some("TLAS"), + max_instances, + flags: wgpu::AccelerationStructureFlags::PREFER_FAST_TRACE, + update_mode: wgpu::AccelerationStructureUpdateMode::Build, + }); + + // Fill TLAS instances + for (i, (mesh_idx, transform)) in instances.iter().enumerate() { + tlas[i] = Some(wgpu::TlasInstance::new( + &blas_list[*mesh_idx], + *transform, + 0, // custom_data + 0xFF, // mask + )); + } + + // 3. Build + encoder.build_acceleration_structures( + blas_entries.iter(), + [&tlas], + ); + + RtAccel { blas_list, tlas } + } + + /// Update TLAS instance transforms (BLAS stays the same). + pub fn update_instances( + &mut self, + encoder: &mut wgpu::CommandEncoder, + instances: &[(usize, [f32; 12])], + ) { + for (i, (mesh_idx, transform)) in instances.iter().enumerate() { + self.tlas[i] = Some(wgpu::TlasInstance::new( + &self.blas_list[*mesh_idx], + *transform, + 0, + 0xFF, + )); + } + + // Rebuild TLAS only (no BLAS rebuild) + encoder.build_acceleration_structures( + std::iter::empty(), + [&self.tlas], + ); + } +} + +/// Convert a 4x4 column-major matrix to 3x4 row-major transform for TLAS instance. +pub fn mat4_to_tlas_transform(m: &[f32; 16]) -> [f32; 12] { + // Column-major [c0r0, c0r1, c0r2, c0r3, c1r0, ...] to + // Row-major 3x4 [r0c0, r0c1, r0c2, r0c3, r1c0, ...] 
+ [ + m[0], m[4], m[8], m[12], // row 0 + m[1], m[5], m[9], m[13], // row 1 + m[2], m[6], m[10], m[14], // row 2 + ] +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_mat4_to_tlas_transform_identity() { + let identity: [f32; 16] = [ + 1.0, 0.0, 0.0, 0.0, + 0.0, 1.0, 0.0, 0.0, + 0.0, 0.0, 1.0, 0.0, + 0.0, 0.0, 0.0, 1.0, + ]; + let t = mat4_to_tlas_transform(&identity); + assert_eq!(t, [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]); + } + + #[test] + fn test_mat4_to_tlas_transform_translation() { + // Column-major translation (5, 10, 15) + let m: [f32; 16] = [ + 1.0, 0.0, 0.0, 0.0, + 0.0, 1.0, 0.0, 0.0, + 0.0, 0.0, 1.0, 0.0, + 5.0, 10.0, 15.0, 1.0, + ]; + let t = mat4_to_tlas_transform(&m); + // Row 0: [1, 0, 0, 5] + assert_eq!(t[3], 5.0); + assert_eq!(t[7], 10.0); + assert_eq!(t[11], 15.0); + } +} +``` + +- [ ] **Step 2: lib.rs에 모듈 등록** + +```rust +pub mod rt_accel; +pub use rt_accel::{RtAccel, BlasMeshData, mat4_to_tlas_transform}; +``` + +- [ ] **Step 3: 빌드 + 테스트** + +Run: `cargo test -p voltex_renderer` +Expected: 기존 23 + 2 = 25 PASS + +- [ ] **Step 4: 커밋** + +```bash +git add crates/voltex_renderer/src/rt_accel.rs crates/voltex_renderer/src/lib.rs +git commit -m "feat(renderer): add BLAS/TLAS acceleration structure management for RT" +``` + +--- + +## Task 2: RT Shadow 리소스 + 컴퓨트 셰이더 + +**Files:** +- Create: `crates/voltex_renderer/src/rt_shadow.rs` +- Create: `crates/voltex_renderer/src/rt_shadow_shader.wgsl` +- Modify: `crates/voltex_renderer/src/lib.rs` + +- [ ] **Step 1: rt_shadow.rs 작성** + +```rust +// crates/voltex_renderer/src/rt_shadow.rs +use bytemuck::{Pod, Zeroable}; +use wgpu::util::DeviceExt; + +pub const RT_SHADOW_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::R32Float; + +#[repr(C)] +#[derive(Copy, Clone, Debug, Pod, Zeroable)] +pub struct RtShadowUniform { + pub light_direction: [f32; 3], + pub _pad0: f32, + pub width: u32, + pub height: u32, + pub _pad1: [u32; 2], +} + +pub struct RtShadowResources { + pub 
shadow_texture: wgpu::Texture, + pub shadow_view: wgpu::TextureView, + pub uniform_buffer: wgpu::Buffer, + pub width: u32, + pub height: u32, +} + +impl RtShadowResources { + pub fn new(device: &wgpu::Device, width: u32, height: u32) -> Self { + let (shadow_texture, shadow_view) = create_shadow_texture(device, width, height); + let uniform = RtShadowUniform { + light_direction: [0.0, -1.0, 0.0], + _pad0: 0.0, + width, + height, + _pad1: [0; 2], + }; + let uniform_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: Some("RT Shadow Uniform"), + contents: bytemuck::bytes_of(&uniform), + usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, + }); + Self { shadow_texture, shadow_view, uniform_buffer, width, height } + } + + pub fn resize(&mut self, device: &wgpu::Device, width: u32, height: u32) { + let (tex, view) = create_shadow_texture(device, width, height); + self.shadow_texture = tex; + self.shadow_view = view; + self.width = width; + self.height = height; + } +} + +fn create_shadow_texture(device: &wgpu::Device, w: u32, h: u32) -> (wgpu::Texture, wgpu::TextureView) { + let tex = device.create_texture(&wgpu::TextureDescriptor { + label: Some("RT Shadow Texture"), + size: wgpu::Extent3d { width: w, height: h, depth_or_array_layers: 1 }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: RT_SHADOW_FORMAT, + usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }); + let view = tex.create_view(&wgpu::TextureViewDescriptor::default()); + (tex, view) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_rt_shadow_uniform_size() { + assert_eq!(std::mem::size_of::(), 32); + } +} +``` + +- [ ] **Step 2: rt_shadow_shader.wgsl 작성** + +```wgsl +// RT Shadow compute shader +// Traces shadow rays from G-Buffer world positions toward the light + +@group(0) @binding(0) var t_position: texture_2d; +@group(0) @binding(1) var 
t_normal: texture_2d<f32>;
+
+struct RtShadowUniform {
+    light_direction: vec3<f32>,
+    _pad0: f32,
+    width: u32,
+    height: u32,
+    _pad1: vec2<u32>,
+};
+
+@group(1) @binding(0) var tlas: acceleration_structure;
+@group(1) @binding(1) var t_shadow_out: texture_storage_2d<r32float, write>;
+@group(1) @binding(2) var<uniform> uniforms: RtShadowUniform;
+
+@compute @workgroup_size(8, 8)
+fn main(@builtin(global_invocation_id) id: vec3<u32>) {
+    if id.x >= uniforms.width || id.y >= uniforms.height {
+        return;
+    }
+
+    let world_pos = textureLoad(t_position, vec2<i32>(id.xy), 0).xyz;
+
+    // Skip background pixels
+    if dot(world_pos, world_pos) < 0.001 {
+        textureStore(t_shadow_out, vec2<i32>(id.xy), vec4(1.0, 0.0, 0.0, 0.0));
+        return;
+    }
+
+    let normal = normalize(textureLoad(t_normal, vec2<i32>(id.xy), 0).xyz * 2.0 - 1.0);
+
+    // Ray from surface toward light (opposite of light direction)
+    let ray_origin = world_pos + normal * 0.01; // bias off surface
+    let ray_dir = normalize(-uniforms.light_direction);
+
+    // Trace shadow ray
+    var rq: ray_query;
+    rayQueryInitialize(&rq, tlas,
+        RAY_FLAG_TERMINATE_ON_FIRST_HIT | RAY_FLAG_SKIP_CLOSEST_HIT_SHADER,
+        0xFFu, ray_origin, 0.001, ray_dir, 1000.0);
+    rayQueryProceed(&rq);
+
+    var shadow_val = 1.0; // lit by default
+    if rayQueryGetCommittedIntersectionType(&rq) != RAY_QUERY_COMMITTED_INTERSECTION_NONE {
+        shadow_val = 0.0; // in shadow
+    }
+
+    textureStore(t_shadow_out, vec2<i32>(id.xy), vec4(shadow_val, 0.0, 0.0, 0.0));
+}
`crates/voltex_renderer/src/deferred_pipeline.rs` +- Modify: `crates/voltex_renderer/src/deferred_lighting.wgsl` + +- [ ] **Step 1: deferred_pipeline.rs에 RT shadow 파이프라인 함수 추가** + +Add import: `use crate::rt_shadow::RT_SHADOW_FORMAT;` + +Add these functions: + +```rust +/// Compute pipeline bind group layout for RT shadow G-Buffer input (group 0). +pub fn rt_shadow_gbuffer_bind_group_layout(device: &wgpu::Device) -> wgpu::BindGroupLayout { + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("RT Shadow GBuffer BGL"), + entries: &[ + // position texture + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: false }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // normal texture + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + ], + }) +} + +/// Compute pipeline bind group layout for RT shadow data (group 1). 
+pub fn rt_shadow_data_bind_group_layout(device: &wgpu::Device) -> wgpu::BindGroupLayout { + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("RT Shadow Data BGL"), + entries: &[ + // TLAS + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::AccelerationStructure, + count: None, + }, + // shadow output (storage texture, write) + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::StorageTexture { + access: wgpu::StorageTextureAccess::WriteOnly, + format: RT_SHADOW_FORMAT, + view_dimension: wgpu::TextureViewDimension::D2, + }, + count: None, + }, + // uniform + wgpu::BindGroupLayoutEntry { + binding: 2, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + ], + }) +} + +/// Create the RT shadow compute pipeline. 
+pub fn create_rt_shadow_pipeline( + device: &wgpu::Device, + gbuffer_layout: &wgpu::BindGroupLayout, + data_layout: &wgpu::BindGroupLayout, +) -> wgpu::ComputePipeline { + let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: Some("RT Shadow Shader"), + source: wgpu::ShaderSource::Wgsl(include_str!("rt_shadow_shader.wgsl").into()), + }); + + let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("RT Shadow Pipeline Layout"), + bind_group_layouts: &[gbuffer_layout, data_layout], + immediate_size: 0, + }); + + device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: Some("RT Shadow Compute Pipeline"), + layout: Some(&layout), + module: &shader, + entry_point: Some("main"), + compilation_options: wgpu::PipelineCompilationOptions::default(), + cache: None, + }) +} +``` + +- [ ] **Step 2: lighting_shadow_bind_group_layout에 RT shadow binding 추가** + +기존 8 bindings (0-6 shadow+IBL+SSGI) + 추가: +```rust +// binding 7: RT shadow texture (Float, filterable) +wgpu::BindGroupLayoutEntry { + binding: 7, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, +}, +// binding 8: RT shadow sampler +wgpu::BindGroupLayoutEntry { + binding: 8, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering), + count: None, +}, +``` + +- [ ] **Step 3: deferred_lighting.wgsl 수정** + +Add bindings: +```wgsl +@group(2) @binding(7) var t_rt_shadow: texture_2d; +@group(2) @binding(8) var s_rt_shadow: sampler; +``` + +Replace shadow usage in fs_main: +```wgsl +// OLD: let shadow_factor = calculate_shadow(world_pos); +// NEW: Use RT shadow +let rt_shadow_val = textureSample(t_rt_shadow, s_rt_shadow, uv).r; +let shadow_factor = rt_shadow_val; +``` + +- [ ] **Step 4: 빌드 확인** + +Run: 
`cargo build -p voltex_renderer` +Expected: 컴파일 성공 + +- [ ] **Step 5: 커밋** + +```bash +git add crates/voltex_renderer/src/deferred_pipeline.rs crates/voltex_renderer/src/deferred_lighting.wgsl +git commit -m "feat(renderer): add RT shadow compute pipeline and integrate into lighting pass" +``` + +--- + +## Task 4: deferred_demo에 RT Shadow 통합 + +**Files:** +- Modify: `examples/deferred_demo/src/main.rs` + +NOTE: 이 태스크가 가장 복잡합니다. GpuContext 대신 직접 device를 생성하여 EXPERIMENTAL_RAY_QUERY feature를 요청해야 합니다. + +변경사항: +1. Device 생성 시 `Features::EXPERIMENTAL_RAY_QUERY` 요청 +2. `RtAccel::new()` — 구체 메시의 BLAS 빌드, 25개 인스턴스의 TLAS 빌드 +3. `RtShadowResources::new()` — RT shadow 텍스처 + uniform +4. RT shadow 컴퓨트 파이프라인 + 바인드 그룹 생성 +5. 렌더 루프에 RT shadow 컴퓨트 디스패치 추가 (Pass 3) +6. Lighting shadow 바인드 그룹에 RT shadow 텍스처 추가 (binding 7, 8) +7. 매 프레임 RtShadowUniform 업데이트 (light direction) +8. 리사이즈 시 RT shadow 리소스 재생성 + +이 태스크는 opus 모델로 실행. + +- [ ] **Step 1: deferred_demo 수정** + +- [ ] **Step 2: 빌드 확인** + +Run: `cargo build --bin deferred_demo` + +- [ ] **Step 3: 커밋** + +```bash +git add examples/deferred_demo/src/main.rs +git commit -m "feat(renderer): add hardware RT shadows to deferred_demo" +``` + +--- + +## Task 5: 문서 업데이트 + +**Files:** +- Modify: `docs/STATUS.md` +- Modify: `docs/DEFERRED.md` + +- [ ] **Step 1: STATUS.md에 Phase 7-3 추가** + +```markdown +### Phase 7-3: RT Shadows (Hardware Ray Tracing) +- voltex_renderer: RtAccel (BLAS/TLAS acceleration structure management) +- voltex_renderer: RT Shadow compute shader (ray query, directional light) +- voltex_renderer: RT shadow pipeline + bind group layouts +- voltex_renderer: Lighting pass RT shadow integration +- deferred_demo updated with hardware RT shadows (requires RTX/RDNA2+) +``` + +- [ ] **Step 2: DEFERRED.md에 Phase 7-3 미뤄진 항목** + +```markdown +## Phase 7-3 + +- **RT Reflections** — 미구현. BLAS/TLAS 인프라 재사용 가능. +- **RT AO** — 미구현. +- **Point/Spot Light RT shadows** — Directional만 구현. +- **Soft RT shadows** — 단일 ray만. 
Multi-ray soft shadow 미구현. +- **BLAS 업데이트** — 정적 지오메트리만. 동적 메시 변경 시 BLAS 재빌드 필요. +- **Fallback** — RT 미지원 GPU에서 자동 PCF 폴백 미구현. +``` + +- [ ] **Step 3: 커밋** + +```bash +git add docs/STATUS.md docs/DEFERRED.md +git commit -m "docs: add Phase 7-3 RT shadows status and deferred items" +``` diff --git a/docs/superpowers/specs/2026-03-25-phase7-1-deferred-rendering.md b/docs/superpowers/specs/2026-03-25-phase7-1-deferred-rendering.md new file mode 100644 index 0000000..a5e658f --- /dev/null +++ b/docs/superpowers/specs/2026-03-25-phase7-1-deferred-rendering.md @@ -0,0 +1,199 @@ +# Phase 7-1: Deferred Rendering — Design Spec + +## Overview + +`voltex_renderer`에 디퍼드 렌더링 파이프라인을 추가한다. 기존 포워드 PBR은 유지하고, G-Buffer + Lighting Pass 구조의 디퍼드 파이프라인을 새 모듈로 구현한다. + +## Scope + +- G-Buffer (4 MRT: Position, Normal, Albedo, Material + Depth) +- G-Buffer Pass 셰이더 (기하 데이터 기록) +- Lighting Pass 셰이더 (풀스크린 쿼드, Cook-Torrance BRDF, 멀티 라이트, 섀도우, IBL) +- 풀스크린 삼각형 +- deferred_demo 예제 + +## Out of Scope + +- 포워드 파이프라인 제거/변경 +- 투명 오브젝트 (디퍼드에서 처리 어려움, 별도 포워드 패스 필요) +- G-Buffer 압축/최적화 (octahedral normal, depth-position 복원 등) +- Light volumes (sphere/cone 렌더링으로 라이트 컬링) +- Stencil 기반 최적화 + +## Render Pass Architecture + +### Pass 1: G-Buffer Pass + +MRT(Multiple Render Targets)로 기하 데이터 기록. + +| RT | Format | Content | +|----|--------|---------| +| RT0 | Rgba32Float | World Position (xyz) | +| RT1 | Rgba16Float | World Normal (xyz, normalized) | +| RT2 | Rgba8UnormSrgb | Albedo (rgb) | +| RT3 | Rgba8Unorm | R=metallic, G=roughness, B=ao | +| Depth | Depth32Float | Depth (기존 공유) | + +**Bind Groups:** +- Group 0 (dynamic): CameraUniform (view_proj, model) +- Group 1: PBR Textures (albedo + normal map) +- Group 2 (dynamic): MaterialUniform + +**Shader:** 버텍스 → 월드 변환, 프래그먼트 → G-Buffer 기록. TBN 노멀맵 적용. + +### Pass 2: Lighting Pass + +풀스크린 삼각형 렌더, G-Buffer를 텍스처로 읽어 라이팅 계산. 
+ +**Bind Groups:** +- Group 0: G-Buffer textures (4개) + sampler +- Group 1: LightsUniform + CameraPosition +- Group 2: Shadow map + shadow sampler + ShadowUniform + BRDF LUT + BRDF sampler + +**Shader:** 기존 pbr_shader.wgsl의 Cook-Torrance BRDF 로직을 재사용. +- G-Buffer에서 position, normal, albedo, metallic/roughness/ao 읽기 +- 멀티 라이트 루프 (directional, point, spot) +- PCF 섀도우 +- IBL ambient (procedural sky + BRDF LUT) +- Reinhard 톤매핑 + 감마 보정 + +## Module Structure + +### 새 파일 +- `crates/voltex_renderer/src/gbuffer.rs` — GBuffer 타입 (텍스처 생성/리사이즈) +- `crates/voltex_renderer/src/fullscreen_quad.rs` — 풀스크린 삼각형 정점 +- `crates/voltex_renderer/src/deferred_pipeline.rs` — 파이프라인 생성 (gbuffer pass + lighting pass) +- `crates/voltex_renderer/src/deferred_gbuffer.wgsl` — G-Buffer pass 셰이더 +- `crates/voltex_renderer/src/deferred_lighting.wgsl` — Lighting pass 셰이더 + +### 수정 파일 +- `crates/voltex_renderer/src/lib.rs` — 새 모듈 등록 + +## Types + +### GBuffer + +```rust +pub struct GBuffer { + pub position_view: TextureView, // Rgba32Float + pub normal_view: TextureView, // Rgba16Float + pub albedo_view: TextureView, // Rgba8UnormSrgb + pub material_view: TextureView, // Rgba8Unorm + pub depth_view: TextureView, // Depth32Float + pub width: u32, + pub height: u32, +} +``` + +- `new(device, width, height) -> Self` +- `resize(device, width, height)` — 윈도우 리사이즈 시 재생성 + +### DeferredPipeline + +```rust +pub struct DeferredPipeline { + pub gbuffer_pipeline: RenderPipeline, + pub lighting_pipeline: RenderPipeline, + pub gbuffer_bind_group_layouts: [BindGroupLayout; 3], // camera, texture, material + pub lighting_bind_group_layouts: [BindGroupLayout; 3], // gbuffer, lights, shadow+ibl +} +``` + +- `new(device, surface_format) -> Self` + +### Fullscreen Triangle + +```rust +pub struct FullscreenTriangle { + pub vertex_buffer: Buffer, +} +``` + +3 정점: (-1,-1), (3,-1), (-1,3) — 클리핑으로 화면 커버. UV는 셰이더에서 position으로 계산. 
+ +## Bind Group Details + +### G-Buffer Pass + +**Group 0 — Camera (dynamic offset):** +- binding 0: CameraUniform (view_proj, model, camera_pos) + +**Group 1 — Textures:** +- binding 0: albedo texture +- binding 1: albedo sampler +- binding 2: normal map texture +- binding 3: normal map sampler + +**Group 2 — Material (dynamic offset):** +- binding 0: MaterialUniform (base_color, metallic, roughness, ao) + +### Lighting Pass + +**Group 0 — G-Buffer:** +- binding 0: position texture +- binding 1: normal texture +- binding 2: albedo texture +- binding 3: material texture +- binding 4: sampler (shared, nearest) + +**Group 1 — Lights:** +- binding 0: LightsUniform +- binding 1: CameraPositionUniform (vec3 + padding) + +**Group 2 — Shadow + IBL:** +- binding 0: shadow depth texture +- binding 1: shadow comparison sampler +- binding 2: ShadowUniform +- binding 3: BRDF LUT texture +- binding 4: BRDF LUT sampler + +## Shader Summary + +### deferred_gbuffer.wgsl + +Vertex: position → world (model * pos), normal → world (model * normal), TBN 계산, UV 전달. + +Fragment outputs (4 targets): +```wgsl +struct GBufferOutput { + @location(0) position: vec4, + @location(1) normal: vec4, + @location(2) albedo: vec4, + @location(3) material: vec4, +} +``` +- position.xyz = world position +- normal.xyz = TBN-mapped world normal +- albedo.rgb = texture sample * base_color +- material = vec4(metallic, roughness, ao, 1.0) + +### deferred_lighting.wgsl + +Vertex: 풀스크린 삼각형, UV 계산. + +Fragment: +1. G-Buffer 샘플링 +2. Cook-Torrance BRDF (기존 pbr_shader.wgsl 로직) +3. 멀티 라이트 루프 +4. PCF 섀도우 +5. IBL ambient +6. Reinhard 톤매핑 + 감마 + +## Test Plan + +### gbuffer.rs +- GBuffer 생성: 텍스처 크기 확인 +- 리사이즈: 새 크기로 재생성 + +### fullscreen_quad.rs +- 정점 데이터: 3개 정점, 올바른 좌표 + +### 통합 (수동) +- deferred_demo 예제: 다수 포인트 라이트 + 디퍼드 렌더링 +- G-Buffer 시각화 (디버그용: position/normal/albedo 각각 출력) + +## Constraints + +- max_bind_groups=4: G-Buffer pass 3개, Lighting pass 3개 사용 → 제약 내 +- MRT: wgpu는 최대 8개 color attachment 지원. 4개 사용. 
+- Rgba32Float: Position에 32-bit float 사용 (정밀도 우선, 최적화는 추후)
diff --git a/docs/superpowers/specs/2026-03-25-phase7-2-ssgi.md b/docs/superpowers/specs/2026-03-25-phase7-2-ssgi.md
new file mode 100644
index 0000000..b7086cc
--- /dev/null
+++ b/docs/superpowers/specs/2026-03-25-phase7-2-ssgi.md
@@ -0,0 +1,202 @@
+# Phase 7-2: SSGI (Screen-Space Global Illumination) — Design Spec
+
+## Overview
+
+디퍼드 파이프라인에 SSGI 포스트 프로세싱 패스를 추가한다. SSAO 확장형으로, 반구 샘플링을 통해 Ambient Occlusion과 Color Bleeding(간접광)을 동시에 계산한다.
+
+## Scope
+
+- SSGI 리소스 (반구 커널, 4x4 노이즈 텍스처, 출력 텍스처)
+- SSGI 풀스크린 셰이더 (AO + indirect color 계산)
+- SSGI 파이프라인 + 바인드 그룹 레이아웃
+- Lighting Pass 수정 (SSGI 결과를 ambient에 적용)
+- deferred_demo에 SSGI 통합
+
+## Out of Scope
+
+- 블러 패스 (노이즈 제거용 bilateral blur — 추후 추가)
+- 반해상도 렌더링 (성능 최적화)
+- 시간적 누적 (temporal accumulation)
+- Light Probes
+
+## Render Pass Flow (디퍼드 확장)
+
+```
+Pass 1: G-Buffer (기존, 변경 없음)
+Pass 2: SSGI Pass (NEW) → Rgba16Float 출력
+Pass 3: Lighting Pass (수정) → SSGI 텍스처 읽어서 ambient에 적용
+```
+
+## Module Structure
+
+### 새 파일
+- `crates/voltex_renderer/src/ssgi.rs` — SsgiResources, SsgiUniform, 커널/노이즈 생성
+- `crates/voltex_renderer/src/ssgi_shader.wgsl` — SSGI 풀스크린 셰이더
+
+### 수정 파일
+- `crates/voltex_renderer/src/deferred_pipeline.rs` — SSGI 파이프라인 + 바인드 그룹 레이아웃 추가
+- `crates/voltex_renderer/src/deferred_lighting.wgsl` — SSGI 결과 적용
+- `crates/voltex_renderer/src/lib.rs` — ssgi 모듈 등록
+- `examples/deferred_demo/src/main.rs` — SSGI 패스 추가
+
+## Types
+
+### SsgiUniform (144 bytes)
+
+```rust
+#[repr(C)]
+#[derive(Copy, Clone, Pod, Zeroable)]
+pub struct SsgiUniform {
+    pub projection: [f32; 16], // view → clip
+    pub view: [f32; 16], // world → view
+    pub radius: f32, // 샘플링 반경 (기본 0.5)
+    pub bias: f32, // depth 바이어스 (기본 0.025)
+    pub intensity: f32, // AO 강도 (기본 1.0)
+    pub indirect_strength: f32, // color bleeding 강도 (기본 0.5)
+}
+```
+
+### SsgiResources
+
+```rust
+pub struct SsgiResources {
+    pub output_view: TextureView, // Rgba16Float — R=AO, G=indirect_r, B=indirect_g, 
A=indirect_b + pub kernel_buffer: Buffer, // 64 * vec4 = 1024 bytes (반구 샘플) + pub noise_view: TextureView, // 4x4 Rgba16Float (랜덤 회전 벡터) + pub uniform_buffer: Buffer, // SsgiUniform + pub width: u32, + pub height: u32, +} +``` + +- `new(device, width, height)` — 리소스 생성, 커널/노이즈 초기화 +- `resize(device, width, height)` — 출력 텍스처 재생성 + +### 반구 커널 생성 + +64개 샘플, 반구(+z 방향) 내 랜덤 분포. 중심 가까이에 더 많은 샘플 (코사인 가중): +```rust +fn generate_kernel(count: usize) -> Vec<[f32; 4]> { + // 의사 랜덤 (시드 고정) + // 각 샘플: normalize(random_in_hemisphere) * lerp(0.1, 1.0, scale^2) + // scale = i / count +} +``` + +### 4x4 노이즈 텍스처 + +16개 랜덤 회전 벡터 (xy 평면). TBN 구성 시 tangent 방향을 랜덤화하여 밴딩 방지. +```rust +fn generate_noise() -> Vec<[f32; 4]> { + // 16개 vec4(random_x, random_y, 0.0, 0.0) +} +``` + +## SSGI Shader (ssgi_shader.wgsl) + +### 바인드 그룹 + +**Group 0: G-Buffer** +- binding 0: position texture (Float, non-filterable) +- binding 1: normal texture (Float, filterable) +- binding 2: albedo texture (Float, filterable) +- binding 3: sampler (NonFiltering) + +**Group 1: SSGI Data** +- binding 0: SsgiUniform +- binding 1: kernel buffer (storage or uniform, 64 * vec4) +- binding 2: noise texture +- binding 3: noise sampler + +### 알고리즘 + +``` +@fragment +fn fs_main(uv): + world_pos = sample(t_position, uv) + if length(world_pos) < 0.001: discard (background) + + normal = sample(t_normal, uv) + + // View space conversion + view_pos = (ssgi.view * vec4(world_pos, 1.0)).xyz + view_normal = normalize((ssgi.view * vec4(normal, 0.0)).xyz) + + // Random rotation from noise (4x4 tiling) + noise_uv = uv * vec2(width/4.0, height/4.0) + random_vec = sample(t_noise, noise_uv).xyz + + // Construct TBN in view space + tangent = normalize(random_vec - view_normal * dot(random_vec, view_normal)) + bitangent = cross(view_normal, tangent) + TBN = mat3x3(tangent, bitangent, view_normal) + + occlusion = 0.0 + indirect = vec3(0.0) + + for i in 0..64: + // Sample position in view space + sample_offset = TBN * kernel[i].xyz * 
ssgi.radius + sample_pos = view_pos + sample_offset + + // Project to screen + clip = ssgi.projection * vec4(sample_pos, 1.0) + screen_uv = clip.xy / clip.w * 0.5 + 0.5 + screen_uv.y = 1.0 - screen_uv.y + + // Read actual depth at that screen position + sample_world_pos = sample(t_position, screen_uv).xyz + sample_view_pos = (ssgi.view * vec4(sample_world_pos, 1.0)).xyz + + // Occlusion check + range_check = smoothstep(0.0, 1.0, ssgi.radius / abs(view_pos.z - sample_view_pos.z)) + if sample_view_pos.z >= sample_pos.z + ssgi.bias: + occlusion += range_check + // Color bleeding: read albedo at occluder position + sample_albedo = sample(t_albedo, screen_uv).rgb + indirect += sample_albedo * range_check + + ao = 1.0 - (occlusion / 64.0) * ssgi.intensity + indirect = indirect / 64.0 * ssgi.indirect_strength + + return vec4(ao, indirect) +``` + +## Lighting Pass 수정 + +### 바인드 그룹 변경 + +기존 Group 2 (Shadow+IBL, 5 bindings)에 SSGI 출력 추가: +- binding 5: SSGI output texture (Float, filterable) +- binding 6: SSGI sampler + +### 셰이더 변경 + +```wgsl +// 기존 +let ambient = (diffuse_ibl + specular_ibl) * ao; + +// 변경 +let ssgi_data = textureSample(t_ssgi, s_ssgi, in.uv); +let ssgi_ao = ssgi_data.r; +let indirect_light = ssgi_data.gba; +let ambient = (diffuse_ibl + specular_ibl) * ao * ssgi_ao + indirect_light; +``` + +## Bind Group Constraint (max 4) + +**SSGI Pass:** 2 groups (0: G-Buffer, 1: SSGI data) — OK + +**Lighting Pass:** 기존 3 groups. Group 2에 SSGI binding 추가 (5,6) — 같은 그룹 내 binding 추가이므로 group 수 변화 없음. OK. 
+
+## Test Plan
+
+### ssgi.rs
+- generate_kernel: 64개 샘플, 모두 반구 내 (z >= 0), 정규화됨
+- generate_noise: 16개 벡터
+- SsgiResources 생성/리사이즈
+
+### 통합 (수동)
+- deferred_demo에서 SSGI ON/OFF 비교
+- 구석/틈에서 AO 어두워짐 확인
+- 밝은 물체 근처에서 color bleeding 확인
diff --git a/docs/superpowers/specs/2026-03-25-phase7-3-rt-shadows.md b/docs/superpowers/specs/2026-03-25-phase7-3-rt-shadows.md
new file mode 100644
index 0000000..2356b83
--- /dev/null
+++ b/docs/superpowers/specs/2026-03-25-phase7-3-rt-shadows.md
@@ -0,0 +1,197 @@
+# Phase 7-3: RT Shadows — Design Spec
+
+## Overview
+
+wgpu의 EXPERIMENTAL_RAY_QUERY를 활용하여 하드웨어 레이트레이싱 기반 그림자를 구현한다. 기존 PCF shadow map을 대체하는 정확한 그림자.
+
+## Hardware Requirements
+
+- GPU: RTX 20xx+ / RDNA2+ (ray query 지원)
+- wgpu Features: EXPERIMENTAL_RAY_QUERY
+- 검증 완료: RTX 4050 Laptop GPU, Vulkan backend
+
+## Scope
+
+- BLAS/TLAS acceleration structure 생성 관리
+- RT Shadow 컴퓨트 셰이더 (ray query로 directional light shadow)
+- RT Shadow 출력 텍스처 (R8Unorm)
+- Lighting Pass에 RT shadow 통합
+- deferred_demo에 RT shadow 적용
+
+## Out of Scope
+
+- RT Reflections
+- RT AO
+- Point/Spot light RT shadows
+- Soft RT shadows (multi-ray)
+- BLAS 재빌드 (정적 지오메트리만)
+
+## Render Pass Flow (디퍼드 확장)
+
+```
+Pass 1: G-Buffer (변경 없음)
+Pass 2: SSGI (변경 없음)
+Pass 3: RT Shadow (NEW) — 컴퓨트 셰이더, ray query로 shadow 텍스처 출력
+Pass 4: Lighting (수정) — RT shadow 텍스처 사용
+```
+
+## Module Structure
+
+### 새 파일
+- `crates/voltex_renderer/src/rt_accel.rs` — RtAccel (BLAS/TLAS 관리)
+- `crates/voltex_renderer/src/rt_shadow.rs` — RtShadowResources + 컴퓨트 파이프라인
+- `crates/voltex_renderer/src/rt_shadow_shader.wgsl` — RT shadow 컴퓨트 셰이더
+
+### 수정 파일
+- `crates/voltex_renderer/src/deferred_pipeline.rs` — lighting shadow bind group에 RT shadow 텍스처 추가
+- `crates/voltex_renderer/src/deferred_lighting.wgsl` — RT shadow 사용
+- `crates/voltex_renderer/src/lib.rs` — 새 모듈 등록
+- `examples/deferred_demo/src/main.rs` — RT shadow 통합
+
+## Types
+
+### RtAccel
+
+```rust
+pub struct RtAccel {
+    pub blas_list: Vec<wgpu::Blas>,
+    pub tlas_package: 
wgpu::TlasPackage,
+}
+```
+
+**Methods:**
+- `new(device, meshes: &[(vertex_buffer, index_buffer, vertex_count, index_count)], transforms: &[[f32; 12]])` — BLAS 빌드, TLAS 구성
+- BLAS: 메시별 삼각형 지오메트리 (BlasTriangleGeometry)
+- TLAS: 인스턴스 배열 (TlasInstance with transform, blas index)
+
+**BLAS 생성:**
+1. BlasTriangleGeometrySizeDescriptor (vertex_count, index_count, vertex_format: Float32x3)
+2. device.create_blas(size, flags: PREFER_FAST_TRACE)
+3. encoder.build_acceleration_structures with BlasBuildEntry (vertex_buffer, index_buffer, geometry)
+
+**TLAS 생성:**
+1. device.create_tlas(max_instances: transform_count)
+2. TlasPackage에 TlasInstance 채움 (transform [3x4 row-major], blas_index, mask: 0xFF)
+3. encoder.build_acceleration_structures with tlas_package
+
+### RtShadowResources
+
+```rust
+pub struct RtShadowResources {
+    pub shadow_view: TextureView, // R32Float, STORAGE_BINDING — NOTE(review): R8Unorm is not a valid WebGPU storage texture format; r32float matches the bind group below
+    pub shadow_texture: Texture,
+    pub uniform_buffer: Buffer, // RtShadowUniform
+    pub width: u32,
+    pub height: u32,
+}
+```
+
+### RtShadowUniform
+
+```rust
+#[repr(C)]
+pub struct RtShadowUniform {
+    pub light_direction: [f32; 3],
+    pub _pad0: f32,
+    pub width: u32,
+    pub height: u32,
+    pub _pad1: [u32; 2],
+}
+```
+
+## RT Shadow Compute Shader
+
+### 바인드 그룹
+
+**Group 0: G-Buffer**
+- binding 0: position texture (Float, non-filterable)
+- binding 1: normal texture (Float, filterable)
+
+**Group 1: RT Data**
+- binding 0: TLAS (acceleration_structure)
+- binding 1: RT shadow output (storage texture, r32float, write)
+- binding 2: RtShadowUniform
+
+### 셰이더 로직
+
+```wgsl
+@compute @workgroup_size(8, 8)
+fn main(@builtin(global_invocation_id) id: vec3<u32>) {
+    if id.x >= uniforms.width || id.y >= uniforms.height { return; }
+
+    let world_pos = textureLoad(t_position, id.xy, 0).xyz;
+
+    // Skip background
+    if dot(world_pos, world_pos) < 0.001 {
+        textureStore(t_shadow_out, id.xy, vec4(1.0));
+        return;
+    }
+
+    let normal = normalize(textureLoad(t_normal, id.xy, 0).xyz); // Rgba16Float stores signed world normals directly — no *2-1 decode
+    let 
ray_origin = world_pos + normal * 0.01; // bias off surface
+    let ray_dir = normalize(-uniforms.light_direction);
+
+    var rq: ray_query;
+    rayQueryInitialize(&rq, tlas, RayDesc(RAY_FLAG_TERMINATE_ON_FIRST_HIT, 0xFFu,
+        0.001, 1000.0, ray_origin, ray_dir)); // wgpu WGSL: RayDesc(flags, cull_mask, t_min, t_max, origin, dir)
+    rayQueryProceed(&rq);
+
+    var shadow = 1.0; // lit by default
+    if rayQueryGetCommittedIntersection(&rq).kind != RAY_QUERY_INTERSECTION_NONE {
+        shadow = 0.0; // occluded
+    }
+
+    textureStore(t_shadow_out, id.xy, vec4(shadow, 0.0, 0.0, 0.0));
+}
+```
+
+## Lighting Pass 수정
+
+RT shadow 텍스처를 기존 shadow_factor 대신 사용:
+
+```wgsl
+// 기존: let shadow_factor = calculate_shadow(world_pos);
+// 변경: RT shadow map에서 직접 읽기
+let rt_shadow = textureSample(t_rt_shadow, s_rt_shadow, uv).r;
+let shadow_factor = rt_shadow;
+```
+
+기존 PCF shadow map 관련 바인딩은 유지하되 사용하지 않음 (호환성).
+RT shadow 텍스처를 Group 2의 추가 바인딩(7, 8)으로 추가.
+
+## Device Creation 변경
+
+RT feature를 요청해야 함:
+```rust
+let (device, queue) = adapter.request_device(&DeviceDescriptor {
+    required_features: Features::EXPERIMENTAL_RAY_QUERY,
+    ..
+}).await;
+```
+
+기존 GpuContext::new()는 features를 요청하지 않으므로, deferred_demo에서 직접 device를 생성하거나 GpuContext에 optional features 파라미터를 추가.
+
+## Bind Group Details
+
+### RT Shadow Compute
+
+**Group 0:**
+- binding 0: position texture (texture_2d<f32>)
+- binding 1: normal texture (texture_2d<f32>)
+
+**Group 1:**
+- binding 0: acceleration_structure (TLAS)
+- binding 1: storage texture (r32float, write)
+- binding 2: uniform buffer (RtShadowUniform)
+
+### Lighting Pass Group 2 (확장)
+
+기존 7 bindings (0-6: shadow+IBL+SSGI) + 추가:
+- binding 7: RT shadow texture (Float, filterable — NOTE(review): sampling r32float with a filtering sampler requires Features::FLOAT32_FILTERABLE; otherwise bind as non-filterable with a NonFiltering sampler)
+- binding 8: RT shadow sampler (Filtering)
+
+## Test Plan
+
+- rt_accel.rs: 빌드 확인만 (GPU 의존)
+- rt_shadow.rs: RtShadowUniform 크기, 리소스 생성
+- 통합: deferred_demo에서 RT shadow ON, 기존 PCF OFF → 날카로운 그림자 확인