feat(renderer): improve IBL with Hosek-Wilkie sky, SH irradiance, GPU BRDF LUT

- Hosek-Wilkie inspired procedural sky (Rayleigh/Mie scattering, sun disk)
- L2 Spherical Harmonics irradiance (9 coefficients, CPU computation)
- SH evaluation in shader replaces sample_environment for diffuse IBL
- GPU compute BRDF LUT (Rg16Float, higher precision than CPU Rgba8Unorm)
- SkyParams (sun_direction, turbidity) in ShadowUniform

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-25 20:58:28 +09:00
parent abd6f5cf6e
commit 1081fb472f
13 changed files with 693 additions and 10 deletions
--- a/crates/voltex_renderer/src/brdf_lut_compute.wgsl
+++ b/crates/voltex_renderer/src/brdf_lut_compute.wgsl
@@ -0,0 +1,89 @@
 // GPU Compute shader for BRDF LUT generation (split-sum approximation).
 // Workgroup size: 16x16, each thread computes one texel.
 // Output: Rg16Float texture with (scale, bias) per texel.
@group(0) @binding(0) var output_tex: texture_storage_2d<rg16float, write>;
 const PI: f32 = 3.14159265358979;
 const NUM_SAMPLES: u32 = 1024u;
 // Van der Corput radical inverse: mirrors the 32 bits of `bits_in` around the
 // binary point, turning a sample index into a fraction in [0, 1].
 fn radical_inverse_vdc(bits_in: u32) -> f32 {
    // `reverseBits` is the WGSL builtin for the 16/1/2/4/8-bit swap network
    // that would otherwise have to be written out by hand.
    let reversed = reverseBits(bits_in);
    // Multiply by 2^-32 to move the reversed integer behind the binary point.
    return f32(reversed) * 2.3283064365386963e-10;
 }
 // Point `i` of an `n`-point Hammersley set: the x component is the evenly
 // spaced index fraction i/n, the y component is the radical inverse of `i`.
 fn hammersley(i: u32, n: u32) -> vec2<f32> {
    let index_fraction = f32(i) / f32(n);
    let mirrored = radical_inverse_vdc(i);
    return vec2<f32>(index_fraction, mirrored);
 }
 // Maps a 2D sample `xi` to a half vector distributed according to the GGX lobe.
 // The result is expressed in tangent space, where the normal is (0, 0, 1).
 fn importance_sample_ggx(xi: vec2<f32>, roughness: f32) -> vec3<f32> {
    // alpha = roughness^2; the lobe width uses alpha^2.
    let alpha = roughness * roughness;
    let alpha_sq = alpha * alpha;
    let azimuth = 2.0 * PI * xi.x;
    let cos_polar = sqrt((1.0 - xi.y) / (1.0 + (alpha_sq - 1.0) * xi.y));
    // Guard against a slightly negative radicand from rounding.
    let sin_polar = sqrt(max(1.0 - cos_polar * cos_polar, 0.0));
    return vec3<f32>(cos(azimuth) * sin_polar, sin(azimuth) * sin_polar, cos_polar);
 }
 // Smith geometry term for IBL: product of the Schlick-GGX masking factor for
 // the view direction and the shadowing factor for the light direction.
 // Uses k = alpha / 2 with alpha = roughness^2 (i.e. k = roughness^2 / 2).
 fn geometry_smith_ibl(n_dot_v: f32, n_dot_l: f32, roughness: f32) -> f32 {
    let k = (roughness * roughness) / 2.0;
    let one_minus_k = 1.0 - k;
    let masking_view = n_dot_v / (n_dot_v * one_minus_k + k);
    let shadowing_light = n_dot_l / (n_dot_l * one_minus_k + k);
    return masking_view * shadowing_light;
 }
// Integrates the split-sum specular BRDF for one LUT texel.
// The x axis of the texture is N.V and the y axis is roughness, both sampled at
// texel centres. `scale` accumulates the (1 - Fc) part of the integrand and
// `bias` the Fc part; they are stored in the R and G channels respectively.
@compute @workgroup_size(16, 16, 1)
 fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
    let dims = textureDimensions(output_tex);
    // Invocations that fall outside the texture have nothing to write.
    if gid.x >= dims.x || gid.y >= dims.y {
        return;
    }
    // Normalise each axis by its own extent so that a non-square LUT still
    // covers the full range on both axes (the y axis used to be divided by
    // the width).
    let n_dot_v = clamp((f32(gid.x) + 0.5) / f32(dims.x), 0.0, 1.0);
    let roughness = clamp((f32(gid.y) + 0.5) / f32(dims.y), 0.0, 1.0);
    // View vector in tangent space where N = (0,0,1)
    let v = vec3<f32>(sqrt(max(1.0 - n_dot_v * n_dot_v, 0.0)), 0.0, n_dot_v);
    var scale = 0.0;
    var bias = 0.0;
    for (var i = 0u; i < NUM_SAMPLES; i++) {
        let xi = hammersley(i, NUM_SAMPLES);
        let h = importance_sample_ggx(xi, roughness);
        let v_dot_h = max(dot(v, h), 0.0);
        // Reflect V around H to get L
        let l = 2.0 * v_dot_h * h - v;
        // In tangent space the z component is the cosine against the normal.
        let n_dot_l = max(l.z, 0.0);
        let n_dot_h = max(h.z, 0.0);
        // Samples whose reflected direction points below the surface add nothing.
        if n_dot_l > 0.0 {
            let g = geometry_smith_ibl(n_dot_v, n_dot_l, roughness);
            // The 0.001 floor keeps the division finite at grazing angles.
            let g_vis = g * v_dot_h / max(n_dot_h * n_dot_v, 0.001);
            // Schlick Fresnel weight (1 - V.H)^5.
            let fc = pow(1.0 - v_dot_h, 5.0);
            scale += g_vis * (1.0 - fc);
            bias += g_vis * fc;
        }
    }
    scale /= f32(NUM_SAMPLES);
    bias /= f32(NUM_SAMPLES);
    textureStore(output_tex, vec2<i32>(i32(gid.x), i32(gid.y)), vec4<f32>(scale, bias, 0.0, 1.0));
 }
--- a/crates/voltex_renderer/src/deferred_lighting.wgsl
+++ b/crates/voltex_renderer/src/deferred_lighting.wgsl
@@ -42,6 +42,10 @@ struct ShadowUniform {
    light_view_proj: mat4x4<f32>,
    shadow_map_size: f32,
    shadow_bias: f32,
    _padding: vec2<f32>,
    sun_direction: vec3<f32>,
    turbidity: f32,
    sh_coefficients: array<vec4<f32>, 7>,
 };
@group(2) @binding(0) var t_shadow: texture_depth_2d;
--- a/crates/voltex_renderer/src/ibl.rs
+++ b/crates/voltex_renderer/src/ibl.rs
@@ -9,6 +9,7 @@ pub struct IblResources {
 }
 impl IblResources {
    /// CPU fallback: generates the BRDF LUT on the CPU and uploads as Rgba8Unorm.
    pub fn new(device: &wgpu::Device, queue: &wgpu::Queue) -> Self {
        let size = BRDF_LUT_SIZE;
@@ -79,4 +80,120 @@ impl IblResources {
        }
    }
    /// GPU compute path: builds the BRDF LUT by running `brdf_lut_compute.wgsl`
    /// over a `BRDF_LUT_SIZE` x `BRDF_LUT_SIZE` `Rg16Float` storage texture.
    /// Higher precision than the CPU Rgba8Unorm path.
    ///
    /// The compute work is recorded and submitted to `queue` before returning; the
    /// same texture is then exposed for sampling through a linear, clamp-to-edge
    /// sampler.
    ///
    /// NOTE(review): write-only storage access to `Rg16Float` may not be available
    /// on every adapter without extra device features — confirm against the
    /// supported targets; `new` remains the CPU fallback.
    pub fn new_gpu(device: &wgpu::Device, queue: &wgpu::Queue) -> Self {
        // Must stay in sync with `@workgroup_size(16, 16, 1)` in the shader.
        const WORKGROUP_EDGE: u32 = 16;
        const LUT_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rg16Float;

        let lut_size = BRDF_LUT_SIZE;

        // Storage texture the shader writes into; also sampled later.
        let texture = device.create_texture(&wgpu::TextureDescriptor {
            label: Some("BrdfLutTexture_GPU"),
            size: wgpu::Extent3d {
                width: lut_size,
                height: lut_size,
                depth_or_array_layers: 1,
            },
            mip_level_count: 1,
            sample_count: 1,
            dimension: wgpu::TextureDimension::D2,
            format: LUT_FORMAT,
            usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::STORAGE_BINDING,
            view_formats: &[],
        });
        let write_view = texture.create_view(&wgpu::TextureViewDescriptor::default());

        // Compute pipeline with a single write-only storage texture binding.
        let module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("BRDF LUT Compute Shader"),
            source: wgpu::ShaderSource::Wgsl(include_str!("brdf_lut_compute.wgsl").into()),
        });
        let output_entry = wgpu::BindGroupLayoutEntry {
            binding: 0,
            visibility: wgpu::ShaderStages::COMPUTE,
            ty: wgpu::BindingType::StorageTexture {
                access: wgpu::StorageTextureAccess::WriteOnly,
                format: LUT_FORMAT,
                view_dimension: wgpu::TextureViewDimension::D2,
            },
            count: None,
        };
        let layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
            label: Some("BRDF LUT Compute BGL"),
            entries: &[output_entry],
        });
        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
            label: Some("BRDF LUT Compute Pipeline Layout"),
            bind_group_layouts: &[&layout],
            immediate_size: 0,
        });
        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("BRDF LUT Compute Pipeline"),
            layout: Some(&pipeline_layout),
            module: &module,
            entry_point: Some("main"),
            compilation_options: wgpu::PipelineCompilationOptions::default(),
            cache: None,
        });
        let output_binding = device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("BRDF LUT Compute Bind Group"),
            layout: &layout,
            entries: &[wgpu::BindGroupEntry {
                binding: 0,
                resource: wgpu::BindingResource::TextureView(&write_view),
            }],
        });

        // Record one compute pass and submit it.
        let mut commands = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
            label: Some("BRDF LUT Compute Encoder"),
        });
        {
            let mut compute_pass = commands.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("BRDF LUT Compute Pass"),
                timestamp_writes: None,
            });
            compute_pass.set_pipeline(&compute_pipeline);
            compute_pass.set_bind_group(0, &output_binding, &[]);
            // Round up so partially filled workgroups still cover the edge texels.
            let groups_per_axis = (lut_size + WORKGROUP_EDGE - 1) / WORKGROUP_EDGE;
            compute_pass.dispatch_workgroups(groups_per_axis, groups_per_axis, 1);
        }
        queue.submit(std::iter::once(commands.finish()));

        // Separate view and sampler handed out for reading the LUT.
        let sampled_view = texture.create_view(&wgpu::TextureViewDescriptor::default());
        let sampler = device.create_sampler(&wgpu::SamplerDescriptor {
            label: Some("BrdfLutSampler"),
            address_mode_u: wgpu::AddressMode::ClampToEdge,
            address_mode_v: wgpu::AddressMode::ClampToEdge,
            address_mode_w: wgpu::AddressMode::ClampToEdge,
            mag_filter: wgpu::FilterMode::Linear,
            min_filter: wgpu::FilterMode::Linear,
            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
            ..Default::default()
        });

        Self {
            brdf_lut_texture: texture,
            brdf_lut_view: sampled_view,
            brdf_lut_sampler: sampler,
        }
    }
 }
--- a/crates/voltex_renderer/src/lib.rs
+++ b/crates/voltex_renderer/src/lib.rs
@@ -22,6 +22,7 @@ pub mod spot_shadow;
 pub mod frustum;
 pub mod brdf_lut;
 pub mod ibl;
 pub mod sh;
 pub mod gbuffer;
 pub mod fullscreen_quad;
 pub mod deferred_pipeline;
@@ -47,6 +48,7 @@ pub use point_shadow::{PointShadowMap, point_shadow_view_matrices, point_shadow_
 pub use spot_shadow::{SpotShadowMap, spot_shadow_matrix};
 pub use frustum::{Plane, Frustum, extract_frustum, sphere_vs_frustum, cull_lights};
 pub use ibl::IblResources;
 pub use sh::{compute_sh_coefficients, pack_sh_coefficients, evaluate_sh_cpu};
 pub use gbuffer::GBuffer;
 pub use fullscreen_quad::{create_fullscreen_vertex_buffer, FullscreenVertex};
 pub use deferred_pipeline::{
--- a/crates/voltex_renderer/src/pbr_shader.wgsl
+++ b/crates/voltex_renderer/src/pbr_shader.wgsl
@@ -47,6 +47,10 @@ struct ShadowUniform {
    light_view_proj: mat4x4<f32>,
    shadow_map_size: f32,
    shadow_bias: f32,
    _padding: vec2<f32>,
    sun_direction: vec3<f32>,
    turbidity: f32,
    sh_coefficients: array<vec4<f32>, 7>,
 };
@group(3) @binding(0) var t_shadow: texture_depth_2d;
@@ -233,23 +237,80 @@ fn calculate_shadow(light_space_pos: vec4<f32>) -> f32 {
    return shadow_val / 9.0;
 }
-// Procedural environment sampling for IBL
+// Hosek-Wilkie inspired procedural sky model
 fn sample_environment(direction: vec3<f32>, roughness: f32) -> vec3<f32> {
    // Returns procedural sky (direction.y > 0) or ground radiance for `direction`,
    // blended toward a flat average colour as `roughness` grows.
    // NOTE(review): direction.y is used directly as the elevation, so `direction`
    // is presumably expected to be normalized — confirm against callers.
-    let t = direction.y * 0.5 + 0.5;
+    let sun_dir = normalize(shadow.sun_direction);
    // Turbidity outside [1.5, 10.0] is clamped before it drives any colour term.
    let turb = clamp(shadow.turbidity, 1.5, 10.0);
    var env: vec3<f32>;
    if direction.y > 0.0 {
-        let horizon = vec3<f32>(0.6, 0.6, 0.5);
+        // Rayleigh scattering: blue zenith, warm horizon
-        let sky = vec3<f32>(0.3, 0.5, 0.9);
+        let zenith_color = vec3<f32>(0.15, 0.3, 0.8) * (1.0 / (turb * 0.15 + 0.5));
-        env = mix(horizon, sky, pow(direction.y, 0.4));
+        let horizon_color = vec3<f32>(0.7, 0.6, 0.5) * (1.0 + turb * 0.04);
        let elevation = direction.y;
        let sky_gradient = mix(horizon_color, zenith_color, pow(elevation, 0.4));
        // Mie scattering: haze near sun direction
        let cos_sun = max(dot(direction, sun_dir), 0.0);
        let mie_strength = turb * 0.02;
        let mie = mie_strength * pow(cos_sun, 8.0) * vec3<f32>(1.0, 0.9, 0.7);
        // Sun disk: bright spot with falloff
        // cos_sun is already clamped to >= 0 above, so the inner max() is a no-op.
        let sun_disk = pow(max(cos_sun, 0.0), 2048.0) * vec3<f32>(10.0, 9.0, 7.0);
        // Combine
        env = sky_gradient + mie + sun_disk;
    } else {
-        let horizon = vec3<f32>(0.6, 0.6, 0.5);
+        // Ground: dark, warm
-        let ground = vec3<f32>(0.1, 0.08, 0.06);
+        let horizon_color = vec3<f32>(0.6, 0.55, 0.45);
-        env = mix(horizon, ground, pow(-direction.y, 0.4));
+        let ground_color = vec3<f32>(0.1, 0.08, 0.06);
        env = mix(horizon_color, ground_color, pow(-direction.y, 0.4));
    }
    // Roughness blur: blend toward average for rough surfaces
    // roughness = 0 returns env unchanged; roughness = 1 returns avg exactly.
    let avg = vec3<f32>(0.3, 0.35, 0.4);
    return mix(env, avg, roughness * roughness);
 }
 // Evaluate diffuse irradiance (already divided by pi) from L2 Spherical
 // Harmonics radiance coefficients at the given normal direction.
 //
 // `coeffs` holds 9 RGB coefficients stored sequentially (c0.r, c0.g, c0.b,
 // c1.r, ...) in 7 vec4s: 27 floats plus one float of padding.
 //
 // A plain SH reconstruction returns the radiance seen along `normal`, not the
 // light gathered over the hemisphere. Each band is therefore weighted by the
 // clamped-cosine kernel A_l / pi (1, 2/3, 1/4 for l = 0, 1, 2), so the result
 // can be multiplied directly with the albedo for Lambertian shading.
 // NOTE(review): assumes the uploaded coefficients are raw radiance projections
 // and have not already been cosine-convolved by the uploader — confirm.
 fn evaluate_sh(normal: vec3<f32>, coeffs: array<vec4<f32>, 7>) -> vec3<f32> {
    let x = normal.x;
    let y = normal.y;
    let z = normal.z;
    // Clamped-cosine band weights A_l / pi.
    let band0 = 1.0;
    let band1 = 2.0 / 3.0;
    let band2 = 0.25;
    // SH basis functions (real, L2 order), pre-multiplied by their band weight
    let Y00  = band0 * 0.282095;                         // L=0, M=0
    let Y1n1 = band1 * 0.488603 * y;                     // L=1, M=-1
    let Y10  = band1 * 0.488603 * z;                     // L=1, M=0
    let Y1p1 = band1 * 0.488603 * x;                     // L=1, M=1
    let Y2n2 = band2 * 1.092548 * x * y;                 // L=2, M=-2
    let Y2n1 = band2 * 1.092548 * y * z;                 // L=2, M=-1
    let Y20  = band2 * 0.315392 * (3.0 * z * z - 1.0);   // L=2, M=0
    let Y2p1 = band2 * 1.092548 * x * z;                 // L=2, M=1
    let Y2p2 = band2 * 0.546274 * (x * x - y * y);       // L=2, M=2
    // Unpack the sequential float stream back into RGB triplets.
    let c0 = coeffs[0].xyz;
    let c1 = vec3<f32>(coeffs[0].w, coeffs[1].xy);
    let c2 = vec3<f32>(coeffs[1].zw, coeffs[2].x);
    let c3 = coeffs[2].yzw;
    let c4 = coeffs[3].xyz;
    let c5 = vec3<f32>(coeffs[3].w, coeffs[4].xy);
    let c6 = vec3<f32>(coeffs[4].zw, coeffs[5].x);
    let c7 = coeffs[5].yzw;
    let c8 = coeffs[6].xyz;
    // A truncated SH series can ring below zero; clamp to keep lighting non-negative.
    return max(
        c0 * Y00 + c1 * Y1n1 + c2 * Y10 + c3 * Y1p1 +
        c4 * Y2n2 + c5 * Y2n1 + c6 * Y20 + c7 * Y2p1 + c8 * Y2p2,
        vec3<f32>(0.0)
    );
 }
@fragment
 fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> {
    let tex_color = textureSample(t_diffuse, s_diffuse, in.uv);
@@ -301,8 +362,14 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> {
    let NdotV_ibl = max(dot(N, V), 0.0);
    let R = reflect(-V, N);
-    // Diffuse IBL
+    // Diffuse IBL: use SH irradiance if SH coefficients are set, else fallback to procedural
-    let irradiance = sample_environment(N, 1.0);
+    var irradiance: vec3<f32>;
    let sh_test = shadow.sh_coefficients[0].x + shadow.sh_coefficients[0].y + shadow.sh_coefficients[0].z;
    if abs(sh_test) > 0.0001 {
        irradiance = evaluate_sh(N, shadow.sh_coefficients);
    } else {
        irradiance = sample_environment(N, 1.0);
    }
    let F_env = fresnel_schlick(NdotV_ibl, F0);
    let kd_ibl = (vec3<f32>(1.0) - F_env) * (1.0 - metallic);
    let diffuse_ibl = kd_ibl * albedo * irradiance;
--- a/crates/voltex_renderer/src/sh.rs
+++ b/crates/voltex_renderer/src/sh.rs
@@ -0,0 +1,375 @@
 //! L2 Spherical Harmonics computation for procedural sky irradiance.
 //!
 //! Computes 9 SH coefficients (order 2) for 3 channels (RGB) from a procedural sky model.
 use std::f32::consts::PI;
 /// Evaluates the nine real spherical-harmonic basis functions of bands
 /// L = 0, 1, 2 at the direction `(x, y, z)`.
 ///
 /// The returned array is ordered by band and then by M from `-L` to `+L`.
 fn sh_basis(x: f32, y: f32, z: f32) -> [f32; 9] {
    // Band 0: constant term.
    let y0_0 = 0.282095;
    // Band 1: linear in the direction components.
    let y1_neg1 = 0.488603 * y;
    let y1_0 = 0.488603 * z;
    let y1_pos1 = 0.488603 * x;
    // Band 2: quadratic in the direction components.
    let y2_neg2 = 1.092548 * x * y;
    let y2_neg1 = 1.092548 * y * z;
    let y2_0 = 0.315392 * (3.0 * z * z - 1.0);
    let y2_pos1 = 1.092548 * x * z;
    let y2_pos2 = 0.546274 * (x * x - y * y);

    [
        y0_0, y1_neg1, y1_0, y1_pos1, y2_neg2, y2_neg1, y2_0, y2_pos1, y2_pos2,
    ]
 }
 /// Procedural sky model (Hosek-Wilkie inspired) evaluated on the CPU.
 ///
 /// Returns the RGB radiance seen along `dir`. Directions with `dir[1] > 0.0`
 /// get a horizon-to-zenith gradient plus a haze term and a sun disk around
 /// `sun_dir`; every other direction gets a horizon-to-ground gradient.
 /// `turbidity` is clamped to `[1.5, 10.0]` before use.
 fn procedural_sky(dir: [f32; 3], sun_dir: [f32; 3], turbidity: f32) -> [f32; 3] {
    let turb = turbidity.clamp(1.5, 10.0);
    // Linear blend from `from` (t = 0) to `to` (t = 1).
    let lerp = |from: f32, to: f32, t: f32| from + (to - from) * t;

    if dir[1] > 0.0 {
        // Rayleigh-like gradient: the zenith dims and the horizon brightens
        // as turbidity rises.
        let zenith_divisor = turb * 0.15 + 0.5;
        let horizon_gain = 1.0 + turb * 0.04;
        let zenith = [
            0.15 / zenith_divisor,
            0.3 / zenith_divisor,
            0.8 / zenith_divisor,
        ];
        let horizon = [0.7 * horizon_gain, 0.6 * horizon_gain, 0.5 * horizon_gain];
        let blend = dir[1].powf(0.4);

        // Cosine of the angle to the sun, clamped so the far side gets no glow.
        let cos_sun =
            (dir[0] * sun_dir[0] + dir[1] * sun_dir[1] + dir[2] * sun_dir[2]).max(0.0);
        // Broad Mie-like haze and a very tight sun disk.
        let haze = turb * 0.02 * cos_sun.powi(8);
        let disk = cos_sun.powi(2048);

        const HAZE_TINT: [f32; 3] = [1.0, 0.9, 0.7];
        const DISK_TINT: [f32; 3] = [10.0, 9.0, 7.0];
        std::array::from_fn(|ch| {
            lerp(horizon[ch], zenith[ch], blend) + haze * HAZE_TINT[ch] + disk * DISK_TINT[ch]
        })
    } else {
        // Ground: fades from a warm horizon to a dark floor.
        const HORIZON: [f32; 3] = [0.6, 0.55, 0.45];
        const GROUND: [f32; 3] = [0.1, 0.08, 0.06];
        let blend = (-dir[1]).powf(0.4);
        std::array::from_fn(|ch| lerp(HORIZON[ch], GROUND[ch], blend))
    }
 }
 /// Scales `v` to unit length.
 ///
 /// Vectors shorter than `1e-8` cannot be normalized reliably, so they are
 /// replaced by the up axis `[0.0, 1.0, 0.0]`.
 fn normalize_vec3(v: [f32; 3]) -> [f32; 3] {
    let [x, y, z] = v;
    let length = (x * x + y * y + z * z).sqrt();
    if length < 1e-8 {
        [0.0, 1.0, 0.0]
    } else {
        v.map(|component| component / length)
    }
 }
 /// Projects the procedural sky onto L2 (order 2) spherical harmonics.
 ///
 /// Convenience wrapper around [`compute_sh_coefficients_with_samples`] that
 /// uses 128 samples per axis (128 x 128 directions over the full sphere).
 ///
 /// Returns 9 RGB coefficient triplets.
 ///
 /// # Arguments
 /// * `sun_dir` - Sun direction vector (re-normalized internally)
 /// * `_sun_color` - Forwarded unchanged; the current procedural model does not
 ///   read it, it is kept for API compatibility
 /// * `turbidity` - Atmospheric turbidity (1.5 to 10.0)
 pub fn compute_sh_coefficients(
    sun_dir: [f32; 3],
    _sun_color: [f32; 3],
    turbidity: f32,
 ) -> [[f32; 3]; 9] {
    const DEFAULT_SAMPLES_PER_AXIS: u32 = 128;
    compute_sh_coefficients_with_samples(
        sun_dir,
        _sun_color,
        turbidity,
        DEFAULT_SAMPLES_PER_AXIS,
    )
 }
 /// Same as `compute_sh_coefficients` but with configurable sample count per axis.
 /// Total samples = `samples_per_axis * samples_per_axis`.
 ///
 /// The sphere is integrated with a midpoint rule on a regular
 /// `(theta, phi)` grid. Each cell contributes
 /// `radiance * basis * sin(theta) * dtheta * dphi`, with `dtheta = pi / n`
 /// and `dphi = 2 * pi / n`.
 ///
 /// # Arguments
 /// * `sun_dir` - Sun direction; normalized here before use
 /// * `_sun_color` - Not read by the current procedural model
 /// * `turbidity` - Atmospheric turbidity, passed through to the sky model
 /// * `samples_per_axis` - Grid resolution `n`; `0` yields all-zero coefficients
 ///
 /// # Returns
 /// 9 RGB coefficient triplets, one per SH basis function.
 pub fn compute_sh_coefficients_with_samples(
    sun_dir: [f32; 3],
    _sun_color: [f32; 3],
    turbidity: f32,
    samples_per_axis: u32,
 ) -> [[f32; 3]; 9] {
    let sun_dir = normalize_vec3(sun_dir);
    let n = samples_per_axis as f32;
    // Angular size of one grid cell along each axis.
    let d_theta = PI / n;
    let d_phi = 2.0 * PI / n;
    let mut coeffs = [[0.0f32; 3]; 9];

    for i in 0..samples_per_axis {
        let theta = PI * (i as f32 + 0.5) / n; // (0, pi)
        let sin_theta = theta.sin();
        let cos_theta = theta.cos();
        // Solid angle of every cell in this latitude ring:
        // dA = sin(theta) * dtheta * dphi.
        let solid_angle = sin_theta * d_theta * d_phi;

        for j in 0..samples_per_axis {
            let phi = 2.0 * PI * (j as f32 + 0.5) / n; // (0, 2*pi)
            let x = sin_theta * phi.cos();
            let y = cos_theta; // y-up convention
            let z = sin_theta * phi.sin();

            let radiance = procedural_sky([x, y, z], sun_dir, turbidity);
            let basis = sh_basis(x, y, z);

            for (coeff, basis_value) in coeffs.iter_mut().zip(basis) {
                let w = solid_angle * basis_value;
                for (channel, value) in coeff.iter_mut().zip(radiance) {
                    *channel += w * value;
                }
            }
        }
    }
    coeffs
 }
 /// Packs 9 RGB SH coefficients into 7 vec4s (28 floats) for a GPU uniform buffer.
 ///
 /// The 27 input floats are written back-to-back in the order
 /// `c0.r, c0.g, c0.b, c1.r, ...`, four per output row. The final slot,
 /// `packed[6][3]`, is padding and stays `0.0`.
 pub fn pack_sh_coefficients(coeffs: &[[f32; 3]; 9]) -> [[f32; 4]; 7] {
    let mut packed = [[0.0f32; 4]; 7];
    for (flat_index, value) in coeffs.iter().flatten().enumerate() {
        packed[flat_index / 4][flat_index % 4] = *value;
    }
    packed
 }
 /// CPU-side SH reconstruction at `normal`, intended for tests.
 ///
 /// Sums `coeffs[k] * basis[k]` over the nine basis functions for each colour
 /// channel and clamps every channel to be non-negative.
 pub fn evaluate_sh_cpu(normal: [f32; 3], coeffs: &[[f32; 3]; 9]) -> [f32; 3] {
    let [nx, ny, nz] = normal;
    let basis = sh_basis(nx, ny, nz);
    std::array::from_fn(|channel| {
        coeffs
            .iter()
            .zip(basis.iter())
            .fold(0.0f32, |sum, (coeff, weight)| sum + coeff[channel] * weight)
            // Clamp to non-negative
            .max(0.0)
    })
 }
 #[cfg(test)]
 mod tests {
    use super::*;

    /// Projects an arbitrary radiance function onto the nine SH basis functions.
    ///
    /// Uses the same midpoint `(theta, phi)` grid and `sin(theta) * dtheta * dphi`
    /// weight as `compute_sh_coefficients_with_samples`, so tests can feed
    /// synthetic environments through the identical quadrature.
    fn project_onto_sh(
        samples_per_axis: u32,
        radiance_at: impl Fn(f32, f32, f32) -> [f32; 3],
    ) -> [[f32; 3]; 9] {
        let n = samples_per_axis;
        let mut coeffs = [[0.0f32; 3]; 9];
        for i in 0..n {
            let theta = PI * (i as f32 + 0.5) / n as f32;
            let sin_theta = theta.sin();
            let cos_theta = theta.cos();
            for j in 0..n {
                let phi = 2.0 * PI * (j as f32 + 0.5) / n as f32;
                let x = sin_theta * phi.cos();
                let y = cos_theta;
                let z = sin_theta * phi.sin();
                let radiance = radiance_at(x, y, z);
                let basis = sh_basis(x, y, z);
                let weight = sin_theta * (PI / n as f32) * (2.0 * PI / n as f32);
                for k in 0..9 {
                    let w = weight * basis[k];
                    coeffs[k][0] += w * radiance[0];
                    coeffs[k][1] += w * radiance[1];
                    coeffs[k][2] += w * radiance[2];
                }
            }
        }
        coeffs
    }

    /// For uniform radiance L(d) = 1 only the L=0 coefficient is non-zero:
    /// c_00 = integral(1 * Y00 * dw) = Y00 * 4*pi = 0.282095 * 4*pi ≈ 3.5449.
    /// All other coefficients should be ~0.
    #[test]
    fn test_sh_uniform_white_environment() {
        let coeffs = project_onto_sh(128, |_, _, _| [1.0f32, 1.0, 1.0]);

        // L=0 coefficient should be approximately 0.282095 * 4*pi ≈ 3.5449
        let expected_c0 = 0.282095 * 4.0 * PI;
        for (ch, name) in ["r", "g", "b"].iter().enumerate() {
            assert!(
                (coeffs[0][ch] - expected_c0).abs() < 0.05,
                "c0.{} = {} expected ~{}", name, coeffs[0][ch], expected_c0
            );
        }
        // All higher-order coefficients should be ~0
        for k in 1..9 {
            for ch in 0..3 {
                assert!(
                    coeffs[k][ch].abs() < 0.05,
                    "c{}[{}] = {} should be ~0", k, ch, coeffs[k][ch]
                );
            }
        }
    }

    /// For a directional light along +Y, L1 coefficient for Y (index 1) should dominate.
    #[test]
    fn test_sh_directional_light_dominant_l1() {
        // Simulate a directional "light": radiance concentrated near +Y.
        let coeffs = project_onto_sh(128, |_, y, _| [y.max(0.0).powi(32); 3]);

        // L0 (index 0) should be non-zero (DC component)
        assert!(
            coeffs[0][0] > 0.01,
            "L0 coefficient should be positive for directional light: {}", coeffs[0][0]
        );
        // L1 Y-component (index 1, which is 0.488603 * y) should be significant
        assert!(
            coeffs[1][0] > 0.01,
            "L1(-1) Y-direction coefficient should be positive: {}", coeffs[1][0]
        );
        // The L1 Y-component should be the largest L1 component (light is along +Y)
        assert!(
            coeffs[1][0].abs() > coeffs[2][0].abs(),
            "L1(-1) Y should dominate over L1(0) Z: {} vs {}", coeffs[1][0], coeffs[2][0]
        );
        assert!(
            coeffs[1][0].abs() > coeffs[3][0].abs(),
            "L1(-1) Y should dominate over L1(+1) X: {} vs {}", coeffs[1][0], coeffs[3][0]
        );
    }

    /// The procedural sky must project to finite coefficients with a positive DC term.
    #[test]
    fn test_sh_procedural_sky_coefficients() {
        let coeffs = compute_sh_coefficients(
            [0.5, -0.7, 0.5],
            [1.0, 1.0, 1.0],
            3.0,
        );
        // L0 should be positive (sky has positive radiance)
        assert!(coeffs[0][0] > 0.0, "L0 R should be positive");
        assert!(coeffs[0][1] > 0.0, "L0 G should be positive");
        assert!(coeffs[0][2] > 0.0, "L0 B should be positive");
        // Verify coefficients are finite
        for k in 0..9 {
            for ch in 0..3 {
                assert!(
                    coeffs[k][ch].is_finite(),
                    "SH coefficient c{}[{}] = {} is not finite", k, ch, coeffs[k][ch]
                );
            }
        }
    }

    /// Packing keeps the sequential float order and zero-pads the last slot.
    #[test]
    fn test_pack_sh_coefficients() {
        let mut coeffs = [[0.0f32; 3]; 9];
        for k in 0..9 {
            coeffs[k] = [(k * 3) as f32, (k * 3 + 1) as f32, (k * 3 + 2) as f32];
        }
        let packed = pack_sh_coefficients(&coeffs);
        // Verify flat layout: c0.r, c0.g, c0.b, c1.r, c1.g, c1.b, ...
        assert_eq!(packed[0], [0.0, 1.0, 2.0, 3.0]);   // c0.rgb, c1.r
        assert_eq!(packed[1], [4.0, 5.0, 6.0, 7.0]);   // c1.gb, c2.rg
        assert_eq!(packed[6][0], 24.0); // c8.r
        assert_eq!(packed[6][1], 25.0); // c8.g
        assert_eq!(packed[6][2], 26.0); // c8.b
        assert_eq!(packed[6][3], 0.0);  // padding
    }

    /// CPU evaluation clamps its result, so every channel must be non-negative.
    #[test]
    fn test_evaluate_sh_cpu_positive() {
        let coeffs = compute_sh_coefficients(
            [0.5, -0.7, 0.5],
            [1.0, 1.0, 1.0],
            3.0,
        );
        // Evaluate at several directions — should be non-negative
        let dirs = [
            [0.0, 1.0, 0.0],   // up
            [0.0, -1.0, 0.0],  // down
            [1.0, 0.0, 0.0],   // right
            [0.0, 0.0, 1.0],   // forward
        ];
        for dir in &dirs {
            let result = evaluate_sh_cpu(*dir, &coeffs);
            assert!(
                result[0] >= 0.0 && result[1] >= 0.0 && result[2] >= 0.0,
                "SH evaluation at {:?} should be non-negative: {:?}", dir, result
            );
        }
    }
 }
--- a/crates/voltex_renderer/src/shadow.rs
+++ b/crates/voltex_renderer/src/shadow.rs
@@ -144,6 +144,11 @@ pub struct ShadowUniform {
    pub shadow_map_size: f32,
    pub shadow_bias: f32,
    pub _padding: [f32; 2],
    // Sky parameters (Hosek-Wilkie inspired)
    pub sun_direction: [f32; 3],
    pub turbidity: f32,
    // L2 Spherical Harmonics coefficients: 9 RGB coefficients packed into 7 vec4s (28 floats)
    pub sh_coefficients: [[f32; 4]; 7],
 }
 #[repr(C)]
--- a/examples/deferred_demo/src/main.rs
+++ b/examples/deferred_demo/src/main.rs
@@ -417,6 +417,9 @@ impl ApplicationHandler for DeferredDemoApp {
            shadow_map_size: 0.0,
            shadow_bias: 0.0,
            _padding: [0.0; 2],
            sun_direction: [0.5, -0.7, 0.5],
            turbidity: 3.0,
            sh_coefficients: [[0.0; 4]; 7],
        };
        let shadow_uniform_buffer = gpu.device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("Shadow Uniform Buffer"),
--- a/examples/ibl_demo/src/main.rs
+++ b/examples/ibl_demo/src/main.rs
@@ -211,6 +211,9 @@ impl ApplicationHandler for IblDemoApp {
            shadow_map_size: 0.0,
            shadow_bias: 0.0,
            _padding: [0.0; 2],
            sun_direction: [0.5, -0.7, 0.5],
            turbidity: 3.0,
            sh_coefficients: [[0.0; 4]; 7],
        };
        let shadow_uniform_buffer = gpu.device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("Shadow Uniform Buffer"),
--- a/examples/multi_light_demo/src/main.rs
+++ b/examples/multi_light_demo/src/main.rs
@@ -222,6 +222,9 @@ impl ApplicationHandler for MultiLightApp {
            shadow_map_size: 0.0,
            shadow_bias: 0.0,
            _padding: [0.0; 2],
            sun_direction: [0.5, -0.7, 0.5],
            turbidity: 3.0,
            sh_coefficients: [[0.0; 4]; 7],
        };
        let shadow_uniform_buffer = gpu.device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("Shadow Uniform Buffer"),
--- a/examples/pbr_demo/src/main.rs
+++ b/examples/pbr_demo/src/main.rs
@@ -207,6 +207,9 @@ impl ApplicationHandler for PbrDemoApp {
            shadow_map_size: 0.0,
            shadow_bias: 0.0,
            _padding: [0.0; 2],
            sun_direction: [0.5, -0.7, 0.5],
            turbidity: 3.0,
            sh_coefficients: [[0.0; 4]; 7],
        };
        let shadow_uniform_buffer = gpu.device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("Shadow Uniform Buffer"),
--- a/examples/shadow_demo/src/main.rs
+++ b/examples/shadow_demo/src/main.rs
@@ -258,6 +258,9 @@ impl ApplicationHandler for ShadowDemoApp {
            shadow_map_size: SHADOW_MAP_SIZE as f32,
            shadow_bias: 0.005,
            _padding: [0.0; 2],
            sun_direction: [0.5, -0.7, 0.5],
            turbidity: 3.0,
            sh_coefficients: [[0.0; 4]; 7],
        };
        let shadow_uniform_buffer = gpu.device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("Shadow Uniform Buffer"),
@@ -483,6 +486,9 @@ impl ApplicationHandler for ShadowDemoApp {
                    shadow_map_size: SHADOW_MAP_SIZE as f32,
                    shadow_bias: 0.005,
                    _padding: [0.0; 2],
                    sun_direction: [0.5, -0.7, 0.5],
                    turbidity: 3.0,
                    sh_coefficients: [[0.0; 4]; 7],
                };
                state.gpu.queue.write_buffer(
                    &state.shadow_uniform_buffer,
--- a/examples/survivor_game/src/main.rs
+++ b/examples/survivor_game/src/main.rs
@@ -267,6 +267,9 @@ impl ApplicationHandler for SurvivorApp {
            shadow_map_size: SHADOW_MAP_SIZE as f32,
            shadow_bias: 0.005,
            _padding: [0.0; 2],
            sun_direction: [0.5, -0.7, 0.5],
            turbidity: 3.0,
            sh_coefficients: [[0.0; 4]; 7],
        };
        let shadow_uniform_buffer =
            gpu.device
@@ -595,6 +598,9 @@ impl ApplicationHandler for SurvivorApp {
                    shadow_map_size: SHADOW_MAP_SIZE as f32,
                    shadow_bias: 0.005,
                    _padding: [0.0; 2],
                    sun_direction: [0.5, -0.7, 0.5],
                    turbidity: 3.0,
                    sh_coefficients: [[0.0; 4]; 7],
                };
                state.gpu.queue.write_buffer(
                    &state.shadow_uniform_buffer,