TAD: pre/de-emphasis

2026-06-06 13:38:30 +09:00 · 2025-11-07 23:13:08 +09:00
parent 8878d37e5b
commit aa9ecee7ca
3 changed files with 233 additions and 68 deletions
--- a/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt
+++ b/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt
@@ -157,6 +157,19 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {

    private val LAMBDA_FIXED = 6.0f

+    // Deadzone marker for stochastic reconstruction (must match encoder)
+    private val DEADZONE_MARKER_QUANT = (-128).toByte()
+
+    // Deadband thresholds (must match encoder)
+    private val DEADBANDS = arrayOf(
+        floatArrayOf(  // Mid channel
+            1.0f, 0.3f, 0.3f, 0.3f, 0.3f, 0.2f, 0.2f, 0.05f, 0.05f, 0.05f
+        ),
+        floatArrayOf(  // Side channel
+            1.0f, 0.3f, 0.3f, 0.3f, 0.3f, 0.2f, 0.2f, 0.05f, 0.05f, 0.05f
+        )
+    )
+
    // Dither state for noise shaping (2 channels, 2 history samples each)
    private val ditherError = Array(2) { FloatArray(2) }

@@ -366,9 +379,23 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
    // TAD (Terrarum Advanced Audio) Decoder
    //=============================================================================

-    // Uniform random in [0, 1)
+    // Laplacian-distributed noise (for stochastic reconstruction)
+    private fun laplacianNoise(scale: Float): Float {
+        val u = urand() - 0.5f  // [-0.5, 0.5)
+        val sign = if (u >= 0.0f) 1.0f else -1.0f
+        var absU = kotlin.math.abs(u)
+
+        // Avoid log(0)
+        if (absU >= 0.49999f) absU = 0.49999f
+
+        // Inverse Laplacian CDF with λ = 1/scale
+        val x = -sign * kotlin.math.ln(1.0f - 2.0f * absU) * scale
+        return x
+    }
+
+    // Uniform random in [0, 1) - kept for compatibility
    private fun frand01(): Float {
-        return Math.random().toFloat()
+        return urand()
    }

    // TPDF (Triangular Probability Density Function) noise in [-1, +1)
@@ -554,8 +581,8 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
            // Dequantize to Float32
            val dwtMid = FloatArray(sampleCount)
            val dwtSide = FloatArray(sampleCount)
-            dequantizeDwtCoefficients(quantMid, dwtMid, sampleCount, maxIndex, dwtLevels)
-            dequantizeDwtCoefficients(quantSide, dwtSide, sampleCount, maxIndex, dwtLevels)
+            dequantizeDwtCoefficients(0, quantMid, dwtMid, sampleCount, maxIndex, dwtLevels)
+            dequantizeDwtCoefficients(1, quantSide, dwtSide, sampleCount, maxIndex, dwtLevels)

            // Inverse DWT using CDF 9/7 wavelet (produces Float32 samples in range [-1.0, 1.0])
            dwt97InverseMultilevel(dwtMid, sampleCount, dwtLevels)
@@ -568,6 +595,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {

            // Expand dynamic range (gamma expansion)
            expandGamma(pcm32Left, pcm32Right, sampleCount)
+//            expandMuLaw(pcm32Left, pcm32Right, sampleCount)

            // Apply de-emphasis filter (AFTER gamma expansion, BEFORE PCM32f to PCM8)
            applyDeemphasis(pcm32Left, pcm32Right, sampleCount)
@@ -632,7 +660,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
        }
    }

-    private fun dequantizeDwtCoefficients(quantized: ByteArray, coeffs: FloatArray, count: Int,
+    private fun dequantizeDwtCoefficients(channel: Int, quantized: ByteArray, coeffs: FloatArray, count: Int,
                                         maxIndex: Int, dwtLevels: Int) {
        // Calculate sideband boundaries dynamically
        val firstBandSize = count shr dwtLevels
@@ -643,7 +671,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
            sidebandStarts[i] = sidebandStarts[i - 1] + (firstBandSize shl (i - 2))
        }

-        // Step 1: Dequantize all coefficients using lambda decompanding
+        // Dequantize all coefficients with stochastic reconstruction for deadzoned values
        val quantiserScale = 1.0f
        for (i in 0 until count) {
            var sideband = dwtLevels
@@ -654,34 +682,33 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                }
            }

-            // Decode using lambda companding
-            val normalizedVal = lambdaDecompanding(quantized[i], maxIndex)
+            // Check for deadzone marker
+            /*if (quantized[i] == DEADZONE_MARKER_QUANT) {
+                // Stochastic reconstruction: generate Laplacian noise in deadband range
+                val deadbandThreshold = DEADBANDS[channel][sideband]

-            // Denormalize using the subband scalar and apply base weight + quantiser scaling
-            val weight = BASE_QUANTISER_WEIGHTS[sideband] * quantiserScale
-            coeffs[i] = normalizedVal * TAD32_COEFF_SCALARS[sideband] * weight
+                // Generate Laplacian-distributed noise scaled to deadband width
+                // Use scale = threshold/3 to keep ~99% of samples within [-threshold, +threshold]
+                var noise = laplacianNoise(deadbandThreshold / 3.0f)
+
+                // Clamp to deadband range
+                if (noise > deadbandThreshold) noise = deadbandThreshold
+                if (noise < -deadbandThreshold) noise = -deadbandThreshold
+
+                // Apply scalar (but not quantiser weight - noise is already in correct range)
+                coeffs[i] = noise * TAD32_COEFF_SCALARS[sideband]
+            } else {*/
+                // Normal dequantization using lambda decompanding
+                val normalizedVal = lambdaDecompanding(quantized[i], maxIndex)
+
+                // Denormalize using the subband scalar and apply base weight + quantiser scaling
+                val weight = BASE_QUANTISER_WEIGHTS[sideband] * quantiserScale
+                coeffs[i] = normalizedVal * TAD32_COEFF_SCALARS[sideband] * weight
+//            }
        }

-        // Step 2: Apply spectral interpolation per band
-        // Process bands from high to low frequency (dwtLevels down to 0)
-        var prevBandRms = 0.0f
-
-        for (band in dwtLevels downTo 0) {
-            val bandStart = sidebandStarts[band]
-            val bandEnd = sidebandStarts[band + 1]
-            val bandLen = bandEnd - bandStart
-
-            // Calculate quantization step Q for this band
-            val weight = BASE_QUANTISER_WEIGHTS[band] * quantiserScale
-            val scalar = TAD32_COEFF_SCALARS[band] * weight
-            val Q = scalar / maxIndex
-
-            // Apply spectral interpolation to this band
-            spectralInterpolateBand(coeffs, bandStart, bandLen, Q, prevBandRms)
-
-            // Compute RMS for this band to use as reference for next (lower frequency) band
-            prevBandRms = computeBandRms(coeffs, bandStart, bandLen)
-        }
+        // Note: Stochastic reconstruction replaces the old spectral interpolation step
+        // No need for additional processing - deadzoned coefficients already have appropriate noise
    }

    // 9/7 inverse DWT (CDF 9/7 wavelet - matches C implementation)
--- a/video_encoder/decoder_tad.c
+++ b/video_encoder/decoder_tad.c
@@ -162,6 +162,59 @@ static int calculate_dwt_levels(int chunk_size) {
    return 9;
 }

+//=============================================================================
+// Stochastic Reconstruction for Deadzoned Coefficients
+//=============================================================================
+
+// Special marker for deadzoned coefficients (must match encoder)
+#define DEADZONE_MARKER_QUANT (-128)
+
+// Deadband thresholds (must match encoder)
+static const float DEADBANDS[2][10] = {
+{ // mid channel
+    0.10f,    // LL (L9) DC
+    0.03f,    // H (L9) 31.25 hz
+    0.03f,    // H (L8) 62.5 hz
+    0.03f,    // H (L7) 125 hz
+    0.03f,    // H (L6) 250 hz
+    0.02f,    // H (L5) 500 hz
+    0.02f,    // H (L4) 1 khz
+    0.005f,    // H (L3) 2 khz
+    0.005f,    // H (L2) 4 khz
+    0.005f     // H (L1) 8 khz
+},
+{ // side channel
+    0.10f,    // LL (L9) DC
+    0.03f,    // H (L9) 31.25 hz
+    0.03f,    // H (L8) 62.5 hz
+    0.03f,    // H (L7) 125 hz
+    0.03f,    // H (L6) 250 hz
+    0.02f,    // H (L5) 500 hz
+    0.02f,    // H (L4) 1 khz
+    0.005f,    // H (L3) 2 khz
+    0.005f,    // H (L2) 4 khz
+    0.005f     // H (L1) 8 khz
+}};
+
+// Fast PRNG state (xorshift32) for stochastic reconstruction
+static uint32_t deadzone_rng_state = 0x12345678u;
+
+// Laplacian-distributed noise (better approximation than TPDF)
+// Uses inverse CDF method: X = -sign(U) * ln(1 - 2*|U|) / λ
+static float laplacian_noise(float scale) {
+    float u = urand(&deadzone_rng_state) - 0.5f;  // [-0.5, 0.5)
+    float sign = (u >= 0.0f) ? 1.0f : -1.0f;
+    float abs_u = fabsf(u);
+
+    // Avoid log(0) by clamping
+    if (abs_u >= 0.49999f) abs_u = 0.49999f;
+
+    // Inverse Laplacian CDF with λ = 1/scale
+    float x = -sign * logf(1.0f - 2.0f * abs_u) * scale;
+
+    return x;
+}
+
 //=============================================================================
 // Haar DWT Implementation (inverse only needed for decoder)
 //=============================================================================
@@ -380,9 +433,9 @@ static void expand_gamma(float *left, float *right, size_t count) {
    for (size_t i = 0; i < count; i++) {
        // decode(y) = sign(y) * |y|^(1/γ) where γ=0.5
        float x = left[i]; float a = fabsf(x);
-        left[i] = signum(x) * powf(a, 1.6f);
+        left[i] = signum(x) * a * a;
        float y = right[i]; float b = fabsf(y);
-        right[i] = signum(y) * powf(b, 1.6f);
+        right[i] = signum(y) * b * b;
    }
 }

@@ -534,7 +587,7 @@ static void dequantize_dwt_coefficients(int channel, const int8_t *quantized, fl
        sideband_starts[i] = sideband_starts[i-1] + (first_band_size << (i-2));
    }

-    // Step 1: Dequantize all coefficients (no dithering yet)
+    // Dequantize all coefficients with stochastic reconstruction for deadzoned values
    for (size_t i = 0; i < count; i++) {
        int sideband = dwt_levels;
        for (int s = 0; s <= dwt_levels; s++) {
@@ -544,35 +597,33 @@ static void dequantize_dwt_coefficients(int channel, const int8_t *quantized, fl
            }
        }

-        // Decode using lambda companding
-        float normalized_val = lambda_decompanding(quantized[i], max_index);
+        // Check for deadzone marker
+        /*if (quantized[i] == (int8_t)0) {//DEADZONE_MARKER_QUANT) {
+            // Stochastic reconstruction: generate Laplacian noise in deadband range
+            float deadband_threshold = DEADBANDS[channel][sideband];

-        // Denormalize using the subband scalar and apply base weight + quantiser scaling
-        float weight = BASE_QUANTISER_WEIGHTS[channel][sideband] * quantiser_scale;
-        coeffs[i] = normalized_val * TAD32_COEFF_SCALARS[sideband] * weight;
+            // Generate Laplacian-distributed noise scaled to deadband width
+            // Use scale = threshold/3 to keep ~99% of samples within [-threshold, +threshold]
+            float noise = laplacian_noise(deadband_threshold / 3.0f);
+
+            // Clamp to deadband range
+            if (noise > deadband_threshold) noise = deadband_threshold;
+            if (noise < -deadband_threshold) noise = -deadband_threshold;
+
+            // Apply scalar (but not quantiser weight - noise is already in correct range)
+            coeffs[i] = noise * TAD32_COEFF_SCALARS[sideband];
+        } else {*/
+            // Normal dequantization using lambda decompanding
+            float normalized_val = lambda_decompanding(quantized[i], max_index);
+
+            // Denormalize using the subband scalar and apply base weight + quantiser scaling
+            float weight = BASE_QUANTISER_WEIGHTS[channel][sideband] * quantiser_scale;
+            coeffs[i] = normalized_val * TAD32_COEFF_SCALARS[sideband] * weight;
+//        }
    }

-    // Step 2: Apply spectral interpolation per band
-    // Process bands from high to low frequency (dwt_levels down to 0)
-    // so we can use lower bands' RMS for higher band reconstruction
-    float prev_band_rms = 0.0f;
-
-    for (int band = dwt_levels; band >= 0; band--) {
-        size_t band_start = sideband_starts[band];
-        size_t band_end = sideband_starts[band + 1];
-        size_t band_len = band_end - band_start;
-
-        // Calculate quantization step Q for this band
-        float weight = BASE_QUANTISER_WEIGHTS[channel][band] * quantiser_scale;
-        float scalar = TAD32_COEFF_SCALARS[band] * weight;
-        float Q = scalar / max_index;
-
-        // Apply spectral interpolation to this band
-        spectral_interpolate_band(&coeffs[band_start], band_len, Q, prev_band_rms);
-
-        // Compute RMS for this band to use as reference for next (lower frequency) band
-        prev_band_rms = compute_band_rms(&coeffs[band_start], band_len);
-    }
+    // Note: Stochastic reconstruction replaces the old spectral interpolation step
+    // No need for additional processing - deadzoned coefficients already have appropriate noise

    free(sideband_starts);
 }
@@ -653,6 +704,7 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_

    // expand dynamic range
    expand_gamma(pcm32_left, pcm32_right, sample_count);
+//    expand_mu_law(pcm32_left, pcm32_right, sample_count);

    // Apply de-emphasis filter (AFTER gamma expansion, BEFORE PCM32f to PCM8)
    apply_deemphasis(pcm32_left, pcm32_right, sample_count);
--- a/video_encoder/encoder_tad.c
+++ b/video_encoder/encoder_tad.c
@@ -46,6 +46,33 @@ static const float BASE_QUANTISER_WEIGHTS[2][10] = {
    3.2f     // H (L1) 8 khz
 }};

+// target: before quantisation
+static const float DEADBANDS[2][10] = {
+{ // mid channel
+    0.10f,    // LL (L9) DC
+    0.03f,    // H (L9) 31.25 hz
+    0.03f,    // H (L8) 62.5 hz
+    0.03f,    // H (L7) 125 hz
+    0.03f,    // H (L6) 250 hz
+    0.02f,    // H (L5) 500 hz
+    0.02f,    // H (L4) 1 khz
+    0.005f,    // H (L3) 2 khz
+    0.005f,    // H (L2) 4 khz
+    0.005f     // H (L1) 8 khz
+},
+{ // side channel
+    0.10f,    // LL (L9) DC
+    0.03f,    // H (L9) 31.25 hz
+    0.03f,    // H (L8) 62.5 hz
+    0.03f,    // H (L7) 125 hz
+    0.03f,    // H (L6) 250 hz
+    0.02f,    // H (L5) 500 hz
+    0.02f,    // H (L4) 1 khz
+    0.005f,    // H (L3) 2 khz
+    0.005f,    // H (L2) 4 khz
+    0.005f     // H (L1) 8 khz
+}};
+
 static inline float FCLAMP(float x, float min, float max) {
    return x < min ? min : (x > max ? max : x);
 }
@@ -75,6 +102,56 @@ static int calculate_dwt_levels(int chunk_size) {
    return 9;
 }

+// Special marker for deadzoned coefficients (will be reconstructed with noise on decode)
+#define DEADZONE_MARKER_FLOAT (-999.0f)  // Unmistakable marker in float domain
+#define DEADZONE_MARKER_QUANT (-128)     // Maps to this in quantized domain (int8 minimum)
+
+// Perceptual epsilon - coefficients below this are truly zero (inaudible)
+#define EPSILON_PERCEPTUAL 0.001f
+
+static void apply_coeff_deadzone(int channel, float *coeffs, size_t num_samples) {
+    // Apply deadzonning to each DWT subband using frequency-dependent thresholds
+    // Instead of zeroing, mark small coefficients for stochastic reconstruction
+
+    const int dwt_levels = 9;  // Fixed to match encoder
+
+    // Calculate subband boundaries (same logic as decoder)
+    const int first_band_size = num_samples >> dwt_levels;
+    int sideband_starts[11];  // dwt_levels + 2
+    sideband_starts[0] = 0;
+    sideband_starts[1] = first_band_size;
+    for (int i = 2; i <= dwt_levels + 1; i++) {
+        sideband_starts[i] = sideband_starts[i - 1] + (first_band_size << (i - 2));
+    }
+
+    // Apply deadzone threshold to each coefficient
+    for (size_t i = 0; i < num_samples; i++) {
+        // Determine which subband this coefficient belongs to
+        int sideband = dwt_levels;  // Default to highest frequency
+        for (int s = 0; s <= dwt_levels; s++) {
+            if (i < (size_t)sideband_starts[s + 1]) {
+                sideband = s;
+                break;
+            }
+        }
+
+        // Get threshold for this subband and channel
+        float threshold = DEADBANDS[channel][sideband];
+        float abs_coeff = fabsf(coeffs[i]);
+
+        // If coefficient is within deadband AND perceptually non-zero, mark it
+        if (abs_coeff > EPSILON_PERCEPTUAL && abs_coeff < threshold) {
+            // Mark for stochastic reconstruction (decoder will add noise)
+            coeffs[i] = 0.0f;//DEADZONE_MARKER_FLOAT;
+        }
+        // If below perceptual epsilon, truly zero it
+        else if (abs_coeff <= EPSILON_PERCEPTUAL) {
+            coeffs[i] = 0.0f;
+        }
+        // Otherwise keep coefficient unchanged
+    }
+}
+
 //=============================================================================
 // DD-4 DWT Implementation
 //=============================================================================
@@ -276,9 +353,9 @@ static void compress_gamma(float *left, float *right, size_t count) {
    for (size_t i = 0; i < count; i++) {
        // encode(x) = sign(x) * |x|^γ where γ=0.5
        float x = left[i];
-        left[i] = signum(x) * powf(fabsf(x), 0.625f);
+        left[i] = signum(x) * powf(fabsf(x), 0.5f);
        float y = right[i];
-        right[i] = signum(y) * powf(fabsf(y), 0.625f);
+        right[i] = signum(y) * powf(fabsf(y), 0.5f);
    }
 }

@@ -357,12 +434,17 @@ static void quantize_dwt_coefficients(int channel, const float *coeffs, int8_t *
            current_subband_index[i] = sideband;
        }

-        // Apply base weight and quantiser scaling
-        float weight = BASE_QUANTISER_WEIGHTS[channel][sideband] * quantiser_scale;
-        float val = (coeffs[i] / (TAD32_COEFF_SCALARS[sideband] * weight)); // val is normalised to [-1,1]
-        int8_t quant_val = lambda_companding(val, max_index);
-
-        quantized[i] = quant_val;
+        // Check for deadzone marker (special handling)
+        if (coeffs[i] == DEADZONE_MARKER_FLOAT) {
+            // Map to special quantized marker for stochastic reconstruction
+            quantized[i] = (int8_t)DEADZONE_MARKER_QUANT;
+        } else {
+            // Normal quantization
+            float weight = BASE_QUANTISER_WEIGHTS[channel][sideband] * quantiser_scale;
+            float val = (coeffs[i] / (TAD32_COEFF_SCALARS[sideband] * weight)); // val is normalised to [-1,1]
+            int8_t quant_val = lambda_companding(val, max_index);
+            quantized[i] = quant_val;
+        }
    }

    free(sideband_starts);
@@ -809,6 +891,7 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,

    // Step 1.2: Compress dynamic range
    compress_gamma(pcm32_left, pcm32_right, num_samples);
+//    compress_mu_law(pcm32_left, pcm32_right, num_samples);

    // Step 2: M/S decorrelation
    ms_decorrelate(pcm32_left, pcm32_right, pcm32_mid, pcm32_side, num_samples);
@@ -835,6 +918,9 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
        accumulate_coefficients(dwt_side, dwt_levels, num_samples, side_accumulators);
    }

+//    apply_coeff_deadzone(0, dwt_mid, num_samples);
+//    apply_coeff_deadzone(1, dwt_side, num_samples);
+
    // Step 4: Quantize with frequency-dependent weights and quantiser scaling
    quantize_dwt_coefficients(0, dwt_mid, quant_mid, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale);
    quantize_dwt_coefficients(1, dwt_side, quant_side, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale);