From aa9ecee7ca9cfcc2153555eaeb1960e5b15025c8 Mon Sep 17 00:00:00 2001 From: minjaesong Date: Fri, 7 Nov 2025 23:13:08 +0900 Subject: [PATCH] TAD: pre/de-emphasis --- .../torvald/tsvm/peripheral/AudioAdapter.kt | 89 +++++++++----- video_encoder/decoder_tad.c | 110 +++++++++++++----- video_encoder/encoder_tad.c | 102 ++++++++++++++-- 3 files changed, 233 insertions(+), 68 deletions(-) diff --git a/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt b/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt index 2e25d13..aa56103 100644 --- a/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt +++ b/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt @@ -157,6 +157,19 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { private val LAMBDA_FIXED = 6.0f + // Deadzone marker for stochastic reconstruction (must match encoder) + private val DEADZONE_MARKER_QUANT = (-128).toByte() + + // Deadband thresholds (must match encoder) + private val DEADBANDS = arrayOf( + floatArrayOf( // Mid channel + 1.0f, 0.3f, 0.3f, 0.3f, 0.3f, 0.2f, 0.2f, 0.05f, 0.05f, 0.05f + ), + floatArrayOf( // Side channel + 1.0f, 0.3f, 0.3f, 0.3f, 0.3f, 0.2f, 0.2f, 0.05f, 0.05f, 0.05f + ) + ) + // Dither state for noise shaping (2 channels, 2 history samples each) private val ditherError = Array(2) { FloatArray(2) } @@ -366,9 +379,23 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { // TAD (Terrarum Advanced Audio) Decoder //============================================================================= - // Uniform random in [0, 1) + // Laplacian-distributed noise (for stochastic reconstruction) + private fun laplacianNoise(scale: Float): Float { + val u = urand() - 0.5f // [-0.5, 0.5) + val sign = if (u >= 0.0f) 1.0f else -1.0f + var absU = kotlin.math.abs(u) + + // Avoid log(0) + if (absU >= 0.49999f) absU = 0.49999f + + // Inverse Laplacian CDF with λ = 1/scale + val x = -sign * kotlin.math.ln(1.0f - 2.0f * absU) * scale + return x + } + + // Uniform random in [0, 1) - kept for compatibility private fun frand01(): Float { - return Math.random().toFloat() + return urand() } // TPDF (Triangular Probability Density Function) noise in [-1, +1) @@ -554,8 +581,8 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { // Dequantize to Float32 val dwtMid = FloatArray(sampleCount) val dwtSide = FloatArray(sampleCount) - dequantizeDwtCoefficients(quantMid, dwtMid, sampleCount, maxIndex, dwtLevels) - dequantizeDwtCoefficients(quantSide, dwtSide, sampleCount, maxIndex, dwtLevels) + dequantizeDwtCoefficients(0, quantMid, dwtMid, sampleCount, maxIndex, dwtLevels) + dequantizeDwtCoefficients(1, quantSide, dwtSide, sampleCount, maxIndex, dwtLevels) // Inverse DWT using CDF 9/7 wavelet (produces Float32 samples in range [-1.0, 1.0]) dwt97InverseMultilevel(dwtMid, sampleCount, dwtLevels) @@ -568,6 +595,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { // Expand dynamic range (gamma expansion) expandGamma(pcm32Left, pcm32Right, sampleCount) +// expandMuLaw(pcm32Left, pcm32Right, sampleCount) // Apply de-emphasis filter (AFTER gamma expansion, BEFORE PCM32f to PCM8) applyDeemphasis(pcm32Left, pcm32Right, sampleCount) @@ -632,7 +660,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { } } - private fun dequantizeDwtCoefficients(quantized: ByteArray, coeffs: FloatArray, count: Int, + private fun dequantizeDwtCoefficients(channel: Int, quantized: ByteArray, coeffs: FloatArray, count: Int, maxIndex: Int, dwtLevels: Int) { // Calculate sideband boundaries dynamically val firstBandSize = count shr dwtLevels @@ -643,7 +671,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { sidebandStarts[i] = sidebandStarts[i - 1] + (firstBandSize shl (i - 2)) } - // Step 1: Dequantize all coefficients using lambda decompanding + // Dequantize all coefficients with stochastic reconstruction for deadzoned values val quantiserScale = 1.0f for (i in 0 until count) { var sideband = dwtLevels @@ -654,34 +682,33 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { } } - // Decode using lambda companding - val normalizedVal = lambdaDecompanding(quantized[i], maxIndex) + // Check for deadzone marker + /*if (quantized[i] == DEADZONE_MARKER_QUANT) { + // Stochastic reconstruction: generate Laplacian noise in deadband range + val deadbandThreshold = DEADBANDS[channel][sideband] - // Denormalize using the subband scalar and apply base weight + quantiser scaling - val weight = BASE_QUANTISER_WEIGHTS[sideband] * quantiserScale - coeffs[i] = normalizedVal * TAD32_COEFF_SCALARS[sideband] * weight + // Generate Laplacian-distributed noise scaled to deadband width + // Use scale = threshold/3 to keep ~99% of samples within [-threshold, +threshold] + var noise = laplacianNoise(deadbandThreshold / 3.0f) + + // Clamp to deadband range + if (noise > deadbandThreshold) noise = deadbandThreshold + if (noise < -deadbandThreshold) noise = -deadbandThreshold + + // Apply scalar (but not quantiser weight - noise is already in correct range) + coeffs[i] = noise * TAD32_COEFF_SCALARS[sideband] + } else {*/ + // Normal dequantization using lambda decompanding + val normalizedVal = lambdaDecompanding(quantized[i], maxIndex) + + // Denormalize using the subband scalar and apply base weight + quantiser scaling + val weight = BASE_QUANTISER_WEIGHTS[sideband] * quantiserScale + coeffs[i] = normalizedVal * TAD32_COEFF_SCALARS[sideband] * weight +// } } - // Step 2: Apply spectral interpolation per band - // Process bands from high to low frequency (dwtLevels down to 0) - var prevBandRms = 0.0f - - for (band in dwtLevels downTo 0) { - val bandStart = sidebandStarts[band] - val bandEnd = sidebandStarts[band + 1] - val bandLen = bandEnd - bandStart - - // Calculate quantization step Q for this band - val weight = BASE_QUANTISER_WEIGHTS[band] * quantiserScale - val scalar = TAD32_COEFF_SCALARS[band] * weight - val Q = scalar / maxIndex - - // Apply spectral interpolation to this band - spectralInterpolateBand(coeffs, bandStart, bandLen, Q, prevBandRms) - - // Compute RMS for this band to use as reference for next (lower frequency) band - prevBandRms = computeBandRms(coeffs, bandStart, bandLen) - } + // Note: Stochastic reconstruction replaces the old spectral interpolation step + // No need for additional processing - deadzoned coefficients already have appropriate noise } // 9/7 inverse DWT (CDF 9/7 wavelet - matches C implementation) diff --git a/video_encoder/decoder_tad.c b/video_encoder/decoder_tad.c index d42288a..7d8b166 100644 --- a/video_encoder/decoder_tad.c +++ b/video_encoder/decoder_tad.c @@ -162,6 +162,59 @@ static int calculate_dwt_levels(int chunk_size) { return 9; } +//============================================================================= +// Stochastic Reconstruction for Deadzoned Coefficients +//============================================================================= + +// Special marker for deadzoned coefficients (must match encoder) +#define DEADZONE_MARKER_QUANT (-128) + +// Deadband thresholds (must match encoder) +static const float DEADBANDS[2][10] = { +{ // mid channel + 0.10f, // LL (L9) DC + 0.03f, // H (L9) 31.25 hz + 0.03f, // H (L8) 62.5 hz + 0.03f, // H (L7) 125 hz + 0.03f, // H (L6) 250 hz + 0.02f, // H (L5) 500 hz + 0.02f, // H (L4) 1 khz + 0.005f, // H (L3) 2 khz + 0.005f, // H (L2) 4 khz + 0.005f // H (L1) 8 khz +}, +{ // side channel + 0.10f, // LL (L9) DC + 0.03f, // H (L9) 31.25 hz + 0.03f, // H (L8) 62.5 hz + 0.03f, // H (L7) 125 hz + 0.03f, // H (L6) 250 hz + 0.02f, // H (L5) 500 hz + 0.02f, // H (L4) 1 khz + 0.005f, // H (L3) 2 khz + 0.005f, // H (L2) 4 khz + 0.005f // H (L1) 8 khz +}}; + +// Fast PRNG state (xorshift32) for stochastic reconstruction +static uint32_t deadzone_rng_state = 0x12345678u; + +// Laplacian-distributed noise (better approximation than TPDF) +// Uses inverse CDF method: X = -sign(U) * ln(1 - 2*|U|) / λ +static float laplacian_noise(float scale) { + float u = urand(&deadzone_rng_state) - 0.5f; // [-0.5, 0.5) + float sign = (u >= 0.0f) ? 1.0f : -1.0f; + float abs_u = fabsf(u); + + // Avoid log(0) by clamping + if (abs_u >= 0.49999f) abs_u = 0.49999f; + + // Inverse Laplacian CDF with λ = 1/scale + float x = -sign * logf(1.0f - 2.0f * abs_u) * scale; + + return x; +} + //============================================================================= // Haar DWT Implementation (inverse only needed for decoder) //============================================================================= @@ -380,9 +433,9 @@ static void expand_gamma(float *left, float *right, size_t count) { for (size_t i = 0; i < count; i++) { // decode(y) = sign(y) * |y|^(1/γ) where γ=0.5 float x = left[i]; float a = fabsf(x); - left[i] = signum(x) * powf(a, 1.6f); + left[i] = signum(x) * a * a; float y = right[i]; float b = fabsf(y); - right[i] = signum(y) * powf(b, 1.6f); + right[i] = signum(y) * b * b; } } @@ -534,7 +587,7 @@ static void dequantize_dwt_coefficients(int channel, const int8_t *quantized, fl sideband_starts[i] = sideband_starts[i-1] + (first_band_size << (i-2)); } - // Step 1: Dequantize all coefficients (no dithering yet) + // Dequantize all coefficients with stochastic reconstruction for deadzoned values for (size_t i = 0; i < count; i++) { int sideband = dwt_levels; for (int s = 0; s <= dwt_levels; s++) { @@ -544,35 +597,33 @@ static void dequantize_dwt_coefficients(int channel, const int8_t *quantized, fl } } - // Decode using lambda companding - float normalized_val = lambda_decompanding(quantized[i], max_index); + // Check for deadzone marker + /*if (quantized[i] == (int8_t)0) {//DEADZONE_MARKER_QUANT) { + // Stochastic reconstruction: generate Laplacian noise in deadband range + float deadband_threshold = DEADBANDS[channel][sideband]; - // Denormalize using the subband scalar and apply base weight + quantiser scaling - float weight = BASE_QUANTISER_WEIGHTS[channel][sideband] * quantiser_scale; - coeffs[i] = normalized_val * TAD32_COEFF_SCALARS[sideband] * weight; + // Generate Laplacian-distributed noise scaled to deadband width + // Use scale = threshold/3 to keep ~99% of samples within [-threshold, +threshold] + float noise = laplacian_noise(deadband_threshold / 3.0f); + + // Clamp to deadband range + if (noise > deadband_threshold) noise = deadband_threshold; + if (noise < -deadband_threshold) noise = -deadband_threshold; + + // Apply scalar (but not quantiser weight - noise is already in correct range) + coeffs[i] = noise * TAD32_COEFF_SCALARS[sideband]; + } else {*/ + // Normal dequantization using lambda decompanding + float normalized_val = lambda_decompanding(quantized[i], max_index); + + // Denormalize using the subband scalar and apply base weight + quantiser scaling + float weight = BASE_QUANTISER_WEIGHTS[channel][sideband] * quantiser_scale; + coeffs[i] = normalized_val * TAD32_COEFF_SCALARS[sideband] * weight; +// } } - // Step 2: Apply spectral interpolation per band - // Process bands from high to low frequency (dwt_levels down to 0) - // so we can use lower bands' RMS for higher band reconstruction - float prev_band_rms = 0.0f; - - for (int band = dwt_levels; band >= 0; band--) { - size_t band_start = sideband_starts[band]; - size_t band_end = sideband_starts[band + 1]; - size_t band_len = band_end - band_start; - - // Calculate quantization step Q for this band - float weight = BASE_QUANTISER_WEIGHTS[channel][band] * quantiser_scale; - float scalar = TAD32_COEFF_SCALARS[band] * weight; - float Q = scalar / max_index; - - // Apply spectral interpolation to this band - spectral_interpolate_band(&coeffs[band_start], band_len, Q, prev_band_rms); - - // Compute RMS for this band to use as reference for next (lower frequency) band - prev_band_rms = compute_band_rms(&coeffs[band_start], band_len); - } + // Note: Stochastic reconstruction replaces the old spectral interpolation step + // No need for additional processing - deadzoned coefficients already have appropriate noise free(sideband_starts); } @@ -653,6 +704,7 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_ // expand dynamic range expand_gamma(pcm32_left, pcm32_right, sample_count); +// expand_mu_law(pcm32_left, pcm32_right, sample_count); // Apply de-emphasis filter (AFTER gamma expansion, BEFORE PCM32f to PCM8) apply_deemphasis(pcm32_left, pcm32_right, sample_count); diff --git a/video_encoder/encoder_tad.c b/video_encoder/encoder_tad.c index 29e34c4..0eb011f 100644 --- a/video_encoder/encoder_tad.c +++ b/video_encoder/encoder_tad.c @@ -46,6 +46,33 @@ static const float BASE_QUANTISER_WEIGHTS[2][10] = { 3.2f // H (L1) 8 khz }}; +// target: before quantisation +static const float DEADBANDS[2][10] = { +{ // mid channel + 0.10f, // LL (L9) DC + 0.03f, // H (L9) 31.25 hz + 0.03f, // H (L8) 62.5 hz + 0.03f, // H (L7) 125 hz + 0.03f, // H (L6) 250 hz + 0.02f, // H (L5) 500 hz + 0.02f, // H (L4) 1 khz + 0.005f, // H (L3) 2 khz + 0.005f, // H (L2) 4 khz + 0.005f // H (L1) 8 khz +}, +{ // side channel + 0.10f, // LL (L9) DC + 0.03f, // H (L9) 31.25 hz + 0.03f, // H (L8) 62.5 hz + 0.03f, // H (L7) 125 hz + 0.03f, // H (L6) 250 hz + 0.02f, // H (L5) 500 hz + 0.02f, // H (L4) 1 khz + 0.005f, // H (L3) 2 khz + 0.005f, // H (L2) 4 khz + 0.005f // H (L1) 8 khz +}}; + static inline float FCLAMP(float x, float min, float max) { return x < min ? min : (x > max ? max : x); } @@ -75,6 +102,56 @@ static int calculate_dwt_levels(int chunk_size) { return 9; } +// Special marker for deadzoned coefficients (will be reconstructed with noise on decode) +#define DEADZONE_MARKER_FLOAT (-999.0f) // Unmistakable marker in float domain +#define DEADZONE_MARKER_QUANT (-128) // Maps to this in quantized domain (int8 minimum) + +// Perceptual epsilon - coefficients below this are truly zero (inaudible) +#define EPSILON_PERCEPTUAL 0.001f + +static void apply_coeff_deadzone(int channel, float *coeffs, size_t num_samples) { + // Apply deadzonning to each DWT subband using frequency-dependent thresholds + // Instead of zeroing, mark small coefficients for stochastic reconstruction + + const int dwt_levels = 9; // Fixed to match encoder + + // Calculate subband boundaries (same logic as decoder) + const int first_band_size = num_samples >> dwt_levels; + int sideband_starts[11]; // dwt_levels + 2 + sideband_starts[0] = 0; + sideband_starts[1] = first_band_size; + for (int i = 2; i <= dwt_levels + 1; i++) { + sideband_starts[i] = sideband_starts[i - 1] + (first_band_size << (i - 2)); + } + + // Apply deadzone threshold to each coefficient + for (size_t i = 0; i < num_samples; i++) { + // Determine which subband this coefficient belongs to + int sideband = dwt_levels; // Default to highest frequency + for (int s = 0; s <= dwt_levels; s++) { + if (i < (size_t)sideband_starts[s + 1]) { + sideband = s; + break; + } + } + + // Get threshold for this subband and channel + float threshold = DEADBANDS[channel][sideband]; + float abs_coeff = fabsf(coeffs[i]); + + // If coefficient is within deadband AND perceptually non-zero, mark it + if (abs_coeff > EPSILON_PERCEPTUAL && abs_coeff < threshold) { + // Mark for stochastic reconstruction (decoder will add noise) + coeffs[i] = 0.0f;//DEADZONE_MARKER_FLOAT; + } + // If below perceptual epsilon, truly zero it + else if (abs_coeff <= EPSILON_PERCEPTUAL) { + coeffs[i] = 0.0f; + } + // Otherwise keep coefficient unchanged + } +} + //============================================================================= // DD-4 DWT Implementation //============================================================================= @@ -276,9 +353,9 @@ static void compress_gamma(float *left, float *right, size_t count) { for (size_t i = 0; i < count; i++) { // encode(x) = sign(x) * |x|^γ where γ=0.5 float x = left[i]; - left[i] = signum(x) * powf(fabsf(x), 0.625f); + left[i] = signum(x) * powf(fabsf(x), 0.5f); float y = right[i]; - right[i] = signum(y) * powf(fabsf(y), 0.625f); + right[i] = signum(y) * powf(fabsf(y), 0.5f); } } @@ -357,12 +434,17 @@ static void quantize_dwt_coefficients(int channel, const float *coeffs, int8_t * current_subband_index[i] = sideband; } - // Apply base weight and quantiser scaling - float weight = BASE_QUANTISER_WEIGHTS[channel][sideband] * quantiser_scale; - float val = (coeffs[i] / (TAD32_COEFF_SCALARS[sideband] * weight)); // val is normalised to [-1,1] - int8_t quant_val = lambda_companding(val, max_index); - - quantized[i] = quant_val; + // Check for deadzone marker (special handling) + if (coeffs[i] == DEADZONE_MARKER_FLOAT) { + // Map to special quantized marker for stochastic reconstruction + quantized[i] = (int8_t)DEADZONE_MARKER_QUANT; + } else { + // Normal quantization + float weight = BASE_QUANTISER_WEIGHTS[channel][sideband] * quantiser_scale; + float val = (coeffs[i] / (TAD32_COEFF_SCALARS[sideband] * weight)); // val is normalised to [-1,1] + int8_t quant_val = lambda_companding(val, max_index); + quantized[i] = quant_val; + } } free(sideband_starts); @@ -809,6 +891,7 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, // Step 1.2: Compress dynamic range compress_gamma(pcm32_left, pcm32_right, num_samples); +// compress_mu_law(pcm32_left, pcm32_right, num_samples); // Step 2: M/S decorrelation ms_decorrelate(pcm32_left, pcm32_right, pcm32_mid, pcm32_side, num_samples); @@ -835,6 +918,9 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, accumulate_coefficients(dwt_side, dwt_levels, num_samples, side_accumulators); } +// apply_coeff_deadzone(0, dwt_mid, num_samples); +// apply_coeff_deadzone(1, dwt_side, num_samples); + // Step 4: Quantize with frequency-dependent weights and quantiser scaling quantize_dwt_coefficients(0, dwt_mid, quant_mid, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale); quantize_dwt_coefficients(1, dwt_side, quant_side, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale);