From ee2ddef1c1cbc7e52978d9ee03a35bf32148b49a Mon Sep 17 00:00:00 2001 From: minjaesong Date: Wed, 29 Oct 2025 03:19:36 +0900 Subject: [PATCH] TAD: coefficient dithering on decoder --- video_encoder/decoder_tad.c | 67 ++++++++++++++++++++++++++++++++---- video_encoder/encoder_tad.c | 68 ++++--------------------------------- 2 files changed, 67 insertions(+), 68 deletions(-) diff --git a/video_encoder/decoder_tad.c b/video_encoder/decoder_tad.c index 2880089..196d7f8 100644 --- a/video_encoder/decoder_tad.c +++ b/video_encoder/decoder_tad.c @@ -52,6 +52,36 @@ static inline float FCLAMP(float x, float min, float max) { return x < min ? min : (x > max ? max : x); } +//============================================================================= +// Deterministic PRNG for Coefficient-Domain Dithering +//============================================================================= + +// Simple LCG for reproducible dithering +static inline uint32_t lcg_next(uint32_t *seed) { + *seed = (*seed * 1664525u) + 1013904223u; + return *seed; +} + +// Uniform random in [0, 1) +static inline float uniform_01(uint32_t *seed) { + return (lcg_next(seed) & 0xFFFFFF) / 16777216.0f; +} + +// TPDF (Triangular Probability Distribution Function) dither in range (-1, 1) +static inline float tpdf_dither(uint32_t *seed) { + float u1 = uniform_01(seed) - 0.5f; // [-0.5, 0.5) + float u2 = uniform_01(seed) - 0.5f; // [-0.5, 0.5) + return u1 - u2; // Triangular distribution in (-1, 1) +} + +// Calculate per-subband dither scaling factor: (alpha^(level/10) - 1) / alpha +// Result is 0 at level 0 and, for alpha > 1, grows as level increases +// alpha must be non-zero (division by alpha); alpha = 1.0 gives 0 at every level +// NOTE(review): the caller added in this patch passes alpha = 78.0 - confirm the intended range +static inline float subband_dither_scale(int level, float alpha) { + return (powf(alpha, level / 10.0f) - 1.0f) / alpha; +} + //============================================================================= // WAV Header Writing //============================================================================= @@ -343,7 +373,7 @@ static void 
pcm32f_to_pcm8(const float *fleft, const float *fright, uint8_t *lef const float bias = 128.0f; // Reduced dither amplitude to coordinate with coefficient-domain dithering - // The encoder now adds TPDF dither in coefficient domain, so we reduce + // The decoder now adds TPDF dither in coefficient domain, so we reduce // sample-domain dither by ~60% to avoid doubling the noise floor const float dither_scale = 0.2f; // Reduced from 0.5 (was ±0.5 LSB, now ±0.2 LSB) @@ -418,7 +448,7 @@ static float lambda_decompanding(int8_t quant_val, int max_index) { return sign * abs_val; } -static void dequantize_dwt_coefficients(const int8_t *quantized, float *coeffs, size_t count, int chunk_size, int dwt_levels, int max_index, float quantiser_scale) { +static void dequantize_dwt_coefficients(const int8_t *quantized, float *coeffs, size_t count, int chunk_size, int dwt_levels, int max_index, float quantiser_scale, uint32_t *dither_seed) { // Calculate sideband boundaries dynamically int first_band_size = chunk_size >> dwt_levels; @@ -430,6 +460,10 @@ static void dequantize_dwt_coefficients(const int8_t *quantized, float *coeffs, sideband_starts[i] = sideband_starts[i-1] + (first_band_size << (i-2)); } + // Coefficient-domain dithering parameters + const float dither_k = 0.125f; // Amplitude factor: dither amplitude = dither_k × Q × s_level + const float dither_alpha = 78.0f; // Base passed to subband_dither_scale (it computes powf(alpha, level / 10)) + for (size_t i = 0; i < count; i++) { int sideband = dwt_levels; for (int s = 0; s <= dwt_levels; s++) { @@ -444,7 +478,22 @@ static void dequantize_dwt_coefficients(const int8_t *quantized, float *coeffs, // Denormalize using the subband scalar and apply base weight + quantiser scaling float weight = BASE_QUANTISER_WEIGHTS[sideband] * quantiser_scale; - coeffs[i] = normalized_val * TAD32_COEFF_SCALARS[sideband] * weight; + float dequantized = normalized_val * TAD32_COEFF_SCALARS[sideband] * weight; + + // Apply coefficient-domain dithering AFTER dequantization + // Calculate quantization 
step size Q in coefficient domain + float scalar = TAD32_COEFF_SCALARS[sideband] * weight; + float Q = scalar / max_index; + + // Per-subband dither scaling: 0 for sideband 0, growing with sideband index (alpha = 78 > 1) + float s_level = subband_dither_scale(sideband, dither_alpha); + + // TPDF dithering in coefficient domain + float tpdf = tpdf_dither(dither_seed); + float dither_amplitude = dither_k * Q * s_level; + + // Add dither to dequantized coefficient + coeffs[i] = dequantized + (tpdf * dither_amplitude); } free(sideband_starts); @@ -509,12 +558,18 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_ memcpy(quant_mid, decompressed, sample_count); memcpy(quant_side, decompressed + sample_count, sample_count); + // Seed the Mid/Side dither PRNGs from a running sample counter held in a function-local static + // NOTE(review): the counter only advances in this hunk and the seed divides by sample_count - confirm single-stream use and sample_count > 0 + static size_t global_sample_position = 0; + uint32_t dither_seed_mid = 0x12345678u ^ (uint32_t)(global_sample_position / sample_count * 2); + uint32_t dither_seed_side = 0x87654321u ^ (uint32_t)(global_sample_position / sample_count * 2 + 1); + global_sample_position += sample_count; - // Dequantize with quantiser scaling + // Dequantize with quantiser scaling and coefficient-domain dithering // Use quantiser_scale = 1.0f for baseline (must match encoder) float quantiser_scale = 1.0f; - dequantize_dwt_coefficients(quant_mid, dwt_mid, sample_count, sample_count, dwt_levels, max_index, quantiser_scale); - dequantize_dwt_coefficients(quant_side, dwt_side, sample_count, sample_count, dwt_levels, max_index, quantiser_scale); + dequantize_dwt_coefficients(quant_mid, dwt_mid, sample_count, sample_count, dwt_levels, max_index, quantiser_scale, &dither_seed_mid); + dequantize_dwt_coefficients(quant_side, dwt_side, sample_count, sample_count, dwt_levels, max_index, quantiser_scale, &dither_seed_side); // Inverse DWT dwt_haar_inverse_multilevel(dwt_mid, sample_count, dwt_levels); diff --git a/video_encoder/encoder_tad.c 
b/video_encoder/encoder_tad.c index ea9deb8..be6fdf8 100644 --- a/video_encoder/encoder_tad.c +++ b/video_encoder/encoder_tad.c @@ -37,43 +37,13 @@ static const float BASE_QUANTISER_WEIGHTS[] = { // Forward declarations for internal functions static void dwt_dd4_forward_1d(float *data, int length); static void dwt_dd4_forward_multilevel(float *data, int length, int levels); -static void quantize_dwt_coefficients(const float *coeffs, int8_t *quantized, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int quant_bits, int *current_subband_index, float quantiser_scale, uint32_t *dither_seed); +static void quantize_dwt_coefficients(const float *coeffs, int8_t *quantized, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int quant_bits, int *current_subband_index, float quantiser_scale); static size_t encode_twobitmap(const int8_t *values, size_t count, uint8_t *output); static inline float FCLAMP(float x, float min, float max) { return x < min ? min : (x > max ? 
max : x); } -//============================================================================= -// Deterministic PRNG for Coefficient-Domain Dithering -//============================================================================= - -// Simple LCG for reproducible dithering -static inline uint32_t lcg_next(uint32_t *seed) { - *seed = (*seed * 1664525u) + 1013904223u; - return *seed; -} - -// Uniform random in [0, 1) -static inline float uniform_01(uint32_t *seed) { - return (lcg_next(seed) & 0xFFFFFF) / 16777216.0f; -} - -// TPDF (Triangular Probability Distribution Function) dither in range (-1, 1) -static inline float tpdf_dither(uint32_t *seed) { - float u1 = uniform_01(seed) - 0.5f; // [-0.5, 0.5) - float u2 = uniform_01(seed) - 0.5f; // [-0.5, 0.5) - return u1 - u2; // Triangular distribution in (-1, 1) -} - -// Calculate per-subband dither scaling factor -// alpha = 0.0 → flat per-band noise -// alpha = 0.5 → pinkish noise (default) -// alpha = 1.0 → more noise in low bands -static inline float subband_dither_scale(int level, float alpha, float scale) { - return (powf(alpha, level / 10.0f) - 1.0f) / alpha * scale; -} - // Calculate DWT levels from chunk size static int calculate_dwt_levels(int chunk_size) { /*if (chunk_size < TAD32_MIN_CHUNK_SIZE) { @@ -310,7 +280,7 @@ static int8_t lambda_companding(float val, int max_index) { return (int8_t)(sign * index); } -static void quantize_dwt_coefficients(const float *coeffs, int8_t *quantized, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int max_index, int *current_subband_index, float quantiser_scale, uint32_t *dither_seed) { +static void quantize_dwt_coefficients(const float *coeffs, int8_t *quantized, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int max_index, int *current_subband_index, float quantiser_scale) { int first_band_size = chunk_size >> dwt_levels; int *sideband_starts = malloc((dwt_levels + 2) * sizeof(int)); @@ -320,10 +290,6 @@ static void 
quantize_dwt_coefficients(const float *coeffs, int8_t *quantized, si sideband_starts[i] = sideband_starts[i-1] + (first_band_size << (i-2)); } - // Coefficient-domain dithering parameters - const float dither_k = 0.5f; // Amplitude factor (0.5 × Q_level) - const float dither_alpha = 78.0f; // Subband scaling exponent (0.5 = pinkish) - for (size_t i = 0; i < count; i++) { int sideband = dwt_levels; for (int s = 0; s <= dwt_levels; s++) { @@ -340,22 +306,7 @@ static void quantize_dwt_coefficients(const float *coeffs, int8_t *quantized, si // Apply base weight and quantiser scaling float weight = BASE_QUANTISER_WEIGHTS[sideband] * quantiser_scale; - float scalar = TAD32_COEFF_SCALARS[sideband] * weight; - - // Calculate quantization step size Q in coefficient domain - // Q represents the spacing between quantization levels - float Q = scalar / max_index; - - // Per-subband dither scaling: lower levels get more dither energy - float s_level = subband_dither_scale(sideband, dither_alpha, 0.3f); - - // TPDF dithering in coefficient domain - float tpdf = tpdf_dither(dither_seed); - float dither_amplitude = dither_k * Q * s_level; - float dithered_coeff = coeffs[i] + (tpdf * dither_amplitude); - - // Normalize dithered coefficient to [-1, 1] range for quantization - float val = dithered_coeff / scalar; + float val = (coeffs[i] / (TAD32_COEFF_SCALARS[sideband] * weight)); // val is normalised to [-1,1] int8_t quant_val = lambda_companding(val, max_index); quantized[i] = quant_val; @@ -883,16 +834,9 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, accumulate_coefficients(dwt_side, dwt_levels, num_samples, side_accumulators); } - // Step 3.75: Initialize deterministic dither seed for coefficient-domain dithering - // Using a static counter ensures reproducible dithering per chunk - static uint32_t chunk_counter = 0; - uint32_t dither_seed_mid = 0x12345678u ^ (chunk_counter * 2); // Seed for Mid channel - uint32_t dither_seed_side = 0x87654321u ^ 
(chunk_counter * 2 + 1); // Seed for Side channel - chunk_counter++; - - // Step 4: Quantize with frequency-dependent weights, quantiser scaling, and coefficient-domain dithering - quantize_dwt_coefficients(dwt_mid, quant_mid, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale, &dither_seed_mid); - quantize_dwt_coefficients(dwt_side, quant_side, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale, &dither_seed_side); + // Step 4: Quantize with frequency-dependent weights and quantiser scaling + quantize_dwt_coefficients(dwt_mid, quant_mid, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale); + quantize_dwt_coefficients(dwt_side, quant_side, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale); // Step 4.5: Accumulate quantized coefficient statistics if enabled if (stats_enabled) {