TAD: pre/de-emphasis

This commit is contained in:
minjaesong
2025-11-07 23:13:08 +09:00
parent 8878d37e5b
commit aa9ecee7ca
3 changed files with 233 additions and 68 deletions

View File

@@ -162,6 +162,59 @@ static int calculate_dwt_levels(int chunk_size) {
return 9;
}
//=============================================================================
// Stochastic Reconstruction for Deadzoned Coefficients
//=============================================================================
// Quantised-domain marker for a deadzoned coefficient. -128 is the minimum
// int8_t value; the encoder defines the same constant. In the code visible
// here the dequantiser's marker check is commented out, so nothing on the
// decode side currently tests for this value.
#define DEADZONE_MARKER_QUANT (-128)
// Per-subband deadband thresholds, indexed as DEADBANDS[channel][sideband]:
// row 0 is the mid channel, row 1 the side channel, and columns run from the
// LL (DC) band up to the highest-frequency band. The values mirror the
// encoder's table and must be kept in sync with it.
// NOTE(review): the only visible reader of this table is the disabled
// stochastic-reconstruction path in the dequantiser.
static const float DEADBANDS[2][10] = {
{ // mid channel
0.10f, // LL (L9) DC
0.03f, // H (L9) 31.25 hz
0.03f, // H (L8) 62.5 hz
0.03f, // H (L7) 125 hz
0.03f, // H (L6) 250 hz
0.02f, // H (L5) 500 hz
0.02f, // H (L4) 1 khz
0.005f, // H (L3) 2 khz
0.005f, // H (L2) 4 khz
0.005f // H (L1) 8 khz
},
{ // side channel
0.10f, // LL (L9) DC
0.03f, // H (L9) 31.25 hz
0.03f, // H (L8) 62.5 hz
0.03f, // H (L7) 125 hz
0.03f, // H (L6) 250 hz
0.02f, // H (L5) 500 hz
0.02f, // H (L4) 1 khz
0.005f, // H (L3) 2 khz
0.005f, // H (L2) 4 khz
0.005f // H (L1) 8 khz
}};
// PRNG state for stochastic reconstruction, handed to urand() by
// laplacian_noise(). The seed is a fixed constant, so the noise sequence is
// the same on every run unless something else reseeds it.
// NOTE(review): described as xorshift32 state; urand() is defined outside
// this view - confirm. File-scope mutable state: not safe for concurrent
// decoders unless access is serialised.
static uint32_t deadzone_rng_state = 0x12345678u;
// Draw one zero-mean, Laplacian-distributed noise sample by inverse-CDF sampling.
//
// With U = urand(&deadzone_rng_state) - 0.5 the result is
//     X = -sign(U) * ln(1 - 2*|U|) * scale
// where `scale` is the Laplacian scale parameter (b = 1/lambda).
// Assumes urand() yields values in [0, 1), so U lies in [-0.5, 0.5) - TODO confirm.
// |U| is capped at 0.49999 so the logarithm never sees zero; this bounds the
// returned magnitude to roughly 10.8 * scale.
static float laplacian_noise(float scale) {
    const float centred = urand(&deadzone_rng_state) - 0.5f;

    // Direction of the sample: non-negative draws map to +1, negative to -1.
    float direction;
    if (centred >= 0.0f) {
        direction = 1.0f;
    } else {
        direction = -1.0f;
    }

    // Keep |U| strictly below 0.5 so that 1 - 2*|U| stays positive.
    float magnitude = fabsf(centred);
    magnitude = (magnitude >= 0.49999f) ? 0.49999f : magnitude;

    // Inverse Laplacian CDF; the log term is <= 0, hence the leading minus.
    return -direction * logf(1.0f - 2.0f * magnitude) * scale;
}
//=============================================================================
// Haar DWT Implementation (inverse only needed for decoder)
//=============================================================================
@@ -380,9 +433,9 @@ static void expand_gamma(float *left, float *right, size_t count) {
for (size_t i = 0; i < count; i++) {
// decode(y) = sign(y) * |y|^(1/γ) where γ=0.5
float x = left[i]; float a = fabsf(x);
left[i] = signum(x) * powf(a, 1.6f);
left[i] = signum(x) * a * a;
float y = right[i]; float b = fabsf(y);
right[i] = signum(y) * powf(b, 1.6f);
right[i] = signum(y) * b * b;
}
}
@@ -534,7 +587,7 @@ static void dequantize_dwt_coefficients(int channel, const int8_t *quantized, fl
sideband_starts[i] = sideband_starts[i-1] + (first_band_size << (i-2));
}
// Step 1: Dequantize all coefficients (no dithering yet)
// Dequantize all coefficients with stochastic reconstruction for deadzoned values
for (size_t i = 0; i < count; i++) {
int sideband = dwt_levels;
for (int s = 0; s <= dwt_levels; s++) {
@@ -544,35 +597,33 @@ static void dequantize_dwt_coefficients(int channel, const int8_t *quantized, fl
}
}
// Decode using lambda companding
float normalized_val = lambda_decompanding(quantized[i], max_index);
// Check for deadzone marker
/*if (quantized[i] == (int8_t)0) {//DEADZONE_MARKER_QUANT) {
// Stochastic reconstruction: generate Laplacian noise in deadband range
float deadband_threshold = DEADBANDS[channel][sideband];
// Denormalize using the subband scalar and apply base weight + quantiser scaling
float weight = BASE_QUANTISER_WEIGHTS[channel][sideband] * quantiser_scale;
coeffs[i] = normalized_val * TAD32_COEFF_SCALARS[sideband] * weight;
// Generate Laplacian-distributed noise scaled to deadband width
// Use scale = threshold/3 to keep ~99% of samples within [-threshold, +threshold]
float noise = laplacian_noise(deadband_threshold / 3.0f);
// Clamp to deadband range
if (noise > deadband_threshold) noise = deadband_threshold;
if (noise < -deadband_threshold) noise = -deadband_threshold;
// Apply scalar (but not quantiser weight - noise is already in correct range)
coeffs[i] = noise * TAD32_COEFF_SCALARS[sideband];
} else {*/
// Normal dequantization using lambda decompanding
float normalized_val = lambda_decompanding(quantized[i], max_index);
// Denormalize using the subband scalar and apply base weight + quantiser scaling
float weight = BASE_QUANTISER_WEIGHTS[channel][sideband] * quantiser_scale;
coeffs[i] = normalized_val * TAD32_COEFF_SCALARS[sideband] * weight;
// }
}
// Step 2: Apply spectral interpolation per band
// Process bands from high to low frequency (dwt_levels down to 0)
// so we can use lower bands' RMS for higher band reconstruction
float prev_band_rms = 0.0f;
for (int band = dwt_levels; band >= 0; band--) {
size_t band_start = sideband_starts[band];
size_t band_end = sideband_starts[band + 1];
size_t band_len = band_end - band_start;
// Calculate quantization step Q for this band
float weight = BASE_QUANTISER_WEIGHTS[channel][band] * quantiser_scale;
float scalar = TAD32_COEFF_SCALARS[band] * weight;
float Q = scalar / max_index;
// Apply spectral interpolation to this band
spectral_interpolate_band(&coeffs[band_start], band_len, Q, prev_band_rms);
// Compute RMS for this band to use as reference for next (lower frequency) band
prev_band_rms = compute_band_rms(&coeffs[band_start], band_len);
}
// Note: Stochastic reconstruction replaces the old spectral interpolation step
// No need for additional processing - deadzoned coefficients already have appropriate noise
free(sideband_starts);
}
@@ -653,6 +704,7 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_
// expand dynamic range
expand_gamma(pcm32_left, pcm32_right, sample_count);
// expand_mu_law(pcm32_left, pcm32_right, sample_count);
// Apply de-emphasis filter (AFTER gamma expansion, BEFORE PCM32f to PCM8)
apply_deemphasis(pcm32_left, pcm32_right, sample_count);

View File

@@ -46,6 +46,33 @@ static const float BASE_QUANTISER_WEIGHTS[2][10] = {
3.2f // H (L1) 8 khz
}};
// Per-subband deadband thresholds, indexed as DEADBANDS[channel][sideband]:
// row 0 is the mid channel, row 1 the side channel, and columns run from the
// LL (DC) band up to the highest-frequency band.
// The thresholds are compared against raw DWT coefficient magnitudes, i.e.
// before quantisation (see apply_coeff_deadzone). The decoder carries a copy
// of this table; keep the two in sync.
static const float DEADBANDS[2][10] = {
{ // mid channel
0.10f, // LL (L9) DC
0.03f, // H (L9) 31.25 hz
0.03f, // H (L8) 62.5 hz
0.03f, // H (L7) 125 hz
0.03f, // H (L6) 250 hz
0.02f, // H (L5) 500 hz
0.02f, // H (L4) 1 khz
0.005f, // H (L3) 2 khz
0.005f, // H (L2) 4 khz
0.005f // H (L1) 8 khz
},
{ // side channel
0.10f, // LL (L9) DC
0.03f, // H (L9) 31.25 hz
0.03f, // H (L8) 62.5 hz
0.03f, // H (L7) 125 hz
0.03f, // H (L6) 250 hz
0.02f, // H (L5) 500 hz
0.02f, // H (L4) 1 khz
0.005f, // H (L3) 2 khz
0.005f, // H (L2) 4 khz
0.005f // H (L1) 8 khz
}};
// Clamp x to the closed interval [min, max].
// The lower bound is tested first, so when min > max any x below min yields
// min. A NaN x fails both comparisons and is returned unchanged.
static inline float FCLAMP(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
@@ -75,6 +102,56 @@ static int calculate_dwt_levels(int chunk_size) {
return 9;
}
// Markers for deadzoned coefficients that are meant to be reconstructed with
// noise on decode. quantize_dwt_coefficients() compares each coefficient with
// DEADZONE_MARKER_FLOAT and, on a match, emits DEADZONE_MARKER_QUANT instead
// of a companded value.
// NOTE: apply_coeff_deadzone() currently writes 0.0f rather than
// DEADZONE_MARKER_FLOAT, so that function never produces the marker.
// NOTE(review): confirm lambda_companding() can never emit -128 for an
// ordinary coefficient, otherwise the marker is ambiguous.
#define DEADZONE_MARKER_FLOAT (-999.0f) // Float-domain marker; compared with == in the quantiser
#define DEADZONE_MARKER_QUANT (-128) // Quantised-domain marker (int8 minimum); the decoder defines the same value
// Perceptual epsilon: apply_coeff_deadzone() zeroes coefficients whose
// magnitude is at or below this value (treated as inaudible)
#define EPSILON_PERCEPTUAL 0.001f
// Zero every DWT coefficient that falls inside its subband's deadband.
//
// channel     - row of DEADBANDS to use (0 = mid, 1 = side); not range-checked.
// coeffs      - DWT coefficients, modified in place.
// num_samples - number of entries in coeffs.
//
// The layout is assumed to be a 9-level decomposition: band 0 (LL) holds
// num_samples >> 9 coefficients and band k >= 1 holds (num_samples >> 9) << (k - 1).
// Any index beyond the last computed boundary is treated as band 9.
//
// A coefficient is set to 0.0f when its magnitude is at or below
// EPSILON_PERCEPTUAL, or strictly below the band's DEADBANDS threshold.
// Coefficients are zeroed outright; DEADZONE_MARKER_FLOAT is not written here.
static void apply_coeff_deadzone(int channel, float *coeffs, size_t num_samples) {
    const int dwt_levels = 9;

    // band_edges[b] .. band_edges[b + 1] delimits subband b (dwt_levels + 2 entries).
    const int ll_size = num_samples >> dwt_levels;
    int band_edges[11];
    band_edges[0] = 0;
    band_edges[1] = ll_size;
    for (int k = 2; k <= dwt_levels + 1; k++) {
        band_edges[k] = band_edges[k - 1] + (ll_size << (k - 2));
    }

    for (size_t idx = 0; idx < num_samples; idx++) {
        // Walk up from the lowest band until idx sits below the band's upper
        // edge; stop at the top band regardless.
        int band = 0;
        while (band < dwt_levels && idx >= (size_t)band_edges[band + 1]) {
            band++;
        }

        const float limit = DEADBANDS[channel][band];
        const float magnitude = fabsf(coeffs[idx]);

        // Inaudibly small, or inside the deadband: drop the coefficient.
        // Everything else is left untouched.
        if (magnitude <= EPSILON_PERCEPTUAL || magnitude < limit) {
            coeffs[idx] = 0.0f;
        }
    }
}
//=============================================================================
// DD-4 DWT Implementation
//=============================================================================
@@ -276,9 +353,9 @@ static void compress_gamma(float *left, float *right, size_t count) {
for (size_t i = 0; i < count; i++) {
// encode(x) = sign(x) * |x|^γ where γ=0.5
float x = left[i];
left[i] = signum(x) * powf(fabsf(x), 0.625f);
left[i] = signum(x) * powf(fabsf(x), 0.5f);
float y = right[i];
right[i] = signum(y) * powf(fabsf(y), 0.625f);
right[i] = signum(y) * powf(fabsf(y), 0.5f);
}
}
@@ -357,12 +434,17 @@ static void quantize_dwt_coefficients(int channel, const float *coeffs, int8_t *
current_subband_index[i] = sideband;
}
// Apply base weight and quantiser scaling
float weight = BASE_QUANTISER_WEIGHTS[channel][sideband] * quantiser_scale;
float val = (coeffs[i] / (TAD32_COEFF_SCALARS[sideband] * weight)); // val is normalised to [-1,1]
int8_t quant_val = lambda_companding(val, max_index);
quantized[i] = quant_val;
// Check for deadzone marker (special handling)
if (coeffs[i] == DEADZONE_MARKER_FLOAT) {
// Map to special quantized marker for stochastic reconstruction
quantized[i] = (int8_t)DEADZONE_MARKER_QUANT;
} else {
// Normal quantization
float weight = BASE_QUANTISER_WEIGHTS[channel][sideband] * quantiser_scale;
float val = (coeffs[i] / (TAD32_COEFF_SCALARS[sideband] * weight)); // val is normalised to [-1,1]
int8_t quant_val = lambda_companding(val, max_index);
quantized[i] = quant_val;
}
}
free(sideband_starts);
@@ -809,6 +891,7 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
// Step 1.2: Compress dynamic range
compress_gamma(pcm32_left, pcm32_right, num_samples);
// compress_mu_law(pcm32_left, pcm32_right, num_samples);
// Step 2: M/S decorrelation
ms_decorrelate(pcm32_left, pcm32_right, pcm32_mid, pcm32_side, num_samples);
@@ -835,6 +918,9 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
accumulate_coefficients(dwt_side, dwt_levels, num_samples, side_accumulators);
}
// apply_coeff_deadzone(0, dwt_mid, num_samples);
// apply_coeff_deadzone(1, dwt_side, num_samples);
// Step 4: Quantize with frequency-dependent weights and quantiser scaling
quantize_dwt_coefficients(0, dwt_mid, quant_mid, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale);
quantize_dwt_coefficients(1, dwt_side, quant_side, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale);