diff --git a/video_encoder/decoder_tad.c b/video_encoder/decoder_tad.c index afb82e5..559c770 100644 --- a/video_encoder/decoder_tad.c +++ b/video_encoder/decoder_tad.c @@ -9,10 +9,15 @@ #include #include -#define DECODER_VENDOR_STRING "Decoder-TAD 20251023" +#define DECODER_VENDOR_STRING "Decoder-TAD 20251026" // TAD format constants (must match encoder) -#define TAD_COEFF_SCALAR 1024.0f +#undef TAD32_COEFF_SCALARS + +// Coefficient scalars for each subband (CDF 9/7 with 9 decomposition levels) +// Index 0 = LL band, Index 1-9 = H bands (L9 to L1) +static const float TAD32_COEFF_SCALARS[] = {64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f}; + #define TAD_DEFAULT_CHUNK_SIZE 32768 #define TAD_MIN_CHUNK_SIZE 1024 #define TAD_SAMPLE_RATE 32000 @@ -33,7 +38,7 @@ static inline float FCLAMP(float x, float min, float max) { // Calculate DWT levels from chunk size (must be power of 2, >= 1024) static int calculate_dwt_levels(int chunk_size) { - if (chunk_size < TAD_MIN_CHUNK_SIZE) { + /*if (chunk_size < TAD_MIN_CHUNK_SIZE) { fprintf(stderr, "Error: Chunk size %d is below minimum %d\n", chunk_size, TAD_MIN_CHUNK_SIZE); return -1; } @@ -45,7 +50,8 @@ static int calculate_dwt_levels(int chunk_size) { size >>= 1; levels++; } - return levels - 2; + return levels - 2;*/ + return 9; } //============================================================================= @@ -71,6 +77,91 @@ static void dwt_haar_inverse_1d(float *data, int length) { free(temp); } +// 9/7 inverse DWT (from TSVM Kotlin code) +static void dwt_97_inverse_1d(float *data, int length) { + if (length < 2) return; + + float *temp = malloc(length * sizeof(float)); + int half = (length + 1) / 2; + + // Split into low and high frequency components (matching TSVM layout) + for (int i = 0; i < half; i++) { + temp[i] = data[i]; // Low-pass coefficients (first half) + } + for (int i = 0; i < length / 2; i++) { + if (half + i < length) { + temp[half + i] = data[half + i]; // High-pass coefficients (second half) + } + } + + // 9/7 inverse lifting coefficients from TSVM + const float alpha = -1.586134342f; + const float beta = -0.052980118f; + const float gamma = 0.882911076f; + const float delta = 0.443506852f; + const float K = 1.230174105f; + + // Step 1: Undo scaling + for (int i = 0; i < half; i++) { + temp[i] /= K; // Low-pass coefficients + } + for (int i = 0; i < length / 2; i++) { + if (half + i < length) { + temp[half + i] *= K; // High-pass coefficients + } + } + + // Step 2: Undo δ update + for (int i = 0; i < half; i++) { + float d_curr = (half + i < length) ? temp[half + i] : 0.0f; + float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr; + temp[i] -= delta * (d_curr + d_prev); + } + + // Step 3: Undo γ predict + for (int i = 0; i < length / 2; i++) { + if (half + i < length) { + float s_curr = temp[i]; + float s_next = (i + 1 < half) ? temp[i + 1] : s_curr; + temp[half + i] -= gamma * (s_curr + s_next); + } + } + + // Step 4: Undo β update + for (int i = 0; i < half; i++) { + float d_curr = (half + i < length) ? temp[half + i] : 0.0f; + float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr; + temp[i] -= beta * (d_curr + d_prev); + } + + // Step 5: Undo α predict + for (int i = 0; i < length / 2; i++) { + if (half + i < length) { + float s_curr = temp[i]; + float s_next = (i + 1 < half) ? temp[i + 1] : s_curr; + temp[half + i] -= alpha * (s_curr + s_next); + } + } + + // Reconstruction - interleave low and high pass + for (int i = 0; i < length; i++) { + if (i % 2 == 0) { + // Even positions: low-pass coefficients + data[i] = temp[i / 2]; + } else { + // Odd positions: high-pass coefficients + int idx = i / 2; + if (half + idx < length) { + data[i] = temp[half + idx]; + } else { + data[i] = 0.0f; + } + } + } + + free(temp); +} + // Inverse 1D transform of Four-point interpolating Deslauriers-Dubuc (DD-4) static void dwt_dd4_inverse_1d(float *data, int length) { if (length < 2) return; @@ -141,7 +232,8 @@ static void dwt_haar_inverse_multilevel(float *data, int length, int levels) { current_length *= 2; // MULTIPLY FIRST: 128→256, 256→512, ..., 16384→32768 if (current_length > length) current_length = length; // dwt_haar_inverse_1d(data, current_length); // THEN apply inverse - dwt_dd4_inverse_1d(data, current_length); // THEN apply inverse +// dwt_dd4_inverse_1d(data, current_length); // THEN apply inverse + dwt_97_inverse_1d(data, current_length); // THEN apply inverse } } @@ -159,23 +251,43 @@ static inline float tpdf1(void) { return (frand01() - frand01()); } -static void ms_correlate(const float *mid, const float *side, uint8_t *left, uint8_t *right, size_t count, float dither_error[2][2]) { +static void ms_correlate(const float *mid, const float *side, float *left, float *right, size_t count) { + for (size_t i = 0; i < count; i++) { + // Decode M/S → L/R + float m = mid[i]; + float s = side[i]; + left[i] = FCLAMP((m + s) * 1.7321f, -1.0f, 1.0f); + right[i] = FCLAMP((m - s) * 1.7321f, -1.0f, 1.0f); + } +} + +static float signum(float x) { + if (x > 0.0f) return 1.0f; + if (x < 0.0f) return -1.0f; + return 0.0f; +} + +static void expand_gamma(float *left, float *right, size_t count) { + for (size_t i = 0; i < count; i++) { + // decode(y) = sign(y) * |y|^(1/γ) where γ=0.5 + float x = left[i]; float a = fabsf(x); + left[i] = signum(x) * a * a; + float y = right[i]; float b = fabsf(y); + right[i] = signum(y) * b * b; + } +} + +static void pcm32f_to_pcm8(const float *fleft, const float *fright, uint8_t *left, uint8_t *right, size_t count, float dither_error[2][2]) { const float b1 = 1.5f; // 1st feedback coefficient const float b2 = -0.75f; // 2nd feedback coefficient const float scale = 127.5f; const float bias = 128.0f; for (size_t i = 0; i < count; i++) { - // Decode M/S → L/R - float m = mid[i]; - float s = side[i]; - float l = FCLAMP(m + s, -1.0f, 1.0f); - float r = FCLAMP(m - s, -1.0f, 1.0f); - // --- LEFT channel --- float feedbackL = b1 * dither_error[0][0] + b2 * dither_error[0][1]; float ditherL = 0.5f * tpdf1(); // ±0.5 LSB TPDF - float shapedL = l + feedbackL + ditherL / scale; + float shapedL = fleft[i] + feedbackL + ditherL / scale; shapedL = FCLAMP(shapedL, -1.0f, 1.0f); int qL = (int)lrintf(shapedL * scale); @@ -190,7 +302,7 @@ static void ms_correlate(const float *mid, const float *side, uint8_t *left, uin // --- RIGHT channel --- float feedbackR = b1 * dither_error[1][0] + b2 * dither_error[1][1]; float ditherR = 0.5f * tpdf1(); - float shapedR = r + feedbackR + ditherR / scale; + float shapedR = fright[i] + feedbackR + ditherR / scale; shapedR = FCLAMP(shapedR, -1.0f, 1.0f); int qR = (int)lrintf(shapedR * scale); @@ -228,13 +340,15 @@ static void get_quantization_weights(int quality, int dwt_levels, float *weights /*15*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f} }; - float quality_scale = 4.0f + FCLAMP((3 - quality) * 0.5f, 0.0f, 1000.0f); + float quality_scale = 1.0f * (1.0f + FCLAMP((5 - quality) * 0.5f, 0.0f, 1000.0f)); for (int i = 0; i < dwt_levels; i++) { - weights[i] = FCLAMP(base_weights[dwt_levels][i] * quality_scale, 1.0f, 1000.0f); + weights[i] = 1.0f;//base_weights[dwt_levels][i] * quality_scale; } } +#define QUANT_STEPS 8.0f // 64 -> [-64..64] -> 7 bits for LL + static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs, size_t count, int quality, int chunk_size, int dwt_levels) { float weights[16]; get_quantization_weights(quality, dwt_levels, weights); @@ -263,7 +377,7 @@ static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs, if (weight_idx >= dwt_levels) weight_idx = dwt_levels - 1; float weight = weights[weight_idx]; - coeffs[i] = (float)quantized[i] * weight / TAD_COEFF_SCALAR; + coeffs[i] = ((float)quantized[i] * TAD32_COEFF_SCALARS[sideband]) / (QUANT_STEPS * weight); } free(sideband_starts); @@ -352,6 +466,8 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_ int16_t *quant_side = malloc(sample_count * sizeof(int16_t)); float *dwt_mid = malloc(sample_count * sizeof(float)); float *dwt_side = malloc(sample_count * sizeof(float)); + float *pcm32_left = malloc(sample_count * sizeof(float)); + float *pcm32_right = malloc(sample_count * sizeof(float)); uint8_t *pcm8_left = malloc(sample_count * sizeof(uint8_t)); uint8_t *pcm8_right = malloc(sample_count * sizeof(uint8_t)); @@ -373,7 +489,13 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_ float err[2][2] = {{0,0},{0,0}}; // M/S to L/R correlation - ms_correlate(dwt_mid, dwt_side, pcm8_left, pcm8_right, sample_count, err); + ms_correlate(dwt_mid, dwt_side, pcm32_left, pcm32_right, sample_count); + + // expand dynamic range +// expand_gamma(pcm32_left, pcm32_right, sample_count); + + // dither to 8-bit + pcm32f_to_pcm8(pcm32_left, pcm32_right, pcm8_left, pcm8_right, sample_count, err); // Interleave stereo output (PCMu8) for (size_t i = 0; i < sample_count; i++) { @@ -383,7 +505,7 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_ // Cleanup free(quant_mid); free(quant_side); free(dwt_mid); free(dwt_side); - free(pcm8_left); free(pcm8_right); + free(pcm32_left); free(pcm32_right); free(pcm8_left); free(pcm8_right); if (decompressed) free(decompressed); return 0; diff --git a/video_encoder/encoder_tad.c b/video_encoder/encoder_tad.c index b2a7d43..7a57e89 100644 --- a/video_encoder/encoder_tad.c +++ b/video_encoder/encoder_tad.c @@ -11,13 +11,20 @@ #include #include "encoder_tad.h" +// Undefine the macro version from header and define as array +#undef TAD32_COEFF_SCALARS + +// Coefficient scalars for each subband (CDF 9/7 with 9 decomposition levels) +// Index 0 = LL band, Index 1-9 = H bands (L9 to L1) +static const float TAD32_COEFF_SCALARS[] = {64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f}; + // Forward declarations for internal functions static void dwt_dd4_forward_1d(float *data, int length); static void dwt_dd4_forward_multilevel(float *data, int length, int levels); static void ms_decorrelate_16(const float *left, const float *right, float *mid, float *side, size_t count); static void get_quantization_weights(int quality, int dwt_levels, float *weights); static int get_deadzone_threshold(int quality); -static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, size_t count, int quality, int apply_deadzone, int chunk_size, int dwt_levels); +static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, size_t count, int quality, int apply_deadzone, int chunk_size, int dwt_levels, int *current_subband_index); static size_t encode_sigmap_2bit(const int16_t *values, size_t count, uint8_t *output); static inline float FCLAMP(float x, float min, float max) { @@ -26,7 +33,7 @@ static inline float FCLAMP(float x, float min, float max) { // Calculate DWT levels from chunk size static int calculate_dwt_levels(int chunk_size) { - if (chunk_size < TAD32_MIN_CHUNK_SIZE) { + /*if (chunk_size < TAD32_MIN_CHUNK_SIZE) { fprintf(stderr, "Error: Chunk size %d is below minimum %d\n", chunk_size, TAD32_MIN_CHUNK_SIZE); return -1; } @@ -44,7 +51,9 @@ static int calculate_dwt_levels(int chunk_size) { levels++; } - return levels - 2; // Maximum decomposition + return levels - 2;*/ // Maximum decomposition + + return 9; } //============================================================================= @@ -99,11 +108,80 @@ static void dwt_dd4_forward_1d(float *data, int length) { free(temp); } +// 1D DWT using lifting scheme for 9/7 irreversible filter +static void dwt_97_forward_1d(float *data, int length) { + if (length < 2) return; + + float *temp = malloc(length * sizeof(float)); + int half = (length + 1) / 2; // Handle odd lengths properly + + // Split into even/odd samples + for (int i = 0; i < half; i++) { + temp[i] = data[2 * i]; // Even (low) + } + for (int i = 0; i < length / 2; i++) { + temp[half + i] = data[2 * i + 1]; // Odd (high) + } + + // JPEG2000 9/7 forward lifting steps (corrected to match decoder) + const float alpha = -1.586134342f; + const float beta = -0.052980118f; + const float gamma = 0.882911076f; + const float delta = 0.443506852f; + const float K = 1.230174105f; + + // Step 1: Predict α - d[i] += α * (s[i] + s[i+1]) + for (int i = 0; i < length / 2; i++) { + if (half + i < length) { + float s_curr = temp[i]; + float s_next = (i + 1 < half) ? temp[i + 1] : s_curr; + temp[half + i] += alpha * (s_curr + s_next); + } + } + + // Step 2: Update β - s[i] += β * (d[i-1] + d[i]) + for (int i = 0; i < half; i++) { + float d_curr = (half + i < length) ? temp[half + i] : 0.0f; + float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr; + temp[i] += beta * (d_prev + d_curr); + } + + // Step 3: Predict γ - d[i] += γ * (s[i] + s[i+1]) + for (int i = 0; i < length / 2; i++) { + if (half + i < length) { + float s_curr = temp[i]; + float s_next = (i + 1 < half) ? temp[i + 1] : s_curr; + temp[half + i] += gamma * (s_curr + s_next); + } + } + + // Step 4: Update δ - s[i] += δ * (d[i-1] + d[i]) + for (int i = 0; i < half; i++) { + float d_curr = (half + i < length) ? temp[half + i] : 0.0f; + float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr; + temp[i] += delta * (d_prev + d_curr); + } + + // Step 5: Scaling - s[i] *= K, d[i] /= K + for (int i = 0; i < half; i++) { + temp[i] *= K; // Low-pass coefficients + } + for (int i = 0; i < length / 2; i++) { + if (half + i < length) { + temp[half + i] /= K; // High-pass coefficients + } + } + + memcpy(data, temp, length * sizeof(float)); + free(temp); +} + // Apply multi-level DWT (using DD-4 wavelet) static void dwt_dd4_forward_multilevel(float *data, int length, int levels) { int current_length = length; for (int level = 0; level < levels; level++) { - dwt_dd4_forward_1d(data, current_length); +// dwt_dd4_forward_1d(data, current_length); + dwt_97_forward_1d(data, current_length); current_length = (current_length + 1) / 2; } } @@ -112,7 +190,7 @@ static void dwt_dd4_forward_multilevel(float *data, int length, int levels) { // M/S Stereo Decorrelation (PCM32f version) //============================================================================= -static void ms_decorrelate_16(const float *left, const float *right, float *mid, float *side, size_t count) { +static void ms_decorrelate(const float *left, const float *right, float *mid, float *side, size_t count) { for (size_t i = 0; i < count; i++) { // Mid = (L + R) / 2, Side = (L - R) / 2 float l = left[i]; @@ -122,6 +200,22 @@ static void ms_decorrelate_16(const float *left, const float *right, float *mid, } } +static float signum(float x) { + if (x > 0.0f) return 1.0f; + if (x < 0.0f) return -1.0f; + return 0.0f; +} + +static void compress_gamma(float *left, float *right, size_t count) { + for (size_t i = 0; i < count; i++) { + // encode(x) = sign(x) * |x|^γ where γ=0.5 + float x = left[i]; + left[i] = signum(x) * sqrtf(fabsf(x)); + float y = right[i]; + right[i] = signum(y) * sqrtf(fabsf(y)); + } +} + //============================================================================= // Quantization with Frequency-Dependent Weighting //============================================================================= @@ -146,19 +240,21 @@ static void get_quantization_weights(int quality, int dwt_levels, float *weights /*15*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f} }; - float quality_scale = 4.0f * (1.0f + FCLAMP((5 - quality) * 0.5f, 0.0f, 1000.0f)); + float quality_scale = 1.0f * (1.0f + FCLAMP((5 - quality) * 0.5f, 0.0f, 1000.0f)); for (int i = 0; i < dwt_levels; i++) { - weights[i] = base_weights[dwt_levels][i] * quality_scale; + weights[i] = 1.0f;//base_weights[dwt_levels][i] * quality_scale; } } +#define QUANT_STEPS 512.0f // 64 -> [-64..64] -> 7 bits for LL + static int get_deadzone_threshold(int quality) { const int thresholds[] = {0,0,0,0,0,0}; // Q0 to Q5 return thresholds[quality]; } -static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, size_t count, int quality, int apply_deadzone, int chunk_size, int dwt_levels) { +static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, size_t count, int quality, int apply_deadzone, int chunk_size, int dwt_levels, int *current_subband_index) { float weights[16]; get_quantization_weights(quality, dwt_levels, weights); int deadzone = apply_deadzone ? get_deadzone_threshold(quality) : 0; @@ -181,11 +277,17 @@ static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, s } } + // Store subband index (LL=0, H1=1, H2=2, ..., H9=9 for dwt_levels=9) + if (current_subband_index != NULL) { + current_subband_index[i] = sideband; + } + int weight_idx = (sideband == 0) ? 0 : sideband - 1; if (weight_idx >= dwt_levels) weight_idx = dwt_levels - 1; float weight = weights[weight_idx]; - float val = coeffs[i] / weight * TAD32_COEFF_SCALAR; + float val = (coeffs[i] / TAD32_COEFF_SCALARS[sideband]) * (QUANT_STEPS * weight); + // (coeffs[i] / TAD32_COEFF_SCALARS[sideband]) normalises coeffs to -1..1 int16_t quant_val = (int16_t)roundf(val); if (apply_deadzone && sideband >= dwt_levels - 1) { @@ -200,10 +302,280 @@ static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, s free(sideband_starts); } +// idea 1: power-of-two companding +// for quant step 8: +// Q -> Float +// 0 -> 0 +// 1 -> 1/128 +// 2 -> 1/64 +// 3 -> 1/32 +// 4 -> 1/16 +// 5 -> 1/8 +// 6 -> 1/4 +// 7 -> 1/2 +// 8 -> 1/1 +// for -1 to -8, just invert the sign + + //============================================================================= // Significance Map Encoding //============================================================================= +//============================================================================= +// Coefficient Statistics +//============================================================================= + +static int compare_float(const void *a, const void *b) { + float fa = *(const float*)a; + float fb = *(const float*)b; + if (fa < fb) return -1; + if (fa > fb) return 1; + return 0; +} + +typedef struct { + float min; + float q1; + float median; + float q3; + float max; +} CoeffStats; + +typedef struct { + float *data; + size_t count; + size_t capacity; +} CoeffAccumulator; + +// Global accumulators for statistics +static CoeffAccumulator *mid_accumulators = NULL; +static CoeffAccumulator *side_accumulators = NULL; +static int num_subbands = 0; +static int stats_initialized = 0; +static int stats_dwt_levels = 0; + +static void init_statistics(int dwt_levels) { + if (stats_initialized) return; + + num_subbands = dwt_levels + 1; + stats_dwt_levels = dwt_levels; + + mid_accumulators = calloc(num_subbands, sizeof(CoeffAccumulator)); + side_accumulators = calloc(num_subbands, sizeof(CoeffAccumulator)); + + for (int i = 0; i < num_subbands; i++) { + mid_accumulators[i].capacity = 1024; + mid_accumulators[i].data = malloc(mid_accumulators[i].capacity * sizeof(float)); + mid_accumulators[i].count = 0; + + side_accumulators[i].capacity = 1024; + side_accumulators[i].data = malloc(side_accumulators[i].capacity * sizeof(float)); + side_accumulators[i].count = 0; + } + + stats_initialized = 1; +} + +static void accumulate_coefficients(const float *coeffs, int dwt_levels, int chunk_size, CoeffAccumulator *accumulators) { + int first_band_size = chunk_size >> dwt_levels; + + int *sideband_starts = malloc((dwt_levels + 2) * sizeof(int)); + sideband_starts[0] = 0; + sideband_starts[1] = first_band_size; + for (int i = 2; i <= dwt_levels + 1; i++) { + sideband_starts[i] = sideband_starts[i-1] + (first_band_size << (i-2)); + } + + for (int s = 0; s <= dwt_levels; s++) { + size_t start = sideband_starts[s]; + size_t end = sideband_starts[s + 1]; + size_t band_size = end - start; + + // Expand capacity if needed + while (accumulators[s].count + band_size > accumulators[s].capacity) { + accumulators[s].capacity *= 2; + accumulators[s].data = realloc(accumulators[s].data, + accumulators[s].capacity * sizeof(float)); + } + + // Copy coefficients + memcpy(accumulators[s].data + accumulators[s].count, + coeffs + start, band_size * sizeof(float)); + accumulators[s].count += band_size; + } + + free(sideband_starts); +} + +static void calculate_coeff_stats(const float *coeffs, size_t count, CoeffStats *stats) { + if (count == 0) { + stats->min = stats->q1 = stats->median = stats->q3 = stats->max = 0.0f; + return; + } + + // Copy coefficients for sorting + float *sorted = malloc(count * sizeof(float)); + memcpy(sorted, coeffs, count * sizeof(float)); + qsort(sorted, count, sizeof(float), compare_float); + + stats->min = sorted[0]; + stats->max = sorted[count - 1]; + stats->median = sorted[count / 2]; + stats->q1 = sorted[count / 4]; + stats->q3 = sorted[(3 * count) / 4]; + + free(sorted); +} + +#define HISTOGRAM_BINS 40 +#define HISTOGRAM_WIDTH 60 + +static void print_histogram(const float *coeffs, size_t count, const char *title) { + if (count == 0) return; + + // Find min/max + float min_val = coeffs[0]; + float max_val = coeffs[0]; + for (size_t i = 1; i < count; i++) { + if (coeffs[i] < min_val) min_val = coeffs[i]; + if (coeffs[i] > max_val) max_val = coeffs[i]; + } + + // Handle case where all values are the same + if (fabsf(max_val - min_val) < 1e-9f) { + fprintf(stderr, " %s: All values are %.3f\n", title, min_val); + return; + } + + // Create histogram bins + size_t bins[HISTOGRAM_BINS] = {0}; + float bin_width = (max_val - min_val) / HISTOGRAM_BINS; + + for (size_t i = 0; i < count; i++) { + int bin = (int)((coeffs[i] - min_val) / bin_width); + if (bin >= HISTOGRAM_BINS) bin = HISTOGRAM_BINS - 1; + if (bin < 0) bin = 0; + bins[bin]++; + } + + // Find max bin count for scaling + size_t max_bin = 0; + for (int i = 0; i < HISTOGRAM_BINS; i++) { + if (bins[i] > max_bin) max_bin = bins[i]; + } + + // Print histogram + fprintf(stderr, " %s Histogram (range: %.3f to %.3f):\n", title, min_val, max_val); + + // Print top 20 bins to keep output manageable + for (int i = 0; i < HISTOGRAM_BINS; i++) { + float bin_start = min_val + i * bin_width; + float bin_end = bin_start + bin_width; + int bar_width = (int)((bins[i] * HISTOGRAM_WIDTH) / max_bin); + + // Only print bins with significant content (> 1% of max) + if (bins[i] > max_bin / 100) { + fprintf(stderr, " %8.3f-%8.3f [%7zu]: ", bin_start, bin_end, bins[i]); + for (int j = 0; j < bar_width; j++) { + fprintf(stderr, "█"); + } + fprintf(stderr, "\n"); + } + } + fprintf(stderr, "\n"); +} + +void tad32_print_statistics(void) { + if (!stats_initialized) return; + + fprintf(stderr, "\n=== TAD Coefficient Statistics (before quantization) ===\n"); + + // Print Mid channel statistics + fprintf(stderr, "\nMid Channel:\n"); + fprintf(stderr, "%-12s %10s %10s %10s %10s %10s %10s\n", + "Subband", "Samples", "Min", "Q1", "Median", "Q3", "Max"); + fprintf(stderr, "--------------------------------------------------------------------------------\n"); + + for (int s = 0; s < num_subbands; s++) { + CoeffStats stats; + calculate_coeff_stats(mid_accumulators[s].data, mid_accumulators[s].count, &stats); + + char band_name[16]; + if (s == 0) { + snprintf(band_name, sizeof(band_name), "LL (L%d)", stats_dwt_levels); + } else { + snprintf(band_name, sizeof(band_name), "H (L%d)", stats_dwt_levels - s + 1); + } + + fprintf(stderr, "%-12s %10zu %10.3f %10.3f %10.3f %10.3f %10.3f\n", + band_name, mid_accumulators[s].count, + stats.min, stats.q1, stats.median, stats.q3, stats.max); + } + + // Print Mid channel histograms + fprintf(stderr, "\nMid Channel Histograms:\n"); + for (int s = 0; s < num_subbands; s++) { + char band_name[32]; + if (s == 0) { + snprintf(band_name, sizeof(band_name), "LL (L%d)", stats_dwt_levels); + } else { + snprintf(band_name, sizeof(band_name), "H (L%d)", stats_dwt_levels - s + 1); + } + print_histogram(mid_accumulators[s].data, mid_accumulators[s].count, band_name); + } + + // Print Side channel statistics + fprintf(stderr, "\nSide Channel:\n"); + fprintf(stderr, "%-12s %10s %10s %10s %10s %10s %10s\n", + "Subband", "Samples", "Min", "Q1", "Median", "Q3", "Max"); + fprintf(stderr, "--------------------------------------------------------------------------------\n"); + + for (int s = 0; s < num_subbands; s++) { + CoeffStats stats; + calculate_coeff_stats(side_accumulators[s].data, side_accumulators[s].count, &stats); + + char band_name[16]; + if (s == 0) { + snprintf(band_name, sizeof(band_name), "LL (L%d)", stats_dwt_levels); + } else { + snprintf(band_name, sizeof(band_name), "H (L%d)", stats_dwt_levels - s + 1); + } + + fprintf(stderr, "%-12s %10zu %10.3f %10.3f %10.3f %10.3f %10.3f\n", + band_name, side_accumulators[s].count, + stats.min, stats.q1, stats.median, stats.q3, stats.max); + } + + // Print Side channel histograms + fprintf(stderr, "\nSide Channel Histograms:\n"); + for (int s = 0; s < num_subbands; s++) { + char band_name[32]; + if (s == 0) { + snprintf(band_name, sizeof(band_name), "LL (L%d)", stats_dwt_levels); + } else { + snprintf(band_name, sizeof(band_name), "H (L%d)", stats_dwt_levels - s + 1); + } + print_histogram(side_accumulators[s].data, side_accumulators[s].count, band_name); + } + + fprintf(stderr, "\n"); +} + +void tad32_free_statistics(void) { + if (!stats_initialized) return; + + for (int i = 0; i < num_subbands; i++) { + free(mid_accumulators[i].data); + free(side_accumulators[i].data); + } + free(mid_accumulators); + free(side_accumulators); + + mid_accumulators = NULL; + side_accumulators = NULL; + stats_initialized = 0; +} + static size_t encode_sigmap_2bit(const int16_t *values, size_t count, uint8_t *output) { size_t map_bytes = (count * 2 + 7) / 8; uint8_t *map = output; @@ -269,8 +641,11 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, int qua pcm32_right[i] = pcm32_stereo[i * 2 + 1]; } + // Step 1.1: Compress dynamic range +// compress_gamma(pcm32_left, pcm32_right, num_samples); + // Step 2: M/S decorrelation - ms_decorrelate_16(pcm32_left, pcm32_right, pcm32_mid, pcm32_side, num_samples); + ms_decorrelate(pcm32_left, pcm32_right, pcm32_mid, pcm32_side, num_samples); // Step 3: Convert to float and apply DWT for (size_t i = 0; i < num_samples; i++) { @@ -281,9 +656,22 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, int qua dwt_dd4_forward_multilevel(dwt_mid, num_samples, dwt_levels); dwt_dd4_forward_multilevel(dwt_side, num_samples, dwt_levels); + // Step 3.5: Accumulate coefficient statistics if enabled + static int stats_enabled = -1; + if (stats_enabled == -1) { + stats_enabled = 1;//getenv("TAD_COEFF_STATS") != NULL; + if (stats_enabled) { + init_statistics(dwt_levels); + } + } + if (stats_enabled) { + accumulate_coefficients(dwt_mid, dwt_levels, num_samples, mid_accumulators); + accumulate_coefficients(dwt_side, dwt_levels, num_samples, side_accumulators); + } + // Step 4: Quantize with frequency-dependent weights and dead zone - quantize_dwt_coefficients(dwt_mid, quant_mid, num_samples, quality, 1, num_samples, dwt_levels); - quantize_dwt_coefficients(dwt_side, quant_side, num_samples, quality, 1, num_samples, dwt_levels); + quantize_dwt_coefficients(dwt_mid, quant_mid, num_samples, quality, 1, num_samples, dwt_levels, NULL); + quantize_dwt_coefficients(dwt_side, quant_side, num_samples, quality, 1, num_samples, dwt_levels, NULL); // Step 5: Encode with 2-bit significance map (32-bit version) uint8_t *temp_buffer = malloc(num_samples * 4 * sizeof(int32_t)); diff --git a/video_encoder/encoder_tad.h b/video_encoder/encoder_tad.h index 29542c9..acc8ae6 100644 --- a/video_encoder/encoder_tad.h +++ b/video_encoder/encoder_tad.h @@ -9,7 +9,7 @@ // Alternative version: PCM32f throughout encoding, PCM8 conversion only at decoder // Constants -#define TAD32_COEFF_SCALAR 1024.0f +#define TAD32_COEFF_SCALARS {64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f} // value only valid for CDF 9/7 with decomposition level 9. Index 0 = LL band #define TAD32_MIN_CHUNK_SIZE 1024 // Minimum: 1024 samples #define TAD32_SAMPLE_RATE 32000 #define TAD32_CHANNELS 2 // Stereo @@ -37,4 +37,16 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, int quality, int use_zstd, uint8_t *output); +/** + * Print accumulated coefficient statistics + * Only effective if TAD_COEFF_STATS environment variable is set + */ +void tad32_print_statistics(void); + +/** + * Free accumulated statistics memory + * Should be called after tad32_print_statistics() + */ +void tad32_free_statistics(void); + #endif // TAD32_ENCODER_H diff --git a/video_encoder/encoder_tad_standalone.c b/video_encoder/encoder_tad_standalone.c index 170ab0d..897558b 100644 --- a/video_encoder/encoder_tad_standalone.c +++ b/video_encoder/encoder_tad_standalone.c @@ -1,6 +1,6 @@ // Created by CuriousTorvald and Claude on 2025-10-24. -// TAD32 (Terrarum Advanced Audio - PCM16 version) Encoder - Standalone program -// Alternative version: PCM16 throughout encoding, PCM8 conversion only at decoder +// TAD32 (Terrarum Advanced Audio - PCM32 version) Encoder - Standalone program +// Alternative version: PCM32 throughout encoding, PCM8 conversion only at decoder // Uses encoder_tad32.c library for encoding functions #include @@ -11,7 +11,7 @@ #include #include "encoder_tad.h" -#define ENCODER_VENDOR_STRING "Encoder-TAD32 (PCM32f version) 20251024" +#define ENCODER_VENDOR_STRING "Encoder-TAD32 (PCM32f version) 20251026" // TAD32 format constants #define TAD32_DEFAULT_CHUNK_SIZE 32768 // Default: power of 2 for optimal performance (2^15) @@ -52,8 +52,8 @@ static void print_usage(const char *prog_name) { printf(" -v Verbose output\n"); printf(" -h, --help Show this help\n"); printf("\nVersion: %s\n", ENCODER_VENDOR_STRING); - printf("Note: This is the PCM16 alternative version for comparison testing.\n"); - printf(" PCM16 is processed throughout encoding; PCM8 conversion happens at decoder.\n"); + printf("Note: This is the PCM32 alternative version for comparison testing.\n"); + printf(" PCM32 is processed throughout encoding; PCM8 conversion happens at decoder.\n"); } int main(int argc, char *argv[]) { @@ -269,6 +269,10 @@ int main(int argc, char *argv[]) { printf("\n"); } + // Print coefficient statistics if enabled + tad32_print_statistics(); + tad32_free_statistics(); + // Cleanup free(chunk_buffer); free(output_buffer);