Files
tsvm/video_encoder/lib/libtadenc/encoder_tad.c
2025-12-05 03:39:32 +09:00

1278 lines
43 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Created by CuriousTorvald and Claude on 2025-10-24.
// TAD32 (Terrarum Advanced Audio - PCM32f version) Encoder Library
// Alternative version: PCM32f throughout encoding, PCM8 conversion only at decoder
// This file contains only the encoding functions for comparison testing
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
#include <zstd.h>
#include "encoder_tad.h"
// Undefine the macro version from header and define as array
#undef TAD32_COEFF_SCALARS
// Coefficient scalars for each subband (CDF 9/7 with 9 decomposition levels)
// Index 0 = LL band, Index 1-9 = H bands (L9 to L1)
static const float TAD32_COEFF_SCALARS[] = {64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f};
// Base quantiser weight table (10 subbands: LL + 9 H bands)
// These weights are multiplied by quantiser_scale during quantisation
static const float BASE_QUANTISER_WEIGHTS[2][10] = {
{ // mid channel
4.0f, // LL (L9) DC
2.0f, // H (L9) 31.25 hz
1.8f, // H (L8) 62.5 hz
1.6f, // H (L7) 125 hz
1.4f, // H (L6) 250 hz
1.2f, // H (L5) 500 hz
1.0f, // H (L4) 1 khz
1.0f, // H (L3) 2 khz
1.3f, // H (L2) 4 khz
2.0f // H (L1) 8 khz
},
{ // side channel
6.0f, // LL (L9) DC
5.0f, // H (L9) 31.25 hz
2.6f, // H (L8) 62.5 hz
2.4f, // H (L7) 125 hz
1.8f, // H (L6) 250 hz
1.3f, // H (L5) 500 hz
1.0f, // H (L4) 1 khz
1.0f, // H (L3) 2 khz
1.6f, // H (L2) 4 khz
3.2f // H (L1) 8 khz
}};
// target: before quantisation
static const float DEADBANDS[2][10] = {
{ // mid channel
0.20f, // LL (L9) DC
0.06f, // H (L9) 31.25 hz
0.06f, // H (L8) 62.5 hz
0.06f, // H (L7) 125 hz
0.06f, // H (L6) 250 hz
0.04f, // H (L5) 500 hz
0.04f, // H (L4) 1 khz
0.01f, // H (L3) 2 khz
0.01f, // H (L2) 4 khz
0.01f // H (L1) 8 khz
},
{ // side channel
0.20f, // LL (L9) DC
0.06f, // H (L9) 31.25 hz
0.06f, // H (L8) 62.5 hz
0.06f, // H (L7) 125 hz
0.06f, // H (L6) 250 hz
0.04f, // H (L5) 500 hz
0.04f, // H (L4) 1 khz
0.01f, // H (L3) 2 khz
0.01f, // H (L2) 4 khz
0.01f // H (L1) 8 khz
}};
static inline float FCLAMP(float x, float min, float max) {
return x < min ? min : (x > max ? max : x);
}
// Calculate DWT levels from chunk size
static int calculate_dwt_levels(int chunk_size) {
/*if (chunk_size < TAD32_MIN_CHUNK_SIZE) {
fprintf(stderr, "Error: Chunk size %d is below minimum %d\n", chunk_size, TAD32_MIN_CHUNK_SIZE);
return -1;
}
int levels = 0;
int size = chunk_size;
while (size > 1) {
size >>= 1;
levels++;
}
// For non-power-of-2, we need to add 1 to levels
int pow2 = 1 << levels;
if (pow2 < chunk_size) {
levels++;
}
return levels - 2;*/ // Maximum decomposition
return 9;
}
// Special marker for deadzoned coefficients (will be reconstructed with noise on decode)
#define DEADZONE_MARKER_FLOAT (-999.0f) // Unmistakable marker in float domain
#define DEADZONE_MARKER_QUANT (-128) // Maps to this in quantised domain (int8 minimum)
// Perceptual epsilon - coefficients below this are truly zero (inaudible)
#define EPSILON_PERCEPTUAL 0.001f
static void apply_coeff_deadzone(int channel, float *coeffs, size_t num_samples) {
// Apply deadzonning to each DWT subband using frequency-dependent thresholds
// Instead of zeroing, mark small coefficients for stochastic reconstruction
const int dwt_levels = 9; // Fixed to match encoder
// Calculate subband boundaries (same logic as decoder)
const int first_band_size = num_samples >> dwt_levels;
int sideband_starts[11]; // dwt_levels + 2
sideband_starts[0] = 0;
sideband_starts[1] = first_band_size;
for (int i = 2; i <= dwt_levels + 1; i++) {
sideband_starts[i] = sideband_starts[i - 1] + (first_band_size << (i - 2));
}
// Apply deadzone threshold to each coefficient
for (size_t i = 0; i < num_samples; i++) {
// Determine which subband this coefficient belongs to
int sideband = dwt_levels; // Default to highest frequency
for (int s = 0; s <= dwt_levels; s++) {
if (i < (size_t)sideband_starts[s + 1]) {
sideband = s;
break;
}
}
// Get threshold for this subband and channel
float threshold = DEADBANDS[channel][sideband];
float abs_coeff = fabsf(coeffs[i]);
// If coefficient is within deadband AND perceptually non-zero, mark it
if (abs_coeff > EPSILON_PERCEPTUAL && abs_coeff < threshold) {
// Mark for stochastic reconstruction (decoder will add noise)
coeffs[i] = 0.0f;//DEADZONE_MARKER_FLOAT;
}
// If below perceptual epsilon, truly zero it
else if (abs_coeff <= EPSILON_PERCEPTUAL) {
coeffs[i] = 0.0f;
}
// Otherwise keep coefficient unchanged
}
}
//=============================================================================
// DD-4 DWT Implementation
//=============================================================================
// Four-point interpolating Deslauriers-Dubuc (DD-4) wavelet forward 1D transform
static void dwt_dd4_forward_1d(float *data, int length) {
if (length < 2) return;
float *temp = malloc(length * sizeof(float));
int half = (length + 1) / 2;
// Split into even/odd samples
for (int i = 0; i < half; i++) {
temp[i] = data[2 * i]; // Even (low)
}
for (int i = 0; i < length / 2; i++) {
temp[half + i] = data[2 * i + 1]; // Odd (high)
}
// DD-4 forward prediction step with four-point kernel
for (int i = 0; i < length / 2; i++) {
float s_m1, s_0, s_1, s_2;
if (i > 0) s_m1 = temp[i - 1];
else s_m1 = temp[0]; // Mirror boundary
s_0 = temp[i];
if (i + 1 < half) s_1 = temp[i + 1];
else s_1 = temp[half - 1];
if (i + 2 < half) s_2 = temp[i + 2];
else if (half > 1) s_2 = temp[half - 2];
else s_2 = temp[half - 1];
float prediction = (-1.0f/16.0f) * s_m1 + (9.0f/16.0f) * s_0 +
(9.0f/16.0f) * s_1 + (-1.0f/16.0f) * s_2;
temp[half + i] -= prediction;
}
// DD-4 update step
for (int i = 0; i < half; i++) {
float d_curr = (i < length / 2) ? temp[half + i] : 0.0f;
float d_prev = (i > 0 && i - 1 < length / 2) ? temp[half + i - 1] : 0.0f;
temp[i] += 0.25f * (d_prev + d_curr);
}
memcpy(data, temp, length * sizeof(float));
free(temp);
}
// 1D DWT using lifting scheme for 9/7 irreversible filter
static void dwt_97_forward_1d(float *data, int length) {
if (length < 2) return;
float *temp = malloc(length * sizeof(float));
int half = (length + 1) / 2; // Handle odd lengths properly
// Split into even/odd samples
for (int i = 0; i < half; i++) {
temp[i] = data[2 * i]; // Even (low)
}
for (int i = 0; i < length / 2; i++) {
temp[half + i] = data[2 * i + 1]; // Odd (high)
}
// JPEG2000 9/7 forward lifting steps (corrected to match decoder)
const float alpha = -1.586134342f;
const float beta = -0.052980118f;
const float gamma = 0.882911076f;
const float delta = 0.443506852f;
const float K = 1.230174105f;
// Step 1: Predict α - d[i] += α * (s[i] + s[i+1])
for (int i = 0; i < length / 2; i++) {
if (half + i < length) {
float s_curr = temp[i];
float s_next = (i + 1 < half) ? temp[i + 1] : s_curr;
temp[half + i] += alpha * (s_curr + s_next);
}
}
// Step 2: Update β - s[i] += β * (d[i-1] + d[i])
for (int i = 0; i < half; i++) {
float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr;
temp[i] += beta * (d_prev + d_curr);
}
// Step 3: Predict γ - d[i] += γ * (s[i] + s[i+1])
for (int i = 0; i < length / 2; i++) {
if (half + i < length) {
float s_curr = temp[i];
float s_next = (i + 1 < half) ? temp[i + 1] : s_curr;
temp[half + i] += gamma * (s_curr + s_next);
}
}
// Step 4: Update δ - s[i] += δ * (d[i-1] + d[i])
for (int i = 0; i < half; i++) {
float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr;
temp[i] += delta * (d_prev + d_curr);
}
// Step 5: Scaling - s[i] *= K, d[i] /= K
for (int i = 0; i < half; i++) {
temp[i] *= K; // Low-pass coefficients
}
for (int i = 0; i < length / 2; i++) {
if (half + i < length) {
temp[half + i] /= K; // High-pass coefficients
}
}
memcpy(data, temp, length * sizeof(float));
free(temp);
}
// Apply multi-level DWT (using DD-4 wavelet)
static void dwt_forward_multilevel(float *data, int length, int levels) {
int current_length = length;
for (int level = 0; level < levels; level++) {
// dwt_dd4_forward_1d(data, current_length);
dwt_97_forward_1d(data, current_length);
current_length = (current_length + 1) / 2;
}
}
//=============================================================================
// Pre-emphasis Filter
//=============================================================================
static void calculate_preemphasis_coeffs(float *b0, float *b1, float *a1) {
// Simple first-order digital pre-emphasis
// Corner frequency ≈ 1200 Hz (chosen for 32 kHz codec)
// Provides ~6 dB/octave boost above corner
// Pre-emphasis factor (0.95 = gentle, 0.90 = moderate, 0.85 = aggressive)
const float alpha = 0.5f; // Gentle boost suitable for music
*b0 = 1.0f;
*b1 = -alpha;
*a1 = 0.0f; // No feedback
}
// emphasis at alpha=0.5 shifts quantisation crackles to lower frequency which MIGHT be more preferable
static void apply_preemphasis(float *left, float *right, size_t count) {
// Static state variables - persistent across chunks to prevent discontinuities
static float prev_x_l = 0.0f;
static float prev_y_l = 0.0f;
static float prev_x_r = 0.0f;
static float prev_y_r = 0.0f;
float b0, b1, a1;
calculate_preemphasis_coeffs(&b0, &b1, &a1);
// Left channel - use persistent state
for (size_t i = 0; i < count; i++) {
float x = left[i];
float y = b0 * x + b1 * prev_x_l - a1 * prev_y_l;
left[i] = y;
prev_x_l = x;
prev_y_l = y;
}
// Right channel - use persistent state
for (size_t i = 0; i < count; i++) {
float x = right[i];
float y = b0 * x + b1 * prev_x_r - a1 * prev_y_r;
right[i] = y;
prev_x_r = x;
prev_y_r = y;
}
}
//=============================================================================
// M/S Stereo Decorrelation (PCM32f version)
//=============================================================================
static void ms_decorrelate(const float *left, const float *right, float *mid, float *side, size_t count) {
for (size_t i = 0; i < count; i++) {
// Mid = (L + R) / 2, Side = (L - R) / 2
float l = left[i];
float r = right[i];
mid[i] = (l + r) / 2.0f;
side[i] = (l - r) / 2.0f;
}
}
static float signum(float x) {
if (x > 0.0f) return 1.0f;
if (x < 0.0f) return -1.0f;
return 0.0f;
}
static void compress_gamma(float *left, float *right, size_t count) {
for (size_t i = 0; i < count; i++) {
// encode(x) = sign(x) * |x|^γ where γ=0.5
float x = left[i];
left[i] = signum(x) * powf(fabsf(x), 0.5f);
float y = right[i];
right[i] = signum(y) * powf(fabsf(y), 0.5f);
}
}
static void compress_mu_law(float *left, float *right, size_t count) {
static float MU = 255.0f;
for (size_t i = 0; i < count; i++) {
// encode(x) = sign(x) * |x|^γ where γ=0.5
float x = left[i];
left[i] = signum(x) * logf(1.0f + MU * fabsf(x)) / logf(1.0f + MU);
float y = right[i];
right[i] = signum(y) * logf(1.0f + MU * fabsf(y)) / logf(1.0f + MU);
}
}
//=============================================================================
// Quantisation with Frequency-Dependent Weighting
//=============================================================================
#define LAMBDA_FIXED 6.0f
// Lambda-based companding encoder (based on Laplacian distribution CDF)
// val must be normalised to [-1,1]
// Returns quantised index in range [-127, +127]
static int8_t lambda_companding(float val, int max_index) {
// Handle zero
if (fabsf(val) < 1e-9f) {
return 0;
}
int sign = (val < 0) ? -1 : 1;
float abs_val = fabsf(val);
// Clamp to [0, 1]
if (abs_val > 1.0f) abs_val = 1.0f;
// Laplacian CDF for x >= 0: F(x) = 1 - 0.5 * exp(-λ*x)
// Map to [0.5, 1.0] range (half of CDF for positive values)
float cdf = 1.0f - 0.5f * expf(-LAMBDA_FIXED * abs_val);
// Map CDF from [0.5, 1.0] to [0, 1] for positive half
float normalised_cdf = (cdf - 0.5f) * 2.0f;
// Quantise to index
int index = (int)roundf(normalised_cdf * max_index);
// Clamp index to valid range [0, max_index]
if (index < 0) index = 0;
if (index > max_index) index = max_index;
return (int8_t)(sign * index);
}
static void quantise_dwt_coefficients(int channel, const float *coeffs, int8_t *quantised, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int max_index, int *current_subband_index, float quantiser_scale) {
int first_band_size = chunk_size >> dwt_levels;
int *sideband_starts = malloc((dwt_levels + 2) * sizeof(int));
sideband_starts[0] = 0;
sideband_starts[1] = first_band_size;
for (int i = 2; i <= dwt_levels + 1; i++) {
sideband_starts[i] = sideband_starts[i-1] + (first_band_size << (i-2));
}
for (size_t i = 0; i < count; i++) {
int sideband = dwt_levels;
for (int s = 0; s <= dwt_levels; s++) {
if (i < (size_t)sideband_starts[s + 1]) {
sideband = s;
break;
}
}
// Store subband index (LL=0, H1=1, H2=2, ..., H9=9 for dwt_levels=9)
if (current_subband_index != NULL) {
current_subband_index[i] = sideband;
}
// Check for deadzone marker (special handling)
/*if (coeffs[i] == DEADZONE_MARKER_FLOAT) {
// Map to special quantised marker for stochastic reconstruction
quantised[i] = (int8_t)DEADZONE_MARKER_QUANT;
} else {*/
// Normal quantisation
float weight = BASE_QUANTISER_WEIGHTS[channel][sideband] * quantiser_scale;
float val = (coeffs[i] / (TAD32_COEFF_SCALARS[sideband] * weight)); // val is normalised to [-1,1]
int8_t quant_val = lambda_companding(val, max_index);
quantised[i] = quant_val;
// }
}
free(sideband_starts);
}
//=============================================================================
// Coefficient Statistics
//=============================================================================
static int compare_float(const void *a, const void *b) {
float fa = *(const float*)a;
float fb = *(const float*)b;
if (fa < fb) return -1;
if (fa > fb) return 1;
return 0;
}
typedef struct {
float min;
float q1;
float median;
float q3;
float max;
float lambda; // Laplacian distribution parameter (1/b, where b is scale)
} CoeffStats;
typedef struct {
float *data;
size_t count;
size_t capacity;
} CoeffAccumulator;
typedef struct {
int8_t *data;
size_t count;
size_t capacity;
} QuantAccumulator;
// Global accumulators for statistics
static CoeffAccumulator *mid_accumulators = NULL;
static CoeffAccumulator *side_accumulators = NULL;
static QuantAccumulator *mid_quant_accumulators = NULL;
static QuantAccumulator *side_quant_accumulators = NULL;
static int num_subbands = 0;
static int stats_initialised = 0;
static int stats_dwt_levels = 0;
static void init_statistics(int dwt_levels) {
if (stats_initialised) return;
num_subbands = dwt_levels + 1;
stats_dwt_levels = dwt_levels;
mid_accumulators = calloc(num_subbands, sizeof(CoeffAccumulator));
side_accumulators = calloc(num_subbands, sizeof(CoeffAccumulator));
mid_quant_accumulators = calloc(num_subbands, sizeof(QuantAccumulator));
side_quant_accumulators = calloc(num_subbands, sizeof(QuantAccumulator));
for (int i = 0; i < num_subbands; i++) {
mid_accumulators[i].capacity = 1024;
mid_accumulators[i].data = malloc(mid_accumulators[i].capacity * sizeof(float));
mid_accumulators[i].count = 0;
side_accumulators[i].capacity = 1024;
side_accumulators[i].data = malloc(side_accumulators[i].capacity * sizeof(float));
side_accumulators[i].count = 0;
mid_quant_accumulators[i].capacity = 1024;
mid_quant_accumulators[i].data = malloc(mid_quant_accumulators[i].capacity * sizeof(int8_t));
mid_quant_accumulators[i].count = 0;
side_quant_accumulators[i].capacity = 1024;
side_quant_accumulators[i].data = malloc(side_quant_accumulators[i].capacity * sizeof(int8_t));
side_quant_accumulators[i].count = 0;
}
stats_initialised = 1;
}
static void accumulate_coefficients(const float *coeffs, int dwt_levels, int chunk_size, CoeffAccumulator *accumulators) {
int first_band_size = chunk_size >> dwt_levels;
int *sideband_starts = malloc((dwt_levels + 2) * sizeof(int));
sideband_starts[0] = 0;
sideband_starts[1] = first_band_size;
for (int i = 2; i <= dwt_levels + 1; i++) {
sideband_starts[i] = sideband_starts[i-1] + (first_band_size << (i-2));
}
for (int s = 0; s <= dwt_levels; s++) {
size_t start = sideband_starts[s];
size_t end = sideband_starts[s + 1];
size_t band_size = end - start;
// Expand capacity if needed
while (accumulators[s].count + band_size > accumulators[s].capacity) {
accumulators[s].capacity *= 2;
accumulators[s].data = realloc(accumulators[s].data,
accumulators[s].capacity * sizeof(float));
}
// Copy coefficients
memcpy(accumulators[s].data + accumulators[s].count,
coeffs + start, band_size * sizeof(float));
accumulators[s].count += band_size;
}
free(sideband_starts);
}
static void accumulate_quantised(const int8_t *quant, int dwt_levels, int chunk_size, QuantAccumulator *accumulators) {
int first_band_size = chunk_size >> dwt_levels;
int *sideband_starts = malloc((dwt_levels + 2) * sizeof(int));
sideband_starts[0] = 0;
sideband_starts[1] = first_band_size;
for (int i = 2; i <= dwt_levels + 1; i++) {
sideband_starts[i] = sideband_starts[i-1] + (first_band_size << (i-2));
}
for (int s = 0; s <= dwt_levels; s++) {
size_t start = sideband_starts[s];
size_t end = sideband_starts[s + 1];
size_t band_size = end - start;
// Expand capacity if needed
while (accumulators[s].count + band_size > accumulators[s].capacity) {
accumulators[s].capacity *= 2;
accumulators[s].data = realloc(accumulators[s].data,
accumulators[s].capacity * sizeof(int8_t));
}
// Copy coefficients
memcpy(accumulators[s].data + accumulators[s].count,
quant + start, band_size * sizeof(int8_t));
accumulators[s].count += band_size;
}
free(sideband_starts);
}
static void calculate_coeff_stats(const float *coeffs, size_t count, CoeffStats *stats) {
if (count == 0) {
stats->min = stats->q1 = stats->median = stats->q3 = stats->max = 0.0f;
stats->lambda = 0.0f;
return;
}
// Copy coefficients for sorting
float *sorted = malloc(count * sizeof(float));
memcpy(sorted, coeffs, count * sizeof(float));
qsort(sorted, count, sizeof(float), compare_float);
stats->min = sorted[0];
stats->max = sorted[count - 1];
stats->median = sorted[count / 2];
stats->q1 = sorted[count / 4];
stats->q3 = sorted[(3 * count) / 4];
free(sorted);
// Estimate Laplacian distribution parameter λ = 1/b
// For Laplacian centered at μ=0, MLE gives: b = mean(|x|)
// Therefore: λ = 1/b = 1/mean(|x|)
double sum_abs = 0.0;
for (size_t i = 0; i < count; i++) {
sum_abs += fabs(coeffs[i]);
}
double mean_abs = sum_abs / count;
stats->lambda = (mean_abs > 1e-9) ? (1.0f / mean_abs) : 0.0f;
}
#define HISTOGRAM_BINS 40
#define HISTOGRAM_WIDTH 60
static void print_histogram(const float *coeffs, size_t count, const char *title) {
if (count == 0) return;
// Find min/max
float min_val = coeffs[0];
float max_val = coeffs[0];
for (size_t i = 1; i < count; i++) {
if (coeffs[i] < min_val) min_val = coeffs[i];
if (coeffs[i] > max_val) max_val = coeffs[i];
}
// Handle case where all values are the same
if (fabsf(max_val - min_val) < 1e-9f) {
fprintf(stderr, " %s: All values are %.3f\n", title, min_val);
return;
}
// Create histogram bins
size_t bins[HISTOGRAM_BINS] = {0};
float bin_width = (max_val - min_val) / HISTOGRAM_BINS;
for (size_t i = 0; i < count; i++) {
int bin = (int)((coeffs[i] - min_val) / bin_width);
if (bin >= HISTOGRAM_BINS) bin = HISTOGRAM_BINS - 1;
if (bin < 0) bin = 0;
bins[bin]++;
}
// Find max bin count for scaling
size_t max_bin = 0;
for (int i = 0; i < HISTOGRAM_BINS; i++) {
if (bins[i] > max_bin) max_bin = bins[i];
}
// Print histogram
fprintf(stderr, " %s Histogram (range: %.3f to %.3f):\n", title, min_val, max_val);
// Print top 20 bins to keep output manageable
for (int i = 0; i < HISTOGRAM_BINS; i++) {
float bin_start = min_val + i * bin_width;
float bin_end = bin_start + bin_width;
int bar_width = (int)((bins[i] * HISTOGRAM_WIDTH) / max_bin);
// Only print bins with significant content (> 1% of max)
if (bins[i] > max_bin / 100) {
fprintf(stderr, " %8.3f-%8.3f [%7zu]: ", bin_start, bin_end, bins[i]);
for (int j = 0; j < bar_width; j++) {
fprintf(stderr, "");
}
fprintf(stderr, "\n");
}
}
fprintf(stderr, "\n");
}
typedef struct {
int8_t value;
size_t count;
float percentage;
} ValueFrequency;
static int compare_value_frequency(const void *a, const void *b) {
const ValueFrequency *va = (const ValueFrequency*)a;
const ValueFrequency *vb = (const ValueFrequency*)b;
// Sort by count descending
if (vb->count > va->count) return 1;
if (vb->count < va->count) return -1;
return 0;
}
static void print_top5_quantised_values(const int8_t *quant, size_t count, const char *title) {
if (count == 0) {
fprintf(stderr, " %s: No data\n", title);
return;
}
// For int8_t range is at most 256, so we can use direct indexing
// Map from [-128, 127] to [0, 255]
size_t freq[256] = {0};
for (size_t i = 0; i < count; i++) {
int idx = (int)quant[i] + 128;
freq[idx]++;
}
// Find all unique values with their frequencies
ValueFrequency values[256];
int unique_count = 0;
for (int i = 0; i < 256; i++) {
if (freq[i] > 0) {
values[unique_count].value = (int8_t)(i - 128);
values[unique_count].count = freq[i];
values[unique_count].percentage = (float)(freq[i] * 100.0) / count;
unique_count++;
}
}
// Sort by frequency
qsort(values, unique_count, sizeof(ValueFrequency), compare_value_frequency);
// Print top 10
fprintf(stderr, " %s Top 100 Values:\n", title);
int print_count = (unique_count < 100) ? unique_count : 100;
for (int i = 0; i < print_count; i++) {
fprintf(stderr, " %6d: %8zu occurrences (%5.2f%%)\n",
values[i].value, values[i].count, values[i].percentage);
}
fprintf(stderr, "\n");
}
void tad32_print_statistics(void) {
if (!stats_initialised) return;
fprintf(stderr, "\n=== TAD Coefficient Statistics (before quantisation) ===\n");
// Print Mid channel statistics
fprintf(stderr, "\nMid Channel:\n");
fprintf(stderr, "%-12s %10s %10s %10s %10s %10s %10s %10s\n",
"Subband", "Samples", "Min", "Q1", "Median", "Q3", "Max", "Lambda");
fprintf(stderr, "----------------------------------------------------------------------------------------\n");
for (int s = 0; s < num_subbands; s++) {
CoeffStats stats;
calculate_coeff_stats(mid_accumulators[s].data, mid_accumulators[s].count, &stats);
char band_name[16];
if (s == 0) {
snprintf(band_name, sizeof(band_name), "LL (L%d)", stats_dwt_levels);
} else {
snprintf(band_name, sizeof(band_name), "H (L%d)", stats_dwt_levels - s + 1);
}
fprintf(stderr, "%-12s %10zu %10.3f %10.3f %10.3f %10.3f %10.3f %10.3f\n",
band_name, mid_accumulators[s].count,
stats.min, stats.q1, stats.median, stats.q3, stats.max, stats.lambda);
}
// Print Mid channel histograms
fprintf(stderr, "\nMid Channel Histograms:\n");
for (int s = 0; s < num_subbands; s++) {
char band_name[32];
if (s == 0) {
snprintf(band_name, sizeof(band_name), "LL (L%d)", stats_dwt_levels);
} else {
snprintf(band_name, sizeof(band_name), "H (L%d)", stats_dwt_levels - s + 1);
}
print_histogram(mid_accumulators[s].data, mid_accumulators[s].count, band_name);
}
// Print Side channel statistics
fprintf(stderr, "\nSide Channel:\n");
fprintf(stderr, "%-12s %10s %10s %10s %10s %10s %10s %10s\n",
"Subband", "Samples", "Min", "Q1", "Median", "Q3", "Max", "Lambda");
fprintf(stderr, "----------------------------------------------------------------------------------------\n");
for (int s = 0; s < num_subbands; s++) {
CoeffStats stats;
calculate_coeff_stats(side_accumulators[s].data, side_accumulators[s].count, &stats);
char band_name[16];
if (s == 0) {
snprintf(band_name, sizeof(band_name), "LL (L%d)", stats_dwt_levels);
} else {
snprintf(band_name, sizeof(band_name), "H (L%d)", stats_dwt_levels - s + 1);
}
fprintf(stderr, "%-12s %10zu %10.3f %10.3f %10.3f %10.3f %10.3f %10.3f\n",
band_name, side_accumulators[s].count,
stats.min, stats.q1, stats.median, stats.q3, stats.max, stats.lambda);
}
// Print Side channel histograms
fprintf(stderr, "\nSide Channel Histograms:\n");
for (int s = 0; s < num_subbands; s++) {
char band_name[32];
if (s == 0) {
snprintf(band_name, sizeof(band_name), "LL (L%d)", stats_dwt_levels);
} else {
snprintf(band_name, sizeof(band_name), "H (L%d)", stats_dwt_levels - s + 1);
}
print_histogram(side_accumulators[s].data, side_accumulators[s].count, band_name);
}
// Print quantised values statistics
fprintf(stderr, "\n=== TAD Quantised Values Statistics (after quantisation) ===\n");
// Print Mid channel quantised values
fprintf(stderr, "\nMid Channel Quantised Values:\n");
for (int s = 0; s < num_subbands; s++) {
char band_name[32];
if (s == 0) {
snprintf(band_name, sizeof(band_name), "LL (L%d)", stats_dwt_levels);
} else {
snprintf(band_name, sizeof(band_name), "H (L%d)", stats_dwt_levels - s + 1);
}
print_top5_quantised_values(mid_quant_accumulators[s].data, mid_quant_accumulators[s].count, band_name);
}
// Print Side channel quantised values
fprintf(stderr, "\nSide Channel Quantised Values:\n");
for (int s = 0; s < num_subbands; s++) {
char band_name[32];
if (s == 0) {
snprintf(band_name, sizeof(band_name), "LL (L%d)", stats_dwt_levels);
} else {
snprintf(band_name, sizeof(band_name), "H (L%d)", stats_dwt_levels - s + 1);
}
print_top5_quantised_values(side_quant_accumulators[s].data, side_quant_accumulators[s].count, band_name);
}
fprintf(stderr, "\n");
}
void tad32_free_statistics(void) {
if (!stats_initialised) return;
for (int i = 0; i < num_subbands; i++) {
free(mid_accumulators[i].data);
free(side_accumulators[i].data);
free(mid_quant_accumulators[i].data);
free(side_quant_accumulators[i].data);
}
free(mid_accumulators);
free(side_accumulators);
free(mid_quant_accumulators);
free(side_quant_accumulators);
mid_accumulators = NULL;
side_accumulators = NULL;
mid_quant_accumulators = NULL;
side_quant_accumulators = NULL;
stats_initialised = 0;
}
//=============================================================================
// Binary Tree EZBC (1D Variant for TAD)
//=============================================================================
#include <stdbool.h>
// Bitstream writer for EZBC
typedef struct {
uint8_t *data;
size_t capacity;
size_t byte_pos;
uint8_t bit_pos; // 0-7, current bit position in current byte
} tad_bitstream_t;
// Block structure for 1D binary tree
typedef struct {
int start; // Start index in 1D array
int length; // Block length
} tad_block_t;
// Queue for block processing
typedef struct {
tad_block_t *blocks;
size_t count;
size_t capacity;
} tad_block_queue_t;
// Track coefficient state for refinement
typedef struct {
bool significant; // Has been marked significant
int first_bitplane; // Bitplane where it became significant
} tad_coeff_state_t;
// Bitstream operations
static void tad_bitstream_init(tad_bitstream_t *bs, size_t initial_capacity) {
bs->capacity = initial_capacity;
bs->data = calloc(1, initial_capacity);
bs->byte_pos = 0;
bs->bit_pos = 0;
}
static void tad_bitstream_write_bit(tad_bitstream_t *bs, int bit) {
// Grow if needed
if (bs->byte_pos >= bs->capacity) {
bs->capacity *= 2;
bs->data = realloc(bs->data, bs->capacity);
// Clear new memory
memset(bs->data + bs->byte_pos, 0, bs->capacity - bs->byte_pos);
}
if (bit) {
bs->data[bs->byte_pos] |= (1 << bs->bit_pos);
}
bs->bit_pos++;
if (bs->bit_pos == 8) {
bs->bit_pos = 0;
bs->byte_pos++;
}
}
static void tad_bitstream_write_bits(tad_bitstream_t *bs, uint32_t value, int num_bits) {
for (int i = 0; i < num_bits; i++) {
tad_bitstream_write_bit(bs, (value >> i) & 1);
}
}
static size_t tad_bitstream_size(tad_bitstream_t *bs) {
return bs->byte_pos + (bs->bit_pos > 0 ? 1 : 0);
}
static void tad_bitstream_free(tad_bitstream_t *bs) {
free(bs->data);
}
// Block queue operations
static void tad_queue_init(tad_block_queue_t *q) {
q->capacity = 1024;
q->blocks = malloc(q->capacity * sizeof(tad_block_t));
q->count = 0;
}
static void tad_queue_push(tad_block_queue_t *q, tad_block_t block) {
if (q->count >= q->capacity) {
q->capacity *= 2;
q->blocks = realloc(q->blocks, q->capacity * sizeof(tad_block_t));
}
q->blocks[q->count++] = block;
}
static void tad_queue_free(tad_block_queue_t *q) {
free(q->blocks);
}
// Check if all coefficients in block have |coeff| < threshold
static bool tad_is_zero_block(int8_t *coeffs, const tad_block_t *block, int threshold) {
for (int i = block->start; i < block->start + block->length; i++) {
if (abs(coeffs[i]) >= threshold) {
return false;
}
}
return true;
}
// Find maximum absolute coefficient value
static int tad_find_max_abs(int8_t *coeffs, size_t count) {
int max_abs = 0;
for (size_t i = 0; i < count; i++) {
int abs_val = abs(coeffs[i]);
if (abs_val > max_abs) {
max_abs = abs_val;
}
}
return max_abs;
}
// Get MSB position (bitplane number)
static int tad_get_msb_bitplane(int value) {
if (value == 0) return 0;
int bitplane = 0;
while (value > 1) {
value >>= 1;
bitplane++;
}
return bitplane;
}
// Context for recursive EZBC processing
typedef struct {
tad_bitstream_t *bs;
int8_t *coeffs;
tad_coeff_state_t *states;
int length;
int bitplane;
int threshold;
tad_block_queue_t *next_insignificant;
tad_block_queue_t *next_significant;
int *sign_count;
} tad_ezbc_context_t;
// Recursively process a significant block - subdivide until size 1
static void tad_process_significant_block_recursive(tad_ezbc_context_t *ctx, tad_block_t block) {
// If size 1: emit sign bit and add to significant queue
if (block.length == 1) {
int idx = block.start;
tad_bitstream_write_bit(ctx->bs, ctx->coeffs[idx] < 0 ? 1 : 0);
(*ctx->sign_count)++;
ctx->states[idx].significant = true;
ctx->states[idx].first_bitplane = ctx->bitplane;
tad_queue_push(ctx->next_significant, block);
return;
}
// Block is > 1: subdivide into left and right halves
int mid = block.length / 2;
if (mid == 0) mid = 1;
// Process left child
tad_block_t left = {block.start, mid};
if (!tad_is_zero_block(ctx->coeffs, &left, ctx->threshold)) {
tad_bitstream_write_bit(ctx->bs, 1); // Significant
tad_process_significant_block_recursive(ctx, left);
} else {
tad_bitstream_write_bit(ctx->bs, 0); // Insignificant
tad_queue_push(ctx->next_insignificant, left);
}
// Process right child (if exists)
if (block.length > mid) {
tad_block_t right = {block.start + mid, block.length - mid};
if (!tad_is_zero_block(ctx->coeffs, &right, ctx->threshold)) {
tad_bitstream_write_bit(ctx->bs, 1);
tad_process_significant_block_recursive(ctx, right);
} else {
tad_bitstream_write_bit(ctx->bs, 0);
tad_queue_push(ctx->next_insignificant, right);
}
}
}
// Binary tree EZBC encoding for a single channel (1D variant)
// Made non-static for testing
size_t tad_encode_channel_ezbc(int8_t *coeffs, size_t count, uint8_t **output) {
tad_bitstream_t bs;
tad_bitstream_init(&bs, count / 4); // Initial guess
// Track coefficient significance
tad_coeff_state_t *states = calloc(count, sizeof(tad_coeff_state_t));
// Find maximum value to determine MSB bitplane
int max_abs = tad_find_max_abs(coeffs, count);
int msb_bitplane = tad_get_msb_bitplane(max_abs);
// Write header: MSB bitplane and length
tad_bitstream_write_bits(&bs, msb_bitplane, 8);
tad_bitstream_write_bits(&bs, (uint32_t)count, 16);
// Initialise queues
tad_block_queue_t insignificant_queue, next_insignificant;
tad_block_queue_t significant_queue, next_significant;
tad_queue_init(&insignificant_queue);
tad_queue_init(&next_insignificant);
tad_queue_init(&significant_queue);
tad_queue_init(&next_significant);
// Start with root block as insignificant
tad_block_t root = {0, (int)count};
tad_queue_push(&insignificant_queue, root);
// Process bitplanes from MSB to LSB
for (int bitplane = msb_bitplane; bitplane >= 0; bitplane--) {
int threshold = 1 << bitplane;
// Process insignificant blocks - check if they become significant
for (size_t i = 0; i < insignificant_queue.count; i++) {
tad_block_t block = insignificant_queue.blocks[i];
if (tad_is_zero_block(coeffs, &block, threshold)) {
// Still insignificant: emit 0
tad_bitstream_write_bit(&bs, 0);
// Keep in insignificant queue for next bitplane
tad_queue_push(&next_insignificant, block);
} else {
// Became significant: emit 1
tad_bitstream_write_bit(&bs, 1);
// Use recursive subdivision
int sign_count = 0;
tad_ezbc_context_t ctx = {
.bs = &bs,
.coeffs = coeffs,
.states = states,
.length = (int)count,
.bitplane = bitplane,
.threshold = threshold,
.next_insignificant = &next_insignificant,
.next_significant = &next_significant,
.sign_count = &sign_count
};
tad_process_significant_block_recursive(&ctx, block);
}
}
// Refinement pass: emit next bit for already-significant coefficients
for (size_t i = 0; i < significant_queue.count; i++) {
tad_block_t block = significant_queue.blocks[i];
int idx = block.start;
// Emit refinement bit (bit at position 'bitplane')
int bit = (abs(coeffs[idx]) >> bitplane) & 1;
tad_bitstream_write_bit(&bs, bit);
// Add to next_significant so it continues being refined
tad_queue_push(&next_significant, block);
}
// Swap queues for next bitplane
tad_block_queue_t temp_insig = insignificant_queue;
insignificant_queue = next_insignificant;
next_insignificant = temp_insig;
next_insignificant.count = 0; // Clear for reuse
tad_block_queue_t temp_sig = significant_queue;
significant_queue = next_significant;
next_significant = temp_sig;
next_significant.count = 0; // Clear for reuse
}
// Cleanup queues
tad_queue_free(&insignificant_queue);
tad_queue_free(&next_insignificant);
tad_queue_free(&significant_queue);
tad_queue_free(&next_significant);
free(states);
// Copy bitstream to output
size_t output_size = tad_bitstream_size(&bs);
*output = malloc(output_size);
memcpy(*output, bs.data, output_size);
tad_bitstream_free(&bs);
return output_size;
}
//=============================================================================
// Public API: Chunk Encoding
//=============================================================================
size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
int max_index,
float quantiser_scale, uint8_t *output) {
// Calculate DWT levels from chunk size
int dwt_levels = calculate_dwt_levels(num_samples);
if (dwt_levels < 0) {
fprintf(stderr, "Error: Invalid chunk size %zu\n", num_samples);
return 0;
}
// Allocate working buffers (PCM32f throughout, int32 coefficients)
float *pcm32_left = malloc(num_samples * sizeof(float));
float *pcm32_right = malloc(num_samples * sizeof(float));
float *pcm32_mid = malloc(num_samples * sizeof(float));
float *pcm32_side = malloc(num_samples * sizeof(float));
float *dwt_mid = malloc(num_samples * sizeof(float));
float *dwt_side = malloc(num_samples * sizeof(float));
int8_t *quant_mid = malloc(num_samples * sizeof(int8_t));
int8_t *quant_side = malloc(num_samples * sizeof(int8_t));
// Step 1: Deinterleave stereo
for (size_t i = 0; i < num_samples; i++) {
pcm32_left[i] = pcm32_stereo[i * 2];
pcm32_right[i] = pcm32_stereo[i * 2 + 1];
}
// Step 1.1: Apply pre-emphasis filter (BEFORE gamma compression)
apply_preemphasis(pcm32_left, pcm32_right, num_samples);
// Step 1.2: Compress dynamic range
compress_gamma(pcm32_left, pcm32_right, num_samples);
// compress_mu_law(pcm32_left, pcm32_right, num_samples);
// Step 2: M/S decorrelation
ms_decorrelate(pcm32_left, pcm32_right, pcm32_mid, pcm32_side, num_samples);
// Step 3: Convert to float and apply DWT
for (size_t i = 0; i < num_samples; i++) {
dwt_mid[i] = pcm32_mid[i];
dwt_side[i] = pcm32_side[i];
}
dwt_forward_multilevel(dwt_mid, num_samples, dwt_levels);
dwt_forward_multilevel(dwt_side, num_samples, dwt_levels);
// Step 3.5: Accumulate coefficient statistics if enabled
static int stats_enabled = -1;
if (stats_enabled == -1) {
stats_enabled = getenv("TAD_COEFF_STATS") != NULL;
if (stats_enabled) {
init_statistics(dwt_levels);
}
}
if (stats_enabled) {
accumulate_coefficients(dwt_mid, dwt_levels, num_samples, mid_accumulators);
accumulate_coefficients(dwt_side, dwt_levels, num_samples, side_accumulators);
}
// apply_coeff_deadzone(0, dwt_mid, num_samples);
// apply_coeff_deadzone(1, dwt_side, num_samples);
// Step 4: Quantise with frequency-dependent weights and quantiser scaling
quantise_dwt_coefficients(0, dwt_mid, quant_mid, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale);
quantise_dwt_coefficients(1, dwt_side, quant_side, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale);
// Step 4.5: Accumulate quantised coefficient statistics if enabled
if (stats_enabled) {
accumulate_quantised(quant_mid, dwt_levels, num_samples, mid_quant_accumulators);
accumulate_quantised(quant_side, dwt_levels, num_samples, side_quant_accumulators);
}
// Step 5: Encode with binary tree EZBC (1D variant) - FIXED!
uint8_t *mid_ezbc = NULL;
uint8_t *side_ezbc = NULL;
size_t mid_size = tad_encode_channel_ezbc(quant_mid, num_samples, &mid_ezbc);
size_t side_size = tad_encode_channel_ezbc(quant_side, num_samples, &side_ezbc);
// Concatenate EZBC outputs
size_t uncompressed_size = mid_size + side_size;
uint8_t *temp_buffer = malloc(uncompressed_size);
memcpy(temp_buffer, mid_ezbc, mid_size);
memcpy(temp_buffer + mid_size, side_ezbc, side_size);
free(mid_ezbc);
free(side_ezbc);
// Step 6: Zstd compression
uint8_t *write_ptr = output;
// Write chunk header
*((uint16_t*)write_ptr) = (uint16_t)num_samples;
write_ptr += sizeof(uint16_t);
*write_ptr = (uint8_t)max_index;
write_ptr += sizeof(uint8_t);
uint32_t *payload_size_ptr = (uint32_t*)write_ptr;
write_ptr += sizeof(uint32_t);
size_t payload_size;
size_t zstd_bound = ZSTD_compressBound(uncompressed_size);
uint8_t *zstd_buffer = malloc(zstd_bound);
payload_size = ZSTD_compress(zstd_buffer, zstd_bound, temp_buffer, uncompressed_size, TAD32_ZSTD_LEVEL);
if (ZSTD_isError(payload_size)) {
fprintf(stderr, "Error: Zstd compression failed: %s\n", ZSTD_getErrorName(payload_size));
free(zstd_buffer);
free(pcm32_left); free(pcm32_right);
free(pcm32_mid); free(pcm32_side); free(dwt_mid); free(dwt_side);
free(quant_mid); free(quant_side); free(temp_buffer);
return 0;
}
memcpy(write_ptr, zstd_buffer, payload_size);
free(zstd_buffer);
*payload_size_ptr = (uint32_t)payload_size;
write_ptr += payload_size;
// Cleanup
free(pcm32_left); free(pcm32_right);
free(pcm32_mid); free(pcm32_side); free(dwt_mid); free(dwt_side);
free(quant_mid); free(quant_side); free(temp_buffer);
return write_ptr - output;
}