diff --git a/video_encoder/decoder_tad.c b/video_encoder/decoder_tad.c index 2b99ed0..74b01c1 100644 --- a/video_encoder/decoder_tad.c +++ b/video_encoder/decoder_tad.c @@ -18,6 +18,22 @@ // Index 0 = LL band, Index 1-9 = H bands (L9 to L1) static const float TAD32_COEFF_SCALARS[] = {64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f}; +// Base quantiser weight table (10 subbands: LL + 9 H bands) +// Linearly spaced from 1.0 (LL) to 2.0 (H9) +// These weights are multiplied by quantiser_scale during dequantization +static const float BASE_QUANTISER_WEIGHTS[] = { + 1.0f, // LL (L9) - finest preservation + 1.111f, // H (L9) + 1.222f, // H (L8) + 1.333f, // H (L7) + 1.444f, // H (L6) + 1.556f, // H (L5) + 1.667f, // H (L4) + 1.778f, // H (L3) + 1.889f, // H (L2) + 2.0f // H (L1) - coarsest quantization +}; + #define TAD_DEFAULT_CHUNK_SIZE 32768 #define TAD_MIN_CHUNK_SIZE 1024 #define TAD_SAMPLE_RATE 32000 @@ -333,11 +349,11 @@ static void pcm32f_to_pcm8(const float *fleft, const float *fright, uint8_t *lef //============================================================================= -#define LAMBDA_FIXED 5.0f +#define LAMBDA_FIXED 6.0f // Lambda-based decompanding decoder (inverse of Laplacian CDF-based encoder) // Converts quantized index back to normalized float in [-1, 1] -static float lambda_decompanding(int16_t quant_val, int max_index) { +static float lambda_decompanding(int8_t quant_val, int max_index) { // Handle zero if (quant_val == 0) { return 0.0f; @@ -366,7 +382,7 @@ static float lambda_decompanding(int16_t quant_val, int max_index) { return sign * abs_val; } -static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs, size_t count, int chunk_size, int dwt_levels, int max_index) { +static void dequantize_dwt_coefficients(const int8_t *quantized, float *coeffs, size_t count, int chunk_size, int dwt_levels, int max_index, float quantiser_scale) { // Calculate sideband boundaries dynamically int first_band_size = chunk_size >> dwt_levels; @@ -390,63 +406,14 @@ static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs, // Decode using lambda companding float normalized_val = lambda_decompanding(quantized[i], max_index); - // Denormalize using the subband scalar - coeffs[i] = normalized_val * TAD32_COEFF_SCALARS[sideband]; + // Denormalize using the subband scalar and apply base weight + quantiser scaling + float weight = BASE_QUANTISER_WEIGHTS[sideband] * quantiser_scale; + coeffs[i] = normalized_val * TAD32_COEFF_SCALARS[sideband] * weight; } free(sideband_starts); } -//============================================================================= -// Bitplane Decoding with Delta Prediction -//============================================================================= - -// Pure bitplane decoding with delta prediction: each coefficient uses exactly (quant_bits + 1) bits -// Bit layout: 1 sign bit + quant_bits magnitude bits -// Sign bit: 0 = positive/zero, 1 = negative -// Magnitude: unsigned value [0, 2^quant_bits - 1] -// Delta prediction: plane[i] ^= plane[i-1] (reversed by same operation) -static size_t decode_bitplanes(const uint8_t *input, int16_t *values, size_t count, int max_index) { - int bits_per_coeff = ((int)ceilf(log2f(max_index))) + 1; // 1 sign bit + quant_bits magnitude bits - size_t plane_bytes = (count + 7) / 8; // Bytes needed for one bitplane - size_t input_bytes = plane_bytes * bits_per_coeff; - - // Allocate temporary bitplanes - uint8_t **bitplanes = malloc(bits_per_coeff * sizeof(uint8_t*)); - for (int plane = 0; plane < bits_per_coeff; plane++) { - bitplanes[plane] = malloc(plane_bytes); - memcpy(bitplanes[plane], input + (plane * plane_bytes), plane_bytes); - } - - // Reconstruct coefficients from bitplanes - for (size_t i = 0; i < count; i++) { - size_t byte_idx = i / 8; - size_t bit_offset = i % 8; - - // Read sign bit (plane 0) - uint8_t sign_bit = (bitplanes[0][byte_idx] >> bit_offset) & 0x01; - - // Read magnitude bits (planes 1 to quant_bits) - uint16_t magnitude = 0; - for (int b = 0; b < bits_per_coeff - 1; b++) { - if (bitplanes[b + 1][byte_idx] & (1 << bit_offset)) { - magnitude |= (1 << b); - } - } - - // Reconstruct signed value - values[i] = sign_bit ? -(int16_t)magnitude : (int16_t)magnitude; - } - - // Free temporary bitplanes - for (int plane = 0; plane < bits_per_coeff; plane++) { - free(bitplanes[plane]); - } - free(bitplanes); - - return input_bytes; -} - //============================================================================= // Chunk Decoding //============================================================================= @@ -477,7 +444,7 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_ uint8_t *decompressed = NULL; // Estimate decompressed size (generous upper bound) - size_t decompressed_size = sample_count * 4 * sizeof(int16_t); + size_t decompressed_size = sample_count * 4 * sizeof(int8_t); decompressed = malloc(decompressed_size); size_t actual_size = ZSTD_decompress(decompressed, decompressed_size, read_ptr, payload_size); @@ -488,15 +455,13 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_ return -1; } - payload = decompressed; - read_ptr += payload_size; *bytes_consumed = read_ptr - input; *samples_decoded = sample_count; // Allocate working buffers - int16_t *quant_mid = malloc(sample_count * sizeof(int16_t)); - int16_t *quant_side = malloc(sample_count * sizeof(int16_t)); + int8_t *quant_mid = malloc(sample_count * sizeof(int8_t)); + int8_t *quant_side = malloc(sample_count * sizeof(int8_t)); float *dwt_mid = malloc(sample_count * sizeof(float)); float *dwt_side = malloc(sample_count * sizeof(float)); float *pcm32_left = malloc(sample_count * sizeof(float)); @@ -504,16 +469,16 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_ uint8_t *pcm8_left = malloc(sample_count * sizeof(uint8_t)); uint8_t *pcm8_right = malloc(sample_count * sizeof(uint8_t)); - // Decode bitplanes - const uint8_t *payload_ptr = payload; - size_t mid_bytes, side_bytes; + // Separate Mid/Side + memcpy(quant_mid, decompressed, sample_count); + memcpy(quant_side, decompressed + sample_count, sample_count); - mid_bytes = decode_bitplanes(payload_ptr, quant_mid, sample_count, max_index); - side_bytes = decode_bitplanes(payload_ptr + mid_bytes, quant_side, sample_count, max_index); - // Dequantize - dequantize_dwt_coefficients(quant_mid, dwt_mid, sample_count, sample_count, dwt_levels, max_index); - dequantize_dwt_coefficients(quant_side, dwt_side, sample_count, sample_count, dwt_levels, max_index); + // Dequantize with quantiser scaling + // Use quantiser_scale = 1.0f for baseline (must match encoder) + float quantiser_scale = 1.0f; + dequantize_dwt_coefficients(quant_mid, dwt_mid, sample_count, sample_count, dwt_levels, max_index, quantiser_scale); + dequantize_dwt_coefficients(quant_side, dwt_side, sample_count, sample_count, dwt_levels, max_index, quantiser_scale); // Inverse DWT dwt_haar_inverse_multilevel(dwt_mid, sample_count, dwt_levels); diff --git a/video_encoder/encoder_tad.c b/video_encoder/encoder_tad.c index 9196718..6d0b42b 100644 --- a/video_encoder/encoder_tad.c +++ b/video_encoder/encoder_tad.c @@ -18,10 +18,26 @@ // Index 0 = LL band, Index 1-9 = H bands (L9 to L1) static const float TAD32_COEFF_SCALARS[] = {64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f}; +// Base quantiser weight table (10 subbands: LL + 9 H bands) +// Linearly spaced from 1.0 (LL) to 2.0 (H9) +// These weights are multiplied by quantiser_scale during quantization +static const float BASE_QUANTISER_WEIGHTS[] = { + 1.0f, // LL (L9) - finest preservation + 1.111f, // H (L9) + 1.222f, // H (L8) + 1.333f, // H (L7) + 1.444f, // H (L6) + 1.556f, // H (L5) + 1.667f, // H (L4) + 1.778f, // H (L3) + 1.889f, // H (L2) + 2.0f // H (L1) - coarsest quantization +}; + // Forward declarations for internal functions static void dwt_dd4_forward_1d(float *data, int length); static void dwt_dd4_forward_multilevel(float *data, int length, int levels); -static void quantize_dwt_coefficients(const float *coeffs, int8_t *quantized, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int quant_bits, int *current_subband_index); +static void quantize_dwt_coefficients(const float *coeffs, int8_t *quantized, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int quant_bits, int *current_subband_index, float quantiser_scale); static size_t encode_twobitmap(const int8_t *values, size_t count, uint8_t *output); static inline float FCLAMP(float x, float min, float max) { @@ -229,7 +245,7 @@ static void compress_mu_law(float *left, float *right, size_t count) { // Quantization with Frequency-Dependent Weighting //============================================================================= -#define LAMBDA_FIXED 5.0f +#define LAMBDA_FIXED 6.0f // Lambda-based companding encoder (based on Laplacian distribution CDF) // val must be normalised to [-1,1] @@ -264,7 +280,7 @@ static int8_t lambda_companding(float val, int max_index) { return (int8_t)(sign * index); } -static void quantize_dwt_coefficients(const float *coeffs, int8_t *quantized, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int max_index, int *current_subband_index) { +static void quantize_dwt_coefficients(const float *coeffs, int8_t *quantized, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int max_index, int *current_subband_index, float quantiser_scale) { int first_band_size = chunk_size >> dwt_levels; int *sideband_starts = malloc((dwt_levels + 2) * sizeof(int)); @@ -288,8 +304,10 @@ static void quantize_dwt_coefficients(const float *coeffs, int8_t *quantized, si current_subband_index[i] = sideband; } - float val = (coeffs[i] / (TAD32_COEFF_SCALARS[sideband])); // val is normalised to [-1,1] - int16_t quant_val = lambda_companding(val, max_index); + // Apply base weight and quantiser scaling + float weight = BASE_QUANTISER_WEIGHTS[sideband] * quantiser_scale; + float val = (coeffs[i] / (TAD32_COEFF_SCALARS[sideband] * weight)); // val is normalised to [-1,1] + int8_t quant_val = lambda_companding(val, max_index); quantized[i] = quant_val; } @@ -761,7 +779,8 @@ void tad32_free_statistics(void) { //============================================================================= size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, - int max_index, int use_zstd, int use_twobitmap, uint8_t *output) { + int max_index, int use_zstd, int use_twobitmap, + float quantiser_scale, uint8_t *output) { // Calculate DWT levels from chunk size int dwt_levels = calculate_dwt_levels(num_samples); if (dwt_levels < 0) { @@ -815,9 +834,9 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, accumulate_coefficients(dwt_side, dwt_levels, num_samples, side_accumulators); } - // Step 4: Quantize with frequency-dependent weights and dead zone - quantize_dwt_coefficients(dwt_mid, quant_mid, num_samples, 1, num_samples, dwt_levels, max_index, NULL); - quantize_dwt_coefficients(dwt_side, quant_side, num_samples, 1, num_samples, dwt_levels, max_index, NULL); + // Step 4: Quantize with frequency-dependent weights and quantiser scaling + quantize_dwt_coefficients(dwt_mid, quant_mid, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale); + quantize_dwt_coefficients(dwt_side, quant_side, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale); // Step 4.5: Accumulate quantized coefficient statistics if enabled if (stats_enabled) { @@ -829,16 +848,11 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, uint8_t *temp_buffer = malloc(num_samples * 4); // Generous buffer size_t mid_size, side_size; - if (use_twobitmap) { - mid_size = encode_twobitmap(quant_mid, num_samples, temp_buffer); - side_size = encode_twobitmap(quant_side, num_samples, temp_buffer + mid_size); - } else { - // Raw int8_t storage - memcpy(temp_buffer, quant_mid, num_samples); - mid_size = num_samples; - memcpy(temp_buffer + mid_size, quant_side, num_samples); - side_size = num_samples; - } + // Raw int8_t storage + memcpy(temp_buffer, quant_mid, num_samples); + mid_size = num_samples; + memcpy(temp_buffer + mid_size, quant_side, num_samples); + side_size = num_samples; size_t uncompressed_size = mid_size + side_size; diff --git a/video_encoder/encoder_tad.h b/video_encoder/encoder_tad.h index 75400eb..e200ca6 100644 --- a/video_encoder/encoder_tad.h +++ b/video_encoder/encoder_tad.h @@ -22,13 +22,15 @@ /** * Encode audio chunk with TAD32 codec (PCM32f version) * - * @param pcm32_stereo Input PCM32fLE stereo samples (interleaved L,R) - * @param num_samples Number of samples per channel (min 1024) - * @param quant_bits Quantization bits 4-12 (default: 7) - * @param use_zstd 1=enable Zstd compression, 0=disable - * @param use_twobitmap 1=enable twobitmap encoding, 0=raw int8_t storage - * @param output Output buffer (must be large enough) - * @return Number of bytes written to output, or 0 on error + * @param pcm32_stereo Input PCM32fLE stereo samples (interleaved L,R) + * @param num_samples Number of samples per channel (min 1024) + * @param quant_bits Quantization bits 4-12 (default: 7) + * @param use_zstd 1=enable Zstd compression, 0=disable + * @param use_twobitmap 1=enable twobitmap encoding, 0=raw int8_t storage + * @param quantiser_scale Quantiser scaling factor (1.0=baseline, 2.0=2x coarser quantization) + * Higher values = more aggressive quantization = smaller files + * @param output Output buffer (must be large enough) + * @return Number of bytes written to output, or 0 on error * * Output format: * uint16 sample_count (samples per channel) @@ -37,7 +39,8 @@ * * payload (encoded M/S data, optionally Zstd-compressed) */ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, - int quant_bits, int use_zstd, int use_twobitmap, uint8_t *output); + int quant_bits, int use_zstd, int use_twobitmap, + float quantiser_scale, uint8_t *output); /** * Print accumulated coefficient statistics diff --git a/video_encoder/encoder_tad_standalone.c b/video_encoder/encoder_tad_standalone.c index bc88c7d..335a052 100644 --- a/video_encoder/encoder_tad_standalone.c +++ b/video_encoder/encoder_tad_standalone.c @@ -50,6 +50,9 @@ static void print_usage(const char *prog_name) { printf(" -o Output TAD32 file\n"); printf(" -q Quantization bits (default: 7, range: 4-8)\n"); printf(" Higher = more precision, larger files\n"); + printf(" -s Quantiser scaling factor (default: 1.0, range: 0.5-4.0)\n"); + printf(" Higher = more aggressive quantization, smaller files\n"); + printf(" 2.0 = quantize 2x coarser than baseline\n"); printf(" --no-zstd Disable Zstd compression\n"); printf(" --no-twobitmap Disable twobitmap encoding (use raw int8_t storage)\n"); printf(" -v Verbose output\n"); @@ -65,6 +68,7 @@ int main(int argc, char *argv[]) { char *input_file = NULL; char *output_file = NULL; int max_index = 7; // Default QUANT_BITS + float quantiser_scale = 1.0f; // Default quantiser scaling int use_zstd = 1; int use_twobitmap = 1; int verbose = 0; @@ -79,7 +83,7 @@ int main(int argc, char *argv[]) { int opt; int option_index = 0; - while ((opt = getopt_long(argc, argv, "i:o:q:vh", long_options, &option_index)) != -1) { + while ((opt = getopt_long(argc, argv, "i:o:q:s:vh", long_options, &option_index)) != -1) { switch (opt) { case 'i': input_file = optarg; @@ -89,7 +93,13 @@ int main(int argc, char *argv[]) { break; case 'q': max_index = atoi(optarg); - + break; + case 's': + quantiser_scale = atof(optarg); + if (quantiser_scale < 0.5f || quantiser_scale > 4.0f) { + fprintf(stderr, "Error: Quantiser scale must be in range 0.5-4.0\n"); + return 1; + } break; case 'z': use_zstd = 0; @@ -119,7 +129,8 @@ int main(int argc, char *argv[]) { printf("%s\n", ENCODER_VENDOR_STRING); printf("Input: %s\n", input_file); printf("Output: %s\n", output_file); - printf("Quant: %d\n", max_index); + printf("Quant bits: %d\n", max_index); + printf("Quantiser scale: %.2f\n", quantiser_scale); printf("Encoding method: %s (int8_t coefficients)\n", use_twobitmap ? "Twobit-map significance map" : "Raw int8_t storage"); printf("Zstd compression: %s\n", use_zstd ? "enabled" : "disabled"); @@ -248,7 +259,8 @@ int main(int argc, char *argv[]) { // Encode chunk using linked tad32_encode_chunk() from encoder_tad32.c size_t encoded_size = tad32_encode_chunk(chunk_buffer, TAD32_DEFAULT_CHUNK_SIZE, - max_index, use_zstd, use_twobitmap, output_buffer); + max_index, use_zstd, use_twobitmap, + quantiser_scale, output_buffer); if (encoded_size == 0) { fprintf(stderr, "Error: Chunk encoding failed at chunk %zu\n", chunk_idx);