diff --git a/video_encoder/decoder_tad.c b/video_encoder/decoder_tad.c index 344c94e..2b99ed0 100644 --- a/video_encoder/decoder_tad.c +++ b/video_encoder/decoder_tad.c @@ -337,7 +337,7 @@ static void pcm32f_to_pcm8(const float *fleft, const float *fright, uint8_t *lef // Lambda-based decompanding decoder (inverse of Laplacian CDF-based encoder) // Converts quantized index back to normalized float in [-1, 1] -static float lambda_decompanding(int16_t quant_val, int quant_bits) { +static float lambda_decompanding(int16_t quant_val, int max_index) { // Handle zero if (quant_val == 0) { return 0.0f; @@ -346,9 +346,6 @@ static float lambda_decompanding(int16_t quant_val, int quant_bits) { int sign = (quant_val < 0) ? -1 : 1; int abs_index = abs(quant_val); - // Maximum index for the given quant_bits - int max_index = (1 << (quant_bits - 1)) - 1; - // Clamp to valid range if (abs_index > max_index) abs_index = max_index; @@ -369,7 +366,7 @@ static float lambda_decompanding(int16_t quant_val, int quant_bits) { return sign * abs_val; } -static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs, size_t count, int chunk_size, int dwt_levels, int quant_bits) { +static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs, size_t count, int chunk_size, int dwt_levels, int max_index) { // Calculate sideband boundaries dynamically int first_band_size = chunk_size >> dwt_levels; @@ -391,7 +388,7 @@ static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs, } // Decode using lambda companding - float normalized_val = lambda_decompanding(quantized[i], quant_bits); + float normalized_val = lambda_decompanding(quantized[i], max_index); // Denormalize using the subband scalar coeffs[i] = normalized_val * TAD32_COEFF_SCALARS[sideband]; @@ -409,8 +406,8 @@ static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs, // Sign bit: 0 = positive/zero, 1 = negative // Magnitude: unsigned value [0, 2^quant_bits - 1] // Delta prediction: plane[i] ^= plane[i-1] (reversed by same operation) -static size_t decode_bitplanes(const uint8_t *input, int16_t *values, size_t count, int quant_bits) { - int bits_per_coeff = quant_bits + 1; // 1 sign bit + quant_bits magnitude bits +static size_t decode_bitplanes(const uint8_t *input, int16_t *values, size_t count, int max_index) { + int bits_per_coeff = ((int)ceilf(log2f(max_index))) + 1; // 1 sign bit + quant_bits magnitude bits size_t plane_bytes = (count + 7) / 8; // Bytes needed for one bitplane size_t input_bytes = plane_bytes * bits_per_coeff; @@ -421,13 +418,6 @@ static size_t decode_bitplanes(const uint8_t *input, int16_t *values, size_t cou memcpy(bitplanes[plane], input + (plane * plane_bytes), plane_bytes); } - // Reverse delta prediction: plane[i] ^= plane[i-1] - for (int plane = 0; plane < bits_per_coeff; plane++) { - for (size_t byte = 1; byte < plane_bytes; byte++) { - bitplanes[plane][byte] ^= bitplanes[plane][byte - 1]; - } - } - // Reconstruct coefficients from bitplanes for (size_t i = 0; i < count; i++) { size_t byte_idx = i / 8; @@ -438,7 +428,7 @@ static size_t decode_bitplanes(const uint8_t *input, int16_t *values, size_t cou // Read magnitude bits (planes 1 to quant_bits) uint16_t magnitude = 0; - for (int b = 0; b < quant_bits; b++) { + for (int b = 0; b < bits_per_coeff - 1; b++) { if (bitplanes[b + 1][byte_idx] & (1 << bit_offset)) { magnitude |= (1 << b); } @@ -469,7 +459,7 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_ uint16_t sample_count = *((const uint16_t*)read_ptr); read_ptr += sizeof(uint16_t); - uint8_t quant_bits = *read_ptr; + uint8_t max_index = *read_ptr; read_ptr += sizeof(uint8_t); uint32_t payload_size = *((const uint32_t*)read_ptr); @@ -518,12 +508,12 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_ const uint8_t *payload_ptr = payload; size_t mid_bytes, side_bytes; - mid_bytes = decode_bitplanes(payload_ptr, quant_mid, sample_count, quant_bits); - side_bytes = decode_bitplanes(payload_ptr + mid_bytes, quant_side, sample_count, quant_bits); + mid_bytes = decode_bitplanes(payload_ptr, quant_mid, sample_count, max_index); + side_bytes = decode_bitplanes(payload_ptr + mid_bytes, quant_side, sample_count, max_index); // Dequantize - dequantize_dwt_coefficients(quant_mid, dwt_mid, sample_count, sample_count, dwt_levels, quant_bits); - dequantize_dwt_coefficients(quant_side, dwt_side, sample_count, sample_count, dwt_levels, quant_bits); + dequantize_dwt_coefficients(quant_mid, dwt_mid, sample_count, sample_count, dwt_levels, max_index); + dequantize_dwt_coefficients(quant_side, dwt_side, sample_count, sample_count, dwt_levels, max_index); // Inverse DWT dwt_haar_inverse_multilevel(dwt_mid, sample_count, dwt_levels); diff --git a/video_encoder/encoder_tad.c b/video_encoder/encoder_tad.c index 9889db4..71b183c 100644 --- a/video_encoder/encoder_tad.c +++ b/video_encoder/encoder_tad.c @@ -236,7 +236,7 @@ static void compress_mu_law(float *left, float *right, size_t count) { // Lambda-based companding encoder (based on Laplacian distribution CDF) // val must be normalised to [-1,1] // Returns quantized index in range [-(2^quant_bits-1), +(2^quant_bits-1)] -static int16_t lambda_companding(float val, int quant_bits) { +static int16_t lambda_companding(float val, int max_index) { // Handle zero if (fabsf(val) < 1e-9f) { return 0; @@ -248,8 +248,6 @@ static int16_t lambda_companding(float val, int quant_bits) { // Clamp to [0, 1] if (abs_val > 1.0f) abs_val = 1.0f; - // Maximum index for the given quant_bits - int max_index = (1 << (quant_bits - 1)) - 1; // Laplacian CDF for x >= 0: F(x) = 1 - 0.5 * exp(-λ*x) // Map to [0.5, 1.0] range (half of CDF for positive values) @@ -268,7 +266,7 @@ static int16_t lambda_companding(float val, int quant_bits) { return (int16_t)(sign * index); } -static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int quant_bits, int *current_subband_index) { +static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int max_index, int *current_subband_index) { int first_band_size = chunk_size >> dwt_levels; int *sideband_starts = malloc((dwt_levels + 2) * sizeof(int)); @@ -293,7 +291,7 @@ static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, s } float val = (coeffs[i] / (TAD32_COEFF_SCALARS[sideband])); // val is normalised to [-1,1] - int16_t quant_val = lambda_companding(val, quant_bits); + int16_t quant_val = lambda_companding(val, max_index); quantized[i] = quant_val; } @@ -310,8 +308,8 @@ static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, s // Sign bit: 0 = positive/zero, 1 = negative // Magnitude: unsigned value [0, 2^quant_bits - 1] // Delta prediction: plane[i] ^= plane[i-1] for better compression -static size_t encode_bitplanes(const int16_t *values, size_t count, uint8_t *output, int quant_bits) { - int bits_per_coeff = quant_bits + 1; // 1 sign bit + quant_bits magnitude bits +static size_t encode_bitplanes(const int16_t *values, size_t count, uint8_t *output, int max_index) { + int bits_per_coeff = ((int)ceilf(log2f(max_index))) + 1; // 1 sign bit + quant_bits magnitude bits size_t plane_bytes = (count + 7) / 8; // Bytes needed for one bitplane size_t output_bytes = plane_bytes * bits_per_coeff; @@ -332,7 +330,7 @@ static size_t encode_bitplanes(const int16_t *values, size_t count, uint8_t *out uint16_t magnitude = (val < 0) ? -val : val; // Clamp magnitude to max value for quant_bits - uint16_t max_magnitude = (1 << quant_bits) - 1; + uint16_t max_magnitude = max_index; if (magnitude > max_magnitude) { magnitude = max_magnitude; } @@ -346,20 +344,13 @@ static size_t encode_bitplanes(const int16_t *values, size_t count, uint8_t *out } // Magnitude bitplanes (planes 1 to quant_bits) - for (int b = 0; b < quant_bits; b++) { + for (int b = 0; b < bits_per_coeff - 1; b++) { if (magnitude & (1 << b)) { bitplanes[b + 1][byte_idx] |= (1 << bit_offset); } } } - // Apply delta prediction: plane[i] ^= plane[i-1] - for (int plane = 0; plane < bits_per_coeff; plane++) { - for (size_t byte = plane_bytes - 1; byte > 0; byte--) { - bitplanes[plane][byte] ^= bitplanes[plane][byte - 1]; - } - } - free(bitplanes); return output_bytes; } @@ -636,7 +627,7 @@ void tad32_free_statistics(void) { //============================================================================= size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, - int quant_bits, int use_zstd, uint8_t *output) { + int max_index, int use_zstd, uint8_t *output) { // Calculate DWT levels from chunk size int dwt_levels = calculate_dwt_levels(num_samples); if (dwt_levels < 0) { @@ -680,7 +671,7 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, // Step 3.5: Accumulate coefficient statistics if enabled static int stats_enabled = -1; if (stats_enabled == -1) { - stats_enabled = 1;//getenv("TAD_COEFF_STATS") != NULL; + stats_enabled = getenv("TAD_COEFF_STATS") != NULL; if (stats_enabled) { init_statistics(dwt_levels); } @@ -691,13 +682,13 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, } // Step 4: Quantize with frequency-dependent weights and dead zone - quantize_dwt_coefficients(dwt_mid, quant_mid, num_samples, 1, num_samples, dwt_levels, quant_bits, NULL); - quantize_dwt_coefficients(dwt_side, quant_side, num_samples, 1, num_samples, dwt_levels, quant_bits, NULL); + quantize_dwt_coefficients(dwt_mid, quant_mid, num_samples, 1, num_samples, dwt_levels, max_index, NULL); + quantize_dwt_coefficients(dwt_side, quant_side, num_samples, 1, num_samples, dwt_levels, max_index, NULL); // Step 5: Encode with pure bitplanes (quant_bits + 1 bits per coefficient) uint8_t *temp_buffer = malloc(num_samples * 4 * sizeof(int32_t)); - size_t mid_size = encode_bitplanes(quant_mid, num_samples, temp_buffer, quant_bits); - size_t side_size = encode_bitplanes(quant_side, num_samples, temp_buffer + mid_size, quant_bits); + size_t mid_size = encode_bitplanes(quant_mid, num_samples, temp_buffer, max_index); + size_t side_size = encode_bitplanes(quant_side, num_samples, temp_buffer + mid_size, max_index); size_t uncompressed_size = mid_size + side_size; @@ -708,7 +699,7 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, *((uint16_t*)write_ptr) = (uint16_t)num_samples; write_ptr += sizeof(uint16_t); - *write_ptr = (uint8_t)quant_bits; + *write_ptr = (uint8_t)max_index; write_ptr += sizeof(uint8_t); uint32_t *payload_size_ptr = (uint32_t*)write_ptr; diff --git a/video_encoder/encoder_tad_standalone.c b/video_encoder/encoder_tad_standalone.c index b661948..35a855a 100644 --- a/video_encoder/encoder_tad_standalone.c +++ b/video_encoder/encoder_tad_standalone.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "encoder_tad.h" @@ -62,7 +63,7 @@ int main(int argc, char *argv[]) { char *input_file = NULL; char *output_file = NULL; - int quant_bits = 7; // Default QUANT_BITS + int max_index = 7; // Default QUANT_BITS int use_zstd = 1; int verbose = 0; @@ -84,11 +85,8 @@ int main(int argc, char *argv[]) { output_file = optarg; break; case 'q': - quant_bits = atoi(optarg); - if (quant_bits < 2 || quant_bits > 12) { - fprintf(stderr, "Error: Quantization bits must be between 2 and 12\n"); - return 1; - } + max_index = atoi(optarg); + break; case 'z': use_zstd = 0; @@ -115,8 +113,8 @@ int main(int argc, char *argv[]) { printf("%s\n", ENCODER_VENDOR_STRING); printf("Input: %s\n", input_file); printf("Output: %s\n", output_file); - printf("Quant: %d\n", quant_bits); - printf("Encoding method: Pure bitplanes (%d bits per coefficient)\n", quant_bits + 1); + printf("Quant: %d\n", max_index); + printf("Encoding method: Pure bitplanes (%d bits per coefficient)\n", ((int)ceilf(log2f(max_index))) + 1); printf("Zstd compression: %s\n", use_zstd ? "enabled" : "disabled"); } @@ -243,7 +241,7 @@ int main(int argc, char *argv[]) { // Encode chunk using linked tad32_encode_chunk() from encoder_tad32.c size_t encoded_size = tad32_encode_chunk(chunk_buffer, TAD32_DEFAULT_CHUNK_SIZE, - quant_bits, use_zstd, output_buffer); + max_index, use_zstd, output_buffer); if (encoded_size == 0) { fprintf(stderr, "Error: Chunk encoding failed at chunk %zu\n", chunk_idx); diff --git a/video_encoder/range_coder.c b/video_encoder/range_coder.c new file mode 100644 index 0000000..d5bb5de --- /dev/null +++ b/video_encoder/range_coder.c @@ -0,0 +1,152 @@ +// Simple range coder for TAD audio codec +// Based on range coding with Laplacian probability model + +#include "range_coder.h" +#include +#include + +#define TOP_VALUE 0xFFFFFFFFU +#define BOTTOM_VALUE 0x00FFFFFF + +static inline void range_encoder_put_byte(RangeEncoder *enc, uint8_t byte) { + if (enc->buffer_pos < enc->buffer_capacity) { + enc->buffer[enc->buffer_pos++] = byte; + } +} + +static inline uint8_t range_decoder_get_byte(RangeDecoder *dec) { + if (dec->buffer_pos < dec->buffer_size) { + return dec->buffer[dec->buffer_pos++]; + } + return 0; +} + +static void range_encoder_renormalize(RangeEncoder *enc) { + while (enc->range <= BOTTOM_VALUE) { + range_encoder_put_byte(enc, (enc->low >> 24) & 0xFF); + enc->low <<= 8; + enc->range <<= 8; + } +} + +static void range_decoder_renormalize(RangeDecoder *dec) { + while (dec->range <= BOTTOM_VALUE) { + dec->code = (dec->code << 8) | range_decoder_get_byte(dec); + dec->low <<= 8; + dec->range <<= 8; + } +} + +void range_encoder_init(RangeEncoder *enc, uint8_t *buffer, size_t capacity) { + enc->low = 0; + enc->range = TOP_VALUE; + enc->buffer = buffer; + enc->buffer_pos = 0; + enc->buffer_capacity = capacity; +} + +// Calculate Laplacian CDF for a given value +// CDF(x) = 0.5 * exp(λx) for x < 0 +// CDF(x) = 1 - 0.5 * exp(-λx) for x ≥ 0 +static inline double laplacian_cdf(int16_t value, float lambda) { + if (value < 0) { + return 0.5 * exp(lambda * value); + } else { + return 1.0 - 0.5 * exp(-lambda * value); + } +} + +void range_encode_int16_laplacian(RangeEncoder *enc, int16_t value, int16_t max_abs_value, float lambda) { + // Clamp to valid range + if (value < -max_abs_value) value = -max_abs_value; + if (value > max_abs_value) value = max_abs_value; + + // Calculate cumulative probabilities using Laplacian distribution + // We need CDF at value and value+1 to get the probability mass for this symbol + double cdf_low = (value == -max_abs_value) ? 0.0 : laplacian_cdf(value - 1, lambda); + double cdf_high = laplacian_cdf(value, lambda); + + // Normalize to get cumulative counts in range [0, SCALE] + const uint32_t SCALE = 0x10000; // 65536 for precision + uint32_t cum_low = (uint32_t)(cdf_low * SCALE); + uint32_t cum_high = (uint32_t)(cdf_high * SCALE); + + // Ensure we have at least 1 unit of probability + if (cum_high <= cum_low) cum_high = cum_low + 1; + if (cum_high > SCALE) cum_high = SCALE; + + // Encode using cumulative probabilities + uint64_t range_64 = (uint64_t)enc->range; + enc->low += (uint32_t)((range_64 * cum_low) / SCALE); + enc->range = (uint32_t)((range_64 * (cum_high - cum_low)) / SCALE); + + range_encoder_renormalize(enc); +} + +size_t range_encoder_finish(RangeEncoder *enc) { + // Flush remaining bytes + for (int i = 0; i < 4; i++) { + range_encoder_put_byte(enc, (enc->low >> 24) & 0xFF); + enc->low <<= 8; + } + return enc->buffer_pos; +} + +void range_decoder_init(RangeDecoder *dec, const uint8_t *buffer, size_t size) { + dec->low = 0; + dec->range = TOP_VALUE; + dec->code = 0; + dec->buffer = buffer; + dec->buffer_pos = 0; + dec->buffer_size = size; + + // Read initial bytes into code + for (int i = 0; i < 4; i++) { + dec->code = (dec->code << 8) | range_decoder_get_byte(dec); + } +} + +int16_t range_decode_int16_laplacian(RangeDecoder *dec, int16_t max_abs_value, float lambda) { + const uint32_t SCALE = 0x10000; // Must match encoder + + // Calculate current position in probability space + uint64_t range_64 = (uint64_t)dec->range; + uint32_t cum_freq = (uint32_t)(((uint64_t)(dec->code - dec->low) * SCALE) / range_64); + + // Binary search to find symbol whose CDF range contains cum_freq + int16_t low = -max_abs_value; + int16_t high = max_abs_value; + int16_t value = 0; + + while (low <= high) { + int16_t mid = (low + high) / 2; + + double cdf_low = (mid == -max_abs_value) ? 0.0 : laplacian_cdf(mid - 1, lambda); + double cdf_high = laplacian_cdf(mid, lambda); + + uint32_t cum_low = (uint32_t)(cdf_low * SCALE); + uint32_t cum_high = (uint32_t)(cdf_high * SCALE); + + if (cum_high <= cum_low) cum_high = cum_low + 1; + + if (cum_freq >= cum_low && cum_freq < cum_high) { + // Found the symbol + value = mid; + + // Update decoder state + dec->low += (uint32_t)((range_64 * cum_low) / SCALE); + dec->range = (uint32_t)((range_64 * (cum_high - cum_low)) / SCALE); + + range_decoder_renormalize(dec); + return value; + } else if (cum_freq < cum_low) { + high = mid - 1; + } else { + low = mid + 1; + } + } + + // Fallback: shouldn't happen with correct encoding + range_decoder_renormalize(dec); + return value; +} diff --git a/video_encoder/range_coder.h b/video_encoder/range_coder.h new file mode 100644 index 0000000..7ea5681 --- /dev/null +++ b/video_encoder/range_coder.h @@ -0,0 +1,42 @@ +#ifndef RANGE_CODER_H +#define RANGE_CODER_H + +#include +#include + +// Simple range coder for signed 16-bit integers +// Uses adaptive frequency model for better compression + +typedef struct { + uint32_t low; + uint32_t range; + uint8_t *buffer; + size_t buffer_pos; + size_t buffer_capacity; +} RangeEncoder; + +typedef struct { + uint32_t low; + uint32_t range; + uint32_t code; + const uint8_t *buffer; + size_t buffer_pos; + size_t buffer_size; +} RangeDecoder; + +// Initialize encoder +void range_encoder_init(RangeEncoder *enc, uint8_t *buffer, size_t capacity); + +// Encode a signed 16-bit value with Laplacian distribution (λ=5.0, μ=0) +void range_encode_int16_laplacian(RangeEncoder *enc, int16_t value, int16_t max_abs_value, float lambda); + +// Finalize encoding and return bytes written +size_t range_encoder_finish(RangeEncoder *enc); + +// Initialize decoder +void range_decoder_init(RangeDecoder *dec, const uint8_t *buffer, size_t size); + +// Decode a signed 16-bit value with Laplacian distribution (λ=5.0, μ=0) +int16_t range_decode_int16_laplacian(RangeDecoder *dec, int16_t max_abs_value, float lambda); + +#endif // RANGE_CODER_H