TAD: arbitrary steps with bitplanes

This commit is contained in:
minjaesong
2025-10-27 09:14:52 +09:00
parent 9c27d114fc
commit 1d0f369827
5 changed files with 226 additions and 53 deletions

View File

@@ -337,7 +337,7 @@ static void pcm32f_to_pcm8(const float *fleft, const float *fright, uint8_t *lef
// Lambda-based decompanding decoder (inverse of Laplacian CDF-based encoder)
// Converts quantized index back to normalized float in [-1, 1]
static float lambda_decompanding(int16_t quant_val, int quant_bits) {
static float lambda_decompanding(int16_t quant_val, int max_index) {
// Handle zero
if (quant_val == 0) {
return 0.0f;
@@ -346,9 +346,6 @@ static float lambda_decompanding(int16_t quant_val, int quant_bits) {
int sign = (quant_val < 0) ? -1 : 1;
int abs_index = abs(quant_val);
// Maximum index for the given quant_bits
int max_index = (1 << (quant_bits - 1)) - 1;
// Clamp to valid range
if (abs_index > max_index) abs_index = max_index;
@@ -369,7 +366,7 @@ static float lambda_decompanding(int16_t quant_val, int quant_bits) {
return sign * abs_val;
}
static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs, size_t count, int chunk_size, int dwt_levels, int quant_bits) {
static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs, size_t count, int chunk_size, int dwt_levels, int max_index) {
// Calculate sideband boundaries dynamically
int first_band_size = chunk_size >> dwt_levels;
@@ -391,7 +388,7 @@ static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs,
}
// Decode using lambda companding
float normalized_val = lambda_decompanding(quantized[i], quant_bits);
float normalized_val = lambda_decompanding(quantized[i], max_index);
// Denormalize using the subband scalar
coeffs[i] = normalized_val * TAD32_COEFF_SCALARS[sideband];
@@ -409,8 +406,8 @@ static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs,
// Sign bit: 0 = positive/zero, 1 = negative
// Magnitude: unsigned value [0, 2^quant_bits - 1]
// Delta prediction: plane[i] ^= plane[i-1] (reversed by same operation)
static size_t decode_bitplanes(const uint8_t *input, int16_t *values, size_t count, int quant_bits) {
int bits_per_coeff = quant_bits + 1; // 1 sign bit + quant_bits magnitude bits
static size_t decode_bitplanes(const uint8_t *input, int16_t *values, size_t count, int max_index) {
int bits_per_coeff = ((int)ceilf(log2f(max_index))) + 1; // 1 sign bit + quant_bits magnitude bits
size_t plane_bytes = (count + 7) / 8; // Bytes needed for one bitplane
size_t input_bytes = plane_bytes * bits_per_coeff;
@@ -421,13 +418,6 @@ static size_t decode_bitplanes(const uint8_t *input, int16_t *values, size_t cou
memcpy(bitplanes[plane], input + (plane * plane_bytes), plane_bytes);
}
// Reverse delta prediction: plane[i] ^= plane[i-1]
for (int plane = 0; plane < bits_per_coeff; plane++) {
for (size_t byte = 1; byte < plane_bytes; byte++) {
bitplanes[plane][byte] ^= bitplanes[plane][byte - 1];
}
}
// Reconstruct coefficients from bitplanes
for (size_t i = 0; i < count; i++) {
size_t byte_idx = i / 8;
@@ -438,7 +428,7 @@ static size_t decode_bitplanes(const uint8_t *input, int16_t *values, size_t cou
// Read magnitude bits (planes 1 to quant_bits)
uint16_t magnitude = 0;
for (int b = 0; b < quant_bits; b++) {
for (int b = 0; b < bits_per_coeff - 1; b++) {
if (bitplanes[b + 1][byte_idx] & (1 << bit_offset)) {
magnitude |= (1 << b);
}
@@ -469,7 +459,7 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_
uint16_t sample_count = *((const uint16_t*)read_ptr);
read_ptr += sizeof(uint16_t);
uint8_t quant_bits = *read_ptr;
uint8_t max_index = *read_ptr;
read_ptr += sizeof(uint8_t);
uint32_t payload_size = *((const uint32_t*)read_ptr);
@@ -518,12 +508,12 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_
const uint8_t *payload_ptr = payload;
size_t mid_bytes, side_bytes;
mid_bytes = decode_bitplanes(payload_ptr, quant_mid, sample_count, quant_bits);
side_bytes = decode_bitplanes(payload_ptr + mid_bytes, quant_side, sample_count, quant_bits);
mid_bytes = decode_bitplanes(payload_ptr, quant_mid, sample_count, max_index);
side_bytes = decode_bitplanes(payload_ptr + mid_bytes, quant_side, sample_count, max_index);
// Dequantize
dequantize_dwt_coefficients(quant_mid, dwt_mid, sample_count, sample_count, dwt_levels, quant_bits);
dequantize_dwt_coefficients(quant_side, dwt_side, sample_count, sample_count, dwt_levels, quant_bits);
dequantize_dwt_coefficients(quant_mid, dwt_mid, sample_count, sample_count, dwt_levels, max_index);
dequantize_dwt_coefficients(quant_side, dwt_side, sample_count, sample_count, dwt_levels, max_index);
// Inverse DWT
dwt_haar_inverse_multilevel(dwt_mid, sample_count, dwt_levels);

View File

@@ -236,7 +236,7 @@ static void compress_mu_law(float *left, float *right, size_t count) {
// Lambda-based companding encoder (based on Laplacian distribution CDF)
// val must be normalised to [-1,1]
// Returns quantized index in range [-(2^quant_bits-1), +(2^quant_bits-1)]
static int16_t lambda_companding(float val, int quant_bits) {
static int16_t lambda_companding(float val, int max_index) {
// Handle zero
if (fabsf(val) < 1e-9f) {
return 0;
@@ -248,8 +248,6 @@ static int16_t lambda_companding(float val, int quant_bits) {
// Clamp to [0, 1]
if (abs_val > 1.0f) abs_val = 1.0f;
// Maximum index for the given quant_bits
int max_index = (1 << (quant_bits - 1)) - 1;
// Laplacian CDF for x >= 0: F(x) = 1 - 0.5 * exp(-λ*x)
// Map to [0.5, 1.0] range (half of CDF for positive values)
@@ -268,7 +266,7 @@ static int16_t lambda_companding(float val, int quant_bits) {
return (int16_t)(sign * index);
}
static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int quant_bits, int *current_subband_index) {
static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int max_index, int *current_subband_index) {
int first_band_size = chunk_size >> dwt_levels;
int *sideband_starts = malloc((dwt_levels + 2) * sizeof(int));
@@ -293,7 +291,7 @@ static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, s
}
float val = (coeffs[i] / (TAD32_COEFF_SCALARS[sideband])); // val is normalised to [-1,1]
int16_t quant_val = lambda_companding(val, quant_bits);
int16_t quant_val = lambda_companding(val, max_index);
quantized[i] = quant_val;
}
@@ -310,8 +308,8 @@ static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, s
// Sign bit: 0 = positive/zero, 1 = negative
// Magnitude: unsigned value [0, 2^quant_bits - 1]
// Delta prediction: plane[i] ^= plane[i-1] for better compression
static size_t encode_bitplanes(const int16_t *values, size_t count, uint8_t *output, int quant_bits) {
int bits_per_coeff = quant_bits + 1; // 1 sign bit + quant_bits magnitude bits
static size_t encode_bitplanes(const int16_t *values, size_t count, uint8_t *output, int max_index) {
int bits_per_coeff = ((int)ceilf(log2f(max_index))) + 1; // 1 sign bit + quant_bits magnitude bits
size_t plane_bytes = (count + 7) / 8; // Bytes needed for one bitplane
size_t output_bytes = plane_bytes * bits_per_coeff;
@@ -332,7 +330,7 @@ static size_t encode_bitplanes(const int16_t *values, size_t count, uint8_t *out
uint16_t magnitude = (val < 0) ? -val : val;
// Clamp magnitude to max value for quant_bits
uint16_t max_magnitude = (1 << quant_bits) - 1;
uint16_t max_magnitude = max_index;
if (magnitude > max_magnitude) {
magnitude = max_magnitude;
}
@@ -346,20 +344,13 @@ static size_t encode_bitplanes(const int16_t *values, size_t count, uint8_t *out
}
// Magnitude bitplanes (planes 1 to quant_bits)
for (int b = 0; b < quant_bits; b++) {
for (int b = 0; b < bits_per_coeff - 1; b++) {
if (magnitude & (1 << b)) {
bitplanes[b + 1][byte_idx] |= (1 << bit_offset);
}
}
}
// Apply delta prediction: plane[i] ^= plane[i-1]
for (int plane = 0; plane < bits_per_coeff; plane++) {
for (size_t byte = plane_bytes - 1; byte > 0; byte--) {
bitplanes[plane][byte] ^= bitplanes[plane][byte - 1];
}
}
free(bitplanes);
return output_bytes;
}
@@ -636,7 +627,7 @@ void tad32_free_statistics(void) {
//=============================================================================
size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
int quant_bits, int use_zstd, uint8_t *output) {
int max_index, int use_zstd, uint8_t *output) {
// Calculate DWT levels from chunk size
int dwt_levels = calculate_dwt_levels(num_samples);
if (dwt_levels < 0) {
@@ -680,7 +671,7 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
// Step 3.5: Accumulate coefficient statistics if enabled
static int stats_enabled = -1;
if (stats_enabled == -1) {
stats_enabled = 1;//getenv("TAD_COEFF_STATS") != NULL;
stats_enabled = getenv("TAD_COEFF_STATS") != NULL;
if (stats_enabled) {
init_statistics(dwt_levels);
}
@@ -691,13 +682,13 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
}
// Step 4: Quantize with frequency-dependent weights and dead zone
quantize_dwt_coefficients(dwt_mid, quant_mid, num_samples, 1, num_samples, dwt_levels, quant_bits, NULL);
quantize_dwt_coefficients(dwt_side, quant_side, num_samples, 1, num_samples, dwt_levels, quant_bits, NULL);
quantize_dwt_coefficients(dwt_mid, quant_mid, num_samples, 1, num_samples, dwt_levels, max_index, NULL);
quantize_dwt_coefficients(dwt_side, quant_side, num_samples, 1, num_samples, dwt_levels, max_index, NULL);
// Step 5: Encode with pure bitplanes (quant_bits + 1 bits per coefficient)
uint8_t *temp_buffer = malloc(num_samples * 4 * sizeof(int32_t));
size_t mid_size = encode_bitplanes(quant_mid, num_samples, temp_buffer, quant_bits);
size_t side_size = encode_bitplanes(quant_side, num_samples, temp_buffer + mid_size, quant_bits);
size_t mid_size = encode_bitplanes(quant_mid, num_samples, temp_buffer, max_index);
size_t side_size = encode_bitplanes(quant_side, num_samples, temp_buffer + mid_size, max_index);
size_t uncompressed_size = mid_size + side_size;
@@ -708,7 +699,7 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
*((uint16_t*)write_ptr) = (uint16_t)num_samples;
write_ptr += sizeof(uint16_t);
*write_ptr = (uint8_t)quant_bits;
*write_ptr = (uint8_t)max_index;
write_ptr += sizeof(uint8_t);
uint32_t *payload_size_ptr = (uint32_t*)write_ptr;

View File

@@ -8,6 +8,7 @@
#include <stdint.h>
#include <string.h>
#include <getopt.h>
#include <math.h>
#include <time.h>
#include "encoder_tad.h"
@@ -62,7 +63,7 @@ int main(int argc, char *argv[]) {
char *input_file = NULL;
char *output_file = NULL;
int quant_bits = 7; // Default QUANT_BITS
int max_index = 7; // Default QUANT_BITS
int use_zstd = 1;
int verbose = 0;
@@ -84,11 +85,8 @@ int main(int argc, char *argv[]) {
output_file = optarg;
break;
case 'q':
quant_bits = atoi(optarg);
if (quant_bits < 2 || quant_bits > 12) {
fprintf(stderr, "Error: Quantization bits must be between 2 and 12\n");
return 1;
}
max_index = atoi(optarg);
break;
case 'z':
use_zstd = 0;
@@ -115,8 +113,8 @@ int main(int argc, char *argv[]) {
printf("%s\n", ENCODER_VENDOR_STRING);
printf("Input: %s\n", input_file);
printf("Output: %s\n", output_file);
printf("Quant: %d\n", quant_bits);
printf("Encoding method: Pure bitplanes (%d bits per coefficient)\n", quant_bits + 1);
printf("Quant: %d\n", max_index);
printf("Encoding method: Pure bitplanes (%d bits per coefficient)\n", ((int)ceilf(log2f(max_index))) + 1);
printf("Zstd compression: %s\n", use_zstd ? "enabled" : "disabled");
}
@@ -243,7 +241,7 @@ int main(int argc, char *argv[]) {
// Encode chunk using linked tad32_encode_chunk() from encoder_tad32.c
size_t encoded_size = tad32_encode_chunk(chunk_buffer, TAD32_DEFAULT_CHUNK_SIZE,
quant_bits, use_zstd, output_buffer);
max_index, use_zstd, output_buffer);
if (encoded_size == 0) {
fprintf(stderr, "Error: Chunk encoding failed at chunk %zu\n", chunk_idx);

152
video_encoder/range_coder.c Normal file
View File

@@ -0,0 +1,152 @@
// Simple range coder for TAD audio codec
// Based on range coding with Laplacian probability model
#include "range_coder.h"
#include <string.h>
#include <math.h>
#define TOP_VALUE 0xFFFFFFFFU
#define BOTTOM_VALUE 0x00FFFFFF
static inline void range_encoder_put_byte(RangeEncoder *enc, uint8_t byte) {
if (enc->buffer_pos < enc->buffer_capacity) {
enc->buffer[enc->buffer_pos++] = byte;
}
}
static inline uint8_t range_decoder_get_byte(RangeDecoder *dec) {
if (dec->buffer_pos < dec->buffer_size) {
return dec->buffer[dec->buffer_pos++];
}
return 0;
}
static void range_encoder_renormalize(RangeEncoder *enc) {
while (enc->range <= BOTTOM_VALUE) {
range_encoder_put_byte(enc, (enc->low >> 24) & 0xFF);
enc->low <<= 8;
enc->range <<= 8;
}
}
static void range_decoder_renormalize(RangeDecoder *dec) {
while (dec->range <= BOTTOM_VALUE) {
dec->code = (dec->code << 8) | range_decoder_get_byte(dec);
dec->low <<= 8;
dec->range <<= 8;
}
}
void range_encoder_init(RangeEncoder *enc, uint8_t *buffer, size_t capacity) {
enc->low = 0;
enc->range = TOP_VALUE;
enc->buffer = buffer;
enc->buffer_pos = 0;
enc->buffer_capacity = capacity;
}
// Calculate Laplacian CDF for a given value
// CDF(x) = 0.5 * exp(λx) for x < 0
// CDF(x) = 1 - 0.5 * exp(-λx) for x ≥ 0
static inline double laplacian_cdf(int16_t value, float lambda) {
if (value < 0) {
return 0.5 * exp(lambda * value);
} else {
return 1.0 - 0.5 * exp(-lambda * value);
}
}
void range_encode_int16_laplacian(RangeEncoder *enc, int16_t value, int16_t max_abs_value, float lambda) {
// Clamp to valid range
if (value < -max_abs_value) value = -max_abs_value;
if (value > max_abs_value) value = max_abs_value;
// Calculate cumulative probabilities using Laplacian distribution
// We need CDF at value and value+1 to get the probability mass for this symbol
double cdf_low = (value == -max_abs_value) ? 0.0 : laplacian_cdf(value - 1, lambda);
double cdf_high = laplacian_cdf(value, lambda);
// Normalize to get cumulative counts in range [0, SCALE]
const uint32_t SCALE = 0x10000; // 65536 for precision
uint32_t cum_low = (uint32_t)(cdf_low * SCALE);
uint32_t cum_high = (uint32_t)(cdf_high * SCALE);
// Ensure we have at least 1 unit of probability
if (cum_high <= cum_low) cum_high = cum_low + 1;
if (cum_high > SCALE) cum_high = SCALE;
// Encode using cumulative probabilities
uint64_t range_64 = (uint64_t)enc->range;
enc->low += (uint32_t)((range_64 * cum_low) / SCALE);
enc->range = (uint32_t)((range_64 * (cum_high - cum_low)) / SCALE);
range_encoder_renormalize(enc);
}
size_t range_encoder_finish(RangeEncoder *enc) {
// Flush remaining bytes
for (int i = 0; i < 4; i++) {
range_encoder_put_byte(enc, (enc->low >> 24) & 0xFF);
enc->low <<= 8;
}
return enc->buffer_pos;
}
void range_decoder_init(RangeDecoder *dec, const uint8_t *buffer, size_t size) {
dec->low = 0;
dec->range = TOP_VALUE;
dec->code = 0;
dec->buffer = buffer;
dec->buffer_pos = 0;
dec->buffer_size = size;
// Read initial bytes into code
for (int i = 0; i < 4; i++) {
dec->code = (dec->code << 8) | range_decoder_get_byte(dec);
}
}
int16_t range_decode_int16_laplacian(RangeDecoder *dec, int16_t max_abs_value, float lambda) {
const uint32_t SCALE = 0x10000; // Must match encoder
// Calculate current position in probability space
uint64_t range_64 = (uint64_t)dec->range;
uint32_t cum_freq = (uint32_t)(((uint64_t)(dec->code - dec->low) * SCALE) / range_64);
// Binary search to find symbol whose CDF range contains cum_freq
int16_t low = -max_abs_value;
int16_t high = max_abs_value;
int16_t value = 0;
while (low <= high) {
int16_t mid = (low + high) / 2;
double cdf_low = (mid == -max_abs_value) ? 0.0 : laplacian_cdf(mid - 1, lambda);
double cdf_high = laplacian_cdf(mid, lambda);
uint32_t cum_low = (uint32_t)(cdf_low * SCALE);
uint32_t cum_high = (uint32_t)(cdf_high * SCALE);
if (cum_high <= cum_low) cum_high = cum_low + 1;
if (cum_freq >= cum_low && cum_freq < cum_high) {
// Found the symbol
value = mid;
// Update decoder state
dec->low += (uint32_t)((range_64 * cum_low) / SCALE);
dec->range = (uint32_t)((range_64 * (cum_high - cum_low)) / SCALE);
range_decoder_renormalize(dec);
return value;
} else if (cum_freq < cum_low) {
high = mid - 1;
} else {
low = mid + 1;
}
}
// Fallback: shouldn't happen with correct encoding
range_decoder_renormalize(dec);
return value;
}

View File

@@ -0,0 +1,42 @@
#ifndef RANGE_CODER_H
#define RANGE_CODER_H
#include <stdint.h>
#include <stddef.h>
// Simple range coder for signed 16-bit integers
// Uses adaptive frequency model for better compression
typedef struct {
uint32_t low;
uint32_t range;
uint8_t *buffer;
size_t buffer_pos;
size_t buffer_capacity;
} RangeEncoder;
typedef struct {
uint32_t low;
uint32_t range;
uint32_t code;
const uint8_t *buffer;
size_t buffer_pos;
size_t buffer_size;
} RangeDecoder;
// Initialize encoder
void range_encoder_init(RangeEncoder *enc, uint8_t *buffer, size_t capacity);
// Encode a signed 16-bit value with Laplacian distribution (λ=5.0, μ=0)
void range_encode_int16_laplacian(RangeEncoder *enc, int16_t value, int16_t max_abs_value, float lambda);
// Finalize encoding and return bytes written
size_t range_encoder_finish(RangeEncoder *enc);
// Initialize decoder
void range_decoder_init(RangeDecoder *dec, const uint8_t *buffer, size_t size);
// Decode a signed 16-bit value with Laplacian distribution (λ=5.0, μ=0)
int16_t range_decode_int16_laplacian(RangeDecoder *dec, int16_t max_abs_value, float lambda);
#endif // RANGE_CODER_H