TAD: better compression using bitplane encoding and delta prediction

This commit is contained in:
minjaesong
2025-10-26 20:30:28 +09:00
parent 370d511f44
commit 67f7c091eb
5 changed files with 106 additions and 59 deletions

View File

@@ -389,38 +389,60 @@ static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs,
}
//=============================================================================
// Significance Map Decoding
// Bitplane Decoding with Delta Prediction
//=============================================================================
/**
 * Decode one channel of bitplane-packed, delta-predicted coefficients.
 *
 * Stream layout: (quant_bits + 1) consecutive planes of (count + 7) / 8 bytes
 * each. Plane 0 holds the sign bits (0 = positive/zero, 1 = negative) and
 * plane b + 1 holds bit b of the magnitude. Coefficient i is stored at bit
 * (i % 8) of byte (i / 8) of every plane.
 *
 * Within a plane, each byte after the first was stored XORed with the byte
 * preceding it, so the original plane is recovered with a running (prefix)
 * XOR. The decoder works directly on the input stream and needs no temporary
 * buffers, so it cannot fail on allocation.
 *
 * @param input      encoded stream; must hold at least the returned byte count
 * @param values     receives `count` decoded coefficients
 * @param count      number of coefficients to decode
 * @param quant_bits number of magnitude bits per coefficient
 * @return number of bytes consumed from `input`, or 0 if an argument is
 *         invalid (NULL pointer or negative quant_bits)
 */
static size_t decode_bitplanes(const uint8_t *input, int16_t *values, size_t count, int quant_bits) {
    if (input == NULL || values == NULL || quant_bits < 0) {
        return 0;
    }

    size_t plane_bytes = (count + 7) / 8; // Bytes needed for one bitplane
    size_t input_bytes = plane_bytes * ((size_t)quant_bits + 1);

    // Magnitude bits beyond bit 15 cannot be represented in a 16-bit
    // coefficient; those planes are skipped but still counted as consumed.
    int magnitude_planes = (quant_bits < 16) ? quant_bits : 16;

    for (size_t i = 0; i < count; i++) {
        values[i] = 0;
    }

    // Accumulate magnitude bits, one plane at a time (planes 1..quant_bits)
    for (int b = 0; b < magnitude_planes; b++) {
        const uint8_t *plane = input + ((size_t)b + 1) * plane_bytes;
        uint16_t weight = (uint16_t)(1u << b);
        uint8_t decoded = 0; // running XOR == delta-decoded current byte

        for (size_t i = 0; i < count; i++) {
            if ((i % 8) == 0) {
                decoded = (uint8_t)(decoded ^ plane[i / 8]);
            }
            if ((decoded >> (i % 8)) & 1u) {
                values[i] = (int16_t)((uint16_t)values[i] | weight);
            }
        }
    }

    // Apply the sign plane (plane 0)
    uint8_t decoded_sign = 0;
    for (size_t i = 0; i < count; i++) {
        if ((i % 8) == 0) {
            decoded_sign = (uint8_t)(decoded_sign ^ input[i / 8]);
        }
        if ((decoded_sign >> (i % 8)) & 1u) {
            values[i] = (int16_t)(-values[i]);
        }
    }

    return input_bytes;
}
//=============================================================================
@@ -480,12 +502,12 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_
uint8_t *pcm8_left = malloc(sample_count * sizeof(uint8_t));
uint8_t *pcm8_right = malloc(sample_count * sizeof(uint8_t));
// Decode significance maps
// Decode bitplanes
const uint8_t *payload_ptr = payload;
size_t mid_bytes, side_bytes;
mid_bytes = decode_sigmap_2bit(payload_ptr, quant_mid, sample_count);
side_bytes = decode_sigmap_2bit(payload_ptr + mid_bytes, quant_side, sample_count);
mid_bytes = decode_bitplanes(payload_ptr, quant_mid, sample_count, quant_bits);
side_bytes = decode_bitplanes(payload_ptr + mid_bytes, quant_side, sample_count, quant_bits);
// Dequantize
dequantize_dwt_coefficients(quant_mid, dwt_mid, sample_count, sample_count, dwt_levels, quant_bits);

View File

@@ -290,41 +290,66 @@ static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, s
}
//=============================================================================
// Significance Map Encoding
// Bitplane Encoding with Delta Prediction
//=============================================================================
/**
 * Encode one channel of quantized coefficients as delta-predicted bitplanes.
 *
 * Each coefficient uses exactly (quant_bits + 1) bits: one sign bit
 * (0 = positive/zero, 1 = negative) and quant_bits magnitude bits. Magnitudes
 * larger than 2^quant_bits - 1 are clamped to that maximum.
 *
 * Output layout: (quant_bits + 1) consecutive planes of (count + 7) / 8 bytes.
 * Plane 0 is the sign plane; plane b + 1 holds bit b of the magnitude.
 * Coefficient i occupies bit (i % 8) of byte (i / 8) in every plane. After
 * packing, every byte of a plane except the first is XORed with the byte
 * before it (delta prediction).
 *
 * @param values     `count` quantized coefficients
 * @param count      number of coefficients
 * @param output     destination; must have room for the returned byte count
 * @param quant_bits number of magnitude bits per coefficient
 * @return number of bytes written to `output`, or 0 if an argument is invalid
 *         (NULL pointer or negative quant_bits) or `count` is 0
 */
static size_t encode_bitplanes(const int16_t *values, size_t count, uint8_t *output, int quant_bits) {
    if (values == NULL || output == NULL || quant_bits < 0) {
        return 0;
    }

    size_t plane_bytes = (count + 7) / 8; // Bytes needed for one bitplane
    size_t total_planes = (size_t)quant_bits + 1; // 1 sign plane + magnitude planes
    size_t output_bytes = plane_bytes * total_planes;

    // Nothing to write; also keeps `plane_bytes - 1` below from wrapping.
    if (plane_bytes == 0) {
        return 0;
    }

    memset(output, 0, output_bytes);

    // A 16-bit coefficient has at most 16 magnitude bits; higher planes stay zero.
    int magnitude_planes = (quant_bits < 16) ? quant_bits : 16;
    unsigned max_magnitude = (quant_bits < 16) ? ((1u << quant_bits) - 1u) : 0xFFFFu;

    // Scatter every coefficient into the sign and magnitude planes
    for (size_t i = 0; i < count; i++) {
        int val = values[i]; // widened so that negating INT16_MIN is well defined
        unsigned magnitude = (val < 0) ? (unsigned)(-val) : (unsigned)val;
        if (magnitude > max_magnitude) {
            magnitude = max_magnitude;
        }

        size_t byte_idx = i / 8;
        uint8_t mask = (uint8_t)(1u << (i % 8));

        // Sign bitplane (plane 0)
        if (val < 0) {
            output[byte_idx] |= mask;
        }

        // Magnitude bitplanes (planes 1 to quant_bits)
        for (int b = 0; b < magnitude_planes; b++) {
            if ((magnitude >> b) & 1u) {
                output[((size_t)b + 1) * plane_bytes + byte_idx] |= mask;
            }
        }
    }

    // Delta prediction: walk each plane backwards so every byte is XORed with
    // the still-unmodified byte before it.
    for (size_t p = 0; p < total_planes; p++) {
        uint8_t *plane = output + p * plane_bytes;
        for (size_t k = plane_bytes - 1; k > 0; k--) {
            plane[k] ^= plane[k - 1];
        }
    }

    return output_bytes;
}
//=============================================================================
@@ -657,10 +682,10 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
quantize_dwt_coefficients(dwt_mid, quant_mid, num_samples, 1, num_samples, dwt_levels, quant_bits, NULL);
quantize_dwt_coefficients(dwt_side, quant_side, num_samples, 1, num_samples, dwt_levels, quant_bits, NULL);
// Step 5: Encode with 2-bit significance map (32-bit version)
// Step 5: Encode with pure bitplanes (quant_bits + 1 bits per coefficient)
uint8_t *temp_buffer = malloc(num_samples * 4 * sizeof(int32_t));
size_t mid_size = encode_sigmap_2bit(quant_mid, num_samples, temp_buffer);
size_t side_size = encode_sigmap_2bit(quant_side, num_samples, temp_buffer + mid_size);
size_t mid_size = encode_bitplanes(quant_mid, num_samples, temp_buffer, quant_bits);
size_t side_size = encode_bitplanes(quant_side, num_samples, temp_buffer + mid_size, quant_bits);
size_t uncompressed_size = mid_size + side_size;

View File

@@ -17,7 +17,7 @@
// Quality setting bounds and default
#define TAD32_QUALITY_MIN 0
#define TAD32_QUALITY_MAX 5
#define TAD32_QUALITY_DEFAULT 3
// Single definition only: a second #define with a different value is an
// incompatible macro redefinition.
#define TAD32_ZSTD_LEVEL 15
/**
* Encode audio chunk with TAD32 codec (PCM32f version)

View File

@@ -62,7 +62,6 @@ int main(int argc, char *argv[]) {
char *input_file = NULL;
char *output_file = NULL;
int quality = TAD32_QUALITY_DEFAULT;
int quant_bits = 7; // Default QUANT_BITS
int use_zstd = 1;
int verbose = 0;
@@ -86,8 +85,8 @@ int main(int argc, char *argv[]) {
break;
case 'q':
quant_bits = atoi(optarg);
if (quant_bits < 4 || quant_bits > 8) {
fprintf(stderr, "Error: Quantization bits must be between 4 and 8\n");
if (quant_bits < 4 || quant_bits > 12) {
fprintf(stderr, "Error: Quantization bits must be between 4 and 12\n");
return 1;
}
break;
@@ -116,7 +115,8 @@ int main(int argc, char *argv[]) {
printf("%s\n", ENCODER_VENDOR_STRING);
printf("Input: %s\n", input_file);
printf("Output: %s\n", output_file);
printf("Significance map: 2-bit\n");
printf("Quant: %d\n", quant_bits);
printf("Encoding method: Pure bitplanes (%d bits per coefficient)\n", quant_bits + 1);
printf("Zstd compression: %s\n", use_zstd ? "enabled" : "disabled");
}

View File

@@ -72,7 +72,7 @@
#define TAD32_SIGMAP_2BIT 1 // 2-bit: 00=0, 01=+1, 10=-1, 11=other
#define TAD32_QUALITY_MIN 0
#define TAD32_QUALITY_MAX 5
// Single definition only: a second #define with a different value is an
// incompatible macro redefinition.
#define TAD32_ZSTD_LEVEL 15
// DWT settings
#define TILE_SIZE_X 640