TAD: add --no-twobitmap option (raw int8_t storage); somehow removing entropy coding yields better compression?

This commit is contained in:
minjaesong
2025-10-28 04:19:11 +09:00
parent 86de627734
commit 86864c4b7a
3 changed files with 28 additions and 10 deletions

View File

@@ -623,8 +623,8 @@ static void print_top5_quantized_values(const int8_t *quant, size_t count, const
qsort(values, unique_count, sizeof(ValueFrequency), compare_value_frequency);
// Print the most frequent values (list is sorted by the qsort above)
fprintf(stderr, " %s Top 10 Values:\n", title);
int print_count = (unique_count < 10) ? unique_count : 10;
fprintf(stderr, " %s Top 100 Values:\n", title);
int print_count = (unique_count < 100) ? unique_count : 100;
for (int i = 0; i < print_count; i++) {
fprintf(stderr, " %6d: %8zu occurrences (%5.2f%%)\n",
values[i].value, values[i].count, values[i].percentage);
@@ -761,7 +761,7 @@ void tad32_free_statistics(void) {
//=============================================================================
size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
int max_index, int use_zstd, uint8_t *output) {
int max_index, int use_zstd, int use_twobitmap, uint8_t *output) {
// Calculate DWT levels from chunk size
int dwt_levels = calculate_dwt_levels(num_samples);
if (dwt_levels < 0) {
@@ -825,10 +825,20 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
accumulate_quantized(quant_side, dwt_levels, num_samples, side_quant_accumulators);
}
// Step 5: Encode with twobit-map significance map
uint8_t *temp_buffer = malloc(num_samples * 4); // Generous buffer for twobitmap + other values
size_t mid_size = encode_twobitmap(quant_mid, num_samples, temp_buffer);
size_t side_size = encode_twobitmap(quant_side, num_samples, temp_buffer + mid_size);
// Step 5: Encode with twobit-map significance map or raw int8_t storage
uint8_t *temp_buffer = malloc(num_samples * 4); // Generous buffer
size_t mid_size, side_size;
if (use_twobitmap) {
mid_size = encode_twobitmap(quant_mid, num_samples, temp_buffer);
side_size = encode_twobitmap(quant_side, num_samples, temp_buffer + mid_size);
} else {
// Raw int8_t storage
memcpy(temp_buffer, quant_mid, num_samples);
mid_size = num_samples;
memcpy(temp_buffer + mid_size, quant_side, num_samples);
side_size = num_samples;
}
size_t uncompressed_size = mid_size + side_size;

View File

@@ -26,6 +26,7 @@
* @param num_samples Number of samples per channel (min 1024)
* @param quant_bits Quantization bits 4-12 (default: 7)
* @param use_zstd 1=enable Zstd compression, 0=disable
* @param use_twobitmap 1=enable twobitmap encoding, 0=raw int8_t storage
* @param output Output buffer (must be large enough)
* @return Number of bytes written to output, or 0 on error
*
@@ -36,7 +37,7 @@
* * payload (encoded M/S data, optionally Zstd-compressed)
*/
size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
int quant_bits, int use_zstd, uint8_t *output);
int quant_bits, int use_zstd, int use_twobitmap, uint8_t *output);
/**
* Print accumulated coefficient statistics

View File

@@ -51,6 +51,7 @@ static void print_usage(const char *prog_name) {
printf(" -q <bits> Quantization bits (default: 7, range: 4-8)\n");
printf(" Higher = more precision, larger files\n");
printf(" --no-zstd Disable Zstd compression\n");
printf(" --no-twobitmap Disable twobitmap encoding (use raw int8_t storage)\n");
printf(" -v Verbose output\n");
printf(" -h, --help Show this help\n");
printf("\nVersion: %s\n", ENCODER_VENDOR_STRING);
@@ -65,11 +66,13 @@ int main(int argc, char *argv[]) {
char *output_file = NULL;
int max_index = 7; // Default QUANT_BITS
int use_zstd = 1;
int use_twobitmap = 1;
int verbose = 0;
// Parse command line arguments
static struct option long_options[] = {
{"no-zstd", no_argument, 0, 'z'},
{"no-twobitmap", no_argument, 0, 't'},
{"help", no_argument, 0, 'h'},
{0, 0, 0, 0}
};
@@ -91,6 +94,9 @@ int main(int argc, char *argv[]) {
case 'z':
use_zstd = 0;
break;
case 't':
use_twobitmap = 0;
break;
case 'v':
verbose = 1;
break;
@@ -114,7 +120,8 @@ int main(int argc, char *argv[]) {
printf("Input: %s\n", input_file);
printf("Output: %s\n", output_file);
printf("Quant: %d\n", max_index);
printf("Encoding method: Twobit-map significance map (int8_t coefficients)\n");
printf("Encoding method: %s (int8_t coefficients)\n",
use_twobitmap ? "Twobit-map significance map" : "Raw int8_t storage");
printf("Zstd compression: %s\n", use_zstd ? "enabled" : "disabled");
}
@@ -241,7 +248,7 @@ int main(int argc, char *argv[]) {
// Encode chunk using linked tad32_encode_chunk() from encoder_tad32.c
size_t encoded_size = tad32_encode_chunk(chunk_buffer, TAD32_DEFAULT_CHUNK_SIZE,
max_index, use_zstd, output_buffer);
max_index, use_zstd, use_twobitmap, output_buffer);
if (encoded_size == 0) {
fprintf(stderr, "Error: Chunk encoding failed at chunk %zu\n", chunk_idx);