TAD: imma just finalise it here

This commit is contained in:
minjaesong
2025-10-29 23:59:08 +09:00
parent 692defdbb8
commit 4a6edeca09
4 changed files with 20 additions and 51 deletions

View File

@@ -1570,27 +1570,13 @@ If the audio duration doesn't align to chunk boundaries, the final chunk can use
a smaller power-of-2 size or be zero-padded.
uint16 Sample Count: number of samples per channel (must be power of 2, min 1024)
uint8 Max quantisation index: the total number of quantisation steps is (this number * 2 + 1)
uint32 Chunk Payload Size: size of following payload in bytes
* Chunk Payload: encoded M/S stereo data (Zstd compressed if flag set)
### Chunk Payload Structure (before optional Zstd compression)
* Mid Channel Encoded Data
* Side Channel Encoded Data
### Encoded Channel Data (2-bit Twobitmap Significance Map)
uint8 Significance Map[(num_samples * 2 + 7) / 8] // 2 bits per coefficient
int16 Other Values[variable length] // Non-{-1,0,+1} values
#### 2-bit Twobitmap Encoding
Each DWT coefficient is encoded using 2 bits in the significance map:
- 00: coefficient is 0
- 01: coefficient is +1
- 10: coefficient is -1
- 11: coefficient is "other" (value stored in Other Values array)
This encoding exploits the sparsity of quantised DWT coefficients: after
quantisation, most values are 0 or ±1. "Other" values are stored sequentially
as int16 in the order they appear.
* Mid Channel Encoded Data (raw int8 values)
* Side Channel Encoded Data (raw int8 values)
## Encoding Pipeline

View File

@@ -779,7 +779,7 @@ void tad32_free_statistics(void) {
//=============================================================================
size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
int max_index, int use_zstd, int use_twobitmap,
int max_index,
float quantiser_scale, uint8_t *output) {
// Calculate DWT levels from chunk size
int dwt_levels = calculate_dwt_levels(num_samples);
@@ -824,7 +824,7 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
// Step 3.5: Accumulate coefficient statistics if enabled
static int stats_enabled = -1;
if (stats_enabled == -1) {
stats_enabled = 1;//getenv("TAD_COEFF_STATS") != NULL;
stats_enabled = getenv("TAD_COEFF_STATS") != NULL;
if (stats_enabled) {
init_statistics(dwt_levels);
}
@@ -871,28 +871,24 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
size_t payload_size;
if (use_zstd) {
size_t zstd_bound = ZSTD_compressBound(uncompressed_size);
uint8_t *zstd_buffer = malloc(zstd_bound);
size_t zstd_bound = ZSTD_compressBound(uncompressed_size);
uint8_t *zstd_buffer = malloc(zstd_bound);
payload_size = ZSTD_compress(zstd_buffer, zstd_bound, temp_buffer, uncompressed_size, TAD32_ZSTD_LEVEL);
payload_size = ZSTD_compress(zstd_buffer, zstd_bound, temp_buffer, uncompressed_size, TAD32_ZSTD_LEVEL);
if (ZSTD_isError(payload_size)) {
fprintf(stderr, "Error: Zstd compression failed: %s\n", ZSTD_getErrorName(payload_size));
free(zstd_buffer);
free(pcm32_left); free(pcm32_right);
free(pcm32_mid); free(pcm32_side); free(dwt_mid); free(dwt_side);
free(quant_mid); free(quant_side); free(temp_buffer);
return 0;
}
memcpy(write_ptr, zstd_buffer, payload_size);
if (ZSTD_isError(payload_size)) {
fprintf(stderr, "Error: Zstd compression failed: %s\n", ZSTD_getErrorName(payload_size));
free(zstd_buffer);
} else {
payload_size = uncompressed_size;
memcpy(write_ptr, temp_buffer, payload_size);
free(pcm32_left); free(pcm32_right);
free(pcm32_mid); free(pcm32_side); free(dwt_mid); free(dwt_side);
free(quant_mid); free(quant_side); free(temp_buffer);
return 0;
}
memcpy(write_ptr, zstd_buffer, payload_size);
free(zstd_buffer);
*payload_size_ptr = (uint32_t)payload_size;
write_ptr += payload_size;

View File

@@ -39,7 +39,7 @@
* * payload (encoded M/S data, optionally Zstd-compressed)
*/
size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
int quant_bits, int use_zstd, int use_twobitmap,
int quant_bits,
float quantiser_scale, uint8_t *output);
/**

View File

@@ -69,14 +69,10 @@ int main(int argc, char *argv[]) {
char *output_file = NULL;
int max_index = 7; // Default QUANT_BITS
float quantiser_scale = 1.0f; // Default quantiser scaling
int use_zstd = 1;
int use_twobitmap = 1;
int verbose = 0;
// Parse command line arguments
static struct option long_options[] = {
{"no-zstd", no_argument, 0, 'z'},
{"no-twobitmap", no_argument, 0, 't'},
{"help", no_argument, 0, 'h'},
{0, 0, 0, 0}
};
@@ -101,12 +97,6 @@ int main(int argc, char *argv[]) {
return 1;
}
break;
case 'z':
use_zstd = 0;
break;
case 't':
use_twobitmap = 0;
break;
case 'v':
verbose = 1;
break;
@@ -166,9 +156,6 @@ int main(int argc, char *argv[]) {
printf("Output: %s\n", output_file);
printf("Quant bits: %d\n", max_index);
printf("Quantiser scale: %.2f\n", quantiser_scale);
printf("Encoding method: %s (int8_t coefficients)\n",
use_twobitmap ? "Twobit-map significance map" : "Raw int8_t storage");
printf("Zstd compression: %s\n", use_zstd ? "enabled" : "disabled");
}
// Detect original sample rate for high-quality resampling
@@ -294,7 +281,7 @@ int main(int argc, char *argv[]) {
// Encode chunk using linked tad32_encode_chunk() from encoder_tad32.c
size_t encoded_size = tad32_encode_chunk(chunk_buffer, TAD32_DEFAULT_CHUNK_SIZE,
max_index, use_zstd, use_twobitmap,
max_index,
quantiser_scale, output_buffer);
if (encoded_size == 0) {