mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-16 07:56:06 +09:00
TAD: imma just finalise it here
This commit is contained in:
@@ -1570,27 +1570,13 @@ If the audio duration doesn't align to chunk boundaries, the final chunk can use
|
|||||||
a smaller power-of-2 size or be zero-padded.
|
a smaller power-of-2 size or be zero-padded.
|
||||||
|
|
||||||
uint16 Sample Count: number of samples per channel (must be power of 2, min 1024)
|
uint16 Sample Count: number of samples per channel (must be power of 2, min 1024)
|
||||||
|
uint8 Max quantisation index: the total number of quantisation steps is (this value * 2 + 1)
|
||||||
uint32 Chunk Payload Size: size of following payload in bytes
|
uint32 Chunk Payload Size: size of following payload in bytes
|
||||||
* Chunk Payload: encoded M/S stereo data (Zstd compressed if flag set)
|
* Chunk Payload: encoded M/S stereo data (Zstd compressed if flag set)
|
||||||
|
|
||||||
### Chunk Payload Structure (before optional Zstd compression)
|
### Chunk Payload Structure (before optional Zstd compression)
|
||||||
* Mid Channel Encoded Data
|
* Mid Channel Encoded Data (raw int8 values)
|
||||||
* Side Channel Encoded Data
|
* Side Channel Encoded Data (raw int8 values)
|
||||||
|
|
||||||
### Encoded Channel Data (2-bit Twobitmap Significance Map)
|
|
||||||
uint8 Significance Map[(num_samples * 2 + 7) / 8] // 2 bits per coefficient
|
|
||||||
int16 Other Values[variable length] // Non-{-1,0,+1} values
|
|
||||||
|
|
||||||
#### 2-bit Twobitmap Encoding
|
|
||||||
Each DWT coefficient is encoded using 2 bits in the significance map:
|
|
||||||
- 00: coefficient is 0
|
|
||||||
- 01: coefficient is +1
|
|
||||||
- 10: coefficient is -1
|
|
||||||
- 11: coefficient is "other" (value stored in Other Values array)
|
|
||||||
|
|
||||||
This encoding exploits the sparsity of quantized DWT coefficients where most
|
|
||||||
values are 0, ±1 after quantization. "Other" values are stored sequentially
|
|
||||||
as int16 in the order they appear.
|
|
||||||
|
|
||||||
## Encoding Pipeline
|
## Encoding Pipeline
|
||||||
|
|
||||||
|
|||||||
@@ -779,7 +779,7 @@ void tad32_free_statistics(void) {
|
|||||||
//=============================================================================
|
//=============================================================================
|
||||||
|
|
||||||
size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
|
size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
|
||||||
int max_index, int use_zstd, int use_twobitmap,
|
int max_index,
|
||||||
float quantiser_scale, uint8_t *output) {
|
float quantiser_scale, uint8_t *output) {
|
||||||
// Calculate DWT levels from chunk size
|
// Calculate DWT levels from chunk size
|
||||||
int dwt_levels = calculate_dwt_levels(num_samples);
|
int dwt_levels = calculate_dwt_levels(num_samples);
|
||||||
@@ -824,7 +824,7 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
|
|||||||
// Step 3.5: Accumulate coefficient statistics if enabled
|
// Step 3.5: Accumulate coefficient statistics if enabled
|
||||||
static int stats_enabled = -1;
|
static int stats_enabled = -1;
|
||||||
if (stats_enabled == -1) {
|
if (stats_enabled == -1) {
|
||||||
stats_enabled = 1;//getenv("TAD_COEFF_STATS") != NULL;
|
stats_enabled = getenv("TAD_COEFF_STATS") != NULL;
|
||||||
if (stats_enabled) {
|
if (stats_enabled) {
|
||||||
init_statistics(dwt_levels);
|
init_statistics(dwt_levels);
|
||||||
}
|
}
|
||||||
@@ -871,28 +871,24 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
|
|||||||
|
|
||||||
size_t payload_size;
|
size_t payload_size;
|
||||||
|
|
||||||
if (use_zstd) {
|
size_t zstd_bound = ZSTD_compressBound(uncompressed_size);
|
||||||
size_t zstd_bound = ZSTD_compressBound(uncompressed_size);
|
uint8_t *zstd_buffer = malloc(zstd_bound);
|
||||||
uint8_t *zstd_buffer = malloc(zstd_bound);
|
|
||||||
|
|
||||||
payload_size = ZSTD_compress(zstd_buffer, zstd_bound, temp_buffer, uncompressed_size, TAD32_ZSTD_LEVEL);
|
payload_size = ZSTD_compress(zstd_buffer, zstd_bound, temp_buffer, uncompressed_size, TAD32_ZSTD_LEVEL);
|
||||||
|
|
||||||
if (ZSTD_isError(payload_size)) {
|
if (ZSTD_isError(payload_size)) {
|
||||||
fprintf(stderr, "Error: Zstd compression failed: %s\n", ZSTD_getErrorName(payload_size));
|
fprintf(stderr, "Error: Zstd compression failed: %s\n", ZSTD_getErrorName(payload_size));
|
||||||
free(zstd_buffer);
|
|
||||||
free(pcm32_left); free(pcm32_right);
|
|
||||||
free(pcm32_mid); free(pcm32_side); free(dwt_mid); free(dwt_side);
|
|
||||||
free(quant_mid); free(quant_side); free(temp_buffer);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
memcpy(write_ptr, zstd_buffer, payload_size);
|
|
||||||
free(zstd_buffer);
|
free(zstd_buffer);
|
||||||
} else {
|
free(pcm32_left); free(pcm32_right);
|
||||||
payload_size = uncompressed_size;
|
free(pcm32_mid); free(pcm32_side); free(dwt_mid); free(dwt_side);
|
||||||
memcpy(write_ptr, temp_buffer, payload_size);
|
free(quant_mid); free(quant_side); free(temp_buffer);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
memcpy(write_ptr, zstd_buffer, payload_size);
|
||||||
|
free(zstd_buffer);
|
||||||
|
|
||||||
|
|
||||||
*payload_size_ptr = (uint32_t)payload_size;
|
*payload_size_ptr = (uint32_t)payload_size;
|
||||||
write_ptr += payload_size;
|
write_ptr += payload_size;
|
||||||
|
|
||||||
|
|||||||
@@ -39,7 +39,7 @@
|
|||||||
* * payload (encoded M/S data, optionally Zstd-compressed)
|
* * payload (encoded M/S data, optionally Zstd-compressed)
|
||||||
*/
|
*/
|
||||||
size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
|
size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
|
||||||
int quant_bits, int use_zstd, int use_twobitmap,
|
int quant_bits,
|
||||||
float quantiser_scale, uint8_t *output);
|
float quantiser_scale, uint8_t *output);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -69,14 +69,10 @@ int main(int argc, char *argv[]) {
|
|||||||
char *output_file = NULL;
|
char *output_file = NULL;
|
||||||
int max_index = 7; // Default QUANT_BITS
|
int max_index = 7; // Default QUANT_BITS
|
||||||
float quantiser_scale = 1.0f; // Default quantiser scaling
|
float quantiser_scale = 1.0f; // Default quantiser scaling
|
||||||
int use_zstd = 1;
|
|
||||||
int use_twobitmap = 1;
|
|
||||||
int verbose = 0;
|
int verbose = 0;
|
||||||
|
|
||||||
// Parse command line arguments
|
// Parse command line arguments
|
||||||
static struct option long_options[] = {
|
static struct option long_options[] = {
|
||||||
{"no-zstd", no_argument, 0, 'z'},
|
|
||||||
{"no-twobitmap", no_argument, 0, 't'},
|
|
||||||
{"help", no_argument, 0, 'h'},
|
{"help", no_argument, 0, 'h'},
|
||||||
{0, 0, 0, 0}
|
{0, 0, 0, 0}
|
||||||
};
|
};
|
||||||
@@ -101,12 +97,6 @@ int main(int argc, char *argv[]) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 'z':
|
|
||||||
use_zstd = 0;
|
|
||||||
break;
|
|
||||||
case 't':
|
|
||||||
use_twobitmap = 0;
|
|
||||||
break;
|
|
||||||
case 'v':
|
case 'v':
|
||||||
verbose = 1;
|
verbose = 1;
|
||||||
break;
|
break;
|
||||||
@@ -166,9 +156,6 @@ int main(int argc, char *argv[]) {
|
|||||||
printf("Output: %s\n", output_file);
|
printf("Output: %s\n", output_file);
|
||||||
printf("Quant bits: %d\n", max_index);
|
printf("Quant bits: %d\n", max_index);
|
||||||
printf("Quantiser scale: %.2f\n", quantiser_scale);
|
printf("Quantiser scale: %.2f\n", quantiser_scale);
|
||||||
printf("Encoding method: %s (int8_t coefficients)\n",
|
|
||||||
use_twobitmap ? "Twobit-map significance map" : "Raw int8_t storage");
|
|
||||||
printf("Zstd compression: %s\n", use_zstd ? "enabled" : "disabled");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Detect original sample rate for high-quality resampling
|
// Detect original sample rate for high-quality resampling
|
||||||
@@ -294,7 +281,7 @@ int main(int argc, char *argv[]) {
|
|||||||
|
|
||||||
// Encode chunk using linked tad32_encode_chunk() from encoder_tad32.c
|
// Encode chunk using linked tad32_encode_chunk() from encoder_tad32.c
|
||||||
size_t encoded_size = tad32_encode_chunk(chunk_buffer, TAD32_DEFAULT_CHUNK_SIZE,
|
size_t encoded_size = tad32_encode_chunk(chunk_buffer, TAD32_DEFAULT_CHUNK_SIZE,
|
||||||
max_index, use_zstd, use_twobitmap,
|
max_index,
|
||||||
quantiser_scale, output_buffer);
|
quantiser_scale, output_buffer);
|
||||||
|
|
||||||
if (encoded_size == 0) {
|
if (encoded_size == 0) {
|
||||||
|
|||||||
Reference in New Issue
Block a user