diff --git a/terranmon.txt b/terranmon.txt index 84d7d4c..9b90be1 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -1570,27 +1570,13 @@ If the audio duration doesn't align to chunk boundaries, the final chunk can use a smaller power-of-2 size or be zero-padded. uint16 Sample Count: number of samples per channel (must be power of 2, min 1024) + uint8 Max quantisation index: this number * 2 + 1 is the total steps of quantisation uint32 Chunk Payload Size: size of following payload in bytes * Chunk Payload: encoded M/S stereo data (Zstd compressed if flag set) ### Chunk Payload Structure (before optional Zstd compression) - * Mid Channel Encoded Data - * Side Channel Encoded Data - -### Encoded Channel Data (2-bit Twobitmap Significance Map) - uint8 Significance Map[(num_samples * 2 + 7) / 8] // 2 bits per coefficient - int16 Other Values[variable length] // Non-{-1,0,+1} values - -#### 2-bit Twobitmap Encoding -Each DWT coefficient is encoded using 2 bits in the significance map: - - 00: coefficient is 0 - - 01: coefficient is +1 - - 10: coefficient is -1 - - 11: coefficient is "other" (value stored in Other Values array) - -This encoding exploits the sparsity of quantized DWT coefficients where most -values are 0, ±1 after quantization. "Other" values are stored sequentially -as int16 in the order they appear. + * Mid Channel Encoded Data (raw int8 values) + * Side Channel Encoded Data (raw int8 values) ## Encoding Pipeline diff --git a/video_encoder/encoder_tad.c b/video_encoder/encoder_tad.c index be6fdf8..929d0bd 100644 --- a/video_encoder/encoder_tad.c +++ b/video_encoder/encoder_tad.c @@ -779,7 +779,7 @@ void tad32_free_statistics(void) { //============================================================================= size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, - int max_index, int use_zstd, int use_twobitmap, + int max_index, float quantiser_scale, uint8_t *output) { // Calculate DWT levels from chunk size int dwt_levels = calculate_dwt_levels(num_samples); @@ -824,7 +824,7 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, // Step 3.5: Accumulate coefficient statistics if enabled static int stats_enabled = -1; if (stats_enabled == -1) { - stats_enabled = 1;//getenv("TAD_COEFF_STATS") != NULL; + stats_enabled = getenv("TAD_COEFF_STATS") != NULL; if (stats_enabled) { init_statistics(dwt_levels); } @@ -871,28 +871,24 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, size_t payload_size; - if (use_zstd) { - size_t zstd_bound = ZSTD_compressBound(uncompressed_size); - uint8_t *zstd_buffer = malloc(zstd_bound); + size_t zstd_bound = ZSTD_compressBound(uncompressed_size); + uint8_t *zstd_buffer = malloc(zstd_bound); - payload_size = ZSTD_compress(zstd_buffer, zstd_bound, temp_buffer, uncompressed_size, TAD32_ZSTD_LEVEL); + payload_size = ZSTD_compress(zstd_buffer, zstd_bound, temp_buffer, uncompressed_size, TAD32_ZSTD_LEVEL); - if (ZSTD_isError(payload_size)) { - fprintf(stderr, "Error: Zstd compression failed: %s\n", ZSTD_getErrorName(payload_size)); - free(zstd_buffer); - free(pcm32_left); free(pcm32_right); - free(pcm32_mid); free(pcm32_side); free(dwt_mid); free(dwt_side); - free(quant_mid); free(quant_side); free(temp_buffer); - return 0; - } - - memcpy(write_ptr, zstd_buffer, payload_size); + if (ZSTD_isError(payload_size)) { + fprintf(stderr, "Error: Zstd compression failed: %s\n", ZSTD_getErrorName(payload_size)); free(zstd_buffer); - } else { - payload_size = uncompressed_size; - memcpy(write_ptr, temp_buffer, payload_size); + free(pcm32_left); free(pcm32_right); + free(pcm32_mid); free(pcm32_side); free(dwt_mid); free(dwt_side); + free(quant_mid); free(quant_side); free(temp_buffer); + return 0; } + memcpy(write_ptr, zstd_buffer, payload_size); + free(zstd_buffer); + + *payload_size_ptr = (uint32_t)payload_size; write_ptr += payload_size; diff --git a/video_encoder/encoder_tad.h b/video_encoder/encoder_tad.h index e200ca6..e421c64 100644 --- a/video_encoder/encoder_tad.h +++ b/video_encoder/encoder_tad.h @@ -39,7 +39,7 @@ * * payload (encoded M/S data, optionally Zstd-compressed) */ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, - int quant_bits, int use_zstd, int use_twobitmap, + int quant_bits, float quantiser_scale, uint8_t *output); /** diff --git a/video_encoder/encoder_tad_standalone.c b/video_encoder/encoder_tad_standalone.c index 8424c8e..1bf7ba5 100644 --- a/video_encoder/encoder_tad_standalone.c +++ b/video_encoder/encoder_tad_standalone.c @@ -69,14 +69,10 @@ int main(int argc, char *argv[]) { char *output_file = NULL; int max_index = 7; // Default QUANT_BITS float quantiser_scale = 1.0f; // Default quantiser scaling - int use_zstd = 1; - int use_twobitmap = 1; int verbose = 0; // Parse command line arguments static struct option long_options[] = { - {"no-zstd", no_argument, 0, 'z'}, - {"no-twobitmap", no_argument, 0, 't'}, {"help", no_argument, 0, 'h'}, {0, 0, 0, 0} }; @@ -101,12 +97,6 @@ int main(int argc, char *argv[]) { return 1; } break; - case 'z': - use_zstd = 0; - break; - case 't': - use_twobitmap = 0; - break; case 'v': verbose = 1; break; @@ -166,9 +156,6 @@ int main(int argc, char *argv[]) { printf("Output: %s\n", output_file); printf("Quant bits: %d\n", max_index); printf("Quantiser scale: %.2f\n", quantiser_scale); - printf("Encoding method: %s (int8_t coefficients)\n", - use_twobitmap ? "Twobit-map significance map" : "Raw int8_t storage"); - printf("Zstd compression: %s\n", use_zstd ? "enabled" : "disabled"); } // Detect original sample rate for high-quality resampling @@ -294,7 +281,7 @@ int main(int argc, char *argv[]) { // Encode chunk using linked tad32_encode_chunk() from encoder_tad32.c size_t encoded_size = tad32_encode_chunk(chunk_buffer, TAD32_DEFAULT_CHUNK_SIZE, - max_index, use_zstd, use_twobitmap, + max_index, quantiser_scale, output_buffer); if (encoded_size == 0) {