TAD: bringing coeff weight back

2026-06-19 10:54:04 +09:00 · 2025-10-29 01:47:14 +09:00
parent 86864c4b7a
commit f06f339d99
4 changed files with 93 additions and 99 deletions
--- a/video_encoder/decoder_tad.c
+++ b/video_encoder/decoder_tad.c
@@ -18,6 +18,22 @@
 // Index 0 = LL band, Index 1-9 = H bands (L9 to L1)
 static const float TAD32_COEFF_SCALARS[] = {64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f};

+// Base quantiser weight table (10 subbands: LL + 9 H bands)
+// Linearly spaced from 1.0 (LL, index 0) to 2.0 (H band at L1, index 9)
+// These weights are multiplied by quantiser_scale during dequantization
+static const float BASE_QUANTISER_WEIGHTS[] = {
+    1.0f,      // LL (L9) - finest preservation
+    1.111f,    // H (L9)
+    1.222f,    // H (L8)
+    1.333f,    // H (L7)
+    1.444f,    // H (L6)
+    1.556f,    // H (L5)
+    1.667f,    // H (L4)
+    1.778f,    // H (L3)
+    1.889f,    // H (L2)
+    2.0f       // H (L1) - coarsest quantization
+};
+
 #define TAD_DEFAULT_CHUNK_SIZE 32768
 #define TAD_MIN_CHUNK_SIZE 1024
 #define TAD_SAMPLE_RATE 32000
@@ -333,11 +349,11 @@ static void pcm32f_to_pcm8(const float *fleft, const float *fright, uint8_t *lef
 //=============================================================================


-#define LAMBDA_FIXED 5.0f
+#define LAMBDA_FIXED 6.0f

 // Lambda-based decompanding decoder (inverse of Laplacian CDF-based encoder)
 // Converts quantized index back to normalized float in [-1, 1]
-static float lambda_decompanding(int16_t quant_val, int max_index) {
+static float lambda_decompanding(int8_t quant_val, int max_index) {
    // Handle zero
    if (quant_val == 0) {
        return 0.0f;
@@ -366,7 +382,7 @@ static float lambda_decompanding(int16_t quant_val, int max_index) {
    return sign * abs_val;
 }

-static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs, size_t count, int chunk_size, int dwt_levels, int max_index) {
+static void dequantize_dwt_coefficients(const int8_t *quantized, float *coeffs, size_t count, int chunk_size, int dwt_levels, int max_index, float quantiser_scale) {

    // Calculate sideband boundaries dynamically
    int first_band_size = chunk_size >> dwt_levels;
@@ -390,63 +406,14 @@ static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs,
        // Decode using lambda companding
        float normalized_val = lambda_decompanding(quantized[i], max_index);

-        // Denormalize using the subband scalar
-        coeffs[i] = normalized_val * TAD32_COEFF_SCALARS[sideband];
+        // Denormalize using the subband scalar and apply base weight + quantiser scaling
+        float weight = BASE_QUANTISER_WEIGHTS[sideband] * quantiser_scale;
+        coeffs[i] = normalized_val * TAD32_COEFF_SCALARS[sideband] * weight;
    }

    free(sideband_starts);
 }

-//=============================================================================
-// Bitplane Decoding with Delta Prediction
-//=============================================================================
-
-// Pure bitplane decoding with delta prediction: each coefficient uses exactly (quant_bits + 1) bits
-// Bit layout: 1 sign bit + quant_bits magnitude bits
-// Sign bit: 0 = positive/zero, 1 = negative
-// Magnitude: unsigned value [0, 2^quant_bits - 1]
-// Delta prediction: plane[i] ^= plane[i-1] (reversed by same operation)
-static size_t decode_bitplanes(const uint8_t *input, int16_t *values, size_t count, int max_index) {
-    int bits_per_coeff = ((int)ceilf(log2f(max_index))) + 1;  // 1 sign bit + quant_bits magnitude bits
-    size_t plane_bytes = (count + 7) / 8;  // Bytes needed for one bitplane
-    size_t input_bytes = plane_bytes * bits_per_coeff;
-
-    // Allocate temporary bitplanes
-    uint8_t **bitplanes = malloc(bits_per_coeff * sizeof(uint8_t*));
-    for (int plane = 0; plane < bits_per_coeff; plane++) {
-        bitplanes[plane] = malloc(plane_bytes);
-        memcpy(bitplanes[plane], input + (plane * plane_bytes), plane_bytes);
-    }
-
-    // Reconstruct coefficients from bitplanes
-    for (size_t i = 0; i < count; i++) {
-        size_t byte_idx = i / 8;
-        size_t bit_offset = i % 8;
-
-        // Read sign bit (plane 0)
-        uint8_t sign_bit = (bitplanes[0][byte_idx] >> bit_offset) & 0x01;
-
-        // Read magnitude bits (planes 1 to quant_bits)
-        uint16_t magnitude = 0;
-        for (int b = 0; b < bits_per_coeff - 1; b++) {
-            if (bitplanes[b + 1][byte_idx] & (1 << bit_offset)) {
-                magnitude |= (1 << b);
-            }
-        }
-
-        // Reconstruct signed value
-        values[i] = sign_bit ? -(int16_t)magnitude : (int16_t)magnitude;
-    }
-
-    // Free temporary bitplanes
-    for (int plane = 0; plane < bits_per_coeff; plane++) {
-        free(bitplanes[plane]);
-    }
-    free(bitplanes);
-
-    return input_bytes;
-}
-
 //=============================================================================
 // Chunk Decoding
 //=============================================================================
@@ -477,7 +444,7 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_
    uint8_t *decompressed = NULL;

    // Estimate decompressed size (generous upper bound)
-    size_t decompressed_size = sample_count * 4 * sizeof(int16_t);
+    size_t decompressed_size = sample_count * 4 * sizeof(int8_t);
    decompressed = malloc(decompressed_size);

    size_t actual_size = ZSTD_decompress(decompressed, decompressed_size, read_ptr, payload_size);
@@ -488,15 +455,13 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_
        return -1;
    }

-    payload = decompressed;
-
    read_ptr += payload_size;
    *bytes_consumed = read_ptr - input;
    *samples_decoded = sample_count;

    // Allocate working buffers
-    int16_t *quant_mid = malloc(sample_count * sizeof(int16_t));
-    int16_t *quant_side = malloc(sample_count * sizeof(int16_t));
+    int8_t *quant_mid = malloc(sample_count * sizeof(int8_t));
+    int8_t *quant_side = malloc(sample_count * sizeof(int8_t));
    float *dwt_mid = malloc(sample_count * sizeof(float));
    float *dwt_side = malloc(sample_count * sizeof(float));
    float *pcm32_left = malloc(sample_count * sizeof(float));
@@ -504,16 +469,16 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_
    uint8_t *pcm8_left = malloc(sample_count * sizeof(uint8_t));
    uint8_t *pcm8_right = malloc(sample_count * sizeof(uint8_t));

-    // Decode bitplanes
-    const uint8_t *payload_ptr = payload;
-    size_t mid_bytes, side_bytes;
+    // Separate Mid/Side
+    memcpy(quant_mid, decompressed, sample_count);
+    memcpy(quant_side, decompressed + sample_count, sample_count);

-    mid_bytes = decode_bitplanes(payload_ptr, quant_mid, sample_count, max_index);
-    side_bytes = decode_bitplanes(payload_ptr + mid_bytes, quant_side, sample_count, max_index);

-    // Dequantize
-    dequantize_dwt_coefficients(quant_mid, dwt_mid, sample_count, sample_count, dwt_levels, max_index);
-    dequantize_dwt_coefficients(quant_side, dwt_side, sample_count, sample_count, dwt_levels, max_index);
+    // Dequantize with quantiser scaling
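+    // Hard-coded 1.0f baseline; must equal the scale the encoder used (its -s option). NOTE(review): no per-stream value is read here, so other scales would dequantise at the wrong amplitude - confirm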
+    float quantiser_scale = 1.0f;
+    dequantize_dwt_coefficients(quant_mid, dwt_mid, sample_count, sample_count, dwt_levels, max_index, quantiser_scale);
+    dequantize_dwt_coefficients(quant_side, dwt_side, sample_count, sample_count, dwt_levels, max_index, quantiser_scale);

    // Inverse DWT
    dwt_haar_inverse_multilevel(dwt_mid, sample_count, dwt_levels);
--- a/video_encoder/encoder_tad.c
+++ b/video_encoder/encoder_tad.c
@@ -18,10 +18,26 @@
 // Index 0 = LL band, Index 1-9 = H bands (L9 to L1)
 static const float TAD32_COEFF_SCALARS[] = {64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f};

+// Base quantiser weight table (10 subbands: LL + 9 H bands)
+// Linearly spaced from 1.0 (LL, index 0) to 2.0 (H band at L1, index 9)
+// These weights are multiplied by quantiser_scale during quantization
+static const float BASE_QUANTISER_WEIGHTS[] = {
+    1.0f,      // LL (L9) - finest preservation
+    1.111f,    // H (L9)
+    1.222f,    // H (L8)
+    1.333f,    // H (L7)
+    1.444f,    // H (L6)
+    1.556f,    // H (L5)
+    1.667f,    // H (L4)
+    1.778f,    // H (L3)
+    1.889f,    // H (L2)
+    2.0f       // H (L1) - coarsest quantization
+};
+
 // Forward declarations for internal functions
 static void dwt_dd4_forward_1d(float *data, int length);
 static void dwt_dd4_forward_multilevel(float *data, int length, int levels);
-static void quantize_dwt_coefficients(const float *coeffs, int8_t *quantized, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int quant_bits, int *current_subband_index);
+static void quantize_dwt_coefficients(const float *coeffs, int8_t *quantized, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int quant_bits, int *current_subband_index, float quantiser_scale);
 static size_t encode_twobitmap(const int8_t *values, size_t count, uint8_t *output);

 static inline float FCLAMP(float x, float min, float max) {
@@ -229,7 +245,7 @@ static void compress_mu_law(float *left, float *right, size_t count) {
 // Quantization with Frequency-Dependent Weighting
 //=============================================================================

-#define LAMBDA_FIXED 5.0f
+#define LAMBDA_FIXED 6.0f

 // Lambda-based companding encoder (based on Laplacian distribution CDF)
 // val must be normalised to [-1,1]
@@ -264,7 +280,7 @@ static int8_t lambda_companding(float val, int max_index) {
    return (int8_t)(sign * index);
 }

-static void quantize_dwt_coefficients(const float *coeffs, int8_t *quantized, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int max_index, int *current_subband_index) {
+static void quantize_dwt_coefficients(const float *coeffs, int8_t *quantized, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int max_index, int *current_subband_index, float quantiser_scale) {
    int first_band_size = chunk_size >> dwt_levels;

    int *sideband_starts = malloc((dwt_levels + 2) * sizeof(int));
@@ -288,8 +304,10 @@ static void quantize_dwt_coefficients(const float *coeffs, int8_t *quantized, si
            current_subband_index[i] = sideband;
        }

-        float val = (coeffs[i] / (TAD32_COEFF_SCALARS[sideband])); // val is normalised to [-1,1]
-        int16_t quant_val = lambda_companding(val, max_index);
+        // Apply base weight and quantiser scaling
+        float weight = BASE_QUANTISER_WEIGHTS[sideband] * quantiser_scale;
+        float val = (coeffs[i] / (TAD32_COEFF_SCALARS[sideband] * weight)); // val is normalised to [-1,1]
+        int8_t quant_val = lambda_companding(val, max_index);

        quantized[i] = quant_val;
    }
@@ -761,7 +779,8 @@ void tad32_free_statistics(void) {
 //=============================================================================

 size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
-                          int max_index, int use_zstd, int use_twobitmap, uint8_t *output) {
+                          int max_index, int use_zstd, int use_twobitmap,
+                          float quantiser_scale, uint8_t *output) {
    // Calculate DWT levels from chunk size
    int dwt_levels = calculate_dwt_levels(num_samples);
    if (dwt_levels < 0) {
@@ -815,9 +834,9 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
        accumulate_coefficients(dwt_side, dwt_levels, num_samples, side_accumulators);
    }

-    // Step 4: Quantize with frequency-dependent weights and dead zone
-    quantize_dwt_coefficients(dwt_mid, quant_mid, num_samples, 1, num_samples, dwt_levels, max_index, NULL);
-    quantize_dwt_coefficients(dwt_side, quant_side, num_samples, 1, num_samples, dwt_levels, max_index, NULL);
+    // Step 4: Quantize with frequency-dependent weights and quantiser scaling
+    quantize_dwt_coefficients(dwt_mid, quant_mid, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale);
+    quantize_dwt_coefficients(dwt_side, quant_side, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale);

    // Step 4.5: Accumulate quantized coefficient statistics if enabled
    if (stats_enabled) {
@@ -829,16 +848,11 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
    uint8_t *temp_buffer = malloc(num_samples * 4);  // Generous buffer
    size_t mid_size, side_size;

-    if (use_twobitmap) {
-        mid_size = encode_twobitmap(quant_mid, num_samples, temp_buffer);
-        side_size = encode_twobitmap(quant_side, num_samples, temp_buffer + mid_size);
-    } else {
-        // Raw int8_t storage
-        memcpy(temp_buffer, quant_mid, num_samples);
-        mid_size = num_samples;
-        memcpy(temp_buffer + mid_size, quant_side, num_samples);
-        side_size = num_samples;
-    }
+    // Raw int8_t storage
+    memcpy(temp_buffer, quant_mid, num_samples);
+    mid_size = num_samples;
+    memcpy(temp_buffer + mid_size, quant_side, num_samples);
+    side_size = num_samples;

    size_t uncompressed_size = mid_size + side_size;

--- a/video_encoder/encoder_tad.h
+++ b/video_encoder/encoder_tad.h
@@ -22,13 +22,15 @@
 /**
 * Encode audio chunk with TAD32 codec (PCM32f version)
 *
- * @param pcm32_stereo  Input PCM32fLE stereo samples (interleaved L,R)
- * @param num_samples   Number of samples per channel (min 1024)
- * @param quant_bits    Quantization bits 4-12 (default: 7)
- * @param use_zstd      1=enable Zstd compression, 0=disable
- * @param use_twobitmap 1=enable twobitmap encoding, 0=raw int8_t storage
- * @param output        Output buffer (must be large enough)
- * @return              Number of bytes written to output, or 0 on error
+ * @param pcm32_stereo    Input PCM32fLE stereo samples (interleaved L,R)
+ * @param num_samples     Number of samples per channel (min 1024)
+ * @param quant_bits      Quantization bits 4-12 (default: 7)
+ * @param use_zstd        1=enable Zstd compression, 0=disable
+ * @param use_twobitmap   Kept for API compatibility; NOTE(review): the encoder now appears to always write raw int8_t storage - confirm
+ * @param quantiser_scale Quantiser scaling factor (1.0=baseline, 2.0=2x coarser quantization)
+ *                        Higher values = more aggressive quantization = smaller files
+ * @param output          Output buffer (must be large enough)
+ * @return                Number of bytes written to output, or 0 on error
 *
 * Output format:
 *   uint16 sample_count (samples per channel)
@@ -37,7 +39,8 @@
 *   *      payload (encoded M/S data, optionally Zstd-compressed)
 */
 size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
-                          int quant_bits, int use_zstd, int use_twobitmap, uint8_t *output);
+                          int quant_bits, int use_zstd, int use_twobitmap,
+                          float quantiser_scale, uint8_t *output);

 /**
 * Print accumulated coefficient statistics
--- a/video_encoder/encoder_tad_standalone.c
+++ b/video_encoder/encoder_tad_standalone.c
@@ -50,6 +50,9 @@ static void print_usage(const char *prog_name) {
    printf("  -o <file>       Output TAD32 file\n");
    printf("  -q <bits>       Quantization bits (default: 7, range: 4-8)\n");
    printf("                  Higher = more precision, larger files\n");
+    printf("  -s <scale>      Quantiser scaling factor (default: 1.0, range: 0.5-4.0)\n");
+    printf("                  Higher = more aggressive quantization, smaller files\n");
+    printf("                  2.0 = quantize 2x coarser than baseline\n");
    printf("  --no-zstd       Disable Zstd compression\n");
    printf("  --no-twobitmap  Disable twobitmap encoding (use raw int8_t storage)\n");
    printf("  -v              Verbose output\n");
@@ -65,6 +68,7 @@ int main(int argc, char *argv[]) {
    char *input_file = NULL;
    char *output_file = NULL;
    int max_index = 7;  // Default QUANT_BITS
+    float quantiser_scale = 1.0f;  // Default quantiser scaling
    int use_zstd = 1;
    int use_twobitmap = 1;
    int verbose = 0;
@@ -79,7 +83,7 @@ int main(int argc, char *argv[]) {

    int opt;
    int option_index = 0;
-    while ((opt = getopt_long(argc, argv, "i:o:q:vh", long_options, &option_index)) != -1) {
+    while ((opt = getopt_long(argc, argv, "i:o:q:s:vh", long_options, &option_index)) != -1) {
        switch (opt) {
            case 'i':
                input_file = optarg;
@@ -89,7 +93,13 @@ int main(int argc, char *argv[]) {
                break;
            case 'q':
                max_index = atoi(optarg);
-
+                break;
+            case 's':
+                quantiser_scale = atof(optarg);
+                if (quantiser_scale < 0.5f || quantiser_scale > 4.0f) {
+                    fprintf(stderr, "Error: Quantiser scale must be in range 0.5-4.0\n");
+                    return 1;
+                }
                break;
            case 'z':
                use_zstd = 0;
@@ -119,7 +129,8 @@ int main(int argc, char *argv[]) {
        printf("%s\n", ENCODER_VENDOR_STRING);
        printf("Input: %s\n", input_file);
        printf("Output: %s\n", output_file);
-        printf("Quant: %d\n", max_index);
+        printf("Quant bits: %d\n", max_index);
+        printf("Quantiser scale: %.2f\n", quantiser_scale);
        printf("Encoding method: %s (int8_t coefficients)\n",
               use_twobitmap ? "Twobit-map significance map" : "Raw int8_t storage");
        printf("Zstd compression: %s\n", use_zstd ? "enabled" : "disabled");
@@ -248,7 +259,8 @@ int main(int argc, char *argv[]) {

        // Encode chunk using linked tad32_encode_chunk() from encoder_tad32.c
        size_t encoded_size = tad32_encode_chunk(chunk_buffer, TAD32_DEFAULT_CHUNK_SIZE,
-                                                 max_index, use_zstd, use_twobitmap, output_buffer);
+                                                 max_index, use_zstd, use_twobitmap,
+                                                 quantiser_scale, output_buffer);

        if (encoded_size == 0) {
            fprintf(stderr, "Error: Chunk encoding failed at chunk %zu\n", chunk_idx);