TAD: even the slight companding vastly improves low-volume samples but also increases encoded size by a lot

2026-06-06 13:38:30 +09:00 · 2025-10-27 01:22:36 +09:00
parent 67f7c091eb
commit 9c27d114fc
3 changed files with 30 additions and 6 deletions
--- a/video_encoder/decoder_tad.c
+++ b/video_encoder/decoder_tad.c
@@ -277,6 +277,18 @@ static void expand_gamma(float *left, float *right, size_t count) {
    }
 }

+static void expand_mu_law(float *left, float *right, size_t count) {  // in-place mu-law expansion of the first `count` samples of both channels
+    static float MU = 255.0f;  // curve parameter; NOTE(review): presumably must equal MU in compress_mu_law (encoder_tad.c) for a lossless round trip - confirm
+
+    for (size_t i = 0; i < count; i++) {
+        // decode(y) = sign(y) * ((1 + MU)^|y| - 1) / MU   (mu-law expansion, not the gamma curve)
+        float x = left[i];
+        left[i] = signum(x) * (powf(1.0f + MU, fabsf(x)) - 1.0f) / MU;
+        float y = right[i];
+        right[i] = signum(y) * (powf(1.0f + MU, fabsf(y)) - 1.0f) / MU;
+    }
+}
+
 static void pcm32f_to_pcm8(const float *fleft, const float *fright, uint8_t *left, uint8_t *right, size_t count, float dither_error[2][2]) {
    const float b1 = 1.5f;   // 1st feedback coefficient
    const float b2 = -0.75f; // 2nd feedback coefficient
@@ -321,7 +333,7 @@ static void pcm32f_to_pcm8(const float *fleft, const float *fright, uint8_t *lef
 //=============================================================================


-#define LAMBDA_FIXED 5.8f
+#define LAMBDA_FIXED 5.0f

 // Lambda-based decompanding decoder (inverse of Laplacian CDF-based encoder)
 // Converts quantized index back to normalized float in [-1, 1]
@@ -523,7 +535,7 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_
    ms_correlate(dwt_mid, dwt_side, pcm32_left, pcm32_right, sample_count);

    // expand dynamic range
-//    expand_gamma(pcm32_left, pcm32_right, sample_count);
+    expand_gamma(pcm32_left, pcm32_right, sample_count);

    // dither to 8-bit
    pcm32f_to_pcm8(pcm32_left, pcm32_right, pcm8_left, pcm8_right, sample_count, err);
--- a/video_encoder/encoder_tad.c
+++ b/video_encoder/encoder_tad.c
@@ -215,11 +215,23 @@ static void compress_gamma(float *left, float *right, size_t count) {
    }
 }

+static void compress_mu_law(float *left, float *right, size_t count) {  // in-place mu-law compression of the first `count` samples of both channels
+    static float MU = 255.0f;  // curve parameter; NOTE(review): presumably must equal MU in expand_mu_law (decoder_tad.c) for a lossless round trip - confirm
+
+    for (size_t i = 0; i < count; i++) {
+        // encode(x) = sign(x) * ln(1 + MU * |x|) / ln(1 + MU)   (mu-law compression, not the gamma curve)
+        float x = left[i];
+        left[i] = signum(x) * logf(1.0f + MU * fabsf(x)) / logf(1.0f + MU);
+        float y = right[i];
+        right[i] = signum(y) * logf(1.0f + MU * fabsf(y)) / logf(1.0f + MU);
+    }
+}
+
 //=============================================================================
 // Quantization with Frequency-Dependent Weighting
 //=============================================================================

-#define LAMBDA_FIXED 5.8f
+#define LAMBDA_FIXED 5.0f

 // Lambda-based companding encoder (based on Laplacian distribution CDF)
 // val must be normalised to [-1,1]
@@ -651,7 +663,7 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
    }

    // Step 1.1: Compress dynamic range
-//    compress_gamma(pcm32_left, pcm32_right, num_samples);
+    compress_gamma(pcm32_left, pcm32_right, num_samples);

    // Step 2: M/S decorrelation
    ms_decorrelate(pcm32_left, pcm32_right, pcm32_mid, pcm32_side, num_samples);
--- a/video_encoder/encoder_tad_standalone.c
+++ b/video_encoder/encoder_tad_standalone.c
@@ -85,8 +85,8 @@ int main(int argc, char *argv[]) {
                break;
            case 'q':
                quant_bits = atoi(optarg);
-                if (quant_bits < 4 || quant_bits > 12) {
-                    fprintf(stderr, "Error: Quantization bits must be between 4 and 12\n");
+                if (quant_bits < 2 || quant_bits > 12) {
+                    fprintf(stderr, "Error: Quantization bits must be between 2 and 12\n");
                    return 1;
                }
                break;