TAD: pre/de-emphasis

2026-06-06 13:38:30 +09:00 · 2025-11-07 15:16:35 +09:00
parent e743fbf3c0
commit 8878d37e5b
3 changed files with 142 additions and 1 deletions
--- a/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt
+++ b/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt
@@ -160,6 +160,12 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
    // Dither state for noise shaping (2 channels, 2 history samples each)
    private val ditherError = Array(2) { FloatArray(2) }

+    // De-emphasis filter state (persistent across chunks to prevent discontinuities); read and written only by applyDeemphasis()
+    private var deemphPrevXL = 0.0f  // left channel: previous input sample x[n-1]
+    private var deemphPrevYL = 0.0f  // left channel: previous output sample y[n-1]
+    private var deemphPrevXR = 0.0f  // right channel: previous input sample x[n-1]
+    private var deemphPrevYR = 0.0f  // right channel: previous output sample y[n-1]
+
    private val renderRunnables: Array<RenderRunnable>
    private val renderThreads: Array<Thread>
    private val writeQueueingRunnables: Array<WriteQueueingRunnable>
@@ -422,6 +428,43 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
        }
    }

+    //=============================================================================
+    // De-emphasis Filter
+    //=============================================================================
+
+    private fun calculateDeemphasisCoeffs(): Triple<Float, Float, Float> {  // returns (b0, b1, a1) for y[n] = b0*x[n] + b1*x[n-1] - a1*y[n-1]
+        // De-emphasis factor: equals alpha in calculate_preemphasis_coeffs() of encoder_tad.c, so this filter undoes that one
+        val alpha = 0.5f
+
+        val b0 = 1.0f  // Unity gain on the current input sample
+        val b1 = 0.0f  // No feedforward delay
+        val a1 = -alpha  // NEGATIVE because equation has minus sign: y = x - a1*prev_y, which gives y = x + alpha*prev_y
+
+        return Triple(b0, b1, a1)
+    }
+
+    private fun applyDeemphasis(left: FloatArray, right: FloatArray, count: Int) {  // filters the first `count` samples of left/right in place
+        val (b0, b1, a1) = calculateDeemphasisCoeffs()
+
+        // Left channel - use instance state variables (persistent across chunks)
+        for (i in 0 until count) {
+            val x = left[i]
+            val y = b0 * x + b1 * deemphPrevXL - a1 * deemphPrevYL  // y[n] = b0*x[n] + b1*x[n-1] - a1*y[n-1]
+            left[i] = y  // the filtered sample replaces the input sample
+            deemphPrevXL = x  // history keeps the unfiltered input...
+            deemphPrevYL = y  // ...and the filtered output, both carried into the next call
+        }
+
+        // Right channel - use instance state variables (persistent across chunks)
+        for (i in 0 until count) {
+            val x = right[i]
+            val y = b0 * x + b1 * deemphPrevXR - a1 * deemphPrevYR  // same difference equation, separate right-channel state
+            right[i] = y
+            deemphPrevXR = x
+            deemphPrevYR = y
+        }
+    }
+
    // M/S stereo correlation (no dithering - that's now in spectral interpolation)
    private fun msCorrelate(mid: FloatArray, side: FloatArray, left: FloatArray, right: FloatArray, sampleCount: Int) {
        for (i in 0 until sampleCount) {
@@ -526,6 +569,9 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
            // Expand dynamic range (gamma expansion)
            expandGamma(pcm32Left, pcm32Right, sampleCount)

+            // Apply de-emphasis filter (AFTER gamma expansion, BEFORE PCM32f to PCM8)
+            applyDeemphasis(pcm32Left, pcm32Right, sampleCount)
+
            // Dither to 8-bit PCMu8
            pcm32fToPcm8(pcm32Left, pcm32Right, sampleCount)

--- a/video_encoder/decoder_tad.c
+++ b/video_encoder/decoder_tad.c
@@ -398,6 +398,48 @@ static void expand_mu_law(float *left, float *right, size_t count) {
    }
 }

+//=============================================================================
+// De-emphasis Filter
+//=============================================================================
+
+static void calculate_deemphasis_coeffs(float *b0, float *b1, float *a1) {  // writes coefficients for y[n] = b0*x[n] + b1*x[n-1] - a1*y[n-1]
+    // De-emphasis factor: equals alpha in the encoder's calculate_preemphasis_coeffs(), so this filter undoes that one
+    const float alpha = 0.5f;
+
+    *b0 = 1.0f;  // Unity gain on the current input sample
+    *b1 = 0.0f;  // No feedforward delay
+    *a1 = -alpha;  // NEGATIVE because equation has minus sign: y = x - a1*prev_y, which gives y = x + alpha*prev_y
+}
+
+static void apply_deemphasis(float *left, float *right, size_t count) {  // filters the first `count` samples of left/right in place
+    // Static state variables - persistent across chunks to prevent discontinuities
+    static float prev_x_l = 0.0f;  // left: previous input x[n-1]; function-local statics are shared by every caller,
+    static float prev_y_l = 0.0f;  // left: previous output y[n-1]; so two streams decoded through this function
+    static float prev_x_r = 0.0f;  // right: previous input x[n-1]; would share (and corrupt) each other's history
+    static float prev_y_r = 0.0f;  // right: previous output y[n-1]
+
+    float b0, b1, a1;
+    calculate_deemphasis_coeffs(&b0, &b1, &a1);
+
+    // Left channel - use persistent state
+    for (size_t i = 0; i < count; i++) {
+        float x = left[i];
+        float y = b0 * x + b1 * prev_x_l - a1 * prev_y_l;  // y[n] = b0*x[n] + b1*x[n-1] - a1*y[n-1]
+        left[i] = y;  // the filtered sample replaces the input sample
+        prev_x_l = x;  // history keeps the unfiltered input...
+        prev_y_l = y;  // ...and the filtered output, both carried into the next call
+    }
+
+    // Right channel - use persistent state
+    for (size_t i = 0; i < count; i++) {
+        float x = right[i];
+        float y = b0 * x + b1 * prev_x_r - a1 * prev_y_r;  // same difference equation, separate right-channel state
+        right[i] = y;
+        prev_x_r = x;
+        prev_y_r = y;
+    }
+}
+
 static void pcm32f_to_pcm8(const float *fleft, const float *fright, uint8_t *left, uint8_t *right, size_t count, float dither_error[2][2]) {
    const float b1 = 1.5f;   // 1st feedback coefficient
    const float b2 = -0.75f; // 2nd feedback coefficient
@@ -612,6 +654,9 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_
    // expand dynamic range
    expand_gamma(pcm32_left, pcm32_right, sample_count);

+    // Apply de-emphasis filter (AFTER gamma expansion, BEFORE PCM32f to PCM8)
+    apply_deemphasis(pcm32_left, pcm32_right, sample_count);
+
    // dither to 8-bit
    pcm32f_to_pcm8(pcm32_left, pcm32_right, pcm8_left, pcm8_right, sample_count, err);

--- a/video_encoder/encoder_tad.c
+++ b/video_encoder/encoder_tad.c
@@ -205,6 +205,53 @@ static void dwt_forward_multilevel(float *data, int length, int levels) {
    }
 }

+//=============================================================================
+// Pre-emphasis Filter
+//=============================================================================
+
+static void calculate_preemphasis_coeffs(float *b0, float *b1, float *a1) {
+    // Simple first-order digital pre-emphasis: y[n] = x[n] - alpha * x[n-1]; the coefficients are written to *b0, *b1, *a1
+    // NOTE(review): corner frequency was documented as ~1200 Hz (for a 32 kHz codec); that figure may predate alpha = 0.5 - confirm
+    // Attenuates low frequencies relative to high ones (DC gain is 1 - alpha, Nyquist gain is 1 + alpha)
+
+    // Pre-emphasis factor; NOTE(review): the documented scale (0.95 = gentle, 0.90 = moderate, 0.85 = aggressive) does not cover 0.5 - confirm
+    const float alpha = 0.5f;  // Same value as alpha in the decoders' de-emphasis coefficients, so the two filters cancel
+
+    *b0 = 1.0f;
+    *b1 = -alpha;  // Feedforward tap on the previous input sample
+    *a1 = 0.0f;  // No feedback (FIR filter)
+}
+
+// Pre-emphasises the first `count` samples of left/right in place; emphasis at alpha=0.5 shifts quantisation crackles to lower frequencies, which MIGHT be preferable
+static void apply_preemphasis(float *left, float *right, size_t count) {
+    // Static state variables - persistent across chunks to prevent discontinuities
+    static float prev_x_l = 0.0f;  // left: previous input x[n-1]; function-local statics are shared by every caller,
+    static float prev_y_l = 0.0f;  // left: previous output y[n-1]; so two streams encoded through this function
+    static float prev_x_r = 0.0f;  // right: previous input x[n-1]; would share (and corrupt) each other's history
+    static float prev_y_r = 0.0f;  // right: previous output y[n-1]
+
+    float b0, b1, a1;
+    calculate_preemphasis_coeffs(&b0, &b1, &a1);
+
+    // Left channel - use persistent state
+    for (size_t i = 0; i < count; i++) {
+        float x = left[i];
+        float y = b0 * x + b1 * prev_x_l - a1 * prev_y_l;  // a1 is 0 for pre-emphasis, so the prev_y term contributes nothing
+        left[i] = y;  // the filtered sample replaces the input sample
+        prev_x_l = x;  // history keeps the unfiltered input for the next sample/call
+        prev_y_l = y;  // kept up to date although the current coefficients never use it
+    }
+
+    // Right channel - use persistent state
+    for (size_t i = 0; i < count; i++) {
+        float x = right[i];
+        float y = b0 * x + b1 * prev_x_r - a1 * prev_y_r;  // same difference equation, separate right-channel state
+        right[i] = y;
+        prev_x_r = x;
+        prev_y_r = y;
+    }
+}
+
 //=============================================================================
 // M/S Stereo Decorrelation (PCM32f version)
 //=============================================================================
@@ -757,7 +804,10 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
        pcm32_right[i] = pcm32_stereo[i * 2 + 1];
    }

-    // Step 1.1: Compress dynamic range
+    // Step 1.1: Apply pre-emphasis filter (BEFORE gamma compression)
+    apply_preemphasis(pcm32_left, pcm32_right, num_samples);
+
+    // Step 1.2: Compress dynamic range
    compress_gamma(pcm32_left, pcm32_right, num_samples);

    // Step 2: M/S decorrelation