perceptual model copied to TAV decoder_tav.c

2026-06-06 13:38:30 +09:00 · 2025-09-27 00:50:25 +09:00
parent c50d015515
commit d85f8002cc
1 changed files with 195 additions and 7 deletions
--- a/video_encoder/decoder_tav.c
+++ b/video_encoder/decoder_tav.c
@@ -63,6 +63,167 @@ typedef struct {
    int frame_size;
 } tav_decoder_t;

+// TAV perceptual quantization tables, indexed by quality level 0..5 (values must match the Kotlin decoder exactly)
+static const float ANISOTROPY_MULT[6]        = {1.8f, 1.6f, 1.4f, 1.2f, 1.0f, 1.0f};  // luma HL multiplier
+static const float ANISOTROPY_BIAS[6]        = {0.2f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f};  // luma HL additive bias
+static const float ANISOTROPY_MULT_CHROMA[6] = {6.6f, 5.5f, 4.4f, 3.3f, 2.2f, 1.1f};  // chroma HL/HH multiplier
+static const float ANISOTROPY_BIAS_CHROMA[6] = {1.0f, 0.8f, 0.6f, 0.4f, 0.2f, 0.0f};  // chroma HH additive bias
+static const float TWO_PIXEL_DETAILER  = 0.92f;  // luma HL/HH scale when the perceptual level is in [1.8, 2.2]
+static const float FOUR_PIXEL_DETAILER = 0.88f;  // luma HL/HH scale when the perceptual level is in [2.8, 3.2]
+
+// One DWT subband, described as a contiguous run inside the linear coefficient array
+typedef struct {
+    int level;              // Decomposition level, 1..decomp_levels (the LL entry carries decomp_levels)
+    int subband_type;       // 0=LL, 1=LH, 2=HL, 3=HH
+    int coeff_start;        // Index of the subband's first coefficient in the linear coefficient array
+    int coeff_count;        // Number of coefficients that belong to this subband
+} dwt_subband_info_t;
+
+// Perceptual model functions (must match Kotlin exactly)
+// Quality table index 0..5: q_index - 1 when q_index > 0 (capped at 5, the last table slot), else from q_y_global
+static int tav_derive_encoder_qindex(int q_index, int q_y_global) {
+    if (q_index > 0) return (q_index > 6) ? 5 : q_index - 1;
+    if (q_y_global >= 60) return 0;
+    if (q_y_global >= 42) return 1;
+    if (q_y_global >= 25) return 2;
+    if (q_y_global >= 12) return 3;
+    if (q_y_global >= 6) return 4;
+    return 5; // every q_y_global below 6 maps to the last level
+}
+
+// Luma LH weight: linear in level from level 4 up (equal to 1.2 at level 4), quadratic in level below 4
+static float perceptual_model3_LH(int quality, float level) {
+    const float base = 1.2f;
+    if (level >= 4) return base - ((quality + 1.0f) / 15.0f) * (level - 4.0f);
+    const float lin = (quality + 1.0f) / -15.0f;
+    const float offset = base - 4.0f * lin - ((-16.0f * (quality - 5.0f)) / 15.0f);
+    return (lin * level) - (((quality - 5.0f) * (level - 8.0f) * level) / 15.0f) + offset;
+}
+
+static float perceptual_model3_HL(int quality, float LH) { // luma HL weight derived from the LH weight
+    return LH * ANISOTROPY_MULT[quality] + ANISOTROPY_BIAS[quality]; // quality indexes both tables, unchecked here
+}
+
+static float perceptual_model3_HH(float LH, float HL) { // luma HH weight from the LH and HL weights
+    return (HL / LH) * 1.44f; // NOTE(review): LH == 0 gives inf/NaN; there is no guard here
+}
+
+static float perceptual_model3_LL(int quality, float level) { // luma LL weight, built from two LH samples
+    const float lh_here = perceptual_model3_LH(quality, level);
+    const float lh_ratio = perceptual_model3_LH(quality, level - 1) / lh_here;
+    return lh_here / lh_ratio;
+}
+
+static float perceptual_model3_chroma_basecurve(int quality, float level) { // chroma base weight; equals 1 at level 4
+    return 1.0f - (1.0f / (0.5f * quality * quality + 1.0f)) * (level - 4.0f); // slope flattens as quality grows
+}
+
+// Perceptual quantizer weight for one subband (must match Kotlin exactly for valid inputs).
+//   q_index, q_y_global  quality fields, reduced to a table index by tav_derive_encoder_qindex()
+//   level0               decomposition level of the subband, 1..max_levels
+//   subband_type         0=LL, 1=LH, 2=HL, any other value is treated as HH
+//   is_chroma            zero selects the luma model, non-zero the chroma model
+//   max_levels           total number of decomposition levels
+// Returns the weight; callers in this file multiply it into the base quantizer.
+static float get_perceptual_weight(int q_index, int q_y_global, int level0, int subband_type,
+                                  int is_chroma, int max_levels) {
+    // Convert to perceptual level (1-6 scale). With exactly one decomposition level the
+    // ratio would be a division by zero (0/0 = NaN for level0 == 1) and every weight would
+    // come out as NaN, so that case is pinned to perceptual level 1.
+    const float level = (max_levels != 1)
+        ? 1.0f + ((level0 - 1.0f) / (max_levels - 1.0f)) * 5.0f
+        : 1.0f;
+    const int quality_level = tav_derive_encoder_qindex(q_index, q_y_global);
+
+    if (is_chroma) {
+        // CHROMA CHANNELS: LL is never reweighted
+        if (subband_type == 0) return 1.0f;
+
+        // LH, HL and HH build on the base curve and are floored at 1
+        const float base = perceptual_model3_chroma_basecurve(quality_level, level - 1);
+        if (subband_type == 1) return fmaxf(base, 1.0f);
+        if (subband_type == 2) return fmaxf(base * ANISOTROPY_MULT_CHROMA[quality_level], 1.0f);
+        return fmaxf(base * ANISOTROPY_MULT_CHROMA[quality_level] + ANISOTROPY_BIAS_CHROMA[quality_level], 1.0f);
+    }
+
+    // LUMA CHANNEL
+    if (subband_type == 0) return perceptual_model3_LL(quality_level, level);
+
+    const float LH = perceptual_model3_LH(quality_level, level);
+    if (subband_type == 1) return LH;
+
+    // HL and HH share the same attenuation near perceptual levels 2 and 3
+    float detailer = 1.0f;
+    if (level >= 1.8f && level <= 2.2f) detailer = TWO_PIXEL_DETAILER;
+    else if (level >= 2.8f && level <= 3.2f) detailer = FOUR_PIXEL_DETAILER;
+
+    const float HL = perceptual_model3_HL(quality_level, LH);
+    if (subband_type == 2) return HL * detailer;
+    // HH subband
+    return perceptual_model3_HH(LH, HL) * detailer;
+}
+
+// Calculate DWT subband layout (must match Kotlin exactly).
+//   width, height   plane dimensions as passed by the caller
+//   decomp_levels   number of decomposition levels
+//   subbands        output array; the caller must provide room for 1 + 3 * decomp_levels entries
+// Returns the number of entries written. Entry 0 is LL; it is followed by LH, HL, HH for each level
+// from decomp_levels down to 1, each subband starting where the previous one ended.
+static int calculate_subband_layout(int width, int height, int decomp_levels, dwt_subband_info_t *subbands) {
+    dwt_subband_info_t *out = subbands;
+
+    // LL: both dimensions shifted right once per decomposition level, placed at index 0
+    const int ll_size = (width >> decomp_levels) * (height >> decomp_levels);
+    *out++ = (dwt_subband_info_t){decomp_levels, 0, 0, ll_size};
+    int next_start = ll_size;
+
+    // Counting the shift upwards visits the levels from decomp_levels down to 1
+    for (int shift = 1; shift <= decomp_levels; shift++) {
+        const int level = decomp_levels - shift + 1;
+        const int band_width = width >> shift;
+        const int band_height = height >> shift;
+        const int band_size = band_width * band_height;
+
+        // Types 1..3 (LH, HL, HH) have the same size and sit back to back
+        for (int type = 1; type <= 3; type++) {
+            *out++ = (dwt_subband_info_t){level, type, next_start, band_size};
+            next_start += band_size;
+        }
+    }
+
+    // Number of entries = distance the write cursor has travelled
+    return (int)(out - subbands);
+}
+
+// Apply perceptual dequantization to DWT coefficients: dequantized[i] = quantized[i] * base_quantizer * weight,
+// where weight comes from get_perceptual_weight() for the subband containing i. Both arrays hold
+// width * height entries; entries outside every subband stay 0. decomp_levels outside 0..10 would
+// overflow the 32-entry layout table (1 + 3 * levels entries), so then the output is only zeroed.
+static void dequantize_dwt_subbands_perceptual(int q_index, int q_y_global, const int16_t *quantized,
+                                              float *dequantized, int width, int height, int decomp_levels,
+                                              float base_quantizer, int is_chroma) {
+    enum { MAX_SUBBANDS = 32, MAX_LEVELS = (MAX_SUBBANDS - 1) / 3 };
+    dwt_subband_info_t subbands[MAX_SUBBANDS];
+    // Initialize output array
+    const int coeff_count = width * height;
+    for (int i = 0; i < coeff_count; i++) {
+        dequantized[i] = 0.0f;
+    }
+    if (decomp_levels < 0 || decomp_levels > MAX_LEVELS) return; // layout would not fit subbands[]
+
+    const int subband_count = calculate_subband_layout(width, height, decomp_levels, subbands);
+    for (int s = 0; s < subband_count; s++) {
+        const float weight = get_perceptual_weight(q_index, q_y_global, subbands[s].level,
+                                                  subbands[s].subband_type, is_chroma, decomp_levels);
+        const float effective_quantizer = base_quantizer * weight;
+        const int first = subbands[s].coeff_start;
+        const int last = first + subbands[s].coeff_count; // one past the subband's final coefficient
+        for (int idx = first; idx < last && idx < coeff_count; idx++) {
+            dequantized[idx] = quantized[idx] * effective_quantizer;
+        }
+    }
+}
+
 // 9/7 inverse DWT (from TSVM Kotlin code)
 static void dwt_97_inverse_1d(float *data, int length) {
    if (length < 2) return;
@@ -401,23 +562,50 @@ static int decode_frame(tav_decoder_t *decoder) {
        int coeff_count = decoder->frame_size;
        uint8_t *coeff_ptr = ptr;

-        // Read and dequantize coefficients (simple version for now)
+        // Read coefficients into temporary arrays
+        int16_t *quantized_y = malloc(coeff_count * sizeof(int16_t));
+        int16_t *quantized_co = malloc(coeff_count * sizeof(int16_t));
+        int16_t *quantized_cg = malloc(coeff_count * sizeof(int16_t));
+
        for (int i = 0; i < coeff_count; i++) {
-            int16_t y_coeff = (int16_t)((coeff_ptr[1] << 8) | coeff_ptr[0]);
-            decoder->dwt_buffer_y[i] = y_coeff * qy;
+            quantized_y[i] = (int16_t)((coeff_ptr[1] << 8) | coeff_ptr[0]);
            coeff_ptr += 2;
        }
        for (int i = 0; i < coeff_count; i++) {
-            int16_t co_coeff = (int16_t)((coeff_ptr[1] << 8) | coeff_ptr[0]);
-            decoder->dwt_buffer_co[i] = co_coeff * qco;
+            quantized_co[i] = (int16_t)((coeff_ptr[1] << 8) | coeff_ptr[0]);
            coeff_ptr += 2;
        }
        for (int i = 0; i < coeff_count; i++) {
-            int16_t cg_coeff = (int16_t)((coeff_ptr[1] << 8) | coeff_ptr[0]);
-            decoder->dwt_buffer_cg[i] = cg_coeff * qcg;
+            quantized_cg[i] = (int16_t)((coeff_ptr[1] << 8) | coeff_ptr[0]);
            coeff_ptr += 2;
        }

+        // Apply dequantization (perceptual for version 5, uniform for earlier versions)
+        const int is_perceptual = (decoder->header.version == 5);
+        if (is_perceptual) {
+            // Use perceptual dequantization matching Kotlin decoder
+            dequantize_dwt_subbands_perceptual(0, qy, quantized_y, decoder->dwt_buffer_y,
+                                              decoder->header.width, decoder->header.height,
+                                              decoder->header.decomp_levels, qy, 0);
+            dequantize_dwt_subbands_perceptual(0, qy, quantized_co, decoder->dwt_buffer_co,
+                                              decoder->header.width, decoder->header.height,
+                                              decoder->header.decomp_levels, qco, 1);
+            dequantize_dwt_subbands_perceptual(0, qy, quantized_cg, decoder->dwt_buffer_cg,
+                                              decoder->header.width, decoder->header.height,
+                                              decoder->header.decomp_levels, qcg, 1);
+        } else {
+            // Uniform dequantization for older versions
+            for (int i = 0; i < coeff_count; i++) {
+                decoder->dwt_buffer_y[i] = quantized_y[i] * qy;
+                decoder->dwt_buffer_co[i] = quantized_co[i] * qco;
+                decoder->dwt_buffer_cg[i] = quantized_cg[i] * qcg;
+            }
+        }
+
+        free(quantized_y);
+        free(quantized_co);
+        free(quantized_cg);
+
        // Apply inverse DWT
        apply_inverse_dwt_multilevel(decoder->dwt_buffer_y, decoder->header.width, decoder->header.height,
                                   decoder->header.decomp_levels, decoder->header.wavelet_filter);