tav: temporal CDF 5/3 is causing coeff overflow on -q 5, replacing with Haar+sports mode

2026-06-06 05:28:31 +09:00 · 2025-12-02 16:07:02 +09:00
parent 046fa98025
commit bc5779d4f5
5 changed files with 1948 additions and 15 deletions
--- a/video_encoder/decoder_tav_dt.c
+++ b/video_encoder/decoder_tav_dt.c
@@ -702,15 +702,15 @@ int main(int argc, char *argv[]) {
        .decomp_levels = 4,           // TAV-DT fixed: 4 spatial levels
        .temporal_levels = 2,         // TAV-DT fixed: 2 temporal levels
        .wavelet_filter = 1,          // TAV-DT fixed: CDF 9/7
-        .temporal_wavelet = 1,        // TAV-DT fixed: CDF 5/3 (NOT Haar!)
+        .temporal_wavelet = 0,        // TAV-DT fixed: Haar
        .entropy_coder = 1,           // TAV-DT fixed: EZBC
        .channel_layout = 0,          // TAV-DT fixed: YCoCg-R
        .perceptual_tuning = 1,       // TAV-DT fixed: Perceptual
        .quantiser_y = (uint8_t)quant_y,     // From DT quality map
        .quantiser_co = (uint8_t)quant_co,
        .quantiser_cg = (uint8_t)quant_cg,
-        .encoder_preset = 0,          // No special presets
-        .monoblock = 1                // TAV-DT fixed: Single tile
+        .encoder_preset = 1,          // Sports mode
+        .monoblock = 1               // TAV-DT fixed: Single tile
    };

    decoder.video_ctx = tav_video_create(&video_params);
@@ -734,9 +734,7 @@ int main(int argc, char *argv[]) {
        long start_pos = ftell(decoder.input_fp);

        // Pass 1: Process all packets for audio only
-        if (decoder.verbose) {
-            printf("\n=== Pass 1: Extracting audio ===\n");
-        }
+        printf("\n=== Pass 1: Extracting audio ===\n");
        while ((result = process_dt_packet(&decoder)) == 0) {
            // Continue processing (only audio is written)
        }
@@ -754,9 +752,7 @@ int main(int argc, char *argv[]) {
        }

        // Pass 2: Rewind and process all packets for video
-        if (decoder.verbose) {
-            printf("\n=== Pass 2: Decoding video ===\n");
-        }
+        printf("\n=== Pass 2: Decoding video ===\n");
        fseek(decoder.input_fp, start_pos, SEEK_SET);
        decoder.packets_processed = 0;  // Reset statistics
        decoder.frames_decoded = 0;
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -12581,7 +12581,8 @@ int main(int argc, char *argv[]) {
        // Enforce mandatory settings
        enc->wavelet_filter = WAVELET_9_7_IRREVERSIBLE;  // 9/7 spatial
        enc->decomp_levels = 4;  // 4 spatial levels
-        enc->temporal_motion_coder = 1;  // CDF 5/3 temporal
+        enc->temporal_motion_coder = 0;  // Haar temporal
+        enc->encoder_preset = 0x01; // Sports mode
        enc->temporal_decomp_levels = 2;  // 2 temporal levels
        enc->channel_layout = CHANNEL_LAYOUT_YCOCG;  // Y-Co-Cg only
        enc->preprocess_mode = PREPROCESS_EZBC;  // EZBC entropy coder
@@ -12603,7 +12604,7 @@ int main(int argc, char *argv[]) {
        printf("TAV-DT: Quality level %d -> Y=%d, Co=%d, Cg=%d, TAD_quality=%d\n",
               enc->quality_level, enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg,
               enc->quality_level);
-        printf("TAV-DT: Enforcing format constraints (9/7 spatial, 5/3 temporal, 4+2 levels, EZBC, monoblock)\n");
+        printf("TAV-DT: Enforcing format constraints (9/7 spatial, Haar temporal, sports mode, 4+2 levels, EZBC, monoblock)\n");
    }

    // Halve internal height for interlaced mode (FFmpeg will output half-height fields)
@@ -12615,7 +12616,7 @@ int main(int argc, char *argv[]) {
    }

    // Smart preset for temporal motion coder based on resolution
-    // For small videos (<500k pixels), use CDF 5/3 (better for fine details)
+    // For small videos (<500k pixels), use Haar with sports preset (better for fine details)
    // For larger videos, use Haar (better compression, smoother motion matters less)
    if (enc->temporal_motion_coder == -1) {
        int num_pixels = enc->width * enc->height;
@@ -12632,9 +12633,10 @@ int main(int argc, char *argv[]) {
            }
        }
        else {
-            enc->temporal_motion_coder = 1;  // CDF 5/3
+            enc->temporal_motion_coder = 0;  // Haar
+            enc->encoder_preset |= 0x01;  // Sports mode
            if (enc->verbose) {
-                printf("Auto-selected CDF 5/3 temporal wavelet (resolution: %dx%d = %d pixels, quantiser_y = %d)\n",
+                printf("Auto-selected Haar temporal wavelet with sports preset (resolution: %dx%d = %d pixels, quantiser_y = %d)\n",
                       enc->width, enc->height, num_pixels, enc->quantiser_y);
            }
        }
--- a/video_encoder/tav_video_decoder.c
+++ b/video_encoder/tav_video_decoder.c
--- a/video_encoder/tav_video_decoder.h
+++ b/video_encoder/tav_video_decoder.h
@@ -0,0 +1,77 @@
+// Created by CuriousTorvald and Claude on 2025-12-02.
+// TAV Video Decoder Library - Shared decoding functions for TAV format
+// Can be used by both regular TAV decoder and TAV-DT decoder
+
+#ifndef TAV_VIDEO_DECODER_H
+#define TAV_VIDEO_DECODER_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+// Video decoder context - opaque to users
+typedef struct tav_video_context tav_video_context_t;
+
+// Video parameters structure
+typedef struct {
+    int width;
+    int height;
+    int decomp_levels;        // Spatial DWT levels (typically 4)
+    int temporal_levels;      // Temporal DWT levels (typically 2)
+    int wavelet_filter;       // 0=CDF 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar
+    int temporal_wavelet;     // Temporal wavelet (0=Haar, 1=CDF 5/3)
+    int entropy_coder;        // 0=Twobitmap, 1=EZBC, 2=RAW
+    int channel_layout;       // 0=YCoCg-R, 1=ICtCp
+    int perceptual_tuning;    // 1=perceptual quantisation, 0=uniform
+    uint8_t quantiser_y;      // Base quantiser index for Y/I
+    uint8_t quantiser_co;     // Base quantiser index for Co/Ct
+    uint8_t quantiser_cg;     // Base quantiser index for Cg/Cp
+    uint8_t encoder_preset;   // Encoder preset flags (0x01=sports; anime, etc.)
+    int monoblock;            // 1=single tile (monoblock), 0=multi-tile
+} tav_video_params_t;
+
+// Create video decoder context
+// Returns NULL on failure
+tav_video_context_t *tav_video_create(const tav_video_params_t *params);
+
+// Free video decoder context
+void tav_video_free(tav_video_context_t *ctx);
+
+// Decode GOP_UNIFIED packet (0x12) to RGB24 frames
+// Input: compressed_data - GOP packet data (after packet type byte)
+//        compressed_size - size of compressed data
+//        gop_size - number of frames in GOP (read from packet)
+// Output: rgb_frames - array of pointers to RGB24 frame buffers (width*height*3 each)
+//         Must be pre-allocated by caller (gop_size pointers, each pointing to width*height*3 bytes)
+// Returns: 0 on success, -1 on error
+int tav_video_decode_gop(tav_video_context_t *ctx,
+                         const uint8_t *compressed_data, uint32_t compressed_size,
+                         uint8_t gop_size, uint8_t **rgb_frames);
+
+// Decode IFRAME packet (0x10) to RGB24 frame
+// Input: compressed_data - I-frame packet data (after packet type byte)
+//        packet_size - size of packet data
+// Output: rgb_frame - pointer to RGB24 frame buffer (width*height*3 bytes)
+//         Must be pre-allocated by caller
+// Returns: 0 on success, -1 on error
+int tav_video_decode_iframe(tav_video_context_t *ctx,
+                            const uint8_t *compressed_data, uint32_t packet_size,
+                            uint8_t *rgb_frame);
+
+// Decode PFRAME packet (0x11) to RGB24 frame (delta from reference)
+// Input: compressed_data - P-frame packet data (after packet type byte)
+//        packet_size - size of packet data
+// Output: rgb_frame - pointer to RGB24 frame buffer (width*height*3 bytes)
+//         Must be pre-allocated by caller
+// Returns: 0 on success, -1 on error
+// Note: Requires previous frame to be decoded first (stored internally as reference)
+int tav_video_decode_pframe(tav_video_context_t *ctx,
+                            const uint8_t *compressed_data, uint32_t packet_size,
+                            uint8_t *rgb_frame);
+
+// Get last error message
+const char *tav_video_get_error(tav_video_context_t *ctx);
+
+// Enable verbose debug output
+void tav_video_set_verbose(tav_video_context_t *ctx, int verbose);
+
+#endif // TAV_VIDEO_DECODER_H