tav: temporal CDF 5/3 is causing coeff overflow on -q 5, replacing with Haar+sports mode

2026-06-09 22:54:03 +09:00 · 2025-12-02 16:07:02 +09:00
parent 046fa98025
commit bc5779d4f5
5 changed files with 1948 additions and 15 deletions
--- a/terranmon.txt
+++ b/terranmon.txt
@@ -1615,7 +1615,7 @@ start of the next packet
 # Video Format
    - Dimension: 720x480 for NTSC, 720x576 for PAL
    - FPS: arbitrary (defined in packet header)
-    - Wavelet: 9/7 Spatial, 5/3 Temporal
+    - Wavelet: 9/7 Spatial, Haar Temporal ("sport" preset always enabled)
    - Decomposition levels: 4 spatial, 2 temporal
    - Quantiser and encoder quality level: arbitrary (defined in packet header as quality index)
    - Extra features:
--- a/video_encoder/decoder_tav_dt.c
+++ b/video_encoder/decoder_tav_dt.c
@@ -702,15 +702,15 @@ int main(int argc, char *argv[]) {
        .decomp_levels = 4,           // TAV-DT fixed: 4 spatial levels
        .temporal_levels = 2,         // TAV-DT fixed: 2 temporal levels
        .wavelet_filter = 1,          // TAV-DT fixed: CDF 9/7
-        .temporal_wavelet = 1,        // TAV-DT fixed: CDF 5/3 (NOT Haar!)
+        .temporal_wavelet = 0,        // TAV-DT fixed: Haar
        .entropy_coder = 1,           // TAV-DT fixed: EZBC
        .channel_layout = 0,          // TAV-DT fixed: YCoCg-R
        .perceptual_tuning = 1,       // TAV-DT fixed: Perceptual
        .quantiser_y = (uint8_t)quant_y,     // From DT quality map
        .quantiser_co = (uint8_t)quant_co,
        .quantiser_cg = (uint8_t)quant_cg,
-        .encoder_preset = 0,          // No special presets
+        .encoder_preset = 1,          // Sports mode
-        .monoblock = 1                // TAV-DT fixed: Single tile
+        .monoblock = 1               // TAV-DT fixed: Single tile
    };
    decoder.video_ctx = tav_video_create(&video_params);
@@ -734,9 +734,7 @@ int main(int argc, char *argv[]) {
        long start_pos = ftell(decoder.input_fp);
        // Pass 1: Process all packets for audio only
-        if (decoder.verbose) {
-            printf("\n=== Pass 1: Extracting audio ===\n");
-        }
+        printf("\n=== Pass 1: Extracting audio ===\n");
        while ((result = process_dt_packet(&decoder)) == 0) {
            // Continue processing (only audio is written)
        }
@@ -754,9 +752,7 @@ int main(int argc, char *argv[]) {
        }
        // Pass 2: Rewind and process all packets for video
-        if (decoder.verbose) {
-            printf("\n=== Pass 2: Decoding video ===\n");
-        }
+        printf("\n=== Pass 2: Decoding video ===\n");
        fseek(decoder.input_fp, start_pos, SEEK_SET);
        decoder.packets_processed = 0;  // Reset statistics
        decoder.frames_decoded = 0;
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -12581,7 +12581,8 @@ int main(int argc, char *argv[]) {
        // Enforce mandatory settings
        enc->wavelet_filter = WAVELET_9_7_IRREVERSIBLE;  // 9/7 spatial
        enc->decomp_levels = 4;  // 4 spatial levels
-        enc->temporal_motion_coder = 1;  // CDF 5/3 temporal
+        enc->temporal_motion_coder = 0;  // Haar temporal
        enc->encoder_preset = 0x01; // Sports mode
        enc->temporal_decomp_levels = 2;  // 2 temporal levels
        enc->channel_layout = CHANNEL_LAYOUT_YCOCG;  // Y-Co-Cg only
        enc->preprocess_mode = PREPROCESS_EZBC;  // EZBC entropy coder
@@ -12603,7 +12604,7 @@ int main(int argc, char *argv[]) {
        printf("TAV-DT: Quality level %d -> Y=%d, Co=%d, Cg=%d, TAD_quality=%d\n",
               enc->quality_level, enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg,
               enc->quality_level);
-        printf("TAV-DT: Enforcing format constraints (9/7 spatial, 5/3 temporal, 4+2 levels, EZBC, monoblock)\n");
+        printf("TAV-DT: Enforcing format constraints (9/7 spatial, Haar temporal, sports mode, 4+2 levels, EZBC, monoblock)\n");
    }
    // Halve internal height for interlaced mode (FFmpeg will output half-height fields)
@@ -12615,7 +12616,7 @@ int main(int argc, char *argv[]) {
    }
    // Smart preset for temporal motion coder based on resolution
-    // For small videos (<500k pixels), use CDF 5/3 (better for fine details)
+    // For small videos (<500k pixels), use Haar with sports preset (better for fine details)
    // For larger videos, use Haar (better compression, smoother motion matters less)
    if (enc->temporal_motion_coder == -1) {
        int num_pixels = enc->width * enc->height;
@@ -12632,9 +12633,10 @@ int main(int argc, char *argv[]) {
            }
        }
        else {
-            enc->temporal_motion_coder = 1;  // CDF 5/3
+            enc->temporal_motion_coder = 0;  // Haar
            enc->encoder_preset |= 0x01;  // Sports mode
            if (enc->verbose) {
-                printf("Auto-selected CDF 5/3 temporal wavelet (resolution: %dx%d = %d pixels, quantiser_y = %d)\n",
+                printf("Auto-selected Haar temporal wavelet with sports preset (resolution: %dx%d = %d pixels, quantiser_y = %d)\n",
                       enc->width, enc->height, num_pixels, enc->quantiser_y);
            }
        }
--- a/video_encoder/tav_video_decoder.c
+++ b/video_encoder/tav_video_decoder.c
--- a/video_encoder/tav_video_decoder.h
+++ b/video_encoder/tav_video_decoder.h
@@ -0,0 +1,77 @@
 // Created by CuriousTorvald and Claude on 2025-12-02.
 // TAV Video Decoder Library - Shared decoding functions for TAV format
 // Can be used by both regular TAV decoder and TAV-DT decoder
 #ifndef TAV_VIDEO_DECODER_H
 #define TAV_VIDEO_DECODER_H
 #include <stdint.h>
 #include <stddef.h>
 // Video decoder context - opaque to users.
 // Only this forward declaration is exposed by the header, so callers can
 // hold the context by pointer only; every function declared below takes a
 // tav_video_context_t * (tav_video_create returns one).
 typedef struct tav_video_context tav_video_context_t;
 // Video parameters structure
 // Describes the stream configuration; passed by const pointer to
 // tav_video_create().
 typedef struct {
    int width;
    int height;
    int decomp_levels;        // Spatial DWT levels (typically 4)
    int temporal_levels;      // Temporal DWT levels (typically 2)
    int wavelet_filter;       // 0=CDF 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar
    int temporal_wavelet;     // Temporal wavelet (0=Haar, 1=CDF 5/3) - NOT the same numbering as wavelet_filter
    int entropy_coder;        // 0=Twobitmap, 1=EZBC, 2=RAW
    int channel_layout;       // 0=YCoCg-R, 1=ICtCp
    int perceptual_tuning;    // 1=perceptual quantisation, 0=uniform
    uint8_t quantiser_y;      // Base quantiser index for Y/I
    uint8_t quantiser_co;     // Base quantiser index for Co/Ct
    uint8_t quantiser_cg;     // Base quantiser index for Cg/Cp
    uint8_t encoder_preset;   // Encoder preset flags (bit 0x01 = sports mode; anime, etc.)
    int monoblock;            // 1=single tile (monoblock), 0=multi-tile
 } tav_video_params_t;
 // Create video decoder context
 // Input: params - stream configuration (see tav_video_params_t); taken
 //        through a const pointer
 // Returns NULL on failure
 // NOTE(review): presumably the returned context must be released with
 // tav_video_free() - confirm against the implementation.
 tav_video_context_t *tav_video_create(const tav_video_params_t *params);
 // Free video decoder context
 // NOTE(review): this header does not state whether a NULL ctx is accepted -
 // confirm against the implementation before relying on it.
 void tav_video_free(tav_video_context_t *ctx);
 // Decode GOP_UNIFIED packet (0x12) to RGB24 frames
 // Input: compressed_data - GOP packet data (after packet type byte)
 //        compressed_size - size of compressed data
 //        gop_size - number of frames in GOP (read from packet);
 //                   uint8_t, so a single call covers at most 255 frames
 // Output: rgb_frames - array of pointers to RGB24 frame buffers (width*height*3 each)
 //         Must be pre-allocated by caller (gop_size pointers, each pointing to width*height*3 bytes)
 // Returns: 0 on success, -1 on error
 int tav_video_decode_gop(tav_video_context_t *ctx,
                         const uint8_t *compressed_data, uint32_t compressed_size,
                         uint8_t gop_size, uint8_t **rgb_frames);
 // Decode IFRAME packet (0x10) to RGB24 frame
 // Input: ctx - decoder context
 //        compressed_data - I-frame packet data (after packet type byte); read-only
 //        packet_size - size of packet data
 // Output: rgb_frame - pointer to RGB24 frame buffer (width*height*3 bytes)
 //         Must be pre-allocated by caller
 // Returns: 0 on success, -1 on error
 int tav_video_decode_iframe(tav_video_context_t *ctx,
                            const uint8_t *compressed_data, uint32_t packet_size,
                            uint8_t *rgb_frame);
 // Decode PFRAME packet (0x11) to RGB24 frame (delta from reference)
 // Input: ctx - decoder context
 //        compressed_data - P-frame packet data (after packet type byte); read-only
 //        packet_size - size of packet data
 // Output: rgb_frame - pointer to RGB24 frame buffer (width*height*3 bytes)
 //         Must be pre-allocated by caller
 // Returns: 0 on success, -1 on error
 // Note: Requires previous frame to be decoded first (stored internally as reference)
 // NOTE(review): behaviour when no reference frame has been decoded yet is
 // not specified in this header - confirm against the implementation.
 int tav_video_decode_pframe(tav_video_context_t *ctx,
                            const uint8_t *compressed_data, uint32_t packet_size,
                            uint8_t *rgb_frame);
 // Get last error message
 // Returns a const char *, so the caller must not modify the string.
 // NOTE(review): ownership and lifetime of the returned string are not
 // stated here - presumably owned by ctx; confirm before caching the pointer.
 const char *tav_video_get_error(tav_video_context_t *ctx);
 // Enable verbose debug output
 // NOTE(review): presumably a nonzero `verbose` enables output and 0
 // disables it - confirm against the implementation.
 void tav_video_set_verbose(tav_video_context_t *ctx, int verbose);
 #endif // TAV_VIDEO_DECODER_H