tav: temporal CDF 5/3 is causing coeff overflow on -q 5, replacing with Haar+sports mode

This commit is contained in:
minjaesong
2025-12-02 16:07:02 +09:00
parent 046fa98025
commit bc5779d4f5
5 changed files with 1948 additions and 15 deletions

View File

@@ -702,15 +702,15 @@ int main(int argc, char *argv[]) {
.decomp_levels = 4, // TAV-DT fixed: 4 spatial levels
.temporal_levels = 2, // TAV-DT fixed: 2 temporal levels
.wavelet_filter = 1, // TAV-DT fixed: CDF 9/7
.temporal_wavelet = 1, // TAV-DT fixed: CDF 5/3 (NOT Haar!)
.temporal_wavelet = 0, // TAV-DT fixed: Haar
.entropy_coder = 1, // TAV-DT fixed: EZBC
.channel_layout = 0, // TAV-DT fixed: YCoCg-R
.perceptual_tuning = 1, // TAV-DT fixed: Perceptual
.quantiser_y = (uint8_t)quant_y, // From DT quality map
.quantiser_co = (uint8_t)quant_co,
.quantiser_cg = (uint8_t)quant_cg,
.encoder_preset = 0, // No special presets
.monoblock = 1 // TAV-DT fixed: Single tile
.encoder_preset = 1, // Sports mode
.monoblock = 1 // TAV-DT fixed: Single tile
};
decoder.video_ctx = tav_video_create(&video_params);
@@ -734,9 +734,7 @@ int main(int argc, char *argv[]) {
long start_pos = ftell(decoder.input_fp);
// Pass 1: Process all packets for audio only
if (decoder.verbose) {
printf("\n=== Pass 1: Extracting audio ===\n");
}
printf("\n=== Pass 1: Extracting audio ===\n");
while ((result = process_dt_packet(&decoder)) == 0) {
// Continue processing (only audio is written)
}
@@ -754,9 +752,7 @@ int main(int argc, char *argv[]) {
}
// Pass 2: Rewind and process all packets for video
if (decoder.verbose) {
printf("\n=== Pass 2: Decoding video ===\n");
}
printf("\n=== Pass 2: Decoding video ===\n");
fseek(decoder.input_fp, start_pos, SEEK_SET);
decoder.packets_processed = 0; // Reset statistics
decoder.frames_decoded = 0;

View File

@@ -12581,7 +12581,8 @@ int main(int argc, char *argv[]) {
// Enforce mandatory settings
enc->wavelet_filter = WAVELET_9_7_IRREVERSIBLE; // 9/7 spatial
enc->decomp_levels = 4; // 4 spatial levels
enc->temporal_motion_coder = 1; // CDF 5/3 temporal
enc->temporal_motion_coder = 0; // Haar temporal
enc->encoder_preset = 0x01; // Sports mode
enc->temporal_decomp_levels = 2; // 2 temporal levels
enc->channel_layout = CHANNEL_LAYOUT_YCOCG; // Y-Co-Cg only
enc->preprocess_mode = PREPROCESS_EZBC; // EZBC entropy coder
@@ -12603,7 +12604,7 @@ int main(int argc, char *argv[]) {
printf("TAV-DT: Quality level %d -> Y=%d, Co=%d, Cg=%d, TAD_quality=%d\n",
enc->quality_level, enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg,
enc->quality_level);
printf("TAV-DT: Enforcing format constraints (9/7 spatial, 5/3 temporal, 4+2 levels, EZBC, monoblock)\n");
printf("TAV-DT: Enforcing format constraints (9/7 spatial, Haar temporal, sports mode, 4+2 levels, EZBC, monoblock)\n");
}
// Halve internal height for interlaced mode (FFmpeg will output half-height fields)
@@ -12615,7 +12616,7 @@ int main(int argc, char *argv[]) {
}
// Smart preset for temporal motion coder based on resolution
// For small videos (<500k pixels), use CDF 5/3 (better for fine details)
// For small videos (<500k pixels), use Haar with sports preset (better for fine details)
// For larger videos, use Haar (better compression, smoother motion matters less)
if (enc->temporal_motion_coder == -1) {
int num_pixels = enc->width * enc->height;
@@ -12632,9 +12633,10 @@ int main(int argc, char *argv[]) {
}
}
else {
enc->temporal_motion_coder = 1; // CDF 5/3
enc->temporal_motion_coder = 0; // Haar
enc->encoder_preset |= 0x01; // Sports mode
if (enc->verbose) {
printf("Auto-selected CDF 5/3 temporal wavelet (resolution: %dx%d = %d pixels, quantiser_y = %d)\n",
printf("Auto-selected Haar temporal wavelet with sports preset (resolution: %dx%d = %d pixels, quantiser_y = %d)\n",
enc->width, enc->height, num_pixels, enc->quantiser_y);
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,77 @@
// Created by CuriousTorvald and Claude on 2025-12-02.
// TAV Video Decoder Library - Shared decoding functions for TAV format
// Can be used by both regular TAV decoder and TAV-DT decoder
#ifndef TAV_VIDEO_DECODER_H
#define TAV_VIDEO_DECODER_H
#include <stdint.h>
#include <stddef.h>
// Video decoder context - opaque to users.
// Only a forward declaration is exposed in this header: callers obtain a
// context from tav_video_create() and pass the pointer to the other
// tav_video_* functions without accessing its members.
typedef struct tav_video_context tav_video_context_t;
// Video parameters structure
// Describes the stream configuration handed to tav_video_create().
typedef struct {
int width; // Frame width (RGB24 output buffers are width*height*3 bytes)
int height; // Frame height (RGB24 output buffers are width*height*3 bytes)
int decomp_levels; // Spatial DWT levels (typically 4)
int temporal_levels; // Temporal DWT levels (typically 2)
int wavelet_filter; // 0=CDF 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar
int temporal_wavelet; // Temporal wavelet (0=Haar, 1=CDF 5/3) - numbering differs from wavelet_filter
int entropy_coder; // 0=Twobitmap, 1=EZBC, 2=RAW
int channel_layout; // 0=YCoCg-R, 1=ICtCp
int perceptual_tuning; // 1=perceptual quantisation, 0=uniform
uint8_t quantiser_y; // Base quantiser index for Y/I
uint8_t quantiser_co; // Base quantiser index for Co/Ct
uint8_t quantiser_cg; // Base quantiser index for Cg/Cp
uint8_t encoder_preset; // Encoder preset flags (bit 0x01 = sports mode; anime, etc.)
int monoblock; // 1=single tile (monoblock), 0=multi-tile
} tav_video_params_t;
// Create video decoder context
// Input: params - stream configuration (see tav_video_params_t); taken as a
//        pointer-to-const, so it is not modified through this pointer
// Returns NULL on failure
// The returned context is released with tav_video_free().
tav_video_context_t *tav_video_create(const tav_video_params_t *params);
// Free video decoder context
// Input: ctx - context previously returned by tav_video_create()
// NOTE(review): whether a NULL ctx is accepted is not visible from this header - confirm
void tav_video_free(tav_video_context_t *ctx);
// Decode GOP_UNIFIED packet (0x12) to RGB24 frames
// Input:  ctx             - decoder context
//         compressed_data - GOP packet data (after packet type byte)
//         compressed_size - size of compressed data
//         gop_size        - number of frames in GOP (read from packet);
//                           declared as uint8_t, so at most 255
// Output: rgb_frames      - array of pointers to RGB24 frame buffers (width*height*3 each)
//                           Must be pre-allocated by caller (gop_size pointers, each pointing to width*height*3 bytes)
// Returns: 0 on success, -1 on error
int tav_video_decode_gop(tav_video_context_t *ctx,
const uint8_t *compressed_data, uint32_t compressed_size,
uint8_t gop_size, uint8_t **rgb_frames);
// Decode IFRAME packet (0x10) to RGB24 frame
// Input:  ctx             - decoder context
//         compressed_data - I-frame packet data (after packet type byte)
//         packet_size     - size of packet data
// Output: rgb_frame       - pointer to RGB24 frame buffer (width*height*3 bytes)
//                           Must be pre-allocated by caller
// Returns: 0 on success, -1 on error
int tav_video_decode_iframe(tav_video_context_t *ctx,
const uint8_t *compressed_data, uint32_t packet_size,
uint8_t *rgb_frame);
// Decode PFRAME packet (0x11) to RGB24 frame (delta from reference)
// Input:  ctx             - decoder context (holds the reference frame)
//         compressed_data - P-frame packet data (after packet type byte)
//         packet_size     - size of packet data
// Output: rgb_frame       - pointer to RGB24 frame buffer (width*height*3 bytes)
//                           Must be pre-allocated by caller
// Returns: 0 on success, -1 on error
// Note: Requires previous frame to be decoded first (stored internally as reference)
int tav_video_decode_pframe(tav_video_context_t *ctx,
const uint8_t *compressed_data, uint32_t packet_size,
uint8_t *rgb_frame);
// Get last error message
// Input: ctx - decoder context
// Returns: pointer to a read-only (const char *) message string
// NOTE(review): lifetime/ownership of the returned string is not specified in
// this header - presumably owned by ctx; confirm before storing the pointer
const char *tav_video_get_error(tav_video_context_t *ctx);
// Enable verbose debug output
// Input: ctx     - decoder context
//        verbose - presumably non-zero enables and zero disables output;
//                  confirm against the implementation
void tav_video_set_verbose(tav_video_context_t *ctx, int verbose);
#endif // TAV_VIDEO_DECODER_H