mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 11:51:49 +09:00
tav: temporal CDF 5/3 is causing coeff overflow on -q 5, replacing with Haar+sports mode
This commit is contained in:
@@ -702,15 +702,15 @@ int main(int argc, char *argv[]) {
|
||||
.decomp_levels = 4, // TAV-DT fixed: 4 spatial levels
|
||||
.temporal_levels = 2, // TAV-DT fixed: 2 temporal levels
|
||||
.wavelet_filter = 1, // TAV-DT fixed: CDF 9/7
|
||||
.temporal_wavelet = 1, // TAV-DT fixed: CDF 5/3 (NOT Haar!)
|
||||
.temporal_wavelet = 0, // TAV-DT fixed: Haar
|
||||
.entropy_coder = 1, // TAV-DT fixed: EZBC
|
||||
.channel_layout = 0, // TAV-DT fixed: YCoCg-R
|
||||
.perceptual_tuning = 1, // TAV-DT fixed: Perceptual
|
||||
.quantiser_y = (uint8_t)quant_y, // From DT quality map
|
||||
.quantiser_co = (uint8_t)quant_co,
|
||||
.quantiser_cg = (uint8_t)quant_cg,
|
||||
.encoder_preset = 0, // No special presets
|
||||
.monoblock = 1 // TAV-DT fixed: Single tile
|
||||
.encoder_preset = 1, // Sports mode
|
||||
.monoblock = 1 // TAV-DT fixed: Single tile
|
||||
};
|
||||
|
||||
decoder.video_ctx = tav_video_create(&video_params);
|
||||
@@ -734,9 +734,7 @@ int main(int argc, char *argv[]) {
|
||||
long start_pos = ftell(decoder.input_fp);
|
||||
|
||||
// Pass 1: Process all packets for audio only
|
||||
if (decoder.verbose) {
|
||||
printf("\n=== Pass 1: Extracting audio ===\n");
|
||||
}
|
||||
printf("\n=== Pass 1: Extracting audio ===\n");
|
||||
while ((result = process_dt_packet(&decoder)) == 0) {
|
||||
// Continue processing (only audio is written)
|
||||
}
|
||||
@@ -754,9 +752,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
|
||||
// Pass 2: Rewind and process all packets for video
|
||||
if (decoder.verbose) {
|
||||
printf("\n=== Pass 2: Decoding video ===\n");
|
||||
}
|
||||
printf("\n=== Pass 2: Decoding video ===\n");
|
||||
fseek(decoder.input_fp, start_pos, SEEK_SET);
|
||||
decoder.packets_processed = 0; // Reset statistics
|
||||
decoder.frames_decoded = 0;
|
||||
|
||||
@@ -12581,7 +12581,8 @@ int main(int argc, char *argv[]) {
|
||||
// Enforce mandatory settings
|
||||
enc->wavelet_filter = WAVELET_9_7_IRREVERSIBLE; // 9/7 spatial
|
||||
enc->decomp_levels = 4; // 4 spatial levels
|
||||
enc->temporal_motion_coder = 1; // CDF 5/3 temporal
|
||||
enc->temporal_motion_coder = 0; // Haar temporal
|
||||
enc->encoder_preset = 0x01; // Sports mode
|
||||
enc->temporal_decomp_levels = 2; // 2 temporal levels
|
||||
enc->channel_layout = CHANNEL_LAYOUT_YCOCG; // Y-Co-Cg only
|
||||
enc->preprocess_mode = PREPROCESS_EZBC; // EZBC entropy coder
|
||||
@@ -12603,7 +12604,7 @@ int main(int argc, char *argv[]) {
|
||||
printf("TAV-DT: Quality level %d -> Y=%d, Co=%d, Cg=%d, TAD_quality=%d\n",
|
||||
enc->quality_level, enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg,
|
||||
enc->quality_level);
|
||||
printf("TAV-DT: Enforcing format constraints (9/7 spatial, 5/3 temporal, 4+2 levels, EZBC, monoblock)\n");
|
||||
printf("TAV-DT: Enforcing format constraints (9/7 spatial, Haar temporal, sports mode, 4+2 levels, EZBC, monoblock)\n");
|
||||
}
|
||||
|
||||
// Halve internal height for interlaced mode (FFmpeg will output half-height fields)
|
||||
@@ -12615,7 +12616,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
|
||||
// Smart preset for temporal motion coder based on resolution
|
||||
// For small videos (<500k pixels), use CDF 5/3 (better for fine details)
|
||||
// For small videos (<500k pixels), use Haar with sports preset (better for fine details)
|
||||
// For larger videos, use Haar (better compression, smoother motion matters less)
|
||||
if (enc->temporal_motion_coder == -1) {
|
||||
int num_pixels = enc->width * enc->height;
|
||||
@@ -12632,9 +12633,10 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
else {
|
||||
enc->temporal_motion_coder = 1; // CDF 5/3
|
||||
enc->temporal_motion_coder = 0; // Haar
|
||||
enc->encoder_preset |= 0x01; // Sports mode
|
||||
if (enc->verbose) {
|
||||
printf("Auto-selected CDF 5/3 temporal wavelet (resolution: %dx%d = %d pixels, quantiser_y = %d)\n",
|
||||
printf("Auto-selected Haar temporal wavelet with sports preset (resolution: %dx%d = %d pixels, quantiser_y = %d)\n",
|
||||
enc->width, enc->height, num_pixels, enc->quantiser_y);
|
||||
}
|
||||
}
|
||||
|
||||
1858
video_encoder/tav_video_decoder.c
Normal file
1858
video_encoder/tav_video_decoder.c
Normal file
File diff suppressed because it is too large
Load Diff
77
video_encoder/tav_video_decoder.h
Normal file
77
video_encoder/tav_video_decoder.h
Normal file
@@ -0,0 +1,77 @@
|
||||
// Created by CuriousTorvald and Claude on 2025-12-02.
|
||||
// TAV Video Decoder Library - Shared decoding functions for TAV format
|
||||
// Can be used by both regular TAV decoder and TAV-DT decoder
|
||||
|
||||
#ifndef TAV_VIDEO_DECODER_H
|
||||
#define TAV_VIDEO_DECODER_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
// Video decoder context - opaque to users
|
||||
typedef struct tav_video_context tav_video_context_t;
|
||||
|
||||
// Video parameters structure
|
||||
typedef struct {
|
||||
int width;
|
||||
int height;
|
||||
int decomp_levels; // Spatial DWT levels (typically 4)
|
||||
int temporal_levels; // Temporal DWT levels (typically 2)
|
||||
int wavelet_filter; // 0=CDF 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar
|
||||
int temporal_wavelet; // Temporal wavelet (0=Haar, 1=CDF 5/3) - matches the TAV-DT decoder/encoder call sites; NOTE(review): confirm against tav_video_decoder.c
|
||||
int entropy_coder; // 0=Twobitmap, 1=EZBC, 2=RAW
|
||||
int channel_layout; // 0=YCoCg-R, 1=ICtCp
|
||||
int perceptual_tuning; // 1=perceptual quantisation, 0=uniform
|
||||
uint8_t quantiser_y; // Base quantiser index for Y/I
|
||||
uint8_t quantiser_co; // Base quantiser index for Co/Ct
|
||||
uint8_t quantiser_cg; // Base quantiser index for Cg/Cp
|
||||
uint8_t encoder_preset; // Encoder preset flags (sports, anime, etc.)
|
||||
int monoblock; // 1=single tile (monoblock), 0=multi-tile
|
||||
} tav_video_params_t;
|
||||
|
||||
// Create video decoder context
|
||||
// Returns NULL on failure
|
||||
tav_video_context_t *tav_video_create(const tav_video_params_t *params);
|
||||
|
||||
// Free video decoder context
|
||||
void tav_video_free(tav_video_context_t *ctx);
|
||||
|
||||
// Decode GOP_UNIFIED packet (0x12) to RGB24 frames
|
||||
// Input: compressed_data - GOP packet data (after packet type byte)
|
||||
// compressed_size - size of compressed data
|
||||
// gop_size - number of frames in GOP (read from packet)
|
||||
// Output: rgb_frames - array of pointers to RGB24 frame buffers (width*height*3 each)
|
||||
// Must be pre-allocated by caller (gop_size pointers, each pointing to width*height*3 bytes)
|
||||
// Returns: 0 on success, -1 on error
|
||||
int tav_video_decode_gop(tav_video_context_t *ctx,
|
||||
const uint8_t *compressed_data, uint32_t compressed_size,
|
||||
uint8_t gop_size, uint8_t **rgb_frames);
|
||||
|
||||
// Decode IFRAME packet (0x10) to RGB24 frame
|
||||
// Input: compressed_data - I-frame packet data (after packet type byte)
|
||||
// packet_size - size of packet data
|
||||
// Output: rgb_frame - pointer to RGB24 frame buffer (width*height*3 bytes)
|
||||
// Must be pre-allocated by caller
|
||||
// Returns: 0 on success, -1 on error
|
||||
int tav_video_decode_iframe(tav_video_context_t *ctx,
|
||||
const uint8_t *compressed_data, uint32_t packet_size,
|
||||
uint8_t *rgb_frame);
|
||||
|
||||
// Decode PFRAME packet (0x11) to RGB24 frame (delta from reference)
|
||||
// Input: compressed_data - P-frame packet data (after packet type byte)
|
||||
// packet_size - size of packet data
|
||||
// Output: rgb_frame - pointer to RGB24 frame buffer (width*height*3 bytes)
|
||||
// Must be pre-allocated by caller
|
||||
// Returns: 0 on success, -1 on error
|
||||
// Note: Requires previous frame to be decoded first (stored internally as reference)
|
||||
int tav_video_decode_pframe(tav_video_context_t *ctx,
|
||||
const uint8_t *compressed_data, uint32_t packet_size,
|
||||
uint8_t *rgb_frame);
|
||||
|
||||
// Get last error message
|
||||
const char *tav_video_get_error(tav_video_context_t *ctx);
|
||||
|
||||
// Enable verbose debug output
|
||||
void tav_video_set_verbose(tav_video_context_t *ctx, int verbose);
|
||||
|
||||
#endif // TAV_VIDEO_DECODER_H
|
||||
Reference in New Issue
Block a user