mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-06-09 14:44:05 +09:00
tav: temporal CDF 5/3 is causing coeff overflow on -q 5, replacing with Haar+sports mode
This commit is contained in:
@@ -1615,7 +1615,7 @@ start of the next packet
|
|||||||
# Video Format
|
# Video Format
|
||||||
- Dimension: 720x480 for NTSC, 720x576 for PAL
|
- Dimension: 720x480 for NTSC, 720x576 for PAL
|
||||||
- FPS: arbitrary (defined in packet header)
|
- FPS: arbitrary (defined in packet header)
|
||||||
- Wavelet: 9/7 Spatial, 5/3 Temporal
|
- Wavelet: 9/7 Spatial, Haar Temporal ("sports" preset always enabled)
|
||||||
- Decomposition levels: 4 spatial, 2 temporal
|
- Decomposition levels: 4 spatial, 2 temporal
|
||||||
- Quantiser and encoder quality level: arbitrary (defined in packet header as quality index)
|
- Quantiser and encoder quality level: arbitrary (defined in packet header as quality index)
|
||||||
- Extra features:
|
- Extra features:
|
||||||
|
|||||||
@@ -702,15 +702,15 @@ int main(int argc, char *argv[]) {
|
|||||||
.decomp_levels = 4, // TAV-DT fixed: 4 spatial levels
|
.decomp_levels = 4, // TAV-DT fixed: 4 spatial levels
|
||||||
.temporal_levels = 2, // TAV-DT fixed: 2 temporal levels
|
.temporal_levels = 2, // TAV-DT fixed: 2 temporal levels
|
||||||
.wavelet_filter = 1, // TAV-DT fixed: CDF 9/7
|
.wavelet_filter = 1, // TAV-DT fixed: CDF 9/7
|
||||||
.temporal_wavelet = 1, // TAV-DT fixed: CDF 5/3 (NOT Haar!)
|
.temporal_wavelet = 0, // TAV-DT fixed: Haar
|
||||||
.entropy_coder = 1, // TAV-DT fixed: EZBC
|
.entropy_coder = 1, // TAV-DT fixed: EZBC
|
||||||
.channel_layout = 0, // TAV-DT fixed: YCoCg-R
|
.channel_layout = 0, // TAV-DT fixed: YCoCg-R
|
||||||
.perceptual_tuning = 1, // TAV-DT fixed: Perceptual
|
.perceptual_tuning = 1, // TAV-DT fixed: Perceptual
|
||||||
.quantiser_y = (uint8_t)quant_y, // From DT quality map
|
.quantiser_y = (uint8_t)quant_y, // From DT quality map
|
||||||
.quantiser_co = (uint8_t)quant_co,
|
.quantiser_co = (uint8_t)quant_co,
|
||||||
.quantiser_cg = (uint8_t)quant_cg,
|
.quantiser_cg = (uint8_t)quant_cg,
|
||||||
.encoder_preset = 0, // No special presets
|
.encoder_preset = 1, // Sports mode
|
||||||
.monoblock = 1 // TAV-DT fixed: Single tile
|
.monoblock = 1 // TAV-DT fixed: Single tile
|
||||||
};
|
};
|
||||||
|
|
||||||
decoder.video_ctx = tav_video_create(&video_params);
|
decoder.video_ctx = tav_video_create(&video_params);
|
||||||
@@ -734,9 +734,7 @@ int main(int argc, char *argv[]) {
|
|||||||
long start_pos = ftell(decoder.input_fp);
|
long start_pos = ftell(decoder.input_fp);
|
||||||
|
|
||||||
// Pass 1: Process all packets for audio only
|
// Pass 1: Process all packets for audio only
|
||||||
if (decoder.verbose) {
|
printf("\n=== Pass 1: Extracting audio ===\n");
|
||||||
printf("\n=== Pass 1: Extracting audio ===\n");
|
|
||||||
}
|
|
||||||
while ((result = process_dt_packet(&decoder)) == 0) {
|
while ((result = process_dt_packet(&decoder)) == 0) {
|
||||||
// Continue processing (only audio is written)
|
// Continue processing (only audio is written)
|
||||||
}
|
}
|
||||||
@@ -754,9 +752,7 @@ int main(int argc, char *argv[]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Pass 2: Rewind and process all packets for video
|
// Pass 2: Rewind and process all packets for video
|
||||||
if (decoder.verbose) {
|
printf("\n=== Pass 2: Decoding video ===\n");
|
||||||
printf("\n=== Pass 2: Decoding video ===\n");
|
|
||||||
}
|
|
||||||
fseek(decoder.input_fp, start_pos, SEEK_SET);
|
fseek(decoder.input_fp, start_pos, SEEK_SET);
|
||||||
decoder.packets_processed = 0; // Reset statistics
|
decoder.packets_processed = 0; // Reset statistics
|
||||||
decoder.frames_decoded = 0;
|
decoder.frames_decoded = 0;
|
||||||
|
|||||||
@@ -12581,7 +12581,8 @@ int main(int argc, char *argv[]) {
|
|||||||
// Enforce mandatory settings
|
// Enforce mandatory settings
|
||||||
enc->wavelet_filter = WAVELET_9_7_IRREVERSIBLE; // 9/7 spatial
|
enc->wavelet_filter = WAVELET_9_7_IRREVERSIBLE; // 9/7 spatial
|
||||||
enc->decomp_levels = 4; // 4 spatial levels
|
enc->decomp_levels = 4; // 4 spatial levels
|
||||||
enc->temporal_motion_coder = 1; // CDF 5/3 temporal
|
enc->temporal_motion_coder = 0; // Haar temporal
|
||||||
|
enc->encoder_preset = 0x01; // Sports mode
|
||||||
enc->temporal_decomp_levels = 2; // 2 temporal levels
|
enc->temporal_decomp_levels = 2; // 2 temporal levels
|
||||||
enc->channel_layout = CHANNEL_LAYOUT_YCOCG; // Y-Co-Cg only
|
enc->channel_layout = CHANNEL_LAYOUT_YCOCG; // Y-Co-Cg only
|
||||||
enc->preprocess_mode = PREPROCESS_EZBC; // EZBC entropy coder
|
enc->preprocess_mode = PREPROCESS_EZBC; // EZBC entropy coder
|
||||||
@@ -12603,7 +12604,7 @@ int main(int argc, char *argv[]) {
|
|||||||
printf("TAV-DT: Quality level %d -> Y=%d, Co=%d, Cg=%d, TAD_quality=%d\n",
|
printf("TAV-DT: Quality level %d -> Y=%d, Co=%d, Cg=%d, TAD_quality=%d\n",
|
||||||
enc->quality_level, enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg,
|
enc->quality_level, enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg,
|
||||||
enc->quality_level);
|
enc->quality_level);
|
||||||
printf("TAV-DT: Enforcing format constraints (9/7 spatial, 5/3 temporal, 4+2 levels, EZBC, monoblock)\n");
|
printf("TAV-DT: Enforcing format constraints (9/7 spatial, Haar temporal, sports mode, 4+2 levels, EZBC, monoblock)\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Halve internal height for interlaced mode (FFmpeg will output half-height fields)
|
// Halve internal height for interlaced mode (FFmpeg will output half-height fields)
|
||||||
@@ -12615,7 +12616,7 @@ int main(int argc, char *argv[]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Smart preset for temporal motion coder based on resolution
|
// Smart preset for temporal motion coder based on resolution
|
||||||
// For small videos (<500k pixels), use CDF 5/3 (better for fine details)
|
// For small videos (<500k pixels), use Haar with sports preset (better for fine details)
|
||||||
// For larger videos, use Haar (better compression, smoother motion matters less)
|
// For larger videos, use Haar (better compression, smoother motion matters less)
|
||||||
if (enc->temporal_motion_coder == -1) {
|
if (enc->temporal_motion_coder == -1) {
|
||||||
int num_pixels = enc->width * enc->height;
|
int num_pixels = enc->width * enc->height;
|
||||||
@@ -12632,9 +12633,10 @@ int main(int argc, char *argv[]) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
enc->temporal_motion_coder = 1; // CDF 5/3
|
enc->temporal_motion_coder = 0; // Haar
|
||||||
|
enc->encoder_preset |= 0x01; // Sports mode
|
||||||
if (enc->verbose) {
|
if (enc->verbose) {
|
||||||
printf("Auto-selected CDF 5/3 temporal wavelet (resolution: %dx%d = %d pixels, quantiser_y = %d)\n",
|
printf("Auto-selected Haar temporal wavelet with sports preset (resolution: %dx%d = %d pixels, quantiser_y = %d)\n",
|
||||||
enc->width, enc->height, num_pixels, enc->quantiser_y);
|
enc->width, enc->height, num_pixels, enc->quantiser_y);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
1858
video_encoder/tav_video_decoder.c
Normal file
1858
video_encoder/tav_video_decoder.c
Normal file
File diff suppressed because it is too large
Load Diff
77
video_encoder/tav_video_decoder.h
Normal file
77
video_encoder/tav_video_decoder.h
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
// Created by CuriousTorvald and Claude on 2025-12-02.
|
||||||
|
// TAV Video Decoder Library - Shared decoding functions for TAV format
|
||||||
|
// Can be used by both regular TAV decoder and TAV-DT decoder
|
||||||
|
|
||||||
|
#ifndef TAV_VIDEO_DECODER_H
|
||||||
|
#define TAV_VIDEO_DECODER_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
// Video decoder context - opaque to users
|
||||||
|
typedef struct tav_video_context tav_video_context_t;
|
||||||
|
|
||||||
|
// Video parameters structure
|
||||||
|
typedef struct {
|
||||||
|
int width;
|
||||||
|
int height;
|
||||||
|
int decomp_levels; // Spatial DWT levels (typically 4)
|
||||||
|
int temporal_levels; // Temporal DWT levels (typically 2)
|
||||||
|
int wavelet_filter; // 0=CDF 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar
|
||||||
|
int temporal_wavelet; // Temporal wavelet (0=Haar, 1=CDF 5/3)
|
||||||
|
int entropy_coder; // 0=Twobitmap, 1=EZBC, 2=RAW
|
||||||
|
int channel_layout; // 0=YCoCg-R, 1=ICtCp
|
||||||
|
int perceptual_tuning; // 1=perceptual quantisation, 0=uniform
|
||||||
|
uint8_t quantiser_y; // Base quantiser index for Y/I
|
||||||
|
uint8_t quantiser_co; // Base quantiser index for Co/Ct
|
||||||
|
uint8_t quantiser_cg; // Base quantiser index for Cg/Cp
|
||||||
|
uint8_t encoder_preset; // Encoder preset flags (sports, anime, etc.)
|
||||||
|
int monoblock; // 1=single tile (monoblock), 0=multi-tile
|
||||||
|
} tav_video_params_t;
|
||||||
|
|
||||||
|
// Create video decoder context
|
||||||
|
// Returns NULL on failure
|
||||||
|
tav_video_context_t *tav_video_create(const tav_video_params_t *params);
|
||||||
|
|
||||||
|
// Free video decoder context
|
||||||
|
void tav_video_free(tav_video_context_t *ctx);
|
||||||
|
|
||||||
|
// Decode GOP_UNIFIED packet (0x12) to RGB24 frames
|
||||||
|
// Input: compressed_data - GOP packet data (after packet type byte)
|
||||||
|
// compressed_size - size of compressed data
|
||||||
|
// gop_size - number of frames in GOP (read from packet)
|
||||||
|
// Output: rgb_frames - array of pointers to RGB24 frame buffers (width*height*3 each)
|
||||||
|
// Must be pre-allocated by caller (gop_size pointers, each pointing to width*height*3 bytes)
|
||||||
|
// Returns: 0 on success, -1 on error
|
||||||
|
int tav_video_decode_gop(tav_video_context_t *ctx,
|
||||||
|
const uint8_t *compressed_data, uint32_t compressed_size,
|
||||||
|
uint8_t gop_size, uint8_t **rgb_frames);
|
||||||
|
|
||||||
|
// Decode IFRAME packet (0x10) to RGB24 frame
|
||||||
|
// Input: compressed_data - I-frame packet data (after packet type byte)
|
||||||
|
// packet_size - size of packet data
|
||||||
|
// Output: rgb_frame - pointer to RGB24 frame buffer (width*height*3 bytes)
|
||||||
|
// Must be pre-allocated by caller
|
||||||
|
// Returns: 0 on success, -1 on error
|
||||||
|
int tav_video_decode_iframe(tav_video_context_t *ctx,
|
||||||
|
const uint8_t *compressed_data, uint32_t packet_size,
|
||||||
|
uint8_t *rgb_frame);
|
||||||
|
|
||||||
|
// Decode PFRAME packet (0x11) to RGB24 frame (delta from reference)
|
||||||
|
// Input: compressed_data - P-frame packet data (after packet type byte)
|
||||||
|
// packet_size - size of packet data
|
||||||
|
// Output: rgb_frame - pointer to RGB24 frame buffer (width*height*3 bytes)
|
||||||
|
// Must be pre-allocated by caller
|
||||||
|
// Returns: 0 on success, -1 on error
|
||||||
|
// Note: Requires previous frame to be decoded first (stored internally as reference)
|
||||||
|
int tav_video_decode_pframe(tav_video_context_t *ctx,
|
||||||
|
const uint8_t *compressed_data, uint32_t packet_size,
|
||||||
|
uint8_t *rgb_frame);
|
||||||
|
|
||||||
|
// Get last error message
|
||||||
|
const char *tav_video_get_error(tav_video_context_t *ctx);
|
||||||
|
|
||||||
|
// Enable verbose debug output
|
||||||
|
void tav_video_set_verbose(tav_video_context_t *ctx, int verbose);
|
||||||
|
|
||||||
|
#endif // TAV_VIDEO_DECODER_H
|
||||||
Reference in New Issue
Block a user