TAV: still bugfixing

This commit is contained in:
minjaesong
2025-10-16 00:03:58 +09:00
parent 7e248bc83d
commit ea72dec996
6 changed files with 697 additions and 23 deletions

View File

@@ -15,11 +15,15 @@
#define TAV_MODE_SKIP 0x00
#define TAV_MODE_INTRA 0x01
#define TAV_MODE_DELTA 0x02
#define TAV_PACKET_IFRAME 0x10
#define TAV_PACKET_PFRAME 0x11
#define TAV_PACKET_AUDIO_MP2 0x20
#define TAV_PACKET_SUBTITLE 0x30
#define TAV_PACKET_SYNC 0xFF
#define TAV_PACKET_IFRAME 0x10
#define TAV_PACKET_PFRAME 0x11
#define TAV_PACKET_GOP_UNIFIED 0x12 // Unified 3D DWT GOP
#define TAV_PACKET_AUDIO_MP2 0x20
#define TAV_PACKET_SUBTITLE 0x30
#define TAV_PACKET_EXTENDED_HDR 0xEF
#define TAV_PACKET_GOP_SYNC 0xFC // GOP sync (N frames decoded)
#define TAV_PACKET_TIMECODE 0xFD
#define TAV_PACKET_SYNC 0xFF
// Channel layout constants (bit-field design)
#define CHANNEL_LAYOUT_YCOCG 0 // Y-Co-Cg (000: no alpha, has chroma, has luma)

View File

@@ -104,7 +104,7 @@ static int needs_alpha_channel(int channel_layout) {
#define DEFAULT_FPS 30
#define DEFAULT_QUALITY 3
#define DEFAULT_ZSTD_LEVEL 9
#define GOP_SIZE 16
#define GOP_SIZE /*1*/4
// Audio/subtitle constants (reused from TEV)
#define MP2_DEFAULT_PACKET_SIZE 1152
@@ -1456,8 +1456,8 @@ static void quantise_3d_dwt_coefficients(tav_encoder_t *enc,
int spatial_size,
int base_quantiser,
int is_chroma) {
const float BETA = 0.8f; // Temporal scaling exponent
const float TEMPORAL_BASE_SCALE = 0.7f; // Temporal coefficients are typically sparser
const float BETA = 0.8f; // Temporal scaling exponent (aggressive for temporal high-pass)
const float TEMPORAL_BASE_SCALE = 1.0f; // Don't reduce tLL quantization (same as intra)
// Process each temporal subband independently (separable approach)
for (int t = 0; t < num_frames; t++) {
@@ -1468,11 +1468,11 @@ static void quantise_3d_dwt_coefficients(tav_encoder_t *enc,
int temporal_level = get_temporal_subband_level(t, num_frames, enc->temporal_decomp_levels);
// Step 2: Compute temporal base quantizer using exponential scaling
// Formula: tH_base = Qbase_t * 0.7 * 2^(0.8 * level)
// Formula: tH_base = Qbase_t * TEMPORAL_BASE_SCALE * 2^(BETA * level) = Qbase_t * 1.0 * 2^(0.8 * level)
// Example with Qbase_t=16:
// - Level 0 (tLL): 16 * 0.7 * 2^0 = 11.2
// - Level 1 (tLH): 16 * 0.7 * 2^0.8 = 19.5
// - Level 2 (tHH): 16 * 0.7 * 2^1.6 = 33.8
// - Level 0 (tLL): 16 * 1.0 * 2^0 = 16 (same as intra-only)
// - Level 1 (tH): 16 * 1.0 * 2^0.8 ≈ 27.9 (about 1.7× base)
// - Level 2 (tHH): 16 * 1.0 * 2^1.6 ≈ 48.5 (about 3× base)
float temporal_scale = TEMPORAL_BASE_SCALE * powf(2.0f, BETA * temporal_level);
float temporal_quantiser = base_quantiser * temporal_scale;
@@ -1622,6 +1622,40 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
memcpy(gop_cg_coeffs[i], enc->gop_cg_frames[i], num_pixels * sizeof(float));
}
// Step 0.5: Apply motion compensation to align frames before temporal DWT
// This uses the computed translation vectors to align each frame to the previous one
for (int i = 1; i < actual_gop_size; i++) { // Skip frame 0 (reference frame)
float *aligned_y = malloc(num_pixels * sizeof(float));
float *aligned_co = malloc(num_pixels * sizeof(float));
float *aligned_cg = malloc(num_pixels * sizeof(float));
if (!aligned_y || !aligned_co || !aligned_cg) {
fprintf(stderr, "Error: Failed to allocate motion compensation buffers\n");
// Clean up and stop aligning: frames before i were already aligned in place; frame i and later stay unaligned for this GOP
free(aligned_y);
free(aligned_co);
free(aligned_cg);
break;
}
// Apply translation to align this frame
apply_translation(gop_y_coeffs[i], enc->width, enc->height,
enc->gop_translation_x[i], enc->gop_translation_y[i], aligned_y);
apply_translation(gop_co_coeffs[i], enc->width, enc->height,
enc->gop_translation_x[i], enc->gop_translation_y[i], aligned_co);
apply_translation(gop_cg_coeffs[i], enc->width, enc->height,
enc->gop_translation_x[i], enc->gop_translation_y[i], aligned_cg);
// Copy aligned frames back
memcpy(gop_y_coeffs[i], aligned_y, num_pixels * sizeof(float));
memcpy(gop_co_coeffs[i], aligned_co, num_pixels * sizeof(float));
memcpy(gop_cg_coeffs[i], aligned_cg, num_pixels * sizeof(float));
free(aligned_y);
free(aligned_co);
free(aligned_cg);
}
// Step 1: Apply 3D DWT (temporal + spatial) to each channel
// Note: This modifies gop_*_coeffs in-place
dwt_3d_forward(gop_y_coeffs, enc->width, enc->height, actual_gop_size,