From c52224457475301d4f5010a893eff4ff9310750f Mon Sep 17 00:00:00 2001 From: minjaesong Date: Sun, 30 Nov 2025 23:41:46 +0900 Subject: [PATCH] tav: fix: incorrect brightness jumping on MT mode --- CLAUDE.md | 16 ++++++++-------- video_encoder/encoder_tav.c | 22 ++++++++++++---------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 6dc59a4..cbf8d64 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -153,7 +153,7 @@ Peripheral memories can be accessed using `vm.peek()` and `vm.poke()` functions, - **Usage Examples**: ```bash # Quality mode - ./encoder_tev -i input.mp4 -q 2 -o output.tev + ./encoder_tev -i input.mp4 -o output.tev -q 3 # Playback playtev output.tev @@ -180,18 +180,18 @@ Peripheral memories can be accessed using `vm.peek()` and `vm.poke()` functions, - **Usage Examples**: ```bash # Different wavelets - ./encoder_tav -i input.mp4 -w 0 -q 2 -o output.tav # 5/3 reversible (lossless capable) - ./encoder_tav -i input.mp4 -w 1 -q 2 -o output.tav # 9/7 irreversible (default, best compression) - ./encoder_tav -i input.mp4 -w 2 -q 2 -o output.tav # CDF 13/7 (experimental) - ./encoder_tav -i input.mp4 -w 16 -q 2 -o output.tav # DD-4 (four-point interpolating) - ./encoder_tav -i input.mp4 -w 255 -q 2 -o output.tav # Haar (demonstration) + ./encoder_tav -i input.mp4 -w 0 -o output.tav # 5/3 reversible (lossless capable) + ./encoder_tav -i input.mp4 -w 1 -o output.tav # 9/7 irreversible (default, best compression) + ./encoder_tav -i input.mp4 -w 2 -o output.tav # CDF 13/7 (experimental) + ./encoder_tav -i input.mp4 -w 16 -o output.tav # DD-4 (four-point interpolating) + ./encoder_tav -i input.mp4 -w 255 -o output.tav # Haar (demonstration) # Quality levels (0-5) ./encoder_tav -i input.mp4 -q 0 -o output.tav # Lowest quality, smallest file ./encoder_tav -i input.mp4 -q 5 -o output.tav # Highest quality, largest file # Temporal 3D DWT (GOP-based encoding) - ./encoder_tav -i input.mp4 --temporal-dwt -q 2 -o output.tav + ./encoder_tav 
-i input.mp4 --temporal-dwt -o output.tav # Playback playtav output.tav @@ -259,7 +259,7 @@ Implemented on 2025-10-15 for improved temporal compression through group-of-pic **Usage**: ```bash # Enable temporal 3D DWT -./encoder_tav -i input.mp4 --temporal-dwt -q 2 -o output.tav +./encoder_tav -i input.mp4 --temporal-dwt -o output.tav # Inspect GOP structure ./tav_inspector output.tav -v diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index 651f3f1..4687b69 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -1797,6 +1797,8 @@ typedef struct tav_encoder_s { // Encoding parameters int quality_level; + // IMPORTANT: quantiser_* stores RAW INDICES (0-255), not actual quantiser values + // When passing to quantisation functions, MUST use QLUT[quantiser_*] to get actual values int quantiser_y, quantiser_co, quantiser_cg; int wavelet_filter; int decomp_levels; @@ -3492,8 +3494,8 @@ static int worker_thread_main(void *arg) { // Step 3: Quantise coefficients (using 3D DWT quantisation for GOP) // Use channel-specific quantisers from encoder settings - // Apply QLUT mapping to chroma quantisers (matches single-threaded path) - int base_quantiser_y = enc->quantiser_y; + // Apply QLUT mapping to ALL quantisers (matches single-threaded path) + int base_quantiser_y = QLUT[enc->quantiser_y]; // Y quantiser from encoder (via QLUT) int base_quantiser_co = QLUT[enc->quantiser_co]; // Co quantiser from encoder (via QLUT) int base_quantiser_cg = QLUT[enc->quantiser_cg]; // Cg quantiser from encoder (via QLUT) @@ -5212,10 +5214,10 @@ static size_t encode_pframe_residual(tav_encoder_t *enc, int qY) { qY, enc->width, enc->height, enc->decomp_levels, 0, 0); quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_co_dwt, quantised_co, frame_size, - enc->quantiser_co, enc->width, enc->height, + QLUT[enc->quantiser_co], enc->width, enc->height, enc->decomp_levels, 1, 0); quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_cg_dwt, 
quantised_cg, frame_size, - enc->quantiser_cg, enc->width, enc->height, + QLUT[enc->quantiser_cg], enc->width, enc->height, enc->decomp_levels, 1, 0); // Print max abs for debug @@ -5232,10 +5234,10 @@ static size_t encode_pframe_residual(tav_encoder_t *enc, int qY) { qY, enc->width, enc->height, enc->decomp_levels, 0, 0); quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_co_dwt, quantised_co, frame_size, - enc->quantiser_co, enc->width, enc->height, + QLUT[enc->quantiser_co], enc->width, enc->height, enc->decomp_levels, 1, 0); quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_cg_dwt, quantised_cg, frame_size, - enc->quantiser_cg, enc->width, enc->height, + QLUT[enc->quantiser_cg], enc->width, enc->height, enc->decomp_levels, 1, 0); } @@ -5528,10 +5530,10 @@ static size_t encode_pframe_adaptive(tav_encoder_t *enc, int qY) { qY, enc->width, enc->height, enc->decomp_levels, 0, 0); quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_co_dwt, quantised_co, frame_size, - enc->quantiser_co, enc->width, enc->height, + QLUT[enc->quantiser_co], enc->width, enc->height, enc->decomp_levels, 1, 0); quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_cg_dwt, quantised_cg, frame_size, - enc->quantiser_cg, enc->width, enc->height, + QLUT[enc->quantiser_cg], enc->width, enc->height, enc->decomp_levels, 1, 0); // Step 8: Preprocess coefficients @@ -5762,10 +5764,10 @@ static size_t encode_bframe_adaptive(tav_encoder_t *enc, int qY) { qY, enc->width, enc->height, enc->decomp_levels, 0, 0); quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_co_dwt, quantised_co, frame_size, - enc->quantiser_co, enc->width, enc->height, + QLUT[enc->quantiser_co], enc->width, enc->height, enc->decomp_levels, 1, 0); quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_cg_dwt, quantised_cg, frame_size, - enc->quantiser_cg, enc->width, enc->height, + QLUT[enc->quantiser_cg], enc->width, enc->height, enc->decomp_levels, 1, 0); // Step 8: Preprocess 
coefficients