mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 11:51:49 +09:00
fix: EZBC for TAV producing dark blotches on white background due to coeff clipping
This commit is contained in:
@@ -5022,28 +5022,21 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
}
|
||||
|
||||
// Apply linear dequantisation with perceptual weights (matching encoder's linear storage)
|
||||
// EZBC mode: coefficients are ALREADY DENORMALIZED by encoder
|
||||
// e.g., encoder: coeff=377 → quantize: 377/48=7.85→8 → denormalize: 8*48=384 → store 384
|
||||
// decoder: read 384 → pass through as-is (already in correct range for IDWT)
|
||||
// Significance-map mode: coefficients are normalized (quantized only)
|
||||
// e.g., encoder stores 8 = round(377/48)
|
||||
// decoder must multiply: 8 * 48 = 384 (denormalize for IDWT)
|
||||
// FIX (2025-11-11): Both EZBC and Significance-map modes now store NORMALIZED coefficients
|
||||
// Encoder stores quantised values (e.g., round(377/48) = 8)
|
||||
// Decoder must multiply by effective quantiser to denormalize
|
||||
// Previous denormalization in EZBC caused int16_t overflow (clipping at 32767)
|
||||
// for bright pixels, creating dark DWT-pattern blemishes
|
||||
for (i in quantised.indices) {
|
||||
if (i < dequantised.size) {
|
||||
val effectiveQuantiser = baseQuantiser * weights[i]
|
||||
|
||||
dequantised[i] = if (isEZBC) {
|
||||
// EZBC mode: pass through as-is (coefficients already denormalized and rounded by encoder)
|
||||
quantised[i].toFloat()
|
||||
} else {
|
||||
// Significance-map mode: multiply to denormalize, then round
|
||||
// CRITICAL: Must ROUND (not truncate) to match EZBC encoder's roundf() behavior
|
||||
// Truncation toward zero was wrong - it created mismatch with EZBC for odd baseQ values
|
||||
val untruncated = quantised[i] * effectiveQuantiser
|
||||
val rounded = kotlin.math.round(untruncated)
|
||||
// Both modes now use the same dequantisation: multiply to denormalize, then round
|
||||
// CRITICAL: Must ROUND (not truncate) to match encoder's roundf() behavior
|
||||
val untruncated = quantised[i] * effectiveQuantiser
|
||||
val rounded = kotlin.math.round(untruncated)
|
||||
|
||||
rounded
|
||||
}
|
||||
dequantised[i] = rounded
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2407,13 +2407,52 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
|
||||
const int is_perceptual = (decoder->header.version >= 5 && decoder->header.version <= 8);
|
||||
const int is_ezbc = (decoder->header.entropy_coder == 1);
|
||||
|
||||
if (is_ezbc) {
|
||||
// EZBC mode: coefficients are already denormalised by encoder
|
||||
// Just convert int16 to float without multiplying by quantiser
|
||||
for (int i = 0; i < coeff_count; i++) {
|
||||
decoder->dwt_buffer_y[i] = (float)quantised_y[i];
|
||||
decoder->dwt_buffer_co[i] = (float)quantised_co[i];
|
||||
decoder->dwt_buffer_cg[i] = (float)quantised_cg[i];
|
||||
// Debug: Print decoder state
|
||||
static int state_debug_once = 1;
|
||||
if (state_debug_once) {
|
||||
fprintf(stderr, "[DECODER-STATE] version=%d, entropy_coder=%d, is_perceptual=%d, is_ezbc=%d\n",
|
||||
decoder->header.version, decoder->header.entropy_coder, is_perceptual, is_ezbc);
|
||||
state_debug_once = 0;
|
||||
}
|
||||
|
||||
if (is_ezbc && is_perceptual) {
|
||||
// EZBC mode with perceptual quantisation: coefficients are normalised
|
||||
// Need to dequantise using perceptual weights (same as twobit-map mode)
|
||||
|
||||
// Debug: Print quantised LL values before dequantisation
|
||||
static int debug_count = 0;
|
||||
if (debug_count < 1) {
|
||||
fprintf(stderr, "[EZBC-DECODER-DEBUG] Quantised LL coefficients (9x7):\n");
|
||||
for (int y = 0; y < 7 && y < decoder->header.height; y++) {
|
||||
for (int x = 0; x < 9 && x < decoder->header.width; x++) {
|
||||
int idx = y * decoder->header.width + x;
|
||||
fprintf(stderr, "%6d ", quantised_y[idx]);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
debug_count++;
|
||||
}
|
||||
|
||||
dequantise_dwt_subbands_perceptual(0, qy, quantised_y, decoder->dwt_buffer_y,
|
||||
decoder->header.width, decoder->header.height,
|
||||
decoder->header.decomp_levels, qy, 0, decoder->frame_count);
|
||||
dequantise_dwt_subbands_perceptual(0, qy, quantised_co, decoder->dwt_buffer_co,
|
||||
decoder->header.width, decoder->header.height,
|
||||
decoder->header.decomp_levels, qco, 1, decoder->frame_count);
|
||||
dequantise_dwt_subbands_perceptual(0, qy, quantised_cg, decoder->dwt_buffer_cg,
|
||||
decoder->header.width, decoder->header.height,
|
||||
decoder->header.decomp_levels, qcg, 1, decoder->frame_count);
|
||||
|
||||
// Debug: Print dequantised LL values
|
||||
if (debug_count <= 1) {
|
||||
fprintf(stderr, "[EZBC-DECODER-DEBUG] Dequantised LL coefficients (9x7):\n");
|
||||
for (int y = 0; y < 7 && y < decoder->header.height; y++) {
|
||||
for (int x = 0; x < 9 && x < decoder->header.width; x++) {
|
||||
int idx = y * decoder->header.width + x;
|
||||
fprintf(stderr, "%7.0f ", decoder->dwt_buffer_y[idx]);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
} else if (is_perceptual) {
|
||||
dequantise_dwt_subbands_perceptual(0, qy, quantised_y, decoder->dwt_buffer_y,
|
||||
@@ -2912,28 +2951,34 @@ int main(int argc, char *argv[]) {
|
||||
const int temporal_levels = 2; // Fixed for TAV GOP encoding
|
||||
|
||||
for (int t = 0; t < gop_size; t++) {
|
||||
if (is_ezbc) {
|
||||
// EZBC mode: coefficients are already denormalised by encoder
|
||||
// Just convert int16 to float without multiplying by quantiser
|
||||
for (int i = 0; i < num_pixels; i++) {
|
||||
gop_y[t][i] = (float)quantised_gop[t][0][i];
|
||||
gop_co[t][i] = (float)quantised_gop[t][1][i];
|
||||
gop_cg[t][i] = (float)quantised_gop[t][2][i];
|
||||
}
|
||||
if (is_ezbc && is_perceptual) {
|
||||
// EZBC mode with perceptual quantisation: coefficients are normalised
|
||||
// Need to dequantise using perceptual weights (same as twobit-map mode)
|
||||
const int temporal_level = get_temporal_subband_level(t, gop_size, temporal_levels);
|
||||
const float temporal_scale = get_temporal_quantiser_scale(temporal_level);
|
||||
|
||||
if (t == 0) {
|
||||
// Debug first frame
|
||||
int16_t max_y = 0, min_y = 0;
|
||||
for (int i = 0; i < num_pixels; i++) {
|
||||
if (quantised_gop[t][0][i] > max_y) max_y = quantised_gop[t][0][i];
|
||||
if (quantised_gop[t][0][i] < min_y) min_y = quantised_gop[t][0][i];
|
||||
}
|
||||
fprintf(stderr, "[GOP-EZBC] Frame 0 Y coeffs range: [%d, %d], first 5: %d %d %d %d %d\n",
|
||||
min_y, max_y,
|
||||
quantised_gop[t][0][0], quantised_gop[t][0][1], quantised_gop[t][0][2],
|
||||
quantised_gop[t][0][3], quantised_gop[t][0][4]);
|
||||
const float base_q_y = roundf(decoder->header.quantiser_y * temporal_scale);
|
||||
const float base_q_co = roundf(decoder->header.quantiser_co * temporal_scale);
|
||||
const float base_q_cg = roundf(decoder->header.quantiser_cg * temporal_scale);
|
||||
|
||||
dequantise_dwt_subbands_perceptual(0, decoder->header.quantiser_y,
|
||||
quantised_gop[t][0], gop_y[t],
|
||||
decoder->header.width, decoder->header.height,
|
||||
decoder->header.decomp_levels, base_q_y, 0, decoder->frame_count + t);
|
||||
dequantise_dwt_subbands_perceptual(0, decoder->header.quantiser_y,
|
||||
quantised_gop[t][1], gop_co[t],
|
||||
decoder->header.width, decoder->header.height,
|
||||
decoder->header.decomp_levels, base_q_co, 1, decoder->frame_count + t);
|
||||
dequantise_dwt_subbands_perceptual(0, decoder->header.quantiser_y,
|
||||
quantised_gop[t][2], gop_cg[t],
|
||||
decoder->header.width, decoder->header.height,
|
||||
decoder->header.decomp_levels, base_q_cg, 1, decoder->frame_count + t);
|
||||
|
||||
if (t == 0 && verbose) {
|
||||
fprintf(stderr, "[GOP-EZBC] Frame 0: Quantised LL[0]=%d, Dequantised LL[0]=%.1f, base_q_y=%.1f\n",
|
||||
quantised_gop[t][0][0], gop_y[t][0], base_q_y);
|
||||
}
|
||||
} else {
|
||||
} else if (!is_ezbc) {
|
||||
// Normal mode: multiply by quantiser
|
||||
const int temporal_level = get_temporal_subband_level(t, gop_size, temporal_levels);
|
||||
const float temporal_scale = get_temporal_quantiser_scale(temporal_level);
|
||||
|
||||
@@ -1976,7 +1976,7 @@ typedef struct tav_encoder_s {
|
||||
int two_pass_mode; // Enable two-pass encoding (0=disabled, 1=enabled)
|
||||
frame_analysis_t *frame_analyses; // Array of frame analysis metrics (first pass)
|
||||
int frame_analyses_capacity; // Allocated capacity
|
||||
int frame_analyses_count; // Current number of analyzed frames
|
||||
int frame_analyses_count; // Current number of analysed frames
|
||||
gop_boundary_t *gop_boundaries; // Linked list of GOP boundaries (computed in first pass)
|
||||
gop_boundary_t *current_gop_boundary; // Current GOP being encoded (second pass)
|
||||
int two_pass_current_frame; // Current frame number in second pass
|
||||
@@ -6702,13 +6702,22 @@ static void quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(tav_
|
||||
// Step 3: Round to discrete quantisation levels
|
||||
quantised_val = roundf(quantised_val); // file size explodes without rounding
|
||||
|
||||
// Step 4: Denormalise - multiply back by quantiser to restore magnitude
|
||||
// This gives us quantised values at original scale (not shrunken to 0-10 range)
|
||||
float denormalised = quantised_val * effective_q;
|
||||
// FIX: Store normalised values (not denormalised) to avoid int16_t overflow
|
||||
// EZBC bitplane encoding works fine with normalised coefficients
|
||||
// Denormalisation was causing bright pixels to clip at 32767
|
||||
quantised[i] = (int16_t)CLAMP((int)quantised_val, -32768, 32767);
|
||||
|
||||
// CRITICAL FIX: Must round (not truncate) to match decoder behavior
|
||||
// With odd baseQ values and fractional weights, truncation causes mismatch with Sigmap mode
|
||||
quantised[i] = (int16_t)CLAMP((int)roundf(denormalised), -32768, 32767);
|
||||
// Debug: Print LL subband coefficients (9×7 at top-left for 560×448)
|
||||
static int debug_once = 1;
|
||||
if (debug_once && i < 63 && width == 560 && !is_chroma) {
|
||||
int x = i % width;
|
||||
int y = i / width;
|
||||
if (x < 9 && y < 7) {
|
||||
fprintf(stderr, "[EZBC-QUANT-DEBUG] LL coeff[%d,%d] (idx=%d): coeff=%.1f, weight=%.3f, effective_q=%.1f, quantised_val=%.1f, stored=%d\n",
|
||||
x, y, i, coeffs[i], weight, effective_q, quantised_val, quantised[i]);
|
||||
if (i == 62) debug_once = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9631,7 +9640,7 @@ static void free_gop_boundaries(gop_boundary_t *head) {
|
||||
}
|
||||
}
|
||||
|
||||
// First pass: Analyze all frames and build GOP boundaries
|
||||
// First pass: Analyse all frames and build GOP boundaries
|
||||
// Returns 0 on success, -1 on error
|
||||
static int two_pass_first_pass(tav_encoder_t *enc, const char *input_file) {
|
||||
printf("=== Two-Pass Encoding: First Pass (Scene Analysis) ===\n");
|
||||
@@ -9737,12 +9746,12 @@ static int two_pass_first_pass(tav_encoder_t *enc, const char *input_file) {
|
||||
frame_num++;
|
||||
|
||||
if (frame_num % 100 == 0) {
|
||||
printf(" Analyzed %d frames...\r", frame_num);
|
||||
printf(" Analysed %d frames...\r", frame_num);
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
printf("\n Analyzed %d frames total\n", frame_num);
|
||||
printf("\n Analysed %d frames total\n", frame_num);
|
||||
|
||||
free(frame_rgb);
|
||||
if (prev_dwt) free(prev_dwt);
|
||||
@@ -9881,7 +9890,7 @@ int main(int argc, char *argv[]) {
|
||||
{"adaptive-blocks", no_argument, 0, 1022},
|
||||
{"bframes", required_argument, 0, 1023},
|
||||
{"gop-size", required_argument, 0, 1024},
|
||||
{"ezbc", no_argument, 0, 1025},
|
||||
{"sigmap", no_argument, 0, 1025},
|
||||
{"separate-audio-track", no_argument, 0, 1026},
|
||||
{"pcm8-audio", no_argument, 0, 1027},
|
||||
{"pcm-audio", no_argument, 0, 1027},
|
||||
@@ -10095,9 +10104,8 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
printf("GOP size set to %d frames\n", enc->residual_coding_gop_size);
|
||||
break;
|
||||
case 1025: // --ezbc
|
||||
enc->preprocess_mode = PREPROCESS_EZBC;
|
||||
printf("EZBC (Embedded Zero Block Coding) enabled for significance maps\n");
|
||||
case 1025: // --sigmap
|
||||
enc->preprocess_mode = PREPROCESS_TWOBITMAP;
|
||||
break;
|
||||
case 1026: // --separate-audio-track
|
||||
enc->separate_audio_track = 1;
|
||||
|
||||
Reference in New Issue
Block a user