diff --git a/assets/disk0/tvdos/bin/zfm.js b/assets/disk0/tvdos/bin/zfm.js index a47c1dc..de75ea5 100644 --- a/assets/disk0/tvdos/bin/zfm.js +++ b/assets/disk0/tvdos/bin/zfm.js @@ -474,6 +474,7 @@ let filenavOninput = (window, event) => { firstRunLatch = true con.curs_set(0);clearScr() + refreshFilePanelCache(windowMode) redraw() } } diff --git a/terranmon.txt b/terranmon.txt index ad5a52f..a19aad0 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -687,7 +687,7 @@ DCT-based compression, motion compensation, and efficient temporal coding. - Version 3.0: Additional support of ICtCp Colour space # File Structure -\x1F T S V M T E V +\x1F T S V M T E V (if video), \x1F T S V M T E P (if still picture) [HEADER] [PACKET 0] [PACKET 1] @@ -695,7 +695,7 @@ DCT-based compression, motion compensation, and efficient temporal coding. ... ## Header (24 bytes) - uint8 Magic[8]: "\x1F TSVM TEV" + uint8 Magic[8]: "\x1F TSVM TEV" or "\x1F TSVM TEP" uint8 Version: 2 (YCoCg-R) or 3 (ICtCp) uint16 Width: video width in pixels uint16 Height: video height in pixels @@ -726,11 +726,13 @@ DCT-based compression, motion compensation, and efficient temporal coding. 0x30: Subtitle in "Simple" format 0x31: Subtitle in "Karaoke" format 0xE0: EXIF packet - 0xE1: ID3 packet - 0xE2: Vorbis Comment packet + 0xE1: ID3v1 packet + 0xE2: ID3v2 packet + 0xE3: Vorbis Comment packet + 0xE4: CD-text packet 0xFF: sync packet -## EXIF/ID3/Vorbis Comment packet structure +## Standard metadata payload packet structure uint8 0xE0/0xE1/0xE2/.../0xEF (see Packet Types section) uint32 Length of the payload * Standard payload @@ -792,11 +794,25 @@ to larger block sizes and hardware acceleration. Reuses existing MP2 audio infrastructure from TSVM MOV format for seamless compatibility with existing audio processing pipeline. -## Simple Subtitle Format -SSF is a simple subtitle that is intended to use text buffer to display texts. -The format is designed to be compatible with SubRip and SAMI (without markups). 
+## NTSC Framerate handling +The encoder encodes the frames as-is. The decoder must duplicate every 1000th frame to keep the decoding +in-sync. -### SSF Packet Structure +-------------------------------------------------------------------------------- + +Simple Subtitle Format (SSF) + +SSF is a simple subtitle that is intended to use text buffer to display texts. +The format is designed to be compatible with SubRip and SAMI (without markups) and interoperable with +TEV and TAV formats. + +When SSF is interleaved with MP2 audio, the payload must be inserted in-between MP2 frames. + +## Packet Structure + uint8 0x30 (packet type) + * SSF Payload (see below) + +## SSF Packet Structure uint24 index (used to specify target subtitle object) uint8 opcode 0x00 = , is NOP when used here @@ -811,9 +827,51 @@ The format is designed to be compatible with SubRip and SAMI (without markups). text argument may be terminated by 0x00 BEFORE the entire arguments being terminated by 0x00, leaving extra 0x00 on the byte stream. A decoder must be able to handle the extra zeros. -## NTSC Framerate handling -The encoder encodes the frames as-is. The decoder must duplicate every 1000th frame to keep the decoding -in-sync. +-------------------------------------------------------------------------------- + +Karaoke Subtitle Format (KSF) + +KSF is a frame-synced subtitle that is intended to use Karaoke-style subtitles. +The format is designed to be interoperable with TEV and TAV formats. +For non-karaoke style synced lyrics, use SSF. + +When KSF is interleaved with MP2 audio, the payload must be inserted in-between MP2 frames. + +## Packet Structure + uint8 0x31 (packet type) + * KSF Payload (see below) + +### KSF Packet Structure + KSF is line-based: you define an unrevealed line, then subsequent commands reveal words/syllables + on appropriate timings. 
+ + uint24 index (used to specify target subtitle object) + uint8 opcode + + 0x00 = , is NOP when used here + 0x01 = define line (arguments: UTF-8 text. Players will also show it in grey) + 0x02 = delete line (arguments: none) + 0x03 = move to different nonant (arguments: 0x00-bottom centre; 0x01-bottom left; 0x02-centre left; 0x03-top left; 0x04-top centre; 0x05-top right; 0x06-centre right; 0x07-bottom right; 0x08-centre) + + + 0x30 = reveal text normally (arguments: UTF-8 text. The reveal text must contain spaces when required) + 0x31 = reveal text slowly (arguments: UTF-8 text. The effect is implementation-dependent) + + 0x40 = reveal text normally with emphasize (arguments: UTF-8 text. On TEV/TAV player, the text will be white; otherwise, implementation-dependent) + 0x41 = reveal text slowly with emphasize (arguments: UTF-8 text) + + 0x50 = reveal text normally with target colour (arguments: uint8 target colour; UTF-8 text) + 0x51 = reveal text slowly with target colour (arguments: uint8 target colour; UTF-8 text) + + + 0x80 = upload to low font rom (arguments: uint16 payload length, var bytes) + 0x81 = upload to high font rom (arguments: uint16 payload length, var bytes) + note: changing the font rom will change the appearance of every subtitle currently being displayed + * arguments separated AND terminated by 0x00 + text argument may be terminated by 0x00 BEFORE the entire arguments being terminated by 0x00, + leaving extra 0x00 on the byte stream. A decoder must be able to handle the extra zeros. + + -------------------------------------------------------------------------------- @@ -826,7 +884,7 @@ to DCT-based codecs like TEV. Features include multi-resolution encoding, progre transmission capability, and region-of-interest coding. # File Structure -\x1F T S V M T A V +\x1F T S V M T A V (if video), \x1F T S V M T A P (if still picture) [HEADER] [PACKET 0] [PACKET 1] @@ -834,7 +892,7 @@ transmission capability, and region-of-interest coding. ...
## Header (32 bytes) - uint8 Magic[8]: "\x1F TSVM TAV" + uint8 Magic[8]: "\x1F TSVM TAV" or "\x1F TSVM TAP" uint8 Version: 3 (YCoCg-R uniform), 4 (ICtCp uniform), 5 (YCoCg-R perceptual), 6 (ICtCp perceptual) uint16 Width: video width in pixels uint16 Height: video height in pixels @@ -856,6 +914,7 @@ transmission capability, and region-of-interest coding. - bit 0 = has alpha channel - bit 1 = is NTSC framerate - bit 2 = is lossless mode + - bit 3 = has region-of-interest coding (for still images only) uint8 File Role - 0 = generic - 1 = this file is header-only, and UCF payload will be followed (used by seekable movie file) @@ -871,11 +930,13 @@ transmission capability, and region-of-interest coding. 0x30: Subtitle in "Simple" format 0x31: Subtitle in "Karaoke" format 0xE0: EXIF packet - 0xE1: ID3 packet - 0xE2: Vorbis Comment packet + 0xE1: ID3v1 packet + 0xE2: ID3v2 packet + 0xE3: Vorbis Comment packet + 0xE4: CD-text packet 0xFF: sync packet -## EXIF/ID3/Vorbis Comment packet structure +## Standard metadata payload packet structure uint8 0xE0/0xE1/0xE2/.../0xEF (see Packet Types section) uint32 Length of the payload * Standard payload diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index c6974dc..c68c36a 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -4923,6 +4923,119 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } + // Delta-specific perceptual weight model for motion-optimized coefficient reconstruction + private fun getPerceptualWeightDelta(qualityLevel: Int, level: Int, subbandType: Int, isChroma: Boolean, maxLevels: Int): Float { + // Delta coefficients have different perceptual characteristics than full-picture coefficients: + // 1. Motion edges are more perceptually critical than static edges + // 2. 
Temporal masking allows more aggressive quantization in high-motion areas + // 3. Smaller delta magnitudes make relative quantization errors more visible + // 4. Frequency distribution is motion-dependent rather than spatial-dependent + + return if (!isChroma) { + // LUMA DELTA CHANNEL: Emphasize motion coherence and edge preservation + when (subbandType) { + 0 -> { // LL subband - DC motion changes, still important + // DC motion changes - preserve somewhat but allow coarser quantization than full-picture + 2f // Slightly coarser than full-picture + } + 1 -> { // LH subband - horizontal motion edges + // Motion boundaries benefit from temporal masking - allow coarser quantization + 0.9f + } + 2 -> { // HL subband - vertical motion edges + // Vertical motion boundaries - equal treatment with horizontal for deltas + 1.2f + } + else -> { // HH subband - diagonal motion details + // Diagonal motion deltas can be quantized most aggressively + 0.5f + } + } + } else { + // CHROMA DELTA CHANNELS: More aggressive quantization allowed due to temporal masking + // Motion chroma changes are less perceptually critical than static chroma + val base = getPerceptualModelChromaBase(qualityLevel, level - 1) + + when (subbandType) { + 0 -> 1.3f // LL chroma deltas - more aggressive than full-picture chroma + 1 -> kotlin.math.max(1.2f, kotlin.math.min(120.0f, base * 1.4f)) // LH chroma deltas + 2 -> kotlin.math.max(1.4f, kotlin.math.min(140.0f, base * 1.6f)) // HL chroma deltas + else -> kotlin.math.max(1.6f, kotlin.math.min(160.0f, base * 1.8f)) // HH chroma deltas + } + } + } + + // Helper functions for perceptual models (simplified versions of encoder models) + private fun getPerceptualModelLL(qualityLevel: Int, level: Int): Float { + // Simplified LL model - preserve DC components + return 1.0f - (level.toFloat() / 8.0f) * (qualityLevel.toFloat() / 6.0f) + } + + private fun getPerceptualModelLH(qualityLevel: Int, level: Int): Float { + // Simplified LH model - horizontal details 
+ return 1.2f + (level.toFloat() / 4.0f) * (qualityLevel.toFloat() / 3.0f) + } + + private fun getPerceptualModelHL(qualityLevel: Int, lhWeight: Float): Float { + // Simplified HL model - vertical details + return lhWeight * 1.1f + } + + private fun getPerceptualModelHH(lhWeight: Float, hlWeight: Float): Float { + // Simplified HH model - diagonal details + return (lhWeight + hlWeight) * 0.6f + } + + private fun getPerceptualModelChromaBase(qualityLevel: Int, level: Int): Float { + // Simplified chroma base curve + return 1.0f - (1.0f / (0.5f * qualityLevel * qualityLevel + 1.0f)) * (level - 4.0f) + } + + // Determine delta-specific perceptual weight for coefficient at linear position + private fun getPerceptualWeightForPositionDelta(qualityLevel: Int, linearIdx: Int, width: Int, height: Int, decompLevels: Int, isChroma: Boolean): Float { + // Map linear coefficient index to DWT subband using same layout as encoder + var offset = 0 + + // First: LL subband at maximum decomposition level + val llWidth = width shr decompLevels + val llHeight = height shr decompLevels + val llSize = llWidth * llHeight + + if (linearIdx < offset + llSize) { + // LL subband at maximum level - use delta-specific perceptual weight + return getPerceptualWeightDelta(qualityLevel, decompLevels, 0, isChroma, decompLevels) + } + offset += llSize + + // Then: LH, HL, HH subbands for each level from max down to 1 + for (level in decompLevels downTo 1) { + val levelWidth = width shr (decompLevels - level + 1) + val levelHeight = height shr (decompLevels - level + 1) + val subbandSize = levelWidth * levelHeight + + // LH subband (horizontal details) + if (linearIdx < offset + subbandSize) { + return getPerceptualWeightDelta(qualityLevel, level, 1, isChroma, decompLevels) + } + offset += subbandSize + + // HL subband (vertical details) + if (linearIdx < offset + subbandSize) { + return getPerceptualWeightDelta(qualityLevel, level, 2, isChroma, decompLevels) + } + offset += subbandSize + + // HH 
subband (diagonal details) + if (linearIdx < offset + subbandSize) { + return getPerceptualWeightDelta(qualityLevel, level, 3, isChroma, decompLevels) + } + offset += subbandSize + } + + // Fallback for out-of-bounds indices + return 1.0f + } + private fun tavDecodeDeltaTileRGB(qYGlobal: Int, readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long, width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false): Long { @@ -4972,7 +5085,18 @@ class GraphicsJSR223Delegate(private val vm: VM) { val currentCo = FloatArray(coeffCount) val currentCg = FloatArray(coeffCount) - // Uniform delta reconstruction because coefficient deltas cannot be perceptually coded + // Delta-specific perceptual reconstruction using motion-optimized coefficients + // Estimate quality level from quantization parameters for perceptual weighting + val estimatedQualityY = when { + qY <= 6 -> 4 // High quality + qY <= 12 -> 3 // Medium-high quality + qY <= 25 -> 2 // Medium quality + qY <= 42 -> 1 // Medium-low quality + else -> 0 // Low quality + } + + // TEMPORARILY DISABLED: Delta-specific perceptual reconstruction + // Use uniform delta reconstruction (same as original implementation) for (i in 0 until coeffCount) { currentY[i] = prevY[i] + (deltaY[i].toFloat() * qY) currentCo[i] = prevCo[i] + (deltaCo[i].toFloat() * qCo) diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index 74b9b13..d1d63ca 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -947,6 +947,58 @@ static float get_perceptual_weight(tav_encoder_t *enc, int level, int subband_ty } } +// Delta-specific perceptual weight model optimized for temporal coefficient differences +static float get_perceptual_weight_delta(tav_encoder_t *enc, int level, int subband_type, int is_chroma, int max_levels) { + // Delta coefficients have different perceptual characteristics than full-picture 
coefficients: + // 1. Motion edges are more perceptually critical than static edges + // 2. Temporal masking allows more aggressive quantization in high-motion areas + // 3. Smaller delta magnitudes make relative quantization errors more visible + // 4. Frequency distribution is motion-dependent rather than spatial-dependent + + if (!is_chroma) { + // LUMA DELTA CHANNEL: Emphasize motion coherence and edge preservation + if (subband_type == 0) { // LL subband - DC motion changes, still important + // DC motion changes - preserve somewhat but allow coarser quantization than full-picture + return 2.0f; // Slightly coarser than full-picture + } + + if (subband_type == 1) { // LH subband - horizontal motion edges + // Motion boundaries benefit from temporal masking - allow coarser quantization + return 0.9f; // More aggressive quantization for deltas + } + + if (subband_type == 2) { // HL subband - vertical motion edges + // Vertical motion boundaries - equal treatment with horizontal for deltas + return 1.2f; // Same aggressiveness as horizontal + } + + // HH subband - diagonal motion details + + // Diagonal motion deltas can be quantized most aggressively + return 0.5f; + + } else { + // CHROMA DELTA CHANNELS: More aggressive quantization allowed due to temporal masking + // Motion chroma changes are less perceptually critical than static chroma + + float base = perceptual_model3_chroma_basecurve(enc->quality_level, level - 1); + + if (subband_type == 0) { // LL chroma deltas + // Chroma DC motion changes - allow more aggressive quantization + return 1.3f; // More aggressive than full-picture chroma + } else if (subband_type == 1) { // LH chroma deltas + // Horizontal chroma motion - temporal masking allows more quantization + return FCLAMP(base * 1.4f, 1.2f, 120.0f); + } else if (subband_type == 2) { // HL chroma deltas + // Vertical chroma motion - most aggressive + return FCLAMP(base * ANISOTROPY_MULT_CHROMA[enc->quality_level] * 1.6f, 1.4f, 140.0f); + } else { // 
HH chroma deltas + // Diagonal chroma motion - extremely aggressive quantization + return FCLAMP(base * ANISOTROPY_MULT_CHROMA[enc->quality_level] * 1.8f + ANISOTROPY_BIAS_CHROMA[enc->quality_level], 1.6f, 160.0f); + } + } +} + // Determine perceptual weight for coefficient at linear position (matches actual DWT layout) static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma) { @@ -993,6 +1045,51 @@ static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_i return 1.0f; } +// Determine delta-specific perceptual weight for coefficient at linear position +static float get_perceptual_weight_for_position_delta(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma) { + // Map linear coefficient index to DWT subband using same layout as decoder + int offset = 0; + + // First: LL subband at maximum decomposition level + int ll_width = width >> decomp_levels; + int ll_height = height >> decomp_levels; + int ll_size = ll_width * ll_height; + + if (linear_idx < offset + ll_size) { + // LL subband at maximum level - use delta-specific perceptual weight + return get_perceptual_weight_delta(enc, decomp_levels, 0, is_chroma, decomp_levels); + } + offset += ll_size; + + // Then: LH, HL, HH subbands for each level from max down to 1 + for (int level = decomp_levels; level >= 1; level--) { + int level_width = width >> (decomp_levels - level + 1); + int level_height = height >> (decomp_levels - level + 1); + int subband_size = level_width * level_height; + + // LH subband (horizontal details) + if (linear_idx < offset + subband_size) { + return get_perceptual_weight_delta(enc, level, 1, is_chroma, decomp_levels); + } + offset += subband_size; + + // HL subband (vertical details) + if (linear_idx < offset + subband_size) { + return get_perceptual_weight_delta(enc, level, 2, is_chroma, decomp_levels); + } + offset += subband_size; + + // HH 
subband (diagonal details) + if (linear_idx < offset + subband_size) { + return get_perceptual_weight_delta(enc, level, 3, is_chroma, decomp_levels); + } + offset += subband_size; + } + + // Fallback for out-of-bounds indices + return 1.0f; +} + // Apply perceptual quantisation per-coefficient (same loop as uniform but with spatial weights) static void quantise_dwt_coefficients_perceptual_per_coeff(tav_encoder_t *enc, float *coeffs, int16_t *quantised, int size, @@ -1011,6 +1108,38 @@ static void quantise_dwt_coefficients_perceptual_per_coeff(tav_encoder_t *enc, } } +// Apply delta-specific perceptual quantisation for temporal coefficients +static void quantise_dwt_coefficients_perceptual_delta(tav_encoder_t *enc, + float *delta_coeffs, int16_t *quantised, int size, + int base_quantiser, int width, int height, + int decomp_levels, int is_chroma) { + // Delta-specific perceptual quantization uses motion-optimized weights + // Key differences from full-picture quantization: + // 1. Finer quantization steps for deltas (smaller magnitudes) + // 2. Motion-coherence emphasis over spatial-detail emphasis + // 3. 
Enhanced temporal masking for chroma channels + + float effective_base_q = base_quantiser; + effective_base_q = FCLAMP(effective_base_q, 1.0f, 255.0f); + + // Delta-specific base quantization adjustment + // Deltas benefit from temporal masking - allow coarser quantization steps + float delta_coarse_tune = 1.2f; // 20% coarser quantization for delta coefficients + effective_base_q *= delta_coarse_tune; + + for (int i = 0; i < size; i++) { + // Apply delta-specific perceptual weight based on coefficient's position in DWT layout + float weight = get_perceptual_weight_for_position_delta(enc, i, width, height, decomp_levels, is_chroma); + float effective_q = effective_base_q * weight; + + // Ensure minimum quantization step for very small deltas to prevent over-quantization + effective_q = fmaxf(effective_q, 0.5f); + + float quantised_val = delta_coeffs[i] / effective_q; + quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767); + } +} + // Convert 2D spatial DWT layout to linear subband layout (for decoder compatibility) @@ -1132,29 +1261,90 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, memcpy(prev_cg, tile_cg_data, tile_size * sizeof(float)); } else if (mode == TAV_MODE_DELTA) { - // DELTA mode: compute coefficient deltas and quantise them + // DELTA mode with predictive error compensation to mitigate accumulation artifacts int tile_idx = tile_y * enc->tiles_x + tile_x; float *prev_y = enc->previous_coeffs_y + (tile_idx * tile_size); float *prev_co = enc->previous_coeffs_co + (tile_idx * tile_size); float *prev_cg = enc->previous_coeffs_cg + (tile_idx * tile_size); - - // Compute deltas: delta = current - previous + + // Allocate temporary buffers for error compensation float *delta_y = malloc(tile_size * sizeof(float)); float *delta_co = malloc(tile_size * sizeof(float)); float *delta_cg = malloc(tile_size * sizeof(float)); - + float *compensated_delta_y = malloc(tile_size * 
sizeof(float)); + float *compensated_delta_co = malloc(tile_size * sizeof(float)); + float *compensated_delta_cg = malloc(tile_size * sizeof(float)); + + // Step 1: Compute naive deltas for (int i = 0; i < tile_size; i++) { delta_y[i] = tile_y_data[i] - prev_y[i]; delta_co[i] = tile_co_data[i] - prev_co[i]; delta_cg[i] = tile_cg_data[i] - prev_cg[i]; } - - // Quantise the deltas with uniform quantisation (perceptual tuning is for original coefficients, not deltas) - quantise_dwt_coefficients(delta_y, quantised_y, tile_size, this_frame_qY); - quantise_dwt_coefficients(delta_co, quantised_co, tile_size, this_frame_qCo); - quantise_dwt_coefficients(delta_cg, quantised_cg, tile_size, this_frame_qCg); - // Reconstruct coefficients like decoder will (previous + uniform_dequantised_delta) + // Step 2: Predictive error compensation using iterative refinement + // We simulate the quantization-dequantization process to predict decoder behavior + for (int iteration = 0; iteration < 2; iteration++) { // 2 iterations for good convergence + // Test quantization of current deltas + int16_t *test_quant_y = malloc(tile_size * sizeof(int16_t)); + int16_t *test_quant_co = malloc(tile_size * sizeof(int16_t)); + int16_t *test_quant_cg = malloc(tile_size * sizeof(int16_t)); + + // TEMPORARILY DISABLED: Use uniform quantization in error compensation prediction + quantise_dwt_coefficients(iteration == 0 ? delta_y : compensated_delta_y, test_quant_y, tile_size, this_frame_qY); + quantise_dwt_coefficients(iteration == 0 ? delta_co : compensated_delta_co, test_quant_co, tile_size, this_frame_qCo); + quantise_dwt_coefficients(iteration == 0 ? 
delta_cg : compensated_delta_cg, test_quant_cg, tile_size, this_frame_qCg); + + // Predict what decoder will reconstruct + float predicted_y, predicted_co, predicted_cg; + float prediction_error_y, prediction_error_co, prediction_error_cg; + + for (int i = 0; i < tile_size; i++) { + // Simulate decoder reconstruction + predicted_y = prev_y[i] + ((float)test_quant_y[i] * this_frame_qY); + predicted_co = prev_co[i] + ((float)test_quant_co[i] * this_frame_qCo); + predicted_cg = prev_cg[i] + ((float)test_quant_cg[i] * this_frame_qCg); + + // Calculate prediction error (difference between true target and predicted reconstruction) + prediction_error_y = tile_y_data[i] - predicted_y; + prediction_error_co = tile_co_data[i] - predicted_co; + prediction_error_cg = tile_cg_data[i] - predicted_cg; + + // Debug: accumulate error statistics for first tile only + static float total_error_y = 0, total_error_co = 0, total_error_cg = 0; + static int error_samples = 0; + if (tile_x == 0 && tile_y == 0 && i < 16) { // First tile, first 16 coeffs + total_error_y += fabs(prediction_error_y); + total_error_co += fabs(prediction_error_co); + total_error_cg += fabs(prediction_error_cg); + error_samples++; + if (error_samples % 160 == 0) { // Print every 10 frames + printf("[ERROR-COMP] Avg errors: Y=%.3f Co=%.3f Cg=%.3f\n", + total_error_y/160, total_error_co/160, total_error_cg/160); + total_error_y = total_error_co = total_error_cg = 0; + } + } + + // Compensate delta by adding prediction error + // This counteracts the quantization error that will occur + compensated_delta_y[i] = delta_y[i] + prediction_error_y; + compensated_delta_co[i] = delta_co[i] + prediction_error_co; + compensated_delta_cg[i] = delta_cg[i] + prediction_error_cg; + } + + free(test_quant_y); + free(test_quant_co); + free(test_quant_cg); + } + + // Step 3: Quantise the error-compensated deltas with delta-specific perceptual weighting + // TEMPORARILY DISABLED: Delta-specific perceptual quantization + // Use uniform 
quantization for deltas (same as original implementation) + quantise_dwt_coefficients(compensated_delta_y, quantised_y, tile_size, this_frame_qY); + quantise_dwt_coefficients(compensated_delta_co, quantised_co, tile_size, this_frame_qCo); + quantise_dwt_coefficients(compensated_delta_cg, quantised_cg, tile_size, this_frame_qCg); + + // Step 4: Update reference coefficients exactly as decoder will reconstruct them for (int i = 0; i < tile_size; i++) { float dequant_delta_y = (float)quantised_y[i] * this_frame_qY; float dequant_delta_co = (float)quantised_co[i] * this_frame_qCo; @@ -1168,6 +1358,9 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, free(delta_y); free(delta_co); free(delta_cg); + free(compensated_delta_y); + free(compensated_delta_co); + free(compensated_delta_cg); } // Debug: check quantised coefficients after quantisation @@ -2777,7 +2970,7 @@ int main(int argc, char *argv[]) { int count_iframe = 0; int count_pframe = 0; - KEYFRAME_INTERVAL = enc->output_fps >> 2; // refresh often because deltas in DWT are more visible than DCT + KEYFRAME_INTERVAL = enc->output_fps * 2; // Longer intervals for testing error compensation (was >> 2) while (continue_encoding) { // Check encode limit if specified