From e19af854dcff63ccc1c960da4dad162365758cba Mon Sep 17 00:00:00 2001 From: minjaesong Date: Thu, 16 Oct 2025 01:39:51 +0900 Subject: [PATCH] TAV: Haar delta decoding --- terranmon.txt | 5 ++ .../torvald/tsvm/GraphicsJSR223Delegate.kt | 67 ++++++++++++++----- video_encoder/encoder_tav.c | 17 +++-- 3 files changed, 69 insertions(+), 20 deletions(-) diff --git a/terranmon.txt b/terranmon.txt index a58da94..e5c747a 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -1104,6 +1104,11 @@ Note: GOP Sync packets have no payload size field (fixed 2-byte packet). 0x00 = SKIP (just use frame data from previous frame) 0x01 = INTRA (DWT-coded) 0x02 = DELTA (DWT delta) + - 0x02: DWT level 1 + - 0x12: DWT level 2 + - 0x22: DWT level 3 + ... + - 0xF2: DWT Level 16 uint8 Quantiser override Y (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding) uint8 Quantiser override Co (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding) uint8 Quantiser override Cg (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding) diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index 1b74535..ecd96f4 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -4835,24 +4835,31 @@ class GraphicsJSR223Delegate(private val vm: VM) { for (tileX in 0 until tilesX) { // Read tile header (4 bytes: mode + qY + qCo + qCg) - val mode = vm.peek(readPtr++).toUint() + val modeRaw = vm.peek(readPtr++).toUint() val qY = vm.peek(readPtr++).toUint().let { if (it == 0) qYGlobal else TAV_QLUT[it - 1] } val qCo = vm.peek(readPtr++).toUint().let { if (it == 0) qCoGlobal else TAV_QLUT[it - 1] } val qCg = vm.peek(readPtr++).toUint().let { if (it == 0) qCgGlobal else TAV_QLUT[it - 1] } + // Extract 
base mode and Haar level from mode byte + // Mode encoding: base_mode | ((haar_level - 1) << 4) + // Examples: 0x02 = DELTA (no Haar), 0x12 = DELTA+Haar2, 0x22 = DELTA+Haar3 + val baseMode = modeRaw and 0x0F + val haarNibble = (modeRaw shr 4) + val haarLevel = if (baseMode == 0x02 && haarNibble > 0) (haarNibble + 1) else 0 + dbgOut["qY"] = qY dbgOut["qCo"] = qCo dbgOut["qCg"] = qCg dbgOut["frameMode"] = "" // debug print: raw decompressed bytes - /*print("TAV Decode raw bytes (Frame $frameCount, mode: ${arrayOf("SKIP", "INTRA", "DELTA")[mode]}): ") + /*print("TAV Decode raw bytes (Frame $frameCount, mode: ${arrayOf("SKIP", "INTRA", "DELTA")[baseMode]}): ") for (i in 0 until 32) { print("${vm.peek(blockDataPtr + i).toUint().toString(16).uppercase().padStart(2, '0')} ") } println("...")*/ - when (mode) { + when (baseMode) { 0x00 -> { // TAV_MODE_SKIP // Copy 280x224 tile from previous frame to current frame tavCopyTileRGB(tileX, tileY, currentRGBAddr, prevRGBAddr, width, height) @@ -4865,11 +4872,11 @@ class GraphicsJSR223Delegate(private val vm: VM) { waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, frameCount) dbgOut["frameMode"] = " " } - 0x02 -> { // TAV_MODE_DELTA + 0x02 -> { // TAV_MODE_DELTA (with optional Haar wavelet) // Coefficient delta encoding for efficient P-frames readPtr = tavDecodeDeltaTileRGB(readPtr, channelLayout, tileX, tileY, currentRGBAddr, width, height, qY, qCo, qCg, - decompLevels, tavVersion, isMonoblock, frameCount) + waveletFilter, decompLevels, tavVersion, isMonoblock, frameCount, haarLevel) dbgOut["frameMode"] = " " } } @@ -5487,7 +5494,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { private fun tavDecodeDeltaTileRGB(readPtr: Long, channelLayout: Int, tileX: Int, tileY: Int, currentRGBAddr: Long, width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, - decompLevels: Int, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int = 0): Long { + spatialFilter: Int, decompLevels: Int, tavVersion: Int, 
isMonoblock: Boolean = false, frameCount: Int = 0, haarLevel: Int = 0): Long { val tileIdx = if (isMonoblock) { 0 // Single tile index for monoblock @@ -5598,15 +5605,45 @@ class GraphicsJSR223Delegate(private val vm: VM) { // TEMPORARILY DISABLED: Delta-specific perceptual reconstruction // Use uniform delta reconstruction (same as original implementation) - for (i in 0 until coeffCount) { - currentY[i] = prevY[i] + (deltaY[i].toFloat() * qY) - currentCo[i] = prevCo[i] + (deltaCo[i].toFloat() * qCo) - currentCg[i] = prevCg[i] + (deltaCg[i].toFloat() * qCg) + + // Determine tile dimensions for DWT operations + val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X + val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y + + // Apply inverse Haar DWT to deltas if Haar encoding was used + if (haarLevel > 0) { + // Debug: Check if previous coefficients exist + if (frameCount in 0..5) { + println("[HAAR-DELTA] Frame $frameCount, Tile $tileIdx: haarLevel=$haarLevel, prevY exists=${tavPreviousCoeffsY?.contains(tileIdx) ?: false}") + } + + // Dequantize deltas to float arrays + val deltaYFloat = FloatArray(coeffCount) { deltaY[it].toFloat() * qY } + val deltaCoFloat = FloatArray(coeffCount) { deltaCo[it].toFloat() * qCo } + val deltaCgFloat = FloatArray(coeffCount) { deltaCg[it].toFloat() * qCg } + + // Apply inverse Haar DWT (same as encoder: Haar wavelet filter = 255) + tavApplyDWTInverseMultiLevel(deltaYFloat, tileWidth, tileHeight, haarLevel, 255, TavNullFilter) + tavApplyDWTInverseMultiLevel(deltaCoFloat, tileWidth, tileHeight, haarLevel, 255, TavNullFilter) + tavApplyDWTInverseMultiLevel(deltaCgFloat, tileWidth, tileHeight, haarLevel, 255, TavNullFilter) + + // Add transformed deltas to previous coefficients + for (i in 0 until coeffCount) { + currentY[i] = prevY[i] + deltaYFloat[i] + currentCo[i] = prevCo[i] + deltaCoFloat[i] + currentCg[i] = prevCg[i] + deltaCgFloat[i] + } + } else { + throw Error() + // No Haar transform: direct 
dequantization + for (i in 0 until coeffCount) { + currentY[i] = prevY[i] + (deltaY[i].toFloat() * qY) + currentCo[i] = prevCo[i] + (deltaCo[i].toFloat() * qCo) + currentCg[i] = prevCg[i] + (deltaCg[i].toFloat() * qCg) + } } // Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT) - val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X - val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels) // Delta frames use uniform quantization for the deltas themselves, so no perceptual weights removeGrainSynthesisDecoder(currentY, tileWidth, tileHeight, decompLevels, frameCount, qY.toFloat(), subbands) @@ -5628,9 +5665,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { tavPreviousCoeffsCg!![tileIdx] = currentCg.clone() // Apply inverse DWT - tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, 255, TavSharpenLuma) - tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, 255, TavNullFilter) - tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, 255, TavNullFilter) + tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, spatialFilter, TavSharpenLuma) + tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, spatialFilter, TavNullFilter) + tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, spatialFilter, TavNullFilter) // Debug: Check coefficient values after inverse DWT if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) { diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index 6a0411b..6f46a7c 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -104,7 +104,8 @@ static int needs_alpha_channel(int channel_layout) { #define DEFAULT_FPS 30 #define DEFAULT_QUALITY 3 #define DEFAULT_ZSTD_LEVEL 9 -#define GOP_SIZE /*1*/4 +#define GOP_SIZE 8 +#define 
TEMPORAL_DECOMP_LEVEL 2 // Audio/subtitle constants (reused from TEV) #define MP2_DEFAULT_PACKET_SIZE 1152 @@ -778,13 +779,13 @@ static tav_encoder_t* create_encoder(void) { enc->progressive_mode = 1; // Default to progressive mode enc->grain_synthesis = 0; // Default: disable grain synthesis (only do it on the decoder) enc->use_delta_encoding = 0; - enc->delta_haar_levels = 2; + enc->delta_haar_levels = TEMPORAL_DECOMP_LEVEL; // GOP / temporal DWT settings enc->enable_temporal_dwt = 0; // Default: disabled for backward compatibility. Mutually exclusive with use_delta_encoding enc->gop_capacity = GOP_SIZE; // 16 frames enc->gop_frame_count = 0; - enc->temporal_decomp_levels = 2; // 2 levels of temporal DWT (16 -> 4x4 subbands) + enc->temporal_decomp_levels = TEMPORAL_DECOMP_LEVEL; // TEMPORAL_DECOMP_LEVEL levels of temporal DWT (NOTE(review): the former "16 -> 4x4 subbands" figure presumably assumed a 16-frame GOP; GOP_SIZE is 8 in this patch - TODO confirm) enc->gop_rgb_frames = NULL; enc->gop_y_frames = NULL; enc->gop_co_frames = NULL; @@ -2985,8 +2986,14 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, uint8_t mode, uint8_t *buffer) { size_t offset = 0; - // Write tile header - buffer[offset++] = mode; + // Write tile header with Haar level encoded in upper nibble for DELTA mode + // Mode encoding: base_mode | ((haar_level - 1) << 4) + // - level 1: 0x02, level 2: 0x12, level 3: 0x22 + uint8_t encoded_mode = mode; + if (mode == TAV_MODE_DELTA && enc->delta_haar_levels >= 1) { + encoded_mode = mode | ((enc->delta_haar_levels - 1) << 4); + } + buffer[offset++] = encoded_mode; // Use adjusted quantiser from bitrate control, or base quantiser if not in bitrate mode int qY_override = enc->bitrate_mode ? quantiser_float_to_int_dithered(enc) : enc->quantiser_y;