TAV: Haar delta decoding

This commit is contained in:
minjaesong
2025-10-16 01:39:51 +09:00
parent ea72dec996
commit e19af854dc
3 changed files with 69 additions and 20 deletions

View File

@@ -1104,6 +1104,11 @@ Note: GOP Sync packets have no payload size field (fixed 2-byte packet).
0x00 = SKIP (just use frame data from previous frame)
0x01 = INTRA (DWT-coded)
0x02 = DELTA (DWT delta)
(lower nibble = base mode 0x2; upper nibble = Haar DWT level minus 1)
- 0x02: DWT level 1
- 0x12: DWT level 2
- 0x22: DWT level 3
...
- 0xF2: DWT level 16
uint8 Quantiser override Y (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding)
uint8 Quantiser override Co (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding)
uint8 Quantiser override Cg (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding)

View File

@@ -4835,24 +4835,31 @@ class GraphicsJSR223Delegate(private val vm: VM) {
for (tileX in 0 until tilesX) {
// Read tile header (4 bytes: mode + qY + qCo + qCg)
val mode = vm.peek(readPtr++).toUint()
val modeRaw = vm.peek(readPtr++).toUint()
val qY = vm.peek(readPtr++).toUint().let { if (it == 0) qYGlobal else TAV_QLUT[it - 1] }
val qCo = vm.peek(readPtr++).toUint().let { if (it == 0) qCoGlobal else TAV_QLUT[it - 1] }
val qCg = vm.peek(readPtr++).toUint().let { if (it == 0) qCgGlobal else TAV_QLUT[it - 1] }
// Extract base mode and Haar level from mode byte
// Mode encoding: base_mode | ((haar_level - 1) << 4)
// Examples: 0x02 = DELTA (no Haar), 0x12 = DELTA+Haar2, 0x22 = DELTA+Haar3
val baseMode = modeRaw and 0x0F
val haarNibble = (modeRaw shr 4)
val haarLevel = if (baseMode == 0x02 && haarNibble > 0) (haarNibble + 1) else 0
dbgOut["qY"] = qY
dbgOut["qCo"] = qCo
dbgOut["qCg"] = qCg
dbgOut["frameMode"] = ""
// debug print: raw decompressed bytes
/*print("TAV Decode raw bytes (Frame $frameCount, mode: ${arrayOf("SKIP", "INTRA", "DELTA")[mode]}): ")
/*print("TAV Decode raw bytes (Frame $frameCount, mode: ${arrayOf("SKIP", "INTRA", "DELTA")[baseMode]}): ")
for (i in 0 until 32) {
print("${vm.peek(blockDataPtr + i).toUint().toString(16).uppercase().padStart(2, '0')} ")
}
println("...")*/
when (mode) {
when (baseMode) {
0x00 -> { // TAV_MODE_SKIP
// Copy 280x224 tile from previous frame to current frame
tavCopyTileRGB(tileX, tileY, currentRGBAddr, prevRGBAddr, width, height)
@@ -4865,11 +4872,11 @@ class GraphicsJSR223Delegate(private val vm: VM) {
waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, frameCount)
dbgOut["frameMode"] = " "
}
0x02 -> { // TAV_MODE_DELTA
0x02 -> { // TAV_MODE_DELTA (with optional Haar wavelet)
// Coefficient delta encoding for efficient P-frames
readPtr = tavDecodeDeltaTileRGB(readPtr, channelLayout, tileX, tileY, currentRGBAddr,
width, height, qY, qCo, qCg,
decompLevels, tavVersion, isMonoblock, frameCount)
waveletFilter, decompLevels, tavVersion, isMonoblock, frameCount, haarLevel)
dbgOut["frameMode"] = " "
}
}
@@ -5487,7 +5494,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
private fun tavDecodeDeltaTileRGB(readPtr: Long, channelLayout: Int, tileX: Int, tileY: Int, currentRGBAddr: Long,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int,
decompLevels: Int, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int = 0): Long {
spatialFilter: Int, decompLevels: Int, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int = 0, haarLevel: Int = 0): Long {
val tileIdx = if (isMonoblock) {
0 // Single tile index for monoblock
@@ -5598,15 +5605,45 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// TEMPORARILY DISABLED: Delta-specific perceptual reconstruction
// Use uniform delta reconstruction (same as original implementation)
for (i in 0 until coeffCount) {
currentY[i] = prevY[i] + (deltaY[i].toFloat() * qY)
currentCo[i] = prevCo[i] + (deltaCo[i].toFloat() * qCo)
currentCg[i] = prevCg[i] + (deltaCg[i].toFloat() * qCg)
// Determine tile dimensions for DWT operations
val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X
val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y
// Apply inverse Haar DWT to deltas if Haar encoding was used
if (haarLevel > 0) {
// Debug: Check if previous coefficients exist
if (frameCount in 0..5) {
println("[HAAR-DELTA] Frame $frameCount, Tile $tileIdx: haarLevel=$haarLevel, prevY exists=${tavPreviousCoeffsY?.contains(tileIdx) ?: false}")
}
// Dequantize deltas to float arrays
val deltaYFloat = FloatArray(coeffCount) { deltaY[it].toFloat() * qY }
val deltaCoFloat = FloatArray(coeffCount) { deltaCo[it].toFloat() * qCo }
val deltaCgFloat = FloatArray(coeffCount) { deltaCg[it].toFloat() * qCg }
// Apply inverse Haar DWT (same as encoder: Haar wavelet filter = 255)
tavApplyDWTInverseMultiLevel(deltaYFloat, tileWidth, tileHeight, haarLevel, 255, TavNullFilter)
tavApplyDWTInverseMultiLevel(deltaCoFloat, tileWidth, tileHeight, haarLevel, 255, TavNullFilter)
tavApplyDWTInverseMultiLevel(deltaCgFloat, tileWidth, tileHeight, haarLevel, 255, TavNullFilter)
// Add transformed deltas to previous coefficients
for (i in 0 until coeffCount) {
currentY[i] = prevY[i] + deltaYFloat[i]
currentCo[i] = prevCo[i] + deltaCoFloat[i]
currentCg[i] = prevCg[i] + deltaCgFloat[i]
}
} else {
throw Error()
// No Haar transform: direct dequantization
for (i in 0 until coeffCount) {
currentY[i] = prevY[i] + (deltaY[i].toFloat() * qY)
currentCo[i] = prevCo[i] + (deltaCo[i].toFloat() * qCo)
currentCg[i] = prevCg[i] + (deltaCg[i].toFloat() * qCg)
}
}
// Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT)
val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X
val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y
val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels)
// Delta frames use uniform quantization for the deltas themselves, so no perceptual weights
removeGrainSynthesisDecoder(currentY, tileWidth, tileHeight, decompLevels, frameCount, qY.toFloat(), subbands)
@@ -5628,9 +5665,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
tavPreviousCoeffsCg!![tileIdx] = currentCg.clone()
// Apply inverse DWT
tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, 255, TavSharpenLuma)
tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, 255, TavNullFilter)
tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, 255, TavNullFilter)
tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, spatialFilter, TavSharpenLuma)
tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, spatialFilter, TavNullFilter)
tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, spatialFilter, TavNullFilter)
// Debug: Check coefficient values after inverse DWT
if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) {

View File

@@ -104,7 +104,8 @@ static int needs_alpha_channel(int channel_layout) {
#define DEFAULT_FPS 30
#define DEFAULT_QUALITY 3
#define DEFAULT_ZSTD_LEVEL 9
#define GOP_SIZE /*1*/4
#define GOP_SIZE 8
#define TEMPORAL_DECOMP_LEVEL 2
// Audio/subtitle constants (reused from TEV)
#define MP2_DEFAULT_PACKET_SIZE 1152
@@ -778,13 +779,13 @@ static tav_encoder_t* create_encoder(void) {
enc->progressive_mode = 1; // Default to progressive mode
enc->grain_synthesis = 0; // Default: disable grain synthesis (only do it on the decoder)
enc->use_delta_encoding = 0;
enc->delta_haar_levels = 2;
enc->delta_haar_levels = TEMPORAL_DECOMP_LEVEL;
// GOP / temporal DWT settings
enc->enable_temporal_dwt = 0; // Default: disabled for backward compatibility. Mutually exclusive with use_delta_encoding
enc->gop_capacity = GOP_SIZE; // frames per GOP (see GOP_SIZE)
enc->gop_frame_count = 0;
enc->temporal_decomp_levels = 2; // 2 levels of temporal DWT (16 -> 4x4 subbands)
enc->temporal_decomp_levels = TEMPORAL_DECOMP_LEVEL; // 2 levels of temporal DWT (16 -> 4x4 subbands)
enc->gop_rgb_frames = NULL;
enc->gop_y_frames = NULL;
enc->gop_co_frames = NULL;
@@ -2985,8 +2986,14 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
uint8_t mode, uint8_t *buffer) {
size_t offset = 0;
// Write tile header
buffer[offset++] = mode;
// Write tile header with Haar level encoded in upper nibble for DELTA mode
// Mode encoding: base_mode | ((haar_level - 1) << 4)
// - level 1: 0x02, level 2: 0x12, level 3: 0x22
uint8_t encoded_mode = mode;
if (mode == TAV_MODE_DELTA && enc->delta_haar_levels >= 1) {
encoded_mode = mode | ((enc->delta_haar_levels - 1) << 4);
}
buffer[offset++] = encoded_mode;
// Use adjusted quantiser from bitrate control, or base quantiser if not in bitrate mode
int qY_override = enc->bitrate_mode ? quantiser_float_to_int_dithered(enc) : enc->quantiser_y;