mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-06-09 14:44:05 +09:00
TAV: Haar delta decoding
This commit is contained in:
@@ -1104,6 +1104,11 @@ Note: GOP Sync packets have no payload size field (fixed 2-byte packet).
|
|||||||
0x00 = SKIP (just use frame data from previous frame)
|
0x00 = SKIP (just use frame data from previous frame)
|
||||||
0x01 = INTRA (DWT-coded)
|
0x01 = INTRA (DWT-coded)
|
||||||
0x02 = DELTA (DWT delta)
|
0x02 = DELTA (DWT delta)
|
||||||
|
- 0x02: DWT level 1
|
||||||
|
- 0x12: DWT level 2
|
||||||
|
- 0x22: DWT level 3
|
||||||
|
...
|
||||||
|
- 0xF2: DWT level 16
|
||||||
uint8 Quantiser override Y (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding)
|
uint8 Quantiser override Y (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding)
|
||||||
uint8 Quantiser override Co (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding)
|
uint8 Quantiser override Co (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding)
|
||||||
uint8 Quantiser override Cg (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding)
|
uint8 Quantiser override Cg (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding)
|
||||||
|
|||||||
@@ -4835,24 +4835,31 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
for (tileX in 0 until tilesX) {
|
for (tileX in 0 until tilesX) {
|
||||||
|
|
||||||
// Read tile header (4 bytes: mode + qY + qCo + qCg)
|
// Read tile header (4 bytes: mode + qY + qCo + qCg)
|
||||||
val mode = vm.peek(readPtr++).toUint()
|
val modeRaw = vm.peek(readPtr++).toUint()
|
||||||
val qY = vm.peek(readPtr++).toUint().let { if (it == 0) qYGlobal else TAV_QLUT[it - 1] }
|
val qY = vm.peek(readPtr++).toUint().let { if (it == 0) qYGlobal else TAV_QLUT[it - 1] }
|
||||||
val qCo = vm.peek(readPtr++).toUint().let { if (it == 0) qCoGlobal else TAV_QLUT[it - 1] }
|
val qCo = vm.peek(readPtr++).toUint().let { if (it == 0) qCoGlobal else TAV_QLUT[it - 1] }
|
||||||
val qCg = vm.peek(readPtr++).toUint().let { if (it == 0) qCgGlobal else TAV_QLUT[it - 1] }
|
val qCg = vm.peek(readPtr++).toUint().let { if (it == 0) qCgGlobal else TAV_QLUT[it - 1] }
|
||||||
|
|
||||||
|
// Extract base mode and Haar level from mode byte
|
||||||
|
// Mode encoding: base_mode | ((haar_level - 1) << 4)
|
||||||
|
// Examples: 0x02 = DELTA (no Haar), 0x12 = DELTA+Haar2, 0x22 = DELTA+Haar3
|
||||||
|
val baseMode = modeRaw and 0x0F
|
||||||
|
val haarNibble = (modeRaw shr 4)
|
||||||
|
val haarLevel = if (baseMode == 0x02 && haarNibble > 0) (haarNibble + 1) else 0
|
||||||
|
|
||||||
dbgOut["qY"] = qY
|
dbgOut["qY"] = qY
|
||||||
dbgOut["qCo"] = qCo
|
dbgOut["qCo"] = qCo
|
||||||
dbgOut["qCg"] = qCg
|
dbgOut["qCg"] = qCg
|
||||||
dbgOut["frameMode"] = ""
|
dbgOut["frameMode"] = ""
|
||||||
|
|
||||||
// debug print: raw decompressed bytes
|
// debug print: raw decompressed bytes
|
||||||
/*print("TAV Decode raw bytes (Frame $frameCount, mode: ${arrayOf("SKIP", "INTRA", "DELTA")[mode]}): ")
|
/*print("TAV Decode raw bytes (Frame $frameCount, mode: ${arrayOf("SKIP", "INTRA", "DELTA")[baseMode]}): ")
|
||||||
for (i in 0 until 32) {
|
for (i in 0 until 32) {
|
||||||
print("${vm.peek(blockDataPtr + i).toUint().toString(16).uppercase().padStart(2, '0')} ")
|
print("${vm.peek(blockDataPtr + i).toUint().toString(16).uppercase().padStart(2, '0')} ")
|
||||||
}
|
}
|
||||||
println("...")*/
|
println("...")*/
|
||||||
|
|
||||||
when (mode) {
|
when (baseMode) {
|
||||||
0x00 -> { // TAV_MODE_SKIP
|
0x00 -> { // TAV_MODE_SKIP
|
||||||
// Copy 280x224 tile from previous frame to current frame
|
// Copy 280x224 tile from previous frame to current frame
|
||||||
tavCopyTileRGB(tileX, tileY, currentRGBAddr, prevRGBAddr, width, height)
|
tavCopyTileRGB(tileX, tileY, currentRGBAddr, prevRGBAddr, width, height)
|
||||||
@@ -4865,11 +4872,11 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, frameCount)
|
waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, frameCount)
|
||||||
dbgOut["frameMode"] = " "
|
dbgOut["frameMode"] = " "
|
||||||
}
|
}
|
||||||
0x02 -> { // TAV_MODE_DELTA
|
0x02 -> { // TAV_MODE_DELTA (with optional Haar wavelet)
|
||||||
// Coefficient delta encoding for efficient P-frames
|
// Coefficient delta encoding for efficient P-frames
|
||||||
readPtr = tavDecodeDeltaTileRGB(readPtr, channelLayout, tileX, tileY, currentRGBAddr,
|
readPtr = tavDecodeDeltaTileRGB(readPtr, channelLayout, tileX, tileY, currentRGBAddr,
|
||||||
width, height, qY, qCo, qCg,
|
width, height, qY, qCo, qCg,
|
||||||
decompLevels, tavVersion, isMonoblock, frameCount)
|
waveletFilter, decompLevels, tavVersion, isMonoblock, frameCount, haarLevel)
|
||||||
dbgOut["frameMode"] = " "
|
dbgOut["frameMode"] = " "
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -5487,7 +5494,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
|
|
||||||
private fun tavDecodeDeltaTileRGB(readPtr: Long, channelLayout: Int, tileX: Int, tileY: Int, currentRGBAddr: Long,
|
private fun tavDecodeDeltaTileRGB(readPtr: Long, channelLayout: Int, tileX: Int, tileY: Int, currentRGBAddr: Long,
|
||||||
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int,
|
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int,
|
||||||
decompLevels: Int, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int = 0): Long {
|
spatialFilter: Int, decompLevels: Int, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int = 0, haarLevel: Int = 0): Long {
|
||||||
|
|
||||||
val tileIdx = if (isMonoblock) {
|
val tileIdx = if (isMonoblock) {
|
||||||
0 // Single tile index for monoblock
|
0 // Single tile index for monoblock
|
||||||
@@ -5598,15 +5605,45 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
|
|
||||||
// TEMPORARILY DISABLED: Delta-specific perceptual reconstruction
|
// TEMPORARILY DISABLED: Delta-specific perceptual reconstruction
|
||||||
// Use uniform delta reconstruction (same as original implementation)
|
// Use uniform delta reconstruction (same as original implementation)
|
||||||
for (i in 0 until coeffCount) {
|
|
||||||
currentY[i] = prevY[i] + (deltaY[i].toFloat() * qY)
|
// Determine tile dimensions for DWT operations
|
||||||
currentCo[i] = prevCo[i] + (deltaCo[i].toFloat() * qCo)
|
val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X
|
||||||
currentCg[i] = prevCg[i] + (deltaCg[i].toFloat() * qCg)
|
val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y
|
||||||
|
|
||||||
|
// Apply inverse Haar DWT to deltas if Haar encoding was used
|
||||||
|
if (haarLevel > 0) {
|
||||||
|
// Debug: Check if previous coefficients exist
|
||||||
|
if (frameCount in 0..5) {
|
||||||
|
println("[HAAR-DELTA] Frame $frameCount, Tile $tileIdx: haarLevel=$haarLevel, prevY exists=${tavPreviousCoeffsY?.contains(tileIdx) ?: false}")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dequantize deltas to float arrays
|
||||||
|
val deltaYFloat = FloatArray(coeffCount) { deltaY[it].toFloat() * qY }
|
||||||
|
val deltaCoFloat = FloatArray(coeffCount) { deltaCo[it].toFloat() * qCo }
|
||||||
|
val deltaCgFloat = FloatArray(coeffCount) { deltaCg[it].toFloat() * qCg }
|
||||||
|
|
||||||
|
// Apply inverse Haar DWT (same as encoder: Haar wavelet filter = 255)
|
||||||
|
tavApplyDWTInverseMultiLevel(deltaYFloat, tileWidth, tileHeight, haarLevel, 255, TavNullFilter)
|
||||||
|
tavApplyDWTInverseMultiLevel(deltaCoFloat, tileWidth, tileHeight, haarLevel, 255, TavNullFilter)
|
||||||
|
tavApplyDWTInverseMultiLevel(deltaCgFloat, tileWidth, tileHeight, haarLevel, 255, TavNullFilter)
|
||||||
|
|
||||||
|
// Add transformed deltas to previous coefficients
|
||||||
|
for (i in 0 until coeffCount) {
|
||||||
|
currentY[i] = prevY[i] + deltaYFloat[i]
|
||||||
|
currentCo[i] = prevCo[i] + deltaCoFloat[i]
|
||||||
|
currentCg[i] = prevCg[i] + deltaCgFloat[i]
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw Error()
|
||||||
|
// No Haar transform: direct dequantization
|
||||||
|
for (i in 0 until coeffCount) {
|
||||||
|
currentY[i] = prevY[i] + (deltaY[i].toFloat() * qY)
|
||||||
|
currentCo[i] = prevCo[i] + (deltaCo[i].toFloat() * qCo)
|
||||||
|
currentCg[i] = prevCg[i] + (deltaCg[i].toFloat() * qCg)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT)
|
// Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT)
|
||||||
val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X
|
|
||||||
val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y
|
|
||||||
val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels)
|
val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels)
|
||||||
// Delta frames use uniform quantization for the deltas themselves, so no perceptual weights
|
// Delta frames use uniform quantization for the deltas themselves, so no perceptual weights
|
||||||
removeGrainSynthesisDecoder(currentY, tileWidth, tileHeight, decompLevels, frameCount, qY.toFloat(), subbands)
|
removeGrainSynthesisDecoder(currentY, tileWidth, tileHeight, decompLevels, frameCount, qY.toFloat(), subbands)
|
||||||
@@ -5628,9 +5665,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
tavPreviousCoeffsCg!![tileIdx] = currentCg.clone()
|
tavPreviousCoeffsCg!![tileIdx] = currentCg.clone()
|
||||||
|
|
||||||
// Apply inverse DWT
|
// Apply inverse DWT
|
||||||
tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, 255, TavSharpenLuma)
|
tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, spatialFilter, TavSharpenLuma)
|
||||||
tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, 255, TavNullFilter)
|
tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, spatialFilter, TavNullFilter)
|
||||||
tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, 255, TavNullFilter)
|
tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, spatialFilter, TavNullFilter)
|
||||||
|
|
||||||
// Debug: Check coefficient values after inverse DWT
|
// Debug: Check coefficient values after inverse DWT
|
||||||
if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) {
|
if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) {
|
||||||
|
|||||||
@@ -104,7 +104,8 @@ static int needs_alpha_channel(int channel_layout) {
|
|||||||
#define DEFAULT_FPS 30
|
#define DEFAULT_FPS 30
|
||||||
#define DEFAULT_QUALITY 3
|
#define DEFAULT_QUALITY 3
|
||||||
#define DEFAULT_ZSTD_LEVEL 9
|
#define DEFAULT_ZSTD_LEVEL 9
|
||||||
#define GOP_SIZE /*1*/4
|
#define GOP_SIZE 8
|
||||||
|
#define TEMPORAL_DECOMP_LEVEL 2
|
||||||
|
|
||||||
// Audio/subtitle constants (reused from TEV)
|
// Audio/subtitle constants (reused from TEV)
|
||||||
#define MP2_DEFAULT_PACKET_SIZE 1152
|
#define MP2_DEFAULT_PACKET_SIZE 1152
|
||||||
@@ -778,13 +779,13 @@ static tav_encoder_t* create_encoder(void) {
|
|||||||
enc->progressive_mode = 1; // Default to progressive mode
|
enc->progressive_mode = 1; // Default to progressive mode
|
||||||
enc->grain_synthesis = 0; // Default: disable grain synthesis (only do it on the decoder)
|
enc->grain_synthesis = 0; // Default: disable grain synthesis (only do it on the decoder)
|
||||||
enc->use_delta_encoding = 0;
|
enc->use_delta_encoding = 0;
|
||||||
enc->delta_haar_levels = 2;
|
enc->delta_haar_levels = TEMPORAL_DECOMP_LEVEL;
|
||||||
|
|
||||||
// GOP / temporal DWT settings
|
// GOP / temporal DWT settings
|
||||||
enc->enable_temporal_dwt = 0; // Default: disabled for backward compatibility. Mutually exclusive with use_delta_encoding
|
enc->enable_temporal_dwt = 0; // Default: disabled for backward compatibility. Mutually exclusive with use_delta_encoding
|
||||||
enc->gop_capacity = GOP_SIZE; // 16 frames
|
enc->gop_capacity = GOP_SIZE; // 16 frames
|
||||||
enc->gop_frame_count = 0;
|
enc->gop_frame_count = 0;
|
||||||
enc->temporal_decomp_levels = 2; // 2 levels of temporal DWT (16 -> 4x4 subbands)
|
enc->temporal_decomp_levels = TEMPORAL_DECOMP_LEVEL; // 2 levels of temporal DWT (16 -> 4x4 subbands)
|
||||||
enc->gop_rgb_frames = NULL;
|
enc->gop_rgb_frames = NULL;
|
||||||
enc->gop_y_frames = NULL;
|
enc->gop_y_frames = NULL;
|
||||||
enc->gop_co_frames = NULL;
|
enc->gop_co_frames = NULL;
|
||||||
@@ -2985,8 +2986,14 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
|
|||||||
uint8_t mode, uint8_t *buffer) {
|
uint8_t mode, uint8_t *buffer) {
|
||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
|
|
||||||
// Write tile header
|
// Write tile header with Haar level encoded in upper nibble for DELTA mode
|
||||||
buffer[offset++] = mode;
|
// Mode encoding: base_mode | ((haar_level - 1) << 4)
|
||||||
|
// - level 1: 0x02, level 2: 0x12, level 3: 0x22
|
||||||
|
uint8_t encoded_mode = mode;
|
||||||
|
if (mode == TAV_MODE_DELTA && enc->delta_haar_levels >= 1) {
|
||||||
|
encoded_mode = mode | ((enc->delta_haar_levels - 1) << 4);
|
||||||
|
}
|
||||||
|
buffer[offset++] = encoded_mode;
|
||||||
|
|
||||||
// Use adjusted quantiser from bitrate control, or base quantiser if not in bitrate mode
|
// Use adjusted quantiser from bitrate control, or base quantiser if not in bitrate mode
|
||||||
int qY_override = enc->bitrate_mode ? quantiser_float_to_int_dithered(enc) : enc->quantiser_y;
|
int qY_override = enc->bitrate_mode ? quantiser_float_to_int_dithered(enc) : enc->quantiser_y;
|
||||||
|
|||||||
Reference in New Issue
Block a user