diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js index 28b04e1..437bb43 100644 --- a/assets/disk0/tvdos/bin/playtav.js +++ b/assets/disk0/tvdos/bin/playtav.js @@ -25,9 +25,11 @@ const TAV_MODE_MOTION = 0x03 // Packet types (same as TEV) const TAV_PACKET_IFRAME = 0x10 const TAV_PACKET_PFRAME = 0x11 +const TAV_PACKET_GOP_UNIFIED = 0x12 // Unified 3D DWT GOP (temporal + spatial) const TAV_PACKET_AUDIO_MP2 = 0x20 const TAV_PACKET_SUBTITLE = 0x30 const TAV_PACKET_EXTENDED_HDR = 0xEF +const TAV_PACKET_GOP_SYNC = 0xFC // GOP sync (N frames decoded from GOP block) const TAV_PACKET_TIMECODE = 0xFD const TAV_PACKET_SYNC_NTSC = 0xFE const TAV_PACKET_SYNC = 0xFF @@ -989,6 +991,159 @@ try { } } + else if (packetType === TAV_PACKET_GOP_UNIFIED) { + // GOP Unified packet (temporal 3D DWT) + + // Read GOP size (number of frames in this GOP, 1-16) + const gopSize = seqread.readOneByte() + + // Read motion vectors (quarter-pixel units, int16) + // Encoder writes ALL motion vectors including frame 0 + let motionX = new Array(gopSize) + let motionY = new Array(gopSize) + + for (let i = 0; i < gopSize; i++) { + motionX[i] = seqread.readShort() // Signed int16 + motionY[i] = seqread.readShort() + } + + // Read compressed data size + const compressedSize = seqread.readInt() + + // Read compressed data + let compressedPtr = seqread.readBytes(compressedSize) + updateDataRateBin(compressedSize) + + // Check if GOP fits in VM memory + const gopMemoryNeeded = gopSize * FRAME_SIZE + if (gopMemoryNeeded > 8 * 1024 * 1024) { + throw new Error(`GOP too large: ${gopSize} frames needs ${(gopMemoryNeeded / 1024 / 1024).toFixed(2)}MB, but VM has only 8MB. 
Max GOP size: 11 frames.`) + } + + // Allocate GOP buffers outside try block so finally can free them + let gopRGBBuffers = new Array(gopSize) + for (let i = 0; i < gopSize; i++) { + gopRGBBuffers[i] = sys.malloc(FRAME_SIZE) + if (gopRGBBuffers[i] === 0) { + // Malloc failed - free what we allocated and bail out + for (let j = 0; j < i; j++) { + sys.free(gopRGBBuffers[j]) + } + throw new Error(`Failed to allocate GOP buffer ${i}/${gopSize}. Out of memory.`) + } + } + + try { + let decodeStart = sys.nanoTime() + + // Call GOP decoder + const framesDecoded = graphics.tavDecodeGopUnified( + compressedPtr, + compressedSize, + gopSize, + motionX, + motionY, + gopRGBBuffers, // Array of output buffer addresses + header.width, + header.height, + header.qualityLevel, + QLUT[header.qualityY], + QLUT[header.qualityCo], + QLUT[header.qualityCg], + header.channelLayout, + header.waveletFilter, + header.decompLevels, + 2 // temporalLevels (hardcoded for now, could be in header) + ) + + decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 + decompressTime = 0 // Included in decode time + + // Display each decoded frame + for (let i = 0; i < framesDecoded; i++) { + let uploadStart = sys.nanoTime() + + // Upload GOP frame directly (no copy needed - already in ARGB format) + graphics.uploadRGBToFramebuffer(gopRGBBuffers[i], header.width, header.height, trueFrameCount + i, false) + uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0 + + // Apply bias lighting (only for first/last frame to save CPU) + let biasStart = sys.nanoTime() + if (i === 0 || i === framesDecoded - 1) { + setBiasLighting() + } + biasTime = (sys.nanoTime() - biasStart) / 1000000.0 + + // Fire audio on first frame + if (!audioFired && (frameCount > 0 || i > 0)) { + audio.play(0) + audioFired = true + } + + // Wait for frame timing + akku -= FRAME_TIME + while (akku < 0 && !stopPlay && !paused) { + let t = sys.nanoTime() + // Busy wait for accurate timing + akku += (sys.nanoTime() - t) / 1000000000.0 + } + + 
// Swap ping-pong buffers for P-frame reference + let temp = CURRENT_RGB_ADDR + CURRENT_RGB_ADDR = PREV_RGB_ADDR + PREV_RGB_ADDR = temp + + // Log performance for first frame of GOP + if (i === 0 && (frameCount % 60 == 0 || frameCount == 0)) { + let totalTime = decompressTime + decodeTime + uploadTime + biasTime + console.log(`GOP Frame ${frameCount}: Decode=${decodeTime.toFixed(1)}ms, Upload=${uploadTime.toFixed(1)}ms, Bias=${biasTime.toFixed(1)}ms, Total=${totalTime.toFixed(1)}ms (${gopSize} frames)`) + } + } + + // Note: frameCount and trueFrameCount will be incremented by GOP_SYNC packet + // Note: GOP buffers will be freed in finally block + + } catch (e) { + console.log(`GOP Frame ${frameCount}: decode failed: ${e}`) + // Try to get more details from the exception + if (e.stack) { + console.log(`Stack trace: ${e.stack}`) + } + if (e.javaException) { + console.log(`Java exception: ${e.javaException}`) + if (e.javaException.printStackTrace) { + serial.println("Java stack trace:") + e.javaException.printStackTrace() + } + } + // Print exception properties + try { + const props = Object.keys(e) + if (props.length > 0) { + console.log(`Exception properties: ${props.join(', ')}`) + e.printStackTrace() + let ee = e.getStackTrace() + console.log(ee.length) + console.log(ee.join('\n')) + } + } catch (ex) {} + } finally { + // Always free GOP buffers even on error + for (let i = 0; i < gopSize; i++) { + sys.free(gopRGBBuffers[i]) + } + sys.free(compressedPtr) + } + } + else if (packetType === TAV_PACKET_GOP_SYNC) { + // GOP sync packet - increment frame counters by number of frames decoded + const framesInGOP = seqread.readOneByte() + + frameCount += framesInGOP + trueFrameCount += framesInGOP + + // Note: Buffer swapping already handled in GOP_UNIFIED handler + } else if (packetType === TAV_PACKET_AUDIO_MP2) { // MP2 Audio packet let audioLen = seqread.readInt() diff --git a/assets/disk0/tvdos/i18n/hang_hi.chr b/assets/disk0/tvdos/i18n/hang_hi.chr index 
cd28dfe..940e651 100644 Binary files a/assets/disk0/tvdos/i18n/hang_hi.chr and b/assets/disk0/tvdos/i18n/hang_hi.chr differ diff --git a/assets/disk0/tvdos/i18n/hang_lo.chr b/assets/disk0/tvdos/i18n/hang_lo.chr index 67f1129..8d54737 100644 Binary files a/assets/disk0/tvdos/i18n/hang_lo.chr and b/assets/disk0/tvdos/i18n/hang_lo.chr differ diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index 803be96..1b74535 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -4203,6 +4203,184 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } + /** + * Reconstruct per-frame coefficients from unified GOP block (2-bit format) + * Reverse of encoder's preprocess_gop_unified() + * + * Layout: [Y_maps_all][Co_maps_all][Cg_maps_all][Y_other_vals][Co_other_vals][Cg_other_vals] + * + * 2-bit encoding: 00=0, 01=+1, 10=-1, 11=other (stored in value array) + * + * @param decompressedData Unified block data (after Zstd decompression) + * @param numFrames Number of frames in GOP + * @param numPixels Pixels per frame (width × height) + * @param channelLayout Channel layout (0=YCoCg, 2=Y-only, etc) + * @return Array of [frame][channel] where channel: 0=Y, 1=Co, 2=Cg + */ + private fun tavPostprocessGopUnified( + decompressedData: ByteArray, + numFrames: Int, + numPixels: Int, + channelLayout: Int + ): Array> { + // 2 bits per coefficient + val mapBytesPerFrame = (numPixels * 2 + 7) / 8 + + // Determine which channels are present + // Bit 0: has alpha, Bit 1: has chroma (inverted), Bit 2: has luma (inverted) + val hasY = (channelLayout and 0x04) == 0 + val hasCo = (channelLayout and 0x02) == 0 // Inverted: 0 = has chroma + val hasCg = (channelLayout and 0x02) == 0 // Inverted: 0 = has chroma + + // Calculate buffer positions for maps + var readPtr = 0 + val yMapsStart = if (hasY) readPtr else -1 + if (hasY) readPtr += 
mapBytesPerFrame * numFrames + + val coMapsStart = if (hasCo) readPtr else -1 + if (hasCo) readPtr += mapBytesPerFrame * numFrames + + val cgMapsStart = if (hasCg) readPtr else -1 + if (hasCg) readPtr += mapBytesPerFrame * numFrames + + // Count "other" values (code 11) across ALL frames + var yOtherCount = 0 + var coOtherCount = 0 + var cgOtherCount = 0 + + for (frame in 0 until numFrames) { + val frameMapOffset = frame * mapBytesPerFrame + for (i in 0 until numPixels) { + val bitPos = i * 2 + val byteIdx = bitPos / 8 + val bitOffset = bitPos % 8 + + if (hasY && yMapsStart + frameMapOffset + byteIdx < decompressedData.size) { + var code = (decompressedData[yMapsStart + frameMapOffset + byteIdx].toInt() shr bitOffset) and 0x03 + if (bitOffset == 7 && byteIdx + 1 < mapBytesPerFrame) { + val nextByte = decompressedData[yMapsStart + frameMapOffset + byteIdx + 1].toInt() and 0xFF + code = (code and 0x01) or ((nextByte and 0x01) shl 1) + } + if (code == 3) yOtherCount++ + } + if (hasCo && coMapsStart + frameMapOffset + byteIdx < decompressedData.size) { + var code = (decompressedData[coMapsStart + frameMapOffset + byteIdx].toInt() shr bitOffset) and 0x03 + if (bitOffset == 7 && byteIdx + 1 < mapBytesPerFrame) { + val nextByte = decompressedData[coMapsStart + frameMapOffset + byteIdx + 1].toInt() and 0xFF + code = (code and 0x01) or ((nextByte and 0x01) shl 1) + } + if (code == 3) coOtherCount++ + } + if (hasCg && cgMapsStart + frameMapOffset + byteIdx < decompressedData.size) { + var code = (decompressedData[cgMapsStart + frameMapOffset + byteIdx].toInt() shr bitOffset) and 0x03 + if (bitOffset == 7 && byteIdx + 1 < mapBytesPerFrame) { + val nextByte = decompressedData[cgMapsStart + frameMapOffset + byteIdx + 1].toInt() and 0xFF + code = (code and 0x01) or ((nextByte and 0x01) shl 1) + } + if (code == 3) cgOtherCount++ + } + } + } + + // Value arrays start after all maps + val yValuesStart = readPtr + readPtr += yOtherCount * 2 + + val coValuesStart = readPtr + readPtr 
+= coOtherCount * 2 + + val cgValuesStart = readPtr + + // Allocate output arrays + val output = Array(numFrames) { Array(3) { ShortArray(numPixels) } } + var yValueIdx = 0 + var coValueIdx = 0 + var cgValueIdx = 0 + + for (frame in 0 until numFrames) { + val frameMapOffset = frame * mapBytesPerFrame + for (i in 0 until numPixels) { + val bitPos = i * 2 + val byteIdx = bitPos / 8 + val bitOffset = bitPos % 8 + + // Decode Y + if (hasY && yMapsStart + frameMapOffset + byteIdx < decompressedData.size) { + var code = (decompressedData[yMapsStart + frameMapOffset + byteIdx].toInt() shr bitOffset) and 0x03 + if (bitOffset == 7 && byteIdx + 1 < mapBytesPerFrame) { + val nextByte = decompressedData[yMapsStart + frameMapOffset + byteIdx + 1].toInt() and 0xFF + code = (code and 0x01) or ((nextByte and 0x01) shl 1) + } + output[frame][0][i] = when (code) { + 0 -> 0 + 1 -> 1 + 2 -> -1 + 3 -> { + val valOffset = yValuesStart + yValueIdx * 2 + yValueIdx++ + if (valOffset + 1 < decompressedData.size) { + val lo = decompressedData[valOffset].toInt() and 0xFF + val hi = decompressedData[valOffset + 1].toInt() + ((hi shl 8) or lo).toShort() + } else 0 + } + else -> 0 + } + } + + // Decode Co + if (hasCo && coMapsStart + frameMapOffset + byteIdx < decompressedData.size) { + var code = (decompressedData[coMapsStart + frameMapOffset + byteIdx].toInt() shr bitOffset) and 0x03 + if (bitOffset == 7 && byteIdx + 1 < mapBytesPerFrame) { + val nextByte = decompressedData[coMapsStart + frameMapOffset + byteIdx + 1].toInt() and 0xFF + code = (code and 0x01) or ((nextByte and 0x01) shl 1) + } + output[frame][1][i] = when (code) { + 0 -> 0 + 1 -> 1 + 2 -> -1 + 3 -> { + val valOffset = coValuesStart + coValueIdx * 2 + coValueIdx++ + if (valOffset + 1 < decompressedData.size) { + val lo = decompressedData[valOffset].toInt() and 0xFF + val hi = decompressedData[valOffset + 1].toInt() + ((hi shl 8) or lo).toShort() + } else 0 + } + else -> 0 + } + } + + // Decode Cg + if (hasCg && cgMapsStart + 
frameMapOffset + byteIdx < decompressedData.size) { + var code = (decompressedData[cgMapsStart + frameMapOffset + byteIdx].toInt() shr bitOffset) and 0x03 + if (bitOffset == 7 && byteIdx + 1 < mapBytesPerFrame) { + val nextByte = decompressedData[cgMapsStart + frameMapOffset + byteIdx + 1].toInt() and 0xFF + code = (code and 0x01) or ((nextByte and 0x01) shl 1) + } + output[frame][2][i] = when (code) { + 0 -> 0 + 1 -> 1 + 2 -> -1 + 3 -> { + val valOffset = cgValuesStart + cgValueIdx * 2 + cgValueIdx++ + if (valOffset + 1 < decompressedData.size) { + val lo = decompressedData[valOffset].toInt() and 0xFF + val hi = decompressedData[valOffset + 1].toInt() + ((hi shl 8) or lo).toShort() + } else 0 + } + else -> 0 + } + } + } + } + + return output + } + // TAV Simulated overlapping tiles constants (must match encoder) private val TAV_TILE_SIZE_X = 640 private val TAV_TILE_SIZE_Y = 540 @@ -4348,6 +4526,53 @@ class GraphicsJSR223Delegate(private val vm: VM) { else 6 } + // GOP temporal quantization helpers + + /** + * Determines the temporal subband level for a given frame in a GOP. + * Returns 0 for tLL (temporal low-pass), 1+ for temporal high-pass levels. 
+ * + * For 2-level Haar decomposition on 16 frames: + * - Frames 0-3: Level 0 (tLL - lowest frequency) + * - Frames 4-7: Level 1 (tH - mid frequency) + * - Frames 8-15: Level 2 (tHH - highest frequency) + */ + private fun getTemporalSubbandLevel(frameIdx: Int, numFrames: Int, temporalLevels: Int): Int { + if (temporalLevels == 0) return 0 + + val framesPerSubband = numFrames shr temporalLevels // numFrames / 2^temporalLevels + + // Determine which temporal subband this frame belongs to + val subbandIdx = frameIdx / framesPerSubband + + // Map subband index to level (0 = tLL, 1+ = temporal high-pass levels) + return if (subbandIdx == 0) 0 else { + // Find highest bit position in subbandIdx to determine level + var level = 0 + var idx = subbandIdx + while (idx > 1) { + idx = idx shr 1 + level++ + } + level + 1 + } + } + + /** + * Calculates temporal quantizer scale for a given temporal subband level. + * Uses exponential scaling: TEMPORAL_BASE_SCALE × 2^(BETA × level) + * + * With BETA=0.8, TEMPORAL_BASE_SCALE=1.0: + * - Level 0 (tLL): 1.0 × 2^0.0 = 1.00 + * - Level 1 (tH): 1.0 × 2^0.8 = 1.74 + * - Level 2 (tHH): 1.0 × 2^1.6 = 3.03 + */ + private fun getTemporalQuantizerScale(temporalLevel: Int): Float { + val BETA = 0.8f + val TEMPORAL_BASE_SCALE = 1.0f + return TEMPORAL_BASE_SCALE * Math.pow(2.0, (BETA * temporalLevel).toDouble()).toFloat() + } + // level is one-based index private fun getPerceptualWeight(qIndex: Int, qYGlobal: Int, level0: Int, subbandType: Int, isChroma: Boolean, maxLevels: Int): Float { // Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity @@ -4644,7 +4869,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Coefficient delta encoding for efficient P-frames readPtr = tavDecodeDeltaTileRGB(readPtr, channelLayout, tileX, tileY, currentRGBAddr, width, height, qY, qCo, qCg, - waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, frameCount) + decompLevels, tavVersion, isMonoblock, 
frameCount) dbgOut["frameMode"] = " " } } @@ -5262,7 +5487,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { private fun tavDecodeDeltaTileRGB(readPtr: Long, channelLayout: Int, tileX: Int, tileY: Int, currentRGBAddr: Long, width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, - waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int = 0): Long { + decompLevels: Int, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int = 0): Long { val tileIdx = if (isMonoblock) { 0 // Single tile index for monoblock @@ -5403,15 +5628,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { tavPreviousCoeffsCg!![tileIdx] = currentCg.clone() // Apply inverse DWT - if (isLossless) { - tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, 0, TavSharpenLuma) - tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, 0, TavNullFilter) - tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, 0, TavNullFilter) - } else { - tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, waveletFilter, TavSharpenLuma) - tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, waveletFilter, TavNullFilter) - tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, waveletFilter, TavNullFilter) - } + tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, 255, TavSharpenLuma) + tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, 255, TavNullFilter) + tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, 255, TavNullFilter) // Debug: Check coefficient values after inverse DWT if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) { @@ -5882,6 +6101,194 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } + /** + * Apply inverse translation (motion compensation) to a frame. 
+ * Inverse operation: shifts by +dx, +dy (opposite of forward encoder). + * + * @param frameData Input frame data to shift + * @param width Frame width + * @param height Frame height + * @param dx Translation in X direction (pixels) + * @param dy Translation in Y direction (pixels) + */ + private fun applyInverseTranslation(frameData: FloatArray, width: Int, height: Int, dx: Int, dy: Int) { + val output = FloatArray(width * height) + + // Apply inverse translation with boundary clamping + for (y in 0 until height) { + for (x in 0 until width) { + // Inverse: shift by +dx, +dy (opposite of encoder's -dx, -dy) + var srcX = x + dx + var srcY = y + dy + + // Clamp to frame boundaries + srcX = srcX.coerceIn(0, width - 1) + srcY = srcY.coerceIn(0, height - 1) + + output[y * width + x] = frameData[srcY * width + srcX] + } + } + + // Copy back to original array + System.arraycopy(output, 0, frameData, 0, frameData.size) + } + + /** + * Main GOP unified decoder function. + * Decodes a unified 3D DWT GOP block (temporal + spatial) and outputs RGB frames. 
+ * + * @param compressedDataPtr Pointer to compressed Zstd data + * @param compressedSize Size of compressed data + * @param gopSize Number of frames in GOP (1-16) + * @param motionVectorsX X motion vectors in quarter-pixel units + * @param motionVectorsY Y motion vectors in quarter-pixel units + * @param outputRGBAddrs Array of output RGB buffer addresses + * @param width Frame width + * @param height Frame height + * @param qIndex Quality index + * @param qYGlobal Global Y quantizer + * @param qCoGlobal Global Co quantizer + * @param qCgGlobal Global Cg quantizer + * @param channelLayout Channel layout flags + * @param spatialFilter Wavelet filter type + * @param spatialLevels Number of spatial DWT levels (default 6) + * @param temporalLevels Number of temporal DWT levels (default 2) + * @return Number of frames decoded + */ + fun tavDecodeGopUnified( + compressedDataPtr: Long, + compressedSize: Int, + gopSize: Int, + motionVectorsX: IntArray, + motionVectorsY: IntArray, + outputRGBAddrs: LongArray, + width: Int, + height: Int, + qIndex: Int, + qYGlobal: Int, + qCoGlobal: Int, + qCgGlobal: Int, + channelLayout: Int, + spatialFilter: Int = 1, + spatialLevels: Int = 6, + temporalLevels: Int = 2 + ): Int { + val numPixels = width * height + + // Step 1: Decompress unified GOP block + val compressedData = ByteArray(compressedSize) + UnsafeHelper.memcpyRaw( + null, + vm.usermem.ptr + compressedDataPtr, + compressedData, + UnsafeHelper.getArrayOffset(compressedData), + compressedSize.toLong() + ) + + val decompressedData = try { + ZstdInputStream(java.io.ByteArrayInputStream(compressedData)).use { zstd -> + zstd.readBytes() + } + } catch (e: Exception) { + println("ERROR: Zstd decompression failed: ${e.message}") + return 0 + } + + // Step 2: Postprocess unified block to per-frame coefficients + val quantizedCoeffs = tavPostprocessGopUnified( + decompressedData, + gopSize, + numPixels, + channelLayout + ) + + // Step 3: Allocate GOP buffers for float coefficients + 
val gopY = Array(gopSize) { FloatArray(numPixels) } + val gopCo = Array(gopSize) { FloatArray(numPixels) } + val gopCg = Array(gopSize) { FloatArray(numPixels) } + + // Step 4: Calculate subband layout (needed for perceptual dequantization) + val subbands = calculateSubbandLayout(width, height, spatialLevels) + + // Step 5: Dequantize with temporal-spatial scaling + for (t in 0 until gopSize) { + val temporalLevel = getTemporalSubbandLevel(t, gopSize, temporalLevels) + val temporalScale = getTemporalQuantizerScale(temporalLevel) + + // Apply temporal scaling to base quantizers + val baseQY = (qYGlobal * temporalScale).coerceIn(1.0f, 255.0f) + val baseQCo = (qCoGlobal * temporalScale).coerceIn(1.0f, 255.0f) + val baseQCg = (qCgGlobal * temporalScale).coerceIn(1.0f, 255.0f) + + // Use existing perceptual dequantization for spatial weighting + dequantiseDWTSubbandsPerceptual( + qIndex, qYGlobal, + quantizedCoeffs[t][0], gopY[t], + subbands, baseQY, false, spatialLevels // isChroma=false + ) + + dequantiseDWTSubbandsPerceptual( + qIndex, qYGlobal, + quantizedCoeffs[t][1], gopCo[t], + subbands, baseQCo, true, spatialLevels // isChroma=true + ) + + dequantiseDWTSubbandsPerceptual( + qIndex, qYGlobal, + quantizedCoeffs[t][2], gopCg[t], + subbands, baseQCg, true, spatialLevels // isChroma=true + ) + } + + // Step 6: Apply inverse 3D DWT (spatial first, then temporal) + tavApplyInverse3DDWT(gopY, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter) + tavApplyInverse3DDWT(gopCo, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter) + tavApplyInverse3DDWT(gopCg, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter) + + // Step 7: Apply inverse motion compensation (shift frames back) + // Note: Motion vectors are in quarter-pixel units + for (t in 1 until gopSize) { // Skip frame 0 (reference) + val dx = motionVectorsX[t] / 4 // Convert to pixel units + val dy = motionVectorsY[t] / 4 + + if (dx != 0 || dy != 0) { + 
applyInverseTranslation(gopY[t], width, height, dx, dy) + applyInverseTranslation(gopCo[t], width, height, dx, dy) + applyInverseTranslation(gopCg[t], width, height, dx, dy) + } + } + + // Step 8: Convert each frame to RGB and write to output buffers + for (t in 0 until gopSize) { + val rgbAddr = outputRGBAddrs[t] + + for (i in 0 until numPixels) { + val y = gopY[t][i] + val co = gopCo[t][i] + val cg = gopCg[t][i] + + // YCoCg-R to RGB conversion + val tmp = y - (cg / 2.0f) + val g = cg + tmp + val b = tmp - (co / 2.0f) + val r = b + co + + // Clamp to 0-255 range + val rClamped = r.toInt().coerceIn(0, 255) + val gClamped = g.toInt().coerceIn(0, 255) + val bClamped = b.toInt().coerceIn(0, 255) + + // Write RGB24 format (3 bytes per pixel) + val offset = rgbAddr + i * 3L + vm.usermem[offset] = rClamped.toByte() + vm.usermem[offset + 1] = gClamped.toByte() + vm.usermem[offset + 2] = bClamped.toByte() + } + } + + return gopSize + } + // Biorthogonal 13/7 wavelet inverse 1D transform // Synthesis filters: Low-pass (13 taps), High-pass (7 taps) private fun tavApplyDWTBior137Inverse1D(data: FloatArray, length: Int) { @@ -5994,4 +6401,78 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } + // ============================================================================= + // Temporal 3D DWT Functions (GOP Decoding) + // ============================================================================= + + /** + * Inverse 1D temporal DWT (Haar) along time axis + * Reuses existing Haar inverse implementation + */ + private fun tavApplyTemporalDWTInverse1D(data: FloatArray, numFrames: Int) { + if (numFrames < 2) return + tavApplyDWTHaarInverse1D(data, numFrames) + } + + /** + * Apply inverse 3D DWT to GOP data (spatial + temporal) + * Order: SPATIAL first (each frame), then TEMPORAL (across frames) + * + * @param gopData Array of frame buffers [frame][pixel] + * @param width Frame width + * @param height Frame height + * @param numFrames Number of frames in GOP + * @param 
spatialLevels Spatial decomposition levels (typically 6) + * @param temporalLevels Temporal decomposition levels (typically 2) + * @param spatialFilter Spatial wavelet filter type (0=5/3, 1=9/7, 255=Haar) + */ + private fun tavApplyInverse3DDWT( + gopData: Array, + width: Int, + height: Int, + numFrames: Int, + spatialLevels: Int, + temporalLevels: Int, + spatialFilter: Int + ) { + if (numFrames < 2) return + + val numPixels = width * height + val temporalLine = FloatArray(numFrames) + + // Step 1: Apply inverse 2D spatial DWT to each temporal subband (each frame) + for (t in 0 until numFrames) { + tavApplyDWTInverseMultiLevel( + gopData[t], width, height, + spatialLevels, spatialFilter, + TavNullFilter // No sharpening for GOP frames + ) + } + + // Step 2: Apply inverse temporal DWT to each spatial location + for (y in 0 until height) { + for (x in 0 until width) { + val pixelIdx = y * width + x + + // Extract temporal coefficients for this spatial location + for (t in 0 until numFrames) { + temporalLine[t] = gopData[t][pixelIdx] + } + + // Apply inverse temporal DWT with multiple levels (reverse order) + for (level in temporalLevels - 1 downTo 0) { + val levelFrames = numFrames shr level + if (levelFrames >= 2) { + tavApplyTemporalDWTInverse1D(temporalLine, levelFrames) + } + } + + // Write back reconstructed values + for (t in 0 until numFrames) { + gopData[t][pixelIdx] = temporalLine[t] + } + } + } + } + } \ No newline at end of file diff --git a/video_encoder/decoder_tav.c b/video_encoder/decoder_tav.c index a503869..0465b75 100644 --- a/video_encoder/decoder_tav.c +++ b/video_encoder/decoder_tav.c @@ -15,11 +15,15 @@ #define TAV_MODE_SKIP 0x00 #define TAV_MODE_INTRA 0x01 #define TAV_MODE_DELTA 0x02 -#define TAV_PACKET_IFRAME 0x10 -#define TAV_PACKET_PFRAME 0x11 -#define TAV_PACKET_AUDIO_MP2 0x20 -#define TAV_PACKET_SUBTITLE 0x30 -#define TAV_PACKET_SYNC 0xFF +#define TAV_PACKET_IFRAME 0x10 +#define TAV_PACKET_PFRAME 0x11 +#define TAV_PACKET_GOP_UNIFIED 0x12 
// Unified 3D DWT GOP +#define TAV_PACKET_AUDIO_MP2 0x20 +#define TAV_PACKET_SUBTITLE 0x30 +#define TAV_PACKET_EXTENDED_HDR 0xEF +#define TAV_PACKET_GOP_SYNC 0xFC // GOP sync (N frames decoded) +#define TAV_PACKET_TIMECODE 0xFD +#define TAV_PACKET_SYNC 0xFF // Channel layout constants (bit-field design) #define CHANNEL_LAYOUT_YCOCG 0 // Y-Co-Cg (000: no alpha, has chroma, has luma) diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index f1862a2..6a0411b 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -104,7 +104,7 @@ static int needs_alpha_channel(int channel_layout) { #define DEFAULT_FPS 30 #define DEFAULT_QUALITY 3 #define DEFAULT_ZSTD_LEVEL 9 -#define GOP_SIZE 16 +#define GOP_SIZE /*1*/4 // Audio/subtitle constants (reused from TEV) #define MP2_DEFAULT_PACKET_SIZE 1152 @@ -1456,8 +1456,8 @@ static void quantise_3d_dwt_coefficients(tav_encoder_t *enc, int spatial_size, int base_quantiser, int is_chroma) { - const float BETA = 0.8f; // Temporal scaling exponent - const float TEMPORAL_BASE_SCALE = 0.7f; // Temporal coefficients are typically sparser + const float BETA = 0.8f; // Temporal scaling exponent (aggressive for temporal high-pass) + const float TEMPORAL_BASE_SCALE = 1.0f; // Don't reduce tLL quantization (same as intra) // Process each temporal subband independently (separable approach) for (int t = 0; t < num_frames; t++) { @@ -1468,11 +1468,11 @@ static void quantise_3d_dwt_coefficients(tav_encoder_t *enc, int temporal_level = get_temporal_subband_level(t, num_frames, enc->temporal_decomp_levels); // Step 2: Compute temporal base quantizer using exponential scaling - // Formula: tH_base = Qbase_t * 0.7 * 2^(0.8 * level) + // Formula: tH_base = Qbase_t * 1.0 * 2^(0.8 * level) // Example with Qbase_t=16: - // - Level 0 (tLL): 16 * 0.7 * 2^0 = 11.2 - // - Level 1 (tLH): 16 * 0.7 * 2^0.8 = 19.5 - // - Level 2 (tHH): 16 * 0.7 * 2^1.6 = 33.8 + // - Level 0 (tLL): 16 * 1.0 * 2^0 = 16 (same as intra-only) + // -
Level 1 (tH): 16 * 1.0 * 2^0.8 ≈ 27.9 (≈1.74× base) + // - Level 2 (tHH): 16 * 1.0 * 2^1.6 ≈ 48.5 (≈3.03× base, matches decoder's getTemporalQuantizerScale) float temporal_scale = TEMPORAL_BASE_SCALE * powf(2.0f, BETA * temporal_level); float temporal_quantiser = base_quantiser * temporal_scale; @@ -1622,6 +1622,40 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser, memcpy(gop_cg_coeffs[i], enc->gop_cg_frames[i], num_pixels * sizeof(float)); } + // Step 0.5: Apply motion compensation to align frames before temporal DWT + // This uses the computed translation vectors to align each frame to the previous one + for (int i = 1; i < actual_gop_size; i++) { // Skip frame 0 (reference frame) + float *aligned_y = malloc(num_pixels * sizeof(float)); + float *aligned_co = malloc(num_pixels * sizeof(float)); + float *aligned_cg = malloc(num_pixels * sizeof(float)); + + if (!aligned_y || !aligned_co || !aligned_cg) { + fprintf(stderr, "Error: Failed to allocate motion compensation buffers\n"); + // Cleanup and skip motion compensation for this GOP + free(aligned_y); + free(aligned_co); + free(aligned_cg); + break; + } + + // Apply translation to align this frame + apply_translation(gop_y_coeffs[i], enc->width, enc->height, + enc->gop_translation_x[i], enc->gop_translation_y[i], aligned_y); + apply_translation(gop_co_coeffs[i], enc->width, enc->height, + enc->gop_translation_x[i], enc->gop_translation_y[i], aligned_co); + apply_translation(gop_cg_coeffs[i], enc->width, enc->height, + enc->gop_translation_x[i], enc->gop_translation_y[i], aligned_cg); + + // Copy aligned frames back + memcpy(gop_y_coeffs[i], aligned_y, num_pixels * sizeof(float)); + memcpy(gop_co_coeffs[i], aligned_co, num_pixels * sizeof(float)); + memcpy(gop_cg_coeffs[i], aligned_cg, num_pixels * sizeof(float)); + + free(aligned_y); + free(aligned_co); + free(aligned_cg); + } + // Step 1: Apply 3D DWT (temporal + spatial) to each channel + // Note: This modifies gop_*_coeffs in-place
dwt_3d_forward(gop_y_coeffs, enc->width, enc->height, actual_gop_size,