TAV: double buffered playback

2026-03-07 19:51:51 +09:00 · 2025-10-21 16:17:00 +09:00
parent f0ad0ef034
commit 9ac0424be3
4 changed files with 631 additions and 157 deletions
--- a/assets/disk0/tvdos/bin/playtav.js
+++ b/assets/disk0/tvdos/bin/playtav.js
@@ -355,6 +355,12 @@ let decodeHeight = isInterlaced ? (header.height >> 1) : header.height
 const FRAME_PIXELS = header.width * header.height
 const FRAME_SIZE = FRAME_PIXELS * 3  // RGB buffer size

+// Double-buffering: Fixed slot sizes in videoBuffer (32 MB total)
+const MAX_GOP_SIZE = 21  // Maximum frames per slot (21 * 752KB = ~15MB per slot)
+const SLOT_SIZE = MAX_GOP_SIZE * FRAME_SIZE  // Fixed slot size regardless of actual GOP size
+
+console.log(`Double-buffering: Max ${MAX_GOP_SIZE} frames/slot, ${(SLOT_SIZE / 1048576).toFixed(1)}MB per slot`)
+
 const RGB_BUFFER_A = sys.malloc(FRAME_SIZE)
 const RGB_BUFFER_B = sys.malloc(FRAME_SIZE)

@@ -384,7 +390,6 @@ let nextFieldAddr = NEXT_FIELD_BUFFER
 let audioBufferBytesLastFrame = 0
 let frame_cnt = 0
 let frametime = 1000000000.0 / header.fps
-let nextFrameTime = 0
 let mp2Initialised = false
 let audioFired = false

@@ -474,14 +479,51 @@ let trueFrameCount = 0
 let stopPlay = false
 let akku = FRAME_TIME
 let akku2 = 0.0
+let nextFrameTime = 0  // Absolute time when next frame should display (nanoseconds)
 let currentFileIndex = 1  // Track which file we're playing in concatenated stream
 let totalFilesProcessed = 0
 let decoderDbgInfo = {}

+// GOP double-buffering state
+let currentGopBufferSlot = 0  // Which buffer slot is currently being displayed (0 or 1)
+let currentGopSize = 0         // Number of frames in current GOP being displayed
+let currentGopFrameIndex = 0   // Which frame of current GOP we're displaying
+let nextGopData = null         // Buffered next GOP packet data for background decode
+let asyncDecodeInProgress = false  // Track if async decode is running
+let asyncDecodeSlot = 0        // Which slot the async decode is targeting
+let asyncDecodeGopSize = 0     // Size of GOP being decoded async
+let asyncDecodePtr = 0         // Compressed data pointer to free after decode
+let asyncDecodeStartTime = 0   // When async decode started (for diagnostics)
+let shouldReadPackets = true   // Gate packet reading: false when both buffers are full
+
 let cueElements = []
 let currentCueIndex = -1  // Track current cue position
 let iframePositions = []  // Track I-frame positions for seeking: [{offset, frameNum}]

+// Helper function to clean up async decode state (prevents memory leaks)
+function cleanupAsyncDecode() {
+    // Free first GOP decode memory if in progress
+    if (asyncDecodeInProgress && asyncDecodePtr && asyncDecodePtr !== 0) {
+        sys.free(asyncDecodePtr)
+        asyncDecodeInProgress = false
+        asyncDecodePtr = 0
+        asyncDecodeGopSize = 0
+    }
+
+    // Free background GOP decode memory if in progress
+    if (nextGopData !== null && nextGopData.compressedPtr && nextGopData.compressedPtr !== 0) {
+        sys.free(nextGopData.compressedPtr)
+        nextGopData.compressedPtr = 0
+    }
+    nextGopData = null
+
+    // Reset GOP playback state
+    currentGopSize = 0
+    currentGopFrameIndex = 0
+    nextFrameTime = 0  // Reset frame timing
+    shouldReadPackets = true  // Resume packet reading after cleanup
+}
+
 // Function to find nearest I-frame before or at target frame
 function findNearestIframe(targetFrame) {
    if (iframePositions.length === 0) return null
@@ -738,6 +780,7 @@ try {

                    if (cue.addressingMode === ADDRESSING_INTERNAL) {
                        serial.println(`Seeking to cue: ${cue.name} (offset ${cue.offset})`)
+                        cleanupAsyncDecode()  // Free any pending async decode memory
                        seqread.seek(cue.offset)
                        frameCount = 0
                        akku = FRAME_TIME
@@ -756,6 +799,7 @@ try {

                    if (cue.addressingMode === ADDRESSING_INTERNAL) {
                        serial.println(`Seeking to cue: ${cue.name} (offset ${cue.offset})`)
+                        cleanupAsyncDecode()  // Free any pending async decode memory
                        seqread.seek(cue.offset)
                        frameCount = 0
                        akku = FRAME_TIME
@@ -774,6 +818,7 @@ try {

                    if (seekTarget) {
                        serial.println(`Seeking back to frame ${seekTarget.frameNum} (offset ${seekTarget.offset})`)
+                        cleanupAsyncDecode()  // Free any pending async decode memory
                        seqread.seek(seekTarget.offset)
                        frameCount = seekTarget.frameNum
                        akku = FRAME_TIME
@@ -800,6 +845,7 @@ try {

                    if (seekTarget && seekTarget.frameNum > frameCount) {
                        serial.println(`Seeking forward to frame ${seekTarget.frameNum} (offset ${seekTarget.offset})`)
+                        cleanupAsyncDecode()  // Free any pending async decode memory
                        seqread.seek(seekTarget.offset)
                        frameCount = seekTarget.frameNum
                        akku = FRAME_TIME
@@ -819,9 +865,10 @@ try {
            lastKey = keyCode
        }

-        if (akku >= FRAME_TIME) {
-            // When paused, just reset accumulator and skip frame processing
-            if (!paused) {
+        // GATED PACKET READING
+        // Stop reading when both buffers are full (GOP playing + GOP decoding/ready)
+        // Resume reading when GOP finishes (one buffer becomes free)
+        if (shouldReadPackets && !paused) {
            // Read packet header (record position before reading for I-frame tracking)
            let packetOffset = seqread.getReadCount()
            var packetType = seqread.readOneByte()
@@ -864,7 +911,7 @@ try {
            }

            if (packetType === TAV_PACKET_SYNC || packetType == TAV_PACKET_SYNC_NTSC) {
-                // Sync packet - no additional data
+                // Sync packet - no additional data (for I/P frames, not GOPs)
                akku -= FRAME_TIME
                if (packetType == TAV_PACKET_SYNC) {
                    frameCount++
@@ -872,7 +919,7 @@ try {

                trueFrameCount++

-                // Swap ping-pong buffers instead of expensive memcpy (752KB copy eliminated!)
+                // Swap ping-pong buffers
                let temp = CURRENT_RGB_ADDR
                CURRENT_RGB_ADDR = PREV_RGB_ADDR
                PREV_RGB_ADDR = temp
@@ -1000,193 +1047,154 @@ try {
            }
            else if (packetType === TAV_PACKET_GOP_UNIFIED) {
                // GOP Unified packet (temporal 3D DWT)
+                // DOUBLE-BUFFERING: Decode GOP N+1 while playing GOP N to eliminate hiccups

-                // Read GOP size (number of frames in this GOP, 1-16)
+                // Read GOP packet data
                const gopSize = seqread.readOneByte()
-
-                // Read canvas expansion margins (4 bytes)
-                // Encoder expands canvas to preserve all original pixels from all aligned frames
                const marginLeft = seqread.readOneByte()
                const marginRight = seqread.readOneByte()
                const marginTop = seqread.readOneByte()
                const marginBottom = seqread.readOneByte()

-                // Calculate expanded canvas dimensions
                const canvasWidth = header.width + marginLeft + marginRight
                const canvasHeight = header.height + marginTop + marginBottom

                // Read motion vectors (1/16-pixel units, int16)
-                // Encoder writes ALL motion vectors including frame 0
                let motionX = new Array(gopSize)
                let motionY = new Array(gopSize)

                for (let i = 0; i < gopSize; i++) {
-                    // readShort() returns unsigned 16-bit, but motion vectors are signed int16
                    let mx = seqread.readShort()
                    let my = seqread.readShort()
-                    // Convert to signed: if > 32767, it's negative
                    motionX[i] = (mx > 32767) ? (mx - 65536) : mx
                    motionY[i] = (my > 32767) ? (my - 65536) : my
                }

-                // Read compressed data size
                const compressedSize = seqread.readInt()
-
-                // Read compressed data
                let compressedPtr = seqread.readBytes(compressedSize)
                updateDataRateBin(compressedSize)

-                // Check if GOP fits in VM memory
-                const gopMemoryNeeded = gopSize * FRAME_SIZE
-                if (gopMemoryNeeded > MAXMEM) {
-                    throw new Error(`GOP too large: ${gopSize} frames needs ${(gopMemoryNeeded / 1048576).toFixed(2)}MB, but VM has only ${(MAXMEM / 1048576).toFixed(1)}MB. Max GOP size: 8 frames for 8MB system.`)
+                // DOUBLE-BUFFERING LOGIC:
+                // - If no GOP is currently playing: decode immediately to current slot
+                // - Otherwise: buffer this GOP for decode during next GOP's playback
+
+                // Check GOP size fits in slot
+                if (gopSize > MAX_GOP_SIZE) {
+                    console.log(`[GOP] Error: GOP size ${gopSize} exceeds max ${MAX_GOP_SIZE} frames`)
+                    sys.free(compressedPtr)
+                    break
                }

-                // Allocate GOP buffers outside try block so finally can free them
-                let gopRGBBuffers = new Array(gopSize)
-                for (let i = 0; i < gopSize; i++) {
-                    gopRGBBuffers[i] = sys.malloc(FRAME_SIZE)
-                    if (gopRGBBuffers[i] === 0) {
-                        // Malloc failed - free what we allocated and bail out
-                        for (let j = 0; j < i; j++) {
-                            sys.free(gopRGBBuffers[j])
-                        }
-                        throw new Error(`Failed to allocate GOP buffer ${i}/${gopSize}. Out of memory.`)
-                    }
+                if (currentGopSize === 0 && !asyncDecodeInProgress) {
+                    // No active GOP and no decode in progress: decode asynchronously and start playback when ready
+                    const bufferSlot = currentGopBufferSlot
+                    const bufferOffset = bufferSlot * SLOT_SIZE
+
+                    // Defensive: free any old async decode memory (shouldn't happen but be safe)
+                    if (asyncDecodePtr !== 0) {
+                        sys.free(asyncDecodePtr)
+                        asyncDecodePtr = 0
                    }

-                try {
-                    let decodeStart = sys.nanoTime()
-
-                    // Call GOP decoder with canvas expansion information
-                    const [r1, r2] = graphics.tavDecodeGopUnified(
-                        compressedPtr,
-                        compressedSize,
-                        gopSize,
-                        motionX,
-                        motionY,
-                        gopRGBBuffers,  // Array of output buffer addresses
-                        header.width,   // Original frame width
-                        header.height,  // Original frame height
-                        canvasWidth,    // Expanded canvas width (preserves all pixels)
-                        canvasHeight,   // Expanded canvas height (preserves all pixels)
-                        marginLeft,     // Left margin
-                        marginTop,      // Top margin
+                    // Start async decode
+                    graphics.tavDecodeGopToVideoBufferAsync(
+                        compressedPtr, compressedSize, gopSize,
+                        motionX, motionY,
+                        header.width, header.height,
+                        canvasWidth, canvasHeight,
+                        marginLeft, marginTop,
                        header.qualityLevel,
-                        QLUT[header.qualityY],
-                        QLUT[header.qualityCo],
-                        QLUT[header.qualityCg],
+                        QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
                        header.channelLayout,
-                        header.waveletFilter,
-                        header.decompLevels,
-                        2,              // temporalLevels (hardcoded for now, could be in header)
-                        header.entropyCoder  // Entropy coder: 0 = Twobit-map, 1 = EZBC
+                        header.waveletFilter, header.decompLevels, 2,
+                        header.entropyCoder,
+                        bufferOffset
                    )

-                    const framesDecoded = r1
-                    decoderDbgInfo = r2
+                    asyncDecodeInProgress = true
+                    asyncDecodeSlot = bufferSlot
+                    asyncDecodeGopSize = gopSize
+                    asyncDecodePtr = compressedPtr  // Will free after decode completes
+                    asyncDecodeStartTime = sys.nanoTime()

-                    decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0
-                    decompressTime = 0  // Included in decode time
-
-                    // Display each decoded frame with proper timing
-                    for (let i = 0; i < framesDecoded; i++) {
-                        let frameStart = sys.nanoTime()
-                        let uploadStart = frameStart
-
-                        // Upload GOP frame directly (no copy needed - already in RGB24 format)
-                        graphics.uploadRGBToFramebuffer(gopRGBBuffers[i], header.width, header.height, trueFrameCount + i, false)
-                        uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0
-
-                        // Apply bias lighting (only for first/last frame to save CPU)
-                        let biasStart = sys.nanoTime()
-                        if (i === 0 || i === framesDecoded - 1) {
-                            setBiasLighting()
+                    // Note: compressedPtr will be freed after decode completes
+                    // We'll check for completion in main loop and start playback then
+                    if (interactive) {
+                        console.log(`[GOP] Started async decode of first GOP (slot ${bufferSlot}, ${gopSize} frames)`)
                    }
-                        biasTime = (sys.nanoTime() - biasStart) / 1000000.0
+                } else if (currentGopSize === 0 && asyncDecodeInProgress) {
+                    // First GOP still decoding but another arrived - ignore it to avoid cancelling first GOP
+                    if (interactive) {
+                        console.log(`[GOP] Warning: GOP arrived while first GOP still decoding - ignoring to avoid cancellation`)
+                    }
+                    sys.free(compressedPtr)
+                } else if (currentGopSize > 0 && !asyncDecodeInProgress) {
+                    // GOP is playing and first GOP decode is done: decode this one to other slot in background (async)
+                    const nextSlot = 1 - currentGopBufferSlot
+                    const nextOffset = nextSlot * SLOT_SIZE

-                        // Fire audio on first frame
-                        if (!audioFired && (frameCount > 0 || i > 0)) {
-                            audio.play(0)
-                            audioFired = true
+                    // DIAGNOSTIC: Measure background decode timing
+                    const framesRemaining = currentGopSize - currentGopFrameIndex
+                    const timeRemaining = framesRemaining * FRAME_TIME * 1000.0  // milliseconds
+
+                    // If previous GOP still decoding, free its memory (will be overwritten)
+                    if (nextGopData !== null && !nextGopData.decoded && nextGopData.compressedPtr && nextGopData.compressedPtr !== 0) {
+                        if (interactive) {
+                            console.log(`[GOP] Warning: New GOP arrived before previous decode completed - freeing old data`)
+                        }
+                        sys.free(nextGopData.compressedPtr)
+                        nextGopData.compressedPtr = 0
                    }

-                        // Calculate how much time we've used so far for this frame
-                        let frameElapsed = (sys.nanoTime() - frameStart) / 1000000000.0
-
-                        // Wait for the remainder of FRAME_TIME (busy wait for accurate timing)
-                        let waitNeeded = FRAME_TIME - frameElapsed
-                        if (waitNeeded > 0) {
-                            let waitStart = sys.nanoTime()
-                            while ((sys.nanoTime() - waitStart) / 1000000000.0 < waitNeeded && !stopPlay && !paused) {
-                                sys.sleep(0) // Busy wait
-                            }
+                    if (interactive) {
+                        console.log(`[GOP] Background decode started: frame ${currentGopFrameIndex}/${currentGopSize}, ${framesRemaining} frames (${timeRemaining.toFixed(0)}ms) remaining`)
                    }

-                        // Update global time tracking to keep main loop synchronized
-                        let frameEnd = sys.nanoTime()
-                        let frameTotalTime = (frameEnd - frameStart) / 1000000000.0
-                        akku2 += frameTotalTime
-                        t1 = frameEnd  // Keep t1 synchronized with actual time
+                    // Start async background decode
+                    graphics.tavDecodeGopToVideoBufferAsync(
+                        compressedPtr, compressedSize, gopSize,
+                        motionX, motionY,
+                        header.width, header.height,
+                        canvasWidth, canvasHeight,
+                        marginLeft, marginTop,
+                        header.qualityLevel,
+                        QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
+                        header.channelLayout,
+                        header.waveletFilter, header.decompLevels, 2,
+                        header.entropyCoder,
+                        nextOffset
+                    )

-                        frameCount++
-                        trueFrameCount++
-
-                        // Swap ping-pong buffers for P-frame reference
-                        let temp = CURRENT_RGB_ADDR
-                        CURRENT_RGB_ADDR = PREV_RGB_ADDR
-                        PREV_RGB_ADDR = temp
-
-                        // Log performance for first frame of GOP
-                        if (i === 0 && (frameCount % 60 == 0 || frameCount == 0)) {
-                            let totalTime = decompressTime + decodeTime + uploadTime + biasTime
-                            console.log(`GOP Frame ${frameCount}: Decode=${decodeTime.toFixed(1)}ms, Upload=${uploadTime.toFixed(1)}ms, Bias=${biasTime.toFixed(1)}ms, Total=${totalTime.toFixed(1)}ms (${gopSize} frames)`)
+                    // Mark as decoding (will check completion in main loop)
+                    nextGopData = {
+                        gopSize: gopSize,
+                        decoded: false,  // Will be set to true when async decode completes
+                        slot: nextSlot,
+                        compressedPtr: compressedPtr,  // Will free after decode completes
+                        startTime: sys.nanoTime(),
+                        timeRemaining: timeRemaining
                    }
-                    }
-
-                    // Note: frameCount and trueFrameCount will be incremented by GOP_SYNC packet
-                    // Note: GOP buffers will be freed in finally block
-
-                } catch (e) {
-                    console.log(`GOP Frame ${frameCount}: decode failed: ${e}`)
-                    // Try to get more details from the exception
-                    if (e.stack) {
-                        console.log(`Stack trace: ${e.stack}`)
-                    }
-                    if (e.javaException) {
-                        console.log(`Java exception: ${e.javaException}`)
-                        if (e.javaException.printStackTrace) {
-                            serial.println("Java stack trace:")
-                            e.javaException.printStackTrace()
-                        }
-                    }
-                    // Print exception properties
-                    try {
-                        const props = Object.keys(e)
-                        if (props.length > 0) {
-                            console.log(`Exception properties: ${props.join(', ')}`)
-                            e.printStackTrace()
-                            let ee = e.getStackTrace()
-                            console.log(ee.length)
-                            console.log(ee.slice(0, 10).join('\n'))
-                        }
-                    } catch (ex) {}
-                } finally {
-                    // Always free GOP buffers even on error
-                    for (let i = 0; i < gopSize; i++) {
-                        sys.free(gopRGBBuffers[i])
+                } else {
+                    // Fallback: unexpected state, just free the memory
+                    if (interactive) {
+                        console.log(`[GOP] Warning: Unexpected state - currentGopSize=${currentGopSize}, asyncDecodeInProgress=${asyncDecodeInProgress} - freeing GOP data`)
                    }
                    sys.free(compressedPtr)
                }
            }
            else if (packetType === TAV_PACKET_GOP_SYNC) {
-                // GOP sync packet - increment frame counters by number of frames decoded
+                // GOP sync packet - just skip it, frame display is time-based
                const framesInGOP = seqread.readOneByte()
+                // Ignore - we display frames based on time accumulator, not this packet

-                frameCount += framesInGOP
-                trueFrameCount += framesInGOP
-
-                // Note: Buffer swapping already handled in GOP_UNIFIED handler
+                // CRITICAL: Stop reading packets if both buffers are full
+                // (one GOP playing + one GOP ready/decoding)
+                if (currentGopSize > 0 && nextGopData !== null) {
+                    shouldReadPackets = false
+                    if (interactive) {
+                        console.log(`[GOP] Both buffers full - stopping packet reading until current GOP finishes`)
+                    }
+                }
            }
            else if (packetType === TAV_PACKET_AUDIO_MP2) {
                // MP2 Audio packet
@@ -1281,15 +1289,138 @@ try {
                println(`Unknown packet type: 0x${packetType.toString(16)}`)
                break
            }
-            } // end of !paused block
-        }
+        } // end of !paused packet read block

        let t2 = sys.nanoTime()
        if (!paused) {
+            // Only accumulate time if we have a GOP to play
+            // Don't accumulate during first GOP decode or we'll get fast playback
+            if (currentGopSize > 0) {
                akku += (t2 - t1) / 1000000000.0
+            }
            akku2 += (t2 - t1) / 1000000000.0
        }

+        // STATE MACHINE: Explicit GOP playback with spin-waits
+
+        // Step 1: If first GOP decode in progress AND no GOP is currently playing, wait for it
+        if (asyncDecodeInProgress && currentGopSize === 0) {
+            if (!graphics.tavDecodeGopIsComplete()) {
+                // Spin-wait for first GOP decode (nothing else to do)
+                sys.sleep(1)
+            }
+            else {
+                // First GOP decode completed, start playback
+                const [r1, r2] = graphics.tavDecodeGopGetResult()
+                decodeTime = (sys.nanoTime() - asyncDecodeStartTime) / 1000000.0
+                decoderDbgInfo = r2
+
+                currentGopSize = asyncDecodeGopSize
+                currentGopFrameIndex = 0
+                currentGopBufferSlot = asyncDecodeSlot
+                asyncDecodeInProgress = false
+
+                // Set first frame time to NOW
+                nextFrameTime = sys.nanoTime()
+
+                // Resume packet reading to get next GOP (only one buffer occupied now)
+                shouldReadPackets = true
+
+                if (interactive) {
+                    console.log(`[GOP] First GOP ready (slot ${asyncDecodeSlot}, ${asyncDecodeGopSize} frames) in ${decodeTime.toFixed(1)}ms - starting playback`)
+                }
+
+                // Free compressed data
+                sys.free(asyncDecodePtr)
+                asyncDecodePtr = 0
+                asyncDecodeGopSize = 0
+            }
+        }
+
+        // Step 2 & 3: Display current GOP frame if it's time
+        if (!paused && currentGopSize > 0 && currentGopFrameIndex < currentGopSize) {
+            // Spin-wait for next frame time
+            while (sys.nanoTime() < nextFrameTime && !paused) {
+                sys.sleep(1)
+            }
+
+            if (!paused) {
+                const bufferSlot = currentGopBufferSlot
+                const bufferOffset = bufferSlot * SLOT_SIZE
+
+                let uploadStart = sys.nanoTime()
+                graphics.uploadVideoBufferFrameToFramebuffer(currentGopFrameIndex, header.width, header.height, trueFrameCount, bufferOffset)
+                uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0
+
+                // Apply bias lighting
+                let biasStart = sys.nanoTime()
+                if (currentGopFrameIndex === 0 || currentGopFrameIndex === currentGopSize - 1) {
+                    setBiasLighting()
+                }
+                biasTime = (sys.nanoTime() - biasStart) / 1000000.0
+
+                // Fire audio on first frame
+                if (!audioFired) {
+                    audio.play(0)
+                    audioFired = true
+                }
+
+                currentGopFrameIndex++
+                frameCount++
+                trueFrameCount++
+
+                // Schedule next frame
+                nextFrameTime += (frametime)  // frametime is in nanoseconds from header
+            }
+        }
+
+        // Step 4 & 7: GOP finished? Wait for background decode, then transition
+        if (!paused && currentGopSize > 0 && currentGopFrameIndex >= currentGopSize) {
+            if (nextGopData !== null) {
+                // Wait for background decode to complete
+                while (!graphics.tavDecodeGopIsComplete() && !paused) {
+                    sys.sleep(1)
+                }
+
+                if (!paused) {
+                    const [r1, r2] = graphics.tavDecodeGopGetResult()
+                    decodeTime = (sys.nanoTime() - nextGopData.startTime) / 1000000.0
+
+                    if (interactive) {
+                        const margin = nextGopData.timeRemaining - decodeTime
+                        const status = margin > 0 ? "✓ ON TIME" : "✗ TOO LATE"
+                        console.log(`[GOP] Background decode finished in ${decodeTime.toFixed(1)}ms (margin: ${margin.toFixed(0)}ms) ${status}`)
+                    }
+
+                    // Free compressed data
+                    sys.free(nextGopData.compressedPtr)
+
+                    // Transition to next GOP
+                    currentGopBufferSlot = 1 - currentGopBufferSlot
+                    currentGopSize = nextGopData.gopSize
+                    currentGopFrameIndex = 0
+                    nextGopData = null
+
+                    // Resume packet reading now that one buffer is free
+                    shouldReadPackets = true
+
+                    if (interactive) {
+                        console.log(`[GOP] ✓ SEAMLESS TRANSITION to next GOP (slot ${currentGopBufferSlot}, ${currentGopSize} frames)`)
+                    }
+                }
+            } else {
+                // No next GOP available, pause playback
+                if (interactive) {
+                    console.log(`[GOP] ✗ HICCUP - next GOP NOT READY! Playback paused.`)
+                }
+                currentGopSize = 0
+                currentGopFrameIndex = 0
+
+                // Resume packet reading to get next GOP
+                shouldReadPackets = true
+            }
+        }
+
        // Simple progress display
        if (interactive) {
            notifHideTimer += (t2 - t1)
@@ -1319,6 +1450,10 @@ try {
            gui.printTopBar(guiStatus, 1)
        }

+        // Small sleep to prevent 100% CPU and control loop rate
+        // Allows continuous packet reading while maintaining proper frame timing
+        sys.sleep(1)
+
        t1 = t2
    }
 }
--- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
+++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
@@ -80,6 +80,7 @@ import kotlin.text.toString

 class GraphicsJSR223Delegate(private val vm: VM) {

+
    private fun getFirstGPU(): GraphicsAdapter? {
        return vm.findPeribyType(VM.PERITYPE_GPU_AND_TERM)?.peripheral as? GraphicsAdapter
    }
@@ -4519,15 +4520,15 @@ class GraphicsJSR223Delegate(private val vm: VM) {
        // Read entropy coder from header: 0 = Twobit-map, 1 = EZBC
        val isEZBC = (entropyCoder == 1)

-        if (isEZBC) {
-            println("[AUTO] Using EZBC decoder (FORCED)")
+        /*if (isEZBC) {
+            println("[AUTO] Using EZBC decoder")
            postprocessCoefficientsEZBC(compressedData, compressedOffset, coeffCount,
                                       channelLayout, outputY, outputCo, outputCg, outputAlpha)
        } else {
            println("[AUTO] Using twobit-map decoder")
            postprocessCoefficientsVariableLayout(compressedData, compressedOffset, coeffCount,
                                                 channelLayout, outputY, outputCo, outputCg, outputAlpha)
-        }
+        }*/

        return isEZBC
    }
@@ -6849,6 +6850,337 @@ class GraphicsJSR223Delegate(private val vm: VM) {
        return arrayOf(gopSize, dbgOut)
    }

+    /**
+     * Decodes one GOP and writes its frames straight into GraphicsAdapter.videoBuffer.
+     * Only the compressed packet is read from VM user memory; the decoded frames never occupy it.
+     *
+     * Layout: frame t starts at bufferOffset + t * (width * height * 3) and is stored as RGB24.
+     * Nothing is decoded unless the whole GOP fits between bufferOffset and the end of videoBuffer.
+     *
+     * @param compressedDataPtr Offset of the Zstd-compressed GOP block within VM user memory
+     * @param bufferOffset Byte offset of the target slot in videoBuffer (0 for the first slot)
+     * @return Array<Any> of (frames decoded, debug info map); 0 frames when the request is rejected or decompression fails
+     */
+    fun tavDecodeGopToVideoBuffer(
+        compressedDataPtr: Long,
+        compressedSize: Int,
+        gopSize: Int,
+        motionVectorsX: IntArray,
+        motionVectorsY: IntArray,
+        width: Int,
+        height: Int,
+        canvasWidth: Int,
+        canvasHeight: Int,
+        marginLeft: Int,
+        marginTop: Int,
+        qIndex: Int,
+        qYGlobal: Int,
+        qCoGlobal: Int,
+        qCgGlobal: Int,
+        channelLayout: Int,
+        spatialFilter: Int = 1,
+        spatialLevels: Int = 6,
+        temporalLevels: Int = 2,
+        entropyCoder: Int = 0,
+        bufferOffset: Long = 0
+    ): Array<Any> {
+        val dbgOut = HashMap<String, Any>()
+        dbgOut["qY"] = qYGlobal
+        dbgOut["qCo"] = qCoGlobal
+        dbgOut["qCg"] = qCgGlobal
+        dbgOut["frameMode"] = "G"
+
+        val gpu = (vm.peripheralTable[1].peripheral as GraphicsAdapter)
+
+        // Verify the target slot lies inside videoBuffer. The slot's start offset has to be
+        // part of the check: with double-buffering the second slot begins well past offset 0.
+        val frameSize = width * height * 3L  // RGB24
+        val requiredSize = gopSize * frameSize
+        if (gopSize <= 0 || bufferOffset < 0) {
+            println("ERROR: invalid GOP request (gopSize=$gopSize, bufferOffset=$bufferOffset)")
+            return arrayOf(0, dbgOut)
+        }
+        if (bufferOffset + requiredSize > gpu.videoBuffer.size) {
+            println("ERROR: GOP requires ${requiredSize / 1048576}MB at offset $bufferOffset but videoBuffer is only ${gpu.videoBuffer.size / 1048576}MB")
+            return arrayOf(0, dbgOut)
+        }
+
+        // DWT runs on the expanded canvas; the visible frame is cropped out of it in step 8
+        val canvasPixels = canvasWidth * canvasHeight
+
+        // Step 1: Copy the compressed block out of VM user memory and decompress it
+        val compressedData = ByteArray(compressedSize)
+        UnsafeHelper.memcpyRaw(
+            null,
+            vm.usermem.ptr + compressedDataPtr,
+            compressedData,
+            UnsafeHelper.getArrayOffset(compressedData),
+            compressedSize.toLong()
+        )
+
+        val decompressedData = try {
+            ZstdInputStream(java.io.ByteArrayInputStream(compressedData)).use { zstd ->
+                zstd.readBytes()
+            }
+        } catch (e: Exception) {
+            println("ERROR: Zstd decompression failed: ${e.message}")
+            return arrayOf(0, dbgOut)
+        }
+
+        // Step 2: Postprocess unified block to per-frame coefficients
+        val (isEZBCMode, quantizedCoeffs) = tavPostprocessGopAuto(
+            decompressedData,
+            gopSize,
+            canvasPixels,
+            channelLayout,
+            entropyCoder
+        )
+
+        // Step 3: Allocate GOP buffers for float coefficients (expanded canvas size)
+        val gopY = Array(gopSize) { FloatArray(canvasPixels) }
+        val gopCo = Array(gopSize) { FloatArray(canvasPixels) }
+        val gopCg = Array(gopSize) { FloatArray(canvasPixels) }
+
+        // Step 4: Calculate subband layout for expanded canvas
+        val subbands = calculateSubbandLayout(canvasWidth, canvasHeight, spatialLevels)
+
+        // Step 5: Dequantize with temporal-spatial scaling (NOTE(review): chroma calls also pass qYGlobal - confirm intended)
+        for (t in 0 until gopSize) {
+            val temporalLevel = getTemporalSubbandLevel(t, gopSize, temporalLevels)
+            val temporalScale = getTemporalQuantizerScale(temporalLevel)
+
+            val baseQY = (qYGlobal * temporalScale).coerceIn(1.0f, 4096.0f)
+            val baseQCo = (qCoGlobal * temporalScale).coerceIn(1.0f, 4096.0f)
+            val baseQCg = (qCgGlobal * temporalScale).coerceIn(1.0f, 4096.0f)
+
+            dequantiseDWTSubbandsPerceptual(
+                qIndex, qYGlobal,
+                quantizedCoeffs[t][0], gopY[t],
+                subbands, baseQY, false, spatialLevels, isEZBCMode
+            )
+            dequantiseDWTSubbandsPerceptual(
+                qIndex, qYGlobal,
+                quantizedCoeffs[t][1], gopCo[t],
+                subbands, baseQCo, true, spatialLevels, isEZBCMode
+            )
+            dequantiseDWTSubbandsPerceptual(
+                qIndex, qYGlobal,
+                quantizedCoeffs[t][2], gopCg[t],
+                subbands, baseQCg, true, spatialLevels, isEZBCMode
+            )
+        }
+
+        // Step 6: Apply inverse 3D DWT
+        tavApplyInverse3DDWT(gopY, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
+        tavApplyInverse3DDWT(gopCo, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
+        tavApplyInverse3DDWT(gopCg, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
+
+        // Step 7: Apply inverse motion compensation (stored vectors are divided by 16 first; frame 0 is never shifted)
+        for (t in 1 until gopSize) {
+            val dx = motionVectorsX[t] / 16
+            val dy = motionVectorsY[t] / 16
+
+            if (dx != 0 || dy != 0) {
+                applyInverseTranslation(gopY[t], canvasWidth, canvasHeight, dx, dy)
+                applyInverseTranslation(gopCo[t], canvasWidth, canvasHeight, dx, dy)
+                applyInverseTranslation(gopCg[t], canvasWidth, canvasHeight, dx, dy)
+            }
+        }
+
+        // Step 8: Crop and convert to RGB, write directly to videoBuffer
+        for (t in 0 until gopSize) {
+            val videoBufferOffset = bufferOffset + (t * frameSize)  // Each frame sequentially, starting at bufferOffset
+
+            for (row in 0 until height) {
+                for (col in 0 until width) {
+                    // Source pixel in expanded canvas
+                    val canvasX = col + marginLeft
+                    val canvasY = row + marginTop
+                    val canvasIdx = canvasY * canvasWidth + canvasX
+
+                    // Destination pixel in videoBuffer
+                    val outIdx = row * width + col
+                    val offset = videoBufferOffset + outIdx * 3L
+
+                    val yVal = gopY[t][canvasIdx]
+                    val co = gopCo[t][canvasIdx]
+                    val cg = gopCg[t][canvasIdx]
+
+                    // YCoCg-R to RGB conversion
+                    val tmp = yVal - (cg / 2.0f)
+                    val g = cg + tmp
+                    val b = tmp - (co / 2.0f)
+                    val r = b + co
+
+                    // Clamp and write to videoBuffer
+                    gpu.videoBuffer[offset + 0] = r.toInt().coerceIn(0, 255).toByte()
+                    gpu.videoBuffer[offset + 1] = g.toInt().coerceIn(0, 255).toByte()
+                    gpu.videoBuffer[offset + 2] = b.toInt().coerceIn(0, 255).toByte()
+                }
+            }
+        }
+
+        return arrayOf(gopSize, dbgOut)
+    }
+
+    /**
+     * Upload a specific frame from videoBuffer to the framebuffer, centred on the screen.
+     * Frames are stored sequentially in videoBuffer starting at bufferOffset.
+     * Nothing is drawn when the graphics mode is neither 4 nor 5, or when the requested
+     * frame does not lie entirely inside videoBuffer.
+     *
+     * @param frameIndex Which frame in the GOP to upload (0-based)
+     * @param width Frame width
+     * @param height Frame height
+     * @param frameCount Global frame counter for dithering (not read by the current code)
+     * @param bufferOffset Byte offset of the GOP's slot in videoBuffer (0 for the first slot)
+     */
+    fun uploadVideoBufferFrameToFramebuffer(frameIndex: Int, width: Int, height: Int, frameCount: Int, bufferOffset: Long = 0) {
+        val gpu = (vm.peripheralTable[1].peripheral as GraphicsAdapter)
+        val graphicsMode = gpu.graphicsMode
+        if (graphicsMode != 4 && graphicsMode != 5) return  // only these two modes are drawn below
+
+        // Refuse to read a frame that is not entirely inside videoBuffer
+        val frameSize = width * height * 3L
+        val videoBufferOffset = bufferOffset + (frameIndex * frameSize)
+        if (frameIndex < 0 || bufferOffset < 0 || videoBufferOffset + frameSize > gpu.videoBuffer.size) {
+            println("ERROR: frame $frameIndex at offset $bufferOffset lies outside videoBuffer")
+            return
+        }
+
+        // Native resolution and the offsets that centre the frame on it
+        val nativeWidth = gpu.config.width
+        val nativeHeight = gpu.config.height
+        val offsetX = (nativeWidth - width) / 2
+        val offsetY = (nativeHeight - height) / 2
+
+        // Dithering pattern for 8bpp → 4bpp conversion
+        val bayerMatrix = arrayOf(
+            intArrayOf(0, 8, 2, 10),
+            intArrayOf(12, 4, 14, 6),
+            intArrayOf(3, 11, 1, 9),
+            intArrayOf(15, 7, 13, 5)
+        )
+
+        // Process row by row; pixels that fall outside the native screen are skipped
+        for (y in 0 until height) {
+            val screenY = y + offsetY
+            if (screenY !in 0 until nativeHeight) continue
+            for (x in 0 until width) {
+                val screenX = x + offsetX
+                if (screenX !in 0 until nativeWidth) continue
+                // Read RGB from videoBuffer
+                val pixelIdx = y * width + x
+                val offset = videoBufferOffset + pixelIdx * 3L
+                val r = gpu.videoBuffer[offset + 0].toUint()
+                val g = gpu.videoBuffer[offset + 1].toUint()
+                val b = gpu.videoBuffer[offset + 2].toUint()
+
+                val screenPixelIdx = screenY.toLong() * nativeWidth + screenX
+                if (graphicsMode == 4) {
+                    // 4bpp mode: dithered RGB (RG in fb1, B_ in fb2)
+                    val threshold = bayerMatrix[y % 4][x % 4]
+                    val rDithered = ((r + (threshold - 8)) shr 4).coerceIn(0, 15)
+                    val gDithered = ((g + (threshold - 8)) shr 4).coerceIn(0, 15)
+                    val bDithered = ((b + (threshold - 8)) shr 4).coerceIn(0, 15)
+                    gpu.framebuffer[screenPixelIdx] = ((rDithered shl 4) or gDithered).toByte()
+                    gpu.framebuffer2?.set(screenPixelIdx, (bDithered shl 4).toByte())
+                } else {
+                    // graphicsMode == 5, 8bpp mode: full RGB (R in fb1, G in fb2, B in fb3)
+                    gpu.framebuffer[screenPixelIdx] = r.toByte()
+                    gpu.framebuffer2?.set(screenPixelIdx, g.toByte())
+                    gpu.framebuffer3?.set(screenPixelIdx, b.toByte())
+                    gpu.framebuffer4?.set(screenPixelIdx, 255.toByte())
+                }
+            }
+        }
+    }
+
+    // Async GOP decode state
+    private val asyncDecodeComplete = java.util.concurrent.atomic.AtomicBoolean(false)
+    private var asyncDecodeResult: Array<Any>? = null
+    private var asyncDecodeThread: Thread? = null
+    private val asyncDecodeLock = Any()  // guards the ticket and every reset/publish of the result and flag
+    private var asyncDecodeTicket = 0L   // id of the newest request; workers holding an older id must not publish
+
+    /**
+     * Asynchronously decode GOP frames to videoBuffer in a background thread.
+     * This allows JavaScript to continue reading packets and displaying frames while decode runs.
+     * Call this function, then poll tavDecodeGopIsComplete() in your main loop.
+     * When complete, retrieve result with tavDecodeGopGetResult().
+     * Only the newest request ever reports completion; a failed decode completes with 0 frames.
+     *
+     * @param All parameters same as tavDecodeGopToVideoBuffer()
+     */
+    fun tavDecodeGopToVideoBufferAsync(
+        compressedDataPtr: Long,
+        compressedSize: Int,
+        gopSize: Int,
+        motionVectorsX: IntArray,
+        motionVectorsY: IntArray,
+        width: Int,
+        height: Int,
+        canvasWidth: Int,
+        canvasHeight: Int,
+        marginLeft: Int,
+        marginTop: Int,
+        qIndex: Int,
+        qYGlobal: Int,
+        qCoGlobal: Int,
+        qCgGlobal: Int,
+        channelLayout: Int,
+        spatialFilter: Int = 1,
+        spatialLevels: Int = 6,
+        temporalLevels: Int = 2,
+        entropyCoder: Int = 0,
+        bufferOffset: Long = 0
+    ) {
+        // interrupt() is only a hint, so a superseded worker may still finish; the ticket stops it from publishing
+        asyncDecodeThread?.interrupt()
+        val ticket = synchronized(asyncDecodeLock) {
+            asyncDecodeComplete.set(false)
+            asyncDecodeResult = null
+            ++asyncDecodeTicket
+        }
+
+        asyncDecodeThread = Thread {
+            // Catch Throwable: even an Error (e.g. OutOfMemoryError) must complete the request, or the poller waits forever
+            val result: Array<Any> = try {
+                tavDecodeGopToVideoBuffer(
+                    compressedDataPtr, compressedSize, gopSize, motionVectorsX, motionVectorsY,
+                    width, height, canvasWidth, canvasHeight, marginLeft, marginTop,
+                    qIndex, qYGlobal, qCoGlobal, qCgGlobal, channelLayout,
+                    spatialFilter, spatialLevels, temporalLevels, entropyCoder, bufferOffset
+                )
+            } catch (e: Throwable) {
+                println("ERROR: async GOP decode failed: $e")
+                arrayOf<Any>(0, HashMap<String, Any>())
+            }
+            synchronized(asyncDecodeLock) {
+                if (ticket == asyncDecodeTicket) {
+                    asyncDecodeResult = result
+                    asyncDecodeComplete.set(true)
+                }
+            }
+        }.apply { isDaemon = true; start() }  // daemon: a decode in flight must not keep the JVM alive
+    }
+
+    /**
+     * Reports whether the async GOP decode has finished.
+     * Poll this from the main loop before asking for the result with tavDecodeGopGetResult().
+     *
+     * @return true once the completion flag has been set, false while it is still clear
+     */
+    fun tavDecodeGopIsComplete(): Boolean = asyncDecodeComplete.get()
+
+    /**
+     * Fetch the outcome of the async GOP decode; call only after tavDecodeGopIsComplete() is true.
+     * @return Array<Any> shaped like tavDecodeGopToVideoBuffer()'s; (0, empty map) if nothing is stored
+     */
+    fun tavDecodeGopGetResult(): Array<Any> {
+        val stored = asyncDecodeResult
+        return if (stored != null) stored else arrayOf<Any>(0, HashMap<String, Any>())
+    }
+
    // Biorthogonal 13/7 wavelet inverse 1D transform
    // Synthesis filters: Low-pass (13 taps), High-pass (7 taps)
    private fun tavApplyDWTBior137Inverse1D(data: FloatArray, length: Int) {
--- a/tsvm_core/src/net/torvald/tsvm/peripheral/GraphicsAdapter.kt
+++ b/tsvm_core/src/net/torvald/tsvm/peripheral/GraphicsAdapter.kt
@@ -79,6 +79,10 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
    internal val framebuffer3 = if (sgr.bankCount >= 3) UnsafeHelper.allocate(WIDTH.toLong() * HEIGHT, this) else null
    internal val framebuffer4 = if (sgr.bankCount >= 4) UnsafeHelper.allocate(WIDTH.toLong() * HEIGHT, this) else null

+    init {
+        framebuffer4?.let { bank -> bank.fillWith(-1) }  // when bank 4 exists, start it with every byte at 0xFF
+    }
+
    internal val framebufferOut = Pixmap(WIDTH, HEIGHT, Pixmap.Format.RGBA8888)
    protected var rendertex = Texture(1, 1, Pixmap.Format.RGBA8888)
    internal val paletteOfFloats = FloatArray(1024) {
@@ -103,6 +107,8 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
    internal val unusedArea = UnsafeHelper.allocate(1024, this)
    internal val scanlineOffsets = UnsafeHelper.allocate(1024, this)

+    internal val videoBuffer = UnsafeHelper.allocate(32 * 1024 * 1024, this) // 32 MiB store for decoded RGB24 video frames; destroyed alongside the adapter's other buffers
+
    protected val paletteShader = LoadShader(DRAW_SHADER_VERT, config.paletteShader)
    protected val textShader = LoadShader(DRAW_SHADER_VERT, config.fragShader)

@@ -960,6 +966,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
        chrrom0.tryDispose()
        chrrom.tryDispose()
        unusedArea.destroy()
+        videoBuffer.destroy()
        scanlineOffsets.destroy()
        instArea.destroy()
        mappedFontRom.destroy()
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -117,8 +117,8 @@ static int needs_alpha_channel(int channel_layout) {
 #define DEFAULT_FPS 30
 #define DEFAULT_QUALITY 3
 #define DEFAULT_ZSTD_LEVEL 9
-#define TEMPORAL_GOP_SIZE 24//8
-#define TEMPORAL_DECOMP_LEVEL 3
+#define TEMPORAL_GOP_SIZE 20 // earlier values: 8, 24. Kept below the player's 21-frame slot; its 32 MB video buffer holds ~42 frames as two slots
+#define TEMPORAL_DECOMP_LEVEL 2
 #define MOTION_THRESHOLD 24.0f // Flush if motion exceeds 24 pixels in any direction

 // Audio/subtitle constants (reused from TEV)
@@ -8832,7 +8832,7 @@ static int detect_scene_change_between_frames(
    if (out_changed_ratio) *out_changed_ratio = changed_ratio;

    // Scene change threshold
-    double threshold = 0.75;
+    double threshold = 0.50;

    return changed_ratio > threshold;
 }