TAV: double buffered playback

2026-06-06 05:28:31 +09:00 · 2025-10-21 16:17:00 +09:00
parent f0ad0ef034
commit 9ac0424be3
4 changed files with 631 additions and 157 deletions
--- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
+++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
@@ -80,6 +80,7 @@ import kotlin.text.toString

 class GraphicsJSR223Delegate(private val vm: VM) {

+
    private fun getFirstGPU(): GraphicsAdapter? {
        return vm.findPeribyType(VM.PERITYPE_GPU_AND_TERM)?.peripheral as? GraphicsAdapter
    }
@@ -4519,15 +4520,15 @@ class GraphicsJSR223Delegate(private val vm: VM) {
        // Read entropy coder from header: 0 = Twobit-map, 1 = EZBC
        val isEZBC = (entropyCoder == 1)

-        if (isEZBC) {
-            println("[AUTO] Using EZBC decoder (FORCED)")
+        /*if (isEZBC) {
+            println("[AUTO] Using EZBC decoder")
            postprocessCoefficientsEZBC(compressedData, compressedOffset, coeffCount,
                                       channelLayout, outputY, outputCo, outputCg, outputAlpha)
        } else {
            println("[AUTO] Using twobit-map decoder")
            postprocessCoefficientsVariableLayout(compressedData, compressedOffset, coeffCount,
                                                 channelLayout, outputY, outputCo, outputCg, outputAlpha)
-        }
+        }*/

        return isEZBC
    }
@@ -6849,6 +6850,337 @@ class GraphicsJSR223Delegate(private val vm: VM) {
        return arrayOf(gopSize, dbgOut)
    }

+    /**
+     * Decode GOP frames directly into GraphicsAdapter.videoBuffer (Java heap).
+     * This avoids allocating GOP frames in VM user memory, saving ~6 MB for 8-frame GOPs.
+     *
+     * Frames are stored sequentially in videoBuffer: [Frame0_RGB][Frame1_RGB]...[FrameN_RGB]
+     * Each frame is width×height×3 bytes (RGB24 format).
+     *
+     * @param bufferOffset Byte offset into videoBuffer (for double-buffering: 0 or GOP_SIZE*FRAME_SIZE)
+     * @return Pair<Int, HashMap<String, Any>> - (number of frames decoded, debug info)
+     */
+    fun tavDecodeGopToVideoBuffer(
+        compressedDataPtr: Long,
+        compressedSize: Int,
+        gopSize: Int,
+        motionVectorsX: IntArray,
+        motionVectorsY: IntArray,
+        width: Int,
+        height: Int,
+        canvasWidth: Int,
+        canvasHeight: Int,
+        marginLeft: Int,
+        marginTop: Int,
+        qIndex: Int,
+        qYGlobal: Int,
+        qCoGlobal: Int,
+        qCgGlobal: Int,
+        channelLayout: Int,
+        spatialFilter: Int = 1,
+        spatialLevels: Int = 6,
+        temporalLevels: Int = 2,
+        entropyCoder: Int = 0,
+        bufferOffset: Long = 0
+    ): Array<Any> {
+        val dbgOut = HashMap<String, Any>()
+        dbgOut["qY"] = qYGlobal
+        dbgOut["qCo"] = qCoGlobal
+        dbgOut["qCg"] = qCgGlobal
+        dbgOut["frameMode"] = "G"
+
+        val gpu = (vm.peripheralTable[1].peripheral as GraphicsAdapter)
+
+        // Verify videoBuffer has enough space
+        val frameSize = width * height * 3L  // RGB24
+        val requiredSize = gopSize * frameSize
+        if (requiredSize > gpu.videoBuffer.size) {
+            println("ERROR: GOP requires ${requiredSize / 1048576}MB but videoBuffer is only ${gpu.videoBuffer.size / 1048576}MB")
+            return arrayOf(0, dbgOut)
+        }
+
+        // Use expanded canvas dimensions for DWT processing
+        val canvasPixels = canvasWidth * canvasHeight
+        val outputPixels = width * height
+
+        // Step 1: Decompress unified GOP block
+        val compressedData = ByteArray(compressedSize)
+        UnsafeHelper.memcpyRaw(
+            null,
+            vm.usermem.ptr + compressedDataPtr,
+            compressedData,
+            UnsafeHelper.getArrayOffset(compressedData),
+            compressedSize.toLong()
+        )
+
+        val decompressedData = try {
+            ZstdInputStream(java.io.ByteArrayInputStream(compressedData)).use { zstd ->
+                zstd.readBytes()
+            }
+        } catch (e: Exception) {
+            println("ERROR: Zstd decompression failed: ${e.message}")
+            return arrayOf(0, dbgOut)
+        }
+
+        // Step 2: Postprocess unified block to per-frame coefficients
+        val (isEZBCMode, quantizedCoeffs) = tavPostprocessGopAuto(
+            decompressedData,
+            gopSize,
+            canvasPixels,
+            channelLayout,
+            entropyCoder
+        )
+
+        // Step 3: Allocate GOP buffers for float coefficients (expanded canvas size)
+        val gopY = Array(gopSize) { FloatArray(canvasPixels) }
+        val gopCo = Array(gopSize) { FloatArray(canvasPixels) }
+        val gopCg = Array(gopSize) { FloatArray(canvasPixels) }
+
+        // Step 4: Calculate subband layout for expanded canvas
+        val subbands = calculateSubbandLayout(canvasWidth, canvasHeight, spatialLevels)
+
+        // Step 5: Dequantize with temporal-spatial scaling
+        for (t in 0 until gopSize) {
+            val temporalLevel = getTemporalSubbandLevel(t, gopSize, temporalLevels)
+            val temporalScale = getTemporalQuantizerScale(temporalLevel)
+
+            val baseQY = (qYGlobal * temporalScale).coerceIn(1.0f, 4096.0f)
+            val baseQCo = (qCoGlobal * temporalScale).coerceIn(1.0f, 4096.0f)
+            val baseQCg = (qCgGlobal * temporalScale).coerceIn(1.0f, 4096.0f)
+
+            dequantiseDWTSubbandsPerceptual(
+                qIndex, qYGlobal,
+                quantizedCoeffs[t][0], gopY[t],
+                subbands, baseQY, false, spatialLevels,
+                isEZBCMode
+            )
+
+            dequantiseDWTSubbandsPerceptual(
+                qIndex, qYGlobal,
+                quantizedCoeffs[t][1], gopCo[t],
+                subbands, baseQCo, true, spatialLevels,
+                isEZBCMode
+            )
+
+            dequantiseDWTSubbandsPerceptual(
+                qIndex, qYGlobal,
+                quantizedCoeffs[t][2], gopCg[t],
+                subbands, baseQCg, true, spatialLevels,
+                isEZBCMode
+            )
+        }
+
+        // Step 6: Apply inverse 3D DWT
+        tavApplyInverse3DDWT(gopY, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
+        tavApplyInverse3DDWT(gopCo, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
+        tavApplyInverse3DDWT(gopCg, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
+
+        // Step 7: Apply inverse motion compensation
+        for (t in 1 until gopSize) {
+            val dx = motionVectorsX[t] / 16
+            val dy = motionVectorsY[t] / 16
+
+            if (dx != 0 || dy != 0) {
+                applyInverseTranslation(gopY[t], canvasWidth, canvasHeight, dx, dy)
+                applyInverseTranslation(gopCo[t], canvasWidth, canvasHeight, dx, dy)
+                applyInverseTranslation(gopCg[t], canvasWidth, canvasHeight, dx, dy)
+            }
+        }
+
+        // Step 8: Crop and convert to RGB, write directly to videoBuffer
+        for (t in 0 until gopSize) {
+            val videoBufferOffset = bufferOffset + (t * frameSize)  // Each frame sequentially, starting at bufferOffset
+
+            for (row in 0 until height) {
+                for (col in 0 until width) {
+                    // Source pixel in expanded canvas
+                    val canvasX = col + marginLeft
+                    val canvasY = row + marginTop
+                    val canvasIdx = canvasY * canvasWidth + canvasX
+
+                    // Destination pixel in videoBuffer
+                    val outIdx = row * width + col
+                    val offset = videoBufferOffset + outIdx * 3L
+
+                    val yVal = gopY[t][canvasIdx]
+                    val co = gopCo[t][canvasIdx]
+                    val cg = gopCg[t][canvasIdx]
+
+                    // YCoCg-R to RGB conversion
+                    val tmp = yVal - (cg / 2.0f)
+                    val g = cg + tmp
+                    val b = tmp - (co / 2.0f)
+                    val r = b + co
+
+                    // Clamp and write to videoBuffer
+                    gpu.videoBuffer[offset + 0] = r.toInt().coerceIn(0, 255).toByte()
+                    gpu.videoBuffer[offset + 1] = g.toInt().coerceIn(0, 255).toByte()
+                    gpu.videoBuffer[offset + 2] = b.toInt().coerceIn(0, 255).toByte()
+                }
+            }
+        }
+
+        return arrayOf(gopSize, dbgOut)
+    }
+
+    /**
+     * Upload a specific frame from videoBuffer to the framebuffer with dithering.
+     * Frames are stored sequentially in videoBuffer starting at offset 0.
+     *
+     * @param frameIndex Which frame in the GOP to upload (0-based)
+     * @param width Frame width
+     * @param height Frame height
+     * @param frameCount Global frame counter for dithering
+     * @param bufferOffset Byte offset into videoBuffer (for double-buffering: 0 or GOP_SIZE*FRAME_SIZE)
+     */
+    fun uploadVideoBufferFrameToFramebuffer(frameIndex: Int, width: Int, height: Int, frameCount: Int, bufferOffset: Long = 0) {
+        val gpu = (vm.peripheralTable[1].peripheral as GraphicsAdapter)
+        val graphicsMode = gpu.graphicsMode
+
+        val frameSize = width * height * 3L
+        val videoBufferOffset = bufferOffset + (frameIndex * frameSize)
+
+        // Get native resolution
+        val nativeWidth = gpu.config.width
+        val nativeHeight = gpu.config.height
+
+        // Calculate centering offsets
+        val offsetX = (nativeWidth - width) / 2
+        val offsetY = (nativeHeight - height) / 2
+
+        // Dithering pattern for 8bpp → 4bpp conversion
+        val bayerMatrix = arrayOf(
+            intArrayOf(0, 8, 2, 10),
+            intArrayOf(12, 4, 14, 6),
+            intArrayOf(3, 11, 1, 9),
+            intArrayOf(15, 7, 13, 5)
+        )
+
+        // Process row by row
+        for (y in 0 until height) {
+            val screenY = y + offsetY
+            if (screenY !in 0 until nativeHeight) continue
+
+            for (x in 0 until width) {
+                val screenX = x + offsetX
+                if (screenX !in 0 until nativeWidth) continue
+
+                // Read RGB from videoBuffer
+                val pixelIdx = y * width + x
+                val offset = videoBufferOffset + pixelIdx * 3L
+
+                val r = gpu.videoBuffer[offset + 0].toUint()
+                val g = gpu.videoBuffer[offset + 1].toUint()
+                val b = gpu.videoBuffer[offset + 2].toUint()
+
+                val screenPixelIdx = screenY.toLong() * nativeWidth + screenX
+
+                if (graphicsMode == 4) {
+                    // 4bpp mode: dithered RGB (RG in fb1, B_ in fb2)
+                    val threshold = bayerMatrix[y % 4][x % 4]
+                    val rDithered = ((r + (threshold - 8)) shr 4).coerceIn(0, 15)
+                    val gDithered = ((g + (threshold - 8)) shr 4).coerceIn(0, 15)
+                    val bDithered = ((b + (threshold - 8)) shr 4).coerceIn(0, 15)
+
+                    gpu.framebuffer[screenPixelIdx] = ((rDithered shl 4) or gDithered).toByte()
+                    gpu.framebuffer2?.set(screenPixelIdx, (bDithered shl 4).toByte())
+                } else if (graphicsMode == 5) {
+                    // 8bpp mode: full RGB (R in fb1, G in fb2, B in fb3)
+                    gpu.framebuffer[screenPixelIdx] = r.toByte()
+                    gpu.framebuffer2?.set(screenPixelIdx, g.toByte())
+                    gpu.framebuffer3?.set(screenPixelIdx, b.toByte())
+                    gpu.framebuffer4?.set(screenPixelIdx, 255.toByte())
+                }
+            }
+        }
+    }
+
+    // Async GOP decode state
+    private val asyncDecodeComplete = java.util.concurrent.atomic.AtomicBoolean(false)
+    private var asyncDecodeResult: Array<Any>? = null
+    private var asyncDecodeThread: Thread? = null
+
+    /**
+     * Asynchronously decode GOP frames to videoBuffer in a background thread.
+     * This allows JavaScript to continue reading packets and displaying frames while decode runs.
+     *
+     * Call this function, then poll tavDecodeGopIsComplete() in your main loop.
+     * When complete, retrieve result with tavDecodeGopGetResult().
+     *
+     * @param All parameters same as tavDecodeGopToVideoBuffer()
+     */
+    fun tavDecodeGopToVideoBufferAsync(
+        compressedDataPtr: Long,
+        compressedSize: Int,
+        gopSize: Int,
+        motionVectorsX: IntArray,
+        motionVectorsY: IntArray,
+        width: Int,
+        height: Int,
+        canvasWidth: Int,
+        canvasHeight: Int,
+        marginLeft: Int,
+        marginTop: Int,
+        qIndex: Int,
+        qYGlobal: Int,
+        qCoGlobal: Int,
+        qCgGlobal: Int,
+        channelLayout: Int,
+        spatialFilter: Int = 1,
+        spatialLevels: Int = 6,
+        temporalLevels: Int = 2,
+        entropyCoder: Int = 0,
+        bufferOffset: Long = 0
+    ) {
+        // Cancel any existing decode thread
+        asyncDecodeThread?.interrupt()
+
+        // Reset completion flag
+        asyncDecodeComplete.set(false)
+        asyncDecodeResult = null
+
+        // Spawn thread to decode in background
+        asyncDecodeThread = Thread {
+            try {
+                val result = tavDecodeGopToVideoBuffer(
+                    compressedDataPtr, compressedSize, gopSize,
+                    motionVectorsX, motionVectorsY,
+                    width, height, canvasWidth, canvasHeight,
+                    marginLeft, marginTop,
+                    qIndex, qYGlobal, qCoGlobal, qCgGlobal,
+                    channelLayout, spatialFilter, spatialLevels, temporalLevels,
+                    entropyCoder, bufferOffset
+                )
+                asyncDecodeResult = result
+                asyncDecodeComplete.set(true)
+            } catch (e: InterruptedException) {
+                // Thread was cancelled, do nothing
+            } catch (e: Exception) {
+                // Decode failed, set empty result and mark complete
+                asyncDecodeResult = arrayOf(0, HashMap<String, Any>())
+                asyncDecodeComplete.set(true)
+            }
+        }
+        asyncDecodeThread?.start()
+    }
+
+    /**
+     * Check if async GOP decode has completed.
+     * @return true if decode finished, false if still running
+     */
+    fun tavDecodeGopIsComplete(): Boolean {
+        return asyncDecodeComplete.get()
+    }
+
+    /**
+     * Get the result of async GOP decode.
+     * Only call this after tavDecodeGopIsComplete() returns true!
+     * @return Array<Any> - same as tavDecodeGopToVideoBuffer()
+     */
+    fun tavDecodeGopGetResult(): Array<Any> {
+        return asyncDecodeResult ?: arrayOf(0, HashMap<String, Any>())
+    }
+
    // Biorthogonal 13/7 wavelet inverse 1D transform
    // Synthesis filters: Low-pass (13 taps), High-pass (7 taps)
    private fun tavApplyDWTBior137Inverse1D(data: FloatArray, length: Int) {
--- a/tsvm_core/src/net/torvald/tsvm/peripheral/GraphicsAdapter.kt
+++ b/tsvm_core/src/net/torvald/tsvm/peripheral/GraphicsAdapter.kt
@@ -79,6 +79,10 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
    internal val framebuffer3 = if (sgr.bankCount >= 3) UnsafeHelper.allocate(WIDTH.toLong() * HEIGHT, this) else null
    internal val framebuffer4 = if (sgr.bankCount >= 4) UnsafeHelper.allocate(WIDTH.toLong() * HEIGHT, this) else null

+    init {
+        framebuffer4?.fillWith(-1)
+    }
+
    internal val framebufferOut = Pixmap(WIDTH, HEIGHT, Pixmap.Format.RGBA8888)
    protected var rendertex = Texture(1, 1, Pixmap.Format.RGBA8888)
    internal val paletteOfFloats = FloatArray(1024) {
@@ -103,6 +107,8 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
    internal val unusedArea = UnsafeHelper.allocate(1024, this)
    internal val scanlineOffsets = UnsafeHelper.allocate(1024, this)

+    internal val videoBuffer = UnsafeHelper.allocate(32 * 1024 * 1024, this)
+
    protected val paletteShader = LoadShader(DRAW_SHADER_VERT, config.paletteShader)
    protected val textShader = LoadShader(DRAW_SHADER_VERT, config.fragShader)

@@ -960,6 +966,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
        chrrom0.tryDispose()
        chrrom.tryDispose()
        unusedArea.destroy()
+        videoBuffer.destroy()
        scanlineOffsets.destroy()
        instArea.destroy()
        mappedFontRom.destroy()