colour with DCT

2026-06-06 05:28:31 +09:00 · 2025-08-18 03:11:34 +09:00
parent 964728dd59
commit ca8ea7f2be
2 changed files with 145 additions and 115 deletions
--- a/assets/disk0/tvdos/bin/playtev.js
+++ b/assets/disk0/tvdos/bin/playtev.js
@@ -204,14 +204,58 @@ let stopPlay = false
 // Dequantize DCT coefficient
 function dequantizeCoeff(coeff, quant, isDC) {
    if (isDC) {
-        // DC coefficient represents the average pixel value
-        // It should be in range roughly -128 to +127 after dequantization
-        return coeff  // No multiplication needed for DC
+        // DC coefficient also needs dequantization
+        return coeff * quant
    } else {
        return coeff * quant
    }
 }

+// 8x8 Inverse DCT implementation
+function idct8x8(coeffs, quantTable) {
+    const N = 8
+    let block = new Array(64)
+    
+    // Dequantize coefficients
+    for (let i = 0; i < 64; i++) {
+        block[i] = dequantizeCoeff(coeffs[i], quantTable[i], i === 0)
+    }
+    
+    // IDCT constants
+    const cos = Math.cos
+    const sqrt2 = Math.sqrt(2)
+    const c = new Array(8)
+    c[0] = 1.0 / sqrt2
+    for (let i = 1; i < 8; i++) {
+        c[i] = 1.0
+    }
+    
+    let result = new Array(64)
+    
+    // 2D IDCT
+    for (let x = 0; x < N; x++) {
+        for (let y = 0; y < N; y++) {
+            let sum = 0.0
+            for (let u = 0; u < N; u++) {
+                for (let v = 0; v < N; v++) {
+                    let coeff = block[v * N + u]
+                    let cosU = cos((2 * x + 1) * u * Math.PI / (2 * N))
+                    let cosV = cos((2 * y + 1) * v * Math.PI / (2 * N))
+                    sum += c[u] * c[v] * coeff * cosU * cosV
+                }
+            }
+            result[y * N + x] = sum / 4.0
+        }
+    }
+    
+    // Convert to pixel values (0-255)
+    for (let i = 0; i < 64; i++) {
+        result[i] = Math.max(0, Math.min(255, Math.round(result[i] + 128)))
+    }
+    
+    return result
+}
+
 // Hardware-accelerated decoding uses graphics.tevIdct8x8() instead of pure JS

 // Hardware-accelerated TEV block decoder 
@@ -260,43 +304,43 @@ function decodeBlock(blockData, blockX, blockY, prevRG, prevBA, currRG, currBA,
            }
        }
    } else {
-        // INTRA or INTER modes: simplified DC-only decoding for debugging
+        // INTRA or INTER modes: Full DCT decoding
        
-        // Extract DC coefficients and convert to colors
-        let rCoeff = blockData.dctCoeffs[0 * 64 + 0] // R DC
-        let gCoeff = blockData.dctCoeffs[1 * 64 + 0] // G DC  
-        let bCoeff = blockData.dctCoeffs[2 * 64 + 0] // B DC
+        // Extract DCT coefficients for each channel (R, G, B)
+        let rCoeffs = blockData.dctCoeffs.slice(0 * 64, 1 * 64)  // R channel
+        let gCoeffs = blockData.dctCoeffs.slice(1 * 64, 2 * 64)  // G channel  
+        let bCoeffs = blockData.dctCoeffs.slice(2 * 64, 3 * 64)  // B channel
        
-        // Dequantize DC coefficients
-        let rDC = dequantizeCoeff(rCoeff, quantTable[0], true)
-        let gDC = dequantizeCoeff(gCoeff, quantTable[0], true) 
-        let bDC = dequantizeCoeff(bCoeff, quantTable[0], true)
+        // Perform IDCT for each channel
+        let rBlock = idct8x8(rCoeffs, quantTable)
+        let gBlock = idct8x8(gCoeffs, quantTable)
+        let bBlock = idct8x8(bCoeffs, quantTable)
        
-        // Convert to RGB values (DC represents average)
-        let r = Math.max(0, Math.min(255, rDC + 128))
-        let g = Math.max(0, Math.min(255, gDC + 128))
-        let b = Math.max(0, Math.min(255, bDC + 128))
-        
-        // Convert to 4-bit values
-        let r4 = Math.max(0, Math.min(15, Math.round(r * 15 / 255)))
-        let g4 = Math.max(0, Math.min(15, Math.round(g * 15 / 255)))
-        let b4 = Math.max(0, Math.min(15, Math.round(b * 15 / 255)))
-        
-        let rgValue = (r4 << 4) | g4  // R in MSB, G in LSB
-        let baValue = (b4 << 4) | 15  // B in MSB, A=15 (opaque) in LSB
-
-        // Software decoding (for fallback only)
-
-        // Fill 8x8 block with solid color
+        // Fill 8x8 block with IDCT results
        for (let dy = 0; dy < BLOCK_SIZE; dy++) {
            for (let dx = 0; dx < BLOCK_SIZE; dx++) {
                let x = startX + dx
                let y = startY + dy
                if (x < width && y < height) {
-                    let offset = y * width + x
-                    // Normal memory plane assignments
-                    sys.poke(currRG - offset, rgValue)  // Graphics memory uses negative addressing
-                    sys.poke(currBA - offset, baValue)
+                    let blockOffset = dy * BLOCK_SIZE + dx
+                    let imageOffset = y * width + x
+                    
+                    // Get RGB values from IDCT results
+                    let r = rBlock[blockOffset]
+                    let g = gBlock[blockOffset]
+                    let b = bBlock[blockOffset]
+                    
+                    // Convert to 4-bit values
+                    let r4 = Math.max(0, Math.min(15, Math.round(r * 15 / 255)))
+                    let g4 = Math.max(0, Math.min(15, Math.round(g * 15 / 255)))
+                    let b4 = Math.max(0, Math.min(15, Math.round(b * 15 / 255)))
+                    
+                    let rgValue = (r4 << 4) | g4  // R in MSB, G in LSB
+                    let baValue = (b4 << 4) | 15  // B in MSB, A=15 (opaque) in LSB
+                    
+                    // Write to graphics memory
+                    sys.poke(currRG - imageOffset, rgValue)  // Graphics memory uses negative addressing
+                    sys.poke(currBA - imageOffset, baValue)
                }
            }
        }
--- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
+++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
@@ -20,6 +20,47 @@ class GraphicsJSR223Delegate(private val vm: VM) {
        return vm.findPeribyType(VM.PERITYPE_GPU_AND_TERM)?.peripheral as? GraphicsAdapter
    }

+    /**
+     * Perform IDCT on a single channel with integer coefficients
+     */
+    private fun performIDCT(coeffs: IntArray, quantTable: IntArray): IntArray {
+        // Use the same DCT basis as tevIdct8x8
+        val dctBasis = Array(8) { u ->
+            Array(8) { x ->
+                val cu = if (u == 0) 1.0 / kotlin.math.sqrt(2.0) else 1.0
+                cu * kotlin.math.cos((2.0 * x + 1.0) * u * kotlin.math.PI / 16.0) / 2.0
+            }
+        }
+        
+        val dctCoeffs = Array(8) { DoubleArray(8) }
+        val result = IntArray(64)
+        
+        // Convert integer coefficients to 2D array and dequantize
+        for (u in 0 until 8) {
+            for (v in 0 until 8) {
+                val idx = u * 8 + v
+                val coeff = coeffs[idx]
+                dctCoeffs[u][v] = (coeff * quantTable[idx]).toDouble()
+            }
+        }
+        
+        // Apply 2D inverse DCT
+        for (x in 0 until 8) {
+            for (y in 0 until 8) {
+                var sum = 0.0
+                for (u in 0 until 8) {
+                    for (v in 0 until 8) {
+                        sum += dctBasis[u][x] * dctBasis[v][y] * dctCoeffs[u][v]
+                    }
+                }
+                val pixel = kotlin.math.max(0.0, kotlin.math.min(255.0, sum + 128.0))
+                result[y * 8 + x] = pixel.toInt()
+            }
+        }
+        
+        return result
+    }
+
    fun getGpuMemBase(): Int {
        return -1 - (1048576 * (vm.findPeriIndexByType(VM.PERITYPE_GPU_AND_TERM) ?: 0))
    }
@@ -1331,68 +1372,6 @@ class GraphicsJSR223Delegate(private val vm: VM) {
        }
    }

-    /**
-     * Fast 8x8 inverse DCT optimized for video decompression
-     * @param dctPtr pointer to DCT coefficients (192 floats)
-     * @param blockPtr pointer to output RGB block (192 bytes)
-     */
-    fun tevIdct8x8(dctPtr: Int, blockPtr: Int) {
-        val gpu = getFirstGPU() ?: return
-        
-        val dctBasis = Array(8) { u ->
-            Array(8) { x ->
-                val cu = if (u == 0) 1.0 / sqrt(2.0) else 1.0
-                cu * cos((2.0 * x + 1.0) * u * PI / 16.0) / 2.0
-            }
-        }
-        
-        val dctCoeffs = Array(3) { Array(8) { DoubleArray(8) } }
-        val block = Array(3) { Array(8) { DoubleArray(8) } }
-        
-        // Read DCT coefficients from memory
-        for (channel in 0..2) {
-            for (u in 0..7) {
-                for (v in 0..7) {
-                    val offset = (channel * 64 + u * 8 + v) * 4
-                    val b0 = vm.peek(dctPtr.toLong() + offset)!! and -1
-                    val b1 = vm.peek(dctPtr.toLong() + offset + 1)!! and -1
-                    val b2 = vm.peek(dctPtr.toLong() + offset + 2)!! and -1
-                    val b3 = vm.peek(dctPtr.toLong() + offset + 3)!! and -1
-                    val floatBits = b0.toUint() or (b1.toUint() shl 8) or (b2.toUint() shl 16) or (b3.toUint() shl 24)
-                    dctCoeffs[channel][u][v] = java.lang.Float.intBitsToFloat(floatBits).toDouble()
-                }
-            }
-        }
-        
-        // Apply 2D inverse DCT to each channel
-        for (channel in 0..2) {
-            for (x in 0..7) {
-                for (y in 0..7) {
-                    var sum = 0.0
-                    for (u in 0..7) {
-                        for (v in 0..7) {
-                            sum += dctBasis[u][x] * dctBasis[v][y] * dctCoeffs[channel][u][v]
-                        }
-                    }
-                    block[channel][y][x] = sum + 0.5 // Add back DC offset
-                }
-            }
-        }
-        
-        // Write RGB block to memory (clamped to 0-255)
-        for (y in 0..7) {
-            for (x in 0..7) {
-                val offset = (y * 8 + x) * 3
-                val r = (clamp(block[0][y][x] * 255.0, 0.0, 255.0)).toInt()
-                val g = (clamp(block[1][y][x] * 255.0, 0.0, 255.0)).toInt()
-                val b = (clamp(block[2][y][x] * 255.0, 0.0, 255.0)).toInt()
-                
-                vm.poke(blockPtr.toLong() + offset, r.toByte())
-                vm.poke(blockPtr.toLong() + offset + 1, g.toByte())
-                vm.poke(blockPtr.toLong() + offset + 2, b.toByte())
-            }
-        }
-    }

    /**
     * Motion compensation: copy 8x8 block with sub-pixel interpolation
@@ -1733,34 +1712,41 @@ class GraphicsJSR223Delegate(private val vm: VM) {
                        }
                    }
                    
-                    else -> { // TEV_MODE_INTRA (0x01) or TEV_MODE_INTER (0x02) - DCT decode
-                        // Extract DC coefficients and dequantize
-                        val rDC = dctCoeffs[0 * 64 + 0] // R channel DC
-                        val gDC = dctCoeffs[1 * 64 + 0] // G channel DC  
-                        val bDC = dctCoeffs[2 * 64 + 0] // B channel DC
+                    else -> { // TEV_MODE_INTRA (0x01) or TEV_MODE_INTER (0x02) - Full DCT decode
+                        // Hardware-accelerated IDCT for all three channels
+                        val rCoeffs = dctCoeffs.sliceArray(0 * 64 until 1 * 64)  // R channel
+                        val gCoeffs = dctCoeffs.sliceArray(1 * 64 until 2 * 64)  // G channel
+                        val bCoeffs = dctCoeffs.sliceArray(2 * 64 until 3 * 64)  // B channel
                        
-                        // Convert DC to RGB (add 128 offset)
-                        val r = kotlin.math.max(0, kotlin.math.min(255, rDC + 128))
-                        val g = kotlin.math.max(0, kotlin.math.min(255, gDC + 128))
-                        val b = kotlin.math.max(0, kotlin.math.min(255, bDC + 128))
+                        // Perform hardware IDCT for each channel
+                        val rBlock = performIDCT(rCoeffs, quantTable)
+                        val gBlock = performIDCT(gCoeffs, quantTable)
+                        val bBlock = performIDCT(bCoeffs, quantTable)
                        
-                        // Convert to 4-bit 4096-color format
-                        val r4 = kotlin.math.max(0, kotlin.math.min(15, (r * 15 / 255)))
-                        val g4 = kotlin.math.max(0, kotlin.math.min(15, (g * 15 / 255)))
-                        val b4 = kotlin.math.max(0, kotlin.math.min(15, (b * 15 / 255)))
-                        
-                        val rgValue = (r4 shl 4) or g4      // R in MSB, G in LSB  
-                        val baValue = (b4 shl 4) or 15      // B in MSB, A=15 (opaque) in LSB
-                        
-                        // Fill 8x8 block
+                        // Fill 8x8 block with IDCT results
                        for (dy in 0 until 8) {
                            for (dx in 0 until 8) {
                                val x = startX + dx
                                val y = startY + dy
                                if (x < width && y < height) {
-                                    val offset = y.toLong() * width + x
-                                    vm.poke(rgPlaneAddr + offset*thisAddrIncVec, rgValue.toByte())
-                                    vm.poke(baPlaneAddr + offset*thisAddrIncVec, baValue.toByte())
+                                    val blockOffset = dy * 8 + dx
+                                    val imageOffset = y.toLong() * width + x
+                                    
+                                    // Get RGB values from IDCT results
+                                    val r = rBlock[blockOffset]
+                                    val g = gBlock[blockOffset]
+                                    val b = bBlock[blockOffset]
+                                    
+                                    // Convert to 4-bit 4096-color format
+                                    val r4 = kotlin.math.max(0, kotlin.math.min(15, (r * 15 / 255)))
+                                    val g4 = kotlin.math.max(0, kotlin.math.min(15, (g * 15 / 255)))
+                                    val b4 = kotlin.math.max(0, kotlin.math.min(15, (b * 15 / 255)))
+                                    
+                                    val rgValue = (r4 shl 4) or g4      // R in MSB, G in LSB  
+                                    val baValue = (b4 shl 4) or 15      // B in MSB, A=15 (opaque) in LSB
+                                    
+                                    vm.poke(rgPlaneAddr + imageOffset*thisAddrIncVec, rgValue.toByte())
+                                    vm.poke(baPlaneAddr + imageOffset*thisAddrIncVec, baValue.toByte())
                                }
                            }
                        }