diff --git a/assets/disk0/tvdos/bin/playtev.js b/assets/disk0/tvdos/bin/playtev.js index 8a0f729..45bdbe8 100644 --- a/assets/disk0/tvdos/bin/playtev.js +++ b/assets/disk0/tvdos/bin/playtev.js @@ -204,14 +204,58 @@ let stopPlay = false // Dequantize DCT coefficient function dequantizeCoeff(coeff, quant, isDC) { if (isDC) { - // DC coefficient represents the average pixel value - // It should be in range roughly -128 to +127 after dequantization - return coeff // No multiplication needed for DC + // DC coefficient also needs dequantization + return coeff * quant } else { return coeff * quant } } +// 8x8 Inverse DCT implementation +function idct8x8(coeffs, quantTable) { + const N = 8 + let block = new Array(64) + + // Dequantize coefficients + for (let i = 0; i < 64; i++) { + block[i] = dequantizeCoeff(coeffs[i], quantTable[i], i === 0) + } + + // IDCT constants + const cos = Math.cos + const sqrt2 = Math.sqrt(2) + const c = new Array(8) + c[0] = 1.0 / sqrt2 + for (let i = 1; i < 8; i++) { + c[i] = 1.0 + } + + let result = new Array(64) + + // 2D IDCT + for (let x = 0; x < N; x++) { + for (let y = 0; y < N; y++) { + let sum = 0.0 + for (let u = 0; u < N; u++) { + for (let v = 0; v < N; v++) { + let coeff = block[v * N + u] + let cosU = cos((2 * x + 1) * u * Math.PI / (2 * N)) + let cosV = cos((2 * y + 1) * v * Math.PI / (2 * N)) + sum += c[u] * c[v] * coeff * cosU * cosV + } + } + result[y * N + x] = sum / 4.0 + } + } + + // Convert to pixel values (0-255) + for (let i = 0; i < 64; i++) { + result[i] = Math.max(0, Math.min(255, Math.round(result[i] + 128))) + } + + return result +} + // Hardware-accelerated decoding uses graphics.tevIdct8x8() instead of pure JS // Hardware-accelerated TEV block decoder @@ -260,43 +304,43 @@ function decodeBlock(blockData, blockX, blockY, prevRG, prevBA, currRG, currBA, } } } else { - // INTRA or INTER modes: simplified DC-only decoding for debugging + // INTRA or INTER modes: Full DCT decoding - // Extract DC coefficients and convert to colors - let rCoeff = blockData.dctCoeffs[0 * 64 + 0] // R DC - let gCoeff = blockData.dctCoeffs[1 * 64 + 0] // G DC - let bCoeff = blockData.dctCoeffs[2 * 64 + 0] // B DC + // Extract DCT coefficients for each channel (R, G, B) + let rCoeffs = blockData.dctCoeffs.slice(0 * 64, 1 * 64) // R channel + let gCoeffs = blockData.dctCoeffs.slice(1 * 64, 2 * 64) // G channel + let bCoeffs = blockData.dctCoeffs.slice(2 * 64, 3 * 64) // B channel - // Dequantize DC coefficients - let rDC = dequantizeCoeff(rCoeff, quantTable[0], true) - let gDC = dequantizeCoeff(gCoeff, quantTable[0], true) - let bDC = dequantizeCoeff(bCoeff, quantTable[0], true) + // Perform IDCT for each channel + let rBlock = idct8x8(rCoeffs, quantTable) + let gBlock = idct8x8(gCoeffs, quantTable) + let bBlock = idct8x8(bCoeffs, quantTable) - // Convert to RGB values (DC represents average) - let r = Math.max(0, Math.min(255, rDC + 128)) - let g = Math.max(0, Math.min(255, gDC + 128)) - let b = Math.max(0, Math.min(255, bDC + 128)) - - // Convert to 4-bit values - let r4 = Math.max(0, Math.min(15, Math.round(r * 15 / 255))) - let g4 = Math.max(0, Math.min(15, Math.round(g * 15 / 255))) - let b4 = Math.max(0, Math.min(15, Math.round(b * 15 / 255))) - - let rgValue = (r4 << 4) | g4 // R in MSB, G in LSB - let baValue = (b4 << 4) | 15 // B in MSB, A=15 (opaque) in LSB - - // Software decoding (for fallback only) - - // Fill 8x8 block with solid color + // Fill 8x8 block with IDCT results for (let dy = 0; dy < BLOCK_SIZE; dy++) { for (let dx = 0; dx < BLOCK_SIZE; dx++) { let x = startX + dx let y = startY + dy if (x < width && y < height) { - let offset = y * width + x - // Normal memory plane assignments - sys.poke(currRG - offset, rgValue) // Graphics memory uses negative addressing - sys.poke(currBA - offset, baValue) + let blockOffset = dy * BLOCK_SIZE + dx + let imageOffset = y * width + x + + // Get RGB values from IDCT results + let r = rBlock[blockOffset] + let g = gBlock[blockOffset] + let b = bBlock[blockOffset] + + // Convert to 4-bit values + let r4 = Math.max(0, Math.min(15, Math.round(r * 15 / 255))) + let g4 = Math.max(0, Math.min(15, Math.round(g * 15 / 255))) + let b4 = Math.max(0, Math.min(15, Math.round(b * 15 / 255))) + + let rgValue = (r4 << 4) | g4 // R in MSB, G in LSB + let baValue = (b4 << 4) | 15 // B in MSB, A=15 (opaque) in LSB + + // Write to graphics memory + sys.poke(currRG - imageOffset, rgValue) // Graphics memory uses negative addressing + sys.poke(currBA - imageOffset, baValue) } } } diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index 23d7f72..13451a6 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -20,6 +20,47 @@ class GraphicsJSR223Delegate(private val vm: VM) { return vm.findPeribyType(VM.PERITYPE_GPU_AND_TERM)?.peripheral as? GraphicsAdapter } + /** + * Perform IDCT on a single channel with integer coefficients + */ + private fun performIDCT(coeffs: IntArray, quantTable: IntArray): IntArray { + // Use the same DCT basis as tevIdct8x8 + val dctBasis = Array(8) { u -> + Array(8) { x -> + val cu = if (u == 0) 1.0 / kotlin.math.sqrt(2.0) else 1.0 + cu * kotlin.math.cos((2.0 * x + 1.0) * u * kotlin.math.PI / 16.0) / 2.0 + } + } + + val dctCoeffs = Array(8) { DoubleArray(8) } + val result = IntArray(64) + + // Convert integer coefficients to 2D array and dequantize + for (u in 0 until 8) { + for (v in 0 until 8) { + val idx = u * 8 + v + val coeff = coeffs[idx] + dctCoeffs[u][v] = (coeff * quantTable[idx]).toDouble() + } + } + + // Apply 2D inverse DCT + for (x in 0 until 8) { + for (y in 0 until 8) { + var sum = 0.0 + for (u in 0 until 8) { + for (v in 0 until 8) { + sum += dctBasis[u][x] * dctBasis[v][y] * dctCoeffs[u][v] + } + } + val pixel = kotlin.math.max(0.0, kotlin.math.min(255.0, sum + 128.0)) + result[y * 8 + x] = pixel.toInt() + } + } + + return result + } + fun getGpuMemBase(): Int { return -1 - (1048576 * (vm.findPeriIndexByType(VM.PERITYPE_GPU_AND_TERM) ?: 0)) } @@ -1331,68 +1372,6 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } - /** - * Fast 8x8 inverse DCT optimized for video decompression - * @param dctPtr pointer to DCT coefficients (192 floats) - * @param blockPtr pointer to output RGB block (192 bytes) - */ - fun tevIdct8x8(dctPtr: Int, blockPtr: Int) { - val gpu = getFirstGPU() ?: return - - val dctBasis = Array(8) { u -> - Array(8) { x -> - val cu = if (u == 0) 1.0 / sqrt(2.0) else 1.0 - cu * cos((2.0 * x + 1.0) * u * PI / 16.0) / 2.0 - } - } - - val dctCoeffs = Array(3) { Array(8) { DoubleArray(8) } } - val block = Array(3) { Array(8) { DoubleArray(8) } } - - // Read DCT coefficients from memory - for (channel in 0..2) { - for (u in 0..7) { - for (v in 0..7) { - val offset = (channel * 64 + u * 8 + v) * 4 - val b0 = vm.peek(dctPtr.toLong() + offset)!! and -1 - val b1 = vm.peek(dctPtr.toLong() + offset + 1)!! and -1 - val b2 = vm.peek(dctPtr.toLong() + offset + 2)!! and -1 - val b3 = vm.peek(dctPtr.toLong() + offset + 3)!! and -1 - val floatBits = b0.toUint() or (b1.toUint() shl 8) or (b2.toUint() shl 16) or (b3.toUint() shl 24) - dctCoeffs[channel][u][v] = java.lang.Float.intBitsToFloat(floatBits).toDouble() - } - } - } - - // Apply 2D inverse DCT to each channel - for (channel in 0..2) { - for (x in 0..7) { - for (y in 0..7) { - var sum = 0.0 - for (u in 0..7) { - for (v in 0..7) { - sum += dctBasis[u][x] * dctBasis[v][y] * dctCoeffs[channel][u][v] - } - } - block[channel][y][x] = sum + 0.5 // Add back DC offset - } - } - } - - // Write RGB block to memory (clamped to 0-255) - for (y in 0..7) { - for (x in 0..7) { - val offset = (y * 8 + x) * 3 - val r = (clamp(block[0][y][x] * 255.0, 0.0, 255.0)).toInt() - val g = (clamp(block[1][y][x] * 255.0, 0.0, 255.0)).toInt() - val b = (clamp(block[2][y][x] * 255.0, 0.0, 255.0)).toInt() - - vm.poke(blockPtr.toLong() + offset, r.toByte()) - vm.poke(blockPtr.toLong() + offset + 1, g.toByte()) - vm.poke(blockPtr.toLong() + offset + 2, b.toByte()) - } - } - } /** * Motion compensation: copy 8x8 block with sub-pixel interpolation @@ -1733,34 +1712,41 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } - else -> { // TEV_MODE_INTRA (0x01) or TEV_MODE_INTER (0x02) - DCT decode - // Extract DC coefficients and dequantize - val rDC = dctCoeffs[0 * 64 + 0] // R channel DC - val gDC = dctCoeffs[1 * 64 + 0] // G channel DC - val bDC = dctCoeffs[2 * 64 + 0] // B channel DC + else -> { // TEV_MODE_INTRA (0x01) or TEV_MODE_INTER (0x02) - Full DCT decode + // Hardware-accelerated IDCT for all three channels + val rCoeffs = dctCoeffs.sliceArray(0 * 64 until 1 * 64) // R channel + val gCoeffs = dctCoeffs.sliceArray(1 * 64 until 2 * 64) // G channel + val bCoeffs = dctCoeffs.sliceArray(2 * 64 until 3 * 64) // B channel - // Convert DC to RGB (add 128 offset) - val r = kotlin.math.max(0, kotlin.math.min(255, rDC + 128)) - val g = kotlin.math.max(0, kotlin.math.min(255, gDC + 128)) - val b = kotlin.math.max(0, kotlin.math.min(255, bDC + 128)) + // Perform hardware IDCT for each channel + val rBlock = performIDCT(rCoeffs, quantTable) + val gBlock = performIDCT(gCoeffs, quantTable) + val bBlock = performIDCT(bCoeffs, quantTable) - // Convert to 4-bit 4096-color format - val r4 = kotlin.math.max(0, kotlin.math.min(15, (r * 15 / 255))) - val g4 = kotlin.math.max(0, kotlin.math.min(15, (g * 15 / 255))) - val b4 = kotlin.math.max(0, kotlin.math.min(15, (b * 15 / 255))) - - val rgValue = (r4 shl 4) or g4 // R in MSB, G in LSB - val baValue = (b4 shl 4) or 15 // B in MSB, A=15 (opaque) in LSB - - // Fill 8x8 block + // Fill 8x8 block with IDCT results for (dy in 0 until 8) { for (dx in 0 until 8) { val x = startX + dx val y = startY + dy if (x < width && y < height) { - val offset = y.toLong() * width + x - vm.poke(rgPlaneAddr + offset*thisAddrIncVec, rgValue.toByte()) - vm.poke(baPlaneAddr + offset*thisAddrIncVec, baValue.toByte()) + val blockOffset = dy * 8 + dx + val imageOffset = y.toLong() * width + x + + // Get RGB values from IDCT results + val r = rBlock[blockOffset] + val g = gBlock[blockOffset] + val b = bBlock[blockOffset] + + // Convert to 4-bit 4096-color format + val r4 = kotlin.math.max(0, kotlin.math.min(15, (r * 15 / 255))) + val g4 = kotlin.math.max(0, kotlin.math.min(15, (g * 15 / 255))) + val b4 = kotlin.math.max(0, kotlin.math.min(15, (b * 15 / 255))) + + val rgValue = (r4 shl 4) or g4 // R in MSB, G in LSB + val baValue = (b4 shl 4) or 15 // B in MSB, A=15 (opaque) in LSB + + vm.poke(rgPlaneAddr + imageOffset*thisAddrIncVec, rgValue.toByte()) + vm.poke(baPlaneAddr + imageOffset*thisAddrIncVec, baValue.toByte()) } } }