From 65537d2f34a0033c2f54edeb1e01483af74a897a Mon Sep 17 00:00:00 2001 From: minjaesong Date: Mon, 18 Aug 2025 12:38:06 +0900 Subject: [PATCH] mostly working TEV without mocomp --- assets/disk0/tvdos/bin/playtev.js | 81 +++++-- assets/disk0/tvdos/bin/zfm.js | 2 + .../torvald/tsvm/GraphicsJSR223Delegate.kt | 216 +++++++++++------- video_encoder/encoder_tev.c | 129 +++++------ 4 files changed, 250 insertions(+), 178 deletions(-) diff --git a/assets/disk0/tvdos/bin/playtev.js b/assets/disk0/tvdos/bin/playtev.js index 45bdbe8..a0d1875 100644 --- a/assets/disk0/tvdos/bin/playtev.js +++ b/assets/disk0/tvdos/bin/playtev.js @@ -178,24 +178,35 @@ let frameTime = 1.0 / fps // Ultra-fast approach: always render to display, use dedicated previous frame buffer const FRAME_PIXELS = width * height -// Always render directly to display memory for immediate visibility -const CURRENT_RG_ADDR = -1048577 // Main graphics RG plane (displayed) -const CURRENT_BA_ADDR = -1310721 // Main graphics BA plane (displayed) +// Frame buffer addresses for graphics display +const DISPLAY_RG_ADDR = -1048577 // Main graphics RG plane (displayed) +const DISPLAY_BA_ADDR = -1310721 // Main graphics BA plane (displayed) -// Dedicated previous frame buffer for reference (peripheral slot 2) -const PREV_RG_ADDR = sys.malloc(560*448) // Slot 2 RG plane -const PREV_BA_ADDR = sys.malloc(560*448) // Slot 2 BA plane +// RGB frame buffers (24-bit: R,G,B per pixel) +const CURRENT_RGB_ADDR = sys.malloc(560*448*3) // Current frame RGB buffer +const PREV_RGB_ADDR = sys.malloc(560*448*3) // Previous frame RGB buffer // Working memory for blocks (minimal allocation) let rgbWorkspace = sys.malloc(BLOCK_SIZE * BLOCK_SIZE * 3) // 192 bytes let dctWorkspace = sys.malloc(BLOCK_SIZE * BLOCK_SIZE * 3 * 4) // 768 bytes (floats) -// Initialize both frame buffers to black with alpha=15 (opaque) +// Initialize RGB frame buffers to black (0,0,0) for (let i = 0; i < FRAME_PIXELS; i++) { - sys.poke(CURRENT_RG_ADDR - i, 0) - sys.poke(CURRENT_BA_ADDR - i, 15) // Alpha = 15 (opaque) - sys.poke(PREV_RG_ADDR + i, 0) - sys.poke(PREV_BA_ADDR + i, 15) // Alpha = 15 (opaque) + // Current frame RGB: black + sys.poke(CURRENT_RGB_ADDR + i*3, 0) // R + sys.poke(CURRENT_RGB_ADDR + i*3 + 1, 0) // G + sys.poke(CURRENT_RGB_ADDR + i*3 + 2, 0) // B + + // Previous frame RGB: black + sys.poke(PREV_RGB_ADDR + i*3, 0) // R + sys.poke(PREV_RGB_ADDR + i*3 + 1, 0) // G + sys.poke(PREV_RGB_ADDR + i*3 + 2, 0) // B +} + +// Initialize display framebuffer to black +for (let i = 0; i < FRAME_PIXELS; i++) { + sys.poke(DISPLAY_RG_ADDR - i, 0) // Black in RG plane + sys.poke(DISPLAY_BA_ADDR - i, 15) // Black with alpha=15 (opaque) in BA plane } let frameCount = 0 @@ -211,6 +222,27 @@ function dequantizeCoeff(coeff, quant, isDC) { } } +// 4x4 Bayer dithering matrix +const BAYER_MATRIX = [ + [ 0, 8, 2,10], + [12, 4,14, 6], + [ 3,11, 1, 9], + [15, 7,13, 5] +] + +// Apply Bayer dithering to reduce banding when quantizing to 4-bit +function ditherValue(value, x, y) { + // Get the dither threshold for this pixel position + const threshold = BAYER_MATRIX[y & 3][x & 3] + + // Scale threshold from 0-15 to 0-15.9375 (16 steps over 16 values) + const scaledThreshold = threshold / 16.0 + + // Add dither and quantize to 4-bit (0-15) + const dithered = value + scaledThreshold + return Math.max(0, Math.min(15, Math.floor(dithered * 15 / 255))) +} + // 8x8 Inverse DCT implementation function idct8x8(coeffs, quantTable) { const N = 8 @@ -330,10 +362,10 @@ function decodeBlock(blockData, blockX, blockY, prevRG, prevBA, currRG, currBA, let g = gBlock[blockOffset] let b = bBlock[blockOffset] - // Convert to 4-bit values - let r4 = Math.max(0, Math.min(15, Math.round(r * 15 / 255))) - let g4 = Math.max(0, Math.min(15, Math.round(g * 15 / 255))) - let b4 = Math.max(0, Math.min(15, Math.round(b * 15 / 255))) + // Apply Bayer dithering when converting to 4-bit values + let r4 = ditherValue(r, x, y) + let g4 = ditherValue(g, x, y) + let b4 = ditherValue(b, x, y) let rgValue = (r4 << 4) | g4 // R in MSB, G in LSB let baValue = (b4 << 4) | 15 // B in MSB, A=15 (opaque) in LSB @@ -368,10 +400,9 @@ try { // Sync packet - frame complete frameCount++ - // Copy current display frame to previous frame buffer for next frame reference + // Copy current RGB frame to previous frame buffer for next frame reference // This is the only copying we need, and it happens once per frame after display - sys.memcpy(CURRENT_RG_ADDR, PREV_RG_ADDR, FRAME_PIXELS) - sys.memcpy(CURRENT_BA_ADDR, PREV_BA_ADDR, FRAME_PIXELS) + sys.memcpy(CURRENT_RGB_ADDR, PREV_RGB_ADDR, FRAME_PIXELS * 3) } else if ((packetType & 0xFF) == TEV_PACKET_IFRAME || (packetType & 0xFF) == TEV_PACKET_PFRAME) { // Video frame packet @@ -409,10 +440,14 @@ try { // Hardware decode complete - // Hardware-accelerated TEV decoding (blazing fast!) + // Hardware-accelerated TEV decoding to RGB buffers (blazing fast!) try { - graphics.tevDecode(blockDataPtr, CURRENT_RG_ADDR, CURRENT_BA_ADDR, - width, height, quality, PREV_RG_ADDR, PREV_BA_ADDR) + graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, + width, height, quality) + + // Upload RGB buffer to display framebuffer with dithering + graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, DISPLAY_RG_ADDR, DISPLAY_BA_ADDR, + width, height, frameCount) } catch (e) { serial.println(`Frame ${frameCount}: Hardware decode failed: ${e}`) } @@ -446,8 +481,8 @@ try { // Cleanup working memory (graphics memory is automatically managed) sys.free(rgbWorkspace) sys.free(dctWorkspace) - sys.free(PREV_RG_ADDR) - sys.free(PREV_BA_ADDR) + sys.free(CURRENT_RGB_ADDR) + sys.free(PREV_RGB_ADDR) audio.stop(0) diff --git a/assets/disk0/tvdos/bin/zfm.js b/assets/disk0/tvdos/bin/zfm.js index 326497c..d65146a 100644 --- a/assets/disk0/tvdos/bin/zfm.js +++ b/assets/disk0/tvdos/bin/zfm.js @@ -29,6 +29,7 @@ const COL_HL_EXT = { "mp3": 33, "mp2": 34, "mov": 213, + "mv2": 213, "ipf1": 190, "ipf2": 191, "txt": 223, @@ -41,6 +42,7 @@ const EXEC_FUNS = { "mp3": (f) => _G.shell.execute(`playmp3 "${f}" -i`), "mp2": (f) => _G.shell.execute(`playmp2 "${f}" -i`), "mov": (f) => _G.shell.execute(`playmov "${f}" -i`), + "mv2": (f) => _G.shell.execute(`playtev "${f}" -i`), "pcm": (f) => _G.shell.execute(`playpcm "${f}" -i`), "ipf1": (f) => _G.shell.execute(`decodeipf "${f}" -i`), "ipf2": (f) => _G.shell.execute(`decodeipf "${f}" -i`), diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index 13451a6..69e8c70 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -6,8 +6,6 @@ import net.torvald.UnsafeHelper import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUint import net.torvald.tsvm.peripheral.GraphicsAdapter import net.torvald.tsvm.peripheral.fmod -import kotlin.experimental.and -import kotlin.experimental.or import kotlin.math.abs import kotlin.math.roundToInt import kotlin.math.cos @@ -20,10 +18,58 @@ class GraphicsJSR223Delegate(private val vm: VM) { return vm.findPeribyType(VM.PERITYPE_GPU_AND_TERM)?.peripheral as? GraphicsAdapter } + /** + * Upload RGB frame buffer to graphics framebuffer with dithering + * @param rgbAddr Source RGB buffer (24-bit: R,G,B bytes) + * @param rgPlaneAddr Destination RG framebuffer + * @param baPlaneAddr Destination BA framebuffer + * @param width Frame width + * @param height Frame height + */ + fun uploadRGBToFramebuffer(rgbAddr: Long, rgPlaneAddr: Long, baPlaneAddr: Long, width: Int, height: Int, frameCounter: Int) { + val rgAddrIncVec = if (rgPlaneAddr >= 0) 1 else -1 + val baAddrIncVec = if (baPlaneAddr >= 0) 1 else -1 + val rgbAddrIncVec = if (rgbAddr >= 0) 1 else -1 + + for (y in 0 until height) { + for (x in 0 until width) { + val pixelOffset = y.toLong() * width + x + val rgbOffset = pixelOffset * 3 * rgbAddrIncVec + + // Read RGB values + val r = vm.peek(rgbAddr + rgbOffset)!!.toUint() + val g = vm.peek(rgbAddr + rgbOffset + rgbAddrIncVec)!!.toUint() + val b = vm.peek(rgbAddr + rgbOffset + rgbAddrIncVec * 2)!!.toUint() + + // Apply Bayer dithering and convert to 4-bit + val r4 = ditherValue(r, x, y, frameCounter) + val g4 = ditherValue(g, x, y, frameCounter) + val b4 = ditherValue(b, x, y, frameCounter) + + // Pack into 4096-color format + val rgValue = (r4 shl 4) or g4 // R in MSB, G in LSB + val baValue = (b4 shl 4) or 15 // B in MSB, A=15 (opaque) in LSB + + // Write to framebuffer + vm.poke(rgPlaneAddr + pixelOffset * rgAddrIncVec, rgValue.toByte()) + vm.poke(baPlaneAddr + pixelOffset * baAddrIncVec, baValue.toByte()) + } + } + } + + /** + * Apply Bayer dithering to reduce banding when quantizing to 4-bit + */ + private fun ditherValue(value: Int, x: Int, y: Int, f: Int): Int { + val t = bayerKernels[f % 4][4 * (y % 4) + (x % 4)] // use rotating bayerKernel to time-dither the static pattern for even better visuals + val q = floor((t / 15f + (value / 255f)) * 15f) / 15f + return round(15f * q) + } + /** * Perform IDCT on a single channel with integer coefficients */ - private fun performIDCT(coeffs: IntArray, quantTable: IntArray): IntArray { + private fun tevIdct8x8(coeffs: IntArray, quantTable: IntArray): IntArray { // Use the same DCT basis as tevIdct8x8 val dctBasis = Array(8) { u -> Array(8) { x -> @@ -845,8 +891,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { } // Co (bytes 0–1): 4 nybbles - val coA = (a[0].toInt() and 0xFF) or ((a[1].toInt() and 0xFF) shl 8) - val coB = (b[0].toInt() and 0xFF) or ((b[1].toInt() and 0xFF) shl 8) + val coA = (a[0].toUint()) or ((a[1].toUint()) shl 8) + val coB = (b[0].toUint()) or ((b[1].toUint()) shl 8) for (i in 0 until 4) { val va = (coA shr (i * 4)) and 0xF val vb = (coB shr (i * 4)) and 0xF @@ -855,8 +901,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { } // Cg (bytes 2–3): 4 nybbles - val cgA = (a[2].toInt() and 0xFF) or ((a[3].toInt() and 0xFF) shl 8) - val cgB = (b[2].toInt() and 0xFF) or ((b[3].toInt() and 0xFF) shl 8) + val cgA = (a[2].toUint()) or ((a[3].toUint()) shl 8) + val cgB = (b[2].toUint()) or ((b[3].toUint()) shl 8) for (i in 0 until 4) { val va = (cgA shr (i * 4)) and 0xF val vb = (cgB shr (i * 4)) and 0xF @@ -866,8 +912,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Y (bytes 4–9): 16 nybbles for (i in 4 until 10) { - val byteA = a[i].toInt() and 0xFF - val byteB = b[i].toInt() and 0xFF + val byteA = a[i].toUint() + val byteB = b[i].toUint() val yAHigh = (byteA shr 4) and 0xF val yALow = byteA and 0xF @@ -1127,7 +1173,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { var ptr = ipf1DeltaPtr.toLong() var blockIndex = 0 - fun readByte(): Int = vm.peek(ptr++)!!.toInt() and 0xFF + fun readByte(): Int = vm.peek(ptr++)!!.toUint() fun readShort(): Int { val low = readByte() val high = readByte() @@ -1331,9 +1377,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { for (y in 0..7) { for (x in 0..7) { val offset = (y * 8 + x) * 3 - val r = vm.peek(blockPtr.toLong() + offset)!! and -1 - val g = vm.peek(blockPtr.toLong() + offset + 1)!! and -1 - val b = vm.peek(blockPtr.toLong() + offset + 2)!! and -1 + val r = vm.peek(blockPtr.toLong() + offset)!!.toUint() + val g = vm.peek(blockPtr.toLong() + offset + 1)!!.toUint() + val b = vm.peek(blockPtr.toLong() + offset + 2)!!.toUint() // Convert to 0-1 range and center around 0 block[0][y][x] = (r / 255.0) - 0.5 @@ -1415,15 +1461,15 @@ class GraphicsJSR223Delegate(private val vm: VM) { val srcOffset10 = (iy + 1) * width + ix val srcOffset11 = (iy + 1) * width + (ix + 1) - val rg00 = vm.peek(srcRG.toLong() + srcOffset00)!! and -1 - val rg01 = vm.peek(srcRG.toLong() + srcOffset01)!! and -1 - val rg10 = vm.peek(srcRG.toLong() + srcOffset10)!! and -1 - val rg11 = vm.peek(srcRG.toLong() + srcOffset11)!! and -1 + val rg00 = vm.peek(srcRG.toLong() + srcOffset00)!!.toUint() + val rg01 = vm.peek(srcRG.toLong() + srcOffset01)!!.toUint() + val rg10 = vm.peek(srcRG.toLong() + srcOffset10)!!.toUint() + val rg11 = vm.peek(srcRG.toLong() + srcOffset11)!!.toUint() - val ba00 = vm.peek(srcBA.toLong() + srcOffset00)!! and -1 - val ba01 = vm.peek(srcBA.toLong() + srcOffset01)!! and -1 - val ba10 = vm.peek(srcBA.toLong() + srcOffset10)!! and -1 - val ba11 = vm.peek(srcBA.toLong() + srcOffset11)!! and -1 + val ba00 = vm.peek(srcBA.toLong() + srcOffset00)!!.toUint() + val ba01 = vm.peek(srcBA.toLong() + srcOffset01)!!.toUint() + val ba10 = vm.peek(srcBA.toLong() + srcOffset10)!!.toUint() + val ba11 = vm.peek(srcBA.toLong() + srcOffset11)!!.toUint() // Bilinear interpolation val rgTop = rg00 * (1 - fx) + rg01 * fx @@ -1453,24 +1499,24 @@ class GraphicsJSR223Delegate(private val vm: VM) { * @param blockX block X coordinate (in 8-pixel units) * @param blockY block Y coordinate (in 8-pixel units) */ - fun tevRgbTo4096(rgbPtr: Int, destRG: Int, destBA: Int, blockX: Int, blockY: Int) { + fun tevRgbTo4096(rgbPtr: Int, destRG: Int, destBA: Int, blockX: Int, blockY: Int, frameCounter: Int) { val gpu = getFirstGPU() ?: return val width = gpu.config.width for (y in 0..7) { for (x in 0..7) { val rgbOffset = (y * 8 + x) * 3 - val r = vm.peek(rgbPtr.toLong() + rgbOffset)!! and -1 - val g = vm.peek(rgbPtr.toLong() + rgbOffset + 1)!! and -1 - val b = vm.peek(rgbPtr.toLong() + rgbOffset + 2)!! and -1 - - // Convert to 4-bit per channel (4096 colors) - val r4 = (r * 15 + 127) / 255 - val g4 = (g * 15 + 127) / 255 - val b4 = (b * 15 + 127) / 255 + val r = vm.peek(rgbPtr.toLong() + rgbOffset)!!.toUint() + val g = vm.peek(rgbPtr.toLong() + rgbOffset + 1)!!.toUint() + val b = vm.peek(rgbPtr.toLong() + rgbOffset + 2)!!.toUint() val pixelX = blockX * 8 + x val pixelY = blockY * 8 + y + + // Convert to 4-bit per channel with dithering (4096 colors) + val r4 = ditherValue(r, pixelX, pixelY, frameCounter) + val g4 = ditherValue(g, pixelX, pixelY, frameCounter) + val b4 = ditherValue(b, pixelX, pixelY, frameCounter) val destOffset = pixelY * width + pixelX if (pixelX < width && pixelY < gpu.config.height) { @@ -1523,10 +1569,10 @@ class GraphicsJSR223Delegate(private val vm: VM) { val curOffset = (startY + dy) * width + (startX + dx) val refOffset = (refStartY + dy) * width + (refStartX + dx) - val curRG = vm.peek(curRG.toLong() + curOffset)!! and -1 - val curBA = vm.peek(curBA.toLong() + curOffset)!! and -1 - val refRGVal = vm.peek(refRG.toLong() + refOffset)!! and -1 - val refBAVal = vm.peek(refBA.toLong() + refOffset)!! and -1 + val curRG = vm.peek(curRG.toLong() + curOffset)!!.toUint() + val curBA = vm.peek(curBA.toLong() + curOffset)!!.toUint() + val refRGVal = vm.peek(refRG.toLong() + refOffset)!!.toUint() + val refBAVal = vm.peek(refBA.toLong() + refOffset)!!.toUint() sad += abs((curRG and -16) - (refRGVal and -16)) + // R abs((curRG and 0x0F) - (refRGVal and 0x0F)) + // G @@ -1622,18 +1668,15 @@ class GraphicsJSR223Delegate(private val vm: VM) { * Decodes compressed TEV block data directly to framebuffer * * @param blockDataPtr Pointer to decompressed TEV block data - * @param rgPlaneAddr Address of RG plane in memory (can target the graphics hardware) - * @param baPlaneAddr Address of BA plane in memory (can target the graphics hardware) + * @param currentRGBAddr Address of current frame RGB buffer (24-bit: R,G,B per pixel) + * @param prevRGBAddr Address of previous frame RGB buffer (for motion compensation) * @param width Frame width in pixels * @param height Frame height in pixels - * @param prevRGAddr Previous frame RG plane (for motion compensation) - * @param prevBAAddr Previous frame BA plane (for motion compensation) + * @param quality Quantization quality level (0-7) + * @param frameCounter Frame counter for temporal patterns */ - fun tevDecode(blockDataPtr: Long, rgPlaneAddr: Long, baPlaneAddr: Long, - width: Int, height: Int, quality: Int, prevRGAddr: Long, prevBAAddr: Long) { - - assert(rgPlaneAddr * baPlaneAddr >= 0) { "RG and BA plane must be on a same memory scope (got $rgPlaneAddr, $baPlaneAddr)" } - assert(prevRGAddr * prevBAAddr >= 0) { "Prev RG and BA plane must be on a same memory scope (got $prevRGAddr, $prevBAAddr)" } + fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long, + width: Int, height: Int, quality: Int) { val blocksX = (width + 7) / 8 val blocksY = (height + 7) / 8 @@ -1642,9 +1685,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { var readPtr = blockDataPtr - // decide increment "direction" by the sign of the pointer - val prevAddrIncVec = if (prevRGAddr >= 0) 1 else -1 - val thisAddrIncVec = if (rgPlaneAddr >= 0) 1 else -1 + // decide increment "direction" by the sign of the pointer + val prevAddrIncVec = if (prevRGBAddr >= 0) 1 else -1 + val thisAddrIncVec = if (currentRGBAddr >= 0) 1 else -1 for (by in 0 until blocksY) { for (bx in 0 until blocksX) { @@ -1652,40 +1695,46 @@ class GraphicsJSR223Delegate(private val vm: VM) { val startY = by * 8 // Read TEV block header (7 bytes) - val mode = vm.peek(readPtr)!!.toInt() and 0xFF - val mvX = ((vm.peek(readPtr + 1)!!.toInt() and 0xFF) or - ((vm.peek(readPtr + 2)!!.toInt() and 0xFF) shl 8)).toShort().toInt() - val mvY = ((vm.peek(readPtr + 3)!!.toInt() and 0xFF) or - ((vm.peek(readPtr + 4)!!.toInt() and 0xFF) shl 8)).toShort().toInt() + val mode = vm.peek(readPtr)!!.toUint() + val mvX = ((vm.peek(readPtr + 1)!!.toUint()) or + ((vm.peek(readPtr + 2)!!.toUint()) shl 8)).toShort().toInt() + val mvY = ((vm.peek(readPtr + 3)!!.toUint()) or + ((vm.peek(readPtr + 4)!!.toUint()) shl 8)).toShort().toInt() readPtr += 7 // Skip CBP field // Read DCT coefficients (3 channels × 64 coefficients × 2 bytes) val dctCoeffs = IntArray(3 * 64) for (i in 0 until 3 * 64) { - val coeff = ((vm.peek(readPtr)!!.toInt() and 0xFF) or - ((vm.peek(readPtr + 1)!!.toInt() and 0xFF) shl 8)).toShort().toInt() + val coeff = ((vm.peek(readPtr)!!.toUint()) or + ((vm.peek(readPtr + 1)!!.toUint()) shl 8)).toShort().toInt() dctCoeffs[i] = coeff readPtr += 2 } when (mode) { - 0x00 -> { // TEV_MODE_SKIP - copy from previous frame + 0x00 -> { // TEV_MODE_SKIP - copy RGB from previous frame for (dy in 0 until 8) { for (dx in 0 until 8) { val x = startX + dx val y = startY + dy if (x < width && y < height) { - val offset = y.toLong() * width + x - val prevRG = vm.peek(prevRGAddr + offset*prevAddrIncVec)!!.toInt() and 0xFF - val prevBA = vm.peek(prevBAAddr + offset*prevAddrIncVec)!!.toInt() and 0xFF - vm.poke(rgPlaneAddr + offset*thisAddrIncVec, prevRG.toByte()) - vm.poke(baPlaneAddr + offset*thisAddrIncVec, prevBA.toByte()) + val pixelOffset = y.toLong() * width + x + val rgbOffset = pixelOffset * 3 + + // Copy RGB values from previous frame + val prevR = vm.peek(prevRGBAddr + rgbOffset*prevAddrIncVec)!! + val prevG = vm.peek(prevRGBAddr + (rgbOffset + 1)*prevAddrIncVec)!! + val prevB = vm.peek(prevRGBAddr + (rgbOffset + 2)*prevAddrIncVec)!! + + vm.poke(currentRGBAddr + rgbOffset*thisAddrIncVec, prevR) + vm.poke(currentRGBAddr + (rgbOffset + 1)*thisAddrIncVec, prevG) + vm.poke(currentRGBAddr + (rgbOffset + 2)*thisAddrIncVec, prevB) } } } } - 0x03 -> { // TEV_MODE_MOTION - motion compensation + 0x03 -> { // TEV_MODE_MOTION - motion compensation with RGB for (dy in 0 until 8) { for (dx in 0 until 8) { val x = startX + dx @@ -1694,18 +1743,26 @@ class GraphicsJSR223Delegate(private val vm: VM) { val refY = y + mvY if (x < width && y < height) { - val dstOffset = y.toLong() * width + x + val dstPixelOffset = y.toLong() * width + x + val dstRgbOffset = dstPixelOffset * 3 if (refX in 0 until width && refY in 0 until height) { - val refOffset = refY.toLong() * width + refX - val refRG = vm.peek(prevRGAddr + refOffset*prevAddrIncVec)!!.toInt() and 0xFF - val refBA = vm.peek(prevBAAddr + refOffset*prevAddrIncVec)!!.toInt() and 0xFF - vm.poke(rgPlaneAddr + dstOffset*thisAddrIncVec, refRG.toByte()) - vm.poke(baPlaneAddr + dstOffset*thisAddrIncVec, refBA.toByte()) + val refPixelOffset = refY.toLong() * width + refX + val refRgbOffset = refPixelOffset * 3 + + // Copy RGB from reference position + val refR = vm.peek(prevRGBAddr + refRgbOffset*prevAddrIncVec)!! + val refG = vm.peek(prevRGBAddr + (refRgbOffset + 1)*prevAddrIncVec)!! + val refB = vm.peek(prevRGBAddr + (refRgbOffset + 2)*prevAddrIncVec)!! + + vm.poke(currentRGBAddr + dstRgbOffset*thisAddrIncVec, refR) + vm.poke(currentRGBAddr + (dstRgbOffset + 1)*thisAddrIncVec, refG) + vm.poke(currentRGBAddr + (dstRgbOffset + 2)*thisAddrIncVec, refB) } else { // Out of bounds - use black - vm.poke(rgPlaneAddr + dstOffset*thisAddrIncVec, 0.toByte()) - vm.poke(baPlaneAddr + dstOffset*thisAddrIncVec, 15.toByte()) // Alpha=15 + vm.poke(currentRGBAddr + dstRgbOffset*thisAddrIncVec, 0.toByte()) // R=0 + vm.poke(currentRGBAddr + (dstRgbOffset + 1)*thisAddrIncVec, 0.toByte()) // G=0 + vm.poke(currentRGBAddr + (dstRgbOffset + 2)*thisAddrIncVec, 0.toByte()) // B=0 } } } @@ -1719,9 +1776,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { val bCoeffs = dctCoeffs.sliceArray(2 * 64 until 3 * 64) // B channel // Perform hardware IDCT for each channel - val rBlock = performIDCT(rCoeffs, quantTable) - val gBlock = performIDCT(gCoeffs, quantTable) - val bBlock = performIDCT(bCoeffs, quantTable) + val rBlock = tevIdct8x8(rCoeffs, quantTable) + val gBlock = tevIdct8x8(gCoeffs, quantTable) + val bBlock = tevIdct8x8(bCoeffs, quantTable) // Fill 8x8 block with IDCT results for (dy in 0 until 8) { @@ -1736,17 +1793,12 @@ class GraphicsJSR223Delegate(private val vm: VM) { val r = rBlock[blockOffset] val g = gBlock[blockOffset] val b = bBlock[blockOffset] - - // Convert to 4-bit 4096-color format - val r4 = kotlin.math.max(0, kotlin.math.min(15, (r * 15 / 255))) - val g4 = kotlin.math.max(0, kotlin.math.min(15, (g * 15 / 255))) - val b4 = kotlin.math.max(0, kotlin.math.min(15, (b * 15 / 255))) - - val rgValue = (r4 shl 4) or g4 // R in MSB, G in LSB - val baValue = (b4 shl 4) or 15 // B in MSB, A=15 (opaque) in LSB - - vm.poke(rgPlaneAddr + imageOffset*thisAddrIncVec, rgValue.toByte()) - vm.poke(baPlaneAddr + imageOffset*thisAddrIncVec, baValue.toByte()) + + // Store full 8-bit RGB values to RGB buffer + val rgbOffset = imageOffset * 3 + vm.poke(currentRGBAddr + rgbOffset*thisAddrIncVec, r.toByte()) + vm.poke(currentRGBAddr + (rgbOffset + 1)*thisAddrIncVec, g.toByte()) + vm.poke(currentRGBAddr + (rgbOffset + 2)*thisAddrIncVec, b.toByte()) } } } diff --git a/video_encoder/encoder_tev.c b/video_encoder/encoder_tev.c index f45b9aa..64c5e7d 100644 --- a/video_encoder/encoder_tev.c +++ b/video_encoder/encoder_tev.c @@ -132,10 +132,8 @@ typedef struct { int output_to_stdout; int quality; // 0-7, higher = better quality - // Frame buffers (4096-color format: R|G, B|A byte planes) - uint8_t *current_rg, *current_ba; - uint8_t *previous_rg, *previous_ba; - uint8_t *reference_rg, *reference_ba; + // Frame buffers (8-bit RGB format for encoding) + uint8_t *current_rgb, *previous_rgb, *reference_rgb; // Encoding workspace uint8_t *rgb_workspace; // 8x8 RGB blocks (192 bytes) @@ -178,21 +176,9 @@ static int16_t quantize_coeff(float coeff, uint8_t quant, int is_dc) { // Currently using simplified encoding logic // Convert RGB to 4096-color format -static void rgb_to_4096(uint8_t *rgb, uint8_t *rg, uint8_t *ba, int pixels) { - for (int i = 0; i < pixels; i++) { - uint8_t r = rgb[i * 3]; - uint8_t g = rgb[i * 3 + 1]; - uint8_t b = rgb[i * 3 + 2]; - - // Convert RGB to 4-bit per channel for full color - uint8_t r4 = (r * 15 + 127) / 255; - uint8_t g4 = (g * 15 + 127) / 255; - uint8_t b4 = (b * 15 + 127) / 255; - - // Correct 4096-color format: R,G in MSBs, B,A in MSBs - with alpha=15 for opaque - rg[i] = (r4 << 4) | g4; // R in MSB, G in LSB - ba[i] = (b4 << 4) | 15; // B in MSB, A=15 (opaque) in LSB - } +static void copy_rgb_frame(uint8_t *rgb_input, uint8_t *rgb_frame, int pixels) { + // Copy input RGB data to frame buffer (preserving full 8-bit precision) + memcpy(rgb_frame, rgb_input, pixels * 3); } // Simple motion estimation (full search) @@ -224,15 +210,15 @@ static void estimate_motion(tev_encoder_t *enc, int block_x, int block_y, int cur_offset = (start_y + dy) * enc->width + (start_x + dx); int ref_offset = (ref_y + dy) * enc->width + (ref_x + dx); - int cur_rg = enc->current_rg[cur_offset]; - int cur_ba = enc->current_ba[cur_offset]; - int ref_rg = enc->previous_rg[ref_offset]; - int ref_ba = enc->previous_ba[ref_offset]; + int cur_r = enc->current_rgb[cur_offset * 3]; + int cur_g = enc->current_rgb[cur_offset * 3 + 1]; + int cur_b = enc->current_rgb[cur_offset * 3 + 2]; + int ref_r = enc->previous_rgb[ref_offset * 3]; + int ref_g = enc->previous_rgb[ref_offset * 3 + 1]; + int ref_b = enc->previous_rgb[ref_offset * 3 + 2]; - // SAD on 4-bit channels - sad += abs((cur_rg >> 4) - (ref_rg >> 4)) + // R - abs((cur_rg & 0xF) - (ref_rg & 0xF)) + // G - abs((cur_ba >> 4) - (ref_ba >> 4)); // B + // SAD on 8-bit RGB channels + sad += abs(cur_r - ref_r) + abs(cur_g - ref_g) + abs(cur_b - ref_b); } } @@ -251,6 +237,9 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke int block_idx = block_y * ((enc->width + 7) / 8) + block_x; tev_block_t *block = &enc->block_data[block_idx]; + int start_x = block_x * BLOCK_SIZE; + int start_y = block_y * BLOCK_SIZE; + // Extract 8x8 RGB block from current frame for (int y = 0; y < BLOCK_SIZE; y++) { for (int x = 0; x < BLOCK_SIZE; x++) { @@ -260,13 +249,10 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke if (pixel_x < enc->width && pixel_y < enc->height) { int frame_offset = pixel_y * enc->width + pixel_x; - uint8_t rg = enc->current_rg[frame_offset]; - uint8_t ba = enc->current_ba[frame_offset]; - - // Convert back to RGB for DCT - enc->rgb_workspace[offset] = ((rg >> 4) & 0xF) * 255 / 15; // R - enc->rgb_workspace[offset + 1] = (rg & 0xF) * 255 / 15; // G - enc->rgb_workspace[offset + 2] = ((ba >> 4) & 0xF) * 255 / 15; // B + // Copy RGB data directly (preserving full 8-bit precision) + enc->rgb_workspace[offset] = enc->current_rgb[frame_offset * 3]; // R + enc->rgb_workspace[offset + 1] = enc->current_rgb[frame_offset * 3 + 1]; // G + enc->rgb_workspace[offset + 2] = enc->current_rgb[frame_offset * 3 + 2]; // B } else { // Pad with black enc->rgb_workspace[offset] = 0; @@ -286,17 +272,24 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke } else { // Try different modes and pick the best - // Try SKIP mode + // Try SKIP mode - compare with previous frame int skip_sad = 0; - for (int i = 0; i < BLOCK_SIZE * BLOCK_SIZE; i++) { - int cur_rg = enc->current_rg[i]; - int cur_ba = enc->current_ba[i]; - int prev_rg = enc->previous_rg[i]; - int prev_ba = enc->previous_ba[i]; - - skip_sad += abs((cur_rg >> 4) - (prev_rg >> 4)) + - abs((cur_rg & 0xF) - (prev_rg & 0xF)) + - abs((cur_ba >> 4) - (prev_ba >> 4)); + for (int dy = 0; dy < BLOCK_SIZE; dy++) { + for (int dx = 0; dx < BLOCK_SIZE; dx++) { + int pixel_x = start_x + dx; + int pixel_y = start_y + dy; + if (pixel_x < enc->width && pixel_y < enc->height) { + int offset = pixel_y * enc->width + pixel_x; + int cur_r = enc->current_rgb[offset * 3]; + int cur_g = enc->current_rgb[offset * 3 + 1]; + int cur_b = enc->current_rgb[offset * 3 + 2]; + int prev_r = enc->previous_rgb[offset * 3]; + int prev_g = enc->previous_rgb[offset * 3 + 1]; + int prev_b = enc->previous_rgb[offset * 3 + 2]; + + skip_sad += abs(cur_r - prev_r) + abs(cur_g - prev_g) + abs(cur_b - prev_b); + } + } } if (skip_sad < 8) { // Much stricter threshold for SKIP @@ -323,14 +316,14 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke int cur_offset = cur_y * enc->width + cur_x; int ref_offset = ref_y * enc->width + ref_x; - uint8_t cur_rg = enc->current_rg[cur_offset]; - uint8_t cur_ba = enc->current_ba[cur_offset]; - uint8_t ref_rg = enc->previous_rg[ref_offset]; - uint8_t ref_ba = enc->previous_ba[ref_offset]; + uint8_t cur_r = enc->current_rgb[cur_offset * 3]; + uint8_t cur_g = enc->current_rgb[cur_offset * 3 + 1]; + uint8_t cur_b = enc->current_rgb[cur_offset * 3 + 2]; + uint8_t ref_r = enc->previous_rgb[ref_offset * 3]; + uint8_t ref_g = enc->previous_rgb[ref_offset * 3 + 1]; + uint8_t ref_b = enc->previous_rgb[ref_offset * 3 + 2]; - motion_sad += abs((cur_rg >> 4) - (ref_rg >> 4)) + - abs((cur_rg & 0xF) - (ref_rg & 0xF)) + - abs((cur_ba >> 4) - (ref_ba >> 4)); + motion_sad += abs(cur_r - ref_r) + abs(cur_g - ref_g) + abs(cur_b - ref_b); } else { motion_sad += 48; // Penalty for out-of-bounds reference } @@ -570,7 +563,7 @@ static int process_frame(tev_encoder_t *enc, int frame_num, FILE *output) { } // Convert to 4096-color format - rgb_to_4096(rgb_buffer, enc->current_rg, enc->current_ba, enc->width * enc->height); + copy_rgb_frame(rgb_buffer, enc->current_rgb, enc->width * enc->height); free(rgb_buffer); int is_keyframe = (frame_num == 1) || (frame_num % KEYFRAME_INTERVAL == 0); @@ -631,12 +624,9 @@ static int process_frame(tev_encoder_t *enc, int frame_num, FILE *output) { enc->total_output_bytes += 2 + 4 + compressed_size + 2; // Swap frame buffers for next frame - uint8_t *temp_rg = enc->previous_rg; - uint8_t *temp_ba = enc->previous_ba; - enc->previous_rg = enc->current_rg; - enc->previous_ba = enc->current_ba; - enc->current_rg = temp_rg; - enc->current_ba = temp_ba; + uint8_t *temp_rgb = enc->previous_rgb; + enc->previous_rgb = enc->current_rgb; + enc->current_rgb = temp_rgb; fprintf(stderr, "\rFrame %d/%d [%c] - Skip:%d Intra:%d Inter:%d - Ratio:%.1f%%", frame_num, enc->total_frames, is_keyframe ? 'I' : 'P', @@ -665,12 +655,9 @@ static int allocate_buffers(tev_encoder_t *enc) { int pixels = enc->width * enc->height; int blocks = ((enc->width + 7) / 8) * ((enc->height + 7) / 8); - enc->current_rg = malloc(pixels); - enc->current_ba = malloc(pixels); - enc->previous_rg = malloc(pixels); - enc->previous_ba = malloc(pixels); - enc->reference_rg = malloc(pixels); - enc->reference_ba = malloc(pixels); + enc->current_rgb = malloc(pixels * 3); // RGB: 3 bytes per pixel + enc->previous_rgb = malloc(pixels * 3); + enc->reference_rgb = malloc(pixels * 3); enc->rgb_workspace = malloc(BLOCK_SIZE * BLOCK_SIZE * 3); enc->dct_workspace = malloc(BLOCK_SIZE * BLOCK_SIZE * 3 * sizeof(float)); @@ -686,9 +673,8 @@ static int allocate_buffers(tev_encoder_t *enc) { int gzip_init_result = deflateInit2(&enc->gzip_stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY); // 15+16 for gzip format - return (enc->current_rg && enc->current_ba && enc->previous_rg && enc->previous_ba && - enc->reference_rg && enc->reference_ba && enc->rgb_workspace && - enc->dct_workspace && enc->block_data && enc->compressed_buffer && + return (enc->current_rgb && enc->previous_rgb && enc->reference_rgb && + enc->rgb_workspace && enc->dct_workspace && enc->block_data && enc->compressed_buffer && enc->mp2_buffer && gzip_init_result == Z_OK); } @@ -702,12 +688,9 @@ static void cleanup_encoder(tev_encoder_t *enc) { free(enc->input_file); free(enc->output_file); - free(enc->current_rg); - free(enc->current_ba); - free(enc->previous_rg); - free(enc->previous_ba); - free(enc->reference_rg); - free(enc->reference_ba); + free(enc->current_rgb); + free(enc->previous_rgb); + free(enc->reference_rgb); free(enc->rgb_workspace); free(enc->dct_workspace); free(enc->block_data);