From 8bb111760b6497b9e050e407a9921446f84faa58 Mon Sep 17 00:00:00 2001 From: minjaesong Date: Tue, 19 Aug 2025 22:20:19 +0900 Subject: [PATCH] half-working INTER block --- assets/disk0/tvdos/bin/playtev.js | 93 +----- .../torvald/tsvm/GraphicsJSR223Delegate.kt | 288 +++++++++++++++--- video_encoder/encoder_tev.c | 168 ++++++++-- 3 files changed, 397 insertions(+), 152 deletions(-) diff --git a/assets/disk0/tvdos/bin/playtev.js b/assets/disk0/tvdos/bin/playtev.js index 22a43a2..ddaa13c 100644 --- a/assets/disk0/tvdos/bin/playtev.js +++ b/assets/disk0/tvdos/bin/playtev.js @@ -24,80 +24,6 @@ const interactive = exec_args[2] && exec_args[2].toLowerCase() == "-i" const fullFilePath = _G.shell.resolvePathInput(exec_args[1]) const FILE_LENGTH = files.open(fullFilePath.full).size -// Quantization tables for Y channel (16x16 - just use first 8 quality levels) -const QUANT_TABLES_Y = [ - // Quality 0 (lowest) - 8x8 pattern repeated to 16x16 - (() => { - const base = [80, 60, 50, 80, 120, 200, 255, 255, - 55, 60, 70, 95, 130, 255, 255, 255, - 70, 65, 80, 120, 200, 255, 255, 255, - 70, 85, 110, 145, 255, 255, 255, 255, - 90, 110, 185, 255, 255, 255, 255, 255, - 120, 175, 255, 255, 255, 255, 255, 255, - 245, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255] - const extended = [] - for (let y = 0; y < 16; y++) { - for (let x = 0; x < 16; x++) { - extended.push(base[(y % 8) * 8 + (x % 8)]) - } - } - return extended - })(), - [40, 30, 25, 40, 60, 100, 128, 150, 28, 30, 35, 48, 65, 128, 150, 180], // Quality 1 (simplified) - [20, 15, 13, 20, 30, 50, 64, 75, 14, 15, 18, 24, 33, 64, 75, 90], // Quality 2 - [16, 12, 10, 16, 24, 40, 51, 60, 11, 12, 14, 19, 26, 51, 60, 72], // Quality 3 - [12, 9, 8, 12, 18, 30, 38, 45, 8, 9, 11, 14, 20, 38, 45, 54], // Quality 4 - [10, 7, 6, 10, 15, 25, 32, 38, 7, 7, 9, 12, 16, 32, 38, 45], // Quality 5 - [8, 6, 5, 8, 12, 20, 26, 30, 6, 6, 7, 10, 13, 26, 30, 36], // Quality 6 - // Quality 7 (highest) - (() => { - const base = [2, 1, 1, 2, 3, 5, 6, 7, - 1, 1, 1, 2, 3, 6, 7, 9, - 1, 1, 2, 3, 5, 6, 7, 9, - 1, 2, 3, 4, 6, 7, 9, 10, - 2, 3, 5, 6, 7, 9, 10, 11, - 3, 4, 6, 7, 9, 10, 11, 12, - 6, 6, 7, 9, 10, 11, 12, 13, - 6, 7, 9, 10, 11, 12, 13, 13] - const extended = [] - for (let y = 0; y < 16; y++) { - for (let x = 0; x < 16; x++) { - extended.push(base[(y % 8) * 8 + (x % 8)]) - } - } - return extended - })() -] - -// Quantization tables for chroma channels (8x8) -const QUANT_TABLES_C = [ - // Quality 0 (lowest) - [120, 90, 75, 120, 180, 255, 255, 255, - 83, 90, 105, 143, 195, 255, 255, 255, - 105, 98, 120, 180, 255, 255, 255, 255, - 105, 128, 165, 218, 255, 255, 255, 255, - 135, 165, 278, 255, 255, 255, 255, 255, - 180, 263, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255], - [60, 45, 38, 60, 90, 150, 192, 225], // Quality 1 (simplified) - [30, 23, 19, 30, 45, 75, 96, 113], // Quality 2 - [24, 18, 15, 24, 36, 60, 77, 90], // Quality 3 - [18, 14, 12, 18, 27, 45, 57, 68], // Quality 4 - [15, 11, 9, 15, 23, 38, 48, 57], // Quality 5 - [12, 9, 8, 12, 18, 30, 39, 45], // Quality 6 - // Quality 7 (highest) - [3, 2, 2, 3, 5, 8, 9, 11, - 2, 2, 2, 3, 5, 9, 11, 14, - 2, 2, 3, 5, 8, 9, 11, 14, - 2, 3, 5, 6, 9, 11, 14, 15, - 3, 5, 8, 9, 11, 14, 15, 17, - 5, 6, 9, 11, 14, 15, 17, 18, - 9, 9, 11, 14, 15, 17, 18, 20, - 9, 11, 14, 15, 17, 18, 20, 20] -] - let videoRateBin = [] let errorlevel = 0 let notifHideTimer = 0 @@ -198,23 +124,12 @@ let ycocgWorkspace = sys.malloc(BLOCK_SIZE * BLOCK_SIZE * 3) // Y+Co+Cg workspac let dctWorkspace = sys.malloc(BLOCK_SIZE * BLOCK_SIZE * 4) // DCT coefficients (floats) // Initialize RGB frame buffers to black (0,0,0) -for (let i = 0; i < FRAME_PIXELS; i++) { - // Current frame RGB: black - sys.poke(CURRENT_RGB_ADDR + i*3, 0) // R - sys.poke(CURRENT_RGB_ADDR + i*3 + 1, 0) // G - sys.poke(CURRENT_RGB_ADDR + i*3 + 2, 0) // B - - // Previous frame RGB: black - sys.poke(PREV_RGB_ADDR + i*3, 0) // R - sys.poke(PREV_RGB_ADDR + i*3 + 1, 0) // G - sys.poke(PREV_RGB_ADDR + i*3 + 2, 0) // B -} +sys.memset(CURRENT_RGB_ADDR, 0, FRAME_PIXELS * 3) +sys.memset(PREV_RGB_ADDR, 0, FRAME_PIXELS * 3) // Initialize display framebuffer to black -for (let i = 0; i < FRAME_PIXELS; i++) { - sys.poke(DISPLAY_RG_ADDR - i, 0) // Black in RG plane - sys.poke(DISPLAY_BA_ADDR - i, 15) // Black with alpha=15 (opaque) in BA plane -} +sys.memset(DISPLAY_RG_ADDR, 0, FRAME_PIXELS) // Black in RG plane +sys.memset(DISPLAY_BA_ADDR, 15, FRAME_PIXELS) // Black with alpha=15 (opaque) in BA plane let frameCount = 0 let stopPlay = false diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index c993cfe..858e531 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -7,7 +7,9 @@ import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUint import net.torvald.tsvm.peripheral.GraphicsAdapter import net.torvald.tsvm.peripheral.fmod import kotlin.math.abs +import kotlin.math.cos import kotlin.math.roundToInt +import kotlin.math.sqrt class GraphicsJSR223Delegate(private val vm: VM) { @@ -1548,19 +1550,18 @@ class GraphicsJSR223Delegate(private val vm: VM) { return round(15f * q) } + val dctBasis8 = Array(8) { u -> + FloatArray(8) { x -> + val cu = if (u == 0) 1.0 / sqrt(2.0) else 1.0 + (0.5 * cu * cos((2.0 * x + 1.0) * u * PI / 16.0)).toFloat() + } + } + /** * Perform IDCT on a single channel with integer coefficients */ private fun tevIdct8x8(coeffs: IntArray, quantTable: IntArray): IntArray { - // Use the same DCT basis as tevIdct8x8 - val dctBasis = Array(8) { u -> - Array(8) { x -> - val cu = if (u == 0) 1.0 / kotlin.math.sqrt(2.0) else 1.0 - cu * kotlin.math.cos((2.0 * x + 1.0) * u * kotlin.math.PI / 16.0) / 2.0 - } - } - - val dctCoeffs = Array(8) { DoubleArray(8) } + val dctCoeffs = Array(8) { FloatArray(8) } val result = IntArray(64) // Convert integer coefficients to 2D array and dequantize @@ -1570,10 +1571,10 @@ class GraphicsJSR223Delegate(private val vm: VM) { val coeff = coeffs[idx] if (idx == 0) { // DC coefficient for chroma: lossless quantization (no scaling) - dctCoeffs[u][v] = coeff.toDouble() + dctCoeffs[u][v] = coeff.toFloat() } else { // AC coefficients: use quantization table - dctCoeffs[u][v] = (coeff * quantTable[idx]).toDouble() + dctCoeffs[u][v] = (coeff * quantTable[idx]).toFloat() } } } @@ -1581,14 +1582,14 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Apply 2D inverse DCT for (x in 0 until 8) { for (y in 0 until 8) { - var sum = 0.0 + var sum = 0f for (u in 0 until 8) { for (v in 0 until 8) { - sum += dctBasis[u][x] * dctBasis[v][y] * dctCoeffs[u][v] + sum += dctBasis8[u][x] * dctBasis8[v][y] * dctCoeffs[u][v] } } - // Co/Cg values don't need +128 offset (they're already centered around 0) - val pixel = kotlin.math.max(-255.0, kotlin.math.min(255.0, sum)) + // Chroma residuals should be in reasonable range (±128 max) + val pixel = sum.coerceIn(-127f, 128f) result[y * 8 + x] = pixel.toInt() } } @@ -1596,16 +1597,16 @@ class GraphicsJSR223Delegate(private val vm: VM) { return result } + val dctBasis16 = Array(16) { u -> + FloatArray(16) { x -> + val cu = if (u == 0) 1.0 / sqrt(2.0) else 1.0 + (0.25 * cu * cos((2.0 * x + 1.0) * u * PI / 32.0)).toFloat() + } + } + // 16x16 IDCT for Y channel (YCoCg-R format) private fun tevIdct16x16(coeffs: IntArray, quantTable: IntArray): IntArray { - val dctBasis = Array(16) { u -> - Array(16) { x -> - val cu = if (u == 0) 1.0 / kotlin.math.sqrt(2.0) else 1.0 - cu * kotlin.math.cos((2.0 * x + 1.0) * u * kotlin.math.PI / 32.0) / 4.0 - } - } - - val dctCoeffs = Array(16) { DoubleArray(16) } + val dctCoeffs = Array(16) { FloatArray(16) } val result = IntArray(256) // 16x16 = 256 // Convert integer coefficients to 2D array and dequantize @@ -1615,10 +1616,10 @@ class GraphicsJSR223Delegate(private val vm: VM) { val coeff = coeffs[idx] if (idx == 0) { // DC coefficient for luma: lossless quantization (no scaling) - dctCoeffs[u][v] = coeff.toDouble() + dctCoeffs[u][v] = coeff.toFloat() } else { // AC coefficients: use quantization table - dctCoeffs[u][v] = (coeff * quantTable[idx]).toDouble() + dctCoeffs[u][v] = (coeff * quantTable[idx]).toFloat() } } } @@ -1626,13 +1627,13 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Apply 2D inverse DCT for (x in 0 until 16) { for (y in 0 until 16) { - var sum = 0.0 + var sum = 0f for (u in 0 until 16) { for (v in 0 until 16) { - sum += dctBasis[u][x] * dctBasis[v][y] * dctCoeffs[u][v] + sum += dctBasis16[u][x] * dctBasis16[v][y] * dctCoeffs[u][v] } } - val pixel = kotlin.math.max(0.0, kotlin.math.min(255.0, sum + 128.0)) + val pixel = (sum + 128).coerceIn(0f, 255f) result[y * 16 + x] = pixel.toInt() } } @@ -1654,22 +1655,50 @@ class GraphicsJSR223Delegate(private val vm: VM) { val co = coBlock[coIdx] val cg = cgBlock[coIdx] - // YCoCg-R inverse transform (using safe integer arithmetic) - val tmp = y - (cg / 2) // Use division instead of shift to avoid overflow + // YCoCg-R inverse transform (per YCoCg-R spec with truncated division) + val tmp = y - (cg / 2) val g = cg + tmp - val b = tmp - (co / 2) // Use division instead of shift to avoid overflow + val b = tmp - (co / 2) val r = b + co // Clamp and store RGB val baseIdx = (py * 16 + px) * 3 - rgbData[baseIdx] = kotlin.math.max(0, kotlin.math.min(255, r)) // R - rgbData[baseIdx + 1] = kotlin.math.max(0, kotlin.math.min(255, g)) // G - rgbData[baseIdx + 2] = kotlin.math.max(0, kotlin.math.min(255, b)) // B + rgbData[baseIdx] = r.coerceIn(0, 255) // R + rgbData[baseIdx + 1] = g.coerceIn(0, 255) // G + rgbData[baseIdx + 2] = b.coerceIn(0, 255) // B } } return rgbData } + + // RGB to YCoCg-R conversion for INTER mode residual calculation + fun tevRGBToYcocg(rgbBlock: IntArray): IntArray { + val ycocgData = IntArray(16 * 16 * 3) // Y,Co,Cg for 16x16 pixels + + for (py in 0 until 16) { + for (px in 0 until 16) { + val baseIdx = (py * 16 + px) * 3 + val r = rgbBlock[baseIdx] + val g = rgbBlock[baseIdx + 1] + val b = rgbBlock[baseIdx + 2] + + // YCoCg-R forward transform + val co = r - b + val tmp = b + (co / 2) + val cg = g - tmp + val y = tmp + (cg / 2) + + // Store YCoCg values + val yIdx = py * 16 + px + ycocgData[yIdx * 3] = y.coerceIn(0, 255) // Y + ycocgData[yIdx * 3 + 1] = co.coerceIn(-128, 127) // Co + ycocgData[yIdx * 3 + 2] = cg.coerceIn(-128, 127) // Cg + } + } + + return ycocgData + } /** * Hardware-accelerated TEV frame decoder for YCoCg-R 4:2:0 format @@ -1775,7 +1804,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { readPtr += 768 } - else -> { // TEV_MODE_INTRA (0x01) or TEV_MODE_INTER (0x02) - Full YCoCg-R DCT decode + 0x01 -> { // TEV_MODE_INTRA - Full YCoCg-R DCT decode (no motion compensation) // Read DCT coefficients: Y (16x16=256), Co (8x8=64), Cg (8x8=64) val yCoeffs = IntArray(256) val coCoeffs = IntArray(64) @@ -1813,7 +1842,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Convert YCoCg-R to RGB val rgbData = tevYcocgToRGB(yBlock, coBlock, cgBlock) - // Store RGB data to frame buffer + // Store RGB data to frame buffer (complete replacement) for (dy in 0 until 16) { for (dx in 0 until 16) { val x = startX + dx @@ -1830,6 +1859,187 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } } + + 0x02 -> { // TEV_MODE_INTER - Motion compensation + residual DCT + // Step 1: Read residual DCT coefficients + val yCoeffs = IntArray(256) + val coCoeffs = IntArray(64) + val cgCoeffs = IntArray(64) + + // Read Y coefficients (16x16 = 256 coefficients × 2 bytes) + for (i in 0 until 256) { + val coeff = ((vm.peek(readPtr)!!.toUint()) or + ((vm.peek(readPtr + 1)!!.toUint()) shl 8)).toShort().toInt() + yCoeffs[i] = coeff + readPtr += 2 + } + + // Read Co coefficients (8x8 = 64 coefficients × 2 bytes) + for (i in 0 until 64) { + val coeff = ((vm.peek(readPtr)!!.toUint()) or + ((vm.peek(readPtr + 1)!!.toUint()) shl 8)).toShort().toInt() + coCoeffs[i] = coeff + readPtr += 2 + } + + // Read Cg coefficients (8x8 = 64 coefficients × 2 bytes) + for (i in 0 until 64) { + val coeff = ((vm.peek(readPtr)!!.toUint()) or + ((vm.peek(readPtr + 1)!!.toUint()) shl 8)).toShort().toInt() + cgCoeffs[i] = coeff + readPtr += 2 + } + + // Step 2: Decode residual DCT + val yResidual = tevIdct16x16(yCoeffs, quantTableY) + val coResidual = tevIdct8x8(coCoeffs, quantTableC) + val cgResidual = tevIdct8x8(cgCoeffs, quantTableC) + + // Step 3: Build motion-compensated YCoCg-R block and add residuals + val finalY = IntArray(256) + val finalCo = IntArray(64) + val finalCg = IntArray(64) + + // Process Y residuals (16x16) + for (dy in 0 until 16) { + for (dx in 0 until 16) { + val x = startX + dx + val y = startY + dy + val refX = x + mvX + val refY = y + mvY + val pixelIdx = dy * 16 + dx + + if (x < width && y < height) { + var mcY: Int + + if (refX in 0 until width && refY in 0 until height) { + // Get motion-compensated RGB from previous frame + val refPixelOffset = refY.toLong() * width + refX + val refRgbOffset = refPixelOffset * 3 + + val mcR = vm.peek(prevRGBAddr + refRgbOffset*prevAddrIncVec)!!.toUint().toInt() + val mcG = vm.peek(prevRGBAddr + (refRgbOffset + 1)*prevAddrIncVec)!!.toUint().toInt() + val mcB = vm.peek(prevRGBAddr + (refRgbOffset + 2)*prevAddrIncVec)!!.toUint().toInt() + + // Convert motion-compensated RGB to Y only + val co = mcR - mcB + val tmp = mcB + (co / 2) + val cg = mcG - tmp + val yVal = tmp + (cg / 2) + + mcY = yVal + } else { + // Out of bounds reference - use neutral values + mcY = 128 + } + + // Add Y residual + finalY[pixelIdx] = (mcY + yResidual[pixelIdx]).coerceIn(0, 255) + } + } + } + + // Process chroma residuals separately (8x8 subsampled) + for (cy in 0 until 8) { + for (cx in 0 until 8) { + // Chroma coordinates are at 2x2 block centers in subsampled space + val x = startX + cx * 2 + val y = startY + cy * 2 + + // Apply motion vector to chroma block center + val refX = x + mvX + val refY = y + mvY + val chromaIdx = cy * 8 + cx + + if (x < width && y < height) { + var mcCo: Int + var mcCg: Int + + // Sample 2x2 block from motion-compensated position for chroma + if (refX >= 0 && refY >= 0 && refX < width - 1 && refY < height - 1) { + var coSum = 0 + var cgSum = 0 + var count = 0 + + // Sample 2x2 block for chroma subsampling (like encoder) + for (dy in 0 until 2) { + for (dx in 0 until 2) { + val sampleX = refX + dx + val sampleY = refY + dy + if (sampleX < width && sampleY < height) { + val refPixelOffset = sampleY.toLong() * width + sampleX + val refRgbOffset = refPixelOffset * 3 + + val mcR = vm.peek(prevRGBAddr + refRgbOffset*prevAddrIncVec)!!.toUint().toInt() + val mcG = vm.peek(prevRGBAddr + (refRgbOffset + 1)*prevAddrIncVec)!!.toUint().toInt() + val mcB = vm.peek(prevRGBAddr + (refRgbOffset + 2)*prevAddrIncVec)!!.toUint().toInt() + + val co = mcR - mcB + val tmp = mcB + (co / 2) + val cg = mcG - tmp + + coSum += co + cgSum += cg + count++ + } + } + } + + mcCo = if (count > 0) coSum / count else 0 + mcCg = if (count > 0) cgSum / count else 0 + } else { + // Out of bounds reference - use neutral chroma values + mcCo = 0 + mcCg = 0 + } + + // Add chroma residuals - no clamping to see if that's the issue + finalCo[chromaIdx] = mcCo + coResidual[chromaIdx] + finalCg[chromaIdx] = mcCg + cgResidual[chromaIdx] + } + } + } + + // Step 4: Convert final YCoCg-R to RGB + val finalRgb = tevYcocgToRGB(finalY, finalCo, finalCg) + + // Step 5: Store final RGB data to frame buffer + for (dy in 0 until 16) { + for (dx in 0 until 16) { + val x = startX + dx + val y = startY + dy + if (x < width && y < height) { + val rgbIdx = (dy * 16 + dx) * 3 + val imageOffset = y.toLong() * width + x + val bufferOffset = imageOffset * 3 + + vm.poke(currentRGBAddr + bufferOffset*thisAddrIncVec, finalRgb[rgbIdx].toByte()) + vm.poke(currentRGBAddr + (bufferOffset + 1)*thisAddrIncVec, finalRgb[rgbIdx + 1].toByte()) + vm.poke(currentRGBAddr + (bufferOffset + 2)*thisAddrIncVec, finalRgb[rgbIdx + 2].toByte()) + } + } + } + } + + else -> { + // Unknown block mode - skip DCT coefficients and use black + readPtr += 768 // Skip Y(256×2) + Co(64×2) + Cg(64×2) = 768 bytes + + for (dy in 0 until 16) { + for (dx in 0 until 16) { + val x = startX + dx + val y = startY + dy + if (x < width && y < height) { + val imageOffset = y.toLong() * width + x + val bufferOffset = imageOffset * 3 + + vm.poke(currentRGBAddr + bufferOffset*thisAddrIncVec, 0.toByte()) // R=0 + vm.poke(currentRGBAddr + (bufferOffset + 1)*thisAddrIncVec, 0.toByte()) // G=0 + vm.poke(currentRGBAddr + (bufferOffset + 2)*thisAddrIncVec, 0.toByte()) // B=0 + } + } + } + } } } } @@ -1855,9 +2065,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { // YCoCg-R transform val co = r - b - val tmp = b + (co shr 1) + val tmp = b + (co / 2) val cg = g - tmp - val y = tmp + (cg shr 1) + val y = tmp + (cg / 2) yBlock[py * 16 + px] = y } @@ -1883,7 +2093,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val b = vm.peek(srcPtr + (offset + 2) * incVec)!!.toUint() val co = r - b - val tmp = b + (co shr 1) + val tmp = b + (co / 2) val cg = g - tmp coSum += co diff --git a/video_encoder/encoder_tev.c b/video_encoder/encoder_tev.c index 12fd056..621e9e3 100644 --- a/video_encoder/encoder_tev.c +++ b/video_encoder/encoder_tev.c @@ -28,6 +28,11 @@ #define TEV_PACKET_AUDIO_MP2 0x20 // MP2 audio #define TEV_PACKET_SYNC 0xFF // Sync packet +// Utility macros +static inline int CLAMP(int x, int min, int max) { + return x < min ? min : (x > max ? max : x); +} + // Quality settings for quantization (Y channel) - 16x16 tables static const uint8_t QUANT_TABLES_Y[8][256] = { // Quality 0 (lowest) - 16x16 table @@ -310,30 +315,30 @@ typedef struct { int blocks_skip, blocks_intra, blocks_inter, blocks_motion; } tev_encoder_t; -// RGB to YCoCg-R transform +// RGB to YCoCg-R transform (per YCoCg-R specification with truncated division) static void rgb_to_ycocgr(uint8_t r, uint8_t g, uint8_t b, int *y, int *co, int *cg) { *co = (int)r - (int)b; - int tmp = (int)b + ((*co) >> 1); + int tmp = (int)b + ((*co) / 2); *cg = (int)g - tmp; - *y = tmp + ((*cg) >> 1); + *y = tmp + ((*cg) / 2); - // Clamp to valid ranges (YCoCg-R should be roughly -255 to +255) - *y = (*y < 0) ? 0 : ((*y > 255) ? 255 : *y); - *co = (*co < -255) ? -255 : ((*co > 255) ? 255 : *co); - *cg = (*cg < -255) ? -255 : ((*cg > 255) ? 255 : *cg); + // Clamp to valid ranges (YCoCg-R should be roughly -128 to +127) + *y = CLAMP(*y, 0, 255); + *co = CLAMP(*co, -128, 127); + *cg = CLAMP(*cg, -128, 127); } -// YCoCg-R to RGB transform (for verification) +// YCoCg-R to RGB transform (for verification - per YCoCg-R specification) static void ycocgr_to_rgb(int y, int co, int cg, uint8_t *r, uint8_t *g, uint8_t *b) { - int tmp = y - (cg >> 1); + int tmp = y - (cg / 2); *g = cg + tmp; - *b = tmp - (co >> 1); + *b = tmp - (co / 2); *r = *b + co; // Clamp values - *r = (*r < 0) ? 0 : ((*r > 255) ? 255 : *r); - *g = (*g < 0) ? 0 : ((*g > 255) ? 255 : *g); - *b = (*b < 0) ? 0 : ((*b > 255) ? 255 : *b); + *r = CLAMP(*r, 0, 255); + *g = CLAMP(*g, 0, 255); + *b = CLAMP(*b, 0, 255); } // 16x16 2D DCT @@ -507,6 +512,117 @@ static void estimate_motion(tev_encoder_t *enc, int block_x, int block_y, } } +// Convert RGB block to YCoCg-R with 4:2:0 chroma subsampling +static void convert_rgb_to_ycocgr_block(const uint8_t *rgb_block, + uint8_t *y_block, int8_t *co_block, int8_t *cg_block) { + // Convert 16x16 RGB to Y (full resolution) + for (int py = 0; py < 16; py++) { + for (int px = 0; px < 16; px++) { + int rgb_idx = (py * 16 + px) * 3; + int r = rgb_block[rgb_idx]; + int g = rgb_block[rgb_idx + 1]; + int b = rgb_block[rgb_idx + 2]; + + // YCoCg-R transform (per specification with truncated division) + int co = r - b; + int tmp = b + (co / 2); + int cg = g - tmp; + int y = tmp + (cg / 2); + + y_block[py * 16 + px] = CLAMP(y, 0, 255); + } + } + + // Convert to Co and Cg with 4:2:0 subsampling (8x8) + for (int cy = 0; cy < 8; cy++) { + for (int cx = 0; cx < 8; cx++) { + // Sample 2x2 block from RGB and average for chroma + int sum_co = 0, sum_cg = 0; + + for (int dy = 0; dy < 2; dy++) { + for (int dx = 0; dx < 2; dx++) { + int py = cy * 2 + dy; + int px = cx * 2 + dx; + int rgb_idx = (py * 16 + px) * 3; + + int r = rgb_block[rgb_idx]; + int g = rgb_block[rgb_idx + 1]; + int b = rgb_block[rgb_idx + 2]; + + int co = r - b; + int tmp = b + (co / 2); + int cg = g - tmp; + + sum_co += co; + sum_cg += cg; + } + } + + // Average and store subsampled chroma + co_block[cy * 8 + cx] = CLAMP(sum_co / 4, -128, 127); + cg_block[cy * 8 + cx] = CLAMP(sum_cg / 4, -128, 127); + } + } +} + +// Extract motion-compensated YCoCg-R block from reference frame +static void extract_motion_compensated_block(const uint8_t *rgb_data, int width, int height, + int block_x, int block_y, int mv_x, int mv_y, + uint8_t *y_block, int8_t *co_block, int8_t *cg_block) { + // Extract 16x16 RGB block with motion compensation + uint8_t rgb_block[16 * 16 * 3]; + + for (int dy = 0; dy < 16; dy++) { + for (int dx = 0; dx < 16; dx++) { + int cur_x = block_x + dx; + int cur_y = block_y + dy; + int ref_x = cur_x + mv_x; + int ref_y = cur_y + mv_y; + + int rgb_idx = (dy * 16 + dx) * 3; + + if (ref_x >= 0 && ref_y >= 0 && ref_x < width && ref_y < height) { + // Copy RGB from reference position + int ref_offset = (ref_y * width + ref_x) * 3; + rgb_block[rgb_idx] = rgb_data[ref_offset]; // R + rgb_block[rgb_idx + 1] = rgb_data[ref_offset + 1]; // G + rgb_block[rgb_idx + 2] = rgb_data[ref_offset + 2]; // B + } else { + // Out of bounds - use black + rgb_block[rgb_idx] = 0; // R + rgb_block[rgb_idx + 1] = 0; // G + rgb_block[rgb_idx + 2] = 0; // B + } + } + } + + // Convert RGB block to YCoCg-R + convert_rgb_to_ycocgr_block(rgb_block, y_block, co_block, cg_block); +} + +// Compute motion-compensated residual for INTER mode +static void compute_motion_residual(tev_encoder_t *enc, int block_x, int block_y, int mv_x, int mv_y) { + int start_x = block_x * 16; + int start_y = block_y * 16; + + // Extract motion-compensated reference block from previous frame + uint8_t ref_y[256]; + int8_t ref_co[64], ref_cg[64]; + extract_motion_compensated_block(enc->previous_rgb, enc->width, enc->height, + start_x, start_y, mv_x, mv_y, + ref_y, ref_co, ref_cg); + + // Compute residuals: current - motion_compensated_reference + for (int i = 0; i < 256; i++) { + enc->y_workspace[i] = (int)enc->y_workspace[i] - (int)ref_y[i]; + } + + for (int i = 0; i < 64; i++) { + enc->co_workspace[i] = (int)enc->co_workspace[i] - (int)ref_co[i]; + enc->cg_workspace[i] = (int)enc->cg_workspace[i] - (int)ref_cg[i]; + } +} + // Encode a 16x16 block static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_keyframe) { tev_block_t *block = &enc->block_data[block_y * ((enc->width + 15) / 16) + block_x]; @@ -608,8 +724,15 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke memset(block->cg_coeffs, 0, sizeof(block->cg_coeffs)); enc->blocks_motion++; return; // Skip DCT encoding, just store motion vector + } else if (motion_sad < skip_sad && (abs(block->mv_x) > 0 || abs(block->mv_y) > 0)) { + // Use inter mode with residual DCT - motion compensation + residual + block->mode = TEV_MODE_INTER; + enc->blocks_inter++; + + // Compute motion-compensated residual for DCT encoding + compute_motion_residual(enc, block_x, block_y, block->mv_x, block->mv_y); } else { - // Use intra mode for now (inter mode with residual DCT not implemented) + // No good motion prediction - use intra mode block->mode = TEV_MODE_INTRA; block->mv_x = 0; block->mv_y = 0; @@ -695,13 +818,13 @@ static int alloc_encoder_buffers(tev_encoder_t *enc) { if (gzip_init_result != Z_OK) { fprintf(stderr, "Failed to initialize gzip compression\n"); - return -1; + return 0; } // Initialize previous frame to black memset(enc->previous_rgb, 0, pixels * 3); - return 0; + return 1; } // Free encoder resources @@ -772,13 +895,13 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num) { if (deflateReset(&enc->gzip_stream) != Z_OK) { fprintf(stderr, "Gzip deflateReset failed\n"); - return -1; + return 0; } int result = deflate(&enc->gzip_stream, Z_FINISH); if (result != Z_STREAM_END) { fprintf(stderr, "Gzip compression failed: %d\n", result); - return -1; + return 0; } size_t compressed_size = enc->gzip_stream.total_out; @@ -792,16 +915,13 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num) { fwrite(enc->compressed_buffer, 1, compressed_size, output); enc->total_output_bytes += 5 + compressed_size; - - // Copy current frame to previous for next iteration - //memcpy(enc->previous_rgb, enc->current_rgb, enc->width * enc->height * 3); // Swap frame buffers for next frame uint8_t *temp_rgb = enc->previous_rgb; enc->previous_rgb = enc->current_rgb; enc->current_rgb = temp_rgb; - return 0; + return 1; } // Execute command and capture output @@ -1099,7 +1219,7 @@ int main(int argc, char *argv[]) { } // Allocate buffers - if (alloc_encoder_buffers(enc) < 0) { + if (!alloc_encoder_buffers(enc)) { fprintf(stderr, "Failed to allocate encoder buffers\n"); cleanup_encoder(enc); return 1; @@ -1194,7 +1314,7 @@ int main(int argc, char *argv[]) { } // Encode frame - if (encode_frame(enc, output, frame_count) < 0) { + if (!encode_frame(enc, output, frame_count)) { fprintf(stderr, "Failed to encode frame %d\n", frame_count); break; }