diff --git a/assets/disk0/tvdos/bin/playtev.js b/assets/disk0/tvdos/bin/playtev.js index 5bfa8d0..2b2485f 100644 --- a/assets/disk0/tvdos/bin/playtev.js +++ b/assets/disk0/tvdos/bin/playtev.js @@ -11,7 +11,7 @@ const HEIGHT = 448 const BLOCK_SIZE = 16 // 16x16 blocks for YCoCg-R const TEV_MAGIC = [0x1F, 0x54, 0x53, 0x56, 0x4D, 0x54, 0x45, 0x56] // "\x1FTSVM TEV" const TEV_VERSION_YCOCG = 2 // YCoCg-R version -const TEV_VERSION_XYB = 3 // XYB version +const TEV_VERSION_ICtCp = 3 // ICtCp version const SND_BASE_ADDR = audio.getBaseAddr() const pcm = require("pcm") const MP2_FRAME_SIZE = [144,216,252,288,360,432,504,576,720,864,1008,1152,1440,1728] @@ -391,18 +391,15 @@ if (!magicMatching) { // Read header let version = seqread.readOneByte() -if (version !== TEV_VERSION_YCOCG && version !== TEV_VERSION_XYB) { - println(`Unsupported TEV version: ${version} (expected ${TEV_VERSION_YCOCG} for YCoCg-R or ${TEV_VERSION_XYB} for XYB)`) +if (version !== TEV_VERSION_YCOCG && version !== TEV_VERSION_ICtCp) { + println(`Unsupported TEV version: ${version} (expected ${TEV_VERSION_YCOCG} for YCoCg-R or ${TEV_VERSION_ICtCp} for ICtCp)`) return 1 } -let colorSpace = (version === TEV_VERSION_XYB) ? "XYB" : "YCoCg-R" +let colorSpace = (version === TEV_VERSION_ICtCp) ? "ICtCp" : "YCoCg" if (interactive) { con.move(1,1) - if (colorSpace == "XYB") - println(`Push and hold Backspace to exit | TEV Format ${version} (${colorSpace}) | Deblock: ${enableDeblocking ? 'ON' : 'OFF'}, ${enableBoundaryAwareDecoding ? 'ON' : 'OFF'}`); - else - println(`Push and hold Backspace to exit | Deblock: ${enableDeblocking ? 'ON' : 'OFF'} | BoundaryAware: ${enableBoundaryAwareDecoding ? 'ON' : 'OFF'}`); + println(`Push and hold Backspace to exit | ${colorSpace} | Deblock: ${enableDeblocking ? 'ON' : 'OFF'} | EdgeAware: ${enableBoundaryAwareDecoding ? 
'ON' : 'OFF'}`); } let width = seqread.readShort() @@ -418,7 +415,6 @@ let hasSubtitle = !!(flags & 2) let videoFlags = seqread.readOneByte() let isInterlaced = !!(videoFlags & 1) let isNTSC = !!(videoFlags & 2) -let isLossless = !!(videoFlags & 4) let unused2 = seqread.readOneByte() @@ -428,7 +424,7 @@ serial.println(` FPS: ${(isNTSC) ? (fps * 1000 / 1001) : fps}`) serial.println(` Duration: ${totalFrames / fps}`) serial.println(` Audio: ${hasAudio ? "Yes" : "No"}`) serial.println(` Resolution: ${width}x${height}, ${isInterlaced ? "interlaced" : "progressive"}`) -serial.println(` Quality: Y=${qualityY}, Co=${qualityCo}, Cg=${qualityCg}, ${isLossless ? "lossless" : "lossy"}`) +serial.println(` Quality: Y=${qualityY}, Co=${qualityCo}, Cg=${qualityCg}`) // DEBUG interlace raw output @@ -621,7 +617,7 @@ try { PREV_RGB_ADDR = temp } else if (packetType == TEV_PACKET_IFRAME || packetType == TEV_PACKET_PFRAME) { - // Video frame packet (always includes rate control factor) + // Video frame packet let payloadLen = seqread.readInt() let compressedPtr = seqread.readBytes(payloadLen) updateDataRateBin(payloadLen) @@ -636,11 +632,6 @@ try { // Decompress using gzip // Optimized buffer size calculation for TEV YCoCg-R blocks - let blocksX = (width + 15) >> 4 // 16x16 blocks - let blocksY = (height + 15) >> 4 - let tevBlockSize = 1 + 4 + 2 + (256 * 2) + (64 * 2) + (64 * 2) // mode + mv + cbp + Y(16x16) + Co(8x8) + Cg(8x8) - let decompressedSize = Math.max(payloadLen * 4, blocksX * blocksY * tevBlockSize) // More efficient sizing - let actualSize let decompressStart = sys.nanoTime() try { @@ -655,7 +646,7 @@ try { continue } - // Hardware-accelerated TEV decoding to RGB buffers (YCoCg-R or XYB based on version) + // Hardware-accelerated TEV decoding to RGB buffers (YCoCg-R or ICtCp based on version) try { // duplicate every 1000th frame (pass a turn every 1000n+501st) if NTSC if (!isNTSC || frameCount % 1000 != 501 || frameDuped) { @@ -667,14 +658,14 @@ try { if (isInterlaced) { 
// For interlaced: decode current frame into currentFieldAddr // For display: use prevFieldAddr as current, currentFieldAddr as next - graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding, isLossless) + graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding) graphics.tevDeinterlace(trueFrameCount, width, decodingHeight, prevFieldAddr, currentFieldAddr, nextFieldAddr, CURRENT_RGB_ADDR, deinterlaceAlgorithm) // Rotate field buffers for next frame: NEXT -> CURRENT -> PREV rotateFieldBuffers() } else { // Progressive or first frame: normal decoding without temporal prediction - graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding, isLossless) + graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding) } decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 // Convert to milliseconds diff --git a/terranmon.txt b/terranmon.txt index db99115..742653d 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -683,7 +683,7 @@ DCT-based compression, motion compensation, and efficient temporal coding. 
- Version 2.1: Added Rate Control Factor to all video packets (breaking change) * Enables bitrate-constrained encoding alongside quality modes * All video frames now include 4-byte rate control factor after payload size -- Version 3.0: Additional support of XYB Colour space +- Version 3.0: Additional support of ICtCp Colour space # File Structure \x1F T S V M T E V diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index 6eb895b..47b8288 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -2147,7 +2147,92 @@ class GraphicsJSR223Delegate(private val vm: VM) { return rgbData } - + + // ICtCp to RGB conversion for TEV version 3 + fun tevIctcpToRGB(iBlock: IntArray, ctBlock: IntArray, cpBlock: IntArray): IntArray { + val rgbData = IntArray(16 * 16 * 3) // R,G,B for 16x16 pixels + + // Process 16x16 I channel with 8x8 Ct/Cp channels (4:2:0 upsampling) + for (py in 0 until 16) { + for (px in 0 until 16) { + val iIdx = py * 16 + px + val i = iBlock[iIdx].toDouble() + + // Get Ct/Cp from 8x8 chroma blocks (4:2:0 upsampling) + val ctIdx = (py / 2) * 8 + (px / 2) + val ct = ctBlock[ctIdx].toDouble() + val cp = cpBlock[ctIdx].toDouble() + + // Convert scaled values back to ICtCp range + // I channel: IDCT already added 128, so i is in [0,255]. 
Reverse encoder: (c1*255-128)+128 = c1*255 + val I = i / 255.0 + // Ct/Cp were scaled: c2/c3 * 255.0, so reverse: ct/cp / 255.0 + val Ct = (ct / 255.0) + val Cp = (cp / 255.0) + + // ICtCp -> L'M'S' (inverse matrix) + val Lp = (I + 0.015718580108730416 * Ct + 0.2095810681164055 * Cp).coerceIn(0.0, 1.0) + val Mp = (I - 0.015718580108730416 * Ct - 0.20958106811640548 * Cp).coerceIn(0.0, 1.0) + val Sp = (I + 1.0212710798422344 * Ct - 0.6052744909924316 * Cp).coerceIn(0.0, 1.0) + + // HLG decode: L'M'S' -> linear LMS + val L = HLG_inverse_OETF(Lp) + val M = HLG_inverse_OETF(Mp) + val S = HLG_inverse_OETF(Sp) + + // LMS -> linear sRGB (inverse matrix) + val rLin = 29.601046511687 * L - 21.364325340529906 * M - 4.886500015143518 * S + val gLin = -12.083229161592032 * L + 10.673933874098694 * M + 1.5369143265611211 * S + val bLin = 0.38562844776642574 * L - 0.6536244436141302 * M + 1.0968381245163787 * S + + // Gamma encode to sRGB + val rSrgb = srgbUnlinearize(rLin) + val gSrgb = srgbUnlinearize(gLin) + val bSrgb = srgbUnlinearize(bLin) + + // Convert to 8-bit and store + val baseIdx = (py * 16 + px) * 3 + rgbData[baseIdx] = (rSrgb * 255.0).toInt().coerceIn(0, 255) // R + rgbData[baseIdx + 1] = (gSrgb * 255.0).toInt().coerceIn(0, 255) // G + rgbData[baseIdx + 2] = (bSrgb * 255.0).toInt().coerceIn(0, 255) // B + } + } + + return rgbData + } + + // Helper functions for ICtCp decoding + + // Inverse HLG OETF (HLG -> linear) + fun HLG_inverse_OETF(V: Double): Double { + val a = 0.17883277 + val b = 1.0 - 4.0 * a + val c = 0.5 - a * ln(4.0 * a) + + if (V <= 0.5) + return (V * V) / 3.0 + else + return (exp((V - c)/a) + b) / 12.0 + } + + // sRGB gamma decode: nonlinear -> linear + private fun srgbLinearize(value: Double): Double { + return if (value <= 0.04045) { + value / 12.92 + } else { + ((value + 0.055) / 1.055).pow(2.4) + } + } + + // sRGB gamma encode: linear -> nonlinear + private fun srgbUnlinearize(value: Double): Double { + return if (value <= 0.0031308) { + value * 
12.92 + } else { + 1.055 * value.pow(1.0 / 2.4) - 0.055 + } + } + // RGB to YCoCg-R conversion for INTER mode residual calculation fun tevRGBToYcocg(rgbBlock: IntArray): IntArray { val ycocgData = IntArray(16 * 16 * 3) // Y,Co,Cg for 16x16 pixels @@ -2175,147 +2260,6 @@ class GraphicsJSR223Delegate(private val vm: VM) { return ycocgData } - - // XYB conversion constants from JPEG XL specification - private val XYB_BIAS = 0.00379307325527544933 - private val CBRT_BIAS = 0.155954200549248620 // cbrt(XYB_BIAS) - - // RGB to LMS mixing coefficients - private val RGB_TO_LMS = arrayOf( - doubleArrayOf(0.3, 0.622, 0.078), // L coefficients - doubleArrayOf(0.23, 0.692, 0.078), // M coefficients - doubleArrayOf(0.24342268924547819, 0.20476744424496821, 0.55180986650955360) // S coefficients - ) - - // LMS to RGB inverse matrix - private val LMS_TO_RGB = arrayOf( - doubleArrayOf(11.0315669046, -9.8669439081, -0.1646229965), - doubleArrayOf(-3.2541473811, 4.4187703776, -0.1646229965), - doubleArrayOf(-3.6588512867, 2.7129230459, 1.9459282408) - ) - - // sRGB linearization functions - private fun srgbLinearise(value: Double): Double { - return if (value > 0.04045) { - Math.pow((value + 0.055) / 1.055, 2.4) - } else { - value / 12.92 - } - } - - private fun srgbUnlinearise(value: Double): Double { - return if (value > 0.0031308) { - 1.055 * Math.pow(value, 1.0 / 2.4) - 0.055 - } else { - value * 12.92 - } - } - - // XYB to RGB conversion for hardware decoding - fun tevXybToRGB(yBlock: IntArray, xBlock: IntArray, bBlock: IntArray): IntArray { - val rgbData = IntArray(16 * 16 * 3) // R,G,B for 16x16 pixels - - for (py in 0 until 16) { - for (px in 0 until 16) { - val yIdx = py * 16 + px - val y = yBlock[yIdx] - - // Get chroma values from subsampled 8x8 blocks (nearest neighbor upsampling) - val xbIdx = (py / 2) * 8 + (px / 2) - val x = xBlock[xbIdx] - val b = bBlock[xbIdx] - - // Optimal range-based dequantization (exact inverse of improved quantization) - val X_MIN = -0.016; 
val X_MAX = 0.030 - val xVal = (x / 255.0) * (X_MAX - X_MIN) + X_MIN // X: inverse of range mapping - val Y_MAX = 0.85 - val yVal = (y / 255.0) * Y_MAX // Y: inverse of improved scale - val B_MAX = 0.85 - val bVal = ((b + 128.0) / 255.0) * B_MAX // B: inverse of ((val/B_MAX*255)-128) - - // XYB to LMS gamma - val lgamma = xVal + yVal - val mgamma = yVal - xVal - val sgamma = bVal - - // Remove gamma correction - val lmix = (lgamma + CBRT_BIAS).pow(3.0) - XYB_BIAS - val mmix = (mgamma + CBRT_BIAS).pow(3.0) - XYB_BIAS - val smix = (sgamma + CBRT_BIAS).pow(3.0) - XYB_BIAS - - // LMS to linear RGB using inverse matrix - val rLinear = (LMS_TO_RGB[0][0] * lmix + LMS_TO_RGB[0][1] * mmix + LMS_TO_RGB[0][2] * smix).coerceIn(0.0, 1.0) - val gLinear = (LMS_TO_RGB[1][0] * lmix + LMS_TO_RGB[1][1] * mmix + LMS_TO_RGB[1][2] * smix).coerceIn(0.0, 1.0) - val bLinear = (LMS_TO_RGB[2][0] * lmix + LMS_TO_RGB[2][1] * mmix + LMS_TO_RGB[2][2] * smix).coerceIn(0.0, 1.0) - - // Convert back to sRGB gamma and 0-255 range - val r = (srgbUnlinearise(rLinear) * 255.0 + 0.5).toInt().coerceIn(0, 255) - val g = (srgbUnlinearise(gLinear) * 255.0 + 0.5).toInt().coerceIn(0, 255) - val bRgb = (srgbUnlinearise(bLinear) * 255.0 + 0.5).toInt().coerceIn(0, 255) - - // Store RGB - val baseIdx = (py * 16 + px) * 3 - rgbData[baseIdx] = r // R - rgbData[baseIdx + 1] = g // G - rgbData[baseIdx + 2] = bRgb // B - } - } - - return rgbData - } - - // RGB to XYB conversion for INTER mode residual calculation - fun tevRGBToXyb(rgbBlock: IntArray): IntArray { - val xybData = IntArray(16 * 16 * 3) // Y,X,B for 16x16 pixels - - for (py in 0 until 16) { - for (px in 0 until 16) { - val baseIdx = (py * 16 + px) * 3 - val r = rgbBlock[baseIdx] - val g = rgbBlock[baseIdx + 1] - val b = rgbBlock[baseIdx + 2] - - // Convert RGB to 0-1 range and linearise sRGB - val rNorm = srgbLinearise(r / 255.0) - val gNorm = srgbLinearise(g / 255.0) - val bNorm = srgbLinearise(b / 255.0) - - // RGB to LMS mixing with bias - val lmix = 
RGB_TO_LMS[0][0] * rNorm + RGB_TO_LMS[0][1] * gNorm + RGB_TO_LMS[0][2] * bNorm + XYB_BIAS - val mmix = RGB_TO_LMS[1][0] * rNorm + RGB_TO_LMS[1][1] * gNorm + RGB_TO_LMS[1][2] * bNorm + XYB_BIAS - val smix = RGB_TO_LMS[2][0] * rNorm + RGB_TO_LMS[2][1] * gNorm + RGB_TO_LMS[2][2] * bNorm + XYB_BIAS - - // Apply gamma correction (cube root) - val lgamma = lmix.pow(1.0 / 3.0) - CBRT_BIAS - val mgamma = mmix.pow(1.0 / 3.0) - CBRT_BIAS - val sgamma = smix.pow(1.0 / 3.0) - CBRT_BIAS - - // LMS to XYB transformation - val xVal = (lgamma - mgamma) / 2.0 - val yVal = (lgamma + mgamma) / 2.0 - val bVal = sgamma - - // Optimal range-based quantization for XYB values (improved precision) - // X: actual range -0.016 to +0.030, map to full 0-255 precision - val X_MIN = -0.016; val X_MAX = 0.030 - val xQuant = (((xVal - X_MIN) / (X_MAX - X_MIN)) * 255.0).toInt().coerceIn(0, 255) - // Y: range 0 to 0.85, map to 0 to 255 (improved scale) - val Y_MAX = 0.85 - val yQuant = ((yVal / Y_MAX) * 255.0).toInt().coerceIn(0, 255) - // B: range 0 to 0.85, map to -128 to +127 (optimized precision) - val B_MAX = 0.85 - val bQuant = (((bVal / B_MAX) * 255.0) - 128.0).toInt().coerceIn(-128, 127) - - // Store XYB values - val yIdx = py * 16 + px - xybData[yIdx * 3] = yQuant // Y - xybData[yIdx * 3 + 1] = xQuant // X - xybData[yIdx * 3 + 2] = bQuant // B - } - } - - return xybData - } /** * Enhanced TEV Deblocking Filter - Uses Knusperli-inspired techniques for superior boundary analysis @@ -2627,8 +2571,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, frameCounter: Int, debugMotionVectors: Boolean = false, tevVersion: Int = 2, - enableDeblocking: Boolean = true, enableBoundaryAwareDecoding: Boolean = false, - isLossless: Boolean = false) { + enableDeblocking: Boolean = true, enableBoundaryAwareDecoding: Boolean = false) { // height doesn't change when interlaced, 
because that's the encoder's output @@ -2744,7 +2687,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val cgPixels = tevIdct8x8_fromOptimizedCoeffs(cgBlock) val rgbData = if (tevVersion == 3) { - tevXybToRGB(yPixels, coPixels, cgPixels) + tevIctcpToRGB(yPixels, coPixels, cgPixels) } else { tevYcocgToRGB(yPixels, coPixels, cgPixels) } @@ -2919,69 +2862,20 @@ class GraphicsJSR223Delegate(private val vm: VM) { } 0x01 -> { // TEV_MODE_INTRA - Full YCoCg-R DCT decode (no motion compensation) - val yBlock: IntArray - val coBlock: IntArray - val cgBlock: IntArray - - if (isLossless) { - // Lossless mode: coefficients are stored as float16, no quantization - // Read float16 coefficients: Y (16x16=256), Co (8x8=64), Cg (8x8=64) - val coeffFloat16Array = ShortArray(384) // 384 float16 values stored as shorts - vm.bulkPeekShort(readPtr.toInt(), coeffFloat16Array, 768) // 384 * 2 bytes - readPtr += 768 - - // Convert float16 to float32 and perform IDCT directly (no quantization) - println("DEBUG: Reading lossless coefficients, first few float16 values: ${coeffFloat16Array.take(10).map { "0x${it.toString(16)}" }}") - val yCoeffs = FloatArray(256) { i -> - // Convert signed short to unsigned short for float16 interpretation - val signedShort = coeffFloat16Array[i] - val float16bits = signedShort.toInt() and 0xFFFF // Convert to unsigned - val floatVal = Float16.toFloat(float16bits.toShort()) - if (floatVal.isNaN() || floatVal.isInfinite()) { - println("NaN/Inf detected at Y coefficient $i: signedShort=0x${signedShort.toString(16)}, unsigned=0x${float16bits.toString(16)}, floatVal=$floatVal") - 0f // Replace NaN with 0 - } else floatVal - } - val coCoeffs = FloatArray(64) { i -> - // Convert signed short to unsigned short for float16 interpretation - val signedShort = coeffFloat16Array[256 + i] - val float16bits = signedShort.toInt() and 0xFFFF // Convert to unsigned - val floatVal = Float16.toFloat(float16bits.toShort()) - if (floatVal.isNaN() || floatVal.isInfinite()) { 
- println("NaN/Inf detected at Co coefficient $i: signedShort=0x${signedShort.toString(16)}, unsigned=0x${float16bits.toString(16)}, floatVal=$floatVal") - 0f // Replace NaN with 0 - } else floatVal - } - val cgCoeffs = FloatArray(64) { i -> - // Convert signed short to unsigned short for float16 interpretation - val signedShort = coeffFloat16Array[320 + i] - val float16bits = signedShort.toInt() and 0xFFFF // Convert to unsigned - val floatVal = Float16.toFloat(float16bits.toShort()) - if (floatVal.isNaN() || floatVal.isInfinite()) { - println("NaN/Inf detected at Cg coefficient $i: signedShort=0x${signedShort.toString(16)}, unsigned=0x${float16bits.toString(16)}, floatVal=$floatVal") - 0f // Replace NaN with 0 - } else floatVal - } - - yBlock = tevIdct16x16_lossless(yCoeffs) - coBlock = tevIdct8x8_lossless(coCoeffs) - cgBlock = tevIdct8x8_lossless(cgCoeffs) - } else { - // Regular lossy mode: quantized int16 coefficients - // Optimized bulk reading of all DCT coefficients: Y(256×2) + Co(64×2) + Cg(64×2) = 768 bytes - val coeffShortArray = ShortArray(384) // Total coefficients: 256 + 64 + 64 = 384 shorts - vm.bulkPeekShort(readPtr.toInt(), coeffShortArray, 768) - readPtr += 768 + // Regular lossy mode: quantized int16 coefficients + // Optimized bulk reading of all DCT coefficients: Y(256×2) + Co(64×2) + Cg(64×2) = 768 bytes + val coeffShortArray = ShortArray(384) // Total coefficients: 256 + 64 + 64 = 384 shorts + vm.bulkPeekShort(readPtr.toInt(), coeffShortArray, 768) + readPtr += 768 - // Perform hardware IDCT for each channel using fast algorithm - yBlock = tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), QUANT_TABLE_Y, qY, rateControlFactor) - coBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), QUANT_TABLE_C, true, qCo, rateControlFactor) - cgBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), QUANT_TABLE_C, true, qCg, rateControlFactor) - } + // Perform hardware IDCT for each channel using fast algorithm + val yBlock 
= tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), QUANT_TABLE_Y, qY, rateControlFactor) + val coBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), QUANT_TABLE_C, true, qCo, rateControlFactor) + val cgBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), QUANT_TABLE_C, true, qCg, rateControlFactor) // Convert to RGB (YCoCg-R for v2, XYB for v3) val rgbData = if (tevVersion == 3) { - tevXybToRGB(yBlock, coBlock, cgBlock) // XYB format (v3) + tevIctcpToRGB(yBlock, coBlock, cgBlock) // ICtCp format (v3) } else { tevYcocgToRGB(yBlock, coBlock, cgBlock) // YCoCg-R format (v2) } @@ -3108,7 +3002,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Step 4: Convert final data to RGB (YCoCg-R for v2, XYB for v3) val finalRgb = if (tevVersion == 3) { - tevXybToRGB(finalY, finalCo, finalCg) // XYB format (v3) + tevIctcpToRGB(finalY, finalCo, finalCg) // ICtCp format (v3) } else { tevYcocgToRGB(finalY, finalCo, finalCg) // YCoCg-R format (v2) } diff --git a/video_encoder/encoder_tev.c b/video_encoder/encoder_tev.c index d39a957..e1c4144 100644 --- a/video_encoder/encoder_tev.c +++ b/video_encoder/encoder_tev.c @@ -1,5 +1,5 @@ // Created by Claude on 2025-08-18. 
-// TEV (TSVM Enhanced Video) Encoder - YCoCg-R 4:2:0 16x16 Block Version +// TEV (TSVM Enhanced Video) Encoder - YCoCg-R/ICtCp 4:2:0 16x16 Block Version #include #include #include @@ -68,7 +68,9 @@ static inline float float16_to_float(uint16_t hbits) { // TSVM Enhanced Video (TEV) format constants #define TEV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x45\x56" // "\x1FTSVM TEV" -#define TEV_VERSION 2 // Updated for YCoCg-R 4:2:0 +// TEV version - dynamic based on color space mode +// Version 2: YCoCg-R 4:2:0 (default) +// Version 3: ICtCp 4:2:0 (--ictcp flag) // version 1: 8x8 RGB // version 2: 16x16 Y, 8x8 Co/Cg, asymetric quantisation, optional quantiser multiplier for rate control multiplier (1.0 when unused) {current winner} // version 3: version 2 + internal 6-bit processing (discarded due to higher noise floor) @@ -192,17 +194,6 @@ typedef struct __attribute__((packed)) { int16_t cg_coeffs[HALF_BLOCK_SIZE_SQR]; // quantised Cg DCT coefficients (8x8) } tev_block_t; -// Lossless TEV block structure (uses float32 internally, converted to float16 during serialization) -typedef struct __attribute__((packed)) { - uint8_t mode; // Block encoding mode - int16_t mv_x, mv_y; // Motion vector (1/4 pixel precision) - float rate_control_factor; // Always 1.0f in lossless mode - uint16_t cbp; // Coded block pattern (which channels have non-zero coeffs) - float y_coeffs[BLOCK_SIZE_SQR]; // lossless Y DCT coefficients (16x16) - float co_coeffs[HALF_BLOCK_SIZE_SQR]; // lossless Co DCT coefficients (8x8) - float cg_coeffs[HALF_BLOCK_SIZE_SQR]; // lossless Cg DCT coefficients (8x8) -} tev_lossless_block_t; - // Subtitle entry structure typedef struct subtitle_entry { int start_frame; @@ -232,7 +223,7 @@ typedef struct { int qualityCg; int verbose; int disable_rcf; // 0 = rcf enabled, 1 = disabled - int lossless_mode; // 0 = lossy (default), 1 = lossless mode + int ictcp_mode; // 0 = YCoCg-R (default), 1 = ICtCp color space // Bitrate control int target_bitrate_kbps; // Target bitrate in 
kbps (0 = quality mode) @@ -289,6 +280,10 @@ typedef struct { int complexity_capacity; // Capacity of complexity_values array } tev_encoder_t; +////////////////////////// +// COLOUR MATHS CODES // +////////////////////////// + // RGB to YCoCg-R transform (per YCoCg-R specification with truncated division) static void rgb_to_ycocgr(uint8_t r, uint8_t g, uint8_t b, int *y, int *co, int *cg) { *co = (int)r - (int)b; @@ -315,6 +310,180 @@ static void ycocgr_to_rgb(int y, int co, int cg, uint8_t *r, uint8_t *g, uint8_t *b = CLAMP(*b, 0, 255); } +// ---------------------- ICtCp Implementation ---------------------- + +static inline int iround(double v) { return (int)floor(v + 0.5); } + +// ---------------------- sRGB gamma helpers ---------------------- +static inline double srgb_linearize(double val) { + // val in [0,1] + if (val <= 0.04045) return val / 12.92; + return pow((val + 0.055) / 1.055, 2.4); +} +static inline double srgb_unlinearize(double val) { + // val in [0,1] + if (val <= 0.0031308) return val * 12.92; + return 1.055 * pow(val, 1.0 / 2.4) - 0.055; +} + +// -------------------------- HLG -------------------------- +// Forward HLG OETF (linear -> HLG) +static inline double HLG_OETF(double L) { + // L in [0,1], relative scene-linear + const double a = 0.17883277; + const double b = 1.0 - 4.0 * a; + const double c = 0.5 - a * log(4.0 * a); + + if (L <= 1.0/12.0) + return sqrt(3.0 * L); + else + return a * log(12.0 * L - b) + c; +} + +// Inverse HLG OETF (HLG -> linear) +static inline double HLG_inverse_OETF(double V) { + const double a = 0.17883277; + const double b = 1.0 - 4.0 * a; + const double c = 0.5 - a * log(4.0 * a); + + if (V <= 0.5) + return (V * V) / 3.0; + else + return (exp((V - c)/a) + b) / 12.0; +} + +// ---------------------- Matrices (doubles) ---------------------- +// Combined linear sRGB -> LMS (single 3x3): product of sRGB->XYZ, XYZ->BT2020, BT2020->LMS +// Computed from standard matrices (double precision). 
+static const double M_RGB_TO_LMS[3][3] = { + {0.20502672199540622, 0.42945363228947586, 0.31165003516511786}, + {0.2233144413317712, 0.5540422172466897, 0.21854692537153908}, + {0.0609931761282002, 0.17917502499816504, 0.9323768661336348} +}; + +// Inverse: LMS -> linear sRGB (inverse of above) +static const double M_LMS_TO_RGB[3][3] = { + {29.601046511687, -21.364325340529906, -4.886500015143518}, + {-12.083229161592032, 10.673933874098694, 1.5369143265611211}, + {0.38562844776642574, -0.6536244436141302, 1.0968381245163787} +}; + +// ICtCp matrix (L' M' S' -> I Ct Cp). Values are the BT.2100 integer-derived /4096 constants. +static const double M_LMSPRIME_TO_ICTCP[3][3] = { + { 2048.0/4096.0, 2048.0/4096.0, 0.0 }, + { 3625.0/4096.0, -7465.0/4096.0, 3840.0/4096.0 }, + { 9500.0/4096.0, -9212.0/4096.0, -288.0/4096.0 } +}; + +// Inverse: I Ct Cp -> L' M' S' (precomputed inverse) +static const double M_ICTCP_TO_LMSPRIME[3][3] = { + { 1.0, 0.015718580108730416, 0.2095810681164055 }, + { 1.0, -0.015718580108730416, -0.20958106811640548 }, + { 1.0, 1.0212710798422344, -0.6052744909924316 } +}; + +// ---------------------- Forward: sRGB8 -> ICtCp (doubles) ---------------------- +// Inputs: r,g,b in 0..255 sRGB (8-bit) +// Outputs: I, Ct, Cp as doubles (nominally I in ~[0..1], Ct/Cp ranges depend on colors) +void srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8, + double *out_I, double *out_Ct, double *out_Cp) +{ + // 1) linearize sRGB to 0..1 + double r = srgb_linearize((double)r8 / 255.0); + double g = srgb_linearize((double)g8 / 255.0); + double b = srgb_linearize((double)b8 / 255.0); + + // 2) linear RGB -> LMS (single 3x3 multiply) + double L = M_RGB_TO_LMS[0][0]*r + M_RGB_TO_LMS[0][1]*g + M_RGB_TO_LMS[0][2]*b; + double M = M_RGB_TO_LMS[1][0]*r + M_RGB_TO_LMS[1][1]*g + M_RGB_TO_LMS[1][2]*b; + double S = M_RGB_TO_LMS[2][0]*r + M_RGB_TO_LMS[2][1]*g + M_RGB_TO_LMS[2][2]*b; + + // 3) apply HLG encode (map linear LMS -> perceptual domain L',M',S') + double Lp = 
HLG_OETF(L); + double Mp = HLG_OETF(M); + double Sp = HLG_OETF(S); + + // 4) L'M'S' -> ICtCp + double I = M_LMSPRIME_TO_ICTCP[0][0]*Lp + M_LMSPRIME_TO_ICTCP[0][1]*Mp + M_LMSPRIME_TO_ICTCP[0][2]*Sp; + double Ct = M_LMSPRIME_TO_ICTCP[1][0]*Lp + M_LMSPRIME_TO_ICTCP[1][1]*Mp + M_LMSPRIME_TO_ICTCP[1][2]*Sp; + double Cp = M_LMSPRIME_TO_ICTCP[2][0]*Lp + M_LMSPRIME_TO_ICTCP[2][1]*Mp + M_LMSPRIME_TO_ICTCP[2][2]*Sp; + + *out_I = FCLAMP(I * 255.f, 0.f, 255.f); + *out_Ct = FCLAMP(Ct * 255.f, -256.f, 255.f); + *out_Cp = FCLAMP(Cp * 255.f, -256.f, 255.f); +} + +// ---------------------- Reverse: ICtCp -> sRGB8 (doubles) ---------------------- +// Inputs: I, Ct, Cp as doubles +// Outputs: r8,g8,b8 in 0..255 (8-bit sRGB, clamped and rounded) +void ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8, + uint8_t *r8, uint8_t *g8, uint8_t *b8) +{ + double I = I8 / 255.f; + double Ct = Ct8 / 255.f; + double Cp = Cp8 / 255.f; + + // 1) ICtCp -> L' M' S' (3x3 multiply) + double Lp = M_ICTCP_TO_LMSPRIME[0][0]*I + M_ICTCP_TO_LMSPRIME[0][1]*Ct + M_ICTCP_TO_LMSPRIME[0][2]*Cp; + double Mp = M_ICTCP_TO_LMSPRIME[1][0]*I + M_ICTCP_TO_LMSPRIME[1][1]*Ct + M_ICTCP_TO_LMSPRIME[1][2]*Cp; + double Sp = M_ICTCP_TO_LMSPRIME[2][0]*I + M_ICTCP_TO_LMSPRIME[2][1]*Ct + M_ICTCP_TO_LMSPRIME[2][2]*Cp; + + // 2) HLG decode: L' -> linear LMS + double L = HLG_inverse_OETF(Lp); + double M = HLG_inverse_OETF(Mp); + double S = HLG_inverse_OETF(Sp); + + // 3) LMS -> linear sRGB (3x3 inverse) + double r_lin = M_LMS_TO_RGB[0][0]*L + M_LMS_TO_RGB[0][1]*M + M_LMS_TO_RGB[0][2]*S; + double g_lin = M_LMS_TO_RGB[1][0]*L + M_LMS_TO_RGB[1][1]*M + M_LMS_TO_RGB[1][2]*S; + double b_lin = M_LMS_TO_RGB[2][0]*L + M_LMS_TO_RGB[2][1]*M + M_LMS_TO_RGB[2][2]*S; + + // 4) gamma encode and convert to 0..255 with center-of-bin rounding + double r = srgb_unlinearize(r_lin); + double g = srgb_unlinearize(g_lin); + double b = srgb_unlinearize(b_lin); + + *r8 = (uint8_t)CLAMP(iround(r * 255.0), 0, 255); + *g8 = (uint8_t)CLAMP(iround(g * 
255.0), 0, 255); + *b8 = (uint8_t)CLAMP(iround(b * 255.0), 0, 255); +} + +// ---------------------- Color Space Switching Functions ---------------------- +// Wrapper functions that choose between YCoCg-R and ICtCp based on encoder mode + +static void rgb_to_color_space(tev_encoder_t *enc, uint8_t r, uint8_t g, uint8_t b, + double *c1, double *c2, double *c3) { + if (enc->ictcp_mode) { + // Use ICtCp color space + srgb8_to_ictcp_hlg(r, g, b, c1, c2, c3); + } else { + // Use YCoCg-R color space (convert to int first, then to double) + int y_val, co_val, cg_val; + rgb_to_ycocgr(r, g, b, &y_val, &co_val, &cg_val); + *c1 = (double)y_val; + *c2 = (double)co_val; + *c3 = (double)cg_val; + } +} + +static void color_space_to_rgb(tev_encoder_t *enc, double c1, double c2, double c3, + uint8_t *r, uint8_t *g, uint8_t *b) { + if (enc->ictcp_mode) { + // Use ICtCp color space + ictcp_hlg_to_srgb8(c1, c2, c3, r, g, b); + } else { + // Use YCoCg-R color space (convert from double to int first) + int y_val = (int)round(c1); + int co_val = (int)round(c2); + int cg_val = (int)round(c3); + ycocgr_to_rgb(y_val, co_val, cg_val, r, g, b); + } +} + +//////////////////////////////////////// +// DISCRETE COSINE TRANSFORMATIONS // +//////////////////////////////////////// + // Pre-calculated cosine tables static float dct_table_16[16][16]; // For 16x16 DCT static float dct_table_8[8][8]; // For 8x8 DCT @@ -429,14 +598,14 @@ static int16_t quantise_coeff(float coeff, float quant, int is_dc, int is_chroma } } -// Extract 16x16 block from RGB frame and convert to YCoCg-R -static void extract_ycocgr_block(uint8_t *rgb_frame, int width, int height, - int block_x, int block_y, - float *y_block, float *co_block, float *cg_block) { +// Extract 16x16 block from RGB frame and convert to color space +static void extract_color_space_block(tev_encoder_t *enc, uint8_t *rgb_frame, int width, int height, + int block_x, int block_y, + float *c1_block, float *c2_block, float *c3_block) { int start_x = 
block_x * BLOCK_SIZE; int start_y = block_y * BLOCK_SIZE; - // Extract 16x16 Y block + // Extract 16x16 primary channel block (Y for YCoCg-R, I for ICtCp) for (int py = 0; py < BLOCK_SIZE; py++) { for (int px = 0; px < BLOCK_SIZE; px++) { int x = start_x + px; @@ -448,10 +617,10 @@ static void extract_ycocgr_block(uint8_t *rgb_frame, int width, int height, uint8_t g = rgb_frame[offset + 1]; uint8_t b = rgb_frame[offset + 2]; - int y_val, co_val, cg_val; - rgb_to_ycocgr(r, g, b, &y_val, &co_val, &cg_val); + double c1, c2, c3; + rgb_to_color_space(enc, r, g, b, &c1, &c2, &c3); - y_block[py * BLOCK_SIZE + px] = (float)y_val - 128.0f; // Center around 0 + c1_block[py * BLOCK_SIZE + px] = (float)c1 - 128.0f; } } } @@ -473,25 +642,30 @@ static void extract_ycocgr_block(uint8_t *rgb_frame, int width, int height, uint8_t g = rgb_frame[offset + 1]; uint8_t b = rgb_frame[offset + 2]; - int y_val, co_val, cg_val; - rgb_to_ycocgr(r, g, b, &y_val, &co_val, &cg_val); + double c1, c2, c3; + rgb_to_color_space(enc, r, g, b, &c1, &c2, &c3); + + co_sum += (int)c2; + cg_sum += (int)c3; - co_sum += co_val; - cg_sum += cg_val; count++; } } } if (count > 0) { - // Center chroma around 0 for DCT (Co/Cg range is -255 to +255, so don't add offset) - co_block[py * HALF_BLOCK_SIZE + px] = (float)(co_sum / count); - cg_block[py * HALF_BLOCK_SIZE + px] = (float)(cg_sum / count); + // Average the accumulated chroma values and store + c2_block[py * HALF_BLOCK_SIZE + px] = (float)(co_sum / count); + c3_block[py * HALF_BLOCK_SIZE + px] = (float)(cg_sum / count); } } } } + + + + // Calculate spatial activity for any channel (16x16 or 8x8) static float calculate_spatial_activity(const float *block, int block_size) { float activity = 0.0f; @@ -791,8 +965,138 @@ static void estimate_motion(tev_encoder_t *enc, int block_x, int block_y, } // Convert RGB block to YCoCg-R with 4:2:0 chroma subsampling -static void convert_rgb_to_ycocgr_block(const uint8_t *rgb_block, - uint8_t *y_block, int8_t *co_block, 
int8_t *cg_block) { +static void convert_rgb_to_color_space_block(tev_encoder_t *enc, const uint8_t *rgb_block, + float *c1_workspace, float *c2_workspace, float *c3_workspace) { + if (enc->ictcp_mode) { + // ICtCp mode: Convert 16x16 RGB to ICtCp (full resolution for I, 4:2:0 subsampling for CtCp) + + // Convert I channel at full resolution (16x16) + for (int py = 0; py < BLOCK_SIZE; py++) { + for (int px = 0; px < BLOCK_SIZE; px++) { + int rgb_idx = (py * BLOCK_SIZE + px) * 3; + uint8_t r = rgb_block[rgb_idx]; + uint8_t g = rgb_block[rgb_idx + 1]; + uint8_t b = rgb_block[rgb_idx + 2]; + + double I, Ct, Cp; + srgb8_to_ictcp_hlg(r, g, b, &I, &Ct, &Cp); + + // Store I at full resolution, scale to appropriate range + c1_workspace[py * BLOCK_SIZE + px] = (float)(I * 255.0); + } + } + + // Convert Ct and Cp with 4:2:0 subsampling (8x8) + for (int cy = 0; cy < HALF_BLOCK_SIZE; cy++) { + for (int cx = 0; cx < HALF_BLOCK_SIZE; cx++) { + double sum_ct = 0.0, sum_cp = 0.0; + + // Sample 2x2 block from RGB and average for chroma + for (int dy = 0; dy < 2; dy++) { + for (int dx = 0; dx < 2; dx++) { + int py = cy * 2 + dy; + int px = cx * 2 + dx; + int rgb_idx = (py * 16 + px) * 3; + + int r = rgb_block[rgb_idx]; + int g = rgb_block[rgb_idx + 1]; + int b = rgb_block[rgb_idx + 2]; + + double I, Ct, Cp; + srgb8_to_ictcp_hlg(r, g, b, &I, &Ct, &Cp); + + sum_ct += Ct; + sum_cp += Cp; + } + } + + // Average and store subsampled chroma, scale to signed 8-bit equivalent range + c2_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)((sum_ct / 4.0) * 255.0); + c3_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)((sum_cp / 4.0) * 255.0); + } + } + } else { + // YCoCg-R mode: Original implementation + + // Convert 16x16 RGB to Y (full resolution) + for (int py = 0; py < BLOCK_SIZE; py++) { + for (int px = 0; px < BLOCK_SIZE; px++) { + int rgb_idx = (py * BLOCK_SIZE + px) * 3; + int r = rgb_block[rgb_idx]; + int g = rgb_block[rgb_idx + 1]; + int b = rgb_block[rgb_idx + 2]; + + // YCoCg-R transform 
(per specification with truncated division) + int y = (r + 2*g + b) / 4; + c1_workspace[py * BLOCK_SIZE + px] = (float)CLAMP(y, 0, 255); + } + } + + // Convert to Co and Cg with 4:2:0 subsampling (8x8) + for (int cy = 0; cy < HALF_BLOCK_SIZE; cy++) { + for (int cx = 0; cx < HALF_BLOCK_SIZE; cx++) { + int sum_co = 0, sum_cg = 0; + + // Sample 2x2 block from RGB and average for chroma + for (int dy = 0; dy < 2; dy++) { + for (int dx = 0; dx < 2; dx++) { + int py = cy * 2 + dy; + int px = cx * 2 + dx; + int rgb_idx = (py * 16 + px) * 3; + + int r = rgb_block[rgb_idx]; + int g = rgb_block[rgb_idx + 1]; + int b = rgb_block[rgb_idx + 2]; + + int co = r - b; + int tmp = b + (co / 2); + int cg = g - tmp; + + sum_co += co; + sum_cg += cg; + } + } + + // Average and store subsampled chroma + c2_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)CLAMP(sum_co / 4, -256, 255); + c3_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)CLAMP(sum_cg / 4, -256, 255); + } + } + } +} + +// Extract motion-compensated YCoCg-R block from reference frame +static void extract_motion_compensated_block(const uint8_t *rgb_data, int width, int height, + int block_x, int block_y, int mv_x, int mv_y, + uint8_t *y_block, int8_t *co_block, int8_t *cg_block) { + // Extract 16x16 RGB block with motion compensation + uint8_t rgb_block[BLOCK_SIZE * BLOCK_SIZE * 3]; + + for (int dy = 0; dy < BLOCK_SIZE; dy++) { + for (int dx = 0; dx < BLOCK_SIZE; dx++) { + int cur_x = block_x + dx; + int cur_y = block_y + dy; + int ref_x = cur_x + mv_x; // Revert to original motion compensation + int ref_y = cur_y + mv_y; + + int rgb_idx = (dy * BLOCK_SIZE + dx) * 3; + + if (ref_x >= 0 && ref_y >= 0 && ref_x < width && ref_y < height) { + // Copy RGB from reference position + int ref_offset = (ref_y * width + ref_x) * 3; + rgb_block[rgb_idx] = rgb_data[ref_offset]; // R + rgb_block[rgb_idx + 1] = rgb_data[ref_offset + 1]; // G + rgb_block[rgb_idx + 2] = rgb_data[ref_offset + 2]; // B + } else { + // Out of bounds - use black + 
rgb_block[rgb_idx] = 0; // R + rgb_block[rgb_idx + 1] = 0; // G + rgb_block[rgb_idx + 2] = 0; // B + } + } + } + + // Convert RGB block to YCoCg-R (original implementation for motion compensation) // Convert 16x16 RGB to Y (full resolution) for (int py = 0; py < BLOCK_SIZE; py++) { for (int px = 0; px < BLOCK_SIZE; px++) { @@ -840,41 +1144,6 @@ static void convert_rgb_to_ycocgr_block(const uint8_t *rgb_block, } } -// Extract motion-compensated YCoCg-R block from reference frame -static void extract_motion_compensated_block(const uint8_t *rgb_data, int width, int height, - int block_x, int block_y, int mv_x, int mv_y, - uint8_t *y_block, int8_t *co_block, int8_t *cg_block) { - // Extract 16x16 RGB block with motion compensation - uint8_t rgb_block[BLOCK_SIZE * BLOCK_SIZE * 3]; - - for (int dy = 0; dy < BLOCK_SIZE; dy++) { - for (int dx = 0; dx < BLOCK_SIZE; dx++) { - int cur_x = block_x + dx; - int cur_y = block_y + dy; - int ref_x = cur_x + mv_x; // Revert to original motion compensation - int ref_y = cur_y + mv_y; - - int rgb_idx = (dy * BLOCK_SIZE + dx) * 3; - - if (ref_x >= 0 && ref_y >= 0 && ref_x < width && ref_y < height) { - // Copy RGB from reference position - int ref_offset = (ref_y * width + ref_x) * 3; - rgb_block[rgb_idx] = rgb_data[ref_offset]; // R - rgb_block[rgb_idx + 1] = rgb_data[ref_offset + 1]; // G - rgb_block[rgb_idx + 2] = rgb_data[ref_offset + 2]; // B - } else { - // Out of bounds - use black - rgb_block[rgb_idx] = 0; // R - rgb_block[rgb_idx + 1] = 0; // G - rgb_block[rgb_idx + 2] = 0; // B - } - } - } - - // Convert RGB block to YCoCg-R - convert_rgb_to_ycocgr_block(rgb_block, y_block, co_block, cg_block); -} - // Compute motion-compensated residual for INTER mode static void compute_motion_residual(tev_encoder_t *enc, int block_x, int block_y, int mv_x, int mv_y) { int start_x = block_x * BLOCK_SIZE; @@ -909,7 +1178,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke tev_block_t *block = 
&enc->block_data[block_y * ((enc->width + 15) / 16) + block_x]; // Extract YCoCg-R block - extract_ycocgr_block(enc->current_rgb, enc->width, enc->height, + extract_color_space_block(enc, enc->current_rgb, enc->width, enc->height, block_x, block_y, enc->y_workspace, enc->co_workspace, enc->cg_workspace); @@ -1105,107 +1374,6 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke block->cbp = 0x07; // Y, Co, Cg all present } -// Encode a 16x16 block in lossless mode -static void encode_block_lossless(tev_encoder_t *enc, int block_x, int block_y, int is_keyframe) { - tev_lossless_block_t *block = (tev_lossless_block_t*)&enc->block_data[block_y * ((enc->width + 15) / 16) + block_x]; - - // Extract YCoCg-R block - extract_ycocgr_block(enc->current_rgb, enc->width, enc->height, - block_x, block_y, - enc->y_workspace, enc->co_workspace, enc->cg_workspace); - - if (is_keyframe) { - // Intra coding for keyframes - block->mode = TEV_MODE_INTRA; - block->mv_x = block->mv_y = 0; - enc->blocks_intra++; - } else { - // Same mode decision logic as regular encode_block - // For simplicity, using INTRA for now in lossless mode - block->mode = TEV_MODE_INTRA; - block->mv_x = block->mv_y = 0; - enc->blocks_intra++; - } - - // Lossless mode: rate control factor is always 1.0f - block->rate_control_factor = 1.0f; - - // Apply DCT transforms using the same pattern as regular encoding - // Y channel (16x16) - dct_16x16_fast(enc->y_workspace, enc->dct_workspace); - for (int i = 0; i < BLOCK_SIZE_SQR; i++) { - block->y_coeffs[i] = enc->dct_workspace[i]; // Store directly without quantization - } - - // Co channel (8x8) - dct_8x8_fast(enc->co_workspace, enc->dct_workspace); - for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) { - block->co_coeffs[i] = enc->dct_workspace[i]; // Store directly without quantization - } - - // Cg channel (8x8) - dct_8x8_fast(enc->cg_workspace, enc->dct_workspace); - for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) { - block->cg_coeffs[i] = 
enc->dct_workspace[i]; // Store directly without quantization - } - - // Set CBP (simplified - always encode all channels) - block->cbp = 0x07; // Y, Co, Cg all present -} - -// Serialized lossless block structure (for writing to file with float16 coefficients) -typedef struct __attribute__((packed)) { - uint8_t mode; - int16_t mv_x, mv_y; - float rate_control_factor; // Always 1.0f in lossless mode - uint16_t cbp; - uint16_t y_coeffs[BLOCK_SIZE_SQR]; // float16 Y coefficients - uint16_t co_coeffs[HALF_BLOCK_SIZE_SQR]; // float16 Co coefficients - uint16_t cg_coeffs[HALF_BLOCK_SIZE_SQR]; // float16 Cg coefficients -} tev_serialized_lossless_block_t; - -// Convert lossless blocks to serialized format with float16 coefficients -static void serialize_lossless_blocks(tev_encoder_t *enc, int blocks_x, int blocks_y, - tev_serialized_lossless_block_t *serialized_blocks) { - for (int by = 0; by < blocks_y; by++) { - for (int bx = 0; bx < blocks_x; bx++) { - tev_lossless_block_t *src = (tev_lossless_block_t*)&enc->block_data[by * blocks_x + bx]; - tev_serialized_lossless_block_t *dst = &serialized_blocks[by * blocks_x + bx]; - - // Copy basic fields - dst->mode = src->mode; - dst->mv_x = src->mv_x; - dst->mv_y = src->mv_y; - dst->rate_control_factor = src->rate_control_factor; - dst->cbp = src->cbp; - - // Convert float32 coefficients to float16 with range clamping - // Float16 max finite value is approximately 65504 - const float FLOAT16_MAX = 65504.0f; - - for (int i = 0; i < BLOCK_SIZE_SQR; i++) { - float coeff = FCLAMP(src->y_coeffs[i], -FLOAT16_MAX, FLOAT16_MAX); - dst->y_coeffs[i] = float_to_float16(coeff); - if (enc->verbose && fabsf(src->y_coeffs[i]) > FLOAT16_MAX) { - printf("WARNING: Y coefficient %d clamped: %f -> %f\n", i, src->y_coeffs[i], coeff); - } - } - for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) { - float co_coeff = FCLAMP(src->co_coeffs[i], -FLOAT16_MAX, FLOAT16_MAX); - float cg_coeff = FCLAMP(src->cg_coeffs[i], -FLOAT16_MAX, FLOAT16_MAX); - 
dst->co_coeffs[i] = float_to_float16(co_coeff); - dst->cg_coeffs[i] = float_to_float16(cg_coeff); - if (enc->verbose && fabsf(src->co_coeffs[i]) > FLOAT16_MAX) { - printf("WARNING: Co coefficient %d clamped: %f -> %f\n", i, src->co_coeffs[i], co_coeff); - } - if (enc->verbose && fabsf(src->cg_coeffs[i]) > FLOAT16_MAX) { - printf("WARNING: Cg coefficient %d clamped: %f -> %f\n", i, src->cg_coeffs[i], cg_coeff); - } - } - } - } -} - // Convert SubRip time format (HH:MM:SS,mmm) to frame number static int srt_time_to_frame(const char *time_str, int fps) { int hours, minutes, seconds, milliseconds; @@ -1820,17 +1988,13 @@ static int alloc_encoder_buffers(tev_encoder_t *enc) { enc->cg_workspace = malloc(8 * 8 * sizeof(float)); enc->dct_workspace = malloc(16 * 16 * sizeof(float)); + // Allocate block data enc->block_data = malloc(total_blocks * sizeof(tev_block_t)); - // Allocate compression buffer large enough for both regular and lossless modes - size_t max_block_size = sizeof(tev_block_t) > sizeof(tev_serialized_lossless_block_t) ? - sizeof(tev_block_t) : sizeof(tev_serialized_lossless_block_t); - size_t compressed_buffer_size = total_blocks * max_block_size * 2; + + // Allocate compression buffer + size_t compressed_buffer_size = total_blocks * sizeof(tev_block_t) * 2; enc->compressed_buffer = malloc(compressed_buffer_size); - - if (enc->verbose) { - printf("Allocated compressed buffer: %zu bytes for %d blocks (max_block_size: %zu)\n", - compressed_buffer_size, total_blocks, max_block_size); - } + enc->mp2_buffer = malloc(MP2_DEFAULT_PACKET_SIZE); if (!enc->current_rgb || !enc->previous_rgb || !enc->reference_rgb || @@ -1889,7 +2053,7 @@ static void free_encoder(tev_encoder_t *enc) { static int write_tev_header(FILE *output, tev_encoder_t *enc) { // Magic + version fwrite(TEV_MAGIC, 1, 8, output); - uint8_t version = TEV_VERSION; + uint8_t version = enc->ictcp_mode ? 
3 : 2; // Version 3 for ICtCp, 2 for YCoCg-R fwrite(&version, 1, 1, output); // Video parameters @@ -1901,7 +2065,7 @@ static int write_tev_header(FILE *output, tev_encoder_t *enc) { uint8_t qualityCo = enc->qualityCo; uint8_t qualityCg = enc->qualityCg; uint8_t flags = (enc->has_audio) | (enc->has_subtitles << 1); - uint8_t video_flags = (enc->progressive_mode ? 0 : 1) | (enc->is_ntsc_framerate ? 2 : 0) | (enc->lossless_mode ? 4 : 0); // bit 0 = is_interlaced, bit 1 = is_ntsc_framerate, bit 2 = is_lossless + uint8_t video_flags = (enc->progressive_mode ? 0 : 1) | (enc->is_ntsc_framerate ? 2 : 0); // bit 0 = is_interlaced, bit 1 = is_ntsc_framerate uint8_t reserved = 0; fwrite(&width, 2, 1, output); @@ -2008,11 +2172,7 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num, int fie // Encode all blocks for (int by = 0; by < blocks_y; by++) { for (int bx = 0; bx < blocks_x; bx++) { - if (enc->lossless_mode) { - encode_block_lossless(enc, bx, by, is_keyframe); - } else { - encode_block(enc, bx, by, is_keyframe); - } + encode_block(enc, bx, by, is_keyframe); // Calculate complexity for rate control (if enabled) if (enc->bitrate_mode > 0) { @@ -2029,34 +2189,14 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num, int fie // Compress block data using Zstd (compatible with TSVM decoder) size_t compressed_size; - - if (enc->lossless_mode) { - // Lossless mode: serialize blocks with float16 coefficients - size_t serialized_block_data_size = blocks_x * blocks_y * sizeof(tev_serialized_lossless_block_t); - tev_serialized_lossless_block_t *serialized_blocks = malloc(serialized_block_data_size); - if (!serialized_blocks) { - fprintf(stderr, "Failed to allocate memory for serialized lossless blocks\n"); - return -1; - } - - serialize_lossless_blocks(enc, blocks_x, blocks_y, serialized_blocks); - - // Use the pre-allocated buffer size instead of calculating dynamically - size_t output_buffer_size = blocks_x * blocks_y * 
sizeof(tev_serialized_lossless_block_t) * 2; - compressed_size = ZSTD_compressCCtx(enc->zstd_context, - enc->compressed_buffer, output_buffer_size, - serialized_blocks, serialized_block_data_size, - ZSTD_COMPRESSON_LEVEL); - free(serialized_blocks); - } else { - // Regular mode: use regular block data - size_t block_data_size = blocks_x * blocks_y * sizeof(tev_block_t); - compressed_size = ZSTD_compressCCtx(enc->zstd_context, - enc->compressed_buffer, block_data_size * 2, - enc->block_data, block_data_size, - ZSTD_COMPRESSON_LEVEL); - } - + + // Regular mode: use regular block data + size_t block_data_size = blocks_x * blocks_y * sizeof(tev_block_t); + compressed_size = ZSTD_compressCCtx(enc->zstd_context, + enc->compressed_buffer, block_data_size * 2, + enc->block_data, block_data_size, + ZSTD_COMPRESSON_LEVEL); + if (ZSTD_isError(compressed_size)) { fprintf(stderr, "Zstd compression failed: %s\n", ZSTD_getErrorName(compressed_size)); return 0; @@ -2288,7 +2428,7 @@ static int start_audio_conversion(tev_encoder_t *enc) { char command[2048]; snprintf(command, sizeof(command), "ffmpeg -v quiet -i \"%s\" -acodec libtwolame -psymodel 4 -b:a %dk -ar %d -ac 2 -y \"%s\" 2>/dev/null", - enc->input_file, enc->lossless_mode ? 
384 : MP2_RATE_TABLE[enc->qualityIndex], MP2_SAMPLE_RATE, TEMP_AUDIO_FILE); + enc->input_file, MP2_RATE_TABLE[enc->qualityIndex], MP2_SAMPLE_RATE, TEMP_AUDIO_FILE); int result = system(command); if (result == 0) { @@ -2429,7 +2569,7 @@ static int process_audio(tev_encoder_t *enc, int frame_num, FILE *output) { // Show usage information static void show_usage(const char *program_name) { - printf("TEV YCoCg-R 4:2:0 Video Encoder\n"); + printf("TEV YCoCg-R/ICtCp 4:2:0 Video Encoder\n"); printf("Usage: %s [options] -i input.mp4 -o output.mv2\n\n", program_name); printf("Options:\n"); printf(" -i, --input FILE Input video file\n"); @@ -2443,7 +2583,7 @@ static void show_usage(const char *program_name) { printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n"); printf(" -v, --verbose Verbose output\n"); printf(" -t, --test Test mode: generate solid colour frames\n"); - printf(" --lossless Lossless mode: store coefficients as float16 (no quantisation, implies -p, 384k audio)\n"); + printf(" --ictcp Use ICtCp color space instead of YCoCg-R (generates TEV version 3)\n"); printf(" --enable-rcf Enable per-block rate control (experimental)\n"); printf(" --enable-encode-stats Collect and report block complexity statistics\n"); printf(" --help Show this help\n\n"); @@ -2467,7 +2607,7 @@ static void show_usage(const char *program_name) { printf("\n -s default: equal to %dx%d", DEFAULT_WIDTH, DEFAULT_HEIGHT); printf("\n\n"); printf("Features:\n"); - printf(" - YCoCg-R 4:2:0 chroma subsampling for 50%% compression improvement\n"); + printf(" - YCoCg-R or ICtCp 4:2:0 chroma subsampling for 50%% compression improvement\n"); printf(" - 16x16 Y blocks with 8x8 chroma for optimal DCT efficiency\n"); printf(" - Frame rate conversion with FFmpeg temporal filtering\n"); printf(" - Adaptive quality control with complexity-based adjustment\n"); @@ -2536,7 +2676,7 @@ int main(int argc, char *argv[]) { {"test", no_argument, 0, 't'}, {"enable-encode-stats", no_argument, 0, 
1000}, {"enable-rcf", no_argument, 0, 1100}, - {"lossless", no_argument, 0, 1200}, + {"ictcp", no_argument, 0, 1300}, {"help", no_argument, 0, '?'}, {0, 0, 0, 0} }; @@ -2611,8 +2751,8 @@ int main(int argc, char *argv[]) { case 1100: // --enable-rcf enc->disable_rcf = 0; break; - case 1200: // --lossless - enc->lossless_mode = 1; + case 1300: // --ictcp + enc->ictcp_mode = 1; break; case 0: if (strcmp(long_options[option_index].name, "help") == 0) { @@ -2633,24 +2773,17 @@ int main(int argc, char *argv[]) { } } - // Lossless mode validation and adjustments - if (enc->lossless_mode) { - // In lossless mode, disable rate control and set quality to maximum - enc->bitrate_mode = 0; - enc->disable_rcf = 1; - enc->progressive_mode = 1; - enc->qualityIndex = 5; - enc->qualityY = enc->qualityCo = enc->qualityCg = 255; // Use 255 as a redundant lossless marker - if (enc->verbose) { - printf("Lossless mode enabled: Rate control disabled, quality set to maximum, enabling progressive scan\n"); - } - } - // halve the internal representation of frame height if (!enc->progressive_mode) { enc->height /= 2; } + if (enc->ictcp_mode) { + int qc = (enc->qualityCo + enc->qualityCg) / 2; + enc->qualityCo = qc; + enc->qualityCg = qc; + } + if (!test_mode && (!enc->input_file || !enc->output_file)) { fprintf(stderr, "Input and output files are required (unless using --test mode)\n"); show_usage(argv[0]); @@ -2737,7 +2870,7 @@ int main(int argc, char *argv[]) { write_tev_header(output, enc); gettimeofday(&enc->start_time, NULL); - printf("Encoding video with YCoCg-R 4:2:0 format...\n"); + printf("Encoding video with %s 4:2:0 format...\n", enc->ictcp_mode ? 
"ICtCp" : "YCoCg-R"); if (enc->output_fps != enc->fps) { printf("Frame rate conversion enabled: %d fps output\n", enc->output_fps); } @@ -2791,13 +2924,13 @@ int main(int argc, char *argv[]) { printf("Frame %d: %s (%d,%d,%d)\n", frame_count, colour_name, test_r, test_g, test_b); // Test YCoCg-R conversion - int y_test, co_test, cg_test; - rgb_to_ycocgr(test_r, test_g, test_b, &y_test, &co_test, &cg_test); - printf(" YCoCg-R: Y=%d Co=%d Cg=%d\n", y_test, co_test, cg_test); + double y_test, co_test, cg_test; + rgb_to_color_space(enc, test_r, test_g, test_b, &y_test, &co_test, &cg_test); + printf(" %s: Y=%.3f Co=%.3f Cg=%.3f\n", enc->ictcp_mode ? "ICtCp" : "YCoCg", y_test, co_test, cg_test); // Test reverse conversion uint8_t r_rev, g_rev, b_rev; - ycocgr_to_rgb(y_test, co_test, cg_test, &r_rev, &g_rev, &b_rev); + color_space_to_rgb(enc, y_test, co_test, cg_test, &r_rev, &g_rev, &b_rev); printf(" Reverse: R=%d G=%d B=%d\n", r_rev, g_rev, b_rev); } else {