ICtCp colour space wip

This commit is contained in:
minjaesong
2025-09-15 01:44:29 +09:00
parent 198e951102
commit 9c2aa96b73
4 changed files with 480 additions and 462 deletions

View File

@@ -11,7 +11,7 @@ const HEIGHT = 448
const BLOCK_SIZE = 16 // 16x16 blocks for YCoCg-R
const TEV_MAGIC = [0x1F, 0x54, 0x53, 0x56, 0x4D, 0x54, 0x45, 0x56] // "\x1FTSVM TEV"
const TEV_VERSION_YCOCG = 2 // YCoCg-R version
const TEV_VERSION_XYB = 3 // XYB version
const TEV_VERSION_ICtCp = 3 // ICtCp version
const SND_BASE_ADDR = audio.getBaseAddr()
const pcm = require("pcm")
const MP2_FRAME_SIZE = [144,216,252,288,360,432,504,576,720,864,1008,1152,1440,1728]
@@ -391,18 +391,15 @@ if (!magicMatching) {
// Read header
let version = seqread.readOneByte()
if (version !== TEV_VERSION_YCOCG && version !== TEV_VERSION_XYB) {
println(`Unsupported TEV version: ${version} (expected ${TEV_VERSION_YCOCG} for YCoCg-R or ${TEV_VERSION_XYB} for XYB)`)
if (version !== TEV_VERSION_YCOCG && version !== TEV_VERSION_ICtCp) {
println(`Unsupported TEV version: ${version} (expected ${TEV_VERSION_YCOCG} for YCoCg-R or ${TEV_VERSION_ICtCp} for ICtCp)`)
return 1
}
let colorSpace = (version === TEV_VERSION_XYB) ? "XYB" : "YCoCg-R"
let colorSpace = (version === TEV_VERSION_ICtCp) ? "ICtCp" : "YCoCg"
if (interactive) {
con.move(1,1)
if (colorSpace == "XYB")
println(`Push and hold Backspace to exit | TEV Format ${version} (${colorSpace}) | Deblock: ${enableDeblocking ? 'ON' : 'OFF'}, ${enableBoundaryAwareDecoding ? 'ON' : 'OFF'}`);
else
println(`Push and hold Backspace to exit | Deblock: ${enableDeblocking ? 'ON' : 'OFF'} | BoundaryAware: ${enableBoundaryAwareDecoding ? 'ON' : 'OFF'}`);
println(`Push and hold Backspace to exit | ${colorSpace} | Deblock: ${enableDeblocking ? 'ON' : 'OFF'} | EdgeAware: ${enableBoundaryAwareDecoding ? 'ON' : 'OFF'}`);
}
let width = seqread.readShort()
@@ -418,7 +415,6 @@ let hasSubtitle = !!(flags & 2)
let videoFlags = seqread.readOneByte()
let isInterlaced = !!(videoFlags & 1)
let isNTSC = !!(videoFlags & 2)
let isLossless = !!(videoFlags & 4)
let unused2 = seqread.readOneByte()
@@ -428,7 +424,7 @@ serial.println(` FPS: ${(isNTSC) ? (fps * 1000 / 1001) : fps}`)
serial.println(` Duration: ${totalFrames / fps}`)
serial.println(` Audio: ${hasAudio ? "Yes" : "No"}`)
serial.println(` Resolution: ${width}x${height}, ${isInterlaced ? "interlaced" : "progressive"}`)
serial.println(` Quality: Y=${qualityY}, Co=${qualityCo}, Cg=${qualityCg}, ${isLossless ? "lossless" : "lossy"}`)
serial.println(` Quality: Y=${qualityY}, Co=${qualityCo}, Cg=${qualityCg}`)
// DEBUG interlace raw output
@@ -621,7 +617,7 @@ try {
PREV_RGB_ADDR = temp
} else if (packetType == TEV_PACKET_IFRAME || packetType == TEV_PACKET_PFRAME) {
// Video frame packet (always includes rate control factor)
// Video frame packet
let payloadLen = seqread.readInt()
let compressedPtr = seqread.readBytes(payloadLen)
updateDataRateBin(payloadLen)
@@ -636,11 +632,6 @@ try {
// Decompress using gzip
// Optimized buffer size calculation for TEV YCoCg-R blocks
let blocksX = (width + 15) >> 4 // 16x16 blocks
let blocksY = (height + 15) >> 4
let tevBlockSize = 1 + 4 + 2 + (256 * 2) + (64 * 2) + (64 * 2) // mode + mv + cbp + Y(16x16) + Co(8x8) + Cg(8x8)
let decompressedSize = Math.max(payloadLen * 4, blocksX * blocksY * tevBlockSize) // More efficient sizing
let actualSize
let decompressStart = sys.nanoTime()
try {
@@ -655,7 +646,7 @@ try {
continue
}
// Hardware-accelerated TEV decoding to RGB buffers (YCoCg-R or XYB based on version)
// Hardware-accelerated TEV decoding to RGB buffers (YCoCg-R or ICtCp based on version)
try {
// duplicate every 1000th frame (pass a turn every 1000n+501st) if NTSC
if (!isNTSC || frameCount % 1000 != 501 || frameDuped) {
@@ -667,14 +658,14 @@ try {
if (isInterlaced) {
// For interlaced: decode current frame into currentFieldAddr
// For display: use prevFieldAddr as current, currentFieldAddr as next
graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding, isLossless)
graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding)
graphics.tevDeinterlace(trueFrameCount, width, decodingHeight, prevFieldAddr, currentFieldAddr, nextFieldAddr, CURRENT_RGB_ADDR, deinterlaceAlgorithm)
// Rotate field buffers for next frame: NEXT -> CURRENT -> PREV
rotateFieldBuffers()
} else {
// Progressive or first frame: normal decoding without temporal prediction
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding, isLossless)
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding)
}
decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 // Convert to milliseconds

View File

@@ -683,7 +683,7 @@ DCT-based compression, motion compensation, and efficient temporal coding.
- Version 2.1: Added Rate Control Factor to all video packets (breaking change)
* Enables bitrate-constrained encoding alongside quality modes
* All video frames now include 4-byte rate control factor after payload size
- Version 3.0: Additional support of XYB Colour space
- Version 3.0: Added support for the ICtCp colour space
# File Structure
\x1F T S V M T E V

View File

@@ -2147,7 +2147,92 @@ class GraphicsJSR223Delegate(private val vm: VM) {
return rgbData
}
// ICtCp to RGB conversion for TEV version 3
// ICtCp to RGB conversion for TEV version 3.
// iBlock is the 16x16 intensity plane from the IDCT stage; ctBlock/cpBlock are
// the 8x8 chroma planes (4:2:0), upsampled here by nearest neighbour.
// Returns an interleaved R,G,B array (16*16*3) with each component in 0..255.
fun tevIctcpToRGB(iBlock: IntArray, ctBlock: IntArray, cpBlock: IntArray): IntArray {
    val rgbData = IntArray(16 * 16 * 3) // R,G,B for 16x16 pixels
    for (py in 0 until 16) {
        for (px in 0 until 16) {
            val iIdx = py * 16 + px
            val i = iBlock[iIdx].toDouble()
            // Nearest-neighbour upsampling of the 8x8 chroma planes (4:2:0)
            val ctIdx = (py / 2) * 8 + (px / 2)
            val ct = ctBlock[ctIdx].toDouble()
            val cp = cpBlock[ctIdx].toDouble()
            // Undo the encoder's *255 scaling back to nominal ICtCp range
            // (I in [0,1], Ct/Cp signed fractions)
            val I = i / 255.0
            val Ct = ct / 255.0
            val Cp = cp / 255.0
            // ICtCp -> L'M'S' (inverse of the encoder's BT.2100-derived matrix)
            val Lp = (I + 0.015718580108730416 * Ct + 0.2095810681164055 * Cp).coerceIn(0.0, 1.0)
            val Mp = (I - 0.015718580108730416 * Ct - 0.20958106811640548 * Cp).coerceIn(0.0, 1.0)
            val Sp = (I + 1.0212710798422344 * Ct - 0.6052744909924316 * Cp).coerceIn(0.0, 1.0)
            // HLG decode: L'M'S' -> linear LMS
            val L = HLG_inverse_OETF(Lp)
            val M = HLG_inverse_OETF(Mp)
            val S = HLG_inverse_OETF(Sp)
            // LMS -> linear sRGB (inverse matrix). Out-of-gamut colours can fall
            // outside [0,1]; clamp BEFORE gamma encoding, because srgbUnlinearize
            // uses pow() and would return NaN for negative inputs.
            val rLin = (29.601046511687 * L - 21.364325340529906 * M - 4.886500015143518 * S).coerceIn(0.0, 1.0)
            val gLin = (-12.083229161592032 * L + 10.673933874098694 * M + 1.5369143265611211 * S).coerceIn(0.0, 1.0)
            val bLin = (0.38562844776642574 * L - 0.6536244436141302 * M + 1.0968381245163787 * S).coerceIn(0.0, 1.0)
            // Gamma encode to sRGB
            val rSrgb = srgbUnlinearize(rLin)
            val gSrgb = srgbUnlinearize(gLin)
            val bSrgb = srgbUnlinearize(bLin)
            // Convert to 8-bit with round-to-nearest (+0.5) to match the
            // encoder's iround(); plain truncation sat half a code value dark.
            val baseIdx = (py * 16 + px) * 3
            rgbData[baseIdx] = (rSrgb * 255.0 + 0.5).toInt().coerceIn(0, 255) // R
            rgbData[baseIdx + 1] = (gSrgb * 255.0 + 0.5).toInt().coerceIn(0, 255) // G
            rgbData[baseIdx + 2] = (bSrgb * 255.0 + 0.5).toInt().coerceIn(0, 255) // B
        }
    }
    return rgbData
}
// Helper functions for ICtCp decoding
// Inverse HLG OETF (BT.2100): HLG signal V in [0,1] -> relative scene-linear light.
fun HLG_inverse_OETF(V: Double): Double {
    val a = 0.17883277
    val b = 1.0 - 4.0 * a
    val c = 0.5 - a * ln(4.0 * a)
    // Below the 0.5 knee the forward OETF is sqrt(3L), so invert with V^2/3;
    // above the knee the logarithmic segment inverts to an exponential.
    return if (V <= 0.5) V * V / 3.0 else (exp((V - c) / a) + b) / 12.0
}
// sRGB gamma decode (IEC 61966-2-1): nonlinear [0,1] -> linear [0,1].
private fun srgbLinearize(value: Double): Double {
    // Linear toe segment below the 0.04045 threshold; power curve above it.
    if (value <= 0.04045) return value / 12.92
    return ((value + 0.055) / 1.055).pow(2.4)
}
// sRGB gamma encode (IEC 61966-2-1): linear [0,1] -> nonlinear [0,1].
private fun srgbUnlinearize(value: Double): Double {
    // Linear toe segment below 0.0031308; power curve above it.
    if (value <= 0.0031308) return value * 12.92
    return 1.055 * value.pow(1.0 / 2.4) - 0.055
}
// RGB to YCoCg-R conversion for INTER mode residual calculation
fun tevRGBToYcocg(rgbBlock: IntArray): IntArray {
val ycocgData = IntArray(16 * 16 * 3) // Y,Co,Cg for 16x16 pixels
@@ -2175,147 +2260,6 @@ class GraphicsJSR223Delegate(private val vm: VM) {
return ycocgData
}
// XYB conversion constants from JPEG XL specification
private val XYB_BIAS = 0.00379307325527544933
private val CBRT_BIAS = 0.155954200549248620 // cbrt(XYB_BIAS)
// RGB to LMS mixing coefficients
private val RGB_TO_LMS = arrayOf(
doubleArrayOf(0.3, 0.622, 0.078), // L coefficients
doubleArrayOf(0.23, 0.692, 0.078), // M coefficients
doubleArrayOf(0.24342268924547819, 0.20476744424496821, 0.55180986650955360) // S coefficients
)
// LMS to RGB inverse matrix
private val LMS_TO_RGB = arrayOf(
doubleArrayOf(11.0315669046, -9.8669439081, -0.1646229965),
doubleArrayOf(-3.2541473811, 4.4187703776, -0.1646229965),
doubleArrayOf(-3.6588512867, 2.7129230459, 1.9459282408)
)
// sRGB linearization functions
private fun srgbLinearise(value: Double): Double {
return if (value > 0.04045) {
Math.pow((value + 0.055) / 1.055, 2.4)
} else {
value / 12.92
}
}
private fun srgbUnlinearise(value: Double): Double {
return if (value > 0.0031308) {
1.055 * Math.pow(value, 1.0 / 2.4) - 0.055
} else {
value * 12.92
}
}
// XYB to RGB conversion for hardware decoding
fun tevXybToRGB(yBlock: IntArray, xBlock: IntArray, bBlock: IntArray): IntArray {
val rgbData = IntArray(16 * 16 * 3) // R,G,B for 16x16 pixels
for (py in 0 until 16) {
for (px in 0 until 16) {
val yIdx = py * 16 + px
val y = yBlock[yIdx]
// Get chroma values from subsampled 8x8 blocks (nearest neighbor upsampling)
val xbIdx = (py / 2) * 8 + (px / 2)
val x = xBlock[xbIdx]
val b = bBlock[xbIdx]
// Optimal range-based dequantization (exact inverse of improved quantization)
val X_MIN = -0.016; val X_MAX = 0.030
val xVal = (x / 255.0) * (X_MAX - X_MIN) + X_MIN // X: inverse of range mapping
val Y_MAX = 0.85
val yVal = (y / 255.0) * Y_MAX // Y: inverse of improved scale
val B_MAX = 0.85
val bVal = ((b + 128.0) / 255.0) * B_MAX // B: inverse of ((val/B_MAX*255)-128)
// XYB to LMS gamma
val lgamma = xVal + yVal
val mgamma = yVal - xVal
val sgamma = bVal
// Remove gamma correction
val lmix = (lgamma + CBRT_BIAS).pow(3.0) - XYB_BIAS
val mmix = (mgamma + CBRT_BIAS).pow(3.0) - XYB_BIAS
val smix = (sgamma + CBRT_BIAS).pow(3.0) - XYB_BIAS
// LMS to linear RGB using inverse matrix
val rLinear = (LMS_TO_RGB[0][0] * lmix + LMS_TO_RGB[0][1] * mmix + LMS_TO_RGB[0][2] * smix).coerceIn(0.0, 1.0)
val gLinear = (LMS_TO_RGB[1][0] * lmix + LMS_TO_RGB[1][1] * mmix + LMS_TO_RGB[1][2] * smix).coerceIn(0.0, 1.0)
val bLinear = (LMS_TO_RGB[2][0] * lmix + LMS_TO_RGB[2][1] * mmix + LMS_TO_RGB[2][2] * smix).coerceIn(0.0, 1.0)
// Convert back to sRGB gamma and 0-255 range
val r = (srgbUnlinearise(rLinear) * 255.0 + 0.5).toInt().coerceIn(0, 255)
val g = (srgbUnlinearise(gLinear) * 255.0 + 0.5).toInt().coerceIn(0, 255)
val bRgb = (srgbUnlinearise(bLinear) * 255.0 + 0.5).toInt().coerceIn(0, 255)
// Store RGB
val baseIdx = (py * 16 + px) * 3
rgbData[baseIdx] = r // R
rgbData[baseIdx + 1] = g // G
rgbData[baseIdx + 2] = bRgb // B
}
}
return rgbData
}
// RGB to XYB conversion for INTER mode residual calculation
fun tevRGBToXyb(rgbBlock: IntArray): IntArray {
val xybData = IntArray(16 * 16 * 3) // Y,X,B for 16x16 pixels
for (py in 0 until 16) {
for (px in 0 until 16) {
val baseIdx = (py * 16 + px) * 3
val r = rgbBlock[baseIdx]
val g = rgbBlock[baseIdx + 1]
val b = rgbBlock[baseIdx + 2]
// Convert RGB to 0-1 range and linearise sRGB
val rNorm = srgbLinearise(r / 255.0)
val gNorm = srgbLinearise(g / 255.0)
val bNorm = srgbLinearise(b / 255.0)
// RGB to LMS mixing with bias
val lmix = RGB_TO_LMS[0][0] * rNorm + RGB_TO_LMS[0][1] * gNorm + RGB_TO_LMS[0][2] * bNorm + XYB_BIAS
val mmix = RGB_TO_LMS[1][0] * rNorm + RGB_TO_LMS[1][1] * gNorm + RGB_TO_LMS[1][2] * bNorm + XYB_BIAS
val smix = RGB_TO_LMS[2][0] * rNorm + RGB_TO_LMS[2][1] * gNorm + RGB_TO_LMS[2][2] * bNorm + XYB_BIAS
// Apply gamma correction (cube root)
val lgamma = lmix.pow(1.0 / 3.0) - CBRT_BIAS
val mgamma = mmix.pow(1.0 / 3.0) - CBRT_BIAS
val sgamma = smix.pow(1.0 / 3.0) - CBRT_BIAS
// LMS to XYB transformation
val xVal = (lgamma - mgamma) / 2.0
val yVal = (lgamma + mgamma) / 2.0
val bVal = sgamma
// Optimal range-based quantization for XYB values (improved precision)
// X: actual range -0.016 to +0.030, map to full 0-255 precision
val X_MIN = -0.016; val X_MAX = 0.030
val xQuant = (((xVal - X_MIN) / (X_MAX - X_MIN)) * 255.0).toInt().coerceIn(0, 255)
// Y: range 0 to 0.85, map to 0 to 255 (improved scale)
val Y_MAX = 0.85
val yQuant = ((yVal / Y_MAX) * 255.0).toInt().coerceIn(0, 255)
// B: range 0 to 0.85, map to -128 to +127 (optimized precision)
val B_MAX = 0.85
val bQuant = (((bVal / B_MAX) * 255.0) - 128.0).toInt().coerceIn(-128, 127)
// Store XYB values
val yIdx = py * 16 + px
xybData[yIdx * 3] = yQuant // Y
xybData[yIdx * 3 + 1] = xQuant // X
xybData[yIdx * 3 + 2] = bQuant // B
}
}
return xybData
}
/**
* Enhanced TEV Deblocking Filter - Uses Knusperli-inspired techniques for superior boundary analysis
@@ -2627,8 +2571,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, frameCounter: Int,
debugMotionVectors: Boolean = false, tevVersion: Int = 2,
enableDeblocking: Boolean = true, enableBoundaryAwareDecoding: Boolean = false,
isLossless: Boolean = false) {
enableDeblocking: Boolean = true, enableBoundaryAwareDecoding: Boolean = false) {
// height doesn't change when interlaced, because that's the encoder's output
@@ -2744,7 +2687,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val cgPixels = tevIdct8x8_fromOptimizedCoeffs(cgBlock)
val rgbData = if (tevVersion == 3) {
tevXybToRGB(yPixels, coPixels, cgPixels)
tevIctcpToRGB(yPixels, coPixels, cgPixels)
} else {
tevYcocgToRGB(yPixels, coPixels, cgPixels)
}
@@ -2919,69 +2862,20 @@ class GraphicsJSR223Delegate(private val vm: VM) {
}
0x01 -> { // TEV_MODE_INTRA - Full YCoCg-R DCT decode (no motion compensation)
val yBlock: IntArray
val coBlock: IntArray
val cgBlock: IntArray
if (isLossless) {
// Lossless mode: coefficients are stored as float16, no quantization
// Read float16 coefficients: Y (16x16=256), Co (8x8=64), Cg (8x8=64)
val coeffFloat16Array = ShortArray(384) // 384 float16 values stored as shorts
vm.bulkPeekShort(readPtr.toInt(), coeffFloat16Array, 768) // 384 * 2 bytes
readPtr += 768
// Convert float16 to float32 and perform IDCT directly (no quantization)
println("DEBUG: Reading lossless coefficients, first few float16 values: ${coeffFloat16Array.take(10).map { "0x${it.toString(16)}" }}")
val yCoeffs = FloatArray(256) { i ->
// Convert signed short to unsigned short for float16 interpretation
val signedShort = coeffFloat16Array[i]
val float16bits = signedShort.toInt() and 0xFFFF // Convert to unsigned
val floatVal = Float16.toFloat(float16bits.toShort())
if (floatVal.isNaN() || floatVal.isInfinite()) {
println("NaN/Inf detected at Y coefficient $i: signedShort=0x${signedShort.toString(16)}, unsigned=0x${float16bits.toString(16)}, floatVal=$floatVal")
0f // Replace NaN with 0
} else floatVal
}
val coCoeffs = FloatArray(64) { i ->
// Convert signed short to unsigned short for float16 interpretation
val signedShort = coeffFloat16Array[256 + i]
val float16bits = signedShort.toInt() and 0xFFFF // Convert to unsigned
val floatVal = Float16.toFloat(float16bits.toShort())
if (floatVal.isNaN() || floatVal.isInfinite()) {
println("NaN/Inf detected at Co coefficient $i: signedShort=0x${signedShort.toString(16)}, unsigned=0x${float16bits.toString(16)}, floatVal=$floatVal")
0f // Replace NaN with 0
} else floatVal
}
val cgCoeffs = FloatArray(64) { i ->
// Convert signed short to unsigned short for float16 interpretation
val signedShort = coeffFloat16Array[320 + i]
val float16bits = signedShort.toInt() and 0xFFFF // Convert to unsigned
val floatVal = Float16.toFloat(float16bits.toShort())
if (floatVal.isNaN() || floatVal.isInfinite()) {
println("NaN/Inf detected at Cg coefficient $i: signedShort=0x${signedShort.toString(16)}, unsigned=0x${float16bits.toString(16)}, floatVal=$floatVal")
0f // Replace NaN with 0
} else floatVal
}
yBlock = tevIdct16x16_lossless(yCoeffs)
coBlock = tevIdct8x8_lossless(coCoeffs)
cgBlock = tevIdct8x8_lossless(cgCoeffs)
} else {
// Regular lossy mode: quantized int16 coefficients
// Optimized bulk reading of all DCT coefficients: Y(256×2) + Co(64×2) + Cg(64×2) = 768 bytes
val coeffShortArray = ShortArray(384) // Total coefficients: 256 + 64 + 64 = 384 shorts
vm.bulkPeekShort(readPtr.toInt(), coeffShortArray, 768)
readPtr += 768
// Regular lossy mode: quantized int16 coefficients
// Optimized bulk reading of all DCT coefficients: Y(256×2) + Co(64×2) + Cg(64×2) = 768 bytes
val coeffShortArray = ShortArray(384) // Total coefficients: 256 + 64 + 64 = 384 shorts
vm.bulkPeekShort(readPtr.toInt(), coeffShortArray, 768)
readPtr += 768
// Perform hardware IDCT for each channel using fast algorithm
yBlock = tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), QUANT_TABLE_Y, qY, rateControlFactor)
coBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), QUANT_TABLE_C, true, qCo, rateControlFactor)
cgBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), QUANT_TABLE_C, true, qCg, rateControlFactor)
}
// Perform hardware IDCT for each channel using fast algorithm
val yBlock = tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), QUANT_TABLE_Y, qY, rateControlFactor)
val coBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), QUANT_TABLE_C, true, qCo, rateControlFactor)
val cgBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), QUANT_TABLE_C, true, qCg, rateControlFactor)
// Convert to RGB (YCoCg-R for v2, ICtCp for v3)
val rgbData = if (tevVersion == 3) {
tevXybToRGB(yBlock, coBlock, cgBlock) // XYB format (v3)
tevIctcpToRGB(yBlock, coBlock, cgBlock) // ICtCp format (v3)
} else {
tevYcocgToRGB(yBlock, coBlock, cgBlock) // YCoCg-R format (v2)
}
@@ -3108,7 +3002,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Step 4: Convert final data to RGB (YCoCg-R for v2, ICtCp for v3)
val finalRgb = if (tevVersion == 3) {
tevXybToRGB(finalY, finalCo, finalCg) // XYB format (v3)
tevIctcpToRGB(finalY, finalCo, finalCg) // ICtCp format (v3)
} else {
tevYcocgToRGB(finalY, finalCo, finalCg) // YCoCg-R format (v2)
}

View File

@@ -1,5 +1,5 @@
// Created by Claude on 2025-08-18.
// TEV (TSVM Enhanced Video) Encoder - YCoCg-R 4:2:0 16x16 Block Version
// TEV (TSVM Enhanced Video) Encoder - YCoCg-R/ICtCp 4:2:0 16x16 Block Version
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
@@ -68,7 +68,9 @@ static inline float float16_to_float(uint16_t hbits) {
// TSVM Enhanced Video (TEV) format constants
#define TEV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x45\x56" // "\x1FTSVM TEV"
#define TEV_VERSION 2 // Updated for YCoCg-R 4:2:0
// TEV version - dynamic based on color space mode
// Version 2: YCoCg-R 4:2:0 (default)
// Version 3: ICtCp 4:2:0 (--ictcp flag)
// version 1: 8x8 RGB
// version 2: 16x16 Y, 8x8 Co/Cg, asymmetric quantisation, optional quantiser multiplier for rate control multiplier (1.0 when unused) {current winner}
// version 3: version 2 + internal 6-bit processing (discarded due to higher noise floor)
@@ -192,17 +194,6 @@ typedef struct __attribute__((packed)) {
int16_t cg_coeffs[HALF_BLOCK_SIZE_SQR]; // quantised Cg DCT coefficients (8x8)
} tev_block_t;
// Lossless TEV block structure (uses float32 internally, converted to float16 during serialization)
typedef struct __attribute__((packed)) {
uint8_t mode; // Block encoding mode
int16_t mv_x, mv_y; // Motion vector (1/4 pixel precision)
float rate_control_factor; // Always 1.0f in lossless mode
uint16_t cbp; // Coded block pattern (which channels have non-zero coeffs)
float y_coeffs[BLOCK_SIZE_SQR]; // lossless Y DCT coefficients (16x16)
float co_coeffs[HALF_BLOCK_SIZE_SQR]; // lossless Co DCT coefficients (8x8)
float cg_coeffs[HALF_BLOCK_SIZE_SQR]; // lossless Cg DCT coefficients (8x8)
} tev_lossless_block_t;
// Subtitle entry structure
typedef struct subtitle_entry {
int start_frame;
@@ -232,7 +223,7 @@ typedef struct {
int qualityCg;
int verbose;
int disable_rcf; // 0 = rcf enabled, 1 = disabled
int lossless_mode; // 0 = lossy (default), 1 = lossless mode
int ictcp_mode; // 0 = YCoCg-R (default), 1 = ICtCp color space
// Bitrate control
int target_bitrate_kbps; // Target bitrate in kbps (0 = quality mode)
@@ -289,6 +280,10 @@ typedef struct {
int complexity_capacity; // Capacity of complexity_values array
} tev_encoder_t;
//////////////////////////
// COLOUR MATHS CODES //
//////////////////////////
// RGB to YCoCg-R transform (per YCoCg-R specification with truncated division)
static void rgb_to_ycocgr(uint8_t r, uint8_t g, uint8_t b, int *y, int *co, int *cg) {
*co = (int)r - (int)b;
@@ -315,6 +310,180 @@ static void ycocgr_to_rgb(int y, int co, int cg, uint8_t *r, uint8_t *g, uint8_t
*b = CLAMP(*b, 0, 255);
}
// ---------------------- ICtCp Implementation ----------------------
static inline int iround(double v) { return (int)floor(v + 0.5); }
// ---------------------- sRGB gamma helpers ----------------------
static inline double srgb_linearize(double val) {
// val in [0,1]
if (val <= 0.04045) return val / 12.92;
return pow((val + 0.055) / 1.055, 2.4);
}
static inline double srgb_unlinearize(double val) {
// val in [0,1]
if (val <= 0.0031308) return val * 12.92;
return 1.055 * pow(val, 1.0 / 2.4) - 0.055;
}
// -------------------------- HLG --------------------------
// Forward HLG OETF (linear -> HLG)
static inline double HLG_OETF(double L) {
// L in [0,1], relative scene-linear
const double a = 0.17883277;
const double b = 1.0 - 4.0 * a;
const double c = 0.5 - a * log(4.0 * a);
if (L <= 1.0/12.0)
return sqrt(3.0 * L);
else
return a * log(12.0 * L - b) + c;
}
// Inverse HLG OETF (HLG -> linear)
static inline double HLG_inverse_OETF(double V) {
const double a = 0.17883277;
const double b = 1.0 - 4.0 * a;
const double c = 0.5 - a * log(4.0 * a);
if (V <= 0.5)
return (V * V) / 3.0;
else
return (exp((V - c)/a) + b) / 12.0;
}
// ---------------------- Matrices (doubles) ----------------------
// Combined linear sRGB -> LMS (single 3x3): product of sRGB->XYZ, XYZ->BT2020, BT2020->LMS
// Computed from standard matrices (double precision).
static const double M_RGB_TO_LMS[3][3] = {
{0.20502672199540622, 0.42945363228947586, 0.31165003516511786},
{0.2233144413317712, 0.5540422172466897, 0.21854692537153908},
{0.0609931761282002, 0.17917502499816504, 0.9323768661336348}
};
// Inverse: LMS -> linear sRGB (inverse of above)
static const double M_LMS_TO_RGB[3][3] = {
{29.601046511687, -21.364325340529906, -4.886500015143518},
{-12.083229161592032, 10.673933874098694, 1.5369143265611211},
{0.38562844776642574, -0.6536244436141302, 1.0968381245163787}
};
// ICtCp matrix (L' M' S' -> I Ct Cp). Values are the BT.2100 integer-derived /4096 constants.
static const double M_LMSPRIME_TO_ICTCP[3][3] = {
{ 2048.0/4096.0, 2048.0/4096.0, 0.0 },
{ 3625.0/4096.0, -7465.0/4096.0, 3840.0/4096.0 },
{ 9500.0/4096.0, -9212.0/4096.0, -288.0/4096.0 }
};
// Inverse: I Ct Cp -> L' M' S' (precomputed inverse)
static const double M_ICTCP_TO_LMSPRIME[3][3] = {
{ 1.0, 0.015718580108730416, 0.2095810681164055 },
{ 1.0, -0.015718580108730416, -0.20958106811640548 },
{ 1.0, 1.0212710798422344, -0.6052744909924316 }
};
// ---------------------- Forward: sRGB8 -> ICtCp (doubles) ----------------------
// Inputs:  r8,g8,b8 in 0..255 (8-bit sRGB).
// Outputs: I, Ct, Cp scaled by 255 and clamped — I in [0,255], Ct/Cp in [-256,255] —
//          so they feed directly into the DCT pipeline's numeric range.
//          (They are NOT in the nominal [0,1] ICtCp range; see final FCLAMPs.)
void srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
double *out_I, double *out_Ct, double *out_Cp)
{
// 1) linearize sRGB to 0..1
double r = srgb_linearize((double)r8 / 255.0);
double g = srgb_linearize((double)g8 / 255.0);
double b = srgb_linearize((double)b8 / 255.0);
// 2) linear RGB -> LMS (single combined 3x3 multiply: sRGB->XYZ->BT2020->LMS)
double L = M_RGB_TO_LMS[0][0]*r + M_RGB_TO_LMS[0][1]*g + M_RGB_TO_LMS[0][2]*b;
double M = M_RGB_TO_LMS[1][0]*r + M_RGB_TO_LMS[1][1]*g + M_RGB_TO_LMS[1][2]*b;
double S = M_RGB_TO_LMS[2][0]*r + M_RGB_TO_LMS[2][1]*g + M_RGB_TO_LMS[2][2]*b;
// 3) apply HLG encode (map linear LMS -> perceptual domain L',M',S')
double Lp = HLG_OETF(L);
double Mp = HLG_OETF(M);
double Sp = HLG_OETF(S);
// 4) L'M'S' -> ICtCp (BT.2100 /4096 integer-derived matrix)
double I = M_LMSPRIME_TO_ICTCP[0][0]*Lp + M_LMSPRIME_TO_ICTCP[0][1]*Mp + M_LMSPRIME_TO_ICTCP[0][2]*Sp;
double Ct = M_LMSPRIME_TO_ICTCP[1][0]*Lp + M_LMSPRIME_TO_ICTCP[1][1]*Mp + M_LMSPRIME_TO_ICTCP[1][2]*Sp;
double Cp = M_LMSPRIME_TO_ICTCP[2][0]*Lp + M_LMSPRIME_TO_ICTCP[2][1]*Mp + M_LMSPRIME_TO_ICTCP[2][2]*Sp;
// Scale to the DCT pipeline's 8-bit-equivalent range and clamp.
*out_I = FCLAMP(I * 255.f, 0.f, 255.f);
*out_Ct = FCLAMP(Ct * 255.f, -256.f, 255.f);
*out_Cp = FCLAMP(Cp * 255.f, -256.f, 255.f);
}
// ---------------------- Reverse: ICtCp -> sRGB8 (doubles) ----------------------
// Inputs:  I8, Ct8, Cp8 in the scaled-by-255 range produced by srgb8_to_ictcp_hlg
//          (hence the /255 below; the "8" suffix marks that 8-bit-equivalent scale).
// Outputs: r8,g8,b8 in 0..255 (8-bit sRGB, clamped and rounded)
void ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
uint8_t *r8, uint8_t *g8, uint8_t *b8)
{
// Undo the *255 pipeline scaling back to nominal ICtCp range.
double I = I8 / 255.f;
double Ct = Ct8 / 255.f;
double Cp = Cp8 / 255.f;
// 1) ICtCp -> L' M' S' (3x3 multiply, precomputed inverse matrix)
double Lp = M_ICTCP_TO_LMSPRIME[0][0]*I + M_ICTCP_TO_LMSPRIME[0][1]*Ct + M_ICTCP_TO_LMSPRIME[0][2]*Cp;
double Mp = M_ICTCP_TO_LMSPRIME[1][0]*I + M_ICTCP_TO_LMSPRIME[1][1]*Ct + M_ICTCP_TO_LMSPRIME[1][2]*Cp;
double Sp = M_ICTCP_TO_LMSPRIME[2][0]*I + M_ICTCP_TO_LMSPRIME[2][1]*Ct + M_ICTCP_TO_LMSPRIME[2][2]*Cp;
// 2) HLG decode: L' -> linear LMS
double L = HLG_inverse_OETF(Lp);
double M = HLG_inverse_OETF(Mp);
double S = HLG_inverse_OETF(Sp);
// 3) LMS -> linear sRGB (3x3 inverse)
// NOTE(review): out-of-gamut inputs can make these fall outside [0,1];
// srgb_unlinearize's pow() would then see a negative base — confirm upstream
// clamping guarantees, or clamp here.
double r_lin = M_LMS_TO_RGB[0][0]*L + M_LMS_TO_RGB[0][1]*M + M_LMS_TO_RGB[0][2]*S;
double g_lin = M_LMS_TO_RGB[1][0]*L + M_LMS_TO_RGB[1][1]*M + M_LMS_TO_RGB[1][2]*S;
double b_lin = M_LMS_TO_RGB[2][0]*L + M_LMS_TO_RGB[2][1]*M + M_LMS_TO_RGB[2][2]*S;
// 4) gamma encode and convert to 0..255 with center-of-bin rounding
double r = srgb_unlinearize(r_lin);
double g = srgb_unlinearize(g_lin);
double b = srgb_unlinearize(b_lin);
*r8 = (uint8_t)CLAMP(iround(r * 255.0), 0, 255);
*g8 = (uint8_t)CLAMP(iround(g * 255.0), 0, 255);
*b8 = (uint8_t)CLAMP(iround(b * 255.0), 0, 255);
}
// ---------------------- Color Space Switching Functions ----------------------
// Forward wrapper: convert one 8-bit RGB pixel into the encoder's active colour
// space (ICtCp when enc->ictcp_mode is set, YCoCg-R otherwise), as doubles.
static void rgb_to_color_space(tev_encoder_t *enc, uint8_t r, uint8_t g, uint8_t b,
                               double *c1, double *c2, double *c3) {
    if (!enc->ictcp_mode) {
        // YCoCg-R path: integer transform, widened to double for the caller.
        int y_val, co_val, cg_val;
        rgb_to_ycocgr(r, g, b, &y_val, &co_val, &cg_val);
        *c1 = (double)y_val;
        *c2 = (double)co_val;
        *c3 = (double)cg_val;
        return;
    }
    // ICtCp path: helper already writes doubles in the pipeline's scaled range.
    srgb8_to_ictcp_hlg(r, g, b, c1, c2, c3);
}
// Inverse wrapper: convert one pixel from the encoder's active colour space
// (ICtCp or YCoCg-R, matching rgb_to_color_space) back to 8-bit RGB.
static void color_space_to_rgb(tev_encoder_t *enc, double c1, double c2, double c3,
                               uint8_t *r, uint8_t *g, uint8_t *b) {
    if (!enc->ictcp_mode) {
        // YCoCg-R path expects integers; round the doubles first.
        int y_val = (int)round(c1);
        int co_val = (int)round(c2);
        int cg_val = (int)round(c3);
        ycocgr_to_rgb(y_val, co_val, cg_val, r, g, b);
        return;
    }
    // ICtCp path: helper consumes the scaled doubles directly.
    ictcp_hlg_to_srgb8(c1, c2, c3, r, g, b);
}
////////////////////////////////////////
// DISCRETE COSINE TRANSFORMATIONS //
////////////////////////////////////////
// Pre-calculated cosine tables
static float dct_table_16[16][16]; // For 16x16 DCT
static float dct_table_8[8][8]; // For 8x8 DCT
@@ -429,14 +598,14 @@ static int16_t quantise_coeff(float coeff, float quant, int is_dc, int is_chroma
}
}
// Extract 16x16 block from RGB frame and convert to YCoCg-R
static void extract_ycocgr_block(uint8_t *rgb_frame, int width, int height,
int block_x, int block_y,
float *y_block, float *co_block, float *cg_block) {
// Extract 16x16 block from RGB frame and convert to color space
static void extract_color_space_block(tev_encoder_t *enc, uint8_t *rgb_frame, int width, int height,
int block_x, int block_y,
float *c1_block, float *c2_block, float *c3_block) {
int start_x = block_x * BLOCK_SIZE;
int start_y = block_y * BLOCK_SIZE;
// Extract 16x16 Y block
// Extract 16x16 primary channel block (Y for YCoCg-R, I for ICtCp)
for (int py = 0; py < BLOCK_SIZE; py++) {
for (int px = 0; px < BLOCK_SIZE; px++) {
int x = start_x + px;
@@ -448,10 +617,10 @@ static void extract_ycocgr_block(uint8_t *rgb_frame, int width, int height,
uint8_t g = rgb_frame[offset + 1];
uint8_t b = rgb_frame[offset + 2];
int y_val, co_val, cg_val;
rgb_to_ycocgr(r, g, b, &y_val, &co_val, &cg_val);
double c1, c2, c3;
rgb_to_color_space(enc, r, g, b, &c1, &c2, &c3);
y_block[py * BLOCK_SIZE + px] = (float)y_val - 128.0f; // Center around 0
c1_block[py * BLOCK_SIZE + px] = (float)c1 - 128.0f;
}
}
}
@@ -473,25 +642,30 @@ static void extract_ycocgr_block(uint8_t *rgb_frame, int width, int height,
uint8_t g = rgb_frame[offset + 1];
uint8_t b = rgb_frame[offset + 2];
int y_val, co_val, cg_val;
rgb_to_ycocgr(r, g, b, &y_val, &co_val, &cg_val);
double c1, c2, c3;
rgb_to_color_space(enc, r, g, b, &c1, &c2, &c3);
co_sum += (int)c2;
cg_sum += (int)c3;
co_sum += co_val;
cg_sum += cg_val;
count++;
}
}
}
if (count > 0) {
// Center chroma around 0 for DCT (Co/Cg range is -255 to +255, so don't add offset)
co_block[py * HALF_BLOCK_SIZE + px] = (float)(co_sum / count);
cg_block[py * HALF_BLOCK_SIZE + px] = (float)(cg_sum / count);
// Average the accumulated chroma values and store
c2_block[py * HALF_BLOCK_SIZE + px] = (float)(co_sum / count);
c3_block[py * HALF_BLOCK_SIZE + px] = (float)(cg_sum / count);
}
}
}
}
// Calculate spatial activity for any channel (16x16 or 8x8)
static float calculate_spatial_activity(const float *block, int block_size) {
float activity = 0.0f;
@@ -791,8 +965,138 @@ static void estimate_motion(tev_encoder_t *enc, int block_x, int block_y,
}
// Convert RGB block to YCoCg-R with 4:2:0 chroma subsampling
static void convert_rgb_to_ycocgr_block(const uint8_t *rgb_block,
uint8_t *y_block, int8_t *co_block, int8_t *cg_block) {
// Convert a 16x16 RGB block to the active colour space with 4:2:0 chroma
// subsampling. Full-resolution luma/intensity goes to c1_workspace (16x16);
// chroma is averaged over 2x2 pixel groups into c2/c3_workspace (8x8).
// Values are stored in the pipeline's natural range (luma 0..255, chroma
// roughly -256..255); no centering offset is applied here.
static void convert_rgb_to_color_space_block(tev_encoder_t *enc, const uint8_t *rgb_block,
        float *c1_workspace, float *c2_workspace, float *c3_workspace) {
    if (enc->ictcp_mode) {
        // ICtCp mode: I at full resolution (16x16), Ct/Cp subsampled 4:2:0 (8x8)
        for (int py = 0; py < BLOCK_SIZE; py++) {
            for (int px = 0; px < BLOCK_SIZE; px++) {
                int rgb_idx = (py * BLOCK_SIZE + px) * 3;
                uint8_t r = rgb_block[rgb_idx];
                uint8_t g = rgb_block[rgb_idx + 1];
                uint8_t b = rgb_block[rgb_idx + 2];
                double I, Ct, Cp;
                srgb8_to_ictcp_hlg(r, g, b, &I, &Ct, &Cp);
                // srgb8_to_ictcp_hlg already returns I scaled and clamped to
                // [0,255]; do NOT multiply by 255 again (the previous code did,
                // pushing values to ~65000 and out of the DCT pipeline's range).
                c1_workspace[py * BLOCK_SIZE + px] = (float)I;
            }
        }
        for (int cy = 0; cy < HALF_BLOCK_SIZE; cy++) {
            for (int cx = 0; cx < HALF_BLOCK_SIZE; cx++) {
                double sum_ct = 0.0, sum_cp = 0.0;
                // Average the 2x2 source pixels covered by this chroma sample
                for (int dy = 0; dy < 2; dy++) {
                    for (int dx = 0; dx < 2; dx++) {
                        int py = cy * 2 + dy;
                        int px = cx * 2 + dx;
                        int rgb_idx = (py * BLOCK_SIZE + px) * 3;
                        uint8_t r = rgb_block[rgb_idx];
                        uint8_t g = rgb_block[rgb_idx + 1];
                        uint8_t b = rgb_block[rgb_idx + 2];
                        double I, Ct, Cp;
                        srgb8_to_ictcp_hlg(r, g, b, &I, &Ct, &Cp);
                        sum_ct += Ct;
                        sum_cp += Cp;
                    }
                }
                // Ct/Cp already arrive in the scaled [-256,255] range; just
                // average — no extra *255 (same double-scaling fix as above).
                c2_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)(sum_ct / 4.0);
                c3_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)(sum_cp / 4.0);
            }
        }
    } else {
        // YCoCg-R mode: inline forward transform (mirrors rgb_to_ycocgr)
        // Luma at full resolution (16x16)
        for (int py = 0; py < BLOCK_SIZE; py++) {
            for (int px = 0; px < BLOCK_SIZE; px++) {
                int rgb_idx = (py * BLOCK_SIZE + px) * 3;
                int r = rgb_block[rgb_idx];
                int g = rgb_block[rgb_idx + 1];
                int b = rgb_block[rgb_idx + 2];
                // YCoCg-R luma: y = (r + 2g + b) / 4, truncated division
                int y = (r + 2*g + b) / 4;
                c1_workspace[py * BLOCK_SIZE + px] = (float)CLAMP(y, 0, 255);
            }
        }
        // Co/Cg with 4:2:0 subsampling (8x8)
        for (int cy = 0; cy < HALF_BLOCK_SIZE; cy++) {
            for (int cx = 0; cx < HALF_BLOCK_SIZE; cx++) {
                int sum_co = 0, sum_cg = 0;
                for (int dy = 0; dy < 2; dy++) {
                    for (int dx = 0; dx < 2; dx++) {
                        int py = cy * 2 + dy;
                        int px = cx * 2 + dx;
                        int rgb_idx = (py * BLOCK_SIZE + px) * 3;
                        int r = rgb_block[rgb_idx];
                        int g = rgb_block[rgb_idx + 1];
                        int b = rgb_block[rgb_idx + 2];
                        // YCoCg-R chroma forward transform
                        int co = r - b;
                        int tmp = b + (co / 2);
                        int cg = g - tmp;
                        sum_co += co;
                        sum_cg += cg;
                    }
                }
                // Average and store subsampled chroma
                c2_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)CLAMP(sum_co / 4, -256, 255);
                c3_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)CLAMP(sum_cg / 4, -256, 255);
            }
        }
    }
}
// Extract motion-compensated YCoCg-R block from reference frame
static void extract_motion_compensated_block(const uint8_t *rgb_data, int width, int height,
int block_x, int block_y, int mv_x, int mv_y,
uint8_t *y_block, int8_t *co_block, int8_t *cg_block) {
// Extract 16x16 RGB block with motion compensation
uint8_t rgb_block[BLOCK_SIZE * BLOCK_SIZE * 3];
for (int dy = 0; dy < BLOCK_SIZE; dy++) {
for (int dx = 0; dx < BLOCK_SIZE; dx++) {
int cur_x = block_x + dx;
int cur_y = block_y + dy;
int ref_x = cur_x + mv_x; // Revert to original motion compensation
int ref_y = cur_y + mv_y;
int rgb_idx = (dy * BLOCK_SIZE + dx) * 3;
if (ref_x >= 0 && ref_y >= 0 && ref_x < width && ref_y < height) {
// Copy RGB from reference position
int ref_offset = (ref_y * width + ref_x) * 3;
rgb_block[rgb_idx] = rgb_data[ref_offset]; // R
rgb_block[rgb_idx + 1] = rgb_data[ref_offset + 1]; // G
rgb_block[rgb_idx + 2] = rgb_data[ref_offset + 2]; // B
} else {
// Out of bounds - use black
rgb_block[rgb_idx] = 0; // R
rgb_block[rgb_idx + 1] = 0; // G
rgb_block[rgb_idx + 2] = 0; // B
}
}
}
// Convert RGB block to YCoCg-R (original implementation for motion compensation)
// Convert 16x16 RGB to Y (full resolution)
for (int py = 0; py < BLOCK_SIZE; py++) {
for (int px = 0; px < BLOCK_SIZE; px++) {
@@ -840,41 +1144,6 @@ static void convert_rgb_to_ycocgr_block(const uint8_t *rgb_block,
}
}
// Extract motion-compensated YCoCg-R block from reference frame
// Build a motion-compensated 16x16 YCoCg-R block from the reference frame.
// Each destination pixel is fetched from (x + mv_x, y + mv_y) in the
// reference RGB image; samples falling outside the frame are padded with
// black before the colour-space conversion.
static void extract_motion_compensated_block(const uint8_t *rgb_data, int width, int height,
                                           int block_x, int block_y, int mv_x, int mv_y,
                                           uint8_t *y_block, int8_t *co_block, int8_t *cg_block) {
    uint8_t mc_rgb[BLOCK_SIZE * BLOCK_SIZE * 3];

    for (int row = 0; row < BLOCK_SIZE; row++) {
        int src_y = block_y + row + mv_y;
        for (int col = 0; col < BLOCK_SIZE; col++) {
            int src_x = block_x + col + mv_x;
            uint8_t *dst = &mc_rgb[(row * BLOCK_SIZE + col) * 3];

            int in_bounds = (src_x >= 0) && (src_y >= 0) &&
                            (src_x < width) && (src_y < height);
            if (in_bounds) {
                // Copy RGB from the motion-shifted reference position.
                const uint8_t *src = &rgb_data[(src_y * width + src_x) * 3];
                dst[0] = src[0];
                dst[1] = src[1];
                dst[2] = src[2];
            } else {
                // Out of frame: pad with black.
                dst[0] = 0;
                dst[1] = 0;
                dst[2] = 0;
            }
        }
    }

    // Convert the assembled RGB block to YCoCg-R (full-res Y, 4:2:0 chroma).
    convert_rgb_to_ycocgr_block(mc_rgb, y_block, co_block, cg_block);
}
// Compute motion-compensated residual for INTER mode
static void compute_motion_residual(tev_encoder_t *enc, int block_x, int block_y, int mv_x, int mv_y) {
int start_x = block_x * BLOCK_SIZE;
@@ -909,7 +1178,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
tev_block_t *block = &enc->block_data[block_y * ((enc->width + 15) / 16) + block_x];
// Extract YCoCg-R block
extract_ycocgr_block(enc->current_rgb, enc->width, enc->height,
extract_color_space_block(enc, enc->current_rgb, enc->width, enc->height,
block_x, block_y,
enc->y_workspace, enc->co_workspace, enc->cg_workspace);
@@ -1105,107 +1374,6 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
block->cbp = 0x07; // Y, Co, Cg all present
}
// Encode a 16x16 block in lossless mode
// Encode one 16x16 block in lossless mode: DCT coefficients are stored as
// raw floats with no quantisation step applied.
static void encode_block_lossless(tev_encoder_t *enc, int block_x, int block_y, int is_keyframe) {
    int blocks_per_row = (enc->width + 15) / 16;
    tev_lossless_block_t *block =
        (tev_lossless_block_t*)&enc->block_data[block_y * blocks_per_row + block_x];

    // Pull this block's YCoCg-R samples into the shared workspaces.
    extract_ycocgr_block(enc->current_rgb, enc->width, enc->height,
                        block_x, block_y,
                        enc->y_workspace, enc->co_workspace, enc->cg_workspace);

    // Mode decision: lossless mode currently uses INTRA for every block, on
    // keyframes and inter frames alike (inter coding not implemented here).
    (void)is_keyframe; // reserved for a future inter-capable mode decision
    block->mode = TEV_MODE_INTRA;
    block->mv_x = 0;
    block->mv_y = 0;
    enc->blocks_intra++;

    // No rate control in lossless mode.
    block->rate_control_factor = 1.0f;

    // Forward DCT per channel; coefficients are copied out unquantised.
    // Y channel (16x16).
    dct_16x16_fast(enc->y_workspace, enc->dct_workspace);
    for (int i = 0; i < BLOCK_SIZE_SQR; i++) {
        block->y_coeffs[i] = enc->dct_workspace[i];
    }

    // Co channel (8x8).
    dct_8x8_fast(enc->co_workspace, enc->dct_workspace);
    for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
        block->co_coeffs[i] = enc->dct_workspace[i];
    }

    // Cg channel (8x8).
    dct_8x8_fast(enc->cg_workspace, enc->dct_workspace);
    for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
        block->cg_coeffs[i] = enc->dct_workspace[i];
    }

    // All three channels are always present.
    block->cbp = 0x07;
}
// Serialized lossless block structure (for writing to file with float16 coefficients).
// Mirrors the in-memory lossless block, but every DCT coefficient is stored as an
// IEEE-754 half-float bit pattern (uint16_t) to halve the on-disk payload.
// Packed: the layout is written to the output stream verbatim.
typedef struct __attribute__((packed)) {
    uint8_t mode;                            // block coding mode (lossless path uses TEV_MODE_INTRA)
    int16_t mv_x, mv_y;                      // motion vector; (0,0) for lossless INTRA blocks
    float rate_control_factor; // Always 1.0f in lossless mode
    uint16_t cbp;                            // coded block pattern; 0x07 = Y, Co, Cg all present
    uint16_t y_coeffs[BLOCK_SIZE_SQR]; // float16 Y coefficients
    uint16_t co_coeffs[HALF_BLOCK_SIZE_SQR]; // float16 Co coefficients
    uint16_t cg_coeffs[HALF_BLOCK_SIZE_SQR]; // float16 Cg coefficients
} tev_serialized_lossless_block_t;
// Convert in-memory lossless blocks (float32 coefficients) into the on-disk
// serialized form with float16 coefficients. Values outside the float16
// finite range are clamped; in verbose mode each clamp is reported.
static void serialize_lossless_blocks(tev_encoder_t *enc, int blocks_x, int blocks_y,
                                     tev_serialized_lossless_block_t *serialized_blocks) {
    // Largest finite float16 value; anything beyond would encode as +/-inf.
    const float FLOAT16_MAX = 65504.0f;
    int total_blocks = blocks_x * blocks_y;

    for (int idx = 0; idx < total_blocks; idx++) {
        tev_lossless_block_t *src = (tev_lossless_block_t*)&enc->block_data[idx];
        tev_serialized_lossless_block_t *dst = &serialized_blocks[idx];

        // Header fields are copied through unchanged.
        dst->mode = src->mode;
        dst->mv_x = src->mv_x;
        dst->mv_y = src->mv_y;
        dst->rate_control_factor = src->rate_control_factor;
        dst->cbp = src->cbp;

        // Luma coefficients: clamp, then pack to float16.
        for (int i = 0; i < BLOCK_SIZE_SQR; i++) {
            float y_val = FCLAMP(src->y_coeffs[i], -FLOAT16_MAX, FLOAT16_MAX);
            dst->y_coeffs[i] = float_to_float16(y_val);
            if (enc->verbose && fabsf(src->y_coeffs[i]) > FLOAT16_MAX) {
                printf("WARNING: Y coefficient %d clamped: %f -> %f\n", i, src->y_coeffs[i], y_val);
            }
        }

        // Chroma coefficients: same treatment for Co and Cg.
        for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
            float co_val = FCLAMP(src->co_coeffs[i], -FLOAT16_MAX, FLOAT16_MAX);
            float cg_val = FCLAMP(src->cg_coeffs[i], -FLOAT16_MAX, FLOAT16_MAX);
            dst->co_coeffs[i] = float_to_float16(co_val);
            dst->cg_coeffs[i] = float_to_float16(cg_val);
            if (enc->verbose && fabsf(src->co_coeffs[i]) > FLOAT16_MAX) {
                printf("WARNING: Co coefficient %d clamped: %f -> %f\n", i, src->co_coeffs[i], co_val);
            }
            if (enc->verbose && fabsf(src->cg_coeffs[i]) > FLOAT16_MAX) {
                printf("WARNING: Cg coefficient %d clamped: %f -> %f\n", i, src->cg_coeffs[i], cg_val);
            }
        }
    }
}
// Convert SubRip time format (HH:MM:SS,mmm) to frame number
static int srt_time_to_frame(const char *time_str, int fps) {
int hours, minutes, seconds, milliseconds;
@@ -1820,17 +1988,13 @@ static int alloc_encoder_buffers(tev_encoder_t *enc) {
enc->cg_workspace = malloc(8 * 8 * sizeof(float));
enc->dct_workspace = malloc(16 * 16 * sizeof(float));
// Allocate block data
enc->block_data = malloc(total_blocks * sizeof(tev_block_t));
// Allocate compression buffer large enough for both regular and lossless modes
size_t max_block_size = sizeof(tev_block_t) > sizeof(tev_serialized_lossless_block_t) ?
sizeof(tev_block_t) : sizeof(tev_serialized_lossless_block_t);
size_t compressed_buffer_size = total_blocks * max_block_size * 2;
// Allocate compression buffer
size_t compressed_buffer_size = total_blocks * sizeof(tev_block_t) * 2;
enc->compressed_buffer = malloc(compressed_buffer_size);
if (enc->verbose) {
printf("Allocated compressed buffer: %zu bytes for %d blocks (max_block_size: %zu)\n",
compressed_buffer_size, total_blocks, max_block_size);
}
enc->mp2_buffer = malloc(MP2_DEFAULT_PACKET_SIZE);
if (!enc->current_rgb || !enc->previous_rgb || !enc->reference_rgb ||
@@ -1889,7 +2053,7 @@ static void free_encoder(tev_encoder_t *enc) {
static int write_tev_header(FILE *output, tev_encoder_t *enc) {
// Magic + version
fwrite(TEV_MAGIC, 1, 8, output);
uint8_t version = TEV_VERSION;
uint8_t version = enc->ictcp_mode ? 3 : 2; // Version 3 for ICtCp, 2 for YCoCg-R
fwrite(&version, 1, 1, output);
// Video parameters
@@ -1901,7 +2065,7 @@ static int write_tev_header(FILE *output, tev_encoder_t *enc) {
uint8_t qualityCo = enc->qualityCo;
uint8_t qualityCg = enc->qualityCg;
uint8_t flags = (enc->has_audio) | (enc->has_subtitles << 1);
uint8_t video_flags = (enc->progressive_mode ? 0 : 1) | (enc->is_ntsc_framerate ? 2 : 0) | (enc->lossless_mode ? 4 : 0); // bit 0 = is_interlaced, bit 1 = is_ntsc_framerate, bit 2 = is_lossless
uint8_t video_flags = (enc->progressive_mode ? 0 : 1) | (enc->is_ntsc_framerate ? 2 : 0); // bit 0 = is_interlaced, bit 1 = is_ntsc_framerate
uint8_t reserved = 0;
fwrite(&width, 2, 1, output);
@@ -2008,11 +2172,7 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num, int fie
// Encode all blocks
for (int by = 0; by < blocks_y; by++) {
for (int bx = 0; bx < blocks_x; bx++) {
if (enc->lossless_mode) {
encode_block_lossless(enc, bx, by, is_keyframe);
} else {
encode_block(enc, bx, by, is_keyframe);
}
encode_block(enc, bx, by, is_keyframe);
// Calculate complexity for rate control (if enabled)
if (enc->bitrate_mode > 0) {
@@ -2029,34 +2189,14 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num, int fie
// Compress block data using Zstd (compatible with TSVM decoder)
size_t compressed_size;
if (enc->lossless_mode) {
// Lossless mode: serialize blocks with float16 coefficients
size_t serialized_block_data_size = blocks_x * blocks_y * sizeof(tev_serialized_lossless_block_t);
tev_serialized_lossless_block_t *serialized_blocks = malloc(serialized_block_data_size);
if (!serialized_blocks) {
fprintf(stderr, "Failed to allocate memory for serialized lossless blocks\n");
return -1;
}
serialize_lossless_blocks(enc, blocks_x, blocks_y, serialized_blocks);
// Use the pre-allocated buffer size instead of calculating dynamically
size_t output_buffer_size = blocks_x * blocks_y * sizeof(tev_serialized_lossless_block_t) * 2;
compressed_size = ZSTD_compressCCtx(enc->zstd_context,
enc->compressed_buffer, output_buffer_size,
serialized_blocks, serialized_block_data_size,
ZSTD_COMPRESSON_LEVEL);
free(serialized_blocks);
} else {
// Regular mode: use regular block data
size_t block_data_size = blocks_x * blocks_y * sizeof(tev_block_t);
compressed_size = ZSTD_compressCCtx(enc->zstd_context,
enc->compressed_buffer, block_data_size * 2,
enc->block_data, block_data_size,
ZSTD_COMPRESSON_LEVEL);
}
// Regular mode: use regular block data
size_t block_data_size = blocks_x * blocks_y * sizeof(tev_block_t);
compressed_size = ZSTD_compressCCtx(enc->zstd_context,
enc->compressed_buffer, block_data_size * 2,
enc->block_data, block_data_size,
ZSTD_COMPRESSON_LEVEL);
if (ZSTD_isError(compressed_size)) {
fprintf(stderr, "Zstd compression failed: %s\n", ZSTD_getErrorName(compressed_size));
return 0;
@@ -2288,7 +2428,7 @@ static int start_audio_conversion(tev_encoder_t *enc) {
char command[2048];
snprintf(command, sizeof(command),
"ffmpeg -v quiet -i \"%s\" -acodec libtwolame -psymodel 4 -b:a %dk -ar %d -ac 2 -y \"%s\" 2>/dev/null",
enc->input_file, enc->lossless_mode ? 384 : MP2_RATE_TABLE[enc->qualityIndex], MP2_SAMPLE_RATE, TEMP_AUDIO_FILE);
enc->input_file, MP2_RATE_TABLE[enc->qualityIndex], MP2_SAMPLE_RATE, TEMP_AUDIO_FILE);
int result = system(command);
if (result == 0) {
@@ -2429,7 +2569,7 @@ static int process_audio(tev_encoder_t *enc, int frame_num, FILE *output) {
// Show usage information
static void show_usage(const char *program_name) {
printf("TEV YCoCg-R 4:2:0 Video Encoder\n");
printf("TEV YCoCg-R/ICtCp 4:2:0 Video Encoder\n");
printf("Usage: %s [options] -i input.mp4 -o output.mv2\n\n", program_name);
printf("Options:\n");
printf(" -i, --input FILE Input video file\n");
@@ -2443,7 +2583,7 @@ static void show_usage(const char *program_name) {
printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n");
printf(" -v, --verbose Verbose output\n");
printf(" -t, --test Test mode: generate solid colour frames\n");
printf(" --lossless Lossless mode: store coefficients as float16 (no quantisation, implies -p, 384k audio)\n");
printf(" --ictcp Use ICtCp color space instead of YCoCg-R (generates TEV version 3)\n");
printf(" --enable-rcf Enable per-block rate control (experimental)\n");
printf(" --enable-encode-stats Collect and report block complexity statistics\n");
printf(" --help Show this help\n\n");
@@ -2467,7 +2607,7 @@ static void show_usage(const char *program_name) {
printf("\n -s default: equal to %dx%d", DEFAULT_WIDTH, DEFAULT_HEIGHT);
printf("\n\n");
printf("Features:\n");
printf(" - YCoCg-R 4:2:0 chroma subsampling for 50%% compression improvement\n");
printf(" - YCoCg-R or ICtCp 4:2:0 chroma subsampling for 50%% compression improvement\n");
printf(" - 16x16 Y blocks with 8x8 chroma for optimal DCT efficiency\n");
printf(" - Frame rate conversion with FFmpeg temporal filtering\n");
printf(" - Adaptive quality control with complexity-based adjustment\n");
@@ -2536,7 +2676,7 @@ int main(int argc, char *argv[]) {
{"test", no_argument, 0, 't'},
{"enable-encode-stats", no_argument, 0, 1000},
{"enable-rcf", no_argument, 0, 1100},
{"lossless", no_argument, 0, 1200},
{"ictcp", no_argument, 0, 1300},
{"help", no_argument, 0, '?'},
{0, 0, 0, 0}
};
@@ -2611,8 +2751,8 @@ int main(int argc, char *argv[]) {
case 1100: // --enable-rcf
enc->disable_rcf = 0;
break;
case 1200: // --lossless
enc->lossless_mode = 1;
case 1300: // --ictcp
enc->ictcp_mode = 1;
break;
case 0:
if (strcmp(long_options[option_index].name, "help") == 0) {
@@ -2633,24 +2773,17 @@ int main(int argc, char *argv[]) {
}
}
// Lossless mode validation and adjustments
if (enc->lossless_mode) {
// In lossless mode, disable rate control and set quality to maximum
enc->bitrate_mode = 0;
enc->disable_rcf = 1;
enc->progressive_mode = 1;
enc->qualityIndex = 5;
enc->qualityY = enc->qualityCo = enc->qualityCg = 255; // Use 255 as a redundant lossless marker
if (enc->verbose) {
printf("Lossless mode enabled: Rate control disabled, quality set to maximum, enabling progressive scan\n");
}
}
// halve the internal representation of frame height
if (!enc->progressive_mode) {
enc->height /= 2;
}
if (enc->ictcp_mode) {
int qc = (enc->qualityCo + enc->qualityCg) / 2;
enc->qualityCo = qc;
enc->qualityCg = qc;
}
if (!test_mode && (!enc->input_file || !enc->output_file)) {
fprintf(stderr, "Input and output files are required (unless using --test mode)\n");
show_usage(argv[0]);
@@ -2737,7 +2870,7 @@ int main(int argc, char *argv[]) {
write_tev_header(output, enc);
gettimeofday(&enc->start_time, NULL);
printf("Encoding video with YCoCg-R 4:2:0 format...\n");
printf("Encoding video with %s 4:2:0 format...\n", enc->ictcp_mode ? "ICtCp" : "YCoCg-R");
if (enc->output_fps != enc->fps) {
printf("Frame rate conversion enabled: %d fps output\n", enc->output_fps);
}
@@ -2791,13 +2924,13 @@ int main(int argc, char *argv[]) {
printf("Frame %d: %s (%d,%d,%d)\n", frame_count, colour_name, test_r, test_g, test_b);
// Test YCoCg-R conversion
int y_test, co_test, cg_test;
rgb_to_ycocgr(test_r, test_g, test_b, &y_test, &co_test, &cg_test);
printf(" YCoCg-R: Y=%d Co=%d Cg=%d\n", y_test, co_test, cg_test);
double y_test, co_test, cg_test;
rgb_to_color_space(enc, test_r, test_g, test_b, &y_test, &co_test, &cg_test);
printf(" %s: Y=%.3f Co=%.3f Cg=%.3f\n", enc->ictcp_mode ? "ICtCp" : "YCoCg", y_test, co_test, cg_test);
// Test reverse conversion
uint8_t r_rev, g_rev, b_rev;
ycocgr_to_rgb(y_test, co_test, cg_test, &r_rev, &g_rev, &b_rev);
color_space_to_rgb(enc, y_test, co_test, cg_test, &r_rev, &g_rev, &b_rev);
printf(" Reverse: R=%d G=%d B=%d\n", r_rev, g_rev, b_rev);
} else {