ICtCp colour space wip

This commit is contained in:
minjaesong
2025-09-15 01:44:29 +09:00
parent 198e951102
commit 9c2aa96b73
4 changed files with 480 additions and 462 deletions

View File

@@ -11,7 +11,7 @@ const HEIGHT = 448
const BLOCK_SIZE = 16 // 16x16 blocks for YCoCg-R
const TEV_MAGIC = [0x1F, 0x54, 0x53, 0x56, 0x4D, 0x54, 0x45, 0x56] // "\x1FTSVM TEV"
const TEV_VERSION_YCOCG = 2 // YCoCg-R version
const TEV_VERSION_XYB = 3 // XYB version
const TEV_VERSION_ICtCp = 3 // ICtCp version
const SND_BASE_ADDR = audio.getBaseAddr()
const pcm = require("pcm")
const MP2_FRAME_SIZE = [144,216,252,288,360,432,504,576,720,864,1008,1152,1440,1728]
@@ -391,18 +391,15 @@ if (!magicMatching) {
// Read header
let version = seqread.readOneByte()
if (version !== TEV_VERSION_YCOCG && version !== TEV_VERSION_XYB) {
println(`Unsupported TEV version: ${version} (expected ${TEV_VERSION_YCOCG} for YCoCg-R or ${TEV_VERSION_XYB} for XYB)`)
if (version !== TEV_VERSION_YCOCG && version !== TEV_VERSION_ICtCp) {
println(`Unsupported TEV version: ${version} (expected ${TEV_VERSION_YCOCG} for YCoCg-R or ${TEV_VERSION_ICtCp} for ICtCp)`)
return 1
}
let colorSpace = (version === TEV_VERSION_XYB) ? "XYB" : "YCoCg-R"
let colorSpace = (version === TEV_VERSION_ICtCp) ? "ICtCp" : "YCoCg"
if (interactive) {
con.move(1,1)
if (colorSpace == "XYB")
println(`Push and hold Backspace to exit | TEV Format ${version} (${colorSpace}) | Deblock: ${enableDeblocking ? 'ON' : 'OFF'}, ${enableBoundaryAwareDecoding ? 'ON' : 'OFF'}`);
else
println(`Push and hold Backspace to exit | Deblock: ${enableDeblocking ? 'ON' : 'OFF'} | BoundaryAware: ${enableBoundaryAwareDecoding ? 'ON' : 'OFF'}`);
println(`Push and hold Backspace to exit | ${colorSpace} | Deblock: ${enableDeblocking ? 'ON' : 'OFF'} | EdgeAware: ${enableBoundaryAwareDecoding ? 'ON' : 'OFF'}`);
}
let width = seqread.readShort()
@@ -418,7 +415,6 @@ let hasSubtitle = !!(flags & 2)
let videoFlags = seqread.readOneByte()
let isInterlaced = !!(videoFlags & 1)
let isNTSC = !!(videoFlags & 2)
let isLossless = !!(videoFlags & 4)
let unused2 = seqread.readOneByte()
@@ -428,7 +424,7 @@ serial.println(` FPS: ${(isNTSC) ? (fps * 1000 / 1001) : fps}`)
serial.println(` Duration: ${totalFrames / fps}`)
serial.println(` Audio: ${hasAudio ? "Yes" : "No"}`)
serial.println(` Resolution: ${width}x${height}, ${isInterlaced ? "interlaced" : "progressive"}`)
serial.println(` Quality: Y=${qualityY}, Co=${qualityCo}, Cg=${qualityCg}, ${isLossless ? "lossless" : "lossy"}`)
serial.println(` Quality: Y=${qualityY}, Co=${qualityCo}, Cg=${qualityCg}`)
// DEBUG interlace raw output
@@ -621,7 +617,7 @@ try {
PREV_RGB_ADDR = temp
} else if (packetType == TEV_PACKET_IFRAME || packetType == TEV_PACKET_PFRAME) {
// Video frame packet (always includes rate control factor)
// Video frame packet
let payloadLen = seqread.readInt()
let compressedPtr = seqread.readBytes(payloadLen)
updateDataRateBin(payloadLen)
@@ -636,11 +632,6 @@ try {
// Decompress using gzip
// Optimized buffer size calculation for TEV YCoCg-R blocks
let blocksX = (width + 15) >> 4 // 16x16 blocks
let blocksY = (height + 15) >> 4
let tevBlockSize = 1 + 4 + 2 + (256 * 2) + (64 * 2) + (64 * 2) // mode + mv + cbp + Y(16x16) + Co(8x8) + Cg(8x8)
let decompressedSize = Math.max(payloadLen * 4, blocksX * blocksY * tevBlockSize) // More efficient sizing
let actualSize
let decompressStart = sys.nanoTime()
try {
@@ -655,7 +646,7 @@ try {
continue
}
// Hardware-accelerated TEV decoding to RGB buffers (YCoCg-R or XYB based on version)
// Hardware-accelerated TEV decoding to RGB buffers (YCoCg-R or ICtCp based on version)
try {
// duplicate every 1000th frame (pass a turn every 1000n+501st) if NTSC
if (!isNTSC || frameCount % 1000 != 501 || frameDuped) {
@@ -667,14 +658,14 @@ try {
if (isInterlaced) {
// For interlaced: decode current frame into currentFieldAddr
// For display: use prevFieldAddr as current, currentFieldAddr as next
graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding, isLossless)
graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding)
graphics.tevDeinterlace(trueFrameCount, width, decodingHeight, prevFieldAddr, currentFieldAddr, nextFieldAddr, CURRENT_RGB_ADDR, deinterlaceAlgorithm)
// Rotate field buffers for next frame: NEXT -> CURRENT -> PREV
rotateFieldBuffers()
} else {
// Progressive or first frame: normal decoding without temporal prediction
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding, isLossless)
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding)
}
decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 // Convert to milliseconds

View File

@@ -683,7 +683,7 @@ DCT-based compression, motion compensation, and efficient temporal coding.
- Version 2.1: Added Rate Control Factor to all video packets (breaking change)
* Enables bitrate-constrained encoding alongside quality modes
* All video frames now include 4-byte rate control factor after payload size
- Version 3.0: Additional support of XYB Colour space
- Version 3.0: Added support for the ICtCp colour space
# File Structure
\x1F T S V M T E V

View File

@@ -2147,7 +2147,92 @@ class GraphicsJSR223Delegate(private val vm: VM) {
return rgbData
}
// ICtCp to RGB conversion for TEV version 3
// ICtCp to RGB conversion for TEV version 3.
// iBlock is the 16x16 intensity plane from the IDCT stage; ctBlock/cpBlock are
// the 8x8 chroma planes (4:2:0), upsampled here by nearest neighbour.
// Returns an interleaved R,G,B array (16*16*3) with each component in 0..255.
fun tevIctcpToRGB(iBlock: IntArray, ctBlock: IntArray, cpBlock: IntArray): IntArray {
    val rgbData = IntArray(16 * 16 * 3) // R,G,B for 16x16 pixels
    for (py in 0 until 16) {
        for (px in 0 until 16) {
            val iIdx = py * 16 + px
            val i = iBlock[iIdx].toDouble()
            // Nearest-neighbour upsampling of the 8x8 chroma planes (4:2:0)
            val ctIdx = (py / 2) * 8 + (px / 2)
            val ct = ctBlock[ctIdx].toDouble()
            val cp = cpBlock[ctIdx].toDouble()
            // Undo the encoder's *255 scaling back to nominal ICtCp range
            // (I in [0,1], Ct/Cp signed fractions)
            val I = i / 255.0
            val Ct = ct / 255.0
            val Cp = cp / 255.0
            // ICtCp -> L'M'S' (inverse of the encoder's BT.2100-derived matrix)
            val Lp = (I + 0.015718580108730416 * Ct + 0.2095810681164055 * Cp).coerceIn(0.0, 1.0)
            val Mp = (I - 0.015718580108730416 * Ct - 0.20958106811640548 * Cp).coerceIn(0.0, 1.0)
            val Sp = (I + 1.0212710798422344 * Ct - 0.6052744909924316 * Cp).coerceIn(0.0, 1.0)
            // HLG decode: L'M'S' -> linear LMS
            val L = HLG_inverse_OETF(Lp)
            val M = HLG_inverse_OETF(Mp)
            val S = HLG_inverse_OETF(Sp)
            // LMS -> linear sRGB (inverse matrix). Out-of-gamut colours can fall
            // outside [0,1]; clamp BEFORE gamma encoding, because srgbUnlinearize
            // uses pow() and would return NaN for negative inputs.
            val rLin = (29.601046511687 * L - 21.364325340529906 * M - 4.886500015143518 * S).coerceIn(0.0, 1.0)
            val gLin = (-12.083229161592032 * L + 10.673933874098694 * M + 1.5369143265611211 * S).coerceIn(0.0, 1.0)
            val bLin = (0.38562844776642574 * L - 0.6536244436141302 * M + 1.0968381245163787 * S).coerceIn(0.0, 1.0)
            // Gamma encode to sRGB
            val rSrgb = srgbUnlinearize(rLin)
            val gSrgb = srgbUnlinearize(gLin)
            val bSrgb = srgbUnlinearize(bLin)
            // Convert to 8-bit with round-to-nearest (+0.5) to match the
            // encoder's iround(); plain truncation sat half a code value dark.
            val baseIdx = (py * 16 + px) * 3
            rgbData[baseIdx] = (rSrgb * 255.0 + 0.5).toInt().coerceIn(0, 255) // R
            rgbData[baseIdx + 1] = (gSrgb * 255.0 + 0.5).toInt().coerceIn(0, 255) // G
            rgbData[baseIdx + 2] = (bSrgb * 255.0 + 0.5).toInt().coerceIn(0, 255) // B
        }
    }
    return rgbData
}
// Helper functions for ICtCp decoding
// Inverse HLG OETF (BT.2100): HLG signal V in [0,1] -> relative scene-linear light.
fun HLG_inverse_OETF(V: Double): Double {
    val a = 0.17883277
    val b = 1.0 - 4.0 * a
    val c = 0.5 - a * ln(4.0 * a)
    // Below the 0.5 knee the forward OETF is sqrt(3L), so invert with V^2/3;
    // above the knee the logarithmic segment inverts to an exponential.
    return if (V <= 0.5) V * V / 3.0 else (exp((V - c) / a) + b) / 12.0
}
// sRGB gamma decode (IEC 61966-2-1): nonlinear [0,1] -> linear [0,1].
private fun srgbLinearize(value: Double): Double {
    // Linear toe segment below the 0.04045 threshold; power curve above it.
    if (value <= 0.04045) return value / 12.92
    return ((value + 0.055) / 1.055).pow(2.4)
}
// sRGB gamma encode (IEC 61966-2-1): linear [0,1] -> nonlinear [0,1].
private fun srgbUnlinearize(value: Double): Double {
    // Linear toe segment below 0.0031308; power curve above it.
    if (value <= 0.0031308) return value * 12.92
    return 1.055 * value.pow(1.0 / 2.4) - 0.055
}
// RGB to YCoCg-R conversion for INTER mode residual calculation
fun tevRGBToYcocg(rgbBlock: IntArray): IntArray {
val ycocgData = IntArray(16 * 16 * 3) // Y,Co,Cg for 16x16 pixels
@@ -2175,147 +2260,6 @@ class GraphicsJSR223Delegate(private val vm: VM) {
return ycocgData
}
// XYB conversion constants from JPEG XL specification
private val XYB_BIAS = 0.00379307325527544933
private val CBRT_BIAS = 0.155954200549248620 // cbrt(XYB_BIAS)
// RGB to LMS mixing coefficients
private val RGB_TO_LMS = arrayOf(
doubleArrayOf(0.3, 0.622, 0.078), // L coefficients
doubleArrayOf(0.23, 0.692, 0.078), // M coefficients
doubleArrayOf(0.24342268924547819, 0.20476744424496821, 0.55180986650955360) // S coefficients
)
// LMS to RGB inverse matrix
private val LMS_TO_RGB = arrayOf(
doubleArrayOf(11.0315669046, -9.8669439081, -0.1646229965),
doubleArrayOf(-3.2541473811, 4.4187703776, -0.1646229965),
doubleArrayOf(-3.6588512867, 2.7129230459, 1.9459282408)
)
// sRGB linearization functions
private fun srgbLinearise(value: Double): Double {
return if (value > 0.04045) {
Math.pow((value + 0.055) / 1.055, 2.4)
} else {
value / 12.92
}
}
private fun srgbUnlinearise(value: Double): Double {
return if (value > 0.0031308) {
1.055 * Math.pow(value, 1.0 / 2.4) - 0.055
} else {
value * 12.92
}
}
// XYB to RGB conversion for hardware decoding
fun tevXybToRGB(yBlock: IntArray, xBlock: IntArray, bBlock: IntArray): IntArray {
val rgbData = IntArray(16 * 16 * 3) // R,G,B for 16x16 pixels
for (py in 0 until 16) {
for (px in 0 until 16) {
val yIdx = py * 16 + px
val y = yBlock[yIdx]
// Get chroma values from subsampled 8x8 blocks (nearest neighbor upsampling)
val xbIdx = (py / 2) * 8 + (px / 2)
val x = xBlock[xbIdx]
val b = bBlock[xbIdx]
// Optimal range-based dequantization (exact inverse of improved quantization)
val X_MIN = -0.016; val X_MAX = 0.030
val xVal = (x / 255.0) * (X_MAX - X_MIN) + X_MIN // X: inverse of range mapping
val Y_MAX = 0.85
val yVal = (y / 255.0) * Y_MAX // Y: inverse of improved scale
val B_MAX = 0.85
val bVal = ((b + 128.0) / 255.0) * B_MAX // B: inverse of ((val/B_MAX*255)-128)
// XYB to LMS gamma
val lgamma = xVal + yVal
val mgamma = yVal - xVal
val sgamma = bVal
// Remove gamma correction
val lmix = (lgamma + CBRT_BIAS).pow(3.0) - XYB_BIAS
val mmix = (mgamma + CBRT_BIAS).pow(3.0) - XYB_BIAS
val smix = (sgamma + CBRT_BIAS).pow(3.0) - XYB_BIAS
// LMS to linear RGB using inverse matrix
val rLinear = (LMS_TO_RGB[0][0] * lmix + LMS_TO_RGB[0][1] * mmix + LMS_TO_RGB[0][2] * smix).coerceIn(0.0, 1.0)
val gLinear = (LMS_TO_RGB[1][0] * lmix + LMS_TO_RGB[1][1] * mmix + LMS_TO_RGB[1][2] * smix).coerceIn(0.0, 1.0)
val bLinear = (LMS_TO_RGB[2][0] * lmix + LMS_TO_RGB[2][1] * mmix + LMS_TO_RGB[2][2] * smix).coerceIn(0.0, 1.0)
// Convert back to sRGB gamma and 0-255 range
val r = (srgbUnlinearise(rLinear) * 255.0 + 0.5).toInt().coerceIn(0, 255)
val g = (srgbUnlinearise(gLinear) * 255.0 + 0.5).toInt().coerceIn(0, 255)
val bRgb = (srgbUnlinearise(bLinear) * 255.0 + 0.5).toInt().coerceIn(0, 255)
// Store RGB
val baseIdx = (py * 16 + px) * 3
rgbData[baseIdx] = r // R
rgbData[baseIdx + 1] = g // G
rgbData[baseIdx + 2] = bRgb // B
}
}
return rgbData
}
// RGB to XYB conversion for INTER mode residual calculation
fun tevRGBToXyb(rgbBlock: IntArray): IntArray {
val xybData = IntArray(16 * 16 * 3) // Y,X,B for 16x16 pixels
for (py in 0 until 16) {
for (px in 0 until 16) {
val baseIdx = (py * 16 + px) * 3
val r = rgbBlock[baseIdx]
val g = rgbBlock[baseIdx + 1]
val b = rgbBlock[baseIdx + 2]
// Convert RGB to 0-1 range and linearise sRGB
val rNorm = srgbLinearise(r / 255.0)
val gNorm = srgbLinearise(g / 255.0)
val bNorm = srgbLinearise(b / 255.0)
// RGB to LMS mixing with bias
val lmix = RGB_TO_LMS[0][0] * rNorm + RGB_TO_LMS[0][1] * gNorm + RGB_TO_LMS[0][2] * bNorm + XYB_BIAS
val mmix = RGB_TO_LMS[1][0] * rNorm + RGB_TO_LMS[1][1] * gNorm + RGB_TO_LMS[1][2] * bNorm + XYB_BIAS
val smix = RGB_TO_LMS[2][0] * rNorm + RGB_TO_LMS[2][1] * gNorm + RGB_TO_LMS[2][2] * bNorm + XYB_BIAS
// Apply gamma correction (cube root)
val lgamma = lmix.pow(1.0 / 3.0) - CBRT_BIAS
val mgamma = mmix.pow(1.0 / 3.0) - CBRT_BIAS
val sgamma = smix.pow(1.0 / 3.0) - CBRT_BIAS
// LMS to XYB transformation
val xVal = (lgamma - mgamma) / 2.0
val yVal = (lgamma + mgamma) / 2.0
val bVal = sgamma
// Optimal range-based quantization for XYB values (improved precision)
// X: actual range -0.016 to +0.030, map to full 0-255 precision
val X_MIN = -0.016; val X_MAX = 0.030
val xQuant = (((xVal - X_MIN) / (X_MAX - X_MIN)) * 255.0).toInt().coerceIn(0, 255)
// Y: range 0 to 0.85, map to 0 to 255 (improved scale)
val Y_MAX = 0.85
val yQuant = ((yVal / Y_MAX) * 255.0).toInt().coerceIn(0, 255)
// B: range 0 to 0.85, map to -128 to +127 (optimized precision)
val B_MAX = 0.85
val bQuant = (((bVal / B_MAX) * 255.0) - 128.0).toInt().coerceIn(-128, 127)
// Store XYB values
val yIdx = py * 16 + px
xybData[yIdx * 3] = yQuant // Y
xybData[yIdx * 3 + 1] = xQuant // X
xybData[yIdx * 3 + 2] = bQuant // B
}
}
return xybData
}
/**
* Enhanced TEV Deblocking Filter - Uses Knusperli-inspired techniques for superior boundary analysis
@@ -2627,8 +2571,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, frameCounter: Int,
debugMotionVectors: Boolean = false, tevVersion: Int = 2,
enableDeblocking: Boolean = true, enableBoundaryAwareDecoding: Boolean = false,
isLossless: Boolean = false) {
enableDeblocking: Boolean = true, enableBoundaryAwareDecoding: Boolean = false) {
// height doesn't change when interlaced, because that's the encoder's output
@@ -2744,7 +2687,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val cgPixels = tevIdct8x8_fromOptimizedCoeffs(cgBlock)
val rgbData = if (tevVersion == 3) {
tevXybToRGB(yPixels, coPixels, cgPixels)
tevIctcpToRGB(yPixels, coPixels, cgPixels)
} else {
tevYcocgToRGB(yPixels, coPixels, cgPixels)
}
@@ -2919,69 +2862,20 @@ class GraphicsJSR223Delegate(private val vm: VM) {
}
0x01 -> { // TEV_MODE_INTRA - Full YCoCg-R DCT decode (no motion compensation)
val yBlock: IntArray
val coBlock: IntArray
val cgBlock: IntArray
if (isLossless) {
// Lossless mode: coefficients are stored as float16, no quantization
// Read float16 coefficients: Y (16x16=256), Co (8x8=64), Cg (8x8=64)
val coeffFloat16Array = ShortArray(384) // 384 float16 values stored as shorts
vm.bulkPeekShort(readPtr.toInt(), coeffFloat16Array, 768) // 384 * 2 bytes
readPtr += 768
// Convert float16 to float32 and perform IDCT directly (no quantization)
println("DEBUG: Reading lossless coefficients, first few float16 values: ${coeffFloat16Array.take(10).map { "0x${it.toString(16)}" }}")
val yCoeffs = FloatArray(256) { i ->
// Convert signed short to unsigned short for float16 interpretation
val signedShort = coeffFloat16Array[i]
val float16bits = signedShort.toInt() and 0xFFFF // Convert to unsigned
val floatVal = Float16.toFloat(float16bits.toShort())
if (floatVal.isNaN() || floatVal.isInfinite()) {
println("NaN/Inf detected at Y coefficient $i: signedShort=0x${signedShort.toString(16)}, unsigned=0x${float16bits.toString(16)}, floatVal=$floatVal")
0f // Replace NaN with 0
} else floatVal
}
val coCoeffs = FloatArray(64) { i ->
// Convert signed short to unsigned short for float16 interpretation
val signedShort = coeffFloat16Array[256 + i]
val float16bits = signedShort.toInt() and 0xFFFF // Convert to unsigned
val floatVal = Float16.toFloat(float16bits.toShort())
if (floatVal.isNaN() || floatVal.isInfinite()) {
println("NaN/Inf detected at Co coefficient $i: signedShort=0x${signedShort.toString(16)}, unsigned=0x${float16bits.toString(16)}, floatVal=$floatVal")
0f // Replace NaN with 0
} else floatVal
}
val cgCoeffs = FloatArray(64) { i ->
// Convert signed short to unsigned short for float16 interpretation
val signedShort = coeffFloat16Array[320 + i]
val float16bits = signedShort.toInt() and 0xFFFF // Convert to unsigned
val floatVal = Float16.toFloat(float16bits.toShort())
if (floatVal.isNaN() || floatVal.isInfinite()) {
println("NaN/Inf detected at Cg coefficient $i: signedShort=0x${signedShort.toString(16)}, unsigned=0x${float16bits.toString(16)}, floatVal=$floatVal")
0f // Replace NaN with 0
} else floatVal
}
yBlock = tevIdct16x16_lossless(yCoeffs)
coBlock = tevIdct8x8_lossless(coCoeffs)
cgBlock = tevIdct8x8_lossless(cgCoeffs)
} else {
// Regular lossy mode: quantized int16 coefficients
// Optimized bulk reading of all DCT coefficients: Y(256×2) + Co(64×2) + Cg(64×2) = 768 bytes
val coeffShortArray = ShortArray(384) // Total coefficients: 256 + 64 + 64 = 384 shorts
vm.bulkPeekShort(readPtr.toInt(), coeffShortArray, 768)
readPtr += 768
// Regular lossy mode: quantized int16 coefficients
// Optimized bulk reading of all DCT coefficients: Y(256×2) + Co(64×2) + Cg(64×2) = 768 bytes
val coeffShortArray = ShortArray(384) // Total coefficients: 256 + 64 + 64 = 384 shorts
vm.bulkPeekShort(readPtr.toInt(), coeffShortArray, 768)
readPtr += 768
// Perform hardware IDCT for each channel using fast algorithm
yBlock = tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), QUANT_TABLE_Y, qY, rateControlFactor)
coBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), QUANT_TABLE_C, true, qCo, rateControlFactor)
cgBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), QUANT_TABLE_C, true, qCg, rateControlFactor)
}
// Perform hardware IDCT for each channel using fast algorithm
val yBlock = tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), QUANT_TABLE_Y, qY, rateControlFactor)
val coBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), QUANT_TABLE_C, true, qCo, rateControlFactor)
val cgBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), QUANT_TABLE_C, true, qCg, rateControlFactor)
// Convert to RGB (YCoCg-R for v2, ICtCp for v3)
val rgbData = if (tevVersion == 3) {
tevXybToRGB(yBlock, coBlock, cgBlock) // XYB format (v3)
tevIctcpToRGB(yBlock, coBlock, cgBlock) // ICtCp format (v3)
} else {
tevYcocgToRGB(yBlock, coBlock, cgBlock) // YCoCg-R format (v2)
}
@@ -3108,7 +3002,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Step 4: Convert final data to RGB (YCoCg-R for v2, ICtCp for v3)
val finalRgb = if (tevVersion == 3) {
tevXybToRGB(finalY, finalCo, finalCg) // XYB format (v3)
tevIctcpToRGB(finalY, finalCo, finalCg) // ICtCp format (v3)
} else {
tevYcocgToRGB(finalY, finalCo, finalCg) // YCoCg-R format (v2)
}

View File

@@ -1,5 +1,5 @@
// Created by Claude on 2025-08-18.
// TEV (TSVM Enhanced Video) Encoder - YCoCg-R 4:2:0 16x16 Block Version
// TEV (TSVM Enhanced Video) Encoder - YCoCg-R/ICtCp 4:2:0 16x16 Block Version
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
@@ -68,7 +68,9 @@ static inline float float16_to_float(uint16_t hbits) {
// TSVM Enhanced Video (TEV) format constants
#define TEV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x45\x56" // "\x1FTSVM TEV"
#define TEV_VERSION 2 // Updated for YCoCg-R 4:2:0
// TEV version - dynamic based on color space mode
// Version 2: YCoCg-R 4:2:0 (default)
// Version 3: ICtCp 4:2:0 (--ictcp flag)
// version 1: 8x8 RGB
// version 2: 16x16 Y, 8x8 Co/Cg, asymmetric quantisation, optional quantiser multiplier for rate control multiplier (1.0 when unused) {current winner}
// version 3: version 2 + internal 6-bit processing (discarded due to higher noise floor)
@@ -192,17 +194,6 @@ typedef struct __attribute__((packed)) {
int16_t cg_coeffs[HALF_BLOCK_SIZE_SQR]; // quantised Cg DCT coefficients (8x8)
} tev_block_t;
// Lossless TEV block structure (uses float32 internally, converted to float16 during serialization)
typedef struct __attribute__((packed)) {
uint8_t mode; // Block encoding mode
int16_t mv_x, mv_y; // Motion vector (1/4 pixel precision)
float rate_control_factor; // Always 1.0f in lossless mode
uint16_t cbp; // Coded block pattern (which channels have non-zero coeffs)
float y_coeffs[BLOCK_SIZE_SQR]; // lossless Y DCT coefficients (16x16)
float co_coeffs[HALF_BLOCK_SIZE_SQR]; // lossless Co DCT coefficients (8x8)
float cg_coeffs[HALF_BLOCK_SIZE_SQR]; // lossless Cg DCT coefficients (8x8)
} tev_lossless_block_t;
// Subtitle entry structure
typedef struct subtitle_entry {
int start_frame;
@@ -232,7 +223,7 @@ typedef struct {
int qualityCg;
int verbose;
int disable_rcf; // 0 = rcf enabled, 1 = disabled
int lossless_mode; // 0 = lossy (default), 1 = lossless mode
int ictcp_mode; // 0 = YCoCg-R (default), 1 = ICtCp color space
// Bitrate control
int target_bitrate_kbps; // Target bitrate in kbps (0 = quality mode)
@@ -289,6 +280,10 @@ typedef struct {
int complexity_capacity; // Capacity of complexity_values array
} tev_encoder_t;
//////////////////////////
// COLOUR MATHS CODES //
//////////////////////////
// RGB to YCoCg-R transform (per YCoCg-R specification with truncated division)
static void rgb_to_ycocgr(uint8_t r, uint8_t g, uint8_t b, int *y, int *co, int *cg) {
*co = (int)r - (int)b;
@@ -315,6 +310,180 @@ static void ycocgr_to_rgb(int y, int co, int cg, uint8_t *r, uint8_t *g, uint8_t
*b = CLAMP(*b, 0, 255);
}
// ---------------------- ICtCp Implementation ----------------------
static inline int iround(double v) { return (int)floor(v + 0.5); }
// ---------------------- sRGB gamma helpers ----------------------
static inline double srgb_linearize(double val) {
// val in [0,1]
if (val <= 0.04045) return val / 12.92;
return pow((val + 0.055) / 1.055, 2.4);
}
static inline double srgb_unlinearize(double val) {
// val in [0,1]
if (val <= 0.0031308) return val * 12.92;
return 1.055 * pow(val, 1.0 / 2.4) - 0.055;
}
// -------------------------- HLG --------------------------
// Forward HLG OETF (linear -> HLG)
static inline double HLG_OETF(double L) {
// L in [0,1], relative scene-linear
const double a = 0.17883277;
const double b = 1.0 - 4.0 * a;
const double c = 0.5 - a * log(4.0 * a);
if (L <= 1.0/12.0)
return sqrt(3.0 * L);
else
return a * log(12.0 * L - b) + c;
}
// Inverse HLG OETF (HLG -> linear)
static inline double HLG_inverse_OETF(double V) {
const double a = 0.17883277;
const double b = 1.0 - 4.0 * a;
const double c = 0.5 - a * log(4.0 * a);
if (V <= 0.5)
return (V * V) / 3.0;
else
return (exp((V - c)/a) + b) / 12.0;
}
// ---------------------- Matrices (doubles) ----------------------
// Combined linear sRGB -> LMS (single 3x3): product of sRGB->XYZ, XYZ->BT2020, BT2020->LMS
// Computed from standard matrices (double precision).
static const double M_RGB_TO_LMS[3][3] = {
{0.20502672199540622, 0.42945363228947586, 0.31165003516511786},
{0.2233144413317712, 0.5540422172466897, 0.21854692537153908},
{0.0609931761282002, 0.17917502499816504, 0.9323768661336348}
};
// Inverse: LMS -> linear sRGB (inverse of above)
static const double M_LMS_TO_RGB[3][3] = {
{29.601046511687, -21.364325340529906, -4.886500015143518},
{-12.083229161592032, 10.673933874098694, 1.5369143265611211},
{0.38562844776642574, -0.6536244436141302, 1.0968381245163787}
};
// ICtCp matrix (L' M' S' -> I Ct Cp). Values are the BT.2100 integer-derived /4096 constants.
static const double M_LMSPRIME_TO_ICTCP[3][3] = {
{ 2048.0/4096.0, 2048.0/4096.0, 0.0 },
{ 3625.0/4096.0, -7465.0/4096.0, 3840.0/4096.0 },
{ 9500.0/4096.0, -9212.0/4096.0, -288.0/4096.0 }
};
// Inverse: I Ct Cp -> L' M' S' (precomputed inverse)
static const double M_ICTCP_TO_LMSPRIME[3][3] = {
{ 1.0, 0.015718580108730416, 0.2095810681164055 },
{ 1.0, -0.015718580108730416, -0.20958106811640548 },
{ 1.0, 1.0212710798422344, -0.6052744909924316 }
};
// ---------------------- Forward: sRGB8 -> ICtCp (doubles) ----------------------
// Inputs:  r8,g8,b8 in 0..255 (8-bit sRGB).
// Outputs: I, Ct, Cp scaled by 255 and clamped — I in [0,255], Ct/Cp in [-256,255] —
//          so they feed directly into the DCT pipeline's numeric range.
//          (They are NOT in the nominal [0,1] ICtCp range; see final FCLAMPs.)
void srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
double *out_I, double *out_Ct, double *out_Cp)
{
// 1) linearize sRGB to 0..1
double r = srgb_linearize((double)r8 / 255.0);
double g = srgb_linearize((double)g8 / 255.0);
double b = srgb_linearize((double)b8 / 255.0);
// 2) linear RGB -> LMS (single combined 3x3 multiply: sRGB->XYZ->BT2020->LMS)
double L = M_RGB_TO_LMS[0][0]*r + M_RGB_TO_LMS[0][1]*g + M_RGB_TO_LMS[0][2]*b;
double M = M_RGB_TO_LMS[1][0]*r + M_RGB_TO_LMS[1][1]*g + M_RGB_TO_LMS[1][2]*b;
double S = M_RGB_TO_LMS[2][0]*r + M_RGB_TO_LMS[2][1]*g + M_RGB_TO_LMS[2][2]*b;
// 3) apply HLG encode (map linear LMS -> perceptual domain L',M',S')
double Lp = HLG_OETF(L);
double Mp = HLG_OETF(M);
double Sp = HLG_OETF(S);
// 4) L'M'S' -> ICtCp (BT.2100 /4096 integer-derived matrix)
double I = M_LMSPRIME_TO_ICTCP[0][0]*Lp + M_LMSPRIME_TO_ICTCP[0][1]*Mp + M_LMSPRIME_TO_ICTCP[0][2]*Sp;
double Ct = M_LMSPRIME_TO_ICTCP[1][0]*Lp + M_LMSPRIME_TO_ICTCP[1][1]*Mp + M_LMSPRIME_TO_ICTCP[1][2]*Sp;
double Cp = M_LMSPRIME_TO_ICTCP[2][0]*Lp + M_LMSPRIME_TO_ICTCP[2][1]*Mp + M_LMSPRIME_TO_ICTCP[2][2]*Sp;
// Scale to the DCT pipeline's 8-bit-equivalent range and clamp.
*out_I = FCLAMP(I * 255.f, 0.f, 255.f);
*out_Ct = FCLAMP(Ct * 255.f, -256.f, 255.f);
*out_Cp = FCLAMP(Cp * 255.f, -256.f, 255.f);
}
// ---------------------- Reverse: ICtCp -> sRGB8 (doubles) ----------------------
// Inputs:  I8, Ct8, Cp8 in the scaled-by-255 range produced by srgb8_to_ictcp_hlg
//          (hence the /255 below; the "8" suffix marks that 8-bit-equivalent scale).
// Outputs: r8,g8,b8 in 0..255 (8-bit sRGB, clamped and rounded)
void ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
uint8_t *r8, uint8_t *g8, uint8_t *b8)
{
// Undo the *255 pipeline scaling back to nominal ICtCp range.
double I = I8 / 255.f;
double Ct = Ct8 / 255.f;
double Cp = Cp8 / 255.f;
// 1) ICtCp -> L' M' S' (3x3 multiply, precomputed inverse matrix)
double Lp = M_ICTCP_TO_LMSPRIME[0][0]*I + M_ICTCP_TO_LMSPRIME[0][1]*Ct + M_ICTCP_TO_LMSPRIME[0][2]*Cp;
double Mp = M_ICTCP_TO_LMSPRIME[1][0]*I + M_ICTCP_TO_LMSPRIME[1][1]*Ct + M_ICTCP_TO_LMSPRIME[1][2]*Cp;
double Sp = M_ICTCP_TO_LMSPRIME[2][0]*I + M_ICTCP_TO_LMSPRIME[2][1]*Ct + M_ICTCP_TO_LMSPRIME[2][2]*Cp;
// 2) HLG decode: L' -> linear LMS
double L = HLG_inverse_OETF(Lp);
double M = HLG_inverse_OETF(Mp);
double S = HLG_inverse_OETF(Sp);
// 3) LMS -> linear sRGB (3x3 inverse)
// NOTE(review): out-of-gamut inputs can make these fall outside [0,1];
// srgb_unlinearize's pow() would then see a negative base — confirm upstream
// clamping guarantees, or clamp here.
double r_lin = M_LMS_TO_RGB[0][0]*L + M_LMS_TO_RGB[0][1]*M + M_LMS_TO_RGB[0][2]*S;
double g_lin = M_LMS_TO_RGB[1][0]*L + M_LMS_TO_RGB[1][1]*M + M_LMS_TO_RGB[1][2]*S;
double b_lin = M_LMS_TO_RGB[2][0]*L + M_LMS_TO_RGB[2][1]*M + M_LMS_TO_RGB[2][2]*S;
// 4) gamma encode and convert to 0..255 with center-of-bin rounding
double r = srgb_unlinearize(r_lin);
double g = srgb_unlinearize(g_lin);
double b = srgb_unlinearize(b_lin);
*r8 = (uint8_t)CLAMP(iround(r * 255.0), 0, 255);
*g8 = (uint8_t)CLAMP(iround(g * 255.0), 0, 255);
*b8 = (uint8_t)CLAMP(iround(b * 255.0), 0, 255);
}
// ---------------------- Color Space Switching Functions ----------------------
// Forward wrapper: convert one 8-bit RGB pixel into the encoder's active colour
// space (ICtCp when enc->ictcp_mode is set, YCoCg-R otherwise), as doubles.
static void rgb_to_color_space(tev_encoder_t *enc, uint8_t r, uint8_t g, uint8_t b,
                               double *c1, double *c2, double *c3) {
    if (!enc->ictcp_mode) {
        // YCoCg-R path: integer transform, widened to double for the caller.
        int y_val, co_val, cg_val;
        rgb_to_ycocgr(r, g, b, &y_val, &co_val, &cg_val);
        *c1 = (double)y_val;
        *c2 = (double)co_val;
        *c3 = (double)cg_val;
        return;
    }
    // ICtCp path: helper already writes doubles in the pipeline's scaled range.
    srgb8_to_ictcp_hlg(r, g, b, c1, c2, c3);
}
// Inverse wrapper: convert one pixel from the encoder's active colour space
// (ICtCp or YCoCg-R, matching rgb_to_color_space) back to 8-bit RGB.
static void color_space_to_rgb(tev_encoder_t *enc, double c1, double c2, double c3,
                               uint8_t *r, uint8_t *g, uint8_t *b) {
    if (!enc->ictcp_mode) {
        // YCoCg-R path expects integers; round the doubles first.
        int y_val = (int)round(c1);
        int co_val = (int)round(c2);
        int cg_val = (int)round(c3);
        ycocgr_to_rgb(y_val, co_val, cg_val, r, g, b);
        return;
    }
    // ICtCp path: helper consumes the scaled doubles directly.
    ictcp_hlg_to_srgb8(c1, c2, c3, r, g, b);
}
////////////////////////////////////////
// DISCRETE COSINE TRANSFORMATIONS //
////////////////////////////////////////
// Pre-calculated cosine tables
static float dct_table_16[16][16]; // For 16x16 DCT
static float dct_table_8[8][8]; // For 8x8 DCT
@@ -429,14 +598,14 @@ static int16_t quantise_coeff(float coeff, float quant, int is_dc, int is_chroma
}
}
// Extract 16x16 block from RGB frame and convert to YCoCg-R
static void extract_ycocgr_block(uint8_t *rgb_frame, int width, int height,
int block_x, int block_y,
float *y_block, float *co_block, float *cg_block) {
// Extract 16x16 block from RGB frame and convert to color space
static void extract_color_space_block(tev_encoder_t *enc, uint8_t *rgb_frame, int width, int height,
int block_x, int block_y,
float *c1_block, float *c2_block, float *c3_block) {
int start_x = block_x * BLOCK_SIZE;
int start_y = block_y * BLOCK_SIZE;
// Extract 16x16 Y block
// Extract 16x16 primary channel block (Y for YCoCg-R, I for ICtCp)
for (int py = 0; py < BLOCK_SIZE; py++) {
for (int px = 0; px < BLOCK_SIZE; px++) {
int x = start_x + px;
@@ -448,10 +617,10 @@ static void extract_ycocgr_block(uint8_t *rgb_frame, int width, int height,
uint8_t g = rgb_frame[offset + 1];
uint8_t b = rgb_frame[offset + 2];
int y_val, co_val, cg_val;
rgb_to_ycocgr(r, g, b, &y_val, &co_val, &cg_val);
double c1, c2, c3;
rgb_to_color_space(enc, r, g, b, &c1, &c2, &c3);
y_block[py * BLOCK_SIZE + px] = (float)y_val - 128.0f; // Center around 0
c1_block[py * BLOCK_SIZE + px] = (float)c1 - 128.0f;
}
}
}
@@ -473,25 +642,30 @@ static void extract_ycocgr_block(uint8_t *rgb_frame, int width, int height,
uint8_t g = rgb_frame[offset + 1];
uint8_t b = rgb_frame[offset + 2];
int y_val, co_val, cg_val;
rgb_to_ycocgr(r, g, b, &y_val, &co_val, &cg_val);
double c1, c2, c3;
rgb_to_color_space(enc, r, g, b, &c1, &c2, &c3);
co_sum += (int)c2;
cg_sum += (int)c3;
co_sum += co_val;
cg_sum += cg_val;
count++;
}
}
}
if (count > 0) {
// Center chroma around 0 for DCT (Co/Cg range is -255 to +255, so don't add offset)
co_block[py * HALF_BLOCK_SIZE + px] = (float)(co_sum / count);
cg_block[py * HALF_BLOCK_SIZE + px] = (float)(cg_sum / count);
// Average the accumulated chroma values and store
c2_block[py * HALF_BLOCK_SIZE + px] = (float)(co_sum / count);
c3_block[py * HALF_BLOCK_SIZE + px] = (float)(cg_sum / count);
}
}
}
}
// Calculate spatial activity for any channel (16x16 or 8x8)
static float calculate_spatial_activity(const float *block, int block_size) {
float activity = 0.0f;
@@ -791,8 +965,138 @@ static void estimate_motion(tev_encoder_t *enc, int block_x, int block_y,
}
// Convert RGB block to YCoCg-R with 4:2:0 chroma subsampling
static void convert_rgb_to_ycocgr_block(const uint8_t *rgb_block,
uint8_t *y_block, int8_t *co_block, int8_t *cg_block) {
// Convert a 16x16 RGB block to the active colour space with 4:2:0 chroma
// subsampling. Full-resolution luma/intensity goes to c1_workspace (16x16);
// chroma is averaged over 2x2 pixel groups into c2/c3_workspace (8x8).
// Values are stored in the pipeline's natural range (luma 0..255, chroma
// roughly -256..255); no centering offset is applied here.
static void convert_rgb_to_color_space_block(tev_encoder_t *enc, const uint8_t *rgb_block,
        float *c1_workspace, float *c2_workspace, float *c3_workspace) {
    if (enc->ictcp_mode) {
        // ICtCp mode: I at full resolution (16x16), Ct/Cp subsampled 4:2:0 (8x8)
        for (int py = 0; py < BLOCK_SIZE; py++) {
            for (int px = 0; px < BLOCK_SIZE; px++) {
                int rgb_idx = (py * BLOCK_SIZE + px) * 3;
                uint8_t r = rgb_block[rgb_idx];
                uint8_t g = rgb_block[rgb_idx + 1];
                uint8_t b = rgb_block[rgb_idx + 2];
                double I, Ct, Cp;
                srgb8_to_ictcp_hlg(r, g, b, &I, &Ct, &Cp);
                // srgb8_to_ictcp_hlg already returns I scaled and clamped to
                // [0,255]; do NOT multiply by 255 again (the previous code did,
                // pushing values to ~65000 and out of the DCT pipeline's range).
                c1_workspace[py * BLOCK_SIZE + px] = (float)I;
            }
        }
        for (int cy = 0; cy < HALF_BLOCK_SIZE; cy++) {
            for (int cx = 0; cx < HALF_BLOCK_SIZE; cx++) {
                double sum_ct = 0.0, sum_cp = 0.0;
                // Average the 2x2 source pixels covered by this chroma sample
                for (int dy = 0; dy < 2; dy++) {
                    for (int dx = 0; dx < 2; dx++) {
                        int py = cy * 2 + dy;
                        int px = cx * 2 + dx;
                        int rgb_idx = (py * BLOCK_SIZE + px) * 3;
                        uint8_t r = rgb_block[rgb_idx];
                        uint8_t g = rgb_block[rgb_idx + 1];
                        uint8_t b = rgb_block[rgb_idx + 2];
                        double I, Ct, Cp;
                        srgb8_to_ictcp_hlg(r, g, b, &I, &Ct, &Cp);
                        sum_ct += Ct;
                        sum_cp += Cp;
                    }
                }
                // Ct/Cp already arrive in the scaled [-256,255] range; just
                // average — no extra *255 (same double-scaling fix as above).
                c2_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)(sum_ct / 4.0);
                c3_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)(sum_cp / 4.0);
            }
        }
    } else {
        // YCoCg-R mode: inline forward transform (mirrors rgb_to_ycocgr)
        // Luma at full resolution (16x16)
        for (int py = 0; py < BLOCK_SIZE; py++) {
            for (int px = 0; px < BLOCK_SIZE; px++) {
                int rgb_idx = (py * BLOCK_SIZE + px) * 3;
                int r = rgb_block[rgb_idx];
                int g = rgb_block[rgb_idx + 1];
                int b = rgb_block[rgb_idx + 2];
                // YCoCg-R luma: y = (r + 2g + b) / 4, truncated division
                int y = (r + 2*g + b) / 4;
                c1_workspace[py * BLOCK_SIZE + px] = (float)CLAMP(y, 0, 255);
            }
        }
        // Co/Cg with 4:2:0 subsampling (8x8)
        for (int cy = 0; cy < HALF_BLOCK_SIZE; cy++) {
            for (int cx = 0; cx < HALF_BLOCK_SIZE; cx++) {
                int sum_co = 0, sum_cg = 0;
                for (int dy = 0; dy < 2; dy++) {
                    for (int dx = 0; dx < 2; dx++) {
                        int py = cy * 2 + dy;
                        int px = cx * 2 + dx;
                        int rgb_idx = (py * BLOCK_SIZE + px) * 3;
                        int r = rgb_block[rgb_idx];
                        int g = rgb_block[rgb_idx + 1];
                        int b = rgb_block[rgb_idx + 2];
                        // YCoCg-R chroma forward transform
                        int co = r - b;
                        int tmp = b + (co / 2);
                        int cg = g - tmp;
                        sum_co += co;
                        sum_cg += cg;
                    }
                }
                // Average and store subsampled chroma
                c2_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)CLAMP(sum_co / 4, -256, 255);
                c3_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)CLAMP(sum_cg / 4, -256, 255);
            }
        }
    }
}
// Extract motion-compensated YCoCg-R block from reference frame
static void extract_motion_compensated_block(const uint8_t *rgb_data, int width, int height,
int block_x, int block_y, int mv_x, int mv_y,
uint8_t *y_block, int8_t *co_block, int8_t *cg_block) {
// Extract 16x16 RGB block with motion compensation
uint8_t rgb_block[BLOCK_SIZE * BLOCK_SIZE * 3];
for (int dy = 0; dy < BLOCK_SIZE; dy++) {
for (int dx = 0; dx < BLOCK_SIZE; dx++) {
int cur_x = block_x + dx;
int cur_y = block_y + dy;
int ref_x = cur_x + mv_x; // Revert to original motion compensation
int ref_y = cur_y + mv_y;
int rgb_idx = (dy * BLOCK_SIZE + dx) * 3;
if (ref_x >= 0 && ref_y >= 0 && ref_x < width && ref_y < height) {
// Copy RGB from reference position
int ref_offset = (ref_y * width + ref_x) * 3;
rgb_block[rgb_idx] = rgb_data[ref_offset]; // R
rgb_block[rgb_idx + 1] = rgb_data[ref_offset + 1]; // G
rgb_block[rgb_idx + 2] = rgb_data[ref_offset + 2]; // B
} else {
// Out of bounds - use black
rgb_block[rgb_idx] = 0; // R
rgb_block[rgb_idx + 1] = 0; // G
rgb_block[rgb_idx + 2] = 0; // B
}
}
}
// Convert RGB block to YCoCg-R (original implementation for motion compensation)
// Convert 16x16 RGB to Y (full resolution)
for (int py = 0; py < BLOCK_SIZE; py++) {
for (int px = 0; px < BLOCK_SIZE; px++) {
@@ -840,41 +1144,6 @@ static void convert_rgb_to_ycocgr_block(const uint8_t *rgb_block,
}
}
// Extract motion-compensated YCoCg-R block from reference frame
// Build a motion-compensated 16x16 YCoCg-R block from the reference frame.
// Each destination pixel is fetched from (x + mv_x, y + mv_y) in the
// reference RGB image; samples falling outside the frame are padded with
// black before the colour-space conversion.
static void extract_motion_compensated_block(const uint8_t *rgb_data, int width, int height,
                                           int block_x, int block_y, int mv_x, int mv_y,
                                           uint8_t *y_block, int8_t *co_block, int8_t *cg_block) {
    uint8_t mc_rgb[BLOCK_SIZE * BLOCK_SIZE * 3];

    for (int row = 0; row < BLOCK_SIZE; row++) {
        int src_y = block_y + row + mv_y;
        for (int col = 0; col < BLOCK_SIZE; col++) {
            int src_x = block_x + col + mv_x;
            uint8_t *dst = &mc_rgb[(row * BLOCK_SIZE + col) * 3];

            int in_bounds = (src_x >= 0) && (src_y >= 0) &&
                            (src_x < width) && (src_y < height);
            if (in_bounds) {
                // Copy RGB from the motion-shifted reference position.
                const uint8_t *src = &rgb_data[(src_y * width + src_x) * 3];
                dst[0] = src[0];
                dst[1] = src[1];
                dst[2] = src[2];
            } else {
                // Out of frame: pad with black.
                dst[0] = 0;
                dst[1] = 0;
                dst[2] = 0;
            }
        }
    }

    // Convert the assembled RGB block to YCoCg-R (full-res Y, 4:2:0 chroma).
    convert_rgb_to_ycocgr_block(mc_rgb, y_block, co_block, cg_block);
}
// Compute motion-compensated residual for INTER mode
static void compute_motion_residual(tev_encoder_t *enc, int block_x, int block_y, int mv_x, int mv_y) {
int start_x = block_x * BLOCK_SIZE;
@@ -909,7 +1178,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
tev_block_t *block = &enc->block_data[block_y * ((enc->width + 15) / 16) + block_x];
// Extract YCoCg-R block
extract_ycocgr_block(enc->current_rgb, enc->width, enc->height,
extract_color_space_block(enc, enc->current_rgb, enc->width, enc->height,
block_x, block_y,
enc->y_workspace, enc->co_workspace, enc->cg_workspace);
@@ -1105,107 +1374,6 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
block->cbp = 0x07; // Y, Co, Cg all present
}
// Encode a 16x16 block in lossless mode
// Encode one 16x16 block in lossless mode: DCT coefficients are stored as
// raw floats with no quantisation step applied.
static void encode_block_lossless(tev_encoder_t *enc, int block_x, int block_y, int is_keyframe) {
    int blocks_per_row = (enc->width + 15) / 16;
    tev_lossless_block_t *block =
        (tev_lossless_block_t*)&enc->block_data[block_y * blocks_per_row + block_x];

    // Pull this block's YCoCg-R samples into the shared workspaces.
    extract_ycocgr_block(enc->current_rgb, enc->width, enc->height,
                        block_x, block_y,
                        enc->y_workspace, enc->co_workspace, enc->cg_workspace);

    // Mode decision: lossless mode currently uses INTRA for every block, on
    // keyframes and inter frames alike (inter coding not implemented here).
    (void)is_keyframe; // reserved for a future inter-capable mode decision
    block->mode = TEV_MODE_INTRA;
    block->mv_x = 0;
    block->mv_y = 0;
    enc->blocks_intra++;

    // No rate control in lossless mode.
    block->rate_control_factor = 1.0f;

    // Forward DCT per channel; coefficients are copied out unquantised.
    // Y channel (16x16).
    dct_16x16_fast(enc->y_workspace, enc->dct_workspace);
    for (int i = 0; i < BLOCK_SIZE_SQR; i++) {
        block->y_coeffs[i] = enc->dct_workspace[i];
    }

    // Co channel (8x8).
    dct_8x8_fast(enc->co_workspace, enc->dct_workspace);
    for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
        block->co_coeffs[i] = enc->dct_workspace[i];
    }

    // Cg channel (8x8).
    dct_8x8_fast(enc->cg_workspace, enc->dct_workspace);
    for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
        block->cg_coeffs[i] = enc->dct_workspace[i];
    }

    // All three channels are always present.
    block->cbp = 0x07;
}
// Serialized lossless block structure (for writing to file with float16 coefficients).
// Mirrors the in-memory lossless block, but every DCT coefficient is stored as an
// IEEE-754 half-float bit pattern (uint16_t) to halve the on-disk payload.
// Packed: the layout is written to the output stream verbatim.
typedef struct __attribute__((packed)) {
    uint8_t mode;                            // block coding mode (lossless path uses TEV_MODE_INTRA)
    int16_t mv_x, mv_y;                      // motion vector; (0,0) for lossless INTRA blocks
    float rate_control_factor; // Always 1.0f in lossless mode
    uint16_t cbp;                            // coded block pattern; 0x07 = Y, Co, Cg all present
    uint16_t y_coeffs[BLOCK_SIZE_SQR]; // float16 Y coefficients
    uint16_t co_coeffs[HALF_BLOCK_SIZE_SQR]; // float16 Co coefficients
    uint16_t cg_coeffs[HALF_BLOCK_SIZE_SQR]; // float16 Cg coefficients
} tev_serialized_lossless_block_t;
// Convert in-memory lossless blocks (float32 coefficients) into the on-disk
// serialized form with float16 coefficients. Values outside the float16
// finite range are clamped; in verbose mode each clamp is reported.
static void serialize_lossless_blocks(tev_encoder_t *enc, int blocks_x, int blocks_y,
                                     tev_serialized_lossless_block_t *serialized_blocks) {
    // Largest finite float16 value; anything beyond would encode as +/-inf.
    const float FLOAT16_MAX = 65504.0f;
    int total_blocks = blocks_x * blocks_y;

    for (int idx = 0; idx < total_blocks; idx++) {
        tev_lossless_block_t *src = (tev_lossless_block_t*)&enc->block_data[idx];
        tev_serialized_lossless_block_t *dst = &serialized_blocks[idx];

        // Header fields are copied through unchanged.
        dst->mode = src->mode;
        dst->mv_x = src->mv_x;
        dst->mv_y = src->mv_y;
        dst->rate_control_factor = src->rate_control_factor;
        dst->cbp = src->cbp;

        // Luma coefficients: clamp, then pack to float16.
        for (int i = 0; i < BLOCK_SIZE_SQR; i++) {
            float y_val = FCLAMP(src->y_coeffs[i], -FLOAT16_MAX, FLOAT16_MAX);
            dst->y_coeffs[i] = float_to_float16(y_val);
            if (enc->verbose && fabsf(src->y_coeffs[i]) > FLOAT16_MAX) {
                printf("WARNING: Y coefficient %d clamped: %f -> %f\n", i, src->y_coeffs[i], y_val);
            }
        }

        // Chroma coefficients: same treatment for Co and Cg.
        for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
            float co_val = FCLAMP(src->co_coeffs[i], -FLOAT16_MAX, FLOAT16_MAX);
            float cg_val = FCLAMP(src->cg_coeffs[i], -FLOAT16_MAX, FLOAT16_MAX);
            dst->co_coeffs[i] = float_to_float16(co_val);
            dst->cg_coeffs[i] = float_to_float16(cg_val);
            if (enc->verbose && fabsf(src->co_coeffs[i]) > FLOAT16_MAX) {
                printf("WARNING: Co coefficient %d clamped: %f -> %f\n", i, src->co_coeffs[i], co_val);
            }
            if (enc->verbose && fabsf(src->cg_coeffs[i]) > FLOAT16_MAX) {
                printf("WARNING: Cg coefficient %d clamped: %f -> %f\n", i, src->cg_coeffs[i], cg_val);
            }
        }
    }
}
// Convert SubRip time format (HH:MM:SS,mmm) to frame number
static int srt_time_to_frame(const char *time_str, int fps) {
int hours, minutes, seconds, milliseconds;
@@ -1820,17 +1988,13 @@ static int alloc_encoder_buffers(tev_encoder_t *enc) {
enc->cg_workspace = malloc(8 * 8 * sizeof(float));
enc->dct_workspace = malloc(16 * 16 * sizeof(float));
// Allocate block data
enc->block_data = malloc(total_blocks * sizeof(tev_block_t));
// Allocate compression buffer large enough for both regular and lossless modes
size_t max_block_size = sizeof(tev_block_t) > sizeof(tev_serialized_lossless_block_t) ?
sizeof(tev_block_t) : sizeof(tev_serialized_lossless_block_t);
size_t compressed_buffer_size = total_blocks * max_block_size * 2;
// Allocate compression buffer
size_t compressed_buffer_size = total_blocks * sizeof(tev_block_t) * 2;
enc->compressed_buffer = malloc(compressed_buffer_size);
if (enc->verbose) {
printf("Allocated compressed buffer: %zu bytes for %d blocks (max_block_size: %zu)\n",
compressed_buffer_size, total_blocks, max_block_size);
}
enc->mp2_buffer = malloc(MP2_DEFAULT_PACKET_SIZE);
if (!enc->current_rgb || !enc->previous_rgb || !enc->reference_rgb ||
@@ -1889,7 +2053,7 @@ static void free_encoder(tev_encoder_t *enc) {
static int write_tev_header(FILE *output, tev_encoder_t *enc) {
// Magic + version
fwrite(TEV_MAGIC, 1, 8, output);
uint8_t version = TEV_VERSION;
uint8_t version = enc->ictcp_mode ? 3 : 2; // Version 3 for ICtCp, 2 for YCoCg-R
fwrite(&version, 1, 1, output);
// Video parameters
@@ -1901,7 +2065,7 @@ static int write_tev_header(FILE *output, tev_encoder_t *enc) {
uint8_t qualityCo = enc->qualityCo;
uint8_t qualityCg = enc->qualityCg;
uint8_t flags = (enc->has_audio) | (enc->has_subtitles << 1);
uint8_t video_flags = (enc->progressive_mode ? 0 : 1) | (enc->is_ntsc_framerate ? 2 : 0) | (enc->lossless_mode ? 4 : 0); // bit 0 = is_interlaced, bit 1 = is_ntsc_framerate, bit 2 = is_lossless
uint8_t video_flags = (enc->progressive_mode ? 0 : 1) | (enc->is_ntsc_framerate ? 2 : 0); // bit 0 = is_interlaced, bit 1 = is_ntsc_framerate
uint8_t reserved = 0;
fwrite(&width, 2, 1, output);
@@ -2008,11 +2172,7 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num, int fie
// Encode all blocks
for (int by = 0; by < blocks_y; by++) {
for (int bx = 0; bx < blocks_x; bx++) {
if (enc->lossless_mode) {
encode_block_lossless(enc, bx, by, is_keyframe);
} else {
encode_block(enc, bx, by, is_keyframe);
}
encode_block(enc, bx, by, is_keyframe);
// Calculate complexity for rate control (if enabled)
if (enc->bitrate_mode > 0) {
@@ -2029,34 +2189,14 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num, int fie
// Compress block data using Zstd (compatible with TSVM decoder)
size_t compressed_size;
if (enc->lossless_mode) {
// Lossless mode: serialize blocks with float16 coefficients
size_t serialized_block_data_size = blocks_x * blocks_y * sizeof(tev_serialized_lossless_block_t);
tev_serialized_lossless_block_t *serialized_blocks = malloc(serialized_block_data_size);
if (!serialized_blocks) {
fprintf(stderr, "Failed to allocate memory for serialized lossless blocks\n");
return -1;
}
serialize_lossless_blocks(enc, blocks_x, blocks_y, serialized_blocks);
// Use the pre-allocated buffer size instead of calculating dynamically
size_t output_buffer_size = blocks_x * blocks_y * sizeof(tev_serialized_lossless_block_t) * 2;
compressed_size = ZSTD_compressCCtx(enc->zstd_context,
enc->compressed_buffer, output_buffer_size,
serialized_blocks, serialized_block_data_size,
ZSTD_COMPRESSON_LEVEL);
free(serialized_blocks);
} else {
// Regular mode: use regular block data
size_t block_data_size = blocks_x * blocks_y * sizeof(tev_block_t);
compressed_size = ZSTD_compressCCtx(enc->zstd_context,
enc->compressed_buffer, block_data_size * 2,
enc->block_data, block_data_size,
ZSTD_COMPRESSON_LEVEL);
}
// Regular mode: use regular block data
size_t block_data_size = blocks_x * blocks_y * sizeof(tev_block_t);
compressed_size = ZSTD_compressCCtx(enc->zstd_context,
enc->compressed_buffer, block_data_size * 2,
enc->block_data, block_data_size,
ZSTD_COMPRESSON_LEVEL);
if (ZSTD_isError(compressed_size)) {
fprintf(stderr, "Zstd compression failed: %s\n", ZSTD_getErrorName(compressed_size));
return 0;
@@ -2288,7 +2428,7 @@ static int start_audio_conversion(tev_encoder_t *enc) {
char command[2048];
snprintf(command, sizeof(command),
"ffmpeg -v quiet -i \"%s\" -acodec libtwolame -psymodel 4 -b:a %dk -ar %d -ac 2 -y \"%s\" 2>/dev/null",
enc->input_file, enc->lossless_mode ? 384 : MP2_RATE_TABLE[enc->qualityIndex], MP2_SAMPLE_RATE, TEMP_AUDIO_FILE);
enc->input_file, MP2_RATE_TABLE[enc->qualityIndex], MP2_SAMPLE_RATE, TEMP_AUDIO_FILE);
int result = system(command);
if (result == 0) {
@@ -2429,7 +2569,7 @@ static int process_audio(tev_encoder_t *enc, int frame_num, FILE *output) {
// Show usage information
static void show_usage(const char *program_name) {
printf("TEV YCoCg-R 4:2:0 Video Encoder\n");
printf("TEV YCoCg-R/ICtCp 4:2:0 Video Encoder\n");
printf("Usage: %s [options] -i input.mp4 -o output.mv2\n\n", program_name);
printf("Options:\n");
printf(" -i, --input FILE Input video file\n");
@@ -2443,7 +2583,7 @@ static void show_usage(const char *program_name) {
printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n");
printf(" -v, --verbose Verbose output\n");
printf(" -t, --test Test mode: generate solid colour frames\n");
printf(" --lossless Lossless mode: store coefficients as float16 (no quantisation, implies -p, 384k audio)\n");
printf(" --ictcp Use ICtCp color space instead of YCoCg-R (generates TEV version 3)\n");
printf(" --enable-rcf Enable per-block rate control (experimental)\n");
printf(" --enable-encode-stats Collect and report block complexity statistics\n");
printf(" --help Show this help\n\n");
@@ -2467,7 +2607,7 @@ static void show_usage(const char *program_name) {
printf("\n -s default: equal to %dx%d", DEFAULT_WIDTH, DEFAULT_HEIGHT);
printf("\n\n");
printf("Features:\n");
printf(" - YCoCg-R 4:2:0 chroma subsampling for 50%% compression improvement\n");
printf(" - YCoCg-R or ICtCp 4:2:0 chroma subsampling for 50%% compression improvement\n");
printf(" - 16x16 Y blocks with 8x8 chroma for optimal DCT efficiency\n");
printf(" - Frame rate conversion with FFmpeg temporal filtering\n");
printf(" - Adaptive quality control with complexity-based adjustment\n");
@@ -2536,7 +2676,7 @@ int main(int argc, char *argv[]) {
{"test", no_argument, 0, 't'},
{"enable-encode-stats", no_argument, 0, 1000},
{"enable-rcf", no_argument, 0, 1100},
{"lossless", no_argument, 0, 1200},
{"ictcp", no_argument, 0, 1300},
{"help", no_argument, 0, '?'},
{0, 0, 0, 0}
};
@@ -2611,8 +2751,8 @@ int main(int argc, char *argv[]) {
case 1100: // --enable-rcf
enc->disable_rcf = 0;
break;
case 1200: // --lossless
enc->lossless_mode = 1;
case 1300: // --ictcp
enc->ictcp_mode = 1;
break;
case 0:
if (strcmp(long_options[option_index].name, "help") == 0) {
@@ -2633,24 +2773,17 @@ int main(int argc, char *argv[]) {
}
}
// Lossless mode validation and adjustments
if (enc->lossless_mode) {
// In lossless mode, disable rate control and set quality to maximum
enc->bitrate_mode = 0;
enc->disable_rcf = 1;
enc->progressive_mode = 1;
enc->qualityIndex = 5;
enc->qualityY = enc->qualityCo = enc->qualityCg = 255; // Use 255 as a redundant lossless marker
if (enc->verbose) {
printf("Lossless mode enabled: Rate control disabled, quality set to maximum, enabling progressive scan\n");
}
}
// halve the internal representation of frame height
if (!enc->progressive_mode) {
enc->height /= 2;
}
if (enc->ictcp_mode) {
int qc = (enc->qualityCo + enc->qualityCg) / 2;
enc->qualityCo = qc;
enc->qualityCg = qc;
}
if (!test_mode && (!enc->input_file || !enc->output_file)) {
fprintf(stderr, "Input and output files are required (unless using --test mode)\n");
show_usage(argv[0]);
@@ -2737,7 +2870,7 @@ int main(int argc, char *argv[]) {
write_tev_header(output, enc);
gettimeofday(&enc->start_time, NULL);
printf("Encoding video with YCoCg-R 4:2:0 format...\n");
printf("Encoding video with %s 4:2:0 format...\n", enc->ictcp_mode ? "ICtCp" : "YCoCg-R");
if (enc->output_fps != enc->fps) {
printf("Frame rate conversion enabled: %d fps output\n", enc->output_fps);
}
@@ -2791,13 +2924,13 @@ int main(int argc, char *argv[]) {
printf("Frame %d: %s (%d,%d,%d)\n", frame_count, colour_name, test_r, test_g, test_b);
// Test YCoCg-R conversion
int y_test, co_test, cg_test;
rgb_to_ycocgr(test_r, test_g, test_b, &y_test, &co_test, &cg_test);
printf(" YCoCg-R: Y=%d Co=%d Cg=%d\n", y_test, co_test, cg_test);
double y_test, co_test, cg_test;
rgb_to_color_space(enc, test_r, test_g, test_b, &y_test, &co_test, &cg_test);
printf(" %s: Y=%.3f Co=%.3f Cg=%.3f\n", enc->ictcp_mode ? "ICtCp" : "YCoCg", y_test, co_test, cg_test);
// Test reverse conversion
uint8_t r_rev, g_rev, b_rev;
ycocgr_to_rgb(y_test, co_test, cg_test, &r_rev, &g_rev, &b_rev);
color_space_to_rgb(enc, y_test, co_test, cg_test, &r_rev, &g_rev, &b_rev);
printf(" Reverse: R=%d G=%d B=%d\n", r_rev, g_rev, b_rev);
} else {