mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 19:51:51 +09:00
ICtCp colour space wip
This commit is contained in:
@@ -11,7 +11,7 @@ const HEIGHT = 448
|
||||
const BLOCK_SIZE = 16 // 16x16 blocks for YCoCg-R
|
||||
const TEV_MAGIC = [0x1F, 0x54, 0x53, 0x56, 0x4D, 0x54, 0x45, 0x56] // "\x1FTSVM TEV"
|
||||
const TEV_VERSION_YCOCG = 2 // YCoCg-R version
|
||||
const TEV_VERSION_XYB = 3 // XYB version
|
||||
const TEV_VERSION_ICtCp = 3 // ICtCp version
|
||||
const SND_BASE_ADDR = audio.getBaseAddr()
|
||||
const pcm = require("pcm")
|
||||
const MP2_FRAME_SIZE = [144,216,252,288,360,432,504,576,720,864,1008,1152,1440,1728]
|
||||
@@ -391,18 +391,15 @@ if (!magicMatching) {
|
||||
|
||||
// Read header
|
||||
let version = seqread.readOneByte()
|
||||
if (version !== TEV_VERSION_YCOCG && version !== TEV_VERSION_XYB) {
|
||||
println(`Unsupported TEV version: ${version} (expected ${TEV_VERSION_YCOCG} for YCoCg-R or ${TEV_VERSION_XYB} for XYB)`)
|
||||
if (version !== TEV_VERSION_YCOCG && version !== TEV_VERSION_ICtCp) {
|
||||
println(`Unsupported TEV version: ${version} (expected ${TEV_VERSION_YCOCG} for YCoCg-R or ${TEV_VERSION_ICtCp} for ICtCp)`)
|
||||
return 1
|
||||
}
|
||||
|
||||
let colorSpace = (version === TEV_VERSION_XYB) ? "XYB" : "YCoCg-R"
|
||||
let colorSpace = (version === TEV_VERSION_ICtCp) ? "ICtCp" : "YCoCg"
|
||||
if (interactive) {
|
||||
con.move(1,1)
|
||||
if (colorSpace == "XYB")
|
||||
println(`Push and hold Backspace to exit | TEV Format ${version} (${colorSpace}) | Deblock: ${enableDeblocking ? 'ON' : 'OFF'}, ${enableBoundaryAwareDecoding ? 'ON' : 'OFF'}`);
|
||||
else
|
||||
println(`Push and hold Backspace to exit | Deblock: ${enableDeblocking ? 'ON' : 'OFF'} | BoundaryAware: ${enableBoundaryAwareDecoding ? 'ON' : 'OFF'}`);
|
||||
println(`Push and hold Backspace to exit | ${colorSpace} | Deblock: ${enableDeblocking ? 'ON' : 'OFF'} | EdgeAware: ${enableBoundaryAwareDecoding ? 'ON' : 'OFF'}`);
|
||||
}
|
||||
|
||||
let width = seqread.readShort()
|
||||
@@ -418,7 +415,6 @@ let hasSubtitle = !!(flags & 2)
|
||||
let videoFlags = seqread.readOneByte()
|
||||
let isInterlaced = !!(videoFlags & 1)
|
||||
let isNTSC = !!(videoFlags & 2)
|
||||
let isLossless = !!(videoFlags & 4)
|
||||
let unused2 = seqread.readOneByte()
|
||||
|
||||
|
||||
@@ -428,7 +424,7 @@ serial.println(` FPS: ${(isNTSC) ? (fps * 1000 / 1001) : fps}`)
|
||||
serial.println(` Duration: ${totalFrames / fps}`)
|
||||
serial.println(` Audio: ${hasAudio ? "Yes" : "No"}`)
|
||||
serial.println(` Resolution: ${width}x${height}, ${isInterlaced ? "interlaced" : "progressive"}`)
|
||||
serial.println(` Quality: Y=${qualityY}, Co=${qualityCo}, Cg=${qualityCg}, ${isLossless ? "lossless" : "lossy"}`)
|
||||
serial.println(` Quality: Y=${qualityY}, Co=${qualityCo}, Cg=${qualityCg}`)
|
||||
|
||||
|
||||
// DEBUG interlace raw output
|
||||
@@ -621,7 +617,7 @@ try {
|
||||
PREV_RGB_ADDR = temp
|
||||
|
||||
} else if (packetType == TEV_PACKET_IFRAME || packetType == TEV_PACKET_PFRAME) {
|
||||
// Video frame packet (always includes rate control factor)
|
||||
// Video frame packet
|
||||
let payloadLen = seqread.readInt()
|
||||
let compressedPtr = seqread.readBytes(payloadLen)
|
||||
updateDataRateBin(payloadLen)
|
||||
@@ -636,11 +632,6 @@ try {
|
||||
|
||||
// Decompress using gzip
|
||||
// Optimized buffer size calculation for TEV YCoCg-R blocks
|
||||
let blocksX = (width + 15) >> 4 // 16x16 blocks
|
||||
let blocksY = (height + 15) >> 4
|
||||
let tevBlockSize = 1 + 4 + 2 + (256 * 2) + (64 * 2) + (64 * 2) // mode + mv + cbp + Y(16x16) + Co(8x8) + Cg(8x8)
|
||||
let decompressedSize = Math.max(payloadLen * 4, blocksX * blocksY * tevBlockSize) // More efficient sizing
|
||||
|
||||
let actualSize
|
||||
let decompressStart = sys.nanoTime()
|
||||
try {
|
||||
@@ -655,7 +646,7 @@ try {
|
||||
continue
|
||||
}
|
||||
|
||||
// Hardware-accelerated TEV decoding to RGB buffers (YCoCg-R or XYB based on version)
|
||||
// Hardware-accelerated TEV decoding to RGB buffers (YCoCg-R or ICtCp based on version)
|
||||
try {
|
||||
// duplicate every 1000th frame (pass a turn every 1000n+501st) if NTSC
|
||||
if (!isNTSC || frameCount % 1000 != 501 || frameDuped) {
|
||||
@@ -667,14 +658,14 @@ try {
|
||||
if (isInterlaced) {
|
||||
// For interlaced: decode current frame into currentFieldAddr
|
||||
// For display: use prevFieldAddr as current, currentFieldAddr as next
|
||||
graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding, isLossless)
|
||||
graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding)
|
||||
graphics.tevDeinterlace(trueFrameCount, width, decodingHeight, prevFieldAddr, currentFieldAddr, nextFieldAddr, CURRENT_RGB_ADDR, deinterlaceAlgorithm)
|
||||
|
||||
// Rotate field buffers for next frame: NEXT -> CURRENT -> PREV
|
||||
rotateFieldBuffers()
|
||||
} else {
|
||||
// Progressive or first frame: normal decoding without temporal prediction
|
||||
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding, isLossless)
|
||||
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding)
|
||||
}
|
||||
|
||||
decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 // Convert to milliseconds
|
||||
|
||||
@@ -683,7 +683,7 @@ DCT-based compression, motion compensation, and efficient temporal coding.
|
||||
- Version 2.1: Added Rate Control Factor to all video packets (breaking change)
|
||||
* Enables bitrate-constrained encoding alongside quality modes
|
||||
* All video frames now include 4-byte rate control factor after payload size
|
||||
- Version 3.0: Additional support of XYB Colour space
|
||||
- Version 3.0: Additional support of ICtCp Colour space
|
||||
|
||||
# File Structure
|
||||
\x1F T S V M T E V
|
||||
|
||||
@@ -2147,7 +2147,92 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
|
||||
return rgbData
|
||||
}
|
||||
|
||||
|
||||
// ICtCp to RGB conversion for TEV version 3
|
||||
fun tevIctcpToRGB(iBlock: IntArray, ctBlock: IntArray, cpBlock: IntArray): IntArray {
|
||||
val rgbData = IntArray(16 * 16 * 3) // R,G,B for 16x16 pixels
|
||||
|
||||
// Process 16x16 I channel with 8x8 Ct/Cp channels (4:2:0 upsampling)
|
||||
for (py in 0 until 16) {
|
||||
for (px in 0 until 16) {
|
||||
val iIdx = py * 16 + px
|
||||
val i = iBlock[iIdx].toDouble()
|
||||
|
||||
// Get Ct/Cp from 8x8 chroma blocks (4:2:0 upsampling)
|
||||
val ctIdx = (py / 2) * 8 + (px / 2)
|
||||
val ct = ctBlock[ctIdx].toDouble()
|
||||
val cp = cpBlock[ctIdx].toDouble()
|
||||
|
||||
// Convert scaled values back to ICtCp range
|
||||
// I channel: IDCT already added 128, so i is in [0,255]. Reverse encoder: (c1*255-128)+128 = c1*255
|
||||
val I = i / 255.0
|
||||
// Ct/Cp were scaled: c2/c3 * 255.0, so reverse: ct/cp / 255.0
|
||||
val Ct = (ct / 255.0)
|
||||
val Cp = (cp / 255.0)
|
||||
|
||||
// ICtCp -> L'M'S' (inverse matrix)
|
||||
val Lp = (I + 0.015718580108730416 * Ct + 0.2095810681164055 * Cp).coerceIn(0.0, 1.0)
|
||||
val Mp = (I - 0.015718580108730416 * Ct - 0.20958106811640548 * Cp).coerceIn(0.0, 1.0)
|
||||
val Sp = (I + 1.0212710798422344 * Ct - 0.6052744909924316 * Cp).coerceIn(0.0, 1.0)
|
||||
|
||||
// HLG decode: L'M'S' -> linear LMS
|
||||
val L = HLG_inverse_OETF(Lp)
|
||||
val M = HLG_inverse_OETF(Mp)
|
||||
val S = HLG_inverse_OETF(Sp)
|
||||
|
||||
// LMS -> linear sRGB (inverse matrix)
|
||||
val rLin = 29.601046511687 * L - 21.364325340529906 * M - 4.886500015143518 * S
|
||||
val gLin = -12.083229161592032 * L + 10.673933874098694 * M + 1.5369143265611211 * S
|
||||
val bLin = 0.38562844776642574 * L - 0.6536244436141302 * M + 1.0968381245163787 * S
|
||||
|
||||
// Gamma encode to sRGB
|
||||
val rSrgb = srgbUnlinearize(rLin)
|
||||
val gSrgb = srgbUnlinearize(gLin)
|
||||
val bSrgb = srgbUnlinearize(bLin)
|
||||
|
||||
// Convert to 8-bit and store
|
||||
val baseIdx = (py * 16 + px) * 3
|
||||
rgbData[baseIdx] = (rSrgb * 255.0).toInt().coerceIn(0, 255) // R
|
||||
rgbData[baseIdx + 1] = (gSrgb * 255.0).toInt().coerceIn(0, 255) // G
|
||||
rgbData[baseIdx + 2] = (bSrgb * 255.0).toInt().coerceIn(0, 255) // B
|
||||
}
|
||||
}
|
||||
|
||||
return rgbData
|
||||
}
|
||||
|
||||
// Helper functions for ICtCp decoding
|
||||
|
||||
// Inverse HLG OETF (HLG -> linear)
|
||||
fun HLG_inverse_OETF(V: Double): Double {
|
||||
val a = 0.17883277
|
||||
val b = 1.0 - 4.0 * a
|
||||
val c = 0.5 - a * ln(4.0 * a)
|
||||
|
||||
if (V <= 0.5)
|
||||
return (V * V) / 3.0
|
||||
else
|
||||
return (exp((V - c)/a) + b) / 12.0
|
||||
}
|
||||
|
||||
// sRGB gamma decode: nonlinear -> linear
|
||||
private fun srgbLinearize(value: Double): Double {
|
||||
return if (value <= 0.04045) {
|
||||
value / 12.92
|
||||
} else {
|
||||
((value + 0.055) / 1.055).pow(2.4)
|
||||
}
|
||||
}
|
||||
|
||||
// sRGB gamma encode: linear -> nonlinear
|
||||
private fun srgbUnlinearize(value: Double): Double {
|
||||
return if (value <= 0.0031308) {
|
||||
value * 12.92
|
||||
} else {
|
||||
1.055 * value.pow(1.0 / 2.4) - 0.055
|
||||
}
|
||||
}
|
||||
|
||||
// RGB to YCoCg-R conversion for INTER mode residual calculation
|
||||
fun tevRGBToYcocg(rgbBlock: IntArray): IntArray {
|
||||
val ycocgData = IntArray(16 * 16 * 3) // Y,Co,Cg for 16x16 pixels
|
||||
@@ -2175,147 +2260,6 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
|
||||
return ycocgData
|
||||
}
|
||||
|
||||
// XYB conversion constants from JPEG XL specification
|
||||
private val XYB_BIAS = 0.00379307325527544933
|
||||
private val CBRT_BIAS = 0.155954200549248620 // cbrt(XYB_BIAS)
|
||||
|
||||
// RGB to LMS mixing coefficients
|
||||
private val RGB_TO_LMS = arrayOf(
|
||||
doubleArrayOf(0.3, 0.622, 0.078), // L coefficients
|
||||
doubleArrayOf(0.23, 0.692, 0.078), // M coefficients
|
||||
doubleArrayOf(0.24342268924547819, 0.20476744424496821, 0.55180986650955360) // S coefficients
|
||||
)
|
||||
|
||||
// LMS to RGB inverse matrix
|
||||
private val LMS_TO_RGB = arrayOf(
|
||||
doubleArrayOf(11.0315669046, -9.8669439081, -0.1646229965),
|
||||
doubleArrayOf(-3.2541473811, 4.4187703776, -0.1646229965),
|
||||
doubleArrayOf(-3.6588512867, 2.7129230459, 1.9459282408)
|
||||
)
|
||||
|
||||
// sRGB linearization functions
|
||||
private fun srgbLinearise(value: Double): Double {
|
||||
return if (value > 0.04045) {
|
||||
Math.pow((value + 0.055) / 1.055, 2.4)
|
||||
} else {
|
||||
value / 12.92
|
||||
}
|
||||
}
|
||||
|
||||
private fun srgbUnlinearise(value: Double): Double {
|
||||
return if (value > 0.0031308) {
|
||||
1.055 * Math.pow(value, 1.0 / 2.4) - 0.055
|
||||
} else {
|
||||
value * 12.92
|
||||
}
|
||||
}
|
||||
|
||||
// XYB to RGB conversion for hardware decoding
|
||||
fun tevXybToRGB(yBlock: IntArray, xBlock: IntArray, bBlock: IntArray): IntArray {
|
||||
val rgbData = IntArray(16 * 16 * 3) // R,G,B for 16x16 pixels
|
||||
|
||||
for (py in 0 until 16) {
|
||||
for (px in 0 until 16) {
|
||||
val yIdx = py * 16 + px
|
||||
val y = yBlock[yIdx]
|
||||
|
||||
// Get chroma values from subsampled 8x8 blocks (nearest neighbor upsampling)
|
||||
val xbIdx = (py / 2) * 8 + (px / 2)
|
||||
val x = xBlock[xbIdx]
|
||||
val b = bBlock[xbIdx]
|
||||
|
||||
// Optimal range-based dequantization (exact inverse of improved quantization)
|
||||
val X_MIN = -0.016; val X_MAX = 0.030
|
||||
val xVal = (x / 255.0) * (X_MAX - X_MIN) + X_MIN // X: inverse of range mapping
|
||||
val Y_MAX = 0.85
|
||||
val yVal = (y / 255.0) * Y_MAX // Y: inverse of improved scale
|
||||
val B_MAX = 0.85
|
||||
val bVal = ((b + 128.0) / 255.0) * B_MAX // B: inverse of ((val/B_MAX*255)-128)
|
||||
|
||||
// XYB to LMS gamma
|
||||
val lgamma = xVal + yVal
|
||||
val mgamma = yVal - xVal
|
||||
val sgamma = bVal
|
||||
|
||||
// Remove gamma correction
|
||||
val lmix = (lgamma + CBRT_BIAS).pow(3.0) - XYB_BIAS
|
||||
val mmix = (mgamma + CBRT_BIAS).pow(3.0) - XYB_BIAS
|
||||
val smix = (sgamma + CBRT_BIAS).pow(3.0) - XYB_BIAS
|
||||
|
||||
// LMS to linear RGB using inverse matrix
|
||||
val rLinear = (LMS_TO_RGB[0][0] * lmix + LMS_TO_RGB[0][1] * mmix + LMS_TO_RGB[0][2] * smix).coerceIn(0.0, 1.0)
|
||||
val gLinear = (LMS_TO_RGB[1][0] * lmix + LMS_TO_RGB[1][1] * mmix + LMS_TO_RGB[1][2] * smix).coerceIn(0.0, 1.0)
|
||||
val bLinear = (LMS_TO_RGB[2][0] * lmix + LMS_TO_RGB[2][1] * mmix + LMS_TO_RGB[2][2] * smix).coerceIn(0.0, 1.0)
|
||||
|
||||
// Convert back to sRGB gamma and 0-255 range
|
||||
val r = (srgbUnlinearise(rLinear) * 255.0 + 0.5).toInt().coerceIn(0, 255)
|
||||
val g = (srgbUnlinearise(gLinear) * 255.0 + 0.5).toInt().coerceIn(0, 255)
|
||||
val bRgb = (srgbUnlinearise(bLinear) * 255.0 + 0.5).toInt().coerceIn(0, 255)
|
||||
|
||||
// Store RGB
|
||||
val baseIdx = (py * 16 + px) * 3
|
||||
rgbData[baseIdx] = r // R
|
||||
rgbData[baseIdx + 1] = g // G
|
||||
rgbData[baseIdx + 2] = bRgb // B
|
||||
}
|
||||
}
|
||||
|
||||
return rgbData
|
||||
}
|
||||
|
||||
// RGB to XYB conversion for INTER mode residual calculation
|
||||
fun tevRGBToXyb(rgbBlock: IntArray): IntArray {
|
||||
val xybData = IntArray(16 * 16 * 3) // Y,X,B for 16x16 pixels
|
||||
|
||||
for (py in 0 until 16) {
|
||||
for (px in 0 until 16) {
|
||||
val baseIdx = (py * 16 + px) * 3
|
||||
val r = rgbBlock[baseIdx]
|
||||
val g = rgbBlock[baseIdx + 1]
|
||||
val b = rgbBlock[baseIdx + 2]
|
||||
|
||||
// Convert RGB to 0-1 range and linearise sRGB
|
||||
val rNorm = srgbLinearise(r / 255.0)
|
||||
val gNorm = srgbLinearise(g / 255.0)
|
||||
val bNorm = srgbLinearise(b / 255.0)
|
||||
|
||||
// RGB to LMS mixing with bias
|
||||
val lmix = RGB_TO_LMS[0][0] * rNorm + RGB_TO_LMS[0][1] * gNorm + RGB_TO_LMS[0][2] * bNorm + XYB_BIAS
|
||||
val mmix = RGB_TO_LMS[1][0] * rNorm + RGB_TO_LMS[1][1] * gNorm + RGB_TO_LMS[1][2] * bNorm + XYB_BIAS
|
||||
val smix = RGB_TO_LMS[2][0] * rNorm + RGB_TO_LMS[2][1] * gNorm + RGB_TO_LMS[2][2] * bNorm + XYB_BIAS
|
||||
|
||||
// Apply gamma correction (cube root)
|
||||
val lgamma = lmix.pow(1.0 / 3.0) - CBRT_BIAS
|
||||
val mgamma = mmix.pow(1.0 / 3.0) - CBRT_BIAS
|
||||
val sgamma = smix.pow(1.0 / 3.0) - CBRT_BIAS
|
||||
|
||||
// LMS to XYB transformation
|
||||
val xVal = (lgamma - mgamma) / 2.0
|
||||
val yVal = (lgamma + mgamma) / 2.0
|
||||
val bVal = sgamma
|
||||
|
||||
// Optimal range-based quantization for XYB values (improved precision)
|
||||
// X: actual range -0.016 to +0.030, map to full 0-255 precision
|
||||
val X_MIN = -0.016; val X_MAX = 0.030
|
||||
val xQuant = (((xVal - X_MIN) / (X_MAX - X_MIN)) * 255.0).toInt().coerceIn(0, 255)
|
||||
// Y: range 0 to 0.85, map to 0 to 255 (improved scale)
|
||||
val Y_MAX = 0.85
|
||||
val yQuant = ((yVal / Y_MAX) * 255.0).toInt().coerceIn(0, 255)
|
||||
// B: range 0 to 0.85, map to -128 to +127 (optimized precision)
|
||||
val B_MAX = 0.85
|
||||
val bQuant = (((bVal / B_MAX) * 255.0) - 128.0).toInt().coerceIn(-128, 127)
|
||||
|
||||
// Store XYB values
|
||||
val yIdx = py * 16 + px
|
||||
xybData[yIdx * 3] = yQuant // Y
|
||||
xybData[yIdx * 3 + 1] = xQuant // X
|
||||
xybData[yIdx * 3 + 2] = bQuant // B
|
||||
}
|
||||
}
|
||||
|
||||
return xybData
|
||||
}
|
||||
|
||||
/**
|
||||
* Enhanced TEV Deblocking Filter - Uses Knusperli-inspired techniques for superior boundary analysis
|
||||
@@ -2627,8 +2571,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
|
||||
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, frameCounter: Int,
|
||||
debugMotionVectors: Boolean = false, tevVersion: Int = 2,
|
||||
enableDeblocking: Boolean = true, enableBoundaryAwareDecoding: Boolean = false,
|
||||
isLossless: Boolean = false) {
|
||||
enableDeblocking: Boolean = true, enableBoundaryAwareDecoding: Boolean = false) {
|
||||
|
||||
// height doesn't change when interlaced, because that's the encoder's output
|
||||
|
||||
@@ -2744,7 +2687,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
val cgPixels = tevIdct8x8_fromOptimizedCoeffs(cgBlock)
|
||||
|
||||
val rgbData = if (tevVersion == 3) {
|
||||
tevXybToRGB(yPixels, coPixels, cgPixels)
|
||||
tevIctcpToRGB(yPixels, coPixels, cgPixels)
|
||||
} else {
|
||||
tevYcocgToRGB(yPixels, coPixels, cgPixels)
|
||||
}
|
||||
@@ -2919,69 +2862,20 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
}
|
||||
|
||||
0x01 -> { // TEV_MODE_INTRA - Full YCoCg-R DCT decode (no motion compensation)
|
||||
val yBlock: IntArray
|
||||
val coBlock: IntArray
|
||||
val cgBlock: IntArray
|
||||
|
||||
if (isLossless) {
|
||||
// Lossless mode: coefficients are stored as float16, no quantization
|
||||
// Read float16 coefficients: Y (16x16=256), Co (8x8=64), Cg (8x8=64)
|
||||
val coeffFloat16Array = ShortArray(384) // 384 float16 values stored as shorts
|
||||
vm.bulkPeekShort(readPtr.toInt(), coeffFloat16Array, 768) // 384 * 2 bytes
|
||||
readPtr += 768
|
||||
|
||||
// Convert float16 to float32 and perform IDCT directly (no quantization)
|
||||
println("DEBUG: Reading lossless coefficients, first few float16 values: ${coeffFloat16Array.take(10).map { "0x${it.toString(16)}" }}")
|
||||
val yCoeffs = FloatArray(256) { i ->
|
||||
// Convert signed short to unsigned short for float16 interpretation
|
||||
val signedShort = coeffFloat16Array[i]
|
||||
val float16bits = signedShort.toInt() and 0xFFFF // Convert to unsigned
|
||||
val floatVal = Float16.toFloat(float16bits.toShort())
|
||||
if (floatVal.isNaN() || floatVal.isInfinite()) {
|
||||
println("NaN/Inf detected at Y coefficient $i: signedShort=0x${signedShort.toString(16)}, unsigned=0x${float16bits.toString(16)}, floatVal=$floatVal")
|
||||
0f // Replace NaN with 0
|
||||
} else floatVal
|
||||
}
|
||||
val coCoeffs = FloatArray(64) { i ->
|
||||
// Convert signed short to unsigned short for float16 interpretation
|
||||
val signedShort = coeffFloat16Array[256 + i]
|
||||
val float16bits = signedShort.toInt() and 0xFFFF // Convert to unsigned
|
||||
val floatVal = Float16.toFloat(float16bits.toShort())
|
||||
if (floatVal.isNaN() || floatVal.isInfinite()) {
|
||||
println("NaN/Inf detected at Co coefficient $i: signedShort=0x${signedShort.toString(16)}, unsigned=0x${float16bits.toString(16)}, floatVal=$floatVal")
|
||||
0f // Replace NaN with 0
|
||||
} else floatVal
|
||||
}
|
||||
val cgCoeffs = FloatArray(64) { i ->
|
||||
// Convert signed short to unsigned short for float16 interpretation
|
||||
val signedShort = coeffFloat16Array[320 + i]
|
||||
val float16bits = signedShort.toInt() and 0xFFFF // Convert to unsigned
|
||||
val floatVal = Float16.toFloat(float16bits.toShort())
|
||||
if (floatVal.isNaN() || floatVal.isInfinite()) {
|
||||
println("NaN/Inf detected at Cg coefficient $i: signedShort=0x${signedShort.toString(16)}, unsigned=0x${float16bits.toString(16)}, floatVal=$floatVal")
|
||||
0f // Replace NaN with 0
|
||||
} else floatVal
|
||||
}
|
||||
|
||||
yBlock = tevIdct16x16_lossless(yCoeffs)
|
||||
coBlock = tevIdct8x8_lossless(coCoeffs)
|
||||
cgBlock = tevIdct8x8_lossless(cgCoeffs)
|
||||
} else {
|
||||
// Regular lossy mode: quantized int16 coefficients
|
||||
// Optimized bulk reading of all DCT coefficients: Y(256×2) + Co(64×2) + Cg(64×2) = 768 bytes
|
||||
val coeffShortArray = ShortArray(384) // Total coefficients: 256 + 64 + 64 = 384 shorts
|
||||
vm.bulkPeekShort(readPtr.toInt(), coeffShortArray, 768)
|
||||
readPtr += 768
|
||||
// Regular lossy mode: quantized int16 coefficients
|
||||
// Optimized bulk reading of all DCT coefficients: Y(256×2) + Co(64×2) + Cg(64×2) = 768 bytes
|
||||
val coeffShortArray = ShortArray(384) // Total coefficients: 256 + 64 + 64 = 384 shorts
|
||||
vm.bulkPeekShort(readPtr.toInt(), coeffShortArray, 768)
|
||||
readPtr += 768
|
||||
|
||||
// Perform hardware IDCT for each channel using fast algorithm
|
||||
yBlock = tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), QUANT_TABLE_Y, qY, rateControlFactor)
|
||||
coBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), QUANT_TABLE_C, true, qCo, rateControlFactor)
|
||||
cgBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), QUANT_TABLE_C, true, qCg, rateControlFactor)
|
||||
}
|
||||
// Perform hardware IDCT for each channel using fast algorithm
|
||||
val yBlock = tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), QUANT_TABLE_Y, qY, rateControlFactor)
|
||||
val coBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), QUANT_TABLE_C, true, qCo, rateControlFactor)
|
||||
val cgBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), QUANT_TABLE_C, true, qCg, rateControlFactor)
|
||||
|
||||
// Convert to RGB (YCoCg-R for v2, XYB for v3)
|
||||
val rgbData = if (tevVersion == 3) {
|
||||
tevXybToRGB(yBlock, coBlock, cgBlock) // XYB format (v3)
|
||||
tevIctcpToRGB(yBlock, coBlock, cgBlock) // XYB format (v3)
|
||||
} else {
|
||||
tevYcocgToRGB(yBlock, coBlock, cgBlock) // YCoCg-R format (v2)
|
||||
}
|
||||
@@ -3108,7 +3002,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
|
||||
// Step 4: Convert final data to RGB (YCoCg-R for v2, XYB for v3)
|
||||
val finalRgb = if (tevVersion == 3) {
|
||||
tevXybToRGB(finalY, finalCo, finalCg) // XYB format (v3)
|
||||
tevIctcpToRGB(finalY, finalCo, finalCg) // XYB format (v3)
|
||||
} else {
|
||||
tevYcocgToRGB(finalY, finalCo, finalCg) // YCoCg-R format (v2)
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Created by Claude on 2025-08-18.
|
||||
// TEV (TSVM Enhanced Video) Encoder - YCoCg-R 4:2:0 16x16 Block Version
|
||||
// TEV (TSVM Enhanced Video) Encoder - YCoCg-R/ICtCp 4:2:0 16x16 Block Version
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
@@ -68,7 +68,9 @@ static inline float float16_to_float(uint16_t hbits) {
|
||||
|
||||
// TSVM Enhanced Video (TEV) format constants
|
||||
#define TEV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x45\x56" // "\x1FTSVM TEV"
|
||||
#define TEV_VERSION 2 // Updated for YCoCg-R 4:2:0
|
||||
// TEV version - dynamic based on color space mode
|
||||
// Version 2: YCoCg-R 4:2:0 (default)
|
||||
// Version 3: ICtCp 4:2:0 (--ictcp flag)
|
||||
// version 1: 8x8 RGB
|
||||
// version 2: 16x16 Y, 8x8 Co/Cg, asymetric quantisation, optional quantiser multiplier for rate control multiplier (1.0 when unused) {current winner}
|
||||
// version 3: version 2 + internal 6-bit processing (discarded due to higher noise floor)
|
||||
@@ -192,17 +194,6 @@ typedef struct __attribute__((packed)) {
|
||||
int16_t cg_coeffs[HALF_BLOCK_SIZE_SQR]; // quantised Cg DCT coefficients (8x8)
|
||||
} tev_block_t;
|
||||
|
||||
// Lossless TEV block structure (uses float32 internally, converted to float16 during serialization)
|
||||
typedef struct __attribute__((packed)) {
|
||||
uint8_t mode; // Block encoding mode
|
||||
int16_t mv_x, mv_y; // Motion vector (1/4 pixel precision)
|
||||
float rate_control_factor; // Always 1.0f in lossless mode
|
||||
uint16_t cbp; // Coded block pattern (which channels have non-zero coeffs)
|
||||
float y_coeffs[BLOCK_SIZE_SQR]; // lossless Y DCT coefficients (16x16)
|
||||
float co_coeffs[HALF_BLOCK_SIZE_SQR]; // lossless Co DCT coefficients (8x8)
|
||||
float cg_coeffs[HALF_BLOCK_SIZE_SQR]; // lossless Cg DCT coefficients (8x8)
|
||||
} tev_lossless_block_t;
|
||||
|
||||
// Subtitle entry structure
|
||||
typedef struct subtitle_entry {
|
||||
int start_frame;
|
||||
@@ -232,7 +223,7 @@ typedef struct {
|
||||
int qualityCg;
|
||||
int verbose;
|
||||
int disable_rcf; // 0 = rcf enabled, 1 = disabled
|
||||
int lossless_mode; // 0 = lossy (default), 1 = lossless mode
|
||||
int ictcp_mode; // 0 = YCoCg-R (default), 1 = ICtCp color space
|
||||
|
||||
// Bitrate control
|
||||
int target_bitrate_kbps; // Target bitrate in kbps (0 = quality mode)
|
||||
@@ -289,6 +280,10 @@ typedef struct {
|
||||
int complexity_capacity; // Capacity of complexity_values array
|
||||
} tev_encoder_t;
|
||||
|
||||
//////////////////////////
|
||||
// COLOUR MATHS CODES //
|
||||
//////////////////////////
|
||||
|
||||
// RGB to YCoCg-R transform (per YCoCg-R specification with truncated division)
|
||||
static void rgb_to_ycocgr(uint8_t r, uint8_t g, uint8_t b, int *y, int *co, int *cg) {
|
||||
*co = (int)r - (int)b;
|
||||
@@ -315,6 +310,180 @@ static void ycocgr_to_rgb(int y, int co, int cg, uint8_t *r, uint8_t *g, uint8_t
|
||||
*b = CLAMP(*b, 0, 255);
|
||||
}
|
||||
|
||||
// ---------------------- ICtCp Implementation ----------------------
|
||||
|
||||
static inline int iround(double v) { return (int)floor(v + 0.5); }
|
||||
|
||||
// ---------------------- sRGB gamma helpers ----------------------
|
||||
static inline double srgb_linearize(double val) {
|
||||
// val in [0,1]
|
||||
if (val <= 0.04045) return val / 12.92;
|
||||
return pow((val + 0.055) / 1.055, 2.4);
|
||||
}
|
||||
static inline double srgb_unlinearize(double val) {
|
||||
// val in [0,1]
|
||||
if (val <= 0.0031308) return val * 12.92;
|
||||
return 1.055 * pow(val, 1.0 / 2.4) - 0.055;
|
||||
}
|
||||
|
||||
// -------------------------- HLG --------------------------
|
||||
// Forward HLG OETF (linear -> HLG)
|
||||
static inline double HLG_OETF(double L) {
|
||||
// L in [0,1], relative scene-linear
|
||||
const double a = 0.17883277;
|
||||
const double b = 1.0 - 4.0 * a;
|
||||
const double c = 0.5 - a * log(4.0 * a);
|
||||
|
||||
if (L <= 1.0/12.0)
|
||||
return sqrt(3.0 * L);
|
||||
else
|
||||
return a * log(12.0 * L - b) + c;
|
||||
}
|
||||
|
||||
// Inverse HLG OETF (HLG -> linear)
|
||||
static inline double HLG_inverse_OETF(double V) {
|
||||
const double a = 0.17883277;
|
||||
const double b = 1.0 - 4.0 * a;
|
||||
const double c = 0.5 - a * log(4.0 * a);
|
||||
|
||||
if (V <= 0.5)
|
||||
return (V * V) / 3.0;
|
||||
else
|
||||
return (exp((V - c)/a) + b) / 12.0;
|
||||
}
|
||||
|
||||
// ---------------------- Matrices (doubles) ----------------------
|
||||
// Combined linear sRGB -> LMS (single 3x3): product of sRGB->XYZ, XYZ->BT2020, BT2020->LMS
|
||||
// Computed from standard matrices (double precision).
|
||||
static const double M_RGB_TO_LMS[3][3] = {
|
||||
{0.20502672199540622, 0.42945363228947586, 0.31165003516511786},
|
||||
{0.2233144413317712, 0.5540422172466897, 0.21854692537153908},
|
||||
{0.0609931761282002, 0.17917502499816504, 0.9323768661336348}
|
||||
};
|
||||
|
||||
// Inverse: LMS -> linear sRGB (inverse of above)
|
||||
static const double M_LMS_TO_RGB[3][3] = {
|
||||
{29.601046511687, -21.364325340529906, -4.886500015143518},
|
||||
{-12.083229161592032, 10.673933874098694, 1.5369143265611211},
|
||||
{0.38562844776642574, -0.6536244436141302, 1.0968381245163787}
|
||||
};
|
||||
|
||||
// ICtCp matrix (L' M' S' -> I Ct Cp). Values are the BT.2100 integer-derived /4096 constants.
|
||||
static const double M_LMSPRIME_TO_ICTCP[3][3] = {
|
||||
{ 2048.0/4096.0, 2048.0/4096.0, 0.0 },
|
||||
{ 3625.0/4096.0, -7465.0/4096.0, 3840.0/4096.0 },
|
||||
{ 9500.0/4096.0, -9212.0/4096.0, -288.0/4096.0 }
|
||||
};
|
||||
|
||||
// Inverse: I Ct Cp -> L' M' S' (precomputed inverse)
|
||||
static const double M_ICTCP_TO_LMSPRIME[3][3] = {
|
||||
{ 1.0, 0.015718580108730416, 0.2095810681164055 },
|
||||
{ 1.0, -0.015718580108730416, -0.20958106811640548 },
|
||||
{ 1.0, 1.0212710798422344, -0.6052744909924316 }
|
||||
};
|
||||
|
||||
// ---------------------- Forward: sRGB8 -> ICtCp (doubles) ----------------------
|
||||
// Inputs: r,g,b in 0..255 sRGB (8-bit)
|
||||
// Outputs: I, Ct, Cp as doubles (nominally I in ~[0..1], Ct/Cp ranges depend on colors)
|
||||
void srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
|
||||
double *out_I, double *out_Ct, double *out_Cp)
|
||||
{
|
||||
// 1) linearize sRGB to 0..1
|
||||
double r = srgb_linearize((double)r8 / 255.0);
|
||||
double g = srgb_linearize((double)g8 / 255.0);
|
||||
double b = srgb_linearize((double)b8 / 255.0);
|
||||
|
||||
// 2) linear RGB -> LMS (single 3x3 multiply)
|
||||
double L = M_RGB_TO_LMS[0][0]*r + M_RGB_TO_LMS[0][1]*g + M_RGB_TO_LMS[0][2]*b;
|
||||
double M = M_RGB_TO_LMS[1][0]*r + M_RGB_TO_LMS[1][1]*g + M_RGB_TO_LMS[1][2]*b;
|
||||
double S = M_RGB_TO_LMS[2][0]*r + M_RGB_TO_LMS[2][1]*g + M_RGB_TO_LMS[2][2]*b;
|
||||
|
||||
// 3) apply HLG encode (map linear LMS -> perceptual domain L',M',S')
|
||||
double Lp = HLG_OETF(L);
|
||||
double Mp = HLG_OETF(M);
|
||||
double Sp = HLG_OETF(S);
|
||||
|
||||
// 4) L'M'S' -> ICtCp
|
||||
double I = M_LMSPRIME_TO_ICTCP[0][0]*Lp + M_LMSPRIME_TO_ICTCP[0][1]*Mp + M_LMSPRIME_TO_ICTCP[0][2]*Sp;
|
||||
double Ct = M_LMSPRIME_TO_ICTCP[1][0]*Lp + M_LMSPRIME_TO_ICTCP[1][1]*Mp + M_LMSPRIME_TO_ICTCP[1][2]*Sp;
|
||||
double Cp = M_LMSPRIME_TO_ICTCP[2][0]*Lp + M_LMSPRIME_TO_ICTCP[2][1]*Mp + M_LMSPRIME_TO_ICTCP[2][2]*Sp;
|
||||
|
||||
*out_I = FCLAMP(I * 255.f, 0.f, 255.f);
|
||||
*out_Ct = FCLAMP(Ct * 255.f, -256.f, 255.f);
|
||||
*out_Cp = FCLAMP(Cp * 255.f, -256.f, 255.f);
|
||||
}
|
||||
|
||||
// ---------------------- Reverse: ICtCp -> sRGB8 (doubles) ----------------------
|
||||
// Inputs: I, Ct, Cp as doubles
|
||||
// Outputs: r8,g8,b8 in 0..255 (8-bit sRGB, clamped and rounded)
|
||||
void ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
|
||||
uint8_t *r8, uint8_t *g8, uint8_t *b8)
|
||||
{
|
||||
double I = I8 / 255.f;
|
||||
double Ct = Ct8 / 255.f;
|
||||
double Cp = Cp8 / 255.f;
|
||||
|
||||
// 1) ICtCp -> L' M' S' (3x3 multiply)
|
||||
double Lp = M_ICTCP_TO_LMSPRIME[0][0]*I + M_ICTCP_TO_LMSPRIME[0][1]*Ct + M_ICTCP_TO_LMSPRIME[0][2]*Cp;
|
||||
double Mp = M_ICTCP_TO_LMSPRIME[1][0]*I + M_ICTCP_TO_LMSPRIME[1][1]*Ct + M_ICTCP_TO_LMSPRIME[1][2]*Cp;
|
||||
double Sp = M_ICTCP_TO_LMSPRIME[2][0]*I + M_ICTCP_TO_LMSPRIME[2][1]*Ct + M_ICTCP_TO_LMSPRIME[2][2]*Cp;
|
||||
|
||||
// 2) HLG decode: L' -> linear LMS
|
||||
double L = HLG_inverse_OETF(Lp);
|
||||
double M = HLG_inverse_OETF(Mp);
|
||||
double S = HLG_inverse_OETF(Sp);
|
||||
|
||||
// 3) LMS -> linear sRGB (3x3 inverse)
|
||||
double r_lin = M_LMS_TO_RGB[0][0]*L + M_LMS_TO_RGB[0][1]*M + M_LMS_TO_RGB[0][2]*S;
|
||||
double g_lin = M_LMS_TO_RGB[1][0]*L + M_LMS_TO_RGB[1][1]*M + M_LMS_TO_RGB[1][2]*S;
|
||||
double b_lin = M_LMS_TO_RGB[2][0]*L + M_LMS_TO_RGB[2][1]*M + M_LMS_TO_RGB[2][2]*S;
|
||||
|
||||
// 4) gamma encode and convert to 0..255 with center-of-bin rounding
|
||||
double r = srgb_unlinearize(r_lin);
|
||||
double g = srgb_unlinearize(g_lin);
|
||||
double b = srgb_unlinearize(b_lin);
|
||||
|
||||
*r8 = (uint8_t)CLAMP(iround(r * 255.0), 0, 255);
|
||||
*g8 = (uint8_t)CLAMP(iround(g * 255.0), 0, 255);
|
||||
*b8 = (uint8_t)CLAMP(iround(b * 255.0), 0, 255);
|
||||
}
|
||||
|
||||
// ---------------------- Color Space Switching Functions ----------------------
|
||||
// Wrapper functions that choose between YCoCg-R and ICtCp based on encoder mode
|
||||
|
||||
static void rgb_to_color_space(tev_encoder_t *enc, uint8_t r, uint8_t g, uint8_t b,
|
||||
double *c1, double *c2, double *c3) {
|
||||
if (enc->ictcp_mode) {
|
||||
// Use ICtCp color space
|
||||
srgb8_to_ictcp_hlg(r, g, b, c1, c2, c3);
|
||||
} else {
|
||||
// Use YCoCg-R color space (convert to int first, then to double)
|
||||
int y_val, co_val, cg_val;
|
||||
rgb_to_ycocgr(r, g, b, &y_val, &co_val, &cg_val);
|
||||
*c1 = (double)y_val;
|
||||
*c2 = (double)co_val;
|
||||
*c3 = (double)cg_val;
|
||||
}
|
||||
}
|
||||
|
||||
static void color_space_to_rgb(tev_encoder_t *enc, double c1, double c2, double c3,
|
||||
uint8_t *r, uint8_t *g, uint8_t *b) {
|
||||
if (enc->ictcp_mode) {
|
||||
// Use ICtCp color space
|
||||
ictcp_hlg_to_srgb8(c1, c2, c3, r, g, b);
|
||||
} else {
|
||||
// Use YCoCg-R color space (convert from double to int first)
|
||||
int y_val = (int)round(c1);
|
||||
int co_val = (int)round(c2);
|
||||
int cg_val = (int)round(c3);
|
||||
ycocgr_to_rgb(y_val, co_val, cg_val, r, g, b);
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////
|
||||
// DISCRETE COSINE TRANSFORMATIONS //
|
||||
////////////////////////////////////////
|
||||
|
||||
// Pre-calculated cosine tables
|
||||
static float dct_table_16[16][16]; // For 16x16 DCT
|
||||
static float dct_table_8[8][8]; // For 8x8 DCT
|
||||
@@ -429,14 +598,14 @@ static int16_t quantise_coeff(float coeff, float quant, int is_dc, int is_chroma
|
||||
}
|
||||
}
|
||||
|
||||
// Extract 16x16 block from RGB frame and convert to YCoCg-R
|
||||
static void extract_ycocgr_block(uint8_t *rgb_frame, int width, int height,
|
||||
int block_x, int block_y,
|
||||
float *y_block, float *co_block, float *cg_block) {
|
||||
// Extract 16x16 block from RGB frame and convert to color space
|
||||
static void extract_color_space_block(tev_encoder_t *enc, uint8_t *rgb_frame, int width, int height,
|
||||
int block_x, int block_y,
|
||||
float *c1_block, float *c2_block, float *c3_block) {
|
||||
int start_x = block_x * BLOCK_SIZE;
|
||||
int start_y = block_y * BLOCK_SIZE;
|
||||
|
||||
// Extract 16x16 Y block
|
||||
// Extract 16x16 primary channel block (Y for YCoCg-R, I for ICtCp)
|
||||
for (int py = 0; py < BLOCK_SIZE; py++) {
|
||||
for (int px = 0; px < BLOCK_SIZE; px++) {
|
||||
int x = start_x + px;
|
||||
@@ -448,10 +617,10 @@ static void extract_ycocgr_block(uint8_t *rgb_frame, int width, int height,
|
||||
uint8_t g = rgb_frame[offset + 1];
|
||||
uint8_t b = rgb_frame[offset + 2];
|
||||
|
||||
int y_val, co_val, cg_val;
|
||||
rgb_to_ycocgr(r, g, b, &y_val, &co_val, &cg_val);
|
||||
double c1, c2, c3;
|
||||
rgb_to_color_space(enc, r, g, b, &c1, &c2, &c3);
|
||||
|
||||
y_block[py * BLOCK_SIZE + px] = (float)y_val - 128.0f; // Center around 0
|
||||
c1_block[py * BLOCK_SIZE + px] = (float)c1 - 128.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -473,25 +642,30 @@ static void extract_ycocgr_block(uint8_t *rgb_frame, int width, int height,
|
||||
uint8_t g = rgb_frame[offset + 1];
|
||||
uint8_t b = rgb_frame[offset + 2];
|
||||
|
||||
int y_val, co_val, cg_val;
|
||||
rgb_to_ycocgr(r, g, b, &y_val, &co_val, &cg_val);
|
||||
double c1, c2, c3;
|
||||
rgb_to_color_space(enc, r, g, b, &c1, &c2, &c3);
|
||||
|
||||
co_sum += (int)c2;
|
||||
cg_sum += (int)c3;
|
||||
|
||||
co_sum += co_val;
|
||||
cg_sum += cg_val;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (count > 0) {
|
||||
// Center chroma around 0 for DCT (Co/Cg range is -255 to +255, so don't add offset)
|
||||
co_block[py * HALF_BLOCK_SIZE + px] = (float)(co_sum / count);
|
||||
cg_block[py * HALF_BLOCK_SIZE + px] = (float)(cg_sum / count);
|
||||
// Average the accumulated chroma values and store
|
||||
c2_block[py * HALF_BLOCK_SIZE + px] = (float)(co_sum / count);
|
||||
c3_block[py * HALF_BLOCK_SIZE + px] = (float)(cg_sum / count);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Calculate spatial activity for any channel (16x16 or 8x8)
|
||||
static float calculate_spatial_activity(const float *block, int block_size) {
|
||||
float activity = 0.0f;
|
||||
@@ -791,8 +965,138 @@ static void estimate_motion(tev_encoder_t *enc, int block_x, int block_y,
|
||||
}
|
||||
|
||||
// Convert RGB block to YCoCg-R with 4:2:0 chroma subsampling
|
||||
static void convert_rgb_to_ycocgr_block(const uint8_t *rgb_block,
|
||||
uint8_t *y_block, int8_t *co_block, int8_t *cg_block) {
|
||||
static void convert_rgb_to_color_space_block(tev_encoder_t *enc, const uint8_t *rgb_block,
|
||||
float *c1_workspace, float *c2_workspace, float *c3_workspace) {
|
||||
if (enc->ictcp_mode) {
|
||||
// ICtCp mode: Convert 16x16 RGB to ICtCp (full resolution for I, 4:2:0 subsampling for CtCp)
|
||||
|
||||
// Convert I channel at full resolution (16x16)
|
||||
for (int py = 0; py < BLOCK_SIZE; py++) {
|
||||
for (int px = 0; px < BLOCK_SIZE; px++) {
|
||||
int rgb_idx = (py * BLOCK_SIZE + px) * 3;
|
||||
uint8_t r = rgb_block[rgb_idx];
|
||||
uint8_t g = rgb_block[rgb_idx + 1];
|
||||
uint8_t b = rgb_block[rgb_idx + 2];
|
||||
|
||||
double I, Ct, Cp;
|
||||
srgb8_to_ictcp_hlg(r, g, b, &I, &Ct, &Cp);
|
||||
|
||||
// Store I at full resolution, scale to appropriate range
|
||||
c1_workspace[py * BLOCK_SIZE + px] = (float)(I * 255.0);
|
||||
}
|
||||
}
|
||||
|
||||
// Convert Ct and Cp with 4:2:0 subsampling (8x8)
|
||||
for (int cy = 0; cy < HALF_BLOCK_SIZE; cy++) {
|
||||
for (int cx = 0; cx < HALF_BLOCK_SIZE; cx++) {
|
||||
double sum_ct = 0.0, sum_cp = 0.0;
|
||||
|
||||
// Sample 2x2 block from RGB and average for chroma
|
||||
for (int dy = 0; dy < 2; dy++) {
|
||||
for (int dx = 0; dx < 2; dx++) {
|
||||
int py = cy * 2 + dy;
|
||||
int px = cx * 2 + dx;
|
||||
int rgb_idx = (py * 16 + px) * 3;
|
||||
|
||||
int r = rgb_block[rgb_idx];
|
||||
int g = rgb_block[rgb_idx + 1];
|
||||
int b = rgb_block[rgb_idx + 2];
|
||||
|
||||
double I, Ct, Cp;
|
||||
srgb8_to_ictcp_hlg(r, g, b, &I, &Ct, &Cp);
|
||||
|
||||
sum_ct += Ct;
|
||||
sum_cp += Cp;
|
||||
}
|
||||
}
|
||||
|
||||
// Average and store subsampled chroma, scale to signed 8-bit equivalent range
|
||||
c2_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)((sum_ct / 4.0) * 255.0);
|
||||
c3_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)((sum_cp / 4.0) * 255.0);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// YCoCg-R mode: Original implementation
|
||||
|
||||
// Convert 16x16 RGB to Y (full resolution)
|
||||
for (int py = 0; py < BLOCK_SIZE; py++) {
|
||||
for (int px = 0; px < BLOCK_SIZE; px++) {
|
||||
int rgb_idx = (py * BLOCK_SIZE + px) * 3;
|
||||
int r = rgb_block[rgb_idx];
|
||||
int g = rgb_block[rgb_idx + 1];
|
||||
int b = rgb_block[rgb_idx + 2];
|
||||
|
||||
// YCoCg-R transform (per specification with truncated division)
|
||||
int y = (r + 2*g + b) / 4;
|
||||
c1_workspace[py * BLOCK_SIZE + px] = (float)CLAMP(y, 0, 255);
|
||||
}
|
||||
}
|
||||
|
||||
// Convert to Co and Cg with 4:2:0 subsampling (8x8)
|
||||
for (int cy = 0; cy < HALF_BLOCK_SIZE; cy++) {
|
||||
for (int cx = 0; cx < HALF_BLOCK_SIZE; cx++) {
|
||||
int sum_co = 0, sum_cg = 0;
|
||||
|
||||
// Sample 2x2 block from RGB and average for chroma
|
||||
for (int dy = 0; dy < 2; dy++) {
|
||||
for (int dx = 0; dx < 2; dx++) {
|
||||
int py = cy * 2 + dy;
|
||||
int px = cx * 2 + dx;
|
||||
int rgb_idx = (py * 16 + px) * 3;
|
||||
|
||||
int r = rgb_block[rgb_idx];
|
||||
int g = rgb_block[rgb_idx + 1];
|
||||
int b = rgb_block[rgb_idx + 2];
|
||||
|
||||
int co = r - b;
|
||||
int tmp = b + (co / 2);
|
||||
int cg = g - tmp;
|
||||
|
||||
sum_co += co;
|
||||
sum_cg += cg;
|
||||
}
|
||||
}
|
||||
|
||||
// Average and store subsampled chroma
|
||||
c2_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)CLAMP(sum_co / 4, -256, 255);
|
||||
c3_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)CLAMP(sum_cg / 4, -256, 255);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract motion-compensated YCoCg-R block from reference frame
|
||||
static void extract_motion_compensated_block(const uint8_t *rgb_data, int width, int height,
|
||||
int block_x, int block_y, int mv_x, int mv_y,
|
||||
uint8_t *y_block, int8_t *co_block, int8_t *cg_block) {
|
||||
// Extract 16x16 RGB block with motion compensation
|
||||
uint8_t rgb_block[BLOCK_SIZE * BLOCK_SIZE * 3];
|
||||
|
||||
for (int dy = 0; dy < BLOCK_SIZE; dy++) {
|
||||
for (int dx = 0; dx < BLOCK_SIZE; dx++) {
|
||||
int cur_x = block_x + dx;
|
||||
int cur_y = block_y + dy;
|
||||
int ref_x = cur_x + mv_x; // Revert to original motion compensation
|
||||
int ref_y = cur_y + mv_y;
|
||||
|
||||
int rgb_idx = (dy * BLOCK_SIZE + dx) * 3;
|
||||
|
||||
if (ref_x >= 0 && ref_y >= 0 && ref_x < width && ref_y < height) {
|
||||
// Copy RGB from reference position
|
||||
int ref_offset = (ref_y * width + ref_x) * 3;
|
||||
rgb_block[rgb_idx] = rgb_data[ref_offset]; // R
|
||||
rgb_block[rgb_idx + 1] = rgb_data[ref_offset + 1]; // G
|
||||
rgb_block[rgb_idx + 2] = rgb_data[ref_offset + 2]; // B
|
||||
} else {
|
||||
// Out of bounds - use black
|
||||
rgb_block[rgb_idx] = 0; // R
|
||||
rgb_block[rgb_idx + 1] = 0; // G
|
||||
rgb_block[rgb_idx + 2] = 0; // B
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convert RGB block to YCoCg-R (original implementation for motion compensation)
|
||||
// Convert 16x16 RGB to Y (full resolution)
|
||||
for (int py = 0; py < BLOCK_SIZE; py++) {
|
||||
for (int px = 0; px < BLOCK_SIZE; px++) {
|
||||
@@ -840,41 +1144,6 @@ static void convert_rgb_to_ycocgr_block(const uint8_t *rgb_block,
|
||||
}
|
||||
}
|
||||
|
||||
// Extract motion-compensated YCoCg-R block from reference frame
|
||||
static void extract_motion_compensated_block(const uint8_t *rgb_data, int width, int height,
|
||||
int block_x, int block_y, int mv_x, int mv_y,
|
||||
uint8_t *y_block, int8_t *co_block, int8_t *cg_block) {
|
||||
// Extract 16x16 RGB block with motion compensation
|
||||
uint8_t rgb_block[BLOCK_SIZE * BLOCK_SIZE * 3];
|
||||
|
||||
for (int dy = 0; dy < BLOCK_SIZE; dy++) {
|
||||
for (int dx = 0; dx < BLOCK_SIZE; dx++) {
|
||||
int cur_x = block_x + dx;
|
||||
int cur_y = block_y + dy;
|
||||
int ref_x = cur_x + mv_x; // Revert to original motion compensation
|
||||
int ref_y = cur_y + mv_y;
|
||||
|
||||
int rgb_idx = (dy * BLOCK_SIZE + dx) * 3;
|
||||
|
||||
if (ref_x >= 0 && ref_y >= 0 && ref_x < width && ref_y < height) {
|
||||
// Copy RGB from reference position
|
||||
int ref_offset = (ref_y * width + ref_x) * 3;
|
||||
rgb_block[rgb_idx] = rgb_data[ref_offset]; // R
|
||||
rgb_block[rgb_idx + 1] = rgb_data[ref_offset + 1]; // G
|
||||
rgb_block[rgb_idx + 2] = rgb_data[ref_offset + 2]; // B
|
||||
} else {
|
||||
// Out of bounds - use black
|
||||
rgb_block[rgb_idx] = 0; // R
|
||||
rgb_block[rgb_idx + 1] = 0; // G
|
||||
rgb_block[rgb_idx + 2] = 0; // B
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convert RGB block to YCoCg-R
|
||||
convert_rgb_to_ycocgr_block(rgb_block, y_block, co_block, cg_block);
|
||||
}
|
||||
|
||||
// Compute motion-compensated residual for INTER mode
|
||||
static void compute_motion_residual(tev_encoder_t *enc, int block_x, int block_y, int mv_x, int mv_y) {
|
||||
int start_x = block_x * BLOCK_SIZE;
|
||||
@@ -909,7 +1178,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
|
||||
tev_block_t *block = &enc->block_data[block_y * ((enc->width + 15) / 16) + block_x];
|
||||
|
||||
// Extract YCoCg-R block
|
||||
extract_ycocgr_block(enc->current_rgb, enc->width, enc->height,
|
||||
extract_color_space_block(enc, enc->current_rgb, enc->width, enc->height,
|
||||
block_x, block_y,
|
||||
enc->y_workspace, enc->co_workspace, enc->cg_workspace);
|
||||
|
||||
@@ -1105,107 +1374,6 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
|
||||
block->cbp = 0x07; // Y, Co, Cg all present
|
||||
}
|
||||
|
||||
// Encode a 16x16 block in lossless mode
|
||||
static void encode_block_lossless(tev_encoder_t *enc, int block_x, int block_y, int is_keyframe) {
|
||||
tev_lossless_block_t *block = (tev_lossless_block_t*)&enc->block_data[block_y * ((enc->width + 15) / 16) + block_x];
|
||||
|
||||
// Extract YCoCg-R block
|
||||
extract_ycocgr_block(enc->current_rgb, enc->width, enc->height,
|
||||
block_x, block_y,
|
||||
enc->y_workspace, enc->co_workspace, enc->cg_workspace);
|
||||
|
||||
if (is_keyframe) {
|
||||
// Intra coding for keyframes
|
||||
block->mode = TEV_MODE_INTRA;
|
||||
block->mv_x = block->mv_y = 0;
|
||||
enc->blocks_intra++;
|
||||
} else {
|
||||
// Same mode decision logic as regular encode_block
|
||||
// For simplicity, using INTRA for now in lossless mode
|
||||
block->mode = TEV_MODE_INTRA;
|
||||
block->mv_x = block->mv_y = 0;
|
||||
enc->blocks_intra++;
|
||||
}
|
||||
|
||||
// Lossless mode: rate control factor is always 1.0f
|
||||
block->rate_control_factor = 1.0f;
|
||||
|
||||
// Apply DCT transforms using the same pattern as regular encoding
|
||||
// Y channel (16x16)
|
||||
dct_16x16_fast(enc->y_workspace, enc->dct_workspace);
|
||||
for (int i = 0; i < BLOCK_SIZE_SQR; i++) {
|
||||
block->y_coeffs[i] = enc->dct_workspace[i]; // Store directly without quantization
|
||||
}
|
||||
|
||||
// Co channel (8x8)
|
||||
dct_8x8_fast(enc->co_workspace, enc->dct_workspace);
|
||||
for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
|
||||
block->co_coeffs[i] = enc->dct_workspace[i]; // Store directly without quantization
|
||||
}
|
||||
|
||||
// Cg channel (8x8)
|
||||
dct_8x8_fast(enc->cg_workspace, enc->dct_workspace);
|
||||
for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
|
||||
block->cg_coeffs[i] = enc->dct_workspace[i]; // Store directly without quantization
|
||||
}
|
||||
|
||||
// Set CBP (simplified - always encode all channels)
|
||||
block->cbp = 0x07; // Y, Co, Cg all present
|
||||
}
|
||||
|
||||
// Serialized lossless block structure (for writing to file with float16 coefficients)
|
||||
typedef struct __attribute__((packed)) {
|
||||
uint8_t mode;
|
||||
int16_t mv_x, mv_y;
|
||||
float rate_control_factor; // Always 1.0f in lossless mode
|
||||
uint16_t cbp;
|
||||
uint16_t y_coeffs[BLOCK_SIZE_SQR]; // float16 Y coefficients
|
||||
uint16_t co_coeffs[HALF_BLOCK_SIZE_SQR]; // float16 Co coefficients
|
||||
uint16_t cg_coeffs[HALF_BLOCK_SIZE_SQR]; // float16 Cg coefficients
|
||||
} tev_serialized_lossless_block_t;
|
||||
|
||||
// Convert lossless blocks to serialized format with float16 coefficients
|
||||
static void serialize_lossless_blocks(tev_encoder_t *enc, int blocks_x, int blocks_y,
|
||||
tev_serialized_lossless_block_t *serialized_blocks) {
|
||||
for (int by = 0; by < blocks_y; by++) {
|
||||
for (int bx = 0; bx < blocks_x; bx++) {
|
||||
tev_lossless_block_t *src = (tev_lossless_block_t*)&enc->block_data[by * blocks_x + bx];
|
||||
tev_serialized_lossless_block_t *dst = &serialized_blocks[by * blocks_x + bx];
|
||||
|
||||
// Copy basic fields
|
||||
dst->mode = src->mode;
|
||||
dst->mv_x = src->mv_x;
|
||||
dst->mv_y = src->mv_y;
|
||||
dst->rate_control_factor = src->rate_control_factor;
|
||||
dst->cbp = src->cbp;
|
||||
|
||||
// Convert float32 coefficients to float16 with range clamping
|
||||
// Float16 max finite value is approximately 65504
|
||||
const float FLOAT16_MAX = 65504.0f;
|
||||
|
||||
for (int i = 0; i < BLOCK_SIZE_SQR; i++) {
|
||||
float coeff = FCLAMP(src->y_coeffs[i], -FLOAT16_MAX, FLOAT16_MAX);
|
||||
dst->y_coeffs[i] = float_to_float16(coeff);
|
||||
if (enc->verbose && fabsf(src->y_coeffs[i]) > FLOAT16_MAX) {
|
||||
printf("WARNING: Y coefficient %d clamped: %f -> %f\n", i, src->y_coeffs[i], coeff);
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
|
||||
float co_coeff = FCLAMP(src->co_coeffs[i], -FLOAT16_MAX, FLOAT16_MAX);
|
||||
float cg_coeff = FCLAMP(src->cg_coeffs[i], -FLOAT16_MAX, FLOAT16_MAX);
|
||||
dst->co_coeffs[i] = float_to_float16(co_coeff);
|
||||
dst->cg_coeffs[i] = float_to_float16(cg_coeff);
|
||||
if (enc->verbose && fabsf(src->co_coeffs[i]) > FLOAT16_MAX) {
|
||||
printf("WARNING: Co coefficient %d clamped: %f -> %f\n", i, src->co_coeffs[i], co_coeff);
|
||||
}
|
||||
if (enc->verbose && fabsf(src->cg_coeffs[i]) > FLOAT16_MAX) {
|
||||
printf("WARNING: Cg coefficient %d clamped: %f -> %f\n", i, src->cg_coeffs[i], cg_coeff);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convert SubRip time format (HH:MM:SS,mmm) to frame number
|
||||
static int srt_time_to_frame(const char *time_str, int fps) {
|
||||
int hours, minutes, seconds, milliseconds;
|
||||
@@ -1820,17 +1988,13 @@ static int alloc_encoder_buffers(tev_encoder_t *enc) {
|
||||
enc->cg_workspace = malloc(8 * 8 * sizeof(float));
|
||||
enc->dct_workspace = malloc(16 * 16 * sizeof(float));
|
||||
|
||||
// Allocate block data
|
||||
enc->block_data = malloc(total_blocks * sizeof(tev_block_t));
|
||||
// Allocate compression buffer large enough for both regular and lossless modes
|
||||
size_t max_block_size = sizeof(tev_block_t) > sizeof(tev_serialized_lossless_block_t) ?
|
||||
sizeof(tev_block_t) : sizeof(tev_serialized_lossless_block_t);
|
||||
size_t compressed_buffer_size = total_blocks * max_block_size * 2;
|
||||
|
||||
// Allocate compression buffer
|
||||
size_t compressed_buffer_size = total_blocks * sizeof(tev_block_t) * 2;
|
||||
enc->compressed_buffer = malloc(compressed_buffer_size);
|
||||
|
||||
if (enc->verbose) {
|
||||
printf("Allocated compressed buffer: %zu bytes for %d blocks (max_block_size: %zu)\n",
|
||||
compressed_buffer_size, total_blocks, max_block_size);
|
||||
}
|
||||
|
||||
enc->mp2_buffer = malloc(MP2_DEFAULT_PACKET_SIZE);
|
||||
|
||||
if (!enc->current_rgb || !enc->previous_rgb || !enc->reference_rgb ||
|
||||
@@ -1889,7 +2053,7 @@ static void free_encoder(tev_encoder_t *enc) {
|
||||
static int write_tev_header(FILE *output, tev_encoder_t *enc) {
|
||||
// Magic + version
|
||||
fwrite(TEV_MAGIC, 1, 8, output);
|
||||
uint8_t version = TEV_VERSION;
|
||||
uint8_t version = enc->ictcp_mode ? 3 : 2; // Version 3 for ICtCp, 2 for YCoCg-R
|
||||
fwrite(&version, 1, 1, output);
|
||||
|
||||
// Video parameters
|
||||
@@ -1901,7 +2065,7 @@ static int write_tev_header(FILE *output, tev_encoder_t *enc) {
|
||||
uint8_t qualityCo = enc->qualityCo;
|
||||
uint8_t qualityCg = enc->qualityCg;
|
||||
uint8_t flags = (enc->has_audio) | (enc->has_subtitles << 1);
|
||||
uint8_t video_flags = (enc->progressive_mode ? 0 : 1) | (enc->is_ntsc_framerate ? 2 : 0) | (enc->lossless_mode ? 4 : 0); // bit 0 = is_interlaced, bit 1 = is_ntsc_framerate, bit 2 = is_lossless
|
||||
uint8_t video_flags = (enc->progressive_mode ? 0 : 1) | (enc->is_ntsc_framerate ? 2 : 0); // bit 0 = is_interlaced, bit 1 = is_ntsc_framerate
|
||||
uint8_t reserved = 0;
|
||||
|
||||
fwrite(&width, 2, 1, output);
|
||||
@@ -2008,11 +2172,7 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num, int fie
|
||||
// Encode all blocks
|
||||
for (int by = 0; by < blocks_y; by++) {
|
||||
for (int bx = 0; bx < blocks_x; bx++) {
|
||||
if (enc->lossless_mode) {
|
||||
encode_block_lossless(enc, bx, by, is_keyframe);
|
||||
} else {
|
||||
encode_block(enc, bx, by, is_keyframe);
|
||||
}
|
||||
encode_block(enc, bx, by, is_keyframe);
|
||||
|
||||
// Calculate complexity for rate control (if enabled)
|
||||
if (enc->bitrate_mode > 0) {
|
||||
@@ -2029,34 +2189,14 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num, int fie
|
||||
|
||||
// Compress block data using Zstd (compatible with TSVM decoder)
|
||||
size_t compressed_size;
|
||||
|
||||
if (enc->lossless_mode) {
|
||||
// Lossless mode: serialize blocks with float16 coefficients
|
||||
size_t serialized_block_data_size = blocks_x * blocks_y * sizeof(tev_serialized_lossless_block_t);
|
||||
tev_serialized_lossless_block_t *serialized_blocks = malloc(serialized_block_data_size);
|
||||
if (!serialized_blocks) {
|
||||
fprintf(stderr, "Failed to allocate memory for serialized lossless blocks\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
serialize_lossless_blocks(enc, blocks_x, blocks_y, serialized_blocks);
|
||||
|
||||
// Use the pre-allocated buffer size instead of calculating dynamically
|
||||
size_t output_buffer_size = blocks_x * blocks_y * sizeof(tev_serialized_lossless_block_t) * 2;
|
||||
compressed_size = ZSTD_compressCCtx(enc->zstd_context,
|
||||
enc->compressed_buffer, output_buffer_size,
|
||||
serialized_blocks, serialized_block_data_size,
|
||||
ZSTD_COMPRESSON_LEVEL);
|
||||
free(serialized_blocks);
|
||||
} else {
|
||||
// Regular mode: use regular block data
|
||||
size_t block_data_size = blocks_x * blocks_y * sizeof(tev_block_t);
|
||||
compressed_size = ZSTD_compressCCtx(enc->zstd_context,
|
||||
enc->compressed_buffer, block_data_size * 2,
|
||||
enc->block_data, block_data_size,
|
||||
ZSTD_COMPRESSON_LEVEL);
|
||||
}
|
||||
|
||||
|
||||
// Regular mode: use regular block data
|
||||
size_t block_data_size = blocks_x * blocks_y * sizeof(tev_block_t);
|
||||
compressed_size = ZSTD_compressCCtx(enc->zstd_context,
|
||||
enc->compressed_buffer, block_data_size * 2,
|
||||
enc->block_data, block_data_size,
|
||||
ZSTD_COMPRESSON_LEVEL);
|
||||
|
||||
if (ZSTD_isError(compressed_size)) {
|
||||
fprintf(stderr, "Zstd compression failed: %s\n", ZSTD_getErrorName(compressed_size));
|
||||
return 0;
|
||||
@@ -2288,7 +2428,7 @@ static int start_audio_conversion(tev_encoder_t *enc) {
|
||||
char command[2048];
|
||||
snprintf(command, sizeof(command),
|
||||
"ffmpeg -v quiet -i \"%s\" -acodec libtwolame -psymodel 4 -b:a %dk -ar %d -ac 2 -y \"%s\" 2>/dev/null",
|
||||
enc->input_file, enc->lossless_mode ? 384 : MP2_RATE_TABLE[enc->qualityIndex], MP2_SAMPLE_RATE, TEMP_AUDIO_FILE);
|
||||
enc->input_file, MP2_RATE_TABLE[enc->qualityIndex], MP2_SAMPLE_RATE, TEMP_AUDIO_FILE);
|
||||
|
||||
int result = system(command);
|
||||
if (result == 0) {
|
||||
@@ -2429,7 +2569,7 @@ static int process_audio(tev_encoder_t *enc, int frame_num, FILE *output) {
|
||||
|
||||
// Show usage information
|
||||
static void show_usage(const char *program_name) {
|
||||
printf("TEV YCoCg-R 4:2:0 Video Encoder\n");
|
||||
printf("TEV YCoCg-R/ICtCp 4:2:0 Video Encoder\n");
|
||||
printf("Usage: %s [options] -i input.mp4 -o output.mv2\n\n", program_name);
|
||||
printf("Options:\n");
|
||||
printf(" -i, --input FILE Input video file\n");
|
||||
@@ -2443,7 +2583,7 @@ static void show_usage(const char *program_name) {
|
||||
printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n");
|
||||
printf(" -v, --verbose Verbose output\n");
|
||||
printf(" -t, --test Test mode: generate solid colour frames\n");
|
||||
printf(" --lossless Lossless mode: store coefficients as float16 (no quantisation, implies -p, 384k audio)\n");
|
||||
printf(" --ictcp Use ICtCp color space instead of YCoCg-R (generates TEV version 3)\n");
|
||||
printf(" --enable-rcf Enable per-block rate control (experimental)\n");
|
||||
printf(" --enable-encode-stats Collect and report block complexity statistics\n");
|
||||
printf(" --help Show this help\n\n");
|
||||
@@ -2467,7 +2607,7 @@ static void show_usage(const char *program_name) {
|
||||
printf("\n -s default: equal to %dx%d", DEFAULT_WIDTH, DEFAULT_HEIGHT);
|
||||
printf("\n\n");
|
||||
printf("Features:\n");
|
||||
printf(" - YCoCg-R 4:2:0 chroma subsampling for 50%% compression improvement\n");
|
||||
printf(" - YCoCg-R or ICtCp 4:2:0 chroma subsampling for 50%% compression improvement\n");
|
||||
printf(" - 16x16 Y blocks with 8x8 chroma for optimal DCT efficiency\n");
|
||||
printf(" - Frame rate conversion with FFmpeg temporal filtering\n");
|
||||
printf(" - Adaptive quality control with complexity-based adjustment\n");
|
||||
@@ -2536,7 +2676,7 @@ int main(int argc, char *argv[]) {
|
||||
{"test", no_argument, 0, 't'},
|
||||
{"enable-encode-stats", no_argument, 0, 1000},
|
||||
{"enable-rcf", no_argument, 0, 1100},
|
||||
{"lossless", no_argument, 0, 1200},
|
||||
{"ictcp", no_argument, 0, 1300},
|
||||
{"help", no_argument, 0, '?'},
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
@@ -2611,8 +2751,8 @@ int main(int argc, char *argv[]) {
|
||||
case 1100: // --enable-rcf
|
||||
enc->disable_rcf = 0;
|
||||
break;
|
||||
case 1200: // --lossless
|
||||
enc->lossless_mode = 1;
|
||||
case 1300: // --ictcp
|
||||
enc->ictcp_mode = 1;
|
||||
break;
|
||||
case 0:
|
||||
if (strcmp(long_options[option_index].name, "help") == 0) {
|
||||
@@ -2633,24 +2773,17 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
|
||||
// Lossless mode validation and adjustments
|
||||
if (enc->lossless_mode) {
|
||||
// In lossless mode, disable rate control and set quality to maximum
|
||||
enc->bitrate_mode = 0;
|
||||
enc->disable_rcf = 1;
|
||||
enc->progressive_mode = 1;
|
||||
enc->qualityIndex = 5;
|
||||
enc->qualityY = enc->qualityCo = enc->qualityCg = 255; // Use 255 as a redundant lossless marker
|
||||
if (enc->verbose) {
|
||||
printf("Lossless mode enabled: Rate control disabled, quality set to maximum, enabling progressive scan\n");
|
||||
}
|
||||
}
|
||||
|
||||
// halve the internal representation of frame height
|
||||
if (!enc->progressive_mode) {
|
||||
enc->height /= 2;
|
||||
}
|
||||
|
||||
if (enc->ictcp_mode) {
|
||||
int qc = (enc->qualityCo + enc->qualityCg) / 2;
|
||||
enc->qualityCo = qc;
|
||||
enc->qualityCg = qc;
|
||||
}
|
||||
|
||||
if (!test_mode && (!enc->input_file || !enc->output_file)) {
|
||||
fprintf(stderr, "Input and output files are required (unless using --test mode)\n");
|
||||
show_usage(argv[0]);
|
||||
@@ -2737,7 +2870,7 @@ int main(int argc, char *argv[]) {
|
||||
write_tev_header(output, enc);
|
||||
gettimeofday(&enc->start_time, NULL);
|
||||
|
||||
printf("Encoding video with YCoCg-R 4:2:0 format...\n");
|
||||
printf("Encoding video with %s 4:2:0 format...\n", enc->ictcp_mode ? "ICtCp" : "YCoCg-R");
|
||||
if (enc->output_fps != enc->fps) {
|
||||
printf("Frame rate conversion enabled: %d fps output\n", enc->output_fps);
|
||||
}
|
||||
@@ -2791,13 +2924,13 @@ int main(int argc, char *argv[]) {
|
||||
printf("Frame %d: %s (%d,%d,%d)\n", frame_count, colour_name, test_r, test_g, test_b);
|
||||
|
||||
// Test YCoCg-R conversion
|
||||
int y_test, co_test, cg_test;
|
||||
rgb_to_ycocgr(test_r, test_g, test_b, &y_test, &co_test, &cg_test);
|
||||
printf(" YCoCg-R: Y=%d Co=%d Cg=%d\n", y_test, co_test, cg_test);
|
||||
double y_test, co_test, cg_test;
|
||||
rgb_to_color_space(enc, test_r, test_g, test_b, &y_test, &co_test, &cg_test);
|
||||
printf(" %s: Y=%.3f Co=%.3f Cg=%.3f\n", enc->ictcp_mode ? "ICtCp" : "YCoCg", y_test, co_test, cg_test);
|
||||
|
||||
// Test reverse conversion
|
||||
uint8_t r_rev, g_rev, b_rev;
|
||||
ycocgr_to_rgb(y_test, co_test, cg_test, &r_rev, &g_rev, &b_rev);
|
||||
color_space_to_rgb(enc, y_test, co_test, cg_test, &r_rev, &g_rev, &b_rev);
|
||||
printf(" Reverse: R=%d G=%d B=%d\n", r_rev, g_rev, b_rev);
|
||||
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user