From 1343dd10cfb285a3027162cf3e4f37b17e33cd13 Mon Sep 17 00:00:00 2001 From: minjaesong Date: Mon, 15 Sep 2025 16:35:44 +0900 Subject: [PATCH] TAV with ICtCp colour space --- assets/disk0/tvdos/bin/playtav.js | 6 +- terranmon.txt | 2 +- .../torvald/tsvm/GraphicsJSR223Delegate.kt | 676 +++++++++++++++++- video_encoder/encoder_tav.c | 230 +++++- 4 files changed, 886 insertions(+), 28 deletions(-) diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js index fa68ca0..ded9e94 100644 --- a/assets/disk0/tvdos/bin/playtav.js +++ b/assets/disk0/tvdos/bin/playtav.js @@ -156,7 +156,7 @@ for (let i = 0; i < 7; i++) { seqread.readOneByte() } -if (header.version !== TAV_VERSION) { +if (header.version < 1 || header.version > 2) { con.puts(`Error: Unsupported TAV version ${header.version}`) errorlevel = 1 return @@ -185,6 +185,7 @@ console.log(`Wavelet filter: ${header.waveletFilter === WAVELET_5_3_REVERSIBLE ? console.log(`Decomposition levels: ${header.decompLevels}`) console.log(`Quality: Y=${header.qualityY}, Co=${header.qualityCo}, Cg=${header.qualityCg}`) console.log(`Tiles: ${tilesX}x${tilesY} (${numTiles} total)`) +console.log(`Color space: ${header.version === 2 ? "ICtCp" : "YCoCg-R"}`) console.log(`Features: ${hasAudio ? "Audio " : ""}${hasSubtitles ? "Subtitles " : ""}${progressiveTransmission ? "Progressive " : ""}${roiCoding ? "ROI " : ""}`) // Frame buffer addresses - same as TEV @@ -357,7 +358,8 @@ try { header.waveletFilter, // TAV-specific parameter header.decompLevels, // TAV-specific parameter enableDeblocking, - isLossless + isLossless, + header.version // TAV version for color space detection ) decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 diff --git a/terranmon.txt b/terranmon.txt index c5d530f..62b7cba 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -683,7 +683,7 @@ DCT-based compression, motion compensation, and efficient temporal coding. - Version 2.1: Added Rate Control Factor to all video packets (breaking change) * Enables bitrate-constrained encoding alongside quality modes * All video frames now include 4-byte rate control factor after payload size -- Version 3.0: Additional support of XYB Colour space +- Version 3.0: Additional support of ICtCp Colour space # File Structure \x1F T S V M T E V diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index 19bd92c..fe30132 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -12,7 +12,6 @@ import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUint import net.torvald.tsvm.peripheral.GraphicsAdapter import net.torvald.tsvm.peripheral.PeriBase import net.torvald.tsvm.peripheral.fmod -import net.torvald.util.Float16 import kotlin.math.* class GraphicsJSR223Delegate(private val vm: VM) { @@ -2176,14 +2175,14 @@ class GraphicsJSR223Delegate(private val vm: VM) { val Sp = I + 1.0212710798422344 * Ct - 0.6052744909924316 * Cp // HLG decode: L'M'S' -> linear LMS - val L = HLG_inverse_OETF(Lp) - val M = HLG_inverse_OETF(Mp) - val S = HLG_inverse_OETF(Sp) + val L = HLG_EOTF(Lp) + val M = HLG_EOTF(Mp) + val S = HLG_EOTF(Sp) // LMS -> linear sRGB (inverse matrix) - val rLin = 3.436606694333079 * L -2.5064521186562705 * M + 0.06984542432319149 * S - val gLin = -0.7913295555989289 * L + 1.983600451792291 * M -0.192270896193362 * S - val bLin = -0.025949899690592665 * L -0.09891371471172647 * M + 1.1248636144023192 * S + val rLin = 6.1723815689243215 * L -5.319534979827695 * M + 0.14699442094633924 * S + val gLin = -1.3243428148026244 * L + 2.560286104841917 * M -0.2359203727576164 * S + val bLin = -0.011819739235953752 * L -0.26473549971186555 * M + 1.2767952602537955 * S // Gamma encode to sRGB val rSrgb = srgbUnlinearize(rLin) @@ -2204,7 +2203,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Helper functions for ICtCp decoding // Inverse HLG OETF (HLG -> linear) - fun HLG_inverse_OETF(V: Double): Double { + fun HLG_EOTF(V: Double): Double { val a = 0.17883277 val b = 1.0 - 4.0 * a val c = 0.5 - a * ln(4.0 * a) @@ -3919,4 +3918,665 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } + // ================= TAV (TSVM Advanced Video) Decoder ================= + // DWT-based video codec with ICtCp color space support + + fun tavDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long, + width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, frameCounter: Int, + debugMotionVectors: Boolean = false, waveletFilter: Int = 1, + decompLevels: Int = 3, enableDeblocking: Boolean = true, + isLossless: Boolean = false, tavVersion: Int = 1) { + + var readPtr = blockDataPtr + + try { + val tilesX = (width + 63) / 64 // 64x64 tiles + val tilesY = (height + 63) / 64 + + // Process each tile + for (tileY in 0 until tilesY) { + for (tileX in 0 until tilesX) { + + // Read tile header (9 bytes: mode + mvX + mvY + rcf) + val mode = vm.peek(readPtr).toInt() and 0xFF + readPtr += 1 + val mvX = vm.peekShort(readPtr).toInt() + readPtr += 2 + val mvY = vm.peekShort(readPtr).toInt() + readPtr += 2 + val rcf = vm.peekFloat(readPtr) + readPtr += 4 + + when (mode) { + 0x00 -> { // TAV_MODE_SKIP + // Copy 64x64 tile from previous frame to current frame + copyTile64x64RGB(tileX, tileY, currentRGBAddr, prevRGBAddr, width, height) + } + 0x01 -> { // TAV_MODE_INTRA + // Decode DWT coefficients directly to RGB buffer + readPtr = decodeDWTIntraTileRGB(readPtr, tileX, tileY, currentRGBAddr, + width, height, qY, qCo, qCg, rcf, + waveletFilter, decompLevels, isLossless, tavVersion) + } + 0x02 -> { // TAV_MODE_INTER + // Motion compensation + DWT residual to RGB buffer + readPtr = decodeDWTInterTileRGB(readPtr, tileX, tileY, mvX, mvY, + currentRGBAddr, prevRGBAddr, + width, height, qY, qCo, qCg, rcf, + waveletFilter, decompLevels, isLossless, tavVersion) + } + 0x03 -> { // TAV_MODE_MOTION + // Motion compensation only (no residual) + applyMotionCompensation64x64RGB(tileX, tileY, mvX, mvY, + currentRGBAddr, prevRGBAddr, width, height) + } + } + } + } + + } catch (e: Exception) { + println("TAV decode error: ${e.message}") + } + } + + private fun decodeDWTIntraTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long, + width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, rcf: Float, + waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long { + val tileSize = 64 + val coeffCount = tileSize * tileSize + var ptr = readPtr + + // Read quantized DWT coefficients for Y, Co, Cg channels + val quantizedY = ShortArray(coeffCount) + val quantizedCo = ShortArray(coeffCount) + val quantizedCg = ShortArray(coeffCount) + + // Read Y coefficients + for (i in 0 until coeffCount) { + quantizedY[i] = vm.peekShort(ptr) + ptr += 2 + } + + // Read Co coefficients + for (i in 0 until coeffCount) { + quantizedCo[i] = vm.peekShort(ptr) + ptr += 2 + } + + // Read Cg coefficients + for (i in 0 until coeffCount) { + quantizedCg[i] = vm.peekShort(ptr) + ptr += 2 + } + + // Dequantize and apply inverse DWT + val yTile = FloatArray(coeffCount) + val coTile = FloatArray(coeffCount) + val cgTile = FloatArray(coeffCount) + + for (i in 0 until coeffCount) { + yTile[i] = quantizedY[i] * qY * rcf + coTile[i] = quantizedCo[i] * qCo * rcf + cgTile[i] = quantizedCg[i] * qCg * rcf + } + + // Apply inverse DWT using specified filter with decomposition levels + if (isLossless) { + applyDWTInverseMultiLevel(yTile, tileSize, tileSize, decompLevels, 0) + applyDWTInverseMultiLevel(coTile, tileSize, tileSize, decompLevels, 0) + applyDWTInverseMultiLevel(cgTile, tileSize, tileSize, decompLevels, 0) + } else { + applyDWTInverseMultiLevel(yTile, tileSize, tileSize, decompLevels, waveletFilter) + applyDWTInverseMultiLevel(coTile, tileSize, tileSize, decompLevels, waveletFilter) + applyDWTInverseMultiLevel(cgTile, tileSize, tileSize, decompLevels, waveletFilter) + } + + // Convert to RGB based on TAV version (YCoCg-R for v1, ICtCp for v2) + if (tavVersion == 2) { + convertICtCpTileToRGB(tileX, tileY, yTile, coTile, cgTile, currentRGBAddr, width, height) + } else { + convertYCoCgTileToRGB(tileX, tileY, yTile, coTile, cgTile, currentRGBAddr, width, height) + } + + return ptr + } + + private fun convertYCoCgTileToRGB(tileX: Int, tileY: Int, yTile: FloatArray, coTile: FloatArray, cgTile: FloatArray, + rgbAddr: Long, width: Int, height: Int) { + val tileSize = 64 + val startX = tileX * tileSize + val startY = tileY * tileSize + + for (y in 0 until tileSize) { + for (x in 0 until tileSize) { + val frameX = startX + x + val frameY = startY + y + + if (frameX < width && frameY < height) { + val tileIdx = y * tileSize + x + val pixelIdx = frameY * width + frameX + + // YCoCg-R to RGB conversion (exact inverse of encoder) + val Y = yTile[tileIdx] + val Co = coTile[tileIdx] + val Cg = cgTile[tileIdx] + + // Inverse of encoder's YCoCg-R transform: + val tmp = Y - Cg / 2.0f + val g = Cg + tmp + val b = tmp - Co / 2.0f + val r = Co + b + + val rgbOffset = pixelIdx * 3L + vm.poke(rgbAddr + rgbOffset, r.toInt().coerceIn(0, 255).toByte()) + vm.poke(rgbAddr + rgbOffset + 1, g.toInt().coerceIn(0, 255).toByte()) + vm.poke(rgbAddr + rgbOffset + 2, b.toInt().coerceIn(0, 255).toByte()) + } + } + } + } + + private fun convertICtCpTileToRGB(tileX: Int, tileY: Int, iTile: FloatArray, ctTile: FloatArray, cpTile: FloatArray, + rgbAddr: Long, width: Int, height: Int) { + val tileSize = 64 + val startX = tileX * tileSize + val startY = tileY * tileSize + + for (y in 0 until tileSize) { + for (x in 0 until tileSize) { + val frameX = startX + x + val frameY = startY + y + + if (frameX < width && frameY < height) { + val tileIdx = y * tileSize + x + val pixelIdx = frameY * width + frameX + + // ICtCp to sRGB conversion (adapted from encoder ICtCp functions) + val I = iTile[tileIdx].toDouble() / 255.0 + val Ct = (ctTile[tileIdx].toDouble() - 127.5) / 255.0 + val Cp = (cpTile[tileIdx].toDouble() - 127.5) / 255.0 + + // ICtCp -> L'M'S' (inverse matrix) + val Lp = I + 0.015718580108730416 * Ct + 0.2095810681164055 * Cp + val Mp = I - 0.015718580108730416 * Ct - 0.20958106811640548 * Cp + val Sp = I + 1.0212710798422344 * Ct - 0.6052744909924316 * Cp + + // HLG decode: L'M'S' -> linear LMS + val L = HLG_EOTF(Lp) + val M = HLG_EOTF(Mp) + val S = HLG_EOTF(Sp) + + // LMS -> linear sRGB (inverse matrix) + val rLin = 6.1723815689243215 * L -5.319534979827695 * M + 0.14699442094633924 * S + val gLin = -1.3243428148026244 * L + 2.560286104841917 * M -0.2359203727576164 * S + val bLin = -0.011819739235953752 * L -0.26473549971186555 * M + 1.2767952602537955 * S + + // Gamma encode to sRGB + val rSrgb = srgbUnlinearize(rLin) + val gSrgb = srgbUnlinearize(gLin) + val bSrgb = srgbUnlinearize(bLin) + + val rgbOffset = pixelIdx * 3L + vm.poke(rgbAddr + rgbOffset, (rSrgb * 255.0).toInt().coerceIn(0, 255).toByte()) + vm.poke(rgbAddr + rgbOffset + 1, (gSrgb * 255.0).toInt().coerceIn(0, 255).toByte()) + vm.poke(rgbAddr + rgbOffset + 2, (bSrgb * 255.0).toInt().coerceIn(0, 255).toByte()) + } + } + } + } + + private fun addYCoCgResidualToRGBTile(tileX: Int, tileY: Int, yRes: FloatArray, coRes: FloatArray, cgRes: FloatArray, + rgbAddr: Long, width: Int, height: Int) { + val tileSize = 64 + val startX = tileX * tileSize + val startY = tileY * tileSize + + for (y in 0 until tileSize) { + for (x in 0 until tileSize) { + val frameX = startX + x + val frameY = startY + y + + if (frameX < width && frameY < height) { + val tileIdx = y * tileSize + x + val pixelIdx = frameY * width + frameX + val rgbOffset = pixelIdx * 3L + + // Get current RGB (from motion compensation) + val curR = (vm.peek(rgbAddr + rgbOffset).toInt() and 0xFF).toFloat() + val curG = (vm.peek(rgbAddr + rgbOffset + 1).toInt() and 0xFF).toFloat() + val curB = (vm.peek(rgbAddr + rgbOffset + 2).toInt() and 0xFF).toFloat() + + // Convert current RGB back to YCoCg + val co = (curR - curB) / 2 + val tmp = curB + co + val cg = (curG - tmp) / 2 + val yPred = tmp + cg + + // Add residual + val yFinal = yPred + yRes[tileIdx] + val coFinal = co + coRes[tileIdx] + val cgFinal = cg + cgRes[tileIdx] + + // Convert back to RGB + val tmpFinal = yFinal - cgFinal + val gFinal = yFinal + cgFinal + val bFinal = tmpFinal - coFinal + val rFinal = tmpFinal + coFinal + + vm.poke(rgbAddr + rgbOffset, rFinal.toInt().coerceIn(0, 255).toByte()) + vm.poke(rgbAddr + rgbOffset + 1, gFinal.toInt().coerceIn(0, 255).toByte()) + vm.poke(rgbAddr + rgbOffset + 2, bFinal.toInt().coerceIn(0, 255).toByte()) + } + } + } + } + + // Helper functions (simplified versions of existing DWT functions) + private fun copyTile64x64RGB(tileX: Int, tileY: Int, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int) { + val tileSize = 64 + val startX = tileX * tileSize + val startY = tileY * tileSize + + for (y in 0 until tileSize) { + for (x in 0 until tileSize) { + val frameX = startX + x + val frameY = startY + y + + if (frameX < width && frameY < height) { + val pixelIdx = frameY * width + frameX + val rgbOffset = pixelIdx * 3L + + // Copy RGB pixel from previous frame + val r = vm.peek(prevRGBAddr + rgbOffset) + val g = vm.peek(prevRGBAddr + rgbOffset + 1) + val b = vm.peek(prevRGBAddr + rgbOffset + 2) + + vm.poke(currentRGBAddr + rgbOffset, r) + vm.poke(currentRGBAddr + rgbOffset + 1, g) + vm.poke(currentRGBAddr + rgbOffset + 2, b) + } + } + } + } + + private fun decodeDWTInterTileRGB(readPtr: Long, tileX: Int, tileY: Int, mvX: Int, mvY: Int, + currentRGBAddr: Long, prevRGBAddr: Long, + width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, rcf: Float, + waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long { + + // Step 1: Apply motion compensation + applyMotionCompensation64x64RGB(tileX, tileY, mvX, mvY, currentRGBAddr, prevRGBAddr, width, height) + + // Step 2: Add DWT residual (same as intra but add to existing pixels) + return decodeDWTIntraTileRGB(readPtr, tileX, tileY, currentRGBAddr, width, height, qY, qCo, qCg, rcf, + waveletFilter, decompLevels, isLossless, tavVersion) + } + + private fun applyMotionCompensation64x64RGB(tileX: Int, tileY: Int, mvX: Int, mvY: Int, + currentRGBAddr: Long, prevRGBAddr: Long, + width: Int, height: Int) { + val tileSize = 64 + val startX = tileX * tileSize + val startY = tileY * tileSize + + // Motion vectors in quarter-pixel precision + val refX = startX + (mvX / 4.0f) + val refY = startY + (mvY / 4.0f) + + for (y in 0 until tileSize) { + for (x in 0 until tileSize) { + val currentPixelIdx = (startY + y) * width + (startX + x) + + if (currentPixelIdx >= 0 && currentPixelIdx < width * height) { + // Bilinear interpolation for sub-pixel motion vectors + val srcX = refX + x + val srcY = refY + y + + val interpolatedRGB = bilinearInterpolateRGB(prevRGBAddr, width, height, srcX, srcY) + + val rgbOffset = currentPixelIdx * 3L + vm.poke(currentRGBAddr + rgbOffset, interpolatedRGB[0]) + vm.poke(currentRGBAddr + rgbOffset + 1, interpolatedRGB[1]) + vm.poke(currentRGBAddr + rgbOffset + 2, interpolatedRGB[2]) + } + } + } + } + + private fun bilinearInterpolateRGB(rgbPtr: Long, width: Int, height: Int, x: Float, y: Float): ByteArray { + val x0 = kotlin.math.floor(x).toInt() + val y0 = kotlin.math.floor(y).toInt() + val x1 = x0 + 1 + val y1 = y0 + 1 + + if (x0 < 0 || y0 < 0 || x1 >= width || y1 >= height) { + return byteArrayOf(0, 0, 0) // Out of bounds - return black + } + + val fx = x - x0 + val fy = y - y0 + + // Get 4 corner pixels + val rgb00 = getRGBPixel(rgbPtr, y0 * width + x0) + val rgb10 = getRGBPixel(rgbPtr, y0 * width + x1) + val rgb01 = getRGBPixel(rgbPtr, y1 * width + x0) + val rgb11 = getRGBPixel(rgbPtr, y1 * width + x1) + + // Bilinear interpolation + val result = ByteArray(3) + for (c in 0..2) { + val interp = (1 - fx) * (1 - fy) * (rgb00[c].toInt() and 0xFF) + + fx * (1 - fy) * (rgb10[c].toInt() and 0xFF) + + (1 - fx) * fy * (rgb01[c].toInt() and 0xFF) + + fx * fy * (rgb11[c].toInt() and 0xFF) + result[c] = interp.toInt().coerceIn(0, 255).toByte() + } + + return result + } + + private fun getRGBPixel(rgbPtr: Long, pixelIdx: Int): ByteArray { + val offset = pixelIdx * 3L + return byteArrayOf( + vm.peek(rgbPtr + offset), + vm.peek(rgbPtr + offset + 1), + vm.peek(rgbPtr + offset + 2) + ) + } + + private fun applyDWT53Forward(data: FloatArray, width: Int, height: Int) { + // TODO: Implement 5/3 forward DWT + // Lifting scheme implementation for 5/3 reversible filter + } + + private fun applyDWT53Inverse(data: FloatArray, width: Int, height: Int) { + // 5/3 reversible DWT inverse using lifting scheme + // First apply horizontal inverse DWT on all rows + val tempRow = FloatArray(width) + for (y in 0 until height) { + for (x in 0 until width) { + tempRow[x] = data[y * width + x] + } + applyLift53InverseHorizontal(tempRow, width) + for (x in 0 until width) { + data[y * width + x] = tempRow[x] + } + } + + // Then apply vertical inverse DWT on all columns + val tempCol = FloatArray(height) + for (x in 0 until width) { + for (y in 0 until height) { + tempCol[y] = data[y * width + x] + } + applyLift53InverseVertical(tempCol, height) + for (y in 0 until height) { + data[y * width + x] = tempCol[y] + } + } + } + + private fun applyDWT97Forward(data: FloatArray, width: Int, height: Int) { + // TODO: Implement 9/7 forward DWT + // Lifting scheme implementation for 9/7 irreversible filter + } + + private fun applyDWTInverseMultiLevel(data: FloatArray, width: Int, height: Int, levels: Int, filterType: Int) { + // Multi-level inverse DWT - reconstruct from smallest to largest (reverse of encoder) + val size = width // Full tile size (64) + val tempRow = FloatArray(size) + val tempCol = FloatArray(size) + + for (level in levels - 1 downTo 0) { + val currentSize = size shr level + if (currentSize < 2) break + + // Apply inverse DWT to current subband region - EXACT match to encoder + // The encoder does ROW transform first, then COLUMN transform + // So inverse must do COLUMN inverse first, then ROW inverse + + // Column inverse transform first + for (x in 0 until currentSize) { + for (y in 0 until currentSize) { + tempCol[y] = data[y * size + x] + } + + if (filterType == 0) { + applyDWT53Inverse1D(tempCol, currentSize) + } else { + applyDWT97Inverse1D(tempCol, currentSize) + } + + for (y in 0 until currentSize) { + data[y * size + x] = tempCol[y] + } + } + + // Row inverse transform second + for (y in 0 until currentSize) { + for (x in 0 until currentSize) { + tempRow[x] = data[y * size + x] + } + + if (filterType == 0) { + applyDWT53Inverse1D(tempRow, currentSize) + } else { + applyDWT97Inverse1D(tempRow, currentSize) + } + + for (x in 0 until currentSize) { + data[y * size + x] = tempRow[x] + } + } + } + } + + private fun applyDWT97Inverse(data: FloatArray, width: Int, height: Int) { + // 9/7 irreversible DWT inverse using lifting scheme + // First apply horizontal inverse DWT on all rows + val tempRow = FloatArray(width) + for (y in 0 until height) { + for (x in 0 until width) { + tempRow[x] = data[y * width + x] + } + applyLift97InverseHorizontal(tempRow, width) + for (x in 0 until width) { + data[y * width + x] = tempRow[x] + } + } + + // Then apply vertical inverse DWT on all columns + val tempCol = FloatArray(height) + for (x in 0 until width) { + for (y in 0 until height) { + tempCol[y] = data[y * width + x] + } + applyLift97InverseVertical(tempCol, height) + for (y in 0 until height) { + data[y * width + x] = tempCol[y] + } + } + } + + private fun applyLift97InverseHorizontal(row: FloatArray, width: Int) { TODO() } + private fun applyLift97InverseVertical(col: FloatArray, height: Int) { TODO() } + + // 1D lifting scheme implementations for 5/3 filter + private fun applyLift53InverseHorizontal(data: FloatArray, length: Int) { + if (length < 2) return + + val temp = FloatArray(length) + val half = (length + 1) / 2 + + // Separate even and odd samples (inverse interleaving) + for (i in 0 until half) { + temp[i] = data[2 * i] // Even samples (low-pass) + } + for (i in 0 until length / 2) { + temp[half + i] = data[2 * i + 1] // Odd samples (high-pass) + } + + // Inverse lifting steps for 5/3 filter + // Step 2: Undo update step - even[i] -= (odd[i-1] + odd[i] + 2) >> 2 + for (i in 1 until half) { + val oddPrev = if (i - 1 >= 0) temp[half + i - 1] else 0.0f + val oddCurr = if (i < length / 2) temp[half + i] else 0.0f + temp[i] += (oddPrev + oddCurr + 2.0f) / 4.0f + } + if (half > 0) { + val oddCurr = if (0 < length / 2) temp[half] else 0.0f + temp[0] += oddCurr / 2.0f + } + + // Step 1: Undo predict step - odd[i] += (even[i] + even[i+1]) >> 1 + for (i in 0 until length / 2) { + val evenCurr = temp[i] + val evenNext = if (i + 1 < half) temp[i + 1] else temp[half - 1] + temp[half + i] -= (evenCurr + evenNext) / 2.0f + } + + // Interleave back + for (i in 0 until half) { + data[2 * i] = temp[i] + } + for (i in 0 until length / 2) { + data[2 * i + 1] = temp[half + i] + } + } + + private fun applyLift53InverseVertical(data: FloatArray, length: Int) { + // Same as horizontal but for vertical direction + applyLift53InverseHorizontal(data, length) + } + + // 1D lifting scheme implementations for 9/7 irreversible filter + private fun applyDWT97Inverse1D(data: FloatArray, length: Int) { + if (length < 2) return + + val temp = FloatArray(length) + val half = length / 2 + + // Split into low and high frequency components (matching encoder layout) + // After forward DWT: first half = low-pass, second half = high-pass + for (i in 0 until half) { + temp[i] = data[i] // Low-pass coefficients (first half) + temp[half + i] = data[half + i] // High-pass coefficients (second half) + } + + // 9/7 inverse lifting coefficients (exactly matching encoder) + val alpha = -1.586134342f + val beta = -0.052980118f + val gamma = 0.882911076f + val delta = 0.443506852f + val K = 1.230174105f + + // Inverse lifting steps (undo forward steps in reverse order) + + // Step 5: Undo scaling (reverse of encoder's final step) + for (i in 0 until half) { + temp[i] /= K // Undo temp[i] *= K + temp[half + i] *= K // Undo temp[half + i] /= K + } + + // Step 4: Undo update step (delta) + for (i in 0 until half) { + val left = if (i > 0) temp[half + i - 1] else temp[half + i] + val right = if (i < half - 1) temp[half + i + 1] else temp[half + i] + temp[i] -= delta * (left + right) + } + + // Step 3: Undo predict step (gamma) + for (i in 0 until half) { + val left = if (i > 0) temp[i - 1] else temp[i] + val right = if (i < half - 1) temp[i + 1] else temp[i] + temp[half + i] -= gamma * (left + right) + } + + // Step 2: Undo update step (beta) + for (i in 0 until half) { + val left = if (i > 0) temp[half + i - 1] else temp[half + i] + val right = if (i < half - 1) temp[half + i + 1] else temp[half + i] + temp[i] -= beta * (left + right) + } + + // Step 1: Undo predict step (alpha) + for (i in 0 until half) { + val left = if (i > 0) temp[i - 1] else temp[i] + val right = if (i < half - 1) temp[i + 1] else temp[i] + temp[half + i] -= alpha * (left + right) + } + + // Merge back (inverse of encoder's split) + for (i in 0 until half) { + data[2 * i] = temp[i] // Even positions get low-pass + if (2 * i + 1 < length) { + data[2 * i + 1] = temp[half + i] // Odd positions get high-pass + } + } + } + + private fun applyDWT53Inverse1D(data: FloatArray, length: Int) { + if (length < 2) return + + val temp = FloatArray(length) + val half = length / 2 + + // Split into low and high frequency components (matching encoder layout) + for (i in 0 until half) { + temp[i] = data[i] // Low-pass coefficients (first half) + temp[half + i] = data[half + i] // High-pass coefficients (second half) + } + + // 5/3 inverse lifting (undo forward steps in reverse order) + + // Step 2: Undo update step (1/4 coefficient) + for (i in 0 until half) { + val left = if (i > 0) temp[half + i - 1] else 0.0f + val right = if (i < half - 1) temp[half + i] else 0.0f + temp[i] -= 0.25f * (left + right) + } + + // Step 1: Undo predict step (1/2 coefficient) + for (i in 0 until half) { + val left = temp[i] + val right = if (i < half - 1) temp[i + 1] else temp[i] + temp[half + i] -= 0.5f * (left + right) + } + + // Merge back (inverse of encoder's split) + for (i in 0 until half) { + data[2 * i] = temp[i] // Even positions get low-pass + if (2 * i + 1 < length) { + data[2 * i + 1] = temp[half + i] // Odd positions get high-pass + } + } + } + + private fun bilinearInterpolate( + dataPtr: Long, width: Int, height: Int, + x: Float, y: Float + ): Float { + val x0 = floor(x).toInt() + val y0 = floor(y).toInt() + val x1 = x0 + 1 + val y1 = y0 + 1 + + if (x0 < 0 || y0 < 0 || x1 >= width || y1 >= height) { + return 0.0f // Out of bounds + } + + val fx = x - x0 + val fy = y - y0 + + val p00 = vm.peekFloat(dataPtr + (y0 * width + x0) * 4L)!! + val p10 = vm.peekFloat(dataPtr + (y0 * width + x1) * 4L)!! + val p01 = vm.peekFloat(dataPtr + (y1 * width + x0) * 4L)!! + val p11 = vm.peekFloat(dataPtr + (y1 * width + x1) * 4L)!! + + return p00 * (1 - fx) * (1 - fy) + + p10 * fx * (1 - fy) + + p01 * (1 - fx) * fy + + p11 * fx * fy + } + } \ No newline at end of file diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index 2953055..d3e66e0 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -69,7 +69,9 @@ static inline float float16_to_float(uint16_t hbits) { // TSVM Advanced Video (TAV) format constants #define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV" -#define TAV_VERSION 1 // Initial DWT implementation +// TAV version - dynamic based on color space mode +// Version 1: YCoCg-R (default) +// Version 2: ICtCp (--ictcp flag) // Tile encoding modes (64x64 tiles) #define TAV_MODE_SKIP 0x00 // Skip tile (copy from reference) @@ -193,6 +195,7 @@ typedef struct { int enable_roi; int verbose; int test_mode; + int ictcp_mode; // 0 = YCoCg-R (default), 1 = ICtCp color space // Frame buffers uint8_t *current_frame_rgb; @@ -271,6 +274,7 @@ static void show_usage(const char *program_name) { printf(" --enable-rcf Enable per-tile rate control (experimental)\n"); printf(" --enable-progressive Enable progressive transmission\n"); printf(" --enable-roi Enable region-of-interest coding\n"); + printf(" --ictcp Use ICtCp color space instead of YCoCg-R (generates TAV version 2)\n"); printf(" --help Show this help\n\n"); printf("Audio Rate by Quality:\n "); @@ -567,7 +571,7 @@ static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, int16_t *quantized_cg = malloc(tile_size * sizeof(int16_t)); // Debug: check DWT coefficients before quantization - if (tile_x == 0 && tile_y == 0) { + /*if (tile_x == 0 && tile_y == 0) { printf("Encoder Debug: Tile (0,0) - DWT Y coeffs before quantization (first 16): "); for (int i = 0; i < 16; i++) { printf("%.2f ", tile_y_data[i]); @@ -575,20 +579,20 @@ static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, printf("\n"); printf("Encoder Debug: Quantizers - Y=%d, Co=%d, Cg=%d, rcf=%.2f\n", enc->quantizer_y, enc->quantizer_co, enc->quantizer_cg, mv->rate_control_factor); - } + }*/ quantize_dwt_coefficients((float*)tile_y_data, quantized_y, tile_size, enc->quantizer_y, mv->rate_control_factor); quantize_dwt_coefficients((float*)tile_co_data, quantized_co, tile_size, enc->quantizer_co, mv->rate_control_factor); quantize_dwt_coefficients((float*)tile_cg_data, quantized_cg, tile_size, enc->quantizer_cg, mv->rate_control_factor); // Debug: check quantized coefficients after quantization - if (tile_x == 0 && tile_y == 0) { + /*if (tile_x == 0 && tile_y == 0) { printf("Encoder Debug: Tile (0,0) - Quantized Y coeffs (first 16): "); for (int i = 0; i < 16; i++) { printf("%d ", quantized_y[i]); } printf("\n"); - } + }*/ // Write quantized coefficients memcpy(buffer + offset, quantized_y, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t); @@ -647,13 +651,13 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) } // Debug: check input data before DWT - if (tile_x == 0 && tile_y == 0) { + /*if (tile_x == 0 && tile_y == 0) { printf("Encoder Debug: Tile (0,0) - Y data before DWT (first 16): "); for (int i = 0; i < 16; i++) { printf("%.2f ", tile_y_data[i]); } printf("\n"); - } + }*/ // Apply DWT transform to each channel dwt_2d_forward(tile_y_data, enc->decomp_levels, enc->wavelet_filter); @@ -763,6 +767,192 @@ static void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int } } +// ---------------------- ICtCp Implementation ---------------------- + +static inline int iround(double v) { return (int)floor(v + 0.5); } + +// ---------------------- sRGB gamma helpers ---------------------- +static inline double srgb_linearize(double val) { + if (val <= 0.04045) return val / 12.92; + return pow((val + 0.055) / 1.055, 2.4); +} + +static inline double srgb_unlinearize(double val) { + if (val <= 0.0031308) return 12.92 * val; + return 1.055 * pow(val, 1.0/2.4) - 0.055; +} + +// ---------------------- HLG OETF/EOTF ---------------------- +static inline double HLG_OETF(double E) { + const double a = 0.17883277; + const double b = 0.28466892; // 1 - 4*a + const double c = 0.55991073; // 0.5 - a*ln(4*a) + + if (E <= 1.0/12.0) return sqrt(3.0 * E); + return a * log(12.0 * E - b) + c; +} + +static inline double HLG_EOTF(double Ep) { + const double a = 0.17883277; + const double b = 0.28466892; + const double c = 0.55991073; + + if (Ep <= 0.5) { + double val = Ep * Ep / 3.0; + return val; + } + double val = (exp((Ep - c) / a) + b) / 12.0; + return val; +} + +// sRGB -> LMS matrix +static const double M_RGB_TO_LMS[3][3] = { + {0.2958564579364564, 0.6230869483219083, 0.08106989398623762}, + {0.15627390752659093, 0.727308963512872, 0.11639736914944238}, + {0.035141262332177715, 0.15657109121101628, 0.8080956851990795} +}; + +static const double M_LMS_TO_RGB[3][3] = { + {6.1723815689243215, -5.319534979827695, 0.14699442094633924}, + {-1.3243428148026244, 2.560286104841917, -0.2359203727576164}, + {-0.011819739235953752, -0.26473549971186555, 1.2767952602537955} +}; + +// ICtCp matrix (L' M' S' -> I Ct Cp). Values are the BT.2100 integer-derived /4096 constants. +static const double M_LMSPRIME_TO_ICTCP[3][3] = { + { 2048.0/4096.0, 2048.0/4096.0, 0.0 }, + { 3625.0/4096.0, -7465.0/4096.0, 3840.0/4096.0 }, + { 9500.0/4096.0, -9212.0/4096.0, -288.0/4096.0 } +}; + +// Inverse matrices +static const double M_ICTCP_TO_LMSPRIME[3][3] = { + { 1.0, 0.015718580108730416, 0.2095810681164055 }, + { 1.0, -0.015718580108730416, -0.20958106811640548 }, + { 1.0, 1.0212710798422344, -0.6052744909924316 } +}; + +// ---------------------- Forward: sRGB8 -> ICtCp (doubles) ---------------------- +void srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8, + double *out_I, double *out_Ct, double *out_Cp) +{ + // 1) linearize sRGB to 0..1 + double r = srgb_linearize((double)r8 / 255.0); + double g = srgb_linearize((double)g8 / 255.0); + double b = srgb_linearize((double)b8 / 255.0); + + // 2) linear RGB -> LMS (single 3x3 multiply) + double L = M_RGB_TO_LMS[0][0]*r + M_RGB_TO_LMS[0][1]*g + M_RGB_TO_LMS[0][2]*b; + double M = M_RGB_TO_LMS[1][0]*r + M_RGB_TO_LMS[1][1]*g + M_RGB_TO_LMS[1][2]*b; + double S = M_RGB_TO_LMS[2][0]*r + M_RGB_TO_LMS[2][1]*g + M_RGB_TO_LMS[2][2]*b; + + // 3) HLG OETF + double Lp = HLG_OETF(L); + double Mp = HLG_OETF(M); + double Sp = HLG_OETF(S); + + // 4) L'M'S' -> ICtCp + double I = M_LMSPRIME_TO_ICTCP[0][0]*Lp + M_LMSPRIME_TO_ICTCP[0][1]*Mp + M_LMSPRIME_TO_ICTCP[0][2]*Sp; + double Ct = M_LMSPRIME_TO_ICTCP[1][0]*Lp + M_LMSPRIME_TO_ICTCP[1][1]*Mp + M_LMSPRIME_TO_ICTCP[1][2]*Sp; + double Cp = M_LMSPRIME_TO_ICTCP[2][0]*Lp + M_LMSPRIME_TO_ICTCP[2][1]*Mp + M_LMSPRIME_TO_ICTCP[2][2]*Sp; + + *out_I = FCLAMP(I * 255.f, 0.f, 255.f); + *out_Ct = FCLAMP(Ct * 255.f + 127.5f, 0.f, 255.f); + *out_Cp = FCLAMP(Cp * 255.f + 127.5f, 0.f, 255.f); +} + +// ---------------------- Reverse: ICtCp -> sRGB8 (doubles) ---------------------- +void ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8, + uint8_t *r8, uint8_t *g8, uint8_t *b8) +{ + double I = I8 / 255.f; + double Ct = (Ct8 - 127.5f) / 255.f; + double Cp = (Cp8 - 127.5f) / 255.f; + + // 1) ICtCp -> L' M' S' (3x3 multiply) + double Lp = M_ICTCP_TO_LMSPRIME[0][0]*I + M_ICTCP_TO_LMSPRIME[0][1]*Ct + M_ICTCP_TO_LMSPRIME[0][2]*Cp; + double Mp = M_ICTCP_TO_LMSPRIME[1][0]*I + M_ICTCP_TO_LMSPRIME[1][1]*Ct + M_ICTCP_TO_LMSPRIME[1][2]*Cp; + double Sp = M_ICTCP_TO_LMSPRIME[2][0]*I + M_ICTCP_TO_LMSPRIME[2][1]*Ct + M_ICTCP_TO_LMSPRIME[2][2]*Cp; + + // 2) HLG decode: L' -> linear LMS + double L = HLG_EOTF(Lp); + double M = HLG_EOTF(Mp); + double S = HLG_EOTF(Sp); + + // 3) LMS -> linear sRGB (3x3 inverse) + double r_lin = M_LMS_TO_RGB[0][0]*L + M_LMS_TO_RGB[0][1]*M + M_LMS_TO_RGB[0][2]*S; + double g_lin = M_LMS_TO_RGB[1][0]*L + M_LMS_TO_RGB[1][1]*M + M_LMS_TO_RGB[1][2]*S; + double b_lin = M_LMS_TO_RGB[2][0]*L + M_LMS_TO_RGB[2][1]*M + M_LMS_TO_RGB[2][2]*S; + + // 4) gamma encode and convert to 0..255 with center-of-bin rounding + double r = srgb_unlinearize(r_lin); + double g = srgb_unlinearize(g_lin); + double b = srgb_unlinearize(b_lin); + + *r8 = (uint8_t)iround(FCLAMP(r * 255.0, 0.0, 255.0)); + *g8 = (uint8_t)iround(FCLAMP(g * 255.0, 0.0, 255.0)); + *b8 = (uint8_t)iround(FCLAMP(b * 255.0, 0.0, 255.0)); +} + +// ---------------------- Color Space Switching Functions ---------------------- +// Wrapper functions that choose between YCoCg-R and ICtCp based on encoder mode + +static void rgb_to_color_space(tav_encoder_t *enc, uint8_t r, uint8_t g, uint8_t b, + double *c1, double *c2, double *c3) { + if (enc->ictcp_mode) { + // Use ICtCp color space + srgb8_to_ictcp_hlg(r, g, b, c1, c2, c3); + } else { + // Use YCoCg-R color space (convert from existing function) + float rf = r, gf = g, bf = b; + float co = rf - bf; + float tmp = bf + co / 2; + float cg = gf - tmp; + float y = tmp + cg / 2; + *c1 = (double)y; + *c2 = (double)co; + *c3 = (double)cg; + } +} + +static void color_space_to_rgb(tav_encoder_t *enc, double c1, double c2, double c3, + uint8_t *r, uint8_t *g, uint8_t *b) { + if (enc->ictcp_mode) { + // Use ICtCp color space + ictcp_hlg_to_srgb8(c1, c2, c3, r, g, b); + } else { + // Use YCoCg-R color space (inverse of rgb_to_ycocg) + float y = (float)c1; + float co = (float)c2; + float cg = (float)c3; + float tmp = y - cg / 2.0f; + float g_val = cg + tmp; + float b_val = tmp - co / 2.0f; + float r_val = co + b_val; + *r = (uint8_t)CLAMP((int)(r_val + 0.5f), 0, 255); + *g = (uint8_t)CLAMP((int)(g_val + 0.5f), 0, 255); + *b = (uint8_t)CLAMP((int)(b_val + 0.5f), 0, 255); + } +} + +// RGB to color space conversion for full frames +static void rgb_to_color_space_frame(tav_encoder_t *enc, const uint8_t *rgb, + float *c1, float *c2, float *c3, int width, int height) { + if (enc->ictcp_mode) { + // ICtCp mode + for (int i = 0; i < width * height; i++) { + double I, Ct, Cp; + srgb8_to_ictcp_hlg(rgb[i*3], rgb[i*3+1], rgb[i*3+2], &I, &Ct, &Cp); + c1[i] = (float)I; + c2[i] = (float)Ct; + c3[i] = (float)Cp; + } + } else { + // Use existing YCoCg function + rgb_to_ycocg(rgb, c1, c2, c3, width, height); + } +} + // Write TAV file header static int write_tav_header(tav_encoder_t *enc) { if (!enc->output_fp) return -1; @@ -770,8 +960,9 @@ static int write_tav_header(tav_encoder_t *enc) { // Magic number fwrite(TAV_MAGIC, 1, 8, enc->output_fp); - // Version - fputc(TAV_VERSION, enc->output_fp); + // Version (dynamic based on color space) + uint8_t version = enc->ictcp_mode ? 2 : 1; // Version 2 for ICtCp, 1 for YCoCg-R + fputc(version, enc->output_fp); // Video parameters fwrite(&enc->width, sizeof(uint16_t), 1, enc->output_fp); @@ -991,6 +1182,7 @@ int main(int argc, char *argv[]) { {"enable-rcf", no_argument, 0, 1001}, {"enable-progressive", no_argument, 0, 1002}, {"enable-roi", no_argument, 0, 1003}, + {"ictcp", no_argument, 0, 1005}, {"help", no_argument, 0, 1004}, {0, 0, 0, 0} }; @@ -1046,6 +1238,9 @@ int main(int argc, char *argv[]) { case 1001: // --enable-rcf enc->enable_rcf = 1; break; + case 1005: // --ictcp + enc->ictcp_mode = 1; + break; case 1004: // --help show_usage(argv[0]); cleanup_encoder(enc); @@ -1077,6 +1272,7 @@ int main(int argc, char *argv[]) { printf("Wavelet: %s\n", enc->wavelet_filter ? "9/7 irreversible" : "5/3 reversible"); printf("Decomposition levels: %d\n", enc->decomp_levels); printf("Quality: Y=%d, Co=%d, Cg=%d\n", enc->quantizer_y, enc->quantizer_co, enc->quantizer_cg); + printf("Color space: %s\n", enc->ictcp_mode ? "ICtCp" : "YCoCg-R"); // Open output file if (strcmp(enc->output_file, "-") == 0) { @@ -1204,28 +1400,28 @@ int main(int argc, char *argv[]) { int is_keyframe = 1;//(frame_count % keyframe_interval == 0); // Debug: check RGB input data - if (frame_count < 3) { + /*if (frame_count < 3) { printf("Encoder Debug: Frame %d - RGB data (first 16 bytes): ", frame_count); for (int i = 0; i < 16; i++) { printf("%d ", enc->current_frame_rgb[i]); } printf("\n"); - } + }*/ - // Convert RGB to YCoCg - rgb_to_ycocg(enc->current_frame_rgb, - enc->current_frame_y, enc->current_frame_co, enc->current_frame_cg, - enc->width, enc->height); + // Convert RGB to color space (YCoCg-R or ICtCp) + rgb_to_color_space_frame(enc, enc->current_frame_rgb, + enc->current_frame_y, enc->current_frame_co, enc->current_frame_cg, + enc->width, enc->height); // Debug: check YCoCg conversion result - if (frame_count < 3) { + /*if (frame_count < 3) { printf("Encoder Debug: Frame %d - YCoCg result (first 16): ", frame_count); for (int i = 0; i < 16; i++) { printf("Y=%.1f Co=%.1f Cg=%.1f ", enc->current_frame_y[i], enc->current_frame_co[i], enc->current_frame_cg[i]); if (i % 4 == 3) break; // Only show first 4 pixels for readability } printf("\n"); - } + }*/ // Process motion vectors for P-frames int num_tiles = enc->tiles_x * enc->tiles_y;