mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-06-08 22:34:03 +09:00
TAV with ICtCp colour space
This commit is contained in:
@@ -156,7 +156,7 @@ for (let i = 0; i < 7; i++) {
|
|||||||
seqread.readOneByte()
|
seqread.readOneByte()
|
||||||
}
|
}
|
||||||
|
|
||||||
if (header.version !== TAV_VERSION) {
|
if (header.version < 1 || header.version > 2) {
|
||||||
con.puts(`Error: Unsupported TAV version ${header.version}`)
|
con.puts(`Error: Unsupported TAV version ${header.version}`)
|
||||||
errorlevel = 1
|
errorlevel = 1
|
||||||
return
|
return
|
||||||
@@ -185,6 +185,7 @@ console.log(`Wavelet filter: ${header.waveletFilter === WAVELET_5_3_REVERSIBLE ?
|
|||||||
console.log(`Decomposition levels: ${header.decompLevels}`)
|
console.log(`Decomposition levels: ${header.decompLevels}`)
|
||||||
console.log(`Quality: Y=${header.qualityY}, Co=${header.qualityCo}, Cg=${header.qualityCg}`)
|
console.log(`Quality: Y=${header.qualityY}, Co=${header.qualityCo}, Cg=${header.qualityCg}`)
|
||||||
console.log(`Tiles: ${tilesX}x${tilesY} (${numTiles} total)`)
|
console.log(`Tiles: ${tilesX}x${tilesY} (${numTiles} total)`)
|
||||||
|
console.log(`Color space: ${header.version === 2 ? "ICtCp" : "YCoCg-R"}`)
|
||||||
console.log(`Features: ${hasAudio ? "Audio " : ""}${hasSubtitles ? "Subtitles " : ""}${progressiveTransmission ? "Progressive " : ""}${roiCoding ? "ROI " : ""}`)
|
console.log(`Features: ${hasAudio ? "Audio " : ""}${hasSubtitles ? "Subtitles " : ""}${progressiveTransmission ? "Progressive " : ""}${roiCoding ? "ROI " : ""}`)
|
||||||
|
|
||||||
// Frame buffer addresses - same as TEV
|
// Frame buffer addresses - same as TEV
|
||||||
@@ -357,7 +358,8 @@ try {
|
|||||||
header.waveletFilter, // TAV-specific parameter
|
header.waveletFilter, // TAV-specific parameter
|
||||||
header.decompLevels, // TAV-specific parameter
|
header.decompLevels, // TAV-specific parameter
|
||||||
enableDeblocking,
|
enableDeblocking,
|
||||||
isLossless
|
isLossless,
|
||||||
|
header.version // TAV version for color space detection
|
||||||
)
|
)
|
||||||
|
|
||||||
decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0
|
decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0
|
||||||
|
|||||||
@@ -683,7 +683,7 @@ DCT-based compression, motion compensation, and efficient temporal coding.
|
|||||||
- Version 2.1: Added Rate Control Factor to all video packets (breaking change)
|
- Version 2.1: Added Rate Control Factor to all video packets (breaking change)
|
||||||
* Enables bitrate-constrained encoding alongside quality modes
|
* Enables bitrate-constrained encoding alongside quality modes
|
||||||
* All video frames now include 4-byte rate control factor after payload size
|
* All video frames now include 4-byte rate control factor after payload size
|
||||||
- Version 3.0: Additional support of XYB Colour space
|
- Version 3.0: Additional support of ICtCp Colour space
|
||||||
|
|
||||||
# File Structure
|
# File Structure
|
||||||
\x1F T S V M T E V
|
\x1F T S V M T E V
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUint
|
|||||||
import net.torvald.tsvm.peripheral.GraphicsAdapter
|
import net.torvald.tsvm.peripheral.GraphicsAdapter
|
||||||
import net.torvald.tsvm.peripheral.PeriBase
|
import net.torvald.tsvm.peripheral.PeriBase
|
||||||
import net.torvald.tsvm.peripheral.fmod
|
import net.torvald.tsvm.peripheral.fmod
|
||||||
import net.torvald.util.Float16
|
|
||||||
import kotlin.math.*
|
import kotlin.math.*
|
||||||
|
|
||||||
class GraphicsJSR223Delegate(private val vm: VM) {
|
class GraphicsJSR223Delegate(private val vm: VM) {
|
||||||
@@ -2176,14 +2175,14 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val Sp = I + 1.0212710798422344 * Ct - 0.6052744909924316 * Cp
|
val Sp = I + 1.0212710798422344 * Ct - 0.6052744909924316 * Cp
|
||||||
|
|
||||||
// HLG decode: L'M'S' -> linear LMS
|
// HLG decode: L'M'S' -> linear LMS
|
||||||
val L = HLG_inverse_OETF(Lp)
|
val L = HLG_EOTF(Lp)
|
||||||
val M = HLG_inverse_OETF(Mp)
|
val M = HLG_EOTF(Mp)
|
||||||
val S = HLG_inverse_OETF(Sp)
|
val S = HLG_EOTF(Sp)
|
||||||
|
|
||||||
// LMS -> linear sRGB (inverse matrix)
|
// LMS -> linear sRGB (inverse matrix)
|
||||||
val rLin = 3.436606694333079 * L -2.5064521186562705 * M + 0.06984542432319149 * S
|
val rLin = 6.1723815689243215 * L -5.319534979827695 * M + 0.14699442094633924 * S
|
||||||
val gLin = -0.7913295555989289 * L + 1.983600451792291 * M -0.192270896193362 * S
|
val gLin = -1.3243428148026244 * L + 2.560286104841917 * M -0.2359203727576164 * S
|
||||||
val bLin = -0.025949899690592665 * L -0.09891371471172647 * M + 1.1248636144023192 * S
|
val bLin = -0.011819739235953752 * L -0.26473549971186555 * M + 1.2767952602537955 * S
|
||||||
|
|
||||||
// Gamma encode to sRGB
|
// Gamma encode to sRGB
|
||||||
val rSrgb = srgbUnlinearize(rLin)
|
val rSrgb = srgbUnlinearize(rLin)
|
||||||
@@ -2204,7 +2203,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
// Helper functions for ICtCp decoding
|
// Helper functions for ICtCp decoding
|
||||||
|
|
||||||
// Inverse HLG OETF (HLG -> linear)
|
// Inverse HLG OETF (HLG -> linear)
|
||||||
fun HLG_inverse_OETF(V: Double): Double {
|
fun HLG_EOTF(V: Double): Double {
|
||||||
val a = 0.17883277
|
val a = 0.17883277
|
||||||
val b = 1.0 - 4.0 * a
|
val b = 1.0 - 4.0 * a
|
||||||
val c = 0.5 - a * ln(4.0 * a)
|
val c = 0.5 - a * ln(4.0 * a)
|
||||||
@@ -3919,4 +3918,665 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ================= TAV (TSVM Advanced Video) Decoder =================
|
||||||
|
// DWT-based video codec with ICtCp color space support
|
||||||
|
|
||||||
|
/**
 * Decodes one TAV (TSVM Advanced Video) frame from VM memory into an RGB frame buffer.
 *
 * The frame is processed as a grid of 64x64 tiles in row-major order. Every tile starts with
 * a 9-byte header (1-byte mode, 16-bit mvX, 16-bit mvY, 32-bit float rate-control factor).
 * INTRA and INTER tiles are followed by coefficient data, which the per-mode decoders consume
 * and report back by returning the advanced read pointer.
 *
 * Any exception raised while decoding (including an unknown tile mode) is caught, reported
 * via println, and ends decoding of this frame.
 *
 * @param blockDataPtr VM address of the first tile header.
 * @param currentRGBAddr VM address of the RGB buffer (3 bytes per pixel) being written.
 * @param prevRGBAddr VM address of the previous frame's RGB buffer, used by SKIP/INTER/MOTION tiles.
 * @param width frame width in pixels.
 * @param height frame height in pixels.
 * @param qY quantiser multiplier for the first channel.
 * @param qCo quantiser multiplier for the second channel.
 * @param qCg quantiser multiplier for the third channel.
 * @param frameCounter not referenced by this function.
 * @param debugMotionVectors not referenced by this function.
 * @param waveletFilter filter selector forwarded to the inverse DWT (0 selects the 5/3 path).
 * @param decompLevels number of DWT decomposition levels to undo.
 * @param enableDeblocking not referenced by this function.
 * @param isLossless when true the tile decoders force filter type 0.
 * @param tavVersion 2 selects ICtCp output conversion, any other value selects YCoCg.
 */
fun tavDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
              width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, frameCounter: Int,
              debugMotionVectors: Boolean = false, waveletFilter: Int = 1,
              decompLevels: Int = 3, enableDeblocking: Boolean = true,
              isLossless: Boolean = false, tavVersion: Int = 1) {

    var readPtr = blockDataPtr

    try {
        val tilesX = (width + 63) / 64 // 64x64 tiles, rounding up for partial edge tiles
        val tilesY = (height + 63) / 64

        for (tileY in 0 until tilesY) {
            for (tileX in 0 until tilesX) {

                // Tile header (9 bytes: mode + mvX + mvY + rcf)
                val mode = vm.peek(readPtr).toInt() and 0xFF
                readPtr += 1
                val mvX = vm.peekShort(readPtr).toInt()
                readPtr += 2
                val mvY = vm.peekShort(readPtr).toInt()
                readPtr += 2
                val rcf = vm.peekFloat(readPtr)
                readPtr += 4

                when (mode) {
                    0x00 -> { // TAV_MODE_SKIP
                        // Copy 64x64 tile from previous frame to current frame
                        copyTile64x64RGB(tileX, tileY, currentRGBAddr, prevRGBAddr, width, height)
                    }
                    0x01 -> { // TAV_MODE_INTRA
                        // Decode DWT coefficients directly to RGB buffer
                        readPtr = decodeDWTIntraTileRGB(readPtr, tileX, tileY, currentRGBAddr,
                                                        width, height, qY, qCo, qCg, rcf,
                                                        waveletFilter, decompLevels, isLossless, tavVersion)
                    }
                    0x02 -> { // TAV_MODE_INTER
                        // Motion compensation + DWT residual to RGB buffer
                        readPtr = decodeDWTInterTileRGB(readPtr, tileX, tileY, mvX, mvY,
                                                        currentRGBAddr, prevRGBAddr,
                                                        width, height, qY, qCo, qCg, rcf,
                                                        waveletFilter, decompLevels, isLossless, tavVersion)
                    }
                    0x03 -> { // TAV_MODE_MOTION
                        // Motion compensation only (no residual)
                        applyMotionCompensation64x64RGB(tileX, tileY, mvX, mvY,
                                                        currentRGBAddr, prevRGBAddr, width, height)
                    }
                    else -> {
                        // The payload length of an unknown mode cannot be known, so every
                        // following header would be read from the wrong offset. Stop here
                        // and report through the existing error path.
                        throw IllegalStateException(
                            "unknown tile mode 0x${mode.toString(16)} at tile ($tileX, $tileY)"
                        )
                    }
                }
            }
        }

    } catch (e: Exception) {
        println("TAV decode error: ${e.message}")
    }
}
|
||||||
|
|
||||||
|
/**
 * Decodes one intra-coded 64x64 tile and writes it to the RGB buffer.
 *
 * Three planes of 64*64 16-bit coefficients are read back to back starting at [readPtr].
 * Each coefficient is dequantised as `coefficient * q * rcf`, the planes are run through
 * the multi-level inverse DWT, and the result is converted to RGB: ICtCp when
 * [tavVersion] is 2, YCoCg otherwise. When [isLossless] is set, filter type 0 is used
 * regardless of [waveletFilter].
 *
 * @return the VM address immediately after the coefficient data consumed by this tile.
 */
private fun decodeDWTIntraTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
                                  width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, rcf: Float,
                                  waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long {
    val tileSize = 64
    val coeffCount = tileSize * tileSize
    var cursor = readPtr

    // Pulls one plane of quantised coefficients out of VM memory, advancing the cursor.
    fun readPlane(): ShortArray {
        val plane = ShortArray(coeffCount)
        for (i in plane.indices) {
            plane[i] = vm.peekShort(cursor)
            cursor += 2
        }
        return plane
    }

    // Plane order in the stream: first, second, third channel
    val rawY = readPlane()
    val rawCo = readPlane()
    val rawCg = readPlane()

    // Dequantise into float working buffers
    val yTile = FloatArray(coeffCount)
    val coTile = FloatArray(coeffCount)
    val cgTile = FloatArray(coeffCount)
    for (i in 0 until coeffCount) {
        yTile[i] = rawY[i] * qY * rcf
        coTile[i] = rawCo[i] * qCo * rcf
        cgTile[i] = rawCg[i] * qCg * rcf
    }

    // Lossless streams always use filter type 0; lossy streams use the requested filter
    val filterType = if (isLossless) 0 else waveletFilter
    applyDWTInverseMultiLevel(yTile, tileSize, tileSize, decompLevels, filterType)
    applyDWTInverseMultiLevel(coTile, tileSize, tileSize, decompLevels, filterType)
    applyDWTInverseMultiLevel(cgTile, tileSize, tileSize, decompLevels, filterType)

    // Colour conversion depends on the container version (v2 = ICtCp, otherwise YCoCg)
    when (tavVersion) {
        2 -> convertICtCpTileToRGB(tileX, tileY, yTile, coTile, cgTile, currentRGBAddr, width, height)
        else -> convertYCoCgTileToRGB(tileX, tileY, yTile, coTile, cgTile, currentRGBAddr, width, height)
    }

    return cursor
}
|
||||||
|
|
||||||
|
/**
 * Converts a decoded 64x64 YCoCg tile to RGB and stores it in the frame buffer at [rgbAddr].
 *
 * Only pixels that fall inside the width x height frame are written, so partial tiles on the
 * right and bottom edges are clipped. Channel values are truncated to Int and clamped
 * to 0..255 before being stored as three consecutive bytes (R, G, B).
 */
private fun convertYCoCgTileToRGB(tileX: Int, tileY: Int, yTile: FloatArray, coTile: FloatArray, cgTile: FloatArray,
                                  rgbAddr: Long, width: Int, height: Int) {
    val tileSize = 64
    val originX = tileX * tileSize
    val originY = tileY * tileSize
    // Clip the tile rectangle against the frame once instead of testing every pixel
    val endX = minOf(originX + tileSize, width)
    val endY = minOf(originY + tileSize, height)

    for (frameY in originY until endY) {
        val tileRowBase = (frameY - originY) * tileSize
        for (frameX in originX until endX) {
            val sample = tileRowBase + (frameX - originX)

            val luma = yTile[sample]
            val orange = coTile[sample]
            val green = cgTile[sample]

            // Lifting-style inverse: recover G and B from the shared intermediate, then R from B
            val shared = luma - green / 2.0f
            val g = green + shared
            val b = shared - orange / 2.0f
            val r = orange + b

            val pixelBase = rgbAddr + (frameY * width + frameX) * 3L
            vm.poke(pixelBase, r.toInt().coerceIn(0, 255).toByte())
            vm.poke(pixelBase + 1, g.toInt().coerceIn(0, 255).toByte())
            vm.poke(pixelBase + 2, b.toInt().coerceIn(0, 255).toByte())
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Converts a decoded 64x64 ICtCp tile to 8-bit sRGB and stores it in the frame buffer at [rgbAddr].
 *
 * The stored samples are rescaled first: I is divided by 255, while Ct and Cp are
 * re-centred by 127.5 and divided by 255. The pipeline is then
 * ICtCp -> L'M'S' (matrix) -> LMS (HLG_EOTF) -> linear RGB (matrix) -> sRGB (srgbUnlinearize).
 * Pixels outside the width x height frame are skipped.
 */
private fun convertICtCpTileToRGB(tileX: Int, tileY: Int, iTile: FloatArray, ctTile: FloatArray, cpTile: FloatArray,
                                  rgbAddr: Long, width: Int, height: Int) {
    val tileSize = 64
    val originX = tileX * tileSize
    val originY = tileY * tileSize
    // Clip the tile rectangle against the frame once instead of testing every pixel
    val endX = minOf(originX + tileSize, width)
    val endY = minOf(originY + tileSize, height)

    for (frameY in originY until endY) {
        val tileRowBase = (frameY - originY) * tileSize
        for (frameX in originX until endX) {
            val sample = tileRowBase + (frameX - originX)

            // Undo the 8-bit storage scaling
            val i = iTile[sample].toDouble() / 255.0
            val ct = (ctTile[sample].toDouble() - 127.5) / 255.0
            val cp = (cpTile[sample].toDouble() - 127.5) / 255.0

            // ICtCp -> L'M'S' (inverse matrix)
            val lPrime = i + 0.015718580108730416 * ct + 0.2095810681164055 * cp
            val mPrime = i - 0.015718580108730416 * ct - 0.20958106811640548 * cp
            val sPrime = i + 1.0212710798422344 * ct - 0.6052744909924316 * cp

            // HLG decode: L'M'S' -> linear LMS
            val l = HLG_EOTF(lPrime)
            val m = HLG_EOTF(mPrime)
            val s = HLG_EOTF(sPrime)

            // LMS -> linear sRGB (inverse matrix)
            val redLinear = 6.1723815689243215 * l - 5.319534979827695 * m + 0.14699442094633924 * s
            val greenLinear = -1.3243428148026244 * l + 2.560286104841917 * m - 0.2359203727576164 * s
            val blueLinear = -0.011819739235953752 * l - 0.26473549971186555 * m + 1.2767952602537955 * s

            // Gamma encode to sRGB
            val red = srgbUnlinearize(redLinear)
            val green = srgbUnlinearize(greenLinear)
            val blue = srgbUnlinearize(blueLinear)

            val pixelBase = rgbAddr + (frameY * width + frameX) * 3L
            vm.poke(pixelBase, (red * 255.0).toInt().coerceIn(0, 255).toByte())
            vm.poke(pixelBase + 1, (green * 255.0).toInt().coerceIn(0, 255).toByte())
            vm.poke(pixelBase + 2, (blue * 255.0).toInt().coerceIn(0, 255).toByte())
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Adds a 64x64 YCoCg residual tile to the RGB pixels already present at [rgbAddr].
 *
 * Each in-frame pixel is read back, converted to Y/Co/Cg, offset by the residual,
 * converted back to RGB, truncated, clamped to 0..255 and written in place.
 *
 * NOTE(review): the transform used here halves the colour differences (Co = (R - B) / 2),
 * whereas convertYCoCgTileToRGB inverts the un-halved lifting form. Confirm which scaling
 * the encoder's residuals use. No caller of this function is visible in this part of the file.
 */
private fun addYCoCgResidualToRGBTile(tileX: Int, tileY: Int, yRes: FloatArray, coRes: FloatArray, cgRes: FloatArray,
                                      rgbAddr: Long, width: Int, height: Int) {
    val tileSize = 64
    val originX = tileX * tileSize
    val originY = tileY * tileSize
    // Clip the tile rectangle against the frame once instead of testing every pixel
    val endX = minOf(originX + tileSize, width)
    val endY = minOf(originY + tileSize, height)

    for (frameY in originY until endY) {
        val tileRowBase = (frameY - originY) * tileSize
        for (frameX in originX until endX) {
            val sample = tileRowBase + (frameX - originX)
            val rgbOffset = (frameY * width + frameX) * 3L

            // Pixel currently in the buffer (the prediction), as unsigned channel values
            val predR = (vm.peek(rgbAddr + rgbOffset).toInt() and 0xFF).toFloat()
            val predG = (vm.peek(rgbAddr + rgbOffset + 1).toInt() and 0xFF).toFloat()
            val predB = (vm.peek(rgbAddr + rgbOffset + 2).toInt() and 0xFF).toFloat()

            // Prediction RGB -> YCoCg
            val predCo = (predR - predB) / 2
            val predMid = predB + predCo
            val predCg = (predG - predMid) / 2
            val predY = predMid + predCg

            // Apply the decoded residual per channel
            val sumY = predY + yRes[sample]
            val sumCo = predCo + coRes[sample]
            val sumCg = predCg + cgRes[sample]

            // YCoCg -> RGB
            val mid = sumY - sumCg
            val outG = sumY + sumCg
            val outB = mid - sumCo
            val outR = mid + sumCo

            vm.poke(rgbAddr + rgbOffset, outR.toInt().coerceIn(0, 255).toByte())
            vm.poke(rgbAddr + rgbOffset + 1, outG.toInt().coerceIn(0, 255).toByte())
            vm.poke(rgbAddr + rgbOffset + 2, outB.toInt().coerceIn(0, 255).toByte())
        }
    }
}
|
||||||
|
|
||||||
|
// Helper functions for the TAV decoder: tile copy, motion compensation, and inverse DWT (simplified versions of existing DWT functions)
|
||||||
|
/**
 * Copies one 64x64 tile of RGB pixels from the previous frame buffer to the current one.
 *
 * Source and destination use the same pixel offset; pixels outside the width x height
 * frame are skipped, so partial edge tiles are clipped.
 */
private fun copyTile64x64RGB(tileX: Int, tileY: Int, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int) {
    val tileSize = 64
    val originX = tileX * tileSize
    val originY = tileY * tileSize
    // Clip the tile rectangle against the frame once instead of testing every pixel
    val endX = minOf(originX + tileSize, width)
    val endY = minOf(originY + tileSize, height)

    for (frameY in originY until endY) {
        for (frameX in originX until endX) {
            val rgbOffset = (frameY * width + frameX) * 3L
            val src = prevRGBAddr + rgbOffset
            val dst = currentRGBAddr + rgbOffset

            // Read the whole pixel before writing any of it
            val red = vm.peek(src)
            val green = vm.peek(src + 1)
            val blue = vm.peek(src + 2)

            vm.poke(dst, red)
            vm.poke(dst + 1, green)
            vm.poke(dst + 2, blue)
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Decodes one inter-coded 64x64 tile: motion compensation followed by the tile's DWT data.
 *
 * NOTE(review): the second step reuses decodeDWTIntraTileRGB, whose colour-conversion
 * helpers store the decoded pixel values without reading what is already in the buffer.
 * As written, the motion-compensated prediction appears to be overwritten rather than
 * having a residual added to it. Confirm against the encoder whether a residual-add path
 * is intended here.
 *
 * @return the VM address immediately after the coefficient data consumed by this tile.
 */
private fun decodeDWTInterTileRGB(readPtr: Long, tileX: Int, tileY: Int, mvX: Int, mvY: Int,
                                  currentRGBAddr: Long, prevRGBAddr: Long,
                                  width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, rcf: Float,
                                  waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long {
    // Step 1: build the prediction from the previous frame
    applyMotionCompensation64x64RGB(tileX, tileY, mvX, mvY, currentRGBAddr, prevRGBAddr, width, height)

    // Step 2: decode the tile's coefficient data through the intra path
    val nextReadPtr = decodeDWTIntraTileRGB(
        readPtr, tileX, tileY, currentRGBAddr,
        width, height, qY, qCo, qCg, rcf,
        waveletFilter, decompLevels, isLossless, tavVersion
    )
    return nextReadPtr
}
|
||||||
|
|
||||||
|
/**
 * Fills one 64x64 tile of the current frame with motion-compensated pixels from the previous frame.
 *
 * The motion vector is in quarter-pixel units; each destination pixel samples the previous
 * frame at (tile origin + mv / 4 + pixel offset) using bilinearInterpolateRGB.
 *
 * Destination pixels are clipped per axis against the width x height frame. The earlier
 * check on the linear pixel index alone let columns past the right edge of a partial tile
 * wrap around into the following row and overwrite pixels belonging to other tiles.
 */
private fun applyMotionCompensation64x64RGB(tileX: Int, tileY: Int, mvX: Int, mvY: Int,
                                            currentRGBAddr: Long, prevRGBAddr: Long,
                                            width: Int, height: Int) {
    val tileSize = 64
    val startX = tileX * tileSize
    val startY = tileY * tileSize

    // Motion vectors in quarter-pixel precision
    val refX = startX + (mvX / 4.0f)
    val refY = startY + (mvY / 4.0f)

    for (y in 0 until tileSize) {
        val frameY = startY + y
        if (frameY < 0 || frameY >= height) continue

        for (x in 0 until tileSize) {
            val frameX = startX + x
            // Per-axis test: a column outside the frame must be skipped, not wrapped
            if (frameX < 0 || frameX >= width) continue

            // Bilinear interpolation for sub-pixel motion vectors
            val srcX = refX + x
            val srcY = refY + y
            val interpolatedRGB = bilinearInterpolateRGB(prevRGBAddr, width, height, srcX, srcY)

            val rgbOffset = (frameY * width + frameX) * 3L
            vm.poke(currentRGBAddr + rgbOffset, interpolatedRGB[0])
            vm.poke(currentRGBAddr + rgbOffset + 1, interpolatedRGB[1])
            vm.poke(currentRGBAddr + rgbOffset + 2, interpolatedRGB[2])
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Samples an RGB frame buffer at a fractional position using bilinear interpolation.
 *
 * The four pixels surrounding (x, y) are blended per channel, truncated and clamped to 0..255.
 * If any of those four pixels lies outside the frame, black (0, 0, 0) is returned. This
 * includes positions on the last column or row, because the right/bottom neighbour is
 * then out of range.
 *
 * @return a 3-element array holding the R, G and B bytes.
 */
private fun bilinearInterpolateRGB(rgbPtr: Long, width: Int, height: Int, x: Float, y: Float): ByteArray {
    val left = kotlin.math.floor(x).toInt()
    val top = kotlin.math.floor(y).toInt()
    val right = left + 1
    val bottom = top + 1

    val allCornersInside = left >= 0 && top >= 0 && right < width && bottom < height
    if (!allCornersInside) {
        return byteArrayOf(0, 0, 0) // Out of bounds - return black
    }

    // Fractional position inside the 2x2 neighbourhood
    val fracX = x - left
    val fracY = y - top

    val topLeft = getRGBPixel(rgbPtr, top * width + left)
    val topRight = getRGBPixel(rgbPtr, top * width + right)
    val bottomLeft = getRGBPixel(rgbPtr, bottom * width + left)
    val bottomRight = getRGBPixel(rgbPtr, bottom * width + right)

    return ByteArray(3) { channel ->
        val blended = (1 - fracX) * (1 - fracY) * (topLeft[channel].toInt() and 0xFF) +
                      fracX * (1 - fracY) * (topRight[channel].toInt() and 0xFF) +
                      (1 - fracX) * fracY * (bottomLeft[channel].toInt() and 0xFF) +
                      fracX * fracY * (bottomRight[channel].toInt() and 0xFF)
        blended.toInt().coerceIn(0, 255).toByte()
    }
}
|
||||||
|
|
||||||
|
/**
 * Reads the three consecutive bytes of pixel [pixelIdx] from the RGB buffer at [rgbPtr].
 *
 * @return a 3-element array with the bytes at offsets 0, 1 and 2 of that pixel.
 */
private fun getRGBPixel(rgbPtr: Long, pixelIdx: Int): ByteArray {
    val pixelBase = rgbPtr + pixelIdx * 3L // 3 bytes per pixel
    val first = vm.peek(pixelBase)
    val second = vm.peek(pixelBase + 1)
    val third = vm.peek(pixelBase + 2)
    return byteArrayOf(first, second, third)
}
|
||||||
|
|
||||||
|
/**
 * Placeholder for the forward 5/3 reversible DWT. Currently does nothing and leaves
 * [data] untouched.
 */
private fun applyDWT53Forward(data: FloatArray, width: Int, height: Int) {
    // TODO: implement the forward 5/3 DWT (lifting scheme for the 5/3 reversible filter)
    return
}
|
||||||
|
|
||||||
|
/**
 * Single-level 2D inverse pass for the 5/3 filter over a width x height row-major buffer.
 *
 * Every row is run through applyLift53InverseHorizontal first, then every column through
 * applyLift53InverseVertical. The buffer is modified in place.
 */
private fun applyDWT53Inverse(data: FloatArray, width: Int, height: Int) {
    // Pass 1: rows
    val rowBuffer = FloatArray(width)
    for (row in 0 until height) {
        val rowStart = row * width
        for (col in rowBuffer.indices) {
            rowBuffer[col] = data[rowStart + col]
        }
        applyLift53InverseHorizontal(rowBuffer, width)
        for (col in rowBuffer.indices) {
            data[rowStart + col] = rowBuffer[col]
        }
    }

    // Pass 2: columns (stride of one full row between consecutive samples)
    val columnBuffer = FloatArray(height)
    for (col in 0 until width) {
        for (row in columnBuffer.indices) {
            columnBuffer[row] = data[row * width + col]
        }
        applyLift53InverseVertical(columnBuffer, height)
        for (row in columnBuffer.indices) {
            data[row * width + col] = columnBuffer[row]
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Placeholder for the forward 9/7 irreversible DWT. Currently does nothing and leaves
 * [data] untouched.
 */
private fun applyDWT97Forward(data: FloatArray, width: Int, height: Int) {
    // TODO: implement the forward 9/7 DWT (lifting scheme for the 9/7 irreversible filter)
    return
}
|
||||||
|
|
||||||
|
/**
 * Multi-level 2D inverse DWT over a square tile stored row-major in [data].
 *
 * Levels are undone from the coarsest (smallest region) to the finest. At each level the
 * top-left `size shr level` square is processed: columns first, then rows, which is the
 * reverse of a rows-then-columns forward transform.
 *
 * A level whose region is smaller than 2 samples has nothing to undo and is skipped.
 * Because the loop runs from the coarsest level down, leaving the loop at that point
 * (as the earlier code did) would also drop every finer level that still needs undoing.
 *
 * @param data tile samples, modified in place.
 * @param width tile side length; also used as the row stride.
 * @param height not referenced; the tile is treated as width x width.
 * @param levels number of decomposition levels to undo.
 * @param filterType 0 selects applyDWT53Inverse1D, any other value applyDWT97Inverse1D.
 */
private fun applyDWTInverseMultiLevel(data: FloatArray, width: Int, height: Int, levels: Int, filterType: Int) {
    val size = width // Full tile size (64)
    val tempRow = FloatArray(size)
    val tempCol = FloatArray(size)

    // Dispatches to the 1D inverse matching the requested filter
    fun inverse1D(line: FloatArray, length: Int) {
        if (filterType == 0) {
            applyDWT53Inverse1D(line, length)
        } else {
            applyDWT97Inverse1D(line, length)
        }
    }

    for (level in levels - 1 downTo 0) {
        // Int shifts only use the low 5 bits of the count, so treat huge levels as empty
        val currentSize = if (level < 31) size shr level else 0
        if (currentSize < 2) continue

        // Column inverse transform first
        for (x in 0 until currentSize) {
            for (y in 0 until currentSize) {
                tempCol[y] = data[y * size + x]
            }
            inverse1D(tempCol, currentSize)
            for (y in 0 until currentSize) {
                data[y * size + x] = tempCol[y]
            }
        }

        // Row inverse transform second
        for (y in 0 until currentSize) {
            for (x in 0 until currentSize) {
                tempRow[x] = data[y * size + x]
            }
            inverse1D(tempRow, currentSize)
            for (x in 0 until currentSize) {
                data[y * size + x] = tempRow[x]
            }
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Single-level 2D inverse pass for the 9/7 filter over a width x height row-major buffer.
 *
 * Every row is passed to applyLift97InverseHorizontal first, then every column to
 * applyLift97InverseVertical. Both helpers are currently TODO() stubs, so calling this
 * on a non-empty buffer raises NotImplementedError.
 */
private fun applyDWT97Inverse(data: FloatArray, width: Int, height: Int) {
    // Pass 1: rows
    val rowBuffer = FloatArray(width)
    for (row in 0 until height) {
        val rowStart = row * width
        for (col in rowBuffer.indices) {
            rowBuffer[col] = data[rowStart + col]
        }
        applyLift97InverseHorizontal(rowBuffer, width)
        for (col in rowBuffer.indices) {
            data[rowStart + col] = rowBuffer[col]
        }
    }

    // Pass 2: columns (stride of one full row between consecutive samples)
    val columnBuffer = FloatArray(height)
    for (col in 0 until width) {
        for (row in columnBuffer.indices) {
            columnBuffer[row] = data[row * width + col]
        }
        applyLift97InverseVertical(columnBuffer, height)
        for (row in columnBuffer.indices) {
            data[row * width + col] = columnBuffer[row]
        }
    }
}
|
||||||
|
|
||||||
|
/** Not implemented yet: always throws NotImplementedError via TODO(). */
private fun applyLift97InverseHorizontal(row: FloatArray, width: Int): Unit = TODO()
|
||||||
|
/** Not implemented yet: always throws NotImplementedError via TODO(). */
private fun applyLift97InverseVertical(col: FloatArray, height: Int): Unit = TODO()
|
||||||
|
|
||||||
|
// 1D lifting scheme implementations for 5/3 filter
|
||||||
|
/**
 * In-place 1D lifting pass over the first [length] samples of [data].
 *
 * The input is treated as interleaved: even indices form one sequence, odd indices the other.
 * The even samples are first increased by a rounded average of their neighbouring odd
 * samples, then the odd samples are decreased by the average of their neighbouring
 * (already updated) even samples. Lengths below 2 are left untouched.
 *
 * NOTE(review): this reads interleaved input, while applyDWT53Inverse1D reads a
 * [low | high] half-split layout, and the signs here are the opposite of what the step
 * names "undo update" / "undo predict" would suggest. Confirm against the encoder before
 * relying on this path.
 */
private fun applyLift53InverseHorizontal(data: FloatArray, length: Int) {
    if (length < 2) return

    val evenCount = (length + 1) / 2
    val oddCount = length / 2

    // De-interleave into separate working sequences
    val even = FloatArray(evenCount) { data[2 * it] }
    val odd = FloatArray(oddCount) { data[2 * it + 1] }

    // Even samples: add the neighbour-odd term. The first sample has no left neighbour
    // and uses half of odd[0]; a missing right neighbour counts as zero.
    for (i in 0 until evenCount) {
        if (i == 0) {
            even[0] += odd[0] / 2.0f
        } else {
            val previousOdd = odd[i - 1]
            val currentOdd = if (i < oddCount) odd[i] else 0.0f
            even[i] += (previousOdd + currentOdd + 2.0f) / 4.0f
        }
    }

    // Odd samples: subtract the mean of the two surrounding even samples, repeating
    // the last even sample at the right boundary.
    for (i in 0 until oddCount) {
        val nextEven = if (i + 1 < evenCount) even[i + 1] else even[evenCount - 1]
        odd[i] -= (even[i] + nextEven) / 2.0f
    }

    // Interleave back
    for (i in 0 until evenCount) {
        data[2 * i] = even[i]
    }
    for (i in 0 until oddCount) {
        data[2 * i + 1] = odd[i]
    }
}
|
||||||
|
|
||||||
|
/**
 * Column variant of the 5/3 lifting pass. The caller supplies the column as a contiguous
 * array, so this simply delegates to applyLift53InverseHorizontal.
 */
private fun applyLift53InverseVertical(data: FloatArray, length: Int) =
    applyLift53InverseHorizontal(data, length)
|
||||||
|
|
||||||
|
// 1D lifting scheme implementations for 9/7 irreversible filter
|
||||||
|
/**
 * In-place 1D inverse lifting pass for the 9/7 filter over the first [length] samples of [data].
 *
 * Input layout: first `length / 2` samples are low-pass, the next `length / 2` are high-pass.
 * Output layout: interleaved, low-pass on even indices and high-pass on odd indices.
 * The steps are: undo scaling by K, then four lifting steps (delta, gamma, beta, alpha),
 * each subtracting `coefficient * (left + right)` with the boundary sample's own partner
 * reused where a neighbour is missing. For odd [length] the final sample is left untouched;
 * lengths below 2 are a no-op.
 *
 * NOTE(review): each step uses neighbours at i - 1 and i + 1 of the opposite band rather
 * than the i / i + 1 (or i - 1 / i) pairs of the textbook 9/7 lifting scheme. This is only
 * correct if the encoder's forward transform uses the same neighbours - confirm.
 */
private fun applyDWT97Inverse1D(data: FloatArray, length: Int) {
    if (length < 2) return

    val half = length / 2
    val lastIndex = half - 1

    // Split into the two bands (matching encoder layout: low first, then high)
    val low = FloatArray(half) { data[it] }
    val high = FloatArray(half) { data[half + it] }

    // 9/7 lifting coefficients
    val alpha = -1.586134342f
    val beta = -0.052980118f
    val gamma = 0.882911076f
    val delta = 0.443506852f
    val scale = 1.230174105f

    // One lifting step: target[i] -= coefficient * (source[i - 1] + source[i + 1]),
    // substituting source[i] for a neighbour that falls outside the band.
    fun lift(target: FloatArray, source: FloatArray, coefficient: Float) {
        for (i in 0 until half) {
            val left = if (i > 0) source[i - 1] else source[i]
            val right = if (i < lastIndex) source[i + 1] else source[i]
            target[i] -= coefficient * (left + right)
        }
    }

    // Step 5: undo scaling
    for (i in 0 until half) {
        low[i] /= scale
        high[i] *= scale
    }

    lift(low, high, delta)  // Step 4: undo update (delta)
    lift(high, low, gamma)  // Step 3: undo predict (gamma)
    lift(low, high, beta)   // Step 2: undo update (beta)
    lift(high, low, alpha)  // Step 1: undo predict (alpha)

    // Merge back: low-pass to even positions, high-pass to odd positions
    for (i in 0 until half) {
        data[2 * i] = low[i]
        data[2 * i + 1] = high[i]
    }
}
|
||||||
|
|
||||||
|
/**
 * In-place 1D inverse lifting pass for the 5/3 filter over the first [length] samples of [data].
 *
 * Input layout: first `length / 2` samples are low-pass, the next `length / 2` are high-pass.
 * Output layout: interleaved, low-pass on even indices and high-pass on odd indices.
 * For odd [length] the final sample is left untouched; lengths below 2 are a no-op.
 *
 * NOTE(review): two details should be confirmed against the encoder's forward transform.
 * (1) In the update step the current high-pass sample is treated as zero for the last
 * index, although it exists. (2) The predict step subtracts 0.5 * (left + right); undoing
 * a forward step that subtracts the prediction would need an addition instead.
 */
private fun applyDWT53Inverse1D(data: FloatArray, length: Int) {
    if (length < 2) return

    val half = length / 2
    val lastIndex = half - 1

    // Split into the two bands (matching encoder layout: low first, then high)
    val low = FloatArray(half) { data[it] }
    val high = FloatArray(half) { data[half + it] }

    // Step 2: undo update step (1/4 coefficient)
    for (i in 0 until half) {
        val previousHigh = if (i > 0) high[i - 1] else 0.0f
        val currentHigh = if (i < lastIndex) high[i] else 0.0f
        low[i] -= 0.25f * (previousHigh + currentHigh)
    }

    // Step 1: undo predict step (1/2 coefficient); the last low sample is repeated at the edge
    for (i in 0 until half) {
        val nextLow = if (i < lastIndex) low[i + 1] else low[i]
        high[i] -= 0.5f * (low[i] + nextLow)
    }

    // Merge back: low-pass to even positions, high-pass to odd positions
    for (i in 0 until half) {
        data[2 * i] = low[i]
        data[2 * i + 1] = high[i]
    }
}
|
||||||
|
|
||||||
|
/**
 * Samples a row-major plane of 32-bit floats in VM memory at a fractional position
 * using bilinear interpolation.
 *
 * Returns 0 when any of the four surrounding samples lies outside the width x height
 * plane, which includes positions on the last column or row.
 */
private fun bilinearInterpolate(
    dataPtr: Long, width: Int, height: Int,
    x: Float, y: Float
): Float {
    val left = floor(x).toInt()
    val top = floor(y).toInt()
    val right = left + 1
    val bottom = top + 1

    val allCornersInside = left >= 0 && top >= 0 && right < width && bottom < height
    if (!allCornersInside) {
        return 0.0f // Out of bounds
    }

    // Fractional position inside the 2x2 neighbourhood
    val fracX = x - left
    val fracY = y - top

    // Each sample occupies 4 bytes
    val topLeft = vm.peekFloat(dataPtr + (top * width + left) * 4L)!!
    val topRight = vm.peekFloat(dataPtr + (top * width + right) * 4L)!!
    val bottomLeft = vm.peekFloat(dataPtr + (bottom * width + left) * 4L)!!
    val bottomRight = vm.peekFloat(dataPtr + (bottom * width + right) * 4L)!!

    return topLeft * (1 - fracX) * (1 - fracY) +
           topRight * fracX * (1 - fracY) +
           bottomLeft * (1 - fracX) * fracY +
           bottomRight * fracX * fracY
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -69,7 +69,9 @@ static inline float float16_to_float(uint16_t hbits) {
|
|||||||
|
|
||||||
// TSVM Advanced Video (TAV) format constants
|
// TSVM Advanced Video (TAV) format constants
|
||||||
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV"
|
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV"
|
||||||
#define TAV_VERSION 1 // Initial DWT implementation
|
// TAV version - dynamic based on color space mode
|
||||||
|
// Version 1: YCoCg-R (default)
|
||||||
|
// Version 2: ICtCp (--ictcp flag)
|
||||||
|
|
||||||
// Tile encoding modes (64x64 tiles)
|
// Tile encoding modes (64x64 tiles)
|
||||||
#define TAV_MODE_SKIP 0x00 // Skip tile (copy from reference)
|
#define TAV_MODE_SKIP 0x00 // Skip tile (copy from reference)
|
||||||
@@ -193,6 +195,7 @@ typedef struct {
|
|||||||
int enable_roi;
|
int enable_roi;
|
||||||
int verbose;
|
int verbose;
|
||||||
int test_mode;
|
int test_mode;
|
||||||
|
int ictcp_mode; // 0 = YCoCg-R (default), 1 = ICtCp color space
|
||||||
|
|
||||||
// Frame buffers
|
// Frame buffers
|
||||||
uint8_t *current_frame_rgb;
|
uint8_t *current_frame_rgb;
|
||||||
@@ -271,6 +274,7 @@ static void show_usage(const char *program_name) {
|
|||||||
printf(" --enable-rcf Enable per-tile rate control (experimental)\n");
|
printf(" --enable-rcf Enable per-tile rate control (experimental)\n");
|
||||||
printf(" --enable-progressive Enable progressive transmission\n");
|
printf(" --enable-progressive Enable progressive transmission\n");
|
||||||
printf(" --enable-roi Enable region-of-interest coding\n");
|
printf(" --enable-roi Enable region-of-interest coding\n");
|
||||||
|
printf(" --ictcp Use ICtCp color space instead of YCoCg-R (generates TAV version 2)\n");
|
||||||
printf(" --help Show this help\n\n");
|
printf(" --help Show this help\n\n");
|
||||||
|
|
||||||
printf("Audio Rate by Quality:\n ");
|
printf("Audio Rate by Quality:\n ");
|
||||||
@@ -567,7 +571,7 @@ static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
|
|||||||
int16_t *quantized_cg = malloc(tile_size * sizeof(int16_t));
|
int16_t *quantized_cg = malloc(tile_size * sizeof(int16_t));
|
||||||
|
|
||||||
// Debug: check DWT coefficients before quantization
|
// Debug: check DWT coefficients before quantization
|
||||||
if (tile_x == 0 && tile_y == 0) {
|
/*if (tile_x == 0 && tile_y == 0) {
|
||||||
printf("Encoder Debug: Tile (0,0) - DWT Y coeffs before quantization (first 16): ");
|
printf("Encoder Debug: Tile (0,0) - DWT Y coeffs before quantization (first 16): ");
|
||||||
for (int i = 0; i < 16; i++) {
|
for (int i = 0; i < 16; i++) {
|
||||||
printf("%.2f ", tile_y_data[i]);
|
printf("%.2f ", tile_y_data[i]);
|
||||||
@@ -575,20 +579,20 @@ static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
|
|||||||
printf("\n");
|
printf("\n");
|
||||||
printf("Encoder Debug: Quantizers - Y=%d, Co=%d, Cg=%d, rcf=%.2f\n",
|
printf("Encoder Debug: Quantizers - Y=%d, Co=%d, Cg=%d, rcf=%.2f\n",
|
||||||
enc->quantizer_y, enc->quantizer_co, enc->quantizer_cg, mv->rate_control_factor);
|
enc->quantizer_y, enc->quantizer_co, enc->quantizer_cg, mv->rate_control_factor);
|
||||||
}
|
}*/
|
||||||
|
|
||||||
quantize_dwt_coefficients((float*)tile_y_data, quantized_y, tile_size, enc->quantizer_y, mv->rate_control_factor);
|
quantize_dwt_coefficients((float*)tile_y_data, quantized_y, tile_size, enc->quantizer_y, mv->rate_control_factor);
|
||||||
quantize_dwt_coefficients((float*)tile_co_data, quantized_co, tile_size, enc->quantizer_co, mv->rate_control_factor);
|
quantize_dwt_coefficients((float*)tile_co_data, quantized_co, tile_size, enc->quantizer_co, mv->rate_control_factor);
|
||||||
quantize_dwt_coefficients((float*)tile_cg_data, quantized_cg, tile_size, enc->quantizer_cg, mv->rate_control_factor);
|
quantize_dwt_coefficients((float*)tile_cg_data, quantized_cg, tile_size, enc->quantizer_cg, mv->rate_control_factor);
|
||||||
|
|
||||||
// Debug: check quantized coefficients after quantization
|
// Debug: check quantized coefficients after quantization
|
||||||
if (tile_x == 0 && tile_y == 0) {
|
/*if (tile_x == 0 && tile_y == 0) {
|
||||||
printf("Encoder Debug: Tile (0,0) - Quantized Y coeffs (first 16): ");
|
printf("Encoder Debug: Tile (0,0) - Quantized Y coeffs (first 16): ");
|
||||||
for (int i = 0; i < 16; i++) {
|
for (int i = 0; i < 16; i++) {
|
||||||
printf("%d ", quantized_y[i]);
|
printf("%d ", quantized_y[i]);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}*/
|
||||||
|
|
||||||
// Write quantized coefficients
|
// Write quantized coefficients
|
||||||
memcpy(buffer + offset, quantized_y, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t);
|
memcpy(buffer + offset, quantized_y, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t);
|
||||||
@@ -647,13 +651,13 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Debug: check input data before DWT
|
// Debug: check input data before DWT
|
||||||
if (tile_x == 0 && tile_y == 0) {
|
/*if (tile_x == 0 && tile_y == 0) {
|
||||||
printf("Encoder Debug: Tile (0,0) - Y data before DWT (first 16): ");
|
printf("Encoder Debug: Tile (0,0) - Y data before DWT (first 16): ");
|
||||||
for (int i = 0; i < 16; i++) {
|
for (int i = 0; i < 16; i++) {
|
||||||
printf("%.2f ", tile_y_data[i]);
|
printf("%.2f ", tile_y_data[i]);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}*/
|
||||||
|
|
||||||
// Apply DWT transform to each channel
|
// Apply DWT transform to each channel
|
||||||
dwt_2d_forward(tile_y_data, enc->decomp_levels, enc->wavelet_filter);
|
dwt_2d_forward(tile_y_data, enc->decomp_levels, enc->wavelet_filter);
|
||||||
@@ -763,6 +767,192 @@ static void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------- ICtCp Implementation ----------------------
|
||||||
|
|
||||||
|
static inline int iround(double v) { return (int)floor(v + 0.5); }
|
||||||
|
|
||||||
|
// ---------------------- sRGB gamma helpers ----------------------
|
||||||
|
static inline double srgb_linearize(double val) {
|
||||||
|
if (val <= 0.04045) return val / 12.92;
|
||||||
|
return pow((val + 0.055) / 1.055, 2.4);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline double srgb_unlinearize(double val) {
|
||||||
|
if (val <= 0.0031308) return 12.92 * val;
|
||||||
|
return 1.055 * pow(val, 1.0/2.4) - 0.055;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------- HLG OETF/EOTF ----------------------
|
||||||
|
static inline double HLG_OETF(double E) {
|
||||||
|
const double a = 0.17883277;
|
||||||
|
const double b = 0.28466892; // 1 - 4*a
|
||||||
|
const double c = 0.55991073; // 0.5 - a*ln(4*a)
|
||||||
|
|
||||||
|
if (E <= 1.0/12.0) return sqrt(3.0 * E);
|
||||||
|
return a * log(12.0 * E - b) + c;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline double HLG_EOTF(double Ep) {
|
||||||
|
const double a = 0.17883277;
|
||||||
|
const double b = 0.28466892;
|
||||||
|
const double c = 0.55991073;
|
||||||
|
|
||||||
|
if (Ep <= 0.5) {
|
||||||
|
double val = Ep * Ep / 3.0;
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
double val = (exp((Ep - c) / a) + b) / 12.0;
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Linear sRGB -> LMS.
// Rows produce L, M, S; columns multiply R, G, B (see srgb8_to_ictcp_hlg).
static const double M_RGB_TO_LMS[3][3] = {
    /* L */ { 0.2958564579364564,   0.6230869483219083,  0.08106989398623762 },
    /* M */ { 0.15627390752659093,  0.727308963512872,   0.11639736914944238 },
    /* S */ { 0.035141262332177715, 0.15657109121101628, 0.8080956851990795  }
};
|
||||||
|
|
||||||
|
// LMS -> linear sRGB.
// Rows produce R, G, B; columns multiply L, M, S (see ictcp_hlg_to_srgb8).
static const double M_LMS_TO_RGB[3][3] = {
    /* R */ {  6.1723815689243215,   -5.319534979827695,    0.14699442094633924 },
    /* G */ { -1.3243428148026244,    2.560286104841917,   -0.2359203727576164  },
    /* B */ { -0.011819739235953752, -0.26473549971186555,  1.2767952602537955  }
};
|
||||||
|
|
||||||
|
// L' M' S' -> I Ct Cp.
// Coefficients are the BT.2100 integer constants, each divided by 4096.
// Rows produce I, Ct, Cp; columns multiply L', M', S'.
static const double M_LMSPRIME_TO_ICTCP[3][3] = {
    /* I  */ { 2048.0/4096.0,  2048.0/4096.0,     0.0         },
    /* Ct */ { 3625.0/4096.0, -7465.0/4096.0,  3840.0/4096.0 },
    /* Cp */ { 9500.0/4096.0, -9212.0/4096.0,  -288.0/4096.0 }
};
|
||||||
|
|
||||||
|
// I Ct Cp -> L' M' S' (undoes M_LMSPRIME_TO_ICTCP).
// Rows produce L', M', S'; columns multiply I, Ct, Cp.
static const double M_ICTCP_TO_LMSPRIME[3][3] = {
    /* L' */ { 1.0,  0.015718580108730416,  0.2095810681164055  },
    /* M' */ { 1.0, -0.015718580108730416, -0.20958106811640548 },
    /* S' */ { 1.0,  1.0212710798422344,   -0.6052744909924316  }
};
|
||||||
|
|
||||||
|
// ---------------------- Forward: sRGB8 -> ICtCp (doubles) ----------------------
|
||||||
|
void srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
|
||||||
|
double *out_I, double *out_Ct, double *out_Cp)
|
||||||
|
{
|
||||||
|
// 1) linearize sRGB to 0..1
|
||||||
|
double r = srgb_linearize((double)r8 / 255.0);
|
||||||
|
double g = srgb_linearize((double)g8 / 255.0);
|
||||||
|
double b = srgb_linearize((double)b8 / 255.0);
|
||||||
|
|
||||||
|
// 2) linear RGB -> LMS (single 3x3 multiply)
|
||||||
|
double L = M_RGB_TO_LMS[0][0]*r + M_RGB_TO_LMS[0][1]*g + M_RGB_TO_LMS[0][2]*b;
|
||||||
|
double M = M_RGB_TO_LMS[1][0]*r + M_RGB_TO_LMS[1][1]*g + M_RGB_TO_LMS[1][2]*b;
|
||||||
|
double S = M_RGB_TO_LMS[2][0]*r + M_RGB_TO_LMS[2][1]*g + M_RGB_TO_LMS[2][2]*b;
|
||||||
|
|
||||||
|
// 3) HLG OETF
|
||||||
|
double Lp = HLG_OETF(L);
|
||||||
|
double Mp = HLG_OETF(M);
|
||||||
|
double Sp = HLG_OETF(S);
|
||||||
|
|
||||||
|
// 4) L'M'S' -> ICtCp
|
||||||
|
double I = M_LMSPRIME_TO_ICTCP[0][0]*Lp + M_LMSPRIME_TO_ICTCP[0][1]*Mp + M_LMSPRIME_TO_ICTCP[0][2]*Sp;
|
||||||
|
double Ct = M_LMSPRIME_TO_ICTCP[1][0]*Lp + M_LMSPRIME_TO_ICTCP[1][1]*Mp + M_LMSPRIME_TO_ICTCP[1][2]*Sp;
|
||||||
|
double Cp = M_LMSPRIME_TO_ICTCP[2][0]*Lp + M_LMSPRIME_TO_ICTCP[2][1]*Mp + M_LMSPRIME_TO_ICTCP[2][2]*Sp;
|
||||||
|
|
||||||
|
*out_I = FCLAMP(I * 255.f, 0.f, 255.f);
|
||||||
|
*out_Ct = FCLAMP(Ct * 255.f + 127.5f, 0.f, 255.f);
|
||||||
|
*out_Cp = FCLAMP(Cp * 255.f + 127.5f, 0.f, 255.f);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------- Reverse: ICtCp -> sRGB8 (doubles) ----------------------
|
||||||
|
void ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
|
||||||
|
uint8_t *r8, uint8_t *g8, uint8_t *b8)
|
||||||
|
{
|
||||||
|
double I = I8 / 255.f;
|
||||||
|
double Ct = (Ct8 - 127.5f) / 255.f;
|
||||||
|
double Cp = (Cp8 - 127.5f) / 255.f;
|
||||||
|
|
||||||
|
// 1) ICtCp -> L' M' S' (3x3 multiply)
|
||||||
|
double Lp = M_ICTCP_TO_LMSPRIME[0][0]*I + M_ICTCP_TO_LMSPRIME[0][1]*Ct + M_ICTCP_TO_LMSPRIME[0][2]*Cp;
|
||||||
|
double Mp = M_ICTCP_TO_LMSPRIME[1][0]*I + M_ICTCP_TO_LMSPRIME[1][1]*Ct + M_ICTCP_TO_LMSPRIME[1][2]*Cp;
|
||||||
|
double Sp = M_ICTCP_TO_LMSPRIME[2][0]*I + M_ICTCP_TO_LMSPRIME[2][1]*Ct + M_ICTCP_TO_LMSPRIME[2][2]*Cp;
|
||||||
|
|
||||||
|
// 2) HLG decode: L' -> linear LMS
|
||||||
|
double L = HLG_EOTF(Lp);
|
||||||
|
double M = HLG_EOTF(Mp);
|
||||||
|
double S = HLG_EOTF(Sp);
|
||||||
|
|
||||||
|
// 3) LMS -> linear sRGB (3x3 inverse)
|
||||||
|
double r_lin = M_LMS_TO_RGB[0][0]*L + M_LMS_TO_RGB[0][1]*M + M_LMS_TO_RGB[0][2]*S;
|
||||||
|
double g_lin = M_LMS_TO_RGB[1][0]*L + M_LMS_TO_RGB[1][1]*M + M_LMS_TO_RGB[1][2]*S;
|
||||||
|
double b_lin = M_LMS_TO_RGB[2][0]*L + M_LMS_TO_RGB[2][1]*M + M_LMS_TO_RGB[2][2]*S;
|
||||||
|
|
||||||
|
// 4) gamma encode and convert to 0..255 with center-of-bin rounding
|
||||||
|
double r = srgb_unlinearize(r_lin);
|
||||||
|
double g = srgb_unlinearize(g_lin);
|
||||||
|
double b = srgb_unlinearize(b_lin);
|
||||||
|
|
||||||
|
*r8 = (uint8_t)iround(FCLAMP(r * 255.0, 0.0, 255.0));
|
||||||
|
*g8 = (uint8_t)iround(FCLAMP(g * 255.0, 0.0, 255.0));
|
||||||
|
*b8 = (uint8_t)iround(FCLAMP(b * 255.0, 0.0, 255.0));
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------- Color Space Switching Functions ----------------------
|
||||||
|
// Wrapper functions that choose between YCoCg-R and ICtCp based on encoder mode
|
||||||
|
|
||||||
|
/*
 * Converts a single RGB pixel into the encoder's working colour space.
 *
 * When enc->ictcp_mode is set the pixel goes through srgb8_to_ictcp_hlg();
 * otherwise the YCoCg-R lifting steps are applied and c1/c2/c3 receive
 * Y/Co/Cg respectively.
 */
static void rgb_to_color_space(tav_encoder_t *enc, uint8_t r, uint8_t g, uint8_t b,
                               double *c1, double *c2, double *c3) {
    if (enc->ictcp_mode) {
        srgb8_to_ictcp_hlg(r, g, b, c1, c2, c3);
        return;
    }

    /* YCoCg-R forward lifting, carried out in single precision */
    const float red = r, green = g, blue = b;
    const float chroma_o = red - blue;
    const float base = blue + chroma_o / 2;
    const float chroma_g = green - base;
    const float luma = base + chroma_g / 2;

    *c1 = (double)luma;
    *c2 = (double)chroma_o;
    *c3 = (double)chroma_g;
}
|
||||||
|
|
||||||
|
/*
 * Converts one sample from the encoder's working colour space back to RGB.
 *
 * When enc->ictcp_mode is set the sample goes through ictcp_hlg_to_srgb8();
 * otherwise c1/c2/c3 are treated as Y/Co/Cg and the YCoCg-R lifting steps are
 * undone, with each channel offset by 0.5, truncated to int and clamped to
 * 0..255.
 */
static void color_space_to_rgb(tav_encoder_t *enc, double c1, double c2, double c3,
                               uint8_t *r, uint8_t *g, uint8_t *b) {
    if (enc->ictcp_mode) {
        ictcp_hlg_to_srgb8(c1, c2, c3, r, g, b);
        return;
    }

    /* YCoCg-R inverse lifting, carried out in single precision */
    const float luma = (float)c1;
    const float chroma_o = (float)c2;
    const float chroma_g = (float)c3;

    const float base = luma - chroma_g / 2.0f;
    const float green = chroma_g + base;
    const float blue = base - chroma_o / 2.0f;
    const float red = chroma_o + blue;

    *r = (uint8_t)CLAMP((int)(red + 0.5f), 0, 255);
    *g = (uint8_t)CLAMP((int)(green + 0.5f), 0, 255);
    *b = (uint8_t)CLAMP((int)(blue + 0.5f), 0, 255);
}
|
||||||
|
|
||||||
|
// RGB to color space conversion for full frames
|
||||||
|
/*
 * Converts a whole packed RGB frame (3 bytes per pixel) into three float
 * planes in the encoder's working colour space.
 *
 * Without enc->ictcp_mode the work is delegated to rgb_to_ycocg(); with it,
 * every pixel is converted through srgb8_to_ictcp_hlg() and narrowed to float.
 */
static void rgb_to_color_space_frame(tav_encoder_t *enc, const uint8_t *rgb,
                                     float *c1, float *c2, float *c3, int width, int height) {
    if (!enc->ictcp_mode) {
        rgb_to_ycocg(rgb, c1, c2, c3, width, height);
        return;
    }

    const int pixel_count = width * height;
    const uint8_t *px = rgb;
    for (int idx = 0; idx < pixel_count; idx++, px += 3) {
        double I, Ct, Cp;
        srgb8_to_ictcp_hlg(px[0], px[1], px[2], &I, &Ct, &Cp);
        c1[idx] = (float)I;
        c2[idx] = (float)Ct;
        c3[idx] = (float)Cp;
    }
}
|
||||||
|
|
||||||
// Write TAV file header
|
// Write TAV file header
|
||||||
static int write_tav_header(tav_encoder_t *enc) {
|
static int write_tav_header(tav_encoder_t *enc) {
|
||||||
if (!enc->output_fp) return -1;
|
if (!enc->output_fp) return -1;
|
||||||
@@ -770,8 +960,9 @@ static int write_tav_header(tav_encoder_t *enc) {
|
|||||||
// Magic number
|
// Magic number
|
||||||
fwrite(TAV_MAGIC, 1, 8, enc->output_fp);
|
fwrite(TAV_MAGIC, 1, 8, enc->output_fp);
|
||||||
|
|
||||||
// Version
|
// Version (dynamic based on color space)
|
||||||
fputc(TAV_VERSION, enc->output_fp);
|
uint8_t version = enc->ictcp_mode ? 2 : 1; // Version 2 for ICtCp, 1 for YCoCg-R
|
||||||
|
fputc(version, enc->output_fp);
|
||||||
|
|
||||||
// Video parameters
|
// Video parameters
|
||||||
fwrite(&enc->width, sizeof(uint16_t), 1, enc->output_fp);
|
fwrite(&enc->width, sizeof(uint16_t), 1, enc->output_fp);
|
||||||
@@ -991,6 +1182,7 @@ int main(int argc, char *argv[]) {
|
|||||||
{"enable-rcf", no_argument, 0, 1001},
|
{"enable-rcf", no_argument, 0, 1001},
|
||||||
{"enable-progressive", no_argument, 0, 1002},
|
{"enable-progressive", no_argument, 0, 1002},
|
||||||
{"enable-roi", no_argument, 0, 1003},
|
{"enable-roi", no_argument, 0, 1003},
|
||||||
|
{"ictcp", no_argument, 0, 1005},
|
||||||
{"help", no_argument, 0, 1004},
|
{"help", no_argument, 0, 1004},
|
||||||
{0, 0, 0, 0}
|
{0, 0, 0, 0}
|
||||||
};
|
};
|
||||||
@@ -1046,6 +1238,9 @@ int main(int argc, char *argv[]) {
|
|||||||
case 1001: // --enable-rcf
|
case 1001: // --enable-rcf
|
||||||
enc->enable_rcf = 1;
|
enc->enable_rcf = 1;
|
||||||
break;
|
break;
|
||||||
|
case 1005: // --ictcp
|
||||||
|
enc->ictcp_mode = 1;
|
||||||
|
break;
|
||||||
case 1004: // --help
|
case 1004: // --help
|
||||||
show_usage(argv[0]);
|
show_usage(argv[0]);
|
||||||
cleanup_encoder(enc);
|
cleanup_encoder(enc);
|
||||||
@@ -1077,6 +1272,7 @@ int main(int argc, char *argv[]) {
|
|||||||
printf("Wavelet: %s\n", enc->wavelet_filter ? "9/7 irreversible" : "5/3 reversible");
|
printf("Wavelet: %s\n", enc->wavelet_filter ? "9/7 irreversible" : "5/3 reversible");
|
||||||
printf("Decomposition levels: %d\n", enc->decomp_levels);
|
printf("Decomposition levels: %d\n", enc->decomp_levels);
|
||||||
printf("Quality: Y=%d, Co=%d, Cg=%d\n", enc->quantizer_y, enc->quantizer_co, enc->quantizer_cg);
|
printf("Quality: Y=%d, Co=%d, Cg=%d\n", enc->quantizer_y, enc->quantizer_co, enc->quantizer_cg);
|
||||||
|
printf("Color space: %s\n", enc->ictcp_mode ? "ICtCp" : "YCoCg-R");
|
||||||
|
|
||||||
// Open output file
|
// Open output file
|
||||||
if (strcmp(enc->output_file, "-") == 0) {
|
if (strcmp(enc->output_file, "-") == 0) {
|
||||||
@@ -1204,28 +1400,28 @@ int main(int argc, char *argv[]) {
|
|||||||
int is_keyframe = 1;//(frame_count % keyframe_interval == 0);
|
int is_keyframe = 1;//(frame_count % keyframe_interval == 0);
|
||||||
|
|
||||||
// Debug: check RGB input data
|
// Debug: check RGB input data
|
||||||
if (frame_count < 3) {
|
/*if (frame_count < 3) {
|
||||||
printf("Encoder Debug: Frame %d - RGB data (first 16 bytes): ", frame_count);
|
printf("Encoder Debug: Frame %d - RGB data (first 16 bytes): ", frame_count);
|
||||||
for (int i = 0; i < 16; i++) {
|
for (int i = 0; i < 16; i++) {
|
||||||
printf("%d ", enc->current_frame_rgb[i]);
|
printf("%d ", enc->current_frame_rgb[i]);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}*/
|
||||||
|
|
||||||
// Convert RGB to YCoCg
|
// Convert RGB to color space (YCoCg-R or ICtCp)
|
||||||
rgb_to_ycocg(enc->current_frame_rgb,
|
rgb_to_color_space_frame(enc, enc->current_frame_rgb,
|
||||||
enc->current_frame_y, enc->current_frame_co, enc->current_frame_cg,
|
enc->current_frame_y, enc->current_frame_co, enc->current_frame_cg,
|
||||||
enc->width, enc->height);
|
enc->width, enc->height);
|
||||||
|
|
||||||
// Debug: check YCoCg conversion result
|
// Debug: check YCoCg conversion result
|
||||||
if (frame_count < 3) {
|
/*if (frame_count < 3) {
|
||||||
printf("Encoder Debug: Frame %d - YCoCg result (first 16): ", frame_count);
|
printf("Encoder Debug: Frame %d - YCoCg result (first 16): ", frame_count);
|
||||||
for (int i = 0; i < 16; i++) {
|
for (int i = 0; i < 16; i++) {
|
||||||
printf("Y=%.1f Co=%.1f Cg=%.1f ", enc->current_frame_y[i], enc->current_frame_co[i], enc->current_frame_cg[i]);
|
printf("Y=%.1f Co=%.1f Cg=%.1f ", enc->current_frame_y[i], enc->current_frame_co[i], enc->current_frame_cg[i]);
|
||||||
if (i % 4 == 3) break; // Only show first 4 pixels for readability
|
if (i % 4 == 3) break; // Only show first 4 pixels for readability
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}*/
|
||||||
|
|
||||||
// Process motion vectors for P-frames
|
// Process motion vectors for P-frames
|
||||||
int num_tiles = enc->tiles_x * enc->tiles_y;
|
int num_tiles = enc->tiles_x * enc->tiles_y;
|
||||||
|
|||||||
Reference in New Issue
Block a user