Faster ICtCp colour conversion using extensive LUT

This commit is contained in:
minjaesong
2025-10-03 14:50:52 +09:00
parent 5f14169e6b
commit 7d61074a13

View File

@@ -3952,6 +3952,102 @@ class GraphicsJSR223Delegate(private val vm: VM) {
const val CHANNEL_LAYOUT_Y_A = 3 // Y-A (011: has alpha, no chroma, has luma) const val CHANNEL_LAYOUT_Y_A = 3 // Y-A (011: has alpha, no chroma, has luma)
const val CHANNEL_LAYOUT_COCG = 4 // Co-Cg (100: no alpha, has chroma, no luma) const val CHANNEL_LAYOUT_COCG = 4 // Co-Cg (100: no alpha, has chroma, no luma)
const val CHANNEL_LAYOUT_COCG_A = 5 // Co-Cg-A (101: has alpha, has chroma, no luma) const val CHANNEL_LAYOUT_COCG_A = 5 // Co-Cg-A (101: has alpha, has chroma, no luma)
// ICtCp→RGB LUT (256×256×256 × 3 bytes = 48 MB)
// Layout: lut[I * 256 * 256 * 3 + Ct * 256 * 3 + Cp * 3 + channel]
// where I ∈ [0,255], Ct ∈ [0,511], Cp ∈ [0,511], channel ∈ {0=R, 1=G, 2=B}
private val ICTCP_LUT_SIZE = 256L * 256L * 256L * 3L // 201,326,592 bytes
private var ictcpLUT: UnsafePtr? = null
init {
println("[ICtCp LUT] Initializing 256×256×256 lookup table (48 MB)...")
val startTime = System.currentTimeMillis()
// Allocate native memory
ictcpLUT = UnsafeHelper.allocate(ICTCP_LUT_SIZE, this)
// Precompute all possible ICtCp→RGB conversions
for (i in 0..255) {
for (ct in 0..255) {
for (cp in 0..255) {
// Convert index to ICtCp values (matching decoder range)
val I = i / 255.0f
val Ct = (ct - 127.5f) / 255.0f // Center at 127.5 for symmetric range
val Cp = (cp - 127.5f) / 255.0f
// ICtCp → L'M'S' (inverse matrix)
val Lp = I + 0.015718580108730416f * Ct + 0.2095810681164055f * Cp
val Mp = I - 0.015718580108730416f * Ct - 0.20958106811640548f * Cp
val Sp = I + 1.0212710798422344f * Ct - 0.6052744909924316f * Cp
// HLG decode: L'M'S' → linear LMS
val L = HLG_EOTF_static(Lp)
val M = HLG_EOTF_static(Mp)
val S = HLG_EOTF_static(Sp)
// LMS → linear sRGB (inverse matrix)
val rLin = 6.1723815689243215f * L - 5.319534979827695f * M + 0.14699442094633924f * S
val gLin = -1.3243428148026244f * L + 2.560286104841917f * M - 0.2359203727576164f * S
val bLin = -0.011819739235953752f * L - 0.26473549971186555f * M + 1.2767952602537955f * S
// Gamma encode to sRGB
val rSrgb = srgbUnlinearise_static(rLin)
val gSrgb = srgbUnlinearise_static(gLin)
val bSrgb = srgbUnlinearise_static(bLin)
// Store RGB bytes in LUT
val lutIndex = (i.toLong() * 256L * 256L * 3L) + (ct.toLong() * 256L * 3L) + (cp.toLong() * 3L)
ictcpLUT!!.set(lutIndex + 0, (rSrgb * 255.0f).toInt().coerceIn(0, 255).toByte())
ictcpLUT!!.set(lutIndex + 1, (gSrgb * 255.0f).toInt().coerceIn(0, 255).toByte())
ictcpLUT!!.set(lutIndex + 2, (bSrgb * 255.0f).toInt().coerceIn(0, 255).toByte())
}
}
// Progress indicator every 32 I values
if (i % 32 == 0) {
print(".")
}
}
val elapsedMs = System.currentTimeMillis() - startTime
println("\n[ICtCp LUT] Initialized in ${elapsedMs}ms")
// Register shutdown hook to free native memory
Runtime.getRuntime().addShutdownHook(Thread {
println("[ICtCp LUT] Freeing native memory...")
ictcpLUT?.destroy()
ictcpLUT = null
})
}
// Static helper functions for LUT initialization (must match instance methods)
private fun HLG_EOTF_static(V: Float): Float {
val a = 0.17883277f
val b = 1.0f - 4.0f * a
val c = 0.5f - a * ln(4.0f * a)
return if (V <= 0.5f)
(V * V) / 3.0f
else
(exp((V - c) / a) + b) / 12.0f
}
private fun srgbUnlinearise_static(value: Float): Float {
return if (value <= 0.0031308f) {
value * 12.92f
} else {
1.055f * value.pow(1.0f / 2.4f) - 0.055f
}
}
// Fast LUT lookup function (no bounds checking with UnsafePtr)
fun lookupICtCpToRGB(I: Int, Ct: Int, Cp: Int): Triple<Byte, Byte, Byte> {
val lutIndex = (I.toLong() * 256L * 256L * 3L) + (Ct.toLong() * 256L * 3L) + (Cp.toLong() * 3L)
val r = ictcpLUT!!.get(lutIndex + 0)
val g = ictcpLUT!!.get(lutIndex + 1)
val b = ictcpLUT!!.get(lutIndex + 2)
return Triple(r, g, b)
}
} }
// Variable channel layout postprocessing for concatenated maps // Variable channel layout postprocessing for concatenated maps
@@ -4826,58 +4922,41 @@ class GraphicsJSR223Delegate(private val vm: VM) {
rgbAddr: Long, width: Int, height: Int) { rgbAddr: Long, width: Int, height: Int) {
val startX = tileX * TAV_TILE_SIZE_X val startX = tileX * TAV_TILE_SIZE_X
val startY = tileY * TAV_TILE_SIZE_Y val startY = tileY * TAV_TILE_SIZE_Y
// OPTIMISATION: Process pixels row by row with bulk copying for better cache locality // OPTIMISATION: Process pixels row by row with bulk copying for better cache locality
for (y in 0 until TAV_TILE_SIZE_Y) { for (y in 0 until TAV_TILE_SIZE_Y) {
val frameY = startY + y val frameY = startY + y
if (frameY >= height) break if (frameY >= height) break
// Calculate valid pixel range for this row // Calculate valid pixel range for this row
val validStartX = maxOf(0, startX) val validStartX = maxOf(0, startX)
val validEndX = minOf(width, startX + TAV_TILE_SIZE_X) val validEndX = minOf(width, startX + TAV_TILE_SIZE_X)
val validPixelsInRow = validEndX - validStartX val validPixelsInRow = validEndX - validStartX
if (validPixelsInRow > 0) { if (validPixelsInRow > 0) {
// Create row buffer for bulk RGB data // Create row buffer for bulk RGB data
val rowRgbBuffer = ByteArray(validPixelsInRow * 3) val rowRgbBuffer = ByteArray(validPixelsInRow * 3)
var bufferIdx = 0 var bufferIdx = 0
for (x in validStartX until validEndX) { for (x in validStartX until validEndX) {
val tileIdx = y * TAV_TILE_SIZE_X + (x - startX) val tileIdx = y * TAV_TILE_SIZE_X + (x - startX)
// ICtCp to sRGB conversion (adapted from encoder ICtCp functions)
val I = iTile[tileIdx] / 255.0f
val Ct = (ctTile[tileIdx] - 127.5f) / 255.0f
val Cp = (cpTile[tileIdx] - 127.5f) / 255.0f
// ICtCp -> L'M'S' (inverse matrix) // ICtCp to RGB conversion via LUT
val Lp = I + 0.015718580108730416f * Ct + 0.2095810681164055f * Cp // Convert float values to LUT indices (values already in [0,255] from IDWT)
val Mp = I - 0.015718580108730416f * Ct - 0.20958106811640548f * Cp val iIdx = iTile[tileIdx].toInt().coerceIn(0, 255)
val Sp = I + 1.0212710798422344f * Ct - 0.6052744909924316f * Cp val ctIdx = ctTile[tileIdx].toInt().coerceIn(0, 255)
val cpIdx = cpTile[tileIdx].toInt().coerceIn(0, 255)
// HLG decode: L'M'S' -> linear LMS // Direct LUT lookup (no bounds checking with UnsafePtr)
val L = HLG_EOTF(Lp) val lutIndex = (iIdx.toLong() * 256L * 256L * 3L) + (ctIdx.toLong() * 256L * 3L) + (cpIdx.toLong() * 3L)
val M = HLG_EOTF(Mp) rowRgbBuffer[bufferIdx++] = ictcpLUT!!.get(lutIndex + 0)
val S = HLG_EOTF(Sp) rowRgbBuffer[bufferIdx++] = ictcpLUT!!.get(lutIndex + 1)
rowRgbBuffer[bufferIdx++] = ictcpLUT!!.get(lutIndex + 2)
// LMS -> linear sRGB (inverse matrix)
val rLin = 6.1723815689243215f * L -5.319534979827695f * M + 0.14699442094633924f * S
val gLin = -1.3243428148026244f * L + 2.560286104841917f * M -0.2359203727576164f * S
val bLin = -0.011819739235953752f * L -0.26473549971186555f * M + 1.2767952602537955f * S
// Gamma encode to sRGB
val rSrgb = srgbUnlinearise(rLin)
val gSrgb = srgbUnlinearise(gLin)
val bSrgb = srgbUnlinearise(bLin)
rowRgbBuffer[bufferIdx++] = (rSrgb * 255.0f).toInt().coerceIn(0, 255).toByte()
rowRgbBuffer[bufferIdx++] = (gSrgb * 255.0f).toInt().coerceIn(0, 255).toByte()
rowRgbBuffer[bufferIdx++] = (bSrgb * 255.0f).toInt().coerceIn(0, 255).toByte()
} }
// OPTIMISATION: Bulk copy entire row at once // OPTIMISATION: Bulk copy entire row at once
val rowStartOffset = (frameY * width + validStartX) * 3L val rowStartOffset = (frameY * width + validStartX) * 3L
UnsafeHelper.memcpyRaw(rowRgbBuffer, UnsafeHelper.getArrayOffset(rowRgbBuffer), UnsafeHelper.memcpyRaw(rowRgbBuffer, UnsafeHelper.getArrayOffset(rowRgbBuffer),
null, vm.usermem.ptr + rgbAddr + rowStartOffset, rowRgbBuffer.size.toLong()) null, vm.usermem.ptr + rgbAddr + rowStartOffset, rowRgbBuffer.size.toLong())
} }
} }
@@ -4965,34 +5044,17 @@ class GraphicsJSR223Delegate(private val vm: VM) {
for (x in 0 until width) { for (x in 0 until width) {
val idx = y * width + x val idx = y * width + x
// ICtCp to sRGB conversion (adapted from encoder ICtCp functions) // ICtCp to RGB conversion via LUT
val I = iData[idx] / 255.0f // Convert float values to LUT indices (values already in [0,255] from IDWT)
val Ct = (ctData[idx] - 127.5f) / 255.0f val iIdx = iData[idx].toInt().coerceIn(0, 255)
val Cp = (cpData[idx] - 127.5f) / 255.0f val ctIdx = ctData[idx].toInt().coerceIn(0, 255)
val cpIdx = cpData[idx].toInt().coerceIn(0, 255)
// ICtCp -> L'M'S' (inverse matrix) // Direct LUT lookup (no bounds checking with UnsafePtr)
val Lp = I + 0.015718580108730416f * Ct + 0.2095810681164055f * Cp val lutIndex = (iIdx.toLong() * 256L * 256L * 3L) + (ctIdx.toLong() * 256L * 3L) + (cpIdx.toLong() * 3L)
val Mp = I - 0.015718580108730416f * Ct - 0.20958106811640548f * Cp rowRgbBuffer[bufferIdx++] = ictcpLUT!!.get(lutIndex + 0)
val Sp = I + 1.0212710798422344f * Ct - 0.6052744909924316f * Cp rowRgbBuffer[bufferIdx++] = ictcpLUT!!.get(lutIndex + 1)
rowRgbBuffer[bufferIdx++] = ictcpLUT!!.get(lutIndex + 2)
// HLG decode: L'M'S' -> linear LMS
val L = HLG_EOTF(Lp)
val M = HLG_EOTF(Mp)
val S = HLG_EOTF(Sp)
// LMS -> linear sRGB (inverse matrix)
val rLin = 6.1723815689243215f * L -5.319534979827695f * M + 0.14699442094633924f * S
val gLin = -1.3243428148026244f * L + 2.560286104841917f * M -0.2359203727576164f * S
val bLin = -0.011819739235953752f * L -0.26473549971186555f * M + 1.2767952602537955f * S
// Gamma encode to sRGB
val rSrgb = srgbUnlinearise(rLin)
val gSrgb = srgbUnlinearise(gLin)
val bSrgb = srgbUnlinearise(bLin)
rowRgbBuffer[bufferIdx++] = (rSrgb * 255.0f).toInt().coerceIn(0, 255).toByte()
rowRgbBuffer[bufferIdx++] = (gSrgb * 255.0f).toInt().coerceIn(0, 255).toByte()
rowRgbBuffer[bufferIdx++] = (bSrgb * 255.0f).toInt().coerceIn(0, 255).toByte()
} }
// OPTIMISATION: Bulk copy entire row at once // OPTIMISATION: Bulk copy entire row at once