optimised tevDecode

This commit is contained in:
minjaesong
2025-09-10 01:49:32 +09:00
parent 3f9747ebf0
commit 8601f614b4

View File

@@ -2422,6 +2422,59 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
} }
/**
 * Bulk write a block of RGB pixels into a packed 3-bytes-per-pixel frame buffer in VM memory.
 *
 * [rgbData] holds the block row by row, three ints (R, G, B) per pixel; every int is truncated
 * to its low 8 bits when stored. The block is clipped against the `width` x `height` frame on
 * all four sides, so pixels that fall outside the frame are silently skipped.
 *
 * @param destAddr VM address of the first byte of the destination frame buffer
 * @param rgbData source block, `blockWidth * blockHeight * 3` ints
 * @param width frame width in pixels
 * @param height frame height in pixels
 * @param startX X coordinate of the block's left edge within the frame
 * @param startY Y coordinate of the block's top edge within the frame
 * @param blockWidth block width in pixels (also the row stride of [rgbData])
 * @param blockHeight block height in pixels
 * @param addrIncVec multiplier applied to every byte offset from [destAddr]; the row-wise bulk
 *                   copy is used only when this is 1 and the address translates to an UnsafePtr
 */
private fun bulkWriteRGB(destAddr: Long, rgbData: IntArray, width: Int, height: Int,
                         startX: Int, startY: Int, blockWidth: Int, blockHeight: Int, addrIncVec: Int) {
    val (memspace, baseOffset) = vm.translateAddr(destAddr)

    // Clip the block against the frame once; the visible extents are the same for every row.
    // Clipping the low edges as well keeps both write paths consistent and prevents the raw
    // copy below from ever being handed a negative frame offset.
    val firstDx = kotlin.math.max(0, -startX)
    val endDx = kotlin.math.min(blockWidth, width - startX)
    val firstDy = kotlin.math.max(0, -startY)
    val endDy = kotlin.math.min(blockHeight, height - startY)
    if (firstDx >= endDx || firstDy >= endDy) return // block lies entirely outside the frame

    if (memspace is UnsafePtr && addrIncVec == 1) {
        // Optimized path for user memory with forward addressing: one raw copy per visible row.
        val rowBytes = (endDx - firstDx) * 3
        // Single staging buffer reused for all rows.
        // NOTE(review): relies on memcpyRaw finishing the copy before it returns - confirm.
        val rowBuffer = ByteArray(rowBytes)
        val rowBufferOffset = UnsafeHelper.getArrayOffset(rowBuffer)

        for (dy in firstDy until endDy) {
            val srcRowOffset = (dy * blockWidth + firstDx) * 3
            for (i in 0 until rowBytes) {
                rowBuffer[i] = rgbData[srcRowOffset + i].toByte()
            }

            // Long arithmetic, matching the fallback path, so large frames cannot overflow Int.
            val dstRowOffset = baseOffset + ((startY + dy).toLong() * width + (startX + firstDx)) * 3
            UnsafeHelper.memcpyRaw(
                rowBuffer, rowBufferOffset,
                null, memspace.ptr + dstRowOffset, rowBytes.toLong())
        }
    } else {
        // Fallback to individual pokes for peripheral memory or reverse addressing
        for (dy in firstDy until endDy) {
            val y = startY + dy
            for (dx in firstDx until endDx) {
                val rgbIdx = (dy * blockWidth + dx) * 3
                val bufferOffset = (y.toLong() * width + (startX + dx)) * 3
                vm.poke(destAddr + bufferOffset * addrIncVec, rgbData[rgbIdx].toByte())
                vm.poke(destAddr + (bufferOffset + 1) * addrIncVec, rgbData[rgbIdx + 1].toByte())
                vm.poke(destAddr + (bufferOffset + 2) * addrIncVec, rgbData[rgbIdx + 2].toByte())
            }
        }
    }
}
/** /**
* Hardware-accelerated TEV frame decoder for YCoCg-R 4:2:0 format * Hardware-accelerated TEV frame decoder for YCoCg-R 4:2:0 format
* Decodes compressed TEV block data directly to framebuffer * Decodes compressed TEV block data directly to framebuffer
@@ -2456,16 +2509,25 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val startX = bx * 16 val startX = bx * 16
val startY = by * 16 val startY = by * 16
// Read TEV block header (7 bytes) // Read TEV block header (11 bytes) with bulk operation
val mode = vm.peek(readPtr)!!.toUint() val headerBuffer = ByteArray(11)
val mvX = ((vm.peek(readPtr + 1)!!.toUint()) or val (memspace, offset) = vm.translateAddr(readPtr)
((vm.peek(readPtr + 2)!!.toUint()) shl 8)).toShort().toInt() if (memspace is UnsafePtr) {
val mvY = ((vm.peek(readPtr + 3)!!.toUint()) or UnsafeHelper.memcpyRaw(null, memspace.ptr + offset,
((vm.peek(readPtr + 4)!!.toUint()) shl 8)).toShort().toInt() headerBuffer, UnsafeHelper.getArrayOffset(headerBuffer), 11L)
val rateControlFactor = Float.fromBits((vm.peek(readPtr + 5)!!.toUint()) or } else {
((vm.peek(readPtr + 6)!!.toUint()) shl 8) or // Fallback for peripheral memory
((vm.peek(readPtr + 7)!!.toUint()) shl 16) or for (i in 0 until 11) {
((vm.peek(readPtr + 8)!!.toUint()) shl 24)) headerBuffer[i] = vm.peek(readPtr + i) ?: 0
}
}
val mode = headerBuffer[0].toUint()
val mvX = ((headerBuffer[1].toUint()) or ((headerBuffer[2].toUint()) shl 8)).toShort().toInt()
val mvY = ((headerBuffer[3].toUint()) or ((headerBuffer[4].toUint()) shl 8)).toShort().toInt()
val rateControlFactor = Float.fromBits((headerBuffer[5].toUint()) or
((headerBuffer[6].toUint()) shl 8) or
((headerBuffer[7].toUint()) shl 16) or
((headerBuffer[8].toUint()) shl 24))
readPtr += 11 // Skip CBP field readPtr += 11 // Skip CBP field
@@ -2484,23 +2546,25 @@ class GraphicsJSR223Delegate(private val vm: VM) {
) )
} }
} else { } else {
// Fallback to pixel-by-pixel for boundary blocks // Optimized fallback using row-by-row copying for boundary blocks
for (dy in 0 until 16) { for (dy in 0 until 16) {
for (dx in 0 until 16) { val y = startY + dy
val x = startX + dx if (y < height) {
val y = startY + dy val rowStartX = kotlin.math.max(0, startX)
if (x < width && y < height) { val rowEndX = kotlin.math.min(width, startX + 16)
val pixelOffset = y.toLong() * width + x val rowPixels = rowEndX - rowStartX
val rgbOffset = pixelOffset * 3
if (rowPixels > 0) {
val srcRowOffset = (y.toLong() * width + rowStartX) * 3
val dstRowOffset = srcRowOffset
val rowBytes = rowPixels * 3
// Copy RGB values from previous frame // Use vm.memcpy for partial rows
val prevR = vm.peek(prevRGBAddr + rgbOffset*prevAddrIncVec)!! vm.memcpy(
val prevG = vm.peek(prevRGBAddr + (rgbOffset + 1)*prevAddrIncVec)!! (prevRGBAddr + srcRowOffset*prevAddrIncVec).toInt(),
val prevB = vm.peek(prevRGBAddr + (rgbOffset + 2)*prevAddrIncVec)!! (currentRGBAddr + dstRowOffset*thisAddrIncVec).toInt(),
rowBytes
vm.poke(currentRGBAddr + rgbOffset*thisAddrIncVec, prevR) )
vm.poke(currentRGBAddr + (rgbOffset + 1)*thisAddrIncVec, prevG)
vm.poke(currentRGBAddr + (rgbOffset + 2)*thisAddrIncVec, prevB)
} }
} }
} }
@@ -2621,22 +2685,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
tevYcocgToRGB(yBlock, coBlock, cgBlock) // YCoCg-R format (v2) tevYcocgToRGB(yBlock, coBlock, cgBlock) // YCoCg-R format (v2)
} }
// Store RGB data to frame buffer (complete replacement) // Store RGB data to frame buffer with bulk write
for (dy in 0 until 16) { bulkWriteRGB(currentRGBAddr, rgbData, width, height, startX, startY, 16, 16, thisAddrIncVec)
for (dx in 0 until 16) {
val x = startX + dx
val y = startY + dy
if (x < width && y < height) {
val rgbIdx = (dy * 16 + dx) * 3
val imageOffset = y.toLong() * width + x
val bufferOffset = imageOffset * 3
vm.poke(currentRGBAddr + bufferOffset*thisAddrIncVec, rgbData[rgbIdx].toByte())
vm.poke(currentRGBAddr + (bufferOffset + 1)*thisAddrIncVec, rgbData[rgbIdx + 1].toByte())
vm.poke(currentRGBAddr + (bufferOffset + 2)*thisAddrIncVec, rgbData[rgbIdx + 2].toByte())
}
}
}
} }
0x02 -> { // TEV_MODE_INTER - Motion compensation + residual DCT 0x02 -> { // TEV_MODE_INTER - Motion compensation + residual DCT
@@ -2763,34 +2813,28 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
// Step 5: Store final RGB data to frame buffer // Step 5: Store final RGB data to frame buffer
for (dy in 0 until 16) { if (debugMotionVectors) {
for (dx in 0 until 16) { // Debug mode: individual pokes for motion vector visualization
val x = startX + dx for (dy in 0 until 16) {
val y = startY + dy for (dx in 0 until 16) {
if (x < width && y < height) { val x = startX + dx
val imageOffset = y.toLong() * width + x val y = startY + dy
val bufferOffset = imageOffset * 3 if (x < width && y < height) {
val imageOffset = y.toLong() * width + x
if (debugMotionVectors) { val bufferOffset = imageOffset * 3
// Debug: Color INTER blocks by motion vector magnitude
val mvMagnitude = kotlin.math.sqrt((mvX * mvX + mvY * mvY).toDouble()).toInt() val mvMagnitude = kotlin.math.sqrt((mvX * mvX + mvY * mvY).toDouble()).toInt()
val intensity = (mvMagnitude * 8).coerceIn(0, 255) // Scale for visibility val intensity = (mvMagnitude * 8).coerceIn(0, 255) // Scale for visibility
vm.poke(currentRGBAddr + bufferOffset*thisAddrIncVec, intensity.toByte()) // R = MV magnitude vm.poke(currentRGBAddr + bufferOffset*thisAddrIncVec, intensity.toByte()) // R = MV magnitude
vm.poke(currentRGBAddr + (bufferOffset + 1)*thisAddrIncVec, 0.toByte()) // G = 0 vm.poke(currentRGBAddr + (bufferOffset + 1)*thisAddrIncVec, 0.toByte()) // G = 0
vm.poke(currentRGBAddr + (bufferOffset + 2)*thisAddrIncVec, (255-intensity).toByte()) // B = inverse vm.poke(currentRGBAddr + (bufferOffset + 2)*thisAddrIncVec, (255-intensity).toByte()) // B = inverse
} else {
val rgbIdx = (dy * 16 + dx) * 3
val finalR = finalRgb[rgbIdx]
val finalG = finalRgb[rgbIdx + 1]
val finalB = finalRgb[rgbIdx + 2]
vm.poke(currentRGBAddr + bufferOffset*thisAddrIncVec, finalR.toByte())
vm.poke(currentRGBAddr + (bufferOffset + 1)*thisAddrIncVec, finalG.toByte())
vm.poke(currentRGBAddr + (bufferOffset + 2)*thisAddrIncVec, finalB.toByte())
} }
} }
} }
} else {
// Optimized bulk write for normal operation
bulkWriteRGB(currentRGBAddr, finalRgb, width, height, startX, startY, 16, 16, thisAddrIncVec)
} }
} }