mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-06-06 05:28:31 +09:00
optimised tevDecode
This commit is contained in:
@@ -2422,6 +2422,59 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bulk write RGB block data to VM memory
|
||||||
|
*/
|
||||||
|
private fun bulkWriteRGB(destAddr: Long, rgbData: IntArray, width: Int, height: Int,
|
||||||
|
startX: Int, startY: Int, blockWidth: Int, blockHeight: Int, addrIncVec: Int) {
|
||||||
|
val (memspace, baseOffset) = vm.translateAddr(destAddr)
|
||||||
|
|
||||||
|
if (memspace is UnsafePtr && addrIncVec == 1) {
|
||||||
|
// Optimized path for user memory with forward addressing
|
||||||
|
for (dy in 0 until blockHeight) {
|
||||||
|
val y = startY + dy
|
||||||
|
if (y >= height) break
|
||||||
|
|
||||||
|
val rowStartX = kotlin.math.max(0, startX)
|
||||||
|
val rowEndX = kotlin.math.min(width, startX + blockWidth)
|
||||||
|
val rowPixels = rowEndX - rowStartX
|
||||||
|
|
||||||
|
if (rowPixels > 0) {
|
||||||
|
val srcRowOffset = dy * blockWidth * 3 + (rowStartX - startX) * 3
|
||||||
|
val dstRowOffset = baseOffset + (y * width + rowStartX) * 3
|
||||||
|
val rowBytes = rowPixels * 3
|
||||||
|
|
||||||
|
// Convert IntArray to ByteArray for this row
|
||||||
|
val rowBuffer = ByteArray(rowBytes)
|
||||||
|
for (i in 0 until rowBytes) {
|
||||||
|
rowBuffer[i] = rgbData[srcRowOffset + i].toByte()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bulk write the row
|
||||||
|
UnsafeHelper.memcpyRaw(
|
||||||
|
rowBuffer, UnsafeHelper.getArrayOffset(rowBuffer),
|
||||||
|
null, memspace.ptr + dstRowOffset, rowBytes.toLong())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Fallback to individual pokes for peripheral memory or reverse addressing
|
||||||
|
for (dy in 0 until blockHeight) {
|
||||||
|
for (dx in 0 until blockWidth) {
|
||||||
|
val x = startX + dx
|
||||||
|
val y = startY + dy
|
||||||
|
if (x < width && y < height) {
|
||||||
|
val rgbIdx = (dy * blockWidth + dx) * 3
|
||||||
|
val bufferOffset = (y.toLong() * width + x) * 3
|
||||||
|
|
||||||
|
vm.poke(destAddr + bufferOffset * addrIncVec, rgbData[rgbIdx].toByte())
|
||||||
|
vm.poke(destAddr + (bufferOffset + 1) * addrIncVec, rgbData[rgbIdx + 1].toByte())
|
||||||
|
vm.poke(destAddr + (bufferOffset + 2) * addrIncVec, rgbData[rgbIdx + 2].toByte())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Hardware-accelerated TEV frame decoder for YCoCg-R 4:2:0 format
|
* Hardware-accelerated TEV frame decoder for YCoCg-R 4:2:0 format
|
||||||
* Decodes compressed TEV block data directly to framebuffer
|
* Decodes compressed TEV block data directly to framebuffer
|
||||||
@@ -2456,16 +2509,25 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val startX = bx * 16
|
val startX = bx * 16
|
||||||
val startY = by * 16
|
val startY = by * 16
|
||||||
|
|
||||||
// Read TEV block header (7 bytes)
|
// Read TEV block header (11 bytes) with bulk operation
|
||||||
val mode = vm.peek(readPtr)!!.toUint()
|
val headerBuffer = ByteArray(11)
|
||||||
val mvX = ((vm.peek(readPtr + 1)!!.toUint()) or
|
val (memspace, offset) = vm.translateAddr(readPtr)
|
||||||
((vm.peek(readPtr + 2)!!.toUint()) shl 8)).toShort().toInt()
|
if (memspace is UnsafePtr) {
|
||||||
val mvY = ((vm.peek(readPtr + 3)!!.toUint()) or
|
UnsafeHelper.memcpyRaw(null, memspace.ptr + offset,
|
||||||
((vm.peek(readPtr + 4)!!.toUint()) shl 8)).toShort().toInt()
|
headerBuffer, UnsafeHelper.getArrayOffset(headerBuffer), 11L)
|
||||||
val rateControlFactor = Float.fromBits((vm.peek(readPtr + 5)!!.toUint()) or
|
} else {
|
||||||
((vm.peek(readPtr + 6)!!.toUint()) shl 8) or
|
// Fallback for peripheral memory
|
||||||
((vm.peek(readPtr + 7)!!.toUint()) shl 16) or
|
for (i in 0 until 11) {
|
||||||
((vm.peek(readPtr + 8)!!.toUint()) shl 24))
|
headerBuffer[i] = vm.peek(readPtr + i) ?: 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
val mode = headerBuffer[0].toUint()
|
||||||
|
val mvX = ((headerBuffer[1].toUint()) or ((headerBuffer[2].toUint()) shl 8)).toShort().toInt()
|
||||||
|
val mvY = ((headerBuffer[3].toUint()) or ((headerBuffer[4].toUint()) shl 8)).toShort().toInt()
|
||||||
|
val rateControlFactor = Float.fromBits((headerBuffer[5].toUint()) or
|
||||||
|
((headerBuffer[6].toUint()) shl 8) or
|
||||||
|
((headerBuffer[7].toUint()) shl 16) or
|
||||||
|
((headerBuffer[8].toUint()) shl 24))
|
||||||
readPtr += 11 // Skip CBP field
|
readPtr += 11 // Skip CBP field
|
||||||
|
|
||||||
|
|
||||||
@@ -2484,23 +2546,25 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Fallback to pixel-by-pixel for boundary blocks
|
// Optimized fallback using row-by-row copying for boundary blocks
|
||||||
for (dy in 0 until 16) {
|
for (dy in 0 until 16) {
|
||||||
for (dx in 0 until 16) {
|
val y = startY + dy
|
||||||
val x = startX + dx
|
if (y < height) {
|
||||||
val y = startY + dy
|
val rowStartX = kotlin.math.max(0, startX)
|
||||||
if (x < width && y < height) {
|
val rowEndX = kotlin.math.min(width, startX + 16)
|
||||||
val pixelOffset = y.toLong() * width + x
|
val rowPixels = rowEndX - rowStartX
|
||||||
val rgbOffset = pixelOffset * 3
|
|
||||||
|
if (rowPixels > 0) {
|
||||||
|
val srcRowOffset = (y.toLong() * width + rowStartX) * 3
|
||||||
|
val dstRowOffset = srcRowOffset
|
||||||
|
val rowBytes = rowPixels * 3
|
||||||
|
|
||||||
// Copy RGB values from previous frame
|
// Use vm.memcpy for partial rows
|
||||||
val prevR = vm.peek(prevRGBAddr + rgbOffset*prevAddrIncVec)!!
|
vm.memcpy(
|
||||||
val prevG = vm.peek(prevRGBAddr + (rgbOffset + 1)*prevAddrIncVec)!!
|
(prevRGBAddr + srcRowOffset*prevAddrIncVec).toInt(),
|
||||||
val prevB = vm.peek(prevRGBAddr + (rgbOffset + 2)*prevAddrIncVec)!!
|
(currentRGBAddr + dstRowOffset*thisAddrIncVec).toInt(),
|
||||||
|
rowBytes
|
||||||
vm.poke(currentRGBAddr + rgbOffset*thisAddrIncVec, prevR)
|
)
|
||||||
vm.poke(currentRGBAddr + (rgbOffset + 1)*thisAddrIncVec, prevG)
|
|
||||||
vm.poke(currentRGBAddr + (rgbOffset + 2)*thisAddrIncVec, prevB)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2621,22 +2685,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
tevYcocgToRGB(yBlock, coBlock, cgBlock) // YCoCg-R format (v2)
|
tevYcocgToRGB(yBlock, coBlock, cgBlock) // YCoCg-R format (v2)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store RGB data to frame buffer (complete replacement)
|
// Store RGB data to frame buffer with bulk write
|
||||||
for (dy in 0 until 16) {
|
bulkWriteRGB(currentRGBAddr, rgbData, width, height, startX, startY, 16, 16, thisAddrIncVec)
|
||||||
for (dx in 0 until 16) {
|
|
||||||
val x = startX + dx
|
|
||||||
val y = startY + dy
|
|
||||||
if (x < width && y < height) {
|
|
||||||
val rgbIdx = (dy * 16 + dx) * 3
|
|
||||||
val imageOffset = y.toLong() * width + x
|
|
||||||
val bufferOffset = imageOffset * 3
|
|
||||||
|
|
||||||
vm.poke(currentRGBAddr + bufferOffset*thisAddrIncVec, rgbData[rgbIdx].toByte())
|
|
||||||
vm.poke(currentRGBAddr + (bufferOffset + 1)*thisAddrIncVec, rgbData[rgbIdx + 1].toByte())
|
|
||||||
vm.poke(currentRGBAddr + (bufferOffset + 2)*thisAddrIncVec, rgbData[rgbIdx + 2].toByte())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
0x02 -> { // TEV_MODE_INTER - Motion compensation + residual DCT
|
0x02 -> { // TEV_MODE_INTER - Motion compensation + residual DCT
|
||||||
@@ -2763,34 +2813,28 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Step 5: Store final RGB data to frame buffer
|
// Step 5: Store final RGB data to frame buffer
|
||||||
for (dy in 0 until 16) {
|
if (debugMotionVectors) {
|
||||||
for (dx in 0 until 16) {
|
// Debug mode: individual pokes for motion vector visualization
|
||||||
val x = startX + dx
|
for (dy in 0 until 16) {
|
||||||
val y = startY + dy
|
for (dx in 0 until 16) {
|
||||||
if (x < width && y < height) {
|
val x = startX + dx
|
||||||
val imageOffset = y.toLong() * width + x
|
val y = startY + dy
|
||||||
val bufferOffset = imageOffset * 3
|
if (x < width && y < height) {
|
||||||
|
val imageOffset = y.toLong() * width + x
|
||||||
if (debugMotionVectors) {
|
val bufferOffset = imageOffset * 3
|
||||||
// Debug: Color INTER blocks by motion vector magnitude
|
|
||||||
val mvMagnitude = kotlin.math.sqrt((mvX * mvX + mvY * mvY).toDouble()).toInt()
|
val mvMagnitude = kotlin.math.sqrt((mvX * mvX + mvY * mvY).toDouble()).toInt()
|
||||||
val intensity = (mvMagnitude * 8).coerceIn(0, 255) // Scale for visibility
|
val intensity = (mvMagnitude * 8).coerceIn(0, 255) // Scale for visibility
|
||||||
|
|
||||||
vm.poke(currentRGBAddr + bufferOffset*thisAddrIncVec, intensity.toByte()) // R = MV magnitude
|
vm.poke(currentRGBAddr + bufferOffset*thisAddrIncVec, intensity.toByte()) // R = MV magnitude
|
||||||
vm.poke(currentRGBAddr + (bufferOffset + 1)*thisAddrIncVec, 0.toByte()) // G = 0
|
vm.poke(currentRGBAddr + (bufferOffset + 1)*thisAddrIncVec, 0.toByte()) // G = 0
|
||||||
vm.poke(currentRGBAddr + (bufferOffset + 2)*thisAddrIncVec, (255-intensity).toByte()) // B = inverse
|
vm.poke(currentRGBAddr + (bufferOffset + 2)*thisAddrIncVec, (255-intensity).toByte()) // B = inverse
|
||||||
} else {
|
|
||||||
val rgbIdx = (dy * 16 + dx) * 3
|
|
||||||
val finalR = finalRgb[rgbIdx]
|
|
||||||
val finalG = finalRgb[rgbIdx + 1]
|
|
||||||
val finalB = finalRgb[rgbIdx + 2]
|
|
||||||
|
|
||||||
vm.poke(currentRGBAddr + bufferOffset*thisAddrIncVec, finalR.toByte())
|
|
||||||
vm.poke(currentRGBAddr + (bufferOffset + 1)*thisAddrIncVec, finalG.toByte())
|
|
||||||
vm.poke(currentRGBAddr + (bufferOffset + 2)*thisAddrIncVec, finalB.toByte())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// Optimized bulk write for normal operation
|
||||||
|
bulkWriteRGB(currentRGBAddr, finalRgb, width, height, startX, startY, 16, 16, thisAddrIncVec)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user