mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 19:51:51 +09:00
colour with DCT
This commit is contained in:
@@ -204,14 +204,58 @@ let stopPlay = false
|
||||
/**
 * Dequantize a single DCT coefficient.
 *
 * Every coefficient, the DC term included, is multiplied by its quantiser
 * step. The previous body had a stale `return coeff` ahead of the intended
 * `return coeff * quant` in the DC branch, which made the intended statement
 * unreachable and left the DC term un-dequantized.
 *
 * @param {number} coeff quantised coefficient value
 * @param {number} quant quantiser step for this coefficient position
 * @param {boolean} isDC true when `coeff` is the DC term; accepted for
 *        call-site compatibility, it no longer changes the result
 * @returns {number} the dequantized coefficient, `coeff * quant`
 */
function dequantizeCoeff(coeff, quant, isDC) {
    // DC and AC coefficients are scaled the same way.
    return coeff * quant
}
|
||||
|
||||
/**
 * 8x8 inverse DCT (software path).
 *
 * Dequantizes the 64 coefficients with dequantizeCoeff(), applies a direct
 * 2D inverse DCT, adds the +128 level shift, rounds, and clamps every sample
 * to 0..255.
 *
 * Coefficients are read as `block[v * 8 + u]`, where `u` is the frequency
 * paired with output column `x` and `v` the one paired with output row `y`.
 * Output is row-major (`result[y * 8 + x]`).
 *
 * NOTE(review): the Kotlin performIDCT() reads its input as `u * 8 + v` with
 * `u` paired with `x`, i.e. transposed relative to this function - confirm
 * which layout the encoder actually emits.
 *
 * @param {number[]} coeffs 64 quantised coefficients of one channel
 * @param {number[]} quantTable 64 quantiser steps, same indexing as `coeffs`
 * @returns {number[]} 64 integer samples in the range 0..255
 */
function idct8x8(coeffs, quantTable) {
    const N = 8

    // Dequantize coefficients (index 0 is the DC term)
    const block = new Array(N * N)
    for (let i = 0; i < N * N; i++) {
        block[i] = dequantizeCoeff(coeffs[i], quantTable[i], i === 0)
    }

    // Normalisation factors: 1/sqrt(2) for frequency 0, 1 for all others
    const c = new Array(N)
    c[0] = 1.0 / Math.sqrt(2)
    for (let i = 1; i < N; i++) {
        c[i] = 1.0
    }

    // cosTable[p][f] = cos((2p + 1) * f * PI / 16).
    // These values depend only on (position, frequency), so they are computed
    // once here instead of inside the innermost loop. The expression is the
    // same as before, so the summed result is bit-identical.
    const cosTable = new Array(N)
    for (let p = 0; p < N; p++) {
        cosTable[p] = new Array(N)
        for (let f = 0; f < N; f++) {
            cosTable[p][f] = Math.cos((2 * p + 1) * f * Math.PI / (2 * N))
        }
    }

    const result = new Array(N * N)

    // 2D IDCT
    for (let x = 0; x < N; x++) {
        for (let y = 0; y < N; y++) {
            let sum = 0.0
            for (let u = 0; u < N; u++) {
                for (let v = 0; v < N; v++) {
                    sum += c[u] * c[v] * block[v * N + u] * cosTable[x][u] * cosTable[y][v]
                }
            }
            // Scale by 1/4, level-shift by +128, then round and clamp to 0..255
            result[y * N + x] = Math.max(0, Math.min(255, Math.round(sum / 4.0 + 128)))
        }
    }

    return result
}
|
||||
|
||||
// Hardware-accelerated decoding uses graphics.tevIdct8x8() instead of pure JS
|
||||
|
||||
// Hardware-accelerated TEV block decoder
|
||||
@@ -260,43 +304,43 @@ function decodeBlock(blockData, blockX, blockY, prevRG, prevBA, currRG, currBA,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// INTRA or INTER modes: simplified DC-only decoding for debugging
|
||||
// INTRA or INTER modes: Full DCT decoding
|
||||
|
||||
// Extract DC coefficients and convert to colors
|
||||
let rCoeff = blockData.dctCoeffs[0 * 64 + 0] // R DC
|
||||
let gCoeff = blockData.dctCoeffs[1 * 64 + 0] // G DC
|
||||
let bCoeff = blockData.dctCoeffs[2 * 64 + 0] // B DC
|
||||
// Extract DCT coefficients for each channel (R, G, B)
|
||||
let rCoeffs = blockData.dctCoeffs.slice(0 * 64, 1 * 64) // R channel
|
||||
let gCoeffs = blockData.dctCoeffs.slice(1 * 64, 2 * 64) // G channel
|
||||
let bCoeffs = blockData.dctCoeffs.slice(2 * 64, 3 * 64) // B channel
|
||||
|
||||
// Dequantize DC coefficients
|
||||
let rDC = dequantizeCoeff(rCoeff, quantTable[0], true)
|
||||
let gDC = dequantizeCoeff(gCoeff, quantTable[0], true)
|
||||
let bDC = dequantizeCoeff(bCoeff, quantTable[0], true)
|
||||
// Perform IDCT for each channel
|
||||
let rBlock = idct8x8(rCoeffs, quantTable)
|
||||
let gBlock = idct8x8(gCoeffs, quantTable)
|
||||
let bBlock = idct8x8(bCoeffs, quantTable)
|
||||
|
||||
// Convert to RGB values (DC represents average)
|
||||
let r = Math.max(0, Math.min(255, rDC + 128))
|
||||
let g = Math.max(0, Math.min(255, gDC + 128))
|
||||
let b = Math.max(0, Math.min(255, bDC + 128))
|
||||
|
||||
// Convert to 4-bit values
|
||||
let r4 = Math.max(0, Math.min(15, Math.round(r * 15 / 255)))
|
||||
let g4 = Math.max(0, Math.min(15, Math.round(g * 15 / 255)))
|
||||
let b4 = Math.max(0, Math.min(15, Math.round(b * 15 / 255)))
|
||||
|
||||
let rgValue = (r4 << 4) | g4 // R in MSB, G in LSB
|
||||
let baValue = (b4 << 4) | 15 // B in MSB, A=15 (opaque) in LSB
|
||||
|
||||
// Software decoding (for fallback only)
|
||||
|
||||
// Fill 8x8 block with solid color
|
||||
// Fill 8x8 block with IDCT results
|
||||
for (let dy = 0; dy < BLOCK_SIZE; dy++) {
|
||||
for (let dx = 0; dx < BLOCK_SIZE; dx++) {
|
||||
let x = startX + dx
|
||||
let y = startY + dy
|
||||
if (x < width && y < height) {
|
||||
let offset = y * width + x
|
||||
// Normal memory plane assignments
|
||||
sys.poke(currRG - offset, rgValue) // Graphics memory uses negative addressing
|
||||
sys.poke(currBA - offset, baValue)
|
||||
let blockOffset = dy * BLOCK_SIZE + dx
|
||||
let imageOffset = y * width + x
|
||||
|
||||
// Get RGB values from IDCT results
|
||||
let r = rBlock[blockOffset]
|
||||
let g = gBlock[blockOffset]
|
||||
let b = bBlock[blockOffset]
|
||||
|
||||
// Convert to 4-bit values
|
||||
let r4 = Math.max(0, Math.min(15, Math.round(r * 15 / 255)))
|
||||
let g4 = Math.max(0, Math.min(15, Math.round(g * 15 / 255)))
|
||||
let b4 = Math.max(0, Math.min(15, Math.round(b * 15 / 255)))
|
||||
|
||||
let rgValue = (r4 << 4) | g4 // R in MSB, G in LSB
|
||||
let baValue = (b4 << 4) | 15 // B in MSB, A=15 (opaque) in LSB
|
||||
|
||||
// Write to graphics memory
|
||||
sys.poke(currRG - imageOffset, rgValue) // Graphics memory uses negative addressing
|
||||
sys.poke(currBA - imageOffset, baValue)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,6 +20,47 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
return vm.findPeribyType(VM.PERITYPE_GPU_AND_TERM)?.peripheral as? GraphicsAdapter
|
||||
}
|
||||
|
||||
/**
 * Dequantises one 8x8 channel of integer DCT coefficients and applies a 2D inverse DCT.
 *
 * Every coefficient (DC included) is multiplied by the [quantTable] entry at the same index.
 * Each reconstructed sample is level-shifted by +128, clamped to 0..255 and rounded to the
 * nearest integer. Rounding (rather than truncating) matches the `Math.round` used by the
 * JavaScript software decoder and avoids a systematic downward bias of up to one level.
 *
 * NOTE(review): input is indexed `u * 8 + v` with `u` paired with output column `x`; the
 * JavaScript `idct8x8` pairs the column with the *second* index instead - confirm which
 * layout the encoder emits.
 *
 * @param coeffs 64 quantised coefficients of one channel
 * @param quantTable 64 quantiser steps, indexed like [coeffs]
 * @return 64 samples in 0..255, row-major (`y * 8 + x`)
 */
private fun performIDCT(coeffs: IntArray, quantTable: IntArray): IntArray {
    // basis[k][n] = C(k) * cos((2n + 1) * k * PI / 16) / 2, with C(0) = 1/sqrt(2) and C(k > 0) = 1
    val basis = Array(8) { k ->
        val scale = if (k == 0) 1.0 / kotlin.math.sqrt(2.0) else 1.0
        DoubleArray(8) { n -> scale * kotlin.math.cos((2.0 * n + 1.0) * k * kotlin.math.PI / 16.0) / 2.0 }
    }

    // Dequantise in integer arithmetic, exactly as before, then widen to Double
    val dequantised = DoubleArray(64) { i -> (coeffs[i] * quantTable[i]).toDouble() }

    val result = IntArray(64)
    for (x in 0 until 8) {
        for (y in 0 until 8) {
            var sum = 0.0
            for (u in 0 until 8) {
                for (v in 0 until 8) {
                    sum += basis[u][x] * basis[v][y] * dequantised[u * 8 + v]
                }
            }
            val level = kotlin.math.max(0.0, kotlin.math.min(255.0, sum + 128.0))
            // Round half up, like the JS decoder's Math.round; toInt() alone would truncate
            result[y * 8 + x] = java.lang.Math.round(level).toInt()
        }
    }

    return result
}
|
||||
|
||||
/**
 * Computes the base address value for the GPU peripheral's memory window.
 *
 * The result is `-1 - 1048576 * index`, where `index` is whatever
 * `vm.findPeriIndexByType(VM.PERITYPE_GPU_AND_TERM)` reports, or 0 when that lookup yields null.
 */
fun getGpuMemBase(): Int {
    val gpuIndex = vm.findPeriIndexByType(VM.PERITYPE_GPU_AND_TERM) ?: 0
    val windowOffset = 1048576 * gpuIndex
    return -1 - windowOffset
}
|
||||
@@ -1331,68 +1372,6 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Inverse-DCT one 8x8 block of three channels held in VM memory.
 *
 * Reads 3 x 64 coefficients as little-endian 32-bit floats starting at [dctPtr]
 * (channel-major, then `u * 8 + v`), applies a 2D inverse DCT to each channel, adds 0.5,
 * scales by 255 and clamps to 0..255, then writes 64 pixels of three bytes each
 * (channel 0, 1, 2) starting at [blockPtr]. All reads finish before the first write.
 * Returns immediately, touching no memory, when [getFirstGPU] yields null.
 *
 * @param dctPtr pointer to DCT coefficients (192 floats)
 * @param blockPtr pointer to output RGB block (192 bytes)
 */
fun tevIdct8x8(dctPtr: Int, blockPtr: Int) {
    if (getFirstGPU() == null) return

    // basis[freq][pos] = C(freq) * cos((2 * pos + 1) * freq * PI / 16) / 2
    val basis = Array(8) { freq ->
        Array(8) { pos ->
            val norm = if (freq == 0) 1.0 / sqrt(2.0) else 1.0
            norm * cos((2.0 * pos + 1.0) * freq * PI / 16.0) / 2.0
        }
    }

    // Phase 1: fetch all 192 coefficients, assembling each float from four bytes (LSB first)
    val srcBase = dctPtr.toLong()
    val freqDomain = Array(3) { Array(8) { DoubleArray(8) } }
    for (index in 0 until 3 * 64) {
        val addr = srcBase + index * 4
        val b0 = vm.peek(addr)!! and -1
        val b1 = vm.peek(addr + 1)!! and -1
        val b2 = vm.peek(addr + 2)!! and -1
        val b3 = vm.peek(addr + 3)!! and -1
        val floatBits = b0.toUint() or (b1.toUint() shl 8) or (b2.toUint() shl 16) or (b3.toUint() shl 24)
        val channel = index / 64
        val u = (index % 64) / 8
        val v = index % 8
        freqDomain[channel][u][v] = java.lang.Float.intBitsToFloat(floatBits).toDouble()
    }

    // Phase 2: 2D inverse DCT per channel; samples[channel][y][x]
    val samples = Array(3) { channel ->
        Array(8) { y ->
            DoubleArray(8) { x ->
                var acc = 0.0
                for (u in 0..7) {
                    for (v in 0..7) {
                        acc += basis[u][x] * basis[v][y] * freqDomain[channel][u][v]
                    }
                }
                acc + 0.5 // Add back DC offset
            }
        }
    }

    // Phase 3: scale to 0..255, clamp, and store three bytes per pixel in row-major order
    val dstBase = blockPtr.toLong()
    for (pixel in 0 until 64) {
        val y = pixel / 8
        val x = pixel % 8
        for (channel in 0..2) {
            val level = (clamp(samples[channel][y][x] * 255.0, 0.0, 255.0)).toInt()
            vm.poke(dstBase + pixel * 3 + channel, level.toByte())
        }
    }
}
|
||||
|
||||
/**
|
||||
* Motion compensation: copy 8x8 block with sub-pixel interpolation
|
||||
@@ -1733,34 +1712,41 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
}
|
||||
}
|
||||
|
||||
else -> { // TEV_MODE_INTRA (0x01) or TEV_MODE_INTER (0x02) - DCT decode
|
||||
// Extract DC coefficients and dequantize
|
||||
val rDC = dctCoeffs[0 * 64 + 0] // R channel DC
|
||||
val gDC = dctCoeffs[1 * 64 + 0] // G channel DC
|
||||
val bDC = dctCoeffs[2 * 64 + 0] // B channel DC
|
||||
else -> { // TEV_MODE_INTRA (0x01) or TEV_MODE_INTER (0x02) - Full DCT decode
|
||||
// Hardware-accelerated IDCT for all three channels
|
||||
val rCoeffs = dctCoeffs.sliceArray(0 * 64 until 1 * 64) // R channel
|
||||
val gCoeffs = dctCoeffs.sliceArray(1 * 64 until 2 * 64) // G channel
|
||||
val bCoeffs = dctCoeffs.sliceArray(2 * 64 until 3 * 64) // B channel
|
||||
|
||||
// Convert DC to RGB (add 128 offset)
|
||||
val r = kotlin.math.max(0, kotlin.math.min(255, rDC + 128))
|
||||
val g = kotlin.math.max(0, kotlin.math.min(255, gDC + 128))
|
||||
val b = kotlin.math.max(0, kotlin.math.min(255, bDC + 128))
|
||||
// Perform hardware IDCT for each channel
|
||||
val rBlock = performIDCT(rCoeffs, quantTable)
|
||||
val gBlock = performIDCT(gCoeffs, quantTable)
|
||||
val bBlock = performIDCT(bCoeffs, quantTable)
|
||||
|
||||
// Convert to 4-bit 4096-color format
|
||||
val r4 = kotlin.math.max(0, kotlin.math.min(15, (r * 15 / 255)))
|
||||
val g4 = kotlin.math.max(0, kotlin.math.min(15, (g * 15 / 255)))
|
||||
val b4 = kotlin.math.max(0, kotlin.math.min(15, (b * 15 / 255)))
|
||||
|
||||
val rgValue = (r4 shl 4) or g4 // R in MSB, G in LSB
|
||||
val baValue = (b4 shl 4) or 15 // B in MSB, A=15 (opaque) in LSB
|
||||
|
||||
// Fill 8x8 block
|
||||
// Fill 8x8 block with IDCT results
|
||||
for (dy in 0 until 8) {
|
||||
for (dx in 0 until 8) {
|
||||
val x = startX + dx
|
||||
val y = startY + dy
|
||||
if (x < width && y < height) {
|
||||
val offset = y.toLong() * width + x
|
||||
vm.poke(rgPlaneAddr + offset*thisAddrIncVec, rgValue.toByte())
|
||||
vm.poke(baPlaneAddr + offset*thisAddrIncVec, baValue.toByte())
|
||||
val blockOffset = dy * 8 + dx
|
||||
val imageOffset = y.toLong() * width + x
|
||||
|
||||
// Get RGB values from IDCT results
|
||||
val r = rBlock[blockOffset]
|
||||
val g = gBlock[blockOffset]
|
||||
val b = bBlock[blockOffset]
|
||||
|
||||
// Convert to 4-bit 4096-color format
|
||||
val r4 = kotlin.math.max(0, kotlin.math.min(15, (r * 15 / 255)))
|
||||
val g4 = kotlin.math.max(0, kotlin.math.min(15, (g * 15 / 255)))
|
||||
val b4 = kotlin.math.max(0, kotlin.math.min(15, (b * 15 / 255)))
|
||||
|
||||
val rgValue = (r4 shl 4) or g4 // R in MSB, G in LSB
|
||||
val baValue = (b4 shl 4) or 15 // B in MSB, A=15 (opaque) in LSB
|
||||
|
||||
vm.poke(rgPlaneAddr + imageOffset*thisAddrIncVec, rgValue.toByte())
|
||||
vm.poke(baPlaneAddr + imageOffset*thisAddrIncVec, baValue.toByte())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user