colour with DCT

This commit is contained in:
minjaesong
2025-08-18 03:11:34 +09:00
parent 964728dd59
commit ca8ea7f2be
2 changed files with 145 additions and 115 deletions

View File

@@ -204,14 +204,58 @@ let stopPlay = false
/**
 * Dequantize a single DCT coefficient.
 *
 * @param {number} coeff quantized coefficient value
 * @param {number} quant quantization step for this coefficient's position
 * @param {boolean} isDC true when the coefficient is the DC term; kept so existing
 *                       callers keep working, but DC and AC terms are scaled identically
 * @returns {number} coeff multiplied by quant
 */
function dequantizeCoeff(coeff, quant, isDC) {
    // The DC term is scaled by its quantizer exactly like the AC terms.
    // Previously the DC branch returned `coeff` unscaled first, which made the
    // intended `coeff * quant` statement unreachable.
    return coeff * quant
}
/**
 * 8x8 inverse DCT for a single channel (pure-JS software path).
 *
 * Every coefficient is dequantized through dequantizeCoeff() with the matching
 * quantTable entry, run through the 2D inverse DCT, shifted by +128, rounded
 * and clamped to 0..255.
 *
 * @param coeffs     64 quantized coefficients; entry `v * 8 + u` is combined with
 *                   the cosine in x for frequency u and the cosine in y for frequency v
 * @param quantTable 64 quantization steps, indexed like coeffs
 * @returns {Array} 64 pixel values in 0..255, entry `y * 8 + x`
 */
function idct8x8(coeffs, quantTable) {
    const N = 8

    // Dequantize coefficients (index 0 is flagged as the DC term)
    const block = new Array(N * N)
    for (let i = 0; i < N * N; i++) {
        block[i] = dequantizeCoeff(coeffs[i], quantTable[i], i === 0)
    }

    // Normalisation factors: 1/sqrt(2) for frequency 0, 1 for every other frequency
    const c = new Array(N)
    c[0] = 1.0 / Math.sqrt(2)
    for (let i = 1; i < N; i++) {
        c[i] = 1.0
    }

    // cosTable[p * N + f] = cos((2p + 1) * f * PI / (2N)).
    // Built once (64 cosine evaluations) instead of evaluating two cosines in the
    // innermost loop (8192 evaluations per block). The expression is unchanged,
    // so every table entry equals the value the inline call produced.
    const cosTable = new Array(N * N)
    for (let p = 0; p < N; p++) {
        for (let f = 0; f < N; f++) {
            cosTable[p * N + f] = Math.cos((2 * p + 1) * f * Math.PI / (2 * N))
        }
    }

    // 2D IDCT
    const result = new Array(N * N)
    for (let x = 0; x < N; x++) {
        for (let y = 0; y < N; y++) {
            let sum = 0.0
            for (let u = 0; u < N; u++) {
                const cosU = cosTable[x * N + u]
                for (let v = 0; v < N; v++) {
                    // Multiplication order is kept as-is so floating-point rounding is unchanged
                    sum += c[u] * c[v] * block[v * N + u] * cosU * cosTable[y * N + v]
                }
            }
            // Scale by 1/4, re-centre around 128, then round and clamp to a byte-sized pixel value
            result[y * N + x] = Math.max(0, Math.min(255, Math.round(sum / 4.0 + 128)))
        }
    }

    return result
}
// Hardware-accelerated decoding uses graphics.tevIdct8x8() instead of pure JS
// Hardware-accelerated TEV block decoder
@@ -260,43 +304,43 @@ function decodeBlock(blockData, blockX, blockY, prevRG, prevBA, currRG, currBA,
}
}
} else {
// INTRA or INTER modes: simplified DC-only decoding for debugging
// INTRA or INTER modes: Full DCT decoding
// Extract DC coefficients and convert to colors
let rCoeff = blockData.dctCoeffs[0 * 64 + 0] // R DC
let gCoeff = blockData.dctCoeffs[1 * 64 + 0] // G DC
let bCoeff = blockData.dctCoeffs[2 * 64 + 0] // B DC
// Extract DCT coefficients for each channel (R, G, B)
let rCoeffs = blockData.dctCoeffs.slice(0 * 64, 1 * 64) // R channel
let gCoeffs = blockData.dctCoeffs.slice(1 * 64, 2 * 64) // G channel
let bCoeffs = blockData.dctCoeffs.slice(2 * 64, 3 * 64) // B channel
// Dequantize DC coefficients
let rDC = dequantizeCoeff(rCoeff, quantTable[0], true)
let gDC = dequantizeCoeff(gCoeff, quantTable[0], true)
let bDC = dequantizeCoeff(bCoeff, quantTable[0], true)
// Perform IDCT for each channel
let rBlock = idct8x8(rCoeffs, quantTable)
let gBlock = idct8x8(gCoeffs, quantTable)
let bBlock = idct8x8(bCoeffs, quantTable)
// Convert to RGB values (DC represents average)
let r = Math.max(0, Math.min(255, rDC + 128))
let g = Math.max(0, Math.min(255, gDC + 128))
let b = Math.max(0, Math.min(255, bDC + 128))
// Convert to 4-bit values
let r4 = Math.max(0, Math.min(15, Math.round(r * 15 / 255)))
let g4 = Math.max(0, Math.min(15, Math.round(g * 15 / 255)))
let b4 = Math.max(0, Math.min(15, Math.round(b * 15 / 255)))
let rgValue = (r4 << 4) | g4 // R in MSB, G in LSB
let baValue = (b4 << 4) | 15 // B in MSB, A=15 (opaque) in LSB
// Software decoding (for fallback only)
// Fill 8x8 block with solid color
// Fill 8x8 block with IDCT results
for (let dy = 0; dy < BLOCK_SIZE; dy++) {
for (let dx = 0; dx < BLOCK_SIZE; dx++) {
let x = startX + dx
let y = startY + dy
if (x < width && y < height) {
let offset = y * width + x
// Normal memory plane assignments
sys.poke(currRG - offset, rgValue) // Graphics memory uses negative addressing
sys.poke(currBA - offset, baValue)
let blockOffset = dy * BLOCK_SIZE + dx
let imageOffset = y * width + x
// Get RGB values from IDCT results
let r = rBlock[blockOffset]
let g = gBlock[blockOffset]
let b = bBlock[blockOffset]
// Convert to 4-bit values
let r4 = Math.max(0, Math.min(15, Math.round(r * 15 / 255)))
let g4 = Math.max(0, Math.min(15, Math.round(g * 15 / 255)))
let b4 = Math.max(0, Math.min(15, Math.round(b * 15 / 255)))
let rgValue = (r4 << 4) | g4 // R in MSB, G in LSB
let baValue = (b4 << 4) | 15 // B in MSB, A=15 (opaque) in LSB
// Write to graphics memory
sys.poke(currRG - imageOffset, rgValue) // Graphics memory uses negative addressing
sys.poke(currBA - imageOffset, baValue)
}
}
}

View File

@@ -20,6 +20,47 @@ class GraphicsJSR223Delegate(private val vm: VM) {
return vm.findPeribyType(VM.PERITYPE_GPU_AND_TERM)?.peripheral as? GraphicsAdapter
}
/**
 * Basis table used by [performIDCT]: entry `[u][x]` is
 * `c(u) * cos((2x + 1) * u * PI / 16) / 2` with `c(0) = 1/sqrt(2)` and `c(u) = 1` otherwise.
 * Built once instead of on every [performIDCT] call.
 */
private val performIdctBasis: Array<DoubleArray> = Array(8) { u ->
    val cu = if (u == 0) 1.0 / kotlin.math.sqrt(2.0) else 1.0
    DoubleArray(8) { x ->
        cu * kotlin.math.cos((2.0 * x + 1.0) * u * kotlin.math.PI / 16.0) / 2.0
    }
}

/**
 * Dequantize and inverse-transform one 8x8 channel of integer DCT coefficients.
 *
 * Each coefficient is multiplied by the matching [quantTable] entry, the 2D inverse DCT is
 * applied, 128 is added, and the value is clamped to 0..255 and rounded to the nearest integer.
 *
 * NOTE(review): the coefficient at `u * 8 + v` is paired with the basis in x for `u` and in y
 * for `v`; the JavaScript fallback `idct8x8` reads `v * 8 + u` for the same pairing, i.e. the
 * two decoders treat the coefficient block as transposed relative to each other — confirm which
 * layout the encoder emits.
 *
 * @param coeffs 64 quantized coefficients, indexed `u * 8 + v`
 * @param quantTable 64 quantization steps, indexed like [coeffs]
 * @return 64 pixel values in 0..255, indexed `y * 8 + x`
 */
private fun performIDCT(coeffs: IntArray, quantTable: IntArray): IntArray {
    val basis = performIdctBasis

    // Dequantize in floating point so a large coefficient * quantizer product cannot wrap as an Int
    val dequantized = DoubleArray(64) { idx -> coeffs[idx].toDouble() * quantTable[idx] }

    val result = IntArray(64)

    // Apply 2D inverse DCT
    for (x in 0 until 8) {
        for (y in 0 until 8) {
            var sum = 0.0
            for (u in 0 until 8) {
                for (v in 0 until 8) {
                    sum += basis[u][x] * basis[v][y] * dequantized[u * 8 + v]
                }
            }
            // Clamp first (keeps the Long -> Int conversion in range), then round to nearest
            // rather than truncating, matching the rounding of the JavaScript fallback decoder.
            val pixel = kotlin.math.max(0.0, kotlin.math.min(255.0, sum + 128.0))
            result[y * 8 + x] = Math.round(pixel).toInt()
        }
    }

    return result
}
/**
 * Base address of the GPU's memory window.
 *
 * @return `-1 - 1048576 * index`, where `index` is the peripheral index reported for
 *         [VM.PERITYPE_GPU_AND_TERM], or 0 when no such peripheral is found
 */
fun getGpuMemBase(): Int {
    val gpuIndex = vm.findPeriIndexByType(VM.PERITYPE_GPU_AND_TERM) ?: 0
    return -1 - (1048576 * gpuIndex)
}
@@ -1331,68 +1372,6 @@ class GraphicsJSR223Delegate(private val vm: VM) {
}
}
/**
 * 8x8 inverse DCT over three channels, operating directly on VM memory.
 *
 * Reads 192 little-endian 32-bit floats starting at [dctPtr] (three consecutive 64-entry
 * channels, each indexed `u * 8 + v`), transforms every channel, adds 0.5, scales by 255 and
 * clamps to 0..255, then writes 64 three-byte triplets (one byte per channel) starting at
 * [blockPtr]. Returns without touching memory when no GPU is found.
 *
 * @param dctPtr pointer to DCT coefficients (192 floats)
 * @param blockPtr pointer to output RGB block (192 bytes)
 */
fun tevIdct8x8(dctPtr: Int, blockPtr: Int) {
    // Only the presence of a GPU matters here; the adapter itself is not used below.
    if (getFirstGPU() == null) return

    // basis[u][x] = c(u) * cos((2x + 1) * u * PI / 16) / 2, with c(0) = 1/sqrt(2), c(u > 0) = 1
    val basis = Array(8) { u ->
        val scale = if (u == 0) 1.0 / sqrt(2.0) else 1.0
        Array(8) { x -> scale * cos((2.0 * x + 1.0) * u * PI / 16.0) / 2.0 }
    }

    val coefficients = Array(3) { Array(8) { DoubleArray(8) } }
    val pixels = Array(3) { Array(8) { DoubleArray(8) } }

    // Pass 1: load every coefficient before anything is written back
    val sourceBase = dctPtr.toLong()
    for (channel in 0 until 3) {
        for (u in 0 until 8) {
            for (v in 0 until 8) {
                val address = sourceBase + (channel * 64 + u * 8 + v) * 4
                val b0 = vm.peek(address)!! and -1
                val b1 = vm.peek(address + 1)!! and -1
                val b2 = vm.peek(address + 2)!! and -1
                val b3 = vm.peek(address + 3)!! and -1

                // Assemble the four bytes, least significant first, into the float's bit pattern
                val floatBits = b0.toUint() or (b1.toUint() shl 8) or (b2.toUint() shl 16) or (b3.toUint() shl 24)
                coefficients[channel][u][v] = java.lang.Float.intBitsToFloat(floatBits).toDouble()
            }
        }
    }

    // Pass 2: 2D inverse DCT per channel
    for (channel in 0 until 3) {
        val source = coefficients[channel]
        val target = pixels[channel]
        for (x in 0 until 8) {
            for (y in 0 until 8) {
                var acc = 0.0
                for (u in 0 until 8) {
                    for (v in 0 until 8) {
                        acc += basis[u][x] * basis[v][y] * source[u][v]
                    }
                }
                target[y][x] = acc + 0.5 // Add back DC offset
            }
        }
    }

    // Pass 3: scale to 0..255, clamp, and store the three channel bytes of each pixel side by side
    val targetBase = blockPtr.toLong()
    for (y in 0 until 8) {
        for (x in 0 until 8) {
            val address = targetBase + (y * 8 + x) * 3
            val levels = IntArray(3) { channel ->
                (clamp(pixels[channel][y][x] * 255.0, 0.0, 255.0)).toInt()
            }
            for (channel in 0 until 3) {
                vm.poke(address + channel, levels[channel].toByte())
            }
        }
    }
}
/**
* Motion compensation: copy 8x8 block with sub-pixel interpolation
@@ -1733,34 +1712,41 @@ class GraphicsJSR223Delegate(private val vm: VM) {
}
}
else -> { // TEV_MODE_INTRA (0x01) or TEV_MODE_INTER (0x02) - DCT decode
// Extract DC coefficients and dequantize
val rDC = dctCoeffs[0 * 64 + 0] // R channel DC
val gDC = dctCoeffs[1 * 64 + 0] // G channel DC
val bDC = dctCoeffs[2 * 64 + 0] // B channel DC
else -> { // TEV_MODE_INTRA (0x01) or TEV_MODE_INTER (0x02) - Full DCT decode
// Hardware-accelerated IDCT for all three channels
val rCoeffs = dctCoeffs.sliceArray(0 * 64 until 1 * 64) // R channel
val gCoeffs = dctCoeffs.sliceArray(1 * 64 until 2 * 64) // G channel
val bCoeffs = dctCoeffs.sliceArray(2 * 64 until 3 * 64) // B channel
// Convert DC to RGB (add 128 offset)
val r = kotlin.math.max(0, kotlin.math.min(255, rDC + 128))
val g = kotlin.math.max(0, kotlin.math.min(255, gDC + 128))
val b = kotlin.math.max(0, kotlin.math.min(255, bDC + 128))
// Perform hardware IDCT for each channel
val rBlock = performIDCT(rCoeffs, quantTable)
val gBlock = performIDCT(gCoeffs, quantTable)
val bBlock = performIDCT(bCoeffs, quantTable)
// Convert to 4-bit 4096-color format
val r4 = kotlin.math.max(0, kotlin.math.min(15, (r * 15 / 255)))
val g4 = kotlin.math.max(0, kotlin.math.min(15, (g * 15 / 255)))
val b4 = kotlin.math.max(0, kotlin.math.min(15, (b * 15 / 255)))
val rgValue = (r4 shl 4) or g4 // R in MSB, G in LSB
val baValue = (b4 shl 4) or 15 // B in MSB, A=15 (opaque) in LSB
// Fill 8x8 block
// Fill 8x8 block with IDCT results
for (dy in 0 until 8) {
for (dx in 0 until 8) {
val x = startX + dx
val y = startY + dy
if (x < width && y < height) {
val offset = y.toLong() * width + x
vm.poke(rgPlaneAddr + offset*thisAddrIncVec, rgValue.toByte())
vm.poke(baPlaneAddr + offset*thisAddrIncVec, baValue.toByte())
val blockOffset = dy * 8 + dx
val imageOffset = y.toLong() * width + x
// Get RGB values from IDCT results
val r = rBlock[blockOffset]
val g = gBlock[blockOffset]
val b = bBlock[blockOffset]
// Convert to 4-bit 4096-color format
val r4 = kotlin.math.max(0, kotlin.math.min(15, (r * 15 / 255)))
val g4 = kotlin.math.max(0, kotlin.math.min(15, (g * 15 / 255)))
val b4 = kotlin.math.max(0, kotlin.math.min(15, (b * 15 / 255)))
val rgValue = (r4 shl 4) or g4 // R in MSB, G in LSB
val baValue = (b4 shl 4) or 15 // B in MSB, A=15 (opaque) in LSB
vm.poke(rgPlaneAddr + imageOffset*thisAddrIncVec, rgValue.toByte())
vm.poke(baPlaneAddr + imageOffset*thisAddrIncVec, baValue.toByte())
}
}
}