diff --git a/assets/disk0/tvdos/bin/playtev.js b/assets/disk0/tvdos/bin/playtev.js index 3156864..65386b5 100644 --- a/assets/disk0/tvdos/bin/playtev.js +++ b/assets/disk0/tvdos/bin/playtev.js @@ -580,8 +580,6 @@ let frameDuped = false // Main decoding loop - simplified for performance try { - graphics.tevPrepareQuantTable(qualityY, qualityCo, qualityCg) - let t1 = sys.nanoTime() while (!stopPlay && seqread.getReadCount() < FILE_LENGTH && trueFrameCount < totalFrames) { @@ -657,14 +655,14 @@ try { if (isInterlaced) { // For interlaced: decode current frame into currentFieldAddr // For display: use prevFieldAddr as current, currentFieldAddr as next - graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, trueFrameCount, debugMotionVectors, version, enableDeblocking) + graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking) graphics.tevDeinterlace(trueFrameCount, width, decodingHeight, prevFieldAddr, currentFieldAddr, nextFieldAddr, CURRENT_RGB_ADDR, deinterlaceAlgorithm) // Rotate field buffers for next frame: NEXT -> CURRENT -> PREV rotateFieldBuffers() } else { // Progressive or first frame: normal decoding without temporal prediction - graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, trueFrameCount, debugMotionVectors, version, enableDeblocking) + graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking) } decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 // Convert to milliseconds diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index 32eb3ef..3611b0f 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -1273,7 +1273,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { // TEV (TSVM Enhanced Video) format support // Created by Claude on 2025-08-17 - fun jpeg_quality_to_mult(q: Int): Float { + fun jpeg_quality_to_mult(q: Float): Float { return (if ((q < 50)) 5000f / q else 200f - 2 * q) / 100f } @@ -1525,7 +1525,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } - private fun tevIdct8x8_fast(coeffs: ShortArray, quantTable: FloatArray, isChromaResidual: Boolean = false, mult: Float = 1f): IntArray { + private fun tevIdct8x8_fast(coeffs: ShortArray, quantTable: IntArray, isChromaResidual: Boolean = false, qualityIndex: Int, rateControlFactor: Float): IntArray { val result = IntArray(64) // Reuse preallocated temp buffer to reduce GC pressure @@ -1539,7 +1539,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val coeff = if (isChromaResidual && coeffIdx == 0) { coeffs[coeffIdx].toFloat() // DC lossless for chroma residual } else { - coeffs[coeffIdx] * quantTable[coeffIdx] * mult + coeffs[coeffIdx] * quantTable[coeffIdx] * jpeg_quality_to_mult(qualityIndex * rateControlFactor) } sum += dctBasis8[u][col] * coeff } @@ -1576,7 +1576,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } // 16x16 IDCT for Y channel (YCoCg-R format) - private fun tevIdct16x16_fast(coeffs: ShortArray, quantTable: FloatArray, mult: Float = 1.0f): IntArray { + private fun tevIdct16x16_fast(coeffs: ShortArray, quantTable: IntArray, qualityIndex: Int, rateControlFactor: Float): IntArray { val result = IntArray(256) // 16x16 = 256 // Process coefficients and dequantize using preallocated buffer @@ -1586,13 +1586,13 @@ class GraphicsJSR223Delegate(private val vm: VM) { val coeff = if (idx == 0) { coeffs[idx].toFloat() // DC lossless for luma } else { - coeffs[idx] * quantTable[idx] * mult + coeffs[idx] * quantTable[idx] * jpeg_quality_to_mult(qualityIndex * rateControlFactor) } idct16TempBuffer[idx] = coeff } } - // Fast separable IDCT: 8x performance improvement - but causes 90° rotation! + // Fast separable IDCT // First pass: Process rows (16 1D IDCTs) for (row in 0 until 16) { for (col in 0 until 16) { @@ -2242,23 +2242,6 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } - private lateinit var quantTableY: FloatArray - private lateinit var quantTableCo: FloatArray - private lateinit var quantTableCg: FloatArray - private lateinit var quantTableB: FloatArray - - fun tevPrepareQuantTable(qualityY: Int, qualityCo: Int, qualityCg: Int) { - val quantYmult = jpeg_quality_to_mult(qualityY) - val quantCOmult = jpeg_quality_to_mult(qualityCo) - val quantCGmult = jpeg_quality_to_mult(qualityCg) - val quantBmult = quantCGmult - - quantTableY = QUANT_TABLE_Y.map { (it * quantYmult).coerceIn(1f, 255f) }.toFloatArray() - quantTableCo = QUANT_TABLE_C.map { (it * quantCOmult).coerceIn(1f, 255f) }.toFloatArray() - quantTableCg = QUANT_TABLE_C.map { (it * quantCGmult).coerceIn(1f, 255f) }.toFloatArray() - quantTableB = QUANT_TABLE_C.map { (it * quantBmult).coerceIn(1f, 255f) }.toFloatArray() - } - /** * Hardware-accelerated TEV frame decoder for YCoCg-R 4:2:0 format * Decodes compressed TEV block data directly to framebuffer @@ -2272,7 +2255,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { * @param frameCounter Frame counter for temporal patterns */ fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long, - width: Int, height: Int, frameCounter: Int, + width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, frameCounter: Int, debugMotionVectors: Boolean = false, tevVersion: Int = 2, enableDeblocking: Boolean = true) { @@ -2447,9 +2430,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { readPtr += 768 // Perform hardware IDCT for each channel using fast algorithm - val yBlock = tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), quantTableY, rateControlFactor) - val coBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), quantTableCo, true, rateControlFactor) - val cgBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), if (tevVersion == 3) quantTableB else quantTableCg, true, rateControlFactor) + val yBlock = tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), QUANT_TABLE_Y, qY, rateControlFactor) + val coBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), QUANT_TABLE_C, true, qCo, rateControlFactor) + val cgBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), QUANT_TABLE_C, true, qCg, rateControlFactor) // Convert to RGB (YCoCg-R for v2, XYB for v3) val rgbData = if (tevVersion == 3) { @@ -2485,9 +2468,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { readPtr += 768 // Step 2: Decode residual DCT - val yResidual = tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), quantTableY, rateControlFactor) - val coResidual = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), quantTableCo, true, rateControlFactor) - val cgResidual = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), if (tevVersion == 3) quantTableB else quantTableCg, true, rateControlFactor) + val yResidual = tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), QUANT_TABLE_Y, qY, rateControlFactor) + val coResidual = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), QUANT_TABLE_C, true, qCo, rateControlFactor) + val cgResidual = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), QUANT_TABLE_C, true, qCg, rateControlFactor) // Step 3: Build motion-compensated YCoCg-R block and add residuals val finalY = IntArray(256) diff --git a/video_encoder/encoder_tev.c b/video_encoder/encoder_tev.c index c5e6376..c9997f4 100644 --- a/video_encoder/encoder_tev.c +++ b/video_encoder/encoder_tev.c @@ -931,10 +931,10 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke // quantise Y coefficients (luma) using per-block rate control const uint32_t *y_quant = QUANT_TABLE_Y; - const float qmult_y = jpeg_quality_to_mult(enc->qualityY); + const float qmult_y = jpeg_quality_to_mult(enc->qualityY * block->rate_control_factor); for (int i = 0; i < BLOCK_SIZE_SQR; i++) { // Apply rate control factor to quantization table (like decoder does) - float effective_quant = y_quant[i] * qmult_y * block->rate_control_factor; + float effective_quant = y_quant[i] * qmult_y; block->y_coeffs[i] = quantise_coeff(enc->dct_workspace[i], FCLAMP(effective_quant, 1.f, 255.f), i == 0, 0); } @@ -943,10 +943,10 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke // quantise Co coefficients (chroma - orange-blue) using per-block rate control const uint32_t *co_quant = QUANT_TABLE_C; - const float qmult_co = jpeg_quality_to_mult(enc->qualityCo); + const float qmult_co = jpeg_quality_to_mult(enc->qualityCo * block->rate_control_factor); for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) { // Apply rate control factor to quantization table (like decoder does) - float effective_quant = co_quant[i] * qmult_co * block->rate_control_factor; + float effective_quant = co_quant[i] * qmult_co; block->co_coeffs[i] = quantise_coeff(enc->dct_workspace[i], FCLAMP(effective_quant, 1.f, 255.f), i == 0, 1); } @@ -955,10 +955,10 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke // quantise Cg coefficients (chroma - green-magenta, qmult_cg is more aggressive like NTSC Q) using per-block rate control const uint32_t *cg_quant = QUANT_TABLE_C; - const float qmult_cg = jpeg_quality_to_mult(enc->qualityCg); + const float qmult_cg = jpeg_quality_to_mult(enc->qualityCg * block->rate_control_factor); for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) { // Apply rate control factor to quantization table (like decoder does) - float effective_quant = cg_quant[i] * qmult_cg * block->rate_control_factor; + float effective_quant = cg_quant[i] * qmult_cg; block->cg_coeffs[i] = quantise_coeff(enc->dct_workspace[i], FCLAMP(effective_quant, 1.f, 255.f), i == 0, 1); }