From 3495dfca5eff0374a279f6781f8ce0a9b7784c36 Mon Sep 17 00:00:00 2001 From: minjaesong Date: Tue, 9 Sep 2025 09:47:56 +0900 Subject: [PATCH] tev slight optimisation --- assets/disk0/tvdos/bin/playtev.js | 6 ++-- .../torvald/tsvm/GraphicsJSR223Delegate.kt | 31 +++++++++++-------- video_encoder/encoder_tev.c | 15 --------- 3 files changed, 22 insertions(+), 30 deletions(-) diff --git a/assets/disk0/tvdos/bin/playtev.js b/assets/disk0/tvdos/bin/playtev.js index 0572548..3156864 100644 --- a/assets/disk0/tvdos/bin/playtev.js +++ b/assets/disk0/tvdos/bin/playtev.js @@ -580,6 +580,8 @@ let frameDuped = false // Main decoding loop - simplified for performance try { + graphics.tevPrepareQuantTable(qualityY, qualityCo, qualityCg) + let t1 = sys.nanoTime() while (!stopPlay && seqread.getReadCount() < FILE_LENGTH && trueFrameCount < totalFrames) { @@ -655,14 +657,14 @@ try { if (isInterlaced) { // For interlaced: decode current frame into currentFieldAddr // For display: use prevFieldAddr as current, currentFieldAddr as next - graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, [qualityY, qualityCo, qualityCg], trueFrameCount, debugMotionVectors, version, enableDeblocking) + graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, trueFrameCount, debugMotionVectors, version, enableDeblocking) graphics.tevDeinterlace(trueFrameCount, width, decodingHeight, prevFieldAddr, currentFieldAddr, nextFieldAddr, CURRENT_RGB_ADDR, deinterlaceAlgorithm) // Rotate field buffers for next frame: NEXT -> CURRENT -> PREV rotateFieldBuffers() } else { // Progressive or first frame: normal decoding without temporal prediction - graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, [qualityY, qualityCo, qualityCg], trueFrameCount, debugMotionVectors, version, enableDeblocking) + graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, trueFrameCount, debugMotionVectors, version, enableDeblocking) } decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 // Convert to milliseconds diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index 6d622d7..32eb3ef 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -2242,6 +2242,23 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } + private lateinit var quantTableY: FloatArray + private lateinit var quantTableCo: FloatArray + private lateinit var quantTableCg: FloatArray + private lateinit var quantTableB: FloatArray + + fun tevPrepareQuantTable(qualityY: Int, qualityCo: Int, qualityCg: Int) { + val quantYmult = jpeg_quality_to_mult(qualityY) + val quantCOmult = jpeg_quality_to_mult(qualityCo) + val quantCGmult = jpeg_quality_to_mult(qualityCg) + val quantBmult = quantCGmult + + quantTableY = QUANT_TABLE_Y.map { (it * quantYmult).coerceIn(1f, 255f) }.toFloatArray() + quantTableCo = QUANT_TABLE_C.map { (it * quantCOmult).coerceIn(1f, 255f) }.toFloatArray() + quantTableCg = QUANT_TABLE_C.map { (it * quantCGmult).coerceIn(1f, 255f) }.toFloatArray() + quantTableB = QUANT_TABLE_C.map { (it * quantBmult).coerceIn(1f, 255f) }.toFloatArray() + } + /** * Hardware-accelerated TEV frame decoder for YCoCg-R 4:2:0 format * Decodes compressed TEV block data directly to framebuffer @@ -2255,7 +2272,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { * @param frameCounter Frame counter for temporal patterns */ fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long, - width: Int, height: Int, qualityIndices: IntArray, frameCounter: Int, + width: Int, height: Int, frameCounter: Int, debugMotionVectors: Boolean = false, tevVersion: Int = 2, enableDeblocking: Boolean = true) { @@ -2265,24 +2282,12 @@ class GraphicsJSR223Delegate(private val vm: VM) { val blocksX = (width + 15) / 16 // 16x16 blocks now val blocksY = (height + 15) / 16 - val quantYmult = jpeg_quality_to_mult(qualityIndices[0]) - val quantCOmult = jpeg_quality_to_mult(qualityIndices[1]) - val quantCGmult = jpeg_quality_to_mult(qualityIndices[2]) - val quantBmult = quantCGmult - - // Apply rate control factor to quantization tables (if not ~1.0, skip optimization) - val quantTableY = QUANT_TABLE_Y.map { (it * quantYmult).coerceIn(1f, 255f) }.toFloatArray() - val quantTableCo = QUANT_TABLE_C.map { (it * quantCOmult).coerceIn(1f, 255f) }.toFloatArray() - val quantTableCg = QUANT_TABLE_C.map { (it * quantCGmult).coerceIn(1f, 255f) }.toFloatArray() - val quantTableB = QUANT_TABLE_C.map { it * quantBmult.toFloat() }.toFloatArray() - var readPtr = blockDataPtr // decide increment "direction" by the sign of the pointer val prevAddrIncVec = if (prevRGBAddr >= 0) 1 else -1 val thisAddrIncVec = if (currentRGBAddr >= 0) 1 else -1 - for (by in 0 until blocksY) { for (bx in 0 until blocksX) { val startX = bx * 16 diff --git a/video_encoder/encoder_tev.c b/video_encoder/encoder_tev.c index d28956d..c5e6376 100644 --- a/video_encoder/encoder_tev.c +++ b/video_encoder/encoder_tev.c @@ -2589,27 +2589,12 @@ int main(int argc, char *argv[]) { printf("\nEncoding complete!\n"); printf(" Frames encoded: %d\n", frame_count); - printf(" - sync packets: %d\n", sync_packet_count); printf(" Framerate: %d\n", enc->output_fps); printf(" Output size: %zu bytes\n", enc->total_output_bytes); - - // Calculate achieved bitrate - double achieved_bitrate_kbps = (enc->total_output_bytes * 8.0) / 1000.0 / total_time; - printf(" Achieved bitrate: %.1f kbps", achieved_bitrate_kbps); - if (enc->bitrate_mode > 0) { - printf(" (target: %d kbps, %.1f%%)", enc->target_bitrate_kbps, - (achieved_bitrate_kbps / enc->target_bitrate_kbps) * 100.0); - } - printf("\n"); - printf(" Encoding time: %.2fs (%.1f fps)\n", total_time, frame_count / total_time); printf(" Block statistics: INTRA=%d, INTER=%d, MOTION=%d, SKIP=%d\n", enc->blocks_intra, enc->blocks_inter, enc->blocks_motion, enc->blocks_skip); - if (enc->bitrate_mode > 0) { - printf(" Per-block complexity-based rate control: enabled\n"); - } - // Print complexity statistics if enabled calculate_complexity_stats(enc);