tev slight optimisation

This commit is contained in:
minjaesong
2025-09-09 09:47:56 +09:00
parent cf1ee80aa1
commit 3495dfca5e
3 changed files with 22 additions and 30 deletions

View File

@@ -580,6 +580,8 @@ let frameDuped = false
// Main decoding loop - simplified for performance
try {
graphics.tevPrepareQuantTable(qualityY, qualityCo, qualityCg)
let t1 = sys.nanoTime()
while (!stopPlay && seqread.getReadCount() < FILE_LENGTH && trueFrameCount < totalFrames) {
@@ -655,14 +657,14 @@ try {
if (isInterlaced) {
// For interlaced: decode the incoming field into nextFieldAddr, with currentFieldAddr passed as the previous-frame reference
// For display: deinterlace prevFieldAddr, currentFieldAddr and nextFieldAddr into CURRENT_RGB_ADDR
graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, [qualityY, qualityCo, qualityCg], trueFrameCount, debugMotionVectors, version, enableDeblocking)
graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, trueFrameCount, debugMotionVectors, version, enableDeblocking)
graphics.tevDeinterlace(trueFrameCount, width, decodingHeight, prevFieldAddr, currentFieldAddr, nextFieldAddr, CURRENT_RGB_ADDR, deinterlaceAlgorithm)
// Rotate field buffers for next frame: NEXT -> CURRENT -> PREV
rotateFieldBuffers()
} else {
// Progressive or first frame: normal decoding without temporal prediction
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, [qualityY, qualityCo, qualityCg], trueFrameCount, debugMotionVectors, version, enableDeblocking)
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, trueFrameCount, debugMotionVectors, version, enableDeblocking)
}
decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 // Convert to milliseconds

View File

@@ -2242,6 +2242,23 @@ class GraphicsJSR223Delegate(private val vm: VM) {
}
}
// Cached, quality-scaled quantisation tables. They are assigned by tevPrepareQuantTable()
// and remain uninitialised until that function has been called at least once.
private lateinit var quantTableY: FloatArray
private lateinit var quantTableCo: FloatArray
private lateinit var quantTableCg: FloatArray
private lateinit var quantTableB: FloatArray

/**
 * Rebuilds the cached quantisation tables from the three per-channel quality settings.
 *
 * Each quality value is converted to a multiplier with [jpeg_quality_to_mult]; every entry of
 * the corresponding base table is multiplied by it and clamped to the range 1..255.
 * The Y table is derived from `QUANT_TABLE_Y`; the Co, Cg and B tables from `QUANT_TABLE_C`.
 * The B table uses the Cg multiplier, so it holds the same values as the Cg table
 * (stored in its own array).
 *
 * @param qualityY quality setting used to scale the Y table
 * @param qualityCo quality setting used to scale the Co table
 * @param qualityCg quality setting used to scale the Cg table (and the B table)
 */
fun tevPrepareQuantTable(qualityY: Int, qualityCo: Int, qualityCg: Int) {
    val lumaScale = jpeg_quality_to_mult(qualityY)
    val coScale = jpeg_quality_to_mult(qualityCo)
    val cgScale = jpeg_quality_to_mult(qualityCg)

    quantTableY = FloatArray(QUANT_TABLE_Y.size) { i ->
        (QUANT_TABLE_Y[i] * lumaScale).coerceIn(1f, 255f)
    }
    quantTableCo = FloatArray(QUANT_TABLE_C.size) { i ->
        (QUANT_TABLE_C[i] * coScale).coerceIn(1f, 255f)
    }
    quantTableCg = FloatArray(QUANT_TABLE_C.size) { i ->
        (QUANT_TABLE_C[i] * cgScale).coerceIn(1f, 255f)
    }
    // B shares the Cg multiplier and base table, so its entries equal the Cg entries;
    // copyOf() keeps it a distinct array, as before.
    quantTableB = quantTableCg.copyOf()
}
/**
* Hardware-accelerated TEV frame decoder for YCoCg-R 4:2:0 format
* Decodes compressed TEV block data directly to framebuffer
@@ -2255,7 +2272,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
* @param frameCounter Frame counter for temporal patterns
*/
fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
width: Int, height: Int, qualityIndices: IntArray, frameCounter: Int,
width: Int, height: Int, frameCounter: Int,
debugMotionVectors: Boolean = false, tevVersion: Int = 2,
enableDeblocking: Boolean = true) {
@@ -2265,24 +2282,12 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val blocksX = (width + 15) / 16 // 16x16 blocks now
val blocksY = (height + 15) / 16
val quantYmult = jpeg_quality_to_mult(qualityIndices[0])
val quantCOmult = jpeg_quality_to_mult(qualityIndices[1])
val quantCGmult = jpeg_quality_to_mult(qualityIndices[2])
val quantBmult = quantCGmult
// Apply rate control factor to quantization tables (if not ~1.0, skip optimization)
val quantTableY = QUANT_TABLE_Y.map { (it * quantYmult).coerceIn(1f, 255f) }.toFloatArray()
val quantTableCo = QUANT_TABLE_C.map { (it * quantCOmult).coerceIn(1f, 255f) }.toFloatArray()
val quantTableCg = QUANT_TABLE_C.map { (it * quantCGmult).coerceIn(1f, 255f) }.toFloatArray()
val quantTableB = QUANT_TABLE_C.map { it * quantBmult.toFloat() }.toFloatArray()
var readPtr = blockDataPtr
// decide increment "direction" by the sign of the pointer
val prevAddrIncVec = if (prevRGBAddr >= 0) 1 else -1
val thisAddrIncVec = if (currentRGBAddr >= 0) 1 else -1
for (by in 0 until blocksY) {
for (bx in 0 until blocksX) {
val startX = bx * 16

View File

@@ -2589,27 +2589,12 @@ int main(int argc, char *argv[]) {
printf("\nEncoding complete!\n");
printf(" Frames encoded: %d\n", frame_count);
printf(" - sync packets: %d\n", sync_packet_count);
printf(" Framerate: %d\n", enc->output_fps);
printf(" Output size: %zu bytes\n", enc->total_output_bytes);
// Calculate achieved bitrate
double achieved_bitrate_kbps = (enc->total_output_bytes * 8.0) / 1000.0 / total_time;
printf(" Achieved bitrate: %.1f kbps", achieved_bitrate_kbps);
if (enc->bitrate_mode > 0) {
printf(" (target: %d kbps, %.1f%%)", enc->target_bitrate_kbps,
(achieved_bitrate_kbps / enc->target_bitrate_kbps) * 100.0);
}
printf("\n");
printf(" Encoding time: %.2fs (%.1f fps)\n", total_time, frame_count / total_time);
printf(" Block statistics: INTRA=%d, INTER=%d, MOTION=%d, SKIP=%d\n",
enc->blocks_intra, enc->blocks_inter, enc->blocks_motion, enc->blocks_skip);
if (enc->bitrate_mode > 0) {
printf(" Per-block complexity-based rate control: enabled\n");
}
// Print complexity statistics if enabled
calculate_complexity_stats(enc);