tev rate factor impl fix

This commit is contained in:
minjaesong
2025-09-09 10:07:52 +09:00
parent 3495dfca5e
commit 9f7a4ef2e7
3 changed files with 21 additions and 40 deletions

View File

@@ -580,8 +580,6 @@ let frameDuped = false
// Main decoding loop - simplified for performance
try {
graphics.tevPrepareQuantTable(qualityY, qualityCo, qualityCg)
let t1 = sys.nanoTime()
while (!stopPlay && seqread.getReadCount() < FILE_LENGTH && trueFrameCount < totalFrames) {
@@ -657,14 +655,14 @@ try {
if (isInterlaced) {
// For interlaced: decode current frame into currentFieldAddr
// For display: use prevFieldAddr as current, currentFieldAddr as next
graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, trueFrameCount, debugMotionVectors, version, enableDeblocking)
graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking)
graphics.tevDeinterlace(trueFrameCount, width, decodingHeight, prevFieldAddr, currentFieldAddr, nextFieldAddr, CURRENT_RGB_ADDR, deinterlaceAlgorithm)
// Rotate field buffers for next frame: NEXT -> CURRENT -> PREV
rotateFieldBuffers()
} else {
// Progressive or first frame: normal decoding without temporal prediction
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, trueFrameCount, debugMotionVectors, version, enableDeblocking)
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking)
}
decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 // Convert to milliseconds

View File

@@ -1273,7 +1273,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// TEV (TSVM Enhanced Video) format support
// Created by Claude on 2025-08-17
fun jpeg_quality_to_mult(q: Int): Float {
fun jpeg_quality_to_mult(q: Float): Float {
return (if ((q < 50)) 5000f / q else 200f - 2 * q) / 100f
}
@@ -1525,7 +1525,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
}
}
private fun tevIdct8x8_fast(coeffs: ShortArray, quantTable: FloatArray, isChromaResidual: Boolean = false, mult: Float = 1f): IntArray {
private fun tevIdct8x8_fast(coeffs: ShortArray, quantTable: IntArray, isChromaResidual: Boolean = false, qualityIndex: Int, rateControlFactor: Float): IntArray {
val result = IntArray(64)
// Reuse preallocated temp buffer to reduce GC pressure
@@ -1539,7 +1539,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val coeff = if (isChromaResidual && coeffIdx == 0) {
coeffs[coeffIdx].toFloat() // DC lossless for chroma residual
} else {
coeffs[coeffIdx] * quantTable[coeffIdx] * mult
coeffs[coeffIdx] * quantTable[coeffIdx] * jpeg_quality_to_mult(qualityIndex * rateControlFactor)
}
sum += dctBasis8[u][col] * coeff
}
@@ -1576,7 +1576,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
}
// 16x16 IDCT for Y channel (YCoCg-R format)
private fun tevIdct16x16_fast(coeffs: ShortArray, quantTable: FloatArray, mult: Float = 1.0f): IntArray {
private fun tevIdct16x16_fast(coeffs: ShortArray, quantTable: IntArray, qualityIndex: Int, rateControlFactor: Float): IntArray {
val result = IntArray(256) // 16x16 = 256
// Process coefficients and dequantize using preallocated buffer
@@ -1586,13 +1586,13 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val coeff = if (idx == 0) {
coeffs[idx].toFloat() // DC lossless for luma
} else {
coeffs[idx] * quantTable[idx] * mult
coeffs[idx] * quantTable[idx] * jpeg_quality_to_mult(qualityIndex * rateControlFactor)
}
idct16TempBuffer[idx] = coeff
}
}
// Fast separable IDCT: 8x performance improvement - but causes 90° rotation!
// Fast separable IDCT
// First pass: Process rows (16 1D IDCTs)
for (row in 0 until 16) {
for (col in 0 until 16) {
@@ -2242,23 +2242,6 @@ class GraphicsJSR223Delegate(private val vm: VM) {
}
}
private lateinit var quantTableY: FloatArray
private lateinit var quantTableCo: FloatArray
private lateinit var quantTableCg: FloatArray
private lateinit var quantTableB: FloatArray
fun tevPrepareQuantTable(qualityY: Int, qualityCo: Int, qualityCg: Int) {
val quantYmult = jpeg_quality_to_mult(qualityY)
val quantCOmult = jpeg_quality_to_mult(qualityCo)
val quantCGmult = jpeg_quality_to_mult(qualityCg)
val quantBmult = quantCGmult
quantTableY = QUANT_TABLE_Y.map { (it * quantYmult).coerceIn(1f, 255f) }.toFloatArray()
quantTableCo = QUANT_TABLE_C.map { (it * quantCOmult).coerceIn(1f, 255f) }.toFloatArray()
quantTableCg = QUANT_TABLE_C.map { (it * quantCGmult).coerceIn(1f, 255f) }.toFloatArray()
quantTableB = QUANT_TABLE_C.map { (it * quantBmult).coerceIn(1f, 255f) }.toFloatArray()
}
/**
* Hardware-accelerated TEV frame decoder for YCoCg-R 4:2:0 format
* Decodes compressed TEV block data directly to framebuffer
@@ -2272,7 +2255,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
* @param frameCounter Frame counter for temporal patterns
*/
fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
width: Int, height: Int, frameCounter: Int,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, frameCounter: Int,
debugMotionVectors: Boolean = false, tevVersion: Int = 2,
enableDeblocking: Boolean = true) {
@@ -2447,9 +2430,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
readPtr += 768
// Perform hardware IDCT for each channel using fast algorithm
val yBlock = tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), quantTableY, rateControlFactor)
val coBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), quantTableCo, true, rateControlFactor)
val cgBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), if (tevVersion == 3) quantTableB else quantTableCg, true, rateControlFactor)
val yBlock = tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), QUANT_TABLE_Y, qY, rateControlFactor)
val coBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), QUANT_TABLE_C, true, qCo, rateControlFactor)
val cgBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), QUANT_TABLE_C, true, qCg, rateControlFactor)
// Convert to RGB (YCoCg-R for v2, XYB for v3)
val rgbData = if (tevVersion == 3) {
@@ -2485,9 +2468,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
readPtr += 768
// Step 2: Decode residual DCT
val yResidual = tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), quantTableY, rateControlFactor)
val coResidual = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), quantTableCo, true, rateControlFactor)
val cgResidual = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), if (tevVersion == 3) quantTableB else quantTableCg, true, rateControlFactor)
val yResidual = tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), QUANT_TABLE_Y, qY, rateControlFactor)
val coResidual = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), QUANT_TABLE_C, true, qCo, rateControlFactor)
val cgResidual = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), QUANT_TABLE_C, true, qCg, rateControlFactor)
// Step 3: Build motion-compensated YCoCg-R block and add residuals
val finalY = IntArray(256)

View File

@@ -931,10 +931,10 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
// quantise Y coefficients (luma) using per-block rate control
const uint32_t *y_quant = QUANT_TABLE_Y;
const float qmult_y = jpeg_quality_to_mult(enc->qualityY);
const float qmult_y = jpeg_quality_to_mult(enc->qualityY * block->rate_control_factor);
for (int i = 0; i < BLOCK_SIZE_SQR; i++) {
// Apply rate control factor to quantization table (like decoder does)
float effective_quant = y_quant[i] * qmult_y * block->rate_control_factor;
float effective_quant = y_quant[i] * qmult_y;
block->y_coeffs[i] = quantise_coeff(enc->dct_workspace[i], FCLAMP(effective_quant, 1.f, 255.f), i == 0, 0);
}
@@ -943,10 +943,10 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
// quantise Co coefficients (chroma - orange-blue) using per-block rate control
const uint32_t *co_quant = QUANT_TABLE_C;
const float qmult_co = jpeg_quality_to_mult(enc->qualityCo);
const float qmult_co = jpeg_quality_to_mult(enc->qualityCo * block->rate_control_factor);
for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
// Apply rate control factor to quantization table (like decoder does)
float effective_quant = co_quant[i] * qmult_co * block->rate_control_factor;
float effective_quant = co_quant[i] * qmult_co;
block->co_coeffs[i] = quantise_coeff(enc->dct_workspace[i], FCLAMP(effective_quant, 1.f, 255.f), i == 0, 1);
}
@@ -955,10 +955,10 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
// quantise Cg coefficients (chroma - green-magenta, qmult_cg is more aggressive like NTSC Q) using per-block rate control
const uint32_t *cg_quant = QUANT_TABLE_C;
const float qmult_cg = jpeg_quality_to_mult(enc->qualityCg);
const float qmult_cg = jpeg_quality_to_mult(enc->qualityCg * block->rate_control_factor);
for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
// Apply rate control factor to quantization table (like decoder does)
float effective_quant = cg_quant[i] * qmult_cg * block->rate_control_factor;
float effective_quant = cg_quant[i] * qmult_cg;
block->cg_coeffs[i] = quantise_coeff(enc->dct_workspace[i], FCLAMP(effective_quant, 1.f, 255.f), i == 0, 1);
}