mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-06-10 06:54:04 +09:00
tev rate factor impl fix
This commit is contained in:
@@ -580,8 +580,6 @@ let frameDuped = false
|
|||||||
|
|
||||||
// Main decoding loop - simplified for performance
|
// Main decoding loop - simplified for performance
|
||||||
try {
|
try {
|
||||||
graphics.tevPrepareQuantTable(qualityY, qualityCo, qualityCg)
|
|
||||||
|
|
||||||
let t1 = sys.nanoTime()
|
let t1 = sys.nanoTime()
|
||||||
while (!stopPlay && seqread.getReadCount() < FILE_LENGTH && trueFrameCount < totalFrames) {
|
while (!stopPlay && seqread.getReadCount() < FILE_LENGTH && trueFrameCount < totalFrames) {
|
||||||
|
|
||||||
@@ -657,14 +655,14 @@ try {
|
|||||||
if (isInterlaced) {
|
if (isInterlaced) {
|
||||||
// For interlaced: decode current frame into currentFieldAddr
|
// For interlaced: decode current frame into currentFieldAddr
|
||||||
// For display: use prevFieldAddr as current, currentFieldAddr as next
|
// For display: use prevFieldAddr as current, currentFieldAddr as next
|
||||||
graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, trueFrameCount, debugMotionVectors, version, enableDeblocking)
|
graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking)
|
||||||
graphics.tevDeinterlace(trueFrameCount, width, decodingHeight, prevFieldAddr, currentFieldAddr, nextFieldAddr, CURRENT_RGB_ADDR, deinterlaceAlgorithm)
|
graphics.tevDeinterlace(trueFrameCount, width, decodingHeight, prevFieldAddr, currentFieldAddr, nextFieldAddr, CURRENT_RGB_ADDR, deinterlaceAlgorithm)
|
||||||
|
|
||||||
// Rotate field buffers for next frame: NEXT -> CURRENT -> PREV
|
// Rotate field buffers for next frame: NEXT -> CURRENT -> PREV
|
||||||
rotateFieldBuffers()
|
rotateFieldBuffers()
|
||||||
} else {
|
} else {
|
||||||
// Progressive or first frame: normal decoding without temporal prediction
|
// Progressive or first frame: normal decoding without temporal prediction
|
||||||
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, trueFrameCount, debugMotionVectors, version, enableDeblocking)
|
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking)
|
||||||
}
|
}
|
||||||
|
|
||||||
decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 // Convert to milliseconds
|
decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 // Convert to milliseconds
|
||||||
|
|||||||
@@ -1273,7 +1273,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
// TEV (TSVM Enhanced Video) format support
|
// TEV (TSVM Enhanced Video) format support
|
||||||
// Created by Claude on 2025-08-17
|
// Created by Claude on 2025-08-17
|
||||||
|
|
||||||
fun jpeg_quality_to_mult(q: Int): Float {
|
fun jpeg_quality_to_mult(q: Float): Float {
|
||||||
return (if ((q < 50)) 5000f / q else 200f - 2 * q) / 100f
|
return (if ((q < 50)) 5000f / q else 200f - 2 * q) / 100f
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1525,7 +1525,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun tevIdct8x8_fast(coeffs: ShortArray, quantTable: FloatArray, isChromaResidual: Boolean = false, mult: Float = 1f): IntArray {
|
private fun tevIdct8x8_fast(coeffs: ShortArray, quantTable: IntArray, isChromaResidual: Boolean = false, qualityIndex: Int, rateControlFactor: Float): IntArray {
|
||||||
val result = IntArray(64)
|
val result = IntArray(64)
|
||||||
// Reuse preallocated temp buffer to reduce GC pressure
|
// Reuse preallocated temp buffer to reduce GC pressure
|
||||||
|
|
||||||
@@ -1539,7 +1539,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val coeff = if (isChromaResidual && coeffIdx == 0) {
|
val coeff = if (isChromaResidual && coeffIdx == 0) {
|
||||||
coeffs[coeffIdx].toFloat() // DC lossless for chroma residual
|
coeffs[coeffIdx].toFloat() // DC lossless for chroma residual
|
||||||
} else {
|
} else {
|
||||||
coeffs[coeffIdx] * quantTable[coeffIdx] * mult
|
coeffs[coeffIdx] * quantTable[coeffIdx] * jpeg_quality_to_mult(qualityIndex * rateControlFactor)
|
||||||
}
|
}
|
||||||
sum += dctBasis8[u][col] * coeff
|
sum += dctBasis8[u][col] * coeff
|
||||||
}
|
}
|
||||||
@@ -1576,7 +1576,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 16x16 IDCT for Y channel (YCoCg-R format)
|
// 16x16 IDCT for Y channel (YCoCg-R format)
|
||||||
private fun tevIdct16x16_fast(coeffs: ShortArray, quantTable: FloatArray, mult: Float = 1.0f): IntArray {
|
private fun tevIdct16x16_fast(coeffs: ShortArray, quantTable: IntArray, qualityIndex: Int, rateControlFactor: Float): IntArray {
|
||||||
val result = IntArray(256) // 16x16 = 256
|
val result = IntArray(256) // 16x16 = 256
|
||||||
|
|
||||||
// Process coefficients and dequantize using preallocated buffer
|
// Process coefficients and dequantize using preallocated buffer
|
||||||
@@ -1586,13 +1586,13 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val coeff = if (idx == 0) {
|
val coeff = if (idx == 0) {
|
||||||
coeffs[idx].toFloat() // DC lossless for luma
|
coeffs[idx].toFloat() // DC lossless for luma
|
||||||
} else {
|
} else {
|
||||||
coeffs[idx] * quantTable[idx] * mult
|
coeffs[idx] * quantTable[idx] * jpeg_quality_to_mult(qualityIndex * rateControlFactor)
|
||||||
}
|
}
|
||||||
idct16TempBuffer[idx] = coeff
|
idct16TempBuffer[idx] = coeff
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fast separable IDCT: 8x performance improvement - but causes 90° rotation!
|
// Fast separable IDCT
|
||||||
// First pass: Process rows (16 1D IDCTs)
|
// First pass: Process rows (16 1D IDCTs)
|
||||||
for (row in 0 until 16) {
|
for (row in 0 until 16) {
|
||||||
for (col in 0 until 16) {
|
for (col in 0 until 16) {
|
||||||
@@ -2242,23 +2242,6 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private lateinit var quantTableY: FloatArray
|
|
||||||
private lateinit var quantTableCo: FloatArray
|
|
||||||
private lateinit var quantTableCg: FloatArray
|
|
||||||
private lateinit var quantTableB: FloatArray
|
|
||||||
|
|
||||||
fun tevPrepareQuantTable(qualityY: Int, qualityCo: Int, qualityCg: Int) {
|
|
||||||
val quantYmult = jpeg_quality_to_mult(qualityY)
|
|
||||||
val quantCOmult = jpeg_quality_to_mult(qualityCo)
|
|
||||||
val quantCGmult = jpeg_quality_to_mult(qualityCg)
|
|
||||||
val quantBmult = quantCGmult
|
|
||||||
|
|
||||||
quantTableY = QUANT_TABLE_Y.map { (it * quantYmult).coerceIn(1f, 255f) }.toFloatArray()
|
|
||||||
quantTableCo = QUANT_TABLE_C.map { (it * quantCOmult).coerceIn(1f, 255f) }.toFloatArray()
|
|
||||||
quantTableCg = QUANT_TABLE_C.map { (it * quantCGmult).coerceIn(1f, 255f) }.toFloatArray()
|
|
||||||
quantTableB = QUANT_TABLE_C.map { (it * quantBmult).coerceIn(1f, 255f) }.toFloatArray()
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Hardware-accelerated TEV frame decoder for YCoCg-R 4:2:0 format
|
* Hardware-accelerated TEV frame decoder for YCoCg-R 4:2:0 format
|
||||||
* Decodes compressed TEV block data directly to framebuffer
|
* Decodes compressed TEV block data directly to framebuffer
|
||||||
@@ -2272,7 +2255,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
* @param frameCounter Frame counter for temporal patterns
|
* @param frameCounter Frame counter for temporal patterns
|
||||||
*/
|
*/
|
||||||
fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
|
fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
|
||||||
width: Int, height: Int, frameCounter: Int,
|
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, frameCounter: Int,
|
||||||
debugMotionVectors: Boolean = false, tevVersion: Int = 2,
|
debugMotionVectors: Boolean = false, tevVersion: Int = 2,
|
||||||
enableDeblocking: Boolean = true) {
|
enableDeblocking: Boolean = true) {
|
||||||
|
|
||||||
@@ -2447,9 +2430,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
readPtr += 768
|
readPtr += 768
|
||||||
|
|
||||||
// Perform hardware IDCT for each channel using fast algorithm
|
// Perform hardware IDCT for each channel using fast algorithm
|
||||||
val yBlock = tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), quantTableY, rateControlFactor)
|
val yBlock = tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), QUANT_TABLE_Y, qY, rateControlFactor)
|
||||||
val coBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), quantTableCo, true, rateControlFactor)
|
val coBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), QUANT_TABLE_C, true, qCo, rateControlFactor)
|
||||||
val cgBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), if (tevVersion == 3) quantTableB else quantTableCg, true, rateControlFactor)
|
val cgBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), QUANT_TABLE_C, true, qCg, rateControlFactor)
|
||||||
|
|
||||||
// Convert to RGB (YCoCg-R for v2, XYB for v3)
|
// Convert to RGB (YCoCg-R for v2, XYB for v3)
|
||||||
val rgbData = if (tevVersion == 3) {
|
val rgbData = if (tevVersion == 3) {
|
||||||
@@ -2485,9 +2468,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
readPtr += 768
|
readPtr += 768
|
||||||
|
|
||||||
// Step 2: Decode residual DCT
|
// Step 2: Decode residual DCT
|
||||||
val yResidual = tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), quantTableY, rateControlFactor)
|
val yResidual = tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), QUANT_TABLE_Y, qY, rateControlFactor)
|
||||||
val coResidual = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), quantTableCo, true, rateControlFactor)
|
val coResidual = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), QUANT_TABLE_C, true, qCo, rateControlFactor)
|
||||||
val cgResidual = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), if (tevVersion == 3) quantTableB else quantTableCg, true, rateControlFactor)
|
val cgResidual = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), QUANT_TABLE_C, true, qCg, rateControlFactor)
|
||||||
|
|
||||||
// Step 3: Build motion-compensated YCoCg-R block and add residuals
|
// Step 3: Build motion-compensated YCoCg-R block and add residuals
|
||||||
val finalY = IntArray(256)
|
val finalY = IntArray(256)
|
||||||
|
|||||||
@@ -931,10 +931,10 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
|
|||||||
|
|
||||||
// quantise Y coefficients (luma) using per-block rate control
|
// quantise Y coefficients (luma) using per-block rate control
|
||||||
const uint32_t *y_quant = QUANT_TABLE_Y;
|
const uint32_t *y_quant = QUANT_TABLE_Y;
|
||||||
const float qmult_y = jpeg_quality_to_mult(enc->qualityY);
|
const float qmult_y = jpeg_quality_to_mult(enc->qualityY * block->rate_control_factor);
|
||||||
for (int i = 0; i < BLOCK_SIZE_SQR; i++) {
|
for (int i = 0; i < BLOCK_SIZE_SQR; i++) {
|
||||||
// Apply rate control factor to quantization table (like decoder does)
|
// Rate control factor is already folded into qmult_y (it scales the quality index before jpeg_quality_to_mult), so only the base table entry is multiplied here
|
||||||
float effective_quant = y_quant[i] * qmult_y * block->rate_control_factor;
|
float effective_quant = y_quant[i] * qmult_y;
|
||||||
block->y_coeffs[i] = quantise_coeff(enc->dct_workspace[i], FCLAMP(effective_quant, 1.f, 255.f), i == 0, 0);
|
block->y_coeffs[i] = quantise_coeff(enc->dct_workspace[i], FCLAMP(effective_quant, 1.f, 255.f), i == 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -943,10 +943,10 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
|
|||||||
|
|
||||||
// quantise Co coefficients (chroma - orange-blue) using per-block rate control
|
// quantise Co coefficients (chroma - orange-blue) using per-block rate control
|
||||||
const uint32_t *co_quant = QUANT_TABLE_C;
|
const uint32_t *co_quant = QUANT_TABLE_C;
|
||||||
const float qmult_co = jpeg_quality_to_mult(enc->qualityCo);
|
const float qmult_co = jpeg_quality_to_mult(enc->qualityCo * block->rate_control_factor);
|
||||||
for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
|
for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
|
||||||
// Apply rate control factor to quantization table (like decoder does)
|
// Rate control factor is already folded into qmult_co (it scales the quality index before jpeg_quality_to_mult), so only the base table entry is multiplied here
|
||||||
float effective_quant = co_quant[i] * qmult_co * block->rate_control_factor;
|
float effective_quant = co_quant[i] * qmult_co;
|
||||||
block->co_coeffs[i] = quantise_coeff(enc->dct_workspace[i], FCLAMP(effective_quant, 1.f, 255.f), i == 0, 1);
|
block->co_coeffs[i] = quantise_coeff(enc->dct_workspace[i], FCLAMP(effective_quant, 1.f, 255.f), i == 0, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -955,10 +955,10 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
|
|||||||
|
|
||||||
// quantise Cg coefficients (chroma - green-magenta, qmult_cg is more aggressive like NTSC Q) using per-block rate control
|
// quantise Cg coefficients (chroma - green-magenta, qmult_cg is more aggressive like NTSC Q) using per-block rate control
|
||||||
const uint32_t *cg_quant = QUANT_TABLE_C;
|
const uint32_t *cg_quant = QUANT_TABLE_C;
|
||||||
const float qmult_cg = jpeg_quality_to_mult(enc->qualityCg);
|
const float qmult_cg = jpeg_quality_to_mult(enc->qualityCg * block->rate_control_factor);
|
||||||
for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
|
for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
|
||||||
// Apply rate control factor to quantization table (like decoder does)
|
// Rate control factor is already folded into qmult_cg (it scales the quality index before jpeg_quality_to_mult), so only the base table entry is multiplied here
|
||||||
float effective_quant = cg_quant[i] * qmult_cg * block->rate_control_factor;
|
float effective_quant = cg_quant[i] * qmult_cg;
|
||||||
block->cg_coeffs[i] = quantise_coeff(enc->dct_workspace[i], FCLAMP(effective_quant, 1.f, 255.f), i == 0, 1);
|
block->cg_coeffs[i] = quantise_coeff(enc->dct_workspace[i], FCLAMP(effective_quant, 1.f, 255.f), i == 0, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user