diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js index 873b179..b085d18 100644 --- a/assets/disk0/tvdos/bin/playtav.js +++ b/assets/disk0/tvdos/bin/playtav.js @@ -439,7 +439,6 @@ const roiCoding = (header.extraFlags & 0x08) !== 0 const isInterlaced = (header.videoFlags & 0x01) !== 0 const isNTSC = (header.videoFlags & 0x02) !== 0 const isLossless = (header.videoFlags & 0x04) !== 0 -const multiResolution = (header.videoFlags & 0x08) !== 0 // Calculate tile dimensions (112x112 vs TEV's 16x16 blocks) const tilesX = Math.ceil(header.width / TILE_SIZE) diff --git a/terranmon.txt b/terranmon.txt index d71bb41..dd67b50 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -826,19 +826,16 @@ transmission capability, and region-of-interest coding. uint32 Total Frames: number of video frames uint8 Wavelet Filter Type: 0=5/3 reversible, 1=9/7 irreversible uint8 Decomposition Levels: number of DWT levels (1-4) - uint8 Quality Index for Y channel (0-99; 100 denotes lossless) - uint8 Quality Index for Co channel (0-99; 100 denotes lossless) - uint8 Quality Index for Cg channel (0-99; 100 denotes lossless) + uint8 Quantiser Index for Y channel (1: lossless, 255: potato) + uint8 Quantiser Index for Co channel (1: lossless, 255: potato) + uint8 Quantiser Index for Cg channel (1: lossless, 255: potato) uint8 Extra Feature Flags - bit 0 = has audio - bit 1 = has subtitle - - bit 2 = progressive transmission enabled - - bit 3 = region-of-interest coding enabled uint8 Video Flags - - bit 0 = is interlaced + - bit 0 = is interlaced (unused) - bit 1 = is NTSC framerate - bit 2 = is lossless mode - - bit 3 = multi-resolution encoding uint8 Reserved[7]: fill with zeros ## Packet Types diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index 8853adc..f86471b 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -17,16 +17,21 @@ import kotlin.math.* class GraphicsJSR223Delegate(private val vm: VM) { // TAV Simulated overlapping tiles constants (must match encoder) - private val TAV_TILE_SIZE_X = 280 - private val TAV_TILE_SIZE_Y = 224 + private val TILE_SIZE_X = 280 + private val TILE_SIZE_Y = 224 private val TAV_TILE_MARGIN = 32 // 32-pixel margin for 3 DWT levels (4 * 2^3 = 32px) - private val TAV_PADDED_TILE_SIZE_X = TAV_TILE_SIZE_X + 2 * TAV_TILE_MARGIN // 280 + 64 = 344px - private val TAV_PADDED_TILE_SIZE_Y = TAV_TILE_SIZE_Y + 2 * TAV_TILE_MARGIN // 224 + 64 = 288px + private val PADDED_TILE_SIZE_X = TILE_SIZE_X + 2 * TAV_TILE_MARGIN // 280 + 64 = 344px + private val PADDED_TILE_SIZE_Y = TILE_SIZE_Y + 2 * TAV_TILE_MARGIN // 224 + 64 = 288px // Reusable working arrays to reduce allocation overhead private val tevIdct8TempBuffer = FloatArray(64) private val tevIdct16TempBuffer = FloatArray(256) // For 16x16 IDCT private val tevIdct16SeparableBuffer = FloatArray(256) // For separable 16x16 IDCT + + // TAV coefficient delta storage for previous frame (for efficient P-frames) + private var tavPreviousCoeffsY: MutableMap? = null + private var tavPreviousCoeffsCo: MutableMap? = null + private var tavPreviousCoeffsCg: MutableMap? = null private fun getFirstGPU(): GraphicsAdapter? { return vm.findPeribyType(VM.PERITYPE_GPU_AND_TERM)?.peripheral as? GraphicsAdapter @@ -1285,7 +1290,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { return (if ((q < 50)) 5000f / q else 200f - 2 * q) / 100f } - // Quality settings for quantization (Y channel) - 16x16 tables + // Quality settings for quantisation (Y channel) - 16x16 tables val QUANT_TABLE_Y: IntArray = intArrayOf( 16, 14, 12, 11, 11, 13, 16, 20, 24, 30, 39, 48, 54, 61, 67, 73, 14, 13, 12, 12, 12, 15, 18, 21, 25, 33, 46, 57, 61, 65, 67, 70, @@ -1304,7 +1309,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { 73, 82, 92, 98, 103, 107, 110, 117, 126, 132, 134, 136, 138, 138, 133, 127, 86, 98, 109, 112, 114, 116, 118, 124, 133, 135, 129, 125, 128, 130, 128, 127) - // Quality settings for quantization (Co channel - orange-blue, 8x8) + // Quality settings for quantisation (Co channel - orange-blue, 8x8) val QUANT_TABLE_C: IntArray = intArrayOf( 17, 18, 24, 47, 99, 99, 99, 99, 18, 21, 26, 66, 99, 99, 99, 99, @@ -1527,7 +1532,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } /** - * Apply Bayer dithering to reduce banding when quantizing to 4-bit + * Apply Bayer dithering to reduce banding when quantising to 4-bit */ private fun ditherValue(value: Int, x: Int, y: Int, f: Int): Int { // Preserve pure values (0 and 255) exactly to maintain colour primaries @@ -1707,7 +1712,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { private fun tevIdct16x16_fast(coeffs: ShortArray, quantTable: IntArray, qualityIndex: Int, rateControlFactor: Float): IntArray { val result = IntArray(256) // 16x16 = 256 - // Process coefficients and dequantize using preallocated buffer + // Process coefficients and dequantise using preallocated buffer for (u in 0 until 16) { for (v in 0 until 16) { val idx = u * 16 + v @@ -2499,7 +2504,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { * @param prevRGBAddr Address of previous frame RGB buffer (for motion compensation) * @param width Frame width in pixels * @param height Frame height in pixels - * @param quality Quantization quality level (0-7) + * @param quality Quantisation quality level (0-7) * @param frameCounter Frame counter for temporal patterns */ fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long, @@ -2617,7 +2622,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { // tevApplyMotionCompensationTwoPass(yBlock, coBlock, cgBlock, startX, startY, mv[0], mv[1], prevRGBAddr, width, height, prevAddrIncVec) // } - // Use IDCT on knusperli-optimised coefficients (coefficients are already optimally dequantized) + // Use IDCT on knusperli-optimised coefficients (coefficients are already optimally dequantised) val yPixels = tevIdct16x16_fromOptimisedCoeffs(yBlock) val coPixels = tevIdct8x8_fromOptimisedCoeffs(coBlock) val cgPixels = tevIdct8x8_fromOptimisedCoeffs(cgBlock) @@ -2798,7 +2803,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } 0x01 -> { // TEV_MODE_INTRA - Full YCoCg-R DCT decode (no motion compensation) - // Regular lossy mode: quantized int16 coefficients + // Regular lossy mode: quantised int16 coefficients // Optimised bulk reading of all DCT coefficients: Y(256×2) + Co(64×2) + Cg(64×2) = 768 bytes val coeffShortArray = ShortArray(384) // Total coefficients: 256 + 64 + 64 = 384 shorts vm.bulkPeekShort(readPtr.toInt(), coeffShortArray, 768) @@ -3141,7 +3146,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val kAlphaSqrt2 = intArrayOf(1024, 1448, 1448, 1448, 1448, 1448, 1448, 1448) val kHalfSqrt2 = 724 // sqrt(2)/2 in 10-bit fixed-point - // Convert to dequantized FloatArrays and apply knusperli optimisation + // Convert to dequantised FloatArrays and apply knusperli optimisation val optimisedYBlocks = tevConvertAndOptimise16x16Blocks(yBlocks, quantTableY, qY, rateControlFactors, blocksX, blocksY, kLinearGradient, kAlphaSqrt2, kHalfSqrt2) val optimisedCoBlocks = tevConvertAndOptimise8x8Blocks(coBlocks, quantTableCo, qCo, rateControlFactors, blocksX, blocksY, kLinearGradient, kAlphaSqrt2, kHalfSqrt2) val optimisedCgBlocks = tevConvertAndOptimise8x8Blocks(cgBlocks, quantTableCg, qCg, rateControlFactors, blocksX, blocksY, kLinearGradient, kAlphaSqrt2, kHalfSqrt2) @@ -3149,7 +3154,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { return Triple(optimisedYBlocks, optimisedCoBlocks, optimisedCgBlocks) } - // IDCT functions for knusperli-optimised coefficients (coefficients are already dequantized) + // IDCT functions for knusperli-optimised coefficients (coefficients are already dequantised) private fun tevIdct16x16_fromOptimisedCoeffs(coeffs: FloatArray): IntArray { val result = IntArray(256) // 16x16 @@ -3214,7 +3219,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { tevProcessBlocksWithKnusperli16x16(blocks, quantTable, qScale, rateControlFactors, blocksX, blocksY, kLinearGradient16, kAlphaSqrt2_16, kHalfSqrt2) - // Convert optimised ShortArray blocks to FloatArray (dequantized) + // Convert optimised ShortArray blocks to FloatArray (dequantised) for (blockIndex in 0 until blocks.size) { val block = blocks[blockIndex] if (block != null) { @@ -3243,7 +3248,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val coeffsSize = 256 // 16x16 = 256 val numBlocks = blocksX * blocksY - // OPTIMIZATION 1: Pre-compute quantization values to avoid repeated calculations + // OPTIMIZATION 1: Pre-compute quantisation values to avoid repeated calculations val quantValues = Array(numBlocks) { IntArray(coeffsSize) } val quantHalfValues = Array(numBlocks) { IntArray(coeffsSize) } @@ -3254,7 +3259,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val qualityMult = jpeg_quality_to_mult(qScale * rateControlFactor) quantValues[blockIndex][0] = 1 // DC is lossless - quantHalfValues[blockIndex][0] = 0 // DC has no quantization interval + quantHalfValues[blockIndex][0] = 0 // DC has no quantisation interval for (i in 1 until coeffsSize) { val coeffIdx = i.coerceIn(0, quantTable.size - 1) @@ -3269,7 +3274,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val blocksMid = Array(numBlocks) { IntArray(coeffsSize) } val blocksOff = Array(numBlocks) { LongArray(coeffsSize) } // Keep Long for accumulation - // Step 1: Setup dequantized values and initialize adjustments (BULK OPTIMIZED) + // Step 1: Setup dequantised values and initialize adjustments (BULK OPTIMIZED) for (blockIndex in 0 until numBlocks) { val block = blocks[blockIndex] if (block != null) { @@ -3277,8 +3282,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { val off = blocksOff[blockIndex] val quantVals = quantValues[blockIndex] - // OPTIMIZATION 9: Bulk dequantization using vectorized operations - tevBulkDequantizeCoefficients(block, mid, quantVals, coeffsSize) + // OPTIMIZATION 9: Bulk dequantisation using vectorized operations + tevBulkDequantiseCoefficients(block, mid, quantVals, coeffsSize) // OPTIMIZATION 10: Bulk zero initialization of adjustments off.fill(0L) @@ -3315,11 +3320,11 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } - // Step 4: Apply corrections and clamp to quantization intervals (BULK OPTIMIZED) + // Step 4: Apply corrections and clamp to quantisation intervals (BULK OPTIMIZED) for (blockIndex in 0 until numBlocks) { val block = blocks[blockIndex] if (block != null) { - // OPTIMIZATION 11: Bulk apply corrections and quantization clamping + // OPTIMIZATION 11: Bulk apply corrections and quantisation clamping tevBulkApplyCorrectionsAndClamp( block, blocksMid[blockIndex], blocksOff[blockIndex], quantValues[blockIndex], quantHalfValues[blockIndex], @@ -3332,10 +3337,10 @@ class GraphicsJSR223Delegate(private val vm: VM) { // BULK MEMORY ACCESS HELPER FUNCTIONS FOR KNUSPERLI /** - * OPTIMIZATION 9: Bulk dequantization using vectorized operations - * Performs coefficient * quantization in optimised chunks + * OPTIMIZATION 9: Bulk dequantisation using vectorized operations + * Performs coefficient * quantisation in optimised chunks */ - private fun tevBulkDequantizeCoefficients( + private fun tevBulkDequantiseCoefficients( coeffs: ShortArray, result: IntArray, quantVals: IntArray, size: Int ) { // Process in chunks of 16 for better vectorization (CPU can process multiple values per instruction) @@ -3372,7 +3377,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } /** - * OPTIMIZATION 11: Bulk apply corrections and quantization clamping + * OPTIMIZATION 11: Bulk apply corrections and quantisation clamping * Vectorized correction application with proper bounds checking */ private fun tevBulkApplyCorrectionsAndClamp( @@ -3404,7 +3409,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { mid[i + 6] += corr6 mid[i + 7] += corr7 - // Apply quantization interval clamping - bulk operations + // Apply quantisation interval clamping - bulk operations val orig0 = block[i].toInt() * quantVals[i] val orig1 = block[i + 1].toInt() * quantVals[i + 1] val orig2 = block[i + 2].toInt() * quantVals[i + 2] @@ -3423,7 +3428,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { mid[i + 6] = mid[i + 6].coerceIn(orig6 - quantHalf[i + 6], orig6 + quantHalf[i + 6]) mid[i + 7] = mid[i + 7].coerceIn(orig7 - quantHalf[i + 7], orig7 + quantHalf[i + 7]) - // Convert back to quantized coefficients - bulk operations + // Convert back to quantised coefficients - bulk operations val quantMax = Short.MAX_VALUE.toInt() val quantMin = Short.MIN_VALUE.toInt() block[i] = (mid[i] / quantVals[i]).coerceIn(quantMin, quantMax).toShort() @@ -3603,7 +3608,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val coeffsSize = 64 val numBlocks = blocksX * blocksY - // Step 1: Setup quantization intervals for all blocks (using integers like Google's code) + // Step 1: Setup quantisation intervals for all blocks (using integers like Google's code) val blocksMid = Array(numBlocks) { IntArray(coeffsSize) } val blocksMin = Array(numBlocks) { IntArray(coeffsSize) } val blocksMax = Array(numBlocks) { IntArray(coeffsSize) } @@ -3617,19 +3622,19 @@ class GraphicsJSR223Delegate(private val vm: VM) { val quantIdx = i.coerceIn(0, quantTable.size - 1) if (i == 0) { - // DC coefficient: lossless (no quantization) + // DC coefficient: lossless (no quantisation) val dcValue = block[i].toInt() blocksMid[blockIndex][i] = dcValue blocksMin[blockIndex][i] = dcValue // No interval for DC blocksMax[blockIndex][i] = dcValue } else { - // AC coefficients: use quantization intervals + // AC coefficients: use quantisation intervals val quant = (quantTable[quantIdx] * jpeg_quality_to_mult(qScale * rateControlFactor)).coerceIn(1f, 255f).toInt() - // Standard dequantized value (midpoint) + // Standard dequantised value (midpoint) blocksMid[blockIndex][i] = block[i].toInt() * quant - // Quantization interval bounds + // Quantisation interval bounds val halfQuant = quant / 2 blocksMin[blockIndex][i] = blocksMid[blockIndex][i] - halfQuant blocksMax[blockIndex][i] = blocksMid[blockIndex][i] + halfQuant @@ -3671,7 +3676,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } - // Step 4: Apply corrections and return optimised dequantized coefficients + // Step 4: Apply corrections and return optimised dequantised coefficients val result = Array(blocks.size) { null } for (blockIndex in 0 until numBlocks) { val block = blocks[blockIndex] @@ -3680,7 +3685,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Apply corrections with sqrt(2)/2 weighting (Google's exact formula with right shift) blocksMid[blockIndex][i] += ((blocksOff[blockIndex][i] * kHalfSqrt2) shr 31).toInt() - // Clamp to quantization interval bounds + // Clamp to quantisation interval bounds val optimisedValue = blocksMid[blockIndex][i].coerceIn( blocksMin[blockIndex][i], blocksMax[blockIndex][i] @@ -3819,8 +3824,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { var readPtr = blockDataPtr try { - val tilesX = (width + TAV_TILE_SIZE_X - 1) / TAV_TILE_SIZE_X // 280x224 tiles - val tilesY = (height + TAV_TILE_SIZE_Y - 1) / TAV_TILE_SIZE_Y + val tilesX = (width + TILE_SIZE_X - 1) / TILE_SIZE_X // 280x224 tiles + val tilesY = (height + TILE_SIZE_Y - 1) / TILE_SIZE_Y // Process each tile for (tileY in 0 until tilesY) { @@ -3836,6 +3841,13 @@ class GraphicsJSR223Delegate(private val vm: VM) { val rcf = vm.peekFloat(readPtr) readPtr += 4 + // debug print: raw decompressed bytes + /*print("TAV Decode raw bytes (Frame $frameCounter, mode: ${arrayOf("SKIP", "INTRA", "DELTA")[mode]}): ") + for (i in 0 until 32) { + print("${vm.peek(blockDataPtr + i).toUint().toString(16).uppercase().padStart(2, '0')} ") + } + println("...")*/ + when (mode) { 0x00 -> { // TAV_MODE_SKIP // Copy 280x224 tile from previous frame to current frame @@ -3847,17 +3859,11 @@ class GraphicsJSR223Delegate(private val vm: VM) { width, height, qY, qCo, qCg, rcf, waveletFilter, decompLevels, isLossless, tavVersion) } - 0x02 -> { // TAV_MODE_INTER - // Motion compensation + DWT residual to RGB buffer - readPtr = tavDecodeDWTInterTileRGB(readPtr, tileX, tileY, mvX, mvY, - currentRGBAddr, prevRGBAddr, - width, height, qY, qCo, qCg, rcf, - waveletFilter, decompLevels, isLossless, tavVersion) - } - 0x03 -> { // TAV_MODE_MOTION - // Motion compensation only (no residual) - tavApplyMotionCompensationRGB(tileX, tileY, mvX, mvY, - currentRGBAddr, prevRGBAddr, width, height) + 0x02 -> { // TAV_MODE_DELTA + // Coefficient delta encoding for efficient P-frames + readPtr = tavDecodeDeltaTileRGB(readPtr, tileX, tileY, currentRGBAddr, + width, height, qY, qCo, qCg, rcf, + waveletFilter, decompLevels, isLossless, tavVersion) } } } @@ -3872,13 +3878,13 @@ class GraphicsJSR223Delegate(private val vm: VM) { width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, rcf: Float, waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long { // Now reading padded coefficient tiles (344x288) instead of core tiles (280x224) - val paddedCoeffCount = TAV_PADDED_TILE_SIZE_X * TAV_PADDED_TILE_SIZE_Y + val paddedCoeffCount = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y var ptr = readPtr - // Read quantized DWT coefficients for padded tile Y, Co, Cg channels (344x288) - val quantizedY = ShortArray(paddedCoeffCount) - val quantizedCo = ShortArray(paddedCoeffCount) - val quantizedCg = ShortArray(paddedCoeffCount) + // Read quantised DWT coefficients for padded tile Y, Co, Cg channels (344x288) + val quantisedY = ShortArray(paddedCoeffCount) + val quantisedCo = ShortArray(paddedCoeffCount) + val quantisedCg = ShortArray(paddedCoeffCount) // OPTIMIZATION: Bulk read all coefficient data (344x288 * 3 channels * 2 bytes = 594,432 bytes) val totalCoeffBytes = paddedCoeffCount * 3 * 2L // 3 channels, 2 bytes per short @@ -3888,51 +3894,62 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Convert bulk data to coefficient arrays var bufferOffset = 0 for (i in 0 until paddedCoeffCount) { - quantizedY[i] = (((coeffBuffer[bufferOffset + 1].toInt() and 0xFF) shl 8) or (coeffBuffer[bufferOffset].toInt() and 0xFF)).toShort() + quantisedY[i] = (((coeffBuffer[bufferOffset + 1].toInt() and 0xFF) shl 8) or (coeffBuffer[bufferOffset].toInt() and 0xFF)).toShort() bufferOffset += 2 } for (i in 0 until paddedCoeffCount) { - quantizedCo[i] = (((coeffBuffer[bufferOffset + 1].toInt() and 0xFF) shl 8) or (coeffBuffer[bufferOffset].toInt() and 0xFF)).toShort() + quantisedCo[i] = (((coeffBuffer[bufferOffset + 1].toInt() and 0xFF) shl 8) or (coeffBuffer[bufferOffset].toInt() and 0xFF)).toShort() bufferOffset += 2 } for (i in 0 until paddedCoeffCount) { - quantizedCg[i] = (((coeffBuffer[bufferOffset + 1].toInt() and 0xFF) shl 8) or (coeffBuffer[bufferOffset].toInt() and 0xFF)).toShort() + quantisedCg[i] = (((coeffBuffer[bufferOffset + 1].toInt() and 0xFF) shl 8) or (coeffBuffer[bufferOffset].toInt() and 0xFF)).toShort() bufferOffset += 2 } ptr += totalCoeffBytes.toInt() - // Dequantize padded coefficient tiles (344x288) + // Dequantise padded coefficient tiles (344x288) val yPaddedTile = FloatArray(paddedCoeffCount) val coPaddedTile = FloatArray(paddedCoeffCount) val cgPaddedTile = FloatArray(paddedCoeffCount) for (i in 0 until paddedCoeffCount) { - yPaddedTile[i] = quantizedY[i] * qY * rcf - coPaddedTile[i] = quantizedCo[i] * qCo * rcf - cgPaddedTile[i] = quantizedCg[i] * qCg * rcf + yPaddedTile[i] = quantisedY[i] * qY * rcf + coPaddedTile[i] = quantisedCo[i] * qCo * rcf + cgPaddedTile[i] = quantisedCg[i] * qCg * rcf } + // Store coefficients for future delta reference (for P-frames) + val tileIdx = tileY * ((width + TILE_SIZE_X - 1) / TILE_SIZE_X) + tileX + if (tavPreviousCoeffsY == null) { + tavPreviousCoeffsY = mutableMapOf() + tavPreviousCoeffsCo = mutableMapOf() + tavPreviousCoeffsCg = mutableMapOf() + } + tavPreviousCoeffsY!![tileIdx] = yPaddedTile.clone() + tavPreviousCoeffsCo!![tileIdx] = coPaddedTile.clone() + tavPreviousCoeffsCg!![tileIdx] = cgPaddedTile.clone() + // Apply inverse DWT on full padded tiles (344x288) if (isLossless) { - tavApplyDWTInverseMultiLevel(yPaddedTile, TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y, decompLevels, 0) - tavApplyDWTInverseMultiLevel(coPaddedTile, TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y, decompLevels, 0) - tavApplyDWTInverseMultiLevel(cgPaddedTile, TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y, decompLevels, 0) + tavApplyDWTInverseMultiLevel(yPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0) + tavApplyDWTInverseMultiLevel(coPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0) + tavApplyDWTInverseMultiLevel(cgPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0) } else { - tavApplyDWTInverseMultiLevel(yPaddedTile, TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y, decompLevels, waveletFilter) - tavApplyDWTInverseMultiLevel(coPaddedTile, TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y, decompLevels, waveletFilter) - tavApplyDWTInverseMultiLevel(cgPaddedTile, TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y, decompLevels, waveletFilter) + tavApplyDWTInverseMultiLevel(yPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter) + tavApplyDWTInverseMultiLevel(coPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter) + tavApplyDWTInverseMultiLevel(cgPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter) } // Extract core 280x224 pixels from reconstructed padded tiles (344x288) - val yTile = FloatArray(TAV_TILE_SIZE_X * TAV_TILE_SIZE_Y) - val coTile = FloatArray(TAV_TILE_SIZE_X * TAV_TILE_SIZE_Y) - val cgTile = FloatArray(TAV_TILE_SIZE_X * TAV_TILE_SIZE_Y) + val yTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) + val coTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) + val cgTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) - for (y in 0 until TAV_TILE_SIZE_Y) { - for (x in 0 until TAV_TILE_SIZE_X) { - val coreIdx = y * TAV_TILE_SIZE_X + x - val paddedIdx = (y + TAV_TILE_MARGIN) * TAV_PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN) + for (y in 0 until TILE_SIZE_Y) { + for (x in 0 until TILE_SIZE_X) { + val coreIdx = y * TILE_SIZE_X + x + val paddedIdx = (y + TAV_TILE_MARGIN) * PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN) yTile[coreIdx] = yPaddedTile[paddedIdx] coTile[coreIdx] = coPaddedTile[paddedIdx] @@ -3952,17 +3969,17 @@ class GraphicsJSR223Delegate(private val vm: VM) { private fun tavConvertYCoCgTileToRGB(tileX: Int, tileY: Int, yTile: FloatArray, coTile: FloatArray, cgTile: FloatArray, rgbAddr: Long, width: Int, height: Int) { - val startX = tileX * TAV_TILE_SIZE_X - val startY = tileY * TAV_TILE_SIZE_Y + val startX = tileX * TILE_SIZE_X + val startY = tileY * TILE_SIZE_Y // OPTIMIZATION: Process pixels row by row with bulk copying for better cache locality - for (y in 0 until TAV_TILE_SIZE_Y) { + for (y in 0 until TILE_SIZE_Y) { val frameY = startY + y if (frameY >= height) break // Calculate valid pixel range for this row val validStartX = maxOf(0, startX) - val validEndX = minOf(width, startX + TAV_TILE_SIZE_X) + val validEndX = minOf(width, startX + TILE_SIZE_X) val validPixelsInRow = validEndX - validStartX if (validPixelsInRow > 0) { @@ -3971,7 +3988,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { var bufferIdx = 0 for (x in validStartX until validEndX) { - val tileIdx = y * TAV_TILE_SIZE_X + (x - startX) + val tileIdx = y * TILE_SIZE_X + (x - startX) // YCoCg-R to RGB conversion (exact inverse of encoder) val Y = yTile[tileIdx] @@ -3999,17 +4016,17 @@ class GraphicsJSR223Delegate(private val vm: VM) { private fun tavConvertICtCpTileToRGB(tileX: Int, tileY: Int, iTile: FloatArray, ctTile: FloatArray, cpTile: FloatArray, rgbAddr: Long, width: Int, height: Int) { - val startX = tileX * TAV_TILE_SIZE_X - val startY = tileY * TAV_TILE_SIZE_Y + val startX = tileX * TILE_SIZE_X + val startY = tileY * TILE_SIZE_Y // OPTIMIZATION: Process pixels row by row with bulk copying for better cache locality - for (y in 0 until TAV_TILE_SIZE_Y) { + for (y in 0 until TILE_SIZE_Y) { val frameY = startY + y if (frameY >= height) break // Calculate valid pixel range for this row val validStartX = maxOf(0, startX) - val validEndX = minOf(width, startX + TAV_TILE_SIZE_X) + val validEndX = minOf(width, startX + TILE_SIZE_X) val validPixelsInRow = validEndX - validStartX if (validPixelsInRow > 0) { @@ -4018,7 +4035,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { var bufferIdx = 0 for (x in validStartX until validEndX) { - val tileIdx = y * TAV_TILE_SIZE_X + (x - startX) + val tileIdx = y * TILE_SIZE_X + (x - startX) // ICtCp to sRGB conversion (adapted from encoder ICtCp functions) val I = iTile[tileIdx].toDouble() / 255.0 @@ -4060,16 +4077,16 @@ class GraphicsJSR223Delegate(private val vm: VM) { private fun tavAddYCoCgResidualToRGBTile(tileX: Int, tileY: Int, yRes: FloatArray, coRes: FloatArray, cgRes: FloatArray, rgbAddr: Long, width: Int, height: Int) { - val startX = tileX * TAV_TILE_SIZE_X - val startY = tileY * TAV_TILE_SIZE_Y + val startX = tileX * TILE_SIZE_X + val startY = tileY * TILE_SIZE_Y - for (y in 0 until TAV_TILE_SIZE_Y) { - for (x in 0 until TAV_TILE_SIZE_X) { + for (y in 0 until TILE_SIZE_Y) { + for (x in 0 until TILE_SIZE_X) { val frameX = startX + x val frameY = startY + y if (frameX < width && frameY < height) { - val tileIdx = y * TAV_TILE_SIZE_X + x + val tileIdx = y * TILE_SIZE_X + x val pixelIdx = frameY * width + frameX val rgbOffset = pixelIdx * 3L @@ -4105,17 +4122,17 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Helper functions (simplified versions of existing DWT functions) private fun tavCopyTileRGB(tileX: Int, tileY: Int, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int) { - val startX = tileX * TAV_TILE_SIZE_X - val startY = tileY * TAV_TILE_SIZE_Y + val startX = tileX * TILE_SIZE_X + val startY = tileY * TILE_SIZE_Y // OPTIMIZATION: Copy entire rows at once for maximum performance - for (y in 0 until TAV_TILE_SIZE_Y) { + for (y in 0 until TILE_SIZE_Y) { val frameY = startY + y if (frameY >= height) break // Calculate valid pixel range for this row val validStartX = maxOf(0, startX) - val validEndX = minOf(width, startX + TAV_TILE_SIZE_X) + val validEndX = minOf(width, startX + TILE_SIZE_X) val validPixelsInRow = validEndX - validStartX if (validPixelsInRow > 0) { @@ -4132,31 +4149,105 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } - private fun tavDecodeDWTInterTileRGB(readPtr: Long, tileX: Int, tileY: Int, mvX: Int, mvY: Int, - currentRGBAddr: Long, prevRGBAddr: Long, - width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, rcf: Float, - waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long { + private fun tavDecodeDeltaTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long, + width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, rcf: Float, + waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long { - // Step 1: Apply motion compensation - tavApplyMotionCompensationRGB(tileX, tileY, mvX, mvY, currentRGBAddr, prevRGBAddr, width, height) + val tileIdx = tileY * ((width + TILE_SIZE_X - 1) / TILE_SIZE_X) + tileX + var ptr = readPtr - // Step 2: Add DWT residual (same as intra but add to existing pixels) - return tavDecodeDWTIntraTileRGB(readPtr, tileX, tileY, currentRGBAddr, width, height, qY, qCo, qCg, rcf, - waveletFilter, decompLevels, isLossless, tavVersion) + // Initialize coefficient storage if needed + if (tavPreviousCoeffsY == null) { + tavPreviousCoeffsY = mutableMapOf() + tavPreviousCoeffsCo = mutableMapOf() + tavPreviousCoeffsCg = mutableMapOf() + } + + // Coefficient count for padded tiles: 344x288 = 99,072 coefficients per channel + val coeffCount = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y + + // Read delta coefficients (same format as intra: quantised int16 -> float) + val deltaY = ShortArray(coeffCount) + val deltaCo = ShortArray(coeffCount) + val deltaCg = ShortArray(coeffCount) + + vm.bulkPeekShort(ptr.toInt(), deltaY, coeffCount * 2) + ptr += coeffCount * 2 + vm.bulkPeekShort(ptr.toInt(), deltaCo, coeffCount * 2) + ptr += coeffCount * 2 + vm.bulkPeekShort(ptr.toInt(), deltaCg, coeffCount * 2) + ptr += coeffCount * 2 + + // Get or initialize previous coefficients for this tile + val prevY = tavPreviousCoeffsY!![tileIdx] ?: FloatArray(coeffCount) + val prevCo = tavPreviousCoeffsCo!![tileIdx] ?: FloatArray(coeffCount) + val prevCg = tavPreviousCoeffsCg!![tileIdx] ?: FloatArray(coeffCount) + + // Reconstruct current coefficients: current = previous + delta + val currentY = FloatArray(coeffCount) + val currentCo = FloatArray(coeffCount) + val currentCg = FloatArray(coeffCount) + + for (i in 0 until coeffCount) { + currentY[i] = prevY[i] + (deltaY[i].toFloat() * qY * rcf) + currentCo[i] = prevCo[i] + (deltaCo[i].toFloat() * qCo * rcf) + currentCg[i] = prevCg[i] + (deltaCg[i].toFloat() * qCg * rcf) + } + + // Store current coefficients as previous for next frame + tavPreviousCoeffsY!![tileIdx] = currentY.clone() + tavPreviousCoeffsCo!![tileIdx] = currentCo.clone() + tavPreviousCoeffsCg!![tileIdx] = currentCg.clone() + + // Apply inverse DWT + if (isLossless) { + tavApplyDWTInverseMultiLevel(currentY, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0) + tavApplyDWTInverseMultiLevel(currentCo, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0) + tavApplyDWTInverseMultiLevel(currentCg, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0) + } else { + tavApplyDWTInverseMultiLevel(currentY, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter) + tavApplyDWTInverseMultiLevel(currentCo, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter) + tavApplyDWTInverseMultiLevel(currentCg, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter) + } + + // Extract core 280x224 pixels and convert to RGB (same as intra) + val yTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) + val coTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) + val cgTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) + + for (y in 0 until TILE_SIZE_Y) { + for (x in 0 until TILE_SIZE_X) { + val coreIdx = y * TILE_SIZE_X + x + val paddedIdx = (y + TAV_TILE_MARGIN) * PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN) + + yTile[coreIdx] = currentY[paddedIdx] + coTile[coreIdx] = currentCo[paddedIdx] + cgTile[coreIdx] = currentCg[paddedIdx] + } + } + + // Convert to RGB based on TAV version + if (tavVersion == 2) { + tavConvertICtCpTileToRGB(tileX, tileY, yTile, coTile, cgTile, currentRGBAddr, width, height) + } else { + tavConvertYCoCgTileToRGB(tileX, tileY, yTile, coTile, cgTile, currentRGBAddr, width, height) + } + + return ptr } private fun tavApplyMotionCompensationRGB(tileX: Int, tileY: Int, mvX: Int, mvY: Int, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int) { - val startX = tileX * TAV_TILE_SIZE_X - val startY = tileY * TAV_TILE_SIZE_Y + val startX = tileX * TILE_SIZE_X + val startY = tileY * TILE_SIZE_Y // Motion vectors in quarter-pixel precision val refX = startX + (mvX / 4.0f) val refY = startY + (mvY / 4.0f) - for (y in 0 until TAV_TILE_SIZE_Y) { - for (x in 0 until TAV_TILE_SIZE_X) { + for (y in 0 until TILE_SIZE_Y) { + for (x in 0 until TILE_SIZE_X) { val currentPixelIdx = (startY + y) * width + (startX + x) if (currentPixelIdx >= 0 && currentPixelIdx < width * height) { diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index 17c7799..928ed25 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -26,11 +26,10 @@ // Version 1: YCoCg-R (default) // Version 2: ICtCp (--ictcp flag) -// Tile encoding modes (112x112 tiles) +// Tile encoding modes (280x224 tiles) #define TAV_MODE_SKIP 0x00 // Skip tile (copy from reference) #define TAV_MODE_INTRA 0x01 // Intra DWT coding (I-frame tiles) -#define TAV_MODE_INTER 0x02 // Inter DWT coding with motion compensation -#define TAV_MODE_MOTION 0x03 // Motion vector only (good prediction) +#define TAV_MODE_DELTA 0x02 // Coefficient delta encoding (efficient P-frames) // Video packet types #define TAV_PACKET_IFRAME 0x10 // Intra frame (keyframe) @@ -60,6 +59,7 @@ #define DEFAULT_HEIGHT 448 #define DEFAULT_FPS 30 #define DEFAULT_QUALITY 2 +int KEYFRAME_INTERVAL = 60; // Audio/subtitle constants (reused from TEV) #define MP2_DEFAULT_PACKET_SIZE 1152 @@ -106,10 +106,10 @@ static inline float FCLAMP(float x, float min, float max) { // MP2 audio rate table (same as TEV) static const int MP2_RATE_TABLE[] = {128, 160, 224, 320, 384, 384}; -// Quality level to quantization mapping for different channels -static const int QUALITY_Y[] = {90, 70, 50, 30, 15, 5}; // Luma (fine) -static const int QUALITY_CO[] = {80, 60, 40, 20, 10, 3}; // Chroma Co (aggressive) -static const int QUALITY_CG[] = {70, 50, 30, 15, 8, 2}; // Chroma Cg (very aggressive) +// Quality level to quantisation mapping for different channels +static const int QUALITY_Y[] = {60, 42, 25, 12, 6, 2}; +static const int QUALITY_CO[] = {120, 90, 60, 30, 15, 3}; +static const int QUALITY_CG[] = {240, 180, 120, 60, 30, 5}; // DWT coefficient structure for each subband typedef struct { @@ -153,7 +153,7 @@ typedef struct { // Encoding parameters int quality_level; - int quantizer_y, quantizer_co, quantizer_cg; + int quantiser_y, quantiser_co, quantiser_cg; int wavelet_filter; int decomp_levels; int bitrate_mode; @@ -168,6 +168,7 @@ typedef struct { int verbose; int test_mode; int ictcp_mode; // 0 = YCoCg-R (default), 1 = ICtCp colour space + int intra_only; // Force all tiles to use INTRA mode (disable delta encoding) // Frame buffers uint8_t *current_frame_rgb; @@ -199,9 +200,15 @@ typedef struct { size_t compressed_buffer_size; // OPTIMIZATION: Pre-allocated buffers to avoid malloc/free per tile - int16_t *reusable_quantized_y; - int16_t *reusable_quantized_co; - int16_t *reusable_quantized_cg; + int16_t *reusable_quantised_y; + int16_t *reusable_quantised_co; + int16_t *reusable_quantised_cg; + + // Coefficient delta storage for P-frames (previous frame's coefficients) + float *previous_coeffs_y; // Previous frame Y coefficients for all tiles + float *previous_coeffs_co; // Previous frame Co coefficients for all tiles + float *previous_coeffs_cg; // Previous frame Cg coefficients for all tiles + int previous_coeffs_allocated; // Flag to track allocation // Statistics size_t total_compressed_size; @@ -217,9 +224,6 @@ static tav_encoder_t* create_encoder(void); static void cleanup_encoder(tav_encoder_t *enc); static int initialize_encoder(tav_encoder_t *enc); static void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height); -static int estimate_motion_280x224(const float *current, const float *reference, - int width, int height, int tile_x, int tile_y, - motion_vector_t *mv); // Audio and subtitle processing prototypes (from TEV) static int start_audio_conversion(tav_encoder_t *enc); @@ -245,7 +249,7 @@ static void show_usage(const char *program_name) { printf(" -s, --size WxH Video size (default: %dx%d)\n", DEFAULT_WIDTH, DEFAULT_HEIGHT); printf(" -f, --fps N Output frames per second (enables frame rate conversion)\n"); printf(" -q, --quality N Quality level 0-5 (default: 2)\n"); - printf(" -Q, --quantizer Y,Co,Cg Quantizer levels 0-100 for each channel\n"); + printf(" -Q, --quantiser Y,Co,Cg Quantiser levels 0-100 for each channel\n"); // printf(" -w, --wavelet N Wavelet filter: 0=5/3 reversible, 1=9/7 irreversible (default: 1)\n"); printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode)\n"); printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n"); @@ -254,14 +258,15 @@ static void show_usage(const char *program_name) { printf(" --lossless Lossless mode: use 5/3 reversible wavelet\n"); // printf(" --enable-progressive Enable progressive transmission\n"); // printf(" --enable-roi Enable region-of-interest coding\n"); - printf(" --ictcp Use ICtCp colour space instead of YCoCg-R (generates TAV version 2)\n"); + printf(" --intra-only Disable delta encoding (force all tiles to use INTRA mode)\n"); + printf(" --ictcp Use ICtCp colour space instead of YCoCg-R (use when source is in BT.2100)\n"); printf(" --help Show this help\n\n"); printf("Audio Rate by Quality:\n "); for (int i = 0; i < sizeof(MP2_RATE_TABLE) / sizeof(int); i++) { printf("%d: %d kbps\t", i, MP2_RATE_TABLE[i]); } - printf("\n\nQuantizer Value by Quality:\n"); + printf("\n\nQuantiser Value by Quality:\n"); printf(" Y (Luma): "); for (int i = 0; i < 6; i++) { printf("%d: Q%d ", i, QUALITY_Y[i]); @@ -278,8 +283,6 @@ static void show_usage(const char *program_name) { printf("\n\nFeatures:\n"); printf(" - 112x112 DWT tiles with multi-resolution encoding\n"); printf(" - Full resolution YCoCg-R/ICtCp colour space\n"); -// printf(" - Progressive transmission and ROI coding\n"); -// printf(" - Motion compensation with ±16 pixel search range\n"); printf(" - Lossless and lossy compression modes\n"); printf("\nExamples:\n"); @@ -302,9 +305,9 @@ static tav_encoder_t* create_encoder(void) { enc->quality_level = DEFAULT_QUALITY; enc->wavelet_filter = WAVELET_9_7_IRREVERSIBLE; enc->decomp_levels = MAX_DECOMP_LEVELS; - enc->quantizer_y = QUALITY_Y[DEFAULT_QUALITY]; - enc->quantizer_co = QUALITY_CO[DEFAULT_QUALITY]; - enc->quantizer_cg = QUALITY_CG[DEFAULT_QUALITY]; + enc->quantiser_y = QUALITY_Y[DEFAULT_QUALITY]; + enc->quantiser_co = QUALITY_CO[DEFAULT_QUALITY]; + enc->quantiser_cg = QUALITY_CG[DEFAULT_QUALITY]; return enc; } @@ -333,22 +336,37 @@ static int initialize_encoder(tav_encoder_t *enc) { enc->tiles = malloc(num_tiles * sizeof(dwt_tile_t)); enc->motion_vectors = malloc(num_tiles * sizeof(motion_vector_t)); + // Initialize motion vectors + for (int i = 0; i < num_tiles; i++) { + enc->motion_vectors[i].mv_x = 0; + enc->motion_vectors[i].mv_y = 0; + enc->motion_vectors[i].rate_control_factor = 1.0f; // Initialize to 1.0f + } + // Initialize ZSTD compression enc->zstd_ctx = ZSTD_createCCtx(); enc->compressed_buffer_size = ZSTD_compressBound(1024 * 1024); // 1MB max enc->compressed_buffer = malloc(enc->compressed_buffer_size); - // OPTIMIZATION: Allocate reusable quantization buffers for padded tiles (344x288) + // OPTIMIZATION: Allocate reusable quantisation buffers for padded tiles (344x288) const int padded_coeff_count = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y; - enc->reusable_quantized_y = malloc(padded_coeff_count * sizeof(int16_t)); - enc->reusable_quantized_co = malloc(padded_coeff_count * sizeof(int16_t)); - enc->reusable_quantized_cg = malloc(padded_coeff_count * sizeof(int16_t)); + enc->reusable_quantised_y = malloc(padded_coeff_count * sizeof(int16_t)); + enc->reusable_quantised_co = malloc(padded_coeff_count * sizeof(int16_t)); + enc->reusable_quantised_cg = malloc(padded_coeff_count * sizeof(int16_t)); + + // Allocate coefficient delta storage for P-frames (per-tile coefficient storage) + size_t total_coeff_size = num_tiles * padded_coeff_count * sizeof(float); + enc->previous_coeffs_y = malloc(total_coeff_size); + enc->previous_coeffs_co = malloc(total_coeff_size); + enc->previous_coeffs_cg = malloc(total_coeff_size); + enc->previous_coeffs_allocated = 0; // Will be set to 1 after first I-frame if (!enc->current_frame_rgb || !enc->previous_frame_rgb || !enc->current_frame_y || !enc->current_frame_co || !enc->current_frame_cg || !enc->previous_frame_y || !enc->previous_frame_co || !enc->previous_frame_cg || !enc->tiles || !enc->motion_vectors || !enc->zstd_ctx || !enc->compressed_buffer || - !enc->reusable_quantized_y || !enc->reusable_quantized_co || !enc->reusable_quantized_cg) { + !enc->reusable_quantised_y || !enc->reusable_quantised_co || !enc->reusable_quantised_cg || + !enc->previous_coeffs_y || !enc->previous_coeffs_co || !enc->previous_coeffs_cg) { return -1; } @@ -601,14 +619,14 @@ static void dwt_2d_forward_padded(float *tile_data, int levels, int filter_type) -// Quantization for DWT subbands with rate control -static void quantize_dwt_coefficients(float *coeffs, int16_t *quantized, int size, int quantizer, float rcf) { - float effective_q = quantizer * rcf; +// Quantisation for DWT subbands with rate control +static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int size, int quantiser, float rcf) { + float effective_q = quantiser * rcf; effective_q = FCLAMP(effective_q, 1.0f, 255.0f); for (int i = 0; i < size; i++) { - float quantized_val = coeffs[i] / effective_q; - quantized[i] = (int16_t)CLAMP((int)(quantized_val + (quantized_val >= 0 ? 0.5f : -0.5f)), -32768, 32767); + float quantised_val = coeffs[i] / effective_q; + quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767); } } @@ -624,46 +642,96 @@ static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, memcpy(buffer + offset, &mv->mv_y, sizeof(int16_t)); offset += sizeof(int16_t); memcpy(buffer + offset, &mv->rate_control_factor, sizeof(float)); offset += sizeof(float); - if (mode == TAV_MODE_SKIP || mode == TAV_MODE_MOTION) { + if (mode == TAV_MODE_SKIP) { // No coefficient data for SKIP/MOTION modes return offset; } - // Quantize and serialize DWT coefficients (full padded tile: 344x288) + // Quantise and serialize DWT coefficients (full padded tile: 344x288) const int tile_size = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y; // OPTIMIZATION: Use pre-allocated buffers instead of malloc/free per tile - int16_t *quantized_y = enc->reusable_quantized_y; - int16_t *quantized_co = enc->reusable_quantized_co; - int16_t *quantized_cg = enc->reusable_quantized_cg; + int16_t *quantised_y = enc->reusable_quantised_y; + int16_t *quantised_co = enc->reusable_quantised_co; + int16_t *quantised_cg = enc->reusable_quantised_cg; - // Debug: check DWT coefficients before quantization + // Debug: check DWT coefficients before quantisation /*if (tile_x == 0 && tile_y == 0) { - printf("Encoder Debug: Tile (0,0) - DWT Y coeffs before quantization (first 16): "); + printf("Encoder Debug: Tile (0,0) - DWT Y coeffs before quantisation (first 16): "); for (int i = 0; i < 16; i++) { printf("%.2f ", tile_y_data[i]); } printf("\n"); - printf("Encoder Debug: Quantizers - Y=%d, Co=%d, Cg=%d, rcf=%.2f\n", - enc->quantizer_y, enc->quantizer_co, enc->quantizer_cg, mv->rate_control_factor); + printf("Encoder Debug: Quantisers - Y=%d, Co=%d, Cg=%d, rcf=%.2f\n", + enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg, mv->rate_control_factor); }*/ - quantize_dwt_coefficients((float*)tile_y_data, quantized_y, tile_size, enc->quantizer_y, mv->rate_control_factor); - quantize_dwt_coefficients((float*)tile_co_data, quantized_co, tile_size, enc->quantizer_co, mv->rate_control_factor); - quantize_dwt_coefficients((float*)tile_cg_data, quantized_cg, tile_size, enc->quantizer_cg, mv->rate_control_factor); + if (mode == TAV_MODE_INTRA) { + // INTRA mode: quantise coefficients directly and store for future reference + quantise_dwt_coefficients((float*)tile_y_data, quantised_y, tile_size, enc->quantiser_y, mv->rate_control_factor); + quantise_dwt_coefficients((float*)tile_co_data, quantised_co, tile_size, enc->quantiser_co, mv->rate_control_factor); + quantise_dwt_coefficients((float*)tile_cg_data, quantised_cg, tile_size, enc->quantiser_cg, mv->rate_control_factor); + + // Store current coefficients for future delta reference + int tile_idx = tile_y * enc->tiles_x + tile_x; + float *prev_y = enc->previous_coeffs_y + (tile_idx * tile_size); + float *prev_co = enc->previous_coeffs_co + (tile_idx * tile_size); + float *prev_cg = enc->previous_coeffs_cg + (tile_idx * tile_size); + memcpy(prev_y, tile_y_data, tile_size * sizeof(float)); + memcpy(prev_co, tile_co_data, tile_size * sizeof(float)); + memcpy(prev_cg, tile_cg_data, tile_size * sizeof(float)); + + } else if (mode == TAV_MODE_DELTA) { + // DELTA mode: compute coefficient deltas and quantise them + int tile_idx = tile_y * enc->tiles_x + tile_x; + float *prev_y = enc->previous_coeffs_y + (tile_idx * tile_size); + float *prev_co = enc->previous_coeffs_co + (tile_idx * tile_size); + float *prev_cg = enc->previous_coeffs_cg + (tile_idx * tile_size); + + // Compute deltas: delta = current - previous + float *delta_y = malloc(tile_size * sizeof(float)); + float *delta_co = malloc(tile_size * sizeof(float)); + float *delta_cg = malloc(tile_size * sizeof(float)); + + for (int i = 0; i < tile_size; i++) { + delta_y[i] = tile_y_data[i] - prev_y[i]; + delta_co[i] = tile_co_data[i] - prev_co[i]; + delta_cg[i] = tile_cg_data[i] - prev_cg[i]; + } + + // Quantise the deltas + quantise_dwt_coefficients(delta_y, quantised_y, tile_size, enc->quantiser_y, mv->rate_control_factor); + quantise_dwt_coefficients(delta_co, quantised_co, tile_size, enc->quantiser_co, mv->rate_control_factor); + quantise_dwt_coefficients(delta_cg, quantised_cg, tile_size, enc->quantiser_cg, mv->rate_control_factor); + + // Reconstruct coefficients like decoder will (previous + dequantised_delta) + for (int i = 0; i < tile_size; i++) { + float dequant_delta_y = (float)quantised_y[i] * enc->quantiser_y * mv->rate_control_factor; + float dequant_delta_co = (float)quantised_co[i] * enc->quantiser_co * mv->rate_control_factor; + float dequant_delta_cg = (float)quantised_cg[i] * enc->quantiser_cg * mv->rate_control_factor; + + prev_y[i] = prev_y[i] + dequant_delta_y; + prev_co[i] = prev_co[i] + dequant_delta_co; + prev_cg[i] = prev_cg[i] + dequant_delta_cg; + } + + free(delta_y); + free(delta_co); + free(delta_cg); + } - // Debug: check quantized coefficients after quantization + // Debug: check quantised coefficients after quantisation /*if (tile_x == 0 && tile_y == 0) { - printf("Encoder Debug: Tile (0,0) - Quantized Y coeffs (first 16): "); + printf("Encoder Debug: Tile (0,0) - Quantised Y coeffs (first 16): "); for (int i = 0; i < 16; i++) { - printf("%d ", quantized_y[i]); + printf("%d ", quantised_y[i]); } printf("\n"); }*/ - // Write quantized coefficients - memcpy(buffer + offset, quantized_y, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t); - memcpy(buffer + offset, quantized_co, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t); - memcpy(buffer + offset, quantized_cg, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t); + // Write quantised coefficients + memcpy(buffer + offset, quantised_y, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t); + memcpy(buffer + offset, quantised_co, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t); + memcpy(buffer + offset, quantised_cg, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t); // OPTIMIZATION: No need to free - using pre-allocated reusable buffers @@ -685,8 +753,14 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) for (int tile_x = 0; tile_x < enc->tiles_x; tile_x++) { int tile_idx = tile_y * enc->tiles_x + tile_x; - // Determine tile mode (simplified) - uint8_t mode = TAV_MODE_INTRA; // For now, all tiles are INTRA + // Determine tile mode based on frame type, coefficient availability, and intra_only flag + uint8_t mode; + int is_keyframe = (packet_type == TAV_PACKET_IFRAME); + if (is_keyframe || !enc->previous_coeffs_allocated) { + mode = TAV_MODE_INTRA; // I-frames, first frames, or intra-only mode always use INTRA + } else { + mode = TAV_MODE_DELTA; // P-frames use coefficient delta encoding + } // Extract padded tile data (344x288) with neighbour context for overlapping tiles float tile_y_data[PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y]; @@ -741,62 +815,12 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) enc->total_compressed_size += compressed_size; enc->total_uncompressed_size += uncompressed_offset; - return compressed_size + 5; // packet type + size field + compressed data -} - -// Motion estimation for 112x112 tiles using SAD -static int estimate_motion_280x224(const float *current, const float *reference, - int width, int height, int tile_x, int tile_y, - motion_vector_t *mv) { - const int tile_size_x = TILE_SIZE_X; - const int tile_size_y = TILE_SIZE_Y; - const int search_range = 32; // ±32 pixels (scaled for larger tiles) - const int start_x = tile_x * tile_size_x; - const int start_y = tile_y * tile_size_y; - - int best_mv_x = 0, best_mv_y = 0; - int min_sad = INT_MAX; - - // Search within ±16 pixel range - for (int dy = -search_range; dy <= search_range; dy++) { - for (int dx = -search_range; dx <= search_range; dx++) { - int ref_x = start_x + dx; - int ref_y = start_y + dy; - - // Check bounds - if (ref_x < 0 || ref_y < 0 || - ref_x + tile_size_x > width || ref_y + tile_size_y > height) { - continue; - } - - // Calculate SAD - int sad = 0; - for (int y = 0; y < tile_size_y; y++) { - for (int x = 0; x < tile_size_x; x++) { - int curr_idx = (start_y + y) * width + (start_x + x); - int ref_idx = (ref_y + y) * width + (ref_x + x); - - if (curr_idx >= 0 && curr_idx < width * height && - ref_idx >= 0 && ref_idx < width * height) { - int diff = (int)(current[curr_idx] - reference[ref_idx]); - sad += abs(diff); - } - } - } - - if (sad < min_sad) { - min_sad = sad; - best_mv_x = dx * 4; // Convert to 1/4 pixel precision - best_mv_y = dy * 4; - } - } + // Mark coefficient storage as available after first I-frame + if (packet_type == TAV_PACKET_IFRAME) { + enc->previous_coeffs_allocated = 1; } - mv->mv_x = best_mv_x; - mv->mv_y = best_mv_y; - mv->rate_control_factor = 1.0f; // TODO: Calculate based on complexity - - return min_sad; + return compressed_size + 5; // packet type + size field + compressed data } // RGB to YCoCg colour space conversion @@ -879,10 +903,16 @@ static inline double HLG_EOTF(double Ep) { } // sRGB -> LMS matrix -static const double M_RGB_TO_LMS[3][3] = { +/*static const double M_RGB_TO_LMS[3][3] = { {0.2958564579364564, 0.6230869483219083, 0.08106989398623762}, {0.15627390752659093, 0.727308963512872, 0.11639736914944238}, {0.035141262332177715, 0.15657109121101628, 0.8080956851990795} +};*/ +// BT.2100 -> LMS matrix +static const double M_RGB_TO_LMS[3][3] = { + {1688.0/4096,2146.0/4096, 262.0/4096}, + { 683.0/4096,2951.0/4096, 462.0/4096}, + { 99.0/4096, 309.0/4096,3688.0/4096} }; static const double M_LMS_TO_RGB[3][3] = { @@ -1046,13 +1076,13 @@ static int write_tav_header(tav_encoder_t *enc) { // Encoder parameters fputc(enc->wavelet_filter, enc->output_fp); fputc(enc->decomp_levels, enc->output_fp); - fputc(enc->quantizer_y, enc->output_fp); - fputc(enc->quantizer_co, enc->output_fp); - fputc(enc->quantizer_cg, enc->output_fp); + fputc(enc->quantiser_y, enc->output_fp); + fputc(enc->quantiser_co, enc->output_fp); + fputc(enc->quantiser_cg, enc->output_fp); // Feature flags uint8_t extra_flags = 0; - if (1) extra_flags |= 0x01; // Has audio (placeholder) + if (enc->has_audio) extra_flags |= 0x01; // Has audio (placeholder) if (enc->subtitle_file) extra_flags |= 0x02; // Has subtitles if (enc->enable_progressive_transmission) extra_flags |= 0x04; if (enc->enable_roi) extra_flags |= 0x08; @@ -1060,9 +1090,8 @@ static int write_tav_header(tav_encoder_t *enc) { uint8_t video_flags = 0; // if (!enc->progressive) video_flags |= 0x01; // Interlaced - if (enc->fps == 29 || enc->fps == 30) video_flags |= 0x02; // NTSC + if (enc->is_ntsc_framerate) video_flags |= 0x02; // NTSC if (enc->lossless) video_flags |= 0x04; // Lossless - if (enc->decomp_levels > 1) video_flags |= 0x08; // Multi-resolution fputc(video_flags, enc->output_fp); // Reserved bytes (7 bytes) @@ -1175,6 +1204,8 @@ static int get_video_metadata(tav_encoder_t *config) { // fprintf(stderr, " Resolution: %dx%d (%s)\n", config->width, config->height, // config->progressive ? "progressive" : "interlaced"); fprintf(stderr, " Resolution: %dx%d\n", config->width, config->height); + + return 1; } // Start FFmpeg process for video conversion with frame rate support @@ -1182,11 +1213,21 @@ static int start_video_conversion(tav_encoder_t *enc) { char command[2048]; // Use simple FFmpeg command like TEV encoder for reliable EOF detection - snprintf(command, sizeof(command), - "ffmpeg -i \"%s\" -f rawvideo -pix_fmt rgb24 " - "-vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" " - "-y - 2>/dev/null", - enc->input_file, enc->width, enc->height, enc->width, enc->height); + if (enc->output_fps > 0 && enc->output_fps != enc->fps) { + // Frame rate conversion requested + snprintf(command, sizeof(command), + "ffmpeg -v error -i \"%s\" -f rawvideo -pix_fmt rgb24 " + "-vf \"fps=%d,scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" " + "-y - 2>&1", + enc->input_file, enc->output_fps, enc->width, enc->height, enc->width, enc->height); + } else { + // No frame rate conversion + snprintf(command, sizeof(command), + "ffmpeg -v error -i \"%s\" -f rawvideo -pix_fmt rgb24 " + "-vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" " + "-y -", + enc->input_file, enc->width, enc->height, enc->width, enc->height); + } if (enc->verbose) { printf("FFmpeg command: %s\n", command); @@ -1618,6 +1659,53 @@ static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output) { return bytes_written; } +// Detect scene changes by analysing frame differences +static int detect_scene_change(tav_encoder_t *enc) { + if (!enc->current_frame_rgb || enc->intra_only) { + return 0; // No current frame to compare + } + + uint8_t *comparison_buffer = enc->previous_frame_rgb; + + long long total_diff = 0; + int changed_pixels = 0; + + // Sample every 4th pixel for performance (still gives good detection) + for (int y = 0; y < enc->height; y += 2) { + for (int x = 0; x < enc->width; x += 2) { + int offset = (y * enc->width + x) * 3; + + // Calculate color difference + int r_diff = abs(enc->current_frame_rgb[offset] - comparison_buffer[offset]); + int g_diff = abs(enc->current_frame_rgb[offset + 1] - comparison_buffer[offset + 1]); + int b_diff = abs(enc->current_frame_rgb[offset + 2] - comparison_buffer[offset + 2]); + + int pixel_diff = r_diff + g_diff + b_diff; + total_diff += pixel_diff; + + // Count significantly changed pixels (threshold of 30 per channel average) + if (pixel_diff > 90) { + changed_pixels++; + } + } + } + + // Calculate metrics for scene change detection + int sampled_pixels = (enc->height / 2) * (enc->width / 2); + double avg_diff = (double)total_diff / sampled_pixels; + double changed_ratio = (double)changed_pixels / sampled_pixels; + + if (enc->verbose) { + printf("Scene change detection: avg_diff=%.2f\tchanged_ratio=%.4f\n", avg_diff, changed_ratio); + } + + // Scene change thresholds - adjust for interlaced mode + // Interlaced fields have more natural differences due to temporal field separation + double threshold = 0.30; + + return changed_ratio > threshold; +} + // Main function int main(int argc, char *argv[]) { generate_random_filename(TEMP_AUDIO_FILE); @@ -1636,8 +1724,8 @@ int main(int argc, char *argv[]) { {"size", required_argument, 0, 's'}, {"fps", required_argument, 0, 'f'}, {"quality", required_argument, 0, 'q'}, - {"quantizer", required_argument, 0, 'Q'}, {"quantiser", required_argument, 0, 'Q'}, + {"quantizer", required_argument, 0, 'Q'}, // {"wavelet", required_argument, 0, 'w'}, // {"decomp", required_argument, 0, 'd'}, {"bitrate", required_argument, 0, 'b'}, @@ -1648,6 +1736,7 @@ int main(int argc, char *argv[]) { {"lossless", no_argument, 0, 1000}, // {"enable-progressive", no_argument, 0, 1002}, // {"enable-roi", no_argument, 0, 1003}, + {"intra-only", no_argument, 0, 1006}, {"ictcp", no_argument, 0, 1005}, {"help", no_argument, 0, 1004}, {0, 0, 0, 0} @@ -1664,26 +1753,32 @@ int main(int argc, char *argv[]) { break; case 'q': enc->quality_level = CLAMP(atoi(optarg), 0, 5); - enc->quantizer_y = QUALITY_Y[enc->quality_level]; - enc->quantizer_co = QUALITY_CO[enc->quality_level]; - enc->quantizer_cg = QUALITY_CG[enc->quality_level]; + enc->quantiser_y = QUALITY_Y[enc->quality_level]; + enc->quantiser_co = QUALITY_CO[enc->quality_level]; + enc->quantiser_cg = QUALITY_CG[enc->quality_level]; break; case 'Q': - // Parse quantizer values Y,Co,Cg - if (sscanf(optarg, "%d,%d,%d", &enc->quantizer_y, &enc->quantizer_co, &enc->quantizer_cg) != 3) { - fprintf(stderr, "Error: Invalid quantizer format. Use Y,Co,Cg (e.g., 5,3,2)\n"); + // Parse quantiser values Y,Co,Cg + if (sscanf(optarg, "%d,%d,%d", &enc->quantiser_y, &enc->quantiser_co, &enc->quantiser_cg) != 3) { + fprintf(stderr, "Error: Invalid quantiser format. Use Y,Co,Cg (e.g., 5,3,2)\n"); cleanup_encoder(enc); return 1; } - enc->quantizer_y = CLAMP(enc->quantizer_y, 1, 100); - enc->quantizer_co = CLAMP(enc->quantizer_co, 1, 100); - enc->quantizer_cg = CLAMP(enc->quantizer_cg, 1, 100); + enc->quantiser_y = CLAMP(enc->quantiser_y, 1, 100); + enc->quantiser_co = CLAMP(enc->quantiser_co, 1, 100); + enc->quantiser_cg = CLAMP(enc->quantiser_cg, 1, 100); break; /*case 'w': enc->wavelet_filter = CLAMP(atoi(optarg), 0, 1); break;*/ case 'f': enc->output_fps = atoi(optarg); + enc->is_ntsc_framerate = 0; + if (enc->output_fps <= 0) { + fprintf(stderr, "Invalid FPS: %d\n", enc->output_fps); + cleanup_encoder(enc); + return 1; + } break; /*case 'd': enc->decomp_levels = CLAMP(atoi(optarg), 1, MAX_DECOMP_LEVELS); @@ -1704,6 +1799,9 @@ int main(int argc, char *argv[]) { case 1005: // --ictcp enc->ictcp_mode = 1; break; + case 1006: // --intra-only + enc->intra_only = 1; + break; case 1004: // --help show_usage(argv[0]); cleanup_encoder(enc); @@ -1714,7 +1812,12 @@ int main(int argc, char *argv[]) { return 1; } } - + + // adjust encoding parameters for ICtCp + if (enc->ictcp_mode) { + enc->quantiser_cg = enc->quantiser_co; + } + if ((!enc->input_file && !enc->test_mode) || !enc->output_file) { fprintf(stderr, "Error: Input and output files must be specified\n"); show_usage(argv[0]); @@ -1734,7 +1837,11 @@ int main(int argc, char *argv[]) { printf("Resolution: %dx%d\n", enc->width, enc->height); printf("Wavelet: %s\n", enc->wavelet_filter ? "9/7 irreversible" : "5/3 reversible"); printf("Decomposition levels: %d\n", enc->decomp_levels); - printf("Quality: Y=%d, Co=%d, Cg=%d\n", enc->quantizer_y, enc->quantizer_co, enc->quantizer_cg); + if (enc->ictcp_mode) { + printf("Quantiser: I=%d, Ct=%d, Cp=%d\n", enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg); + } else { + printf("Quantiser: Y=%d, Co=%d, Cg=%d\n", enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg); + } printf("Colour space: %s\n", enc->ictcp_mode ? "ICtCp" : "YCoCg-R"); // Open output file @@ -1797,6 +1904,10 @@ int main(int argc, char *argv[]) { cleanup_encoder(enc); return 1; } + + if (enc->output_fps != enc->fps) { + printf("Frame rate conversion enabled: %d fps output\n", enc->output_fps); + } printf("Starting encoding...\n"); @@ -1869,9 +1980,20 @@ int main(int argc, char *argv[]) { // Frame parity: even frames (0,2,4...) = bottom fields, odd frames (1,3,5...) = top fields } - // Determine frame type (all frames are keyframes in current implementation) - int is_keyframe = 1; - + // Determine frame type + int is_scene_change = detect_scene_change(enc); + int is_time_keyframe = (frame_count % KEYFRAME_INTERVAL) == 0; + int is_keyframe = enc->intra_only || is_time_keyframe || is_scene_change; + + // Verbose output for keyframe decisions + /*if (enc->verbose && is_keyframe) { + if (is_scene_change && !is_time_keyframe) { + printf("Frame %d: Scene change detected, inserting keyframe\n", frame_count); + } else if (is_time_keyframe) { + printf("Frame %d: Time-based keyframe (interval: %d)\n", frame_count, KEYFRAME_INTERVAL); + } + }*/ + // Debug: check RGB input data /*if (frame_count < 3) { printf("Encoder Debug: Frame %d - RGB data (first 16 bytes): ", frame_count); @@ -1896,23 +2018,6 @@ int main(int argc, char *argv[]) { printf("\n"); }*/ - // Process motion vectors for P-frames - int num_tiles = enc->tiles_x * enc->tiles_y; - for (int tile_idx = 0; tile_idx < num_tiles; tile_idx++) { - int tile_x = tile_idx % enc->tiles_x; - int tile_y = tile_idx / enc->tiles_x; - - if (!is_keyframe && frame_count > 0) { - estimate_motion_280x224(enc->current_frame_y, enc->previous_frame_y, - enc->width, enc->height, tile_x, tile_y, - &enc->motion_vectors[tile_idx]); - } else { - enc->motion_vectors[tile_idx].mv_x = 0; - enc->motion_vectors[tile_idx].mv_y = 0; - enc->motion_vectors[tile_idx].rate_control_factor = 1.0f; - } - } - // Compress and write frame packet uint8_t packet_type = is_keyframe ? TAV_PACKET_IFRAME : TAV_PACKET_PFRAME; size_t packet_size = compress_and_write_frame(enc, packet_type); @@ -2007,10 +2112,15 @@ static void cleanup_encoder(tav_encoder_t *enc) { free(enc->compressed_buffer); free(enc->mp2_buffer); - // OPTIMIZATION: Free reusable quantization buffers - free(enc->reusable_quantized_y); - free(enc->reusable_quantized_co); - free(enc->reusable_quantized_cg); + // OPTIMIZATION: Free reusable quantisation buffers + free(enc->reusable_quantised_y); + free(enc->reusable_quantised_co); + free(enc->reusable_quantised_cg); + + // Free coefficient delta storage + free(enc->previous_coeffs_y); + free(enc->previous_coeffs_co); + free(enc->previous_coeffs_cg); // Free subtitle list if (enc->subtitles) {