mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-06-09 22:54:03 +09:00
p-frame for tav
This commit is contained in:
@@ -439,7 +439,6 @@ const roiCoding = (header.extraFlags & 0x08) !== 0
|
|||||||
const isInterlaced = (header.videoFlags & 0x01) !== 0
|
const isInterlaced = (header.videoFlags & 0x01) !== 0
|
||||||
const isNTSC = (header.videoFlags & 0x02) !== 0
|
const isNTSC = (header.videoFlags & 0x02) !== 0
|
||||||
const isLossless = (header.videoFlags & 0x04) !== 0
|
const isLossless = (header.videoFlags & 0x04) !== 0
|
||||||
const multiResolution = (header.videoFlags & 0x08) !== 0
|
|
||||||
|
|
||||||
// Calculate tile dimensions (112x112 vs TEV's 16x16 blocks)
|
// Calculate tile dimensions (112x112 vs TEV's 16x16 blocks)
|
||||||
const tilesX = Math.ceil(header.width / TILE_SIZE)
|
const tilesX = Math.ceil(header.width / TILE_SIZE)
|
||||||
|
|||||||
@@ -826,19 +826,16 @@ transmission capability, and region-of-interest coding.
|
|||||||
uint32 Total Frames: number of video frames
|
uint32 Total Frames: number of video frames
|
||||||
uint8 Wavelet Filter Type: 0=5/3 reversible, 1=9/7 irreversible
|
uint8 Wavelet Filter Type: 0=5/3 reversible, 1=9/7 irreversible
|
||||||
uint8 Decomposition Levels: number of DWT levels (1-4)
|
uint8 Decomposition Levels: number of DWT levels (1-4)
|
||||||
uint8 Quality Index for Y channel (0-99; 100 denotes lossless)
|
uint8 Quantiser Index for Y channel (1: lossless, 255: potato)
|
||||||
uint8 Quality Index for Co channel (0-99; 100 denotes lossless)
|
uint8 Quantiser Index for Co channel (1: lossless, 255: potato)
|
||||||
uint8 Quality Index for Cg channel (0-99; 100 denotes lossless)
|
uint8 Quantiser Index for Cg channel (1: lossless, 255: potato)
|
||||||
uint8 Extra Feature Flags
|
uint8 Extra Feature Flags
|
||||||
- bit 0 = has audio
|
- bit 0 = has audio
|
||||||
- bit 1 = has subtitle
|
- bit 1 = has subtitle
|
||||||
- bit 2 = progressive transmission enabled
|
|
||||||
- bit 3 = region-of-interest coding enabled
|
|
||||||
uint8 Video Flags
|
uint8 Video Flags
|
||||||
- bit 0 = is interlaced
|
- bit 0 = is interlaced (unused)
|
||||||
- bit 1 = is NTSC framerate
|
- bit 1 = is NTSC framerate
|
||||||
- bit 2 = is lossless mode
|
- bit 2 = is lossless mode
|
||||||
- bit 3 = multi-resolution encoding
|
|
||||||
uint8 Reserved[7]: fill with zeros
|
uint8 Reserved[7]: fill with zeros
|
||||||
|
|
||||||
## Packet Types
|
## Packet Types
|
||||||
|
|||||||
@@ -17,17 +17,22 @@ import kotlin.math.*
|
|||||||
class GraphicsJSR223Delegate(private val vm: VM) {
|
class GraphicsJSR223Delegate(private val vm: VM) {
|
||||||
|
|
||||||
// TAV Simulated overlapping tiles constants (must match encoder)
|
// TAV Simulated overlapping tiles constants (must match encoder)
|
||||||
private val TAV_TILE_SIZE_X = 280
|
private val TILE_SIZE_X = 280
|
||||||
private val TAV_TILE_SIZE_Y = 224
|
private val TILE_SIZE_Y = 224
|
||||||
private val TAV_TILE_MARGIN = 32 // 32-pixel margin for 3 DWT levels (4 * 2^3 = 32px)
|
private val TAV_TILE_MARGIN = 32 // 32-pixel margin for 3 DWT levels (4 * 2^3 = 32px)
|
||||||
private val TAV_PADDED_TILE_SIZE_X = TAV_TILE_SIZE_X + 2 * TAV_TILE_MARGIN // 280 + 64 = 344px
|
private val PADDED_TILE_SIZE_X = TILE_SIZE_X + 2 * TAV_TILE_MARGIN // 280 + 64 = 344px
|
||||||
private val TAV_PADDED_TILE_SIZE_Y = TAV_TILE_SIZE_Y + 2 * TAV_TILE_MARGIN // 224 + 64 = 288px
|
private val PADDED_TILE_SIZE_Y = TILE_SIZE_Y + 2 * TAV_TILE_MARGIN // 224 + 64 = 288px
|
||||||
|
|
||||||
// Reusable working arrays to reduce allocation overhead
|
// Reusable working arrays to reduce allocation overhead
|
||||||
private val tevIdct8TempBuffer = FloatArray(64)
|
private val tevIdct8TempBuffer = FloatArray(64)
|
||||||
private val tevIdct16TempBuffer = FloatArray(256) // For 16x16 IDCT
|
private val tevIdct16TempBuffer = FloatArray(256) // For 16x16 IDCT
|
||||||
private val tevIdct16SeparableBuffer = FloatArray(256) // For separable 16x16 IDCT
|
private val tevIdct16SeparableBuffer = FloatArray(256) // For separable 16x16 IDCT
|
||||||
|
|
||||||
|
// TAV coefficient delta storage for previous frame (for efficient P-frames)
|
||||||
|
private var tavPreviousCoeffsY: MutableMap<Int, FloatArray>? = null
|
||||||
|
private var tavPreviousCoeffsCo: MutableMap<Int, FloatArray>? = null
|
||||||
|
private var tavPreviousCoeffsCg: MutableMap<Int, FloatArray>? = null
|
||||||
|
|
||||||
private fun getFirstGPU(): GraphicsAdapter? {
|
private fun getFirstGPU(): GraphicsAdapter? {
|
||||||
return vm.findPeribyType(VM.PERITYPE_GPU_AND_TERM)?.peripheral as? GraphicsAdapter
|
return vm.findPeribyType(VM.PERITYPE_GPU_AND_TERM)?.peripheral as? GraphicsAdapter
|
||||||
}
|
}
|
||||||
@@ -1285,7 +1290,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
return (if ((q < 50)) 5000f / q else 200f - 2 * q) / 100f
|
return (if ((q < 50)) 5000f / q else 200f - 2 * q) / 100f
|
||||||
}
|
}
|
||||||
|
|
||||||
// Quality settings for quantization (Y channel) - 16x16 tables
|
// Quality settings for quantisation (Y channel) - 16x16 tables
|
||||||
val QUANT_TABLE_Y: IntArray = intArrayOf(
|
val QUANT_TABLE_Y: IntArray = intArrayOf(
|
||||||
16, 14, 12, 11, 11, 13, 16, 20, 24, 30, 39, 48, 54, 61, 67, 73,
|
16, 14, 12, 11, 11, 13, 16, 20, 24, 30, 39, 48, 54, 61, 67, 73,
|
||||||
14, 13, 12, 12, 12, 15, 18, 21, 25, 33, 46, 57, 61, 65, 67, 70,
|
14, 13, 12, 12, 12, 15, 18, 21, 25, 33, 46, 57, 61, 65, 67, 70,
|
||||||
@@ -1304,7 +1309,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
73, 82, 92, 98, 103, 107, 110, 117, 126, 132, 134, 136, 138, 138, 133, 127,
|
73, 82, 92, 98, 103, 107, 110, 117, 126, 132, 134, 136, 138, 138, 133, 127,
|
||||||
86, 98, 109, 112, 114, 116, 118, 124, 133, 135, 129, 125, 128, 130, 128, 127)
|
86, 98, 109, 112, 114, 116, 118, 124, 133, 135, 129, 125, 128, 130, 128, 127)
|
||||||
|
|
||||||
// Quality settings for quantization (Co channel - orange-blue, 8x8)
|
// Quality settings for quantisation (Co channel - orange-blue, 8x8)
|
||||||
val QUANT_TABLE_C: IntArray = intArrayOf(
|
val QUANT_TABLE_C: IntArray = intArrayOf(
|
||||||
17, 18, 24, 47, 99, 99, 99, 99,
|
17, 18, 24, 47, 99, 99, 99, 99,
|
||||||
18, 21, 26, 66, 99, 99, 99, 99,
|
18, 21, 26, 66, 99, 99, 99, 99,
|
||||||
@@ -1527,7 +1532,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Apply Bayer dithering to reduce banding when quantizing to 4-bit
|
* Apply Bayer dithering to reduce banding when quantising to 4-bit
|
||||||
*/
|
*/
|
||||||
private fun ditherValue(value: Int, x: Int, y: Int, f: Int): Int {
|
private fun ditherValue(value: Int, x: Int, y: Int, f: Int): Int {
|
||||||
// Preserve pure values (0 and 255) exactly to maintain colour primaries
|
// Preserve pure values (0 and 255) exactly to maintain colour primaries
|
||||||
@@ -1707,7 +1712,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
private fun tevIdct16x16_fast(coeffs: ShortArray, quantTable: IntArray, qualityIndex: Int, rateControlFactor: Float): IntArray {
|
private fun tevIdct16x16_fast(coeffs: ShortArray, quantTable: IntArray, qualityIndex: Int, rateControlFactor: Float): IntArray {
|
||||||
val result = IntArray(256) // 16x16 = 256
|
val result = IntArray(256) // 16x16 = 256
|
||||||
|
|
||||||
// Process coefficients and dequantize using preallocated buffer
|
// Process coefficients and dequantise using preallocated buffer
|
||||||
for (u in 0 until 16) {
|
for (u in 0 until 16) {
|
||||||
for (v in 0 until 16) {
|
for (v in 0 until 16) {
|
||||||
val idx = u * 16 + v
|
val idx = u * 16 + v
|
||||||
@@ -2499,7 +2504,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
* @param prevRGBAddr Address of previous frame RGB buffer (for motion compensation)
|
* @param prevRGBAddr Address of previous frame RGB buffer (for motion compensation)
|
||||||
* @param width Frame width in pixels
|
* @param width Frame width in pixels
|
||||||
* @param height Frame height in pixels
|
* @param height Frame height in pixels
|
||||||
* @param quality Quantization quality level (0-7)
|
* @param quality Quantisation quality level (0-7)
|
||||||
* @param frameCounter Frame counter for temporal patterns
|
* @param frameCounter Frame counter for temporal patterns
|
||||||
*/
|
*/
|
||||||
fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
|
fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
|
||||||
@@ -2617,7 +2622,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
// tevApplyMotionCompensationTwoPass(yBlock, coBlock, cgBlock, startX, startY, mv[0], mv[1], prevRGBAddr, width, height, prevAddrIncVec)
|
// tevApplyMotionCompensationTwoPass(yBlock, coBlock, cgBlock, startX, startY, mv[0], mv[1], prevRGBAddr, width, height, prevAddrIncVec)
|
||||||
// }
|
// }
|
||||||
|
|
||||||
// Use IDCT on knusperli-optimised coefficients (coefficients are already optimally dequantized)
|
// Use IDCT on knusperli-optimised coefficients (coefficients are already optimally dequantised)
|
||||||
val yPixels = tevIdct16x16_fromOptimisedCoeffs(yBlock)
|
val yPixels = tevIdct16x16_fromOptimisedCoeffs(yBlock)
|
||||||
val coPixels = tevIdct8x8_fromOptimisedCoeffs(coBlock)
|
val coPixels = tevIdct8x8_fromOptimisedCoeffs(coBlock)
|
||||||
val cgPixels = tevIdct8x8_fromOptimisedCoeffs(cgBlock)
|
val cgPixels = tevIdct8x8_fromOptimisedCoeffs(cgBlock)
|
||||||
@@ -2798,7 +2803,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
0x01 -> { // TEV_MODE_INTRA - Full YCoCg-R DCT decode (no motion compensation)
|
0x01 -> { // TEV_MODE_INTRA - Full YCoCg-R DCT decode (no motion compensation)
|
||||||
// Regular lossy mode: quantized int16 coefficients
|
// Regular lossy mode: quantised int16 coefficients
|
||||||
// Optimised bulk reading of all DCT coefficients: Y(256×2) + Co(64×2) + Cg(64×2) = 768 bytes
|
// Optimised bulk reading of all DCT coefficients: Y(256×2) + Co(64×2) + Cg(64×2) = 768 bytes
|
||||||
val coeffShortArray = ShortArray(384) // Total coefficients: 256 + 64 + 64 = 384 shorts
|
val coeffShortArray = ShortArray(384) // Total coefficients: 256 + 64 + 64 = 384 shorts
|
||||||
vm.bulkPeekShort(readPtr.toInt(), coeffShortArray, 768)
|
vm.bulkPeekShort(readPtr.toInt(), coeffShortArray, 768)
|
||||||
@@ -3141,7 +3146,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val kAlphaSqrt2 = intArrayOf(1024, 1448, 1448, 1448, 1448, 1448, 1448, 1448)
|
val kAlphaSqrt2 = intArrayOf(1024, 1448, 1448, 1448, 1448, 1448, 1448, 1448)
|
||||||
val kHalfSqrt2 = 724 // sqrt(2)/2 in 10-bit fixed-point
|
val kHalfSqrt2 = 724 // sqrt(2)/2 in 10-bit fixed-point
|
||||||
|
|
||||||
// Convert to dequantized FloatArrays and apply knusperli optimisation
|
// Convert to dequantised FloatArrays and apply knusperli optimisation
|
||||||
val optimisedYBlocks = tevConvertAndOptimise16x16Blocks(yBlocks, quantTableY, qY, rateControlFactors, blocksX, blocksY, kLinearGradient, kAlphaSqrt2, kHalfSqrt2)
|
val optimisedYBlocks = tevConvertAndOptimise16x16Blocks(yBlocks, quantTableY, qY, rateControlFactors, blocksX, blocksY, kLinearGradient, kAlphaSqrt2, kHalfSqrt2)
|
||||||
val optimisedCoBlocks = tevConvertAndOptimise8x8Blocks(coBlocks, quantTableCo, qCo, rateControlFactors, blocksX, blocksY, kLinearGradient, kAlphaSqrt2, kHalfSqrt2)
|
val optimisedCoBlocks = tevConvertAndOptimise8x8Blocks(coBlocks, quantTableCo, qCo, rateControlFactors, blocksX, blocksY, kLinearGradient, kAlphaSqrt2, kHalfSqrt2)
|
||||||
val optimisedCgBlocks = tevConvertAndOptimise8x8Blocks(cgBlocks, quantTableCg, qCg, rateControlFactors, blocksX, blocksY, kLinearGradient, kAlphaSqrt2, kHalfSqrt2)
|
val optimisedCgBlocks = tevConvertAndOptimise8x8Blocks(cgBlocks, quantTableCg, qCg, rateControlFactors, blocksX, blocksY, kLinearGradient, kAlphaSqrt2, kHalfSqrt2)
|
||||||
@@ -3149,7 +3154,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
return Triple(optimisedYBlocks, optimisedCoBlocks, optimisedCgBlocks)
|
return Triple(optimisedYBlocks, optimisedCoBlocks, optimisedCgBlocks)
|
||||||
}
|
}
|
||||||
|
|
||||||
// IDCT functions for knusperli-optimised coefficients (coefficients are already dequantized)
|
// IDCT functions for knusperli-optimised coefficients (coefficients are already dequantised)
|
||||||
private fun tevIdct16x16_fromOptimisedCoeffs(coeffs: FloatArray): IntArray {
|
private fun tevIdct16x16_fromOptimisedCoeffs(coeffs: FloatArray): IntArray {
|
||||||
val result = IntArray(256) // 16x16
|
val result = IntArray(256) // 16x16
|
||||||
|
|
||||||
@@ -3214,7 +3219,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
tevProcessBlocksWithKnusperli16x16(blocks, quantTable, qScale, rateControlFactors,
|
tevProcessBlocksWithKnusperli16x16(blocks, quantTable, qScale, rateControlFactors,
|
||||||
blocksX, blocksY, kLinearGradient16, kAlphaSqrt2_16, kHalfSqrt2)
|
blocksX, blocksY, kLinearGradient16, kAlphaSqrt2_16, kHalfSqrt2)
|
||||||
|
|
||||||
// Convert optimised ShortArray blocks to FloatArray (dequantized)
|
// Convert optimised ShortArray blocks to FloatArray (dequantised)
|
||||||
for (blockIndex in 0 until blocks.size) {
|
for (blockIndex in 0 until blocks.size) {
|
||||||
val block = blocks[blockIndex]
|
val block = blocks[blockIndex]
|
||||||
if (block != null) {
|
if (block != null) {
|
||||||
@@ -3243,7 +3248,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val coeffsSize = 256 // 16x16 = 256
|
val coeffsSize = 256 // 16x16 = 256
|
||||||
val numBlocks = blocksX * blocksY
|
val numBlocks = blocksX * blocksY
|
||||||
|
|
||||||
// OPTIMIZATION 1: Pre-compute quantization values to avoid repeated calculations
|
// OPTIMIZATION 1: Pre-compute quantisation values to avoid repeated calculations
|
||||||
val quantValues = Array(numBlocks) { IntArray(coeffsSize) }
|
val quantValues = Array(numBlocks) { IntArray(coeffsSize) }
|
||||||
val quantHalfValues = Array(numBlocks) { IntArray(coeffsSize) }
|
val quantHalfValues = Array(numBlocks) { IntArray(coeffsSize) }
|
||||||
|
|
||||||
@@ -3254,7 +3259,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val qualityMult = jpeg_quality_to_mult(qScale * rateControlFactor)
|
val qualityMult = jpeg_quality_to_mult(qScale * rateControlFactor)
|
||||||
|
|
||||||
quantValues[blockIndex][0] = 1 // DC is lossless
|
quantValues[blockIndex][0] = 1 // DC is lossless
|
||||||
quantHalfValues[blockIndex][0] = 0 // DC has no quantization interval
|
quantHalfValues[blockIndex][0] = 0 // DC has no quantisation interval
|
||||||
|
|
||||||
for (i in 1 until coeffsSize) {
|
for (i in 1 until coeffsSize) {
|
||||||
val coeffIdx = i.coerceIn(0, quantTable.size - 1)
|
val coeffIdx = i.coerceIn(0, quantTable.size - 1)
|
||||||
@@ -3269,7 +3274,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val blocksMid = Array(numBlocks) { IntArray(coeffsSize) }
|
val blocksMid = Array(numBlocks) { IntArray(coeffsSize) }
|
||||||
val blocksOff = Array(numBlocks) { LongArray(coeffsSize) } // Keep Long for accumulation
|
val blocksOff = Array(numBlocks) { LongArray(coeffsSize) } // Keep Long for accumulation
|
||||||
|
|
||||||
// Step 1: Setup dequantized values and initialize adjustments (BULK OPTIMIZED)
|
// Step 1: Setup dequantised values and initialize adjustments (BULK OPTIMIZED)
|
||||||
for (blockIndex in 0 until numBlocks) {
|
for (blockIndex in 0 until numBlocks) {
|
||||||
val block = blocks[blockIndex]
|
val block = blocks[blockIndex]
|
||||||
if (block != null) {
|
if (block != null) {
|
||||||
@@ -3277,8 +3282,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val off = blocksOff[blockIndex]
|
val off = blocksOff[blockIndex]
|
||||||
val quantVals = quantValues[blockIndex]
|
val quantVals = quantValues[blockIndex]
|
||||||
|
|
||||||
// OPTIMIZATION 9: Bulk dequantization using vectorized operations
|
// OPTIMIZATION 9: Bulk dequantisation using vectorized operations
|
||||||
tevBulkDequantizeCoefficients(block, mid, quantVals, coeffsSize)
|
tevBulkDequantiseCoefficients(block, mid, quantVals, coeffsSize)
|
||||||
|
|
||||||
// OPTIMIZATION 10: Bulk zero initialization of adjustments
|
// OPTIMIZATION 10: Bulk zero initialization of adjustments
|
||||||
off.fill(0L)
|
off.fill(0L)
|
||||||
@@ -3315,11 +3320,11 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step 4: Apply corrections and clamp to quantization intervals (BULK OPTIMIZED)
|
// Step 4: Apply corrections and clamp to quantisation intervals (BULK OPTIMIZED)
|
||||||
for (blockIndex in 0 until numBlocks) {
|
for (blockIndex in 0 until numBlocks) {
|
||||||
val block = blocks[blockIndex]
|
val block = blocks[blockIndex]
|
||||||
if (block != null) {
|
if (block != null) {
|
||||||
// OPTIMIZATION 11: Bulk apply corrections and quantization clamping
|
// OPTIMIZATION 11: Bulk apply corrections and quantisation clamping
|
||||||
tevBulkApplyCorrectionsAndClamp(
|
tevBulkApplyCorrectionsAndClamp(
|
||||||
block, blocksMid[blockIndex], blocksOff[blockIndex],
|
block, blocksMid[blockIndex], blocksOff[blockIndex],
|
||||||
quantValues[blockIndex], quantHalfValues[blockIndex],
|
quantValues[blockIndex], quantHalfValues[blockIndex],
|
||||||
@@ -3332,10 +3337,10 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
// BULK MEMORY ACCESS HELPER FUNCTIONS FOR KNUSPERLI
|
// BULK MEMORY ACCESS HELPER FUNCTIONS FOR KNUSPERLI
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* OPTIMIZATION 9: Bulk dequantization using vectorized operations
|
* OPTIMIZATION 9: Bulk dequantisation using vectorized operations
|
||||||
* Performs coefficient * quantization in optimised chunks
|
* Performs coefficient * quantisation in optimised chunks
|
||||||
*/
|
*/
|
||||||
private fun tevBulkDequantizeCoefficients(
|
private fun tevBulkDequantiseCoefficients(
|
||||||
coeffs: ShortArray, result: IntArray, quantVals: IntArray, size: Int
|
coeffs: ShortArray, result: IntArray, quantVals: IntArray, size: Int
|
||||||
) {
|
) {
|
||||||
// Process in chunks of 16 for better vectorization (CPU can process multiple values per instruction)
|
// Process in chunks of 16 for better vectorization (CPU can process multiple values per instruction)
|
||||||
@@ -3372,7 +3377,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* OPTIMIZATION 11: Bulk apply corrections and quantization clamping
|
* OPTIMIZATION 11: Bulk apply corrections and quantisation clamping
|
||||||
* Vectorized correction application with proper bounds checking
|
* Vectorized correction application with proper bounds checking
|
||||||
*/
|
*/
|
||||||
private fun tevBulkApplyCorrectionsAndClamp(
|
private fun tevBulkApplyCorrectionsAndClamp(
|
||||||
@@ -3404,7 +3409,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
mid[i + 6] += corr6
|
mid[i + 6] += corr6
|
||||||
mid[i + 7] += corr7
|
mid[i + 7] += corr7
|
||||||
|
|
||||||
// Apply quantization interval clamping - bulk operations
|
// Apply quantisation interval clamping - bulk operations
|
||||||
val orig0 = block[i].toInt() * quantVals[i]
|
val orig0 = block[i].toInt() * quantVals[i]
|
||||||
val orig1 = block[i + 1].toInt() * quantVals[i + 1]
|
val orig1 = block[i + 1].toInt() * quantVals[i + 1]
|
||||||
val orig2 = block[i + 2].toInt() * quantVals[i + 2]
|
val orig2 = block[i + 2].toInt() * quantVals[i + 2]
|
||||||
@@ -3423,7 +3428,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
mid[i + 6] = mid[i + 6].coerceIn(orig6 - quantHalf[i + 6], orig6 + quantHalf[i + 6])
|
mid[i + 6] = mid[i + 6].coerceIn(orig6 - quantHalf[i + 6], orig6 + quantHalf[i + 6])
|
||||||
mid[i + 7] = mid[i + 7].coerceIn(orig7 - quantHalf[i + 7], orig7 + quantHalf[i + 7])
|
mid[i + 7] = mid[i + 7].coerceIn(orig7 - quantHalf[i + 7], orig7 + quantHalf[i + 7])
|
||||||
|
|
||||||
// Convert back to quantized coefficients - bulk operations
|
// Convert back to quantised coefficients - bulk operations
|
||||||
val quantMax = Short.MAX_VALUE.toInt()
|
val quantMax = Short.MAX_VALUE.toInt()
|
||||||
val quantMin = Short.MIN_VALUE.toInt()
|
val quantMin = Short.MIN_VALUE.toInt()
|
||||||
block[i] = (mid[i] / quantVals[i]).coerceIn(quantMin, quantMax).toShort()
|
block[i] = (mid[i] / quantVals[i]).coerceIn(quantMin, quantMax).toShort()
|
||||||
@@ -3603,7 +3608,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val coeffsSize = 64
|
val coeffsSize = 64
|
||||||
val numBlocks = blocksX * blocksY
|
val numBlocks = blocksX * blocksY
|
||||||
|
|
||||||
// Step 1: Setup quantization intervals for all blocks (using integers like Google's code)
|
// Step 1: Setup quantisation intervals for all blocks (using integers like Google's code)
|
||||||
val blocksMid = Array(numBlocks) { IntArray(coeffsSize) }
|
val blocksMid = Array(numBlocks) { IntArray(coeffsSize) }
|
||||||
val blocksMin = Array(numBlocks) { IntArray(coeffsSize) }
|
val blocksMin = Array(numBlocks) { IntArray(coeffsSize) }
|
||||||
val blocksMax = Array(numBlocks) { IntArray(coeffsSize) }
|
val blocksMax = Array(numBlocks) { IntArray(coeffsSize) }
|
||||||
@@ -3617,19 +3622,19 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val quantIdx = i.coerceIn(0, quantTable.size - 1)
|
val quantIdx = i.coerceIn(0, quantTable.size - 1)
|
||||||
|
|
||||||
if (i == 0) {
|
if (i == 0) {
|
||||||
// DC coefficient: lossless (no quantization)
|
// DC coefficient: lossless (no quantisation)
|
||||||
val dcValue = block[i].toInt()
|
val dcValue = block[i].toInt()
|
||||||
blocksMid[blockIndex][i] = dcValue
|
blocksMid[blockIndex][i] = dcValue
|
||||||
blocksMin[blockIndex][i] = dcValue // No interval for DC
|
blocksMin[blockIndex][i] = dcValue // No interval for DC
|
||||||
blocksMax[blockIndex][i] = dcValue
|
blocksMax[blockIndex][i] = dcValue
|
||||||
} else {
|
} else {
|
||||||
// AC coefficients: use quantization intervals
|
// AC coefficients: use quantisation intervals
|
||||||
val quant = (quantTable[quantIdx] * jpeg_quality_to_mult(qScale * rateControlFactor)).coerceIn(1f, 255f).toInt()
|
val quant = (quantTable[quantIdx] * jpeg_quality_to_mult(qScale * rateControlFactor)).coerceIn(1f, 255f).toInt()
|
||||||
|
|
||||||
// Standard dequantized value (midpoint)
|
// Standard dequantised value (midpoint)
|
||||||
blocksMid[blockIndex][i] = block[i].toInt() * quant
|
blocksMid[blockIndex][i] = block[i].toInt() * quant
|
||||||
|
|
||||||
// Quantization interval bounds
|
// Quantisation interval bounds
|
||||||
val halfQuant = quant / 2
|
val halfQuant = quant / 2
|
||||||
blocksMin[blockIndex][i] = blocksMid[blockIndex][i] - halfQuant
|
blocksMin[blockIndex][i] = blocksMid[blockIndex][i] - halfQuant
|
||||||
blocksMax[blockIndex][i] = blocksMid[blockIndex][i] + halfQuant
|
blocksMax[blockIndex][i] = blocksMid[blockIndex][i] + halfQuant
|
||||||
@@ -3671,7 +3676,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step 4: Apply corrections and return optimised dequantized coefficients
|
// Step 4: Apply corrections and return optimised dequantised coefficients
|
||||||
val result = Array<FloatArray?>(blocks.size) { null }
|
val result = Array<FloatArray?>(blocks.size) { null }
|
||||||
for (blockIndex in 0 until numBlocks) {
|
for (blockIndex in 0 until numBlocks) {
|
||||||
val block = blocks[blockIndex]
|
val block = blocks[blockIndex]
|
||||||
@@ -3680,7 +3685,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
// Apply corrections with sqrt(2)/2 weighting (Google's exact formula with right shift)
|
// Apply corrections with sqrt(2)/2 weighting (Google's exact formula with right shift)
|
||||||
blocksMid[blockIndex][i] += ((blocksOff[blockIndex][i] * kHalfSqrt2) shr 31).toInt()
|
blocksMid[blockIndex][i] += ((blocksOff[blockIndex][i] * kHalfSqrt2) shr 31).toInt()
|
||||||
|
|
||||||
// Clamp to quantization interval bounds
|
// Clamp to quantisation interval bounds
|
||||||
val optimisedValue = blocksMid[blockIndex][i].coerceIn(
|
val optimisedValue = blocksMid[blockIndex][i].coerceIn(
|
||||||
blocksMin[blockIndex][i],
|
blocksMin[blockIndex][i],
|
||||||
blocksMax[blockIndex][i]
|
blocksMax[blockIndex][i]
|
||||||
@@ -3819,8 +3824,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
var readPtr = blockDataPtr
|
var readPtr = blockDataPtr
|
||||||
|
|
||||||
try {
|
try {
|
||||||
val tilesX = (width + TAV_TILE_SIZE_X - 1) / TAV_TILE_SIZE_X // 280x224 tiles
|
val tilesX = (width + TILE_SIZE_X - 1) / TILE_SIZE_X // 280x224 tiles
|
||||||
val tilesY = (height + TAV_TILE_SIZE_Y - 1) / TAV_TILE_SIZE_Y
|
val tilesY = (height + TILE_SIZE_Y - 1) / TILE_SIZE_Y
|
||||||
|
|
||||||
// Process each tile
|
// Process each tile
|
||||||
for (tileY in 0 until tilesY) {
|
for (tileY in 0 until tilesY) {
|
||||||
@@ -3836,6 +3841,13 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val rcf = vm.peekFloat(readPtr)
|
val rcf = vm.peekFloat(readPtr)
|
||||||
readPtr += 4
|
readPtr += 4
|
||||||
|
|
||||||
|
// debug print: raw decompressed bytes
|
||||||
|
/*print("TAV Decode raw bytes (Frame $frameCounter, mode: ${arrayOf("SKIP", "INTRA", "DELTA")[mode]}): ")
|
||||||
|
for (i in 0 until 32) {
|
||||||
|
print("${vm.peek(blockDataPtr + i).toUint().toString(16).uppercase().padStart(2, '0')} ")
|
||||||
|
}
|
||||||
|
println("...")*/
|
||||||
|
|
||||||
when (mode) {
|
when (mode) {
|
||||||
0x00 -> { // TAV_MODE_SKIP
|
0x00 -> { // TAV_MODE_SKIP
|
||||||
// Copy 280x224 tile from previous frame to current frame
|
// Copy 280x224 tile from previous frame to current frame
|
||||||
@@ -3847,17 +3859,11 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
width, height, qY, qCo, qCg, rcf,
|
width, height, qY, qCo, qCg, rcf,
|
||||||
waveletFilter, decompLevels, isLossless, tavVersion)
|
waveletFilter, decompLevels, isLossless, tavVersion)
|
||||||
}
|
}
|
||||||
0x02 -> { // TAV_MODE_INTER
|
0x02 -> { // TAV_MODE_DELTA
|
||||||
// Motion compensation + DWT residual to RGB buffer
|
// Coefficient delta encoding for efficient P-frames
|
||||||
readPtr = tavDecodeDWTInterTileRGB(readPtr, tileX, tileY, mvX, mvY,
|
readPtr = tavDecodeDeltaTileRGB(readPtr, tileX, tileY, currentRGBAddr,
|
||||||
currentRGBAddr, prevRGBAddr,
|
width, height, qY, qCo, qCg, rcf,
|
||||||
width, height, qY, qCo, qCg, rcf,
|
waveletFilter, decompLevels, isLossless, tavVersion)
|
||||||
waveletFilter, decompLevels, isLossless, tavVersion)
|
|
||||||
}
|
|
||||||
0x03 -> { // TAV_MODE_MOTION
|
|
||||||
// Motion compensation only (no residual)
|
|
||||||
tavApplyMotionCompensationRGB(tileX, tileY, mvX, mvY,
|
|
||||||
currentRGBAddr, prevRGBAddr, width, height)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -3872,13 +3878,13 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, rcf: Float,
|
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, rcf: Float,
|
||||||
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long {
|
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long {
|
||||||
// Now reading padded coefficient tiles (344x288) instead of core tiles (280x224)
|
// Now reading padded coefficient tiles (344x288) instead of core tiles (280x224)
|
||||||
val paddedCoeffCount = TAV_PADDED_TILE_SIZE_X * TAV_PADDED_TILE_SIZE_Y
|
val paddedCoeffCount = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y
|
||||||
var ptr = readPtr
|
var ptr = readPtr
|
||||||
|
|
||||||
// Read quantized DWT coefficients for padded tile Y, Co, Cg channels (344x288)
|
// Read quantised DWT coefficients for padded tile Y, Co, Cg channels (344x288)
|
||||||
val quantizedY = ShortArray(paddedCoeffCount)
|
val quantisedY = ShortArray(paddedCoeffCount)
|
||||||
val quantizedCo = ShortArray(paddedCoeffCount)
|
val quantisedCo = ShortArray(paddedCoeffCount)
|
||||||
val quantizedCg = ShortArray(paddedCoeffCount)
|
val quantisedCg = ShortArray(paddedCoeffCount)
|
||||||
|
|
||||||
// OPTIMIZATION: Bulk read all coefficient data (344x288 * 3 channels * 2 bytes = 594,432 bytes)
|
// OPTIMIZATION: Bulk read all coefficient data (344x288 * 3 channels * 2 bytes = 594,432 bytes)
|
||||||
val totalCoeffBytes = paddedCoeffCount * 3 * 2L // 3 channels, 2 bytes per short
|
val totalCoeffBytes = paddedCoeffCount * 3 * 2L // 3 channels, 2 bytes per short
|
||||||
@@ -3888,51 +3894,62 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
// Convert bulk data to coefficient arrays
|
// Convert bulk data to coefficient arrays
|
||||||
var bufferOffset = 0
|
var bufferOffset = 0
|
||||||
for (i in 0 until paddedCoeffCount) {
|
for (i in 0 until paddedCoeffCount) {
|
||||||
quantizedY[i] = (((coeffBuffer[bufferOffset + 1].toInt() and 0xFF) shl 8) or (coeffBuffer[bufferOffset].toInt() and 0xFF)).toShort()
|
quantisedY[i] = (((coeffBuffer[bufferOffset + 1].toInt() and 0xFF) shl 8) or (coeffBuffer[bufferOffset].toInt() and 0xFF)).toShort()
|
||||||
bufferOffset += 2
|
bufferOffset += 2
|
||||||
}
|
}
|
||||||
for (i in 0 until paddedCoeffCount) {
|
for (i in 0 until paddedCoeffCount) {
|
||||||
quantizedCo[i] = (((coeffBuffer[bufferOffset + 1].toInt() and 0xFF) shl 8) or (coeffBuffer[bufferOffset].toInt() and 0xFF)).toShort()
|
quantisedCo[i] = (((coeffBuffer[bufferOffset + 1].toInt() and 0xFF) shl 8) or (coeffBuffer[bufferOffset].toInt() and 0xFF)).toShort()
|
||||||
bufferOffset += 2
|
bufferOffset += 2
|
||||||
}
|
}
|
||||||
for (i in 0 until paddedCoeffCount) {
|
for (i in 0 until paddedCoeffCount) {
|
||||||
quantizedCg[i] = (((coeffBuffer[bufferOffset + 1].toInt() and 0xFF) shl 8) or (coeffBuffer[bufferOffset].toInt() and 0xFF)).toShort()
|
quantisedCg[i] = (((coeffBuffer[bufferOffset + 1].toInt() and 0xFF) shl 8) or (coeffBuffer[bufferOffset].toInt() and 0xFF)).toShort()
|
||||||
bufferOffset += 2
|
bufferOffset += 2
|
||||||
}
|
}
|
||||||
|
|
||||||
ptr += totalCoeffBytes.toInt()
|
ptr += totalCoeffBytes.toInt()
|
||||||
|
|
||||||
// Dequantize padded coefficient tiles (344x288)
|
// Dequantise padded coefficient tiles (344x288)
|
||||||
val yPaddedTile = FloatArray(paddedCoeffCount)
|
val yPaddedTile = FloatArray(paddedCoeffCount)
|
||||||
val coPaddedTile = FloatArray(paddedCoeffCount)
|
val coPaddedTile = FloatArray(paddedCoeffCount)
|
||||||
val cgPaddedTile = FloatArray(paddedCoeffCount)
|
val cgPaddedTile = FloatArray(paddedCoeffCount)
|
||||||
|
|
||||||
for (i in 0 until paddedCoeffCount) {
|
for (i in 0 until paddedCoeffCount) {
|
||||||
yPaddedTile[i] = quantizedY[i] * qY * rcf
|
yPaddedTile[i] = quantisedY[i] * qY * rcf
|
||||||
coPaddedTile[i] = quantizedCo[i] * qCo * rcf
|
coPaddedTile[i] = quantisedCo[i] * qCo * rcf
|
||||||
cgPaddedTile[i] = quantizedCg[i] * qCg * rcf
|
cgPaddedTile[i] = quantisedCg[i] * qCg * rcf
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Store coefficients for future delta reference (for P-frames)
|
||||||
|
val tileIdx = tileY * ((width + TILE_SIZE_X - 1) / TILE_SIZE_X) + tileX
|
||||||
|
if (tavPreviousCoeffsY == null) {
|
||||||
|
tavPreviousCoeffsY = mutableMapOf()
|
||||||
|
tavPreviousCoeffsCo = mutableMapOf()
|
||||||
|
tavPreviousCoeffsCg = mutableMapOf()
|
||||||
|
}
|
||||||
|
tavPreviousCoeffsY!![tileIdx] = yPaddedTile.clone()
|
||||||
|
tavPreviousCoeffsCo!![tileIdx] = coPaddedTile.clone()
|
||||||
|
tavPreviousCoeffsCg!![tileIdx] = cgPaddedTile.clone()
|
||||||
|
|
||||||
// Apply inverse DWT on full padded tiles (344x288)
|
// Apply inverse DWT on full padded tiles (344x288)
|
||||||
if (isLossless) {
|
if (isLossless) {
|
||||||
tavApplyDWTInverseMultiLevel(yPaddedTile, TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y, decompLevels, 0)
|
tavApplyDWTInverseMultiLevel(yPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0)
|
||||||
tavApplyDWTInverseMultiLevel(coPaddedTile, TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y, decompLevels, 0)
|
tavApplyDWTInverseMultiLevel(coPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0)
|
||||||
tavApplyDWTInverseMultiLevel(cgPaddedTile, TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y, decompLevels, 0)
|
tavApplyDWTInverseMultiLevel(cgPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0)
|
||||||
} else {
|
} else {
|
||||||
tavApplyDWTInverseMultiLevel(yPaddedTile, TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y, decompLevels, waveletFilter)
|
tavApplyDWTInverseMultiLevel(yPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter)
|
||||||
tavApplyDWTInverseMultiLevel(coPaddedTile, TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y, decompLevels, waveletFilter)
|
tavApplyDWTInverseMultiLevel(coPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter)
|
||||||
tavApplyDWTInverseMultiLevel(cgPaddedTile, TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y, decompLevels, waveletFilter)
|
tavApplyDWTInverseMultiLevel(cgPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extract core 280x224 pixels from reconstructed padded tiles (344x288)
|
// Extract core 280x224 pixels from reconstructed padded tiles (344x288)
|
||||||
val yTile = FloatArray(TAV_TILE_SIZE_X * TAV_TILE_SIZE_Y)
|
val yTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y)
|
||||||
val coTile = FloatArray(TAV_TILE_SIZE_X * TAV_TILE_SIZE_Y)
|
val coTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y)
|
||||||
val cgTile = FloatArray(TAV_TILE_SIZE_X * TAV_TILE_SIZE_Y)
|
val cgTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y)
|
||||||
|
|
||||||
for (y in 0 until TAV_TILE_SIZE_Y) {
|
for (y in 0 until TILE_SIZE_Y) {
|
||||||
for (x in 0 until TAV_TILE_SIZE_X) {
|
for (x in 0 until TILE_SIZE_X) {
|
||||||
val coreIdx = y * TAV_TILE_SIZE_X + x
|
val coreIdx = y * TILE_SIZE_X + x
|
||||||
val paddedIdx = (y + TAV_TILE_MARGIN) * TAV_PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN)
|
val paddedIdx = (y + TAV_TILE_MARGIN) * PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN)
|
||||||
|
|
||||||
yTile[coreIdx] = yPaddedTile[paddedIdx]
|
yTile[coreIdx] = yPaddedTile[paddedIdx]
|
||||||
coTile[coreIdx] = coPaddedTile[paddedIdx]
|
coTile[coreIdx] = coPaddedTile[paddedIdx]
|
||||||
@@ -3952,17 +3969,17 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
|
|
||||||
private fun tavConvertYCoCgTileToRGB(tileX: Int, tileY: Int, yTile: FloatArray, coTile: FloatArray, cgTile: FloatArray,
|
private fun tavConvertYCoCgTileToRGB(tileX: Int, tileY: Int, yTile: FloatArray, coTile: FloatArray, cgTile: FloatArray,
|
||||||
rgbAddr: Long, width: Int, height: Int) {
|
rgbAddr: Long, width: Int, height: Int) {
|
||||||
val startX = tileX * TAV_TILE_SIZE_X
|
val startX = tileX * TILE_SIZE_X
|
||||||
val startY = tileY * TAV_TILE_SIZE_Y
|
val startY = tileY * TILE_SIZE_Y
|
||||||
|
|
||||||
// OPTIMIZATION: Process pixels row by row with bulk copying for better cache locality
|
// OPTIMIZATION: Process pixels row by row with bulk copying for better cache locality
|
||||||
for (y in 0 until TAV_TILE_SIZE_Y) {
|
for (y in 0 until TILE_SIZE_Y) {
|
||||||
val frameY = startY + y
|
val frameY = startY + y
|
||||||
if (frameY >= height) break
|
if (frameY >= height) break
|
||||||
|
|
||||||
// Calculate valid pixel range for this row
|
// Calculate valid pixel range for this row
|
||||||
val validStartX = maxOf(0, startX)
|
val validStartX = maxOf(0, startX)
|
||||||
val validEndX = minOf(width, startX + TAV_TILE_SIZE_X)
|
val validEndX = minOf(width, startX + TILE_SIZE_X)
|
||||||
val validPixelsInRow = validEndX - validStartX
|
val validPixelsInRow = validEndX - validStartX
|
||||||
|
|
||||||
if (validPixelsInRow > 0) {
|
if (validPixelsInRow > 0) {
|
||||||
@@ -3971,7 +3988,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
var bufferIdx = 0
|
var bufferIdx = 0
|
||||||
|
|
||||||
for (x in validStartX until validEndX) {
|
for (x in validStartX until validEndX) {
|
||||||
val tileIdx = y * TAV_TILE_SIZE_X + (x - startX)
|
val tileIdx = y * TILE_SIZE_X + (x - startX)
|
||||||
|
|
||||||
// YCoCg-R to RGB conversion (exact inverse of encoder)
|
// YCoCg-R to RGB conversion (exact inverse of encoder)
|
||||||
val Y = yTile[tileIdx]
|
val Y = yTile[tileIdx]
|
||||||
@@ -3999,17 +4016,17 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
|
|
||||||
private fun tavConvertICtCpTileToRGB(tileX: Int, tileY: Int, iTile: FloatArray, ctTile: FloatArray, cpTile: FloatArray,
|
private fun tavConvertICtCpTileToRGB(tileX: Int, tileY: Int, iTile: FloatArray, ctTile: FloatArray, cpTile: FloatArray,
|
||||||
rgbAddr: Long, width: Int, height: Int) {
|
rgbAddr: Long, width: Int, height: Int) {
|
||||||
val startX = tileX * TAV_TILE_SIZE_X
|
val startX = tileX * TILE_SIZE_X
|
||||||
val startY = tileY * TAV_TILE_SIZE_Y
|
val startY = tileY * TILE_SIZE_Y
|
||||||
|
|
||||||
// OPTIMIZATION: Process pixels row by row with bulk copying for better cache locality
|
// OPTIMIZATION: Process pixels row by row with bulk copying for better cache locality
|
||||||
for (y in 0 until TAV_TILE_SIZE_Y) {
|
for (y in 0 until TILE_SIZE_Y) {
|
||||||
val frameY = startY + y
|
val frameY = startY + y
|
||||||
if (frameY >= height) break
|
if (frameY >= height) break
|
||||||
|
|
||||||
// Calculate valid pixel range for this row
|
// Calculate valid pixel range for this row
|
||||||
val validStartX = maxOf(0, startX)
|
val validStartX = maxOf(0, startX)
|
||||||
val validEndX = minOf(width, startX + TAV_TILE_SIZE_X)
|
val validEndX = minOf(width, startX + TILE_SIZE_X)
|
||||||
val validPixelsInRow = validEndX - validStartX
|
val validPixelsInRow = validEndX - validStartX
|
||||||
|
|
||||||
if (validPixelsInRow > 0) {
|
if (validPixelsInRow > 0) {
|
||||||
@@ -4018,7 +4035,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
var bufferIdx = 0
|
var bufferIdx = 0
|
||||||
|
|
||||||
for (x in validStartX until validEndX) {
|
for (x in validStartX until validEndX) {
|
||||||
val tileIdx = y * TAV_TILE_SIZE_X + (x - startX)
|
val tileIdx = y * TILE_SIZE_X + (x - startX)
|
||||||
|
|
||||||
// ICtCp to sRGB conversion (adapted from encoder ICtCp functions)
|
// ICtCp to sRGB conversion (adapted from encoder ICtCp functions)
|
||||||
val I = iTile[tileIdx].toDouble() / 255.0
|
val I = iTile[tileIdx].toDouble() / 255.0
|
||||||
@@ -4060,16 +4077,16 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
|
|
||||||
private fun tavAddYCoCgResidualToRGBTile(tileX: Int, tileY: Int, yRes: FloatArray, coRes: FloatArray, cgRes: FloatArray,
|
private fun tavAddYCoCgResidualToRGBTile(tileX: Int, tileY: Int, yRes: FloatArray, coRes: FloatArray, cgRes: FloatArray,
|
||||||
rgbAddr: Long, width: Int, height: Int) {
|
rgbAddr: Long, width: Int, height: Int) {
|
||||||
val startX = tileX * TAV_TILE_SIZE_X
|
val startX = tileX * TILE_SIZE_X
|
||||||
val startY = tileY * TAV_TILE_SIZE_Y
|
val startY = tileY * TILE_SIZE_Y
|
||||||
|
|
||||||
for (y in 0 until TAV_TILE_SIZE_Y) {
|
for (y in 0 until TILE_SIZE_Y) {
|
||||||
for (x in 0 until TAV_TILE_SIZE_X) {
|
for (x in 0 until TILE_SIZE_X) {
|
||||||
val frameX = startX + x
|
val frameX = startX + x
|
||||||
val frameY = startY + y
|
val frameY = startY + y
|
||||||
|
|
||||||
if (frameX < width && frameY < height) {
|
if (frameX < width && frameY < height) {
|
||||||
val tileIdx = y * TAV_TILE_SIZE_X + x
|
val tileIdx = y * TILE_SIZE_X + x
|
||||||
val pixelIdx = frameY * width + frameX
|
val pixelIdx = frameY * width + frameX
|
||||||
val rgbOffset = pixelIdx * 3L
|
val rgbOffset = pixelIdx * 3L
|
||||||
|
|
||||||
@@ -4105,17 +4122,17 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
|
|
||||||
// Helper functions (simplified versions of existing DWT functions)
|
// Helper functions (simplified versions of existing DWT functions)
|
||||||
private fun tavCopyTileRGB(tileX: Int, tileY: Int, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int) {
|
private fun tavCopyTileRGB(tileX: Int, tileY: Int, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int) {
|
||||||
val startX = tileX * TAV_TILE_SIZE_X
|
val startX = tileX * TILE_SIZE_X
|
||||||
val startY = tileY * TAV_TILE_SIZE_Y
|
val startY = tileY * TILE_SIZE_Y
|
||||||
|
|
||||||
// OPTIMIZATION: Copy entire rows at once for maximum performance
|
// OPTIMIZATION: Copy entire rows at once for maximum performance
|
||||||
for (y in 0 until TAV_TILE_SIZE_Y) {
|
for (y in 0 until TILE_SIZE_Y) {
|
||||||
val frameY = startY + y
|
val frameY = startY + y
|
||||||
if (frameY >= height) break
|
if (frameY >= height) break
|
||||||
|
|
||||||
// Calculate valid pixel range for this row
|
// Calculate valid pixel range for this row
|
||||||
val validStartX = maxOf(0, startX)
|
val validStartX = maxOf(0, startX)
|
||||||
val validEndX = minOf(width, startX + TAV_TILE_SIZE_X)
|
val validEndX = minOf(width, startX + TILE_SIZE_X)
|
||||||
val validPixelsInRow = validEndX - validStartX
|
val validPixelsInRow = validEndX - validStartX
|
||||||
|
|
||||||
if (validPixelsInRow > 0) {
|
if (validPixelsInRow > 0) {
|
||||||
@@ -4132,31 +4149,105 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun tavDecodeDWTInterTileRGB(readPtr: Long, tileX: Int, tileY: Int, mvX: Int, mvY: Int,
|
private fun tavDecodeDeltaTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
|
||||||
currentRGBAddr: Long, prevRGBAddr: Long,
|
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, rcf: Float,
|
||||||
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, rcf: Float,
|
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long {
|
||||||
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long {
|
|
||||||
|
|
||||||
// Step 1: Apply motion compensation
|
val tileIdx = tileY * ((width + TILE_SIZE_X - 1) / TILE_SIZE_X) + tileX
|
||||||
tavApplyMotionCompensationRGB(tileX, tileY, mvX, mvY, currentRGBAddr, prevRGBAddr, width, height)
|
var ptr = readPtr
|
||||||
|
|
||||||
// Step 2: Add DWT residual (same as intra but add to existing pixels)
|
// Initialize coefficient storage if needed
|
||||||
return tavDecodeDWTIntraTileRGB(readPtr, tileX, tileY, currentRGBAddr, width, height, qY, qCo, qCg, rcf,
|
if (tavPreviousCoeffsY == null) {
|
||||||
waveletFilter, decompLevels, isLossless, tavVersion)
|
tavPreviousCoeffsY = mutableMapOf()
|
||||||
|
tavPreviousCoeffsCo = mutableMapOf()
|
||||||
|
tavPreviousCoeffsCg = mutableMapOf()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Coefficient count for padded tiles: 344x288 = 99,072 coefficients per channel
|
||||||
|
val coeffCount = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y
|
||||||
|
|
||||||
|
// Read delta coefficients (same format as intra: quantised int16 -> float)
|
||||||
|
val deltaY = ShortArray(coeffCount)
|
||||||
|
val deltaCo = ShortArray(coeffCount)
|
||||||
|
val deltaCg = ShortArray(coeffCount)
|
||||||
|
|
||||||
|
vm.bulkPeekShort(ptr.toInt(), deltaY, coeffCount * 2)
|
||||||
|
ptr += coeffCount * 2
|
||||||
|
vm.bulkPeekShort(ptr.toInt(), deltaCo, coeffCount * 2)
|
||||||
|
ptr += coeffCount * 2
|
||||||
|
vm.bulkPeekShort(ptr.toInt(), deltaCg, coeffCount * 2)
|
||||||
|
ptr += coeffCount * 2
|
||||||
|
|
||||||
|
// Get or initialize previous coefficients for this tile
|
||||||
|
val prevY = tavPreviousCoeffsY!![tileIdx] ?: FloatArray(coeffCount)
|
||||||
|
val prevCo = tavPreviousCoeffsCo!![tileIdx] ?: FloatArray(coeffCount)
|
||||||
|
val prevCg = tavPreviousCoeffsCg!![tileIdx] ?: FloatArray(coeffCount)
|
||||||
|
|
||||||
|
// Reconstruct current coefficients: current = previous + delta
|
||||||
|
val currentY = FloatArray(coeffCount)
|
||||||
|
val currentCo = FloatArray(coeffCount)
|
||||||
|
val currentCg = FloatArray(coeffCount)
|
||||||
|
|
||||||
|
for (i in 0 until coeffCount) {
|
||||||
|
currentY[i] = prevY[i] + (deltaY[i].toFloat() * qY * rcf)
|
||||||
|
currentCo[i] = prevCo[i] + (deltaCo[i].toFloat() * qCo * rcf)
|
||||||
|
currentCg[i] = prevCg[i] + (deltaCg[i].toFloat() * qCg * rcf)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store current coefficients as previous for next frame
|
||||||
|
tavPreviousCoeffsY!![tileIdx] = currentY.clone()
|
||||||
|
tavPreviousCoeffsCo!![tileIdx] = currentCo.clone()
|
||||||
|
tavPreviousCoeffsCg!![tileIdx] = currentCg.clone()
|
||||||
|
|
||||||
|
// Apply inverse DWT
|
||||||
|
if (isLossless) {
|
||||||
|
tavApplyDWTInverseMultiLevel(currentY, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0)
|
||||||
|
tavApplyDWTInverseMultiLevel(currentCo, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0)
|
||||||
|
tavApplyDWTInverseMultiLevel(currentCg, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0)
|
||||||
|
} else {
|
||||||
|
tavApplyDWTInverseMultiLevel(currentY, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter)
|
||||||
|
tavApplyDWTInverseMultiLevel(currentCo, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter)
|
||||||
|
tavApplyDWTInverseMultiLevel(currentCg, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract core 280x224 pixels and convert to RGB (same as intra)
|
||||||
|
val yTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y)
|
||||||
|
val coTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y)
|
||||||
|
val cgTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y)
|
||||||
|
|
||||||
|
for (y in 0 until TILE_SIZE_Y) {
|
||||||
|
for (x in 0 until TILE_SIZE_X) {
|
||||||
|
val coreIdx = y * TILE_SIZE_X + x
|
||||||
|
val paddedIdx = (y + TAV_TILE_MARGIN) * PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN)
|
||||||
|
|
||||||
|
yTile[coreIdx] = currentY[paddedIdx]
|
||||||
|
coTile[coreIdx] = currentCo[paddedIdx]
|
||||||
|
cgTile[coreIdx] = currentCg[paddedIdx]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert to RGB based on TAV version
|
||||||
|
if (tavVersion == 2) {
|
||||||
|
tavConvertICtCpTileToRGB(tileX, tileY, yTile, coTile, cgTile, currentRGBAddr, width, height)
|
||||||
|
} else {
|
||||||
|
tavConvertYCoCgTileToRGB(tileX, tileY, yTile, coTile, cgTile, currentRGBAddr, width, height)
|
||||||
|
}
|
||||||
|
|
||||||
|
return ptr
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun tavApplyMotionCompensationRGB(tileX: Int, tileY: Int, mvX: Int, mvY: Int,
|
private fun tavApplyMotionCompensationRGB(tileX: Int, tileY: Int, mvX: Int, mvY: Int,
|
||||||
currentRGBAddr: Long, prevRGBAddr: Long,
|
currentRGBAddr: Long, prevRGBAddr: Long,
|
||||||
width: Int, height: Int) {
|
width: Int, height: Int) {
|
||||||
val startX = tileX * TAV_TILE_SIZE_X
|
val startX = tileX * TILE_SIZE_X
|
||||||
val startY = tileY * TAV_TILE_SIZE_Y
|
val startY = tileY * TILE_SIZE_Y
|
||||||
|
|
||||||
// Motion vectors in quarter-pixel precision
|
// Motion vectors in quarter-pixel precision
|
||||||
val refX = startX + (mvX / 4.0f)
|
val refX = startX + (mvX / 4.0f)
|
||||||
val refY = startY + (mvY / 4.0f)
|
val refY = startY + (mvY / 4.0f)
|
||||||
|
|
||||||
for (y in 0 until TAV_TILE_SIZE_Y) {
|
for (y in 0 until TILE_SIZE_Y) {
|
||||||
for (x in 0 until TAV_TILE_SIZE_X) {
|
for (x in 0 until TILE_SIZE_X) {
|
||||||
val currentPixelIdx = (startY + y) * width + (startX + x)
|
val currentPixelIdx = (startY + y) * width + (startX + x)
|
||||||
|
|
||||||
if (currentPixelIdx >= 0 && currentPixelIdx < width * height) {
|
if (currentPixelIdx >= 0 && currentPixelIdx < width * height) {
|
||||||
|
|||||||
@@ -26,11 +26,10 @@
|
|||||||
// Version 1: YCoCg-R (default)
|
// Version 1: YCoCg-R (default)
|
||||||
// Version 2: ICtCp (--ictcp flag)
|
// Version 2: ICtCp (--ictcp flag)
|
||||||
|
|
||||||
// Tile encoding modes (112x112 tiles)
|
// Tile encoding modes (280x224 tiles)
|
||||||
#define TAV_MODE_SKIP 0x00 // Skip tile (copy from reference)
|
#define TAV_MODE_SKIP 0x00 // Skip tile (copy from reference)
|
||||||
#define TAV_MODE_INTRA 0x01 // Intra DWT coding (I-frame tiles)
|
#define TAV_MODE_INTRA 0x01 // Intra DWT coding (I-frame tiles)
|
||||||
#define TAV_MODE_INTER 0x02 // Inter DWT coding with motion compensation
|
#define TAV_MODE_DELTA 0x02 // Coefficient delta encoding (efficient P-frames)
|
||||||
#define TAV_MODE_MOTION 0x03 // Motion vector only (good prediction)
|
|
||||||
|
|
||||||
// Video packet types
|
// Video packet types
|
||||||
#define TAV_PACKET_IFRAME 0x10 // Intra frame (keyframe)
|
#define TAV_PACKET_IFRAME 0x10 // Intra frame (keyframe)
|
||||||
@@ -60,6 +59,7 @@
|
|||||||
#define DEFAULT_HEIGHT 448
|
#define DEFAULT_HEIGHT 448
|
||||||
#define DEFAULT_FPS 30
|
#define DEFAULT_FPS 30
|
||||||
#define DEFAULT_QUALITY 2
|
#define DEFAULT_QUALITY 2
|
||||||
|
int KEYFRAME_INTERVAL = 60;
|
||||||
|
|
||||||
// Audio/subtitle constants (reused from TEV)
|
// Audio/subtitle constants (reused from TEV)
|
||||||
#define MP2_DEFAULT_PACKET_SIZE 1152
|
#define MP2_DEFAULT_PACKET_SIZE 1152
|
||||||
@@ -106,10 +106,10 @@ static inline float FCLAMP(float x, float min, float max) {
|
|||||||
// MP2 audio rate table (same as TEV)
|
// MP2 audio rate table (same as TEV)
|
||||||
static const int MP2_RATE_TABLE[] = {128, 160, 224, 320, 384, 384};
|
static const int MP2_RATE_TABLE[] = {128, 160, 224, 320, 384, 384};
|
||||||
|
|
||||||
// Quality level to quantization mapping for different channels
|
// Quality level to quantisation mapping for different channels
|
||||||
static const int QUALITY_Y[] = {90, 70, 50, 30, 15, 5}; // Luma (fine)
|
static const int QUALITY_Y[] = {60, 42, 25, 12, 6, 2};
|
||||||
static const int QUALITY_CO[] = {80, 60, 40, 20, 10, 3}; // Chroma Co (aggressive)
|
static const int QUALITY_CO[] = {120, 90, 60, 30, 15, 3};
|
||||||
static const int QUALITY_CG[] = {70, 50, 30, 15, 8, 2}; // Chroma Cg (very aggressive)
|
static const int QUALITY_CG[] = {240, 180, 120, 60, 30, 5};
|
||||||
|
|
||||||
// DWT coefficient structure for each subband
|
// DWT coefficient structure for each subband
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@@ -153,7 +153,7 @@ typedef struct {
|
|||||||
|
|
||||||
// Encoding parameters
|
// Encoding parameters
|
||||||
int quality_level;
|
int quality_level;
|
||||||
int quantizer_y, quantizer_co, quantizer_cg;
|
int quantiser_y, quantiser_co, quantiser_cg;
|
||||||
int wavelet_filter;
|
int wavelet_filter;
|
||||||
int decomp_levels;
|
int decomp_levels;
|
||||||
int bitrate_mode;
|
int bitrate_mode;
|
||||||
@@ -168,6 +168,7 @@ typedef struct {
|
|||||||
int verbose;
|
int verbose;
|
||||||
int test_mode;
|
int test_mode;
|
||||||
int ictcp_mode; // 0 = YCoCg-R (default), 1 = ICtCp colour space
|
int ictcp_mode; // 0 = YCoCg-R (default), 1 = ICtCp colour space
|
||||||
|
int intra_only; // Force all tiles to use INTRA mode (disable delta encoding)
|
||||||
|
|
||||||
// Frame buffers
|
// Frame buffers
|
||||||
uint8_t *current_frame_rgb;
|
uint8_t *current_frame_rgb;
|
||||||
@@ -199,9 +200,15 @@ typedef struct {
|
|||||||
size_t compressed_buffer_size;
|
size_t compressed_buffer_size;
|
||||||
|
|
||||||
// OPTIMIZATION: Pre-allocated buffers to avoid malloc/free per tile
|
// OPTIMIZATION: Pre-allocated buffers to avoid malloc/free per tile
|
||||||
int16_t *reusable_quantized_y;
|
int16_t *reusable_quantised_y;
|
||||||
int16_t *reusable_quantized_co;
|
int16_t *reusable_quantised_co;
|
||||||
int16_t *reusable_quantized_cg;
|
int16_t *reusable_quantised_cg;
|
||||||
|
|
||||||
|
// Coefficient delta storage for P-frames (previous frame's coefficients)
|
||||||
|
float *previous_coeffs_y; // Previous frame Y coefficients for all tiles
|
||||||
|
float *previous_coeffs_co; // Previous frame Co coefficients for all tiles
|
||||||
|
float *previous_coeffs_cg; // Previous frame Cg coefficients for all tiles
|
||||||
|
int previous_coeffs_allocated; // Flag to track allocation
|
||||||
|
|
||||||
// Statistics
|
// Statistics
|
||||||
size_t total_compressed_size;
|
size_t total_compressed_size;
|
||||||
@@ -217,9 +224,6 @@ static tav_encoder_t* create_encoder(void);
|
|||||||
static void cleanup_encoder(tav_encoder_t *enc);
|
static void cleanup_encoder(tav_encoder_t *enc);
|
||||||
static int initialize_encoder(tav_encoder_t *enc);
|
static int initialize_encoder(tav_encoder_t *enc);
|
||||||
static void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height);
|
static void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height);
|
||||||
static int estimate_motion_280x224(const float *current, const float *reference,
|
|
||||||
int width, int height, int tile_x, int tile_y,
|
|
||||||
motion_vector_t *mv);
|
|
||||||
|
|
||||||
// Audio and subtitle processing prototypes (from TEV)
|
// Audio and subtitle processing prototypes (from TEV)
|
||||||
static int start_audio_conversion(tav_encoder_t *enc);
|
static int start_audio_conversion(tav_encoder_t *enc);
|
||||||
@@ -245,7 +249,7 @@ static void show_usage(const char *program_name) {
|
|||||||
printf(" -s, --size WxH Video size (default: %dx%d)\n", DEFAULT_WIDTH, DEFAULT_HEIGHT);
|
printf(" -s, --size WxH Video size (default: %dx%d)\n", DEFAULT_WIDTH, DEFAULT_HEIGHT);
|
||||||
printf(" -f, --fps N Output frames per second (enables frame rate conversion)\n");
|
printf(" -f, --fps N Output frames per second (enables frame rate conversion)\n");
|
||||||
printf(" -q, --quality N Quality level 0-5 (default: 2)\n");
|
printf(" -q, --quality N Quality level 0-5 (default: 2)\n");
|
||||||
printf(" -Q, --quantizer Y,Co,Cg Quantizer levels 0-100 for each channel\n");
|
printf(" -Q, --quantiser Y,Co,Cg Quantiser levels 0-100 for each channel\n");
|
||||||
// printf(" -w, --wavelet N Wavelet filter: 0=5/3 reversible, 1=9/7 irreversible (default: 1)\n");
|
// printf(" -w, --wavelet N Wavelet filter: 0=5/3 reversible, 1=9/7 irreversible (default: 1)\n");
|
||||||
printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode)\n");
|
printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode)\n");
|
||||||
printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n");
|
printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n");
|
||||||
@@ -254,14 +258,15 @@ static void show_usage(const char *program_name) {
|
|||||||
printf(" --lossless Lossless mode: use 5/3 reversible wavelet\n");
|
printf(" --lossless Lossless mode: use 5/3 reversible wavelet\n");
|
||||||
// printf(" --enable-progressive Enable progressive transmission\n");
|
// printf(" --enable-progressive Enable progressive transmission\n");
|
||||||
// printf(" --enable-roi Enable region-of-interest coding\n");
|
// printf(" --enable-roi Enable region-of-interest coding\n");
|
||||||
printf(" --ictcp Use ICtCp colour space instead of YCoCg-R (generates TAV version 2)\n");
|
printf(" --intra-only Disable delta encoding (force all tiles to use INTRA mode)\n");
|
||||||
|
printf(" --ictcp Use ICtCp colour space instead of YCoCg-R (use when source is in BT.2100)\n");
|
||||||
printf(" --help Show this help\n\n");
|
printf(" --help Show this help\n\n");
|
||||||
|
|
||||||
printf("Audio Rate by Quality:\n ");
|
printf("Audio Rate by Quality:\n ");
|
||||||
for (int i = 0; i < sizeof(MP2_RATE_TABLE) / sizeof(int); i++) {
|
for (int i = 0; i < sizeof(MP2_RATE_TABLE) / sizeof(int); i++) {
|
||||||
printf("%d: %d kbps\t", i, MP2_RATE_TABLE[i]);
|
printf("%d: %d kbps\t", i, MP2_RATE_TABLE[i]);
|
||||||
}
|
}
|
||||||
printf("\n\nQuantizer Value by Quality:\n");
|
printf("\n\nQuantiser Value by Quality:\n");
|
||||||
printf(" Y (Luma): ");
|
printf(" Y (Luma): ");
|
||||||
for (int i = 0; i < 6; i++) {
|
for (int i = 0; i < 6; i++) {
|
||||||
printf("%d: Q%d ", i, QUALITY_Y[i]);
|
printf("%d: Q%d ", i, QUALITY_Y[i]);
|
||||||
@@ -278,8 +283,6 @@ static void show_usage(const char *program_name) {
|
|||||||
printf("\n\nFeatures:\n");
|
printf("\n\nFeatures:\n");
|
||||||
printf(" - 112x112 DWT tiles with multi-resolution encoding\n");
|
printf(" - 112x112 DWT tiles with multi-resolution encoding\n");
|
||||||
printf(" - Full resolution YCoCg-R/ICtCp colour space\n");
|
printf(" - Full resolution YCoCg-R/ICtCp colour space\n");
|
||||||
// printf(" - Progressive transmission and ROI coding\n");
|
|
||||||
// printf(" - Motion compensation with ±16 pixel search range\n");
|
|
||||||
printf(" - Lossless and lossy compression modes\n");
|
printf(" - Lossless and lossy compression modes\n");
|
||||||
|
|
||||||
printf("\nExamples:\n");
|
printf("\nExamples:\n");
|
||||||
@@ -302,9 +305,9 @@ static tav_encoder_t* create_encoder(void) {
|
|||||||
enc->quality_level = DEFAULT_QUALITY;
|
enc->quality_level = DEFAULT_QUALITY;
|
||||||
enc->wavelet_filter = WAVELET_9_7_IRREVERSIBLE;
|
enc->wavelet_filter = WAVELET_9_7_IRREVERSIBLE;
|
||||||
enc->decomp_levels = MAX_DECOMP_LEVELS;
|
enc->decomp_levels = MAX_DECOMP_LEVELS;
|
||||||
enc->quantizer_y = QUALITY_Y[DEFAULT_QUALITY];
|
enc->quantiser_y = QUALITY_Y[DEFAULT_QUALITY];
|
||||||
enc->quantizer_co = QUALITY_CO[DEFAULT_QUALITY];
|
enc->quantiser_co = QUALITY_CO[DEFAULT_QUALITY];
|
||||||
enc->quantizer_cg = QUALITY_CG[DEFAULT_QUALITY];
|
enc->quantiser_cg = QUALITY_CG[DEFAULT_QUALITY];
|
||||||
|
|
||||||
return enc;
|
return enc;
|
||||||
}
|
}
|
||||||
@@ -333,22 +336,37 @@ static int initialize_encoder(tav_encoder_t *enc) {
|
|||||||
enc->tiles = malloc(num_tiles * sizeof(dwt_tile_t));
|
enc->tiles = malloc(num_tiles * sizeof(dwt_tile_t));
|
||||||
enc->motion_vectors = malloc(num_tiles * sizeof(motion_vector_t));
|
enc->motion_vectors = malloc(num_tiles * sizeof(motion_vector_t));
|
||||||
|
|
||||||
|
// Initialize motion vectors
|
||||||
|
for (int i = 0; i < num_tiles; i++) {
|
||||||
|
enc->motion_vectors[i].mv_x = 0;
|
||||||
|
enc->motion_vectors[i].mv_y = 0;
|
||||||
|
enc->motion_vectors[i].rate_control_factor = 1.0f; // Initialize to 1.0f
|
||||||
|
}
|
||||||
|
|
||||||
// Initialize ZSTD compression
|
// Initialize ZSTD compression
|
||||||
enc->zstd_ctx = ZSTD_createCCtx();
|
enc->zstd_ctx = ZSTD_createCCtx();
|
||||||
enc->compressed_buffer_size = ZSTD_compressBound(1024 * 1024); // 1MB max
|
enc->compressed_buffer_size = ZSTD_compressBound(1024 * 1024); // 1MB max
|
||||||
enc->compressed_buffer = malloc(enc->compressed_buffer_size);
|
enc->compressed_buffer = malloc(enc->compressed_buffer_size);
|
||||||
|
|
||||||
// OPTIMIZATION: Allocate reusable quantization buffers for padded tiles (344x288)
|
// OPTIMIZATION: Allocate reusable quantisation buffers for padded tiles (344x288)
|
||||||
const int padded_coeff_count = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y;
|
const int padded_coeff_count = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y;
|
||||||
enc->reusable_quantized_y = malloc(padded_coeff_count * sizeof(int16_t));
|
enc->reusable_quantised_y = malloc(padded_coeff_count * sizeof(int16_t));
|
||||||
enc->reusable_quantized_co = malloc(padded_coeff_count * sizeof(int16_t));
|
enc->reusable_quantised_co = malloc(padded_coeff_count * sizeof(int16_t));
|
||||||
enc->reusable_quantized_cg = malloc(padded_coeff_count * sizeof(int16_t));
|
enc->reusable_quantised_cg = malloc(padded_coeff_count * sizeof(int16_t));
|
||||||
|
|
||||||
|
// Allocate coefficient delta storage for P-frames (per-tile coefficient storage)
|
||||||
|
size_t total_coeff_size = num_tiles * padded_coeff_count * sizeof(float);
|
||||||
|
enc->previous_coeffs_y = malloc(total_coeff_size);
|
||||||
|
enc->previous_coeffs_co = malloc(total_coeff_size);
|
||||||
|
enc->previous_coeffs_cg = malloc(total_coeff_size);
|
||||||
|
enc->previous_coeffs_allocated = 0; // Will be set to 1 after first I-frame
|
||||||
|
|
||||||
if (!enc->current_frame_rgb || !enc->previous_frame_rgb ||
|
if (!enc->current_frame_rgb || !enc->previous_frame_rgb ||
|
||||||
!enc->current_frame_y || !enc->current_frame_co || !enc->current_frame_cg ||
|
!enc->current_frame_y || !enc->current_frame_co || !enc->current_frame_cg ||
|
||||||
!enc->previous_frame_y || !enc->previous_frame_co || !enc->previous_frame_cg ||
|
!enc->previous_frame_y || !enc->previous_frame_co || !enc->previous_frame_cg ||
|
||||||
!enc->tiles || !enc->motion_vectors || !enc->zstd_ctx || !enc->compressed_buffer ||
|
!enc->tiles || !enc->motion_vectors || !enc->zstd_ctx || !enc->compressed_buffer ||
|
||||||
!enc->reusable_quantized_y || !enc->reusable_quantized_co || !enc->reusable_quantized_cg) {
|
!enc->reusable_quantised_y || !enc->reusable_quantised_co || !enc->reusable_quantised_cg ||
|
||||||
|
!enc->previous_coeffs_y || !enc->previous_coeffs_co || !enc->previous_coeffs_cg) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -601,14 +619,14 @@ static void dwt_2d_forward_padded(float *tile_data, int levels, int filter_type)
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Quantization for DWT subbands with rate control
|
// Quantisation for DWT subbands with rate control
|
||||||
static void quantize_dwt_coefficients(float *coeffs, int16_t *quantized, int size, int quantizer, float rcf) {
|
static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int size, int quantiser, float rcf) {
|
||||||
float effective_q = quantizer * rcf;
|
float effective_q = quantiser * rcf;
|
||||||
effective_q = FCLAMP(effective_q, 1.0f, 255.0f);
|
effective_q = FCLAMP(effective_q, 1.0f, 255.0f);
|
||||||
|
|
||||||
for (int i = 0; i < size; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
float quantized_val = coeffs[i] / effective_q;
|
float quantised_val = coeffs[i] / effective_q;
|
||||||
quantized[i] = (int16_t)CLAMP((int)(quantized_val + (quantized_val >= 0 ? 0.5f : -0.5f)), -32768, 32767);
|
quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -624,46 +642,96 @@ static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
|
|||||||
memcpy(buffer + offset, &mv->mv_y, sizeof(int16_t)); offset += sizeof(int16_t);
|
memcpy(buffer + offset, &mv->mv_y, sizeof(int16_t)); offset += sizeof(int16_t);
|
||||||
memcpy(buffer + offset, &mv->rate_control_factor, sizeof(float)); offset += sizeof(float);
|
memcpy(buffer + offset, &mv->rate_control_factor, sizeof(float)); offset += sizeof(float);
|
||||||
|
|
||||||
if (mode == TAV_MODE_SKIP || mode == TAV_MODE_MOTION) {
|
if (mode == TAV_MODE_SKIP) {
|
||||||
// No coefficient data for SKIP/MOTION modes
|
// No coefficient data for SKIP/MOTION modes
|
||||||
return offset;
|
return offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Quantize and serialize DWT coefficients (full padded tile: 344x288)
|
// Quantise and serialize DWT coefficients (full padded tile: 344x288)
|
||||||
const int tile_size = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y;
|
const int tile_size = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y;
|
||||||
// OPTIMIZATION: Use pre-allocated buffers instead of malloc/free per tile
|
// OPTIMIZATION: Use pre-allocated buffers instead of malloc/free per tile
|
||||||
int16_t *quantized_y = enc->reusable_quantized_y;
|
int16_t *quantised_y = enc->reusable_quantised_y;
|
||||||
int16_t *quantized_co = enc->reusable_quantized_co;
|
int16_t *quantised_co = enc->reusable_quantised_co;
|
||||||
int16_t *quantized_cg = enc->reusable_quantized_cg;
|
int16_t *quantised_cg = enc->reusable_quantised_cg;
|
||||||
|
|
||||||
// Debug: check DWT coefficients before quantization
|
// Debug: check DWT coefficients before quantisation
|
||||||
/*if (tile_x == 0 && tile_y == 0) {
|
/*if (tile_x == 0 && tile_y == 0) {
|
||||||
printf("Encoder Debug: Tile (0,0) - DWT Y coeffs before quantization (first 16): ");
|
printf("Encoder Debug: Tile (0,0) - DWT Y coeffs before quantisation (first 16): ");
|
||||||
for (int i = 0; i < 16; i++) {
|
for (int i = 0; i < 16; i++) {
|
||||||
printf("%.2f ", tile_y_data[i]);
|
printf("%.2f ", tile_y_data[i]);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
printf("Encoder Debug: Quantizers - Y=%d, Co=%d, Cg=%d, rcf=%.2f\n",
|
printf("Encoder Debug: Quantisers - Y=%d, Co=%d, Cg=%d, rcf=%.2f\n",
|
||||||
enc->quantizer_y, enc->quantizer_co, enc->quantizer_cg, mv->rate_control_factor);
|
enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg, mv->rate_control_factor);
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
quantize_dwt_coefficients((float*)tile_y_data, quantized_y, tile_size, enc->quantizer_y, mv->rate_control_factor);
|
if (mode == TAV_MODE_INTRA) {
|
||||||
quantize_dwt_coefficients((float*)tile_co_data, quantized_co, tile_size, enc->quantizer_co, mv->rate_control_factor);
|
// INTRA mode: quantise coefficients directly and store for future reference
|
||||||
quantize_dwt_coefficients((float*)tile_cg_data, quantized_cg, tile_size, enc->quantizer_cg, mv->rate_control_factor);
|
quantise_dwt_coefficients((float*)tile_y_data, quantised_y, tile_size, enc->quantiser_y, mv->rate_control_factor);
|
||||||
|
quantise_dwt_coefficients((float*)tile_co_data, quantised_co, tile_size, enc->quantiser_co, mv->rate_control_factor);
|
||||||
|
quantise_dwt_coefficients((float*)tile_cg_data, quantised_cg, tile_size, enc->quantiser_cg, mv->rate_control_factor);
|
||||||
|
|
||||||
// Debug: check quantized coefficients after quantization
|
// Store current coefficients for future delta reference
|
||||||
|
int tile_idx = tile_y * enc->tiles_x + tile_x;
|
||||||
|
float *prev_y = enc->previous_coeffs_y + (tile_idx * tile_size);
|
||||||
|
float *prev_co = enc->previous_coeffs_co + (tile_idx * tile_size);
|
||||||
|
float *prev_cg = enc->previous_coeffs_cg + (tile_idx * tile_size);
|
||||||
|
memcpy(prev_y, tile_y_data, tile_size * sizeof(float));
|
||||||
|
memcpy(prev_co, tile_co_data, tile_size * sizeof(float));
|
||||||
|
memcpy(prev_cg, tile_cg_data, tile_size * sizeof(float));
|
||||||
|
|
||||||
|
} else if (mode == TAV_MODE_DELTA) {
|
||||||
|
// DELTA mode: compute coefficient deltas and quantise them
|
||||||
|
int tile_idx = tile_y * enc->tiles_x + tile_x;
|
||||||
|
float *prev_y = enc->previous_coeffs_y + (tile_idx * tile_size);
|
||||||
|
float *prev_co = enc->previous_coeffs_co + (tile_idx * tile_size);
|
||||||
|
float *prev_cg = enc->previous_coeffs_cg + (tile_idx * tile_size);
|
||||||
|
|
||||||
|
// Compute deltas: delta = current - previous
|
||||||
|
float *delta_y = malloc(tile_size * sizeof(float));
|
||||||
|
float *delta_co = malloc(tile_size * sizeof(float));
|
||||||
|
float *delta_cg = malloc(tile_size * sizeof(float));
|
||||||
|
|
||||||
|
for (int i = 0; i < tile_size; i++) {
|
||||||
|
delta_y[i] = tile_y_data[i] - prev_y[i];
|
||||||
|
delta_co[i] = tile_co_data[i] - prev_co[i];
|
||||||
|
delta_cg[i] = tile_cg_data[i] - prev_cg[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Quantise the deltas
|
||||||
|
quantise_dwt_coefficients(delta_y, quantised_y, tile_size, enc->quantiser_y, mv->rate_control_factor);
|
||||||
|
quantise_dwt_coefficients(delta_co, quantised_co, tile_size, enc->quantiser_co, mv->rate_control_factor);
|
||||||
|
quantise_dwt_coefficients(delta_cg, quantised_cg, tile_size, enc->quantiser_cg, mv->rate_control_factor);
|
||||||
|
|
||||||
|
// Reconstruct coefficients like decoder will (previous + dequantised_delta)
|
||||||
|
for (int i = 0; i < tile_size; i++) {
|
||||||
|
float dequant_delta_y = (float)quantised_y[i] * enc->quantiser_y * mv->rate_control_factor;
|
||||||
|
float dequant_delta_co = (float)quantised_co[i] * enc->quantiser_co * mv->rate_control_factor;
|
||||||
|
float dequant_delta_cg = (float)quantised_cg[i] * enc->quantiser_cg * mv->rate_control_factor;
|
||||||
|
|
||||||
|
prev_y[i] = prev_y[i] + dequant_delta_y;
|
||||||
|
prev_co[i] = prev_co[i] + dequant_delta_co;
|
||||||
|
prev_cg[i] = prev_cg[i] + dequant_delta_cg;
|
||||||
|
}
|
||||||
|
|
||||||
|
free(delta_y);
|
||||||
|
free(delta_co);
|
||||||
|
free(delta_cg);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Debug: check quantised coefficients after quantisation
|
||||||
/*if (tile_x == 0 && tile_y == 0) {
|
/*if (tile_x == 0 && tile_y == 0) {
|
||||||
printf("Encoder Debug: Tile (0,0) - Quantized Y coeffs (first 16): ");
|
printf("Encoder Debug: Tile (0,0) - Quantised Y coeffs (first 16): ");
|
||||||
for (int i = 0; i < 16; i++) {
|
for (int i = 0; i < 16; i++) {
|
||||||
printf("%d ", quantized_y[i]);
|
printf("%d ", quantised_y[i]);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
// Write quantized coefficients
|
// Write quantised coefficients
|
||||||
memcpy(buffer + offset, quantized_y, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t);
|
memcpy(buffer + offset, quantised_y, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t);
|
||||||
memcpy(buffer + offset, quantized_co, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t);
|
memcpy(buffer + offset, quantised_co, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t);
|
||||||
memcpy(buffer + offset, quantized_cg, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t);
|
memcpy(buffer + offset, quantised_cg, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t);
|
||||||
|
|
||||||
// OPTIMIZATION: No need to free - using pre-allocated reusable buffers
|
// OPTIMIZATION: No need to free - using pre-allocated reusable buffers
|
||||||
|
|
||||||
@@ -685,8 +753,14 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
|
|||||||
for (int tile_x = 0; tile_x < enc->tiles_x; tile_x++) {
|
for (int tile_x = 0; tile_x < enc->tiles_x; tile_x++) {
|
||||||
int tile_idx = tile_y * enc->tiles_x + tile_x;
|
int tile_idx = tile_y * enc->tiles_x + tile_x;
|
||||||
|
|
||||||
// Determine tile mode (simplified)
|
// Determine tile mode based on frame type, coefficient availability, and intra_only flag
|
||||||
uint8_t mode = TAV_MODE_INTRA; // For now, all tiles are INTRA
|
uint8_t mode;
|
||||||
|
int is_keyframe = (packet_type == TAV_PACKET_IFRAME);
|
||||||
|
if (is_keyframe || !enc->previous_coeffs_allocated) {
|
||||||
|
mode = TAV_MODE_INTRA; // I-frames, first frames, or intra-only mode always use INTRA
|
||||||
|
} else {
|
||||||
|
mode = TAV_MODE_DELTA; // P-frames use coefficient delta encoding
|
||||||
|
}
|
||||||
|
|
||||||
// Extract padded tile data (344x288) with neighbour context for overlapping tiles
|
// Extract padded tile data (344x288) with neighbour context for overlapping tiles
|
||||||
float tile_y_data[PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y];
|
float tile_y_data[PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y];
|
||||||
@@ -741,62 +815,12 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
|
|||||||
enc->total_compressed_size += compressed_size;
|
enc->total_compressed_size += compressed_size;
|
||||||
enc->total_uncompressed_size += uncompressed_offset;
|
enc->total_uncompressed_size += uncompressed_offset;
|
||||||
|
|
||||||
return compressed_size + 5; // packet type + size field + compressed data
|
// Mark coefficient storage as available after first I-frame
|
||||||
}
|
if (packet_type == TAV_PACKET_IFRAME) {
|
||||||
|
enc->previous_coeffs_allocated = 1;
|
||||||
// Motion estimation for 112x112 tiles using SAD
|
|
||||||
static int estimate_motion_280x224(const float *current, const float *reference,
|
|
||||||
int width, int height, int tile_x, int tile_y,
|
|
||||||
motion_vector_t *mv) {
|
|
||||||
const int tile_size_x = TILE_SIZE_X;
|
|
||||||
const int tile_size_y = TILE_SIZE_Y;
|
|
||||||
const int search_range = 32; // ±32 pixels (scaled for larger tiles)
|
|
||||||
const int start_x = tile_x * tile_size_x;
|
|
||||||
const int start_y = tile_y * tile_size_y;
|
|
||||||
|
|
||||||
int best_mv_x = 0, best_mv_y = 0;
|
|
||||||
int min_sad = INT_MAX;
|
|
||||||
|
|
||||||
// Search within ±16 pixel range
|
|
||||||
for (int dy = -search_range; dy <= search_range; dy++) {
|
|
||||||
for (int dx = -search_range; dx <= search_range; dx++) {
|
|
||||||
int ref_x = start_x + dx;
|
|
||||||
int ref_y = start_y + dy;
|
|
||||||
|
|
||||||
// Check bounds
|
|
||||||
if (ref_x < 0 || ref_y < 0 ||
|
|
||||||
ref_x + tile_size_x > width || ref_y + tile_size_y > height) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Calculate SAD
|
|
||||||
int sad = 0;
|
|
||||||
for (int y = 0; y < tile_size_y; y++) {
|
|
||||||
for (int x = 0; x < tile_size_x; x++) {
|
|
||||||
int curr_idx = (start_y + y) * width + (start_x + x);
|
|
||||||
int ref_idx = (ref_y + y) * width + (ref_x + x);
|
|
||||||
|
|
||||||
if (curr_idx >= 0 && curr_idx < width * height &&
|
|
||||||
ref_idx >= 0 && ref_idx < width * height) {
|
|
||||||
int diff = (int)(current[curr_idx] - reference[ref_idx]);
|
|
||||||
sad += abs(diff);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sad < min_sad) {
|
|
||||||
min_sad = sad;
|
|
||||||
best_mv_x = dx * 4; // Convert to 1/4 pixel precision
|
|
||||||
best_mv_y = dy * 4;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
mv->mv_x = best_mv_x;
|
return compressed_size + 5; // packet type + size field + compressed data
|
||||||
mv->mv_y = best_mv_y;
|
|
||||||
mv->rate_control_factor = 1.0f; // TODO: Calculate based on complexity
|
|
||||||
|
|
||||||
return min_sad;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// RGB to YCoCg colour space conversion
|
// RGB to YCoCg colour space conversion
|
||||||
@@ -879,10 +903,16 @@ static inline double HLG_EOTF(double Ep) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// sRGB -> LMS matrix
|
// sRGB -> LMS matrix
|
||||||
static const double M_RGB_TO_LMS[3][3] = {
|
/*static const double M_RGB_TO_LMS[3][3] = {
|
||||||
{0.2958564579364564, 0.6230869483219083, 0.08106989398623762},
|
{0.2958564579364564, 0.6230869483219083, 0.08106989398623762},
|
||||||
{0.15627390752659093, 0.727308963512872, 0.11639736914944238},
|
{0.15627390752659093, 0.727308963512872, 0.11639736914944238},
|
||||||
{0.035141262332177715, 0.15657109121101628, 0.8080956851990795}
|
{0.035141262332177715, 0.15657109121101628, 0.8080956851990795}
|
||||||
|
};*/
|
||||||
|
// BT.2100 -> LMS matrix
|
||||||
|
static const double M_RGB_TO_LMS[3][3] = {
|
||||||
|
{1688.0/4096,2146.0/4096, 262.0/4096},
|
||||||
|
{ 683.0/4096,2951.0/4096, 462.0/4096},
|
||||||
|
{ 99.0/4096, 309.0/4096,3688.0/4096}
|
||||||
};
|
};
|
||||||
|
|
||||||
static const double M_LMS_TO_RGB[3][3] = {
|
static const double M_LMS_TO_RGB[3][3] = {
|
||||||
@@ -1046,13 +1076,13 @@ static int write_tav_header(tav_encoder_t *enc) {
|
|||||||
// Encoder parameters
|
// Encoder parameters
|
||||||
fputc(enc->wavelet_filter, enc->output_fp);
|
fputc(enc->wavelet_filter, enc->output_fp);
|
||||||
fputc(enc->decomp_levels, enc->output_fp);
|
fputc(enc->decomp_levels, enc->output_fp);
|
||||||
fputc(enc->quantizer_y, enc->output_fp);
|
fputc(enc->quantiser_y, enc->output_fp);
|
||||||
fputc(enc->quantizer_co, enc->output_fp);
|
fputc(enc->quantiser_co, enc->output_fp);
|
||||||
fputc(enc->quantizer_cg, enc->output_fp);
|
fputc(enc->quantiser_cg, enc->output_fp);
|
||||||
|
|
||||||
// Feature flags
|
// Feature flags
|
||||||
uint8_t extra_flags = 0;
|
uint8_t extra_flags = 0;
|
||||||
if (1) extra_flags |= 0x01; // Has audio (placeholder)
|
if (enc->has_audio) extra_flags |= 0x01; // Has audio (placeholder)
|
||||||
if (enc->subtitle_file) extra_flags |= 0x02; // Has subtitles
|
if (enc->subtitle_file) extra_flags |= 0x02; // Has subtitles
|
||||||
if (enc->enable_progressive_transmission) extra_flags |= 0x04;
|
if (enc->enable_progressive_transmission) extra_flags |= 0x04;
|
||||||
if (enc->enable_roi) extra_flags |= 0x08;
|
if (enc->enable_roi) extra_flags |= 0x08;
|
||||||
@@ -1060,9 +1090,8 @@ static int write_tav_header(tav_encoder_t *enc) {
|
|||||||
|
|
||||||
uint8_t video_flags = 0;
|
uint8_t video_flags = 0;
|
||||||
// if (!enc->progressive) video_flags |= 0x01; // Interlaced
|
// if (!enc->progressive) video_flags |= 0x01; // Interlaced
|
||||||
if (enc->fps == 29 || enc->fps == 30) video_flags |= 0x02; // NTSC
|
if (enc->is_ntsc_framerate) video_flags |= 0x02; // NTSC
|
||||||
if (enc->lossless) video_flags |= 0x04; // Lossless
|
if (enc->lossless) video_flags |= 0x04; // Lossless
|
||||||
if (enc->decomp_levels > 1) video_flags |= 0x08; // Multi-resolution
|
|
||||||
fputc(video_flags, enc->output_fp);
|
fputc(video_flags, enc->output_fp);
|
||||||
|
|
||||||
// Reserved bytes (7 bytes)
|
// Reserved bytes (7 bytes)
|
||||||
@@ -1175,6 +1204,8 @@ static int get_video_metadata(tav_encoder_t *config) {
|
|||||||
// fprintf(stderr, " Resolution: %dx%d (%s)\n", config->width, config->height,
|
// fprintf(stderr, " Resolution: %dx%d (%s)\n", config->width, config->height,
|
||||||
// config->progressive ? "progressive" : "interlaced");
|
// config->progressive ? "progressive" : "interlaced");
|
||||||
fprintf(stderr, " Resolution: %dx%d\n", config->width, config->height);
|
fprintf(stderr, " Resolution: %dx%d\n", config->width, config->height);
|
||||||
|
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start FFmpeg process for video conversion with frame rate support
|
// Start FFmpeg process for video conversion with frame rate support
|
||||||
@@ -1182,11 +1213,21 @@ static int start_video_conversion(tav_encoder_t *enc) {
|
|||||||
char command[2048];
|
char command[2048];
|
||||||
|
|
||||||
// Use simple FFmpeg command like TEV encoder for reliable EOF detection
|
// Use simple FFmpeg command like TEV encoder for reliable EOF detection
|
||||||
snprintf(command, sizeof(command),
|
if (enc->output_fps > 0 && enc->output_fps != enc->fps) {
|
||||||
"ffmpeg -i \"%s\" -f rawvideo -pix_fmt rgb24 "
|
// Frame rate conversion requested
|
||||||
"-vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" "
|
snprintf(command, sizeof(command),
|
||||||
"-y - 2>/dev/null",
|
"ffmpeg -v error -i \"%s\" -f rawvideo -pix_fmt rgb24 "
|
||||||
enc->input_file, enc->width, enc->height, enc->width, enc->height);
|
"-vf \"fps=%d,scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" "
|
||||||
|
"-y - 2>&1",
|
||||||
|
enc->input_file, enc->output_fps, enc->width, enc->height, enc->width, enc->height);
|
||||||
|
} else {
|
||||||
|
// No frame rate conversion
|
||||||
|
snprintf(command, sizeof(command),
|
||||||
|
"ffmpeg -v error -i \"%s\" -f rawvideo -pix_fmt rgb24 "
|
||||||
|
"-vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" "
|
||||||
|
"-y -",
|
||||||
|
enc->input_file, enc->width, enc->height, enc->width, enc->height);
|
||||||
|
}
|
||||||
|
|
||||||
if (enc->verbose) {
|
if (enc->verbose) {
|
||||||
printf("FFmpeg command: %s\n", command);
|
printf("FFmpeg command: %s\n", command);
|
||||||
@@ -1618,6 +1659,53 @@ static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output) {
|
|||||||
return bytes_written;
|
return bytes_written;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Detect scene changes by analysing frame differences
|
||||||
|
/*
 * Decide whether the current RGB frame differs enough from the previous one
 * to be treated as a scene change.
 *
 * Reads enc->current_frame_rgb and enc->previous_frame_rgb as packed 3-byte
 * pixels (indexed as (y * width + x) * 3), sampling every second pixel in both
 * directions. A sampled pixel counts as "changed" when the sum of its three
 * absolute channel differences exceeds 90.
 *
 * Returns non-zero when more than 30% of the sampled pixels changed, and 0
 * otherwise. Also returns 0 when either frame buffer is missing, when
 * enc->intra_only is set, or when the frame has no pixels to sample.
 */
static int detect_scene_change(tav_encoder_t *enc) {
    // Both buffers are dereferenced below, so both must exist. In intra-only
    // mode the result is not needed, so skip the scan entirely.
    if (!enc->current_frame_rgb || !enc->previous_frame_rgb || enc->intra_only) {
        return 0;
    }

    const uint8_t *comparison_buffer = enc->previous_frame_rgb;

    long long total_diff = 0;
    int changed_pixels = 0;
    int sampled_pixels = 0;

    // Sample one pixel out of every 2x2 block for performance
    for (int y = 0; y < enc->height; y += 2) {
        for (int x = 0; x < enc->width; x += 2) {
            // Compute the byte offset in size_t so large frames cannot overflow int
            size_t offset = ((size_t)y * (size_t)enc->width + (size_t)x) * 3;

            // Per-channel absolute difference between current and previous frame
            int r_diff = abs(enc->current_frame_rgb[offset]     - comparison_buffer[offset]);
            int g_diff = abs(enc->current_frame_rgb[offset + 1] - comparison_buffer[offset + 1]);
            int b_diff = abs(enc->current_frame_rgb[offset + 2] - comparison_buffer[offset + 2]);

            int pixel_diff = r_diff + g_diff + b_diff;
            total_diff += pixel_diff;

            // Count significantly changed pixels (threshold of 30 per channel average)
            if (pixel_diff > 90) {
                changed_pixels++;
            }

            // Count what was actually visited: (height / 2) * (width / 2)
            // undercounts odd dimensions and is zero for 1-pixel-wide frames.
            sampled_pixels++;
        }
    }

    // Degenerate (empty) frame: nothing was sampled, so there is nothing to report
    if (sampled_pixels == 0) {
        return 0;
    }

    // Metrics for scene change detection
    double avg_diff = (double)total_diff / sampled_pixels;
    double changed_ratio = (double)changed_pixels / sampled_pixels;

    if (enc->verbose) {
        printf("Scene change detection: avg_diff=%.2f\tchanged_ratio=%.4f\n", avg_diff, changed_ratio);
    }

    // Fixed threshold: a scene change is declared when more than 30% of the
    // sampled pixels changed significantly. avg_diff is reported for
    // diagnostics only and does not take part in the decision.
    const double threshold = 0.30;

    return changed_ratio > threshold;
}
|
||||||
|
|
||||||
// Main function
|
// Main function
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
generate_random_filename(TEMP_AUDIO_FILE);
|
generate_random_filename(TEMP_AUDIO_FILE);
|
||||||
@@ -1636,8 +1724,8 @@ int main(int argc, char *argv[]) {
|
|||||||
{"size", required_argument, 0, 's'},
|
{"size", required_argument, 0, 's'},
|
||||||
{"fps", required_argument, 0, 'f'},
|
{"fps", required_argument, 0, 'f'},
|
||||||
{"quality", required_argument, 0, 'q'},
|
{"quality", required_argument, 0, 'q'},
|
||||||
{"quantizer", required_argument, 0, 'Q'},
|
|
||||||
{"quantiser", required_argument, 0, 'Q'},
|
{"quantiser", required_argument, 0, 'Q'},
|
||||||
|
{"quantizer", required_argument, 0, 'Q'},
|
||||||
// {"wavelet", required_argument, 0, 'w'},
|
// {"wavelet", required_argument, 0, 'w'},
|
||||||
// {"decomp", required_argument, 0, 'd'},
|
// {"decomp", required_argument, 0, 'd'},
|
||||||
{"bitrate", required_argument, 0, 'b'},
|
{"bitrate", required_argument, 0, 'b'},
|
||||||
@@ -1648,6 +1736,7 @@ int main(int argc, char *argv[]) {
|
|||||||
{"lossless", no_argument, 0, 1000},
|
{"lossless", no_argument, 0, 1000},
|
||||||
// {"enable-progressive", no_argument, 0, 1002},
|
// {"enable-progressive", no_argument, 0, 1002},
|
||||||
// {"enable-roi", no_argument, 0, 1003},
|
// {"enable-roi", no_argument, 0, 1003},
|
||||||
|
{"intra-only", no_argument, 0, 1006},
|
||||||
{"ictcp", no_argument, 0, 1005},
|
{"ictcp", no_argument, 0, 1005},
|
||||||
{"help", no_argument, 0, 1004},
|
{"help", no_argument, 0, 1004},
|
||||||
{0, 0, 0, 0}
|
{0, 0, 0, 0}
|
||||||
@@ -1664,26 +1753,32 @@ int main(int argc, char *argv[]) {
|
|||||||
break;
|
break;
|
||||||
case 'q':
|
case 'q':
|
||||||
enc->quality_level = CLAMP(atoi(optarg), 0, 5);
|
enc->quality_level = CLAMP(atoi(optarg), 0, 5);
|
||||||
enc->quantizer_y = QUALITY_Y[enc->quality_level];
|
enc->quantiser_y = QUALITY_Y[enc->quality_level];
|
||||||
enc->quantizer_co = QUALITY_CO[enc->quality_level];
|
enc->quantiser_co = QUALITY_CO[enc->quality_level];
|
||||||
enc->quantizer_cg = QUALITY_CG[enc->quality_level];
|
enc->quantiser_cg = QUALITY_CG[enc->quality_level];
|
||||||
break;
|
break;
|
||||||
case 'Q':
|
case 'Q':
|
||||||
// Parse quantizer values Y,Co,Cg
|
// Parse quantiser values Y,Co,Cg
|
||||||
if (sscanf(optarg, "%d,%d,%d", &enc->quantizer_y, &enc->quantizer_co, &enc->quantizer_cg) != 3) {
|
if (sscanf(optarg, "%d,%d,%d", &enc->quantiser_y, &enc->quantiser_co, &enc->quantiser_cg) != 3) {
|
||||||
fprintf(stderr, "Error: Invalid quantizer format. Use Y,Co,Cg (e.g., 5,3,2)\n");
|
fprintf(stderr, "Error: Invalid quantiser format. Use Y,Co,Cg (e.g., 5,3,2)\n");
|
||||||
cleanup_encoder(enc);
|
cleanup_encoder(enc);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
enc->quantizer_y = CLAMP(enc->quantizer_y, 1, 100);
|
enc->quantiser_y = CLAMP(enc->quantiser_y, 1, 100);
|
||||||
enc->quantizer_co = CLAMP(enc->quantizer_co, 1, 100);
|
enc->quantiser_co = CLAMP(enc->quantiser_co, 1, 100);
|
||||||
enc->quantizer_cg = CLAMP(enc->quantizer_cg, 1, 100);
|
enc->quantiser_cg = CLAMP(enc->quantiser_cg, 1, 100);
|
||||||
break;
|
break;
|
||||||
/*case 'w':
|
/*case 'w':
|
||||||
enc->wavelet_filter = CLAMP(atoi(optarg), 0, 1);
|
enc->wavelet_filter = CLAMP(atoi(optarg), 0, 1);
|
||||||
break;*/
|
break;*/
|
||||||
case 'f':
|
case 'f':
|
||||||
enc->output_fps = atoi(optarg);
|
enc->output_fps = atoi(optarg);
|
||||||
|
enc->is_ntsc_framerate = 0;
|
||||||
|
if (enc->output_fps <= 0) {
|
||||||
|
fprintf(stderr, "Invalid FPS: %d\n", enc->output_fps);
|
||||||
|
cleanup_encoder(enc);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
/*case 'd':
|
/*case 'd':
|
||||||
enc->decomp_levels = CLAMP(atoi(optarg), 1, MAX_DECOMP_LEVELS);
|
enc->decomp_levels = CLAMP(atoi(optarg), 1, MAX_DECOMP_LEVELS);
|
||||||
@@ -1704,6 +1799,9 @@ int main(int argc, char *argv[]) {
|
|||||||
case 1005: // --ictcp
|
case 1005: // --ictcp
|
||||||
enc->ictcp_mode = 1;
|
enc->ictcp_mode = 1;
|
||||||
break;
|
break;
|
||||||
|
case 1006: // --intra-only
|
||||||
|
enc->intra_only = 1;
|
||||||
|
break;
|
||||||
case 1004: // --help
|
case 1004: // --help
|
||||||
show_usage(argv[0]);
|
show_usage(argv[0]);
|
||||||
cleanup_encoder(enc);
|
cleanup_encoder(enc);
|
||||||
@@ -1715,6 +1813,11 @@ int main(int argc, char *argv[]) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// adjust encoding parameters for ICtCp
|
||||||
|
if (enc->ictcp_mode) {
|
||||||
|
enc->quantiser_cg = enc->quantiser_co;
|
||||||
|
}
|
||||||
|
|
||||||
if ((!enc->input_file && !enc->test_mode) || !enc->output_file) {
|
if ((!enc->input_file && !enc->test_mode) || !enc->output_file) {
|
||||||
fprintf(stderr, "Error: Input and output files must be specified\n");
|
fprintf(stderr, "Error: Input and output files must be specified\n");
|
||||||
show_usage(argv[0]);
|
show_usage(argv[0]);
|
||||||
@@ -1734,7 +1837,11 @@ int main(int argc, char *argv[]) {
|
|||||||
printf("Resolution: %dx%d\n", enc->width, enc->height);
|
printf("Resolution: %dx%d\n", enc->width, enc->height);
|
||||||
printf("Wavelet: %s\n", enc->wavelet_filter ? "9/7 irreversible" : "5/3 reversible");
|
printf("Wavelet: %s\n", enc->wavelet_filter ? "9/7 irreversible" : "5/3 reversible");
|
||||||
printf("Decomposition levels: %d\n", enc->decomp_levels);
|
printf("Decomposition levels: %d\n", enc->decomp_levels);
|
||||||
printf("Quality: Y=%d, Co=%d, Cg=%d\n", enc->quantizer_y, enc->quantizer_co, enc->quantizer_cg);
|
if (enc->ictcp_mode) {
|
||||||
|
printf("Quantiser: I=%d, Ct=%d, Cp=%d\n", enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg);
|
||||||
|
} else {
|
||||||
|
printf("Quantiser: Y=%d, Co=%d, Cg=%d\n", enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg);
|
||||||
|
}
|
||||||
printf("Colour space: %s\n", enc->ictcp_mode ? "ICtCp" : "YCoCg-R");
|
printf("Colour space: %s\n", enc->ictcp_mode ? "ICtCp" : "YCoCg-R");
|
||||||
|
|
||||||
// Open output file
|
// Open output file
|
||||||
@@ -1798,6 +1905,10 @@ int main(int argc, char *argv[]) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (enc->output_fps != enc->fps) {
|
||||||
|
printf("Frame rate conversion enabled: %d fps output\n", enc->output_fps);
|
||||||
|
}
|
||||||
|
|
||||||
printf("Starting encoding...\n");
|
printf("Starting encoding...\n");
|
||||||
|
|
||||||
// Main encoding loop - process frames until EOF or frame limit
|
// Main encoding loop - process frames until EOF or frame limit
|
||||||
@@ -1869,8 +1980,19 @@ int main(int argc, char *argv[]) {
|
|||||||
// Frame parity: even frames (0,2,4...) = bottom fields, odd frames (1,3,5...) = top fields
|
// Frame parity: even frames (0,2,4...) = bottom fields, odd frames (1,3,5...) = top fields
|
||||||
}
|
}
|
||||||
|
|
||||||
// Determine frame type (all frames are keyframes in current implementation)
|
// Determine frame type
|
||||||
int is_keyframe = 1;
|
int is_scene_change = detect_scene_change(enc);
|
||||||
|
int is_time_keyframe = (frame_count % KEYFRAME_INTERVAL) == 0;
|
||||||
|
int is_keyframe = enc->intra_only || is_time_keyframe || is_scene_change;
|
||||||
|
|
||||||
|
// Verbose output for keyframe decisions
|
||||||
|
/*if (enc->verbose && is_keyframe) {
|
||||||
|
if (is_scene_change && !is_time_keyframe) {
|
||||||
|
printf("Frame %d: Scene change detected, inserting keyframe\n", frame_count);
|
||||||
|
} else if (is_time_keyframe) {
|
||||||
|
printf("Frame %d: Time-based keyframe (interval: %d)\n", frame_count, KEYFRAME_INTERVAL);
|
||||||
|
}
|
||||||
|
}*/
|
||||||
|
|
||||||
// Debug: check RGB input data
|
// Debug: check RGB input data
|
||||||
/*if (frame_count < 3) {
|
/*if (frame_count < 3) {
|
||||||
@@ -1896,23 +2018,6 @@ int main(int argc, char *argv[]) {
|
|||||||
printf("\n");
|
printf("\n");
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
// Process motion vectors for P-frames
|
|
||||||
int num_tiles = enc->tiles_x * enc->tiles_y;
|
|
||||||
for (int tile_idx = 0; tile_idx < num_tiles; tile_idx++) {
|
|
||||||
int tile_x = tile_idx % enc->tiles_x;
|
|
||||||
int tile_y = tile_idx / enc->tiles_x;
|
|
||||||
|
|
||||||
if (!is_keyframe && frame_count > 0) {
|
|
||||||
estimate_motion_280x224(enc->current_frame_y, enc->previous_frame_y,
|
|
||||||
enc->width, enc->height, tile_x, tile_y,
|
|
||||||
&enc->motion_vectors[tile_idx]);
|
|
||||||
} else {
|
|
||||||
enc->motion_vectors[tile_idx].mv_x = 0;
|
|
||||||
enc->motion_vectors[tile_idx].mv_y = 0;
|
|
||||||
enc->motion_vectors[tile_idx].rate_control_factor = 1.0f;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compress and write frame packet
|
// Compress and write frame packet
|
||||||
uint8_t packet_type = is_keyframe ? TAV_PACKET_IFRAME : TAV_PACKET_PFRAME;
|
uint8_t packet_type = is_keyframe ? TAV_PACKET_IFRAME : TAV_PACKET_PFRAME;
|
||||||
size_t packet_size = compress_and_write_frame(enc, packet_type);
|
size_t packet_size = compress_and_write_frame(enc, packet_type);
|
||||||
@@ -2007,10 +2112,15 @@ static void cleanup_encoder(tav_encoder_t *enc) {
|
|||||||
free(enc->compressed_buffer);
|
free(enc->compressed_buffer);
|
||||||
free(enc->mp2_buffer);
|
free(enc->mp2_buffer);
|
||||||
|
|
||||||
// OPTIMIZATION: Free reusable quantization buffers
|
// OPTIMIZATION: Free reusable quantisation buffers
|
||||||
free(enc->reusable_quantized_y);
|
free(enc->reusable_quantised_y);
|
||||||
free(enc->reusable_quantized_co);
|
free(enc->reusable_quantised_co);
|
||||||
free(enc->reusable_quantized_cg);
|
free(enc->reusable_quantised_cg);
|
||||||
|
|
||||||
|
// Free coefficient delta storage
|
||||||
|
free(enc->previous_coeffs_y);
|
||||||
|
free(enc->previous_coeffs_co);
|
||||||
|
free(enc->previous_coeffs_cg);
|
||||||
|
|
||||||
// Free subtitle list
|
// Free subtitle list
|
||||||
if (enc->subtitles) {
|
if (enc->subtitles) {
|
||||||
|
|||||||
Reference in New Issue
Block a user