various encoder bug fixes

This commit is contained in:
minjaesong
2025-09-13 00:39:12 +09:00
parent 1f5f72733a
commit 198e951102
4 changed files with 553 additions and 79 deletions

View File

@@ -418,6 +418,7 @@ let hasSubtitle = !!(flags & 2)
let videoFlags = seqread.readOneByte() let videoFlags = seqread.readOneByte()
let isInterlaced = !!(videoFlags & 1) let isInterlaced = !!(videoFlags & 1)
let isNTSC = !!(videoFlags & 2) let isNTSC = !!(videoFlags & 2)
let isLossless = !!(videoFlags & 4)
let unused2 = seqread.readOneByte() let unused2 = seqread.readOneByte()
@@ -427,6 +428,7 @@ serial.println(` FPS: ${(isNTSC) ? (fps * 1000 / 1001) : fps}`)
serial.println(` Duration: ${totalFrames / fps}`) serial.println(` Duration: ${totalFrames / fps}`)
serial.println(` Audio: ${hasAudio ? "Yes" : "No"}`) serial.println(` Audio: ${hasAudio ? "Yes" : "No"}`)
serial.println(` Resolution: ${width}x${height}, ${isInterlaced ? "interlaced" : "progressive"}`) serial.println(` Resolution: ${width}x${height}, ${isInterlaced ? "interlaced" : "progressive"}`)
serial.println(` Quality: Y=${qualityY}, Co=${qualityCo}, Cg=${qualityCg}, ${isLossless ? "lossless" : "lossy"}`)
// DEBUG interlace raw output // DEBUG interlace raw output
@@ -665,14 +667,14 @@ try {
if (isInterlaced) { if (isInterlaced) {
// For interlaced: decode current frame into currentFieldAddr // For interlaced: decode current frame into currentFieldAddr
// For display: use prevFieldAddr as current, currentFieldAddr as next // For display: use prevFieldAddr as current, currentFieldAddr as next
graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding) graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding, isLossless)
graphics.tevDeinterlace(trueFrameCount, width, decodingHeight, prevFieldAddr, currentFieldAddr, nextFieldAddr, CURRENT_RGB_ADDR, deinterlaceAlgorithm) graphics.tevDeinterlace(trueFrameCount, width, decodingHeight, prevFieldAddr, currentFieldAddr, nextFieldAddr, CURRENT_RGB_ADDR, deinterlaceAlgorithm)
// Rotate field buffers for next frame: NEXT -> CURRENT -> PREV // Rotate field buffers for next frame: NEXT -> CURRENT -> PREV
rotateFieldBuffers() rotateFieldBuffers()
} else { } else {
// Progressive or first frame: normal decoding without temporal prediction // Progressive or first frame: normal decoding without temporal prediction
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding) graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding, isLossless)
} }
decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 // Convert to milliseconds decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 // Convert to milliseconds

View File

@@ -12,6 +12,7 @@ import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUint
import net.torvald.tsvm.peripheral.GraphicsAdapter import net.torvald.tsvm.peripheral.GraphicsAdapter
import net.torvald.tsvm.peripheral.PeriBase import net.torvald.tsvm.peripheral.PeriBase
import net.torvald.tsvm.peripheral.fmod import net.torvald.tsvm.peripheral.fmod
import net.torvald.util.Float16
import kotlin.math.* import kotlin.math.*
class GraphicsJSR223Delegate(private val vm: VM) { class GraphicsJSR223Delegate(private val vm: VM) {
@@ -21,6 +22,77 @@ class GraphicsJSR223Delegate(private val vm: VM) {
private val idct16TempBuffer = FloatArray(256) // For 16x16 IDCT private val idct16TempBuffer = FloatArray(256) // For 16x16 IDCT
private val idct16SeparableBuffer = FloatArray(256) // For separable 16x16 IDCT private val idct16SeparableBuffer = FloatArray(256) // For separable 16x16 IDCT
// Lossless IDCT functions for float16 coefficients (no quantization)
/**
 * Inverse 8x8 DCT for coefficients that are already plain floats (no dequantisation step).
 *
 * Runs as two separable 1-D passes over [dctBasis8]: rows first into the class-level scratch
 * buffer `idct8TempBuffer`, then columns. Each pass scales by 0.5; the second pass adds a
 * +128 bias before rounding and clamping to 0..255.
 *
 * Writes into `idct8TempBuffer` on every call, so calls must not overlap.
 *
 * NOTE(review): tevDecode routes the Co and Cg planes through this function as well, so they
 * receive the same +128 bias and 0..255 clamp as a luma-style plane — confirm that this is what
 * the downstream colour conversion expects for chroma.
 *
 * @param coeffs 64 coefficients, row-major (index = row * 8 + column)
 * @return 64 samples in 0..255, row-major; non-finite results are reported on stdout and
 *         replaced by 128
 */
private fun tevIdct8x8_lossless(coeffs: FloatArray): IntArray {
    val result = IntArray(64)

    // Pass 1: one 1-D inverse transform per row, staged in the scratch buffer
    for (row in 0 until 8) {
        val rowBase = row * 8
        for (col in 0 until 8) {
            var acc = 0f
            for (u in 0 until 8) {
                acc += dctBasis8[u][col] * coeffs[rowBase + u]
            }
            idct8TempBuffer[rowBase + col] = acc * 0.5f
        }
    }

    // Pass 2: one 1-D inverse transform per column, then bias / round / clamp
    for (col in 0 until 8) {
        for (row in 0 until 8) {
            var sum = 0f
            for (v in 0 until 8) {
                sum += dctBasis8[v][row] * idct8TempBuffer[v * 8 + col]
            }
            val finalValue = sum * 0.5f + 128f
            result[row * 8 + col] = if (finalValue.isFinite()) {
                finalValue.roundToInt().coerceIn(0, 255)
            } else {
                // roundToInt() throws on NaN, so non-finite values are diverted here
                println("NaN/Inf detected in 8x8 IDCT at ($row,$col): sum=$sum, finalValue=$finalValue")
                128 // Default to middle gray
            }
        }
    }

    return result
}
/**
 * Inverse 16x16 DCT for coefficients that are already plain floats (no dequantisation step).
 *
 * Runs as two separable 1-D passes over [dctBasis16]: rows first into the class-level scratch
 * buffer [idct16TempBuffer], then columns. Each pass scales by 0.25; the second pass adds a
 * +128 bias before rounding and clamping to 0..255.
 *
 * Writes into [idct16TempBuffer] on every call, so calls must not overlap.
 *
 * @param coeffs 256 coefficients, row-major (index = row * 16 + column)
 * @return 256 samples in 0..255, row-major; non-finite results are reported on stdout and
 *         replaced by 128
 */
private fun tevIdct16x16_lossless(coeffs: FloatArray): IntArray {
    val result = IntArray(256)

    // Pass 1: one 1-D inverse transform per row, staged in the scratch buffer
    for (row in 0 until 16) {
        val rowBase = row * 16
        for (col in 0 until 16) {
            var acc = 0f
            for (u in 0 until 16) {
                acc += dctBasis16[u][col] * coeffs[rowBase + u]
            }
            idct16TempBuffer[rowBase + col] = acc * 0.25f
        }
    }

    // Pass 2: one 1-D inverse transform per column, then bias / round / clamp
    for (col in 0 until 16) {
        for (row in 0 until 16) {
            var sum = 0f
            for (v in 0 until 16) {
                sum += dctBasis16[v][row] * idct16TempBuffer[v * 16 + col]
            }
            val finalValue = sum * 0.25f + 128f
            result[row * 16 + col] = if (finalValue.isFinite()) {
                finalValue.roundToInt().coerceIn(0, 255)
            } else {
                // roundToInt() throws on NaN, so non-finite values are diverted here
                println("NaN/Inf detected in 16x16 IDCT at ($row,$col): sum=$sum, finalValue=$finalValue")
                128 // Default to middle gray
            }
        }
    }

    return result
}
private fun getFirstGPU(): GraphicsAdapter? { private fun getFirstGPU(): GraphicsAdapter? {
return vm.findPeribyType(VM.PERITYPE_GPU_AND_TERM)?.peripheral as? GraphicsAdapter return vm.findPeribyType(VM.PERITYPE_GPU_AND_TERM)?.peripheral as? GraphicsAdapter
@@ -1649,7 +1721,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val result = IntArray(64) val result = IntArray(64)
// Reuse preallocated temp buffer to reduce GC pressure // Reuse preallocated temp buffer to reduce GC pressure
for (i in coeffs.indices) { for (i in coeffs.indices) {
idct8TempBuffer[i] = coeffs[i] * quantTable[i] * jpeg_quality_to_mult(qualityIndex * rateControlFactor) idct8TempBuffer[i] = coeffs[i] * (quantTable[i] * jpeg_quality_to_mult(qualityIndex * rateControlFactor)).coerceIn(1f, 255f)
} }
// Fast separable IDCT (row-column decomposition) // Fast separable IDCT (row-column decomposition)
@@ -1662,7 +1734,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val coeff = if (isChromaResidual && coeffIdx == 0) { val coeff = if (isChromaResidual && coeffIdx == 0) {
coeffs[coeffIdx].toFloat() // DC lossless for chroma residual coeffs[coeffIdx].toFloat() // DC lossless for chroma residual
} else { } else {
coeffs[coeffIdx] * quantTable[coeffIdx] * jpeg_quality_to_mult(qualityIndex * rateControlFactor) coeffs[coeffIdx] * (quantTable[coeffIdx] * jpeg_quality_to_mult(qualityIndex * rateControlFactor)).coerceIn(1f, 255f)
} }
sum += dctBasis8[u][col] * coeff sum += dctBasis8[u][col] * coeff
} }
@@ -1708,7 +1780,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val coeff = if (idx == 0) { val coeff = if (idx == 0) {
coeffs[idx].toFloat() // DC lossless for luma coeffs[idx].toFloat() // DC lossless for luma
} else { } else {
coeffs[idx] * quantTable[idx] * jpeg_quality_to_mult(qualityIndex * rateControlFactor) coeffs[idx] * (quantTable[idx] * jpeg_quality_to_mult(qualityIndex * rateControlFactor)).coerceIn(1f, 255f)
} }
idct16TempBuffer[idx] = coeff idct16TempBuffer[idx] = coeff
} }
@@ -2555,7 +2627,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long, fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, frameCounter: Int, width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, frameCounter: Int,
debugMotionVectors: Boolean = false, tevVersion: Int = 2, debugMotionVectors: Boolean = false, tevVersion: Int = 2,
enableDeblocking: Boolean = true, enableBoundaryAwareDecoding: Boolean = false) { enableDeblocking: Boolean = true, enableBoundaryAwareDecoding: Boolean = false,
isLossless: Boolean = false) {
// height doesn't change when interlaced, because that's the encoder's output // height doesn't change when interlaced, because that's the encoder's output
@@ -2846,17 +2919,65 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
0x01 -> { // TEV_MODE_INTRA - Full YCoCg-R DCT decode (no motion compensation) 0x01 -> { // TEV_MODE_INTRA - Full YCoCg-R DCT decode (no motion compensation)
// Read DCT coefficients: Y (16x16=256), Co (8x8=64), Cg (8x8=64) val yBlock: IntArray
val coBlock: IntArray
val cgBlock: IntArray
if (isLossless) {
// Lossless mode: coefficients are stored as float16, no quantization
// Read float16 coefficients: Y (16x16=256), Co (8x8=64), Cg (8x8=64)
val coeffFloat16Array = ShortArray(384) // 384 float16 values stored as shorts
vm.bulkPeekShort(readPtr.toInt(), coeffFloat16Array, 768) // 384 * 2 bytes
readPtr += 768
// Convert float16 to float32 and perform IDCT directly (no quantization)
println("DEBUG: Reading lossless coefficients, first few float16 values: ${coeffFloat16Array.take(10).map { "0x${it.toString(16)}" }}")
val yCoeffs = FloatArray(256) { i ->
// Convert signed short to unsigned short for float16 interpretation
val signedShort = coeffFloat16Array[i]
val float16bits = signedShort.toInt() and 0xFFFF // Convert to unsigned
val floatVal = Float16.toFloat(float16bits.toShort())
if (floatVal.isNaN() || floatVal.isInfinite()) {
println("NaN/Inf detected at Y coefficient $i: signedShort=0x${signedShort.toString(16)}, unsigned=0x${float16bits.toString(16)}, floatVal=$floatVal")
0f // Replace NaN with 0
} else floatVal
}
val coCoeffs = FloatArray(64) { i ->
// Convert signed short to unsigned short for float16 interpretation
val signedShort = coeffFloat16Array[256 + i]
val float16bits = signedShort.toInt() and 0xFFFF // Convert to unsigned
val floatVal = Float16.toFloat(float16bits.toShort())
if (floatVal.isNaN() || floatVal.isInfinite()) {
println("NaN/Inf detected at Co coefficient $i: signedShort=0x${signedShort.toString(16)}, unsigned=0x${float16bits.toString(16)}, floatVal=$floatVal")
0f // Replace NaN with 0
} else floatVal
}
val cgCoeffs = FloatArray(64) { i ->
// Convert signed short to unsigned short for float16 interpretation
val signedShort = coeffFloat16Array[320 + i]
val float16bits = signedShort.toInt() and 0xFFFF // Convert to unsigned
val floatVal = Float16.toFloat(float16bits.toShort())
if (floatVal.isNaN() || floatVal.isInfinite()) {
println("NaN/Inf detected at Cg coefficient $i: signedShort=0x${signedShort.toString(16)}, unsigned=0x${float16bits.toString(16)}, floatVal=$floatVal")
0f // Replace NaN with 0
} else floatVal
}
yBlock = tevIdct16x16_lossless(yCoeffs)
coBlock = tevIdct8x8_lossless(coCoeffs)
cgBlock = tevIdct8x8_lossless(cgCoeffs)
} else {
// Regular lossy mode: quantized int16 coefficients
// Optimized bulk reading of all DCT coefficients: Y(256×2) + Co(64×2) + Cg(64×2) = 768 bytes
val coeffShortArray = ShortArray(384) // Total coefficients: 256 + 64 + 64 = 384 shorts
vm.bulkPeekShort(readPtr.toInt(), coeffShortArray, 768)
readPtr += 768
// Optimized bulk reading of all DCT coefficients: Y(256×2) + Co(64×2) + Cg(64×2) = 768 bytes // Perform hardware IDCT for each channel using fast algorithm
val coeffShortArray = ShortArray(384) // Total coefficients: 256 + 64 + 64 = 384 shorts yBlock = tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), QUANT_TABLE_Y, qY, rateControlFactor)
vm.bulkPeekShort(readPtr.toInt(), coeffShortArray, 768) coBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), QUANT_TABLE_C, true, qCo, rateControlFactor)
readPtr += 768 cgBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), QUANT_TABLE_C, true, qCg, rateControlFactor)
}
// Perform hardware IDCT for each channel using fast algorithm
val yBlock = tevIdct16x16_fast(coeffShortArray.sliceArray(0 until 256), QUANT_TABLE_Y, qY, rateControlFactor)
val coBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(256 until 320), QUANT_TABLE_C, true, qCo, rateControlFactor)
val cgBlock = tevIdct8x8_fast(coeffShortArray.sliceArray(320 until 384), QUANT_TABLE_C, true, qCg, rateControlFactor)
// Convert to RGB (YCoCg-R for v2, XYB for v3) // Convert to RGB (YCoCg-R for v2, XYB for v3)
val rgbData = if (tevVersion == 3) { val rgbData = if (tevVersion == 3) {
@@ -3275,7 +3396,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val quantValue = if (i == 0) 1.0f else { val quantValue = if (i == 0) 1.0f else {
quantTable[coeffIdx] * jpeg_quality_to_mult(qScale * rateControlFactor) quantTable[coeffIdx] * jpeg_quality_to_mult(qScale * rateControlFactor)
} }
result[blockIndex]!![i] = block[i] * quantValue result[blockIndex]!![i] = block[i] * quantValue.coerceIn(1f, 255f)
} }
} }
} }
@@ -3307,7 +3428,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
for (i in 1 until coeffsSize) { for (i in 1 until coeffsSize) {
val coeffIdx = i.coerceIn(0, quantTable.size - 1) val coeffIdx = i.coerceIn(0, quantTable.size - 1)
val quant = (quantTable[coeffIdx] * qualityMult).toInt() val quant = (quantTable[coeffIdx] * qualityMult).coerceIn(1f, 255f).toInt()
quantValues[blockIndex][i] = quant quantValues[blockIndex][i] = quant
quantHalfValues[blockIndex][i] = quant / 2 quantHalfValues[blockIndex][i] = quant / 2
} }
@@ -3511,7 +3632,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val rightOff = blocksOff[rightBlockIndex] val rightOff = blocksOff[rightBlockIndex]
// OPTIMIZATION 4: Process multiple frequencies in single loop for better cache locality // OPTIMIZATION 4: Process multiple frequencies in single loop for better cache locality
for (v in 0 until 16) { // Only low-to-mid frequencies for (v in 0 until 8) { // Only low-to-mid frequencies
var deltaV = 0L var deltaV = 0L
var hfPenalty = 0L var hfPenalty = 0L
val vOffset = v * 16 val vOffset = v * 16
@@ -3667,7 +3788,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
blocksMid[blockIndex][i] = dcValue blocksMid[blockIndex][i] = dcValue
} else { } else {
// AC coefficients: use quantization intervals // AC coefficients: use quantization intervals
val quant = (quantTable[quantIdx] * jpeg_quality_to_mult(qScale * rateControlFactor)).toInt() val quant = (quantTable[quantIdx] * jpeg_quality_to_mult(qScale * rateControlFactor)).coerceIn(1f, 255f).toInt()
// Standard dequantized value (midpoint) // Standard dequantized value (midpoint)
blocksMid[blockIndex][i] = block[i].toInt() * quant blocksMid[blockIndex][i] = block[i].toInt() * quant
@@ -3719,7 +3840,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
blocksMax[blockIndex][i] = dcValue blocksMax[blockIndex][i] = dcValue
} else { } else {
// AC coefficients: use quantization intervals // AC coefficients: use quantization intervals
val quant = (quantTable[quantIdx] * jpeg_quality_to_mult(qScale * rateControlFactor)).toInt() val quant = (quantTable[quantIdx] * jpeg_quality_to_mult(qScale * rateControlFactor)).coerceIn(1f, 255f).toInt()
// Standard dequantized value (midpoint) // Standard dequantized value (midpoint)
blocksMid[blockIndex][i] = block[i].toInt() * quant blocksMid[blockIndex][i] = block[i].toInt() * quant
@@ -3789,73 +3910,116 @@ class GraphicsJSR223Delegate(private val vm: VM) {
return result return result
} }
// BULK OPTIMIZED 8x8 horizontal boundary analysis for chroma channels
private fun analyzeHorizontalBoundary( private fun analyzeHorizontalBoundary(
leftBlockIndex: Int, rightBlockIndex: Int, leftBlockIndex: Int, rightBlockIndex: Int,
blocksMid: Array<IntArray>, blocksOff: Array<LongArray>, blocksMid: Array<IntArray>, blocksOff: Array<LongArray>,
kLinearGradient: IntArray, kAlphaSqrt2: IntArray kLinearGradient: IntArray, kAlphaSqrt2: IntArray
) { ) {
// Only process low-to-mid frequencies (v < 4 for 8x8, v < 8 for 16x16) val leftMid = blocksMid[leftBlockIndex]
val maxV = 8 val rightMid = blocksMid[rightBlockIndex]
val leftOff = blocksOff[leftBlockIndex]
val rightOff = blocksOff[rightBlockIndex]
for (v in 0 until maxV) { // OPTIMIZATION 12: Process 8x8 boundaries with bulk operations (v < 4 for low-to-mid frequencies)
for (v in 0 until 4) { // Only low-to-mid frequencies for 8x8
var deltaV = 0L var deltaV = 0L
var hfPenalty = 0L var hfPenalty = 0L
val vOffset = v * 8
// Analyze boundary discontinuity // First pass: Calculate boundary discontinuity
for (u in 0 until 8) { for (u in 0 until 8) {
val alpha = kAlphaSqrt2[u.coerceIn(0, 7)] val idx = vOffset + u
val sign = if (u and 1 == 1) -1 else 1 val alpha = kAlphaSqrt2[u] // Direct access (u < 8)
val gi = blocksMid[leftBlockIndex][v * 8 + u] val sign = if (u and 1 != 0) -1 else 1
val gj = blocksMid[rightBlockIndex][v * 8 + u] val gi = leftMid[idx]
val gj = rightMid[idx]
deltaV += (alpha * (gj - sign * gi)).toLong() deltaV += alpha * (gj - sign * gi)
hfPenalty += (u * u * (gi * gi + gj * gj)).toLong() hfPenalty += (u * u) * (gi * gi + gj * gj)
} }
// Apply corrections with high-frequency damping // Early exit for very small adjustments
if (hfPenalty > 400) deltaV /= 2 if (kotlin.math.abs(deltaV) < 100) continue
for (u in 0 until 8) { // Apply high-frequency damping once per frequency band
val gradientIdx = u.coerceIn(0, kLinearGradient.size - 1) if (hfPenalty > 400) deltaV /= 2 // 8x8 threshold
val sign = if (u and 1 == 1) 1 else -1
blocksOff[leftBlockIndex][v * 8 + u] = blocksOff[leftBlockIndex][v * 8 + u] + deltaV * kLinearGradient[gradientIdx] // Second pass: Apply corrections (BULK OPTIMIZED with unrolling for 8x8)
blocksOff[rightBlockIndex][v * 8 + u] = blocksOff[rightBlockIndex][v * 8 + u] + deltaV * kLinearGradient[gradientIdx] * sign val correction = deltaV
} // Bulk apply corrections for 8 coefficients - manually unrolled for performance
leftOff[vOffset] += correction * kLinearGradient[0]
rightOff[vOffset] += correction * kLinearGradient[0]
leftOff[vOffset + 1] += correction * kLinearGradient[1]
rightOff[vOffset + 1] -= correction * kLinearGradient[1] // Alternating signs
leftOff[vOffset + 2] += correction * kLinearGradient[2]
rightOff[vOffset + 2] += correction * kLinearGradient[2]
leftOff[vOffset + 3] += correction * kLinearGradient[3]
rightOff[vOffset + 3] -= correction * kLinearGradient[3]
leftOff[vOffset + 4] += correction * kLinearGradient[4]
rightOff[vOffset + 4] += correction * kLinearGradient[4]
leftOff[vOffset + 5] += correction * kLinearGradient[5]
rightOff[vOffset + 5] -= correction * kLinearGradient[5]
leftOff[vOffset + 6] += correction * kLinearGradient[6]
rightOff[vOffset + 6] += correction * kLinearGradient[6]
leftOff[vOffset + 7] += correction * kLinearGradient[7]
rightOff[vOffset + 7] -= correction * kLinearGradient[7]
} }
} }
// BULK OPTIMIZED 8x8 vertical boundary analysis for chroma channels
private fun analyzeVerticalBoundary( private fun analyzeVerticalBoundary(
topBlockIndex: Int, bottomBlockIndex: Int, topBlockIndex: Int, bottomBlockIndex: Int,
blocksMid: Array<IntArray>, blocksOff: Array<LongArray>, blocksMid: Array<IntArray>, blocksOff: Array<LongArray>,
kLinearGradient: IntArray, kAlphaSqrt2: IntArray kLinearGradient: IntArray, kAlphaSqrt2: IntArray
) { ) {
// Only process low-to-mid frequencies (u < 4 for 8x8, u < 8 for 16x16) val topMid = blocksMid[topBlockIndex]
val maxU = 8 val bottomMid = blocksMid[bottomBlockIndex]
val topOff = blocksOff[topBlockIndex]
val bottomOff = blocksOff[bottomBlockIndex]
for (u in 0 until maxU) { // OPTIMIZATION 13: Optimized vertical analysis for 8x8 with better cache access pattern
for (u in 0 until 4) { // Only low-to-mid frequencies for 8x8
var deltaU = 0L var deltaU = 0L
var hfPenalty = 0L var hfPenalty = 0L
// Analyze boundary discontinuity // First pass: Calculate boundary discontinuity
for (v in 0 until 8) { for (v in 0 until 8) {
val alpha = kAlphaSqrt2[v.coerceIn(0, 7)] val idx = v * 8 + u
val sign = if (v and 1 == 1) -1 else 1 val alpha = kAlphaSqrt2[v] // Direct access (v < 8)
val gi = blocksMid[topBlockIndex][v * 8 + u] val sign = if (v and 1 != 0) -1 else 1
val gj = blocksMid[bottomBlockIndex][v * 8 + u] val gi = topMid[idx]
val gj = bottomMid[idx]
deltaU += (alpha * (gj - sign * gi)).toLong() deltaU += alpha * (gj - sign * gi)
hfPenalty += (v * v * (gi * gi + gj * gj)).toLong() hfPenalty += (v * v) * (gi * gi + gj * gj)
} }
// Apply corrections with high-frequency damping // Early exit for very small adjustments
if (hfPenalty > 400) deltaU /= 2 if (kotlin.math.abs(deltaU) < 100) continue
for (v in 0 until 8) { // Apply high-frequency damping once per frequency band
val gradientIdx = v.coerceIn(0, kLinearGradient.size - 1) if (hfPenalty > 400) deltaU /= 2 // 8x8 threshold
val sign = if (v and 1 == 1) 1 else -1
blocksOff[topBlockIndex][v * 8 + u] = blocksOff[topBlockIndex][v * 8 + u] + deltaU * kLinearGradient[gradientIdx] // Second pass: Apply corrections (BULK OPTIMIZED vertical for 8x8)
blocksOff[bottomBlockIndex][v * 8 + u] = blocksOff[bottomBlockIndex][v * 8 + u] + deltaU * kLinearGradient[gradientIdx] * sign val correction = deltaU
} // Bulk apply corrections for 8 vertical coefficients - manually unrolled
topOff[u] += correction * kLinearGradient[0]
bottomOff[u] += correction * kLinearGradient[0]
topOff[8 + u] += correction * kLinearGradient[1]
bottomOff[8 + u] -= correction * kLinearGradient[1] // Alternating signs
topOff[16 + u] += correction * kLinearGradient[2]
bottomOff[16 + u] += correction * kLinearGradient[2]
topOff[24 + u] += correction * kLinearGradient[3]
bottomOff[24 + u] -= correction * kLinearGradient[3]
topOff[32 + u] += correction * kLinearGradient[4]
bottomOff[32 + u] += correction * kLinearGradient[4]
topOff[40 + u] += correction * kLinearGradient[5]
bottomOff[40 + u] -= correction * kLinearGradient[5]
topOff[48 + u] += correction * kLinearGradient[6]
bottomOff[48 + u] += correction * kLinearGradient[6]
topOff[56 + u] += correction * kLinearGradient[7]
bottomOff[56 + u] -= correction * kLinearGradient[7]
} }
} }

View File

@@ -0,0 +1,90 @@
package net.torvald.util
import kotlin.experimental.or
/**
 * Mutable holder for a 16-bit (half precision) floating-point bit pattern.
 *
 * The class only stores the raw bits; all arithmetic is done by widening to [Float] and
 * narrowing the result back with [fromFloat].
 *
 * NOTE(review): the `times` / `div` operators overwrite this instance's [bits] and return
 * `Unit` rather than producing a new value, so `a * b` cannot be used as an expression.
 */
class Float16() {

    /** Raw half-precision bit pattern; only this class can change it. */
    var bits: Short = 0
        private set

    /** Creates an instance holding [fval] narrowed to half precision. */
    constructor(fval: Float) : this() {
        fromFloat(fval)
    }

    /** Widens the stored bits to a [Float] using the companion [toFloat]. */
    fun toFloat(): Float = Float16.toFloat(bits)

    /** Replaces the stored bits with [fval] narrowed to half precision. */
    fun fromFloat(fval: Float) {
        bits = Float16.fromFloat(fval)
    }

    /** Multiplies in place: stores `this * other` back into [bits]. */
    operator fun times(other: Float) {
        fromFloat(toFloat() * other)
    }

    /** Multiplies in place: stores `this * other` back into [bits]. */
    operator fun times(other: Float16) {
        fromFloat(toFloat() * other.toFloat())
    }

    /** Divides in place: stores `this / other` back into [bits]. */
    operator fun div(other: Float) {
        fromFloat(toFloat() / other)
    }

    /** Divides in place: stores `this / other` back into [bits]. */
    operator fun div(other: Float16) {
        fromFloat(toFloat() / other.toFloat())
    }

    // operators are stripped: you don't calculate from FP16; this is only for storing values //

    companion object {

        /**
         * Widens a half-precision bit pattern to [Float].
         *
         * Normal values with a zero mantissa and a half exponent field of 2 or more take the
         * "smooth transition" path, which sets the ten low bits of the float mantissa; such
         * inputs therefore decode slightly above the exact power of two
         * (e.g. 0x3C00 yields bits 0x3F8003FF rather than 0x3F800000).
         */
        fun toFloat(hbits: Short): Float {
            val half = hbits.toInt() and 0xFFFF
            val signBits = (half and 0x8000) shl 16
            var mantissa = half and 0x03ff      // 10 bits mantissa
            var exponent = half and 0x7c00      // 5 bits exponent, still in place

            when {
                // NaN/Inf -> float NaN/Inf exponent
                exponent == 0x7c00 -> exponent = 0x3fc00

                // normalized value
                exponent != 0 -> {
                    exponent += 0x1c000         // exp - 15 + 127
                    if (mantissa == 0 && exponent > 0x1c400) {
                        // smooth transition
                        return Float.fromBits(signBits or (exponent shl 13) or 0x3ff)
                    }
                }

                // exponent == 0 with non-zero mantissa -> subnormal
                mantissa != 0 -> {
                    exponent = 0x1c400          // make it normal
                    do {
                        mantissa = mantissa shl 1   // mantissa * 2
                        exponent -= 0x400           // decrease exp by 1
                    } while ((mantissa and 0x400) == 0) // while not normal
                    mantissa = mantissa and 0x3ff   // discard subnormal bit
                }
                // otherwise +/-0 stays +/-0
            }

            // combine all parts; value << (23 - 10)
            return Float.fromBits(signBits or ((exponent or mantissa) shl 13))
        }

        /**
         * Narrows a [Float] to a half-precision bit pattern.
         *
         * Uses [Float.toBits], which collapses every NaN to one canonical pattern; that keeps
         * the `+ 0x1000` rounding addition below from overflowing `Int`.
         */
        fun fromFloat(fval: Float): Short {
            val raw = fval.toBits()
            val sign = ((raw ushr 16) and 0x8000).toShort()    // sign only
            val magnitude = raw and 0x7fffffff
            val rounded = magnitude + 0x1000                    // rounded value

            if (rounded >= 0x47800000) {                        // might be or become NaN/Inf
                if (magnitude < 0x47800000) {
                    // only the rounding pushed it over: unrounded not quite Inf
                    return sign or 0x7bff.toShort()
                }
                if (rounded < 0x7f800000) {
                    // was a value but too large: make it +/-Inf
                    return sign or 0x7c00
                }
                // remains +/-Inf or NaN; keep NaN (and Inf) bits
                return sign or 0x7c00 or (raw and 0x007fffff).ushr(13).toShort()
            }

            if (rounded >= 0x38800000) {
                // remains normalized value; exp - 127 + 15
                return sign or (rounded - 0x38000000).ushr(13).toShort()
            }

            if (rounded < 0x33000000) {
                // too small for subnormal: becomes +/-0
                return sign
            }

            // subnormal result
            val exp = magnitude ushr 23                         // tmp exp for subnormal calc
            val withHiddenBit = (raw and 0x7fffff) or 0x800000  // add subnormal bit
            val roundingTerm = 0x800000 ushr (exp - 102)        // round depending on cut off
            // div by 2^(1-(exp-127+15)) and >> 13 | exp=0
            return sign or (withHiddenBit + roundingTerm).ushr(126 - exp).toShort()
        }
    }
}

View File

@@ -14,6 +14,58 @@
#include <sys/time.h> #include <sys/time.h>
#include <time.h> #include <time.h>
// Float16 conversion functions (adapted from Float16.kt)
/*
 * Narrow an IEEE 754 binary32 value to a binary16 bit pattern.
 *
 * Rounding is done by adding 0x1000 (half of the 13 discarded mantissa bits) to the
 * magnitude before truncating. Magnitudes that only cross the Inf threshold because of that
 * rounding are pinned to 0x7bff (largest finite half); larger finite magnitudes become
 * +/-Inf. NaN keeps the top ten bits of its payload.
 *
 * Differences from the straight port:
 *  - the float bits are read through a union rather than a cast pointer, which avoids a
 *    strict-aliasing violation;
 *  - magnitudes just below 2^-25 whose rounded value crosses the subnormal threshold have a
 *    biased exponent of 101, which made the shift count (val - 102) wrap around; they now
 *    return signed zero explicitly.
 */
static inline uint16_t float_to_float16(float fval) {
    union { float f; uint32_t u; } pun;
    pun.f = fval;
    const uint32_t fbits = pun.u;

    const uint16_t sign = (fbits >> 16) & 0x8000;       // sign only
    const uint32_t magnitude = fbits & 0x7fffffff;
    uint32_t val = magnitude + 0x1000;                  // rounded value

    if (val >= 0x47800000) {                            // might be or become NaN/Inf
        if (magnitude >= 0x47800000) {                  // is or must become NaN/Inf
            if (val < 0x7f800000)                       // was value but too large
                return sign | 0x7c00;                   // make it +/-Inf
            return sign | 0x7c00 |                      // remains +/-Inf or NaN
                   ((fbits & 0x007fffff) >> 13);        // keep NaN (and Inf) bits
        }
        return sign | 0x7bff;                           // unrounded not quite Inf
    }
    if (val >= 0x38800000)                              // remains normalized value
        return sign | ((val - 0x38000000) >> 13);       // exp - 127 + 15
    if (val < 0x33000000)                               // too small for subnormal
        return sign;                                    // becomes +/-0

    val = magnitude >> 23;                              // tmp exp for subnormal calc
    if (val < 102)                                      // below half the smallest subnormal:
        return sign;                                    // +/-0, and no out-of-range shift

    return sign | ((((fbits & 0x7fffff) | 0x800000u)    // add subnormal bit
                    + (0x800000u >> (val - 102)))       // round depending on cut off
                   >> (126 - val));                     // div by 2^(1-(exp-127+15)) and >> 13 | exp=0
}
/*
 * Widen a binary16 bit pattern to an IEEE 754 binary32 value.
 *
 * Normal values with a zero mantissa and a half exponent field of 2 or more take the
 * "smooth transition" path, which sets the ten low bits of the float mantissa; such inputs
 * decode slightly above the exact power of two (0x3c00 yields bits 0x3f8003ff).
 *
 * Differences from the straight port:
 *  - the sign bit is widened to uint32_t before shifting, because (hbits & 0x8000) << 16 in
 *    plain int shifts into the sign bit;
 *  - the result bits are converted through a union rather than a cast pointer, which avoids
 *    a strict-aliasing violation;
 *  - the exponent local is no longer called `exp`, so it cannot shadow the <math.h> function.
 */
static inline float float16_to_float(uint16_t hbits) {
    const uint32_t sign = (uint32_t)(hbits & 0x8000) << 16;
    uint32_t mant = hbits & 0x03ff;                     // 10 bits mantissa
    uint32_t expo = hbits & 0x7c00;                     // 5 bits exponent
    union { uint32_t u; float f; } pun;

    if (expo == 0x7c00) {                               // NaN/Inf
        expo = 0x3fc00;                                 // -> NaN/Inf
    } else if (expo != 0) {                             // normalized value
        expo += 0x1c000;                                // exp - 15 + 127
        if (mant == 0 && expo > 0x1c400) {              // smooth transition
            pun.u = sign | (expo << 13) | 0x3ff;
            return pun.f;
        }
    } else if (mant != 0) {                             // && exp==0 -> subnormal
        expo = 0x1c400;                                 // make it normal
        do {
            mant <<= 1;                                 // mantissa * 2
            expo -= 0x400;                              // decrease exp by 1
        } while ((mant & 0x400) == 0);                  // while not normal
        mant &= 0x3ff;                                  // discard subnormal bit
    }                                                   // else +/-0 -> +/-0

    pun.u = sign | ((expo | mant) << 13);               // value << (23 - 10)
    return pun.f;
}
// TSVM Enhanced Video (TEV) format constants // TSVM Enhanced Video (TEV) format constants
#define TEV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x45\x56" // "\x1FTSVM TEV" #define TEV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x45\x56" // "\x1FTSVM TEV"
#define TEV_VERSION 2 // Updated for YCoCg-R 4:2:0 #define TEV_VERSION 2 // Updated for YCoCg-R 4:2:0
@@ -103,7 +155,7 @@ static const uint32_t QUANT_TABLE_C[HALF_BLOCK_SIZE_SQR] =
// Audio constants (reuse MP2 from existing system) // Audio constants (reuse MP2 from existing system)
#define MP2_SAMPLE_RATE 32000 #define MP2_SAMPLE_RATE 32000
#define MP2_DEFAULT_PACKET_SIZE 0x240 #define MP2_DEFAULT_PACKET_SIZE 1728
// Default values // Default values
#define DEFAULT_WIDTH 560 #define DEFAULT_WIDTH 560
@@ -140,6 +192,17 @@ typedef struct __attribute__((packed)) {
int16_t cg_coeffs[HALF_BLOCK_SIZE_SQR]; // quantised Cg DCT coefficients (8x8) int16_t cg_coeffs[HALF_BLOCK_SIZE_SQR]; // quantised Cg DCT coefficients (8x8)
} tev_block_t; } tev_block_t;
// Lossless TEV block structure (uses float32 internally, converted to float16 during serialization)
// Working (in-memory) form of one block in lossless mode: encode_block_lossless() fills it with
// unquantised DCT output, and serialize_lossless_blocks() later narrows the coefficients to
// float16 for the file. The header fields are copied across unchanged by the serializer.
typedef struct __attribute__((packed)) {
uint8_t mode; // Block encoding mode
int16_t mv_x, mv_y; // Motion vector (1/4 pixel precision)
float rate_control_factor; // Always 1.0f in lossless mode
uint16_t cbp; // Coded block pattern (which channels have non-zero coeffs)
float y_coeffs[BLOCK_SIZE_SQR]; // lossless Y DCT coefficients (16x16)
float co_coeffs[HALF_BLOCK_SIZE_SQR]; // lossless Co DCT coefficients (8x8)
float cg_coeffs[HALF_BLOCK_SIZE_SQR]; // lossless Cg DCT coefficients (8x8)
} tev_lossless_block_t;
// Subtitle entry structure // Subtitle entry structure
typedef struct subtitle_entry { typedef struct subtitle_entry {
int start_frame; int start_frame;
@@ -168,6 +231,8 @@ typedef struct {
int qualityCo; int qualityCo;
int qualityCg; int qualityCg;
int verbose; int verbose;
int disable_rcf; // 0 = rcf enabled, 1 = disabled
int lossless_mode; // 0 = lossy (default), 1 = lossless mode
// Bitrate control // Bitrate control
int target_bitrate_kbps; // Target bitrate in kbps (0 = quality mode) int target_bitrate_kbps; // Target bitrate in kbps (0 = quality mode)
@@ -216,10 +281,9 @@ typedef struct {
// Subtitle handling // Subtitle handling
subtitle_entry_t *subtitle_list; subtitle_entry_t *subtitle_list;
subtitle_entry_t *current_subtitle; subtitle_entry_t *current_subtitle;
// Complexity statistics collection // Complexity statistics collection
int stats_mode; // 0 = disabled, 1 = enabled int stats_mode; // 0 = disabled, 1 = enabled
int disable_rcf; // 0 = rcf enabled, 1 = disabled
float *complexity_values; // Array to store all complexity values float *complexity_values; // Array to store all complexity values
int complexity_count; // Current count of complexity values int complexity_count; // Current count of complexity values
int complexity_capacity; // Capacity of complexity_values array int complexity_capacity; // Capacity of complexity_values array
@@ -1041,6 +1105,107 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
block->cbp = 0x07; // Y, Co, Cg all present block->cbp = 0x07; // Y, Co, Cg all present
} }
// Encode a 16x16 block in lossless mode.
//
// Extracts the block's YCoCg-R planes into the encoder workspaces, runs the forward DCT on
// each plane, and stores the raw (unquantised) transform output in the block record.
// Every block is currently coded as TEV_MODE_INTRA with a zero motion vector, for keyframes
// and non-keyframes alike, so is_keyframe has no effect yet.
//
// NOTE(review): the block record is located by indexing enc->block_data and casting the
// element address to tev_lossless_block_t*. If block_data's element type is smaller than
// tev_lossless_block_t, neighbouring records would overlap -- confirm the element type and
// the allocation size for lossless mode.
static void encode_block_lossless(tev_encoder_t *enc, int block_x, int block_y, int is_keyframe) {
    const int blocks_per_row = (enc->width + 15) / 16;
    tev_lossless_block_t *block =
        (tev_lossless_block_t*)&enc->block_data[block_y * blocks_per_row + block_x];

    // Extract YCoCg-R block
    extract_ycocgr_block(enc->current_rgb, enc->width, enc->height,
                         block_x, block_y,
                         enc->y_workspace, enc->co_workspace, enc->cg_workspace);

    // Mode decision: intra for keyframes and (for simplicity, for now) for every other
    // frame as well.
    (void)is_keyframe;
    block->mode = TEV_MODE_INTRA;
    block->mv_y = 0;
    block->mv_x = 0;
    enc->blocks_intra++;

    // Lossless mode: rate control factor is always 1.0f
    block->rate_control_factor = 1.0f;

    int k;

    // Y channel (16x16): transform, then store directly without quantization
    dct_16x16_fast(enc->y_workspace, enc->dct_workspace);
    for (k = 0; k < BLOCK_SIZE_SQR; k++) {
        block->y_coeffs[k] = enc->dct_workspace[k];
    }

    // Co channel (8x8)
    dct_8x8_fast(enc->co_workspace, enc->dct_workspace);
    for (k = 0; k < HALF_BLOCK_SIZE_SQR; k++) {
        block->co_coeffs[k] = enc->dct_workspace[k];
    }

    // Cg channel (8x8)
    dct_8x8_fast(enc->cg_workspace, enc->dct_workspace);
    for (k = 0; k < HALF_BLOCK_SIZE_SQR; k++) {
        block->cg_coeffs[k] = enc->dct_workspace[k];
    }

    // Set CBP (simplified - always encode all channels)
    block->cbp = 0x07; // Y, Co, Cg all present
}
// On-disk layout of one lossless block.
// The struct is packed, so fields are laid out back to back in declaration
// order with no padding. Coefficients are stored as 16-bit float16 bit patterns.
typedef struct __attribute__((packed)) {
    uint8_t  mode;                           // block coding mode
    int16_t  mv_x;                           // motion vector, horizontal component
    int16_t  mv_y;                           // motion vector, vertical component
    float    rate_control_factor;            // always 1.0f in lossless mode
    uint16_t cbp;                            // coded block pattern
    uint16_t y_coeffs[BLOCK_SIZE_SQR];       // Y coefficients as float16
    uint16_t co_coeffs[HALF_BLOCK_SIZE_SQR]; // Co coefficients as float16
    uint16_t cg_coeffs[HALF_BLOCK_SIZE_SQR]; // Cg coefficients as float16
} tev_serialized_lossless_block_t;
// Convert every encoded lossless block into its serialized form.
//
// Walks the blocks_x * blocks_y blocks held in enc->block_data in row-major
// order, copies the header fields and converts each float32 coefficient to
// float16 after clamping it to the finite float16 range. When enc->verbose is
// set, a warning is printed for every coefficient that had to be clamped.
//
//   enc                encoder state holding the source blocks
//   blocks_x, blocks_y block grid dimensions; nothing is done unless both are > 0
//   serialized_blocks  output array, must hold blocks_x * blocks_y entries
//
// NOTE(review): source blocks are located by indexing enc->block_data with its
// own element stride and reinterpreting the slot as tev_lossless_block_t --
// confirm this matches how the blocks were written and how the buffer is sized.
static void serialize_lossless_blocks(tev_encoder_t *enc, int blocks_x, int blocks_y,
                                      tev_serialized_lossless_block_t *serialized_blocks) {
    // Largest finite float16 magnitude (approximately 65504)
    const float FLOAT16_MAX = 65504.0f;

    if (blocks_x <= 0 || blocks_y <= 0) {
        return; // empty grid: no block to convert
    }

    // Row-major traversal flattened into one linear index (by * blocks_x + bx)
    const int block_count = blocks_x * blocks_y;
    for (int n = 0; n < block_count; n++) {
        tev_lossless_block_t *src = (tev_lossless_block_t*)&enc->block_data[n];
        tev_serialized_lossless_block_t *dst = &serialized_blocks[n];

        // Header fields are carried over unchanged
        dst->mode = src->mode;
        dst->mv_x = src->mv_x;
        dst->mv_y = src->mv_y;
        dst->rate_control_factor = src->rate_control_factor;
        dst->cbp = src->cbp;

        // Luma coefficients: clamp, convert, optionally report the clamp
        for (int k = 0; k < BLOCK_SIZE_SQR; k++) {
            const float y_raw = src->y_coeffs[k];
            const float y_clamped = FCLAMP(y_raw, -FLOAT16_MAX, FLOAT16_MAX);
            dst->y_coeffs[k] = float_to_float16(y_clamped);
            if (enc->verbose && fabsf(y_raw) > FLOAT16_MAX) {
                printf("WARNING: Y coefficient %d clamped: %f -> %f\n", k, y_raw, y_clamped);
            }
        }

        // Chroma coefficients: Co and Cg share one pass so that any warnings
        // for the same index are reported together, Co first
        for (int k = 0; k < HALF_BLOCK_SIZE_SQR; k++) {
            const float co_raw = src->co_coeffs[k];
            const float cg_raw = src->cg_coeffs[k];
            const float co_clamped = FCLAMP(co_raw, -FLOAT16_MAX, FLOAT16_MAX);
            const float cg_clamped = FCLAMP(cg_raw, -FLOAT16_MAX, FLOAT16_MAX);

            dst->co_coeffs[k] = float_to_float16(co_clamped);
            dst->cg_coeffs[k] = float_to_float16(cg_clamped);

            if (enc->verbose && fabsf(co_raw) > FLOAT16_MAX) {
                printf("WARNING: Co coefficient %d clamped: %f -> %f\n", k, co_raw, co_clamped);
            }
            if (enc->verbose && fabsf(cg_raw) > FLOAT16_MAX) {
                printf("WARNING: Cg coefficient %d clamped: %f -> %f\n", k, cg_raw, cg_clamped);
            }
        }
    }
}
// Convert SubRip time format (HH:MM:SS,mmm) to frame number // Convert SubRip time format (HH:MM:SS,mmm) to frame number
static int srt_time_to_frame(const char *time_str, int fps) { static int srt_time_to_frame(const char *time_str, int fps) {
int hours, minutes, seconds, milliseconds; int hours, minutes, seconds, milliseconds;
@@ -1182,7 +1347,7 @@ static subtitle_entry_t* parse_srt_file(const char *filename, int fps) {
} }
} }
fclose(file); //fclose(file); // why uncommenting it errors out with "Fatal error: glibc detected an invalid stdio handle"?
return head; return head;
} }
@@ -1613,6 +1778,7 @@ static tev_encoder_t* init_encoder(void) {
enc->output_fps = 0; // No frame rate conversion by default enc->output_fps = 0; // No frame rate conversion by default
enc->is_ntsc_framerate = 0; // Will be detected from input enc->is_ntsc_framerate = 0; // Will be detected from input
enc->verbose = 0; enc->verbose = 0;
enc->disable_rcf = 1;
enc->subtitle_file = NULL; enc->subtitle_file = NULL;
enc->has_subtitles = 0; enc->has_subtitles = 0;
enc->subtitle_list = NULL; enc->subtitle_list = NULL;
@@ -1655,7 +1821,16 @@ static int alloc_encoder_buffers(tev_encoder_t *enc) {
enc->dct_workspace = malloc(16 * 16 * sizeof(float)); enc->dct_workspace = malloc(16 * 16 * sizeof(float));
enc->block_data = malloc(total_blocks * sizeof(tev_block_t)); enc->block_data = malloc(total_blocks * sizeof(tev_block_t));
enc->compressed_buffer = malloc(total_blocks * sizeof(tev_block_t) * 2); // Allocate compression buffer large enough for both regular and lossless modes
size_t max_block_size = sizeof(tev_block_t) > sizeof(tev_serialized_lossless_block_t) ?
sizeof(tev_block_t) : sizeof(tev_serialized_lossless_block_t);
size_t compressed_buffer_size = total_blocks * max_block_size * 2;
enc->compressed_buffer = malloc(compressed_buffer_size);
if (enc->verbose) {
printf("Allocated compressed buffer: %zu bytes for %d blocks (max_block_size: %zu)\n",
compressed_buffer_size, total_blocks, max_block_size);
}
enc->mp2_buffer = malloc(MP2_DEFAULT_PACKET_SIZE); enc->mp2_buffer = malloc(MP2_DEFAULT_PACKET_SIZE);
if (!enc->current_rgb || !enc->previous_rgb || !enc->reference_rgb || if (!enc->current_rgb || !enc->previous_rgb || !enc->reference_rgb ||
@@ -1726,7 +1901,7 @@ static int write_tev_header(FILE *output, tev_encoder_t *enc) {
uint8_t qualityCo = enc->qualityCo; uint8_t qualityCo = enc->qualityCo;
uint8_t qualityCg = enc->qualityCg; uint8_t qualityCg = enc->qualityCg;
uint8_t flags = (enc->has_audio) | (enc->has_subtitles << 1); uint8_t flags = (enc->has_audio) | (enc->has_subtitles << 1);
uint8_t video_flags = (enc->progressive_mode ? 0 : 1) | (enc->is_ntsc_framerate ? 2 : 0); // bit 0 = is_interlaced, bit 1 = is_ntsc_framerate uint8_t video_flags = (enc->progressive_mode ? 0 : 1) | (enc->is_ntsc_framerate ? 2 : 0) | (enc->lossless_mode ? 4 : 0); // bit 0 = is_interlaced, bit 1 = is_ntsc_framerate, bit 2 = is_lossless
uint8_t reserved = 0; uint8_t reserved = 0;
fwrite(&width, 2, 1, output); fwrite(&width, 2, 1, output);
@@ -1833,7 +2008,11 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num, int fie
// Encode all blocks // Encode all blocks
for (int by = 0; by < blocks_y; by++) { for (int by = 0; by < blocks_y; by++) {
for (int bx = 0; bx < blocks_x; bx++) { for (int bx = 0; bx < blocks_x; bx++) {
encode_block(enc, bx, by, is_keyframe); if (enc->lossless_mode) {
encode_block_lossless(enc, bx, by, is_keyframe);
} else {
encode_block(enc, bx, by, is_keyframe);
}
// Calculate complexity for rate control (if enabled) // Calculate complexity for rate control (if enabled)
if (enc->bitrate_mode > 0) { if (enc->bitrate_mode > 0) {
@@ -1849,13 +2028,34 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num, int fie
} }
// Compress block data using Zstd (compatible with TSVM decoder) // Compress block data using Zstd (compatible with TSVM decoder)
size_t block_data_size = blocks_x * blocks_y * sizeof(tev_block_t); size_t compressed_size;
// Compress using Zstd with controlled memory usage if (enc->lossless_mode) {
size_t compressed_size = ZSTD_compressCCtx(enc->zstd_context, // Lossless mode: serialize blocks with float16 coefficients
enc->compressed_buffer, block_data_size * 2, size_t serialized_block_data_size = blocks_x * blocks_y * sizeof(tev_serialized_lossless_block_t);
enc->block_data, block_data_size, tev_serialized_lossless_block_t *serialized_blocks = malloc(serialized_block_data_size);
ZSTD_COMPRESSON_LEVEL); if (!serialized_blocks) {
fprintf(stderr, "Failed to allocate memory for serialized lossless blocks\n");
return -1;
}
serialize_lossless_blocks(enc, blocks_x, blocks_y, serialized_blocks);
// Use the pre-allocated buffer size instead of calculating dynamically
size_t output_buffer_size = blocks_x * blocks_y * sizeof(tev_serialized_lossless_block_t) * 2;
compressed_size = ZSTD_compressCCtx(enc->zstd_context,
enc->compressed_buffer, output_buffer_size,
serialized_blocks, serialized_block_data_size,
ZSTD_COMPRESSON_LEVEL);
free(serialized_blocks);
} else {
// Regular mode: use regular block data
size_t block_data_size = blocks_x * blocks_y * sizeof(tev_block_t);
compressed_size = ZSTD_compressCCtx(enc->zstd_context,
enc->compressed_buffer, block_data_size * 2,
enc->block_data, block_data_size,
ZSTD_COMPRESSON_LEVEL);
}
if (ZSTD_isError(compressed_size)) { if (ZSTD_isError(compressed_size)) {
fprintf(stderr, "Zstd compression failed: %s\n", ZSTD_getErrorName(compressed_size)); fprintf(stderr, "Zstd compression failed: %s\n", ZSTD_getErrorName(compressed_size));
@@ -2088,7 +2288,7 @@ static int start_audio_conversion(tev_encoder_t *enc) {
char command[2048]; char command[2048];
snprintf(command, sizeof(command), snprintf(command, sizeof(command),
"ffmpeg -v quiet -i \"%s\" -acodec libtwolame -psymodel 4 -b:a %dk -ar %d -ac 2 -y \"%s\" 2>/dev/null", "ffmpeg -v quiet -i \"%s\" -acodec libtwolame -psymodel 4 -b:a %dk -ar %d -ac 2 -y \"%s\" 2>/dev/null",
enc->input_file, MP2_RATE_TABLE[enc->qualityIndex], MP2_SAMPLE_RATE, TEMP_AUDIO_FILE); enc->input_file, enc->lossless_mode ? 384 : MP2_RATE_TABLE[enc->qualityIndex], MP2_SAMPLE_RATE, TEMP_AUDIO_FILE);
int result = system(command); int result = system(command);
if (result == 0) { if (result == 0) {
@@ -2236,15 +2436,16 @@ static void show_usage(const char *program_name) {
printf(" -o, --output FILE Output video file (use '-' for stdout)\n"); printf(" -o, --output FILE Output video file (use '-' for stdout)\n");
printf(" -s, --size WxH Video size (default: %dx%d)\n", DEFAULT_WIDTH, DEFAULT_HEIGHT); printf(" -s, --size WxH Video size (default: %dx%d)\n", DEFAULT_WIDTH, DEFAULT_HEIGHT);
printf(" -f, --fps N Output frames per second (enables frame rate conversion)\n"); printf(" -f, --fps N Output frames per second (enables frame rate conversion)\n");
printf(" -q, --quality N Quality level 0-4 (default: 2, only decides audio rate in quantiser mode)\n"); printf(" -q, --quality N Quality level 0-4 (default: 2, only decides audio rate in quantiser/lossless mode)\n");
printf(" -Q, --quantiser N Quantiser level 0-100 (100: lossless, 0: potato)\n"); printf(" -Q, --quantiser N Quantiser level 0-100 (100: lossless, 0: potato)\n");
// printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode; DON'T USE - NOT WORKING AS INTENDED)\n"); // printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode; DON'T USE - NOT WORKING AS INTENDED)\n");
printf(" -p, --progressive Use progressive scan (default: interlaced)\n"); printf(" -p, --progressive Use progressive scan (default: interlaced)\n");
printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n"); printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n");
printf(" -v, --verbose Verbose output\n"); printf(" -v, --verbose Verbose output\n");
printf(" -t, --test Test mode: generate solid colour frames\n"); printf(" -t, --test Test mode: generate solid colour frames\n");
printf(" --lossless Lossless mode: store coefficients as float16 (no quantisation, implies -p, 384k audio)\n");
printf(" --enable-rcf Enable per-block rate control (experimental)\n");
printf(" --enable-encode-stats Collect and report block complexity statistics\n"); printf(" --enable-encode-stats Collect and report block complexity statistics\n");
printf(" --disable-rcf Disable per-block rate control\n");
printf(" --help Show this help\n\n"); printf(" --help Show this help\n\n");
// printf("Rate Control Modes:\n"); // printf("Rate Control Modes:\n");
// printf(" Quality mode (default): Fixed quantisation based on -q parameter\n"); // printf(" Quality mode (default): Fixed quantisation based on -q parameter\n");
@@ -2334,7 +2535,8 @@ int main(int argc, char *argv[]) {
{"verbose", no_argument, 0, 'v'}, {"verbose", no_argument, 0, 'v'},
{"test", no_argument, 0, 't'}, {"test", no_argument, 0, 't'},
{"enable-encode-stats", no_argument, 0, 1000}, {"enable-encode-stats", no_argument, 0, 1000},
{"disable-rcf", no_argument, 0, 1100}, {"enable-rcf", no_argument, 0, 1100},
{"lossless", no_argument, 0, 1200},
{"help", no_argument, 0, '?'}, {"help", no_argument, 0, '?'},
{0, 0, 0, 0} {0, 0, 0, 0}
}; };
@@ -2403,11 +2605,14 @@ int main(int argc, char *argv[]) {
case 't': case 't':
test_mode = 1; test_mode = 1;
break; break;
case 1000: // --enable-encode-stats case 1000: // --enable-encode-stats
enc->stats_mode = 1; enc->stats_mode = 1;
break; break;
case 1100: // --disable-rcf case 1100: // --enable-rcf
enc->disable_rcf = 1; enc->disable_rcf = 0;
break;
case 1200: // --lossless
enc->lossless_mode = 1;
break; break;
case 0: case 0:
if (strcmp(long_options[option_index].name, "help") == 0) { if (strcmp(long_options[option_index].name, "help") == 0) {
@@ -2419,7 +2624,7 @@ int main(int argc, char *argv[]) {
case 'Q': case 'Q':
enc->qualityY = CLAMP(atoi(optarg), 0, 100); enc->qualityY = CLAMP(atoi(optarg), 0, 100);
enc->qualityCo = enc->qualityY; enc->qualityCo = enc->qualityY;
enc->qualityCg = (enc->qualityY == 100) ? enc->qualityY : enc->qualityCo >> 2; enc->qualityCg = (enc->qualityY == 100) ? enc->qualityY : enc->qualityCo >> 1;
break; break;
default: default:
show_usage(argv[0]); show_usage(argv[0]);
@@ -2428,6 +2633,19 @@ int main(int argc, char *argv[]) {
} }
} }
// Lossless mode validation and adjustments
if (enc->lossless_mode) {
// In lossless mode, disable rate control and set quality to maximum
enc->bitrate_mode = 0;
enc->disable_rcf = 1;
enc->progressive_mode = 1;
enc->qualityIndex = 5;
enc->qualityY = enc->qualityCo = enc->qualityCg = 255; // Use 255 as a redundant lossless marker
if (enc->verbose) {
printf("Lossless mode enabled: Rate control disabled, quality set to maximum, enabling progressive scan\n");
}
}
// halve the internal representation of frame height // halve the internal representation of frame height
if (!enc->progressive_mode) { if (!enc->progressive_mode) {
enc->height /= 2; enc->height /= 2;