mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-17 00:16:04 +09:00
TAV: preset implementation
This commit is contained in:
@@ -416,9 +416,10 @@ header.videoFlags = seqread.readOneByte()
|
|||||||
header.qualityLevel = seqread.readOneByte() // the decoder expects biased value
|
header.qualityLevel = seqread.readOneByte() // the decoder expects biased value
|
||||||
header.channelLayout = seqread.readOneByte()
|
header.channelLayout = seqread.readOneByte()
|
||||||
header.entropyCoder = seqread.readOneByte()
|
header.entropyCoder = seqread.readOneByte()
|
||||||
|
header.encoderPreset = seqread.readOneByte() // Byte 28: bit 0 = sports, bit 1 = anime
|
||||||
|
|
||||||
// Skip reserved bytes (2) and device orientation (1)
|
// Skip reserved byte (1) and device orientation (1)
|
||||||
seqread.skip(3)
|
seqread.skip(2)
|
||||||
|
|
||||||
header.fileRole = seqread.readOneByte()
|
header.fileRole = seqread.readOneByte()
|
||||||
|
|
||||||
@@ -1248,7 +1249,8 @@ try {
|
|||||||
header.decompLevels, // TAV-specific parameter
|
header.decompLevels, // TAV-specific parameter
|
||||||
isLossless,
|
isLossless,
|
||||||
header.version, // TAV version for colour space detection
|
header.version, // TAV version for colour space detection
|
||||||
header.entropyCoder // Entropy coder: 0 = Twobit-map, 1 = EZBC
|
header.entropyCoder, // Entropy coder: 0 = Twobit-map, 1 = EZBC
|
||||||
|
header.encoderPreset // Encoder preset: bit 0 = sports, bit 1 = anime
|
||||||
)
|
)
|
||||||
|
|
||||||
decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0
|
decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0
|
||||||
@@ -1344,7 +1346,8 @@ try {
|
|||||||
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
|
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
|
||||||
header.entropyCoder,
|
header.entropyCoder,
|
||||||
bufferOffset,
|
bufferOffset,
|
||||||
header.temporalMotionCoder
|
header.temporalMotionCoder,
|
||||||
|
header.encoderPreset // Encoder preset: bit 0 = sports, bit 1 = anime
|
||||||
)
|
)
|
||||||
|
|
||||||
asyncDecodeInProgress = true
|
asyncDecodeInProgress = true
|
||||||
@@ -1418,7 +1421,8 @@ try {
|
|||||||
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
|
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
|
||||||
header.entropyCoder,
|
header.entropyCoder,
|
||||||
nextOffset,
|
nextOffset,
|
||||||
header.temporalMotionCoder
|
header.temporalMotionCoder,
|
||||||
|
header.encoderPreset
|
||||||
)
|
)
|
||||||
|
|
||||||
// Set async decode tracking variables
|
// Set async decode tracking variables
|
||||||
@@ -1461,7 +1465,8 @@ try {
|
|||||||
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
|
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
|
||||||
header.entropyCoder,
|
header.entropyCoder,
|
||||||
decodingOffset,
|
decodingOffset,
|
||||||
header.temporalMotionCoder
|
header.temporalMotionCoder,
|
||||||
|
header.encoderPreset
|
||||||
)
|
)
|
||||||
|
|
||||||
// Set async decode tracking variables
|
// Set async decode tracking variables
|
||||||
@@ -1829,7 +1834,8 @@ try {
|
|||||||
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
|
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
|
||||||
header.entropyCoder,
|
header.entropyCoder,
|
||||||
readyGopData.slot * SLOT_SIZE,
|
readyGopData.slot * SLOT_SIZE,
|
||||||
header.temporalMotionCoder
|
header.temporalMotionCoder,
|
||||||
|
header.encoderPreset
|
||||||
)
|
)
|
||||||
|
|
||||||
// CRITICAL FIX: Set async decode tracking variables so decode is properly tracked
|
// CRITICAL FIX: Set async decode tracking variables so decode is properly tracked
|
||||||
@@ -2021,7 +2027,8 @@ try {
|
|||||||
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
|
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
|
||||||
header.entropyCoder,
|
header.entropyCoder,
|
||||||
decodingGopData.slot * SLOT_SIZE,
|
decodingGopData.slot * SLOT_SIZE,
|
||||||
header.temporalMotionCoder
|
header.temporalMotionCoder,
|
||||||
|
header.encoderPreset
|
||||||
)
|
)
|
||||||
|
|
||||||
// CRITICAL FIX: Set async decode tracking variables so decode is properly tracked
|
// CRITICAL FIX: Set async decode tracking variables so decode is properly tracked
|
||||||
@@ -2062,7 +2069,8 @@ try {
|
|||||||
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
|
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
|
||||||
header.entropyCoder,
|
header.entropyCoder,
|
||||||
readyGopData.slot * SLOT_SIZE,
|
readyGopData.slot * SLOT_SIZE,
|
||||||
header.temporalMotionCoder
|
header.temporalMotionCoder,
|
||||||
|
header.encoderPreset
|
||||||
)
|
)
|
||||||
readyGopData.needsDecode = false
|
readyGopData.needsDecode = false
|
||||||
readyGopData.startTime = sys.nanoTime()
|
readyGopData.startTime = sys.nanoTime()
|
||||||
@@ -2140,7 +2148,8 @@ try {
|
|||||||
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
|
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
|
||||||
header.entropyCoder,
|
header.entropyCoder,
|
||||||
targetOffset,
|
targetOffset,
|
||||||
header.temporalMotionCoder
|
header.temporalMotionCoder,
|
||||||
|
header.encoderPreset
|
||||||
)
|
)
|
||||||
|
|
||||||
asyncDecodeInProgress = true
|
asyncDecodeInProgress = true
|
||||||
|
|||||||
@@ -47,25 +47,19 @@ import kotlin.collections.component2
|
|||||||
import kotlin.collections.component3
|
import kotlin.collections.component3
|
||||||
import kotlin.collections.component4
|
import kotlin.collections.component4
|
||||||
import kotlin.collections.copyOf
|
import kotlin.collections.copyOf
|
||||||
import kotlin.collections.count
|
|
||||||
import kotlin.collections.fill
|
import kotlin.collections.fill
|
||||||
import kotlin.collections.first
|
|
||||||
import kotlin.collections.forEach
|
import kotlin.collections.forEach
|
||||||
import kotlin.collections.forEachIndexed
|
import kotlin.collections.forEachIndexed
|
||||||
import kotlin.collections.indices
|
import kotlin.collections.indices
|
||||||
import kotlin.collections.isNotEmpty
|
import kotlin.collections.isNotEmpty
|
||||||
import kotlin.collections.last
|
|
||||||
import kotlin.collections.listOf
|
import kotlin.collections.listOf
|
||||||
import kotlin.collections.map
|
import kotlin.collections.map
|
||||||
import kotlin.collections.maxOfOrNull
|
|
||||||
import kotlin.collections.mutableListOf
|
import kotlin.collections.mutableListOf
|
||||||
import kotlin.collections.mutableMapOf
|
import kotlin.collections.mutableMapOf
|
||||||
import kotlin.collections.set
|
import kotlin.collections.set
|
||||||
import kotlin.collections.sliceArray
|
import kotlin.collections.sliceArray
|
||||||
import kotlin.collections.sorted
|
import kotlin.collections.sorted
|
||||||
import kotlin.collections.sumOf
|
|
||||||
import kotlin.collections.toFloatArray
|
import kotlin.collections.toFloatArray
|
||||||
import kotlin.collections.toList
|
|
||||||
import kotlin.error
|
import kotlin.error
|
||||||
import kotlin.floatArrayOf
|
import kotlin.floatArrayOf
|
||||||
import kotlin.fromBits
|
import kotlin.fromBits
|
||||||
@@ -5039,9 +5033,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
* - Level 1 (tH): 1.0 × 2^0.8 = 1.74
|
* - Level 1 (tH): 1.0 × 2^0.8 = 1.74
|
||||||
* - Level 2 (tHH): 1.0 × 2^1.6 = 3.03
|
* - Level 2 (tHH): 1.0 × 2^1.6 = 3.03
|
||||||
*/
|
*/
|
||||||
private fun getTemporalQuantizerScale(temporalLevel: Int): Float {
|
private fun getTemporalQuantizerScale(encoderPreset: Int, temporalLevel: Int): Float {
|
||||||
val BETA = 0.6f // Temporal scaling exponent (aggressive for temporal high-pass)
|
val BETA = if (encoderPreset and 0x01 == 1) 0.0f else 0.6f // Temporal scaling exponent (aggressive for temporal high-pass)
|
||||||
val KAPPA = 1.14f
|
val KAPPA = if (encoderPreset and 0x01 == 1) 1.0f else 1.14f
|
||||||
return 2.0f.pow(BETA * temporalLevel.toFloat().pow(KAPPA))
|
return 2.0f.pow(BETA * temporalLevel.toFloat().pow(KAPPA))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -5177,8 +5171,13 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
|
|
||||||
// Remove grain synthesis from DWT coefficients (decoder subtracts noise)
|
// Remove grain synthesis from DWT coefficients (decoder subtracts noise)
|
||||||
// This must be called AFTER dequantization but BEFORE inverse DWT
|
// This must be called AFTER dequantization but BEFORE inverse DWT
|
||||||
private fun removeGrainSynthesisDecoder(coeffs: FloatArray, width: Int, height: Int,
|
private fun tavApplyGrainSynthesis(coeffs: FloatArray, width: Int, height: Int,
|
||||||
frameNum: Int, subbands: List<DWTSubbandInfo>, qYGlobal: Int) {
|
frameNum: Int, subbands: List<DWTSubbandInfo>, qYGlobal: Int, encoderPreset: Int = 0) {
|
||||||
|
// Anime preset: completely disable grain synthesis
|
||||||
|
if ((encoderPreset and 0x02) != 0) {
|
||||||
|
return // Skip grain synthesis entirely
|
||||||
|
}
|
||||||
|
|
||||||
// Only apply to Y channel, excluding LL band
|
// Only apply to Y channel, excluding LL band
|
||||||
// Noise amplitude = half of quantization step (scaled by perceptual weight if enabled)
|
// Noise amplitude = half of quantization step (scaled by perceptual weight if enabled)
|
||||||
|
|
||||||
@@ -5220,7 +5219,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
// New tavDecode function that accepts compressed data and decompresses internally
|
// New tavDecode function that accepts compressed data and decompresses internally
|
||||||
fun tavDecodeCompressed(compressedDataPtr: Long, compressedSize: Int, currentRGBAddr: Long, prevRGBAddr: Long,
|
fun tavDecodeCompressed(compressedDataPtr: Long, compressedSize: Int, currentRGBAddr: Long, prevRGBAddr: Long,
|
||||||
width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int,
|
width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int,
|
||||||
frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1, entropyCoder: Int = 0): HashMap<String, Any> {
|
frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1, entropyCoder: Int = 0, encoderPreset: Int = 0): HashMap<String, Any> {
|
||||||
|
|
||||||
// Read compressed data from VM memory into byte array
|
// Read compressed data from VM memory into byte array
|
||||||
val compressedData = ByteArray(compressedSize)
|
val compressedData = ByteArray(compressedSize)
|
||||||
@@ -5250,7 +5249,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
// Call the existing tavDecode function with decompressed data
|
// Call the existing tavDecode function with decompressed data
|
||||||
tavDecode(decompressedBuffer.toLong(), currentRGBAddr, prevRGBAddr,
|
tavDecode(decompressedBuffer.toLong(), currentRGBAddr, prevRGBAddr,
|
||||||
width, height, qIndex, qYGlobal, qCoGlobal, qCgGlobal, channelLayout,
|
width, height, qIndex, qYGlobal, qCoGlobal, qCgGlobal, channelLayout,
|
||||||
frameCount, waveletFilter, decompLevels, isLossless, tavVersion, entropyCoder)
|
frameCount, waveletFilter, decompLevels, isLossless, tavVersion, entropyCoder, encoderPreset)
|
||||||
|
|
||||||
} finally {
|
} finally {
|
||||||
// Clean up allocated buffer
|
// Clean up allocated buffer
|
||||||
@@ -5266,7 +5265,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
// Original tavDecode function for backward compatibility (now handles decompressed data)
|
// Original tavDecode function for backward compatibility (now handles decompressed data)
|
||||||
fun tavDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
|
fun tavDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
|
||||||
width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int,
|
width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int,
|
||||||
frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1, entropyCoder: Int = 0): HashMap<String, Any> {
|
frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1, entropyCoder: Int = 0, encoderPreset: Int = 0): HashMap<String, Any> {
|
||||||
|
|
||||||
val dbgOut = HashMap<String, Any>()
|
val dbgOut = HashMap<String, Any>()
|
||||||
|
|
||||||
@@ -5328,14 +5327,14 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
0x01 -> { // TAV_MODE_INTRA
|
0x01 -> { // TAV_MODE_INTRA
|
||||||
// Decode DWT coefficients directly to RGB buffer
|
// Decode DWT coefficients directly to RGB buffer
|
||||||
readPtr = tavDecodeDWTIntraTileRGB(qIndex, qYGlobal, channelLayout, readPtr, tileX, tileY, currentRGBAddr,
|
readPtr = tavDecodeDWTIntraTileRGB(qIndex, qYGlobal, channelLayout, readPtr, tileX, tileY, currentRGBAddr,
|
||||||
width, height, qY, qCo, qCg, entropyCoder,
|
width, height, qY, qCo, qCg, entropyCoder, encoderPreset,
|
||||||
waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, frameCount)
|
waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, frameCount)
|
||||||
dbgOut["frameMode"] = "I"
|
dbgOut["frameMode"] = "I"
|
||||||
}
|
}
|
||||||
0x02 -> { // TAV_MODE_DELTA (with optional Haar wavelet)
|
0x02 -> { // TAV_MODE_DELTA (with optional Haar wavelet)
|
||||||
// Coefficient delta encoding for efficient P-frames
|
// Coefficient delta encoding for efficient P-frames
|
||||||
readPtr = tavDecodeDeltaTileRGB(readPtr, channelLayout, tileX, tileY, currentRGBAddr,
|
readPtr = tavDecodeDeltaTileRGB(readPtr, channelLayout, tileX, tileY, currentRGBAddr,
|
||||||
width, height, qY, qCo, qCg, entropyCoder,
|
width, height, qY, qCo, qCg, entropyCoder, encoderPreset,
|
||||||
waveletFilter, decompLevels, tavVersion, isMonoblock, frameCount, haarLevel)
|
waveletFilter, decompLevels, tavVersion, isMonoblock, frameCount, haarLevel)
|
||||||
dbgOut["frameMode"] = " "
|
dbgOut["frameMode"] = " "
|
||||||
}
|
}
|
||||||
@@ -5351,7 +5350,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private fun tavDecodeDWTIntraTileRGB(qIndex: Int, qYGlobal: Int, channelLayout: Int, readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
|
private fun tavDecodeDWTIntraTileRGB(qIndex: Int, qYGlobal: Int, channelLayout: Int, readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
|
||||||
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, entropyCoder: Int,
|
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, entropyCoder: Int, encoderPreset: Int,
|
||||||
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int): Long {
|
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int): Long {
|
||||||
// Determine coefficient count based on mode
|
// Determine coefficient count based on mode
|
||||||
val coeffCount = if (isMonoblock) {
|
val coeffCount = if (isMonoblock) {
|
||||||
@@ -5451,7 +5450,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
|
|
||||||
// Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT)
|
// Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT)
|
||||||
// Use perceptual weights since this is the perceptual quantization path
|
// Use perceptual weights since this is the perceptual quantization path
|
||||||
removeGrainSynthesisDecoder(yTile, tileWidth, tileHeight, frameCount, subbands, qYGlobal)
|
tavApplyGrainSynthesis(yTile, tileWidth, tileHeight, frameCount, subbands, qYGlobal, encoderPreset)
|
||||||
|
|
||||||
// Apply film grain filter if enabled
|
// Apply film grain filter if enabled
|
||||||
// commented; grain synthesis is now a part of the spec
|
// commented; grain synthesis is now a part of the spec
|
||||||
@@ -5476,7 +5475,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X
|
val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X
|
||||||
val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y
|
val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y
|
||||||
val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels)
|
val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels)
|
||||||
removeGrainSynthesisDecoder(yTile, tileWidth, tileHeight, frameCount, subbands, qYGlobal)
|
tavApplyGrainSynthesis(yTile, tileWidth, tileHeight, frameCount, subbands, qYGlobal, encoderPreset)
|
||||||
|
|
||||||
// Apply film grain filter if enabled
|
// Apply film grain filter if enabled
|
||||||
// commented; grain synthesis is now a part of the spec
|
// commented; grain synthesis is now a part of the spec
|
||||||
@@ -5774,7 +5773,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private fun tavDecodeDeltaTileRGB(readPtr: Long, channelLayout: Int, tileX: Int, tileY: Int, currentRGBAddr: Long,
|
private fun tavDecodeDeltaTileRGB(readPtr: Long, channelLayout: Int, tileX: Int, tileY: Int, currentRGBAddr: Long,
|
||||||
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, entropyCoder: Int,
|
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, entropyCoder: Int, encoderPreset: Int,
|
||||||
spatialFilter: Int, decompLevels: Int, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int = 0, haarLevel: Int = 0): Long {
|
spatialFilter: Int, decompLevels: Int, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int = 0, haarLevel: Int = 0): Long {
|
||||||
|
|
||||||
val tileIdx = if (isMonoblock) {
|
val tileIdx = if (isMonoblock) {
|
||||||
@@ -5927,7 +5926,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
// Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT)
|
// Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT)
|
||||||
val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels)
|
val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels)
|
||||||
// Delta frames use uniform quantization for the deltas themselves, so no perceptual weights
|
// Delta frames use uniform quantization for the deltas themselves, so no perceptual weights
|
||||||
removeGrainSynthesisDecoder(currentY, tileWidth, tileHeight, frameCount, subbands, qY)
|
tavApplyGrainSynthesis(currentY, tileWidth, tileHeight, frameCount, subbands, qY, encoderPreset)
|
||||||
|
|
||||||
// Store current coefficients as previous for next frame
|
// Store current coefficients as previous for next frame
|
||||||
tavPreviousCoeffsY!![tileIdx] = currentY.clone()
|
tavPreviousCoeffsY!![tileIdx] = currentY.clone()
|
||||||
@@ -6475,7 +6474,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
temporalLevels: Int = 2,
|
temporalLevels: Int = 2,
|
||||||
entropyCoder: Int = 0,
|
entropyCoder: Int = 0,
|
||||||
bufferOffset: Long = 0,
|
bufferOffset: Long = 0,
|
||||||
temporalMotionCoder: Int = 0
|
temporalMotionCoder: Int = 0,
|
||||||
|
encoderPreset: Int = 0
|
||||||
): Array<Any> {
|
): Array<Any> {
|
||||||
val dbgOut = HashMap<String, Any>()
|
val dbgOut = HashMap<String, Any>()
|
||||||
dbgOut["qY"] = qYGlobal
|
dbgOut["qY"] = qYGlobal
|
||||||
@@ -6547,9 +6547,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
// Step 5: Dequantize with temporal-spatial scaling
|
// Step 5: Dequantize with temporal-spatial scaling
|
||||||
for (t in 0 until gopSize) {
|
for (t in 0 until gopSize) {
|
||||||
val temporalLevel = getTemporalSubbandLevel(t, gopSize, temporalLevels)
|
val temporalLevel = getTemporalSubbandLevel(t, gopSize, temporalLevels)
|
||||||
val temporalScale = getTemporalQuantizerScale(temporalLevel)
|
val temporalScale = getTemporalQuantizerScale(encoderPreset, temporalLevel)
|
||||||
|
|
||||||
// CRITICAL FIX: Must ROUND temporal quantizer to match encoder's roundf() behavior
|
// CRITICAL FIX: Must ROUND temporal quantizer to match encoder's roundf() behaviour
|
||||||
// Encoder (encoder_tav.c:3189): temporal_base_quantiser = (int)roundf(temporal_quantiser)
|
// Encoder (encoder_tav.c:3189): temporal_base_quantiser = (int)roundf(temporal_quantiser)
|
||||||
// Without rounding, decoder uses float values (e.g., 1.516) while encoder used integers (e.g., 2)
|
// Without rounding, decoder uses float values (e.g., 1.516) while encoder used integers (e.g., 2)
|
||||||
// This causes ~24% under-reconstruction for odd baseQ values in temporal high-pass frames (Frame 5+)
|
// This causes ~24% under-reconstruction for odd baseQ values in temporal high-pass frames (Frame 5+)
|
||||||
@@ -6587,10 +6587,10 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
// This must happen after dequantization but before inverse DWT
|
// This must happen after dequantization but before inverse DWT
|
||||||
// Use GOP dimensions (may be cropped)
|
// Use GOP dimensions (may be cropped)
|
||||||
for (t in 0 until gopSize) {
|
for (t in 0 until gopSize) {
|
||||||
removeGrainSynthesisDecoder(
|
tavApplyGrainSynthesis(
|
||||||
gopY[t], gopWidth, gopHeight,
|
gopY[t], gopWidth, gopHeight,
|
||||||
rngFrameTick.getAndAdd(1) + t,
|
rngFrameTick.getAndAdd(1) + t,
|
||||||
subbands, qIndex
|
subbands, qIndex, encoderPreset
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -6818,7 +6818,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
temporalLevels: Int = 3,
|
temporalLevels: Int = 3,
|
||||||
entropyCoder: Int = 0,
|
entropyCoder: Int = 0,
|
||||||
bufferOffset: Long = 0,
|
bufferOffset: Long = 0,
|
||||||
temporalMotionCoder: Int = 0
|
temporalMotionCoder: Int = 0,
|
||||||
|
encoderPreset: Int = 0
|
||||||
) {
|
) {
|
||||||
// Cancel any existing decode thread
|
// Cancel any existing decode thread
|
||||||
asyncDecodeThread?.interrupt()
|
asyncDecodeThread?.interrupt()
|
||||||
@@ -6836,7 +6837,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
width, height,
|
width, height,
|
||||||
qIndex, qYGlobal, qCoGlobal, qCgGlobal,
|
qIndex, qYGlobal, qCoGlobal, qCgGlobal,
|
||||||
channelLayout, spatialFilter, spatialLevels, temporalLevels,
|
channelLayout, spatialFilter, spatialLevels, temporalLevels,
|
||||||
entropyCoder, bufferOffset, temporalMotionCoder
|
entropyCoder, bufferOffset, temporalMotionCoder, encoderPreset
|
||||||
)
|
)
|
||||||
asyncDecodeResult = result
|
asyncDecodeResult = result
|
||||||
asyncDecodeComplete.set(true)
|
asyncDecodeComplete.set(true)
|
||||||
|
|||||||
@@ -17,7 +17,7 @@
|
|||||||
#include "decoder_tad.h" // Shared TAD decoder library
|
#include "decoder_tad.h" // Shared TAD decoder library
|
||||||
#include "tav_avx512.h" // AVX-512 SIMD optimisations
|
#include "tav_avx512.h" // AVX-512 SIMD optimisations
|
||||||
|
|
||||||
#define DECODER_VENDOR_STRING "Decoder-TAV 20251124 (avx512)"
|
#define DECODER_VENDOR_STRING "Decoder-TAV 20251124 (avx512,presets)"
|
||||||
|
|
||||||
// TAV format constants
|
// TAV format constants
|
||||||
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56"
|
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56"
|
||||||
@@ -95,7 +95,8 @@ typedef struct {
|
|||||||
uint8_t encoder_quality;
|
uint8_t encoder_quality;
|
||||||
uint8_t channel_layout;
|
uint8_t channel_layout;
|
||||||
uint8_t entropy_coder;
|
uint8_t entropy_coder;
|
||||||
uint8_t reserved[2];
|
uint8_t encoder_preset; // Byte 28: bit 0 = sports, bit 1 = anime
|
||||||
|
uint8_t reserved;
|
||||||
uint8_t device_orientation;
|
uint8_t device_orientation;
|
||||||
uint8_t file_role;
|
uint8_t file_role;
|
||||||
} __attribute__((packed)) tav_header_t;
|
} __attribute__((packed)) tav_header_t;
|
||||||
@@ -394,10 +395,20 @@ static inline float tav_grain_triangular_noise(uint32_t rng_val) {
|
|||||||
return (u1 + u2) - 1.0f;
|
return (u1 + u2) - 1.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove grain synthesis from DWT coefficients (decoder subtracts noise)
|
// Apply grain synthesis to DWT coefficients (decoder subtracts noise)
|
||||||
// This must be called AFTER dequantisation but BEFORE inverse DWT
|
// This must be called AFTER dequantisation but BEFORE inverse DWT
|
||||||
static void remove_grain_synthesis_decoder(float *coeffs, int width, int height,
|
static void apply_grain_synthesis(float *coeffs, int width, int height,
|
||||||
int decomp_levels, int frame_num, int q_y_global) {
|
int decomp_levels, int frame_num, int q_y_global, uint8_t encoder_preset, int no_grain_synthesis) {
|
||||||
|
// Command-line override: disable grain synthesis
|
||||||
|
if (no_grain_synthesis) {
|
||||||
|
return; // Skip grain synthesis entirely
|
||||||
|
}
|
||||||
|
|
||||||
|
// Anime preset: completely disable grain synthesis
|
||||||
|
if (encoder_preset & 0x02) {
|
||||||
|
return; // Skip grain synthesis entirely
|
||||||
|
}
|
||||||
|
|
||||||
dwt_subband_info_t subbands[32];
|
dwt_subband_info_t subbands[32];
|
||||||
const int subband_count = calculate_subband_layout(width, height, decomp_levels, subbands);
|
const int subband_count = calculate_subband_layout(width, height, decomp_levels, subbands);
|
||||||
|
|
||||||
@@ -412,7 +423,7 @@ static void remove_grain_synthesis_decoder(float *coeffs, int width, int height,
|
|||||||
// Calculate band index for RNG (matches Kotlin: level + subbandType * 31 + 16777619)
|
// Calculate band index for RNG (matches Kotlin: level + subbandType * 31 + 16777619)
|
||||||
uint32_t band = subband->level + subband->subband_type * 31 + 16777619;
|
uint32_t band = subband->level + subband->subband_type * 31 + 16777619;
|
||||||
|
|
||||||
// Remove noise from each coefficient in this subband
|
// Apply noise to each coefficient in this subband
|
||||||
for (int i = 0; i < subband->coeff_count; i++) {
|
for (int i = 0; i < subband->coeff_count; i++) {
|
||||||
const int idx = subband->coeff_start + i;
|
const int idx = subband->coeff_start + i;
|
||||||
if (idx < width * height) {
|
if (idx < width * height) {
|
||||||
@@ -1226,14 +1237,14 @@ static int get_temporal_subband_level(int frame_idx, int num_frames, int tempora
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Calculate temporal quantiser scale for a given temporal subband level
|
// Calculate temporal quantiser scale for a given temporal subband level
|
||||||
static float get_temporal_quantiser_scale(int temporal_level) {
|
static float get_temporal_quantiser_scale(uint8_t encoder_preset, int temporal_level) {
|
||||||
// Uses exponential scaling: 2^(BETA × level^KAPPA)
|
// Uses exponential scaling: 2^(BETA × level^KAPPA)
|
||||||
// With BETA=0.6, KAPPA=1.14:
|
// With BETA=0.6, KAPPA=1.14:
|
||||||
// - Level 0 (tLL): 2^0.0 = 1.00
|
// - Level 0 (tLL): 2^0.0 = 1.00
|
||||||
// - Level 1 (tH): 2^0.68 = 1.61
|
// - Level 1 (tH): 2^0.68 = 1.61
|
||||||
// - Level 2 (tHH): 2^1.29 = 2.45
|
// - Level 2 (tHH): 2^1.29 = 2.45
|
||||||
const float BETA = 0.6f; // Temporal scaling exponent
|
const float BETA = (encoder_preset & 0x01) ? 0.0f : 0.6f;
|
||||||
const float KAPPA = 1.14f;
|
const float KAPPA = (encoder_preset & 0x01) ? 1.0f : 1.14f;
|
||||||
return powf(2.0f, BETA * powf(temporal_level, KAPPA));
|
return powf(2.0f, BETA * powf(temporal_level, KAPPA));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1812,6 +1823,7 @@ typedef struct {
|
|||||||
int frame_size;
|
int frame_size;
|
||||||
int is_monoblock; // True if version 3-6 (single tile mode)
|
int is_monoblock; // True if version 3-6 (single tile mode)
|
||||||
int temporal_motion_coder; // Temporal wavelet: 0=Haar, 1=CDF 5/3 (extracted from version)
|
int temporal_motion_coder; // Temporal wavelet: 0=Haar, 1=CDF 5/3 (extracted from version)
|
||||||
|
int no_grain_synthesis; // Command-line flag: disable grain synthesis
|
||||||
|
|
||||||
// Screen masking (letterbox/pillarbox) - array of geometry changes
|
// Screen masking (letterbox/pillarbox) - array of geometry changes
|
||||||
screen_mask_entry_t *screen_masks;
|
screen_mask_entry_t *screen_masks;
|
||||||
@@ -2023,10 +2035,11 @@ static int extract_audio_to_wav(const char *input_file, const char *wav_file, in
|
|||||||
// Decoder Initialisation and Cleanup
|
// Decoder Initialisation and Cleanup
|
||||||
//=============================================================================
|
//=============================================================================
|
||||||
|
|
||||||
static tav_decoder_t* tav_decoder_init(const char *input_file, const char *output_file, const char *audio_file) {
|
static tav_decoder_t* tav_decoder_init(const char *input_file, const char *output_file, const char *audio_file, int no_grain_synthesis) {
|
||||||
tav_decoder_t *decoder = calloc(1, sizeof(tav_decoder_t));
|
tav_decoder_t *decoder = calloc(1, sizeof(tav_decoder_t));
|
||||||
if (!decoder) return NULL;
|
if (!decoder) return NULL;
|
||||||
|
|
||||||
|
decoder->no_grain_synthesis = no_grain_synthesis;
|
||||||
decoder->input_fp = fopen(input_file, "rb");
|
decoder->input_fp = fopen(input_file, "rb");
|
||||||
if (!decoder->input_fp) {
|
if (!decoder->input_fp) {
|
||||||
free(decoder);
|
free(decoder);
|
||||||
@@ -2511,8 +2524,9 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
|
|||||||
|
|
||||||
// Remove grain synthesis from Y channel (must happen after dequantisation, before inverse DWT)
|
// Remove grain synthesis from Y channel (must happen after dequantisation, before inverse DWT)
|
||||||
// Phase 2: Use decoding dimensions and temporary buffer
|
// Phase 2: Use decoding dimensions and temporary buffer
|
||||||
remove_grain_synthesis_decoder(temp_dwt_y, decoder->decoding_width, decoder->decoding_height,
|
apply_grain_synthesis(temp_dwt_y, decoder->decoding_width, decoder->decoding_height,
|
||||||
decoder->header.decomp_levels, decoder->frame_count, decoder->header.quantiser_y);
|
decoder->header.decomp_levels, decoder->frame_count, decoder->header.quantiser_y,
|
||||||
|
decoder->header.encoder_preset, decoder->no_grain_synthesis);
|
||||||
|
|
||||||
// Debug: Check LL band AFTER grain removal
|
// Debug: Check LL band AFTER grain removal
|
||||||
// if (decoder->frame_count == 32) {
|
// if (decoder->frame_count == 32) {
|
||||||
@@ -2712,10 +2726,11 @@ static void print_usage(const char *prog) {
|
|||||||
printf("Version: %s\n\n", DECODER_VENDOR_STRING);
|
printf("Version: %s\n\n", DECODER_VENDOR_STRING);
|
||||||
printf("Usage: %s -i input.tav -o output.mkv\n\n", prog);
|
printf("Usage: %s -i input.tav -o output.mkv\n\n", prog);
|
||||||
printf("Options:\n");
|
printf("Options:\n");
|
||||||
printf(" -i <file> Input TAV file\n");
|
printf(" -i <file> Input TAV file\n");
|
||||||
printf(" -o <file> Output MKV file (optional, auto-generated from input)\n");
|
printf(" -o <file> Output MKV file (optional, auto-generated from input)\n");
|
||||||
printf(" -v Verbose output\n");
|
printf(" -v Verbose output\n");
|
||||||
printf(" -h, --help Show this help\n\n");
|
printf(" --no-grain-synthesis Disable grain synthesis (override encoder preset)\n");
|
||||||
|
printf(" -h, --help Show this help\n\n");
|
||||||
printf("Supported features (matches TSVM decoder):\n");
|
printf("Supported features (matches TSVM decoder):\n");
|
||||||
printf(" - I-frames and P-frames (delta mode)\n");
|
printf(" - I-frames and P-frames (delta mode)\n");
|
||||||
printf(" - GOP unified 3D DWT (temporal compression)\n");
|
printf(" - GOP unified 3D DWT (temporal compression)\n");
|
||||||
@@ -2740,9 +2755,11 @@ int main(int argc, char *argv[]) {
|
|||||||
char *input_file = NULL;
|
char *input_file = NULL;
|
||||||
char *output_file = NULL;
|
char *output_file = NULL;
|
||||||
int verbose = 0;
|
int verbose = 0;
|
||||||
|
int no_grain_synthesis = 0;
|
||||||
|
|
||||||
static struct option long_options[] = {
|
static struct option long_options[] = {
|
||||||
{"help", no_argument, 0, 'h'},
|
{"help", no_argument, 0, 'h'},
|
||||||
|
{"no-grain-synthesis", no_argument, 0, 1000},
|
||||||
{0, 0, 0, 0}
|
{0, 0, 0, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -2761,6 +2778,12 @@ int main(int argc, char *argv[]) {
|
|||||||
case 'h':
|
case 'h':
|
||||||
print_usage(argv[0]);
|
print_usage(argv[0]);
|
||||||
return 0;
|
return 0;
|
||||||
|
case 1000: // --no-grain-synthesis
|
||||||
|
no_grain_synthesis = 1;
|
||||||
|
if (verbose) {
|
||||||
|
printf("Grain synthesis disabled\n");
|
||||||
|
}
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
print_usage(argv[0]);
|
print_usage(argv[0]);
|
||||||
return 1;
|
return 1;
|
||||||
@@ -2819,7 +2842,7 @@ int main(int argc, char *argv[]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Pass 2: Decode video with audio file
|
// Pass 2: Decode video with audio file
|
||||||
tav_decoder_t *decoder = tav_decoder_init(input_file, output_file, temp_audio_file);
|
tav_decoder_t *decoder = tav_decoder_init(input_file, output_file, temp_audio_file, no_grain_synthesis);
|
||||||
if (!decoder) {
|
if (!decoder) {
|
||||||
fprintf(stderr, "Failed to initialise decoder\n");
|
fprintf(stderr, "Failed to initialise decoder\n");
|
||||||
unlink(temp_audio_file); // Clean up temp file
|
unlink(temp_audio_file); // Clean up temp file
|
||||||
@@ -3126,7 +3149,7 @@ int main(int argc, char *argv[]) {
|
|||||||
// EZBC mode with perceptual quantisation: coefficients are normalised
|
// EZBC mode with perceptual quantisation: coefficients are normalised
|
||||||
// Need to dequantise using perceptual weights (same as twobit-map mode)
|
// Need to dequantise using perceptual weights (same as twobit-map mode)
|
||||||
const int temporal_level = get_temporal_subband_level(t, gop_size, temporal_levels);
|
const int temporal_level = get_temporal_subband_level(t, gop_size, temporal_levels);
|
||||||
const float temporal_scale = get_temporal_quantiser_scale(temporal_level);
|
const float temporal_scale = get_temporal_quantiser_scale(decoder->header.encoder_preset, temporal_level);
|
||||||
|
|
||||||
// FIX: Use QLUT to convert header quantiser indices to actual values
|
// FIX: Use QLUT to convert header quantiser indices to actual values
|
||||||
const float base_q_y = roundf(QLUT[decoder->header.quantiser_y] * temporal_scale);
|
const float base_q_y = roundf(QLUT[decoder->header.quantiser_y] * temporal_scale);
|
||||||
@@ -3160,7 +3183,7 @@ int main(int argc, char *argv[]) {
|
|||||||
} else if (!is_ezbc) {
|
} else if (!is_ezbc) {
|
||||||
// Normal mode: multiply by quantiser
|
// Normal mode: multiply by quantiser
|
||||||
const int temporal_level = get_temporal_subband_level(t, gop_size, temporal_levels);
|
const int temporal_level = get_temporal_subband_level(t, gop_size, temporal_levels);
|
||||||
const float temporal_scale = get_temporal_quantiser_scale(temporal_level);
|
const float temporal_scale = get_temporal_quantiser_scale(decoder->header.encoder_preset, temporal_level);
|
||||||
|
|
||||||
// CRITICAL: Must ROUND temporal quantiser to match encoder's roundf() behavior
|
// CRITICAL: Must ROUND temporal quantiser to match encoder's roundf() behavior
|
||||||
// FIX: Use QLUT to convert header quantiser indices to actual values
|
// FIX: Use QLUT to convert header quantiser indices to actual values
|
||||||
@@ -3206,9 +3229,10 @@ int main(int argc, char *argv[]) {
|
|||||||
|
|
||||||
// Phase 2: Use GOP dimensions (may be cropped) for grain removal
|
// Phase 2: Use GOP dimensions (may be cropped) for grain removal
|
||||||
for (int t = 0; t < gop_size; t++) {
|
for (int t = 0; t < gop_size; t++) {
|
||||||
remove_grain_synthesis_decoder(gop_y[t], gop_width, gop_height,
|
apply_grain_synthesis(gop_y[t], gop_width, gop_height,
|
||||||
decoder->header.decomp_levels, decoder->frame_count + t,
|
decoder->header.decomp_levels, decoder->frame_count + t,
|
||||||
decoder->header.quantiser_y);
|
decoder->header.quantiser_y, decoder->header.encoder_preset,
|
||||||
|
decoder->no_grain_synthesis);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply inverse 3D DWT (spatial + temporal)
|
// Apply inverse 3D DWT (spatial + temporal)
|
||||||
|
|||||||
@@ -19,7 +19,7 @@
|
|||||||
#include <float.h>
|
#include <float.h>
|
||||||
#include "tav_avx512.h" // AVX-512 SIMD optimisations
|
#include "tav_avx512.h" // AVX-512 SIMD optimisations
|
||||||
|
|
||||||
#define ENCODER_VENDOR_STRING "Encoder-TAV 20251124 (3d-dwt,tad,ssf-tc,cdf53-motion,avx512)"
|
#define ENCODER_VENDOR_STRING "Encoder-TAV 20251124 (3d-dwt,tad,ssf-tc,cdf53-motion,avx512,presets)"
|
||||||
|
|
||||||
// TSVM Advanced Video (TAV) format constants
|
// TSVM Advanced Video (TAV) format constants
|
||||||
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV"
|
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV"
|
||||||
@@ -1835,6 +1835,7 @@ typedef struct tav_encoder_s {
|
|||||||
int pcm8_audio; // 1 = use 8-bit PCM audio (packet 0x21), 0 = use MP2 (default)
|
int pcm8_audio; // 1 = use 8-bit PCM audio (packet 0x21), 0 = use MP2 (default)
|
||||||
int tad_audio; // 1 = use TAD audio (packet 0x24), 0 = use MP2/PCM8 (default, quality follows quality_level)
|
int tad_audio; // 1 = use TAD audio (packet 0x24), 0 = use MP2/PCM8 (default, quality follows quality_level)
|
||||||
int enable_crop_encoding; // 1 = encode cropped active region only (Phase 2), 0 = encode full frame (default)
|
int enable_crop_encoding; // 1 = encode cropped active region only (Phase 2), 0 = encode full frame (default)
|
||||||
|
uint8_t encoder_preset; // Encoder preset flags: bit 0 = sports (finer temporal quantisation), bit 1 = anime (no grain)
|
||||||
|
|
||||||
// Active region tracking (for Phase 2 crop encoding)
|
// Active region tracking (for Phase 2 crop encoding)
|
||||||
uint16_t active_mask_top, active_mask_right, active_mask_bottom, active_mask_left;
|
uint16_t active_mask_top, active_mask_right, active_mask_bottom, active_mask_left;
|
||||||
@@ -2432,6 +2433,9 @@ static void show_usage(const char *program_name) {
|
|||||||
printf(" --dump-frame N Dump quantised coefficients for frame N (creates .bin files)\n");
|
printf(" --dump-frame N Dump quantised coefficients for frame N (creates .bin files)\n");
|
||||||
printf(" --wavelet N Wavelet filter: 0=LGT 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar (default: 1)\n");
|
printf(" --wavelet N Wavelet filter: 0=LGT 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar (default: 1)\n");
|
||||||
printf(" --zstd-level N Zstd compression level 1-22 (default: %d, higher = better compression but slower)\n", DEFAULT_ZSTD_LEVEL);
|
printf(" --zstd-level N Zstd compression level 1-22 (default: %d, higher = better compression but slower)\n", DEFAULT_ZSTD_LEVEL);
|
||||||
|
printf(" --preset PRESET Encoder presets (comma-separated, e.g., 'sports,anime'):\n");
|
||||||
|
printf(" sports (or sport): Finer temporal quantisation for better motion detail\n");
|
||||||
|
printf(" anime (or animation): Disable grain synthesis for cleaner animated content\n");
|
||||||
printf(" --help Show this help\n\n");
|
printf(" --help Show this help\n\n");
|
||||||
|
|
||||||
printf("Audio Rate by Quality:\n ");
|
printf("Audio Rate by Quality:\n ");
|
||||||
@@ -3355,8 +3359,9 @@ static void quantise_3d_dwt_coefficients(tav_encoder_t *enc,
|
|||||||
int spatial_size,
|
int spatial_size,
|
||||||
int base_quantiser,
|
int base_quantiser,
|
||||||
int is_chroma) {
|
int is_chroma) {
|
||||||
const float BETA = 0.6f; // Temporal scaling exponent (aggressive for temporal high-pass)
|
// Sports preset: use finer temporal quantisation (less aggressive)
|
||||||
const float KAPPA = 1.14f;
|
const float BETA = (enc->encoder_preset & 0x01) ? 0.0f : 0.6f;
|
||||||
|
const float KAPPA = (enc->encoder_preset & 0x01) ? 1.0f : 1.14f;
|
||||||
|
|
||||||
// Process each temporal subband independently (separable approach)
|
// Process each temporal subband independently (separable approach)
|
||||||
for (int t = 0; t < num_frames; t++) {
|
for (int t = 0; t < num_frames; t++) {
|
||||||
@@ -7528,8 +7533,10 @@ static int write_tav_header(tav_encoder_t *enc) {
|
|||||||
// Entropy Coder (0 = Twobit-map, 1 = EZBC, 2 = Raw)
|
// Entropy Coder (0 = Twobit-map, 1 = EZBC, 2 = Raw)
|
||||||
fputc(enc->preprocess_mode, enc->output_fp);
|
fputc(enc->preprocess_mode, enc->output_fp);
|
||||||
|
|
||||||
// Reserved bytes (2 bytes)
|
// Encoder Preset (byte 28): bit 0 = sports, bit 1 = anime
|
||||||
fputc(0, enc->output_fp);
|
fputc(enc->encoder_preset, enc->output_fp);
|
||||||
|
|
||||||
|
// Reserved byte (1 byte)
|
||||||
fputc(0, enc->output_fp);
|
fputc(0, enc->output_fp);
|
||||||
|
|
||||||
// Device Orientation (default: 0 = no rotation)
|
// Device Orientation (default: 0 = no rotation)
|
||||||
@@ -10775,6 +10782,7 @@ int main(int argc, char *argv[]) {
|
|||||||
{"tad-audio", no_argument, 0, 1028},
|
{"tad-audio", no_argument, 0, 1028},
|
||||||
{"raw-coeffs", no_argument, 0, 1029},
|
{"raw-coeffs", no_argument, 0, 1029},
|
||||||
{"single-pass", no_argument, 0, 1050}, // disable two-pass encoding with wavelet-based scene detection
|
{"single-pass", no_argument, 0, 1050}, // disable two-pass encoding with wavelet-based scene detection
|
||||||
|
{"preset", required_argument, 0, 1051}, // Encoder presets: sports, anime (comma-separated)
|
||||||
{"enable-crop-encoding", no_argument, 0, 1052}, // Phase 2: encode cropped active region only (experimental)
|
{"enable-crop-encoding", no_argument, 0, 1052}, // Phase 2: encode cropped active region only (experimental)
|
||||||
{"help", no_argument, 0, '?'},
|
{"help", no_argument, 0, '?'},
|
||||||
{0, 0, 0, 0}
|
{0, 0, 0, 0}
|
||||||
@@ -11012,6 +11020,34 @@ int main(int argc, char *argv[]) {
|
|||||||
enc->two_pass_mode = 0;
|
enc->two_pass_mode = 0;
|
||||||
printf("Two-pass wavelet-based scene change detection disabled\n");
|
printf("Two-pass wavelet-based scene change detection disabled\n");
|
||||||
break;
|
break;
|
||||||
|
case 1051: { // --preset
|
||||||
|
char *preset_str = strdup(optarg);
|
||||||
|
char *token = strtok(preset_str, ",");
|
||||||
|
while (token != NULL) {
|
||||||
|
// Trim leading/trailing whitespace
|
||||||
|
while (*token == ' ' || *token == '\t') token++;
|
||||||
|
char *end = token + strlen(token) - 1;
|
||||||
|
while (end > token && (*end == ' ' || *end == '\t')) {
|
||||||
|
*end = '\0';
|
||||||
|
end--;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for presets and aliases
|
||||||
|
if (strcmp(token, "sports") == 0 || strcmp(token, "sport") == 0) {
|
||||||
|
enc->encoder_preset |= 0x01;
|
||||||
|
printf("Preset 'sports' enabled: finer temporal quantisation (BETA=0.25, KAPPA=1.0)\n");
|
||||||
|
} else if (strcmp(token, "anime") == 0 || strcmp(token, "animation") == 0) {
|
||||||
|
enc->encoder_preset |= 0x02;
|
||||||
|
printf("Preset 'anime' enabled: grain synthesis disabled\n");
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "Warning: Unknown preset '%s' (valid: sports, anime)\n", token);
|
||||||
|
}
|
||||||
|
|
||||||
|
token = strtok(NULL, ",");
|
||||||
|
}
|
||||||
|
free(preset_str);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case 1052: // --enable-crop-encoding
|
case 1052: // --enable-crop-encoding
|
||||||
enc->enable_crop_encoding = 1;
|
enc->enable_crop_encoding = 1;
|
||||||
printf("Phase 2 crop encoding enabled (experimental)\n");
|
printf("Phase 2 crop encoding enabled (experimental)\n");
|
||||||
|
|||||||
@@ -456,7 +456,7 @@ static inline void quantise_dwt_coefficients_avx512(
|
|||||||
quant = _mm512_mask_blend_ps(dead_mask, quant, zero_vec);
|
quant = _mm512_mask_blend_ps(dead_mask, quant, zero_vec);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Manual rounding to match scalar behavior (round away from zero)
|
// Manual rounding to match scalar behaviour (round away from zero)
|
||||||
// First add 0.5 or -0.5 based on sign
|
// First add 0.5 or -0.5 based on sign
|
||||||
__mmask16 pos_mask = _mm512_cmp_ps_mask(quant, zero_vec, _CMP_GE_OQ);
|
__mmask16 pos_mask = _mm512_cmp_ps_mask(quant, zero_vec, _CMP_GE_OQ);
|
||||||
__m512 round_val = _mm512_mask_blend_ps(pos_mask, nhalf_vec, half_vec);
|
__m512 round_val = _mm512_mask_blend_ps(pos_mask, nhalf_vec, half_vec);
|
||||||
@@ -510,7 +510,7 @@ static inline void quantise_dwt_coefficients_perceptual_avx512(
|
|||||||
__m512 effective_q = _mm512_mul_ps(base_q_vec, weight);
|
__m512 effective_q = _mm512_mul_ps(base_q_vec, weight);
|
||||||
__m512 quant = _mm512_div_ps(coeff, effective_q);
|
__m512 quant = _mm512_div_ps(coeff, effective_q);
|
||||||
|
|
||||||
// Manual rounding to match scalar behavior
|
// Manual rounding to match scalar behaviour
|
||||||
__mmask16 pos_mask = _mm512_cmp_ps_mask(quant, zero_vec, _CMP_GE_OQ);
|
__mmask16 pos_mask = _mm512_cmp_ps_mask(quant, zero_vec, _CMP_GE_OQ);
|
||||||
__m512 round_val = _mm512_mask_blend_ps(pos_mask, nhalf_vec, half_vec);
|
__m512 round_val = _mm512_mask_blend_ps(pos_mask, nhalf_vec, half_vec);
|
||||||
quant = _mm512_add_ps(quant, round_val);
|
quant = _mm512_add_ps(quant, round_val);
|
||||||
|
|||||||
@@ -514,6 +514,7 @@ int main(int argc, char *argv[]) {
|
|||||||
uint8_t quality = header[25];
|
uint8_t quality = header[25];
|
||||||
uint8_t channel_layout = header[26];
|
uint8_t channel_layout = header[26];
|
||||||
uint8_t entropy_coder = header[27];
|
uint8_t entropy_coder = header[27];
|
||||||
|
uint8_t encoder_preset = header[28];
|
||||||
|
|
||||||
static const int QLUT[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096};
|
static const int QLUT[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096};
|
||||||
static const char* CLAYOUT[] = {"Luma-Chroma", "Luma-Chroma-Alpha", "Luma", "Luma-Alpha", "Chroma", "Chroma-Alpha"};
|
static const char* CLAYOUT[] = {"Luma-Chroma", "Luma-Chroma-Alpha", "Luma", "Luma-Alpha", "Chroma", "Chroma-Alpha"};
|
||||||
@@ -548,6 +549,21 @@ static const char* TEMPORAL_WAVELET[] = {"Haar", "CDF 5/3"};
|
|||||||
printf(" Quality: n/a\n");
|
printf(" Quality: n/a\n");
|
||||||
printf(" Channel layout: %s\n", CLAYOUT[channel_layout]);
|
printf(" Channel layout: %s\n", CLAYOUT[channel_layout]);
|
||||||
printf(" Entropy coder: %s\n", entropy_coder == 0 ? "Twobit-map" : "EZBC");
|
printf(" Entropy coder: %s\n", entropy_coder == 0 ? "Twobit-map" : "EZBC");
|
||||||
|
printf(" Encoder preset: ");
|
||||||
|
if (encoder_preset == 0) {
|
||||||
|
printf("Default\n");
|
||||||
|
} else {
|
||||||
|
int first = 1;
|
||||||
|
if (encoder_preset & 0x01) {
|
||||||
|
printf("%sSports", first ? "" : ", ");
|
||||||
|
first = 0;
|
||||||
|
}
|
||||||
|
if (encoder_preset & 0x02) {
|
||||||
|
printf("%sAnime", first ? "" : ", ");
|
||||||
|
first = 0;
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
printf(" Flags:\n");
|
printf(" Flags:\n");
|
||||||
printf(" Has audio: %s\n", (extra_flags & 0x01) ? "Yes" : "No");
|
printf(" Has audio: %s\n", (extra_flags & 0x01) ? "Yes" : "No");
|
||||||
printf(" Has subtitles: %s\n", (extra_flags & 0x02) ? "Yes" : "No");
|
printf(" Has subtitles: %s\n", (extra_flags & 0x02) ? "Yes" : "No");
|
||||||
|
|||||||
Reference in New Issue
Block a user