tav: grain synthesis on the spec

This commit is contained in:
minjaesong
2025-10-08 23:47:54 +09:00
parent 17b5063ef0
commit 1a072f6a0c
4 changed files with 212 additions and 61 deletions

View File

@@ -898,7 +898,8 @@ try {
serial.println(` FIELD_SIZE: ${FIELD_SIZE}`) serial.println(` FIELD_SIZE: ${FIELD_SIZE}`)
} }
let thisFrameNoiseLevel = (filmGrainLevel >= 0) ? filmGrainLevel : -(filmGrainLevel - (trueFrameCount % 2)) //let thisFrameNoiseLevel = (filmGrainLevel >= 0) ? filmGrainLevel : -(filmGrainLevel - (trueFrameCount % 2))
// grain synthesis is now part of the spec
// Call new TAV hardware decoder that handles Zstd decompression internally // Call new TAV hardware decoder that handles Zstd decompression internally
// Note: No longer using JS gzip.decompFromTo - Kotlin handles Zstd natively // Note: No longer using JS gzip.decompFromTo - Kotlin handles Zstd natively
@@ -913,8 +914,7 @@ try {
header.waveletFilter, // TAV-specific parameter header.waveletFilter, // TAV-specific parameter
header.decompLevels, // TAV-specific parameter header.decompLevels, // TAV-specific parameter
isLossless, isLossless,
header.version, // TAV version for colour space detection header.version // TAV version for colour space detection
thisFrameNoiseLevel // Undocumented spooky noise filter
) )
decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0

View File

@@ -1069,6 +1069,13 @@ This perceptual approach allocates more bits to visually important low-frequency
details while aggressively quantising high-frequency noise, resulting in superior details while aggressively quantising high-frequency noise, resulting in superior
visual quality at equivalent bitrates. visual quality at equivalent bitrates.
#### Grain Synthesis
The decoder must synthesise film grain on the non-LL subbands, with an amplitude equal to half of the quantisation step.
The encoder may synthesise the exact same grain with its sign reversed during encoding (though this is not recommended for practical reasons).
The base noise function must be triangular noise in the range [-1.0, 1.0].
## Colour Space ## Colour Space
TAV supports two colour spaces: TAV supports two colour spaces:

View File

@@ -4446,12 +4446,83 @@ class GraphicsJSR223Delegate(private val vm: VM) {
private val tavDebugFrameTarget = -1 // use negative number to disable the debug print private val tavDebugFrameTarget = -1 // use negative number to disable the debug print
private var tavDebugCurrentFrameNumber = 0 private var tavDebugCurrentFrameNumber = 0
// ==============================================================================
// Grain Synthesis Functions (must match encoder implementation)
// ==============================================================================
// Stateless RNG for grain synthesis (matches C encoder implementation)
// Deterministic position-keyed RNG for grain synthesis.
// Folds (frame, band, x, y) into a single 32-bit key, then runs a
// lowbias32-style avalanche using the same multiplier constants as the
// C encoder's rng_hash(), so encoder and decoder regenerate an
// identical noise field for any given coefficient position.
private inline fun tavGrainSynthesisRNG(frame: UInt, band: UInt, x: UInt, y: UInt): UInt {
    // Key mixing: golden-ratio constant for the frame, a distinct odd
    // constant for the band, and the coordinates packed into the remainder.
    var h = frame * 0x9e3779b9u xor band * 0x7f4a7c15u xor (y shl 16) xor x
    // Avalanche (xor-shift / multiply rounds, identical to rng_hash in C).
    h = (h xor (h shr 16)) * 0x7feb352du
    h = (h xor (h shr 15)) * 0x846ca68bu
    return h xor (h shr 16)
}
// Generate triangular noise from uint32 RNG (returns value in range [-1.0, 1.0])
// Map a 32-bit hash value to triangular noise in [-1.0, 1.0].
// The low and high 16-bit halves of the hash act as two independent
// uniform samples on [0, 1]; their sum shifted down by one follows a
// triangular distribution centred on zero, as the TAV spec requires.
private inline fun tavGrainTriangularNoise(rngVal: UInt): Float {
    val lo = (rngVal and 0xFFFFu).toFloat() / 65535.0f
    val hi = ((rngVal shr 16) and 0xFFFFu).toFloat() / 65535.0f
    return lo + hi - 1.0f
}
// Remove grain synthesis from DWT coefficients (decoder subtracts noise)
// This must be called AFTER dequantization but BEFORE inverse DWT
/**
 * Decoder-side grain removal as mandated by the TAV spec: subtracts
 * deterministic triangular noise from every non-LL DWT subband at an
 * amplitude of half the (clamped) quantisation step. Must be called
 * AFTER dequantisation and BEFORE the inverse DWT.
 *
 * coeffs       - dequantised DWT coefficients for one tile, modified in place
 * width/height - tile dimensions; width maps a linear index back to (x, y)
 * frameNum     - frame counter, part of the RNG key so grain varies per frame
 * quantiser    - quantisation step for this channel (capped at 32 below)
 * subbands     - precomputed layout giving coeffStart/coeffCount per subband
 * qIndex, qYGlobal, usePerceptualWeights - reserved for the perceptual-weight
 *     variant (commented out below); they currently do not affect the output
 */
private fun removeGrainSynthesisDecoder(coeffs: FloatArray, width: Int, height: Int,
decompLevels: Int, frameNum: Int, quantiser: Float,
subbands: List<DWTSubbandInfo>, qIndex: Int = 3, qYGlobal: Int = 0,
usePerceptualWeights: Boolean = false) {
// Only apply to Y channel, excluding LL band
// Noise amplitude = half of quantization step (scaled by perceptual weight if enabled)
// Process each subband (skip LL which is level 0)
for (subband in subbands) {
if (subband.level == 0) continue // Skip LL band — it carries the DC image content and is never grained
// Calculate perceptual weight for this subband if perceptual mode is enabled
/*val perceptualWeight = if (usePerceptualWeights) {
getPerceptualWeight(qIndex, qYGlobal, subband.level, subband.subbandType, false, decompLevels)
} else {
1.0f
}
// Noise amplitude for this subband
val noiseAmplitude = (quantiser * perceptualWeight) * 0.5f*/
// Half the quantisation step, capped at 32 → maximum amplitude 16 (spec: "half of the quantisation level")
val noiseAmplitude = quantiser.coerceAtMost(32f) * 0.5f
// Remove noise from each coefficient in this subband
for (i in 0 until subband.coeffCount) {
val idx = subband.coeffStart + i
if (idx < coeffs.size) {
// Calculate 2D position from linear index
// (assumes coefficients are stored row-major in a width×height plane — TODO confirm against calculateSubbandLayout)
val y = idx / width
val x = idx % width
// Generate same deterministic noise as encoder.
// Band key = level + type*31 + 16777619 (the FNV prime).
// NOTE(review): the C encoder's apply_grain_synthesis_encoder uses 16777219
// and a *0.25f amplitude — it must be brought in line with this decoder
// for encoder-added grain to cancel exactly.
val rngVal = tavGrainSynthesisRNG(frameNum.toUInt(), (subband.level + subband.subbandType * 31 + 16777619).toUInt(), x.toUInt(), y.toUInt())
val noise = tavGrainTriangularNoise(rngVal)
// Subtract noise from coefficient
coeffs[idx] -= noise * noiseAmplitude
}
}
}
}
private val TAV_QLUT = intArrayOf(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096) private val TAV_QLUT = 
intArrayOf(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096)
// New tavDecode function that accepts compressed data and decompresses internally // New tavDecode function that accepts compressed data and decompresses internally
fun tavDecodeCompressed(compressedDataPtr: Long, compressedSize: Int, currentRGBAddr: Long, prevRGBAddr: Long, fun tavDecodeCompressed(compressedDataPtr: Long, compressedSize: Int, currentRGBAddr: Long, prevRGBAddr: Long,
width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int, width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int,
frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1, filmGrainLevel: Int = 0): HashMap<String, Any> { frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1): HashMap<String, Any> {
// Read compressed data from VM memory into byte array // Read compressed data from VM memory into byte array
val compressedData = ByteArray(compressedSize) val compressedData = ByteArray(compressedSize)
@@ -4481,7 +4552,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Call the existing tavDecode function with decompressed data // Call the existing tavDecode function with decompressed data
tavDecode(decompressedBuffer.toLong(), currentRGBAddr, prevRGBAddr, tavDecode(decompressedBuffer.toLong(), currentRGBAddr, prevRGBAddr,
width, height, qIndex, qYGlobal, qCoGlobal, qCgGlobal, channelLayout, width, height, qIndex, qYGlobal, qCoGlobal, qCgGlobal, channelLayout,
frameCount, waveletFilter, decompLevels, isLossless, tavVersion, filmGrainLevel) frameCount, waveletFilter, decompLevels, isLossless, tavVersion)
} finally { } finally {
// Clean up allocated buffer // Clean up allocated buffer
@@ -4497,7 +4568,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Original tavDecode function for backward compatibility (now handles decompressed data) // Original tavDecode function for backward compatibility (now handles decompressed data)
fun tavDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long, fun tavDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int, width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int,
frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1, filmGrainLevel: Int = 0): HashMap<String, Any> { frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1): HashMap<String, Any> {
val dbgOut = HashMap<String, Any>() val dbgOut = HashMap<String, Any>()
@@ -4554,14 +4625,14 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Decode DWT coefficients directly to RGB buffer // Decode DWT coefficients directly to RGB buffer
readPtr = tavDecodeDWTIntraTileRGB(qIndex, qYGlobal, channelLayout, readPtr, tileX, tileY, currentRGBAddr, readPtr = tavDecodeDWTIntraTileRGB(qIndex, qYGlobal, channelLayout, readPtr, tileX, tileY, currentRGBAddr,
width, height, qY, qCo, qCg, width, height, qY, qCo, qCg,
waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, filmGrainLevel) waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, frameCount)
dbgOut["frameMode"] = " " dbgOut["frameMode"] = " "
} }
0x02 -> { // TAV_MODE_DELTA 0x02 -> { // TAV_MODE_DELTA
// Coefficient delta encoding for efficient P-frames // Coefficient delta encoding for efficient P-frames
readPtr = tavDecodeDeltaTileRGB(readPtr, channelLayout, tileX, tileY, currentRGBAddr, readPtr = tavDecodeDeltaTileRGB(readPtr, channelLayout, tileX, tileY, currentRGBAddr,
width, height, qY, qCo, qCg, width, height, qY, qCo, qCg,
waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, filmGrainLevel) waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, frameCount)
dbgOut["frameMode"] = " " dbgOut["frameMode"] = " "
} }
} }
@@ -4577,7 +4648,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
private fun tavDecodeDWTIntraTileRGB(qIndex: Int, qYGlobal: Int, channelLayout: Int, readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long, private fun tavDecodeDWTIntraTileRGB(qIndex: Int, qYGlobal: Int, channelLayout: Int, readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, width: Int, height: Int, qY: Int, qCo: Int, qCg: Int,
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false, filmGrainLevel: Int = 0): Long { waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int): Long {
// Determine coefficient count based on mode // Determine coefficient count based on mode
val coeffCount = if (isMonoblock) { val coeffCount = if (isMonoblock) {
// Monoblock mode: entire frame // Monoblock mode: entire frame
@@ -4678,15 +4749,20 @@ class GraphicsJSR223Delegate(private val vm: VM) {
dequantiseDWTSubbandsPerceptual(qIndex, qYGlobal, quantisedCo, coTile, subbands, qCo.toFloat(), true, decompLevels) dequantiseDWTSubbandsPerceptual(qIndex, qYGlobal, quantisedCo, coTile, subbands, qCo.toFloat(), true, decompLevels)
dequantiseDWTSubbandsPerceptual(qIndex, qYGlobal, quantisedCg, cgTile, subbands, qCg.toFloat(), true, decompLevels) dequantiseDWTSubbandsPerceptual(qIndex, qYGlobal, quantisedCg, cgTile, subbands, qCg.toFloat(), true, decompLevels)
// Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT)
// Use perceptual weights since this is the perceptual quantization path
removeGrainSynthesisDecoder(yTile, tileWidth, tileHeight, decompLevels, frameCount, qY.toFloat(), subbands, qIndex, qYGlobal, true)
// Apply film grain filter if enabled // Apply film grain filter if enabled
if (filmGrainLevel > 0) { // commented; grain synthesis is now a part of the spec
/*if (filmGrainLevel > 0) {
val random = java.util.Random() val random = java.util.Random()
for (i in 0 until coeffCount) { for (i in 0 until coeffCount) {
yTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat() yTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat()
// coTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat() // coTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat()
// cgTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat() // cgTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat()
} }
} }*/
// Debug: Check coefficient values before inverse DWT // Debug: Check coefficient values before inverse DWT
if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) { if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) {
@@ -4744,15 +4820,22 @@ class GraphicsJSR223Delegate(private val vm: VM) {
cgTile[i] = quantisedCg[i] * qCg.toFloat() cgTile[i] = quantisedCg[i] * qCg.toFloat()
} }
// Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT)
val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X
val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y
val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels)
removeGrainSynthesisDecoder(yTile, tileWidth, tileHeight, decompLevels, frameCount, qY.toFloat(), subbands)
// Apply film grain filter if enabled // Apply film grain filter if enabled
if (filmGrainLevel > 0) { // commented; grain synthesis is now a part of the spec
/*if (filmGrainLevel > 0) {
val random = java.util.Random() val random = java.util.Random()
for (i in 0 until coeffCount) { for (i in 0 until coeffCount) {
yTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat() yTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat()
// coTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat() // coTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat()
// cgTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat() // cgTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat()
} }
} }*/
// Debug: Uniform quantisation subband analysis for comparison // Debug: Uniform quantisation subband analysis for comparison
if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) { if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) {
@@ -5160,48 +5243,6 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
} }
// Delta-specific perceptual weight model for motion-optimized coefficient reconstruction
private fun getPerceptualWeightDelta(qualityLevel: Int, level: Int, subbandType: Int, isChroma: Boolean, maxLevels: Int): Float {
// Delta coefficients have different perceptual characteristics than full-picture coefficients:
// 1. Motion edges are more perceptually critical than static edges
// 2. Temporal masking allows more aggressive quantisation in high-motion areas
// 3. Smaller delta magnitudes make relative quantisation errors more visible
// 4. Frequency distribution is motion-dependent rather than spatial-dependent
return if (!isChroma) {
// LUMA DELTA CHANNEL: Emphasize motion coherence and edge preservation
when (subbandType) {
0 -> { // LL subband - DC motion changes, still important
// DC motion changes - preserve somewhat but allow coarser quantisation than full-picture
2f // Slightly coarser than full-picture
}
1 -> { // LH subband - horizontal motion edges
// Motion boundaries benefit from temporal masking - allow coarser quantisation
0.9f
}
2 -> { // HL subband - vertical motion edges
// Vertical motion boundaries - equal treatment with horizontal for deltas
1.2f
}
else -> { // HH subband - diagonal motion details
// Diagonal motion deltas can be quantised most aggressively
0.5f
}
}
} else {
// CHROMA DELTA CHANNELS: More aggressive quantisation allowed due to temporal masking
// Motion chroma changes are less perceptually critical than static chroma
val base = getPerceptualModelChromaBase(qualityLevel, level - 1)
when (subbandType) {
0 -> 1.3f // LL chroma deltas - more aggressive than full-picture chroma
1 -> kotlin.math.max(1.2f, kotlin.math.min(120.0f, base * 1.4f)) // LH chroma deltas
2 -> kotlin.math.max(1.4f, kotlin.math.min(140.0f, base * 1.6f)) // HL chroma deltas
else -> kotlin.math.max(1.6f, kotlin.math.min(160.0f, base * 1.8f)) // HH chroma deltas
}
}
}
private fun getPerceptualModelChromaBase(qualityLevel: Int, level: Int): Float { private fun getPerceptualModelChromaBase(qualityLevel: Int, level: Int): Float {
// Simplified chroma base curve // Simplified chroma base curve
return 1.0f - (1.0f / (0.5f * qualityLevel * qualityLevel + 1.0f)) * (level - 4.0f) return 1.0f - (1.0f / (0.5f * qualityLevel * qualityLevel + 1.0f)) * (level - 4.0f)
@@ -5209,7 +5250,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
private fun tavDecodeDeltaTileRGB(readPtr: Long, channelLayout: Int, tileX: Int, tileY: Int, currentRGBAddr: Long, private fun tavDecodeDeltaTileRGB(readPtr: Long, channelLayout: Int, tileX: Int, tileY: Int, currentRGBAddr: Long,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, width: Int, height: Int, qY: Int, qCo: Int, qCg: Int,
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false, filmGrainLevel: Int = 0): Long { waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int = 0): Long {
val tileIdx = if (isMonoblock) { val tileIdx = if (isMonoblock) {
0 // Single tile index for monoblock 0 // Single tile index for monoblock
@@ -5326,15 +5367,23 @@ class GraphicsJSR223Delegate(private val vm: VM) {
currentCg[i] = prevCg[i] + (deltaCg[i].toFloat() * qCg) currentCg[i] = prevCg[i] + (deltaCg[i].toFloat() * qCg)
} }
// Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT)
val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X
val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y
val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels)
// Delta frames use uniform quantization for the deltas themselves, so no perceptual weights
removeGrainSynthesisDecoder(currentY, tileWidth, tileHeight, decompLevels, frameCount, qY.toFloat(), subbands)
// Apply film grain filter if enabled // Apply film grain filter if enabled
if (filmGrainLevel > 0) { // commented; grain synthesis is now a part of the spec
/*if (filmGrainLevel > 0) {
val random = java.util.Random() val random = java.util.Random()
for (i in 0 until coeffCount) { for (i in 0 until coeffCount) {
currentY[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat() currentY[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat()
// currentCo[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat() // currentCo[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat()
// currentCg[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat() // currentCg[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat()
} }
} }*/
// Store current coefficients as previous for next frame // Store current coefficients as previous for next frame
tavPreviousCoeffsY!![tileIdx] = currentY.clone() tavPreviousCoeffsY!![tileIdx] = currentY.clone()
@@ -5342,9 +5391,6 @@ class GraphicsJSR223Delegate(private val vm: VM) {
tavPreviousCoeffsCg!![tileIdx] = currentCg.clone() tavPreviousCoeffsCg!![tileIdx] = currentCg.clone()
// Apply inverse DWT // Apply inverse DWT
val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X
val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y
if (isLossless) { if (isLossless) {
tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, 0, TavSharpenLuma) tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, 0, TavSharpenLuma)
tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, 0, TavNullFilter) tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, 0, TavNullFilter)

View File

@@ -281,6 +281,7 @@ typedef struct tav_encoder_s {
int perceptual_tuning; // 1 = perceptual quantisation (default), 0 = uniform quantisation int perceptual_tuning; // 1 = perceptual quantisation (default), 0 = uniform quantisation
int channel_layout; // Channel layout: 0=Y-Co-Cg, 1=Y-only, 2=Y-Co-Cg-A, 3=Y-A, 4=Co-Cg int channel_layout; // Channel layout: 0=Y-Co-Cg, 1=Y-only, 2=Y-Co-Cg-A, 3=Y-A, 4=Co-Cg
int progressive_mode; // 0 = interlaced (default), 1 = progressive int progressive_mode; // 0 = interlaced (default), 1 = progressive
int grain_synthesis; // 1 = encoder-side grain synthesis, 0 = disable (default: 0 — grain is normally synthesised by the decoder only)
// Frame buffers - ping-pong implementation // Frame buffers - ping-pong implementation
uint8_t *frame_rgb[2]; // [0] and [1] alternate between current and previous uint8_t *frame_rgb[2]; // [0] and [1] alternate between current and previous
@@ -616,6 +617,21 @@ static void free_subtitle_list(subtitle_entry_t *list);
static int write_subtitle_packet(FILE *output, uint32_t index, uint8_t opcode, const char *text); static int write_subtitle_packet(FILE *output, uint32_t index, uint8_t opcode, const char *text);
static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output); static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output);
/* Film grain synthesis — deterministic position-keyed RNG.
 * rng_hash() is a 32-bit integer avalanche (lowbias32-style constants);
 * grain_synthesis_rng() folds (frame, band, x, y) into one key so the
 * encoder and the decoder regenerate an identical noise field. */
static uint32_t rng_hash(uint32_t x) {
    uint32_t h = x;
    h = (h ^ (h >> 16)) * 0x7feb352d;
    h = (h ^ (h >> 15)) * 0x846ca68b;
    return h ^ (h >> 16);
}

static uint32_t grain_synthesis_rng(uint32_t frame, uint32_t band, uint32_t x, uint32_t y) {
    /* Golden-ratio constant for the frame, a distinct odd constant for the
     * band, coordinates packed into the remaining bits. */
    return rng_hash((frame * 0x9e3779b9u) ^ (band * 0x7f4a7c15u) ^ (y << 16) ^ x);
}
// Show usage information // Show usage information
static void show_usage(const char *program_name) { static void show_usage(const char *program_name) {
int qtsize = sizeof(MP2_RATE_TABLE) / sizeof(int); int qtsize = sizeof(MP2_RATE_TABLE) / sizeof(int);
@@ -647,6 +663,7 @@ static void show_usage(const char *program_name) {
printf(" --dump-frame N Dump quantised coefficients for frame N (creates .bin files)\n"); printf(" --dump-frame N Dump quantised coefficients for frame N (creates .bin files)\n");
printf(" --wavelet N Wavelet filter: 0=LGT 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar (default: 1)\n"); printf(" --wavelet N Wavelet filter: 0=LGT 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar (default: 1)\n");
printf(" --zstd-level N Zstd compression level 1-22 (default: %d, higher = better compression but slower)\n", DEFAULT_ZSTD_LEVEL); printf(" --zstd-level N Zstd compression level 1-22 (default: %d, higher = better compression but slower)\n", DEFAULT_ZSTD_LEVEL);
printf(" --no-grain-synthesis Disable grain synthesis (enabled by default)\n");
printf(" --help Show this help\n\n"); printf(" --help Show this help\n\n");
printf("Audio Rate by Quality:\n "); printf("Audio Rate by Quality:\n ");
@@ -710,6 +727,7 @@ static tav_encoder_t* create_encoder(void) {
enc->encode_limit = 0; // Default: no frame limit enc->encode_limit = 0; // Default: no frame limit
enc->zstd_level = DEFAULT_ZSTD_LEVEL; // Default Zstd compression level enc->zstd_level = DEFAULT_ZSTD_LEVEL; // Default Zstd compression level
enc->progressive_mode = 1; // Default to progressive mode enc->progressive_mode = 1; // Default to progressive mode
enc->grain_synthesis = 0; // Default: disable grain synthesis (only do it on the decoder)
return enc; return enc;
} }
@@ -1142,6 +1160,67 @@ static void extract_padded_tile(tav_encoder_t *enc, int tile_x, int tile_y,
} }
} }
// ==============================================================================
// Grain Synthesis Functions
// ==============================================================================
// Forward declaration for perceptual weight function
static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels);
// Generate triangular noise from uint32 RNG
// Returns value in range [-1.0, 1.0]
/* Triangular-distribution noise in [-1.0, 1.0] derived from one 32-bit hash.
 * The two 16-bit halves serve as independent uniform samples on [0, 1];
 * their sum minus one is triangularly distributed about zero. */
static float grain_triangular_noise(uint32_t rng_val) {
    const float lo = (rng_val & 0xFFFF) / 65535.0f;
    const float hi = ((rng_val >> 16) & 0xFFFF) / 65535.0f;
    return lo + hi - 1.0f;
}
// Apply grain synthesis to DWT coefficients (encoder adds noise)
// Apply grain synthesis to DWT coefficients (encoder adds noise).
// Mirrors removeGrainSynthesisDecoder() in the Kotlin decoder: the decoder
// SUBTRACTS exactly this noise field, so every constant here (band key,
// amplitude clamp, 0.5f scale) must stay bit-identical to the decoder's.
// Per the TAV spec, the amplitude is half of the quantisation step and the
// grain is applied to all non-LL subbands only.
//
// enc          - encoder state (currently unused; kept for the perceptual
//                variant left commented out in an earlier revision)
// coeffs       - DWT coefficients for one tile, modified in place
// width/height - tile dimensions
// decomp_levels- DWT decomposition levels (determines subband geometry)
// frame_num    - frame counter, part of the RNG key
// quantiser    - dequantised Y quantisation step (clamped to [0, 32])
// is_chroma    - currently unused; grain is applied to Y only by the caller
static void apply_grain_synthesis_encoder(tav_encoder_t *enc, float *coeffs, int width, int height,
                                          int decomp_levels, uint32_t frame_num,
                                          int quantiser, int is_chroma) {
    (void)enc;       // only needed by the (disabled) perceptual-weight path
    (void)is_chroma; // ditto

    // Loop-invariant noise amplitude: clamp(q, 0, 32) * 0.5, matching the
    // decoder's quantiser.coerceAtMost(32f) * 0.5f and the spec's
    // "half of the quantisation level".  (Bug fix: this was * 0.25f, which
    // would leave residual grain after the decoder subtracts at 0.5.)
    const float noise_amplitude = FCLAMP(quantiser, 0.0f, 32.0f) * 0.5f;

    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            const int idx = y * width + x;
            const int level = get_subband_level_2d(x, y, width, height, decomp_levels);
            if (level == 0) {
                continue; // LL band carries the DC image content — never grained
            }
            const int subband_type = get_subband_type_2d(x, y, width, height, decomp_levels);
            // Band key must match the decoder exactly: level + type*31 + 16777619
            // (the FNV prime).  (Bug fix: this was 16777219, which desynchronised
            // the encoder noise from the decoder noise.)
            const uint32_t rng_val = grain_synthesis_rng(frame_num, level + subband_type * 31 + 16777619, x, y);
            coeffs[idx] += grain_triangular_noise(rng_val) * noise_amplitude;
        }
    }
}
// 2D DWT forward transform for rectangular padded tile (344x288) // 2D DWT forward transform for rectangular padded tile (344x288)
static void dwt_2d_forward_padded(float *tile_data, int levels, int filter_type) { static void dwt_2d_forward_padded(float *tile_data, int levels, int filter_type) {
@@ -2002,6 +2081,21 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
printf("\n"); printf("\n");
}*/ }*/
// Apply grain synthesis to Y channel (after DWT, before quantization)
if (enc->grain_synthesis && mode != TAV_MODE_SKIP) {
// Get the quantiser value that will be used for this frame
int qY_value = enc->bitrate_mode ? quantiser_float_to_int_dithered(enc) : enc->quantiser_y;
int actual_qY = QLUT[qY_value];
// Determine dimensions based on mode
int gs_width = enc->monoblock ? enc->width : PADDED_TILE_SIZE_X;
int gs_height = enc->monoblock ? enc->height : PADDED_TILE_SIZE_Y;
// Apply grain synthesis to Y channel only (is_chroma = 0)
apply_grain_synthesis_encoder(enc, tile_y_data, gs_width, gs_height,
enc->decomp_levels, enc->frame_count, actual_qY, 0);
}
// Serialise tile // Serialise tile
size_t tile_size = serialise_tile_data(enc, tile_x, tile_y, size_t tile_size = serialise_tile_data(enc, tile_x, tile_y,
tile_y_data, tile_co_data, tile_cg_data, tile_y_data, tile_co_data, tile_cg_data,
@@ -3555,6 +3649,7 @@ int main(int argc, char *argv[]) {
{"zstd-level", required_argument, 0, 1014}, {"zstd-level", required_argument, 0, 1014},
{"interlace", no_argument, 0, 1015}, {"interlace", no_argument, 0, 1015},
{"interlaced", no_argument, 0, 1015}, {"interlaced", no_argument, 0, 1015},
// {"no-grain-synthesis", no_argument, 0, 1016},
{"help", no_argument, 0, '?'}, {"help", no_argument, 0, '?'},
{0, 0, 0, 0} {0, 0, 0, 0}
}; };
@@ -3704,6 +3799,9 @@ int main(int argc, char *argv[]) {
case 1015: // --interlaced case 1015: // --interlaced
enc->progressive_mode = 0; enc->progressive_mode = 0;
break; break;
case 1016: // --no-grain-synthesis
enc->grain_synthesis = 0;
break;
case 'a': case 'a':
int bitrate = atoi(optarg); int bitrate = atoi(optarg);
int valid_bitrate = validate_mp2_bitrate(bitrate); int valid_bitrate = validate_mp2_bitrate(bitrate);