tav: grain synthesis on the spec

This commit is contained in:
minjaesong
2025-10-08 23:47:54 +09:00
parent 17b5063ef0
commit 1a072f6a0c
4 changed files with 212 additions and 61 deletions

View File

@@ -898,7 +898,8 @@ try {
serial.println(` FIELD_SIZE: ${FIELD_SIZE}`) serial.println(` FIELD_SIZE: ${FIELD_SIZE}`)
} }
let thisFrameNoiseLevel = (filmGrainLevel >= 0) ? filmGrainLevel : -(filmGrainLevel - (trueFrameCount % 2)) //let thisFrameNoiseLevel = (filmGrainLevel >= 0) ? filmGrainLevel : -(filmGrainLevel - (trueFrameCount % 2))
// grain synthesis is now part of the spec
// Call new TAV hardware decoder that handles Zstd decompression internally // Call new TAV hardware decoder that handles Zstd decompression internally
// Note: No longer using JS gzip.decompFromTo - Kotlin handles Zstd natively // Note: No longer using JS gzip.decompFromTo - Kotlin handles Zstd natively
@@ -913,8 +914,7 @@ try {
header.waveletFilter, // TAV-specific parameter header.waveletFilter, // TAV-specific parameter
header.decompLevels, // TAV-specific parameter header.decompLevels, // TAV-specific parameter
isLossless, isLossless,
header.version, // TAV version for colour space detection header.version // TAV version for colour space detection
thisFrameNoiseLevel // Undocumented spooky noise filter
) )
decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0

View File

@@ -1069,6 +1069,13 @@ This perceptual approach allocates more bits to visually important low-frequency
details while aggressively quantising high-frequency noise, resulting in superior details while aggressively quantising high-frequency noise, resulting in superior
visual quality at equivalent bitrates. visual quality at equivalent bitrates.
#### Grain Synthesis
The decoder must synthesise film grain on the non-LL subbands, with an amplitude equal to half of the quantisation step.
The encoder may synthesise the exact same grain with its sign reversed during encoding (though this is not recommended for practical reasons).
The base noise function must be triangular noise in the range [-1.0, 1.0].
## Colour Space ## Colour Space
TAV supports two colour spaces: TAV supports two colour spaces:

View File

@@ -4446,12 +4446,83 @@ class GraphicsJSR223Delegate(private val vm: VM) {
private val tavDebugFrameTarget = -1 // use negative number to disable the debug print private val tavDebugFrameTarget = -1 // use negative number to disable the debug print
private var tavDebugCurrentFrameNumber = 0 private var tavDebugCurrentFrameNumber = 0
// ==============================================================================
// Grain Synthesis Functions (must match encoder implementation)
// ==============================================================================
// Stateless RNG for grain synthesis (matches C encoder implementation)
// Deterministic position-keyed RNG for grain synthesis.
// Folds (frame, band, x, y) into a single 32-bit key, then runs a
// lowbias32-style avalanche using the same multiplier constants as the
// C encoder's rng_hash(), so encoder and decoder regenerate an
// identical noise field for any given coefficient position.
private inline fun tavGrainSynthesisRNG(frame: UInt, band: UInt, x: UInt, y: UInt): UInt {
    // Key mixing: golden-ratio constant for the frame, a distinct odd
    // constant for the band, and the coordinates packed into the remainder.
    var h = frame * 0x9e3779b9u xor band * 0x7f4a7c15u xor (y shl 16) xor x
    // Avalanche (xor-shift / multiply rounds, identical to rng_hash in C).
    h = (h xor (h shr 16)) * 0x7feb352du
    h = (h xor (h shr 15)) * 0x846ca68bu
    return h xor (h shr 16)
}
// Generate triangular noise from uint32 RNG (returns value in range [-1.0, 1.0])
// Map a 32-bit hash value to triangular noise in [-1.0, 1.0].
// The low and high 16-bit halves of the hash act as two independent
// uniform samples on [0, 1]; their sum shifted down by one follows a
// triangular distribution centred on zero, as the TAV spec requires.
private inline fun tavGrainTriangularNoise(rngVal: UInt): Float {
    val lo = (rngVal and 0xFFFFu).toFloat() / 65535.0f
    val hi = ((rngVal shr 16) and 0xFFFFu).toFloat() / 65535.0f
    return lo + hi - 1.0f
}
// Remove grain synthesis from DWT coefficients (decoder subtracts noise)
// This must be called AFTER dequantization but BEFORE inverse DWT
/**
 * Decoder-side grain removal as mandated by the TAV spec: subtracts
 * deterministic triangular noise from every non-LL DWT subband at an
 * amplitude of half the (clamped) quantisation step. Must be called
 * AFTER dequantisation and BEFORE the inverse DWT.
 *
 * coeffs       - dequantised DWT coefficients for one tile, modified in place
 * width/height - tile dimensions; width maps a linear index back to (x, y)
 * frameNum     - frame counter, part of the RNG key so grain varies per frame
 * quantiser    - quantisation step for this channel (capped at 32 below)
 * subbands     - precomputed layout giving coeffStart/coeffCount per subband
 * qIndex, qYGlobal, usePerceptualWeights - reserved for the perceptual-weight
 *     variant (commented out below); they currently do not affect the output
 */
private fun removeGrainSynthesisDecoder(coeffs: FloatArray, width: Int, height: Int,
decompLevels: Int, frameNum: Int, quantiser: Float,
subbands: List<DWTSubbandInfo>, qIndex: Int = 3, qYGlobal: Int = 0,
usePerceptualWeights: Boolean = false) {
// Only apply to Y channel, excluding LL band
// Noise amplitude = half of quantization step (scaled by perceptual weight if enabled)
// Process each subband (skip LL which is level 0)
for (subband in subbands) {
if (subband.level == 0) continue // Skip LL band — it carries the DC image content and is never grained
// Calculate perceptual weight for this subband if perceptual mode is enabled
/*val perceptualWeight = if (usePerceptualWeights) {
getPerceptualWeight(qIndex, qYGlobal, subband.level, subband.subbandType, false, decompLevels)
} else {
1.0f
}
// Noise amplitude for this subband
val noiseAmplitude = (quantiser * perceptualWeight) * 0.5f*/
// Half the quantisation step, capped at 32 → maximum amplitude 16 (spec: "half of the quantisation level")
val noiseAmplitude = quantiser.coerceAtMost(32f) * 0.5f
// Remove noise from each coefficient in this subband
for (i in 0 until subband.coeffCount) {
val idx = subband.coeffStart + i
if (idx < coeffs.size) {
// Calculate 2D position from linear index
// (assumes coefficients are stored row-major in a width×height plane — TODO confirm against calculateSubbandLayout)
val y = idx / width
val x = idx % width
// Generate same deterministic noise as encoder.
// Band key = level + type*31 + 16777619 (the FNV prime).
// NOTE(review): the C encoder's apply_grain_synthesis_encoder uses 16777219
// and a *0.25f amplitude — it must be brought in line with this decoder
// for encoder-added grain to cancel exactly.
val rngVal = tavGrainSynthesisRNG(frameNum.toUInt(), (subband.level + subband.subbandType * 31 + 16777619).toUInt(), x.toUInt(), y.toUInt())
val noise = tavGrainTriangularNoise(rngVal)
// Subtract noise from coefficient
coeffs[idx] -= noise * noiseAmplitude
}
}
}
}
private val TAV_QLUT = intArrayOf(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096) private val TAV_QLUT = 
intArrayOf(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096)
// New tavDecode function that accepts compressed data and decompresses internally // New tavDecode function that accepts compressed data and decompresses internally
fun tavDecodeCompressed(compressedDataPtr: Long, compressedSize: Int, currentRGBAddr: Long, prevRGBAddr: Long, fun tavDecodeCompressed(compressedDataPtr: Long, compressedSize: Int, currentRGBAddr: Long, prevRGBAddr: Long,
width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int, width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int,
frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1, filmGrainLevel: Int = 0): HashMap<String, Any> { frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1): HashMap<String, Any> {
// Read compressed data from VM memory into byte array // Read compressed data from VM memory into byte array
val compressedData = ByteArray(compressedSize) val compressedData = ByteArray(compressedSize)
@@ -4481,7 +4552,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Call the existing tavDecode function with decompressed data // Call the existing tavDecode function with decompressed data
tavDecode(decompressedBuffer.toLong(), currentRGBAddr, prevRGBAddr, tavDecode(decompressedBuffer.toLong(), currentRGBAddr, prevRGBAddr,
width, height, qIndex, qYGlobal, qCoGlobal, qCgGlobal, channelLayout, width, height, qIndex, qYGlobal, qCoGlobal, qCgGlobal, channelLayout,
frameCount, waveletFilter, decompLevels, isLossless, tavVersion, filmGrainLevel) frameCount, waveletFilter, decompLevels, isLossless, tavVersion)
} finally { } finally {
// Clean up allocated buffer // Clean up allocated buffer
@@ -4497,7 +4568,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Original tavDecode function for backward compatibility (now handles decompressed data) // Original tavDecode function for backward compatibility (now handles decompressed data)
fun tavDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long, fun tavDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int, width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int,
frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1, filmGrainLevel: Int = 0): HashMap<String, Any> { frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1): HashMap<String, Any> {
val dbgOut = HashMap<String, Any>() val dbgOut = HashMap<String, Any>()
@@ -4554,14 +4625,14 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Decode DWT coefficients directly to RGB buffer // Decode DWT coefficients directly to RGB buffer
readPtr = tavDecodeDWTIntraTileRGB(qIndex, qYGlobal, channelLayout, readPtr, tileX, tileY, currentRGBAddr, readPtr = tavDecodeDWTIntraTileRGB(qIndex, qYGlobal, channelLayout, readPtr, tileX, tileY, currentRGBAddr,
width, height, qY, qCo, qCg, width, height, qY, qCo, qCg,
waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, filmGrainLevel) waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, frameCount)
dbgOut["frameMode"] = " " dbgOut["frameMode"] = " "
} }
0x02 -> { // TAV_MODE_DELTA 0x02 -> { // TAV_MODE_DELTA
// Coefficient delta encoding for efficient P-frames // Coefficient delta encoding for efficient P-frames
readPtr = tavDecodeDeltaTileRGB(readPtr, channelLayout, tileX, tileY, currentRGBAddr, readPtr = tavDecodeDeltaTileRGB(readPtr, channelLayout, tileX, tileY, currentRGBAddr,
width, height, qY, qCo, qCg, width, height, qY, qCo, qCg,
waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, filmGrainLevel) waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, frameCount)
dbgOut["frameMode"] = " " dbgOut["frameMode"] = " "
} }
} }
@@ -4577,7 +4648,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
private fun tavDecodeDWTIntraTileRGB(qIndex: Int, qYGlobal: Int, channelLayout: Int, readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long, private fun tavDecodeDWTIntraTileRGB(qIndex: Int, qYGlobal: Int, channelLayout: Int, readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, width: Int, height: Int, qY: Int, qCo: Int, qCg: Int,
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false, filmGrainLevel: Int = 0): Long { waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int): Long {
// Determine coefficient count based on mode // Determine coefficient count based on mode
val coeffCount = if (isMonoblock) { val coeffCount = if (isMonoblock) {
// Monoblock mode: entire frame // Monoblock mode: entire frame
@@ -4678,15 +4749,20 @@ class GraphicsJSR223Delegate(private val vm: VM) {
dequantiseDWTSubbandsPerceptual(qIndex, qYGlobal, quantisedCo, coTile, subbands, qCo.toFloat(), true, decompLevels) dequantiseDWTSubbandsPerceptual(qIndex, qYGlobal, quantisedCo, coTile, subbands, qCo.toFloat(), true, decompLevels)
dequantiseDWTSubbandsPerceptual(qIndex, qYGlobal, quantisedCg, cgTile, subbands, qCg.toFloat(), true, decompLevels) dequantiseDWTSubbandsPerceptual(qIndex, qYGlobal, quantisedCg, cgTile, subbands, qCg.toFloat(), true, decompLevels)
// Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT)
// Use perceptual weights since this is the perceptual quantization path
removeGrainSynthesisDecoder(yTile, tileWidth, tileHeight, decompLevels, frameCount, qY.toFloat(), subbands, qIndex, qYGlobal, true)
// Apply film grain filter if enabled // Apply film grain filter if enabled
if (filmGrainLevel > 0) { // commented; grain synthesis is now a part of the spec
/*if (filmGrainLevel > 0) {
val random = java.util.Random() val random = java.util.Random()
for (i in 0 until coeffCount) { for (i in 0 until coeffCount) {
yTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat() yTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat()
// coTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat() // coTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat()
// cgTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat() // cgTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat()
} }
} }*/
// Debug: Check coefficient values before inverse DWT // Debug: Check coefficient values before inverse DWT
if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) { if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) {
@@ -4744,15 +4820,22 @@ class GraphicsJSR223Delegate(private val vm: VM) {
cgTile[i] = quantisedCg[i] * qCg.toFloat() cgTile[i] = quantisedCg[i] * qCg.toFloat()
} }
// Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT)
val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X
val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y
val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels)
removeGrainSynthesisDecoder(yTile, tileWidth, tileHeight, decompLevels, frameCount, qY.toFloat(), subbands)
// Apply film grain filter if enabled // Apply film grain filter if enabled
if (filmGrainLevel > 0) { // commented; grain synthesis is now a part of the spec
/*if (filmGrainLevel > 0) {
val random = java.util.Random() val random = java.util.Random()
for (i in 0 until coeffCount) { for (i in 0 until coeffCount) {
yTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat() yTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat()
// coTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat() // coTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat()
// cgTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat() // cgTile[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat()
} }
} }*/
// Debug: Uniform quantisation subband analysis for comparison // Debug: Uniform quantisation subband analysis for comparison
if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) { if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) {
@@ -5160,48 +5243,6 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
} }
// Delta-specific perceptual weight model for motion-optimized coefficient reconstruction
private fun getPerceptualWeightDelta(qualityLevel: Int, level: Int, subbandType: Int, isChroma: Boolean, maxLevels: Int): Float {
// Delta coefficients have different perceptual characteristics than full-picture coefficients:
// 1. Motion edges are more perceptually critical than static edges
// 2. Temporal masking allows more aggressive quantisation in high-motion areas
// 3. Smaller delta magnitudes make relative quantisation errors more visible
// 4. Frequency distribution is motion-dependent rather than spatial-dependent
return if (!isChroma) {
// LUMA DELTA CHANNEL: Emphasize motion coherence and edge preservation
when (subbandType) {
0 -> { // LL subband - DC motion changes, still important
// DC motion changes - preserve somewhat but allow coarser quantisation than full-picture
2f // Slightly coarser than full-picture
}
1 -> { // LH subband - horizontal motion edges
// Motion boundaries benefit from temporal masking - allow coarser quantisation
0.9f
}
2 -> { // HL subband - vertical motion edges
// Vertical motion boundaries - equal treatment with horizontal for deltas
1.2f
}
else -> { // HH subband - diagonal motion details
// Diagonal motion deltas can be quantised most aggressively
0.5f
}
}
} else {
// CHROMA DELTA CHANNELS: More aggressive quantisation allowed due to temporal masking
// Motion chroma changes are less perceptually critical than static chroma
val base = getPerceptualModelChromaBase(qualityLevel, level - 1)
when (subbandType) {
0 -> 1.3f // LL chroma deltas - more aggressive than full-picture chroma
1 -> kotlin.math.max(1.2f, kotlin.math.min(120.0f, base * 1.4f)) // LH chroma deltas
2 -> kotlin.math.max(1.4f, kotlin.math.min(140.0f, base * 1.6f)) // HL chroma deltas
else -> kotlin.math.max(1.6f, kotlin.math.min(160.0f, base * 1.8f)) // HH chroma deltas
}
}
}
private fun getPerceptualModelChromaBase(qualityLevel: Int, level: Int): Float { private fun getPerceptualModelChromaBase(qualityLevel: Int, level: Int): Float {
// Simplified chroma base curve // Simplified chroma base curve
return 1.0f - (1.0f / (0.5f * qualityLevel * qualityLevel + 1.0f)) * (level - 4.0f) return 1.0f - (1.0f / (0.5f * qualityLevel * qualityLevel + 1.0f)) * (level - 4.0f)
@@ -5209,7 +5250,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
private fun tavDecodeDeltaTileRGB(readPtr: Long, channelLayout: Int, tileX: Int, tileY: Int, currentRGBAddr: Long, private fun tavDecodeDeltaTileRGB(readPtr: Long, channelLayout: Int, tileX: Int, tileY: Int, currentRGBAddr: Long,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, width: Int, height: Int, qY: Int, qCo: Int, qCg: Int,
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false, filmGrainLevel: Int = 0): Long { waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int = 0): Long {
val tileIdx = if (isMonoblock) { val tileIdx = if (isMonoblock) {
0 // Single tile index for monoblock 0 // Single tile index for monoblock
@@ -5326,15 +5367,23 @@ class GraphicsJSR223Delegate(private val vm: VM) {
currentCg[i] = prevCg[i] + (deltaCg[i].toFloat() * qCg) currentCg[i] = prevCg[i] + (deltaCg[i].toFloat() * qCg)
} }
// Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT)
val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X
val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y
val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels)
// Delta frames use uniform quantization for the deltas themselves, so no perceptual weights
removeGrainSynthesisDecoder(currentY, tileWidth, tileHeight, decompLevels, frameCount, qY.toFloat(), subbands)
// Apply film grain filter if enabled // Apply film grain filter if enabled
if (filmGrainLevel > 0) { // commented; grain synthesis is now a part of the spec
/*if (filmGrainLevel > 0) {
val random = java.util.Random() val random = java.util.Random()
for (i in 0 until coeffCount) { for (i in 0 until coeffCount) {
currentY[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat() currentY[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat()
// currentCo[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat() // currentCo[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat()
// currentCg[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat() // currentCg[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat()
} }
} }*/
// Store current coefficients as previous for next frame // Store current coefficients as previous for next frame
tavPreviousCoeffsY!![tileIdx] = currentY.clone() tavPreviousCoeffsY!![tileIdx] = currentY.clone()
@@ -5342,9 +5391,6 @@ class GraphicsJSR223Delegate(private val vm: VM) {
tavPreviousCoeffsCg!![tileIdx] = currentCg.clone() tavPreviousCoeffsCg!![tileIdx] = currentCg.clone()
// Apply inverse DWT // Apply inverse DWT
val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X
val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y
if (isLossless) { if (isLossless) {
tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, 0, TavSharpenLuma) tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, 0, TavSharpenLuma)
tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, 0, TavNullFilter) tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, 0, TavNullFilter)

View File

@@ -281,6 +281,7 @@ typedef struct tav_encoder_s {
int perceptual_tuning; // 1 = perceptual quantisation (default), 0 = uniform quantisation int perceptual_tuning; // 1 = perceptual quantisation (default), 0 = uniform quantisation
int channel_layout; // Channel layout: 0=Y-Co-Cg, 1=Y-only, 2=Y-Co-Cg-A, 3=Y-A, 4=Co-Cg int channel_layout; // Channel layout: 0=Y-Co-Cg, 1=Y-only, 2=Y-Co-Cg-A, 3=Y-A, 4=Co-Cg
int progressive_mode; // 0 = interlaced (default), 1 = progressive int progressive_mode; // 0 = interlaced (default), 1 = progressive
int grain_synthesis; // 1 = encoder-side grain synthesis, 0 = disable (default: 0 — grain is normally synthesised by the decoder only)
// Frame buffers - ping-pong implementation // Frame buffers - ping-pong implementation
uint8_t *frame_rgb[2]; // [0] and [1] alternate between current and previous uint8_t *frame_rgb[2]; // [0] and [1] alternate between current and previous
@@ -616,6 +617,21 @@ static void free_subtitle_list(subtitle_entry_t *list);
static int write_subtitle_packet(FILE *output, uint32_t index, uint8_t opcode, const char *text); static int write_subtitle_packet(FILE *output, uint32_t index, uint8_t opcode, const char *text);
static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output); static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output);
/* Film grain synthesis — deterministic position-keyed RNG.
 * rng_hash() is a 32-bit integer avalanche (lowbias32-style constants);
 * grain_synthesis_rng() folds (frame, band, x, y) into one key so the
 * encoder and the decoder regenerate an identical noise field. */
static uint32_t rng_hash(uint32_t x) {
    uint32_t h = x;
    h = (h ^ (h >> 16)) * 0x7feb352d;
    h = (h ^ (h >> 15)) * 0x846ca68b;
    return h ^ (h >> 16);
}

static uint32_t grain_synthesis_rng(uint32_t frame, uint32_t band, uint32_t x, uint32_t y) {
    /* Golden-ratio constant for the frame, a distinct odd constant for the
     * band, coordinates packed into the remaining bits. */
    return rng_hash((frame * 0x9e3779b9u) ^ (band * 0x7f4a7c15u) ^ (y << 16) ^ x);
}
// Show usage information // Show usage information
static void show_usage(const char *program_name) { static void show_usage(const char *program_name) {
int qtsize = sizeof(MP2_RATE_TABLE) / sizeof(int); int qtsize = sizeof(MP2_RATE_TABLE) / sizeof(int);
@@ -647,6 +663,7 @@ static void show_usage(const char *program_name) {
printf(" --dump-frame N Dump quantised coefficients for frame N (creates .bin files)\n"); printf(" --dump-frame N Dump quantised coefficients for frame N (creates .bin files)\n");
printf(" --wavelet N Wavelet filter: 0=LGT 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar (default: 1)\n"); printf(" --wavelet N Wavelet filter: 0=LGT 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar (default: 1)\n");
printf(" --zstd-level N Zstd compression level 1-22 (default: %d, higher = better compression but slower)\n", DEFAULT_ZSTD_LEVEL); printf(" --zstd-level N Zstd compression level 1-22 (default: %d, higher = better compression but slower)\n", DEFAULT_ZSTD_LEVEL);
printf(" --no-grain-synthesis Disable grain synthesis (enabled by default)\n");
printf(" --help Show this help\n\n"); printf(" --help Show this help\n\n");
printf("Audio Rate by Quality:\n "); printf("Audio Rate by Quality:\n ");
@@ -710,6 +727,7 @@ static tav_encoder_t* create_encoder(void) {
enc->encode_limit = 0; // Default: no frame limit enc->encode_limit = 0; // Default: no frame limit
enc->zstd_level = DEFAULT_ZSTD_LEVEL; // Default Zstd compression level enc->zstd_level = DEFAULT_ZSTD_LEVEL; // Default Zstd compression level
enc->progressive_mode = 1; // Default to progressive mode enc->progressive_mode = 1; // Default to progressive mode
enc->grain_synthesis = 0; // Default: disable grain synthesis (only do it on the decoder)
return enc; return enc;
} }
@@ -1142,6 +1160,67 @@ static void extract_padded_tile(tav_encoder_t *enc, int tile_x, int tile_y,
} }
} }
// ==============================================================================
// Grain Synthesis Functions
// ==============================================================================
// Forward declaration for perceptual weight function
static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels);
// Generate triangular noise from uint32 RNG
// Returns value in range [-1.0, 1.0]
/* Triangular-distribution noise in [-1.0, 1.0] derived from one 32-bit hash.
 * The two 16-bit halves serve as independent uniform samples on [0, 1];
 * their sum minus one is triangularly distributed about zero. */
static float grain_triangular_noise(uint32_t rng_val) {
    const float lo = (rng_val & 0xFFFF) / 65535.0f;
    const float hi = ((rng_val >> 16) & 0xFFFF) / 65535.0f;
    return lo + hi - 1.0f;
}
// Apply grain synthesis to DWT coefficients (encoder adds noise)
// Apply grain synthesis to DWT coefficients (encoder adds noise).
// Mirrors removeGrainSynthesisDecoder() in the Kotlin decoder: the decoder
// SUBTRACTS exactly this noise field, so every constant here (band key,
// amplitude clamp, 0.5f scale) must stay bit-identical to the decoder's.
// Per the TAV spec, the amplitude is half of the quantisation step and the
// grain is applied to all non-LL subbands only.
//
// enc          - encoder state (currently unused; kept for the perceptual
//                variant left commented out in an earlier revision)
// coeffs       - DWT coefficients for one tile, modified in place
// width/height - tile dimensions
// decomp_levels- DWT decomposition levels (determines subband geometry)
// frame_num    - frame counter, part of the RNG key
// quantiser    - dequantised Y quantisation step (clamped to [0, 32])
// is_chroma    - currently unused; grain is applied to Y only by the caller
static void apply_grain_synthesis_encoder(tav_encoder_t *enc, float *coeffs, int width, int height,
                                          int decomp_levels, uint32_t frame_num,
                                          int quantiser, int is_chroma) {
    (void)enc;       // only needed by the (disabled) perceptual-weight path
    (void)is_chroma; // ditto

    // Loop-invariant noise amplitude: clamp(q, 0, 32) * 0.5, matching the
    // decoder's quantiser.coerceAtMost(32f) * 0.5f and the spec's
    // "half of the quantisation level".  (Bug fix: this was * 0.25f, which
    // would leave residual grain after the decoder subtracts at 0.5.)
    const float noise_amplitude = FCLAMP(quantiser, 0.0f, 32.0f) * 0.5f;

    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            const int idx = y * width + x;
            const int level = get_subband_level_2d(x, y, width, height, decomp_levels);
            if (level == 0) {
                continue; // LL band carries the DC image content — never grained
            }
            const int subband_type = get_subband_type_2d(x, y, width, height, decomp_levels);
            // Band key must match the decoder exactly: level + type*31 + 16777619
            // (the FNV prime).  (Bug fix: this was 16777219, which desynchronised
            // the encoder noise from the decoder noise.)
            const uint32_t rng_val = grain_synthesis_rng(frame_num, level + subband_type * 31 + 16777619, x, y);
            coeffs[idx] += grain_triangular_noise(rng_val) * noise_amplitude;
        }
    }
}
// 2D DWT forward transform for rectangular padded tile (344x288) // 2D DWT forward transform for rectangular padded tile (344x288)
static void dwt_2d_forward_padded(float *tile_data, int levels, int filter_type) { static void dwt_2d_forward_padded(float *tile_data, int levels, int filter_type) {
@@ -2002,6 +2081,21 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
printf("\n"); printf("\n");
}*/ }*/
// Apply grain synthesis to Y channel (after DWT, before quantization)
if (enc->grain_synthesis && mode != TAV_MODE_SKIP) {
// Get the quantiser value that will be used for this frame
int qY_value = enc->bitrate_mode ? quantiser_float_to_int_dithered(enc) : enc->quantiser_y;
int actual_qY = QLUT[qY_value];
// Determine dimensions based on mode
int gs_width = enc->monoblock ? enc->width : PADDED_TILE_SIZE_X;
int gs_height = enc->monoblock ? enc->height : PADDED_TILE_SIZE_Y;
// Apply grain synthesis to Y channel only (is_chroma = 0)
apply_grain_synthesis_encoder(enc, tile_y_data, gs_width, gs_height,
enc->decomp_levels, enc->frame_count, actual_qY, 0);
}
// Serialise tile // Serialise tile
size_t tile_size = serialise_tile_data(enc, tile_x, tile_y, size_t tile_size = serialise_tile_data(enc, tile_x, tile_y,
tile_y_data, tile_co_data, tile_cg_data, tile_y_data, tile_co_data, tile_cg_data,
@@ -3555,6 +3649,7 @@ int main(int argc, char *argv[]) {
{"zstd-level", required_argument, 0, 1014}, {"zstd-level", required_argument, 0, 1014},
{"interlace", no_argument, 0, 1015}, {"interlace", no_argument, 0, 1015},
{"interlaced", no_argument, 0, 1015}, {"interlaced", no_argument, 0, 1015},
// {"no-grain-synthesis", no_argument, 0, 1016},
{"help", no_argument, 0, '?'}, {"help", no_argument, 0, '?'},
{0, 0, 0, 0} {0, 0, 0, 0}
}; };
@@ -3704,6 +3799,9 @@ int main(int argc, char *argv[]) {
case 1015: // --interlaced case 1015: // --interlaced
enc->progressive_mode = 0; enc->progressive_mode = 0;
break; break;
case 1016: // --no-grain-synthesis
enc->grain_synthesis = 0;
break;
case 'a': case 'a':
int bitrate = atoi(optarg); int bitrate = atoi(optarg);
int valid_bitrate = validate_mp2_bitrate(bitrate); int valid_bitrate = validate_mp2_bitrate(bitrate);