From e871264ae56dc3f6224d0d727371501eda926697 Mon Sep 17 00:00:00 2001
From: minjaesong <alswo9628@gmail.com>
Date: Mon, 3 Nov 2025 02:36:12 +0900
Subject: [PATCH] TAV: wip

---
 assets/disk0/tvdos/bin/playtav.js    |   4 +-
 latency_simulator_storage_device.kts | 200 +++++++++
 video_encoder/Makefile               |   5 +-
 video_encoder/encoder_tad.h          |  16 +-
 video_encoder/encoder_tav.c          | 644 ++-------------------------
 5 files changed, 233 insertions(+), 636 deletions(-)
 create mode 100644 latency_simulator_storage_device.kts

diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js
index 7fb0f91..33e5427 100644
--- a/assets/disk0/tvdos/bin/playtav.js
+++ b/assets/disk0/tvdos/bin/playtav.js
@@ -1629,9 +1629,7 @@ try {
 
                 // Apply bias lighting
                 let biasStart = sys.nanoTime()
-                if (currentGopFrameIndex === 0 || currentGopFrameIndex === currentGopSize - 1) {
-                    setBiasLighting()
-                }
+                setBiasLighting()
                 biasTime = (sys.nanoTime() - biasStart) / 1000000.0
 
                 // Fire audio on first frame
diff --git a/latency_simulator_storage_device.kts b/latency_simulator_storage_device.kts
new file mode 100644
index 0000000..cc67dd8
--- /dev/null
+++ b/latency_simulator_storage_device.kts
@@ -0,0 +1,200 @@
+import kotlin.math.ceil
+
+
+object Random {
+    fun uniformRand(low: Int, high: Int) = (Math.random() * (high + 1)).toInt()
+
+    fun triangularRand(low: Float, high: Float): Float {
+        val a = (Math.random() + Math.random()) / 2.0
+    	return ((high - low) * a + low).toFloat()
+    }
+    fun gaussianRand(avg: Float, stddev: Float): Float {
+        // Box-Muller transform to generate random numbers with standard normal distribution
+        // This implementation uses the polar form for better efficiency
+
+        // We need two uniform random values between 0 and 1
+        val random = kotlin.random.Random
+
+        // Using the polar form of the Box-Muller transformation
+        var u: Double
+        var v: Double
+        var s: Double
+
+        do {
+            // Generate two uniform random numbers between -1 and 1
+            u = Math.random() * 2 - 1
+            v = Math.random() * 2 - 1
+
+            // Calculate sum of squares
+            s = u * u + v * v
+        } while (s >= 1 || s == 0.0)
+
+        // Calculate polar transformation
+        val multiplier = kotlin.math.sqrt(-2.0 * kotlin.math.ln(s) / s)
+
+        // Transform to the desired mean and standard deviation
+        // We only use one of the two generated values here
+        return (avg + stddev * u * multiplier).toFloat()
+    }
+}
+
+sealed class SeekSimulator {
+
+    abstract fun computeSeekTime(currentSector: Int, targetSector: Int): Float
+
+    class Tape(
+        val totalSectors: Int,
+        val tapeLengthMeters: Float = 200f,
+        val baseSeekTime: Float = 0.5f,  // seconds base inertia
+        val tapeSpeedMetersPerSec: Float = 2.0f,  // normal speed
+    ) : SeekSimulator() {
+        override fun computeSeekTime(currentSector: Int, targetSector: Int): Float {
+            val posCurrent = (currentSector.toFloat() / totalSectors) * tapeLengthMeters
+            val posTarget = (targetSector.toFloat() / totalSectors) * tapeLengthMeters
+            val distance = kotlin.math.abs(posTarget - posCurrent)
+
+            // Inject random tape jitter
+            val effectiveSpeed = tapeSpeedMetersPerSec * Random.triangularRand(0.9f, 1.1f)
+
+            return baseSeekTime + (distance / effectiveSpeed)
+        }
+    }
+
+    class Disc(
+        val totalTracks: Int,
+        val armSeekBaseTime: Float = 0.005f,  // fast seek, seconds
+        val armSeekMultiplier: Float = 0.002f,  // slower for bigger jumps
+        val rotationLatencyAvg: Float = 0.008f,  // seconds (half-rotation average)
+    ) : SeekSimulator() {
+        override fun computeSeekTime(currentSector: Int, targetSector: Int): Float {
+            val cylCurrent = sectorToTrack(currentSector)
+            val cylTarget = sectorToTrack(targetSector)
+            val deltaTracks = kotlin.math.abs(cylTarget - cylCurrent)
+
+            val armSeek = armSeekBaseTime + (armSeekMultiplier * kotlin.math.sqrt(deltaTracks.toFloat()))
+            val rotationLatency = Random.gaussianRand(rotationLatencyAvg, rotationLatencyAvg * 0.2f)
+
+            return armSeek + rotationLatency
+        }
+
+        private fun sectorToTrack(sector: Int): Int {
+            // Simplistic assumption: sector layout maps 1:1 to track at this level
+            return sector % totalTracks
+        }
+    }
+
+    class Drum(
+        val rpm: Float = 3000f
+    ) : SeekSimulator() {
+        override fun computeSeekTime(currentSector: Int, targetSector: Int): Float {
+            val degreesPerSector = 360.0f / 10000.0f  // Assume 10k sectors per drum circumference
+            val angleCurrent = currentSector * degreesPerSector
+            val angleTarget = targetSector * degreesPerSector
+            val deltaAngle = kotlin.math.abs(angleTarget - angleCurrent) % 360f
+
+            val rotationLatencySeconds = (deltaAngle / 360f) * (60f / rpm)
+
+            // Add a little mechanical jitter
+            val jitteredLatency = rotationLatencySeconds * Random.triangularRand(0.95f, 1.05f)
+
+            return jitteredLatency
+        }
+    }
+}
+
+
+class SeekLatencySampler(
+    val simulator: SeekSimulator,
+    val totalSectors: Int,
+    val sampleCount: Int = 10000
+) {
+    data class Sample(val fromSector: Int, val toSector: Int, val latency: Float)
+
+    val samples = mutableListOf<Sample>()
+
+    fun runSampling() {
+        samples.clear()
+        var lastSector = Random.uniformRand(0, totalSectors - 1)
+
+        repeat(sampleCount) {
+            val nextSector = Random.uniformRand(0, totalSectors - 1)
+            val latency = simulator.computeSeekTime(lastSector, nextSector)
+            samples.add(Sample(lastSector, nextSector, latency))
+            lastSector = nextSector
+        }
+    }
+
+    fun analyzeAndPrint() {
+        if (samples.isEmpty()) {
+            println("No samples generated. Run runSampling() first.")
+            return
+        }
+
+        val latencies = samples.map { it.latency }
+        val minLatency = latencies.minOrNull() ?: 0f
+        val maxLatency = latencies.maxOrNull() ?: 0f
+        val avgLatency = latencies.average().toFloat()
+        val stddevLatency = kotlin.math.sqrt(latencies.map { (it - avgLatency).let { diff -> diff * diff } }.average()).toFloat()
+
+        println("=== Seek Latency Stats ===")
+        println("Samples: $sampleCount")
+        println("Min: ${"%.4f".format(minLatency)} s")
+        println("Max: ${"%.4f".format(maxLatency)} s")
+        println("Avg: ${"%.4f".format(avgLatency)} s")
+        println("Stddev: ${"%.4f".format(stddevLatency)} s")
+
+        printSimpleHistogram(latencies)
+    }
+
+    private fun printSimpleHistogram(latencies: List<Float>, bins: Int = 30) {
+        val min = latencies.minOrNull() ?: return
+        val max = latencies.maxOrNull() ?: return
+        val binSize = (max - min) / bins
+
+        val histogram = IntArray(bins) { 0 }
+
+        latencies.forEach { latency ->
+            val bin = kotlin.math.min(((latency - min) / binSize).toInt(), bins - 1)
+            histogram[bin]++
+        }
+
+        println("--- Latency Distribution ---")
+        histogram.forEachIndexed { index, count ->
+            val lower = min + binSize * index
+            val upper = lower + binSize
+            val bar = "#".repeat(count / (sampleCount / 200))  // Scale bar length
+            println("${"%.4f".format(lower)} - ${"%.4f".format(upper)} s: $bar")
+        }
+    }
+}
+
+fun main() {
+    val tapeSimulator = SeekSimulator.Tape(
+        totalSectors = 100000,
+        tapeLengthMeters = 200f,
+        baseSeekTime = 0.2f,
+        tapeSpeedMetersPerSec = 5.0f
+    )
+
+    val discSimulator = SeekSimulator.Disc(
+        totalTracks = 3810,
+        armSeekBaseTime = 0.005f,
+        armSeekMultiplier = 0.002f,
+        rotationLatencyAvg = 0.008f
+    )
+
+    val drumSimulator = SeekSimulator.Drum(
+        rpm = 3000f
+    )
+
+    listOf(tapeSimulator, discSimulator, drumSimulator).forEach { sim ->
+    	SeekLatencySampler(
+            simulator = sim,
+            totalSectors = 100000,
+            sampleCount = 5000
+        ).also {
+            it.runSampling()
+            it.analyzeAndPrint()
+        }
+    }
+}
diff --git a/video_encoder/Makefile b/video_encoder/Makefile
index b60227c..dec6a62 100644
--- a/video_encoder/Makefile
+++ b/video_encoder/Makefile
@@ -28,7 +28,7 @@ tev: encoder_tev.c
 	rm -f encoder_tev
 	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -o encoder_tev $< $(LIBS)
 
-tav: encoder_tav.c encoder_tad.c encoder_tav_opencv.cpp estimate_affine_from_blocks.cpp
+tav: encoder_tav.c encoder_tad.c encoder_tav_opencv.cpp
 	rm -f encoder_tav encoder_tav.o encoder_tad.o encoder_tav_opencv.o
 	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c encoder_tav.c -o encoder_tav.o
 	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c encoder_tad.c -o encoder_tad.o
@@ -58,9 +58,6 @@ decoder_tad: decoder_tad.c
 tad: $(TAD_TARGETS)
 
 # Build test programs
-test_mesh_warp: test_mesh_warp.cpp encoder_tav_opencv.cpp estimate_affine_from_blocks.cpp
-	rm -f test_mesh_warp test_mesh_warp.o
-	$(CXX) $(CXXFLAGS) $(OPENCV_CFLAGS) -o test_mesh_warp test_mesh_warp.cpp encoder_tav_opencv.cpp estimate_affine_from_blocks.cpp $(OPENCV_LIBS)
 
 test_mesh_roundtrip: test_mesh_roundtrip.cpp encoder_tav_opencv.cpp
 	rm -f test_mesh_roundtrip test_mesh_roundtrip.o
diff --git a/video_encoder/encoder_tad.h b/video_encoder/encoder_tad.h
index 6985e0e..3b318b7 100644
--- a/video_encoder/encoder_tad.h
+++ b/video_encoder/encoder_tad.h
@@ -15,23 +15,15 @@
 #define TAD32_CHANNELS 2  // Stereo
 #define TAD32_SIGMAP_2BIT 1  // 2-bit: 00=0, 01=+1, 10=-1, 11=other
 #define TAD32_QUALITY_MIN 0
-#define TAD32_QUALITY_MAX 5
+#define TAD32_QUALITY_MAX 6
 #define TAD32_QUALITY_DEFAULT 3
 #define TAD32_ZSTD_LEVEL 15
 
-/**
- * Convert quality level (0-5) to max_index for quantization
- * Quality 0 = very low quality, small file (max_index=7, 3-bit)
- * Quality 1 = low quality (max_index=15, 4-bit)
- * Quality 2 = medium quality (max_index=31, 5-bit)
- * Quality 3 = good quality (max_index=63, 6-bit) [DEFAULT]
- * Quality 4 = high quality (max_index=127, 7-bit)
- * Quality 5 = very high quality (max_index=255, 8-bit)
- */
+
 static inline int tad32_quality_to_max_index(int quality) {
-    static const int quality_map[6] = {31, 35, 39, 47, 56, 89};
+    static const int quality_map[7] = {31, 35, 39, 47, 56, 89, 127};
     if (quality < 0) quality = 0;
-    if (quality > 5) quality = 5;
+    if (quality > 6) quality = 6;
     return quality_map[quality];
 }
 
diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c
index e26f5d0..fff96a5 100644
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -1712,10 +1712,10 @@ static const int QLUT[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21
 
 // Quality level to quantisation mapping for different channels
 // the values are indices to the QLUT
-static const int QUALITY_Y[] = {79, 47, 23, 11, 5, 2, 1}; // 96, 48, 24, 12, 6, 3, 2
-static const int QUALITY_CO[] = {123, 108, 91, 76, 59, 29, 4}; // 240, 180, 120, 90, 60, 30, 5
-static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39, 7}; // 424, 304, 200, 144, 90, 40, 8
-static const int QUALITY_ALPHA[] = {79, 47, 23, 11, 5, 2, 1}; // 96, 48, 24, 12, 6, 3, 2
+static const int QUALITY_Y[] = {79, 47, 23, 11, 5, 2, 0}; // 96, 48, 24, 12, 6, 3, 1
+static const int QUALITY_CO[] = {123, 108, 91, 76, 59, 29, 3}; // 240, 180, 120, 90, 60, 30, 4
+static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39, 5}; // 424, 304, 200, 144, 90, 40, 6
+static const int QUALITY_ALPHA[] = {79, 47, 23, 11, 5, 2, 0}; // 96, 48, 24, 12, 6, 3, 1
 
 // Dead-zone quantisation thresholds per quality level
 // Higher values = more aggressive (more coefficients set to zero)
@@ -1814,7 +1814,6 @@ typedef struct tav_encoder_s {
     preprocess_mode_t preprocess_mode;  // Coefficient preprocessing mode (TWOBITMAP=default, EZBC, RAW)
     int channel_layout;   // Channel layout: 0=Y-Co-Cg, 1=Y-only, 2=Y-Co-Cg-A, 3=Y-A, 4=Co-Cg
     int progressive_mode;  // 0 = interlaced (default), 1 = progressive
-    int grain_synthesis;   // 1 = enable grain synthesis (default), 0 = disable
     int use_delta_encoding;
     int delta_haar_levels; // Number of Haar DWT levels to apply to delta coefficients (0 = disabled)
     int separate_audio_track; // 1 = write entire MP2 file as packet 0x40 after header, 0 = interleave audio (default)
@@ -2287,8 +2286,6 @@ static void dwt_3d_forward(float **gop_data, int width, int height, int num_fram
                           int spatial_levels, int temporal_levels, int spatial_filter);
 static void dwt_3d_forward_mc(tav_encoder_t *enc, float **gop_y, float **gop_co, float **gop_cg,
                               int num_frames, int spatial_levels, int temporal_levels, int spatial_filter);
-static void dwt_3d_inverse(float **gop_data, int width, int height, int num_frames,
-                          int spatial_levels, int temporal_levels, int spatial_filter);
 static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
                        int *frame_numbers, int actual_gop_size);
 static size_t gop_process_and_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
@@ -2316,21 +2313,6 @@ static size_t preprocess_gop_unified(preprocess_mode_t preprocess_mode, int16_t
                                      int num_frames, int num_pixels, int width, int height, int channel_layout,
                                      uint8_t *output_buffer);
 
-// Film grain synthesis
-static uint32_t rng_hash(uint32_t x) {
-    x ^= x >> 16;
-    x *= 0x7feb352d;
-    x ^= x >> 15;
-    x *= 0x846ca68b;
-    x ^= x >> 16;
-    return x;
-}
-
-static uint32_t grain_synthesis_rng(uint32_t frame, uint32_t band, uint32_t x, uint32_t y) {
-    uint32_t key = frame * 0x9e3779b9u ^ band * 0x7f4a7c15u ^ (y << 16) ^ x;
-    return rng_hash(key);
-}
-
 // Show usage information
 static void show_usage(const char *program_name) {
     int qtsize = sizeof(MP2_RATE_TABLE) / sizeof(int);
@@ -2350,7 +2332,7 @@ static void show_usage(const char *program_name) {
     printf("                          Valid values: 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384\n");
 //    printf("  --separate-audio-track  Write entire audio track as single packet instead of interleaved\n");
     printf("  --pcm8-audio            Use 8-bit PCM audio instead of MP2 (TSVM native audio format)\n");
-    printf("  --tad-audio             Use TAD (DWT-based perceptual) audio codec (packet 0x24, quality follows -q)\n");
+    printf("  --tad-audio             Use TAD (DWT-based perceptual) audio codec\n");
     printf("  -S, --subtitles FILE    SubRip (.srt) or SAMI (.smi) subtitle file\n");
     printf("  --fontrom-lo FILE       Low font ROM file for internationalised subtitles\n");
     printf("  --fontrom-hi FILE       High font ROM file for internationalised subtitles\n");
@@ -2360,11 +2342,11 @@ static void show_usage(const char *program_name) {
     printf("  --intra-only            Disable delta and skip encoding\n");
     printf("  --enable-delta          Enable delta encoding\n");
     printf("  --delta-haar N          Apply N-level Haar DWT to delta coefficients (1-6, auto-enables delta)\n");
-    printf("  --3d-dwt                Enable temporal 3D DWT (GOP-based encoding with temporal transform)\n");
+    printf("  --3d-dwt                Enable temporal 3D DWT (GOP-based encoding with temporal transform; the default encoding mode)\n");
     printf("  --single-pass           Disable two-pass encoding with wavelet-based scene change detection (optimal GOP boundaries)\n");
-    printf("  --mc-ezbc               Enable MC-EZBC block-based motion compensation (requires --temporal-dwt, implies --ezbc)\n");
-    printf("  --ezbc                  Enable EZBC (Embedded Zero Block Coding) entropy coding\n");
-    printf("  --raw-coeffs            Use raw coefficients (no significance map preprocessing, for testing)\n");
+//    printf("  --mc-ezbc               Enable MC-EZBC block-based motion compensation (requires --temporal-dwt, implies --ezbc)\n");
+    printf("  --ezbc                  Enable EZBC (Embedded Zero Block Coding) entropy coding. May help reducing file size on high-quality videos\n");
+    printf("  --raw-coeffs            Use raw coefficients (no coefficient preprocessing, for testing)\n");
     printf("  --ictcp                 Use ICtCp colour space instead of YCoCg-R (use when source is in BT.2100)\n");
     printf("  --no-perceptual-tuning  Disable perceptual quantisation\n");
     printf("  --no-dead-zone          Disable dead-zone quantisation (for comparison/testing)\n");
@@ -2372,7 +2354,6 @@ static void show_usage(const char *program_name) {
     printf("  --dump-frame N          Dump quantised coefficients for frame N (creates .bin files)\n");
     printf("  --wavelet N             Wavelet filter: 0=LGT 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar (default: 1)\n");
     printf("  --zstd-level N          Zstd compression level 1-22 (default: %d, higher = better compression but slower)\n", DEFAULT_ZSTD_LEVEL);
-//    printf("  --no-grain-synthesis    Disable grain synthesis (enabled by default)\n");
     printf("  --help                  Show this help\n\n");
 
     printf("Audio Rate by Quality:\n  ");
@@ -2437,7 +2418,6 @@ static tav_encoder_t* create_encoder(void) {
     enc->encode_limit = 0;  // Default: no frame limit
     enc->zstd_level = DEFAULT_ZSTD_LEVEL;  // Default Zstd compression level
     enc->progressive_mode = 1;  // Default to progressive mode
-    enc->grain_synthesis = 0;  // Default: disable grain synthesis (only do it on the decoder)
     enc->use_delta_encoding = 0;
     enc->delta_haar_levels = TEMPORAL_DECOMP_LEVEL;
     enc->separate_audio_track = 0;  // Default: interleave audio packets
@@ -3253,70 +3233,6 @@ static void quantise_3d_dwt_coefficients(tav_encoder_t *enc,
     }
 }
 
-// =============================================================================
-// Mesh Differential Encoding for Compression
-// =============================================================================
-
-// Encode mesh motion vectors with selective affine using temporal and spatial prediction
-// Returns the number of bytes written to output buffer
-// Format:
-//   1. Mesh dimensions (2 bytes each: width, height)
-//   2. Affine significance mask (1 bit per cell per frame, packed into bytes)
-//   3. Translation dx/dy for ALL cells (temporal + spatial differential encoding)
-//   4. Affine parameters a11, a12, a21, a22 for cells where mask=1 (temporal + spatial differential)
-// Simplified mesh encoding - translation only (no affine)
-static size_t encode_mesh_differential(
-    int16_t **mesh_dx, int16_t **mesh_dy,
-    int gop_size, int temporal_mesh_width, int temporal_mesh_height,
-    uint8_t *output_buffer, size_t buffer_capacity
-) {
-    int mesh_points = temporal_mesh_width * temporal_mesh_height;
-    size_t bytes_written = 0;
-
-    // Write mesh dimensions (2 bytes each)
-    if (bytes_written + 4 > buffer_capacity) return 0;
-    uint16_t mesh_w_16 = (uint16_t)temporal_mesh_width;
-    uint16_t mesh_h_16 = (uint16_t)temporal_mesh_height;
-    memcpy(output_buffer + bytes_written, &mesh_w_16, sizeof(uint16_t));
-    bytes_written += sizeof(uint16_t);
-    memcpy(output_buffer + bytes_written, &mesh_h_16, sizeof(uint16_t));
-    bytes_written += sizeof(uint16_t);
-
-    // Encode translation data for all cells with temporal + spatial prediction
-    for (int t = 0; t < gop_size; t++) {
-        for (int i = 0; i < mesh_points; i++) {
-            int16_t dx = mesh_dx[t][i];
-            int16_t dy = mesh_dy[t][i];
-
-            // Temporal prediction
-            if (t > 0) {
-                dx -= mesh_dx[t - 1][i];
-                dy -= mesh_dy[t - 1][i];
-            }
-
-            // Spatial prediction
-            if (i > 0 && (i % temporal_mesh_width) != 0) {
-                int16_t left_dx = mesh_dx[t][i - 1];
-                int16_t left_dy = mesh_dy[t][i - 1];
-                if (t > 0) {
-                    left_dx -= mesh_dx[t - 1][i - 1];
-                    left_dy -= mesh_dy[t - 1][i - 1];
-                }
-                dx -= left_dx;
-                dy -= left_dy;
-            }
-
-            if (bytes_written + 4 > buffer_capacity) return 0;
-            memcpy(output_buffer + bytes_written, &dx, sizeof(int16_t));
-            bytes_written += sizeof(int16_t);
-            memcpy(output_buffer + bytes_written, &dy, sizeof(int16_t));
-            bytes_written += sizeof(int16_t);
-        }
-    }
-
-    return bytes_written;
-}
-
 // =============================================================================
 // Block MV Differential Encoding for MC-EZBC
 // =============================================================================
@@ -3378,62 +3294,6 @@ static size_t encode_block_mvs_differential(
     return bytes_written;
 }
 
-// Decode mesh motion vectors from differential encoding
-// Returns 0 on success, -1 on error
-// This is the inverse of encode_mesh_differential()
-static int decode_mesh_differential(
-    const uint8_t *input_buffer, size_t buffer_size,
-    int16_t **mesh_dx, int16_t **mesh_dy,
-    int gop_size, int *out_temporal_mesh_width, int *out_temporal_mesh_height
-) {
-    size_t bytes_read = 0;
-
-    // Read mesh dimensions
-    if (bytes_read + 4 > buffer_size) return -1;
-    uint16_t mesh_w_16, mesh_h_16;
-    memcpy(&mesh_w_16, input_buffer + bytes_read, sizeof(uint16_t));
-    bytes_read += sizeof(uint16_t);
-    memcpy(&mesh_h_16, input_buffer + bytes_read, sizeof(uint16_t));
-    bytes_read += sizeof(uint16_t);
-
-    int temporal_mesh_width = (int)mesh_w_16;
-    int temporal_mesh_height = (int)mesh_h_16;
-    int mesh_points = temporal_mesh_width * temporal_mesh_height;
-
-    *out_temporal_mesh_width = temporal_mesh_width;
-    *out_temporal_mesh_height = temporal_mesh_height;
-
-    // Decode mesh data for all frames
-    for (int t = 0; t < gop_size; t++) {
-        for (int i = 0; i < mesh_points; i++) {
-            // Read differential values
-            if (bytes_read + 4 > buffer_size) return -1;
-            int16_t dx_delta, dy_delta;
-            memcpy(&dx_delta, input_buffer + bytes_read, sizeof(int16_t));
-            bytes_read += sizeof(int16_t);
-            memcpy(&dy_delta, input_buffer + bytes_read, sizeof(int16_t));
-            bytes_read += sizeof(int16_t);
-
-            // Reconstruct: reverse spatial prediction first
-            if (i > 0 && (i % temporal_mesh_width) != 0) {
-                dx_delta += mesh_dx[t][i - 1];
-                dy_delta += mesh_dy[t][i - 1];
-            }
-
-            // Then reverse temporal prediction
-            if (t > 0) {
-                dx_delta += mesh_dx[t - 1][i];
-                dy_delta += mesh_dy[t - 1][i];
-            }
-
-            mesh_dx[t][i] = dx_delta;
-            mesh_dy[t][i] = dy_delta;
-        }
-    }
-
-    return 0;
-}
-
 // =============================================================================
 // MPEG-Style Motion Estimation and Residual Coding
 // =============================================================================
@@ -3471,106 +3331,6 @@ static float interpolate_subpixel(const float *frame, int width, int height, flo
     return p0 * (1.0f - fy) + p1 * fy;
 }
 
-// Block-matching motion estimation with 1/4-pixel precision
-// Returns the Sum of Absolute Differences (SAD) for the best match
-// Search is centered around predicted MV for spatial coherence
-static float block_matching_sad(const float *current, const float *reference,
-                               int width, int height,
-                               int block_x, int block_y, int block_size,
-                               int search_range,
-                               int16_t pred_mv_x, int16_t pred_mv_y,
-                               int16_t *best_mv_x, int16_t *best_mv_y) {
-    float best_sad = 1e9f;
-    int best_dx = 0, best_dy = 0;
-
-    // Block coordinates in current frame
-    int block_start_x = block_x * block_size;
-    int block_start_y = block_y * block_size;
-
-    // Convert predicted MV from 1/4-pixel units to full pixels for search center
-    int pred_dx = pred_mv_x / 4;
-    int pred_dy = pred_mv_y / 4;
-
-    // Full-pixel search centered around prediction
-    for (int dy = pred_dy - search_range; dy <= pred_dy + search_range; dy++) {
-        for (int dx = pred_dx - search_range; dx <= pred_dx + search_range; dx++) {
-            float sad = 0.0f;
-
-            // Calculate SAD for this displacement
-            for (int by = 0; by < block_size; by++) {
-                for (int bx = 0; bx < block_size; bx++) {
-                    int curr_x = block_start_x + bx;
-                    int curr_y = block_start_y + by;
-
-                    if (curr_x >= width || curr_y >= height) continue;
-
-                    int ref_x = curr_x + dx;
-                    int ref_y = curr_y + dy;
-
-                    // Clamp reference coordinates
-                    if (ref_x < 0) ref_x = 0;
-                    if (ref_y < 0) ref_y = 0;
-                    if (ref_x >= width) ref_x = width - 1;
-                    if (ref_y >= height) ref_y = height - 1;
-
-                    float curr_val = current[curr_y * width + curr_x];
-                    float ref_val = reference[ref_y * width + ref_x];
-
-                    sad += fabsf(curr_val - ref_val);
-                }
-            }
-
-            if (sad < best_sad) {
-                best_sad = sad;
-                best_dx = dx;
-                best_dy = dy;
-            }
-        }
-    }
-
-    // Sub-pixel refinement (1/4-pixel precision)
-    // Search in a 3x3 pattern around the best full-pixel match
-    for (int qpy = -2; qpy <= 2; qpy++) {
-        for (int qpx = -2; qpx <= 2; qpx++) {
-            float dx_subpel = best_dx + qpx * 0.25f;
-            float dy_subpel = best_dy + qpy * 0.25f;
-
-            float sad = 0.0f;
-
-            for (int by = 0; by < block_size; by++) {
-                for (int bx = 0; bx < block_size; bx++) {
-                    int curr_x = block_start_x + bx;
-                    int curr_y = block_start_y + by;
-
-                    if (curr_x >= width || curr_y >= height) continue;
-
-                    float ref_x = curr_x + dx_subpel;
-                    float ref_y = curr_y + dy_subpel;
-
-                    float curr_val = current[curr_y * width + curr_x];
-                    float ref_val = interpolate_subpixel(reference, width, height, ref_x, ref_y);
-
-                    sad += fabsf(curr_val - ref_val);
-                }
-            }
-
-            if (sad < best_sad) {
-                best_sad = sad;
-                *best_mv_x = (int16_t)roundf(dx_subpel * 4.0f);  // Store in 1/4-pixel units
-                *best_mv_y = (int16_t)roundf(dy_subpel * 4.0f);
-            }
-        }
-    }
-
-    // If sub-pixel search didn't improve, use full-pixel result
-    if (best_sad == 1e9f || (*best_mv_x == 0 && *best_mv_y == 0 && (best_dx != 0 || best_dy != 0))) {
-        *best_mv_x = best_dx * 4;  // Convert to 1/4-pixel units
-        *best_mv_y = best_dy * 4;
-    }
-
-    return best_sad;
-}
-
 // Helper function: compute median of three values (for MV prediction)
 static int16_t median3(int16_t a, int16_t b, int16_t c) {
     if (a > b) {
@@ -4013,29 +3773,6 @@ static int allocate_lookahead_buffer(tav_encoder_t *enc) {
     return 0;
 }
 
-// Free lookahead buffer
-static void free_lookahead_buffer(tav_encoder_t *enc) {
-    if (!enc->residual_coding_lookahead_buffer_y) return;
-
-    for (int i = 0; i < enc->residual_coding_lookahead_buffer_capacity; i++) {
-        free(enc->residual_coding_lookahead_buffer_y[i]);
-        free(enc->residual_coding_lookahead_buffer_co[i]);
-        free(enc->residual_coding_lookahead_buffer_cg[i]);
-    }
-
-    free(enc->residual_coding_lookahead_buffer_y);
-    free(enc->residual_coding_lookahead_buffer_co);
-    free(enc->residual_coding_lookahead_buffer_cg);
-    free(enc->residual_coding_lookahead_buffer_display_index);
-
-    enc->residual_coding_lookahead_buffer_y = NULL;
-    enc->residual_coding_lookahead_buffer_co = NULL;
-    enc->residual_coding_lookahead_buffer_cg = NULL;
-    enc->residual_coding_lookahead_buffer_display_index = NULL;
-    enc->residual_coding_lookahead_buffer_capacity = 0;
-    enc->residual_coding_lookahead_buffer_count = 0;
-}
-
 // Add current frame to lookahead buffer
 // Returns 0 if buffer not full yet, 1 if buffer is now full and ready to encode
 static int add_frame_to_buffer(tav_encoder_t *enc, int display_index) {
@@ -5211,6 +4948,20 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
     quantise_3d_dwt_coefficients(enc, gop_cg_coeffs, quant_cg, actual_gop_size,
                                  num_pixels, qCg, 1);  // Chroma Cg
 
+    // Debug: print LL coefficients for frames 0 and 1 (first 10 pixels)
+    /*if (enc->quality_level == 5 && enc->verbose) {
+        int ll_width = valid_width >> enc->decomp_levels;
+        int ll_height = valid_height >> enc->decomp_levels;
+        printf("DEBUG Q5: LL coefficients for first 10 pixels:\n");
+        for (int f = 0; f < (actual_gop_size < 2 ? actual_gop_size : 2); f++) {
+            printf("  Frame %d: ", f);
+            for (int i = 0; i < 10 && i < ll_width * ll_height; i++) {
+                printf("%d ", quant_y[f][i]);
+            }
+            printf("\n");
+        }
+    }*/
+
     // Step 4: Preprocessing and compression
     size_t total_bytes_written = 0;
 
@@ -5641,125 +5392,6 @@ static size_t gop_process_and_flush(tav_encoder_t *enc, FILE *output, int base_q
 // Temporal DWT Functions
 // =============================================================================
 
-// Invert mesh for backward warping (MC-lifting update step)
-// Forward mesh: warps F0 to F1
-// Backward mesh: warps F1 to F0 (negated motion vectors)
-static void invert_mesh(
-    const short *mesh_dx, const short *mesh_dy,
-    int temporal_mesh_width, int temporal_mesh_height,
-    short *inv_mesh_dx, short *inv_mesh_dy
-) {
-    int num_points = temporal_mesh_width * temporal_mesh_height;
-    for (int i = 0; i < num_points; i++) {
-        inv_mesh_dx[i] = -mesh_dx[i];
-        inv_mesh_dy[i] = -mesh_dy[i];
-    }
-}
-
-// Build block-based reliability mask for selective motion compensation
-// Process 16×16 blocks for efficiency (matches block matching resolution)
-// Returns mask where 1 = use MC, 0 = fall back to plain Haar
-/*static void build_reliability_mask(
-    const uint8_t *frame0_rgb, const uint8_t *frame1_rgb,
-    const float *flow_fwd_x, const float *flow_fwd_y,
-    const float *flow_bwd_x, const float *flow_bwd_y,
-    int width, int height,
-    uint8_t *mask
-) {
-    const int block_size = 16;  // Match block matching resolution
-    int num_pixels = width * height;
-
-    // Relaxed thresholds for better coverage
-    float motion_threshold = 1.0f;   // pixels (relaxed from 2.0)
-    float fb_threshold = 2.0f;       // pixels (relaxed from 1.0)
-    float texture_threshold = 10.0f; // gradient magnitude
-
-    int reliable_blocks = 0;
-    int total_blocks = 0;
-    int reliable_pixels = 0;
-
-    // Process in 16×16 blocks
-    for (int by = 0; by < height; by += block_size) {
-        for (int bx = 0; bx < width; bx += block_size) {
-            total_blocks++;
-
-            // Compute block statistics
-            float sum_motion = 0.0f;
-            float sum_fb_error = 0.0f;
-            float sum_texture = 0.0f;
-            int block_pixels = 0;
-
-            int bh = (by + block_size <= height) ? block_size : (height - by);
-            int bw = (bx + block_size <= width) ? block_size : (width - bx);
-
-            for (int y = by; y < by + bh; y++) {
-                for (int x = bx; x < bx + bw; x++) {
-                    int idx = y * width + x;
-
-                    // Motion magnitude
-                    float fx = flow_fwd_x[idx];
-                    float fy = flow_fwd_y[idx];
-                    sum_motion += sqrtf(fx * fx + fy * fy);
-
-                    // Forward-backward consistency
-                    int x_warped = (int)(x + fx + 0.5f);
-                    int y_warped = (int)(y + fy + 0.5f);
-                    if (x_warped >= 0 && x_warped < width && y_warped >= 0 && y_warped < height) {
-                        int idx_w = y_warped * width + x_warped;
-                        float bx_val = flow_bwd_x[idx_w];
-                        float by_val = flow_bwd_y[idx_w];
-                        float err_x = fx + bx_val;
-                        float err_y = fy + by_val;
-                        sum_fb_error += sqrtf(err_x * err_x + err_y * err_y);
-                    } else {
-                        sum_fb_error += 999.0f;
-                    }
-
-                    // Texture (simple gradient)
-                    if (x > 0 && x < width - 1 && y > 0 && y < height - 1) {
-                        int rgb_idx = idx * 3;
-                        int rgb_idx_r = (y * width + (x + 1)) * 3;
-                        int rgb_idx_d = ((y + 1) * width + x) * 3;
-
-                        float gx = (frame0_rgb[rgb_idx_r] - frame0_rgb[rgb_idx]);
-                        float gy = (frame0_rgb[rgb_idx_d] - frame0_rgb[rgb_idx]);
-                        sum_texture += sqrtf(gx * gx + gy * gy);
-                    }
-
-                    block_pixels++;
-                }
-            }
-
-            // Average block statistics
-            float avg_motion = sum_motion / block_pixels;
-            float avg_fb_error = sum_fb_error / block_pixels;
-            float avg_texture = sum_texture / block_pixels;
-
-            // Decide if block is reliable
-            int block_reliable = (avg_motion > motion_threshold) &&
-                                (avg_fb_error < fb_threshold) &&
-                                (avg_texture > texture_threshold);
-
-            if (block_reliable) reliable_blocks++;
-
-            // Apply decision to all pixels in block
-            for (int y = by; y < by + bh; y++) {
-                for (int x = bx; x < bx + bw; x++) {
-                    int idx = y * width + x;
-                    mask[idx] = block_reliable ? 1 : 0;
-                    if (mask[idx]) reliable_pixels++;
-                }
-            }
-        }
-    }
-
-    // Debug output
-    printf("  Reliability mask: %d/%d blocks (%d/%d pixels, %.1f%%) - motion>%.1fpx, texture>%.1f, fb_err<%.1fpx\n",
-           reliable_blocks, total_blocks, reliable_pixels, num_pixels,
-           100.0f * reliable_pixels / num_pixels,
-           motion_threshold, texture_threshold, fb_threshold);
-}*/
-
 // Simple translation-based frame alignment (legacy, non-MC-EZBC path)
 // Shifts entire frame by (dx, dy) pixels with bilinear interpolation
 static void apply_translation(
@@ -5913,20 +5545,6 @@ static void mc_lifting_forward_pair(
     free(update_cg);
 }
 
-// Apply 1D temporal DWT along time axis for a spatial location (encoder side)
-// data[i] = frame i's coefficient value at this spatial location
-// Applies LGT 5/3 wavelet for reversibility
-static void dwt_temporal_1d_forward_53(float *temporal_data, int num_frames) {
-    if (num_frames < 2) return;
-    dwt_53_forward_1d(temporal_data, num_frames);
-}
-
-// Apply inverse 1D temporal DWT (decoder side)
-static void dwt_temporal_1d_inverse_53(float *temporal_data, int num_frames) {
-    if (num_frames < 2) return;
-    dwt_53_inverse_1d(temporal_data, num_frames);
-}
-
 // Apply 3D DWT with motion-compensated lifting (MC-lifting)
 // Integrates motion compensation directly into wavelet lifting steps
 // This replaces separate warping + DWT for better invertibility and compression
@@ -6054,7 +5672,6 @@ static void dwt_3d_forward(float **gop_data, int width, int height, int num_fram
             for (int level = 0; level < temporal_levels; level++) {
                 int level_frames = temporal_lengths[level];
                 if (level_frames >= 2) {
-//                    dwt_temporal_1d_forward_53(temporal_line, level_frames);  // CDF 5/3 worse for motion-compensated frames
                     dwt_haar_forward_1d(temporal_line, level_frames);  // Haar better for imperfect alignment
                 }
             }
@@ -6076,64 +5693,6 @@ static void dwt_3d_forward(float **gop_data, int width, int height, int num_fram
     }
 }
 
-// Apply inverse 3D DWT: inverse spatial DWT on each temporal subband, then inverse temporal DWT
-static void dwt_3d_inverse(float **gop_data, int width, int height, int num_frames,
-                          int spatial_levels, int temporal_levels, int spatial_filter) {
-    if (num_frames < 2 || width < 2 || height < 2) return;
-
-    // Step 1: Apply inverse 2D spatial DWT to each temporal subband
-    for (int t = 0; t < num_frames; t++) {
-        // Note: Need to implement appropriate inverse function based on filter type
-        // For now, using Haar inverse as reference (will need proper inverse for 5/3, 9/7, etc.)
-        if (spatial_filter == WAVELET_HAAR) {
-            dwt_2d_haar_inverse_flexible(gop_data[t], width, height, spatial_levels);
-        } else {
-            // TODO: Implement proper inverse for other wavelets (5/3, 9/7, etc.)
-            // For now, log warning
-            fprintf(stderr, "Warning: Inverse spatial DWT not fully implemented for filter %d\n", spatial_filter);
-        }
-    }
-
-    // Step 2: Apply inverse temporal DWT to each spatial location
-    int num_pixels = width * height;
-    float *temporal_line = malloc(num_frames * sizeof(float));
-
-    // Pre-calculate all intermediate lengths for temporal DWT (same fix as TAD)
-    // This ensures correct reconstruction for non-power-of-2 GOP sizes
-    int *temporal_lengths = malloc((temporal_levels + 1) * sizeof(int));
-    temporal_lengths[0] = num_frames;
-    for (int i = 1; i <= temporal_levels; i++) {
-        temporal_lengths[i] = (temporal_lengths[i - 1] + 1) / 2;
-    }
-
-    for (int y = 0; y < height; y++) {
-        for (int x = 0; x < width; x++) {
-            int pixel_idx = y * width + x;
-
-            // Extract temporal coefficients for this spatial location
-            for (int t = 0; t < num_frames; t++) {
-                temporal_line[t] = gop_data[t][pixel_idx];
-            }
-
-            // Apply inverse temporal DWT with multiple levels using pre-calculated lengths (reverse order)
-            for (int level = temporal_levels - 1; level >= 0; level--) {
-                int level_frames = temporal_lengths[level];
-                if (level_frames >= 2) {
-                    dwt_temporal_1d_inverse_53(temporal_line, level_frames);
-                }
-            }
-
-            // Write back reconstructed values
-            for (int t = 0; t < num_frames; t++) {
-                gop_data[t][pixel_idx] = temporal_line[t];
-            }
-        }
-    }
-
-    free(temporal_lengths);
-    free(temporal_line);
-}
-
 // Extract padded tile with margins for seamless DWT processing (correct implementation)
 static void extract_padded_tile(tav_encoder_t *enc, int tile_x, int tile_y,
                                float *padded_y, float *padded_co, float *padded_cg) {
@@ -6230,61 +5789,6 @@ static void extract_padded_tile(tav_encoder_t *enc, int tile_x, int tile_y,
 // Forward declaration for perceptual weight function
 static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels);
 
-// Generate triangular noise from uint32 RNG
-// Returns value in range [-1.0, 1.0]
-static float grain_triangular_noise(uint32_t rng_val) {
-    // Get two uniform random values in [0, 1]
-    float u1 = (rng_val & 0xFFFF) / 65535.0f;
-    float u2 = ((rng_val >> 16) & 0xFFFF) / 65535.0f;
-
-    // Convert to range [-1, 1] and average for triangular distribution
-    return (u1 + u2) - 1.0f;
-}
-
-// Apply grain synthesis to DWT coefficients (encoder adds noise)
-static void apply_grain_synthesis_encoder(tav_encoder_t *enc, float *coeffs, int width, int height,
-                                         int decomp_levels, uint32_t frame_num,
-                                         int quantiser, int is_chroma) {
-    // Only apply to Y channel, excluding LL band
-    // Noise amplitude = half of quantization step (scaled by perceptual weight if enabled)
-
-    for (int y = 0; y < height; y++) {
-        for (int x = 0; x < width; x++) {
-            int idx = y * width + x;
-
-            // Check if this is the LL band (level 0)
-            int level = get_subband_level_2d(x, y, width, height, decomp_levels);
-            int subband_type = get_subband_type_2d(x, y, width, height, decomp_levels);
-            if (level == 0) {
-                continue; // Skip LL band
-            }
-
-            // Get subband type for perceptual weight calculation
-            /*int subband_type = get_subband_type_2d(x, y, width, height, decomp_levels);
-
-            // Calculate noise amplitude based on perceptual tuning mode
-            float noise_amplitude;
-            if (enc->perceptual_tuning) {
-                // Perceptual mode: scale by perceptual weight
-                float perceptual_weight = get_perceptual_weight(enc, level, subband_type, is_chroma, decomp_levels);
-                noise_amplitude = (quantiser * perceptual_weight) * 0.5f;
-            } else {
-                // Uniform mode: use global quantiser
-                noise_amplitude = quantiser * 0.5f;
-            }*/
-            float noise_amplitude = FCLAMP(quantiser, 0.0f, 32.0f) * 0.5f;
-
-            // Generate deterministic noise
-            uint32_t rng_val = grain_synthesis_rng(frame_num, level + subband_type * 31 + 16777219, x, y);
-            float noise = grain_triangular_noise(rng_val);
-
-            // Add noise to coefficient
-            coeffs[idx] += noise * noise_amplitude;
-        }
-    }
-}
-
-
 // 2D DWT forward transform for rectangular padded tile (344x288)
 static void dwt_2d_forward_padded(float *tile_data, int levels, int filter_type) {
     const int width = PADDED_TILE_SIZE_X;   // 344
@@ -7212,13 +6716,15 @@ static void quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(tav_
         }
 
         // Step 3: Round to discrete quantization levels
-        quantised_val = roundf(quantised_val);
+        quantised_val = roundf(quantised_val); // file size explodes without rounding
 
         // Step 4: Denormalize - multiply back by quantizer to restore magnitude
         // This gives us quantized values at original scale (not shrunken to 0-10 range)
         float denormalized = quantised_val * effective_q;
 
-        quantised[i] = (int16_t)CLAMP((int)denormalized, -32768, 32767);
+        // CRITICAL FIX: Must round (not truncate) to match decoder behavior
+        // With odd baseQ values and fractional weights, truncation causes mismatch with Sigmap mode
+        quantised[i] = (int16_t)CLAMP((int)roundf(denormalized), -32768, 32767);
     }
 }
 
@@ -7622,21 +7128,6 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
                 printf("\n");
             }*/
 
-            // Apply grain synthesis to Y channel (after DWT, before quantization)
-            if (enc->grain_synthesis && mode != TAV_MODE_SKIP) {
-                // Get the quantiser value that will be used for this frame
-                int qY_value = enc->bitrate_mode ? quantiser_float_to_int_dithered(enc) : enc->quantiser_y;
-                int actual_qY = QLUT[qY_value];
-
-                // Determine dimensions based on mode
-                int gs_width = enc->monoblock ? enc->width : PADDED_TILE_SIZE_X;
-                int gs_height = enc->monoblock ? enc->height : PADDED_TILE_SIZE_Y;
-
-                // Apply grain synthesis to Y channel only (is_chroma = 0)
-                apply_grain_synthesis_encoder(enc, tile_y_data, gs_width, gs_height,
-                                             enc->decomp_levels, enc->frame_count, actual_qY, 0);
-            }
-
             // Serialise tile
             size_t tile_size = serialise_tile_data(enc, tile_x, tile_y,
                                                    tile_y_data, tile_co_data, tile_cg_data,
@@ -10347,83 +9838,6 @@ static int detect_still_frame(tav_encoder_t *enc) {
     return (changed_pixels == 0);
 }
 
-// Detect still frames by comparing quantised DWT coefficients
-// Returns 1 if quantised coefficients are identical (frame is truly still), 0 otherwise
-// Benefits: quality-aware (lower quality = more SKIP frames), pure integer math
-// DISABLED - should work in theory, not actually
-static int detect_still_frame_dwt(tav_encoder_t *enc) {
-    if (!enc->previous_coeffs_allocated || enc->intra_only) {
-        return 0; // No previous coefficients to compare or intra-only mode
-    }
-
-    // Only compare against I-frames to avoid DELTA quantization drift
-    // previous_coeffs are updated by DELTA frames with reconstructed values that accumulate error
-    if (enc->last_frame_packet_type != TAV_PACKET_IFRAME) {
-        return 0; // Must compare against clean I-frame, not DELTA reconstruction
-    }
-
-    // Get current quantisers (use adjusted quantiser from bitrate control if applicable)
-    int qY = enc->bitrate_mode ? quantiser_float_to_int_dithered(enc) : enc->quantiser_y;
-    int this_frame_qY = QLUT[qY];
-    int this_frame_qCo = QLUT[enc->quantiser_co];
-    int this_frame_qCg = QLUT[enc->quantiser_cg];
-
-    // Coefficient count (monoblock mode)
-    const int coeff_count = enc->width * enc->height;
-
-    // Quantise current DWT coefficients
-    int16_t *quantised_y = enc->reusable_quantised_y;
-    int16_t *quantised_co = enc->reusable_quantised_co;
-    int16_t *quantised_cg = enc->reusable_quantised_cg;
-
-    if (enc->perceptual_tuning) {
-        quantise_dwt_coefficients_perceptual_per_coeff(enc, enc->current_dwt_y, quantised_y, coeff_count, this_frame_qY, enc->width, enc->height, enc->decomp_levels, 0, enc->frame_count);
-        quantise_dwt_coefficients_perceptual_per_coeff(enc, enc->current_dwt_co, quantised_co, coeff_count, this_frame_qCo, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count);
-        quantise_dwt_coefficients_perceptual_per_coeff(enc, enc->current_dwt_cg, quantised_cg, coeff_count, this_frame_qCg, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count);
-    } else {
-        quantise_dwt_coefficients(enc->current_dwt_y, quantised_y, coeff_count, this_frame_qY, enc->dead_zone_threshold, enc->width, enc->height, enc->decomp_levels, 0);
-        quantise_dwt_coefficients(enc->current_dwt_co, quantised_co, coeff_count, this_frame_qCo, enc->dead_zone_threshold, enc->width, enc->height, enc->decomp_levels, 1);
-        quantise_dwt_coefficients(enc->current_dwt_cg, quantised_cg, coeff_count, this_frame_qCg, enc->dead_zone_threshold, enc->width, enc->height, enc->decomp_levels, 1);
-    }
-
-    // Quantise previous DWT coefficients (stored from last I-frame)
-    int16_t *prev_quantised_y = malloc(coeff_count * sizeof(int16_t));
-    int16_t *prev_quantised_co = malloc(coeff_count * sizeof(int16_t));
-    int16_t *prev_quantised_cg = malloc(coeff_count * sizeof(int16_t));
-
-    if (enc->perceptual_tuning) {
-        quantise_dwt_coefficients_perceptual_per_coeff(enc, enc->previous_coeffs_y, prev_quantised_y, coeff_count, this_frame_qY, enc->width, enc->height, enc->decomp_levels, 0, enc->frame_count);
-        quantise_dwt_coefficients_perceptual_per_coeff(enc, enc->previous_coeffs_co, prev_quantised_co, coeff_count, this_frame_qCo, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count);
-        quantise_dwt_coefficients_perceptual_per_coeff(enc, enc->previous_coeffs_cg, prev_quantised_cg, coeff_count, this_frame_qCg, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count);
-    } else {
-        quantise_dwt_coefficients(enc->previous_coeffs_y, prev_quantised_y, coeff_count, this_frame_qY, enc->dead_zone_threshold, enc->width, enc->height, enc->decomp_levels, 0);
-        quantise_dwt_coefficients(enc->previous_coeffs_co, prev_quantised_co, coeff_count, this_frame_qCo, enc->dead_zone_threshold, enc->width, enc->height, enc->decomp_levels, 1);
-        quantise_dwt_coefficients(enc->previous_coeffs_cg, prev_quantised_cg, coeff_count, this_frame_qCg, enc->dead_zone_threshold, enc->width, enc->height, enc->decomp_levels, 1);
-    }
-
-    // Compare quantised coefficients - pure integer math
-    int diff_count = 0;
-    for (int i = 0; i < coeff_count; i++) {
-        if (quantised_y[i] != prev_quantised_y[i] ||
-            quantised_co[i] != prev_quantised_co[i] ||
-            quantised_cg[i] != prev_quantised_cg[i]) {
-            diff_count++;
-        }
-    }
-
-    free(prev_quantised_y);
-    free(prev_quantised_co);
-    free(prev_quantised_cg);
-
-    if (enc->verbose) {
-        printf("Still frame detection (DWT): %d/%d coeffs differ\n", diff_count, coeff_count);
-    }
-
-    // If all quantised coefficients match, frames are identical after compression
-    return (diff_count == 0);
-}
-
-
 // Main function
 int main(int argc, char *argv[]) {
     generate_random_filename(TEMP_AUDIO_FILE);
@@ -10472,7 +9886,6 @@ int main(int argc, char *argv[]) {
         {"zstd-level", required_argument, 0, 1014},
         {"interlace", no_argument, 0, 1015},
         {"interlaced", no_argument, 0, 1015},
-//        {"no-grain-synthesis", no_argument, 0, 1016},
         {"enable-delta", no_argument, 0, 1017},
         {"delta-haar", required_argument, 0, 1018},
         {"temporal-dwt", no_argument, 0, 1019},
@@ -10643,9 +10056,6 @@ int main(int argc, char *argv[]) {
             case 1015: // --interlaced
                 enc->progressive_mode = 0;
                 break;
-            case 1016: // --no-grain-synthesis
-                enc->grain_synthesis = 0;
-                break;
             case 1017: // --enable-delta
                 enc->use_delta_encoding = 1;
                 enc->enable_temporal_dwt = 0;