TAV: wip

2026-06-06 05:28:31 +09:00 · 2025-11-03 02:36:12 +09:00
parent f3b68e1164
commit e871264ae5
5 changed files with 233 additions and 636 deletions
--- a/assets/disk0/tvdos/bin/playtav.js
+++ b/assets/disk0/tvdos/bin/playtav.js
@@ -1629,9 +1629,7 @@ try {
                // Apply bias lighting
                let biasStart = sys.nanoTime()
-                if (currentGopFrameIndex === 0 || currentGopFrameIndex === currentGopSize - 1) {
+                setBiasLighting()
                    setBiasLighting()
                }
                biasTime = (sys.nanoTime() - biasStart) / 1000000.0
                // Fire audio on first frame
--- a/latency_simulator_storage_device.kts
+++ b/latency_simulator_storage_device.kts
@@ -0,0 +1,200 @@
 import kotlin.math.ceil
 /**
  * Random-number helpers shared by the seek-latency simulators in this script.
  */
 object Random {
    /**
     * Returns a uniformly distributed integer in the inclusive range [low, high].
     *
     * Callers are expected to pass `low <= high`.
     */
    fun uniformRand(low: Int, high: Int): Int {
        // The span is computed as Long so that (high - low + 1) cannot overflow Int.
        val span = high.toLong() - low.toLong() + 1L
        return (low + (Math.random() * span).toLong()).toInt()
    }

    /**
     * Returns a sample in [low, high] whose distribution is triangular and centred
     * between the bounds; it is built as the mean of two independent uniform draws.
     */
    fun triangularRand(low: Float, high: Float): Float {
        val a = (Math.random() + Math.random()) / 2.0
        return ((high - low) * a + low).toFloat()
    }

    /**
     * Returns a normally distributed sample with mean [avg] and standard deviation [stddev].
     *
     * Uses the polar form of the Box-Muller transform: a point (u, v) is drawn uniformly
     * from the square [-1, 1) x [-1, 1) and rejected until it lies strictly inside the unit
     * circle (and is not the origin, which would make ln(s) undefined).
     */
    fun gaussianRand(avg: Float, stddev: Float): Float {
        var u: Double
        var v: Double
        var s: Double
        do {
            // Two uniform random numbers between -1 and 1
            u = Math.random() * 2 - 1
            v = Math.random() * 2 - 1
            // Squared distance from the origin
            s = u * u + v * v
        } while (s >= 1 || s == 0.0)
        // Polar transformation factor
        val multiplier = kotlin.math.sqrt(-2.0 * kotlin.math.ln(s) / s)
        // Only one of the two values the transform yields is used here
        return (avg + stddev * u * multiplier).toFloat()
    }
 }
 /**
  * Closed family of seek-time models, one per simulated storage medium.
  */
 sealed class SeekSimulator {
    /** Returns the simulated time in seconds to move from [currentSector] to [targetSector]. */
    abstract fun computeSeekTime(currentSector: Int, targetSector: Int): Float

    /**
     * Linear tape: seek time is a fixed base cost plus the travel distance divided by a
     * tape speed that is jittered by +/-10 % on every call.
     */
    class Tape(
        val totalSectors: Int,
        val tapeLengthMeters: Float = 200f,
        val baseSeekTime: Float = 0.5f,  // seconds base inertia
        val tapeSpeedMetersPerSec: Float = 2.0f,  // normal speed
    ) : SeekSimulator() {
        override fun computeSeekTime(currentSector: Int, targetSector: Int): Float {
            val travelMeters = kotlin.math.abs(metersAt(targetSector) - metersAt(currentSector))
            // Inject random tape jitter
            val jitteredSpeed = tapeSpeedMetersPerSec * Random.triangularRand(0.9f, 1.1f)
            return baseSeekTime + (travelMeters / jitteredSpeed)
        }

        /** Position of [sector] along the tape, in meters from the start. */
        private fun metersAt(sector: Int): Float =
            (sector.toFloat() / totalSectors) * tapeLengthMeters
    }

    /**
     * Disc: arm movement cost grows with the square root of the track distance, and a
     * Gaussian rotational latency (stddev = 20 % of the average) is added on top.
     */
    class Disc(
        val totalTracks: Int,
        val armSeekBaseTime: Float = 0.005f,  // fast seek, seconds
        val armSeekMultiplier: Float = 0.002f,  // slower for bigger jumps
        val rotationLatencyAvg: Float = 0.008f,  // seconds (half-rotation average)
    ) : SeekSimulator() {
        override fun computeSeekTime(currentSector: Int, targetSector: Int): Float {
            val trackDistance = kotlin.math.abs(sectorToTrack(targetSector) - sectorToTrack(currentSector))
            val armTime = armSeekBaseTime + (armSeekMultiplier * kotlin.math.sqrt(trackDistance.toFloat()))
            val rotationTime = Random.gaussianRand(rotationLatencyAvg, rotationLatencyAvg * 0.2f)
            return armTime + rotationTime
        }

        // Simplistic assumption: sector layout maps 1:1 to track at this level
        private fun sectorToTrack(sector: Int): Int = sector % totalTracks
    }

    /**
     * Drum: latency is the fraction of a revolution between the two sectors' angles,
     * scaled by the revolution period and jittered by +/-5 %.
     */
    class Drum(
        val rpm: Float = 3000f
    ) : SeekSimulator() {
        override fun computeSeekTime(currentSector: Int, targetSector: Int): Float {
            // Assume 10k sectors per drum circumference
            val degreesPerSector = 360.0f / 10000.0f
            val sweepDegrees =
                kotlin.math.abs(targetSector * degreesPerSector - currentSector * degreesPerSector) % 360f
            val idealLatency = (sweepDegrees / 360f) * (60f / rpm)
            // Add a little mechanical jitter
            return idealLatency * Random.triangularRand(0.95f, 1.05f)
        }
    }
 }
 /**
  * Drives a [SeekSimulator] with a chain of random seeks and reports latency statistics.
  *
  * @property simulator seek-time model being sampled
  * @property totalSectors sector indices are drawn from 0 until this value
  * @property sampleCount number of seeks generated by [runSampling]
  */
 class SeekLatencySampler(
    val simulator: SeekSimulator,
    val totalSectors: Int,
    val sampleCount: Int = 10000
 ) {
    /** One simulated seek from [fromSector] to [toSector] and the latency the model returned. */
    data class Sample(val fromSector: Int, val toSector: Int, val latency: Float)

    val samples = mutableListOf<Sample>()

    /**
     * Discards earlier samples and records [sampleCount] new seeks. Each seek starts where
     * the previous one ended, so the samples form one continuous access pattern.
     */
    fun runSampling() {
        samples.clear()
        var lastSector = Random.uniformRand(0, totalSectors - 1)
        repeat(sampleCount) {
            val nextSector = Random.uniformRand(0, totalSectors - 1)
            val latency = simulator.computeSeekTime(lastSector, nextSector)
            samples.add(Sample(lastSector, nextSector, latency))
            lastSector = nextSector
        }
    }

    /** Prints min/max/mean/population-stddev of the recorded latencies followed by a histogram. */
    fun analyzeAndPrint() {
        if (samples.isEmpty()) {
            println("No samples generated. Run runSampling() first.")
            return
        }
        val latencies = samples.map { it.latency }
        val minLatency = latencies.minOrNull() ?: 0f
        val maxLatency = latencies.maxOrNull() ?: 0f
        val avgLatency = latencies.average().toFloat()
        val variance = latencies.map { (it - avgLatency) * (it - avgLatency) }.average()
        val stddevLatency = kotlin.math.sqrt(variance).toFloat()
        println("=== Seek Latency Stats ===")
        println("Samples: $sampleCount")
        println("Min: ${"%.4f".format(minLatency)} s")
        println("Max: ${"%.4f".format(maxLatency)} s")
        println("Avg: ${"%.4f".format(avgLatency)} s")
        println("Stddev: ${"%.4f".format(stddevLatency)} s")
        printSimpleHistogram(latencies)
    }

    /** Prints a text histogram of [latencies] split into [bins] equal-width buckets. */
    private fun printSimpleHistogram(latencies: List<Float>, bins: Int = 30) {
        val min = latencies.minOrNull() ?: return
        val max = latencies.maxOrNull() ?: return
        val binSize = (max - min) / bins
        val histogram = IntArray(bins) { 0 }
        latencies.forEach { latency ->
            // When every latency is identical binSize is 0; put everything in the first bin
            // instead of relying on the NaN-to-Int conversion of a 0/0 division.
            val rawBin = if (binSize > 0f) ((latency - min) / binSize).toInt() else 0
            histogram[kotlin.math.min(rawBin, bins - 1)]++
        }
        // One '#' stands for this many samples. Clamped to 1 so that runs with fewer than
        // 200 samples do not divide by zero.
        val samplesPerMark = (sampleCount / 200).coerceAtLeast(1)
        println("--- Latency Distribution ---")
        histogram.forEachIndexed { index, count ->
            val lower = min + binSize * index
            val upper = lower + binSize
            val bar = "#".repeat(count / samplesPerMark)  // Scale bar length
            println("${"%.4f".format(lower)} - ${"%.4f".format(upper)} s: $bar")
        }
    }
 }
 /**
  * Builds one tape, one disc and one drum simulator and, for each in that order,
  * samples 5000 random seeks over 100000 sectors and prints the statistics.
  */
 fun main() {
    val simulators = listOf(
        SeekSimulator.Tape(
            totalSectors = 100000,
            tapeLengthMeters = 200f,
            baseSeekTime = 0.2f,
            tapeSpeedMetersPerSec = 5.0f
        ),
        SeekSimulator.Disc(
            totalTracks = 3810,
            armSeekBaseTime = 0.005f,
            armSeekMultiplier = 0.002f,
            rotationLatencyAvg = 0.008f
        ),
        SeekSimulator.Drum(
            rpm = 3000f
        )
    )
    for (sim in simulators) {
        val sampler = SeekLatencySampler(
            simulator = sim,
            totalSectors = 100000,
            sampleCount = 5000
        )
        sampler.runSampling()
        sampler.analyzeAndPrint()
    }
 }
--- a/video_encoder/Makefile
+++ b/video_encoder/Makefile
@@ -28,7 +28,7 @@ tev: encoder_tev.c
 	rm -f encoder_tev
 	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -o encoder_tev $< $(LIBS)
-tav: encoder_tav.c encoder_tad.c encoder_tav_opencv.cpp estimate_affine_from_blocks.cpp
+tav: encoder_tav.c encoder_tad.c encoder_tav_opencv.cpp
 	rm -f encoder_tav encoder_tav.o encoder_tad.o encoder_tav_opencv.o
 	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c encoder_tav.c -o encoder_tav.o
 	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c encoder_tad.c -o encoder_tad.o
@@ -58,9 +58,6 @@ decoder_tad: decoder_tad.c
 tad: $(TAD_TARGETS)
 # Build test programs
 test_mesh_warp: test_mesh_warp.cpp encoder_tav_opencv.cpp estimate_affine_from_blocks.cpp
 	rm -f test_mesh_warp test_mesh_warp.o
 	$(CXX) $(CXXFLAGS) $(OPENCV_CFLAGS) -o test_mesh_warp test_mesh_warp.cpp encoder_tav_opencv.cpp estimate_affine_from_blocks.cpp $(OPENCV_LIBS)
 test_mesh_roundtrip: test_mesh_roundtrip.cpp encoder_tav_opencv.cpp
 	rm -f test_mesh_roundtrip test_mesh_roundtrip.o
--- a/video_encoder/encoder_tad.h
+++ b/video_encoder/encoder_tad.h
@@ -15,23 +15,15 @@
 #define TAD32_CHANNELS 2  // Stereo
 #define TAD32_SIGMAP_2BIT 1  // 2-bit: 00=0, 01=+1, 10=-1, 11=other
 #define TAD32_QUALITY_MIN 0
-#define TAD32_QUALITY_MAX 5
+#define TAD32_QUALITY_MAX 6
 #define TAD32_QUALITY_DEFAULT 3
 #define TAD32_ZSTD_LEVEL 15
-/**
+
 * Convert quality level (0-5) to max_index for quantization
 * Quality 0 = very low quality, small file (max_index=7, 3-bit)
 * Quality 1 = low quality (max_index=15, 4-bit)
 * Quality 2 = medium quality (max_index=31, 5-bit)
 * Quality 3 = good quality (max_index=63, 6-bit) [DEFAULT]
 * Quality 4 = high quality (max_index=127, 7-bit)
 * Quality 5 = very high quality (max_index=255, 8-bit)
 */
 static inline int tad32_quality_to_max_index(int quality) {
-    static const int quality_map[6] = {31, 35, 39, 47, 56, 89};
+    static const int quality_map[7] = {31, 35, 39, 47, 56, 89, 127};
    if (quality < 0) quality = 0;
-    if (quality > 5) quality = 5;
+    if (quality > 6) quality = 6;
    return quality_map[quality];
 }
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -1712,10 +1712,10 @@ static const int QLUT[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21
 // Quality level to quantisation mapping for different channels
 // the values are indices to the QLUT
-static const int QUALITY_Y[] = {79, 47, 23, 11, 5, 2, 1}; // 96, 48, 24, 12, 6, 3, 2
+static const int QUALITY_Y[] = {79, 47, 23, 11, 5, 2, 0}; // 96, 48, 24, 12, 6, 3, 1
-static const int QUALITY_CO[] = {123, 108, 91, 76, 59, 29, 4}; // 240, 180, 120, 90, 60, 30, 5
+static const int QUALITY_CO[] = {123, 108, 91, 76, 59, 29, 3}; // 240, 180, 120, 90, 60, 30, 4
-static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39, 7}; // 424, 304, 200, 144, 90, 40, 8
+static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39, 5}; // 424, 304, 200, 144, 90, 40, 6
-static const int QUALITY_ALPHA[] = {79, 47, 23, 11, 5, 2, 1}; // 96, 48, 24, 12, 6, 3, 2
+static const int QUALITY_ALPHA[] = {79, 47, 23, 11, 5, 2, 0}; // 96, 48, 24, 12, 6, 3, 1
 // Dead-zone quantisation thresholds per quality level
 // Higher values = more aggressive (more coefficients set to zero)
@@ -1814,7 +1814,6 @@ typedef struct tav_encoder_s {
    preprocess_mode_t preprocess_mode;  // Coefficient preprocessing mode (TWOBITMAP=default, EZBC, RAW)
    int channel_layout;   // Channel layout: 0=Y-Co-Cg, 1=Y-only, 2=Y-Co-Cg-A, 3=Y-A, 4=Co-Cg
    int progressive_mode;  // 0 = interlaced (default), 1 = progressive
    int grain_synthesis;   // 1 = enable grain synthesis (default), 0 = disable
    int use_delta_encoding;
    int delta_haar_levels; // Number of Haar DWT levels to apply to delta coefficients (0 = disabled)
    int separate_audio_track; // 1 = write entire MP2 file as packet 0x40 after header, 0 = interleave audio (default)
@@ -2287,8 +2286,6 @@ static void dwt_3d_forward(float **gop_data, int width, int height, int num_fram
                          int spatial_levels, int temporal_levels, int spatial_filter);
 static void dwt_3d_forward_mc(tav_encoder_t *enc, float **gop_y, float **gop_co, float **gop_cg,
                              int num_frames, int spatial_levels, int temporal_levels, int spatial_filter);
 static void dwt_3d_inverse(float **gop_data, int width, int height, int num_frames,
                          int spatial_levels, int temporal_levels, int spatial_filter);
 static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
                       int *frame_numbers, int actual_gop_size);
 static size_t gop_process_and_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
@@ -2316,21 +2313,6 @@ static size_t preprocess_gop_unified(preprocess_mode_t preprocess_mode, int16_t
                                     int num_frames, int num_pixels, int width, int height, int channel_layout,
                                     uint8_t *output_buffer);
 // Film grain synthesis
 // Integer mixing function: scrambles a 32-bit value with two xor-shift/multiply
 // rounds followed by a final xor-shift. The same input always yields the same output.
 static uint32_t rng_hash(uint32_t x) {
    const uint32_t first_multiplier = 0x7feb352dU;
    const uint32_t second_multiplier = 0x846ca68bU;

    x = (x ^ (x >> 16)) * first_multiplier;
    x = (x ^ (x >> 15)) * second_multiplier;
    return x ^ (x >> 16);
 }
 // Derives a 32-bit pseudo-random value from a (frame, band, x, y) tuple: the four
 // inputs are combined into a single key, which is then passed through rng_hash().
 static uint32_t grain_synthesis_rng(uint32_t frame, uint32_t band, uint32_t x, uint32_t y) {
    const uint32_t frame_term = frame * 0x9e3779b9u;
    const uint32_t band_term = band * 0x7f4a7c15u;
    const uint32_t position_term = (y << 16) ^ x;
    return rng_hash(frame_term ^ band_term ^ position_term);
 }
 // Show usage information
 static void show_usage(const char *program_name) {
    int qtsize = sizeof(MP2_RATE_TABLE) / sizeof(int);
@@ -2350,7 +2332,7 @@ static void show_usage(const char *program_name) {
    printf("                          Valid values: 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384\n");
 //    printf("  --separate-audio-track  Write entire audio track as single packet instead of interleaved\n");
    printf("  --pcm8-audio            Use 8-bit PCM audio instead of MP2 (TSVM native audio format)\n");
-    printf("  --tad-audio             Use TAD (DWT-based perceptual) audio codec (packet 0x24, quality follows -q)\n");
+    printf("  --tad-audio             Use TAD (DWT-based perceptual) audio codec\n");
    printf("  -S, --subtitles FILE    SubRip (.srt) or SAMI (.smi) subtitle file\n");
    printf("  --fontrom-lo FILE       Low font ROM file for internationalised subtitles\n");
    printf("  --fontrom-hi FILE       High font ROM file for internationalised subtitles\n");
@@ -2360,11 +2342,11 @@ static void show_usage(const char *program_name) {
    printf("  --intra-only            Disable delta and skip encoding\n");
    printf("  --enable-delta          Enable delta encoding\n");
    printf("  --delta-haar N          Apply N-level Haar DWT to delta coefficients (1-6, auto-enables delta)\n");
-    printf("  --3d-dwt                Enable temporal 3D DWT (GOP-based encoding with temporal transform)\n");
+    printf("  --3d-dwt                Enable temporal 3D DWT (GOP-based encoding with temporal transform; the default encoding mode)\n");
    printf("  --single-pass           Disable two-pass encoding with wavelet-based scene change detection (optimal GOP boundaries)\n");
-    printf("  --mc-ezbc               Enable MC-EZBC block-based motion compensation (requires --temporal-dwt, implies --ezbc)\n");
+//    printf("  --mc-ezbc               Enable MC-EZBC block-based motion compensation (requires --temporal-dwt, implies --ezbc)\n");
-    printf("  --ezbc                  Enable EZBC (Embedded Zero Block Coding) entropy coding\n");
+    printf("  --ezbc                  Enable EZBC (Embedded Zero Block Coding) entropy coding. May help reducing file size on high-quality videos\n");
-    printf("  --raw-coeffs            Use raw coefficients (no significance map preprocessing, for testing)\n");
+    printf("  --raw-coeffs            Use raw coefficients (no coefficient preprocessing, for testing)\n");
    printf("  --ictcp                 Use ICtCp colour space instead of YCoCg-R (use when source is in BT.2100)\n");
    printf("  --no-perceptual-tuning  Disable perceptual quantisation\n");
    printf("  --no-dead-zone          Disable dead-zone quantisation (for comparison/testing)\n");
@@ -2372,7 +2354,6 @@ static void show_usage(const char *program_name) {
    printf("  --dump-frame N          Dump quantised coefficients for frame N (creates .bin files)\n");
    printf("  --wavelet N             Wavelet filter: 0=LGT 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar (default: 1)\n");
    printf("  --zstd-level N          Zstd compression level 1-22 (default: %d, higher = better compression but slower)\n", DEFAULT_ZSTD_LEVEL);
 //    printf("  --no-grain-synthesis    Disable grain synthesis (enabled by default)\n");
    printf("  --help                  Show this help\n\n");
    printf("Audio Rate by Quality:\n  ");
@@ -2437,7 +2418,6 @@ static tav_encoder_t* create_encoder(void) {
    enc->encode_limit = 0;  // Default: no frame limit
    enc->zstd_level = DEFAULT_ZSTD_LEVEL;  // Default Zstd compression level
    enc->progressive_mode = 1;  // Default to progressive mode
    enc->grain_synthesis = 0;  // Default: disable grain synthesis (only do it on the decoder)
    enc->use_delta_encoding = 0;
    enc->delta_haar_levels = TEMPORAL_DECOMP_LEVEL;
    enc->separate_audio_track = 0;  // Default: interleave audio packets
@@ -3253,70 +3233,6 @@ static void quantise_3d_dwt_coefficients(tav_encoder_t *enc,
    }
 }
 // =============================================================================
 // Mesh Differential Encoding for Compression
 // =============================================================================
 // Encode translation-only mesh motion vectors using temporal + spatial prediction.
 // Returns the number of bytes written, or 0 if the output buffer is too small.
 // Format (all values copied with memcpy, i.e. in host byte order):
 //   1. Mesh dimensions: width then height, one uint16_t each
 //   2. For every frame, for every cell in index order: dx residual then dy residual,
 //      one int16_t each
 // Residual for a cell:
 //   - temporal step: subtract the same cell of the previous frame (frames after the first)
 //   - spatial step:  subtract the left neighbour's temporal residual (cells that are not
 //                    in the first column of a mesh row)
 static size_t encode_mesh_differential(
    int16_t **mesh_dx, int16_t **mesh_dy,
    int gop_size, int temporal_mesh_width, int temporal_mesh_height,
    uint8_t *output_buffer, size_t buffer_capacity
 ) {
    const int cell_count = temporal_mesh_width * temporal_mesh_height;
    size_t pos = 0;

    // Header: mesh width and height
    if (pos + 4 > buffer_capacity) return 0;
    const uint16_t dims[2] = {(uint16_t)temporal_mesh_width, (uint16_t)temporal_mesh_height};
    memcpy(output_buffer + pos, dims, sizeof(dims));
    pos += sizeof(dims);

    for (int frame = 0; frame < gop_size; frame++) {
        const int has_prev_frame = (frame > 0);

        for (int cell = 0; cell < cell_count; cell++) {
            int16_t res_x = mesh_dx[frame][cell];
            int16_t res_y = mesh_dy[frame][cell];

            // Temporal prediction from the same cell one frame earlier
            if (has_prev_frame) {
                res_x = (int16_t)(res_x - mesh_dx[frame - 1][cell]);
                res_y = (int16_t)(res_y - mesh_dy[frame - 1][cell]);
            }

            // Spatial prediction from the left neighbour's temporal residual
            if (cell > 0 && (cell % temporal_mesh_width) != 0) {
                int16_t neighbour_x = mesh_dx[frame][cell - 1];
                int16_t neighbour_y = mesh_dy[frame][cell - 1];
                if (has_prev_frame) {
                    neighbour_x = (int16_t)(neighbour_x - mesh_dx[frame - 1][cell - 1]);
                    neighbour_y = (int16_t)(neighbour_y - mesh_dy[frame - 1][cell - 1]);
                }
                res_x = (int16_t)(res_x - neighbour_x);
                res_y = (int16_t)(res_y - neighbour_y);
            }

            if (pos + 4 > buffer_capacity) return 0;
            const int16_t pair[2] = {res_x, res_y};
            memcpy(output_buffer + pos, pair, sizeof(pair));
            pos += sizeof(pair);
        }
    }

    return pos;
 }
 // =============================================================================
 // Block MV Differential Encoding for MC-EZBC
 // =============================================================================
@@ -3378,62 +3294,6 @@ static size_t encode_block_mvs_differential(
    return bytes_written;
 }
 // Decode mesh motion vectors from differential encoding.
 // This is the inverse of encode_mesh_differential().
 //
 // input_buffer / buffer_size: encoded stream (uint16_t width, uint16_t height, then one
 //                             int16_t dx/dy residual pair per cell per frame)
 // mesh_dx / mesh_dy:          per-frame output arrays, indexed [frame][cell]
 // gop_size:                   number of frames to decode
 // out_temporal_mesh_width/height: receive the dimensions read from the stream
 //
 // Returns 0 on success, -1 if the buffer ends before all values were read. The output
 // dimensions are written before the cell data is validated, so they may be set even
 // when -1 is returned.
 // NOTE(review): the mesh dimensions come from the stream and are not checked against
 // the size of the caller's mesh_dx/mesh_dy arrays - confirm callers size them accordingly.
 static int decode_mesh_differential(
    const uint8_t *input_buffer, size_t buffer_size,
    int16_t **mesh_dx, int16_t **mesh_dy,
    int gop_size, int *out_temporal_mesh_width, int *out_temporal_mesh_height
 ) {
    size_t bytes_read = 0;

    // Read mesh dimensions
    if (bytes_read + 4 > buffer_size) return -1;
    uint16_t mesh_w_16, mesh_h_16;
    memcpy(&mesh_w_16, input_buffer + bytes_read, sizeof(uint16_t));
    bytes_read += sizeof(uint16_t);
    memcpy(&mesh_h_16, input_buffer + bytes_read, sizeof(uint16_t));
    bytes_read += sizeof(uint16_t);

    int temporal_mesh_width = (int)mesh_w_16;
    int temporal_mesh_height = (int)mesh_h_16;
    int mesh_points = temporal_mesh_width * temporal_mesh_height;
    *out_temporal_mesh_width = temporal_mesh_width;
    *out_temporal_mesh_height = temporal_mesh_height;

    // Decode mesh data for all frames
    for (int t = 0; t < gop_size; t++) {
        for (int i = 0; i < mesh_points; i++) {
            // Read differential values
            if (bytes_read + 4 > buffer_size) return -1;
            int16_t dx_delta, dy_delta;
            memcpy(&dx_delta, input_buffer + bytes_read, sizeof(int16_t));
            bytes_read += sizeof(int16_t);
            memcpy(&dy_delta, input_buffer + bytes_read, sizeof(int16_t));
            bytes_read += sizeof(int16_t);

            // Reverse spatial prediction. The encoder subtracted the left neighbour's
            // TEMPORAL RESIDUAL (its value minus its previous-frame value), not its raw
            // value, so the same quantity must be rebuilt here from the already-decoded
            // cells. Adding the raw left value would leave an extra prev-frame term.
            if (i > 0 && (i % temporal_mesh_width) != 0) {
                int16_t left_dx = mesh_dx[t][i - 1];
                int16_t left_dy = mesh_dy[t][i - 1];
                if (t > 0) {
                    left_dx = (int16_t)(left_dx - mesh_dx[t - 1][i - 1]);
                    left_dy = (int16_t)(left_dy - mesh_dy[t - 1][i - 1]);
                }
                dx_delta = (int16_t)(dx_delta + left_dx);
                dy_delta = (int16_t)(dy_delta + left_dy);
            }

            // Then reverse temporal prediction
            if (t > 0) {
                dx_delta = (int16_t)(dx_delta + mesh_dx[t - 1][i]);
                dy_delta = (int16_t)(dy_delta + mesh_dy[t - 1][i]);
            }

            mesh_dx[t][i] = dx_delta;
            mesh_dy[t][i] = dy_delta;
        }
    }

    return 0;
 }
 // =============================================================================
 // MPEG-Style Motion Estimation and Residual Coding
 // =============================================================================
@@ -3471,106 +3331,6 @@ static float interpolate_subpixel(const float *frame, int width, int height, flo
    return p0 * (1.0f - fy) + p1 * fy;
 }
 // Block-matching motion estimation with 1/4-pixel precision.
 //
 // Finds the displacement of block (block_x, block_y) of `current` within `reference`
 // that minimises the Sum of Absolute Differences (SAD):
 //   1. full-pixel search over +/- search_range around the predicted MV
 //      (pred_mv_x/pred_mv_y are in 1/4-pixel units) for spatial coherence;
 //   2. sub-pixel refinement on a 5x5 grid of 1/4-pixel steps (+/- 0.5 px) around the
 //      best full-pixel match, sampled through interpolate_subpixel().
 // Pixels of the block that lie outside the frame are skipped.
 //
 // best_mv_x / best_mv_y always receive the winning vector in 1/4-pixel units; they do
 // not need to be initialised by the caller. Returns the SAD of the winning match.
 static float block_matching_sad(const float *current, const float *reference,
                               int width, int height,
                               int block_x, int block_y, int block_size,
                               int search_range,
                               int16_t pred_mv_x, int16_t pred_mv_y,
                               int16_t *best_mv_x, int16_t *best_mv_y) {
    float best_sad = 1e9f;
    int best_dx = 0, best_dy = 0;

    // Block coordinates in current frame
    int block_start_x = block_x * block_size;
    int block_start_y = block_y * block_size;

    // Convert predicted MV from 1/4-pixel units to full pixels for search center
    int pred_dx = pred_mv_x / 4;
    int pred_dy = pred_mv_y / 4;

    // Full-pixel search centered around prediction
    for (int dy = pred_dy - search_range; dy <= pred_dy + search_range; dy++) {
        for (int dx = pred_dx - search_range; dx <= pred_dx + search_range; dx++) {
            float sad = 0.0f;

            // Calculate SAD for this displacement
            for (int by = 0; by < block_size; by++) {
                for (int bx = 0; bx < block_size; bx++) {
                    int curr_x = block_start_x + bx;
                    int curr_y = block_start_y + by;
                    if (curr_x >= width || curr_y >= height) continue;

                    int ref_x = curr_x + dx;
                    int ref_y = curr_y + dy;
                    // Clamp reference coordinates
                    if (ref_x < 0) ref_x = 0;
                    if (ref_y < 0) ref_y = 0;
                    if (ref_x >= width) ref_x = width - 1;
                    if (ref_y >= height) ref_y = height - 1;

                    float curr_val = current[curr_y * width + curr_x];
                    float ref_val = reference[ref_y * width + ref_x];
                    sad += fabsf(curr_val - ref_val);
                }
            }

            if (sad < best_sad) {
                best_sad = sad;
                best_dx = dx;
                best_dy = dy;
            }
        }
    }

    // Publish the full-pixel result first (converted to 1/4-pixel units). The sub-pixel
    // pass below overwrites it only when it finds a strictly better match, so the
    // outputs are never left holding whatever the caller passed in.
    *best_mv_x = (int16_t)(best_dx * 4);
    *best_mv_y = (int16_t)(best_dy * 4);

    // Sub-pixel refinement (1/4-pixel precision)
    // Search a 5x5 pattern of quarter-pixel offsets around the best full-pixel match
    for (int qpy = -2; qpy <= 2; qpy++) {
        for (int qpx = -2; qpx <= 2; qpx++) {
            float dx_subpel = best_dx + qpx * 0.25f;
            float dy_subpel = best_dy + qpy * 0.25f;
            float sad = 0.0f;

            for (int by = 0; by < block_size; by++) {
                for (int bx = 0; bx < block_size; bx++) {
                    int curr_x = block_start_x + bx;
                    int curr_y = block_start_y + by;
                    if (curr_x >= width || curr_y >= height) continue;

                    float ref_x = curr_x + dx_subpel;
                    float ref_y = curr_y + dy_subpel;
                    float curr_val = current[curr_y * width + curr_x];
                    float ref_val = interpolate_subpixel(reference, width, height, ref_x, ref_y);
                    sad += fabsf(curr_val - ref_val);
                }
            }

            if (sad < best_sad) {
                best_sad = sad;
                *best_mv_x = (int16_t)roundf(dx_subpel * 4.0f);  // Store in 1/4-pixel units
                *best_mv_y = (int16_t)roundf(dy_subpel * 4.0f);
            }
        }
    }

    return best_sad;
 }
 // Helper function: compute median of three values (for MV prediction)
 static int16_t median3(int16_t a, int16_t b, int16_t c) {
    if (a > b) {
@@ -4013,29 +3773,6 @@ static int allocate_lookahead_buffer(tav_encoder_t *enc) {
    return 0;
 }
 // Release the residual-coding lookahead buffer: every per-slot Y/Co/Cg plane, the three
 // plane-pointer arrays and the display-index array. Afterwards all four pointers are
 // NULL and capacity/count are 0. Does nothing when the Y array was never allocated.
 static void free_lookahead_buffer(tav_encoder_t *enc) {
    if (enc->residual_coding_lookahead_buffer_y == NULL) {
        return;
    }

    const int slot_count = enc->residual_coding_lookahead_buffer_capacity;
    for (int slot = 0; slot < slot_count; ++slot) {
        free(enc->residual_coding_lookahead_buffer_y[slot]);
        free(enc->residual_coding_lookahead_buffer_co[slot]);
        free(enc->residual_coding_lookahead_buffer_cg[slot]);
    }

    free(enc->residual_coding_lookahead_buffer_y);
    enc->residual_coding_lookahead_buffer_y = NULL;

    free(enc->residual_coding_lookahead_buffer_co);
    enc->residual_coding_lookahead_buffer_co = NULL;

    free(enc->residual_coding_lookahead_buffer_cg);
    enc->residual_coding_lookahead_buffer_cg = NULL;

    free(enc->residual_coding_lookahead_buffer_display_index);
    enc->residual_coding_lookahead_buffer_display_index = NULL;

    enc->residual_coding_lookahead_buffer_capacity = 0;
    enc->residual_coding_lookahead_buffer_count = 0;
 }
 // Add current frame to lookahead buffer
 // Returns 0 if buffer not full yet, 1 if buffer is now full and ready to encode
 static int add_frame_to_buffer(tav_encoder_t *enc, int display_index) {
@@ -5211,6 +4948,20 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
    quantise_3d_dwt_coefficients(enc, gop_cg_coeffs, quant_cg, actual_gop_size,
                                 num_pixels, qCg, 1);  // Chroma Cg
    // Debug: print LL coefficients for frames 0 and 1 (first 10 pixels)
    /*if (enc->quality_level == 5 && enc->verbose) {
        int ll_width = valid_width >> enc->decomp_levels;
        int ll_height = valid_height >> enc->decomp_levels;
        printf("DEBUG Q5: LL coefficients for first 10 pixels:\n");
        for (int f = 0; f < (actual_gop_size < 2 ? actual_gop_size : 2); f++) {
            printf("  Frame %d: ", f);
            for (int i = 0; i < 10 && i < ll_width * ll_height; i++) {
                printf("%d ", quant_y[f][i]);
            }
            printf("\n");
        }
    }*/
    // Step 4: Preprocessing and compression
    size_t total_bytes_written = 0;
@@ -5641,125 +5392,6 @@ static size_t gop_process_and_flush(tav_encoder_t *enc, FILE *output, int base_q
 // Temporal DWT Functions
 // =============================================================================
 // Produce the backward-warping mesh for the MC-lifting update step.
 // The forward mesh warps F0 to F1; the backward mesh (F1 to F0) is obtained by
 // negating every motion vector component. Writes width * height entries to each
 // output array.
 static void invert_mesh(
    const short *mesh_dx, const short *mesh_dy,
    int temporal_mesh_width, int temporal_mesh_height,
    short *inv_mesh_dx, short *inv_mesh_dy
 ) {
    const int total_points = temporal_mesh_width * temporal_mesh_height;
    int idx = 0;
    while (idx < total_points) {
        inv_mesh_dx[idx] = (short)(-mesh_dx[idx]);
        inv_mesh_dy[idx] = (short)(-mesh_dy[idx]);
        idx++;
    }
 }
 // Build block-based reliability mask for selective motion compensation
 // Process 16×16 blocks for efficiency (matches block matching resolution)
 // Returns mask where 1 = use MC, 0 = fall back to plain Haar
 /*static void build_reliability_mask(
    const uint8_t *frame0_rgb, const uint8_t *frame1_rgb,
    const float *flow_fwd_x, const float *flow_fwd_y,
    const float *flow_bwd_x, const float *flow_bwd_y,
    int width, int height,
    uint8_t *mask
 ) {
    const int block_size = 16;  // Match block matching resolution
    int num_pixels = width * height;
    // Relaxed thresholds for better coverage
    float motion_threshold = 1.0f;   // pixels (relaxed from 2.0)
    float fb_threshold = 2.0f;       // pixels (relaxed from 1.0)
    float texture_threshold = 10.0f; // gradient magnitude
    int reliable_blocks = 0;
    int total_blocks = 0;
    int reliable_pixels = 0;
    // Process in 16×16 blocks
    for (int by = 0; by < height; by += block_size) {
        for (int bx = 0; bx < width; bx += block_size) {
            total_blocks++;
            // Compute block statistics
            float sum_motion = 0.0f;
            float sum_fb_error = 0.0f;
            float sum_texture = 0.0f;
            int block_pixels = 0;
            int bh = (by + block_size <= height) ? block_size : (height - by);
            int bw = (bx + block_size <= width) ? block_size : (width - bx);
            for (int y = by; y < by + bh; y++) {
                for (int x = bx; x < bx + bw; x++) {
                    int idx = y * width + x;
                    // Motion magnitude
                    float fx = flow_fwd_x[idx];
                    float fy = flow_fwd_y[idx];
                    sum_motion += sqrtf(fx * fx + fy * fy);
                    // Forward-backward consistency
                    int x_warped = (int)(x + fx + 0.5f);
                    int y_warped = (int)(y + fy + 0.5f);
                    if (x_warped >= 0 && x_warped < width && y_warped >= 0 && y_warped < height) {
                        int idx_w = y_warped * width + x_warped;
                        float bx_val = flow_bwd_x[idx_w];
                        float by_val = flow_bwd_y[idx_w];
                        float err_x = fx + bx_val;
                        float err_y = fy + by_val;
                        sum_fb_error += sqrtf(err_x * err_x + err_y * err_y);
                    } else {
                        sum_fb_error += 999.0f;
                    }
                    // Texture (simple gradient)
                    if (x > 0 && x < width - 1 && y > 0 && y < height - 1) {
                        int rgb_idx = idx * 3;
                        int rgb_idx_r = (y * width + (x + 1)) * 3;
                        int rgb_idx_d = ((y + 1) * width + x) * 3;
                        float gx = (frame0_rgb[rgb_idx_r] - frame0_rgb[rgb_idx]);
                        float gy = (frame0_rgb[rgb_idx_d] - frame0_rgb[rgb_idx]);
                        sum_texture += sqrtf(gx * gx + gy * gy);
                    }
                    block_pixels++;
                }
            }
            // Average block statistics
            float avg_motion = sum_motion / block_pixels;
            float avg_fb_error = sum_fb_error / block_pixels;
            float avg_texture = sum_texture / block_pixels;
            // Decide if block is reliable
            int block_reliable = (avg_motion > motion_threshold) &&
                                (avg_fb_error < fb_threshold) &&
                                (avg_texture > texture_threshold);
            if (block_reliable) reliable_blocks++;
            // Apply decision to all pixels in block
            for (int y = by; y < by + bh; y++) {
                for (int x = bx; x < bx + bw; x++) {
                    int idx = y * width + x;
                    mask[idx] = block_reliable ? 1 : 0;
                    if (mask[idx]) reliable_pixels++;
                }
            }
        }
    }
    // Debug output
    printf("  Reliability mask: %d/%d blocks (%d/%d pixels, %.1f%%) - motion>%.1fpx, texture>%.1f, fb_err<%.1fpx\n",
           reliable_blocks, total_blocks, reliable_pixels, num_pixels,
           100.0f * reliable_pixels / num_pixels,
           motion_threshold, texture_threshold, fb_threshold);
 }*/
 // Simple translation-based frame alignment (legacy, non-MC-EZBC path)
 // Shifts entire frame by (dx, dy) pixels with bilinear interpolation
 static void apply_translation(
@@ -5913,20 +5545,6 @@ static void mc_lifting_forward_pair(
    free(update_cg);
 }
 // Forward temporal DWT for a single spatial location (encoder side).
 // temporal_data[i] is frame i's coefficient value at that location.
 // The LGT 5/3 lifting itself is delegated to dwt_53_forward_1d, chosen for reversibility.
 // Sequences shorter than two frames are left untouched.
 static void dwt_temporal_1d_forward_53(float *temporal_data, int num_frames) {
    if (num_frames >= 2) {
        dwt_53_forward_1d(temporal_data, num_frames);
    }
 }
 // Inverse temporal DWT for a single spatial location (decoder side).
 // Delegates to dwt_53_inverse_1d; sequences shorter than two frames are left untouched.
 static void dwt_temporal_1d_inverse_53(float *temporal_data, int num_frames) {
    if (num_frames >= 2) {
        dwt_53_inverse_1d(temporal_data, num_frames);
    }
 }
 // Apply 3D DWT with motion-compensated lifting (MC-lifting)
 // Integrates motion compensation directly into wavelet lifting steps
 // This replaces separate warping + DWT for better invertibility and compression
@@ -6054,7 +5672,6 @@ static void dwt_3d_forward(float **gop_data, int width, int height, int num_fram
            for (int level = 0; level < temporal_levels; level++) {
                int level_frames = temporal_lengths[level];
                if (level_frames >= 2) {
 //                    dwt_temporal_1d_forward_53(temporal_line, level_frames);  // CDF 5/3 worse for motion-compensated frames
                    dwt_haar_forward_1d(temporal_line, level_frames);  // Haar better for imperfect alignment
                }
            }
@@ -6076,64 +5693,6 @@ static void dwt_3d_forward(float **gop_data, int width, int height, int num_fram
    }
 }
 // Apply inverse 3D DWT: inverse spatial DWT on each temporal subband, then
 // inverse temporal DWT along the time axis of every pixel.
 //
 //   gop_data        - num_frames planes of width*height coefficients, rewritten in place
 //   spatial_levels  - decomposition depth handed to the spatial inverse
 //   temporal_levels - number of temporal decomposition levels to undo
 //   spatial_filter  - only WAVELET_HAAR has a spatial inverse here; any other value
 //                     logs one warning and skips the spatial stage
 //
 // Does nothing when num_frames, width or height is below 2, or (after logging an
 // error) when the scratch buffers cannot be allocated.
 //
 // NOTE(review): the temporal stage inverts with the 5/3 helper, whereas the forward
 // path in dwt_3d_forward currently runs dwt_haar_forward_1d for its temporal pass.
 // Confirm the pairing before relying on this function for reconstruction.
 static void dwt_3d_inverse(float **gop_data, int width, int height, int num_frames,
                           int spatial_levels, int temporal_levels, int spatial_filter) {
    if (num_frames < 2 || width < 2 || height < 2) return;
    // A negative level count would under-size the lengths table; treat it as "no levels"
    const int levels = temporal_levels > 0 ? temporal_levels : 0;
    // Allocate scratch up front so an allocation failure leaves gop_data untouched
    float *temporal_line = malloc((size_t)num_frames * sizeof(float));
    int *temporal_lengths = malloc(((size_t)levels + 1) * sizeof(int));
    if (!temporal_line || !temporal_lengths) {
        fprintf(stderr, "Error: out of memory in inverse 3D DWT\n");
        free(temporal_lengths);
        free(temporal_line);
        return;
    }
    // Step 1: Apply inverse 2D spatial DWT to each temporal subband
    if (spatial_filter == WAVELET_HAAR) {
        for (int t = 0; t < num_frames; t++) {
            dwt_2d_haar_inverse_flexible(gop_data[t], width, height, spatial_levels);
        }
    } else {
        // TODO: Implement proper inverse for other wavelets (5/3, 9/7, etc.)
        // Warn once per call rather than once per frame
        fprintf(stderr, "Warning: Inverse spatial DWT not fully implemented for filter %d\n", spatial_filter);
    }
    // Step 2: Apply inverse temporal DWT to each spatial location
    // Pre-calculate all intermediate lengths for temporal DWT (same fix as TAD)
    // This ensures correct reconstruction for non-power-of-2 GOP sizes
    temporal_lengths[0] = num_frames;
    for (int i = 1; i <= levels; i++) {
        temporal_lengths[i] = (temporal_lengths[i - 1] + 1) / 2;
    }
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            // size_t keeps the flat index from overflowing int on very large frames
            const size_t pixel_idx = (size_t)y * (size_t)width + (size_t)x;
            // Extract temporal coefficients for this spatial location
            for (int t = 0; t < num_frames; t++) {
                temporal_line[t] = gop_data[t][pixel_idx];
            }
            // Undo the levels in reverse order, coarsest first, using the pre-calculated lengths
            for (int level = levels - 1; level >= 0; level--) {
                int level_frames = temporal_lengths[level];
                if (level_frames >= 2) {
                    dwt_temporal_1d_inverse_53(temporal_line, level_frames);
                }
            }
            // Write back reconstructed values
            for (int t = 0; t < num_frames; t++) {
                gop_data[t][pixel_idx] = temporal_line[t];
            }
        }
    }
    free(temporal_lengths);
    free(temporal_line);
 }
 // Extract padded tile with margins for seamless DWT processing (correct implementation)
 static void extract_padded_tile(tav_encoder_t *enc, int tile_x, int tile_y,
                               float *padded_y, float *padded_co, float *padded_cg) {
@@ -6230,61 +5789,6 @@ static void extract_padded_tile(tav_encoder_t *enc, int tile_x, int tile_y,
 // Forward declaration for perceptual weight function
 static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels);
 // Map one 32-bit random word to triangular-distributed noise.
 // The low and high 16-bit halves are each scaled to [0, 1]; their sum,
 // shifted down by one, gives a value in the range [-1.0, 1.0].
 static float grain_triangular_noise(uint32_t rng_val) {
    const uint32_t low_half = rng_val & 0xFFFF;
    const uint32_t high_half = (rng_val >> 16) & 0xFFFF;
    const float low_unit = low_half / 65535.0f;
    const float high_unit = high_half / 65535.0f;
    return (low_unit + high_unit) - 1.0f;
 }
 // Apply grain synthesis to DWT coefficients (encoder adds noise).
 //
 // Every coefficient outside the LL band (subband level 0) receives deterministic
 // triangular noise derived from frame_num, its subband and its (x, y) position.
 // The amplitude is half of the quantiser after clamping it to [0, 32], and is the
 // same for every subband.
 //
 // enc and is_chroma are currently unused: the perceptual-weight scaling that
 // consumed them (via get_perceptual_weight) is disabled. This function does not
 // test is_chroma itself, so restricting grain to the Y channel is up to the caller.
 static void apply_grain_synthesis_encoder(tav_encoder_t *enc, float *coeffs, int width, int height,
                                         int decomp_levels, uint32_t frame_num,
                                         int quantiser, int is_chroma) {
    (void)enc;
    (void)is_chroma;
    // Loop-invariant: the amplitude depends only on the quantiser
    const float noise_amplitude = FCLAMP(quantiser, 0.0f, 32.0f) * 0.5f;
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            const int level = get_subband_level_2d(x, y, width, height, decomp_levels);
            if (level == 0) {
                continue; // Skip LL band
            }
            // Looked up only for coefficients that actually receive noise
            // (assumes get_subband_type_2d has no side effects - TODO confirm)
            const int subband_type = get_subband_type_2d(x, y, width, height, decomp_levels);
            // Generate deterministic noise
            const uint32_t rng_val = grain_synthesis_rng(frame_num, level + subband_type * 31 + 16777219, x, y);
            const float noise = grain_triangular_noise(rng_val);
            // Add noise to coefficient
            coeffs[y * width + x] += noise * noise_amplitude;
        }
    }
 }
 // 2D DWT forward transform for rectangular padded tile (344x288)
 static void dwt_2d_forward_padded(float *tile_data, int levels, int filter_type) {
    const int width = PADDED_TILE_SIZE_X;   // 344
@@ -7212,13 +6716,15 @@ static void quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(tav_
        }
        // Step 3: Round to discrete quantization levels
-        quantised_val = roundf(quantised_val);
+        quantised_val = roundf(quantised_val); // file size explodes without rounding
        // Step 4: Denormalize - multiply back by quantizer to restore magnitude
        // This gives us quantized values at original scale (not shrunken to 0-10 range)
        float denormalized = quantised_val * effective_q;
-        quantised[i] = (int16_t)CLAMP((int)denormalized, -32768, 32767);
+        // CRITICAL FIX: Must round (not truncate) to match decoder behavior
        // With odd baseQ values and fractional weights, truncation causes mismatch with Sigmap mode
        quantised[i] = (int16_t)CLAMP((int)roundf(denormalized), -32768, 32767);
    }
 }
@@ -7622,21 +7128,6 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
                printf("\n");
            }*/
            // Apply grain synthesis to Y channel (after DWT, before quantization)
            if (enc->grain_synthesis && mode != TAV_MODE_SKIP) {
                // Get the quantiser value that will be used for this frame
                int qY_value = enc->bitrate_mode ? quantiser_float_to_int_dithered(enc) : enc->quantiser_y;
                int actual_qY = QLUT[qY_value];
                // Determine dimensions based on mode
                int gs_width = enc->monoblock ? enc->width : PADDED_TILE_SIZE_X;
                int gs_height = enc->monoblock ? enc->height : PADDED_TILE_SIZE_Y;
                // Apply grain synthesis to Y channel only (is_chroma = 0)
                apply_grain_synthesis_encoder(enc, tile_y_data, gs_width, gs_height,
                                             enc->decomp_levels, enc->frame_count, actual_qY, 0);
            }
            // Serialise tile
            size_t tile_size = serialise_tile_data(enc, tile_x, tile_y,
                                                   tile_y_data, tile_co_data, tile_cg_data,
@@ -10347,83 +9838,6 @@ static int detect_still_frame(tav_encoder_t *enc) {
    return (changed_pixels == 0);
 }
 // Detect still frames by comparing quantised DWT coefficients.
 // Returns 1 if the quantised coefficients of the current frame are identical to
 // those of the stored previous frame (frame is truly still), 0 otherwise.
 // Also returns 0 when no comparison is possible: no previous coefficients,
 // intra-only mode, previous packet was not an I-frame, an empty frame, or the
 // scratch buffer could not be allocated.
 // Benefits: quality-aware (lower quality = more SKIP frames), pure integer math
 // DISABLED: should work in theory, but does not in practice
 static int detect_still_frame_dwt(tav_encoder_t *enc) {
    if (!enc->previous_coeffs_allocated || enc->intra_only) {
        return 0; // No previous coefficients to compare or intra-only mode
    }
    // Only compare against I-frames to avoid DELTA quantization drift
    // previous_coeffs are updated by DELTA frames with reconstructed values that accumulate error
    if (enc->last_frame_packet_type != TAV_PACKET_IFRAME) {
        return 0; // Must compare against clean I-frame, not DELTA reconstruction
    }
    // Get current quantisers (use adjusted quantiser from bitrate control if applicable)
    int qY = enc->bitrate_mode ? quantiser_float_to_int_dithered(enc) : enc->quantiser_y;
    int this_frame_qY = QLUT[qY];
    int this_frame_qCo = QLUT[enc->quantiser_co];
    int this_frame_qCg = QLUT[enc->quantiser_cg];
    // Coefficient count (monoblock mode)
    const int coeff_count = enc->width * enc->height;
    if (coeff_count <= 0) {
        return 0; // Nothing to compare
    }
    // One allocation holds all three previous-frame planes: a single failure check
    // and a single free cover every exit path
    int16_t *prev_quantised = malloc((size_t)coeff_count * 3 * sizeof(int16_t));
    if (!prev_quantised) {
        fprintf(stderr, "Error: out of memory in DWT still frame detection\n");
        return 0; // Cannot prove the frame is still, so report it as changed
    }
    int16_t *prev_quantised_y = prev_quantised;
    int16_t *prev_quantised_co = prev_quantised + coeff_count;
    int16_t *prev_quantised_cg = prev_quantised + 2 * (size_t)coeff_count;
    // Quantise current DWT coefficients into the encoder's reusable buffers
    int16_t *quantised_y = enc->reusable_quantised_y;
    int16_t *quantised_co = enc->reusable_quantised_co;
    int16_t *quantised_cg = enc->reusable_quantised_cg;
    if (enc->perceptual_tuning) {
        quantise_dwt_coefficients_perceptual_per_coeff(enc, enc->current_dwt_y, quantised_y, coeff_count, this_frame_qY, enc->width, enc->height, enc->decomp_levels, 0, enc->frame_count);
        quantise_dwt_coefficients_perceptual_per_coeff(enc, enc->current_dwt_co, quantised_co, coeff_count, this_frame_qCo, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count);
        quantise_dwt_coefficients_perceptual_per_coeff(enc, enc->current_dwt_cg, quantised_cg, coeff_count, this_frame_qCg, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count);
    } else {
        quantise_dwt_coefficients(enc->current_dwt_y, quantised_y, coeff_count, this_frame_qY, enc->dead_zone_threshold, enc->width, enc->height, enc->decomp_levels, 0);
        quantise_dwt_coefficients(enc->current_dwt_co, quantised_co, coeff_count, this_frame_qCo, enc->dead_zone_threshold, enc->width, enc->height, enc->decomp_levels, 1);
        quantise_dwt_coefficients(enc->current_dwt_cg, quantised_cg, coeff_count, this_frame_qCg, enc->dead_zone_threshold, enc->width, enc->height, enc->decomp_levels, 1);
    }
    // Quantise previous DWT coefficients (stored from last I-frame) with the same quantisers
    if (enc->perceptual_tuning) {
        quantise_dwt_coefficients_perceptual_per_coeff(enc, enc->previous_coeffs_y, prev_quantised_y, coeff_count, this_frame_qY, enc->width, enc->height, enc->decomp_levels, 0, enc->frame_count);
        quantise_dwt_coefficients_perceptual_per_coeff(enc, enc->previous_coeffs_co, prev_quantised_co, coeff_count, this_frame_qCo, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count);
        quantise_dwt_coefficients_perceptual_per_coeff(enc, enc->previous_coeffs_cg, prev_quantised_cg, coeff_count, this_frame_qCg, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count);
    } else {
        quantise_dwt_coefficients(enc->previous_coeffs_y, prev_quantised_y, coeff_count, this_frame_qY, enc->dead_zone_threshold, enc->width, enc->height, enc->decomp_levels, 0);
        quantise_dwt_coefficients(enc->previous_coeffs_co, prev_quantised_co, coeff_count, this_frame_qCo, enc->dead_zone_threshold, enc->width, enc->height, enc->decomp_levels, 1);
        quantise_dwt_coefficients(enc->previous_coeffs_cg, prev_quantised_cg, coeff_count, this_frame_qCg, enc->dead_zone_threshold, enc->width, enc->height, enc->decomp_levels, 1);
    }
    // Compare quantised coefficients - pure integer math
    int diff_count = 0;
    for (int i = 0; i < coeff_count; i++) {
        if (quantised_y[i] != prev_quantised_y[i] ||
            quantised_co[i] != prev_quantised_co[i] ||
            quantised_cg[i] != prev_quantised_cg[i]) {
            diff_count++;
            // The exact count only feeds the verbose log line; otherwise one mismatch decides
            if (!enc->verbose) break;
        }
    }
    free(prev_quantised);
    if (enc->verbose) {
        printf("Still frame detection (DWT): %d/%d coeffs differ\n", diff_count, coeff_count);
    }
    // If all quantised coefficients match, frames are identical after compression
    return (diff_count == 0);
 }
 // Main function
 int main(int argc, char *argv[]) {
    generate_random_filename(TEMP_AUDIO_FILE);
@@ -10472,7 +9886,6 @@ int main(int argc, char *argv[]) {
        {"zstd-level", required_argument, 0, 1014},
        {"interlace", no_argument, 0, 1015},
        {"interlaced", no_argument, 0, 1015},
 //        {"no-grain-synthesis", no_argument, 0, 1016},
        {"enable-delta", no_argument, 0, 1017},
        {"delta-haar", required_argument, 0, 1018},
        {"temporal-dwt", no_argument, 0, 1019},
@@ -10643,9 +10056,6 @@ int main(int argc, char *argv[]) {
            case 1015: // --interlaced
                enc->progressive_mode = 0;
                break;
            case 1016: // --no-grain-synthesis
                enc->grain_synthesis = 0;
                break;
            case 1017: // --enable-delta
                enc->use_delta_encoding = 1;
                enc->enable_temporal_dwt = 0;