diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
index 7ac3e67..b58a796 100644
--- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
+++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
@@ -14,6 +14,7 @@ import net.torvald.tsvm.peripheral.PeriBase
 import net.torvald.tsvm.peripheral.fmod
 import java.io.ByteArrayInputStream
 import java.util.*
+import java.util.concurrent.atomic.AtomicInteger
 import kotlin.Any
 import kotlin.Array
 import kotlin.Boolean
@@ -5078,9 +5079,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
     // Remove grain synthesis from DWT coefficients (decoder subtracts noise)
     // This must be called AFTER dequantization but BEFORE inverse DWT
     private fun removeGrainSynthesisDecoder(coeffs: FloatArray, width: Int, height: Int,
-                                           decompLevels: Int, frameNum: Int, quantiser: Float,
-                                           subbands: List<DWTSubbandInfo>, qIndex: Int = 3, qYGlobal: Int = 0,
-                                           usePerceptualWeights: Boolean = false) {
+                                           frameNum: Int, subbands: List<DWTSubbandInfo>, qYGlobal: Int) {
         // Only apply to Y channel, excluding LL band
         // Noise amplitude = half of quantization step (scaled by perceptual weight if enabled)
 
@@ -5096,8 +5095,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
             }*/
 
             // Noise amplitude for this subband
-//            val noiseAmplitude = quantiser.coerceAtMost(32f) * 0.5f
-            val noiseAmplitude = qYGlobal.coerceAtMost(32) * 0.5f // using qYGlobal because quantiser is variable on bitrate-control mode and varying grain amp annoys viewer
+            val noiseAmplitude = qYGlobal.coerceAtMost(32) * 0.8f // using qYGlobal because quantiser is variable on bitrate-control mode and varying grain amp annoys viewer
 
             // Remove noise from each coefficient in this subband
             for (i in 0 until subband.coeffCount) {
@@ -5354,7 +5352,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
 
             // Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT)
             // Use perceptual weights since this is the perceptual quantization path
-            removeGrainSynthesisDecoder(yTile, tileWidth, tileHeight, decompLevels, frameCount, qY.toFloat(), subbands, qIndex, qYGlobal, true)
+            removeGrainSynthesisDecoder(yTile, tileWidth, tileHeight, frameCount, subbands, qYGlobal)
 
             // Apply film grain filter if enabled
             // commented; grain synthesis is now a part of the spec
@@ -5379,7 +5377,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
             val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X
             val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y
             val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels)
-            removeGrainSynthesisDecoder(yTile, tileWidth, tileHeight, decompLevels, frameCount, qY.toFloat(), subbands)
+            removeGrainSynthesisDecoder(yTile, tileWidth, tileHeight, frameCount, subbands, qYGlobal)
 
             // Apply film grain filter if enabled
             // commented; grain synthesis is now a part of the spec
@@ -5830,18 +5828,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         // Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT)
         val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels)
         // Delta frames use uniform quantization for the deltas themselves, so no perceptual weights
-        removeGrainSynthesisDecoder(currentY, tileWidth, tileHeight, decompLevels, frameCount, qY.toFloat(), subbands)
-
-        // Apply film grain filter if enabled
-        // commented; grain synthesis is now a part of the spec
-        /*if (filmGrainLevel > 0) {
-            val random = java.util.Random()
-            for (i in 0 until coeffCount) {
-                currentY[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat()
-//                currentCo[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat()
-//                currentCg[i] += (random.nextInt(filmGrainLevel * 2 + 1) - filmGrainLevel).toFloat()
-            }
-        }*/
+        removeGrainSynthesisDecoder(currentY, tileWidth, tileHeight, frameCount, subbands, qY)
 
         // Store current coefficients as previous for next frame
         tavPreviousCoeffsY!![tileIdx] = currentY.clone()
@@ -6401,6 +6388,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         System.arraycopy(output, 0, frameData, 0, frameData.size)
     }
 
+    private val rngFrameTick = AtomicInteger(0) // running counter used to derive the frameNum passed to removeGrainSynthesisDecoder for GOP frames; NOTE(review): the GOP loop passes getAndAdd(1) + t, so frameNum advances by 2 per frame - confirm this is what the encoder expects
+
     /**
      * Decode GOP frames directly into GraphicsAdapter.videoBuffer (Java heap).
      * This avoids allocating GOP frames in VM user memory, saving ~6 MB for 8-frame GOPs.
@@ -6522,6 +6511,16 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         }
 
 
+        // Step 5.5: Remove grain synthesis from Y channel for each GOP frame
+        // This must happen after dequantization but before inverse DWT
+        for (t in 0 until gopSize) {
+            removeGrainSynthesisDecoder(
+                gopY[t], width, height,
+                rngFrameTick.getAndAdd(1) + t,
+                subbands, qIndex
+            )
+        }
+
         // Step 6: Apply inverse 3D DWT
         tavApplyInverse3DDWT(gopY, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
         tavApplyInverse3DDWT(gopCo, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
diff --git a/video_encoder/decoder_tav.c b/video_encoder/decoder_tav.c
index 5228eba..012520c 100644
--- a/video_encoder/decoder_tav.c
+++ b/video_encoder/decoder_tav.c
@@ -364,8 +364,8 @@ static void remove_grain_synthesis_decoder(float *coeffs, int width, int height,
     dwt_subband_info_t subbands[32];
     const int subband_count = calculate_subband_layout(width, height, decomp_levels, subbands);
 
-    // Noise amplitude (matches Kotlin: qYGlobal.coerceAtMost(32) * 0.5f)
-    const float noise_amplitude = (q_y_global < 32 ? q_y_global : 32) * 0.5f;
+    // Noise amplitude (Kotlin uses qYGlobal.coerceAtMost(32) * 0.8f; this decoder uses 0.25f instead)
+    const float noise_amplitude = (q_y_global < 32 ? q_y_global : 32) * 0.25f; // somehow noise amplitude works differently than Kotlin?
 
     // Process each subband (skip LL band which is level 0)
     for (int s = 0; s < subband_count; s++) {
@@ -479,6 +479,317 @@ static void postprocess_coefficients_twobit(uint8_t *compressed_data, int coeff_
     }
 }
 
+//=============================================================================
+// EZBC (Embedded Zero Block Coding) Decoder
+//=============================================================================
+
+// EZBC Block structure for quadtree
+typedef struct {
+    int x, y;           // top-left corner of the block within the coefficient plane
+    int width, height;  // block extent in coefficients
+} ezbc_block_t;
+
+// EZBC bitstream reader state
+typedef struct {
+    const uint8_t *data;  // buffer being read
+    size_t size;          // exclusive end index: reading stops once byte_pos >= size
+    size_t byte_pos;      // index of the byte currently being read
+    int bit_pos;          // next bit within that byte, 0 = least significant
+} ezbc_bitreader_t;
+
+// Read num_bits bits from the reader, LSB-first within each byte; the first bit read
+// becomes bit 0 of the result. Stops early and returns the bits gathered so far
+// if the end of the stream is reached.
+static int ezbc_read_bits(ezbc_bitreader_t *reader, int num_bits) {
+    int value = 0;
+    int filled = 0;
+    while (filled < num_bits && reader->byte_pos < reader->size) {
+        const int bit = (reader->data[reader->byte_pos] >> reader->bit_pos) & 1;
+        value |= (bit << filled);
+        filled++;
+
+        // Advance to the next bit, rolling over to the next byte after bit 7
+        if (++reader->bit_pos == 8) {
+            reader->bit_pos = 0;
+            reader->byte_pos++;
+        }
+    }
+    return value;
+}
+
+// EZBC block queues (simple dynamic arrays)
+typedef struct {
+    ezbc_block_t *blocks;  // heap-allocated storage for the queued blocks
+    int count;             // number of blocks currently stored
+    int capacity;          // number of blocks the storage can hold before it must grow
+} ezbc_block_queue_t;
+
+static void ezbc_queue_init(ezbc_block_queue_t *q) {
+    q->capacity = 256;  // initial capacity in blocks; ezbc_queue_add doubles it when full
+    q->count = 0;
+    q->blocks = malloc(q->capacity * sizeof(ezbc_block_t));  // NOTE(review): result is not checked for NULL
+}
+
+static void ezbc_queue_free(ezbc_block_queue_t *q) {
+    free(q->blocks);   // release the block storage
+    q->blocks = NULL;  // so the queue no longer refers to freed memory
+    q->count = 0;      // capacity is left unchanged
+}
+
+static void ezbc_queue_add(ezbc_block_queue_t *q, ezbc_block_t block) {
+    if (q->count >= q->capacity) {  // storage is full: double it before appending
+        q->capacity *= 2;
+        q->blocks = realloc(q->blocks, q->capacity * sizeof(ezbc_block_t));  // NOTE(review): result is not checked; a failed realloc would leak the old buffer
+    }
+    q->blocks[q->count++] = block;  // append a copy of the block
+}
+
+// Forward declaration of the recursive block decoder defined directly below
+static int ezbc_process_significant_block_recursive(
+    ezbc_bitreader_t *reader, ezbc_block_t block, int bitplane, int threshold,
+    int16_t *output, int width, int8_t *significant, int *first_bitplane,
+    ezbc_block_queue_t *next_significant, ezbc_block_queue_t *next_insignificant);
+
+// EZBC recursive block decoder (matches Kotlin implementation)
+//
+// Called for a block that the bitstream has just flagged as significant at `bitplane`.
+//
+// A 1x1 block is a single coefficient: its sign bit is read, +/-threshold is stored in
+// output, significant[] and first_bitplane[] are updated for it, and the block is
+// appended to next_significant.
+//
+// Any larger block is split into up to four quadrants. For every quadrant that exists,
+// one flag bit is read: 1 means the quadrant is significant and is decoded recursively,
+// 0 means it is appended to next_insignificant.
+//
+//   reader              bitstream position, advanced as bits are consumed
+//   block               block to decode
+//   bitplane            bitplane being decoded, recorded in first_bitplane
+//   threshold           magnitude stored for coefficients that become significant here
+//   output              coefficient plane, indexed as y * width + x
+//   width               row stride shared by output, significant and first_bitplane
+//   significant         per-coefficient flag, set to 1 for each coefficient decoded here
+//   first_bitplane      per-coefficient record of the bitplane at which that happened
+//   next_significant    receives every 1x1 block decoded here
+//   next_insignificant  receives every quadrant whose flag bit was 0
+//
+// Returns the number of sign bits read by this call and all calls below it.
+static int ezbc_process_significant_block_recursive(
+    ezbc_bitreader_t *reader, ezbc_block_t block, int bitplane, int threshold,
+    int16_t *output, int width, int8_t *significant, int *first_bitplane,
+    ezbc_block_queue_t *next_significant, ezbc_block_queue_t *next_insignificant) {
+
+    // Leaf: a single coefficient
+    if (block.width == 1 && block.height == 1) {
+        const int idx = block.y * width + block.x;
+        const int negative = ezbc_read_bits(reader, 1);
+
+        // Set coefficient to threshold value with sign
+        output[idx] = negative ? -threshold : threshold;
+        significant[idx] = 1;
+        first_bitplane[idx] = bitplane;
+        ezbc_queue_add(next_significant, block);
+        return 1;  // exactly one sign bit was consumed
+    }
+
+    // Split sizes: halve each dimension, but never let the first half be empty
+    // (a dimension of 1 stays whole in the first half, leaving the second half empty)
+    const int left_w = (block.width / 2 != 0) ? block.width / 2 : 1;
+    const int top_h = (block.height / 2 != 0) ? block.height / 2 : 1;
+    const int right_w = block.width - left_w;
+    const int bottom_h = block.height - top_h;
+
+    // Quadrants in the order their flag bits appear in the bitstream:
+    // top-left, top-right, bottom-left, bottom-right
+    const ezbc_block_t quadrants[4] = {
+        {block.x,          block.y,         left_w,  top_h},
+        {block.x + left_w, block.y,         right_w, top_h},
+        {block.x,          block.y + top_h, left_w,  bottom_h},
+        {block.x + left_w, block.y + top_h, right_w, bottom_h},
+    };
+
+    int sign_bits_read = 0;
+    for (int q = 0; q < 4; q++) {
+        const ezbc_block_t child = quadrants[q];
+
+        // A quadrant with no columns or no rows does not exist and has no flag bit
+        if (child.width <= 0 || child.height <= 0) {
+            continue;
+        }
+
+        if (ezbc_read_bits(reader, 1)) {
+            // Significant: descend into the quadrant
+            sign_bits_read += ezbc_process_significant_block_recursive(
+                reader, child, bitplane, threshold, output, width, significant, first_bitplane,
+                next_significant, next_insignificant);
+        } else {
+            // Not significant at this bitplane
+            ezbc_queue_add(next_insignificant, child);
+        }
+    }
+
+    return sign_bits_read;
+}
+
+// Decode a single channel with EZBC: `size` payload bytes starting at ezbc_data[offset]
+// are expanded into output[0..expected_count). If the header is invalid or memory runs
+// out, the problem is reported on stderr and output is left all zeros.
+static void decode_channel_ezbc(const uint8_t *ezbc_data, size_t offset, size_t size,
+                               int16_t *output, int expected_count) {
+    ezbc_bitreader_t reader = {ezbc_data, offset + size, offset, 0};
+    // Debug: Print first few bytes (skipped when the payload is shorter than the 5 bytes shown)
+    if (size >= 5) {
+        fprintf(stderr, "[EZBC] Channel decode: offset=%zu, size=%zu, first 5 bytes: %02X %02X %02X %02X %02X\n",
+               offset, size, ezbc_data[offset], ezbc_data[offset+1], ezbc_data[offset+2],
+               ezbc_data[offset+3], ezbc_data[offset+4]);
+    }
+
+    // Read header: MSB bitplane (8 bits), width (16 bits), height (16 bits)
+    const int msb_bitplane = ezbc_read_bits(&reader, 8);
+    const int width = ezbc_read_bits(&reader, 16);
+    const int height = ezbc_read_bits(&reader, 16);
+    fprintf(stderr, "[EZBC] Decoded header: MSB=%d, width=%d, height=%d (expected pixels=%d)\n",
+           msb_bitplane, width, height, expected_count);
+
+    // Validate the header before trusting it. The product is widened because two 16-bit
+    // factors can overflow int, and 1 << bitplane below is undefined for bitplane > 30.
+    if (expected_count <= 0) return;  // nothing to decode; also keeps the sizes below positive
+    memset(output, 0, expected_count * sizeof(int16_t));
+    if ((long long)width * height != expected_count) {
+        fprintf(stderr, "EZBC dimension mismatch: %dx%d != %d\n", width, height, expected_count);
+        return;
+    }
+    if (msb_bitplane > 30) {
+        fprintf(stderr, "EZBC invalid MSB bitplane: %d\n", msb_bitplane);
+        return;
+    }
+
+    // State tracking arrays (output itself was zeroed above)
+    int8_t *significant = calloc(expected_count, sizeof(int8_t));
+    int *first_bitplane = calloc(expected_count, sizeof(int));
+    if (!significant || !first_bitplane) {
+        fprintf(stderr, "EZBC out of memory for %d coefficients\n", expected_count);
+        free(significant); free(first_bitplane);
+        return;
+    }
+
+    // Initialize queues
+    ezbc_block_queue_t insignificant, next_insignificant, significant_queue, next_significant;
+    ezbc_queue_init(&insignificant);
+    ezbc_queue_init(&next_insignificant);
+    ezbc_queue_init(&significant_queue);
+    ezbc_queue_init(&next_significant);
+
+    // Start with root block
+    ezbc_block_t root = {0, 0, width, height};
+    ezbc_queue_add(&insignificant, root);
+
+    // Process bitplanes from MSB to LSB
+    for (int bitplane = msb_bitplane; bitplane >= 0; bitplane--) {
+        const int threshold = 1 << bitplane;
+        // Process insignificant blocks
+        for (int i = 0; i < insignificant.count; i++) {
+            const int flag = ezbc_read_bits(&reader, 1);
+            if (flag == 0) {
+                // Still insignificant
+                ezbc_queue_add(&next_insignificant, insignificant.blocks[i]);
+            } else {
+                // Became significant - use recursive processing
+                ezbc_process_significant_block_recursive(
+                    &reader, insignificant.blocks[i], bitplane, threshold,
+                    output, width, significant, first_bitplane,
+                    &next_significant, &next_insignificant);
+            }
+        }
+
+        // Process significant 1x1 blocks (refinement)
+        for (int i = 0; i < significant_queue.count; i++) {
+            ezbc_block_t block = significant_queue.blocks[i];
+            const int idx = block.y * width + block.x;
+            // A set refinement bit grows the magnitude by this bitplane's value, away from zero
+            if (ezbc_read_bits(&reader, 1)) {
+                output[idx] += (output[idx] < 0) ? -threshold : threshold;
+            }
+            ezbc_queue_add(&next_significant, block);  // Keep in significant queue
+        }
+
+        // Swap queues
+        ezbc_block_queue_t temp_insig = insignificant;
+        insignificant = next_insignificant;
+        next_insignificant = temp_insig;
+        next_insignificant.count = 0;
+
+        ezbc_block_queue_t temp_sig = significant_queue;
+        significant_queue = next_significant;
+        next_significant = temp_sig;
+        next_significant.count = 0;
+    }
+
+    // Cleanup
+    free(significant);
+    free(first_bitplane);
+    ezbc_queue_free(&insignificant);
+    ezbc_queue_free(&next_insignificant);
+    ezbc_queue_free(&significant_queue);
+    ezbc_queue_free(&next_significant);
+
+    // Debug: Count non-zero coefficients
+    int nonzero_count = 0;
+    int16_t max_val = 0, min_val = 0;
+    for (int i = 0; i < expected_count; i++) {
+        if (output[i] == 0) continue;
+        nonzero_count++;
+        if (output[i] > max_val) max_val = output[i];
+        if (output[i] < min_val) min_val = output[i];
+    }
+    fprintf(stderr, "[EZBC] Decoded %d non-zero coeffs (%.1f%%), range: [%d, %d]\n",
+           nonzero_count, 100.0 * nonzero_count / expected_count, min_val, max_val);
+}
+
+// EZBC postprocessing for single frames
+//
+// compressed_data holds the present channels back to back in Y, Co, Cg order. Each one
+// is a 4-byte little-endian payload size followed by that many bytes of EZBC data,
+// which decode_channel_ezbc expands into coeff_count coefficients.
+//
+// A channel is decoded only when channel_layout marks it present AND its output
+// pointer is non-NULL; otherwise nothing is read for it and its output is untouched.
+//
+// NOTE(review): the buffer length is not passed in, so the size fields are trusted as-is.
+static void postprocess_coefficients_ezbc(uint8_t *compressed_data, int coeff_count,
+                                          int16_t *output_y, int16_t *output_co, int16_t *output_cg,
+                                          int channel_layout) {
+    // Layout bits are inverted: a cleared bit means the channel is present.
+    // Bit 2 covers luma; bit 1 covers both chroma channels.
+    const int luma_present = (channel_layout & 0x04) == 0;
+    const int chroma_present = (channel_layout & 0x02) == 0;
+
+    // Channel table in bitstream order
+    int16_t *const planes[3] = {output_y, output_co, output_cg};
+    const int present[3] = {luma_present, chroma_present, chroma_present};
+
+    // Read position within compressed_data
+    int offset = 0;
+    for (int ch = 0; ch < 3; ch++) {
+        if (!present[ch] || !planes[ch]) {
+            continue;
+        }
+
+        // 32-bit little-endian payload size
+        const uint8_t *size_bytes = compressed_data + offset;
+        const uint32_t size = ((uint32_t)size_bytes[0]) |
+                              ((uint32_t)size_bytes[1] << 8) |
+                              ((uint32_t)size_bytes[2] << 16) |
+                              ((uint32_t)size_bytes[3] << 24);
+        offset += 4;
+
+        decode_channel_ezbc(compressed_data, offset, size, planes[ch], coeff_count);
+
+        // Skip past this channel's payload to the next size field
+        offset += size;
+    }
+}
+
 //=============================================================================
 // DWT Inverse Transforms (matches TSVM)
 //=============================================================================
@@ -714,6 +1025,453 @@ static void apply_inverse_dwt_multilevel(float *data, int width, int height, int
     free(temp_col);
 }
 
+//=============================================================================
+// Temporal DWT and GOP Decoding (matches TSVM)
+//=============================================================================
+
+// Get temporal subband level for a given frame index in a GOP
+static int get_temporal_subband_level(int frame_idx, int num_frames, int temporal_levels) {
+    // Mirrors the encoder (encoder_tav.c:1487-1501). After the temporal DWT
+    // (described here for 2 levels) the frames of a GOP are laid out as:
+    //   [0, num_frames >> temporal_levels)  tLL, coarsest              -> level 0
+    //   up to the middle of the GOP         tLH                        -> level 1
+    //   second half of the GOP              tH of the first level      -> level 2
+    const int coarsest_end = num_frames >> temporal_levels;  // e.g., 16 >> 2 = 4, or 8 >> 2 = 2
+    const int first_half_end = num_frames >> 1;
+
+    if (frame_idx < coarsest_end) {
+        return 0;  // tLL
+    }
+    if (frame_idx < first_half_end) {
+        return 1;  // tLH
+    }
+    return 2;      // tH
+}
+
+// Calculate temporal quantizer scale for a given temporal subband level
+static float get_temporal_quantizer_scale(int temporal_level) {
+    // Uses exponential scaling: 2^(BETA × level^KAPPA)
+    // With BETA=0.6, KAPPA=1.14 this evaluates to roughly:
+    //   - Level 0:  2^0.00 = 1.00
+    //   - Level 1:  2^0.60 = 1.52
+    //   - Level 2:  2^1.32 = 2.50
+    const float BETA = 0.6f;  // Temporal scaling exponent
+    const float KAPPA = 1.14f;
+    return powf(2.0f, BETA * powf(temporal_level, KAPPA));
+}
+
+// Inverse Haar 1D DWT, performed in place on data[0..length).
+// On entry data holds the low-pass half followed by the high-pass half; on return it
+// holds the reconstructed samples. Lengths below 2 are left untouched.
+static void dwt_haar_inverse_1d(float *data, int length) {
+    if (length < 2) return;
+
+    const int low_count = (length + 1) / 2;  // low-pass samples come first
+    const int pair_count = length / 2;       // complete (low, high) pairs
+    float *scratch = malloc(length * sizeof(float));
+
+    // Each pair expands to two neighbouring samples: low + high, then low - high
+    for (int k = 0; k < pair_count; k++) {
+        const float low = data[k];
+        const float high = data[low_count + k];
+        scratch[2 * k] = low + high;
+        scratch[2 * k + 1] = low - high;
+    }
+
+    // Odd length: the final sample comes from the low-pass half alone
+    if (length % 2 != 0) {
+        scratch[length - 1] = data[low_count - 1];
+    }
+
+    // Copy reconstructed data back
+    for (int k = 0; k < length; k++) data[k] = scratch[k];
+    free(scratch);
+}
+
+// Apply inverse 3D DWT to GOP data (spatial + temporal)
+// Order: SPATIAL first (each frame), then TEMPORAL (across frames)
+//
+//   gop_y, gop_co, gop_cg  gop_size coefficient planes per channel, each indexed as
+//                          y * width + x; all are transformed in place
+//   width, height          plane dimensions
+//   gop_size               number of frames in the GOP
+//   spatial_levels         level count handed to apply_inverse_dwt_multilevel
+//   temporal_levels        number of temporal Haar levels to undo
+//   filter_type            filter selector handed to apply_inverse_dwt_multilevel
+//
+// When gop_size < 2 only the spatial step runs.
+static void apply_inverse_3d_dwt(float **gop_y, float **gop_co, float **gop_cg,
+                                int width, int height, int gop_size,
+                                int spatial_levels, int temporal_levels, int filter_type) {
+    // Channel table, so that Y, Co and Cg share one code path below
+    float **const channels[3] = {gop_y, gop_co, gop_cg};
+
+    // Step 1: Apply inverse 2D spatial DWT to each frame
+    for (int t = 0; t < gop_size; t++) {
+        for (int c = 0; c < 3; c++) {
+            apply_inverse_dwt_multilevel(channels[c][t], width, height, spatial_levels, filter_type);
+        }
+    }
+
+    // Step 2: Apply inverse temporal DWT to each spatial location
+    // Only needed for GOPs with multiple frames (skip for I-frames)
+    if (gop_size < 2) return;
+
+    // Pre-calculate all intermediate lengths for temporal DWT (same fix as TAD)
+    // This ensures correct reconstruction for non-power-of-2 GOP sizes
+    // temporal_lengths[k] is gop_size halved k times, rounding up each time
+    // NOTE(review): neither allocation below is checked for NULL
+    int *temporal_lengths = malloc((temporal_levels + 1) * sizeof(int));
+    temporal_lengths[0] = gop_size;
+    for (int k = 1; k <= temporal_levels; k++) {
+        temporal_lengths[k] = (temporal_lengths[k - 1] + 1) / 2;
+    }
+
+    // Scratch line holding one pixel's values across all frames of the GOP
+    float *temporal_line = malloc(gop_size * sizeof(float));
+
+    for (int y = 0; y < height; y++) {
+        for (int x = 0; x < width; x++) {
+            const int pixel_idx = y * width + x;
+
+            // The three channels are reconstructed one after another: Y, Co, Cg
+            for (int c = 0; c < 3; c++) {
+                float **frames = channels[c];
+
+                // Gather this pixel's values from every frame
+                for (int t = 0; t < gop_size; t++) {
+                    temporal_line[t] = frames[t][pixel_idx];
+                }
+
+                // Undo the temporal levels one at a time, highest level index first;
+                // each pass transforms only the leading temporal_lengths[level] entries
+                for (int level = temporal_levels - 1; level >= 0; level--) {
+                    const int level_frames = temporal_lengths[level];
+                    if (level_frames >= 2) {
+                        dwt_haar_inverse_1d(temporal_line, level_frames);
+                    }
+                }
+
+                // Scatter the reconstructed values back into their frames
+                for (int t = 0; t < gop_size; t++) {
+                    frames[t][pixel_idx] = temporal_line[t];
+                }
+            }
+        }
+    }
+
+    // Release the scratch buffers
+    free(temporal_line);
+    free(temporal_lengths);
+}
+
+// Postprocess GOP unified block to per-frame coefficients (2-bit map format)
+//
+//   decompressed_data, data_size  the unified block and its length in bytes
+//   gop_size                      number of frames in the GOP
+//   num_pixels                    coefficients per frame and per channel
+//   channel_layout                channel presence bits, decoded below
+//
+// Buffer layout, as read here (absent channels contribute nothing):
+//   [Y maps, all frames][Co maps, all frames][Cg maps, all frames]
+//   [Y values][Co values][Cg values]
+// Each map packs one 2-bit code per coefficient, LSB-first, four codes per byte:
+//   0 -> 0, 1 -> +1, 2 -> -1, 3 -> next little-endian int16 from the channel's value array.
+// A channel's value array is shared by all frames and consumed in decoding order.
+//
+// Returns output[gop_size][3][num_pixels] (Y, Co, Cg); every level is allocated here and
+// nothing is freed here. Coefficients whose map or value bytes lie beyond data_size,
+// and all coefficients of absent channels, are left at 0.
+static int16_t ***postprocess_gop_unified(const uint8_t *decompressed_data, size_t data_size,
+                                         int gop_size, int num_pixels, int channel_layout) {
+    // 2 bits per coefficient
+    const int map_bytes_per_frame = (num_pixels * 2 + 7) / 8;
+
+    // Determine which channels are present
+    // Bit 0: has alpha, Bit 1: has chroma (inverted), Bit 2: has luma (inverted)
+    const int has_y = (channel_layout & 0x04) == 0;
+    const int has_co = (channel_layout & 0x02) == 0;  // Inverted: 0 = has chroma
+    const int has_cg = (channel_layout & 0x02) == 0;  // Inverted: 0 = has chroma
+
+    // Calculate buffer positions for maps
+    int read_ptr = 0;
+    const int y_maps_start = has_y ? read_ptr : -1;
+    if (has_y) read_ptr += map_bytes_per_frame * gop_size;
+
+    const int co_maps_start = has_co ? read_ptr : -1;
+    if (has_co) read_ptr += map_bytes_per_frame * gop_size;
+
+    const int cg_maps_start = has_cg ? read_ptr : -1;
+    if (has_cg) read_ptr += map_bytes_per_frame * gop_size;
+
+    // Count "other" values (code 11) across ALL frames
+    int y_other_count = 0;
+    int co_other_count = 0;
+    int cg_other_count = 0;
+
+    for (int frame = 0; frame < gop_size; frame++) {
+        const int frame_map_offset = frame * map_bytes_per_frame;
+        for (int i = 0; i < num_pixels; i++) {
+            // bit_pos is always even, so a 2-bit code never straddles two bytes
+            const int bit_pos = i * 2;
+            const int byte_idx = bit_pos / 8;
+            const int bit_offset = bit_pos % 8;
+
+            if (has_y && y_maps_start + frame_map_offset + byte_idx < (int)data_size) {
+                const int code = (decompressed_data[y_maps_start + frame_map_offset + byte_idx] >> bit_offset) & 0x03;
+                if (code == 3) y_other_count++;
+            }
+            if (has_co && co_maps_start + frame_map_offset + byte_idx < (int)data_size) {
+                const int code = (decompressed_data[co_maps_start + frame_map_offset + byte_idx] >> bit_offset) & 0x03;
+                if (code == 3) co_other_count++;
+            }
+            if (has_cg && cg_maps_start + frame_map_offset + byte_idx < (int)data_size) {
+                const int code = (decompressed_data[cg_maps_start + frame_map_offset + byte_idx] >> bit_offset) & 0x03;
+                if (code == 3) cg_other_count++;
+            }
+        }
+    }
+
+    // Value arrays start after all maps
+    const int y_values_start = read_ptr;
+    read_ptr += y_other_count * 2;
+
+    const int co_values_start = read_ptr;
+    read_ptr += co_other_count * 2;
+
+    const int cg_values_start = read_ptr;
+
+    // Allocate output arrays: [gop_size][3 channels][num_pixels]
+    // NOTE(review): allocation results are not checked for NULL
+    int16_t ***output = malloc(gop_size * sizeof(int16_t **));
+    for (int t = 0; t < gop_size; t++) {
+        output[t] = malloc(3 * sizeof(int16_t *));
+        output[t][0] = calloc(num_pixels, sizeof(int16_t));  // Y
+        output[t][1] = calloc(num_pixels, sizeof(int16_t));  // Co
+        output[t][2] = calloc(num_pixels, sizeof(int16_t));  // Cg
+    }
+
+    int y_value_idx = 0;
+    int co_value_idx = 0;
+    int cg_value_idx = 0;
+
+    for (int frame = 0; frame < gop_size; frame++) {
+        const int frame_map_offset = frame * map_bytes_per_frame;
+        for (int i = 0; i < num_pixels; i++) {
+            const int bit_pos = i * 2;
+            const int byte_idx = bit_pos / 8;
+            const int bit_offset = bit_pos % 8;
+
+            // Decode Y
+            if (has_y && y_maps_start + frame_map_offset + byte_idx < (int)data_size) {
+                const int code = (decompressed_data[y_maps_start + frame_map_offset + byte_idx] >> bit_offset) & 0x03;
+                if (code == 0) {
+                    output[frame][0][i] = 0;
+                } else if (code == 1) {
+                    output[frame][0][i] = 1;
+                } else if (code == 2) {
+                    output[frame][0][i] = -1;
+                } else {  // code == 3
+                    const int val_offset = y_values_start + y_value_idx * 2;
+                    y_value_idx++;
+                    if (val_offset + 1 < (int)data_size) {
+                        // Little-endian int16, assembled as unsigned because left-shifting
+                        // a negative int is undefined behaviour
+                        const uint16_t raw = (uint16_t)(decompressed_data[val_offset] |
+                                                        (decompressed_data[val_offset + 1] << 8));
+                        output[frame][0][i] = (int16_t)raw;
+                    } else {
+                        output[frame][0][i] = 0;
+                    }
+                }
+            }
+
+            // Decode Co
+            if (has_co && co_maps_start + frame_map_offset + byte_idx < (int)data_size) {
+                const int code = (decompressed_data[co_maps_start + frame_map_offset + byte_idx] >> bit_offset) & 0x03;
+                if (code == 0) {
+                    output[frame][1][i] = 0;
+                } else if (code == 1) {
+                    output[frame][1][i] = 1;
+                } else if (code == 2) {
+                    output[frame][1][i] = -1;
+                } else {  // code == 3
+                    const int val_offset = co_values_start + co_value_idx * 2;
+                    co_value_idx++;
+                    if (val_offset + 1 < (int)data_size) {
+                        // Little-endian int16, assembled as unsigned because left-shifting
+                        // a negative int is undefined behaviour
+                        const uint16_t raw = (uint16_t)(decompressed_data[val_offset] |
+                                                        (decompressed_data[val_offset + 1] << 8));
+                        output[frame][1][i] = (int16_t)raw;
+                    } else {
+                        output[frame][1][i] = 0;
+                    }
+                }
+            }
+
+            // Decode Cg
+            if (has_cg && cg_maps_start + frame_map_offset + byte_idx < (int)data_size) {
+                const int code = (decompressed_data[cg_maps_start + frame_map_offset + byte_idx] >> bit_offset) & 0x03;
+                if (code == 0) {
+                    output[frame][2][i] = 0;
+                } else if (code == 1) {
+                    output[frame][2][i] = 1;
+                } else if (code == 2) {
+                    output[frame][2][i] = -1;
+                } else {  // code == 3
+                    const int val_offset = cg_values_start + cg_value_idx * 2;
+                    cg_value_idx++;
+                    if (val_offset + 1 < (int)data_size) {
+                        // Little-endian int16, assembled as unsigned because left-shifting
+                        // a negative int is undefined behaviour
+                        const uint16_t raw = (uint16_t)(decompressed_data[val_offset] |
+                                                        (decompressed_data[val_offset + 1] << 8));
+                        output[frame][2][i] = (int16_t)raw;
+                    } else {
+                        output[frame][2][i] = 0;
+                    }
+                }
+            }
+        }
+    }
+
+    return output;
+}
+
+// Postprocess GOP RAW format to per-frame coefficients (entropyCoder=2)
+// Layout: [All_Y_coeffs][All_Co_coeffs][All_Cg_coeffs] (raw int16 arrays)
+// Channels flagged absent in channel_layout occupy no bytes and stay zero-filled.
+// Values are copied as stored, i.e. interpreted in the host's int16 byte order.
+// Returns output[gop_size][3][num_pixels] (caller frees every plane, each frame's
+// pointer table and the top-level array), or NULL on negative dimensions,
+// truncated input or allocation failure.
+static int16_t ***postprocess_gop_raw(const uint8_t *decompressed_data, size_t data_size,
+                                     int gop_size, int num_pixels, int channel_layout) {
+    static const char *const channel_names[3] = {"Y", "Co", "Cg"};
+
+    // Determine which channels are present (Co and Cg share bit 1 of the layout)
+    const int present[3] = {
+        (channel_layout & 0x04) == 0,  // Y
+        (channel_layout & 0x02) == 0,  // Co
+        (channel_layout & 0x02) == 0,  // Cg
+    };
+
+    if (gop_size < 0 || num_pixels < 0) {
+        fprintf(stderr, "Error: Invalid dimensions for RAW GOP\n");
+        return NULL;
+    }
+
+    // Sizes and offsets use size_t so large GOPs cannot overflow int arithmetic
+    const size_t frame_bytes = (size_t)num_pixels * sizeof(int16_t);
+    const size_t channel_bytes = (size_t)gop_size * frame_bytes;
+    size_t offset = 0;
+
+    // Allocate output arrays: [gop_size][3 channels][num_pixels]. Frames are built in
+    // order and calloc zeroes the tables, so cleanup can stop at the first NULL frame.
+    int16_t ***output = calloc(gop_size > 0 ? (size_t)gop_size : 1, sizeof(int16_t **));
+    int allocated = (output != NULL);
+    for (int t = 0; allocated && t < gop_size; t++) {
+        output[t] = calloc(3, sizeof(int16_t *));
+        for (int c = 0; output[t] && c < 3; c++) {
+            output[t][c] = calloc(num_pixels > 0 ? (size_t)num_pixels : 1, sizeof(int16_t));
+        }
+        allocated = output[t] && output[t][0] && output[t][1] && output[t][2];
+    }
+    if (!allocated) {
+        fprintf(stderr, "Error: Failed to allocate RAW GOP output\n");
+        goto error_cleanup;
+    }
+
+    // Read the present channels in Y, Co, Cg order (all frames concatenated)
+    for (int c = 0; c < 3; c++) {
+        if (!present[c]) {
+            continue;
+        }
+        // offset never exceeds data_size, so the subtraction cannot wrap
+        if (channel_bytes > data_size - offset) {
+            fprintf(stderr, "Error: Not enough data for %s channel in RAW GOP\n", channel_names[c]);
+            goto error_cleanup;
+        }
+        // Copy from the byte pointer: the payload need not be int16-aligned
+        const uint8_t *channel_data = decompressed_data + offset;
+        for (int t = 0; t < gop_size; t++) {
+            memcpy(output[t][c], channel_data + (size_t)t * frame_bytes, frame_bytes);
+        }
+        offset += channel_bytes;
+    }
+
+    return output;
+
+error_cleanup:
+    for (int t = 0; output && t < gop_size && output[t]; t++) {
+        free(output[t][0]);
+        free(output[t][1]);
+        free(output[t][2]);
+        free(output[t]);
+    }
+    free(output);
+    return NULL;
+}
+
+// Postprocess GOP EZBC format to per-frame coefficients (entropyCoder=1)
+// Layout: [frame0_size(4)][frame0_ezbc_data][frame1_size(4)][frame1_ezbc_data]...
+// Note: EZBC is a complex embedded bitplane codec - this is a simplified placeholder
+// Returns output[gop_size][3][num_pixels] (caller frees), or NULL on truncated input or allocation failure
+static int16_t ***postprocess_gop_ezbc(const uint8_t *decompressed_data, size_t data_size,
+                                      int gop_size, int num_pixels, int channel_layout) {
+    size_t offset = 0;  // never exceeds data_size, so "data_size - offset" below cannot wrap
+    // Allocate output arrays: [gop_size][3 channels][num_pixels] (zeroed; built in frame order)
+    int16_t ***output = calloc(gop_size > 0 ? (size_t)gop_size : 1, sizeof(int16_t **));
+    int allocated = (output != NULL);
+    for (int t = 0; allocated && t < gop_size; t++) {
+        output[t] = calloc(3, sizeof(int16_t *));
+        for (int c = 0; output[t] && c < 3; c++) {
+            output[t][c] = calloc(num_pixels > 0 ? (size_t)num_pixels : 1, sizeof(int16_t));
+        }
+        allocated = output[t] && output[t][0] && output[t][1] && output[t][2];
+    }
+    if (!allocated) {
+        fprintf(stderr, "Error: Failed to allocate EZBC GOP output\n");
+        goto error_cleanup;
+    }
+
+    // Read each frame
+    for (int t = 0; t < gop_size; t++) {
+        if (data_size - offset < 4) {
+            fprintf(stderr, "Error: Not enough data for frame %d size in EZBC GOP\n", t);
+            goto error_cleanup;
+        }
+        // Read frame size (4 bytes, little-endian)
+        const uint8_t *size_field = decompressed_data + offset;
+        const uint32_t frame_size = (uint32_t)size_field[0] | ((uint32_t)size_field[1] << 8) |
+                                    ((uint32_t)size_field[2] << 16) | ((uint32_t)size_field[3] << 24);
+        offset += 4;
+        // Compare against the bytes left: offset + frame_size could wrap in 32-bit arithmetic
+        if (frame_size > data_size - offset) {
+            fprintf(stderr, "Error: Frame %d EZBC data exceeds buffer (size=%u, available=%zu)\n",
+                   t, frame_size, data_size - offset);
+            goto error_cleanup;
+        }
+        // Decode EZBC frame using the single-frame EZBC decoder
+        postprocess_coefficients_ezbc((uint8_t *)(decompressed_data + offset), num_pixels,
+                                      output[t][0], output[t][1], output[t][2], channel_layout);
+        offset += frame_size;
+    }
+
+    return output;
+
+error_cleanup:
+    for (int t = 0; output && t < gop_size && output[t]; t++) {
+        free(output[t][0]);
+        free(output[t][1]);
+        free(output[t][2]);
+        free(output[t]);
+    }
+    free(output);
+    return NULL;
+}
+
 //=============================================================================
 // YCoCg-R / ICtCp to RGB Conversion (matches TSVM)
 //=============================================================================
@@ -1064,8 +1822,15 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
             goto write_frame;
         }
 
-        // Use 2-bit map format (entropyCoder=0 / Twobit-map)
-        postprocess_coefficients_twobit(ptr, coeff_count, quantized_y, quantized_co, quantized_cg);
+        // Postprocess coefficients based on entropy_coder value
+        if (decoder->header.entropy_coder == 1) {
+            // EZBC format (stub implementation)
+            postprocess_coefficients_ezbc(ptr, coeff_count, quantized_y, quantized_co, quantized_cg,
+                                         decoder->header.channel_layout);
+        } else {
+            // Default: Twobitmap format (entropy_coder=0)
+            postprocess_coefficients_twobit(ptr, coeff_count, quantized_y, quantized_co, quantized_cg);
+        }
 
         // Debug: Check first few coefficients
         if (decoder->frame_count == 32) {
@@ -1086,7 +1851,17 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
 
         // Dequantize (perceptual for versions 5-8, uniform for 1-4)
         const int is_perceptual = (decoder->header.version >= 5 && decoder->header.version <= 8);
-        if (is_perceptual) {
+        const int is_ezbc = (decoder->header.entropy_coder == 1);
+
+        if (is_ezbc) {
+            // EZBC mode: coefficients are already denormalized by encoder
+            // Just convert int16 to float without multiplying by quantizer
+            for (int i = 0; i < coeff_count; i++) {
+                decoder->dwt_buffer_y[i] = (float)quantized_y[i];
+                decoder->dwt_buffer_co[i] = (float)quantized_co[i];
+                decoder->dwt_buffer_cg[i] = (float)quantized_cg[i];
+            }
+        } else if (is_perceptual) {
             dequantize_dwt_subbands_perceptual(0, qy, quantized_y, decoder->dwt_buffer_y,
                                               decoder->header.width, decoder->header.height,
                                               decoder->header.decomp_levels, qy, 0, decoder->frame_count);
@@ -1397,14 +2172,24 @@ int main(int argc, char *argv[]) {
     int total_packets = 0;
     int iframe_count = 0;
     while (result > 0) {
+        // Check file position before reading packet
+        long file_pos = ftell(decoder->input_fp);
+
         uint8_t packet_type;
         if (fread(&packet_type, 1, 1, decoder->input_fp) != 1) {
+            if (verbose) {
+                fprintf(stderr, "Reached EOF at file position %ld after %d packets\n", file_pos, total_packets);
+            }
             result = 0; // EOF
             break;
         }
 
         total_packets++;
 
+        if (verbose && total_packets <= 30) {
+            fprintf(stderr, "Packet %d at file pos %ld: Type 0x%02X\n", total_packets, file_pos, packet_type);
+        }
+
         // Handle sync packets (no size field)
         if (packet_type == TAV_PACKET_SYNC || packet_type == TAV_PACKET_SYNC_NTSC) {
             if (verbose && total_packets < 20) {
@@ -1431,17 +2216,18 @@ int main(int argc, char *argv[]) {
 
         // Handle GOP sync packets (no size field, just 1 byte frame count)
         if (packet_type == TAV_PACKET_GOP_SYNC) {
-            uint8_t frame_count;
-            if (fread(&frame_count, 1, 1, decoder->input_fp) != 1) {
+            uint8_t gop_frame_count;
+            if (fread(&gop_frame_count, 1, 1, decoder->input_fp) != 1) {
                 fprintf(stderr, "Error: Failed to read GOP sync frame count\n");
                 result = -1;
                 break;
             }
             if (verbose) {
                 fprintf(stderr, "Packet %d: GOP_SYNC (0x%02X) - %u frames from GOP\n",
-                       total_packets, packet_type, frame_count);
+                       total_packets, packet_type, gop_frame_count);
             }
-            // Frame count is informational only for now
+            // Update decoder frame count (GOP already wrote frames)
+            decoder->frame_count += gop_frame_count;
             continue;
         }
 
@@ -1455,13 +2241,256 @@ int main(int argc, char *argv[]) {
                 result = -1;
                 break;
             }
-            if (verbose && total_packets < 20) {
-                fprintf(stderr, "Packet %d: GOP_UNIFIED (0x%02X), %u frames, %u bytes - skipping\n",
+
+            if (verbose) {
+                fprintf(stderr, "Packet %d: GOP_UNIFIED (0x%02X), %u frames, %u bytes\n",
                        total_packets, packet_type, gop_size, compressed_size);
             }
-            // Skip GOP data for now
-            fseek(decoder->input_fp, compressed_size, SEEK_CUR);
-            fprintf(stderr, "\nWarning: GOP unified packets not yet implemented (skipping %u frames)\n", gop_size);
+
+            // Read compressed GOP data
+            uint8_t *compressed_data = malloc(compressed_size);
+            if (!compressed_data) {
+                fprintf(stderr, "Error: Failed to allocate GOP compressed buffer (%u bytes)\n", compressed_size);
+                result = -1;
+                break;
+            }
+
+            if (fread(compressed_data, 1, compressed_size, decoder->input_fp) != compressed_size) {
+                fprintf(stderr, "Error: Failed to read GOP compressed data\n");
+                free(compressed_data);
+                result = -1;
+                break;
+            }
+
+            // Decompress with Zstd
+            const size_t decompressed_bound = ZSTD_getFrameContentSize(compressed_data, compressed_size);
+            if (decompressed_bound == ZSTD_CONTENTSIZE_ERROR || decompressed_bound == ZSTD_CONTENTSIZE_UNKNOWN) {
+                fprintf(stderr, "Error: Invalid Zstd frame in GOP data\n");
+                free(compressed_data);
+                result = -1;
+                break;
+            }
+
+            uint8_t *decompressed_data = malloc(decompressed_bound);
+            if (!decompressed_data) {
+                fprintf(stderr, "Error: Failed to allocate GOP decompressed buffer (%zu bytes)\n", decompressed_bound);
+                free(compressed_data);
+                result = -1;
+                break;
+            }
+
+            const size_t decompressed_size = ZSTD_decompress(decompressed_data, decompressed_bound,
+                                                            compressed_data, compressed_size);
+            free(compressed_data);
+
+            if (ZSTD_isError(decompressed_size)) {
+                fprintf(stderr, "Error: Zstd decompression failed: %s\n", ZSTD_getErrorName(decompressed_size));
+                free(decompressed_data);
+                result = -1;
+                break;
+            }
+
+            // Postprocess coefficients based on entropy_coder value
+            const int num_pixels = decoder->header.width * decoder->header.height;
+            int16_t ***quantized_gop;
+
+            if (decoder->header.entropy_coder == 2) {
+                // RAW format: simple concatenated int16 arrays
+                if (verbose) {
+                    fprintf(stderr, "  Using RAW postprocessing (entropy_coder=2)\n");
+                }
+                quantized_gop = postprocess_gop_raw(decompressed_data, decompressed_size,
+                                                   gop_size, num_pixels, decoder->header.channel_layout);
+            } else if (decoder->header.entropy_coder == 1) {
+                // EZBC format: embedded zero-block coding
+                if (verbose) {
+                    fprintf(stderr, "  Using EZBC postprocessing (entropy_coder=1)\n");
+                }
+                quantized_gop = postprocess_gop_ezbc(decompressed_data, decompressed_size,
+                                                    gop_size, num_pixels, decoder->header.channel_layout);
+            } else {
+                // Default: Twobitmap format (entropy_coder=0)
+                if (verbose) {
+                    fprintf(stderr, "  Using Twobitmap postprocessing (entropy_coder=0)\n");
+                }
+                quantized_gop = postprocess_gop_unified(decompressed_data, decompressed_size,
+                                                       gop_size, num_pixels, decoder->header.channel_layout);
+            }
+
+            free(decompressed_data);
+
+            if (!quantized_gop) {
+                fprintf(stderr, "Error: Failed to postprocess GOP data\n");
+                result = -1;
+                break;
+            }
+
+            // Allocate GOP float buffers
+            float **gop_y = malloc(gop_size * sizeof(float *));
+            float **gop_co = malloc(gop_size * sizeof(float *));
+            float **gop_cg = malloc(gop_size * sizeof(float *));
+
+            for (int t = 0; t < gop_size; t++) {
+                gop_y[t] = calloc(num_pixels, sizeof(float));
+                gop_co[t] = calloc(num_pixels, sizeof(float));
+                gop_cg[t] = calloc(num_pixels, sizeof(float));
+            }
+
+            // Dequantize with temporal scaling (perceptual quantization for versions 5-8)
+            const int is_perceptual = (decoder->header.version >= 5 && decoder->header.version <= 8);
+            const int is_ezbc = (decoder->header.entropy_coder == 1);
+            const int temporal_levels = 2;  // Fixed for TAV GOP encoding
+
+            for (int t = 0; t < gop_size; t++) {
+                if (is_ezbc) {
+                    // EZBC mode: coefficients are already denormalized by encoder
+                    // Just convert int16 to float without multiplying by quantizer
+                    for (int i = 0; i < num_pixels; i++) {
+                        gop_y[t][i] = (float)quantized_gop[t][0][i];
+                        gop_co[t][i] = (float)quantized_gop[t][1][i];
+                        gop_cg[t][i] = (float)quantized_gop[t][2][i];
+                    }
+
+                    if (t == 0) {
+                        // Debug first frame
+                        int16_t max_y = 0, min_y = 0;
+                        for (int i = 0; i < num_pixels; i++) {
+                            if (quantized_gop[t][0][i] > max_y) max_y = quantized_gop[t][0][i];
+                            if (quantized_gop[t][0][i] < min_y) min_y = quantized_gop[t][0][i];
+                        }
+                        fprintf(stderr, "[GOP-EZBC] Frame 0 Y coeffs range: [%d, %d], first 5: %d %d %d %d %d\n",
+                               min_y, max_y,
+                               quantized_gop[t][0][0], quantized_gop[t][0][1], quantized_gop[t][0][2],
+                               quantized_gop[t][0][3], quantized_gop[t][0][4]);
+                    }
+                } else {
+                    // Normal mode: multiply by quantizer
+                    const int temporal_level = get_temporal_subband_level(t, gop_size, temporal_levels);
+                    const float temporal_scale = get_temporal_quantizer_scale(temporal_level);
+
+                    // CRITICAL: Must ROUND temporal quantizer to match encoder's roundf() behavior
+                    const float base_q_y = roundf(decoder->header.quantiser_y * temporal_scale);
+                    const float base_q_co = roundf(decoder->header.quantiser_co * temporal_scale);
+                    const float base_q_cg = roundf(decoder->header.quantiser_cg * temporal_scale);
+
+                    if (is_perceptual) {
+                        dequantize_dwt_subbands_perceptual(0, decoder->header.quantiser_y,
+                                                          quantized_gop[t][0], gop_y[t],
+                                                          decoder->header.width, decoder->header.height,
+                                                          decoder->header.decomp_levels, base_q_y, 0, decoder->frame_count + t);
+                        dequantize_dwt_subbands_perceptual(0, decoder->header.quantiser_y,
+                                                          quantized_gop[t][1], gop_co[t],
+                                                          decoder->header.width, decoder->header.height,
+                                                          decoder->header.decomp_levels, base_q_co, 1, decoder->frame_count + t);
+                        dequantize_dwt_subbands_perceptual(0, decoder->header.quantiser_y,
+                                                          quantized_gop[t][2], gop_cg[t],
+                                                          decoder->header.width, decoder->header.height,
+                                                          decoder->header.decomp_levels, base_q_cg, 1, decoder->frame_count + t);
+                    } else {
+                        // Uniform quantization for older versions
+                        for (int i = 0; i < num_pixels; i++) {
+                            gop_y[t][i] = quantized_gop[t][0][i] * base_q_y;
+                            gop_co[t][i] = quantized_gop[t][1][i] * base_q_co;
+                            gop_cg[t][i] = quantized_gop[t][2][i] * base_q_cg;
+                        }
+                    }
+                }
+            }
+
+            // Free quantized coefficients
+            for (int t = 0; t < gop_size; t++) {
+                free(quantized_gop[t][0]);
+                free(quantized_gop[t][1]);
+                free(quantized_gop[t][2]);
+                free(quantized_gop[t]);
+            }
+            free(quantized_gop);
+
+            // Remove grain synthesis from Y channel for each GOP frame
+            // This must happen after dequantization but before inverse DWT
+            for (int t = 0; t < gop_size; t++) {
+                remove_grain_synthesis_decoder(gop_y[t], decoder->header.width, decoder->header.height,
+                                              decoder->header.decomp_levels, decoder->frame_count + t,
+                                              decoder->header.quantiser_y);
+            }
+
+            // Apply inverse 3D DWT (spatial + temporal)
+            apply_inverse_3d_dwt(gop_y, gop_co, gop_cg, decoder->header.width, decoder->header.height,
+                               gop_size, decoder->header.decomp_levels, temporal_levels,
+                               decoder->header.wavelet_filter);
+
+            // Debug: Check spatial coefficients after inverse temporal DWT (before inverse spatial DWT)
+            if (is_ezbc) {
+                float max_y = 0.0f, min_y = 0.0f;
+                for (int i = 0; i < num_pixels; i++) {
+                    if (gop_y[0][i] > max_y) max_y = gop_y[0][i];
+                    if (gop_y[0][i] < min_y) min_y = gop_y[0][i];
+                }
+                fprintf(stderr, "[GOP-EZBC] After inverse temporal DWT, Frame 0 Y spatial coeffs range: [%.1f, %.1f], first 5: %.1f %.1f %.1f %.1f %.1f\n",
+                       min_y, max_y,
+                       gop_y[0][0], gop_y[0][1], gop_y[0][2], gop_y[0][3], gop_y[0][4]);
+            }
+
+            // Convert YCoCg→RGB and write all GOP frames
+            const int is_ictcp = (decoder->header.version % 2 == 0);
+
+            for (int t = 0; t < gop_size; t++) {
+                // Allocate frame buffer
+                uint8_t *frame_rgb = malloc(decoder->frame_size * 3);
+                if (!frame_rgb) {
+                    fprintf(stderr, "Error: Failed to allocate GOP frame buffer\n");
+                    result = -1;
+                    break;
+                }
+
+                // Convert to RGB
+                for (int i = 0; i < decoder->frame_size; i++) {
+                    uint8_t r, g, b;
+                    if (is_ictcp) {
+                        ictcp_to_rgb(gop_y[t][i], gop_co[t][i], gop_cg[t][i], &r, &g, &b);
+                    } else {
+                        ycocg_r_to_rgb(gop_y[t][i], gop_co[t][i], gop_cg[t][i], &r, &g, &b);
+                    }
+                    frame_rgb[i * 3 + 0] = r;
+                    frame_rgb[i * 3 + 1] = g;
+                    frame_rgb[i * 3 + 2] = b;
+                }
+
+                // Write frame to FFmpeg video pipe
+                const size_t bytes_to_write = decoder->frame_size * 3;
+                const size_t bytes_written = fwrite(frame_rgb, 1, bytes_to_write, decoder->video_pipe);
+                if (bytes_written != bytes_to_write) {
+                    fprintf(stderr, "Error: Failed to write GOP frame %d to FFmpeg (wrote %zu/%zu bytes)\n",
+                           t, bytes_written, bytes_to_write);
+                    free(frame_rgb);
+                    result = -1;
+                    break;
+                }
+                fflush(decoder->video_pipe);
+
+                free(frame_rgb);
+            }
+
+            // Free GOP buffers
+            for (int t = 0; t < gop_size; t++) {
+                free(gop_y[t]);
+                free(gop_co[t]);
+                free(gop_cg[t]);
+            }
+            free(gop_y);
+            free(gop_co);
+            free(gop_cg);
+
+            // BUGFIX: Only break on error (result < 0), not on success (result = 1)
+            if (result < 0) break;
+
+            // GOP decoding doesn't update frame_count here - GOP_SYNC packet will do it
+            if (verbose) {
+                long pos_after_gop = ftell(decoder->input_fp);
+                fprintf(stderr, "[DEBUG] After GOP: file pos = %ld, %d frames written (waiting for GOP_SYNC)\n",
+                       pos_after_gop, gop_size);
+            }
+
             continue;
         }