diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
index e7baceb..d1b945d 100644
--- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
+++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
@@ -4230,6 +4230,335 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         }
     }
 
+    /**
+     * EZBC (Embedded Zero Block Coding) decoder for a single channel
+     * Decodes hierarchical zero-block coded bitstream
+     *
+     * @param ezbc_data EZBC-encoded bitstream
+     * @param offset Starting offset in ezbc_data
+     * @param size Size of EZBC data in bytes
+     * @param outputCoeffs Output array for decoded coefficients
+     */
+    private fun decodeChannelEZBC(ezbcData: ByteArray, offset: Int, size: Int, outputCoeffs: ShortArray) {
+        var bytePos = offset
+        var bitPos = 0
+
+        // Helper: read N bits from bitstream
+        var hitEndOfStream = false
+        fun readBits(numBits: Int): Int {
+            var result = 0
+            for (i in 0 until numBits) {
+                if (bytePos >= offset + size) {
+                    if (!hitEndOfStream) {
+                        println("[EZBC-BITS] HIT END OF STREAM at byte $bytePos (size=$size, requested $numBits bits)")
+                        hitEndOfStream = true
+                    }
+                    return result
+                }
+
+                val bit = (ezbcData[bytePos].toInt() shr bitPos) and 1
+                result = result or (bit shl i)
+
+                bitPos++
+                if (bitPos == 8) {
+                    bitPos = 0
+                    bytePos++
+                }
+            }
+            return result
+        }
+
+        // Debug: print raw bytes before reading header
+        if (ezbcData.size >= offset + 9) {
+            println("[EZBC-DEC] First 9 bytes at offset $offset: ${(0..8).map {
+                String.format("%02X", ezbcData[offset + it].toInt() and 0xFF)
+            }.joinToString(" ")}")
+        }
+
+        // Read header: MSB bitplane, width, height
+        val msbBitplane = readBits(8)
+        val width = readBits(16)
+        val height = readBits(16)
+
+        println("[EZBC-DEC] Decoded header: MSB=$msbBitplane, width=$width, height=$height")
+
+        if (width * height != outputCoeffs.size) {
+            System.err.println("EZBC dimension mismatch: ${width}x${height} != ${outputCoeffs.size}")
+            return
+        }
+
+        // Initialize coefficient state tracking
+        val significant = BooleanArray(outputCoeffs.size)
+        val firstBitplane = IntArray(outputCoeffs.size)
+
+        // Initialize output to zero
+        outputCoeffs.fill(0)
+
+        var totalSignificantCoeffs = 0
+
+        // Queue structures for block processing
+        data class Block(val x: Int, val y: Int, val width: Int, val height: Int)
+
+        var insignificantQueue = ArrayList<Block>()
+        var nextInsignificant = ArrayList<Block>()
+        var significantQueue = ArrayList<Block>()
+        var nextSignificant = ArrayList<Block>()
+
+        // Start with root block
+        insignificantQueue.add(Block(0, 0, width, height))
+
+        // Recursive function to process a significant block and its children
+        fun processSignificantBlockRecursive(block: Block, bitplane: Int, threshold: Int): Int {
+            var signBitsRead = 0
+
+            // If 1x1 block: read sign bit and add to significant queue
+            if (block.width == 1 && block.height == 1) {
+                val idx = block.y * width + block.x
+                val signBit = readBits(1)
+                signBitsRead++
+
+                // Set coefficient to threshold value with sign
+                outputCoeffs[idx] = (if (signBit == 1) -threshold else threshold).toShort()
+                significant[idx] = true
+                firstBitplane[idx] = bitplane
+                nextSignificant.add(block)
+                return signBitsRead
+            }
+
+            // Block is > 1x1: subdivide and recursively process children
+            var midX = block.width / 2
+            var midY = block.height / 2
+            if (midX == 0) midX = 1
+            if (midY == 0) midY = 1
+
+            // Top-left child
+            val tl = Block(block.x, block.y, midX, midY)
+            val tlFlag = readBits(1)
+            if (tlFlag == 1) {
+                signBitsRead += processSignificantBlockRecursive(tl, bitplane, threshold)
+            } else {
+                nextInsignificant.add(tl)
+            }
+
+            // Top-right child (if exists)
+            if (block.width > midX) {
+                val tr = Block(block.x + midX, block.y, block.width - midX, midY)
+                val trFlag = readBits(1)
+                if (trFlag == 1) {
+                    signBitsRead += processSignificantBlockRecursive(tr, bitplane, threshold)
+                } else {
+                    nextInsignificant.add(tr)
+                }
+            }
+
+            // Bottom-left child (if exists)
+            if (block.height > midY) {
+                val bl = Block(block.x, block.y + midY, midX, block.height - midY)
+                val blFlag = readBits(1)
+                if (blFlag == 1) {
+                    signBitsRead += processSignificantBlockRecursive(bl, bitplane, threshold)
+                } else {
+                    nextInsignificant.add(bl)
+                }
+            }
+
+            // Bottom-right child (if exists)
+            if (block.width > midX && block.height > midY) {
+                val br = Block(block.x + midX, block.y + midY, block.width - midX, block.height - midY)
+                val brFlag = readBits(1)
+                if (brFlag == 1) {
+                    signBitsRead += processSignificantBlockRecursive(br, bitplane, threshold)
+                } else {
+                    nextInsignificant.add(br)
+                }
+            }
+
+            return signBitsRead
+        }
+
+        // Process bitplanes from MSB to LSB
+        for (bitplane in msbBitplane downTo 0) {
+            val threshold = 1 shl bitplane
+            val insignifCountBefore = insignificantQueue.size
+            val signifCountBefore = significantQueue.size
+
+            // Process insignificant blocks
+            for (block in insignificantQueue) {
+                val flag = readBits(1)
+
+                if (flag == 0) {
+                    // Still insignificant
+                    nextInsignificant.add(block)
+                } else {
+                    // Became significant - use recursive processing
+                    val signBitsRead = processSignificantBlockRecursive(block, bitplane, threshold)
+                    totalSignificantCoeffs += signBitsRead
+                }
+            }
+
+            // Process significant 1x1 blocks (refinement)
+            for (block in significantQueue) {
+                val idx = block.y * width + block.x
+                val refineBit = readBits(1)
+
+                // Add refinement bit at current bitplane
+                if (refineBit == 1) {
+                    val bitValue = 1 shl bitplane
+                    if (outputCoeffs[idx] < 0) {
+                        outputCoeffs[idx] = (outputCoeffs[idx] - bitValue).toShort()
+                    } else {
+                        outputCoeffs[idx] = (outputCoeffs[idx] + bitValue).toShort()
+                    }
+                }
+
+                // Keep in significant queue
+                nextSignificant.add(block)
+            }
+
+            // Swap queues
+            insignificantQueue = nextInsignificant
+            significantQueue = nextSignificant
+            nextInsignificant = ArrayList()
+            nextSignificant = ArrayList()
+
+            if (bitplane == msbBitplane || bitplane == 0 || (msbBitplane - bitplane) % 3 == 0) {
+                println("[EZBC-BP] Bitplane $bitplane: threshold=$threshold, insignif=${insignifCountBefore}->${insignificantQueue.size}, signif=${signifCountBefore}->${significantQueue.size}, totalSig=$totalSignificantCoeffs")
+            }
+        }
+
+        // Debug summary
+        println("[EZBC-CH] Decoded $totalSignificantCoeffs significant coefficients out of ${outputCoeffs.size}")
+        val nonZeroCount = outputCoeffs.count { it != 0.toShort() }
+        println("[EZBC-CH] Non-zero coefficients: $nonZeroCount")
+        val maxVal = outputCoeffs.maxOrNull() ?: 0
+        val minVal = outputCoeffs.minOrNull() ?: 0
+        println("[EZBC-CH] Value range: [$minVal, $maxVal]")
+    }
+
+    /**
+     * EZBC decoder wrapper for variable channel layout
+     * Detects and decodes EZBC-encoded significance maps
+     *
+     * Format: [size_y(4)][ezbc_y][size_co(4)][ezbc_co][size_cg(4)][ezbc_cg]...
+     */
+    private fun postprocessCoefficientsEZBC(compressedData: ByteArray, compressedOffset: Int, coeffCount: Int,
+                                           channelLayout: Int, outputY: ShortArray?, outputCo: ShortArray?,
+                                           outputCg: ShortArray?, outputAlpha: ShortArray?) {
+        // Determine active channels based on channel_layout bitfield
+        // Bit 2 (value 4): 0=has Y/I, 1=no Y/I
+        // Bit 1 (value 2): 0=has Co/Cg or Ct/Cp, 1=no chroma
+        // Bit 0 (value 1): 1=has alpha, 0=no alpha
+        val hasY = (channelLayout and 4) == 0
+        val hasCo = (channelLayout and 2) == 0
+        val hasCg = (channelLayout and 2) == 0  // Same as Co - both chroma channels present together
+        val hasAlpha = (channelLayout and 1) != 0
+
+        println("[EZBC] Decoding: coeffCount=$coeffCount, channelLayout=$channelLayout, hasY=$hasY, hasCo=$hasCo, hasCg=$hasCg")
+
+        var offset = compressedOffset
+
+        // Decode Y channel
+        if (hasY && outputY != null) {
+            val size = ((compressedData[offset].toInt() and 0xFF) or
+                       ((compressedData[offset + 1].toInt() and 0xFF) shl 8) or
+                       ((compressedData[offset + 2].toInt() and 0xFF) shl 16) or
+                       ((compressedData[offset + 3].toInt() and 0xFF) shl 24))
+            println("[EZBC] Y channel: size=$size, offset=$offset")
+            offset += 4
+            decodeChannelEZBC(compressedData, offset, size, outputY)
+            println("[EZBC] Y channel decoded: first 10 values = ${outputY.take(10)}")
+            offset += size
+        }
+
+        // Decode Co channel
+        if (hasCo && outputCo != null) {
+            val size = ((compressedData[offset].toInt() and 0xFF) or
+                       ((compressedData[offset + 1].toInt() and 0xFF) shl 8) or
+                       ((compressedData[offset + 2].toInt() and 0xFF) shl 16) or
+                       ((compressedData[offset + 3].toInt() and 0xFF) shl 24))
+            println("[EZBC] Co channel: size=$size, offset=$offset")
+            offset += 4
+            decodeChannelEZBC(compressedData, offset, size, outputCo)
+            println("[EZBC] Co channel decoded: first 10 values = ${outputCo.take(10)}")
+            offset += size
+        }
+
+        // Decode Cg channel
+        if (hasCg && outputCg != null) {
+            val size = ((compressedData[offset].toInt() and 0xFF) or
+                       ((compressedData[offset + 1].toInt() and 0xFF) shl 8) or
+                       ((compressedData[offset + 2].toInt() and 0xFF) shl 16) or
+                       ((compressedData[offset + 3].toInt() and 0xFF) shl 24))
+            println("[EZBC] Cg channel: size=$size, offset=$offset")
+            offset += 4
+            decodeChannelEZBC(compressedData, offset, size, outputCg)
+            println("[EZBC] Cg channel decoded: first 10 values = ${outputCg.take(10)}")
+            offset += size
+        }
+
+        // Decode Alpha channel
+        if (hasAlpha && outputAlpha != null) {
+            val size = ((compressedData[offset].toInt() and 0xFF) or
+                       ((compressedData[offset + 1].toInt() and 0xFF) shl 8) or
+                       ((compressedData[offset + 2].toInt() and 0xFF) shl 16) or
+                       ((compressedData[offset + 3].toInt() and 0xFF) shl 24))
+            offset += 4
+            decodeChannelEZBC(compressedData, offset, size, outputAlpha)
+        }
+    }
+
+    /**
+     * Auto-detecting coefficient decoder wrapper
+     * Detects EZBC vs twobit-map format and calls appropriate decoder
+     */
+    private fun postprocessCoefficientsAuto(compressedData: ByteArray, compressedOffset: Int, coeffCount: Int,
+                                            channelLayout: Int, outputY: ShortArray?, outputCo: ShortArray?,
+                                            outputCg: ShortArray?, outputAlpha: ShortArray?): Boolean {
+        // TEMPORARY: Force EZBC mode until entropy coding method flag is added
+        val isEZBC = true
+
+        /* Auto-detection disabled for now - will use entropy coding method flag later
+        // Better auto-detection: Check EZBC header structure
+        // EZBC format: [size(4)][msb_bitplane(1)][width(2)][height(2)][bits...]
+        // Twobit-map format: [2-bit map + values...]
+
+        val isEZBC = if (compressedData.size >= compressedOffset + 9) {
+            // Read first uint32 (should be EZBC channel size)
+            val possibleSize = ((compressedData[compressedOffset].toInt() and 0xFF) or
+                              ((compressedData[compressedOffset + 1].toInt() and 0xFF) shl 8) or
+                              ((compressedData[compressedOffset + 2].toInt() and 0xFF) shl 16) or
+                              ((compressedData[compressedOffset + 3].toInt() and 0xFF) shl 24))
+            val msbBitplane = compressedData[compressedOffset + 4].toInt() and 0xFF
+            val width = ((compressedData[compressedOffset + 5].toInt() and 0xFF) or
+                         ((compressedData[compressedOffset + 6].toInt() and 0xFF) shl 8))
+            val height = ((compressedData[compressedOffset + 7].toInt() and 0xFF) or
+                          ((compressedData[compressedOffset + 8].toInt() and 0xFF) shl 8))
+
+            println("[AUTO] Checking EZBC: possibleSize=$possibleSize, msb=$msbBitplane, w=$width, h=$height, coeffCount=$coeffCount")
+
+            // Valid EZBC header should have reasonable size, MSB bitplane, and dimensions matching coeffCount
+            val detected = possibleSize in 10..(coeffCount * 4) && msbBitplane < 20 && width > 0 && height > 0 && width * height == coeffCount
+            println("[AUTO] Detection result: isEZBC=$detected")
+            detected
+        } else {
+            println("[AUTO] Not enough data for EZBC detection, using twobit-map")
+            false
+        }
+        */
+
+        if (isEZBC) {
+            println("[AUTO] Using EZBC decoder (FORCED)")
+            postprocessCoefficientsEZBC(compressedData, compressedOffset, coeffCount,
+                                       channelLayout, outputY, outputCo, outputCg, outputAlpha)
+        } else {
+            println("[AUTO] Using twobit-map decoder")
+            postprocessCoefficientsVariableLayout(compressedData, compressedOffset, coeffCount,
+                                                 channelLayout, outputY, outputCo, outputCg, outputAlpha)
+        }
+
+        return isEZBC
+    }
+
     /**
      * Reconstruct per-frame coefficients from unified GOP block (2-bit format)
      * Reverse of encoder's preprocess_gop_unified()
@@ -4408,6 +4737,95 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         return output
     }
 
+    /**
+     * Reconstruct per-frame coefficients from unified GOP block (EZBC format)
+     * Format: [frame0_size(4)][frame0_ezbc][frame1_size(4)][frame1_ezbc]...
+     *
+     * @param decompressedData Unified EZBC block data (after Zstd decompression)
+     * @param numFrames Number of frames in GOP
+     * @param numPixels Pixels per frame (width × height)
+     * @param channelLayout Channel layout (0=YCoCg, 2=Y-only, etc)
+     * @return Array of [frame][channel] where channel: 0=Y, 1=Co, 2=Cg
+     */
+    private fun tavPostprocessGopEZBC(
+        decompressedData: ByteArray,
+        numFrames: Int,
+        numPixels: Int,
+        channelLayout: Int
+    ): Array<Array<ShortArray>> {
+        // Allocate output arrays
+        val output = Array(numFrames) { Array(3) { ShortArray(numPixels) } }
+
+        var offset = 0
+        for (frame in 0 until numFrames) {
+            if (offset + 4 > decompressedData.size) break
+
+            // Read frame size
+            val frameSize = ((decompressedData[offset].toInt() and 0xFF) or
+                           ((decompressedData[offset + 1].toInt() and 0xFF) shl 8) or
+                           ((decompressedData[offset + 2].toInt() and 0xFF) shl 16) or
+                           ((decompressedData[offset + 3].toInt() and 0xFF) shl 24))
+            offset += 4
+
+            if (offset + frameSize > decompressedData.size) break
+
+            // Decode this frame with EZBC
+            postprocessCoefficientsEZBC(
+                decompressedData, offset, numPixels, channelLayout,
+                output[frame][0], output[frame][1], output[frame][2], null
+            )
+
+            offset += frameSize
+        }
+
+        return output
+    }
+
+    /**
+     * Auto-detecting GOP postprocessor
+     * Detects EZBC vs twobit-map format and calls appropriate decoder
+     */
+    private fun tavPostprocessGopAuto(
+        decompressedData: ByteArray,
+        numFrames: Int,
+        numPixels: Int,
+        channelLayout: Int
+    ): Pair<Boolean, Array<Array<ShortArray>>> {
+        // TEMPORARY: Force EZBC mode until entropy coding method flag is added
+        val isEZBC = true
+
+        /* Auto-detection disabled for now - will use entropy coding method flag later
+        // Auto-detect: EZBC format has frame size headers
+        // Check if first 4 bytes look like a reasonable frame size
+        val isEZBC = if (decompressedData.size >= 8) {
+            val possibleSize = ((decompressedData[0].toInt() and 0xFF) or
+                              ((decompressedData[1].toInt() and 0xFF) shl 8) or
+                              ((decompressedData[2].toInt() and 0xFF) shl 16) or
+                              ((decompressedData[3].toInt() and 0xFF) shl 24))
+
+            // Check if this looks like an EZBC header (size followed by MSB bitplane)
+            if (possibleSize in 10..(numPixels * 16)) {
+                val msbBitplane = decompressedData[4].toInt() and 0xFF
+                msbBitplane < 20  // Valid MSB bitplane
+            } else {
+                false
+            }
+        } else {
+            false
+        }
+        */
+
+        println("[GOP AUTO] Using ${if (isEZBC) "EZBC (FORCED)" else "twobit-map"} decoder")
+
+        val data = if (isEZBC) {
+            tavPostprocessGopEZBC(decompressedData, numFrames, numPixels, channelLayout)
+        } else {
+            tavPostprocessGopUnified(decompressedData, numFrames, numPixels, channelLayout)
+        }
+
+        return Pair(isEZBC, data)
+    }
+
     // TAV Simulated overlapping tiles constants (must match encoder)
     private val TAV_TILE_SIZE_X = 640
     private val TAV_TILE_SIZE_Y = 540
@@ -4658,7 +5076,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
     }
 
     private fun dequantiseDWTSubbandsPerceptual(qIndex: Int, qYGlobal: Int, quantised: ShortArray, dequantised: FloatArray,
-                                               subbands: List<DWTSubbandInfo>, baseQuantiser: Float, isChroma: Boolean, decompLevels: Int) {
+                                               subbands: List<DWTSubbandInfo>, baseQuantiser: Float, isChroma: Boolean, decompLevels: Int,
+                                               isEZBC: Boolean) {
 
         // CRITICAL FIX: Encoder stores coefficients in LINEAR order, not subband-mapped order!
         // The subband layout calculation is only used for determining perceptual weights,
@@ -4681,10 +5100,23 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         }
 
         // Apply linear dequantisation with perceptual weights (matching encoder's linear storage)
+        // EZBC mode: coefficients are ALREADY DENORMALIZED by encoder
+        //            e.g., encoder: coeff=377 → quantize: 377/48=7.85→8 → denormalize: 8*48=384 → store 384
+        //            decoder: read 384 → pass through as-is (already in correct range for IDWT)
+        // Significance-map mode: coefficients are normalized (quantized only)
+        //                       e.g., encoder stores 8 = round(377/48)
+        //                       decoder must multiply: 8 * 48 = 384 (denormalize for IDWT)
         for (i in quantised.indices) {
             if (i < dequantised.size) {
                 val effectiveQuantiser = baseQuantiser * weights[i]
-                dequantised[i] = quantised[i] * effectiveQuantiser
+
+                dequantised[i] = if (isEZBC) {
+                    // EZBC mode: pass through as-is (coefficients already denormalized)
+                    quantised[i].toFloat()
+                } else {
+                    // Significance-map mode: multiply to denormalize (coefficients are normalized)
+                    quantised[i] * effectiveQuantiser
+                }
             }
         }
 
@@ -4696,11 +5128,14 @@ class GraphicsJSR223Delegate(private val vm: VM) {
             val weightRange = if (weightStats.isNotEmpty())
                 "weights: ${weightStats.first()}-${weightStats.last()}" else "no weights"
 
-            for (coeff in quantised) {
-                if (coeff != 0.toShort()) nonZeroCoeffs++
+            for (i in quantised.indices) {
+                if (quantised[i] != 0.toShort()) {
+                    nonZeroCoeffs++
+                }
             }
 
-            println("LINEAR PERCEPTUAL DEQUANT: $channelType - coeffs=${quantised.size}, nonzero=$nonZeroCoeffs, $weightRange")
+            val mode = if (isEZBC) "EZBC (pass-through)" else "Sigmap (multiply)"
+            println("LINEAR PERCEPTUAL DEQUANT: $channelType - mode=$mode, coeffs=${quantised.size}, nonzero=$nonZeroCoeffs, $weightRange")
         }
     }
 
@@ -4971,8 +5406,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
             return count
         }
 
-        // Use variable channel layout concatenated maps format
-        postprocessCoefficientsVariableLayout(coeffBuffer, 0, coeffCount, channelLayout, quantisedY, quantisedCo, quantisedCg, quantisedAlpha)
+        // Use auto-detecting decoder (EZBC or variable channel layout concatenated maps)
+        val isEZBCMode = postprocessCoefficientsAuto(coeffBuffer, 0, coeffCount, channelLayout, quantisedY, quantisedCo, quantisedCg, quantisedAlpha)
 
         // Calculate total size for variable channel layout format
         val numChannels = when (channelLayout) {
@@ -5013,9 +5448,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
             val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y
             val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels)
 
-            dequantiseDWTSubbandsPerceptual(qIndex, qYGlobal, quantisedY, yTile, subbands, qY.toFloat(), false, decompLevels)
-            dequantiseDWTSubbandsPerceptual(qIndex, qYGlobal, quantisedCo, coTile, subbands, qCo.toFloat(), true, decompLevels)
-            dequantiseDWTSubbandsPerceptual(qIndex, qYGlobal, quantisedCg, cgTile, subbands, qCg.toFloat(), true, decompLevels)
+            dequantiseDWTSubbandsPerceptual(qIndex, qYGlobal, quantisedY, yTile, subbands, qY.toFloat(), false, decompLevels, isEZBCMode)
+            dequantiseDWTSubbandsPerceptual(qIndex, qYGlobal, quantisedCo, coTile, subbands, qCo.toFloat(), true, decompLevels, isEZBCMode)
+            dequantiseDWTSubbandsPerceptual(qIndex, qYGlobal, quantisedCg, cgTile, subbands, qCg.toFloat(), true, decompLevels, isEZBCMode)
 
             // Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT)
             // Use perceptual weights since this is the perceptual quantization path
@@ -5584,8 +6019,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
             return count
         }
 
-        // Use variable channel layout concatenated maps format for deltas
-        postprocessCoefficientsVariableLayout(coeffBuffer, 0, coeffCount, channelLayout, deltaY, deltaCo, deltaCg, deltaAlpha)
+        // Use auto-detecting decoder for deltas (EZBC or variable channel layout concatenated maps)
+        postprocessCoefficientsAuto(coeffBuffer, 0, coeffCount, channelLayout, deltaY, deltaCo, deltaCg, deltaAlpha)
 
         // Calculate total size for variable channel layout format (deltas)
         val numChannels = when (channelLayout) {
@@ -6352,8 +6787,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
             return arrayOf(0, dbgOut)
         }
 
-        // Step 2: Postprocess unified block to per-frame coefficients
-        val quantizedCoeffs = tavPostprocessGopUnified(
+        // Step 2: Postprocess unified block to per-frame coefficients (auto-detect EZBC vs twobit-map)
+        val (isEZBCMode, quantizedCoeffs) = tavPostprocessGopAuto(
             decompressedData,
             gopSize,
             canvasPixels,  // Use expanded canvas size
@@ -6382,19 +6817,22 @@ class GraphicsJSR223Delegate(private val vm: VM) {
             dequantiseDWTSubbandsPerceptual(
                 qIndex, qYGlobal,
                 quantizedCoeffs[t][0], gopY[t],
-                subbands, baseQY, false, spatialLevels  // isChroma=false
+                subbands, baseQY, false, spatialLevels,  // isChroma=false
+                isEZBCMode
             )
 
             dequantiseDWTSubbandsPerceptual(
                 qIndex, qYGlobal,
                 quantizedCoeffs[t][1], gopCo[t],
-                subbands, baseQCo, true, spatialLevels  // isChroma=true
+                subbands, baseQCo, true, spatialLevels,  // isChroma=true
+                isEZBCMode
             )
 
             dequantiseDWTSubbandsPerceptual(
                 qIndex, qYGlobal,
                 quantizedCoeffs[t][2], gopCg[t],
-                subbands, baseQCg, true, spatialLevels  // isChroma=true
+                subbands, baseQCg, true, spatialLevels,  // isChroma=true
+                isEZBCMode
             )
         }
 
diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c
index c283a52..a4a1917 100644
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -4,6 +4,7 @@
 #include <stdlib.h>
 #include <stdint.h>
 #include <stddef.h>
+#include <stdbool.h>
 #include <string.h>
 #include <math.h>
 #include <zstd.h>
@@ -116,8 +117,8 @@ static int needs_alpha_channel(int channel_layout) {
 #define DEFAULT_FPS 30
 #define DEFAULT_QUALITY 3
 #define DEFAULT_ZSTD_LEVEL 9
-#define TEMPORAL_GOP_SIZE 8
-#define TEMPORAL_DECOMP_LEVEL 2
+#define TEMPORAL_GOP_SIZE 24//8
+#define TEMPORAL_DECOMP_LEVEL 3
 #define MOTION_THRESHOLD 24.0f // Flush if motion exceeds 24 pixels in any direction
 
 // Audio/subtitle constants (reused from TEV)
@@ -199,6 +200,374 @@ typedef struct quad_tree_node {
     struct quad_tree_node *children[4];  // NW, NE, SW, SE children (NULL if leaf)
 } quad_tree_node_t;
 
+// ====================================================================================
+// EZBC (Embedded Zero Block Coding) Structures and Functions
+// ====================================================================================
+
+// Bitstream writer for EZBC
+typedef struct {
+    uint8_t *data;
+    size_t capacity;
+    size_t byte_pos;
+    uint8_t bit_pos;  // 0-7, current bit position in current byte
+} bitstream_t;
+
+// Block structure for EZBC quadtree
+typedef struct {
+    int x, y;           // Top-left position in 2D coefficient array
+    int width, height;  // Block dimensions
+} ezbc_block_t;
+
+// Queue for EZBC block processing
+typedef struct {
+    ezbc_block_t *blocks;
+    size_t count;
+    size_t capacity;
+} block_queue_t;
+
+// Track coefficient state for refinement
+typedef struct {
+    bool significant;     // Has been marked significant
+    int first_bitplane;   // Bitplane where it became significant
+} coeff_state_t;
+
+// Bitstream operations
+static void bitstream_init(bitstream_t *bs, size_t initial_capacity) {
+    bs->capacity = initial_capacity;
+    bs->data = calloc(1, initial_capacity);
+    bs->byte_pos = 0;
+    bs->bit_pos = 0;
+}
+
+static void bitstream_write_bit(bitstream_t *bs, int bit) {
+    // Grow if needed
+    if (bs->byte_pos >= bs->capacity) {
+        bs->capacity *= 2;
+        bs->data = realloc(bs->data, bs->capacity);
+        // Clear new memory
+        memset(bs->data + bs->byte_pos, 0, bs->capacity - bs->byte_pos);
+    }
+
+    if (bit) {
+        bs->data[bs->byte_pos] |= (1 << bs->bit_pos);
+    }
+
+    bs->bit_pos++;
+    if (bs->bit_pos == 8) {
+        bs->bit_pos = 0;
+        bs->byte_pos++;
+    }
+}
+
+static void bitstream_write_bits(bitstream_t *bs, uint32_t value, int num_bits) {
+    for (int i = 0; i < num_bits; i++) {
+        bitstream_write_bit(bs, (value >> i) & 1);
+    }
+}
+
+static size_t bitstream_size(bitstream_t *bs) {
+    return bs->byte_pos + (bs->bit_pos > 0 ? 1 : 0);
+}
+
+static void bitstream_free(bitstream_t *bs) {
+    free(bs->data);
+}
+
+// Block queue operations
+static void queue_init(block_queue_t *q) {
+    q->capacity = 1024;
+    q->blocks = malloc(q->capacity * sizeof(ezbc_block_t));
+    q->count = 0;
+}
+
+static void queue_push(block_queue_t *q, ezbc_block_t block) {
+    if (q->count >= q->capacity) {
+        q->capacity *= 2;
+        q->blocks = realloc(q->blocks, q->capacity * sizeof(ezbc_block_t));
+    }
+    q->blocks[q->count++] = block;
+}
+
+static void queue_free(block_queue_t *q) {
+    free(q->blocks);
+}
+
+// Check if all coefficients in block have |coeff| < threshold
+static bool is_zero_block_ezbc(int16_t *coeffs, int width, int height,
+                                const ezbc_block_t *block, int threshold) {
+    for (int y = block->y; y < block->y + block->height && y < height; y++) {
+        for (int x = block->x; x < block->x + block->width && x < width; x++) {
+            int idx = y * width + x;
+            if (abs(coeffs[idx]) >= threshold) {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+// Find maximum absolute coefficient value for determining MSB
+static int find_max_abs_ezbc(int16_t *coeffs, size_t count) {
+    int max_abs = 0;
+    for (size_t i = 0; i < count; i++) {
+        int abs_val = abs(coeffs[i]);
+        if (abs_val > max_abs) {
+            max_abs = abs_val;
+        }
+    }
+    return max_abs;
+}
+
+// Get MSB position (bitplane number)
+// Returns floor(log2(value)), i.e., the position of the highest set bit
+static int get_msb_bitplane(int value) {
+    if (value == 0) return 0;
+    int bitplane = 0;
+    while (value > 1) {
+        value >>= 1;
+        bitplane++;
+    }
+    return bitplane;
+}
+
+// Forward declarations for recursive EZBC
+typedef struct {
+    bitstream_t *bs;
+    int16_t *coeffs;
+    coeff_state_t *states;
+    int width;
+    int height;
+    int bitplane;
+    int threshold;
+    block_queue_t *next_insignificant;
+    block_queue_t *next_significant;
+    int *sign_count;
+} ezbc_context_t;
+
+// Recursively process a significant block - subdivide until 1x1
+static void process_significant_block_recursive(ezbc_context_t *ctx, ezbc_block_t block) {
+    // If 1x1 block: emit sign bit and add to significant queue
+    if (block.width == 1 && block.height == 1) {
+        int idx = block.y * ctx->width + block.x;
+        bitstream_write_bit(ctx->bs, ctx->coeffs[idx] < 0 ? 1 : 0);
+        (*ctx->sign_count)++;
+        ctx->states[idx].significant = true;
+        ctx->states[idx].first_bitplane = ctx->bitplane;
+        queue_push(ctx->next_significant, block);
+        return;
+    }
+
+    // Block is > 1x1: subdivide into children and recursively process each
+    int mid_x = block.width / 2;
+    int mid_y = block.height / 2;
+    if (mid_x == 0) mid_x = 1;
+    if (mid_y == 0) mid_y = 1;
+
+    // Process top-left child
+    ezbc_block_t tl = {block.x, block.y, mid_x, mid_y};
+    if (!is_zero_block_ezbc(ctx->coeffs, ctx->width, ctx->height, &tl, ctx->threshold)) {
+        bitstream_write_bit(ctx->bs, 1);  // Significant
+        process_significant_block_recursive(ctx, tl);
+    } else {
+        bitstream_write_bit(ctx->bs, 0);  // Insignificant
+        queue_push(ctx->next_insignificant, tl);
+    }
+
+    // Process top-right child (if exists)
+    if (block.width > mid_x) {
+        ezbc_block_t tr = {block.x + mid_x, block.y, block.width - mid_x, mid_y};
+        if (!is_zero_block_ezbc(ctx->coeffs, ctx->width, ctx->height, &tr, ctx->threshold)) {
+            bitstream_write_bit(ctx->bs, 1);
+            process_significant_block_recursive(ctx, tr);
+        } else {
+            bitstream_write_bit(ctx->bs, 0);
+            queue_push(ctx->next_insignificant, tr);
+        }
+    }
+
+    // Process bottom-left child (if exists)
+    if (block.height > mid_y) {
+        ezbc_block_t bl = {block.x, block.y + mid_y, mid_x, block.height - mid_y};
+        if (!is_zero_block_ezbc(ctx->coeffs, ctx->width, ctx->height, &bl, ctx->threshold)) {
+            bitstream_write_bit(ctx->bs, 1);
+            process_significant_block_recursive(ctx, bl);
+        } else {
+            bitstream_write_bit(ctx->bs, 0);
+            queue_push(ctx->next_insignificant, bl);
+        }
+    }
+
+    // Process bottom-right child (if exists)
+    if (block.width > mid_x && block.height > mid_y) {
+        ezbc_block_t br = {block.x + mid_x, block.y + mid_y, block.width - mid_x, block.height - mid_y};
+        if (!is_zero_block_ezbc(ctx->coeffs, ctx->width, ctx->height, &br, ctx->threshold)) {
+            bitstream_write_bit(ctx->bs, 1);
+            process_significant_block_recursive(ctx, br);
+        } else {
+            bitstream_write_bit(ctx->bs, 0);
+            queue_push(ctx->next_insignificant, br);
+        }
+    }
+}
+
+// EZBC encoding for a single channel (Fixed version from significance_map_granularity_test.c)
+// Uses two separate queues for insignificant blocks and significant 1x1 blocks
+// Returns encoded size and allocates output buffer
+static size_t encode_channel_ezbc(int16_t *coeffs, size_t count, int width, int height,
+                                   uint8_t **output) {
+    bitstream_t bs;
+    bitstream_init(&bs, count / 4);  // Initial guess
+
+    // Track coefficient significance
+    coeff_state_t *states = calloc(count, sizeof(coeff_state_t));
+
+    // Find maximum value to determine MSB bitplane
+    int max_abs = find_max_abs_ezbc(coeffs, count);
+    int msb_bitplane = get_msb_bitplane(max_abs);
+
+    // Write header: MSB bitplane and dimensions
+    bitstream_write_bits(&bs, msb_bitplane, 8);
+    bitstream_write_bits(&bs, width, 16);
+    bitstream_write_bits(&bs, height, 16);
+
+    if (0) {
+        fprintf(stderr, "[EZBC-ENC] Encoding: max_abs=%d, msb_bitplane=%d, dims=%dx%d, count=%zu\n",
+                max_abs, msb_bitplane, width, height, count);
+        fprintf(stderr, "[EZBC-ENC] Header bytes: MSB=0x%02X, W=%d (0x%04X), H=%d (0x%04X)\n",
+                msb_bitplane, width, width, height, height);
+        fprintf(stderr, "[EZBC-ENC] First 9 bytes of bitstream: %02X %02X %02X %02X %02X %02X %02X %02X %02X\n",
+                bs.data[0], bs.data[1], bs.data[2], bs.data[3], bs.data[4],
+                bs.data[5], bs.data[6], bs.data[7], bs.data[8]);
+    }
+
+    // Initialize two queues: insignificant blocks and significant 1x1 blocks
+    block_queue_t insignificant_queue, next_insignificant;
+    block_queue_t significant_queue, next_significant;
+
+    queue_init(&insignificant_queue);
+    queue_init(&next_insignificant);
+    queue_init(&significant_queue);
+    queue_init(&next_significant);
+
+    // Start with root block as insignificant
+    ezbc_block_t root = {0, 0, width, height};
+    queue_push(&insignificant_queue, root);
+
+    // Process bitplanes from MSB to LSB
+    int bitplanes_processed = 0;
+    int total_flags_written = 0;
+    int total_ones_written = 0;
+    int total_sign_bits_written = 0;
+    int total_refinement_bits_written = 0;
+    for (int bitplane = msb_bitplane; bitplane >= 0; bitplane--) {
+        int threshold = 1 << bitplane;
+        bitplanes_processed++;
+
+        size_t insignif_before = insignificant_queue.count;
+        size_t signif_before = significant_queue.count;
+
+        int flags_this_bitplane = 0;
+        int ones_this_bitplane = 0;
+        int sign_bits_this_bitplane = 0;
+        int refinement_bits_this_bitplane = 0;
+
+        // Process insignificant blocks - check if they become significant
+        for (size_t i = 0; i < insignificant_queue.count; i++) {
+            ezbc_block_t block = insignificant_queue.blocks[i];
+
+            // Check if this block has any coefficient >= threshold
+            if (is_zero_block_ezbc(coeffs, width, height, &block, threshold)) {
+                // Still insignificant: emit 0
+                bitstream_write_bit(&bs, 0);
+                flags_this_bitplane++;
+                // Keep in insignificant queue for next bitplane
+                queue_push(&next_insignificant, block);
+            } else {
+                // Became significant: emit 1
+                bitstream_write_bit(&bs, 1);
+                flags_this_bitplane++;
+                ones_this_bitplane++;
+                total_ones_written++;
+
+                // Use recursive subdivision to process this block and all children
+                ezbc_context_t ctx = {
+                    .bs = &bs,
+                    .coeffs = coeffs,
+                    .states = states,
+                    .width = width,
+                    .height = height,
+                    .bitplane = bitplane,
+                    .threshold = threshold,
+                    .next_insignificant = &next_insignificant,
+                    .next_significant = &next_significant,
+                    .sign_count = &sign_bits_this_bitplane
+                };
+                process_significant_block_recursive(&ctx, block);
+            }
+        }
+
+        // Process significant 1x1 blocks - emit refinement bits
+        for (size_t i = 0; i < significant_queue.count; i++) {
+            ezbc_block_t block = significant_queue.blocks[i];
+            int idx = block.y * width + block.x;
+            int abs_val = abs(coeffs[idx]);
+
+            // Emit refinement bit at current bitplane
+            int bit = (abs_val >> bitplane) & 1;
+            bitstream_write_bit(&bs, bit);
+            refinement_bits_this_bitplane++;
+            total_refinement_bits_written++;
+
+            // Keep in significant queue for next bitplane
+            queue_push(&next_significant, block);
+        }
+
+        // Swap queues for next bitplane
+        queue_free(&insignificant_queue);
+        queue_free(&significant_queue);
+        insignificant_queue = next_insignificant;
+        significant_queue = next_significant;
+        queue_init(&next_insignificant);
+        queue_init(&next_significant);
+
+        total_flags_written += flags_this_bitplane;
+        total_sign_bits_written += sign_bits_this_bitplane;
+        total_refinement_bits_written += refinement_bits_this_bitplane;
+
+        if (0 && (bitplane == msb_bitplane || bitplane == 0 || bitplane % 3 == 0)) {
+            fprintf(stderr, "[EZBC-BP] Bitplane %2d: threshold=%4d, flags=%d (ones=%d), sign=%d, refine=%d, insignif=%zu->%zu, signif=%zu->%zu\n",
+                    bitplane, threshold, flags_this_bitplane, ones_this_bitplane,
+                    sign_bits_this_bitplane, refinement_bits_this_bitplane,
+                    insignif_before, insignificant_queue.count,
+                    signif_before, significant_queue.count);
+        }
+    }
+
+    if (0) {
+        fprintf(stderr, "[EZBC-ENC] Processed %d bitplanes, wrote %d flags (%d ones), %d sign bits, %d refinement bits\n",
+                bitplanes_processed, total_flags_written, total_ones_written,
+                total_sign_bits_written, total_refinement_bits_written);
+    }
+
+    queue_free(&insignificant_queue);
+    queue_free(&significant_queue);
+    free(states);
+
+    size_t final_size = bitstream_size(&bs);
+    *output = bs.data;
+
+    if (0) {
+        fprintf(stderr, "[EZBC-ENC] Completed: final_size=%zu bytes (%.1f bits/coeff)\n",
+                final_size, (final_size * 8.0) / count);
+    }
+
+    return final_size;
+}
+
+// ====================================================================================
+// End of EZBC Implementation
+// ====================================================================================
+
 // Partitioning decision based on residual variance
 static float compute_block_variance(const float *residual, int width, int x, int y, int block_size) {
     double sum = 0.0;
@@ -1365,6 +1734,7 @@ typedef struct tav_encoder_s {
     int intra_only;       // Force all tiles to use INTRA mode (disable delta encoding)
     int monoblock;        // Single DWT tile mode (encode entire frame as one tile)
     int perceptual_tuning; // 1 = perceptual quantisation (default), 0 = uniform quantisation
+    int enable_ezbc;       // 1 = use EZBC (Embedded Zero Block Coding) for significance maps, 0 = use twobit-map (default)
     int channel_layout;   // Channel layout: 0=Y-Co-Cg, 1=Y-only, 2=Y-Co-Cg-A, 3=Y-A, 4=Co-Cg
     int progressive_mode;  // 0 = interlaced (default), 1 = progressive
     int grain_synthesis;   // 1 = enable grain synthesis (default), 0 = disable
@@ -1395,22 +1765,17 @@ typedef struct tav_encoder_s {
     int16_t *temporal_gop_translation_y;  // [frame] - Translation Y in 1/16-pixel units (for 0x12 packets)
     int temporal_decomp_levels;  // Number of temporal DWT levels (default: 2)
 
-    // Mesh warping for temporal 3D DWT (0x13 packets)
-    int temporal_enable_mesh_warp;        // Flag to enable mesh warping (default: 0, uses translation if temporal_dwt enabled)
-    int temporal_mesh_width;              // Number of control points horizontally (default: 32 for 1920×1080)
-    int temporal_mesh_height;             // Number of control points vertically (default: 18 for 1920×1080)
+    // MC-EZBC block-based motion compensation for temporal 3D DWT (0x13 packets)
+    int temporal_enable_mcezbc;           // Flag to enable MC-EZBC block compensation (default: 0, uses translation if temporal_dwt enabled)
+    int temporal_block_size;              // Block size for motion compensation (default: 16)
+    int temporal_num_blocks_x;            // Number of blocks horizontally
+    int temporal_num_blocks_y;            // Number of blocks vertically
 
-    int16_t **temporal_gop_mesh_dx;       // [frame][mesh_w * mesh_h] - Mesh displacement X in 1/8-pixel units
-    int16_t **temporal_gop_mesh_dy;       // [frame][mesh_w * mesh_h] - Mesh displacement Y in 1/8-pixel units
-
-    // Dense optical flow for reliability masking (used with mesh warping)
-    float **temporal_gop_flow_fwd_x;      // [frame][width * height] - Forward flow X (F[i-1] → F[i])
-    float **temporal_gop_flow_fwd_y;      // [frame][width * height] - Forward flow Y
-    float **temporal_gop_flow_bwd_x;      // [frame][width * height] - Backward flow X (F[i] → F[i-1])
-    float **temporal_gop_flow_bwd_y;      // [frame][width * height] - Backward flow Y
-
-    float temporal_mesh_smoothness;       // Laplacian smoothing weight (0.0-1.0, default: 0.5)
-    int temporal_mesh_smooth_iterations;  // Number of smoothing iterations (default: 8)
+    // Motion vectors for MC-EZBC lifting (forward and backward for bidirectional prediction)
+    int16_t **temporal_gop_mvs_fwd_x;     // [frame][num_blocks] - Forward MVs X in 1/4-pixel units (F[t-1] → F[t])
+    int16_t **temporal_gop_mvs_fwd_y;     // [frame][num_blocks] - Forward MVs Y in 1/4-pixel units
+    int16_t **temporal_gop_mvs_bwd_x;     // [frame][num_blocks] - Backward MVs X in 1/4-pixel units (F[t+1] → F[t])
+    int16_t **temporal_gop_mvs_bwd_y;     // [frame][num_blocks] - Backward MVs Y in 1/4-pixel units
 
     // MPEG-style residual coding (0x14/0x15 packets) - replaces temporal DWT
     int enable_residual_coding;  // Flag to enable MPEG-style residual coding (I/P/B frames)
@@ -1777,6 +2142,25 @@ extern void estimate_optical_flow_motion(
     int width, int height, int block_size,
     int16_t *mvs_x, int16_t *mvs_y
 );
+
+// MC-EZBC block-based motion compensation (external C++ functions)
+extern void warp_block_motion(
+    const float *src, int width, int height,
+    const int16_t *mvs_x, const int16_t *mvs_y,
+    int block_size, float *dst
+);
+
+extern void warp_bidirectional(
+    const float *f0, const float *f1,
+    int width, int height,
+    const int16_t *mvs_fwd_x, const int16_t *mvs_fwd_y,
+    const int16_t *mvs_bwd_x, const int16_t *mvs_bwd_y,
+    int block_size, float *prediction
+);
+
+// Helper functions for motion compensation
+static void apply_translation(const float *src, int width, int height, float dx, float dy, float *dst);
+
 static int get_subband_level_2d(int x, int y, int width, int height, int decomp_levels);
 static int get_subband_type_2d(int x, int y, int width, int height, int decomp_levels);
 static int get_subband_level(int linear_idx, int width, int height, int decomp_levels);
@@ -1811,6 +2195,9 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
                        int *frame_numbers, int actual_gop_size);
 static size_t gop_process_and_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
                                    int *frame_numbers, int force_flush);
+static int detect_scene_change_between_frames(const uint8_t *frame1_rgb, const uint8_t *frame2_rgb,
+                                               int width, int height,
+                                               double *out_avg_diff, double *out_changed_ratio);
 static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
                                   const float *tile_y_data, const float *tile_co_data, const float *tile_cg_data,
                                   uint8_t mode, uint8_t *buffer);
@@ -1820,10 +2207,15 @@ static void quantise_dwt_coefficients_perceptual_per_coeff(tav_encoder_t *enc,
                                                            float *coeffs, int16_t *quantised, int size,
                                                            int base_quantiser, int width, int height,
                                                            int decomp_levels, int is_chroma, int frame_count);
-static size_t preprocess_coefficients_variable_layout(int16_t *coeffs_y, int16_t *coeffs_co, int16_t *coeffs_cg, int16_t *coeffs_alpha,
-                                                     int coeff_count, int channel_layout, uint8_t *output_buffer);
-static size_t preprocess_gop_unified(int16_t **quant_y, int16_t **quant_co, int16_t **quant_cg,
-                                     int num_frames, int num_pixels, int channel_layout,
+static void quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(tav_encoder_t *enc,
+                                                           float *coeffs, int16_t *quantised, int size,
+                                                           int base_quantiser, int width, int height,
+                                                           int decomp_levels, int is_chroma, int frame_count);
+static size_t preprocess_coefficients_variable_layout(int enable_ezbc, int width, int height,
+                                                       int16_t *coeffs_y, int16_t *coeffs_co, int16_t *coeffs_cg, int16_t *coeffs_alpha,
+                                                       int coeff_count, int channel_layout, uint8_t *output_buffer);
+static size_t preprocess_gop_unified(int enable_ezbc, int16_t **quant_y, int16_t **quant_co, int16_t **quant_cg,
+                                     int num_frames, int num_pixels, int width, int height, int channel_layout,
                                      uint8_t *output_buffer);
 
 // Film grain synthesis
@@ -1868,7 +2260,8 @@ static void show_usage(const char *program_name) {
     printf("  --enable-delta          Enable delta encoding\n");
     printf("  --delta-haar N          Apply N-level Haar DWT to delta coefficients (1-6, auto-enables delta)\n");
     printf("  --temporal-dwt          Enable temporal 3D DWT (GOP-based encoding with temporal transform)\n");
-    printf("  --mesh-warp             Enable mesh-based motion compensation (requires --temporal-dwt, uses optical flow)\n");
+    printf("  --mc-ezbc               Enable MC-EZBC block-based motion compensation (requires --temporal-dwt)\n");
+    printf("  --ezbc                  Enable EZBC (Embedded Zero Block Coding) for significance maps\n");
     printf("  --ictcp                 Use ICtCp colour space instead of YCoCg-R (use when source is in BT.2100)\n");
     printf("  --no-perceptual-tuning  Disable perceptual quantisation\n");
     printf("  --no-dead-zone          Disable dead-zone quantisation (for comparison/testing)\n");
@@ -1935,6 +2328,7 @@ static tav_encoder_t* create_encoder(void) {
     enc->intra_only = 0;
     enc->monoblock = 1;  // Default to monoblock mode
     enc->perceptual_tuning = 1;  // Default to perceptual quantisation (versions 5/6)
+    enc->enable_ezbc = 0;  // Default to twobit-map (EZBC adds overhead for small files)
     enc->channel_layout = CHANNEL_LAYOUT_YCOCG;  // Default to Y-Co-Cg
     enc->audio_bitrate = 0;  // 0 = use quality table
     enc->encode_limit = 0;  // Default: no frame limit
@@ -1956,18 +2350,15 @@ static tav_encoder_t* create_encoder(void) {
     enc->temporal_gop_translation_x = NULL;
     enc->temporal_gop_translation_y = NULL;
 
-    // Mesh warping settings (for 0x13 packets)
-    enc->temporal_enable_mesh_warp = 0;  // Default: disabled (use translation-based 0x12)
-    enc->temporal_mesh_width = 0;  // Will be calculated based on frame dimensions
-    enc->temporal_mesh_height = 0;
-    enc->temporal_gop_mesh_dx = NULL;
-    enc->temporal_gop_mesh_dy = NULL;
-    enc->temporal_gop_flow_fwd_x = NULL;  // Dense flow for reliability masking
-    enc->temporal_gop_flow_fwd_y = NULL;
-    enc->temporal_gop_flow_bwd_x = NULL;
-    enc->temporal_gop_flow_bwd_y = NULL;
-    enc->temporal_mesh_smoothness = 0.5f;  // 50% smoothness weight
-    enc->temporal_mesh_smooth_iterations = 8;  // 8 iterations
+    // MC-EZBC block-based motion compensation settings (for 0x13 packets)
+    enc->temporal_enable_mcezbc = 0;  // Default: disabled (use translation-based 0x12)
+    enc->temporal_block_size = 16;    // 16×16 blocks (standard for MC-EZBC)
+    enc->temporal_num_blocks_x = 0;   // Will be calculated based on frame dimensions
+    enc->temporal_num_blocks_y = 0;
+    enc->temporal_gop_mvs_fwd_x = NULL;
+    enc->temporal_gop_mvs_fwd_y = NULL;
+    enc->temporal_gop_mvs_bwd_x = NULL;
+    enc->temporal_gop_mvs_bwd_y = NULL;
 
     // MPEG-style residual coding settings (for 0x14/0x15 packets)
     enc->enable_residual_coding = 0;  // Default: disabled (use temporal DWT)
@@ -2204,67 +2595,52 @@ static int initialise_encoder(tav_encoder_t *enc) {
         memset(enc->temporal_gop_translation_x, 0, enc->temporal_gop_capacity * sizeof(int16_t));
         memset(enc->temporal_gop_translation_y, 0, enc->temporal_gop_capacity * sizeof(int16_t));
 
-        // Calculate mesh dimensions if mesh warping is enabled
-        if (enc->temporal_enable_mesh_warp) {
-            // Calculate mesh grid: approximately 16×16 pixel cells
-            // For 560×448: 18×14 mesh (252 points), for 1920×1080: 60×34 mesh (2040 points)
-            // Mesh density: 32×32 pixel cells for finer motion modeling
-            // Matches block matching resolution (16×16 blocks → 2 blocks per cell)
-            int mesh_cell_size = 32;  // Pixel size of each mesh cell (finer than 64)
-            enc->temporal_mesh_width = (enc->width + mesh_cell_size - 1) / mesh_cell_size;   // Round up
-            enc->temporal_mesh_height = (enc->height + mesh_cell_size - 1) / mesh_cell_size;
+        // Calculate block dimensions if MC-EZBC is enabled
+        if (enc->temporal_enable_mcezbc) {
+            // Calculate block grid for MC-EZBC
+            // Block size: 16×16 (standard for MC-EZBC and MPEG-style codecs)
+            // For 560×448: 35×28 blocks (980 blocks), for 1920×1080: 120×68 blocks (8160 blocks)
+            enc->temporal_num_blocks_x = (enc->width + enc->temporal_block_size - 1) / enc->temporal_block_size;
+            enc->temporal_num_blocks_y = (enc->height + enc->temporal_block_size - 1) / enc->temporal_block_size;
 
-            // Ensure minimum 2×2 mesh
-            if (enc->temporal_mesh_width < 2) enc->temporal_mesh_width = 2;
-            if (enc->temporal_mesh_height < 2) enc->temporal_mesh_height = 2;
+            int num_blocks = enc->temporal_num_blocks_x * enc->temporal_num_blocks_y;
 
-            int mesh_size = enc->temporal_mesh_width * enc->temporal_mesh_height;
+            // Allocate motion vector arrays for each GOP frame
+            enc->temporal_gop_mvs_fwd_x = malloc(enc->temporal_gop_capacity * sizeof(int16_t*));
+            enc->temporal_gop_mvs_fwd_y = malloc(enc->temporal_gop_capacity * sizeof(int16_t*));
+            enc->temporal_gop_mvs_bwd_x = malloc(enc->temporal_gop_capacity * sizeof(int16_t*));
+            enc->temporal_gop_mvs_bwd_y = malloc(enc->temporal_gop_capacity * sizeof(int16_t*));
 
-            // Allocate mesh arrays for each GOP frame
-            enc->temporal_gop_mesh_dx = malloc(enc->temporal_gop_capacity * sizeof(int16_t*));
-            enc->temporal_gop_mesh_dy = malloc(enc->temporal_gop_capacity * sizeof(int16_t*));
-            // Allocate dense flow arrays for motion estimation
-            enc->temporal_gop_flow_fwd_x = malloc(enc->temporal_gop_capacity * sizeof(float*));
-            enc->temporal_gop_flow_fwd_y = malloc(enc->temporal_gop_capacity * sizeof(float*));
-            enc->temporal_gop_flow_bwd_x = malloc(enc->temporal_gop_capacity * sizeof(float*));
-            enc->temporal_gop_flow_bwd_y = malloc(enc->temporal_gop_capacity * sizeof(float*));
-
-            if (!enc->temporal_gop_mesh_dx || !enc->temporal_gop_mesh_dy ||
-                !enc->temporal_gop_flow_fwd_x || !enc->temporal_gop_flow_fwd_y ||
-                !enc->temporal_gop_flow_bwd_x || !enc->temporal_gop_flow_bwd_y) {
-                fprintf(stderr, "Failed to allocate GOP mesh arrays\n");
+            if (!enc->temporal_gop_mvs_fwd_x || !enc->temporal_gop_mvs_fwd_y ||
+                !enc->temporal_gop_mvs_bwd_x || !enc->temporal_gop_mvs_bwd_y) {
+                fprintf(stderr, "Failed to allocate GOP motion vector arrays\n");
                 return -1;
             }
 
-            // Allocate individual mesh and flow buffers
-            int flow_size = enc->width * enc->height;
+            // Allocate individual motion vector buffers
             for (int i = 0; i < enc->temporal_gop_capacity; i++) {
-                enc->temporal_gop_mesh_dx[i] = malloc(mesh_size * sizeof(int16_t));
-                enc->temporal_gop_mesh_dy[i] = malloc(mesh_size * sizeof(int16_t));
+                enc->temporal_gop_mvs_fwd_x[i] = malloc(num_blocks * sizeof(int16_t));
+                enc->temporal_gop_mvs_fwd_y[i] = malloc(num_blocks * sizeof(int16_t));
+                enc->temporal_gop_mvs_bwd_x[i] = malloc(num_blocks * sizeof(int16_t));
+                enc->temporal_gop_mvs_bwd_y[i] = malloc(num_blocks * sizeof(int16_t));
 
-                // Allocate dense flow buffers for motion estimation
-                enc->temporal_gop_flow_fwd_x[i] = malloc(flow_size * sizeof(float));
-                enc->temporal_gop_flow_fwd_y[i] = malloc(flow_size * sizeof(float));
-                enc->temporal_gop_flow_bwd_x[i] = malloc(flow_size * sizeof(float));
-                enc->temporal_gop_flow_bwd_y[i] = malloc(flow_size * sizeof(float));
-
-                if (!enc->temporal_gop_mesh_dx[i] || !enc->temporal_gop_mesh_dy[i] ||
-                    !enc->temporal_gop_flow_fwd_x[i] || !enc->temporal_gop_flow_fwd_y[i] ||
-                    !enc->temporal_gop_flow_bwd_x[i] || !enc->temporal_gop_flow_bwd_y[i]) {
-                    fprintf(stderr, "Failed to allocate GOP mesh buffers\n");
+                if (!enc->temporal_gop_mvs_fwd_x[i] || !enc->temporal_gop_mvs_fwd_y[i] ||
+                    !enc->temporal_gop_mvs_bwd_x[i] || !enc->temporal_gop_mvs_bwd_y[i]) {
+                    fprintf(stderr, "Failed to allocate GOP motion vector buffers\n");
                     return -1;
                 }
 
                 // Initialize to zero
-                memset(enc->temporal_gop_mesh_dx[i], 0, mesh_size * sizeof(int16_t));
-                memset(enc->temporal_gop_mesh_dy[i], 0, mesh_size * sizeof(int16_t));
+                memset(enc->temporal_gop_mvs_fwd_x[i], 0, num_blocks * sizeof(int16_t));
+                memset(enc->temporal_gop_mvs_fwd_y[i], 0, num_blocks * sizeof(int16_t));
+                memset(enc->temporal_gop_mvs_bwd_x[i], 0, num_blocks * sizeof(int16_t));
+                memset(enc->temporal_gop_mvs_bwd_y[i], 0, num_blocks * sizeof(int16_t));
             }
 
             if (enc->verbose) {
-                printf("Mesh warping enabled: mesh=%dx%d (approx %dx%d px cells), smoothness=%.2f, iterations=%d\n",
-                       enc->temporal_mesh_width, enc->temporal_mesh_height,
-                       enc->width / enc->temporal_mesh_width, enc->height / enc->temporal_mesh_height,
-                       enc->temporal_mesh_smoothness, enc->temporal_mesh_smooth_iterations);
+                printf("MC-EZBC enabled: %dx%d blocks (%d total), block size=%dx%d\n",
+                       enc->temporal_num_blocks_x, enc->temporal_num_blocks_y, num_blocks,
+                       enc->temporal_block_size, enc->temporal_block_size);
             }
         }
 
@@ -2638,220 +3014,10 @@ static void dwt_53_inverse_1d(float *data, int length) {
     free(temp);
 }
 
-// FFT-based phase correlation for global motion estimation
-// Uses FFTW3 to compute cross-power spectrum and find translation peak
-// Returns 1/16-pixel precision translation vectors
-static void phase_correlate_fft(const uint8_t *frame1_rgb, const uint8_t *frame2_rgb,
-                               int width, int height, int16_t *dx_qpel, int16_t *dy_qpel) {
-    // Step 1: Convert RGB to grayscale
-    float *gray1 = fftwf_malloc(width * height * sizeof(float));
-    float *gray2 = fftwf_malloc(width * height * sizeof(float));
-
-    for (int y = 0; y < height; y++) {
-        for (int x = 0; x < width; x++) {
-            int idx = y * width + x;
-            int rgb_idx = idx * 3;
-
-            // ITU-R BT.601 grayscale conversion
-            gray1[idx] = 0.299f * frame1_rgb[rgb_idx] +
-                        0.587f * frame1_rgb[rgb_idx + 1] +
-                        0.114f * frame1_rgb[rgb_idx + 2];
-            gray2[idx] = 0.299f * frame2_rgb[rgb_idx] +
-                        0.587f * frame2_rgb[rgb_idx + 1] +
-                        0.114f * frame2_rgb[rgb_idx + 2];
-        }
-    }
-
-    // Step 2: Plan FFTs (r2c = real to complex)
-    int fft_height = height;
-    int fft_width = width / 2 + 1;  // R2C FFT only stores half + 1 complex values
-
-    fftwf_complex *fft1 = fftwf_malloc(fft_height * fft_width * sizeof(fftwf_complex));
-    fftwf_complex *fft2 = fftwf_malloc(fft_height * fft_width * sizeof(fftwf_complex));
-    fftwf_complex *cross_power = fftwf_malloc(fft_height * fft_width * sizeof(fftwf_complex));
-    float *correlation = fftwf_malloc(width * height * sizeof(float));
-
-    fftwf_plan plan_fwd1 = fftwf_plan_dft_r2c_2d(height, width, gray1, fft1, FFTW_ESTIMATE);
-    fftwf_plan plan_fwd2 = fftwf_plan_dft_r2c_2d(height, width, gray2, fft2, FFTW_ESTIMATE);
-    fftwf_plan plan_inv = fftwf_plan_dft_c2r_2d(height, width, cross_power, correlation, FFTW_ESTIMATE);
-
-    // Step 3: Execute forward FFTs
-    fftwf_execute(plan_fwd1);
-    fftwf_execute(plan_fwd2);
-
-    // Step 4: Compute cross-power spectrum: F1 * conj(F2) / |F1 * conj(F2)|
-    for (int i = 0; i < fft_height * fft_width; i++) {
-        float re1 = fft1[i][0];
-        float im1 = fft1[i][1];
-        float re2 = fft2[i][0];
-        float im2 = fft2[i][1];
-
-        // F1 * conj(F2)
-        float cross_re = re1 * re2 + im1 * im2;
-        float cross_im = im1 * re2 - re1 * im2;
-
-        // Magnitude
-        float mag = sqrtf(cross_re * cross_re + cross_im * cross_im);
-
-        // Normalize (avoid division by zero)
-        if (mag > 1e-10f) {
-            cross_power[i][0] = cross_re / mag;
-            cross_power[i][1] = cross_im / mag;
-        } else {
-            cross_power[i][0] = 0.0f;
-            cross_power[i][1] = 0.0f;
-        }
-    }
-
-    // Step 5: Inverse FFT to get correlation surface
-    fftwf_execute(plan_inv);
-
-    // Step 6: Find peak in correlation surface (integer pixel accuracy)
-    float max_val = -1e30f;
-    int peak_x = 0, peak_y = 0;
-
-    for (int y = 0; y < height; y++) {
-        for (int x = 0; x < width; x++) {
-            float val = correlation[y * width + x];
-            if (val > max_val) {
-                max_val = val;
-                peak_x = x;
-                peak_y = y;
-            }
-        }
-    }
-
-    // Convert to signed displacement (FFT shift: peak at (0,0) means no motion)
-    // Peak in second half means negative motion
-    int dx = peak_x;
-    int dy = peak_y;
-    if (dx > width / 2) dx -= width;
-    if (dy > height / 2) dy -= height;
-
-    // Step 7: Subpixel refinement using parabolic interpolation
-    // Only refine if peak is not at boundary
-    float subpixel_dx = 0.0f;
-    float subpixel_dy = 0.0f;
-
-    if (peak_x > 0 && peak_x < width - 1) {
-        float left = correlation[peak_y * width + ((peak_x - 1 + width) % width)];
-        float center = correlation[peak_y * width + peak_x];
-        float right = correlation[peak_y * width + ((peak_x + 1) % width)];
-
-        // Parabolic fit: offset = (left - right) / (2 * (left - 2*center + right))
-        float denom = 2.0f * (left - 2.0f * center + right);
-        if (fabsf(denom) > 1e-6f) {
-            subpixel_dx = (left - right) / denom;
-            subpixel_dx = CLAMP(subpixel_dx, -0.5f, 0.5f);
-        }
-    }
-
-    if (peak_y > 0 && peak_y < height - 1) {
-        float top = correlation[((peak_y - 1 + height) % height) * width + peak_x];
-        float center = correlation[peak_y * width + peak_x];
-        float bottom = correlation[((peak_y + 1) % height) * width + peak_x];
-
-        float denom = 2.0f * (top - 2.0f * center + bottom);
-        if (fabsf(denom) > 1e-6f) {
-            subpixel_dy = (top - bottom) / denom;
-            subpixel_dy = CLAMP(subpixel_dy, -0.5f, 0.5f);
-        }
-    }
-
-    // Step 8: Convert to 1/16-pixel units (sixteenth-pixel precision)
-    float final_dx = dx + subpixel_dx;
-    float final_dy = dy + subpixel_dy;
-
-    *dx_qpel = (int16_t)roundf(final_dx * 16.0f);
-    *dy_qpel = (int16_t)roundf(final_dy * 16.0f);
-
-    // Cleanup
-    fftwf_destroy_plan(plan_fwd1);
-    fftwf_destroy_plan(plan_fwd2);
-    fftwf_destroy_plan(plan_inv);
-    fftwf_free(gray1);
-    fftwf_free(gray2);
-    fftwf_free(fft1);
-    fftwf_free(fft2);
-    fftwf_free(cross_power);
-    fftwf_free(correlation);
-}
-
-// ==============================================================================
-// Optical Flow and Mesh Warping Functions
-// ==============================================================================
-
-// Forward declaration for optical flow function (implemented in encoder_tav_opencv.cpp)
-// Forward declarations for OpenCV functions (implemented in encoder_tav_opencv.cpp)
-extern void estimate_motion_optical_flow(
-    const uint8_t *frame1_rgb, const uint8_t *frame2_rgb,
-    int width, int height,
-    float **out_flow_x, float **out_flow_y
-);
-
-extern void build_mesh_from_flow(
-    const float *flow_x, const float *flow_y,
-    int width, int height, int mesh_w, int mesh_h,
-    int16_t *mesh_dx, int16_t *mesh_dy
-);
-
-extern void smooth_mesh_laplacian(
-    int16_t *mesh_dx, int16_t *mesh_dy,
-    int temporal_mesh_width, int temporal_mesh_height,
-    float smoothness, int iterations
-);
-
-extern void warp_frame_with_mesh(
-    const float *src_frame, int width, int height,
-    const int16_t *mesh_dx, const int16_t *mesh_dy,
-    int temporal_mesh_width, int temporal_mesh_height,
-    float *dst_frame
-);
 
 // Note: build_mesh_from_flow, smooth_mesh_laplacian, warp_frame_with_mesh,
 // and estimate_motion_optical_flow are implemented in encoder_tav_opencv.cpp
 
-// Apply translation to frame (for frame alignment before temporal DWT)
-// NO PADDING - only extracts the valid region that will be common across all frames
-static void apply_translation(float *frame_data, int width, int height,
-                             int16_t dx_qpel, int16_t dy_qpel, float *output) {
-    // Convert 1/16-pixel to pixel (for now, just use integer translation)
-    int dx = dx_qpel / 16;
-    int dy = dy_qpel / 16;
-
-    // Apply translation WITHOUT padding - just shift the content
-    // Out-of-bounds regions will be cropped away later
-    for (int y = 0; y < height; y++) {
-        for (int x = 0; x < width; x++) {
-            int src_x = x - dx;
-            int src_y = y - dy;
-
-            // Clamp to valid region (this will create edge repetition, but those
-            // edges will be cropped away, so it doesn't matter what we put there)
-            src_x = CLAMP(src_x, 0, width - 1);
-            src_y = CLAMP(src_y, 0, height - 1);
-
-            output[y * width + x] = frame_data[src_y * width + src_x];
-        }
-    }
-}
-
-// Extract cropped region from a frame after alignment
-static void extract_crop(const float *frame_data, int width, int height,
-                        int crop_left, int crop_right, int crop_top, int crop_bottom,
-                        float *cropped_output) {
-    int valid_width = width - crop_left - crop_right;
-    int valid_height = height - crop_top - crop_bottom;
-
-    for (int y = 0; y < valid_height; y++) {
-        for (int x = 0; x < valid_width; x++) {
-            int src_x = x + crop_left;
-            int src_y = y + crop_top;
-            cropped_output[y * valid_width + x] = frame_data[src_y * width + src_x];
-        }
-    }
-}
-
 // =============================================================================
 // Temporal Subband Quantization
 // =============================================================================
@@ -2931,18 +3097,37 @@ static void quantise_3d_dwt_coefficients(tav_encoder_t *enc,
         //   Q_effective = tH_base × spatial_weight
         // Where spatial_weight depends on spatial frequency (LL, LH, HL, HH subbands)
         // This reuses all existing perceptual weighting and dead-zone logic
-        quantise_dwt_coefficients_perceptual_per_coeff(
-            enc,
-            gop_coeffs[t],           // Input: spatial coefficients for this temporal subband
-            quantised[t],            // Output: quantised spatial coefficients
-            spatial_size,            // Number of spatial coefficients
-            temporal_base_quantiser, // Temporally-scaled base quantiser (tH_base)
-            enc->width,              // Frame width
-            enc->height,             // Frame height
-            enc->decomp_levels,      // Spatial decomposition levels (typically 6)
-            is_chroma,               // Is chroma channel (gets additional quantization)
-            enc->frame_count + t     // Frame number (for any frame-dependent logic)
-        );
+        //
+        // CRITICAL: Use no_normalisation variant when EZBC is enabled
+        // - EZBC mode: coefficients must be denormalized (quantize + multiply back)
+        // - Twobit-map mode: coefficients stay normalized (quantize only)
+        if (enc->enable_ezbc) {
+            quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(
+                enc,
+                gop_coeffs[t],           // Input: spatial coefficients for this temporal subband
+                quantised[t],            // Output: quantised spatial coefficients (denormalized for EZBC)
+                spatial_size,            // Number of spatial coefficients
+                temporal_base_quantiser, // Temporally-scaled base quantiser (tH_base)
+                enc->width,              // Frame width
+                enc->height,             // Frame height
+                enc->decomp_levels,      // Spatial decomposition levels (typically 6)
+                is_chroma,               // Is chroma channel (gets additional quantization)
+                enc->frame_count + t     // Frame number (for any frame-dependent logic)
+            );
+        } else {
+            quantise_dwt_coefficients_perceptual_per_coeff(
+                enc,
+                gop_coeffs[t],           // Input: spatial coefficients for this temporal subband
+                quantised[t],            // Output: quantised spatial coefficients (normalized for twobit-map)
+                spatial_size,            // Number of spatial coefficients
+                temporal_base_quantiser, // Temporally-scaled base quantiser (tH_base)
+                enc->width,              // Frame width
+                enc->height,             // Frame height
+                enc->decomp_levels,      // Spatial decomposition levels (typically 6)
+                is_chroma,               // Is chroma channel (gets additional quantization)
+                enc->frame_count + t     // Frame number (for any frame-dependent logic)
+            );
+        }
 
         if (enc->verbose && (t == 0 || t == num_frames - 1)) {
             printf("  Temporal subband %d: level=%d, tH_base=%d\n",
@@ -3015,6 +3200,67 @@ static size_t encode_mesh_differential(
     return bytes_written;
 }
 
+// =============================================================================
+// Block MV Differential Encoding for MC-EZBC
+// =============================================================================
+
+// Encode block motion vectors with temporal and spatial prediction
+// Returns the number of bytes written to output buffer
+// Format:
+//   1. Block grid dimensions (1 byte each: blocks_x, blocks_y)
+//   2. Forward MVs for all blocks (temporal + spatial differential encoding)
+// Note: Backward MVs are computed as negation of forward MVs, so not stored separately
+static size_t encode_block_mvs_differential(
+    int16_t **mvs_x, int16_t **mvs_y,
+    int gop_size, int num_blocks_x, int num_blocks_y,
+    uint8_t *output_buffer, size_t buffer_capacity
+) {
+    int num_blocks = num_blocks_x * num_blocks_y;
+    size_t bytes_written = 0;
+
+    // Write block grid dimensions (1 byte each)
+    if (bytes_written + 2 > buffer_capacity) return 0;
+    uint8_t blocks_x_8 = (uint8_t)num_blocks_x;
+    uint8_t blocks_y_8 = (uint8_t)num_blocks_y;
+    output_buffer[bytes_written++] = blocks_x_8;
+    output_buffer[bytes_written++] = blocks_y_8;
+
+    // Encode forward MVs for all blocks with temporal + spatial prediction
+    for (int t = 0; t < gop_size; t++) {
+        for (int i = 0; i < num_blocks; i++) {
+            int16_t dx = mvs_x[t][i];
+            int16_t dy = mvs_y[t][i];
+
+            // Temporal prediction (from previous frame)
+            if (t > 0) {
+                dx -= mvs_x[t - 1][i];
+                dy -= mvs_y[t - 1][i];
+            }
+
+            // Spatial prediction (from left block)
+            if (i > 0 && (i % num_blocks_x) != 0) {
+                int16_t left_dx = mvs_x[t][i - 1];
+                int16_t left_dy = mvs_y[t][i - 1];
+                if (t > 0) {
+                    left_dx -= mvs_x[t - 1][i - 1];
+                    left_dy -= mvs_y[t - 1][i - 1];
+                }
+                dx -= left_dx;
+                dy -= left_dy;
+            }
+
+            // Write differentially encoded MVs
+            if (bytes_written + 4 > buffer_capacity) return 0;
+            memcpy(output_buffer + bytes_written, &dx, sizeof(int16_t));
+            bytes_written += sizeof(int16_t);
+            memcpy(output_buffer + bytes_written, &dy, sizeof(int16_t));
+            bytes_written += sizeof(int16_t);
+        }
+    }
+
+    return bytes_written;
+}
+
 // Decode mesh motion vectors from differential encoding
 // Returns 0 on success, -1 on error
 // This is the inverse of encode_mesh_differential()
@@ -3789,27 +4035,53 @@ static size_t encode_pframe_residual(tav_encoder_t *enc, int qY) {
     dwt_2d_forward_flexible(residual_co_dwt, enc->width, enc->height, enc->decomp_levels, enc->wavelet_filter);
     dwt_2d_forward_flexible(residual_cg_dwt, enc->width, enc->height, enc->decomp_levels, enc->wavelet_filter);
 
-    // Step 5: Quantize residual coefficients
+    // Step 5: Quantize residual coefficients (skip for EZBC - it handles quantization implicitly)
     int16_t *quantised_y = enc->reusable_quantised_y;
     int16_t *quantised_co = enc->reusable_quantised_co;
     int16_t *quantised_cg = enc->reusable_quantised_cg;
 
-    quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_y_dwt, quantised_y, frame_size,
-                                                  qY, enc->width, enc->height,
-                                                  enc->decomp_levels, 0, 0);
-    quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_co_dwt, quantised_co, frame_size,
-                                                  enc->quantiser_co, enc->width, enc->height,
-                                                  enc->decomp_levels, 1, 0);
-    quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_cg_dwt, quantised_cg, frame_size,
-                                                  enc->quantiser_cg, enc->width, enc->height,
-                                                  enc->decomp_levels, 1, 0);
+    if (enc->enable_ezbc) {
+        // EZBC mode: Quantize with perceptual weighting but no normalization (division by quantizer)
+        // EZBC will compress by encoding only significant bitplanes
+        fprintf(stderr, "[EZBC-QUANT-PFRAME] Using perceptual quantization without normalization\n");
+        quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, residual_y_dwt, quantised_y, frame_size,
+                                                      qY, enc->width, enc->height,
+                                                      enc->decomp_levels, 0, 0);
+        quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, residual_co_dwt, quantised_co, frame_size,
+                                                      enc->quantiser_co, enc->width, enc->height,
+                                                      enc->decomp_levels, 1, 0);
+        quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, residual_cg_dwt, quantised_cg, frame_size,
+                                                      enc->quantiser_cg, enc->width, enc->height,
+                                                      enc->decomp_levels, 1, 0);
+
+        // Print max abs for debug
+        int max_y = 0, max_co = 0, max_cg = 0;
+        for (int i = 0; i < frame_size; i++) {
+            if (abs(quantised_y[i]) > max_y) max_y = abs(quantised_y[i]);
+            if (abs(quantised_co[i]) > max_co) max_co = abs(quantised_co[i]);
+            if (abs(quantised_cg[i]) > max_cg) max_cg = abs(quantised_cg[i]);
+        }
+        fprintf(stderr, "[EZBC-QUANT-PFRAME] Quantized coeff max: Y=%d, Co=%d, Cg=%d\n", max_y, max_co, max_cg);
+    } else {
+        // Twobit-map mode: Use traditional quantization
+        quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_y_dwt, quantised_y, frame_size,
+                                                      qY, enc->width, enc->height,
+                                                      enc->decomp_levels, 0, 0);
+        quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_co_dwt, quantised_co, frame_size,
+                                                      enc->quantiser_co, enc->width, enc->height,
+                                                      enc->decomp_levels, 1, 0);
+        quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_cg_dwt, quantised_cg, frame_size,
+                                                      enc->quantiser_cg, enc->width, enc->height,
+                                                      enc->decomp_levels, 1, 0);
+    }
 
     // Step 6: Preprocess coefficients (significance map compression)
     int total_coeffs = frame_size * 3;  // Y + Co + Cg
     uint8_t *preprocessed = malloc(total_coeffs * sizeof(int16_t) + 1024);  // Extra space for map
-    size_t preprocessed_size = preprocess_coefficients_variable_layout(quantised_y, quantised_co, quantised_cg,
-                                                                      NULL, frame_size, enc->channel_layout,
-                                                                      preprocessed);
+    size_t preprocessed_size = preprocess_coefficients_variable_layout(enc->enable_ezbc, enc->width, enc->height,
+                                                                       quantised_y, quantised_co, quantised_cg,
+                                                                       NULL, frame_size, enc->channel_layout,
+                                                                       preprocessed);
 
     // Step 7: Compress preprocessed coefficients with Zstd
     size_t compressed_bound = ZSTD_compressBound(preprocessed_size);
@@ -4100,9 +4372,10 @@ static size_t encode_pframe_adaptive(tav_encoder_t *enc, int qY) {
     // Step 8: Preprocess coefficients
     int total_coeffs = frame_size * 3;
     uint8_t *preprocessed = malloc(total_coeffs * sizeof(int16_t) + 1024);
-    size_t preprocessed_size = preprocess_coefficients_variable_layout(quantised_y, quantised_co, quantised_cg,
-                                                                      NULL, frame_size, enc->channel_layout,
-                                                                      preprocessed);
+    size_t preprocessed_size = preprocess_coefficients_variable_layout(enc->enable_ezbc, enc->width, enc->height,
+                                                                       quantised_y, quantised_co, quantised_cg,
+                                                                       NULL, frame_size, enc->channel_layout,
+                                                                       preprocessed);
 
     // Step 9: Compress preprocessed coefficients
     size_t compressed_bound = ZSTD_compressBound(preprocessed_size);
@@ -4332,9 +4605,10 @@ static size_t encode_bframe_adaptive(tav_encoder_t *enc, int qY) {
     // Step 8: Preprocess coefficients
     int total_coeffs = frame_size * 3;
     uint8_t *preprocessed = malloc(total_coeffs * sizeof(int16_t) + 1024);
-    size_t preprocessed_size = preprocess_coefficients_variable_layout(quantised_y, quantised_co, quantised_cg,
-                                                                      NULL, frame_size, enc->channel_layout,
-                                                                      preprocessed);
+    size_t preprocessed_size = preprocess_coefficients_variable_layout(enc->enable_ezbc, enc->width, enc->height,
+                                                                       quantised_y, quantised_co, quantised_cg,
+                                                                       NULL, frame_size, enc->channel_layout,
+                                                                       preprocessed);
 
     // Step 9: Compress preprocessed coefficients
     size_t compressed_bound = ZSTD_compressBound(preprocessed_size);
@@ -4411,7 +4685,7 @@ static size_t encode_bframe_adaptive(tav_encoder_t *enc, int qY) {
 
 // Add frame to GOP buffer
 // Returns 0 on success, -1 on error
-static int gop_add_frame(tav_encoder_t *enc, const uint8_t *frame_rgb,
+static int temporal_gop_add_frame(tav_encoder_t *enc, const uint8_t *frame_rgb,
                          const float *frame_y, const float *frame_co, const float *frame_cg) {
     if (!enc->enable_temporal_dwt || enc->temporal_gop_frame_count >= enc->temporal_gop_capacity) {
         return -1;
@@ -4427,69 +4701,53 @@ static int gop_add_frame(tav_encoder_t *enc, const uint8_t *frame_rgb,
     memcpy(enc->temporal_gop_co_frames[frame_idx], frame_co, frame_channel_size);
     memcpy(enc->temporal_gop_cg_frames[frame_idx], frame_cg, frame_channel_size);
 
-    // Compute motion estimation if mesh warping is enabled
-    if (enc->temporal_enable_mesh_warp && frame_idx > 0) {
-        // Compute forward optical flow (F[i-1] → F[i])
-        estimate_motion_optical_flow(
-            enc->temporal_gop_rgb_frames[frame_idx - 1],
-            enc->temporal_gop_rgb_frames[frame_idx],
+    // Compute block-based motion estimation if MC-EZBC is enabled
+    if (enc->temporal_enable_mcezbc && frame_idx > 0) {
+        // Compute forward motion vectors (F[i-1] → F[i]) using optical flow
+        // This uses the proven estimate_optical_flow_motion function from encoder_tav_opencv.cpp
+        estimate_optical_flow_motion(
+            enc->temporal_gop_y_frames[frame_idx],       // Current frame Y channel
+            enc->temporal_gop_y_frames[frame_idx - 1],   // Reference frame Y channel
             enc->width, enc->height,
-            &enc->temporal_gop_flow_fwd_x[frame_idx],
-            &enc->temporal_gop_flow_fwd_y[frame_idx]
+            enc->temporal_block_size,
+            enc->temporal_gop_mvs_fwd_x[frame_idx],      // Output: forward MVs X (1/4-pixel units)
+            enc->temporal_gop_mvs_fwd_y[frame_idx]       // Output: forward MVs Y (1/4-pixel units)
         );
 
-        // Compute backward optical flow (F[i] → F[i-1]) for reliability masking
-        estimate_motion_optical_flow(
-            enc->temporal_gop_rgb_frames[frame_idx],
-            enc->temporal_gop_rgb_frames[frame_idx - 1],
-            enc->width, enc->height,
-            &enc->temporal_gop_flow_bwd_x[frame_idx],
-            &enc->temporal_gop_flow_bwd_y[frame_idx]
-        );
-
-        // Build distortion mesh from forward flow field
-        build_mesh_from_flow(
-            enc->temporal_gop_flow_fwd_x[frame_idx], enc->temporal_gop_flow_fwd_y[frame_idx],
-            enc->width, enc->height,
-            enc->temporal_mesh_width, enc->temporal_mesh_height,
-            enc->temporal_gop_mesh_dx[frame_idx], enc->temporal_gop_mesh_dy[frame_idx]
-        );
-
-        // Apply Laplacian smoothing to regularize mesh
-        smooth_mesh_laplacian(
-            enc->temporal_gop_mesh_dx[frame_idx], enc->temporal_gop_mesh_dy[frame_idx],
-            enc->temporal_mesh_width, enc->temporal_mesh_height,
-            enc->temporal_mesh_smoothness, enc->temporal_mesh_smooth_iterations
-        );
-
-        // Flow is now stored in enc->gop_flow_*[frame_idx], don't free
-        // Mesh dx/dy already set by build_mesh_from_flow() above (translation only)
+        // Compute backward motion vectors (F[i] → F[i-1]) by inverting forward MVs
+        // MC-EZBC uses bidirectional prediction for better temporal decorrelation
+        int num_blocks = enc->temporal_num_blocks_x * enc->temporal_num_blocks_y;
+        for (int i = 0; i < num_blocks; i++) {
+            enc->temporal_gop_mvs_bwd_x[frame_idx][i] = -enc->temporal_gop_mvs_fwd_x[frame_idx][i];
+            enc->temporal_gop_mvs_bwd_y[frame_idx][i] = -enc->temporal_gop_mvs_fwd_y[frame_idx][i];
+        }
 
         if (enc->verbose && (frame_idx < 3 || frame_idx == enc->temporal_gop_capacity - 1)) {
-            // Compute average mesh displacement for verbose output
-            int mesh_points = enc->temporal_mesh_width * enc->temporal_mesh_height;
-            float avg_dx = 0.0f, avg_dy = 0.0f;
-            for (int i = 0; i < mesh_points; i++) {
-                avg_dx += fabsf(enc->temporal_gop_mesh_dx[frame_idx][i] / 8.0f);
-                avg_dy += fabsf(enc->temporal_gop_mesh_dy[frame_idx][i] / 8.0f);
+            // Compute average motion vector magnitude for verbose output
+            float avg_mvx = 0.0f, avg_mvy = 0.0f;
+            for (int i = 0; i < num_blocks; i++) {
+                avg_mvx += fabsf(enc->temporal_gop_mvs_fwd_x[frame_idx][i] / 4.0f);
+                avg_mvy += fabsf(enc->temporal_gop_mvs_fwd_y[frame_idx][i] / 4.0f);
             }
-            avg_dx /= mesh_points;
-            avg_dy /= mesh_points;
-            printf("  GOP frame %d: mesh avg=(%.2f,%.2f)px, mesh=%dx%d\n",
-                   frame_idx, avg_dx, avg_dy,
-                   enc->temporal_mesh_width, enc->temporal_mesh_height);
+            avg_mvx /= num_blocks;
+            avg_mvy /= num_blocks;
+            printf("  GOP frame %d: motion avg=(%.2f,%.2f)px, blocks=%dx%d\n",
+                   frame_idx, avg_mvx, avg_mvy,
+                   enc->temporal_num_blocks_x, enc->temporal_num_blocks_y);
         }
     } else if (frame_idx == 0) {
         // First frame has no motion (reference frame)
-        if (enc->temporal_enable_mesh_warp) {
-            int mesh_points = enc->temporal_mesh_width * enc->temporal_mesh_height;
-            memset(enc->temporal_gop_mesh_dx[0], 0, mesh_points * sizeof(int16_t));
-            memset(enc->temporal_gop_mesh_dy[0], 0, mesh_points * sizeof(int16_t));
+        if (enc->temporal_enable_mcezbc) {
+            int num_blocks = enc->temporal_num_blocks_x * enc->temporal_num_blocks_y;
+            memset(enc->temporal_gop_mvs_fwd_x[0], 0, num_blocks * sizeof(int16_t));
+            memset(enc->temporal_gop_mvs_fwd_y[0], 0, num_blocks * sizeof(int16_t));
+            memset(enc->temporal_gop_mvs_bwd_x[0], 0, num_blocks * sizeof(int16_t));
+            memset(enc->temporal_gop_mvs_bwd_y[0], 0, num_blocks * sizeof(int16_t));
         }
     }
 
-    // Legacy translation vectors (used when mesh warp is disabled)
-    if (!enc->temporal_enable_mesh_warp) {
+    // Legacy translation vectors (used when MC-EZBC is disabled)
+    if (!enc->temporal_enable_mcezbc) {
         enc->temporal_gop_translation_x[frame_idx] = 0.0f;
         enc->temporal_gop_translation_y[frame_idx] = 0.0f;
     }
@@ -4625,9 +4883,9 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
     }
 
     // Step 0.6: Motion compensation note
-    // For mesh warping: MC-lifting integrates motion compensation directly into the lifting steps
+    // For MC-EZBC: MC-lifting integrates motion compensation directly into the lifting steps
     // For translation: still use pre-alignment (old method for backwards compatibility)
-    if (!enc->temporal_enable_mesh_warp) {
+    if (!enc->temporal_enable_mcezbc) {
         // Translation-based motion compensation: align using global translation vectors
         for (int i = 1; i < actual_gop_size; i++) {  // Skip frame 0 (reference frame)
             float *aligned_y = malloc(num_pixels * sizeof(float));
@@ -4661,20 +4919,20 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
             free(aligned_cg);
         }
     } else {
-        // Mesh-based motion compensation uses MC-lifting (integrated into temporal DWT)
+        // MC-EZBC block-based motion compensation uses MC-lifting (integrated into temporal DWT)
         if (enc->verbose) {
-            printf("Using motion-compensated lifting (MC-lifting) (%dx%d mesh)\n",
-                   enc->temporal_mesh_width, enc->temporal_mesh_height);
+            printf("Using motion-compensated lifting (MC-EZBC) (%dx%d blocks)\n",
+                   enc->temporal_num_blocks_x, enc->temporal_num_blocks_y);
         }
     }
 
     // Step 0.7: Expand frames to larger canvas (only for translation-based alignment)
-    // Mesh-based warping doesn't need canvas expansion since the mesh handles local distortions
+    // MC-EZBC doesn't need canvas expansion since motion compensation is integrated into lifting
     int canvas_width = enc->width;
     int canvas_height = enc->height;
     int canvas_pixels = num_pixels;
 
-    if (!enc->temporal_enable_mesh_warp) {
+    if (!enc->temporal_enable_mcezbc) {
         // Calculate expanded canvas size (UNION of all aligned frames)
         canvas_width = enc->width + crop_left + crop_right;   // Original width + total shift range
         canvas_height = enc->height + crop_top + crop_bottom; // Original height + total shift range
@@ -4688,11 +4946,11 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
         }
     } else {
         if (enc->verbose) {
-            printf("Using original frame dimensions (mesh warping handles distortions)\n");
+            printf("Using original frame dimensions (MC-EZBC handles motion compensation)\n");
         }
     }
 
-    // Allocate canvas buffers (expanded for translation, original size for mesh)
+    // Allocate canvas buffers (expanded for translation, original size for MC-EZBC)
     float **canvas_y_coeffs = malloc(actual_gop_size * sizeof(float*));
     float **canvas_co_coeffs = malloc(actual_gop_size * sizeof(float*));
     float **canvas_cg_coeffs = malloc(actual_gop_size * sizeof(float*));
@@ -4702,8 +4960,8 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
         canvas_co_coeffs[i] = calloc(canvas_pixels, sizeof(float));
         canvas_cg_coeffs[i] = calloc(canvas_pixels, sizeof(float));
 
-        if (enc->temporal_enable_mesh_warp) {
-            // Mesh warping: simply copy frames (no expansion needed)
+        if (enc->temporal_enable_mcezbc) {
+            // MC-EZBC: simply copy frames (no expansion needed)
             memcpy(canvas_y_coeffs[i], gop_y_coeffs[i], canvas_pixels * sizeof(float));
             memcpy(canvas_co_coeffs[i], gop_co_coeffs[i], canvas_pixels * sizeof(float));
             memcpy(canvas_cg_coeffs[i], gop_cg_coeffs[i], canvas_pixels * sizeof(float));
@@ -4798,8 +5056,8 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
         // Note: This modifies gop_*_coeffs in-place
         // Use cropped dimensions to encode only the valid region
 
-        if (enc->temporal_enable_mesh_warp) {
-            // Use MC-lifting: motion compensation integrated into lifting steps
+        if (enc->temporal_enable_mcezbc) {
+            // Use MC-EZBC lifting: motion compensation integrated into lifting steps
             dwt_3d_forward_mc(enc, gop_y_coeffs, gop_co_coeffs, gop_cg_coeffs,
                              actual_gop_size, enc->decomp_levels,
                              enc->temporal_decomp_levels, enc->wavelet_filter);
@@ -4916,7 +5174,7 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
     } else {
         // Multi-frame GOP: use unified 3D DWT encoding
         // Choose packet type based on motion compensation method
-        uint8_t packet_type = enc->temporal_enable_mesh_warp ? TAV_PACKET_GOP_UNIFIED_MESH : TAV_PACKET_GOP_UNIFIED;
+        uint8_t packet_type = enc->temporal_enable_mcezbc ? TAV_PACKET_GOP_UNIFIED_MESH : TAV_PACKET_GOP_UNIFIED;
         fwrite(&packet_type, 1, 1, output);
         total_bytes_written += 1;
 
@@ -4925,22 +5183,23 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
         fwrite(&gop_size_byte, 1, 1, output);
         total_bytes_written += 1;
 
-        if (enc->temporal_enable_mesh_warp) {
-            // Packet 0x13: Mesh-based warping (translation only, no affine)
-            // Encode mesh motion vectors and compress with Zstd
-            // Max size: mesh dimensions (4) + translation (4 bytes/cell)
-            size_t max_mesh_size = 4 + (actual_gop_size * enc->temporal_mesh_width * enc->temporal_mesh_height * 4);
-            uint8_t *mesh_buffer = malloc(max_mesh_size);
+        if (enc->temporal_enable_mcezbc) {
+            // Packet 0x13: MC-EZBC block-based motion compensation
+            // Encode block motion vectors and compress with Zstd
+            // Max size: block dimensions (2) + MVs (4 bytes per block × 2 directions)
+            int num_blocks = enc->temporal_num_blocks_x * enc->temporal_num_blocks_y;
+            size_t max_mv_size = 2 + (actual_gop_size * num_blocks * 4 * 2);  // fwd + bwd MVs
+            uint8_t *mv_buffer = malloc(max_mv_size);
 
-            size_t mesh_size = encode_mesh_differential(
-                enc->temporal_gop_mesh_dx, enc->temporal_gop_mesh_dy,
-                actual_gop_size, enc->temporal_mesh_width, enc->temporal_mesh_height,
-                mesh_buffer, max_mesh_size
+            size_t mv_size = encode_block_mvs_differential(
+                enc->temporal_gop_mvs_fwd_x, enc->temporal_gop_mvs_fwd_y,
+                actual_gop_size, enc->temporal_num_blocks_x, enc->temporal_num_blocks_y,
+                mv_buffer, max_mv_size
             );
 
-            if (mesh_size == 0) {
-                fprintf(stderr, "Error: Failed to encode mesh motion vectors\n");
-                free(mesh_buffer);
+            if (mv_size == 0) {
+                fprintf(stderr, "Error: Failed to encode block motion vectors\n");
+                free(mv_buffer);
                 // Free all allocated buffers
                 for (int i = 0; i < actual_gop_size; i++) {
                     free(gop_y_coeffs[i]);
@@ -4959,19 +5218,19 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
                 return 0;
             }
 
-            // Compress mesh data with Zstd
-            size_t max_compressed_mesh = ZSTD_compressBound(mesh_size);
-            uint8_t *compressed_mesh = malloc(max_compressed_mesh);
-            size_t compressed_mesh_size = ZSTD_compress(
-                compressed_mesh, max_compressed_mesh,
-                mesh_buffer, mesh_size,
+            // Compress MV data with Zstd
+            size_t max_compressed_mv = ZSTD_compressBound(mv_size);
+            uint8_t *compressed_mv = malloc(max_compressed_mv);
+            size_t compressed_mv_size = ZSTD_compress(
+                compressed_mv, max_compressed_mv,
+                mv_buffer, mv_size,
                 enc->zstd_level
             );
 
-            if (ZSTD_isError(compressed_mesh_size)) {
-                fprintf(stderr, "Error: Zstd compression failed for mesh data\n");
-                free(mesh_buffer);
-                free(compressed_mesh);
+            if (ZSTD_isError(compressed_mv_size)) {
+                fprintf(stderr, "Error: Zstd compression failed for motion vector data\n");
+                free(mv_buffer);
+                free(compressed_mv);
                 // Free all allocated buffers
                 for (int i = 0; i < actual_gop_size; i++) {
                     free(gop_y_coeffs[i]);
@@ -4990,20 +5249,20 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
                 return 0;
             }
 
-            // Write compressed mesh size and data
-            uint32_t compressed_mesh_size_32 = (uint32_t)compressed_mesh_size;
-            fwrite(&compressed_mesh_size_32, sizeof(uint32_t), 1, output);
-            fwrite(compressed_mesh, 1, compressed_mesh_size, output);
-            total_bytes_written += sizeof(uint32_t) + compressed_mesh_size;
+            // Write compressed MV size and data
+            uint32_t compressed_mv_size_32 = (uint32_t)compressed_mv_size;
+            fwrite(&compressed_mv_size_32, sizeof(uint32_t), 1, output);
+            fwrite(compressed_mv, 1, compressed_mv_size, output);
+            total_bytes_written += sizeof(uint32_t) + compressed_mv_size;
 
             if (enc->verbose) {
-                printf("Mesh data: %zu bytes raw, %zu bytes compressed (%.1f%% compression)\n",
-                       mesh_size, compressed_mesh_size,
-                       100.0 * compressed_mesh_size / mesh_size);
+                printf("Motion vectors: %zu bytes raw, %zu bytes compressed (%.1f%% compression)\n",
+                       mv_size, compressed_mv_size,
+                       100.0 * compressed_mv_size / mv_size);
             }
 
-            free(mesh_buffer);
-            free(compressed_mesh);
+            free(mv_buffer);
+            free(compressed_mv);
         } else {
             // Packet 0x12: Translation-based alignment
             // Write canvas expansion information (4 bytes)
@@ -5033,8 +5292,8 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
         uint8_t *preprocessed_buffer = malloc(max_preprocessed_size);
 
         size_t preprocessed_size = preprocess_gop_unified(
-            quant_y, quant_co, quant_cg,
-            actual_gop_size, num_pixels, enc->channel_layout,
+            enc->enable_ezbc, quant_y, quant_co, quant_cg,
+            actual_gop_size, num_pixels, enc->width, enc->height, enc->channel_layout,
             preprocessed_buffer);
 
         // Compress entire GOP with Zstd (single compression for all frames)
@@ -5128,44 +5387,21 @@ static size_t gop_process_and_flush(tav_encoder_t *enc, FILE *output, int base_q
     // Check for scene changes within the GOP
     if (!force_flush) {
         for (int i = 1; i < enc->temporal_gop_frame_count; i++) {
-            // Compare consecutive frames using RGB data
-            uint8_t *frame1 = enc->temporal_gop_rgb_frames[i - 1];
-            uint8_t *frame2 = enc->temporal_gop_rgb_frames[i];
+            // Compare consecutive frames using unified scene change detection
+            double avg_diff, changed_ratio;
+            int is_scene_change = detect_scene_change_between_frames(
+                enc->temporal_gop_rgb_frames[i - 1],
+                enc->temporal_gop_rgb_frames[i],
+                enc->width,
+                enc->height,
+                &avg_diff,
+                &changed_ratio
+            );
 
-            long long total_diff = 0;
-            int changed_pixels = 0;
-
-            // Sample every 4th pixel for performance (still gives good detection)
-            for (int y = 0; y < enc->height; y += 2) {
-                for (int x = 0; x < enc->width; x += 2) {
-                    int offset = (y * enc->width + x) * 3;
-
-                    // Calculate colour difference
-                    int r_diff = abs(frame2[offset] - frame1[offset]);
-                    int g_diff = abs(frame2[offset + 1] - frame1[offset + 1]);
-                    int b_diff = abs(frame2[offset + 2] - frame1[offset + 2]);
-
-                    int pixel_diff = r_diff + g_diff + b_diff;
-                    total_diff += pixel_diff;
-
-                    // Count significantly changed pixels (threshold of 30 per channel average)
-                    if (pixel_diff > 90) {
-                        changed_pixels++;
-                    }
-                }
-            }
-
-            // Scene change thresholds (same as detect_scene_change)
-            int sampled_pixels = (enc->height / 2) * (enc->width / 2);
-            double avg_diff = (double)total_diff / sampled_pixels;
-            double changed_ratio = (double)changed_pixels / sampled_pixels;
-            double threshold = 0.30;
-
-            // Scene change detected if either threshold exceeded
-            if (changed_ratio > threshold) {
+            if (is_scene_change) {
                 scene_change_frame = i;
                 if (enc->verbose) {
-                    printf("Scene change detected within GOP at frame %d (avg_diff=%.2f, change_ratio=%.2f)\n",
+                    printf("Scene change detected within GOP at frame %d (avg_diff=%.2f, change_ratio=%.4f)\n",
                            frame_numbers[i], avg_diff, changed_ratio);
                 }
                 break;
@@ -5341,130 +5577,157 @@ static void invert_mesh(
            motion_threshold, texture_threshold, fb_threshold);
 }*/
 
-// Motion-compensated lifting: Haar wavelet with SYMMETRIC bi-directional warping
-// Implements predict-update lifting scheme with symmetric motion compensation
+// Simple translation-based frame alignment (legacy, non-MC-EZBC path)
+// Shifts entire frame by (dx, dy) pixels with bilinear interpolation
+static void apply_translation(
+    const float *src, int width, int height,
+    float dx, float dy,
+    float *dst
+) {
+    for (int y = 0; y < height; y++) {
+        for (int x = 0; x < width; x++) {
+            // Source position (backward warping)
+            float src_x = x - dx;
+            float src_y = y - dy;
+
+            // Clamp to valid range
+            if (src_x < 0.0f) src_x = 0.0f;
+            if (src_x >= width - 1) src_x = width - 1.001f;
+            if (src_y < 0.0f) src_y = 0.0f;
+            if (src_y >= height - 1) src_y = height - 1.001f;
+
+            // Bilinear interpolation
+            int x0 = (int)src_x;
+            int y0 = (int)src_y;
+            int x1 = x0 + 1;
+            int y1 = y0 + 1;
+
+            float fx = src_x - x0;
+            float fy = src_y - y0;
+
+            float val00 = src[y0 * width + x0];
+            float val10 = src[y0 * width + x1];
+            float val01 = src[y1 * width + x0];
+            float val11 = src[y1 * width + x1];
+
+            float val_top = (1.0f - fx) * val00 + fx * val10;
+            float val_bot = (1.0f - fx) * val01 + fx * val11;
+            float val = (1.0f - fy) * val_top + fy * val_bot;
+
+            dst[y * width + x] = val;
+        }
+    }
+}
+
+// MC-EZBC Motion-Compensated Lifting (Proper Implementation)
+// Implements the predict-update lifting scheme from MC-EZBC paper
+// Based on MC-EZBC.md documentation
 //
-// Symmetric lifting (both frames meet in the middle):
-//   Predict: H = warp(F1, -½·mesh) - warp(F0, +½·mesh)
-//   Update:  L = 0.5 * (warp(F0, +½·mesh) + warp(F1, -½·mesh))
-//
-// With reliability masking:
-//   For reliable pixels: use MC-lifting (mesh warp)
-//   For unreliable pixels: fall back to plain Haar (H = F1 - F0, L = 0.5*(F0+F1))
+// MC-EZBC Lifting Steps:
+//   Predict: H[i] = F_odd[i] - 0.5 * (warp(F_even[i], MV_fw) + warp(F_even[i+1], MV_bw))
+//   Update:  L[i] = F_even[i] + 0.25 * (warp(H[i-1], MV_bw) + warp(H[i], MV_fw))
 //
 // This produces:
-//   L (lowband): symmetric motion-aligned temporal average (where reliable)
-//   H (highband): symmetric motion-compensated residual (where reliable)
+//   L (lowband): temporal low-pass with motion-compensated update
+//   H (highband): temporal high-pass residual after bidirectional prediction
 //
-// Benefits:
-//   - Symmetric treatment of both frames (no bias)
-//   - Better invertibility (both frames warp equally)
-//   - Lower residual energy (frames meet at temporal midpoint)
-//   - Eliminates errors from occlusions, low-texture regions, and bad matches
+// Benefits over mesh warping:
+//   - Standard block-based approach (proven in JPEG 2000, H.264)
+//   - Perfect invertibility
+//   - Lower computational cost
+//   - Smaller motion vector overhead
 static void mc_lifting_forward_pair(
     tav_encoder_t *enc,
-    float **f0_y, float **f0_co, float **f0_cg,  // Frame 0 (reference)
-    float **f1_y, float **f1_co, float **f1_cg,  // Frame 1 (current)
-    const short *mesh_dx, const short *mesh_dy,  // Forward mesh (F0→F1)
-//    const uint8_t *reliability_mask,             // Reliability mask (1=use MC, 0=plain Haar)
+    float **f0_y, float **f0_co, float **f0_cg,  // F_even (frame at even index)
+    float **f1_y, float **f1_co, float **f1_cg,  // F_odd (frame at odd index)
+    int f0_idx, int f1_idx,                       // Frame indices for MV lookup
     float **out_l_y, float **out_l_co, float **out_l_cg,  // Lowband output
     float **out_h_y, float **out_h_co, float **out_h_cg   // Highband output
 ) {
     int width = enc->width;
     int height = enc->height;
     int num_pixels = width * height;
-    int num_mesh_points = enc->temporal_mesh_width * enc->temporal_mesh_height;
 
-    // Allocate temporary buffers
-    float *warped_f0_fwd_y = malloc(num_pixels * sizeof(float));
-    float *warped_f0_fwd_co = malloc(num_pixels * sizeof(float));
-    float *warped_f0_fwd_cg = malloc(num_pixels * sizeof(float));
+    // Get motion vectors for this frame pair
+    int16_t *mvs_fwd_x = enc->temporal_gop_mvs_fwd_x[f1_idx];  // F0 → F1
+    int16_t *mvs_fwd_y = enc->temporal_gop_mvs_fwd_y[f1_idx];
+    int16_t *mvs_bwd_x = enc->temporal_gop_mvs_bwd_x[f1_idx];  // F1 → F0
+    int16_t *mvs_bwd_y = enc->temporal_gop_mvs_bwd_y[f1_idx];
 
-    float *warped_f1_back_y = malloc(num_pixels * sizeof(float));
-    float *warped_f1_back_co = malloc(num_pixels * sizeof(float));
-    float *warped_f1_back_cg = malloc(num_pixels * sizeof(float));
+    // Allocate temporary buffers for predictions
+    float *pred_y = malloc(num_pixels * sizeof(float));
+    float *pred_co = malloc(num_pixels * sizeof(float));
+    float *pred_cg = malloc(num_pixels * sizeof(float));
 
-    short *half_mesh_dx = malloc(num_mesh_points * sizeof(short));
-    short *half_mesh_dy = malloc(num_mesh_points * sizeof(short));
-    short *neg_half_mesh_dx = malloc(num_mesh_points * sizeof(short));
-    short *neg_half_mesh_dy = malloc(num_mesh_points * sizeof(short));
-
-    if (!warped_f0_fwd_y || !warped_f0_fwd_co || !warped_f0_fwd_cg ||
-        !warped_f1_back_y || !warped_f1_back_co || !warped_f1_back_cg ||
-        !half_mesh_dx || !half_mesh_dy ||
-        !neg_half_mesh_dx || !neg_half_mesh_dy) {
-        fprintf(stderr, "Error: Failed to allocate MC-lifting buffers\n");
-        goto cleanup;
+    if (!pred_y || !pred_co || !pred_cg) {
+        fprintf(stderr, "Error: Failed to allocate MC-EZBC lifting buffers\n");
+        free(pred_y);
+        free(pred_co);
+        free(pred_cg);
+        return;
     }
 
-    // Create half-step meshes for symmetric warping
-    // +½·mesh for F0 forward, -½·mesh for F1 backward
-    for (int i = 0; i < num_mesh_points; i++) {
-        half_mesh_dx[i] = mesh_dx[i] / 2;
-        half_mesh_dy[i] = mesh_dy[i] / 2;
-        neg_half_mesh_dx[i] = -half_mesh_dx[i];
-        neg_half_mesh_dy[i] = -half_mesh_dy[i];
-    }
+    // ===== MC-EZBC PREDICT STEP =====
+    // H = F_odd - 0.5 * (warp(F_even, MV_fw) + warp(F_even, MV_bw))
+    // Use bidirectional prediction for better temporal decorrelation
+    warp_bidirectional(*f0_y, *f1_y, width, height,
+                       mvs_fwd_x, mvs_fwd_y, mvs_bwd_x, mvs_bwd_y,
+                       enc->temporal_block_size, pred_y);
+    warp_bidirectional(*f0_co, *f1_co, width, height,
+                       mvs_fwd_x, mvs_fwd_y, mvs_bwd_x, mvs_bwd_y,
+                       enc->temporal_block_size, pred_co);
+    warp_bidirectional(*f0_cg, *f1_cg, width, height,
+                       mvs_fwd_x, mvs_fwd_y, mvs_bwd_x, mvs_bwd_y,
+                       enc->temporal_block_size, pred_cg);
 
-    // ===== SYMMETRIC PREDICT STEP: H = warp(F1, -½·mesh) - warp(F0, +½·mesh) =====
-    // Warp F0 forward by half-step (toward temporal midpoint)
-    warp_frame_with_mesh(*f0_y, width, height, half_mesh_dx, half_mesh_dy,
-                         enc->temporal_mesh_width, enc->temporal_mesh_height, warped_f0_fwd_y);
-    warp_frame_with_mesh(*f0_co, width, height, half_mesh_dx, half_mesh_dy,
-                         enc->temporal_mesh_width, enc->temporal_mesh_height, warped_f0_fwd_co);
-    warp_frame_with_mesh(*f0_cg, width, height, half_mesh_dx, half_mesh_dy,
-                         enc->temporal_mesh_width, enc->temporal_mesh_height, warped_f0_fwd_cg);
-
-    // Warp F1 backward by half-step (toward temporal midpoint)
-    warp_frame_with_mesh(*f1_y, width, height, neg_half_mesh_dx, neg_half_mesh_dy,
-                         enc->temporal_mesh_width, enc->temporal_mesh_height, warped_f1_back_y);
-    warp_frame_with_mesh(*f1_co, width, height, neg_half_mesh_dx, neg_half_mesh_dy,
-                         enc->temporal_mesh_width, enc->temporal_mesh_height, warped_f1_back_co);
-    warp_frame_with_mesh(*f1_cg, width, height, neg_half_mesh_dx, neg_half_mesh_dy,
-                         enc->temporal_mesh_width, enc->temporal_mesh_height, warped_f1_back_cg);
-
-    float ALPHA = 0.5f; // lowering the alpha also drops the visual quality
-
-    // Compute temporal highband and lowband with selective MC
-    // For reliable pixels: use MC-lifting (warped frames)
-    // For unreliable pixels: fall back to plain Haar (original frames)
+    // Compute high-pass (temporal residual)
     for (int i = 0; i < num_pixels; i++) {
-//        if (reliability_mask[i]) {
-            // ===== RELIABLE: MC-LIFTING =====
-            // Predict: H = warp(F1, -½·mesh) - warp(F0, +½·mesh)
-            (*out_h_y)[i] = warped_f1_back_y[i] - warped_f0_fwd_y[i];
-            (*out_h_co)[i] = warped_f1_back_co[i] - warped_f0_fwd_co[i];
-            (*out_h_cg)[i] = warped_f1_back_cg[i] - warped_f0_fwd_cg[i];
-
-            // Update: L = 0.5 * (warp(F0, +½·mesh) + warp(F1, -½·mesh))
-            (*out_l_y)[i] = ALPHA * (warped_f0_fwd_y[i] + warped_f1_back_y[i]);
-            (*out_l_co)[i] = ALPHA * (warped_f0_fwd_co[i] + warped_f1_back_co[i]);
-            (*out_l_cg)[i] = ALPHA * (warped_f0_fwd_cg[i] + warped_f1_back_cg[i]);
-        /*} else {
-            // ===== UNRELIABLE: PLAIN HAAR (no warping) =====
-            // Predict: H = F1 - F0
-            (*out_h_y)[i] = (*f1_y)[i] - (*f0_y)[i];
-            (*out_h_co)[i] = (*f1_co)[i] - (*f0_co)[i];
-            (*out_h_cg)[i] = (*f1_cg)[i] - (*f0_cg)[i];
-
-            // Update: L = 0.5 * (F0 + F1)
-            (*out_l_y)[i] = ALPHA * ((*f0_y)[i] + (*f1_y)[i]);
-            (*out_l_co)[i] = ALPHA * ((*f0_co)[i] + (*f1_co)[i]);
-            (*out_l_cg)[i] = ALPHA * ((*f0_cg)[i] + (*f1_cg)[i]);
-        }*/
+        (*out_h_y)[i] = (*f1_y)[i] - pred_y[i];
+        (*out_h_co)[i] = (*f1_co)[i] - pred_co[i];
+        (*out_h_cg)[i] = (*f1_cg)[i] - pred_cg[i];
     }
 
-cleanup:
-    free(warped_f0_fwd_y);
-    free(warped_f0_fwd_co);
-    free(warped_f0_fwd_cg);
-    free(warped_f1_back_y);
-    free(warped_f1_back_co);
-    free(warped_f1_back_cg);
-    free(half_mesh_dx);
-    free(half_mesh_dy);
-    free(neg_half_mesh_dx);
-    free(neg_half_mesh_dy);
+    // ===== MC-EZBC UPDATE STEP =====
+    // L = F_even + 0.25 * warp(H, MV_bw)
+    // (Note: In full implementation, this would use both H[i-1] and H[i],
+    // but for single-level decomposition, we only have current H)
+    float *update_y = malloc(num_pixels * sizeof(float));
+    float *update_co = malloc(num_pixels * sizeof(float));
+    float *update_cg = malloc(num_pixels * sizeof(float));
+
+    if (!update_y || !update_co || !update_cg) {
+        fprintf(stderr, "Error: Failed to allocate MC-EZBC update buffers\n");
+        free(pred_y);
+        free(pred_co);
+        free(pred_cg);
+        free(update_y);
+        free(update_co);
+        free(update_cg);
+        return;
+    }
+
+    // Warp H (high-pass) back to F_even using backward MVs
+    warp_block_motion(*out_h_y, width, height, mvs_bwd_x, mvs_bwd_y,
+                      enc->temporal_block_size, update_y);
+    warp_block_motion(*out_h_co, width, height, mvs_bwd_x, mvs_bwd_y,
+                      enc->temporal_block_size, update_co);
+    warp_block_motion(*out_h_cg, width, height, mvs_bwd_x, mvs_bwd_y,
+                      enc->temporal_block_size, update_cg);
+
+    // Compute low-pass (temporal approximation)
+    for (int i = 0; i < num_pixels; i++) {
+        (*out_l_y)[i] = (*f0_y)[i] + 0.25f * update_y[i];
+        (*out_l_co)[i] = (*f0_co)[i] + 0.25f * update_co[i];
+        (*out_l_cg)[i] = (*f0_cg)[i] + 0.25f * update_cg[i];
+    }
+
+    // Cleanup
+    free(pred_y);
+    free(pred_co);
+    free(pred_cg);
+    free(update_y);
+    free(update_co);
+    free(update_cg);
 }
 
 // Apply 1D temporal DWT along time axis for a spatial location (encoder side)
@@ -5525,39 +5788,16 @@ static void dwt_3d_forward_mc(
 
             if (f1_idx >= level_frames) break;
 
-            // Get mesh for this frame pair (mesh[f1] describes motion from f0 to f1)
-            const short *mesh_dx = enc->temporal_gop_mesh_dx[f1_idx];
-            const short *mesh_dy = enc->temporal_gop_mesh_dy[f1_idx];
-
-            // Build reliability mask for selective motion compensation
-            /*uint8_t *reliability_mask = malloc(num_pixels * sizeof(uint8_t));
-            if (reliability_mask && enc->temporal_gop_flow_fwd_x[f1_idx] && enc->temporal_gop_flow_bwd_x[f1_idx]) {
-                build_reliability_mask(
-                    enc->temporal_gop_rgb_frames[f0_idx], enc->temporal_gop_rgb_frames[f1_idx],
-                    enc->temporal_gop_flow_fwd_x[f1_idx], enc->temporal_gop_flow_fwd_y[f1_idx],
-                    enc->temporal_gop_flow_bwd_x[f1_idx], enc->temporal_gop_flow_bwd_y[f1_idx],
-                    width, height,
-                    reliability_mask
-                );
-            } else {
-                // Fallback: use MC everywhere (all pixels reliable)
-                if (reliability_mask) {
-                    memset(reliability_mask, 1, num_pixels * sizeof(uint8_t));
-                }
-            }*/
-
-            // Apply MC-lifting: (L, H) = mc_lift(F0, F1, mesh, mask)
+            // Apply MC-EZBC lifting: (L, H) = mc_lift_ezbc(F0, F1, MVs)
+            // Motion vectors are stored per frame and looked up by frame index
             mc_lifting_forward_pair(
                 enc,
-                &gop_y[f0_idx], &gop_co[f0_idx], &gop_cg[f0_idx],  // F0
-                &gop_y[f1_idx], &gop_co[f1_idx], &gop_cg[f1_idx],  // F1
-                mesh_dx, mesh_dy,
-//                reliability_mask,                                    // Reliability mask
+                &gop_y[f0_idx], &gop_co[f0_idx], &gop_cg[f0_idx],  // F_even
+                &gop_y[f1_idx], &gop_co[f1_idx], &gop_cg[f1_idx],  // F_odd
+                f0_idx, f1_idx,                                     // Frame indices for MV lookup
                 &temp_l_y[i/2], &temp_l_co[i/2], &temp_l_cg[i/2],  // L output
                 &temp_h_y[level_frames/2 + i/2], &temp_h_co[level_frames/2 + i/2], &temp_h_cg[level_frames/2 + i/2]  // H output
             );
-
-//            free(reliability_mask);
         }
 
         // Copy L and H bands back to gop buffers for next level
@@ -6014,11 +6254,10 @@ static void dwt_2d_haar_inverse_flexible(float *tile_data, int width, int height
     free(temp_col);
 }
 
-// Variable channel layout preprocessing for concatenated maps
 // Significance Map v2.1 (twobit-map): 2 bits per coefficient
 // 00=zero, 01=+1, 10=-1, 11=other (stored as int16)
-static size_t preprocess_coefficients_variable_layout(int16_t *coeffs_y, int16_t *coeffs_co, int16_t *coeffs_cg, int16_t *coeffs_alpha,
-                                                     int coeff_count, int channel_layout, uint8_t *output_buffer) {
+static size_t preprocess_coefficients_twobitmap(int16_t *coeffs_y, int16_t *coeffs_co, int16_t *coeffs_cg, int16_t *coeffs_alpha,
+                                                int coeff_count, int channel_layout, uint8_t *output_buffer) {
     const channel_layout_config_t *config = &channel_layouts[channel_layout];
     int map_bytes = (coeff_count * 2 + 7) / 8;  // 2 bits per coefficient
     int total_maps = config->num_channels;
@@ -6106,12 +6345,88 @@ static size_t preprocess_coefficients_variable_layout(int16_t *coeffs_y, int16_t
     return map_bytes * total_maps + total_others * sizeof(int16_t);
 }
 
+// EZBC preprocessing: encode each channel with embedded zero block coding
+static size_t preprocess_coefficients_ezbc(int16_t *coeffs_y, int16_t *coeffs_co, int16_t *coeffs_cg, int16_t *coeffs_alpha,
+                                            int coeff_count, int width, int height, int channel_layout,
+                                            uint8_t *output_buffer) {
+    const channel_layout_config_t *config = &channel_layouts[channel_layout];
+    size_t total_size = 0;
+    uint8_t *write_ptr = output_buffer;
+
+    // Encode each active channel separately with EZBC
+    int16_t *channel_coeffs[4] = {coeffs_y, coeffs_co, coeffs_cg, coeffs_alpha};
+    int channel_active[4] = {config->has_y, config->has_co, config->has_cg, config->has_alpha};
+
+    for (int ch = 0; ch < 4; ch++) {
+        if (!channel_active[ch] || !channel_coeffs[ch]) continue;
+
+        // Encode this channel with EZBC
+        uint8_t *ezbc_data = NULL;
+        size_t ezbc_size = encode_channel_ezbc(channel_coeffs[ch], coeff_count, width, height, &ezbc_data);
+
+        // Write size header (uint32_t) for this channel
+        *((uint32_t*)write_ptr) = (uint32_t)ezbc_size;
+        write_ptr += sizeof(uint32_t);
+        total_size += sizeof(uint32_t);
+
+        // Copy EZBC-encoded data
+        memcpy(write_ptr, ezbc_data, ezbc_size);
+        write_ptr += ezbc_size;
+        total_size += ezbc_size;
+
+        // Free EZBC buffer
+        free(ezbc_data);
+    }
+
+    return total_size;
+}
+
+// Wrapper: select between EZBC and twobit-map based on encoder settings
+static size_t preprocess_coefficients_variable_layout(int enable_ezbc, int width, int height,
+                                                       int16_t *coeffs_y, int16_t *coeffs_co, int16_t *coeffs_cg, int16_t *coeffs_alpha,
+                                                       int coeff_count, int channel_layout, uint8_t *output_buffer) {
+    if (enable_ezbc) {
+        return preprocess_coefficients_ezbc(coeffs_y, coeffs_co, coeffs_cg, coeffs_alpha,
+                                             coeff_count, width, height, channel_layout, output_buffer);
+    } else {
+        return preprocess_coefficients_twobitmap(coeffs_y, coeffs_co, coeffs_cg, coeffs_alpha,
+                                                 coeff_count, channel_layout, output_buffer);
+    }
+}
+
 // Unified GOP preprocessing: single significance map for all frames and channels
-// Layout: [All_Y_maps][All_Co_maps][All_Cg_maps][All_Y_values][All_Co_values][All_Cg_values]
+// Layout (twobit-map): [All_Y_maps][All_Co_maps][All_Cg_maps][All_Y_values][All_Co_values][All_Cg_values]
+// Layout (EZBC): [frame0_size(4)][frame0_ezbc][frame1_size(4)][frame1_ezbc]...
 // This enables optimal cross-frame compression in the temporal dimension
-static size_t preprocess_gop_unified(int16_t **quant_y, int16_t **quant_co, int16_t **quant_cg,
-                                     int num_frames, int num_pixels, int channel_layout,
+static size_t preprocess_gop_unified(int enable_ezbc, int16_t **quant_y, int16_t **quant_co, int16_t **quant_cg,
+                                     int num_frames, int num_pixels, int width, int height, int channel_layout,
                                      uint8_t *output_buffer) {
+    // EZBC mode: encode each frame separately with EZBC
+    if (enable_ezbc) {
+        size_t total_size = 0;
+        uint8_t *write_ptr = output_buffer;
+
+        for (int frame = 0; frame < num_frames; frame++) {
+            // Encode this frame with EZBC
+            size_t frame_size = preprocess_coefficients_ezbc(
+                quant_y ? quant_y[frame] : NULL,
+                quant_co ? quant_co[frame] : NULL,
+                quant_cg ? quant_cg[frame] : NULL,
+                NULL,  // No alpha in GOP mode
+                num_pixels, width, height, channel_layout,
+                write_ptr + sizeof(uint32_t)  // Leave space for size header
+            );
+
+            // Write frame size header
+            *((uint32_t*)write_ptr) = (uint32_t)frame_size;
+            write_ptr += sizeof(uint32_t) + frame_size;
+            total_size += sizeof(uint32_t) + frame_size;
+        }
+
+        return total_size;
+    }
+
+    // Twobit-map mode: original unified GOP preprocessing
     const channel_layout_config_t *config = &channel_layouts[channel_layout];
     const int map_bytes_per_frame = (num_pixels * 2 + 7) / 8;  // 2 bits per coefficient
     const int total_coeffs = num_pixels * num_frames;
@@ -6549,6 +6864,65 @@ static void quantise_dwt_coefficients_perceptual_per_coeff(tav_encoder_t *enc,
     }
 }
 
+// Quantization for EZBC mode: quantizes to discrete levels but doesn't normalize (shrink) values
+// This reduces coefficient precision while preserving magnitude for EZBC's bitplane encoding
+static void quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(tav_encoder_t *enc,
+                                                          float *coeffs, int16_t *quantised, int size,
+                                                          int base_quantiser, int width, int height,
+                                                          int decomp_levels, int is_chroma, int frame_count) {
+    (void)frame_count;  // Unused parameter
+
+    float effective_base_q = base_quantiser;
+    effective_base_q = FCLAMP(effective_base_q, 1.0f, 4096.0f);
+
+    for (int i = 0; i < size; i++) {
+        // Apply perceptual weight based on coefficient's position in DWT layout
+        float weight = get_perceptual_weight_for_position(enc, i, width, height, decomp_levels, is_chroma);
+        float effective_q = effective_base_q * weight;
+
+        // Step 1: Quantize - divide by quantizer to get normalized value
+        float quantised_val = coeffs[i] / effective_q;
+
+        // Step 2: Apply dead-zone quantization to normalized value
+        if (enc->dead_zone_threshold > 0.0f && !is_chroma) {
+            int level = get_subband_level(i, width, height, decomp_levels);
+            int subband_type = get_subband_type(i, width, height, decomp_levels);
+            float level_threshold = 0.0f;
+
+            if (level == 1) {
+                // Finest level (level 1: 280x224)
+                if (subband_type == 3) {
+                    // HH1: full dead-zone
+                    level_threshold = enc->dead_zone_threshold * DEAD_ZONE_FINEST_SCALE;
+                } else if (subband_type == 1 || subband_type == 2) {
+                    // LH1, HL1: half dead-zone
+                    level_threshold = enc->dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
+                }
+            } else if (level == 2) {
+                // Second-finest level (level 2: 140x112)
+                if (subband_type == 3) {
+                    // HH2: half dead-zone
+                    level_threshold = enc->dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
+                }
+                // LH2, HL2: no dead-zone
+            }
+            // Coarser levels (3-6): no dead-zone to preserve structural information
+
+            if (fabsf(quantised_val) <= level_threshold) {
+                quantised_val = 0.0f;
+            }
+        }
+
+        // Step 3: Round to discrete quantization levels
+        quantised_val = roundf(quantised_val);
+
+        // Step 4: Denormalize - multiply back by quantizer to restore magnitude
+        // This gives us quantized values at original scale (not shrunken to 0-10 range)
+        float denormalized = quantised_val * effective_q;
+
+        quantised[i] = (int16_t)CLAMP((int)denormalized, -32768, 32767);
+    }
+}
 
 
 // Convert 2D spatial DWT layout to linear subband layout (for decoder compatibility)
@@ -6657,7 +7031,22 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
 
     if (mode == TAV_MODE_INTRA) {
         // INTRA mode: quantise coefficients directly and store for future reference
-        if (enc->perceptual_tuning) {
+        if (enc->enable_ezbc) {
+            // EZBC mode: Quantize with perceptual weighting but no normalization (division by quantizer)
+            fprintf(stderr, "[EZBC-QUANT-INTRA] Using perceptual quantization without normalization\n");
+            quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, (float*)tile_y_data, quantised_y, tile_size, this_frame_qY, enc->width, enc->height, enc->decomp_levels, 0, enc->frame_count);
+            quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, (float*)tile_co_data, quantised_co, tile_size, this_frame_qCo, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count);
+            quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, (float*)tile_cg_data, quantised_cg, tile_size, this_frame_qCg, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count);
+
+            // Print max abs for debug
+            int max_y = 0, max_co = 0, max_cg = 0;
+            for (int i = 0; i < tile_size; i++) {
+                if (abs(quantised_y[i]) > max_y) max_y = abs(quantised_y[i]);
+                if (abs(quantised_co[i]) > max_co) max_co = abs(quantised_co[i]);
+                if (abs(quantised_cg[i]) > max_cg) max_cg = abs(quantised_cg[i]);
+            }
+            fprintf(stderr, "[EZBC-QUANT-INTRA] Quantized coeff max: Y=%d, Co=%d, Cg=%d\n", max_y, max_co, max_cg);
+        } else if (enc->perceptual_tuning) {
             // Perceptual quantisation: EXACTLY like uniform but with per-coefficient weights
             quantise_dwt_coefficients_perceptual_per_coeff(enc, (float*)tile_y_data, quantised_y, tile_size, this_frame_qY, enc->width, enc->height, enc->decomp_levels, 0, enc->frame_count);
             quantise_dwt_coefficients_perceptual_per_coeff(enc, (float*)tile_co_data, quantised_co, tile_size, this_frame_qCo, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count);
@@ -6764,8 +7153,9 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
     }*/
 
     // Preprocess and write quantised coefficients using variable channel layout concatenated significance maps
-    size_t total_compressed_size = preprocess_coefficients_variable_layout(quantised_y, quantised_co, quantised_cg, NULL,
-                                                                          tile_size, enc->channel_layout, buffer + offset);
+    size_t total_compressed_size = preprocess_coefficients_variable_layout(enc->enable_ezbc, enc->width, enc->height,
+                                                                           quantised_y, quantised_co, quantised_cg, NULL,
+                                                                           tile_size, enc->channel_layout, buffer + offset);
     offset += total_compressed_size;
 
     // DEBUG: Dump raw DWT coefficients for specified frame when it's an intra-frame
@@ -8392,25 +8782,33 @@ static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output) {
 }
 
 // Detect scene changes by analysing frame differences
-static int detect_scene_change(tav_encoder_t *enc) {
-    if (!enc->current_frame_rgb || enc->intra_only) {
-        return 0; // No current frame to compare
+// Unified scene change detection comparing two RGB frames
+// Returns 1 if scene change detected, 0 otherwise
+// Also outputs avg_diff and changed_ratio through pointers if non-NULL
+static int detect_scene_change_between_frames(
+    const uint8_t *frame1_rgb,
+    const uint8_t *frame2_rgb,
+    int width,
+    int height,
+    double *out_avg_diff,
+    double *out_changed_ratio
+) {
+    if (!frame1_rgb || !frame2_rgb) {
+        return 0; // No frames to compare
     }
 
-    uint8_t *comparison_buffer = enc->previous_frame_rgb;
-
     long long total_diff = 0;
     int changed_pixels = 0;
 
     // Sample every 4th pixel for performance (still gives good detection)
-    for (int y = 0; y < enc->height; y += 2) {
-        for (int x = 0; x < enc->width; x += 2) {
-            int offset = (y * enc->width + x) * 3;
+    for (int y = 0; y < height; y += 2) {
+        for (int x = 0; x < width; x += 2) {
+            int offset = (y * width + x) * 3;
 
             // Calculate colour difference
-            int r_diff = abs(enc->current_frame_rgb[offset] - comparison_buffer[offset]);
-            int g_diff = abs(enc->current_frame_rgb[offset + 1] - comparison_buffer[offset + 1]);
-            int b_diff = abs(enc->current_frame_rgb[offset + 2] - comparison_buffer[offset + 2]);
+            int r_diff = abs(frame2_rgb[offset] - frame1_rgb[offset]);
+            int g_diff = abs(frame2_rgb[offset + 1] - frame1_rgb[offset + 1]);
+            int b_diff = abs(frame2_rgb[offset + 2] - frame1_rgb[offset + 2]);
 
             int pixel_diff = r_diff + g_diff + b_diff;
             total_diff += pixel_diff;
@@ -8423,19 +8821,41 @@ static int detect_scene_change(tav_encoder_t *enc) {
     }
 
     // Calculate metrics for scene change detection
-    int sampled_pixels = (enc->height / 2) * (enc->width / 2);
+    int sampled_pixels = (height / 2) * (width / 2);
     double avg_diff = (double)total_diff / sampled_pixels;
     double changed_ratio = (double)changed_pixels / sampled_pixels;
 
-    if (enc->verbose) {
+    // Output metrics if requested
+    if (out_avg_diff) *out_avg_diff = avg_diff;
+    if (out_changed_ratio) *out_changed_ratio = changed_ratio;
+
+    // Scene change threshold
+    double threshold = 0.75;
+
+    return changed_ratio > threshold;
+}
+
+// Wrapper for normal mode: compare current frame with previous frame
+static int detect_scene_change(tav_encoder_t *enc) {
+    if (!enc->current_frame_rgb || enc->intra_only) {
+        return 0; // No current frame to compare
+    }
+
+    double avg_diff, changed_ratio;
+    int is_scene_change = detect_scene_change_between_frames(
+        enc->previous_frame_rgb,
+        enc->current_frame_rgb,
+        enc->width,
+        enc->height,
+        &avg_diff,
+        &changed_ratio
+    );
+
+    if (is_scene_change) {
         printf("Scene change detection: avg_diff=%.2f\tchanged_ratio=%.4f\n", avg_diff, changed_ratio);
     }
 
-    // Scene change thresholds - adjust for interlaced mode
-    // Interlaced fields have more natural differences due to temporal field separation
-    double threshold = 0.30;
-
-    return changed_ratio > threshold;
+    return is_scene_change;
 }
 
 // Detect still frames by comparing quantised DWT coefficients
@@ -8605,11 +9025,12 @@ int main(int argc, char *argv[]) {
         {"delta-haar", required_argument, 0, 1018},
         {"temporal-dwt", no_argument, 0, 1019},
         {"temporal-3d", no_argument, 0, 1019},
-        {"mesh-warp", no_argument, 0, 1020},
+        {"mc-ezbc", no_argument, 0, 1020},
         {"residual-coding", no_argument, 0, 1021},
         {"adaptive-blocks", no_argument, 0, 1022},
         {"bframes", required_argument, 0, 1023},
         {"gop-size", required_argument, 0, 1024},
+        {"ezbc", no_argument, 0, 1025},
         {"help", no_argument, 0, '?'},
         {0, 0, 0, 0}
     };
@@ -8776,9 +9197,9 @@ int main(int argc, char *argv[]) {
                 enc->enable_temporal_dwt = 1;
                 printf("Temporal 3D DWT encoding enabled (GOP size: %d frames)\n", TEMPORAL_GOP_SIZE);
                 break;
-            case 1020: // --mesh-warp
-                enc->temporal_enable_mesh_warp = 1;
-                printf("Mesh-based motion compensation enabled (requires --temporal-dwt)\n");
+            case 1020: // --mc-ezbc
+                enc->temporal_enable_mcezbc = 1;
+                printf("MC-EZBC block-based motion compensation enabled (requires --temporal-dwt)\n");
                 break;
             case 1021: // --residual-coding
                 enc->use_delta_encoding = 0; // Mutually exclusive with delta encoding
@@ -8815,6 +9236,10 @@ int main(int argc, char *argv[]) {
                 }
                 printf("GOP size set to %d frames\n", enc->residual_coding_gop_size);
                 break;
+            case 1025: // --ezbc
+                enc->enable_ezbc = 1;
+                printf("EZBC (Embedded Zero Block Coding) enabled for significance maps\n");
+                break;
             case 'a':
                 int bitrate = atoi(optarg);
                 int valid_bitrate = validate_mp2_bitrate(bitrate);
@@ -9155,7 +9580,7 @@ int main(int argc, char *argv[]) {
 
         if (enc->enable_temporal_dwt) {
             // GOP-based temporal 3D DWT encoding path
-            int add_result = gop_add_frame(enc, enc->current_frame_rgb,
+            int add_result = temporal_gop_add_frame(enc, enc->current_frame_rgb,
                                           enc->current_frame_y, enc->current_frame_co, enc->current_frame_cg);
 
             if (add_result != 0) {
diff --git a/video_encoder/encoder_tav_opencv.cpp b/video_encoder/encoder_tav_opencv.cpp
index 40e6898..3ad8162 100644
--- a/video_encoder/encoder_tav_opencv.cpp
+++ b/video_encoder/encoder_tav_opencv.cpp
@@ -9,413 +9,6 @@
 
 extern "C" {
 
-// Helper: Compute SAD (Sum of Absolute Differences) for a block
-static int compute_sad(
-    const unsigned char *ref, const unsigned char *cur,
-    int ref_x, int ref_y, int cur_x, int cur_y,
-    int width, int height, int block_size
-) {
-    int sad = 0;
-    for (int by = 0; by < block_size; by++) {
-        for (int bx = 0; bx < block_size; bx++) {
-            int ry = ref_y + by;
-            int rx = ref_x + bx;
-            int cy = cur_y + by;
-            int cx = cur_x + bx;
-
-            // Boundary check
-            if (rx < 0 || rx >= width || ry < 0 || ry >= height ||
-                cx < 0 || cx >= width || cy < 0 || cy >= height) {
-                sad += 255;  // Penalty for out-of-bounds
-                continue;
-            }
-
-            int ref_val = ref[ry * width + rx];
-            int cur_val = cur[cy * width + cx];
-            sad += abs(ref_val - cur_val);
-        }
-    }
-    return sad;
-}
-
-// Parabolic interpolation for sub-pixel refinement
-// Given SAD values at positions (-1, 0, +1), estimate peak location
-static float parabolic_interp(int sad_m1, int sad_0, int sad_p1) {
-    // Fit parabola: y = a*x^2 + b*x + c
-    // Peak at x = -b/(2a) = (sad_m1 - sad_p1) / (2*(sad_m1 - 2*sad_0 + sad_p1))
-    int denom = 2 * (sad_m1 - 2 * sad_0 + sad_p1);
-    if (denom == 0) return 0.0f;
-
-    float offset = (float)(sad_m1 - sad_p1) / denom;
-    // Clamp to ±0.5 for reasonable sub-pixel values
-    if (offset < -0.5f) offset = -0.5f;
-    if (offset > 0.5f) offset = 0.5f;
-
-    return offset;
-}
-
-// Diamond search pattern for integer-pixel motion estimation
-static void diamond_search(
-    const unsigned char *ref, const unsigned char *cur,
-    int cx, int cy, int width, int height, int block_size,
-    int search_range, int *best_dx, int *best_dy
-) {
-    // Large diamond pattern (distance 2)
-    const int large_diamond[8][2] = {
-        {0, -2}, {-1, -1}, {1, -1}, {-2, 0},
-        {2, 0}, {-1, 1}, {1, 1}, {0, 2}
-    };
-
-    // Small diamond pattern (distance 1)
-    const int small_diamond[4][2] = {
-        {0, -1}, {-1, 0}, {1, 0}, {0, 1}
-    };
-
-    int dx = 0, dy = 0;
-    int best_sad = compute_sad(ref, cur, cx + dx, cy + dy, cx, cy, width, height, block_size);
-
-    // Large diamond search
-    bool improved = true;
-    while (improved) {
-        improved = false;
-        for (int i = 0; i < 8; i++) {
-            int test_dx = dx + large_diamond[i][0];
-            int test_dy = dy + large_diamond[i][1];
-
-            if (abs(test_dx) > search_range || abs(test_dy) > search_range) {
-                continue;
-            }
-
-            int sad = compute_sad(ref, cur, cx + test_dx, cy + test_dy, cx, cy, width, height, block_size);
-            if (sad < best_sad) {
-                best_sad = sad;
-                dx = test_dx;
-                dy = test_dy;
-                improved = true;
-                break;
-            }
-        }
-    }
-
-    // Small diamond refinement
-    improved = true;
-    while (improved) {
-        improved = false;
-        for (int i = 0; i < 4; i++) {
-            int test_dx = dx + small_diamond[i][0];
-            int test_dy = dy + small_diamond[i][1];
-
-            if (abs(test_dx) > search_range || abs(test_dy) > search_range) {
-                continue;
-            }
-
-            int sad = compute_sad(ref, cur, cx + test_dx, cy + test_dy, cx, cy, width, height, block_size);
-            if (sad < best_sad) {
-                best_sad = sad;
-                dx = test_dx;
-                dy = test_dy;
-                improved = true;
-                break;
-            }
-        }
-    }
-
-    *best_dx = dx;
-    *best_dy = dy;
-}
-
-// Sub-pixel refinement using parabolic interpolation
-static void subpixel_refinement(
-    const unsigned char *ref, const unsigned char *cur,
-    int cx, int cy, int width, int height, int block_size,
-    int int_dx, int int_dy,  // Integer-pixel motion
-    float *subpix_dx, float *subpix_dy  // Output: 1/4-pixel precision
-) {
-    // Get SAD at integer position and neighbors
-    int sad_0_0 = compute_sad(ref, cur, cx + int_dx, cy + int_dy, cx, cy, width, height, block_size);
-
-    // Horizontal neighbors
-    int sad_m1_0 = compute_sad(ref, cur, cx + int_dx - 1, cy + int_dy, cx, cy, width, height, block_size);
-    int sad_p1_0 = compute_sad(ref, cur, cx + int_dx + 1, cy + int_dy, cx, cy, width, height, block_size);
-
-    // Vertical neighbors
-    int sad_0_m1 = compute_sad(ref, cur, cx + int_dx, cy + int_dy - 1, cx, cy, width, height, block_size);
-    int sad_0_p1 = compute_sad(ref, cur, cx + int_dx, cy + int_dy + 1, cx, cy, width, height, block_size);
-
-    // Parabolic interpolation
-    float offset_x = parabolic_interp(sad_m1_0, sad_0_0, sad_p1_0);
-    float offset_y = parabolic_interp(sad_0_m1, sad_0_0, sad_0_p1);
-
-    // Quantize to 1/4-pixel precision
-    *subpix_dx = int_dx + roundf(offset_x * 4.0f) / 4.0f;
-    *subpix_dy = int_dy + roundf(offset_y * 4.0f) / 4.0f;
-}
-
-// MPEG-style bidirectional motion estimation
-// Uses variable block sizes (16×16, optionally split to 8×8)
-// 4-pixel overlap between blocks to reduce blocking artifacts
-// Diamond search + parabolic sub-pixel refinement
-void estimate_motion_optical_flow(
-    const unsigned char *frame1_rgb, const unsigned char *frame2_rgb,
-    int width, int height,
-    float **out_flow_x, float **out_flow_y
-) {
-    // Convert RGB to grayscale
-    unsigned char *gray1 = (unsigned char*)std::malloc(width * height);
-    unsigned char *gray2 = (unsigned char*)std::malloc(width * height);
-
-    for (int y = 0; y < height; y++) {
-        for (int x = 0; x < width; x++) {
-            int idx = y * width + x;
-            int rgb_idx = idx * 3;
-
-            gray1[idx] = (unsigned char)(0.299f * frame1_rgb[rgb_idx] +
-                                         0.587f * frame1_rgb[rgb_idx + 1] +
-                                         0.114f * frame1_rgb[rgb_idx + 2]);
-            gray2[idx] = (unsigned char)(0.299f * frame2_rgb[rgb_idx] +
-                                         0.587f * frame2_rgb[rgb_idx + 1] +
-                                         0.114f * frame2_rgb[rgb_idx + 2]);
-        }
-    }
-
-    *out_flow_x = (float*)std::malloc(width * height * sizeof(float));
-    *out_flow_y = (float*)std::malloc(width * height * sizeof(float));
-    std::memset(*out_flow_x, 0, width * height * sizeof(float));
-    std::memset(*out_flow_y, 0, width * height * sizeof(float));
-
-    // Block parameters
-    const int block_size = 16;
-    const int overlap = 4;
-    const int stride = block_size - overlap;  // 12 pixels
-    const int search_range = 16;  // ±16 pixels
-
-    // Process overlapping blocks
-    for (int by = 0; by < height; by += stride) {
-        for (int bx = 0; bx < width; bx += stride) {
-            int actual_block_size = block_size;
-
-            // Clamp block to frame boundary
-            if (bx + block_size > width || by + block_size > height) {
-                continue;  // Skip partial blocks at edges
-            }
-
-            // Integer-pixel diamond search
-            int int_dx = 0, int_dy = 0;
-            diamond_search(gray1, gray2, bx, by, width, height,
-                          actual_block_size, search_range, &int_dx, &int_dy);
-
-            // Sub-pixel refinement
-            float subpix_dx = 0.0f, subpix_dy = 0.0f;
-            subpixel_refinement(gray1, gray2, bx, by, width, height,
-                              actual_block_size, int_dx, int_dy,
-                              &subpix_dx, &subpix_dy);
-
-            // Fill motion vectors for block with distance-weighted blending in overlap regions
-            for (int y = by; y < by + actual_block_size && y < height; y++) {
-                for (int x = bx; x < bx + actual_block_size && x < width; x++) {
-                    int idx = y * width + x;
-
-                    // Distance from block center for blending weight
-                    float dx_from_center = (x - (bx + actual_block_size / 2));
-                    float dy_from_center = (y - (by + actual_block_size / 2));
-                    float dist = sqrtf(dx_from_center * dx_from_center +
-                                      dy_from_center * dy_from_center);
-
-                    // Weight decreases with distance from center (for smooth blending in overlaps)
-                    float weight = 1.0f / (1.0f + dist / actual_block_size);
-
-                    // Accumulate weighted motion (will be normalized later)
-                    (*out_flow_x)[idx] += subpix_dx * weight;
-                    (*out_flow_y)[idx] += subpix_dy * weight;
-                }
-            }
-        }
-    }
-
-    std::free(gray1);
-    std::free(gray2);
-}
-
-// Build distortion mesh from dense optical flow field
-void build_mesh_from_flow(
-    const float *flow_x, const float *flow_y,
-    int width, int height,
-    int mesh_w, int mesh_h,
-    short *mesh_dx, short *mesh_dy  // Output: 1/8 pixel precision
-) {
-    int cell_w = width / mesh_w;
-    int cell_h = height / mesh_h;
-
-    for (int my = 0; my < mesh_h; my++) {
-        for (int mx = 0; mx < mesh_w; mx++) {
-            // Cell center coordinates
-            int cx = mx * cell_w + cell_w / 2;
-            int cy = my * cell_h + cell_h / 2;
-
-            // Sample flow at cell center (5×5 neighborhood for robustness)
-            float sum_dx = 0.0f, sum_dy = 0.0f;
-            int count = 0;
-
-            for (int dy = -2; dy <= 2; dy++) {
-                for (int dx = -2; dx <= 2; dx++) {
-                    int px = cx + dx;
-                    int py = cy + dy;
-                    if (px >= 0 && px < width && py >= 0 && py < height) {
-                        int idx = py * width + px;
-                        sum_dx += flow_x[idx];
-                        sum_dy += flow_y[idx];
-                        count++;
-                    }
-                }
-            }
-
-            float avg_dx = (count > 0) ? (sum_dx / count) : 0.0f;
-            float avg_dy = (count > 0) ? (sum_dy / count) : 0.0f;
-
-            int mesh_idx = my * mesh_w + mx;
-            mesh_dx[mesh_idx] = (short)(avg_dx * 4.0f);  // 1/4 pixel precision
-            mesh_dy[mesh_idx] = (short)(avg_dy * 4.0f);
-        }
-    }
-}
-
-// Laplacian smoothing for mesh spatial coherence
-void smooth_mesh_laplacian(
-    short *mesh_dx, short *mesh_dy,
-    int mesh_width, int mesh_height,
-    float smoothness, int iterations
-) {
-    short *temp_dx = (short*)std::malloc(mesh_width * mesh_height * sizeof(short));
-    short *temp_dy = (short*)std::malloc(mesh_width * mesh_height * sizeof(short));
-
-    for (int iter = 0; iter < iterations; iter++) {
-        std::memcpy(temp_dx, mesh_dx, mesh_width * mesh_height * sizeof(short));
-        std::memcpy(temp_dy, mesh_dy, mesh_width * mesh_height * sizeof(short));
-
-        for (int my = 0; my < mesh_height; my++) {
-            for (int mx = 0; mx < mesh_width; mx++) {
-                int idx = my * mesh_width + mx;
-
-                float neighbor_dx = 0.0f, neighbor_dy = 0.0f;
-                int neighbor_count = 0;
-
-                int neighbors[4][2] = {{0, -1}, {0, 1}, {-1, 0}, {1, 0}};
-                for (int n = 0; n < 4; n++) {
-                    int nx = mx + neighbors[n][0];
-                    int ny = my + neighbors[n][1];
-                    if (nx >= 0 && nx < mesh_width && ny >= 0 && ny < mesh_height) {
-                        int nidx = ny * mesh_width + nx;
-                        neighbor_dx += temp_dx[nidx];
-                        neighbor_dy += temp_dy[nidx];
-                        neighbor_count++;
-                    }
-                }
-
-                if (neighbor_count > 0) {
-                    neighbor_dx /= neighbor_count;
-                    neighbor_dy /= neighbor_count;
-
-                    float data_weight = 1.0f - smoothness;
-                    mesh_dx[idx] = (short)(data_weight * temp_dx[idx] + smoothness * neighbor_dx);
-                    mesh_dy[idx] = (short)(data_weight * temp_dy[idx] + smoothness * neighbor_dy);
-                }
-            }
-        }
-    }
-
-    std::free(temp_dx);
-    std::free(temp_dy);
-}
-
-// Bilinear mesh warp
-void warp_frame_with_mesh(
-    const float *src_frame, int width, int height,
-    const short *mesh_dx, const short *mesh_dy,
-    int mesh_width, int mesh_height,
-    float *dst_frame
-) {
-    int cell_w = width / mesh_width;
-    int cell_h = height / mesh_height;
-
-    for (int y = 0; y < height; y++) {
-        for (int x = 0; x < width; x++) {
-            int cell_x = x / cell_w;
-            int cell_y = y / cell_h;
-
-            if (cell_x >= mesh_width - 1) cell_x = mesh_width - 2;
-            if (cell_y >= mesh_height - 1) cell_y = mesh_height - 2;
-            if (cell_x < 0) cell_x = 0;
-            if (cell_y < 0) cell_y = 0;
-
-            int idx_00 = cell_y * mesh_width + cell_x;
-            int idx_10 = idx_00 + 1;
-            int idx_01 = (cell_y + 1) * mesh_width + cell_x;
-            int idx_11 = idx_01 + 1;
-
-            float cp_x0 = cell_x * cell_w + cell_w / 2.0f;
-            float cp_y0 = cell_y * cell_h + cell_h / 2.0f;
-            float cp_x1 = (cell_x + 1) * cell_w + cell_w / 2.0f;
-            float cp_y1 = (cell_y + 1) * cell_h + cell_h / 2.0f;
-
-            float alpha = (x - cp_x0) / (cp_x1 - cp_x0);
-            float beta = (y - cp_y0) / (cp_y1 - cp_y0);
-            if (alpha < 0.0f) alpha = 0.0f;
-            if (alpha > 1.0f) alpha = 1.0f;
-            if (beta < 0.0f) beta = 0.0f;
-            if (beta > 1.0f) beta = 1.0f;
-
-            float dx_00 = mesh_dx[idx_00] / 4.0f;
-            float dy_00 = mesh_dy[idx_00] / 4.0f;
-            float dx_10 = mesh_dx[idx_10] / 4.0f;
-            float dy_10 = mesh_dy[idx_10] / 4.0f;
-            float dx_01 = mesh_dx[idx_01] / 4.0f;
-            float dy_01 = mesh_dy[idx_01] / 4.0f;
-            float dx_11 = mesh_dx[idx_11] / 4.0f;
-            float dy_11 = mesh_dy[idx_11] / 4.0f;
-
-            float dx = (1 - alpha) * (1 - beta) * dx_00 +
-                       alpha * (1 - beta) * dx_10 +
-                       (1 - alpha) * beta * dx_01 +
-                       alpha * beta * dx_11;
-
-            float dy = (1 - alpha) * (1 - beta) * dy_00 +
-                       alpha * (1 - beta) * dy_10 +
-                       (1 - alpha) * beta * dy_01 +
-                       alpha * beta * dy_11;
-
-            float src_x = x + dx;
-            float src_y = y + dy;
-
-            int sx0 = (int)std::floor(src_x);
-            int sy0 = (int)std::floor(src_y);
-            int sx1 = sx0 + 1;
-            int sy1 = sy0 + 1;
-
-            if (sx0 < 0) sx0 = 0;
-            if (sy0 < 0) sy0 = 0;
-            if (sx1 >= width) sx1 = width - 1;
-            if (sy1 >= height) sy1 = height - 1;
-            if (sx0 >= width) sx0 = width - 1;
-            if (sy0 >= height) sy0 = height - 1;
-
-            float fx = src_x - sx0;
-            float fy = src_y - sy0;
-
-            float val_00 = src_frame[sy0 * width + sx0];
-            float val_10 = src_frame[sy0 * width + sx1];
-            float val_01 = src_frame[sy1 * width + sx0];
-            float val_11 = src_frame[sy1 * width + sx1];
-
-            float val = (1 - fx) * (1 - fy) * val_00 +
-                        fx * (1 - fy) * val_10 +
-                        (1 - fx) * fy * val_01 +
-                        fx * fy * val_11;
-
-            dst_frame[y * width + x] = val;
-        }
-    }
-}
-
 // Dense optical flow estimation using Farneback algorithm
 // Computes flow at every pixel, then samples at block centers for motion vectors
 // Much more spatially coherent than independent block matching
@@ -491,4 +84,100 @@ void estimate_optical_flow_motion(
     }
 }
 
+// Block-based motion compensation with bilinear interpolation (sub-pixel precision)
+// MVs are in 1/4-pixel units
+// This implements the warp() function from MC-EZBC pseudocode
+void warp_block_motion(
+    const float *src,          // Source frame
+    int width, int height,
+    const int16_t *mvs_x,      // Motion vectors X (1/4-pixel units)
+    const int16_t *mvs_y,      // Motion vectors Y (1/4-pixel units)
+    int block_size,            // Block size (e.g., 16)
+    float *dst                 // Output warped frame
+) {
+    int num_blocks_x = (width + block_size - 1) / block_size;
+    int num_blocks_y = (height + block_size - 1) / block_size;
+
+    // Process each block
+    for (int by = 0; by < num_blocks_y; by++) {
+        for (int bx = 0; bx < num_blocks_x; bx++) {
+            int block_idx = by * num_blocks_x + bx;
+
+            // Get motion vector for this block (in 1/4-pixel units)
+            float mv_x = mvs_x[block_idx] / 4.0f;  // Convert to pixels
+            float mv_y = mvs_y[block_idx] / 4.0f;
+
+            // Block boundaries in destination frame
+            int block_x_start = bx * block_size;
+            int block_y_start = by * block_size;
+            int block_x_end = std::min(block_x_start + block_size, width);
+            int block_y_end = std::min(block_y_start + block_size, height);
+
+            // Warp each pixel in the block
+            for (int y = block_y_start; y < block_y_end; y++) {
+                for (int x = block_x_start; x < block_x_end; x++) {
+                    // Source position (backward warping)
+                    float src_x = x - mv_x;
+                    float src_y = y - mv_y;
+
+                    // Clamp to valid range
+                    src_x = std::max(0.0f, std::min((float)(width - 1), src_x));
+                    src_y = std::max(0.0f, std::min((float)(height - 1), src_y));
+
+                    // Bilinear interpolation
+                    int x0 = (int)src_x;
+                    int y0 = (int)src_y;
+                    int x1 = std::min(x0 + 1, width - 1);
+                    int y1 = std::min(y0 + 1, height - 1);
+
+                    float fx = src_x - x0;
+                    float fy = src_y - y0;
+
+                    float val00 = src[y0 * width + x0];
+                    float val10 = src[y0 * width + x1];
+                    float val01 = src[y1 * width + x0];
+                    float val11 = src[y1 * width + x1];
+
+                    float val_top = (1.0f - fx) * val00 + fx * val10;
+                    float val_bot = (1.0f - fx) * val01 + fx * val11;
+                    float val = (1.0f - fy) * val_top + fy * val_bot;
+
+                    dst[y * width + x] = val;
+                }
+            }
+        }
+    }
+}
+
+// Bidirectional motion compensation for MC-EZBC predict step
+// Implements: prediction = 0.5 * (warp(f0, MV_fwd) + warp(f1, MV_bwd))
+void warp_bidirectional(
+    const float *f0, const float *f1,
+    int width, int height,
+    const int16_t *mvs_fwd_x, const int16_t *mvs_fwd_y,  // F0 → F1
+    const int16_t *mvs_bwd_x, const int16_t *mvs_bwd_y,  // F1 → F0
+    int block_size,
+    float *prediction          // Output: 0.5 * (warped_f0 + warped_f1)
+) {
+    int num_pixels = width * height;
+
+    // Allocate temporary buffers
+    float *warped_f0 = new float[num_pixels];
+    float *warped_f1 = new float[num_pixels];
+
+    // Warp f0 forward using forward MVs
+    warp_block_motion(f0, width, height, mvs_fwd_x, mvs_fwd_y, block_size, warped_f0);
+
+    // Warp f1 backward using backward MVs
+    warp_block_motion(f1, width, height, mvs_bwd_x, mvs_bwd_y, block_size, warped_f1);
+
+    // Average the two warped frames
+    for (int i = 0; i < num_pixels; i++) {
+        prediction[i] = 0.5f * (warped_f0[i] + warped_f1[i]);
+    }
+
+    delete[] warped_f0;
+    delete[] warped_f1;
+}
+
 } // extern "C"