From f4b03b55b61a21c868a92ecdf4a3290e5ade4f7a Mon Sep 17 00:00:00 2001
From: minjaesong <alswo9628@gmail.com>
Date: Wed, 17 Sep 2025 21:49:32 +0900
Subject: [PATCH] monoblock TAV

---
 assets/disk0/tvdos/bin/playtav.js             |   8 +-
 assets/disk0/tvdos/bin/playtev.js             |   2 +-
 terranmon.txt                                 |  14 +-
 .../torvald/tsvm/GraphicsJSR223Delegate.kt    | 332 +++++++++++++-----
 video_encoder/encoder_tav.c                   | 230 ++++++++++--
 5 files changed, 455 insertions(+), 131 deletions(-)

diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js
index bd2705e..171b3d5 100644
--- a/assets/disk0/tvdos/bin/playtav.js
+++ b/assets/disk0/tvdos/bin/playtav.js
@@ -402,7 +402,7 @@ for (let i = 0; i < 8; i++) {
 }
 
 if (!magicValid) {
-    con.puts("Error: Invalid TAV file format")
+    printerrln("Error: Invalid TAV file format")
     errorlevel = 1
     return
 }
@@ -425,8 +425,8 @@ for (let i = 0; i < 7; i++) {
     seqread.readOneByte()
 }
 
-if (header.version < 1 || header.version > 2) {
-    con.puts(`Error: Unsupported TAV version ${header.version}`)
+if (header.version < 1 || header.version > 4) {
+    printerrln(`Error: Unsupported TAV version ${header.version}`)
     errorlevel = 1
     return
 }
@@ -637,7 +637,7 @@ try {
 
                     // Upload RGB buffer to display framebuffer (like TEV)
                     let uploadStart = sys.nanoTime()
-                    graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, header.width, header.height, frameCount, true)
+                    graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, header.width, header.height, frameCount, false)
                     uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0
 
                     // Defer audio playback until a first frame is sent
diff --git a/assets/disk0/tvdos/bin/playtev.js b/assets/disk0/tvdos/bin/playtev.js
index eba85ca..ce9dd17 100644
--- a/assets/disk0/tvdos/bin/playtev.js
+++ b/assets/disk0/tvdos/bin/playtev.js
@@ -673,7 +673,7 @@ try {
 
                         // Upload RGB buffer to display framebuffer with dithering
                         let uploadStart = sys.nanoTime()
-                        graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, width, height, frameCount, true)
+                        graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, width, height, frameCount, false)
                         uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0  // Convert to milliseconds
                     }
                     else {
diff --git a/terranmon.txt b/terranmon.txt
index 72686c3..28d99c2 100644
--- a/terranmon.txt
+++ b/terranmon.txt
@@ -695,7 +695,7 @@ DCT-based compression, motion compensation, and efficient temporal coding.
 
 ## Header (24 bytes)
     uint8  Magic[8]: "\x1FTSVM TEV"
-    uint8  Version: 2 or 3
+    uint8  Version: 2 (YCoCg-R) or 3 (ICtCp)
     uint16 Width: video width in pixels
     uint16 Height: video height in pixels
     uint8 FPS: frames per second
@@ -709,7 +709,6 @@ DCT-based compression, motion compensation, and efficient temporal coding.
     uint8  Video Flags
             - bit 0 = is interlaced (should be default for most non-archival TEV videos)
             - bit 1 = is NTSC framerate (repeat every 1000th frame)
-            - bit 2 = is lossless mode
     uint8  Reserved, fill with zero
 
 ## Packet Types
@@ -823,7 +822,7 @@ transmission capability, and region-of-interest coding.
 
 ## Header (32 bytes)
     uint8  Magic[8]: "\x1FTSVM TAV"
-    uint8  Version: 1
+    uint8  Version: 3 (YCoCg-R) or 4 (ICtCp), both monoblock; 1 and 2 are the legacy 280x224-tiled equivalents
     uint16 Width: video width in pixels  
     uint16 Height: video height in pixels
     uint8  FPS: frames per second
@@ -854,12 +853,11 @@ transmission capability, and region-of-interest coding.
     uint32 Compressed Size
     *      Zstd-compressed Block Data
 
-## Block Data (per 280x224 tile)
+## Block Data (per frame)
     uint8  Mode: encoding mode
            0x00 = SKIP (copy from previous frame)
-           0x01 = INTRA (DWT-coded, no prediction)
-           0x02 = INTER (DWT-coded with motion compensation)
-           0x03 = MOTION (motion vector only, no residual)
+           0x01 = INTRA (DWT-coded)
+           0x02 = DELTA (DWT coefficient deltas against the previously decoded coefficients)
     uint8  Quantiser override Y  (use 0 to disable overriding)
     uint8  Quantiser override Co (use 0 to disable overriding)
     uint8  Quantiser override Cg (use 0 to disable overriding)
@@ -900,7 +898,7 @@ TAV operates in YCoCg-R colour space with full resolution channels:
 - Cg: Green-Magenta chroma (full resolution, very aggressive quantization by default)
 
 ## Compression Features
-- 280x224 DWT tiles vs 16x16 DCT blocks in TEV
+- Single full-frame DWT tile (monoblock) vs 16x16 DCT blocks in TEV
 - Multi-resolution representation enables scalable decoding
 - Better frequency localization than DCT
 - Reduced blocking artifacts due to overlapping basis functions
diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
index 11ab701..4e59bea 100644
--- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
+++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
@@ -3822,8 +3822,21 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         var readPtr = blockDataPtr
 
         try {
-            val tilesX = (width + TILE_SIZE_X - 1) / TILE_SIZE_X  // 280x224 tiles
-            val tilesY = (height + TILE_SIZE_Y - 1) / TILE_SIZE_Y
+            // Determine if monoblock mode based on TAV version
+            val isMonoblock = (tavVersion == 3 || tavVersion == 4)
+
+            val tilesX: Int
+            val tilesY: Int
+
+            if (isMonoblock) {
+                // Monoblock mode: single tile covering entire frame
+                tilesX = 1
+                tilesY = 1
+            } else {
+                // Standard mode: multiple 280x224 tiles
+                tilesX = (width + TILE_SIZE_X - 1) / TILE_SIZE_X
+                tilesY = (height + TILE_SIZE_Y - 1) / TILE_SIZE_Y
+            }
             
             // Process each tile
             for (tileY in 0 until tilesY) {
@@ -3847,17 +3860,17 @@ class GraphicsJSR223Delegate(private val vm: VM) {
                             // Copy 280x224 tile from previous frame to current frame
                             tavCopyTileRGB(tileX, tileY, currentRGBAddr, prevRGBAddr, width, height)
                         }
-                        0x01 -> { // TAV_MODE_INTRA  
+                        0x01 -> { // TAV_MODE_INTRA
                             // Decode DWT coefficients directly to RGB buffer
-                            readPtr = tavDecodeDWTIntraTileRGB(readPtr, tileX, tileY, currentRGBAddr, 
+                            readPtr = tavDecodeDWTIntraTileRGB(readPtr, tileX, tileY, currentRGBAddr,
                                                           width, height, qY, qCo, qCg,
-                                                          waveletFilter, decompLevels, isLossless, tavVersion)
+                                                          waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock)
                         }
                         0x02 -> { // TAV_MODE_DELTA
                             // Coefficient delta encoding for efficient P-frames
                             readPtr = tavDecodeDeltaTileRGB(readPtr, tileX, tileY, currentRGBAddr,
                                                       width, height, qY, qCo, qCg,
-                                                      waveletFilter, decompLevels, isLossless, tavVersion)
+                                                      waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock)
                         }
                     }
                 }
@@ -3870,92 +3883,130 @@ class GraphicsJSR223Delegate(private val vm: VM) {
 
     private fun tavDecodeDWTIntraTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
                                          width: Int, height: Int, qY: Int, qCo: Int, qCg: Int,
-                                         waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long {
-        // Now reading padded coefficient tiles (344x288) instead of core tiles (280x224)
-        val paddedCoeffCount = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y
+                                         waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false): Long {
+        // Determine coefficient count based on mode
+        val coeffCount = if (isMonoblock) {
+            // Monoblock mode: entire frame
+            width * height
+        } else {
+            // Standard mode: padded tiles (344x288)
+            PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y
+        }
+
         var ptr = readPtr
+
+        // Read quantised DWT coefficients for Y, Co, Cg channels
+        val quantisedY = ShortArray(coeffCount)
+        val quantisedCo = ShortArray(coeffCount)
+        val quantisedCg = ShortArray(coeffCount)
         
-        // Read quantised DWT coefficients for padded tile Y, Co, Cg channels (344x288)
-        val quantisedY = ShortArray(paddedCoeffCount)
-        val quantisedCo = ShortArray(paddedCoeffCount)
-        val quantisedCg = ShortArray(paddedCoeffCount)
-        
-        // OPTIMIZATION: Bulk read all coefficient data (344x288 * 3 channels * 2 bytes = 594,432 bytes)
-        val totalCoeffBytes = paddedCoeffCount * 3 * 2L  // 3 channels, 2 bytes per short
+        // OPTIMIZATION: Bulk read all coefficient data
+        val totalCoeffBytes = coeffCount * 3 * 2L  // 3 channels, 2 bytes per short
         val coeffBuffer = ByteArray(totalCoeffBytes.toInt())
         UnsafeHelper.memcpyRaw(null, vm.usermem.ptr + ptr, coeffBuffer, UnsafeHelper.getArrayOffset(coeffBuffer), totalCoeffBytes)
         
         // Convert bulk data to coefficient arrays
         var bufferOffset = 0
-        for (i in 0 until paddedCoeffCount) {
+        for (i in 0 until coeffCount) {
             quantisedY[i] = (((coeffBuffer[bufferOffset + 1].toInt() and 0xFF) shl 8) or (coeffBuffer[bufferOffset].toInt() and 0xFF)).toShort()
             bufferOffset += 2
         }
-        for (i in 0 until paddedCoeffCount) {
+        for (i in 0 until coeffCount) {
             quantisedCo[i] = (((coeffBuffer[bufferOffset + 1].toInt() and 0xFF) shl 8) or (coeffBuffer[bufferOffset].toInt() and 0xFF)).toShort()
             bufferOffset += 2
         }
-        for (i in 0 until paddedCoeffCount) {
+        for (i in 0 until coeffCount) {
             quantisedCg[i] = (((coeffBuffer[bufferOffset + 1].toInt() and 0xFF) shl 8) or (coeffBuffer[bufferOffset].toInt() and 0xFF)).toShort()
             bufferOffset += 2
         }
         
         ptr += totalCoeffBytes.toInt()
         
-        // Dequantise padded coefficient tiles (344x288)
-        val yPaddedTile = FloatArray(paddedCoeffCount)
-        val coPaddedTile = FloatArray(paddedCoeffCount)
-        val cgPaddedTile = FloatArray(paddedCoeffCount)
-        
-        for (i in 0 until paddedCoeffCount) {
-            yPaddedTile[i] = quantisedY[i] * qY.toFloat()
-            coPaddedTile[i] = quantisedCo[i] * qCo.toFloat()
-            cgPaddedTile[i] = quantisedCg[i] * qCg.toFloat()
+        // Dequantise coefficient data
+        val yTile = FloatArray(coeffCount)
+        val coTile = FloatArray(coeffCount)
+        val cgTile = FloatArray(coeffCount)
+
+        for (i in 0 until coeffCount) {
+            yTile[i] = quantisedY[i] * qY.toFloat()
+            coTile[i] = quantisedCo[i] * qCo.toFloat()
+            cgTile[i] = quantisedCg[i] * qCg.toFloat()
         }
         
         // Store coefficients for future delta reference (for P-frames)
-        val tileIdx = tileY * ((width + TILE_SIZE_X - 1) / TILE_SIZE_X) + tileX
+        val tileIdx = if (isMonoblock) {
+            0  // Single tile index for monoblock
+        } else {
+            tileY * ((width + TILE_SIZE_X - 1) / TILE_SIZE_X) + tileX
+        }
+
         if (tavPreviousCoeffsY == null) {
             tavPreviousCoeffsY = mutableMapOf()
             tavPreviousCoeffsCo = mutableMapOf()
             tavPreviousCoeffsCg = mutableMapOf()
         }
-        tavPreviousCoeffsY!![tileIdx] = yPaddedTile.clone()
-        tavPreviousCoeffsCo!![tileIdx] = coPaddedTile.clone()
-        tavPreviousCoeffsCg!![tileIdx] = cgPaddedTile.clone()
+        tavPreviousCoeffsY!![tileIdx] = yTile.clone()
+        tavPreviousCoeffsCo!![tileIdx] = coTile.clone()
+        tavPreviousCoeffsCg!![tileIdx] = cgTile.clone()
         
-        // Apply inverse DWT on full padded tiles (344x288)
+        // Apply inverse DWT
+        val tileWidth = if (isMonoblock) width else PADDED_TILE_SIZE_X
+        val tileHeight = if (isMonoblock) height else PADDED_TILE_SIZE_Y
+
         if (isLossless) {
-            tavApplyDWTInverseMultiLevel(yPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0)
-            tavApplyDWTInverseMultiLevel(coPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0)
-            tavApplyDWTInverseMultiLevel(cgPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0)
+            tavApplyDWTInverseMultiLevel(yTile, tileWidth, tileHeight, decompLevels, 0)
+            tavApplyDWTInverseMultiLevel(coTile, tileWidth, tileHeight, decompLevels, 0)
+            tavApplyDWTInverseMultiLevel(cgTile, tileWidth, tileHeight, decompLevels, 0)
         } else {
-            tavApplyDWTInverseMultiLevel(yPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter)
-            tavApplyDWTInverseMultiLevel(coPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter)
-            tavApplyDWTInverseMultiLevel(cgPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter)
+            tavApplyDWTInverseMultiLevel(yTile, tileWidth, tileHeight, decompLevels, waveletFilter)
+            tavApplyDWTInverseMultiLevel(coTile, tileWidth, tileHeight, decompLevels, waveletFilter)
+            tavApplyDWTInverseMultiLevel(cgTile, tileWidth, tileHeight, decompLevels, waveletFilter)
         }
         
-        // Extract core 280x224 pixels from reconstructed padded tiles (344x288)
-        val yTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y)
-        val coTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y)
-        val cgTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y)
-        
-        for (y in 0 until TILE_SIZE_Y) {
-            for (x in 0 until TILE_SIZE_X) {
-                val coreIdx = y * TILE_SIZE_X + x
-                val paddedIdx = (y + TAV_TILE_MARGIN) * PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN)
-                
-                yTile[coreIdx] = yPaddedTile[paddedIdx]
-                coTile[coreIdx] = coPaddedTile[paddedIdx]
-                cgTile[coreIdx] = cgPaddedTile[paddedIdx]
+        // Extract final tile data
+        val finalYTile: FloatArray
+        val finalCoTile: FloatArray
+        val finalCgTile: FloatArray
+
+        if (isMonoblock) {
+            // Monoblock mode: use full frame data directly (no padding to extract)
+            finalYTile = yTile
+            finalCoTile = coTile
+            finalCgTile = cgTile
+        } else {
+            // Standard mode: extract core 280x224 pixels from reconstructed padded tiles (344x288)
+            finalYTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y)
+            finalCoTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y)
+            finalCgTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y)
+
+            for (y in 0 until TILE_SIZE_Y) {
+                for (x in 0 until TILE_SIZE_X) {
+                    val coreIdx = y * TILE_SIZE_X + x
+                    val paddedIdx = (y + TAV_TILE_MARGIN) * PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN)
+
+                    finalYTile[coreIdx] = yTile[paddedIdx]
+                    finalCoTile[coreIdx] = coTile[paddedIdx]
+                    finalCgTile[coreIdx] = cgTile[paddedIdx]
+                }
             }
         }
         
-        // Convert to RGB based on TAV version (YCoCg-R for v1, ICtCp for v2)
-        if (tavVersion == 2) {
-            tavConvertICtCpTileToRGB(tileX, tileY, yTile, coTile, cgTile, currentRGBAddr, width, height)
+        // Convert to RGB based on TAV version and mode
+        // v1,v3 = YCoCg-R, v2,v4 = ICtCp
+        if (tavVersion == 2 || tavVersion == 4) {
+            // ICtCp color space
+            if (isMonoblock) {
+                tavConvertICtCpMonoblockToRGB(finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height)
+            } else {
+                tavConvertICtCpTileToRGB(tileX, tileY, finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height)
+            }
         } else {
-            tavConvertYCoCgTileToRGB(tileX, tileY, yTile, coTile, cgTile, currentRGBAddr, width, height)
+            // YCoCg-R color space (v1, v3)
+            if (isMonoblock) {
+                tavConvertYCoCgMonoblockToRGB(finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height)
+            } else {
+                tavConvertYCoCgTileToRGB(tileX, tileY, finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height)
+            }
         }
         
         return ptr
@@ -4069,6 +4120,79 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         }
     }
 
+    // Monoblock conversion: writes the whole frame as packed RGB (3 bytes per pixel, row-major) starting at rgbAddr
+    private fun tavConvertYCoCgMonoblockToRGB(yData: FloatArray, coData: FloatArray, cgData: FloatArray,
+                                              rgbAddr: Long, width: Int, height: Int) {
+        // yData/coData/cgData are indexed as y * width + x; one output row is assembled and copied per iteration
+        for (y in 0 until height) {
+            // Row staging buffer: 3 bytes (R, G, B in that order) per pixel
+            val rowRgbBuffer = ByteArray(width * 3)
+            var bufferIdx = 0
+
+            for (x in 0 until width) {
+                val idx = y * width + x
+
+                // Fetch the Y/Co/Cg samples for this pixel
+                val Y = yData[idx]
+                val Co = coData[idx]
+                val Cg = cgData[idx]
+
+                // Inverse YCoCg-R lifting steps; presumably mirrors the encoder's forward transform (not shown here)
+                val tmp = Y - Cg / 2.0f
+                val g = Cg + tmp
+                val b = tmp - Co / 2.0f
+                val r = Co + b
+                // toInt() truncates toward zero; coerceIn clamps to 0..255 before narrowing to Byte
+                rowRgbBuffer[bufferIdx++] = r.toInt().coerceIn(0, 255).toByte()
+                rowRgbBuffer[bufferIdx++] = g.toInt().coerceIn(0, 255).toByte()
+                rowRgbBuffer[bufferIdx++] = b.toInt().coerceIn(0, 255).toByte()
+            }
+
+            // Copy the finished row into VM user memory at rgbAddr + y * width * 3
+            val rowStartOffset = y * width * 3L
+            UnsafeHelper.memcpyRaw(rowRgbBuffer, UnsafeHelper.getArrayOffset(rowRgbBuffer),
+                                 null, vm.usermem.ptr + rgbAddr + rowStartOffset, rowRgbBuffer.size.toLong())
+        }
+    }
+
+    private fun tavConvertICtCpMonoblockToRGB(iData: FloatArray, ctData: FloatArray, cpData: FloatArray,
+                                              rgbAddr: Long, width: Int, height: Int) {
+        // Converts a full-frame I/Ct/Cp image to packed RGB (3 bytes per pixel) at rgbAddr, one row at a time
+        for (y in 0 until height) {
+            // Row staging buffer: 3 bytes (R, G, B in that order) per pixel
+            val rowRgbBuffer = ByteArray(width * 3)
+            var bufferIdx = 0
+
+            for (x in 0 until width) {
+                val idx = y * width + x
+
+                // Fetch the I/Ct/Cp samples for this pixel (planes indexed as y * width + x)
+                val I = iData[idx]
+                val Ct = ctData[idx]
+                val Cp = cpData[idx]
+
+                // ICtCp -> LMS. NOTE(review): coefficients do not look like the BT.2100 inverse matrix; verify against the encoder
+                val L = I + 0.00975f * Ct + 0.20524f * Cp
+                val M = I - 0.11387f * Ct + 0.13321f * Cp
+                val S = I + 0.03259f * Ct - 0.67851f * Cp
+
+                // LMS -> RGB. NOTE(review): values equal the XYZ -> linear sRGB matrix and no transfer function is applied; confirm against tavConvertICtCpTileToRGB
+                val r = 3.2406f * L - 1.5372f * M - 0.4986f * S
+                val g = -0.9689f * L + 1.8758f * M + 0.0415f * S
+                val b = 0.0557f * L - 0.2040f * M + 1.0570f * S
+                // Scale by 255, truncate toward zero, then clamp to 0..255 before narrowing to Byte
+                rowRgbBuffer[bufferIdx++] = (r * 255f).toInt().coerceIn(0, 255).toByte()
+                rowRgbBuffer[bufferIdx++] = (g * 255f).toInt().coerceIn(0, 255).toByte()
+                rowRgbBuffer[bufferIdx++] = (b * 255f).toInt().coerceIn(0, 255).toByte()
+            }
+
+            // Copy the finished row into VM user memory at rgbAddr + y * width * 3
+            val rowStartOffset = y * width * 3L
+            UnsafeHelper.memcpyRaw(rowRgbBuffer, UnsafeHelper.getArrayOffset(rowRgbBuffer),
+                                 null, vm.usermem.ptr + rgbAddr + rowStartOffset, rowRgbBuffer.size.toLong())
+        }
+    }
+
     private fun tavAddYCoCgResidualToRGBTile(tileX: Int, tileY: Int, yRes: FloatArray, coRes: FloatArray, cgRes: FloatArray,
                                              rgbAddr: Long, width: Int, height: Int) {
         val startX = tileX * TILE_SIZE_X
@@ -4145,20 +4269,30 @@ class GraphicsJSR223Delegate(private val vm: VM) {
 
     private fun tavDecodeDeltaTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
                                       width: Int, height: Int, qY: Int, qCo: Int, qCg: Int,
-                                      waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long {
+                                      waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false): Long {
         
-        val tileIdx = tileY * ((width + TILE_SIZE_X - 1) / TILE_SIZE_X) + tileX
+        val tileIdx = if (isMonoblock) {
+            0  // Single tile index for monoblock
+        } else {
+            tileY * ((width + TILE_SIZE_X - 1) / TILE_SIZE_X) + tileX
+        }
         var ptr = readPtr
-        
+
         // Initialize coefficient storage if needed
         if (tavPreviousCoeffsY == null) {
             tavPreviousCoeffsY = mutableMapOf()
             tavPreviousCoeffsCo = mutableMapOf()
             tavPreviousCoeffsCg = mutableMapOf()
         }
-        
-        // Coefficient count for padded tiles: 344x288 = 99,072 coefficients per channel
-        val coeffCount = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y
+
+        // Determine coefficient count based on mode
+        val coeffCount = if (isMonoblock) {
+            // Monoblock mode: entire frame
+            width * height
+        } else {
+            // Standard mode: padded tiles (344x288)
+            PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y
+        }
         
         // Read delta coefficients (same format as intra: quantised int16 -> float)
         val deltaY = ShortArray(coeffCount)
@@ -4194,37 +4328,63 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         tavPreviousCoeffsCg!![tileIdx] = currentCg.clone()
         
         // Apply inverse DWT
+        val tileWidth = if (isMonoblock) width else PADDED_TILE_SIZE_X
+        val tileHeight = if (isMonoblock) height else PADDED_TILE_SIZE_Y
+
         if (isLossless) {
-            tavApplyDWTInverseMultiLevel(currentY, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0)
-            tavApplyDWTInverseMultiLevel(currentCo, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0)
-            tavApplyDWTInverseMultiLevel(currentCg, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0)
+            tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, 0)
+            tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, 0)
+            tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, 0)
         } else {
-            tavApplyDWTInverseMultiLevel(currentY, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter)
-            tavApplyDWTInverseMultiLevel(currentCo, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter)
-            tavApplyDWTInverseMultiLevel(currentCg, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter)
+            tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, waveletFilter)
+            tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, waveletFilter)
+            tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, waveletFilter)
         }
         
-        // Extract core 280x224 pixels and convert to RGB (same as intra)
-        val yTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y)
-        val coTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y)
-        val cgTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y)
-        
-        for (y in 0 until TILE_SIZE_Y) {
-            for (x in 0 until TILE_SIZE_X) {
-                val coreIdx = y * TILE_SIZE_X + x
-                val paddedIdx = (y + TAV_TILE_MARGIN) * PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN)
-                
-                yTile[coreIdx] = currentY[paddedIdx]
-                coTile[coreIdx] = currentCo[paddedIdx]
-                cgTile[coreIdx] = currentCg[paddedIdx]
+        // Extract final tile data
+        val finalYTile: FloatArray
+        val finalCoTile: FloatArray
+        val finalCgTile: FloatArray
+
+        if (isMonoblock) {
+            // Monoblock mode: use full frame data directly (no padding to extract)
+            finalYTile = currentY
+            finalCoTile = currentCo
+            finalCgTile = currentCg
+        } else {
+            // Standard mode: extract core 280x224 pixels from reconstructed padded tiles (344x288)
+            finalYTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y)
+            finalCoTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y)
+            finalCgTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y)
+
+            for (y in 0 until TILE_SIZE_Y) {
+                for (x in 0 until TILE_SIZE_X) {
+                    val coreIdx = y * TILE_SIZE_X + x
+                    val paddedIdx = (y + TAV_TILE_MARGIN) * PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN)
+
+                    finalYTile[coreIdx] = currentY[paddedIdx]
+                    finalCoTile[coreIdx] = currentCo[paddedIdx]
+                    finalCgTile[coreIdx] = currentCg[paddedIdx]
+                }
             }
         }
-        
-        // Convert to RGB based on TAV version
-        if (tavVersion == 2) {
-            tavConvertICtCpTileToRGB(tileX, tileY, yTile, coTile, cgTile, currentRGBAddr, width, height)
+
+        // Convert to RGB based on TAV version and mode
+        // v1,v3 = YCoCg-R, v2,v4 = ICtCp
+        if (tavVersion == 2 || tavVersion == 4) {
+            // ICtCp color space
+            if (isMonoblock) {
+                tavConvertICtCpMonoblockToRGB(finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height)
+            } else {
+                tavConvertICtCpTileToRGB(tileX, tileY, finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height)
+            }
         } else {
-            tavConvertYCoCgTileToRGB(tileX, tileY, yTile, coTile, cgTile, currentRGBAddr, width, height)
+            // YCoCg-R color space (v1, v3)
+            if (isMonoblock) {
+                tavConvertYCoCgMonoblockToRGB(finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height)
+            } else {
+                tavConvertYCoCgTileToRGB(tileX, tileY, finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height)
+            }
         }
         
         return ptr
diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c
index 805d1fe..960d08d 100644
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -23,8 +23,11 @@
 // TSVM Advanced Video (TAV) format constants
 #define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56"  // "\x1FTSVM TAV"
 // TAV version - dynamic based on colour space mode
-// Version 1: YCoCg-R (default) 
-// Version 2: ICtCp (--ictcp flag)
+// Version 3: YCoCg-R monoblock (default)
+// Version 4: ICtCp monoblock (--ictcp flag)
+// Legacy versions (280x224 tiled mode; encoder code preserved but not selectable):
+// Version 1: YCoCg-R tiled
+// Version 2: ICtCp tiled
 
 // Tile encoding modes (280x224 tiles)
 #define TAV_MODE_SKIP      0x00  // Skip tile (copy from reference)
@@ -104,6 +107,21 @@ static inline float FCLAMP(float x, float min, float max) {
     return x < min ? min : (x > max ? max : x);
 }
 
+// Returns how many times the smaller frame dimension can be halved while it is still >= 8, capped at 10
+static int calculate_max_decomp_levels(int width, int height) {
+    int levels = 0;
+    int min_size = width < height ? width : height;
+
+    // Each level halves the smaller dimension (integer division); a dimension below 8 yields 0 levels
+    while (min_size >= 8) {  // e.g. 448 -> 224 -> 112 -> 56 -> 28 -> 14 -> 7 gives 6 levels
+        min_size /= 2;
+        levels++;
+    }
+
+    // Upper bound of 10 levels regardless of frame size
+    return levels > 10 ? 10 : levels;
+}
+
 // MP2 audio rate table (same as TEV)
 static const int MP2_RATE_TABLE[] = {128, 160, 224, 320, 384, 384};
 
@@ -164,6 +182,7 @@ typedef struct {
     int test_mode;
     int ictcp_mode;       // 0 = YCoCg-R (default), 1 = ICtCp colour space
     int intra_only;       // Force all tiles to use INTRA mode (disable delta encoding)
+    int monoblock;        // Single DWT tile mode (encode entire frame as one tile)
     
     // Frame buffers
     uint8_t *current_frame_rgb;
@@ -216,12 +235,39 @@ typedef struct {
 
 // Wavelet filter constants removed - using lifting scheme implementation instead
 
+// Parse a resolution given as a keyword ("cif", "qcif", "half", "default";
+// all-lowercase or all-uppercase only) or as "<width>x<height>", e.g. "1024x768".
+// On success stores the size in *width / *height and returns 1; returns 0 for a
+// NULL or malformed string, or for an explicit size that is not strictly positive.
+static int parse_resolution(const char *res_str, int *width, int *height) {
+    if (!res_str) return 0;
+    if (strcmp(res_str, "cif") == 0 || strcmp(res_str, "CIF") == 0) {
+        *width = 352; *height = 288;
+        return 1;
+    }
+    if (strcmp(res_str, "qcif") == 0 || strcmp(res_str, "QCIF") == 0) {
+        *width = 176; *height = 144;
+        return 1;
+    }
+    if (strcmp(res_str, "half") == 0 || strcmp(res_str, "HALF") == 0) {
+        *width = DEFAULT_WIDTH >> 1; *height = DEFAULT_HEIGHT >> 1;
+        return 1;
+    }
+    if (strcmp(res_str, "default") == 0 || strcmp(res_str, "DEFAULT") == 0) {
+        *width = DEFAULT_WIDTH; *height = DEFAULT_HEIGHT;
+        return 1;
+    }
+    // sscanf alone would accept "0x0" or "-5x10"; reject sizes that cannot be encoded.
+    return sscanf(res_str, "%dx%d", width, height) == 2 && *width > 0 && *height > 0;
+}
+
 // Function prototypes
 static void show_usage(const char *program_name);
 static tav_encoder_t* create_encoder(void);
 static void cleanup_encoder(tav_encoder_t *enc);
 static int initialize_encoder(tav_encoder_t *enc);
 static void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height);
+static int calculate_max_decomp_levels(int width, int height);
 
 // Audio and subtitle processing prototypes (from TEV)
 static int start_audio_conversion(tav_encoder_t *enc);
@@ -277,7 +323,7 @@ static void show_usage(const char *program_name) {
     }
     
     printf("\n\nFeatures:\n");
-    printf("  - 280x224 DWT tiles with multi-resolution encoding\n");
+    printf("  - Single DWT tile (monoblock) encoding for optimal quality\n");
     printf("  - Full resolution YCoCg-R/ICtCp colour space\n");
     printf("  - Lossless and lossy compression modes\n");
     
@@ -305,6 +351,7 @@ static tav_encoder_t* create_encoder(void) {
     enc->quantiser_co = QUALITY_CO[DEFAULT_QUALITY];
     enc->quantiser_cg = QUALITY_CG[DEFAULT_QUALITY];
     enc->intra_only = 1;
+    enc->monoblock = 1;  // Default to monoblock mode
 
     return enc;
 }
@@ -312,10 +359,22 @@ static tav_encoder_t* create_encoder(void) {
 // Initialize encoder resources
 static int initialize_encoder(tav_encoder_t *enc) {
     if (!enc) return -1;
-    
+
+    // Automatic decomposition levels for monoblock mode
+    if (enc->monoblock) {
+        enc->decomp_levels = calculate_max_decomp_levels(enc->width, enc->height);
+    }
+
     // Calculate tile dimensions
-    enc->tiles_x = (enc->width + TILE_SIZE_X - 1) / TILE_SIZE_X;
-    enc->tiles_y = (enc->height + TILE_SIZE_Y - 1) / TILE_SIZE_Y;
+    if (enc->monoblock) {
+        // Monoblock mode: single tile covering entire frame
+        enc->tiles_x = 1;
+        enc->tiles_y = 1;
+    } else {
+        // Standard mode: multiple 280x224 tiles
+        enc->tiles_x = (enc->width + TILE_SIZE_X - 1) / TILE_SIZE_X;
+        enc->tiles_y = (enc->height + TILE_SIZE_Y - 1) / TILE_SIZE_Y;
+    }
     int num_tiles = enc->tiles_x * enc->tiles_y;
     
     // Allocate frame buffers
@@ -334,17 +393,31 @@ static int initialize_encoder(tav_encoder_t *enc) {
 
     // Initialize ZSTD compression
     enc->zstd_ctx = ZSTD_createCCtx();
-    enc->compressed_buffer_size = ZSTD_compressBound(1024 * 1024); // 1MB max
+
+    // Calculate maximum possible frame size for ZSTD buffer
+    const size_t max_frame_coeff_count = enc->monoblock ?
+        (enc->width * enc->height) :
+        (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y);
+    const size_t max_frame_size = num_tiles * (4 + max_frame_coeff_count * 3 * sizeof(int16_t));
+    enc->compressed_buffer_size = ZSTD_compressBound(max_frame_size);
     enc->compressed_buffer = malloc(enc->compressed_buffer_size);
     
-    // OPTIMIZATION: Allocate reusable quantisation buffers for padded tiles (344x288)
-    const int padded_coeff_count = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y;
-    enc->reusable_quantised_y = malloc(padded_coeff_count * sizeof(int16_t));
-    enc->reusable_quantised_co = malloc(padded_coeff_count * sizeof(int16_t));
-    enc->reusable_quantised_cg = malloc(padded_coeff_count * sizeof(int16_t));
-    
+    // OPTIMIZATION: Allocate reusable quantisation buffers
+    int coeff_count_per_tile;
+    if (enc->monoblock) {
+        // Monoblock mode: entire frame
+        coeff_count_per_tile = enc->width * enc->height;
+    } else {
+        // Standard mode: padded tiles (344x288)
+        coeff_count_per_tile = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y;
+    }
+
+    enc->reusable_quantised_y = malloc(coeff_count_per_tile * sizeof(int16_t));
+    enc->reusable_quantised_co = malloc(coeff_count_per_tile * sizeof(int16_t));
+    enc->reusable_quantised_cg = malloc(coeff_count_per_tile * sizeof(int16_t));
+
     // Allocate coefficient delta storage for P-frames (per-tile coefficient storage)
-    size_t total_coeff_size = num_tiles * padded_coeff_count * sizeof(float);
+    size_t total_coeff_size = num_tiles * coeff_count_per_tile * sizeof(float);
     enc->previous_coeffs_y = malloc(total_coeff_size);
     enc->previous_coeffs_co = malloc(total_coeff_size);
     enc->previous_coeffs_cg = malloc(total_coeff_size);
@@ -605,8 +678,55 @@ static void dwt_2d_forward_padded(float *tile_data, int levels, int filter_type)
     free(temp_col);
 }
 
+// 2D DWT forward transform for arbitrary dimensions, applied in place to tile_data (row-major,
+// stride = width). Each level filters the rows, then the columns, of the top-left
+// (width >> level) x (height >> level) region; filter_type picks the 5/3 or the 9/7 1D filter.
+// Empty input is a no-op; a failed scratch allocation is fatal instead of a NULL dereference.
+static void dwt_2d_forward_flexible(float *tile_data, int width, int height, int levels, int filter_type) {
+    if (!tile_data || width < 1 || height < 1 || levels < 1) return;
 
+    // One scratch line sized for the longer dimension serves both passes (they never overlap).
+    const size_t scratch_len = (size_t)((width > height) ? width : height);
+    float *scratch = malloc(scratch_len * sizeof(float));
+    if (!scratch) {
+        fprintf(stderr, "dwt_2d_forward_flexible: out of memory (%dx%d)\n", width, height);
+        exit(EXIT_FAILURE);
+    }
 
+    for (int level = 0; level < levels; level++) {
+        const int current_width = width >> level;
+        const int current_height = height >> level;
+        if (current_width < 1 || current_height < 1) break;
+
+        // Row transform (horizontal)
+        for (int y = 0; y < current_height; y++) {
+            float *row = tile_data + (size_t)y * width;
+            memcpy(scratch, row, current_width * sizeof(float));
+            if (filter_type == WAVELET_5_3_REVERSIBLE) {
+                dwt_53_forward_1d(scratch, current_width);
+            } else {
+                dwt_97_forward_1d(scratch, current_width);
+            }
+            memcpy(row, scratch, current_width * sizeof(float));
+        }
+
+        // Column transform (vertical): gather the strided column, filter it, scatter it back
+        for (int x = 0; x < current_width; x++) {
+            for (int y = 0; y < current_height; y++) {
+                scratch[y] = tile_data[(size_t)y * width + x];
+            }
+            if (filter_type == WAVELET_5_3_REVERSIBLE) {
+                dwt_53_forward_1d(scratch, current_height);
+            } else {
+                dwt_97_forward_1d(scratch, current_height);
+            }
+            for (int y = 0; y < current_height; y++) {
+                tile_data[(size_t)y * width + x] = scratch[y];
+            }
+        }
+    }
+    free(scratch);
+}
 
 // Quantisation for DWT subbands with rate control
 static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int size, int quantiser) {
@@ -642,8 +762,10 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
         return offset;
     }
     
-    // Quantise and serialise DWT coefficients (full padded tile: 344x288)
-    const int tile_size = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y;
+    // Quantise and serialise DWT coefficients
+    const int tile_size = enc->monoblock ?
+        (enc->width * enc->height) :  // Monoblock mode: full frame
+        (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y);  // Standard mode: padded tiles
     // OPTIMIZATION: Use pre-allocated buffers instead of malloc/free per tile
     int16_t *quantised_y = enc->reusable_quantised_y;
     int16_t *quantised_co = enc->reusable_quantised_co;
@@ -735,8 +857,11 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
 
 // Compress and write frame data
 static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) {
-    // Calculate total uncompressed size (for padded tile coefficients: 344x288)
-    const size_t max_tile_size = 4 + (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y * 3 * sizeof(int16_t));  // header + 3 channels of coefficients
+    // Calculate total uncompressed size
+    const size_t coeff_count = enc->monoblock ?
+        (enc->width * enc->height) :
+        (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y);
+    const size_t max_tile_size = 4 + (coeff_count * 3 * sizeof(int16_t));  // header + 3 channels of coefficients
     const size_t total_uncompressed_size = enc->tiles_x * enc->tiles_y * max_tile_size;
     
     // Allocate buffer for uncompressed tile data
@@ -756,13 +881,29 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
                 mode = TAV_MODE_DELTA;  // P-frames use coefficient delta encoding
             }
             
-            // Extract padded tile data (344x288) with neighbour context for overlapping tiles
-            float tile_y_data[PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y];
-            float tile_co_data[PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y];
-            float tile_cg_data[PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y];
-            
-            // Extract padded tiles using context from neighbours
-            extract_padded_tile(enc, tile_x, tile_y, tile_y_data, tile_co_data, tile_cg_data);
+            // Determine tile data size and allocate buffers
+            int tile_data_size;
+            if (enc->monoblock) {
+                // Monoblock mode: entire frame
+                tile_data_size = enc->width * enc->height;
+            } else {
+                // Standard mode: padded tiles (344x288)
+                tile_data_size = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y;
+            }
+
+            float *tile_y_data = malloc(tile_data_size * sizeof(float));
+            float *tile_co_data = malloc(tile_data_size * sizeof(float));
+            float *tile_cg_data = malloc(tile_data_size * sizeof(float));
+
+            if (enc->monoblock) {
+                // Extract entire frame (no padding)
+                memcpy(tile_y_data, enc->current_frame_y, tile_data_size * sizeof(float));
+                memcpy(tile_co_data, enc->current_frame_co, tile_data_size * sizeof(float));
+                memcpy(tile_cg_data, enc->current_frame_cg, tile_data_size * sizeof(float));
+            } else {
+                // Extract padded tiles using context from neighbours
+                extract_padded_tile(enc, tile_x, tile_y, tile_y_data, tile_co_data, tile_cg_data);
+            }
             
             // Debug: check input data before DWT
             /*if (tile_x == 0 && tile_y == 0) {
@@ -773,16 +914,29 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
                 printf("\n");
             }*/
             
-            // Apply DWT transform to each padded channel (176x176)
-            dwt_2d_forward_padded(tile_y_data, enc->decomp_levels, enc->wavelet_filter);
-            dwt_2d_forward_padded(tile_co_data, enc->decomp_levels, enc->wavelet_filter);
-            dwt_2d_forward_padded(tile_cg_data, enc->decomp_levels, enc->wavelet_filter);
+            // Apply DWT transform to each channel
+            if (enc->monoblock) {
+                // Monoblock mode: transform entire frame
+                dwt_2d_forward_flexible(tile_y_data, enc->width, enc->height, enc->decomp_levels, enc->wavelet_filter);
+                dwt_2d_forward_flexible(tile_co_data, enc->width, enc->height, enc->decomp_levels, enc->wavelet_filter);
+                dwt_2d_forward_flexible(tile_cg_data, enc->width, enc->height, enc->decomp_levels, enc->wavelet_filter);
+            } else {
+                // Standard mode: transform padded tiles (344x288)
+                dwt_2d_forward_padded(tile_y_data, enc->decomp_levels, enc->wavelet_filter);
+                dwt_2d_forward_padded(tile_co_data, enc->decomp_levels, enc->wavelet_filter);
+                dwt_2d_forward_padded(tile_cg_data, enc->decomp_levels, enc->wavelet_filter);
+            }
             
             // Serialise tile
-            size_t tile_size = serialise_tile_data(enc, tile_x, tile_y, 
+            size_t tile_size = serialise_tile_data(enc, tile_x, tile_y,
                                                    tile_y_data, tile_co_data, tile_cg_data,
                                                    mode, uncompressed_buffer + uncompressed_offset);
             uncompressed_offset += tile_size;
+
+            // Free allocated tile data
+            free(tile_y_data);
+            free(tile_co_data);
+            free(tile_cg_data);
         }
     }
     
@@ -1055,8 +1209,13 @@ static int write_tav_header(tav_encoder_t *enc) {
     // Magic number
     fwrite(TAV_MAGIC, 1, 8, enc->output_fp);
     
-    // Version (dynamic based on colour space)
-    uint8_t version = enc->ictcp_mode ? 2 : 1;  // Version 2 for ICtCp, 1 for YCoCg-R
+    // Version (dynamic based on colour space and monoblock mode)
+    uint8_t version;
+    if (enc->monoblock) {
+        version = enc->ictcp_mode ? 4 : 3;  // Version 4 for ICtCp monoblock, 3 for YCoCg-R monoblock
+    } else {
+        version = enc->ictcp_mode ? 2 : 1;  // Version 2 for ICtCp, 1 for YCoCg-R
+    }
     fputc(version, enc->output_fp);
     
     // Video parameters
@@ -2040,6 +2199,13 @@ int main(int argc, char *argv[]) {
             case 'o':
                 enc->output_file = strdup(optarg);
                 break;
+            case 's':
+                if (!parse_resolution(optarg, &enc->width, &enc->height)) {
+                    fprintf(stderr, "Invalid resolution format: %s\n", optarg);
+                    cleanup_encoder(enc);
+                    return 1;
+                }
+                break;
             case 'q':
                 enc->quality_level = CLAMP(atoi(optarg), 0, 5);
                 enc->quantiser_y = QUALITY_Y[enc->quality_level];