From c50d015515ef3b5f7b50cdd0c27a764bf9446f81 Mon Sep 17 00:00:00 2001
From: minjaesong <alswo9628@gmail.com>
Date: Fri, 26 Sep 2025 17:17:48 +0900
Subject: [PATCH] TAV decoder for ffmpeg/ffplay

---
 .../torvald/tsvm/GraphicsJSR223Delegate.kt    | 387 +---------
 video_encoder/Makefile                        |   8 +-
 video_encoder/decoder_tav.c                   | 699 ++++++++++++++++++
 video_encoder/encoder_tav.c                   |  83 +--
 4 files changed, 752 insertions(+), 425 deletions(-)
 create mode 100644 video_encoder/decoder_tav.c

diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
index 62a29ab..7d88db9 100644
--- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
+++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
@@ -52,6 +52,7 @@ import kotlin.collections.isNotEmpty
 import kotlin.collections.listOf
 import kotlin.collections.map
 import kotlin.collections.maxOfOrNull
+import kotlin.collections.minus
 import kotlin.collections.mutableListOf
 import kotlin.collections.mutableMapOf
 import kotlin.collections.set
@@ -67,37 +68,13 @@ import kotlin.let
 import kotlin.longArrayOf
 import kotlin.math.*
 import kotlin.repeat
+import kotlin.sequences.minus
 import kotlin.text.format
 import kotlin.text.lowercase
 import kotlin.text.toString
+import kotlin.times
 
 class GraphicsJSR223Delegate(private val vm: VM) {
-    
-    // TAV Simulated overlapping tiles constants (must match encoder)
-    private val TILE_SIZE_X = 280
-    private val TILE_SIZE_Y = 224
-    private val TAV_TILE_MARGIN = 32  // 32-pixel margin for 3 DWT levels (4 * 2^3 = 32px)
-    private val PADDED_TILE_SIZE_X = TILE_SIZE_X + 2 * TAV_TILE_MARGIN  // 280 + 64 = 344px
-    private val PADDED_TILE_SIZE_Y = TILE_SIZE_Y + 2 * TAV_TILE_MARGIN  // 224 + 64 = 288px
-
-    // Reusable working arrays to reduce allocation overhead
-    private val tevIdct8TempBuffer = FloatArray(64)
-    private val tevIdct16TempBuffer = FloatArray(256) // For 16x16 IDCT
-    private val tevIdct16SeparableBuffer = FloatArray(256) // For separable 16x16 IDCT
-    
-    // TAV coefficient delta storage for previous frame (for efficient P-frames)
-    private var tavPreviousCoeffsY: MutableMap<Int, FloatArray>? = null
-    private var tavPreviousCoeffsCo: MutableMap<Int, FloatArray>? = null
-    private var tavPreviousCoeffsCg: MutableMap<Int, FloatArray>? = null
-
-    // TAV Perceptual dequantisation support (must match encoder weights)
-    data class DWTSubbandInfo(
-        val level: Int,          // Decomposition level (1 to decompLevels)
-        val subbandType: Int,    // 0=LL, 1=LH, 2=HL, 3=HH
-        val coeffStart: Int,     // Starting index in linear coefficient array
-        val coeffCount: Int,     // Number of coefficients in this subband
-        val perceptualWeight: Float // Quantisation multiplier for this subband
-    )
 
     private fun getFirstGPU(): GraphicsAdapter? {
         return vm.findPeribyType(VM.PERITYPE_GPU_AND_TERM)?.peripheral as? GraphicsAdapter
@@ -1352,6 +1329,11 @@ class GraphicsJSR223Delegate(private val vm: VM) {
     // TEV (TSVM Enhanced Video) format support
     // Created by Claude on 2025-08-17
 
+    // Reusable working arrays to reduce allocation overhead
+    private val tevIdct8TempBuffer = FloatArray(64)
+    private val tevIdct16TempBuffer = FloatArray(256) // For 16x16 IDCT
+    private val tevIdct16SeparableBuffer = FloatArray(256) // For separable 16x16 IDCT
+
     fun jpeg_quality_to_mult(q: Float): Float {
         return (if ((q < 50)) 5000f / q else 200f - 2 * q) / 100f
     }
@@ -3881,6 +3863,28 @@ class GraphicsJSR223Delegate(private val vm: VM) {
     // ================= TAV (TSVM Advanced Video) Decoder =================
     // DWT-based video codec with ICtCp colour space support
 
+    // TAV Simulated overlapping tiles constants (must match encoder)
+    private val TILE_SIZE_X = 280
+    private val TILE_SIZE_Y = 224
+    private val TAV_TILE_MARGIN = 32  // 32-pixel margin for 3 DWT levels (4 * 2^3 = 32px)
+    private val PADDED_TILE_SIZE_X = TILE_SIZE_X + 2 * TAV_TILE_MARGIN  // 280 + 64 = 344px
+    private val PADDED_TILE_SIZE_Y = TILE_SIZE_Y + 2 * TAV_TILE_MARGIN  // 224 + 64 = 288px
+
+    // TAV coefficient delta storage for previous frame (for efficient P-frames)
+    private var tavPreviousCoeffsY: MutableMap<Int, FloatArray>? = null
+    private var tavPreviousCoeffsCo: MutableMap<Int, FloatArray>? = null
+    private var tavPreviousCoeffsCg: MutableMap<Int, FloatArray>? = null
+
+    // TAV Perceptual dequantisation support (must match encoder weights)
+    data class DWTSubbandInfo(
+        val level: Int,          // Decomposition level (1 to decompLevels)
+        val subbandType: Int,    // 0=LL, 1=LH, 2=HL, 3=HH
+        val coeffStart: Int,     // Starting index in linear coefficient array
+        val coeffCount: Int,     // Number of coefficients in this subband
+        val perceptualWeight: Float // Quantisation multiplier for this subband
+    )
+
+
     // TAV Perceptual dequantisation helper functions (must match encoder implementation exactly)
     private fun calculateSubbandLayout(width: Int, height: Int, decompLevels: Int): List<DWTSubbandInfo> {
         val subbands = mutableListOf<DWTSubbandInfo>()
@@ -3946,149 +3950,6 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         return subbands
     }
 
-    private fun getPerceptualWeightModel2(level: Int, subbandType: Int, isChroma: Boolean, maxLevels: Int): Float {
-        // Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity
-
-        if (!isChroma) {
-            // LUMA CHANNEL: Based on statistical analysis from real video content
-            when (subbandType) {
-                0 -> { // LL subband - contains most image energy, preserve carefully
-                    return when {
-                        level >= 6 -> 0.5f  // LL6: High energy but can tolerate moderate quantisation (range up to 22K)
-                        level >= 5 -> 0.7f  // LL5: Good preservation
-                        else -> 0.9f        // Lower LL levels: Fine preservation
-                    }
-                }
-                1 -> { // LH subband - horizontal details (human eyes more sensitive)
-                    return when {
-                        level >= 6 -> 0.8f  // LH6: Significant coefficients (max ~500), preserve well
-                        level >= 5 -> 1.0f  // LH5: Moderate coefficients (max ~600)
-                        level >= 4 -> 1.2f  // LH4: Small coefficients (max ~50)
-                        level >= 3 -> 1.6f  // LH3: Very small coefficients, can quantize more
-                        level >= 2 -> 2.0f  // LH2: Minimal impact
-                        else -> 2.5f        // LH1: Least important
-                    }
-                }
-                2 -> { // HL subband - vertical details (less sensitive due to HVS characteristics)
-                    return when {
-                        level >= 6 -> 1.0f  // HL6: Can quantize more aggressively than LH6
-                        level >= 5 -> 1.2f  // HL5: Standard quantisation
-                        level >= 4 -> 1.5f  // HL4: Notable range but less critical
-                        level >= 3 -> 2.0f  // HL3: Can tolerate more quantisation
-                        level >= 2 -> 2.5f  // HL2: Less important
-                        else -> 3.5f        // HL1: Most aggressive for vertical details
-                    }
-                }
-                3 -> { // HH subband - diagonal details (least important for HVS)
-                    return when {
-                        level >= 6 -> 1.2f  // HH6: Preserve some diagonal detail
-                        level >= 5 -> 1.6f  // HH5: Can quantize aggressively
-                        level >= 4 -> 2.0f  // HH4: Very aggressive
-                        level >= 3 -> 2.8f  // HH3: Minimal preservation
-                        level >= 2 -> 3.5f  // HH2: Maximum compression
-                        else -> 5.0f        // HH1: Most aggressive quantisation
-                    }
-                }
-            }
-        } else {
-            // CHROMA CHANNELS: Less critical for human perception, more aggressive quantisation
-            when (subbandType) {
-                0 -> { // LL chroma - still important but less than luma
-                    return 1f
-                    return when {
-                        level >= 6 -> 0.8f  // Chroma LL6: Less critical than luma LL
-                        level >= 5 -> 0.9f
-                        else -> 1.0f
-                    }
-                }
-                1 -> { // LH chroma - horizontal chroma details
-                    return 1.8f
-                    return when {
-                        level >= 6 -> 1.0f
-                        level >= 5 -> 1.2f
-                        level >= 4 -> 1.4f
-                        level >= 3 -> 1.6f
-                        level >= 2 -> 1.8f
-                        else -> 2.0f
-                    }
-                }
-                2 -> { // HL chroma - vertical chroma details (even less critical)
-                    return 1.3f;
-                    return when {
-                        level >= 6 -> 1.2f
-                        level >= 5 -> 1.4f
-                        level >= 4 -> 1.6f
-                        level >= 3 -> 1.8f
-                        level >= 2 -> 2.0f
-                        else -> 2.2f
-                    }
-                }
-                3 -> { // HH chroma - diagonal chroma details (most aggressive)
-                    return 2.5f
-                    return when {
-                        level >= 6 -> 1.4f
-                        level >= 5 -> 1.6f
-                        level >= 4 -> 1.8f
-                        level >= 3 -> 2.1f
-                        level >= 2 -> 2.3f
-                        else -> 2.5f
-                    }
-                }
-            }
-        }
-        return 1.0f
-
-        // Legacy data-driven model (kept for reference but not used)
-        /*if (!isChroma) {
-            // Luma strategy based on statistical variance analysis from real video data
-            return when (subbandType) {
-                0 -> { // LL
-                    // LL6 has extremely high variance (Range=8026.7) but contains most image energy
-                    // Moderate quantisation appropriate due to high variance tolerance
-                    1.1f
-                }
-                1 -> { // LH (horizontal detail)
-                    // Data-driven weights based on observed coefficient patterns
-                    when (level) {
-                        in 6..maxLevels -> 0.7f      // LH6: significant coefficients (Range=243.1)
-                        5 -> 0.8f      // LH5: moderate coefficients (Range=264.3)
-                        4 -> 1.0f      // LH4: small coefficients (Range=50.8)
-                        3 -> 1.4f      // LH3: sparse but large outliers (Range=11909.1)
-                        2 -> 1.6f      // LH2: fewer coefficients (Range=6720.2)
-                        else -> 1.9f   // LH1: smallest detail (Range=1606.3)
-                    }
-                }
-                2 -> { // HL (vertical detail)
-                    // Similar pattern to LH but slightly different variance
-                    when (level) {
-                        in 6..maxLevels -> 0.8f      // HL6: moderate coefficients (Range=181.6)
-                        5 -> 0.9f      // HL5: small coefficients (Range=80.4)
-                        4 -> 1.2f      // HL4: surprising large outliers (Range=9737.9)
-                        3 -> 1.3f      // HL3: very large outliers (Range=13698.2)
-                        2 -> 1.5f      // HL2: moderate range (Range=2099.4)
-                        else -> 1.8f   // HL1: small coefficients (Range=851.1)
-                    }
-                }
-                3 -> { // HH (diagonal detail)
-                    // HH bands generally have lower energy but important for texture
-                    when (level) {
-                        in 6..maxLevels -> 1.0f      // HH6: some significant coefficients (Range=95.8)
-                        5 -> 1.1f      // HH5: small coefficients (Range=75.9)
-                        4 -> 1.3f      // HH4: moderate range (Range=89.8)
-                        3 -> 1.5f      // HH3: large outliers (Range=11611.2)
-                        2 -> 1.8f      // HH2: moderate range (Range=2499.2)
-                        else -> 2.1f   // HH1: smallest coefficients (Range=761.6)
-                    }
-                }
-                else -> 1.0f
-            }
-        } else {
-            // Chroma strategy - apply 0.85x reduction to luma weights for color preservation
-            val lumaWeight = getPerceptualWeight(level, subbandType, false, maxLevels)
-            return lumaWeight * 1.6f
-        }*/
-    }
-
     var ANISOTROPY_MULT = floatArrayOf(1.8f, 1.6f, 1.4f, 1.2f, 1.0f, 1.0f)
     var ANISOTROPY_BIAS = floatArrayOf(0.2f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f)
     var ANISOTROPY_MULT_CHROMA = floatArrayOf(6.6f, 5.5f, 4.4f, 3.3f, 2.2f, 1.1f)
@@ -4096,7 +3957,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
 
 
 
-    private fun perceptual_model3_LH(quality: Int, level: Int): Float {
+    private fun perceptual_model3_LH(quality: Int, level: Float): Float {
         val H4 = 1.2f
         val Lx = H4 - ((quality + 1f) / 15f) * (level - 4f)
         val Ld = (quality + 1f) / -15f
@@ -4114,14 +3975,14 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         return (HL / LH) * 1.44f;
     }
 
-    fun perceptual_model3_LL(quality: Int, level: Int): Float {
+    fun perceptual_model3_LL(quality: Int, level: Float): Float {
         val n = perceptual_model3_LH(quality, level)
         val m = perceptual_model3_LH(quality, level - 1) / n
 
         return n / m
     }
 
-    fun perceptual_model3_chroma_basecurve(quality: Int, level: Int): Float {
+    fun perceptual_model3_chroma_basecurve(quality: Int, level: Float): Float {
         return 1.0f - (1.0f / (0.5f * quality * quality + 1.0f)) * (level - 4f) // just a line that passes (4,1)
     }
 
@@ -4140,9 +4001,12 @@ class GraphicsJSR223Delegate(private val vm: VM) {
     }
 
     // level is one-based index
-    private fun getPerceptualWeight(qIndex: Int, qYGlobal: Int, level: Int, subbandType: Int, isChroma: Boolean, maxLevels: Int): Float {
+    private fun getPerceptualWeight(qIndex: Int, qYGlobal: Int, level0: Int, subbandType: Int, isChroma: Boolean, maxLevels: Int): Float {
         // Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity
 
+        // Rescale the 1..maxLevels level index onto the model's 1..6 axis. A single-level
+        // decomposition maps to 1 instead of evaluating 0/0, which would turn every weight into NaN.
+        val level = if (maxLevels > 1) 1.0f + ((level0 - 1.0f) / (maxLevels - 1.0f)) * 5.0f else 1.0f
         val qualityLevel = tavDeriveEncoderQindex(qIndex, qYGlobal)
 
         if (!isChroma) {
@@ -4157,10 +4021,10 @@ class GraphicsJSR223Delegate(private val vm: VM) {
             
             // HL subband - vertical details
             val HL: Float = perceptual_model3_HL(qualityLevel, LH)
-            if (subbandType == 2) return HL * (if (level == 2) TWO_PIXEL_DETAILER else if (level == 3) FOUR_PIXEL_DETAILER else 1f)
+            if (subbandType == 2) return HL * (if (level in 1.8f..2.2f) TWO_PIXEL_DETAILER else if (level in 2.8f..3.2f) FOUR_PIXEL_DETAILER else 1f)
 
             // HH subband - diagonal details
-            else return perceptual_model3_HH(LH, HL) * (if (level == 2) TWO_PIXEL_DETAILER else if (level == 3) FOUR_PIXEL_DETAILER else 1f)
+            else return perceptual_model3_HH(LH, HL) * (if (level in 1.8f..2.2f) TWO_PIXEL_DETAILER else if (level in 2.8f..3.2f) FOUR_PIXEL_DETAILER else 1f)
             
         } else {
             // CHROMA CHANNELS: Less critical for human perception, more aggressive quantisation
@@ -4854,51 +4718,6 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         }
     }
 
-    private fun tavAddYCoCgResidualToRGBTile(tileX: Int, tileY: Int, yRes: FloatArray, coRes: FloatArray, cgRes: FloatArray,
-                                             rgbAddr: Long, width: Int, height: Int) {
-        val startX = tileX * TILE_SIZE_X
-        val startY = tileY * TILE_SIZE_Y
-
-        for (y in 0 until TILE_SIZE_Y) {
-            for (x in 0 until TILE_SIZE_X) {
-                val frameX = startX + x
-                val frameY = startY + y
-
-                if (frameX < width && frameY < height) {
-                    val tileIdx = y * TILE_SIZE_X + x
-                    val pixelIdx = frameY * width + frameX
-                    val rgbOffset = pixelIdx * 3L
-
-                    // Get current RGB (from motion compensation)
-                    val curR = (vm.peek(rgbAddr + rgbOffset).toInt() and 0xFF).toFloat()
-                    val curG = (vm.peek(rgbAddr + rgbOffset + 1).toInt() and 0xFF).toFloat()
-                    val curB = (vm.peek(rgbAddr + rgbOffset + 2).toInt() and 0xFF).toFloat()
-
-                    // Convert current RGB back to YCoCg
-                    val co = (curR - curB) / 2
-                    val tmp = curB + co
-                    val cg = (curG - tmp) / 2
-                    val yPred = tmp + cg
-
-                    // Add residual
-                    val yFinal = yPred + yRes[tileIdx]
-                    val coFinal = co + coRes[tileIdx]
-                    val cgFinal = cg + cgRes[tileIdx]
-
-                    // Convert back to RGB
-                    val tmpFinal = yFinal - cgFinal
-                    val gFinal = yFinal + cgFinal
-                    val bFinal = tmpFinal - coFinal
-                    val rFinal = tmpFinal + coFinal
-
-                    vm.poke(rgbAddr + rgbOffset, rFinal.toInt().coerceIn(0, 255).toByte())
-                    vm.poke(rgbAddr + rgbOffset + 1, gFinal.toInt().coerceIn(0, 255).toByte())
-                    vm.poke(rgbAddr + rgbOffset + 2, bFinal.toInt().coerceIn(0, 255).toByte())
-                }
-            }
-        }
-    }
-
     // Helper functions (simplified versions of existing DWT functions)
     private fun tavCopyTileRGB(tileX: Int, tileY: Int, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int) {
         val startX = tileX * TILE_SIZE_X
@@ -4970,77 +4789,11 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         }
     }
 
-    // Helper functions for perceptual models (simplified versions of encoder models)
-    private fun getPerceptualModelLL(qualityLevel: Int, level: Int): Float {
-        // Simplified LL model - preserve DC components
-        return 1.0f - (level.toFloat() / 8.0f) * (qualityLevel.toFloat() / 6.0f)
-    }
-
-    private fun getPerceptualModelLH(qualityLevel: Int, level: Int): Float {
-        // Simplified LH model - horizontal details
-        return 1.2f + (level.toFloat() / 4.0f) * (qualityLevel.toFloat() / 3.0f)
-    }
-
-    private fun getPerceptualModelHL(qualityLevel: Int, lhWeight: Float): Float {
-        // Simplified HL model - vertical details
-        return lhWeight * 1.1f
-    }
-
-    private fun getPerceptualModelHH(lhWeight: Float, hlWeight: Float): Float {
-        // Simplified HH model - diagonal details
-        return (lhWeight + hlWeight) * 0.6f
-    }
-
     private fun getPerceptualModelChromaBase(qualityLevel: Int, level: Int): Float {
         // Simplified chroma base curve
         return 1.0f - (1.0f / (0.5f * qualityLevel * qualityLevel + 1.0f)) * (level - 4.0f)
     }
 
-    // Determine delta-specific perceptual weight for coefficient at linear position
-    private fun getPerceptualWeightForPositionDelta(qualityLevel: Int, linearIdx: Int, width: Int, height: Int, decompLevels: Int, isChroma: Boolean): Float {
-        // Map linear coefficient index to DWT subband using same layout as encoder
-        var offset = 0
-
-        // First: LL subband at maximum decomposition level
-        val llWidth = width shr decompLevels
-        val llHeight = height shr decompLevels
-        val llSize = llWidth * llHeight
-
-        if (linearIdx < offset + llSize) {
-            // LL subband at maximum level - use delta-specific perceptual weight
-            return getPerceptualWeightDelta(qualityLevel, decompLevels, 0, isChroma, decompLevels)
-        }
-        offset += llSize
-
-        // Then: LH, HL, HH subbands for each level from max down to 1
-        for (level in decompLevels downTo 1) {
-            val levelWidth = width shr (decompLevels - level + 1)
-            val levelHeight = height shr (decompLevels - level + 1)
-            val subbandSize = levelWidth * levelHeight
-
-            // LH subband (horizontal details)
-            if (linearIdx < offset + subbandSize) {
-                return getPerceptualWeightDelta(qualityLevel, level, 1, isChroma, decompLevels)
-            }
-            offset += subbandSize
-
-            // HL subband (vertical details)
-            if (linearIdx < offset + subbandSize) {
-                return getPerceptualWeightDelta(qualityLevel, level, 2, isChroma, decompLevels)
-            }
-            offset += subbandSize
-
-            // HH subband (diagonal details)
-            if (linearIdx < offset + subbandSize) {
-                return getPerceptualWeightDelta(qualityLevel, level, 3, isChroma, decompLevels)
-            }
-            offset += subbandSize
-        }
-
-        // Fallback for out-of-bounds indices
-        return 1.0f
-    }
-
     private fun tavDecodeDeltaTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
                                       width: Int, height: Int, qY: Int, qCo: Int, qCg: Int,
                                       waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false): Long {
@@ -5199,68 +4952,6 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         return ptr
     }
 
-    private fun tavApplyMotionCompensationRGB(tileX: Int, tileY: Int, mvX: Int, mvY: Int,
-                                              currentRGBAddr: Long, prevRGBAddr: Long,
-                                              width: Int, height: Int) {
-        val startX = tileX * TILE_SIZE_X
-        val startY = tileY * TILE_SIZE_Y
-
-        // Motion vectors in quarter-pixel precision
-        val refX = startX + (mvX / 4.0f)
-        val refY = startY + (mvY / 4.0f)
-
-        for (y in 0 until TILE_SIZE_Y) {
-            for (x in 0 until TILE_SIZE_X) {
-                val currentPixelIdx = (startY + y) * width + (startX + x)
-
-                if (currentPixelIdx >= 0 && currentPixelIdx < width * height) {
-                    // Bilinear interpolation for sub-pixel motion vectors
-                    val srcX = refX + x
-                    val srcY = refY + y
-
-                    val interpolatedRGB = tavBilinearInterpolateRGB(prevRGBAddr, width, height, srcX, srcY)
-
-                    val rgbOffset = currentPixelIdx * 3L
-                    vm.poke(currentRGBAddr + rgbOffset, interpolatedRGB[0])
-                    vm.poke(currentRGBAddr + rgbOffset + 1, interpolatedRGB[1])
-                    vm.poke(currentRGBAddr + rgbOffset + 2, interpolatedRGB[2])
-                }
-            }
-        }
-    }
-
-    private fun tavBilinearInterpolateRGB(rgbPtr: Long, width: Int, height: Int, x: Float, y: Float): ByteArray {
-        val x0 = kotlin.math.floor(x).toInt()
-        val y0 = kotlin.math.floor(y).toInt()
-        val x1 = x0 + 1
-        val y1 = y0 + 1
-
-        if (x0 < 0 || y0 < 0 || x1 >= width || y1 >= height) {
-            return byteArrayOf(0, 0, 0)  // Out of bounds - return black
-        }
-
-        val fx = x - x0
-        val fy = y - y0
-
-        // Get 4 corner pixels
-        val rgb00 = getRGBPixel(rgbPtr, y0 * width + x0)
-        val rgb10 = getRGBPixel(rgbPtr, y0 * width + x1)
-        val rgb01 = getRGBPixel(rgbPtr, y1 * width + x0)
-        val rgb11 = getRGBPixel(rgbPtr, y1 * width + x1)
-
-        // Bilinear interpolation
-        val result = ByteArray(3)
-        for (c in 0..2) {
-            val interp = (1 - fx) * (1 - fy) * (rgb00[c].toInt() and 0xFF) +
-                    fx * (1 - fy) * (rgb10[c].toInt() and 0xFF) +
-                    (1 - fx) * fy * (rgb01[c].toInt() and 0xFF) +
-                    fx * fy * (rgb11[c].toInt() and 0xFF)
-            result[c] = interp.toInt().coerceIn(0, 255).toByte()
-        }
-
-        return result
-    }
-
     private fun getRGBPixel(rgbPtr: Long, pixelIdx: Int): ByteArray {
         val offset = pixelIdx * 3L
         return byteArrayOf(
diff --git a/video_encoder/Makefile b/video_encoder/Makefile
index 5fc6ae0..a42c219 100644
--- a/video_encoder/Makefile
+++ b/video_encoder/Makefile
@@ -6,7 +6,7 @@ CFLAGS = -std=c99 -Wall -Wextra -O2 -D_GNU_SOURCE
 LIBS = -lm -lzstd
 
 # Source files and targets
-TARGETS = tev tav
+TARGETS = tev tav tav_decoder
 
 # Build all encoders
 all: $(TARGETS)
@@ -20,8 +20,9 @@ tav: encoder_tav.c
 	rm -f encoder_tav
 	$(CC) $(CFLAGS) -o encoder_tav $< $(LIBS)
 
-# Default target
-$(TARGETS): all
+tav_decoder: decoder_tav.c
+	rm -f decoder_tav
+	$(CC) $(CFLAGS) -o decoder_tav $< $(LIBS)
 
 # Build with debug symbols
 debug: CFLAGS += -g -DDEBUG
@@ -35,6 +36,7 @@ clean:
 install: $(TARGETS)
 	cp encoder_tev /usr/local/bin/
 	cp encoder_tav /usr/local/bin/
+	cp decoder_tav /usr/local/bin/
 
 # Check for required dependencies
 check-deps:
diff --git a/video_encoder/decoder_tav.c b/video_encoder/decoder_tav.c
new file mode 100644
index 0000000..d2781b0
--- /dev/null
+++ b/video_encoder/decoder_tav.c
@@ -0,0 +1,699 @@
+// TAV Decoder - Working version with TSVM inverse DWT
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <math.h>
+#include <zstd.h>
+#include <unistd.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <signal.h>
+
+// TAV format constants
+#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56"
+#define TAV_MODE_SKIP      0x00
+#define TAV_MODE_INTRA     0x01
+#define TAV_MODE_DELTA     0x02
+#define TAV_PACKET_IFRAME      0x10
+#define TAV_PACKET_PFRAME      0x11
+#define TAV_PACKET_AUDIO_MP2   0x20
+#define TAV_PACKET_SUBTITLE    0x30
+#define TAV_PACKET_SYNC        0xFF
+
+// Clamp x to the inclusive range [min, max] (an inline function despite the macro-style name)
+static inline int CLAMP(int x, int min, int max) {
+    return x < min ? min : (x > max ? max : x);
+}
+
+// TAV header structure (32 bytes, packed); filled by one raw fread() of the file header
+typedef struct {
+    uint8_t magic[8];           // compared against TAV_MAGIC
+    uint8_t version;
+    uint16_t width;             // multi-byte fields are taken in host byte order by the raw read
+    uint16_t height;            // width * height gives the decoder's frame_size
+    uint8_t fps;
+    uint32_t total_frames;
+    uint8_t wavelet_filter;     // NOTE(review): presumably 0 = 5/3, otherwise 9/7 - confirm against caller
+    uint8_t decomp_levels;
+    uint8_t quantiser_y;
+    uint8_t quantiser_co;
+    uint8_t quantiser_cg;
+    uint8_t extra_flags;
+    uint8_t video_flags;
+    uint8_t encoder_quality;
+    uint8_t file_role;
+    uint8_t reserved[5];        // pads the struct to 32 bytes
+} __attribute__((packed)) tav_header_t;
+
+// Decoder state (allocated zero-filled by tav_decoder_init)
+typedef struct {
+    FILE *input_fp;             // TAV input file, opened in binary read mode
+    FILE *audio_output_fp;      // For MP2 audio output when using -p flag
+    tav_header_t header;        // File header as read from input_fp
+    uint8_t *current_frame_rgb; // frame_size * 3 bytes
+    uint8_t *reference_frame_rgb;
+    float *dwt_buffer_y;
+    float *dwt_buffer_co;
+    float *dwt_buffer_cg;
+    float *reference_ycocg_y;   // Reference frame in YCoCg float space
+    float *reference_ycocg_co;
+    float *reference_ycocg_cg;
+    int frame_count;
+    int frame_size;             // header.width * header.height (pixels per frame)
+} tav_decoder_t;
+
+// 9/7 inverse DWT (from TSVM Kotlin code), applied in place.
+//
+// data must hold at least length floats. On entry data[0 .. half-1] holds the low-pass
+// coefficients and data[half .. length-1] the high-pass coefficients, where
+// half = (length + 1) / 2. On return data holds the reconstructed samples: even positions
+// come from the low band, odd positions from the high band.
+// Inputs shorter than 2 samples are left untouched. The process exits with an error
+// message if the scratch buffer cannot be allocated.
+static void dwt_97_inverse_1d(float *data, int length) {
+    if (length < 2) return;
+
+    const int half = (length + 1) / 2;  // Number of low-pass coefficients
+    const int high = length / 2;        // Number of high-pass coefficients (half + high == length)
+
+    float *temp = malloc((size_t)length * sizeof(float));
+    if (!temp) {
+        // The transform cannot proceed without scratch space; fail loudly here rather
+        // than dereference NULL in the copy below.
+        fprintf(stderr, "dwt_97_inverse_1d: out of memory (%d samples)\n", length);
+        exit(EXIT_FAILURE);
+    }
+
+    // Working copy in the same [low | high] layout as the input (matching TSVM layout);
+    // data itself is only rewritten by the final interleave.
+    memcpy(temp, data, (size_t)length * sizeof(float));
+
+    // 9/7 inverse lifting coefficients from TSVM
+    const float alpha = -1.586134342f;
+    const float beta = -0.052980118f;
+    const float gamma = 0.882911076f;
+    const float delta = 0.443506852f;
+    const float K = 1.230174105f;
+
+    // Step 1: Undo scaling
+    for (int i = 0; i < half; i++) {
+        temp[i] /= K;  // Low-pass coefficients
+    }
+    for (int i = 0; i < high; i++) {
+        temp[half + i] *= K;  // High-pass coefficients
+    }
+
+    // Step 2: Undo δ update
+    // Boundaries: for odd lengths the last low-pass sample has no high-pass coefficient
+    // at half + i, so that term is taken as 0; at i == 0 there is no left neighbour
+    // and d_curr is used for both terms.
+    for (int i = 0; i < half; i++) {
+        float d_curr = (i < high) ? temp[half + i] : 0.0f;
+        float d_prev = (i > 0) ? temp[half + i - 1] : d_curr;
+        temp[i] -= delta * (d_curr + d_prev);
+    }
+
+    // Step 3: Undo γ predict
+    // Past the last low-pass sample, s_next falls back to s_curr.
+    for (int i = 0; i < high; i++) {
+        float s_curr = temp[i];
+        float s_next = (i + 1 < half) ? temp[i + 1] : s_curr;
+        temp[half + i] -= gamma * (s_curr + s_next);
+    }
+
+    // Step 4: Undo β update (same boundary handling as step 2)
+    for (int i = 0; i < half; i++) {
+        float d_curr = (i < high) ? temp[half + i] : 0.0f;
+        float d_prev = (i > 0) ? temp[half + i - 1] : d_curr;
+        temp[i] -= beta * (d_curr + d_prev);
+    }
+
+    // Step 5: Undo α predict (same boundary handling as step 3)
+    for (int i = 0; i < high; i++) {
+        float s_curr = temp[i];
+        float s_next = (i + 1 < half) ? temp[i + 1] : s_curr;
+        temp[half + i] -= alpha * (s_curr + s_next);
+    }
+
+    // Reconstruction - interleave low and high pass
+    // half + high == length, so every output position is written exactly once.
+    for (int i = 0; i < half; i++) {
+        data[2 * i] = temp[i];  // Even positions: low-pass coefficients
+    }
+    for (int i = 0; i < high; i++) {
+        data[2 * i + 1] = temp[half + i];  // Odd positions: high-pass coefficients
+    }
+
+    free(temp);
+}
+
+// 5/3 inverse DWT placeholder: currently delegates to the 9/7 inverse transform
+static void dwt_53_inverse_1d(float *data, int length) {
+    if (length < 2) return;
+
+    // NOTE: this applies 9/7 lifting, not 5/3 lifting, so it does not invert a real 5/3 transform
+    // TODO: Implement proper 5/3 from TSVM if needed
+    dwt_97_inverse_1d(data, length);
+}
+
+// Multi-level inverse DWT (fixed to match TSVM exactly), applied in place.
+// data is a row-major plane with a row stride of width. Levels are undone from the
+// coarsest (levels - 1) to the finest (0); level k reconstructs the top-left
+// (width >> k) x (height >> k) region, columns first and then rows.
+// filter_type 0 selects the 5/3 inverse, any other value the 9/7 inverse.
+// Exits the process if scratch memory cannot be allocated.
+static void apply_inverse_dwt_multilevel(float *data, int width, int height, int levels, int filter_type) {
+    // Nothing to reconstruct: the level loop would not run, or would skip every level
+    if (width < 1 || height < 1 || levels < 1) return;
+
+    // Shifting an int by its bit width or more is undefined behaviour, and a positive
+    // size shifted that far would be 0 (a skipped level) anyway, so cap the level count.
+    const int max_levels = (int)(sizeof(int) * 8) - 1;
+    if (levels > max_levels) levels = max_levels;
+
+    void (*inverse_1d)(float *, int) = (filter_type == 0) ? dwt_53_inverse_1d   // 5/3 reversible
+                                                          : dwt_97_inverse_1d;  // 9/7 irreversible
+    // Scratch for one column; rows are contiguous in data and are transformed in place
+    float *temp_col = malloc((size_t)height * sizeof(float));
+    if (!temp_col) {
+        fprintf(stderr, "apply_inverse_dwt_multilevel: out of memory (%d samples)\n", height);
+        exit(EXIT_FAILURE);
+    }
+
+    // TSVM: for (level in levels - 1 downTo 0)
+    for (int level = levels - 1; level >= 0; level--) {
+        // TSVM: val currentWidth = width shr level
+        int current_width = width >> level;
+        int current_height = height >> level;
+
+        // Handle edge cases
+        if (current_width < 1 || current_height < 1) continue;
+        if (current_width == 1 && current_height == 1) continue;
+
+        // TSVM: Column inverse transform first (vertical)
+        for (int x = 0; x < current_width; x++) {
+            for (int y = 0; y < current_height; y++) {
+                // TSVM applies sharpenFilter multiplier, we'll skip for now
+                temp_col[y] = data[(size_t)y * width + x];
+            }
+
+            inverse_1d(temp_col, current_height);
+
+            for (int y = 0; y < current_height; y++) {
+                data[(size_t)y * width + x] = temp_col[y];
+            }
+        }
+
+        // TSVM: Row inverse transform second (horizontal); sharpenFilter multiplier skipped here too
+        for (int y = 0; y < current_height; y++) {
+            inverse_1d(data + (size_t)y * width, current_width);
+        }
+    }
+
+    free(temp_col);
+}
+
+// YCoCg-R to RGB conversion (from TSVM). Channels are rounded half-up and saturated to
+// 0..255 while still floats: casting NaN/out-of-range floats to an integer is undefined.
+static void ycocg_r_to_rgb(float y, float co, float cg, uint8_t *r, uint8_t *g, uint8_t *b) {
+    const float tmp = y - cg / 2.0f;
+    const float b_val = tmp - co / 2.0f;
+    const float rounded[3] = { co + b_val + 0.5f, cg + tmp + 0.5f, b_val + 0.5f };
+    uint8_t *const out[3] = { r, g, b };
+    for (int c = 0; c < 3; c++) {
+        *out[c] = !(rounded[c] > 0.0f) ? 0 : (rounded[c] >= 255.0f ? 255 : (uint8_t)rounded[c]);
+    }
+}
+
+// Initialize decoder: open `input_file`, validate its header and allocate all frame buffers.
+// Returns NULL (with everything released) on I/O error, bad magic, zero size or out of memory.
+static tav_decoder_t* tav_decoder_init(const char *input_file) {
+    tav_decoder_t *decoder = calloc(1, sizeof(tav_decoder_t));
+    if (!decoder) return NULL;
+    decoder->input_fp = fopen(input_file, "rb");
+    if (!decoder->input_fp) goto fail;
+    // Read header and verify magic
+    if (fread(&decoder->header, sizeof(tav_header_t), 1, decoder->input_fp) != 1) goto fail;
+    if (memcmp(decoder->header.magic, TAV_MAGIC, 8) != 0) goto fail;
+
+    // A zero-sized picture would only produce empty buffers; reject it up front
+    if (decoder->header.width == 0 || decoder->header.height == 0) goto fail;
+    decoder->frame_size = decoder->header.width * decoder->header.height;
+
+    // Allocate buffers
+    decoder->current_frame_rgb = calloc(decoder->frame_size * 3, 1);
+    decoder->reference_frame_rgb = calloc(decoder->frame_size * 3, 1);
+    decoder->dwt_buffer_y = calloc(decoder->frame_size, sizeof(float));
+    decoder->dwt_buffer_co = calloc(decoder->frame_size, sizeof(float));
+    decoder->dwt_buffer_cg = calloc(decoder->frame_size, sizeof(float));
+    decoder->reference_ycocg_y = calloc(decoder->frame_size, sizeof(float));
+    decoder->reference_ycocg_co = calloc(decoder->frame_size, sizeof(float));
+    decoder->reference_ycocg_cg = calloc(decoder->frame_size, sizeof(float));
+    // If any allocation failed, release the ones that succeeded and bail out
+    void *const buffers[] = { decoder->current_frame_rgb, decoder->reference_frame_rgb,
+        decoder->dwt_buffer_y, decoder->dwt_buffer_co, decoder->dwt_buffer_cg,
+        decoder->reference_ycocg_y, decoder->reference_ycocg_co, decoder->reference_ycocg_cg };
+    for (size_t k = 0; k < sizeof(buffers) / sizeof(buffers[0]); k++) {
+        if (buffers[k]) continue;
+        for (size_t j = 0; j < sizeof(buffers) / sizeof(buffers[0]); j++) free(buffers[j]);
+        goto fail;
+    }
+    return decoder;
+fail:
+    if (decoder->input_fp) fclose(decoder->input_fp);
+    free(decoder);
+    return NULL;
+}
+
+// Cleanup decoder: closes the input stream if one is open, releases every frame buffer
+// and the decoder itself. NULL is ignored; audio_output_fp is not closed here.
+static void tav_decoder_free(tav_decoder_t *decoder) {
+    if (decoder == NULL) {
+        return;
+    }
+    if (decoder->input_fp != NULL) {
+        fclose(decoder->input_fp);
+    }
+    void *const owned[] = { decoder->current_frame_rgb, decoder->reference_frame_rgb,
+        decoder->dwt_buffer_y, decoder->dwt_buffer_co, decoder->dwt_buffer_cg,
+        decoder->reference_ycocg_y, decoder->reference_ycocg_co, decoder->reference_ycocg_cg };
+    for (size_t k = 0; k < sizeof(owned) / sizeof(owned[0]); k++) free(owned[k]);
+    free(decoder);
+}
+
+// Skip the payload of a packet the decoder does not use. `what` only labels the debug
+// trace, which is printed while fewer than five frames have been decoded.
+static void skip_packet_payload(tav_decoder_t *decoder, const char *what, uint8_t type, uint32_t size) {
+    const int trace = decoder->frame_count < 5;
+    const long before_skip = trace ? ftell(decoder->input_fp) : 0;
+    if (trace) {
+        fprintf(stderr, "Skipping %s packet: type=0x%02X, size=%u (pos: %ld)\n", what, type, size, before_skip);
+    }
+    fseek(decoder->input_fp, size, SEEK_CUR);
+    if (trace) {
+        long after_skip = ftell(decoder->input_fp);
+        fprintf(stderr, "After skip: pos=%ld (moved %ld bytes)\n", after_skip, after_skip - before_skip);
+    }
+}
+
+// Read `count` little-endian int16 coefficients from `src`, multiply each by `quantiser`
+// and store the products in `dst`. Returns the position just past the bytes consumed.
+static const uint8_t *dequantise_plane(const uint8_t *src, float *dst, int count, int quantiser) {
+    for (int i = 0; i < count; i++, src += 2) {
+        int16_t coeff = (int16_t)((src[1] << 8) | src[0]);
+        dst[i] = coeff * quantiser;
+    }
+    return src;
+}
+
+// Decode a single frame.
+// Consumes packets until one video packet (I- or P-frame) has been decoded into
+// current_frame_rgb. Sync packets are ignored; MP2 audio is copied to audio_output_fp
+// when that stream is set and skipped otherwise; subtitle packets are skipped.
+// Returns 1 after a decoded frame, 0 at end of file, -1 on a read error, an unknown
+// packet type, an allocation failure, or corrupt/truncated frame data.
+static int decode_frame(tav_decoder_t *decoder) {
+    uint8_t packet_type;
+    uint32_t packet_size;
+
+    // Loop (instead of recursing per side packet) so stack use stays constant
+    for (;;) {
+        // Check file position before reading
+        long file_pos = ftell(decoder->input_fp);
+
+        // Read packet header
+        if (fread(&packet_type, 1, 1, decoder->input_fp) != 1) {
+            fprintf(stderr, "EOF at frame %d (file pos: %ld)\n", decoder->frame_count, file_pos);
+            return 0; // EOF
+        }
+
+        // Sync packets have no size field - they're just a single 0xFF byte
+        if (packet_type == TAV_PACKET_SYNC) {
+            if (decoder->frame_count < 5) {
+                fprintf(stderr, "Found sync packet 0xFF at pos %ld\n", file_pos);
+            }
+            continue; // Immediately try next packet
+        }
+
+        // All other packets have a 4-byte size field
+        if (fread(&packet_size, 4, 1, decoder->input_fp) != 1) {
+            fprintf(stderr, "Error reading packet size at frame %d (file pos: %ld)\n", decoder->frame_count, file_pos);
+            return -1; // Error
+        }
+
+        // Debug: Show packet info for first few frames
+        if (decoder->frame_count < 5) {
+            fprintf(stderr, "Frame %d: packet_type=0x%02X, size=%u (file pos: %ld)\n",
+                   decoder->frame_count, packet_type, packet_size, file_pos);
+        }
+
+        if (packet_type == TAV_PACKET_AUDIO_MP2 && decoder->audio_output_fp) {
+            // FFplay mode: read and write MP2 audio data directly
+            uint8_t *audio_data = malloc(packet_size ? packet_size : 1);
+            if (!audio_data) return -1; // out of memory
+            if (fread(audio_data, 1, packet_size, decoder->input_fp) == packet_size) {
+                fwrite(audio_data, 1, packet_size, decoder->audio_output_fp);
+                fflush(decoder->audio_output_fp);
+            }
+            free(audio_data);
+        } else if (packet_type == TAV_PACKET_AUDIO_MP2) {
+            // Skip audio packets in normal mode
+            skip_packet_payload(decoder, "non-video", packet_type, packet_size);
+        } else if (packet_type == TAV_PACKET_SUBTITLE) {
+            // Skip subtitle packets
+            skip_packet_payload(decoder, "subtitle", packet_type, packet_size);
+        } else {
+            break; // not a side packet: checked as a video packet below
+        }
+    }
+
+    if (packet_type != TAV_PACKET_IFRAME && packet_type != TAV_PACKET_PFRAME) {
+        fprintf(stderr, "Unknown packet type: 0x%02X (expected 0x%02X for audio)\n", packet_type, TAV_PACKET_AUDIO_MP2);
+        return -1;
+    }
+
+    // Read and decompress frame data (malloc(0) may return NULL, hence the 1-byte floor)
+    uint8_t *compressed_data = malloc(packet_size ? packet_size : 1);
+    if (!compressed_data || fread(compressed_data, 1, packet_size, decoder->input_fp) != packet_size) {
+        free(compressed_data);
+        return -1;
+    }
+
+    size_t decompressed_size = ZSTD_getFrameContentSize(compressed_data, packet_size);
+    if (decompressed_size == ZSTD_CONTENTSIZE_ERROR || decompressed_size == ZSTD_CONTENTSIZE_UNKNOWN) {
+        decompressed_size = decoder->frame_size * 3 * sizeof(int16_t) + 1024;
+    }
+
+    uint8_t *decompressed_data = malloc(decompressed_size ? decompressed_size : 1);
+    if (!decompressed_data) {
+        fprintf(stderr, "Out of memory for frame data (%zu bytes)\n", decompressed_size);
+        free(compressed_data);
+        return -1;
+    }
+    size_t actual_size = ZSTD_decompress(decompressed_data, decompressed_size, compressed_data, packet_size);
+    free(compressed_data); // the compressed copy is not needed past this point
+
+    if (ZSTD_isError(actual_size)) {
+        fprintf(stderr, "ZSTD decompression failed: %s\n", ZSTD_getErrorName(actual_size));
+        free(decompressed_data);
+        return -1;
+    }
+
+    // The block is a 4-byte header followed, for every mode except SKIP, by three planes
+    // of int16 coefficients. Refuse to parse beyond what was actually decompressed.
+    const size_t plane_bytes = (size_t)decoder->frame_size * sizeof(int16_t);
+    if (actual_size < 4 || (decompressed_data[0] != TAV_MODE_SKIP && actual_size - 4 < 3 * plane_bytes)) {
+        fprintf(stderr, "Truncated frame data at frame %d (%zu bytes)\n", decoder->frame_count, actual_size);
+        free(decompressed_data);
+        return -1;
+    }
+
+    // Parse block data
+    const uint8_t *ptr = decompressed_data;
+    uint8_t mode = *ptr++;
+    uint8_t qy_override = *ptr++;
+    uint8_t qco_override = *ptr++;
+    uint8_t qcg_override = *ptr++;
+
+    // A zero override falls back to the quantiser stored in the file header
+    int qy = qy_override ? qy_override : decoder->header.quantiser_y;
+    int qco = qco_override ? qco_override : decoder->header.quantiser_co;
+    int qcg = qcg_override ? qcg_override : decoder->header.quantiser_cg;
+
+    if (mode == TAV_MODE_SKIP) {
+        // Copy from reference frame
+        memcpy(decoder->current_frame_rgb, decoder->reference_frame_rgb, decoder->frame_size * 3);
+    } else {
+        float *planes[3] = { decoder->dwt_buffer_y, decoder->dwt_buffer_co, decoder->dwt_buffer_cg };
+        float *references[3] = { decoder->reference_ycocg_y, decoder->reference_ycocg_co, decoder->reference_ycocg_cg };
+        const int quantisers[3] = { qy, qco, qcg };
+        const int is_delta = (packet_type == TAV_PACKET_PFRAME && mode == TAV_MODE_DELTA);
+
+        // Coefficients are stored in TSVM order: all Y, then all Co, then all Cg
+        for (int p = 0; p < 3; p++) {
+            // Read and dequantize coefficients (simple version for now), then apply inverse DWT
+            ptr = dequantise_plane(ptr, planes[p], decoder->frame_size, quantisers[p]);
+            apply_inverse_dwt_multilevel(planes[p], decoder->header.width, decoder->header.height,
+                                       decoder->header.decomp_levels, decoder->header.wavelet_filter);
+
+            // Handle P-frame delta accumulation (in YCoCg float space)
+            if (is_delta) {
+                for (int i = 0; i < decoder->frame_size; i++) planes[p][i] += references[p][i];
+            }
+
+            // Update reference YCoCg plane (for future P-frames)
+            memcpy(references[p], planes[p], decoder->frame_size * sizeof(float));
+        }
+
+        // Convert YCoCg-R to RGB
+        for (int i = 0; i < decoder->frame_size; i++) {
+            uint8_t r, g, b;
+            ycocg_r_to_rgb(planes[0][i], planes[1][i], planes[2][i], &r, &g, &b);
+            decoder->current_frame_rgb[i * 3] = r;
+            decoder->current_frame_rgb[i * 3 + 1] = g;
+            decoder->current_frame_rgb[i * 3 + 2] = b;
+        }
+    }
+
+    // Update reference frame
+    memcpy(decoder->reference_frame_rgb, decoder->current_frame_rgb, decoder->frame_size * 3);
+
+    free(decompressed_data);
+    decoder->frame_count++;
+
+    // Debug: Check file position after processing frame
+    if (decoder->frame_count < 5) {
+        long end_pos = ftell(decoder->input_fp);
+        fprintf(stderr, "Frame %d completed, file pos now: %ld\n", decoder->frame_count - 1, end_pos);
+    }
+    return 1;
+}
+
+static void output_frame_rgb24(tav_decoder_t *decoder) {  // current frame -> stdout, RGB24
+    const size_t byte_count = decoder->frame_size * 3;
+    fwrite(decoder->current_frame_rgb, 1, byte_count, stdout);
+}
+
+// Replace the current process image with `tool`. The two fixed install locations
+// are tried first, in that order; a PATH search is the last resort.
+// Returns only when every exec attempt failed.
+static void exec_tool(const char *tool, char *const argv[]) {
+    static const char *const dirs[] = { "/usr/bin/", "/usr/local/bin/" };
+    char path[64];
+    for (size_t d = 0; d < sizeof(dirs) / sizeof(dirs[0]); d++) {
+        snprintf(path, sizeof(path), "%s%s", dirs[d], tool);
+        execv(path, argv);
+    }
+    execvp(tool, argv);
+}
+
+// Entry point. Usage: <program> input.tav [-p]
+// Without -p every decoded frame is written to stdout as raw RGB24. With -p an FFmpeg
+// child muxes the raw video (fd 3) and the MP2 audio (fd 4) into Matroska and pipes
+// that into an FFplay child. Returns 0 on success, 1 on usage, setup or decoding errors.
+int main(int argc, char *argv[]) {
+    char *input_file = NULL;
+    int use_ffplay = 0;
+
+    // Parse command line arguments
+    if (argc < 2 || argc > 3) {
+        fprintf(stderr, "Usage: %s input.tav [-p]\n", argv[0]);
+        fprintf(stderr, "TAV Decoder decodes video packets into raw RGB24 picture that can be piped into FFmpeg or FFplay.\n");
+        fprintf(stderr, "  -p    Start FFplay directly instead of outputting to stdout\n");
+        fprintf(stderr, "\nExamples:\n");
+        fprintf(stderr, "  %s input.tav | mpv --demuxer=rawvideo --demuxer-rawvideo-w=WIDTH --demuxer-rawvideo-h=HEIGHT -\n", argv[0]);
+        fprintf(stderr, "  %s input.tav -p\n", argv[0]);
+        return 1;
+    }
+
+    // Check for -p flag
+    if (argc == 3) {
+        if (strcmp(argv[2], "-p") == 0) {
+            use_ffplay = 1;
+            input_file = argv[1];
+        } else if (strcmp(argv[1], "-p") == 0) {
+            use_ffplay = 1;
+            input_file = argv[2];
+        } else {
+            fprintf(stderr, "Error: Unknown flag '%s'\n", argv[2]);
+            return 1;
+        }
+    } else {
+        input_file = argv[1];
+    }
+
+    tav_decoder_t *decoder = tav_decoder_init(input_file);
+    if (!decoder) {
+        fprintf(stderr, "Failed to initialize decoder\n");
+        return 1;
+    }
+
+    fprintf(stderr, "TAV Decoder - %dx%d @ %dfps, %d levels, version %d\n",
+            decoder->header.width, decoder->header.height, decoder->header.fps,
+            decoder->header.decomp_levels, decoder->header.version);
+
+    fprintf(stderr, "Header says: %u total frames\n", decoder->header.total_frames);
+
+    FILE *output_fp = stdout;
+    pid_t ffplay_pid = 0, ffmpeg_pid = 0;
+
+    // If -p flag is used, use FFmpeg to mux video+audio and pipe to FFplay
+    if (use_ffplay) {
+        int video_pipe[2], audio_pipe[2], ffmpeg_pipe[2];
+        if (pipe(video_pipe) == -1 || pipe(audio_pipe) == -1 || pipe(ffmpeg_pipe) == -1) {
+            fprintf(stderr, "Failed to create pipes\n");
+            tav_decoder_free(decoder);
+            return 1;
+        }
+
+        ffmpeg_pid = fork();
+        if (ffmpeg_pid == -1) {
+            fprintf(stderr, "Failed to fork FFmpeg process\n");
+            tav_decoder_free(decoder);
+            return 1;
+        } else if (ffmpeg_pid == 0) {
+            // Child process 1 - FFmpeg muxer
+            close(video_pipe[1]);  // Close write ends
+            close(audio_pipe[1]);
+            close(ffmpeg_pipe[0]);  // Close read end of output pipe
+
+            char video_size[32];
+            char framerate[16];
+            snprintf(video_size, sizeof(video_size), "%dx%d", decoder->header.width, decoder->header.height);
+            snprintf(framerate, sizeof(framerate), "%d", decoder->header.fps);
+
+            // Output to stdout. Done first, so ffmpeg_pipe[1] may safely be overwritten below.
+            dup2(ffmpeg_pipe[1], STDOUT_FILENO);
+
+            // The inputs must land on fd 3 (video) and fd 4 (audio), yet a pipe end may
+            // already own one of those numbers: with the input file open on fd 3 the
+            // pipes start at fd 4, i.e. video_pipe[0] == 4. Blindly closing the originals
+            // after the dup2 calls would then close the audio input that was just installed.
+            int video_in = video_pipe[0];
+            int audio_in = audio_pipe[0];
+            if (audio_in == 3) audio_in = dup(audio_in);  // survive the overwrite of fd 3
+            dup2(video_in, 3);  // Video input on fd 3
+            dup2(audio_in, 4);  // Audio input on fd 4
+
+            // Drop the leftover originals, but never the descriptors just installed
+            if (video_in > 4) close(video_in);
+            if (audio_in > 4) close(audio_in);
+            if (ffmpeg_pipe[1] > 4) close(ffmpeg_pipe[1]);
+
+            char *ffmpeg_argv[] = {
+                "ffmpeg",
+                "-f", "rawvideo",
+                "-pixel_format", "rgb24",
+                "-video_size", video_size,
+                "-framerate", framerate,
+                "-i", "pipe:3",              // Video from fd 3
+                "-f", "mp3",                 // MP3 demuxer handles MP2/MP3
+                "-i", "pipe:4",              // Audio from fd 4
+                "-c:v", "libx264",           // Encode video to H.264
+                "-preset", "ultrafast",      // Fast encoding
+                "-crf", "23",                // Good quality
+                "-c:a", "copy",              // Copy audio as-is (no re-encoding)
+                "-f", "matroska",            // Output as MKV (good for streaming)
+                "-",                         // Output to stdout
+                "-v", "error",               // Minimal logging
+                NULL
+            };
+            exec_tool("ffmpeg", ffmpeg_argv);
+
+            fprintf(stderr, "Failed to start ffmpeg for muxing\n");
+            _exit(1);  // skip the stdio flush / atexit handlers inherited from the parent
+        }
+
+        // Fork again for FFplay
+        ffplay_pid = fork();
+        if (ffplay_pid == -1) {
+            fprintf(stderr, "Failed to fork FFplay process\n");
+            kill(ffmpeg_pid, SIGTERM);
+            tav_decoder_free(decoder);
+            return 1;
+        } else if (ffplay_pid == 0) {
+            // Child process 2 - FFplay
+            close(video_pipe[0]);  // Close unused ends
+            close(video_pipe[1]);
+            close(audio_pipe[0]);
+            close(audio_pipe[1]);
+            close(ffmpeg_pipe[1]);
+
+            // Read from FFmpeg output
+            dup2(ffmpeg_pipe[0], STDIN_FILENO);
+            close(ffmpeg_pipe[0]);
+
+            // Input from stdin ("-i -"), minimal logging
+            char *ffplay_argv[] = { "ffplay", "-i", "-", "-v", "error", NULL };
+            exec_tool("ffplay", ffplay_argv);
+
+            fprintf(stderr, "Failed to start ffplay\n");
+            _exit(1);
+        } else {
+            // Parent process - write to video and audio pipes
+            close(video_pipe[0]);   // Close read ends
+            close(audio_pipe[0]);
+            close(ffmpeg_pipe[0]);
+            close(ffmpeg_pipe[1]);
+
+            output_fp = fdopen(video_pipe[1], "wb");
+            decoder->audio_output_fp = fdopen(audio_pipe[1], "wb");
+
+            if (!output_fp || !decoder->audio_output_fp) {
+                fprintf(stderr, "Failed to open pipes for writing\n");
+                kill(ffmpeg_pid, SIGTERM);
+                kill(ffplay_pid, SIGTERM);
+                tav_decoder_free(decoder);
+                return 1;
+            }
+
+            fprintf(stderr, "Starting FFmpeg muxer + FFplay for video+audio playback\n");
+        }
+    } else {
+        fprintf(stderr, "To test: %s %s | ffplay -f rawvideo -pixel_format rgb24 -video_size %dx%d -framerate %d -\n",
+                argv[0], input_file, decoder->header.width, decoder->header.height, decoder->header.fps);
+    }
+
+    int result;
+    while ((result = decode_frame(decoder)) == 1) {
+        // Write RGB24 data to output (stdout or ffplay pipe)
+        fwrite(decoder->current_frame_rgb, decoder->frame_size * 3, 1, output_fp);
+        if (fflush(output_fp) != 0 || ferror(output_fp)) {
+            // The consumer is gone (or the target is full): further decoding is pointless
+            fprintf(stderr, "Output write failed after %d frames\n", decoder->frame_count);
+            break;
+        }
+
+        // Debug: Print frame progress (only to stderr)
+        if (decoder->frame_count % 100 == 0 || decoder->frame_count < 5) {
+            fprintf(stderr, "Decoded frame %d\n", decoder->frame_count);
+        }
+    }
+
+    if (result < 0) {
+        fprintf(stderr, "Decoding error\n");
+        if (use_ffplay) {
+            if (ffmpeg_pid > 0) kill(ffmpeg_pid, SIGTERM);
+            if (ffplay_pid > 0) kill(ffplay_pid, SIGTERM);
+        }
+        tav_decoder_free(decoder);
+        return 1;
+    }
+
+    fprintf(stderr, "Decoded %d frames\n", decoder->frame_count);
+
+    // Clean up: close our pipe ends so the children see EOF, then reap both of them
+    if (use_ffplay) {
+        if (output_fp != stdout) fclose(output_fp);
+        if (decoder->audio_output_fp) {
+            fclose(decoder->audio_output_fp);
+            decoder->audio_output_fp = NULL;
+        }
+        if (ffmpeg_pid > 0) waitpid(ffmpeg_pid, NULL, 0);
+        if (ffplay_pid > 0) waitpid(ffplay_pid, NULL, 0);
+    }
+
+    tav_decoder_free(decoder);
+    return 0;
+}
diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c
index 6ecb219..6cc9e9a 100644
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -806,7 +806,7 @@ static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int siz
 
 // https://www.desmos.com/calculator/mjlpwqm8ge
 // where Q=quality, x=level
-static float perceptual_model3_LH(int quality, int level) {
+static float perceptual_model3_LH(int quality, float level) {
     float H4 = 1.2f;
     float Lx = H4 - ((quality + 1.f) / 15.f) * (level - 4.f);
     float Ld = (quality + 1.f) / -15.f;
@@ -824,91 +824,26 @@ static float perceptual_model3_HH(float LH, float HL) {
     return (HL / LH) * 1.44f;
 }
 
-static float perceptual_model3_LL(int quality, int level) {
+static float perceptual_model3_LL(int quality, float level) {
     float n = perceptual_model3_LH(quality, level);
     float m = perceptual_model3_LH(quality, level - 1) / n;
 
     return n / m;
 }
 
-static float perceptual_model3_chroma_basecurve(int quality, int level) {
+static float perceptual_model3_chroma_basecurve(int quality, float level) {
     return 1.0f - (1.0f / (0.5f * quality * quality + 1.0f)) * (level - 4.0f); // just a line that passes (4,1)
 }
 
-// Get perceptual weight for specific subband - Data-driven model based on coefficient variance analysis
-static float get_perceptual_weight_model2(int level, int subband_type, int is_chroma, int max_levels) {
-    // Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity
-    // strategy: JPEG quantisation table + real-world statistics from the encoded videos
-    if (!is_chroma) {
-        // LUMA CHANNEL: Based on statistical analysis from real video content
-        if (subband_type == 0) { // LL subband - contains most image energy, preserve carefully
-            if (level >= 6) return 0.5f;  // LL6: High energy but can tolerate moderate quantisation (range up to 22K)
-            if (level >= 5) return 0.7f;  // LL5: Good preservation
-            return 0.9f;                   // Lower LL levels: Fine preservation
-        } else if (subband_type == 1) { // LH subband - horizontal details (human eyes more sensitive)
-            if (level >= 6) return 0.8f;  // LH6: Significant coefficients (max ~500), preserve well
-            if (level >= 5) return 1.0f;  // LH5: Moderate coefficients (max ~600)
-            if (level >= 4) return 1.2f;  // LH4: Small coefficients (max ~50)
-            if (level >= 3) return 1.6f;  // LH3: Very small coefficients, can quantise more
-            if (level >= 2) return 2.0f;  // LH2: Minimal impact
-            return 2.5f;                   // LH1: Least important
-        } else if (subband_type == 2) { // HL subband - vertical details (less sensitive due to HVS characteristics)
-            if (level >= 6) return 1.0f;  // HL6: Can quantise more aggressively than LH6
-            if (level >= 5) return 1.2f;  // HL5: Standard quantisation
-            if (level >= 4) return 1.5f;  // HL4: Notable range but less critical
-            if (level >= 3) return 2.0f;  // HL3: Can tolerate more quantisation
-            if (level >= 2) return 2.5f;  // HL2: Less important
-            return 3.5f;                   // HL1: Most aggressive for vertical details
-        } else { // HH subband - diagonal details (least important for HVS)
-            if (level >= 6) return 1.2f;  // HH6: Preserve some diagonal detail
-            if (level >= 5) return 1.6f;  // HH5: Can quantise aggressively
-            if (level >= 4) return 2.0f;  // HH4: Very aggressive
-            if (level >= 3) return 2.8f;  // HH3: Minimal preservation
-            if (level >= 2) return 3.5f;  // HH2: Maximum compression
-            return 5.0f;                   // HH1: Most aggressive quantisation
-        }
-    } else {
-        // CHROMA CHANNELS: Less critical for human perception, more aggressive quantisation
-        // strategy: mimic 4:2:2 chroma subsampling
-        if (subband_type == 0) { // LL chroma - still important but less than luma
-            return 1.0f;
-            if (level >= 6) return 0.8f;  // Chroma LL6: Less critical than luma LL
-            if (level >= 5) return 0.9f;
-            return 1.0f;
-        } else if (subband_type == 1) { // LH chroma - horizontal chroma details
-            return 1.8f;
-            if (level >= 6) return 1.0f;
-            if (level >= 5) return 1.2f;
-            if (level >= 4) return 1.4f;
-            if (level >= 3) return 1.6f;
-            if (level >= 2) return 1.8f;
-            return 2.0f;
-        } else if (subband_type == 2) { // HL chroma - vertical chroma details (even less critical)
-            return 1.3f;
-            if (level >= 6) return 1.2f;
-            if (level >= 5) return 1.4f;
-            if (level >= 4) return 1.6f;
-            if (level >= 3) return 1.8f;
-            if (level >= 2) return 2.0f;
-            return 2.2f;
-        } else { // HH chroma - diagonal chroma details (most aggressive)
-            return 2.5f;
-            if (level >= 6) return 1.4f;
-            if (level >= 5) return 1.6f;
-            if (level >= 4) return 1.8f;
-            if (level >= 3) return 2.1f;
-            if (level >= 2) return 2.3f;
-            return 2.5f;
-        }
-    }
-}
-
 #define FOUR_PIXEL_DETAILER 0.88f
 #define TWO_PIXEL_DETAILER  0.92f
 
 // level is one-based index
-static float get_perceptual_weight(tav_encoder_t *enc, int level, int subband_type, int is_chroma, int max_levels) {
+static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels) {
     // Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity
+
+    float level = 1.0f + ((level0 - 1.0f) / (max_levels - 1.0f)) * 5.0f;
+
     // strategy: more horizontal detail
     if (!is_chroma) {
         // LL subband - contains most image energy, preserve carefully
@@ -923,10 +858,10 @@ static float get_perceptual_weight(tav_encoder_t *enc, int level, int subband_ty
         // HL subband - vertical details
         float HL = perceptual_model3_HL(enc->quality_level, LH);
         if (subband_type == 2)
-            return HL * (level == 2 ? TWO_PIXEL_DETAILER : level == 3 ? FOUR_PIXEL_DETAILER : 1.0f);
+            return HL * (2.2f >= level && level >= 1.8f ? TWO_PIXEL_DETAILER : 3.2f >= level && level >= 2.8f ? FOUR_PIXEL_DETAILER : 1.0f);
 
         // HH subband - diagonal details
-        else return perceptual_model3_HH(LH, HL) * (level == 2 ? TWO_PIXEL_DETAILER : level == 3 ? FOUR_PIXEL_DETAILER : 1.0f);
+        else return perceptual_model3_HH(LH, HL) * (2.2f >= level && level >= 1.8f ? TWO_PIXEL_DETAILER : 3.2f >= level && level >= 2.8f ? FOUR_PIXEL_DETAILER : 1.0f);
     } else {
         // CHROMA CHANNELS: Less critical for human perception, more aggressive quantisation
         // strategy: more horizontal detail