diff --git a/.idea/libraries/badlogicgames_gdx.xml b/.idea/libraries/badlogicgames_gdx.xml
new file mode 100644
index 0000000..cd1db0b
--- /dev/null
+++ b/.idea/libraries/badlogicgames_gdx.xml
@@ -0,0 +1,11 @@
+<component name="libraryTable">
+  <library name="badlogicgames.gdx" type="repository">
+    <properties maven-id="com.badlogicgames.gdx:gdx:1.12.1" />
+    <CLASSES>
+      <root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/gdx/gdx/1.12.1/gdx-1.12.1.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/gdx/gdx-jnigen-loader/2.3.1/gdx-jnigen-loader-2.3.1.jar!/" />
+    </CLASSES>
+    <JAVADOC />
+    <SOURCES />
+  </library>
+</component>
\ No newline at end of file
diff --git a/.idea/libraries/badlogicgames_gdx_backend_lwjgl3.xml b/.idea/libraries/badlogicgames_gdx_backend_lwjgl3.xml
new file mode 100644
index 0000000..5302a7a
--- /dev/null
+++ b/.idea/libraries/badlogicgames_gdx_backend_lwjgl3.xml
@@ -0,0 +1,62 @@
+<component name="libraryTable">
+  <library name="badlogicgames.gdx.backend.lwjgl3" type="repository">
+    <properties maven-id="com.badlogicgames.gdx:gdx-backend-lwjgl3:1.12.1" />
+    <CLASSES>
+      <root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/gdx/gdx-backend-lwjgl3/1.12.1/gdx-backend-lwjgl3-1.12.1.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/gdx/gdx/1.12.1/gdx-1.12.1.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/gdx/gdx-jnigen-loader/2.3.1/gdx-jnigen-loader-2.3.1.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-linux.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-linux-arm32.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-linux-arm64.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-macos.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-macos-arm64.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-windows.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-windows-x86.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-linux.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-linux-arm32.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-linux-arm64.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-macos.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-macos-arm64.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-windows.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-windows-x86.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-linux.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-linux-arm32.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-linux-arm64.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-macos.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-macos-arm64.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-windows.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-windows-x86.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-linux.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-linux-arm32.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-linux-arm64.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-macos.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-macos-arm64.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-windows.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-windows-x86.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-linux.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-linux-arm32.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-linux-arm64.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-macos.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-macos-arm64.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-windows.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-windows-x86.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-linux.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-linux-arm32.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-linux-arm64.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-macos.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-macos-arm64.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-windows.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-windows-x86.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/jlayer/jlayer/1.0.1-gdx/jlayer-1.0.1-gdx.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/jcraft/jorbis/0.0.17/jorbis-0.0.17.jar!/" />
+    </CLASSES>
+    <JAVADOC />
+    <SOURCES />
+  </library>
+</component>
\ No newline at end of file
diff --git a/2taud.sh b/2taud.sh
index 6266dff..e44bfcc 100755
--- a/2taud.sh
+++ b/2taud.sh
@@ -1,8 +1,8 @@
 #!/usr/bin/env fish
 
-for f in *.mod; python3 mod2taud.py $f assets/disk0/(basename $f .mod).taud; end
-for f in *.s3m; python3 s3m2taud.py $f assets/disk0/(basename $f .s3m).taud; end
-for f in *.it; python3 it2taud.py $f assets/disk0/(basename $f .it).taud; end
-for f in *.xm; python3 xm2taud.py $f assets/disk0/(basename $f .xm).taud; end
-for f in *.mon; python3 mon2taud.py $f assets/disk0/(basename $f .mon).taud; end
-for f in *.MON; python3 mon2taud.py $f assets/disk0/(basename $f .MON).taud; end
+for f in *.mod; python3 mod2taud.py $f assets/disk0/home/music/(basename $f .mod).taud; end
+for f in *.s3m; python3 s3m2taud.py $f assets/disk0/home/music/(basename $f .s3m).taud; end
+for f in *.it; python3 it2taud.py $f assets/disk0/home/music/(basename $f .it).taud; end
+for f in *.xm; python3 xm2taud.py $f assets/disk0/home/music/(basename $f .xm).taud; end
+for f in *.mon; python3 mon2taud.py $f assets/disk0/home/music/(basename $f .mon).taud; end
+for f in *.MON; python3 mon2taud.py $f assets/disk0/home/music/(basename $f .MON).taud; end
diff --git a/TerranBASICexecutable/TerranBASICexecutable.iml b/TerranBASICexecutable/TerranBASICexecutable.iml
index d98a81a..5829148 100644
--- a/TerranBASICexecutable/TerranBASICexecutable.iml
+++ b/TerranBASICexecutable/TerranBASICexecutable.iml
@@ -10,5 +10,7 @@
     <orderEntry type="module" module-name="tsvm_core" />
     <orderEntry type="library" name="TerranVirtualDisk" level="project" />
     <orderEntry type="library" name="lib" level="project" />
+    <orderEntry type="library" name="badlogicgames.gdx" level="project" />
+    <orderEntry type="library" name="badlogicgames.gdx.backend.lwjgl3" level="project" />
   </component>
 </module>
\ No newline at end of file
diff --git a/assets/disk0/tvdos/bin/playtad.js b/assets/disk0/tvdos/bin/playtad.js
index cd1bf3a..d43f076 100644
--- a/assets/disk0/tvdos/bin/playtad.js
+++ b/assets/disk0/tvdos/bin/playtad.js
@@ -1,7 +1,9 @@
 const SND_BASE_ADDR = audio.getBaseAddr()
 const SND_MEM_ADDR = audio.getMemAddr()
-const TAD_INPUT_ADDR = SND_MEM_ADDR - 262144  // TAD input buffer (matches TAV packet 0x24)
-const TAD_DECODED_ADDR = SND_MEM_ADDR - 262144 + 65536  // TAD decoded buffer
+// tadInputBin lives at audio-local offset 917504 and tadDecodedBin at 983040
+// (post-bef85f6 memory map; the old 262144 offset now hits the enlarged sampleBin).
+const TAD_INPUT_ADDR = SND_MEM_ADDR - 917504  // TAD input buffer (matches TAV packet 0x24)
+const TAD_DECODED_ADDR = SND_MEM_ADDR - 983040  // TAD decoded buffer
 
 if (!SND_BASE_ADDR) return 10
 
diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js
index a689f62..390d672 100644
--- a/assets/disk0/tvdos/bin/playtav.js
+++ b/assets/disk0/tvdos/bin/playtav.js
@@ -1746,7 +1746,9 @@ try {
                     tadInitialised = true
                 }
 
-                seqread.readBytes(payloadLen, SND_MEM_ADDR - 262144)
+                // tadInputBin lives at audio-local offset 917504 (post-bef85f6 memory map);
+                // the previous 262144 offset now points into the enlarged sampleBin.
+                seqread.readBytes(payloadLen, SND_MEM_ADDR - 917504)
                 audio.tadDecode()
                 audio.tadUploadDecoded(AUDIO_DEVICE, sampleLen)
             }
diff --git a/tsvm_core/src/net/torvald/tsvm/AudioJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/AudioJSR223Delegate.kt
index 1e4d613..fbe39b0 100644
--- a/tsvm_core/src/net/torvald/tsvm/AudioJSR223Delegate.kt
+++ b/tsvm_core/src/net/torvald/tsvm/AudioJSR223Delegate.kt
@@ -275,6 +275,7 @@ class AudioJSR223Delegate(private val vm: VM) {
 
 
 
+    // While the following code does work, MP3 was deemed "too new" for tsvm, so MP3 support was removed.
     /*
     js-mp3
     https://github.com/soundbus-technologies/js-mp3
diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
index 522cdf0..dac0497 100644
--- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
+++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
@@ -5433,6 +5433,18 @@ class GraphicsJSR223Delegate(private val vm: VM) {
 
     private val TAV_QLUT = intArrayOf(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096)
 
+    // Returns `data` Zstd-decompressed if it starts with the Zstd frame magic 0x28 0xB5 0x2F 0xFD
+    // (0xFD2FB528, little-endian), else returns `data` as-is. Newer TAV files default to no Zstd
+    // (Video Flags bit 4). NOTE(review): a raw payload starting with these 4 bytes is misdetected.
+    private fun tavDecompressIfZstd(data: ByteArray): ByteArray {
+        if (data.size >= 4 &&
+            data[0] == 0x28.toByte() && data[1] == 0xB5.toByte() &&
+            data[2] == 0x2F.toByte() && data[3] == 0xFD.toByte()) {
+            return ZstdInputStream(ByteArrayInputStream(data)).use { it.readBytes() }
+        }
+        return data
+    }
+
     // New tavDecode function that accepts compressed data and decompresses internally
     fun tavDecodeCompressed(compressedDataPtr: Long, compressedSize: Int, currentRGBAddr: Long, prevRGBAddr: Long,
                             width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int,
@@ -5445,12 +5457,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         }
 
         return try {
-            // Decompress using Zstd
-            val bais = ByteArrayInputStream(compressedData)
-            val zis = ZstdInputStream(bais)
-            val decompressedData = zis.readBytes()
-            zis.close()
-            bais.close()
+            // Decompress with Zstd if the payload starts with the Zstd frame magic;
+            // otherwise pass through (TAV files written without --zstd-level).
+            val decompressedData = tavDecompressIfZstd(compressedData)
 
             // Allocate buffer for decompressed data
             val decompressedBuffer = vm.malloc(decompressedData.size)
@@ -6725,9 +6734,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         )
 
         val decompressedData = try {
-            ZstdInputStream(java.io.ByteArrayInputStream(compressedData)).use { zstd ->
-                zstd.readBytes()
-            }
+            // Decompress with Zstd if the payload starts with the Zstd frame magic;
+            // otherwise pass through (TAV files written without --zstd-level).
+            tavDecompressIfZstd(compressedData)
         } catch (e: Exception) {
             println("ERROR: Zstd decompression failed: ${e.message}")
             return arrayOf(0, dbgOut)
diff --git a/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt b/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt
index b4a34d2..e80a64f 100644
--- a/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt
+++ b/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt
@@ -911,24 +911,32 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                             ((tadInputBin[offset++].toUint()) shl 8)
                     )
             val maxIndex = tadInputBin[offset++].toUint()
-            val payloadSize = (
+            val payloadSizeField = (
                     (tadInputBin[offset++].toUint()) or
                             ((tadInputBin[offset++].toUint()) shl 8) or
                             ((tadInputBin[offset++].toUint()) shl 16) or
                             ((tadInputBin[offset++].toUint()) shl 24)
                     )
 
-            // Decompress payload
+            // MSB of payload size = 1 means the payload is stored uncompressed (no Zstd).
+            val payloadIsRaw = (payloadSizeField and 0x80000000.toInt()) != 0
+            val payloadSize = payloadSizeField and 0x7FFFFFFF
+
+            // Read payload bytes
             val compressed = ByteArray(payloadSize)
             UnsafeHelper.memcpyRaw(null, tadInputBin.ptr + offset, compressed, UnsafeHelper.getArrayOffset(compressed), payloadSize.toLong())
 
-            val payload: ByteArray = try {
-                ZstdInputStream(ByteArrayInputStream(compressed)).use { zstd ->
-                    zstd.readBytes()
+            val payload: ByteArray = if (payloadIsRaw) {
+                compressed
+            } else {
+                try {
+                    ZstdInputStream(ByteArrayInputStream(compressed)).use { zstd ->
+                        zstd.readBytes()
+                    }
+                } catch (e: Exception) {
+                    println("ERROR: Zstd decompression failed: ${e.message}")
+                    return
                 }
-            } catch (e: Exception) {
-                println("ERROR: Zstd decompression failed: ${e.message}")
-                return
             }
 
             // Decode using binary tree EZBC - FIXED!
diff --git a/tsvm_core/tsvm_core.iml b/tsvm_core/tsvm_core.iml
index 516f4f1..45571f2 100644
--- a/tsvm_core/tsvm_core.iml
+++ b/tsvm_core/tsvm_core.iml
@@ -12,5 +12,7 @@
     <orderEntry type="library" name="jetbrains.kotlin.reflect" level="project" />
     <orderEntry type="library" name="jetbrains.kotlin.test" level="project" />
     <orderEntry type="library" name="lib" level="project" />
+    <orderEntry type="library" name="badlogicgames.gdx" level="project" />
+    <orderEntry type="library" name="badlogicgames.gdx.backend.lwjgl3" level="project" />
   </component>
 </module>
\ No newline at end of file
diff --git a/tsvm_executable.iml b/tsvm_executable.iml
index 942bf2b..b01ef5a 100644
--- a/tsvm_executable.iml
+++ b/tsvm_executable.iml
@@ -10,5 +10,7 @@
     <orderEntry type="library" name="TerranVirtualDisk" level="project" />
     <orderEntry type="module" module-name="tsvm_core" />
     <orderEntry type="library" name="lib" level="project" />
+    <orderEntry type="library" name="badlogicgames.gdx" level="project" />
+    <orderEntry type="library" name="badlogicgames.gdx.backend.lwjgl3" level="project" />
   </component>
 </module>
\ No newline at end of file
diff --git a/video_encoder/Makefile b/video_encoder/Makefile
deleted file mode 100644
index 413592e..0000000
--- a/video_encoder/Makefile
+++ /dev/null
@@ -1,221 +0,0 @@
-# Created by CuriousTorvald and Claude on 2025-08-17.
-# Makefile for TSVM Enhanced Video (TEV) encoder and libraries
-
-CC = gcc
-CXX = g++
-CFLAGS = -std=c99 -Wall -Wextra -Ofast -D_GNU_SOURCE -march=native -mavx512f -mavx512dq -mavx512bw -mavx512vl -Iinclude
-CXXFLAGS = -std=c++11 -Wall -Wextra -Ofast -D_GNU_SOURCE -march=native -mavx512f -mavx512dq -mavx512bw -mavx512vl -Iinclude
-DBGFLAGS =
-PREFIX = /usr/local
-
-# Zstd flags (use pkg-config if available, fallback for cross-platform compatibility)
-ZSTD_CFLAGS = $(shell pkg-config --cflags libzstd 2>/dev/null || echo "")
-ZSTD_LIBS = $(shell pkg-config --libs libzstd 2>/dev/null || echo "-lzstd")
-LIBS = -lm $(ZSTD_LIBS)
-
-# =============================================================================
-# Library Object Files
-# =============================================================================
-
-# libtavenc - TAV encoder library
-LIBTAVENC_OBJ = lib/libtavenc/tav_encoder_lib.o \
-                lib/libtavenc/tav_encoder_color.o \
-                lib/libtavenc/tav_encoder_dwt.o \
-                lib/libtavenc/tav_encoder_quantize.o \
-                lib/libtavenc/tav_encoder_ezbc.o \
-                lib/libtavenc/tav_encoder_utils.o \
-                lib/libtavenc/tav_encoder_tile.o
-
-# libtavdec - TAV decoder library
-LIBTAVDEC_OBJ = lib/libtavdec/tav_video_decoder.o
-
-# libtadenc - TAD encoder library
-LIBTADENC_OBJ = lib/libtadenc/encoder_tad.o
-
-# libtaddec - TAD decoder library
-LIBTADDEC_OBJ = lib/libtaddec/decoder_tad.o
-
-# libfec - Forward Error Correction library (LDPC + Reed-Solomon)
-LIBFEC_OBJ = lib/libfec/ldpc.o lib/libfec/reed_solomon.o lib/libfec/ldpc_payload.o
-
-# =============================================================================
-# Targets
-# =============================================================================
-
-# Source files and targets
-TARGETS = libs encoder_tav_ref decoder_tav_ref tav_inspector tad tav_dt
-LIBRARIES = lib/libtavenc.a lib/libtavdec.a lib/libtadenc.a lib/libtaddec.a lib/libfec.a
-TAV_TARGETS = encoder_tav_ref decoder_tav_ref tav_inspector
-TAD_TARGETS = encoder_tad decoder_tad
-DT_TARGETS = encoder_tav_dt decoder_tav_dt tavdt_noise_injector
-
-# Build all encoders (default)
-all: clean $(TARGETS)
-
-# Build all libraries
-libs: $(LIBRARIES)
-
-# Reference encoder using libtavenc (replaces old monolithic encoder)
-encoder_tav_ref: src/encoder_tav.c lib/libtavenc.a lib/libtadenc.a
-	rm -f encoder_tav_ref
-	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -o encoder_tav_ref src/encoder_tav.c lib/libtavenc.a lib/libtadenc.a $(LIBS)
-	@echo ""
-	@echo "Reference encoder built: encoder_tav_ref"
-	@echo "This is the official reference implementation with all features"
-
-# Reference decoder using libtavdec (replaces old monolithic decoder)
-decoder_tav_ref: src/decoder_tav.c lib/libtavdec.a lib/libtaddec.a
-	rm -f decoder_tav_ref
-	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -o decoder_tav_ref src/decoder_tav.c lib/libtavdec.a lib/libtaddec.a $(LIBS)
-	@echo ""
-	@echo "Reference decoder built: decoder_tav_ref"
-	@echo "This is the official reference implementation with all features"
-
-tav_inspector: tav_inspector.c lib/libfec.a
-	rm -f tav_inspector
-	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Ilib/libfec -o tav_inspector $< lib/libfec.a $(LIBS)
-
-tav: $(TAV_TARGETS)
-
-# Build TAD (Terrarum Advanced Audio) tools
-encoder_tad: src/encoder_tad_standalone.c lib/libtadenc/encoder_tad.c include/encoder_tad.h
-	rm -f encoder_tad encoder_tad_standalone.o encoder_tad.o
-	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c lib/libtadenc/encoder_tad.c -o encoder_tad.o
-	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c src/encoder_tad_standalone.c -o encoder_tad_standalone.o
-	$(CC) $(DBGFLAGS) -o encoder_tad encoder_tad_standalone.o encoder_tad.o $(LIBS)
-
-decoder_tad: lib/libtaddec/decoder_tad.c
-	rm -f decoder_tad
-	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -o decoder_tad $< $(LIBS)
-
-# Build all TAD tools
-tad: $(TAD_TARGETS)
-
-# =============================================================================
-# Library Build Rules
-# =============================================================================
-
-# Compile library object files
-lib/libtavenc/%.o: lib/libtavenc/%.c
-	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c $< -o $@
-
-lib/libtavdec/%.o: lib/libtavdec/%.c
-	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c $< -o $@
-
-lib/libtadenc/%.o: lib/libtadenc/%.c
-	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c $< -o $@
-
-lib/libtaddec/%.o: lib/libtaddec/%.c
-	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -DTAD_DECODER_LIB -c $< -o $@
-
-lib/libfec/%.o: lib/libfec/%.c
-	$(CC) $(CFLAGS) -Ilib/libfec -c $< -o $@
-
-# Build static libraries
-lib/libtavenc.a: $(LIBTAVENC_OBJ)
-	ar rcs $@ $^
-
-lib/libtavdec.a: $(LIBTAVDEC_OBJ)
-	ar rcs $@ $^
-
-lib/libtadenc.a: $(LIBTADENC_OBJ)
-	ar rcs $@ $^
-
-lib/libtaddec.a: $(LIBTADDEC_OBJ)
-	ar rcs $@ $^
-
-lib/libfec.a: $(LIBFEC_OBJ)
-	ar rcs $@ $^
-
-# =============================================================================
-# TAV-DT (Digital Tape) Encoder/Decoder
-# =============================================================================
-
-# TAV-DT encoder with FEC (multithreaded)
-encoder_tav_dt: src/encoder_tav_dt.c lib/libtavenc.a lib/libtadenc.a lib/libfec.a
-	rm -f encoder_tav_dt
-	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -Ilib/libfec -o encoder_tav_dt src/encoder_tav_dt.c lib/libtavenc.a lib/libtadenc.a lib/libfec.a $(LIBS) -lpthread
-	@echo ""
-	@echo "TAV-DT encoder built: encoder_tav_dt"
-	@echo "Digital Tape format with LDPC and Reed-Solomon FEC (multithreaded)"
-
-# TAV-DT decoder with FEC (multithreaded)
-decoder_tav_dt: src/decoder_tav_dt.c lib/libtavdec.a lib/libtaddec.a lib/libfec.a
-	rm -f decoder_tav_dt
-	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -Ilib/libfec -o decoder_tav_dt src/decoder_tav_dt.c lib/libtavdec.a lib/libtaddec.a lib/libfec.a $(LIBS) -lpthread
-	@echo ""
-	@echo "TAV-DT decoder built: decoder_tav_dt"
-	@echo "Digital Tape format with LDPC and Reed-Solomon FEC (multithreaded)"
-
-# TAV-DT noise injector (channel simulator)
-tavdt_noise_injector: tavdt_noise_injector.c
-	rm -f tavdt_noise_injector
-	$(CC) -std=c99 -Wall -Ofast -D_GNU_SOURCE -o tavdt_noise_injector tavdt_noise_injector.c -lm
-	@echo ""
-	@echo "TAV-DT noise injector built: tavdt_noise_injector"
-	@echo "Simulates QPSK satellite channel noise (AWGN + burst)"
-
-# Build all TAV-DT tools
-tav_dt: $(DT_TARGETS)
-
-# Build with debug symbols
-debug: CFLAGS += -g -DDEBUG -fsanitize=address -fno-omit-frame-pointer
-debug: DBGFLAGS += -fsanitize=address -fno-omit-frame-pointer
-debug: clean $(TARGETS)
-
-# Clean build artifacts
-clean:
-	rm -f $(TARGETS) $(TAD_TARGETS) $(DT_TARGETS) $(LIBRARIES) *.o lib/*/*.o
-
-# Install (copy to PATH)
-install: $(TARGETS)
-	cp encoder_tav_ref $(PREFIX)/bin/
-	cp decoder_tav_ref $(PREFIX)/bin/
-	cp encoder_tad $(PREFIX)/bin/
-	cp decoder_tad $(PREFIX)/bin/
-	cp encoder_tav_dt $(PREFIX)/bin/
-	cp decoder_tav_dt $(PREFIX)/bin/
-	cp tav_inspector $(PREFIX)/bin/
-
-# Check for required dependencies
-check-deps:
-	@echo "Checking dependencies..."
-	@pkg-config --exists libzstd || (echo "Error: libzstd-dev not found. Install libzstd-dev or equivalent" && exit 1)
-	@echo "All dependencies found."
-
-# Help
-help:
-	@echo "TSVM Advanced Video (TAV) and Audio (TAD) Encoders"
-	@echo ""
-	@echo "Targets:"
-	@echo "  all          - Build video encoders (default)"
-	@echo "  libs         - Build all codec libraries (.a files)"
-	@echo "  tav          - Build the TAV advanced video encoder"
-	@echo "  tav_dt       - Build all TAV-DT (Digital Tape) tools with FEC"
-	@echo "  tavdt_noise_injector - Build TAV-DT channel noise simulator"
-	@echo "  tad          - Build all TAD audio tools (encoder, decoder)"
-	@echo "  encoder_tad  - Build TAD audio encoder"
-	@echo "  decoder_tad  - Build TAD audio decoder"
-	@echo "  tests        - Build test programs"
-	@echo "  debug        - Build with debug symbols"
-	@echo "  clean        - Remove build artifacts"
-	@echo "  install      - Install to /usr/local/bin"
-	@echo "  check-deps   - Check for required dependencies"
-	@echo "  help         - Show this help"
-	@echo ""
-	@echo "Libraries:"
-	@echo "  lib/libtavenc.a  - TAV encoder library"
-	@echo "  lib/libtavdec.a  - TAV decoder library"
-	@echo "  lib/libtadenc.a  - TAD encoder library"
-	@echo "  lib/libtaddec.a  - TAD decoder library"
-	@echo "  lib/libfec.a     - Forward Error Correction library (LDPC + RS)"
-	@echo ""
-	@echo "Usage:"
-	@echo "  make               # Build video encoders"
-	@echo "  make libs          # Build all libraries"
-	@echo "  make tav           # Build TAV encoder"
-	@echo "  make tav_dt        # Build TAV-DT encoder/decoder with FEC"
-	@echo "  make tad           # Build all TAD audio tools"
-	@echo "  sudo make install  # Install all encoders"
-
-.PHONY: all libs clean install check-deps help debug tad tav_dt tests
diff --git a/video_encoder/TAD_README.md b/video_encoder/TAD_README.md
deleted file mode 100644
index 81be478..0000000
--- a/video_encoder/TAD_README.md
+++ /dev/null
@@ -1,350 +0,0 @@
-# TAD - TSVM Advanced Audio Codec
-
-A perceptually-optimised wavelet-based audio codec designed for resource-constrained systems, featuring CDF 9/7 wavelets, EZBC sparse coding, and sophisticated perceptual quantisation.
-
-## Overview
-
-TAD (TSVM Advanced Audio) is a modern audio codec built on the discrete wavelet transform (DWT) using Cohen-Daubechies-Feauveau (CDF) 9/7 biorthogonal wavelets. It combines perceptual quantisation, advanced entropy coding, and careful optimisation for resource-constrained systems.
-
-### Key Advantages
-
-- **Perceptual optimisation**: HVS-aware quantisation preserves audio quality where it matters
-- **Efficient sparse coding**: EZBC encoding exploits coefficient sparsity (86.9% zeros in typical content)
-- **Variable chunk sizes**: Supports any chunk size ≥1024 samples, including non-power-of-2
-- **Stereo decorrelation**: Mid/Side encoding exploits stereo correlation for better compression
-- **Hardware-friendly**: Designed for efficient decoding on resource-constrained platforms
-
-## Features
-
-### Compression Technology
-
-- **CDF 9/7 Biorthogonal Wavelets**
-  - 9-level fixed decomposition for all chunk sizes
-  - Lifting scheme implementation for efficient computation
-  - Optimal frequency discrimination for audio signals
-
-- **Pre-processing**
-  - First-order IIR pre-emphasis filter (α=0.5) shifts quantisation noise to lower frequencies, where it is less objectionable to listeners
-  - Gamma companding (γ=0.5) for dynamic range compression before quantisation
-  - Mid/Side stereo transformation exploits stereo correlation
-  - Lambda companding (λ=6.0) with Laplacian CDF mapping for full bit utilisation
-
-- **Perceptual Quantisation**
-  - Channel-specific (Mid/Side) frequency-dependent weights
-  - Subband-aware quantisation preserves perceptually important frequencies
-
-- **EZBC Encoding**
-  - Binary tree embedded zero block coding
-  - Exploits coefficient sparsity (86.9% Mid, 97.8% Side typical)
-  - Progressive refinement structure
-  - Spatial clustering of non-zero coefficients
-
-- **Entropy Coding**
-  - Zstandard compression (level 7) on concatenated EZBC bitstreams
-  - Cross-channel compression optimisation
-  - Optional Zstd bypass for debugging
-
-### Audio Format
-
-- **Sample Rate**: 32 KHz (TSVM audio hardware native format)
-- **Channels**: Stereo (L/R input, Mid/Side internal representation)
-- **Chunk Sizes**: Variable, any size ≥1024 samples (including non-power-of-2)
-- **Bit Depth**: 32-bit float internal, 8-bit unsigned PCM output with noise-shaped dithering
-- **Bandwidth**: Full 0-16 KHz frequency range preserved
-
-### Quality Levels
-
-Six quality levels (0-5) provide a wide range of compression/quality trade-offs:
-- **Level 0**: Lowest quality, smallest file size
-- **Level 3**: Default, balanced quality/compression (2.51:1 vs PCMu8)
-- **Level 5**: Highest quality, largest file size
-
-Quality levels are designed to be synchronised with TAV video codec for unified encoding.
-
-## Building
-
-### Prerequisites
-
-- C compiler (GCC/Clang)
-- Zstandard library (libzstd)
-- Math library (libm)
-
-### Compilation
-
-```bash
-# Build TAD encoder/decoder
-make tad
-
-# Build all tools
-make all
-
-# Clean build artifacts
-make clean
-```
-
-### Build Targets
-
-- `encoder_tad` - Standalone audio encoder with FFmpeg calls
-- `decoder_tad` - Standalone audio decoder
-
-## Usage
-
-### Basic Encoding
-
-Encoding requires the FFmpeg executable to be installed on your system.
-
-```bash
-# Default encoding (quality level 3)
-./encoder_tad -i input.mp3 -o output.tad
-
-# Specify quality level (0-5)
-./encoder_tad -i input.m4a -o output.tad -q 0    # Lowest quality
-./encoder_tad -i input.ogg -o output.tad -q 5    # Highest quality
-
-# Disable Zstd compression (for debugging)
-./encoder_tad -i input.opus -o output.tad --no-zstd
-
-# Verbose output with statistics
-./encoder_tad -i input.flac -o output.tad -v
-```
-
-### Decoding
-
-```bash
-# Decode to PCMu8
-./decoder_tad -i input.tad -o output.pcm --raw-pcm
-
-# Decode to WAV
-./decoder_tad -i input.tad -o output.wav
-```
-
-### Input Formats
-
-TAD encoder accepts any audio format supported by FFmpeg:
-- Audio files: WAV, MP3, FLAC, OGG, AAC, etc.
-- Video files with audio streams: MP4, MKV, AVI, etc.
-- Raw PCM formats
-
-Audio is automatically resampled to 32 KHz stereo if necessary.
-
-## Technical Architecture
-
-### Encoder Pipeline
-
-1. **Input Processing**
-   - FFmpeg demuxing and audio stream extraction
-   - Resampling to 32 KHz stereo
-   - Conversion to PCM32f
-
-2. **Pre-emphasis Filter**
-   - First-order IIR filter with α=0.5
-   - Shifts quantisation noise toward lower frequencies
-   - Improves perceptual quality
-
-3. **Gamma Companding**
-   - Dynamic range compression with γ=0.5
-   - Applied independently to each sample
-   - Reduces quantisation error for low-amplitude signals
-
-4. **Stereo Decorrelation**
-   - Left/Right to Mid/Side transformation
-   - Mid = (L + R) / 2
-   - Side = (L - R) / 2
-   - Exploits stereo correlation for better compression
-
-5. **9-Level CDF 9/7 DWT**
-   - Fixed 9 decomposition levels for all chunk sizes
-   - Forward lifting scheme implementation
-   - Correct length tracking for non-power-of-2 sizes
-
-6. **Perceptual Quantisation**
-   - Channel-specific (Mid/Side) subband weights
-   - Lambda companding with λ=6.0
-   - Laplacian CDF mapping: `sign(x) * floor(λ * log(1 + |x|/λ))`
-   - Quantised to int8 coefficients
-
-7. **EZBC Encoding**
-   - Binary tree structure per channel
-   - Progressive refinement by bitplanes
-   - Zero block coding exploits sparsity
-   - Independent bitstreams for Mid and Side
-
-8. **Zstd Compression**
-   - Level 7 compression on concatenated `[Mid_bitstream][Side_bitstream]`
-   - Cross-channel optimisation opportunities
-   - Adaptive compression based on content
-
-### Decoder Pipeline
-
-1. **Container Parsing**
-   - TAD packet identification (type 0x24)
-   - Chunk size extraction
-   - Compressed data boundaries
-
-2. **Zstd Decompression**
-   - Decompress concatenated bitstreams
-   - Split into Mid and Side EZBC streams
-
-3. **EZBC Decoding**
-   - Binary tree decoder per channel
-   - Reconstruct quantised int8 coefficients
-   - Progressive refinement reconstruction
-
-4. **Lambda Decompanding**
-   - Inverse Laplacian CDF with channel-specific weights
-   - Reconstruct float32 DWT coefficients
-   - Apply subband-specific perceptual weights
-
-5. **9-Level Inverse CDF 9/7 DWT**
-   - Inverse lifting scheme implementation
-   - Correct length tracking for non-power-of-2 chunk sizes
-   - Pre-calculated length sequence from forward transform
-
-6. **Mid/Side to Left/Right**
-   - L = Mid + Side
-   - R = Mid - Side
-   - Reconstruct stereo channels
-
-7. **Gamma Decompanding**
-   - Inverse gamma with γ⁻¹=2.0
-   - Restore original dynamic range
-
-8. **De-emphasis Filter**
-   - Reverse pre-emphasis with α=0.5
-   - Remove frequency shaping
-   - Restore flat frequency response
-
-9. **PCM32f to PCM8u Conversion**
-   - Noise-shaped dithering for 8-bit output
-   - Clamping to valid range
-   - Final output format
-
-### Wavelet Implementation
-
-The CDF 9/7 wavelet follows a **two-stage lifting scheme**; for brevity, the listing below shows a single predict/update stage (coefficients α and β):
-
-```c
-// Forward Transform: Predict → Update
-// Predict step (generate high-pass)
-temp[half + i] = data[odd] - α * (data[even_left] + data[even_right]);
-
-// Update step (generate low-pass)
-temp[i] = data[even] + β * (temp[half + i - 1] + temp[half + i]);
-
-// Normalization (K factor)
-temp[i] *= K;
-temp[half + i] /= K;
-
-// Inverse Transform: Denormalize → Undo Update → Undo Predict (reversed order)
-temp[i] /= K;
-temp[half + i] *= K;
-
-temp[i] -= β * (temp[half + i - 1] + temp[half + i]);
-data[odd] = temp[half + i] + α * (temp[i] + temp[i + 1]);
-data[even] = temp[i];
-```
-
-**CDF 9/7 Coefficients**:
-- α = -1.586134342
-- β = -0.052980118
-- γ = +0.882911075
-- δ = +0.443506852
-- K = 1.230174105
-
-### Non-Power-of-2 Chunk Size Handling
-
-Critical implementation detail for variable chunk sizes:
-
-```c
-// Pre-calculate exact length sequence from forward transform
-int lengths[MAX_LEVELS + 1];
-lengths[0] = chunk_size;
-for (int i = 1; i <= levels; i++) {
-    lengths[i] = (lengths[i - 1] + 1) / 2;
-}
-
-// Apply inverse DWT using lengths[level] for each level
-// NEVER use simple doubling (length *= 2) - incorrect for non-power-of-2!
-```
-
-Incorrect length tracking causes mirrored subband artefacts in decoded audio.
-
-### Perceptual Quantisation Weights
-
-Channel-specific weights for Mid (channel 0) and Side (channel 1):
-
-```c
-// Base quantiser weights per subband (9 levels + approximation)
-float BASE_QUANTISER_WEIGHTS[2][10] = {
-    // Mid channel (0)
-    {4.0f, 2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f, 1.0f, 1.3f, 2.0f},
-
-    // Side channel (1)
-    {6.0f, 5.0f, 2.6f, 2.4f, 1.8f, 1.3f, 1.0f, 1.0f, 1.6f, 3.2f}
-};
-
-// During dequantisation:
-float weight = BASE_QUANTISER_WEIGHTS[channel][subband] * quantiser_scale;
-coeffs[i] = normalised_val * TAD32_COEFF_SCALARS[subband] * weight;
-```
-
-Different weights for the Mid and Side channels reflect the perceptual importance of the frequency bands in each channel. The DC band has the highest weight (4.0 Mid, 6.0 Side) due to energy concentration.
-
-## Performance Characteristics
-
-### Compression Efficiency
-
-- **Target Compression**: 2:1 against PCMu8 baseline (4:1 against PCM16LE input)
-- **Achieved Compression**: 2.51:1 against PCMu8 at quality level 3
-- **Audio Quality**: Preserves full 0-16 KHz bandwidth
-- **Coefficient Sparsity**: 86.9% zeros in Mid channel, 97.8% in Side channel (typical)
-- **EZBC Benefits**: Exploits sparsity, progressive refinement, spatial clustering
-
-### Computational Complexity
-
-- **Encoding**: O(n log n) per chunk for DWT, O(n) for EZBC encoding
-- **Decoding**: O(n log n) per chunk for inverse DWT, O(n) for EZBC decoding
-- **Memory**: O(n) working memory for chunk processing
-
-### Quality Characteristics
-
-- **Frequency Response**: Flat 0-16 KHz within perceptual limits
-- **Dynamic Range**: Preserved through gamma companding
-- **Stereo Imaging**: Maintained through Mid/Side decorrelation
-- **Perceptual Quality**: Optimised for human auditory system characteristics
-
-## Integration with TAV
-
-TAD is designed as an includable API for TAV video encoder integration:
-
-- **Variable Chunk Sizes**: Audio chunks can match video GOP boundaries (e.g., 32016 samples for 1-second TAV GOP)
-- **Unified Quality Levels**: TAD quality 0-5 synchronised with TAV quality 0-5
-- **Embedded Packets**: TAV embeds TAD-compressed audio using packet type 0x24
-- **Shared Container**: Single .tav file contains both video and audio streams
-
-### TAV Integration Example
-
-```c
-// TAD handles non-power-of-2 chunk size correctly
-tad_encode_chunk(audio_buffer, audio_samples_per_gop, output_buffer, &output_size);
-
-// TAV embeds TAD packet
-tav_write_packet(TAV_PACKET_AUDIO, output_buffer, output_size);
-```
-
-## Format Specification
-
-For complete packet structure and bitstream format details, refer to `format documentation.txt`.
-
-### Key Packet Types
-
-- `0x24`: TAD audio packet (used in standalone .tad files and embedded in .tav files)
-
-## Related Projects
-
-- **TAV** (TSVM Advanced Video): Wavelet-based video codec with integrated TAD audio
-- **TSVM**: Target virtual machine platform for TAD playback
-
-## Licence
-
-MIT.
diff --git a/video_encoder/TAV_README.md b/video_encoder/TAV_README.md
deleted file mode 100644
index 5d003cc..0000000
--- a/video_encoder/TAV_README.md
+++ /dev/null
@@ -1,261 +0,0 @@
-# TAV - TSVM Advanced Video Codec
-
-A perceptually-optimised wavelet-based video codec designed for resource-constrained systems, featuring multiple wavelet types, temporal 3D DWT, and sophisticated compression techniques.
-
-## Overview
-
-TAV (TSVM Advanced Video) is a modern video codec built on the discrete wavelet transform (DWT). It combines cutting-edge compression techniques with careful optimisation for resource-constrained systems.
-
-### Key Advantages
-
-- **No blocking artefacts**: Large-tile DWT encoding with padding eliminates DCT block boundaries
-- **No colour banding**: Wavelets spread gradients across scales, preventing banding in the first place
-- **Perceptual optimisation**: HVS-aware quantisation preserves visual quality where it matters
-- **Temporal coherence**: 3D DWT with GOP encoding exploits inter-frame similarity
-- **Efficient sparse coding**: EZBC encoding exploits coefficient sparsity for 16-18% additional compression
-- **Hardware-friendly**: Designed for efficient decoding on resource-constrained platforms
-
-## Features
-
-### Compression Technology
-
-- **Wavelet Types**
-  - **5/3 Reversible** (JPEG 2000 standard): Lossless-capable, good for archival
-  - **9/7 Irreversible** (default): Best overall compression, CDF 9/7 variant
-
-- **Spatial Encoding**
-  - Large-tile encoding with padding, with optional single-tile mode (no blocking artefacts)
-  - 6-level DWT decomposition for deep frequency analysis
-  - Perceptual quantisation with HVS-optimised coefficient scaling
-  - YCoCg-R colour space with anisotropic chroma quantisation
-
-- **Temporal Encoding** (3D DWT Mode)
-  - Group-of-pictures (GOP) encoding with adaptive size (typically 20 frames)
-  - Unified EZBC encoding across temporal dimension
-  - Adaptive GOP boundaries with scene change detection
-
-- **EZBC Encoding**
-  - Binary tree embedded zero block coding exploits coefficient sparsity
-  - Progressive refinement structure with bitplane encoding
-  - Concatenated channel layout for cross-channel compression optimisation
-  - Typical sparsity: 86.9% (Y), 97.8% (Co), 99.5% (Cg)
-  - 16-18% compression improvement over naive coefficient encoding
-  
-### Audio Integration
-
-TAV seamlessly integrates with the TAD (TSVM Advanced Audio) codec for synchronised audio/video encoding:
-- Variable chunk sizes match video GOP boundaries
-- Embedded TAD packets (type 0x24) with Zstd compression
-- Unified container format
-
-## Building
-
-### Prerequisites
-
-- C compiler (GCC/Clang)
-- Zstandard library
-- OpenCV 4 library (only used by experimental motion estimation feature)
-
-### Compilation
-
-```bash
-# Build TAV encoder/decoder
-make tav
-
-# Build all tools including TAD audio codec
-make all
-
-# Clean build artefacts
-make clean
-```
-
-### Build Targets
-
-- `encoder_tav` - Main video encoder
-- `decoder_tav` - Standalone video decoder
-- `tav_inspector` - Packet analysis and debugging tool
-
-## Usage
-
-### Basic Encoding
-
-Encoding requires the FFmpeg executable to be installed on your system.
-
-```bash
-# Default encoding (CDF 9/7 wavelet, quality level 3)
-./encoder_tav -i input.mp4 -o output.tav
-
-# Quality levels (0-5)
-./encoder_tav -i input.avi -q 0 -o output.tav    # Lowest quality, smallest file
-./encoder_tav -i input.mkv -q 5 -o output.tav    # Highest quality, largest file
-```
-
-### Intra-only Encoding
-
-```bash
-# Enable Intra-only encoding
-./encoder_tav -i input.mp4 --intra-only -o output.tav
-```
-
-### Decoding and Inspection
-
-```bash
-# Decode TAV to raw video
-./decoder_tav -i input.tav -o output.mkv
-
-# Inspect packet structure (debugging)
-./tav_inspector input.tav -v
-```
-
-### Frame Limiting
-
-```bash
-# Encode only first N frames (useful for testing)
-./encoder_tav -i input.mp4 -o output.tav --encode-limit 100
-```
-
-## Technical Architecture
-
-### Encoder Pipeline
-
-1. **Input Processing**
-   - FFmpeg demuxing and frame extraction
-   - RGB to YCoCg-R colour space conversion
-   - Resolution validation and padding
-
-2. **DWT Transform**
-   - Spatial: 6-level decomposition per frame
-   - Temporal: 1D DWT across GOP frames (3D DWT mode)
-   - Lifting scheme implementation for all wavelets
-
-3. **Perceptual Quantisation**
-   - HVS-based subband weights
-   - Anisotropic chroma quantisation (YCoCg-R specific)
-   - Quality-dependent quantisation matrices
-
-4. **EZBC Encoding**
-   - Binary tree embedded zero block coding per channel
-   - Progressive refinement by bitplanes
-   - Concatenated bitstream layout: `[Y_bitstream][Co_bitstream][Cg_bitstream]`
-   - Cross-channel compression optimisation
-   
-5. **Entropy Coding**
-   - Zstandard compression (level 7) on concatenated EZBC bitstreams
-   - Cross-channel compression opportunities
-   - Adaptive compression based on GOP structure
-
-### Decoder Pipeline
-
-1. **Container Parsing**
-   - Packet type identification (0x00-0xFF)
-   - Timecode synchronisation
-   - GOP boundary detection
-
-2. **Entropy Decoding**
-   - Zstd decompression of concatenated bitstreams
-   - EZBC binary tree decoding per channel
-   - Progressive coefficient reconstruction
-
-3. **Inverse Quantisation**
-   - Perceptual weight application
-   - Subband-specific scaling
-   - Coefficient reconstruction from sparse representation
-
-4. **Inverse DWT**
-   - Temporal: 1D inverse DWT across frames (3D DWT mode)
-   - Spatial: 6-level inverse wavelet reconstruction
-
-5. **Output Conversion**
-   - YCoCg-R to RGB colour space
-   - Clamping and dithering
-   - Frame buffering for display
-
-### Wavelet Implementation
-
-All wavelets follow a **lifting scheme** pattern with symmetric boundary extension:
-
-```c
-// Forward Transform: Predict → Update
-temp[half + i] = data[odd] - predict(data[even]);  // High-pass
-temp[i] = data[even] + update(temp[half]);         // Low-pass
-
-// Inverse Transform: Undo Update → Undo Predict (reversed order)
-data[even] = temp[i] - update(temp[half]);         // Undo low-pass
-data[odd] = temp[half + i] + predict(data[even]);  // Undo high-pass
-```
-
-**Critical**: Forward and inverse transforms must use identical coefficient indexing and exactly reverse operations to avoid grid artefacts.
-
-### Coefficient Layout
-
-TAV uses **2D Spatial Layout** in memory for each decomposition level:
-
-```
-[LL] [LH] [HL] [HH] [LH] [HL] [HH] ...
- └── Level 0 ──┘ └─── Level 1 ───┘
-```
-
-- `LL`: Low-pass (approximation) - progressively smaller with each level
-- `LH`, `HL`, `HH`: High-pass subbands (horizontal, vertical, diagonal detail)
-
-## Performance Characteristics
-
-### Compression Efficiency
-
-- **Sparsity Exploitation**: Typical quantised coefficient sparsity
-  - Y channel: 86.9% zeros
-  - Co channel: 97.8% zeros
-  - Cg channel: 99.5% zeros
-
-- **EZBC Benefits**: 16-18% compression improvement over naive coefficient encoding through sparsity exploitation
-
-- **Temporal Coherence**: Additional 15-25% improvement with 3D DWT (content-dependent)
-
-### Computational Complexity
-
-- **Encoding**: O(n log n) per frame for spatial DWT
-- **Decoding**: O(n log n) per frame, optimised lifting scheme implementation
-- **Memory**: Single-tile encoding requires O(w × h) working memory
-
-### Quality Characteristics
-
-- **No blocking artefacts**: Wavelet-based encoding is inherently smooth
-- **Perceptual optimisation**: Better subjective quality than bitrate-equivalent DCT codecs
-- **Scalability**: 6 quality levels (0-5) provide a wide range of bitrate/quality trade-offs
-- **Temporal stability**: 3D DWT mode reduces flickering and temporal artefacts
-
-## Format Specification
-
-For complete packet structure and bitstream format details, refer to `format documentation.txt`.
-
-### Key Packet Types
-
-- `0x00`: Metadata and initialisation
-- `0x01`: I-frame (intra-coded frame)
-- `0x12`: GOP unified packet (3D DWT mode)
-- `0x24`: Embedded TAD audio
-- `0xFC`: GOP synchronisation
-- `0xFD`: Timecode
-
-## Debugging Tools
-
-### TAV Inspector
-
-Analyse TAV packet structure and decode individual frames:
-
-```bash
-# Verbose packet analysis
-./tav_inspector input.tav -v
-
-# Extract specific frame ranges
-./tav_inspector input.tav --frame-range 100-200
-```
-
-## Related Projects
-
-- **TAD** (TSVM Advanced Audio): Perceptual audio codec using CDF 9/7 wavelets
-- **TSVM**: Target virtual machine platform for TAV playback
-
-## Licence
-
-MIT.
diff --git a/video_encoder/create_ucf_payload.c b/video_encoder/create_ucf_payload.c
deleted file mode 100644
index c0a2e34..0000000
--- a/video_encoder/create_ucf_payload.c
+++ /dev/null
@@ -1,424 +0,0 @@
-/**
- * TAV+UCF Payload Writer for TAV Files
- * Creates a TAV header-only payload (32 bytes) followed by a UCF cue file (4064 bytes) for concatenated TAV files
- * Total output size: 4096 bytes (32 + 4064)
- * Usage: ./create_ucf_payload input.tav output.ucf [track_names.txt]
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdint.h>
-
-#define TAV_HEADER_SIZE 32  // Bytes in the TAV header emitted by write_tav_header_only()
-#define UCF_SIZE 4064  // Bytes reserved for the UCF cue file (header, cue elements, zero padding)
-#define TAV_OFFSET_BIAS (TAV_HEADER_SIZE + UCF_SIZE)  // 32 + 4064 = 4096; added to every cue offset
-#define TAV_MAGIC "\x1FTSVMTA"  // First 7 magic bytes, shared by TAV (8th byte 'V') and TAP (8th byte 'P')
-
-typedef struct {  // Leading fields of a TAV header, packed (no padding); not referenced by any function in this file
-    uint8_t magic[8];  // header bytes 0-7
-    uint8_t version;  // header byte 8
-    uint16_t width;  // header bytes 9-10
-    uint16_t height;  // header bytes 11-12
-    uint8_t fps;  // header byte 13
-    uint32_t total_frames;  // header bytes 14-17
-    // ... rest of header fields (bytes 18-31 are written directly by write_tav_header_only)
-} __attribute__((packed)) TAVHeader;
-
-// Write the fixed 32-byte TAV header-only payload (File Role = 1) to `out`; the fwrite result is not checked
-static void write_tav_header_only(FILE *out) {
-    uint8_t header[TAV_HEADER_SIZE] = {0};  // zero-filled, so bytes not assigned below stay 0
-
-    // Magic: "\x1FTSVMTAV"
-    header[0] = 0x1F;
-    header[1] = 'T';
-    header[2] = 'S';
-    header[3] = 'V';
-    header[4] = 'M';
-    header[5] = 'T';
-    header[6] = 'A';
-    header[7] = 'V';
-
-    // Version: 5 (YCoCg-R perceptual)
-    header[8] = 5;
-
-    // Width: 560 = 0x0230 (little-endian, low byte first)
-    header[9] = 0x30;
-    header[10] = 0x02;
-
-    // Height: 448 = 0x01C0 (little-endian, low byte first)
-    header[11] = 0xC0;
-    header[12] = 0x01;
-
-    // FPS: 30
-    header[13] = 30;
-
-    // Total Frames: 0xFFFFFFFF (still image marker / not applicable)
-    header[14] = 0xFF;
-    header[15] = 0xFF;
-    header[16] = 0xFF;
-    header[17] = 0xFF;
-
-    // Wavelet Filter Type: 1 (9/7 irreversible, default)
-    header[18] = 1;
-
-    // Decomposition Levels: 6
-    header[19] = 6;
-
-    // Quantiser Indices (Y, Co, Cg): 255 (not applicable for header-only)
-    header[20] = 0xFF;
-    header[21] = 0xFF;
-    header[22] = 0xFF;
-
-    // Extra Feature Flags: 0x80 (bit 7 = has no actual packets)
-    header[23] = 0x80;
-
-    // Video Flags: 0
-    header[24] = 0;
-
-    // Encoder quality level: 0
-    header[25] = 0;
-
-    // Channel layout: 0 (Y-Co-Cg)
-    header[26] = 0;
-
-    // Reserved[4]: zeros (bytes 27-30 are already 0 from the array initialiser)
-
-    // File Role: 1 (header-only, UCF payload follows)
-    header[31] = 1;
-
-    fwrite(header, 1, TAV_HEADER_SIZE, out);
-}
-
-// Write the 16-byte UCF header to `out`: magic (8), version (1), cue count (2), cue file size (4), reserved (1)
-static void write_ucf_header(FILE *out, uint16_t num_cues) {
-    uint8_t magic[8] = {0x1F, 'T', 'S', 'V', 'M', 'U', 'C', 'F'};
-    uint8_t version = 1;
-    uint32_t cue_file_size = TAV_OFFSET_BIAS;  // 4096 = TAV header + UCF area, not UCF_SIZE alone — NOTE(review): confirm this is the intended value
-    uint8_t reserved = 0;
-
-    fwrite(magic, 1, 8, out);
-    fwrite(&version, 1, 1, out);
-    fwrite(&num_cues, 2, 1, out);  // raw host byte order — NOTE(review): presumably little-endian on disk; confirm for big-endian hosts
-    fwrite(&cue_file_size, 4, 1, out);  // raw host byte order, same caveat as num_cues
-    fwrite(&reserved, 1, 1, out);
-}
-
-// Write one UCF cue element to `out`: addressing mode (1), name length (2), name bytes, 48-bit biased offset (6)
-static void write_cue_element(FILE *out, uint64_t offset, const char *name) {
-    uint8_t addressing_mode = 0x22;  // NOTE(review): 0x22 == 0x20 | 0x02, but the flags previously listed here (0x20 human | 0x01 machine | 0x02 internal) OR to 0x23 — confirm which is intended
-    uint16_t name_len = strlen(name);  // implicit narrowing: a name of 65536+ bytes would wrap
-
-    // Offset with 4KB bias (TAV_OFFSET_BIAS = 4096)
-    uint64_t biased_offset = offset + TAV_OFFSET_BIAS;
-
-    fwrite(&addressing_mode, 1, 1, out);
-    fwrite(&name_len, 2, 1, out);
-    fwrite(name, 1, name_len, out);
-
-    // Write 48-bit (6-byte) offset: the first 6 bytes of the uint64_t as stored in memory, which are the low 48 bits only on a little-endian host
-    fwrite(&biased_offset, 6, 1, out);
-}
-
-// Read track names from file (newline-delimited). Returns a malloc'd array of strdup'd strings and stores its length in *count_out; returns NULL on open/allocation failure and leaves *count_out untouched
-static char **read_track_names(const char *filename, int *count_out) {
-    FILE *f = fopen(filename, "r");
-    if (!f) {
-        return NULL;
-    }
-
-    char **names = NULL;
-    int count = 0;
-    int capacity = 16;
-    char line[256];  // fgets reads at most 255 chars per call, so a longer line is split into several names
-
-    names = malloc(capacity * sizeof(char *));
-    if (!names) {
-        fclose(f);
-        return NULL;
-    }
-
-    while (fgets(line, sizeof(line), f)) {
-        // Remove trailing newline, then a trailing carriage return if one precedes it
-        size_t len = strlen(line);
-        if (len > 0 && line[len - 1] == '\n') {
-            line[len - 1] = '\0';
-            len--;
-        }
-        if (len > 0 && line[len - 1] == '\r') {
-            line[len - 1] = '\0';
-            len--;
-        }
-
-        // Skip empty lines
-        if (len == 0) {
-            continue;
-        }
-
-        // Expand capacity if needed (doubling)
-        if (count >= capacity) {
-            capacity *= 2;
-            char **new_names = realloc(names, capacity * sizeof(char *));
-            if (!new_names) {
-                // Cleanup on failure: the old array is still valid after a failed realloc
-                for (int i = 0; i < count; i++) {
-                    free(names[i]);
-                }
-                free(names);
-                fclose(f);
-                return NULL;
-            }
-            names = new_names;
-        }
-
-        // Allocate and copy name
-        names[count] = strdup(line);
-        if (!names[count]) {
-            // Cleanup on failure
-            for (int i = 0; i < count; i++) {
-                free(names[i]);
-            }
-            free(names);
-            fclose(f);
-            return NULL;
-        }
-        count++;
-    }
-
-    fclose(f);
-    *count_out = count;  // may be 0 with a non-NULL array when the file has no non-empty lines
-    return names;
-}
-
-// Find all TAV headers in the file (with smart packet-wise skipping). Returns the header count and stores a malloc'd offset array in *offsets_out; returns -1 on allocation failure without writing *offsets_out
-static int find_tav_headers(FILE *in, uint64_t **offsets_out) {
-    uint64_t *offsets = NULL;
-    int count = 0;
-    int capacity = 16;
-
-    offsets = malloc(capacity * sizeof(uint64_t));
-    if (!offsets) {
-        fprintf(stderr, "Error: Memory allocation failed\n");
-        return -1;
-    }
-
-    // Seek to beginning
-    fseek(in, 0, SEEK_SET);
-
-    uint8_t magic[8];
-
-    while (1) {
-        // Remember current position before reading
-        uint64_t pos = ftell(in);  // ftell returns long; an error result (-1) is not checked
-
-        // Try to read magic
-        if (fread(magic, 1, 8, in) != 8) {
-            // End of file (or fewer than 8 bytes left)
-            break;
-        }
-
-        // Check for TAV magic signature: 7 shared bytes, then 'V' (TAV) or 'P' (TAP)
-        if (memcmp(magic, TAV_MAGIC, 7) == 0 && (magic[7] == 'V' || magic[7] == 'P')) {
-            // Found TAV header
-            if (count >= capacity) {
-                capacity *= 2;
-                uint64_t *new_offsets = realloc(offsets, capacity * sizeof(uint64_t));
-                if (!new_offsets) {
-                    fprintf(stderr, "Error: Memory reallocation failed\n");
-                    free(offsets);
-                    return -1;
-                }
-                offsets = new_offsets;
-            }
-
-            offsets[count++] = pos;
-            printf("Found TAV header at offset: 0x%lX (%lu)\n", pos, pos);  // NOTE(review): %lX/%lu assume uint64_t is unsigned long; PRIX64/PRIu64 would be portable
-
-            // Skip past this header (32 bytes total)
-            uint64_t packet_pos = pos + 32;
-            fseek(in, packet_pos, SEEK_SET);
-
-            // Smart packet-wise skipping: hop from packet to packet instead of scanning byte by byte
-            while (1) {
-                uint8_t packet_type;
-                if (fread(&packet_type, 1, 1, in) != 1) {
-                    // End of file
-                    break;
-                }
-
-                // Check if this is the start of next TAV file (0x1F is prohibited as packet type)
-                if (packet_type == 0x1F) {
-                    // Seek back to the packet start so the byte is re-read as magic at the top of outer loop
-                    fseek(in, packet_pos, SEEK_SET);
-                    break;
-                }
-
-                // printf("TAV Packet 0x%02X at 0x%lX\n", packet_type, packet_pos);
-
-                // Sync packets (0xFE, 0xFF) have no payload size - they're single-byte packets
-                if (packet_type == 0xFE || packet_type == 0xFF) {
-                    packet_pos += 1;
-                    fseek(in, packet_pos, SEEK_SET);
-                    continue;
-                }
-
-                // Read payload size (uint32, little-endian in the file; read raw, so this assumes a little-endian host)
-                uint32_t payload_size = 0;
-                if (fread(&payload_size, 4, 1, in) != 1) {
-                    // End of file
-                    break;
-                }
-
-                // Skip packet: 1 byte (type) + 4 bytes (size) + payload_size
-                packet_pos += 1 + 4 + payload_size;
-                fseek(in, packet_pos, SEEK_SET);
-            }
-        } else {
-            // Move forward by 1 byte for next search
-            fseek(in, pos + 1, SEEK_SET);
-        }
-    }
-
-    *offsets_out = offsets;
-    return count;
-}
-
-int main(int argc, char *argv[]) {  // Scans argv[1] for TAV headers and writes the TAV header + UCF payload to argv[2]; returns 0 on success, 1 on error
-    if (argc < 3 || argc > 4) {
-        fprintf(stderr, "Usage: %s <input.tav> <output.ucf> [track_names.txt]\n", argv[0]);
-        fprintf(stderr, "Creates a 4KB UCF payload for concatenated TAV file\n");
-        fprintf(stderr, "  track_names.txt: Optional file with track names (one per line)\n");
-        return 1;
-    }
-
-    const char *input_path = argv[1];
-    const char *output_path = argv[2];
-    const char *names_path = (argc == 4) ? argv[3] : NULL;
-
-    // Read track names if provided; a failure here is only a warning and default names are used
-    char **track_names = NULL;
-    int num_names = 0;
-    if (names_path) {
-        track_names = read_track_names(names_path, &num_names);
-        if (track_names) {
-            printf("Loaded %d track name(s) from '%s'\n", num_names, names_path);
-        } else {
-            fprintf(stderr, "Warning: Could not read track names from '%s', using defaults\n", names_path);
-        }
-    }
-
-    // Open input file
-    FILE *in = fopen(input_path, "rb");
-    if (!in) {
-        fprintf(stderr, "Error: Cannot open input file '%s'\n", input_path);
-        if (track_names) {
-            for (int i = 0; i < num_names; i++) {
-                free(track_names[i]);
-            }
-            free(track_names);
-        }
-        return 1;
-    }
-
-    // Find all TAV headers
-    uint64_t *offsets = NULL;
-    int num_tracks = find_tav_headers(in, &offsets);
-    fclose(in);
-
-    if (num_tracks < 0) {  // find_tav_headers has already released its own buffer in this case
-        fprintf(stderr, "Error: Failed to scan input file\n");
-        if (track_names) {
-            for (int i = 0; i < num_names; i++) {
-                free(track_names[i]);
-            }
-            free(track_names);
-        }
-        return 1;
-    }
-
-    if (num_tracks == 0) {
-        fprintf(stderr, "Error: No TAV headers found in input file\n");
-        free(offsets);
-        if (track_names) {
-            for (int i = 0; i < num_names; i++) {
-                free(track_names[i]);
-            }
-            free(track_names);
-        }
-        return 1;
-    }
-
-    printf("\nFound %d TAV header(s)\n", num_tracks);
-
-    // Create output UCF file
-    FILE *out = fopen(output_path, "wb");
-    if (!out) {
-        fprintf(stderr, "Error: Cannot create output file '%s'\n", output_path);
-        free(offsets);
-        if (track_names) {
-            for (int i = 0; i < num_names; i++) {
-                free(track_names[i]);
-            }
-            free(track_names);
-        }
-        return 1;
-    }
-
-    // Write TAV header-only payload (File Role = 1)
-    write_tav_header_only(out);
-    printf("Written TAV header-only payload (%d bytes)\n", TAV_HEADER_SIZE);
-
-    // Write UCF header
-    write_ucf_header(out, num_tracks);  // int -> uint16_t parameter: a count above 65535 would wrap
-
-    // Write cue elements, one per header found; NOTE(review): the %lX below assumes uint64_t is unsigned long
-    for (int i = 0; i < num_tracks; i++) {
-        char default_name[32];
-        const char *name;
-
-        // Use custom name if available, otherwise generate default
-        if (track_names && i < num_names) {
-            name = track_names[i];
-        } else {
-            snprintf(default_name, sizeof(default_name), "Track %d", i + 1);
-            name = default_name;
-        }
-
-        write_cue_element(out, offsets[i], name);
-        printf("Written cue element: '%s' at offset 0x%lX (biased: 0x%lX)\n",
-               name, offsets[i], offsets[i] + TAV_OFFSET_BIAS);
-    }
-
-    // Get current file position
-    long current_pos = ftell(out);
-
-    // Fill remaining space with zeros to reach TAV header + UCF (4096 bytes). NOTE(review): if the cue elements already exceed that, nothing is trimmed and the file is larger than the size reported below
-    size_t target_size = TAV_HEADER_SIZE + UCF_SIZE;
-    if (current_pos < target_size) {  // mixed signed/unsigned comparison (long vs size_t)
-        size_t remaining = target_size - current_pos;
-        uint8_t *zeros = calloc(remaining, 1);
-        if (zeros) {  // if calloc fails the padding is silently skipped
-            fwrite(zeros, 1, remaining, out);
-            free(zeros);
-        }
-    }
-
-    fclose(out);
-    free(offsets);
-
-    // Clean up track names
-    if (track_names) {
-        for (int i = 0; i < num_names; i++) {
-            free(track_names[i]);
-        }
-        free(track_names);
-    }
-
-    printf("\nTAV+UCF payload created successfully: %s\n", output_path);
-    printf("File size: %zu bytes (TAV header: %d + UCF: %d)\n",
-           (size_t)(TAV_HEADER_SIZE + UCF_SIZE), TAV_HEADER_SIZE, UCF_SIZE);
-    printf("\nTo create seekable TAV file, prepend this payload to your concatenated TAV file:\n");
-    printf("  cat %s input.tav > output_seekable.tav\n", output_path);
-
-    return 0;
-}
diff --git a/video_encoder/encoder_ipf1d.c b/video_encoder/encoder_ipf1d.c
deleted file mode 100644
index d546e2a..0000000
--- a/video_encoder/encoder_ipf1d.c
+++ /dev/null
@@ -1,935 +0,0 @@
-#define _GNU_SOURCE
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <math.h>
-#include <zlib.h>
-#include <unistd.h>
-#include <sys/wait.h>
-#include <getopt.h>
-#include <sys/time.h>
-
-// TVDOS Movie format constants
-#define TVDOS_MAGIC "\x1F\x54\x53\x56\x4D\x4D\x4F\x56"  // "\x1FTSVM MOV"
-#define IPF_BLOCK_SIZE 12
-
-// iPF1-delta opcodes
-#define SKIP_OP  0x00
-#define PATCH_OP 0x01
-#define REPEAT_OP 0x02
-#define END_OP   0xFF
-
-// Video packet types
-#define IPF1_PACKET_TYPE 0x04, 0x00      // iPF Type 1 (4 + 0)
-#define IPF1_DELTA_PACKET_TYPE 0x04, 0x02 // iPF Type 1 delta
-#define SYNC_PACKET_TYPE 0xFF, 0xFF      // Sync packet
-
-// Audio constants
-#define MP2_SAMPLE_RATE 32000
-#define MP2_DEFAULT_PACKET_SIZE 0x240
-#define MP2_PACKET_TYPE_BASE 0x11
-
-// Default values
-#define DEFAULT_WIDTH 560
-#define DEFAULT_HEIGHT 448
-#define TEMP_AUDIO_FILE "/tmp/tvdos_temp_audio.mp2"
-
-typedef struct {
-    char *input_file;
-    char *output_file;
-    int width;
-    int height;
-    int fps;
-    int total_frames;
-    double duration;
-    int has_audio;
-    int output_to_stdout;
-    
-    // Internal buffers
-    uint8_t *previous_ipf_frame;
-    uint8_t *current_ipf_frame;
-    uint8_t *delta_buffer;
-    uint8_t *rgb_buffer;
-    uint8_t *compressed_buffer;
-    uint8_t *mp2_buffer;
-    size_t frame_buffer_size;
-    
-    // Audio handling
-    FILE *mp2_file;
-    int mp2_packet_size;
-    int mp2_rate_index;
-    size_t audio_remaining;
-    int audio_frames_in_buffer;
-    int target_audio_buffer_size;
-    
-    // FFmpeg processes
-    FILE *ffmpeg_video_pipe;
-    FILE *ffmpeg_audio_pipe;
-    
-    // Progress tracking
-    struct timeval start_time;
-    struct timeval last_progress_time;
-    size_t total_output_bytes;
-    
-    // Dithering mode
-    int dither_mode;
-} encoder_config_t;
-
-// CORRECTED YCoCg conversion matching Kotlin implementation
-typedef struct {
-    float y, co, cg;
-} ycocg_t;
-
-static ycocg_t rgb_to_ycocg_correct(uint8_t r, uint8_t g, uint8_t b, float ditherThreshold) {
-    ycocg_t result;
-    float rf = floor((ditherThreshold / 15.0 + r / 255.0) * 15.0) / 15.0;
-    float gf = floor((ditherThreshold / 15.0 + g / 255.0) * 15.0) / 15.0;
-    float bf = floor((ditherThreshold / 15.0 + b / 255.0) * 15.0) / 15.0;
-
-    // CORRECTED: Match Kotlin implementation exactly
-    float co = rf - bf;           // co = r - b    [-1..1]
-    float tmp = bf + co / 2.0f;   // tmp = b + co/2
-    float cg = gf - tmp;          // cg = g - tmp  [-1..1]
-    float y = tmp + cg / 2.0f;    // y = tmp + cg/2 [0..1]
-    
-    result.y = y;
-    result.co = co;
-    result.cg = cg;
-    
-    return result;
-}
-
-static int quantise_4bit_y(float value) {
-    // Y quantisation: round(y * 15)
-    return (int)round(fmaxf(0.0f, fminf(15.0f, value * 15.0f)));
-}
-
-static int chroma_to_four_bits(float f) {
-    // CORRECTED: Match Kotlin chromaToFourBits function exactly
-    // return (round(f * 8) + 7).coerceIn(0..15)
-    int result = (int)round(f * 8.0f) + 7;
-    return fmaxf(0, fminf(15, result));
-}
-
-// Parse resolution string like "1024x768"
-static int parse_resolution(const char *res_str, int *width, int *height) {
-    if (!res_str) return 0;
-    return sscanf(res_str, "%dx%d", width, height) == 2;
-}
-
-// Execute command and capture output
-static char *execute_command(const char *command) {
-    FILE *pipe = popen(command, "r");
-    if (!pipe) return NULL;
-    
-    char *result = malloc(4096);
-    size_t len = fread(result, 1, 4095, pipe);
-    result[len] = '\0';
-    
-    pclose(pipe);
-    return result;
-}
-
-// Get video metadata using ffprobe
-static int get_video_metadata(encoder_config_t *config) {
-    char command[1024];
-    char *output;
-    
-    // Get frame count
-    snprintf(command, sizeof(command), 
-        "ffprobe -v quiet -select_streams v:0 -count_frames -show_entries stream=nb_read_frames -of csv=p=0 \"%s\"", 
-        config->input_file);
-    output = execute_command(command);
-    if (!output) {
-        fprintf(stderr, "Failed to get frame count\n");
-        return 0;
-    }
-    config->total_frames = atoi(output);
-    free(output);
-    
-    // Get frame rate
-    snprintf(command, sizeof(command),
-        "ffprobe -v quiet -select_streams v:0 -show_entries stream=r_frame_rate -of csv=p=0 \"%s\"",
-        config->input_file);
-    output = execute_command(command);
-    if (!output) {
-        fprintf(stderr, "Failed to get frame rate\n");
-        return 0;
-    }
-    
-    // Parse framerate (could be "30/1" or "29.97")
-    int num, den;
-    if (sscanf(output, "%d/%d", &num, &den) == 2) {
-        config->fps = (den > 0) ? (num / den) : 30;
-    } else {
-        config->fps = (int)round(atof(output));
-    }
-    free(output);
-    
-    // Get duration
-    snprintf(command, sizeof(command),
-        "ffprobe -v quiet -show_entries format=duration -of csv=p=0 \"%s\"",
-        config->input_file);
-    output = execute_command(command);
-    if (output) {
-        config->duration = atof(output);
-        free(output);
-    }
-    
-    // Check if has audio
-    snprintf(command, sizeof(command),
-        "ffprobe -v quiet -select_streams a:0 -show_entries stream=index -of csv=p=0 \"%s\"",
-        config->input_file);
-    output = execute_command(command);
-    config->has_audio = (output && strlen(output) > 0 && atoi(output) >= 0);
-    if (output) free(output);
-    
-    // Validate frame count using duration if needed
-    if (config->total_frames <= 0 && config->duration > 0) {
-        config->total_frames = (int)(config->duration * config->fps);
-    }
-    
-    fprintf(stderr, "Video metadata:\n");
-    fprintf(stderr, "  Frames: %d\n", config->total_frames);
-    fprintf(stderr, "  FPS: %d\n", config->fps);
-    fprintf(stderr, "  Duration: %.2fs\n", config->duration);
-    fprintf(stderr, "  Audio: %s\n", config->has_audio ? "Yes" : "No");
-    fprintf(stderr, "  Resolution: %dx%d\n", config->width, config->height);
-    
-    return (config->total_frames > 0 && config->fps > 0);
-}
-
-// Start FFmpeg process for video conversion
-static int start_video_conversion(encoder_config_t *config) {
-    char command[2048];
-    snprintf(command, sizeof(command),
-        "ffmpeg -i \"%s\" -f rawvideo -pix_fmt rgb24 -vf scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d -y - 2>/dev/null",
-        config->input_file, config->width, config->height, config->width, config->height);
-    
-    config->ffmpeg_video_pipe = popen(command, "r");
-    return (config->ffmpeg_video_pipe != NULL);
-}
-
-// Start FFmpeg process for audio conversion
-static int start_audio_conversion(encoder_config_t *config) {
-    if (!config->has_audio) return 1;
-    
-    char command[2048];
-    snprintf(command, sizeof(command),
-        "ffmpeg -i \"%s\" -acodec libtwolame -psymodel 4 -b:a 192k -ar %d -ac 2 -y \"%s\" 2>/dev/null",
-        config->input_file, MP2_SAMPLE_RATE, TEMP_AUDIO_FILE);
-    
-    int result = system(command);
-    if (result == 0) {
-        config->mp2_file = fopen(TEMP_AUDIO_FILE, "rb");
-        if (config->mp2_file) {
-            fseek(config->mp2_file, 0, SEEK_END);
-            config->audio_remaining = ftell(config->mp2_file);
-            fseek(config->mp2_file, 0, SEEK_SET);
-            return 1;
-        }
-    }
-    
-    fprintf(stderr, "Warning: Failed to convert audio, proceeding without audio\n");
-    config->has_audio = 0;
-    return 1;
-}
-
-// Write variable-length integer
-static void write_varint(uint8_t **ptr, uint32_t value) {
-    while (value >= 0x80) {
-        **ptr = (uint8_t)((value & 0x7F) | 0x80);
-        (*ptr)++;
-        value >>= 7;
-    }
-    **ptr = (uint8_t)(value & 0x7F);
-    (*ptr)++;
-}
-
-// Get MP2 packet size and rate index
-static int get_mp2_packet_size(uint8_t *header) {
-    int bitrate_index = (header[2] >> 4) & 0xF;
-    int padding_bit = (header[2] >> 1) & 0x1;
-    
-    int bitrates[] = {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, -1};
-    int bitrate = bitrates[bitrate_index];
-    
-    if (bitrate <= 0) return MP2_DEFAULT_PACKET_SIZE;
-    
-    int frame_size = (144 * bitrate * 1000) / MP2_SAMPLE_RATE + padding_bit;
-    return frame_size;
-}
-
-static int mp2_packet_size_to_rate_index(int packet_size, int is_mono) {
-    int rate_index;
-    switch (packet_size) {
-        case 144:  rate_index = 0; break;
-        case 216:  rate_index = 2; break;
-        case 252:  rate_index = 4; break;
-        case 288:  rate_index = 6; break;
-        case 360:  rate_index = 8; break;
-        case 432:  rate_index = 10; break;
-        case 504:  rate_index = 12; break;
-        case 576:  rate_index = 14; break;
-        case 720:  rate_index = 16; break;
-        case 864:  rate_index = 18; break;
-        case 1008: rate_index = 20; break;
-        case 1152: rate_index = 22; break;
-        case 1440: rate_index = 24; break;
-        case 1728: rate_index = 26; break;
-        default: rate_index = 14; break;
-    }
-    return rate_index + (is_mono ? 1 : 0);
-}
-
-// Gzip compress function (instead of zlib)
-static size_t gzip_compress(uint8_t *src, size_t src_len, uint8_t *dst, size_t dst_max) {
-    z_stream stream = {0};
-    stream.next_in = src;
-    stream.avail_in = src_len;
-    stream.next_out = dst;
-    stream.avail_out = dst_max;
-    
-    // Use deflateInit2 with gzip format
-    if (deflateInit2(&stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY) != Z_OK) {
-        return 0;
-    }
-    
-    if (deflate(&stream, Z_FINISH) != Z_STREAM_END) {
-        deflateEnd(&stream);
-        return 0;
-    }
-    
-    size_t compressed_size = stream.total_out;
-    deflateEnd(&stream);
-    return compressed_size;
-}
-
-// Bayer dithering kernels (4 patterns, each 4x4)
-static const float bayerKernels[4][16] = {
-    { // Pattern 0
-        (0.0f + 0.5f) / 16.0f, (8.0f + 0.5f) / 16.0f, (2.0f + 0.5f) / 16.0f, (10.0f + 0.5f) / 16.0f,
-        (12.0f + 0.5f) / 16.0f, (4.0f + 0.5f) / 16.0f, (14.0f + 0.5f) / 16.0f, (6.0f + 0.5f) / 16.0f,
-        (3.0f + 0.5f) / 16.0f, (11.0f + 0.5f) / 16.0f, (1.0f + 0.5f) / 16.0f, (9.0f + 0.5f) / 16.0f,
-        (15.0f + 0.5f) / 16.0f, (7.0f + 0.5f) / 16.0f, (13.0f + 0.5f) / 16.0f, (5.0f + 0.5f) / 16.0f
-    },
-    { // Pattern 1
-        (8.0f + 0.5f) / 16.0f, (2.0f + 0.5f) / 16.0f, (10.0f + 0.5f) / 16.0f, (0.0f + 0.5f) / 16.0f,
-        (4.0f + 0.5f) / 16.0f, (14.0f + 0.5f) / 16.0f, (6.0f + 0.5f) / 16.0f, (12.0f + 0.5f) / 16.0f,
-        (11.0f + 0.5f) / 16.0f, (1.0f + 0.5f) / 16.0f, (9.0f + 0.5f) / 16.0f, (3.0f + 0.5f) / 16.0f,
-        (7.0f + 0.5f) / 16.0f, (13.0f + 0.5f) / 16.0f, (5.0f + 0.5f) / 16.0f, (15.0f + 0.5f) / 16.0f
-    },
-    { // Pattern 2
-        (7.0f + 0.5f) / 16.0f, (13.0f + 0.5f) / 16.0f, (5.0f + 0.5f) / 16.0f, (15.0f + 0.5f) / 16.0f,
-        (8.0f + 0.5f) / 16.0f, (2.0f + 0.5f) / 16.0f, (10.0f + 0.5f) / 16.0f, (0.0f + 0.5f) / 16.0f,
-        (4.0f + 0.5f) / 16.0f, (14.0f + 0.5f) / 16.0f, (6.0f + 0.5f) / 16.0f, (12.0f + 0.5f) / 16.0f,
-        (11.0f + 0.5f) / 16.0f, (1.0f + 0.5f) / 16.0f, (9.0f + 0.5f) / 16.0f, (3.0f + 0.5f) / 16.0f
-    },
-    { // Pattern 3
-        (15.0f + 0.5f) / 16.0f, (7.0f + 0.5f) / 16.0f, (13.0f + 0.5f) / 16.0f, (5.0f + 0.5f) / 16.0f,
-        (0.0f + 0.5f) / 16.0f, (8.0f + 0.5f) / 16.0f, (2.0f + 0.5f) / 16.0f, (10.0f + 0.5f) / 16.0f,
-        (12.0f + 0.5f) / 16.0f, (4.0f + 0.5f) / 16.0f, (14.0f + 0.5f) / 16.0f, (6.0f + 0.5f) / 16.0f,
-        (3.0f + 0.5f) / 16.0f, (11.0f + 0.5f) / 16.0f, (1.0f + 0.5f) / 16.0f, (9.0f + 0.5f) / 16.0f
-    }
-};
-
-// CORRECTED: Encode a 4x4 block to iPF1 format matching Kotlin implementation
-static void encode_ipf1_block_correct(uint8_t *rgb_data, int width, int height, int block_x, int block_y,
-                                     int channels, int pattern, uint8_t *output) {
-    ycocg_t pixels[16];
-    int y_values[16];
-    float co_values[16];  // Keep full precision for subsampling
-    float cg_values[16];  // Keep full precision for subsampling
-    
-    // Convert 4x4 block to YCoCg using corrected transform
-    for (int py = 0; py < 4; py++) {
-        for (int px = 0; px < 4; px++) {
-            int src_x = block_x * 4 + px;
-            int src_y = block_y * 4 + py;
-            float t = (pattern < 0) ? 0.0f : bayerKernels[pattern % 4][4 * (py % 4) + (px % 4)];
-            int idx = py * 4 + px;
-            
-            if (src_x < width && src_y < height) {
-                int pixel_offset = (src_y * width + src_x) * channels;
-                uint8_t r = rgb_data[pixel_offset];
-                uint8_t g = rgb_data[pixel_offset + 1];
-                uint8_t b = rgb_data[pixel_offset + 2];
-                pixels[idx] = rgb_to_ycocg_correct(r, g, b, t);
-            } else {
-                pixels[idx] = (ycocg_t){0.0f, 0.0f, 0.0f};
-            }
-            
-            y_values[idx] = quantise_4bit_y(pixels[idx].y);
-            co_values[idx] = pixels[idx].co;
-            cg_values[idx] = pixels[idx].cg;
-        }
-    }
-    
-    // CORRECTED: Chroma subsampling (4:2:0 for iPF1) with correct averaging
-    int cos1 = chroma_to_four_bits((co_values[0] + co_values[1] + co_values[4] + co_values[5]) / 4.0f);
-    int cos2 = chroma_to_four_bits((co_values[2] + co_values[3] + co_values[6] + co_values[7]) / 4.0f);
-    int cos3 = chroma_to_four_bits((co_values[8] + co_values[9] + co_values[12] + co_values[13]) / 4.0f);
-    int cos4 = chroma_to_four_bits((co_values[10] + co_values[11] + co_values[14] + co_values[15]) / 4.0f);
-    
-    int cgs1 = chroma_to_four_bits((cg_values[0] + cg_values[1] + cg_values[4] + cg_values[5]) / 4.0f);
-    int cgs2 = chroma_to_four_bits((cg_values[2] + cg_values[3] + cg_values[6] + cg_values[7]) / 4.0f);
-    int cgs3 = chroma_to_four_bits((cg_values[8] + cg_values[9] + cg_values[12] + cg_values[13]) / 4.0f);
-    int cgs4 = chroma_to_four_bits((cg_values[10] + cg_values[11] + cg_values[14] + cg_values[15]) / 4.0f);
-    
-    // CORRECTED: Pack into iPF1 format matching Kotlin exactly
-    // Co values (2 bytes): cos2|cos1, cos4|cos3
-    output[0] = ((cos2 << 4) | cos1);
-    output[1] = ((cos4 << 4) | cos3);
-    
-    // Cg values (2 bytes): cgs2|cgs1, cgs4|cgs3
-    output[2] = ((cgs2 << 4) | cgs1);
-    output[3] = ((cgs4 << 4) | cgs3);
-    
-    // CORRECTED: Y values (8 bytes) with correct ordering from Kotlin
-    output[4] = ((y_values[1] << 4) | y_values[0]);   // Y1|Y0
-    output[5] = ((y_values[5] << 4) | y_values[4]);   // Y5|Y4  
-    output[6] = ((y_values[3] << 4) | y_values[2]);   // Y3|Y2
-    output[7] = ((y_values[7] << 4) | y_values[6]);   // Y7|Y6
-    output[8] = ((y_values[9] << 4) | y_values[8]);   // Y9|Y8
-    output[9] = ((y_values[13] << 4) | y_values[12]); // Y13|Y12
-    output[10] = ((y_values[11] << 4) | y_values[10]); // Y11|Y10
-    output[11] = ((y_values[15] << 4) | y_values[14]); // Y15|Y14
-}
-
-// Helper function for contrast weighting
-static double contrast_weight(int v1, int v2, int delta, int weight) {
-    double avg = (v1 + v2) / 2.0;
-    double contrast = (avg < 4 || avg > 11) ? 1.5 : 1.0;
-    return delta * weight * contrast;
-}
-
-// Check if two iPF1 blocks are significantly different
-static int is_significantly_different(uint8_t *block_a, uint8_t *block_b) {
-    double score = 0.0;
-    
-    // Co values (bytes 0-1)
-    uint16_t co_a = block_a[0] | (block_a[1] << 8);
-    uint16_t co_b = block_b[0] | (block_b[1] << 8);
-    for (int i = 0; i < 4; i++) {
-        int va = (co_a >> (i * 4)) & 0xF;
-        int vb = (co_b >> (i * 4)) & 0xF;
-        int delta = abs(va - vb);
-        score += contrast_weight(va, vb, delta, 3);
-    }
-    
-    // Cg values (bytes 2-3)
-    uint16_t cg_a = block_a[2] | (block_a[3] << 8);
-    uint16_t cg_b = block_b[2] | (block_b[3] << 8);
-    for (int i = 0; i < 4; i++) {
-        int va = (cg_a >> (i * 4)) & 0xF;
-        int vb = (cg_b >> (i * 4)) & 0xF;
-        int delta = abs(va - vb);
-        score += contrast_weight(va, vb, delta, 3);
-    }
-    
-    // Y values (bytes 4-11)
-    for (int i = 4; i < 12; i++) {
-        int byte_a = block_a[i] & 0xFF;
-        int byte_b = block_b[i] & 0xFF;
-        
-        int y_a_high = (byte_a >> 4) & 0xF;
-        int y_a_low = byte_a & 0xF;
-        int y_b_high = (byte_b >> 4) & 0xF;
-        int y_b_low = byte_b & 0xF;
-        
-        int delta_high = abs(y_a_high - y_b_high);
-        int delta_low = abs(y_a_low - y_b_low);
-        
-        score += contrast_weight(y_a_high, y_b_high, delta_high, 2);
-        score += contrast_weight(y_a_low, y_b_low, delta_low, 2);
-    }
-    
-    return score > 4.0;
-}
-
-// Encode iPF1 frame to buffer
-static void encode_ipf1_frame(uint8_t *rgb_data, int width, int height, int channels, int pattern,
-                             uint8_t *ipf_buffer) {
-    int blocks_per_row = (width + 3) / 4;
-    int blocks_per_col = (height + 3) / 4;
-    
-    for (int block_y = 0; block_y < blocks_per_col; block_y++) {
-        for (int block_x = 0; block_x < blocks_per_row; block_x++) {
-            int block_index = block_y * blocks_per_row + block_x;
-            uint8_t *output_block = ipf_buffer + block_index * IPF_BLOCK_SIZE;
-            encode_ipf1_block_correct(rgb_data, width, height, block_x, block_y, channels, pattern, output_block);
-        }
-    }
-}
-
-// Create iPF1-delta encoded frame
-static size_t encode_ipf1_delta(uint8_t *previous_frame, uint8_t *current_frame, 
-                               int width, int height, uint8_t *delta_buffer) {
-    int blocks_per_row = (width + 3) / 4;
-    int blocks_per_col = (height + 3) / 4;
-    int total_blocks = blocks_per_row * blocks_per_col;
-    
-    uint8_t *output_ptr = delta_buffer;
-    int skip_count = 0;
-    uint8_t *patch_blocks = malloc(total_blocks * IPF_BLOCK_SIZE);
-    int patch_count = 0;
-    
-    for (int block_index = 0; block_index < total_blocks; block_index++) {
-        uint8_t *prev_block = previous_frame + block_index * IPF_BLOCK_SIZE;
-        uint8_t *curr_block = current_frame + block_index * IPF_BLOCK_SIZE;
-        
-        if (is_significantly_different(prev_block, curr_block)) {
-            if (skip_count > 0) {
-                *output_ptr++ = SKIP_OP;
-                write_varint(&output_ptr, skip_count);
-                skip_count = 0;
-            }
-            
-            memcpy(patch_blocks + patch_count * IPF_BLOCK_SIZE, curr_block, IPF_BLOCK_SIZE);
-            patch_count++;
-        } else {
-            if (patch_count > 0) {
-                *output_ptr++ = PATCH_OP;
-                write_varint(&output_ptr, patch_count);
-                memcpy(output_ptr, patch_blocks, patch_count * IPF_BLOCK_SIZE);
-                output_ptr += patch_count * IPF_BLOCK_SIZE;
-                patch_count = 0;
-            }
-            skip_count++;
-        }
-    }
-    
-    if (patch_count > 0) {
-        *output_ptr++ = PATCH_OP;
-        write_varint(&output_ptr, patch_count);
-        memcpy(output_ptr, patch_blocks, patch_count * IPF_BLOCK_SIZE);
-        output_ptr += patch_count * IPF_BLOCK_SIZE;
-    }
-    
-    *output_ptr++ = END_OP;
-    
-    free(patch_blocks);
-    return output_ptr - delta_buffer;
-}
-
-// Get current time in seconds
-static double get_current_time_sec(struct timeval *tv) {
-    gettimeofday(tv, NULL);
-    return tv->tv_sec + tv->tv_usec / 1000000.0;
-}
-
-// Display progress information similar to FFmpeg
-static void display_progress(encoder_config_t *config, int frame_num) {
-    struct timeval current_time;
-    double current_sec = get_current_time_sec(&current_time);
-    
-    // Only update progress once per second
-    double last_progress_sec = config->last_progress_time.tv_sec + config->last_progress_time.tv_usec / 1000000.0;
-    if (current_sec - last_progress_sec < 1.0) {
-        return;
-    }
-    
-    config->last_progress_time = current_time;
-    
-    // Calculate timing
-    double start_sec = config->start_time.tv_sec + config->start_time.tv_usec / 1000000.0;
-    double elapsed_sec = current_sec - start_sec;
-    double current_video_time = (double)frame_num / config->fps;
-    double fps = frame_num / elapsed_sec;
-    double speed = (elapsed_sec > 0) ? current_video_time / elapsed_sec : 0.0;
-    double bitrate = (elapsed_sec > 0) ? (config->total_output_bytes * 8.0 / 1024.0) / elapsed_sec : 0.0;
-    
-    // Format output size in human readable format
-    char size_str[32];
-    if (config->total_output_bytes >= 1024 * 1024) {
-        snprintf(size_str, sizeof(size_str), "%.1fMB", config->total_output_bytes / (1024.0 * 1024.0));
-    } else if (config->total_output_bytes >= 1024) {
-        snprintf(size_str, sizeof(size_str), "%.1fkB", config->total_output_bytes / 1024.0);
-    } else {
-        snprintf(size_str, sizeof(size_str), "%zuB", config->total_output_bytes);
-    }
-    
-    // Format current time as HH:MM:SS.xx
-    int hours = (int)(current_video_time / 3600);
-    int minutes = (int)((current_video_time - hours * 3600) / 60);
-    double seconds = current_video_time - hours * 3600 - minutes * 60;
-    
-    // Print progress line (overwrite previous line)
-    fprintf(stderr, "\rframe=%d fps=%.1f size=%s time=%02d:%02d:%05.2f bitrate=%.1fkbits/s speed=%4.2fx", 
-            frame_num, fps, size_str, hours, minutes, seconds, bitrate, speed);
-    fflush(stderr);
-}
-
-// Process audio for current frame
-static int process_audio(encoder_config_t *config, int frame_num, FILE *output) {
-    if (!config->has_audio || !config->mp2_file || config->audio_remaining <= 0) {
-        return 1;
-    }
-    
-    // Initialise packet size on first frame
-    if (config->mp2_packet_size == 0) {
-        uint8_t header[4];
-        if (fread(header, 1, 4, config->mp2_file) != 4) return 1;
-        fseek(config->mp2_file, 0, SEEK_SET);
-        
-        config->mp2_packet_size = get_mp2_packet_size(header);
-        int is_mono = (header[3] >> 6) == 3;
-        config->mp2_rate_index = mp2_packet_size_to_rate_index(config->mp2_packet_size, is_mono);
-    }
-    
-    // Calculate how much audio time each frame represents (in seconds)
-    double frame_audio_time = 1.0 / config->fps;
-    
-    // Calculate how much audio time each MP2 packet represents
-    // MP2 frame contains 1152 samples at 32kHz = 0.036 seconds
-    double packet_audio_time = 1152.0 / MP2_SAMPLE_RATE;
-    
-    // Estimate how many packets we consume per video frame
-    double packets_per_frame = frame_audio_time / packet_audio_time;
-    
-    // Only insert audio when buffer would go below 2 frames
-    // Initialise with 2 packets on first frame to prime the buffer
-    int packets_to_insert = 0;
-    if (frame_num == 1) {
-        packets_to_insert = 2;
-        config->audio_frames_in_buffer = 2;
-    } else {
-        // Simulate buffer consumption (packets consumed per frame)
-        config->audio_frames_in_buffer -= (int)ceil(packets_per_frame);
-        
-        // Only insert packets when buffer gets low (≤ 2 frames)
-        if (config->audio_frames_in_buffer <= 2) {
-            packets_to_insert = config->target_audio_buffer_size - config->audio_frames_in_buffer;
-            packets_to_insert = (packets_to_insert > 0) ? packets_to_insert : 1;
-        }
-    }
-    
-    // Insert the calculated number of audio packets
-    for (int q = 0; q < packets_to_insert; q++) {
-        size_t bytes_to_read = config->mp2_packet_size;
-        if (bytes_to_read > config->audio_remaining) {
-            bytes_to_read = config->audio_remaining;
-        }
-        
-        size_t bytes_read = fread(config->mp2_buffer, 1, bytes_to_read, config->mp2_file);
-        if (bytes_read == 0) break;
-        
-        uint8_t audio_packet_type[2] = {config->mp2_rate_index, MP2_PACKET_TYPE_BASE};
-        fwrite(audio_packet_type, 1, 2, output);
-        fwrite(config->mp2_buffer, 1, bytes_read, output);
-        
-        // Track audio bytes written
-        config->total_output_bytes += 2 + bytes_read;
-        config->audio_remaining -= bytes_read;
-        config->audio_frames_in_buffer++;
-    }
-    
-    return 1;
-}
-
-// Write TVDOS header
-static void write_tvdos_header(encoder_config_t *config, FILE *output) {
-    fwrite(TVDOS_MAGIC, 1, 8, output);
-    fwrite(&config->width, 2, 1, output);
-    fwrite(&config->height, 2, 1, output);
-    fwrite(&config->fps, 2, 1, output);
-    fwrite(&config->total_frames, 4, 1, output);
-    
-    uint16_t unused = 0x00FF;
-    fwrite(&unused, 2, 1, output);
-    
-    int audio_sample_size = 2 * (((MP2_SAMPLE_RATE / config->fps) + 1));
-    int audio_queue_size = config->has_audio ? 
-        (int)ceil(audio_sample_size / 2304.0) + 1 : 0;
-        
-    uint16_t audio_queue_info = config->has_audio ? 
-        (MP2_DEFAULT_PACKET_SIZE >> 2) | (audio_queue_size << 12) : 0x0000;
-    fwrite(&audio_queue_info, 2, 1, output);
-    
-    // Store target buffer size for audio timing
-    config->target_audio_buffer_size = audio_queue_size;
-    
-    uint8_t reserved[10] = {0};
-    fwrite(reserved, 1, 10, output);
-}
-
-// Initialise encoder configuration
-static encoder_config_t *init_encoder_config() {
-    encoder_config_t *config = calloc(1, sizeof(encoder_config_t));
-    if (!config) return NULL;
-    
-    config->width = DEFAULT_WIDTH;
-    config->height = DEFAULT_HEIGHT;
-    
-    return config;
-}
-
-// Allocate encoder buffers
-static int allocate_buffers(encoder_config_t *config) {
-    config->frame_buffer_size = ((config->width + 3) / 4) * ((config->height + 3) / 4) * IPF_BLOCK_SIZE;
-    
-    config->rgb_buffer = malloc(config->width * config->height * 3);
-    config->previous_ipf_frame = malloc(config->frame_buffer_size);
-    config->current_ipf_frame = malloc(config->frame_buffer_size);
-    config->delta_buffer = malloc(config->frame_buffer_size * 2);
-    config->compressed_buffer = malloc(config->frame_buffer_size * 2);
-    config->mp2_buffer = malloc(2048);
-    
-    return (config->rgb_buffer && config->previous_ipf_frame && 
-            config->current_ipf_frame && config->delta_buffer && 
-            config->compressed_buffer && config->mp2_buffer);
-}
-
-// Process one frame - CORRECTED ORDER: Audio -> Video -> Sync
-static int process_frame(encoder_config_t *config, int frame_num, int is_keyframe, FILE *output) {
-    // Read RGB data from FFmpeg pipe first
-    size_t rgb_size = config->width * config->height * 3;
-    if (fread(config->rgb_buffer, 1, rgb_size, config->ffmpeg_video_pipe) != rgb_size) {
-        if (feof(config->ffmpeg_video_pipe)) return 0;
-        return -1;
-    }
-    
-    // Step 1: Process audio FIRST (matches working file pattern)
-    if (!process_audio(config, frame_num, output)) {
-        return -1;
-    }
-    
-    // Step 2: Encode and write video
-    int pattern;
-    switch (config->dither_mode) {
-        case 0: pattern = -1; break;  // No dithering
-        case 1: pattern = 0; break;   // Static pattern
-        case 2: pattern = frame_num % 4; break;  // Dynamic pattern
-        default: pattern = 0; break;  // Fallback to static
-    }
-    encode_ipf1_frame(config->rgb_buffer, config->width, config->height, 3, pattern,
-                     config->current_ipf_frame);
-    
-    // Determine if we should use delta encoding
-    int use_delta = 0;
-    size_t data_size = config->frame_buffer_size;
-    uint8_t *frame_data = config->current_ipf_frame;
-    
-    if (frame_num > 1 && !is_keyframe) {
-        size_t delta_size = encode_ipf1_delta(config->previous_ipf_frame, 
-                                            config->current_ipf_frame,
-                                            config->width, config->height,
-                                            config->delta_buffer);
-        
-        if (delta_size < config->frame_buffer_size * 0.576) {
-            use_delta = 1;
-            data_size = delta_size;
-            frame_data = config->delta_buffer;
-        }
-    }
-    
-    // Compress the frame data using gzip
-    size_t compressed_size = gzip_compress(frame_data, data_size, 
-                                          config->compressed_buffer, 
-                                          config->frame_buffer_size * 2);
-    if (compressed_size == 0) {
-        fprintf(stderr, "Gzip compression failed\n");
-        return -1;
-    }
-    
-    // Write video packet
-    if (use_delta) {
-        uint8_t packet_type[2] = {IPF1_DELTA_PACKET_TYPE};
-        fwrite(packet_type, 1, 2, output);
-    } else {
-        uint8_t packet_type[2] = {IPF1_PACKET_TYPE};
-        fwrite(packet_type, 1, 2, output);
-    }
-    
-    uint32_t size_le = compressed_size;
-    fwrite(&size_le, 4, 1, output);
-    fwrite(config->compressed_buffer, 1, compressed_size, output);
-    
-    // Step 3: Write sync packet AFTER video (matches working file pattern)
-    uint8_t sync[2] = {SYNC_PACKET_TYPE};
-    fwrite(sync, 1, 2, output);
-    
-    // Track video bytes written (packet type + size + compressed data + sync)
-    config->total_output_bytes += 2 + 4 + compressed_size + 2;
-    
-    // Swap frame buffers
-    uint8_t *temp = config->previous_ipf_frame;
-    config->previous_ipf_frame = config->current_ipf_frame;
-    config->current_ipf_frame = temp;
-    
-    // Display progress
-    display_progress(config, frame_num);
-    
-    return 1;
-}
-
-// Cleanup function
-static void cleanup_config(encoder_config_t *config) {
-    if (!config) return;
-    
-    if (config->ffmpeg_video_pipe) pclose(config->ffmpeg_video_pipe);
-    if (config->mp2_file) fclose(config->mp2_file);
-    
-    free(config->input_file);
-    free(config->output_file);
-    free(config->rgb_buffer);
-    free(config->previous_ipf_frame);
-    free(config->current_ipf_frame);
-    free(config->delta_buffer);
-    free(config->compressed_buffer);
-    free(config->mp2_buffer);
-    
-    // Remove temporary audio file
-    unlink(TEMP_AUDIO_FILE);
-    
-    free(config);
-}
-
-// Print usage information
-static void print_usage(const char *program_name) {
-    printf("TVDOS Movie Encoder\n\n");
-    printf("Usage: %s [options] input_video\n\n", program_name);
-    printf("Options:\n");
-    printf("  -o, --output FILE    Output TVDOS movie file (default: stdout)\n");
-    printf("  -s, --size WxH       Video resolution (default: 560x448)\n");
-    printf("  -d, --dither MODE    Dithering mode (default: 1)\n");
-    printf("                         0: No dithering\n");
-    printf("                         1: Static pattern\n");
-    printf("                         2: Dynamic pattern (better quality, larger files)\n");
-    printf("  -h, --help           Show this help message\n\n");
-    printf("Examples:\n");
-    printf("  %s input.mp4 -o output.mov\n", program_name);
-    printf("  %s input.avi -s 1024x768 -o output.mov\n", program_name);
-    printf("  yt-dlp -o - \"https://youtube.com/watch?v=VIDEO_ID\" | ffmpeg -i pipe:0 -c copy temp.mp4 && %s temp.mp4 -o youtube_video.mov && rm temp.mp4\n", program_name);
-}
-
-int main(int argc, char *argv[]) {
-    encoder_config_t *config = init_encoder_config();
-    if (!config) {
-        fprintf(stderr, "Failed to initialise encoder\n");
-        return 1;
-    }
-    
-    config->output_to_stdout = 1; // Default to stdout
-    config->dither_mode = 1; // Default to static dithering
-    
-    // Parse command line arguments
-    static struct option long_options[] = {
-        {"output", required_argument, 0, 'o'},
-        {"size", required_argument, 0, 's'},
-        {"dither", required_argument, 0, 'd'},
-        {"help", no_argument, 0, 'h'},
-        {0, 0, 0, 0}
-    };
-    
-    int c;
-    while ((c = getopt_long(argc, argv, "o:s:d:h", long_options, NULL)) != -1) {
-        switch (c) {
-            case 'o':
-                config->output_file = strdup(optarg);
-                config->output_to_stdout = 0;
-                break;
-            case 's':
-                if (!parse_resolution(optarg, &config->width, &config->height)) {
-                    fprintf(stderr, "Invalid resolution format: %s\n", optarg);
-                    cleanup_config(config);
-                    return 1;
-                }
-                break;
-            case 'd':
-                config->dither_mode = atoi(optarg);
-                if (config->dither_mode < 0 || config->dither_mode > 2) {
-                    fprintf(stderr, "Invalid dither mode: %s (must be 0, 1, or 2)\n", optarg);
-                    cleanup_config(config);
-                    return 1;
-                }
-                break;
-            case 'h':
-                print_usage(argv[0]);
-                cleanup_config(config);
-                return 0;
-            default:
-                print_usage(argv[0]);
-                cleanup_config(config);
-                return 1;
-        }
-    }
-    
-    if (optind >= argc) {
-        fprintf(stderr, "Error: Input video file required\n\n");
-        print_usage(argv[0]);
-        cleanup_config(config);
-        return 1;
-    }
-    
-    config->input_file = strdup(argv[optind]);
-    
-    // Get video metadata
-    if (!get_video_metadata(config)) {
-        fprintf(stderr, "Failed to analyze video metadata\n");
-        cleanup_config(config);
-        return 1;
-    }
-    
-    // Allocate buffers
-    if (!allocate_buffers(config)) {
-        fprintf(stderr, "Failed to allocate memory buffers\n");
-        cleanup_config(config);
-        return 1;
-    }
-    
-    // Start video conversion
-    if (!start_video_conversion(config)) {
-        fprintf(stderr, "Failed to start video conversion\n");
-        cleanup_config(config);
-        return 1;
-    }
-    
-    // Start audio conversion
-    if (!start_audio_conversion(config)) {
-        fprintf(stderr, "Failed to start audio conversion\n");
-        cleanup_config(config);
-        return 1;
-    }
-    
-    // Open output
-    FILE *output = config->output_to_stdout ? stdout : fopen(config->output_file, "wb");
-    if (!output) {
-        fprintf(stderr, "Failed to open output file\n");
-        cleanup_config(config);
-        return 1;
-    }
-    
-    // Write TVDOS header
-    write_tvdos_header(config, output);
-    
-    // Initialise progress tracking
-    gettimeofday(&config->start_time, NULL);
-    config->last_progress_time = config->start_time;
-    config->total_output_bytes = 8 + 2 + 2 + 2 + 4 + 2 + 2 + 10; // TVDOS header size
-    
-    // Process frames with correct order: Audio -> Video -> Sync
-    for (int frame = 1; frame <= config->total_frames; frame++) {
-        int is_keyframe = (frame == 1) || (frame % 30 == 0);
-        
-        int result = process_frame(config, frame, is_keyframe, output);
-        if (result <= 0) {
-            if (result == 0) {
-                fprintf(stderr, "End of video at frame %d\n", frame);
-            }
-            break;
-        }
-    }
-    
-    // Final progress update and newline
-    fprintf(stderr, "\n");
-    
-    if (!config->output_to_stdout) {
-        fclose(output);
-        fprintf(stderr, "Encoding complete: %s\n", config->output_file);
-    }
-    
-    cleanup_config(config);
-    return 0;
-}
diff --git a/video_encoder/encoder_tav_opencv.cpp b/video_encoder/encoder_tav_opencv.cpp
deleted file mode 100644
index f74d2d1..0000000
--- a/video_encoder/encoder_tav_opencv.cpp
+++ /dev/null
@@ -1,183 +0,0 @@
-// Created by CuriousTorvald and Claude on 2025-10-17
-// MPEG-style bidirectional block motion compensation for TAV encoder
-// Simplified: Single-level diamond search, variable blocks, overlaps, sub-pixel refinement
-
-#include <opencv2/opencv.hpp>
-#include <cstdlib>
-#include <cstring>
-#include <cmath>
-
-extern "C" {
-
-// Dense optical flow estimation using Farneback algorithm
-// Computes flow at every pixel, then samples at block centers for motion vectors
-// Much more spatially coherent than independent block matching
-void estimate_optical_flow_motion(
-    const float *current_y,    // Current frame Y channel (width×height)
-    const float *reference_y,  // Reference frame Y channel
-    int width, int height,
-    int block_size,            // Block size (e.g., 16)
-    int16_t *mvs_x,           // Output: motion vectors X (in 1/4-pixel units)
-    int16_t *mvs_y            // Output: motion vectors Y (in 1/4-pixel units)
-) {
-    // Convert float Y channels to 8-bit grayscale for OpenCV
-    cv::Mat cur_gray(height, width, CV_8UC1);
-    cv::Mat ref_gray(height, width, CV_8UC1);
-
-    // Detect if Y is in [0,1] range and scale to [0,255] if needed
-    float y_min = current_y[0], y_max = current_y[0];
-    for (int i = 1; i < width * height; i++) {
-        if (current_y[i] < y_min) y_min = current_y[i];
-        if (current_y[i] > y_max) y_max = current_y[i];
-    }
-    float scale = (y_max <= 1.1f) ? 255.0f : 1.0f;
-
-    for (int y = 0; y < height; y++) {
-        for (int x = 0; x < width; x++) {
-            int idx = y * width + x;
-            cur_gray.at<uint8_t>(y, x) = (uint8_t)std::round(std::max(0.0f, std::min(255.0f, current_y[idx] * scale)));
-            ref_gray.at<uint8_t>(y, x) = (uint8_t)std::round(std::max(0.0f, std::min(255.0f, reference_y[idx] * scale)));
-        }
-    }
-
-    // Compute dense optical flow using Farneback algorithm
-    // IMPORTANT: We need BACKWARD flow (current → reference) for motion compensation
-    // This tells us where to PULL pixels FROM in the reference frame
-    cv::Mat flow;
-    cv::calcOpticalFlowFarneback(
-        cur_gray,      // Current frame (source)
-        ref_gray,      // Reference frame (destination)
-        flow,          // Output flow (2-channel float: dx, dy per pixel)
-        0.5,           // pyr_scale: pyramid scale (0.5 = each layer is half size)
-        3,             // levels: number of pyramid levels
-        20,            // winsize: averaging window size
-        3,             // iterations: number of iterations at each pyramid level
-        5,             // poly_n: size of pixel neighborhood (5 or 7)
-        1.2,           // poly_sigma: standard deviation of Gaussian for polynomial expansion
-        0              // flags: 0 = normal, OPTFLOW_USE_INITIAL_FLOW = use input flow as initial estimate
-    );
-
-    // Sample flow at block centers to get motion vectors
-    int num_blocks_x = (width + block_size - 1) / block_size;
-    int num_blocks_y = (height + block_size - 1) / block_size;
-
-    for (int by = 0; by < num_blocks_y; by++) {
-        for (int bx = 0; bx < num_blocks_x; bx++) {
-            int block_idx = by * num_blocks_x + bx;
-
-            // Block center position
-            int center_x = bx * block_size + block_size / 2;
-            int center_y = by * block_size + block_size / 2;
-
-            // Clamp to frame boundaries
-            if (center_x >= width) center_x = width - 1;
-            if (center_y >= height) center_y = height - 1;
-
-            // Get flow at block center
-            cv::Point2f flow_vec = flow.at<cv::Point2f>(center_y, center_x);
-
-            // Convert to 1/4-pixel units and store
-            // Flow is in pixels, positive = motion to the right/down
-            mvs_x[block_idx] = (int16_t)std::round(flow_vec.x * 4.0f);
-            mvs_y[block_idx] = (int16_t)std::round(flow_vec.y * 4.0f);
-        }
-    }
-}
-
-// Block-based motion compensation with bilinear interpolation (sub-pixel precision)
-// MVs are in 1/4-pixel units
-// This implements the warp() function from MC-EZBC pseudocode
-void warp_block_motion(
-    const float *src,          // Source frame
-    int width, int height,
-    const int16_t *mvs_x,      // Motion vectors X (1/4-pixel units)
-    const int16_t *mvs_y,      // Motion vectors Y (1/4-pixel units)
-    int block_size,            // Block size (e.g., 16)
-    float *dst                 // Output warped frame
-) {
-    int num_blocks_x = (width + block_size - 1) / block_size;
-    int num_blocks_y = (height + block_size - 1) / block_size;
-
-    // Process each block
-    for (int by = 0; by < num_blocks_y; by++) {
-        for (int bx = 0; bx < num_blocks_x; bx++) {
-            int block_idx = by * num_blocks_x + bx;
-
-            // Get motion vector for this block (in 1/4-pixel units)
-            float mv_x = mvs_x[block_idx] / 4.0f;  // Convert to pixels
-            float mv_y = mvs_y[block_idx] / 4.0f;
-
-            // Block boundaries in destination frame
-            int block_x_start = bx * block_size;
-            int block_y_start = by * block_size;
-            int block_x_end = std::min(block_x_start + block_size, width);
-            int block_y_end = std::min(block_y_start + block_size, height);
-
-            // Warp each pixel in the block
-            for (int y = block_y_start; y < block_y_end; y++) {
-                for (int x = block_x_start; x < block_x_end; x++) {
-                    // Source position (backward warping)
-                    float src_x = x - mv_x;
-                    float src_y = y - mv_y;
-
-                    // Clamp to valid range
-                    src_x = std::max(0.0f, std::min((float)(width - 1), src_x));
-                    src_y = std::max(0.0f, std::min((float)(height - 1), src_y));
-
-                    // Bilinear interpolation
-                    int x0 = (int)src_x;
-                    int y0 = (int)src_y;
-                    int x1 = std::min(x0 + 1, width - 1);
-                    int y1 = std::min(y0 + 1, height - 1);
-
-                    float fx = src_x - x0;
-                    float fy = src_y - y0;
-
-                    float val00 = src[y0 * width + x0];
-                    float val10 = src[y0 * width + x1];
-                    float val01 = src[y1 * width + x0];
-                    float val11 = src[y1 * width + x1];
-
-                    float val_top = (1.0f - fx) * val00 + fx * val10;
-                    float val_bot = (1.0f - fx) * val01 + fx * val11;
-                    float val = (1.0f - fy) * val_top + fy * val_bot;
-
-                    dst[y * width + x] = val;
-                }
-            }
-        }
-    }
-}
-
-// Bidirectional motion compensation for MC-EZBC predict step
-// Implements: prediction = 0.5 * (warp(f0, MV_fwd) + warp(f1, MV_bwd))
-void warp_bidirectional(
-    const float *f0, const float *f1,
-    int width, int height,
-    const int16_t *mvs_fwd_x, const int16_t *mvs_fwd_y,  // F0 → F1
-    const int16_t *mvs_bwd_x, const int16_t *mvs_bwd_y,  // F1 → F0
-    int block_size,
-    float *prediction          // Output: 0.5 * (warped_f0 + warped_f1)
-) {
-    int num_pixels = width * height;
-
-    // Allocate temporary buffers
-    float *warped_f0 = new float[num_pixels];
-    float *warped_f1 = new float[num_pixels];
-
-    // Warp f0 forward using forward MVs
-    warp_block_motion(f0, width, height, mvs_fwd_x, mvs_fwd_y, block_size, warped_f0);
-
-    // Warp f1 backward using backward MVs
-    warp_block_motion(f1, width, height, mvs_bwd_x, mvs_bwd_y, block_size, warped_f1);
-
-    // Average the two warped frames
-    for (int i = 0; i < num_pixels; i++) {
-        prediction[i] = 0.5f * (warped_f0[i] + warped_f1[i]);
-    }
-
-    delete[] warped_f0;
-    delete[] warped_f1;
-}
-
-} // extern "C"
diff --git a/video_encoder/encoder_tav_text.c b/video_encoder/encoder_tav_text.c
deleted file mode 100644
index fbddb1f..0000000
--- a/video_encoder/encoder_tav_text.c
+++ /dev/null
@@ -1,795 +0,0 @@
-/*
-encoder_tav_text.c
-Text-based video encoder for TSVM using custom font ROMs
-
-Outputs Videotex files with custom header and packet type 0x3F (text mode)
-
-File structure:
-  - Videotex header (32 bytes): magic "\x1FTSVM-VT", version, grid dims, fps, total_frames
-  - Extended header packet (0xEF): BGNT, ENDT, CDAT, VNDR, FMPG
-  - Font ROM packets (0x30): lowrom and highrom (1920 bytes each)
-  - Per-frame sequence: [audio 0x20], [timecode 0xFD], [videotex 0x3F], [sync 0xFF]
-
-Videotex packet structure (0x3F): Zstd([rows][cols][fg-array][bg-array][char-array])
-  - rows: uint8 (32)
-  - cols: uint8 (80)
-  - fg-array: rows*cols bytes (foreground colors, 0xF0=black, 0xFE=white)
-  - bg-array: rows*cols bytes (background colors, 0xF0=black, 0xFE=white)
-  - char-array: rows*cols bytes (glyph indices 0-255)
-
-Total uncompressed size: 2 + (80*32*3) = 7682 bytes
-Separated arrays compress much better (fg/bg are just 0xF0/0xFE runs)
-Video size: 80×32 characters (560×448 pixels with 7×14 font)
-Audio: MP2 encoding at 96 kbps, 32 KHz stereo (packet 0x20)
-Each text frame is treated as an I-frame with sync packet
-
-Usage:
-  gcc -Ofast -std=c11 -Wall encoder_tav_text.c -o encoder_tav_text -lm -lzstd
-  ./encoder_tav_text -i video.mp4 -f font.chr -o output.mv3
-*/
-
-#define _POSIX_C_SOURCE 200809L
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <math.h>
-#include <zstd.h>
-#include <unistd.h>
-#include <time.h>
-#include <sys/time.h>
-
-#define ENCODER_VENDOR_STRING "Encoder-TAV-Text 20251121 (videotex)"
-
-#define CHAR_W 7
-#define CHAR_H 14
-#define GRID_W 80
-#define GRID_H 32
-#define PIXEL_W (GRID_W * CHAR_W)  // 560
-#define PIXEL_H (GRID_H * CHAR_H)  // 448
-#define PATCH_SZ (CHAR_W * CHAR_H)
-#define SAMPLE_RATE 32000
-#define MP2_DEFAULT_PACKET_SIZE 1152
-
-// TAV packet types
-#define PACKET_TIMECODE 0xFD
-#define PACKET_SYNC 0xFF
-#define PACKET_AUDIO_MP2 0x20
-#define PACKET_SSF 0x30
-#define PACKET_TEXT 0x3F
-#define PACKET_EXTENDED_HDR 0xEF
-
-// SSF opcodes for font ROM
-#define SSF_OPCODE_LOWROM 0x80
-#define SSF_OPCODE_HIGHROM 0x81
-
-// Font ROM size constants
-#define FONTROM_PADDED_SIZE 1920
-#define GLYPHS_PER_ROM 128
-
-// Color mapping (4-bit RGB to TSVM palette)
-#define COLOR_BLACK 0xF0
-#define COLOR_WHITE 0xFE
-
-// Generate random filename for temporary audio file
-static void generate_random_filename(char *filename) {
-    srand(time(NULL));
-
-    const char charset[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
-    const int charset_size = sizeof(charset) - 1;
-
-    // Start with the prefix
-    strcpy(filename, "/tmp/");
-
-    // Generate 32 random characters
-    for (int i = 0; i < 32; i++) {
-        filename[5 + i] = charset[rand() % charset_size];
-    }
-
-    // Add the .mp2 extension
-    strcpy(filename + 37, ".mp2");
-    filename[41] = '\0';  // Null terminate
-}
-
-char TEMP_AUDIO_FILE[42];
-
-// Global flag to disable inverted character matching
-int g_no_invert_char = 0;
-
-typedef struct {
-    uint8_t *data;     // Binary glyph data (PATCH_SZ bytes per glyph)
-    int count;         // Number of glyphs
-} FontROM;
-
-// Get FFmpeg version string
-char *get_ffmpeg_version(void) {
-    FILE *pipe = popen("ffmpeg -version 2>&1 | head -1", "r");
-    if (!pipe) return NULL;
-
-    char *version = malloc(256);
-    if (!version) {
-        pclose(pipe);
-        return NULL;
-    }
-
-    if (fgets(version, 256, pipe)) {
-        // Remove trailing newline
-        size_t len = strlen(version);
-        if (len > 0 && version[len - 1] == '\n') {
-            version[len - 1] = '\0';
-        }
-        pclose(pipe);
-        return version;
-    }
-
-    free(version);
-    pclose(pipe);
-    return NULL;
-}
-
-// Detect video FPS using ffprobe
-float detect_fps(const char *video_path) {
-    char cmd[1024];
-    snprintf(cmd, sizeof(cmd),
-             "ffprobe -v error -select_streams v:0 -show_entries stream=r_frame_rate "
-             "-of default=noprint_wrappers=1:nokey=1 \"%s\" 2>/dev/null",
-             video_path);
-
-    FILE *pipe = popen(cmd, "r");
-    if (!pipe) return 30.0f; // fallback
-
-    char fps_str[64] = {0};
-    if (fgets(fps_str, sizeof(fps_str), pipe)) {
-        // Parse fraction like "30/1" or "24000/1001"
-        int num = 0, den = 1;
-        if (sscanf(fps_str, "%d/%d", &num, &den) == 2 && den > 0) {
-            pclose(pipe);
-            return (float)num / (float)den;
-        }
-    }
-    pclose(pipe);
-    return 30.0f; // fallback
-}
-
-// Load font ROM (14 bytes per glyph, no header)
-FontROM *load_font_rom(const char *path) {
-    FILE *f = fopen(path, "rb");
-    if (!f) return NULL;
-
-    fseek(f, 0, SEEK_END);
-    long size = ftell(f);
-    fseek(f, 0, SEEK_SET);
-
-    if (size % 14 != 0) {
-        fprintf(stderr, "Warning: ROM size not divisible by 14 (got %ld bytes)\n", size);
-    }
-
-    int glyph_count = size / 14;
-    FontROM *rom = malloc(sizeof(FontROM));
-    rom->count = glyph_count;
-    rom->data = malloc(glyph_count * PATCH_SZ);
-
-    // Read and unpack glyphs
-    for (int g = 0; g < glyph_count; g++) {
-        uint8_t row_bytes[14];
-        if (fread(row_bytes, 14, 1, f) != 1) {
-            free(rom->data);
-            free(rom);
-            fclose(f);
-            return NULL;
-        }
-
-        // Unpack bits to binary pixels
-        for (int row = 0; row < CHAR_H; row++) {
-            for (int col = 0; col < CHAR_W; col++) {
-                // Bit 6 = leftmost, bit 0 = rightmost
-                int bit = (row_bytes[row] >> (6 - col)) & 1;
-                rom->data[g * PATCH_SZ + row * CHAR_W + col] = bit;
-            }
-        }
-    }
-
-    fclose(f);
-    fprintf(stderr, "Loaded font ROM: %d glyphs\n", glyph_count);
-    return rom;
-}
-
-// Find best matching glyph for a grayscale patch
-int find_best_glyph(const uint8_t *patch, const FontROM *rom, uint8_t *out_bg, uint8_t *out_fg) {
-    // Try both normal and inverted matching (unless --no-invert-char is set)
-    int best_glyph = 0;
-    float best_error = INFINITY;
-    uint8_t best_bg = COLOR_BLACK, best_fg = COLOR_WHITE;
-
-    for (int g = 0; g < rom->count; g++) {
-        const uint8_t *glyph = &rom->data[g * PATCH_SZ];
-
-        // Try normal: glyph 1 = fg, glyph 0 = bg
-        float err_normal = 0;
-        for (int i = 0; i < PATCH_SZ; i++) {
-            int expected = glyph[i] ? 255 : 0;
-            int diff = patch[i] - expected;
-            err_normal += diff * diff;
-        }
-
-        if (err_normal < best_error) {
-            best_error = err_normal;
-            best_glyph = g;
-            best_bg = COLOR_BLACK;
-            best_fg = COLOR_WHITE;
-        }
-
-        // Try inverted: glyph 0 = fg, glyph 1 = bg (skip if --no-invert-char)
-        if (!g_no_invert_char) {
-            float err_inverted = 0;
-            for (int i = 0; i < PATCH_SZ; i++) {
-                int expected = glyph[i] ? 0 : 255;
-                int diff = patch[i] - expected;
-                err_inverted += diff * diff;
-            }
-
-            if (err_inverted < best_error) {
-                best_error = err_inverted;
-                best_glyph = g;
-                best_bg = COLOR_WHITE;
-                best_fg = COLOR_BLACK;
-            }
-        }
-    }
-
-    *out_bg = best_bg;
-    *out_fg = best_fg;
-    return best_glyph;
-}
-
-// Convert frame to text mode
-void frame_to_text(const uint8_t *pixels, const FontROM *rom,
-                   uint8_t *bg_col, uint8_t *fg_col, uint8_t *chars) {
-    uint8_t patch[PATCH_SZ];
-
-    for (int gr = 0; gr < GRID_H; gr++) {
-        for (int gc = 0; gc < GRID_W; gc++) {
-            int idx = gr * GRID_W + gc;
-
-            // Extract patch
-            for (int y = 0; y < CHAR_H; y++) {
-                for (int x = 0; x < CHAR_W; x++) {
-                    int px = gc * CHAR_W + x;
-                    int py = gr * CHAR_H + y;
-                    patch[y * CHAR_W + x] = pixels[py * PIXEL_W + px];
-                }
-            }
-
-            // Find best match
-            chars[idx] = find_best_glyph(patch, rom, &bg_col[idx], &fg_col[idx]);
-        }
-    }
-}
-
-// Get current time in nanoseconds since UNIX epoch
-uint64_t get_current_time_ns(void) {
-    struct timeval tv;
-    gettimeofday(&tv, NULL);
-    return (uint64_t)tv.tv_sec * 1000000000ULL + (uint64_t)tv.tv_usec * 1000ULL;
-}
-
-// Parse MP2 packet header to get accurate packet size
-int get_mp2_packet_size(uint8_t *header) {
-    int bitrate_index = (header[2] >> 4) & 0x0F;
-    int bitrates[] = {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384};
-    if (bitrate_index >= 15) return MP2_DEFAULT_PACKET_SIZE;
-
-    int bitrate = bitrates[bitrate_index];
-    if (bitrate == 0) return MP2_DEFAULT_PACKET_SIZE;
-
-    int sampling_freq_index = (header[2] >> 2) & 0x03;
-    int sampling_freqs[] = {44100, 48000, 32000, 0};
-    int sampling_freq = sampling_freqs[sampling_freq_index];
-    if (sampling_freq == 0) return MP2_DEFAULT_PACKET_SIZE;
-
-    int padding = (header[2] >> 1) & 0x01;
-    return (144 * bitrate * 1000) / sampling_freq + padding;
-}
-
-// Write Videotex header (32 bytes, similar to TAV but simpler)
-void write_videotex_header(FILE *f, uint8_t fps, uint32_t total_frames) {
-    fwrite("\x1FTSVMTAV", 8, 1, f);
-
-    // Version: 1 (uint8)
-    fputc(1, f);
-
-    // Grid dimensions (uint8 each)
-    uint16_t width = GRID_W;
-    uint16_t height = GRID_H;
-    fwrite(&width, sizeof(uint16_t), 1, f);  // cols = 80
-    fwrite(&height, sizeof(uint16_t), 1, f);  // rows = 32
-
-    // FPS (uint8)
-    fputc(fps, f);
-
-    // Total frames (uint32, little-endian)
-    fwrite(&total_frames, sizeof(uint32_t), 1, f);
-
-    fputc(0, f); // wavelet filter type
-    fputc(0, f); // decomposition levels
-    fputc(0, f); // quantiser Y
-    fputc(0, f); // quantiser Co
-    fputc(0, f); // quantiser Cg
-
-    // Feature Flags
-    fputc(0x03, f);  // bit 0 = has audio; bit 1 = has subtitle (Videotex is classified as subtitles)
-
-    // Video Flags
-    fputc(0x80, f); // bit 7 = has no video (Videotex is classified as subtitles)
-
-
-    fputc(0, f); // encoder quality level
-    fputc(0x02, f); // channel layout: Y only
-    fputc(0, f); // entropy coder
-
-    fputc(0, f); // reserved
-    fputc(0, f); // reserved
-
-    fputc(0, f); // device orientation: no rotation
-    fputc(0, f); // file role: generic
-}
-
-// Write extended header packet with metadata
-// Returns the file offset where ENDT value is written (for later update)
-long write_extended_header(FILE *f, uint64_t creation_time_ns, const char *ffmpeg_version) {
-    fputc(PACKET_EXTENDED_HDR, f);
-
-    // Helper macros for key-value pairs
-    #define WRITE_KV_UINT64(key_str, value) do { \
-        fwrite(key_str, 1, 4, f); \
-        uint8_t value_type = 0x04; /* Uint64 */ \
-        fwrite(&value_type, 1, 1, f); \
-        uint64_t val = (value); \
-        fwrite(&val, sizeof(uint64_t), 1, f); \
-    } while(0)
-
-    #define WRITE_KV_BYTES(key_str, data, len) do { \
-        fwrite(key_str, 1, 4, f); \
-        uint8_t value_type = 0x10; /* Bytes */ \
-        fwrite(&value_type, 1, 1, f); \
-        uint16_t length = (len); \
-        fwrite(&length, sizeof(uint16_t), 1, f); \
-        fwrite((data), 1, (len), f); \
-    } while(0)
-
-    // Count key-value pairs (BGNT, ENDT, CDAT, VNDR, FMPG)
-    uint16_t num_pairs = ffmpeg_version ? 5 : 4;  // FMPG is optional
-    fwrite(&num_pairs, sizeof(uint16_t), 1, f);
-
-    // BGNT: Video begin time (0 for frame 0)
-    WRITE_KV_UINT64("BGNT", 0ULL);
-
-    // ENDT: Video end time (placeholder, will be updated at end)
-    long endt_offset = ftell(f);
-    WRITE_KV_UINT64("ENDT", 0ULL);
-
-    // CDAT: Creation time in nanoseconds since UNIX epoch
-    WRITE_KV_UINT64("CDAT", creation_time_ns);
-
-    // VNDR: Encoder name and version
-    const char *vendor_str = ENCODER_VENDOR_STRING;
-    WRITE_KV_BYTES("VNDR", vendor_str, strlen(vendor_str));
-
-    // FMPG: FFmpeg version (if available)
-    if (ffmpeg_version) {
-        WRITE_KV_BYTES("FMPG", ffmpeg_version, strlen(ffmpeg_version));
-    }
-
-    #undef WRITE_KV_UINT64
-    #undef WRITE_KV_BYTES
-
-    // Return offset of ENDT value (skip key, type byte)
-    return endt_offset + 4 + 1;  // 4 bytes for "ENDT", 1 byte for type
-}
-
-// Write font ROM packet (SSF packet type 0x30)
-void write_fontrom_packet(FILE *f, const uint8_t *rom_data, size_t data_size, uint8_t opcode) {
-    // Prepare padded ROM data (pad to FONTROM_PADDED_SIZE with zeros)
-    uint8_t *padded_data = calloc(1, FONTROM_PADDED_SIZE);
-    memcpy(padded_data, rom_data, data_size);
-
-    // Packet structure:
-    // [type:0x30][size:uint32][index:uint24][opcode:uint8][length:uint16][data][terminator:0x00]
-    uint32_t packet_size = 3 + 1 + 2 + FONTROM_PADDED_SIZE + 1;
-
-    // Write packet type and size
-    fputc(PACKET_SSF, f);
-    fwrite(&packet_size, sizeof(uint32_t), 1, f);
-
-    // Write SSF payload
-    // Index (3 bytes, always 0 for font ROM)
-    fputc(0, f);
-    fputc(0, f);
-    fputc(0, f);
-
-    // Opcode (0x80=lowrom, 0x81=highrom)
-    fputc(opcode, f);
-
-    // Payload length (uint16, little-endian)
-    uint16_t payload_len = FONTROM_PADDED_SIZE;
-    fwrite(&payload_len, sizeof(uint16_t), 1, f);
-
-    // Font data (padded to 1920 bytes)
-    fwrite(padded_data, 1, FONTROM_PADDED_SIZE, f);
-
-    // Terminator
-    fputc(0x00, f);
-
-    free(padded_data);
-
-    fprintf(stderr, "Font ROM uploaded: %zu bytes (padded to %d), opcode 0x%02X\n",
-            data_size, FONTROM_PADDED_SIZE, opcode);
-}
-
-// Write timecode packet (nanoseconds)
-void write_timecode(FILE *f, uint64_t timecode_ns) {
-    fputc(PACKET_TIMECODE, f);
-    fwrite(&timecode_ns, sizeof(uint64_t), 1, f);
-}
-
-// Write sync packet
-void write_sync(FILE *f) {
-    fputc(PACKET_SYNC, f);
-}
-
-// Write MP2 audio packet
-void write_audio_mp2(FILE *f, const uint8_t *data, uint32_t size) {
-    fputc(PACKET_AUDIO_MP2, f);
-    fwrite(&size, sizeof(uint32_t), 1, f);
-    fwrite(data, 1, size, f);
-}
-
-// Write text packet with separated arrays (better compression)
-void write_text_packet(FILE *f, const uint8_t *bg_col, const uint8_t *fg_col,
-                       const uint8_t *chars, int rows, int cols) {
-    int grid_size = rows * cols;
-
-    // Prepare uncompressed data: [rows][cols][fg-array][bg-array][char-array]
-    // Separated arrays compress much better (fg/bg are just 0xF0/0xFE runs)
-    size_t uncompressed_size = 2 + grid_size * 3;
-    uint8_t *uncompressed = malloc(uncompressed_size);
-
-    uncompressed[0] = rows;
-    uncompressed[1] = cols;
-
-    // Copy arrays in order: foreground, background, characters
-    memcpy(&uncompressed[2], fg_col, grid_size);                    // Foreground first
-    memcpy(&uncompressed[2 + grid_size], bg_col, grid_size);        // Background second
-    memcpy(&uncompressed[2 + grid_size * 2], chars, grid_size);     // Characters third
-
-    // Compress with Zstd
-    size_t max_compressed = ZSTD_compressBound(uncompressed_size);
-    uint8_t *compressed = malloc(max_compressed);
-    size_t compressed_size = ZSTD_compress(compressed, max_compressed,
-                                           uncompressed, uncompressed_size, 3);
-
-    if (ZSTD_isError(compressed_size)) {
-        fprintf(stderr, "Zstd compression error\n");
-        exit(1);
-    }
-
-    // Write packet: [type][size][data]
-    fputc(PACKET_TEXT, f);
-    uint32_t size32 = compressed_size;
-    fwrite(&size32, 4, 1, f);
-    fwrite(compressed, compressed_size, 1, f);
-
-    free(compressed);
-    free(uncompressed);
-}
-
-int main(int argc, char **argv) {
-    if (argc < 7) {
-        fprintf(stderr, "Usage: %s -i <video> -f <font.chr> -o <output.tav> [--no-invert-char]\n", argv[0]);
-        return 1;
-    }
-
-    const char *input_video = NULL;
-    const char *font_path = NULL;
-    const char *output_path = NULL;
-
-    for (int i = 1; i < argc; i++) {
-        if (strcmp(argv[i], "-i") == 0 && i+1 < argc) input_video = argv[++i];
-        else if (strcmp(argv[i], "-f") == 0 && i+1 < argc) font_path = argv[++i];
-        else if (strcmp(argv[i], "-o") == 0 && i+1 < argc) output_path = argv[++i];
-        else if (strcmp(argv[i], "--no-invert-char") == 0) g_no_invert_char = 1;
-    }
-
-    if (!input_video || !font_path || !output_path) {
-        fprintf(stderr, "Missing required arguments\n");
-        return 1;
-    }
-
-    if (g_no_invert_char) {
-        fprintf(stderr, "Inverted character matching disabled\n");
-    }
-
-    // Generate random temp filename for audio
-    generate_random_filename(TEMP_AUDIO_FILE);
-
-    // Capture creation time and FFmpeg version for extended header
-    uint64_t creation_time_ns = get_current_time_ns();
-    char *ffmpeg_version = get_ffmpeg_version();
-
-    // Detect video FPS
-    float fps_float = detect_fps(input_video);
-    uint8_t fps = (uint8_t)(fps_float + 0.5f); // Round to nearest integer
-    fprintf(stderr, "Detected FPS: %.2f (using %d in TAV header)\n", fps_float, fps);
-
-    // Load font ROM
-    FontROM *rom = load_font_rom(font_path);
-    if (!rom) {
-        fprintf(stderr, "Failed to load font ROM: %s\n", font_path);
-        return 1;
-    }
-
-    // Open FFmpeg pipe for grayscale frames at 560×448
-    char ffmpeg_cmd[1024];
-    snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
-             "ffmpeg -i \"%s\" -vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" "
-             "-f rawvideo -pix_fmt gray - 2>/dev/null",
-             input_video, PIXEL_W, PIXEL_H, PIXEL_W, PIXEL_H);
-
-    fprintf(stderr, "Opening video stream...\n");
-    FILE *video_pipe = popen(ffmpeg_cmd, "r");
-    if (!video_pipe) {
-        fprintf(stderr, "Failed to open FFmpeg pipe\n");
-        return 1;
-    }
-
-    // Extract MP2 audio to temporary file using libtwolame
-    fprintf(stderr, "Extracting MP2 audio...\n");
-    char audio_cmd[1024];
-    snprintf(audio_cmd, sizeof(audio_cmd),
-             "ffmpeg -v quiet -i \"%s\" -acodec libtwolame -psymodel 4 -b:a 224k -ar %d -ac 2 -y \"%s\" 2>/dev/null",
-             input_video, SAMPLE_RATE, TEMP_AUDIO_FILE);
-
-    int audio_result = system(audio_cmd);
-    if (audio_result != 0) {
-        fprintf(stderr, "Warning: Audio extraction failed, continuing without audio\n");
-    }
-
-    // Open MP2 file for reading
-    FILE *mp2_file = NULL;
-    long audio_remaining = 0;
-    if (audio_result == 0) {
-        mp2_file = fopen(TEMP_AUDIO_FILE, "rb");
-        if (mp2_file) {
-            fseek(mp2_file, 0, SEEK_END);
-            audio_remaining = ftell(mp2_file);
-            fseek(mp2_file, 0, SEEK_SET);
-            fprintf(stderr, "Audio ready: %ld bytes\n", audio_remaining);
-        }
-    }
-
-    // Open output file
-    FILE *out = fopen(output_path, "wb");
-    if (!out) {
-        fprintf(stderr, "Failed to open output file\n");
-        pclose(video_pipe);
-        if (mp2_file) fclose(mp2_file);
-        return 1;
-    }
-
-    // Write Videotex header with placeholder total_frames (will update at end)
-    long header_offset = ftell(out);
-    write_videotex_header(out, fps, 0);
-
-    // Write extended header packet (before first timecode)
-    long endt_offset = write_extended_header(out, creation_time_ns, ffmpeg_version);
-
-    // Upload font ROM to TSVM (split into lowrom and highrom)
-    fprintf(stderr, "Uploading font ROM to TSVM...\n");
-    FILE *rom_file = fopen(font_path, "rb");
-    if (rom_file) {
-        fseek(rom_file, 0, SEEK_END);
-        long rom_size = ftell(rom_file);
-        fseek(rom_file, 0, SEEK_SET);
-
-        uint8_t *raw_rom = malloc(rom_size);
-        if (raw_rom && fread(raw_rom, 1, rom_size, rom_file) == rom_size) {
-            // Split into lowrom and highrom
-            size_t bytes_per_half = (GLYPHS_PER_ROM * 14); // 128 glyphs × 14 bytes = 1792
-
-            // Write lowrom (first 128 glyphs)
-            if (rom_size >= bytes_per_half) {
-                write_fontrom_packet(out, raw_rom, bytes_per_half, SSF_OPCODE_LOWROM);
-            }
-
-            // Write highrom (second 128 glyphs)
-            if (rom_size >= bytes_per_half * 2) {
-                write_fontrom_packet(out, raw_rom + bytes_per_half, bytes_per_half, SSF_OPCODE_HIGHROM);
-            } else if (rom_size > bytes_per_half) {
-                // Partial highrom
-                write_fontrom_packet(out, raw_rom + bytes_per_half, rom_size - bytes_per_half, SSF_OPCODE_HIGHROM);
-            }
-
-            free(raw_rom);
-        }
-        fclose(rom_file);
-    }
-
-    // Allocate buffers
-    size_t frame_size = PIXEL_W * PIXEL_H;
-    uint8_t *gray_pixels = malloc(frame_size);
-    uint8_t *bg_col = malloc(GRID_W * GRID_H);
-    uint8_t *fg_col = malloc(GRID_W * GRID_H);
-    uint8_t *chars = malloc(GRID_W * GRID_H);
-
-    // Audio buffer for MP2 packets
-    #define MP2_BUFFER_SIZE 2048
-    uint8_t *audio_buffer = malloc(MP2_BUFFER_SIZE);
-
-    uint32_t frame_num = 0;
-    uint64_t total_audio_bytes = 0;
-
-    // Audio timing calculation
-    double frame_audio_time = 1.0 / fps_float;  // Time per video frame
-    double packet_audio_time = (double)MP2_DEFAULT_PACKET_SIZE / SAMPLE_RATE;  // Time per audio packet
-    double packets_per_frame = frame_audio_time / packet_audio_time;
-    double audio_frames_in_buffer = 0.0;  // Simulated audio buffer level
-
-    fprintf(stderr, "Encoding text-mode video (%dx%d chars, %dx%d pixels)...\n",
-            GRID_W, GRID_H, PIXEL_W, PIXEL_H);
-
-    // Track encoding start time
-    struct timeval start_time, now;
-    gettimeofday(&start_time, NULL);
-
-    // Read and process frames
-    while (fread(gray_pixels, 1, frame_size, video_pipe) == frame_size) {
-        // Calculate timecode in nanoseconds
-        uint64_t timecode_ns = (uint64_t)(frame_num * 1000000000.0 / fps_float);
-
-        // Write audio packets for this frame (based on timing)
-        if (mp2_file && audio_remaining > 0) {
-            // Simulate buffer consumption
-            audio_frames_in_buffer -= packets_per_frame;
-
-            // Calculate how many packets we need to maintain buffer
-            double target_level = fmax(packets_per_frame, 2.0);
-            int packets_to_insert = 0;
-
-            if (audio_frames_in_buffer < target_level) {
-                double deficit = target_level - audio_frames_in_buffer;
-                packets_to_insert = (int)ceil(deficit);
-            }
-
-            // Insert the calculated number of audio packets
-            for (int q = 0; q < packets_to_insert; q++) {
-                // Peek at header to get actual packet size
-                long pos = ftell(mp2_file);
-                uint8_t header[4];
-                if (fread(header, 1, 4, mp2_file) != 4) break;
-                fseek(mp2_file, pos, SEEK_SET);  // Rewind to re-read with full packet
-
-                int actual_packet_size = get_mp2_packet_size(header);
-                size_t bytes_to_read = actual_packet_size;
-
-                // Clamp to remaining audio
-                if (bytes_to_read > audio_remaining) {
-                    bytes_to_read = audio_remaining;
-                }
-
-                // Sanity check
-                if (bytes_to_read > MP2_BUFFER_SIZE) {
-                    fprintf(stderr, "ERROR: MP2 packet size %zu exceeds buffer\n", bytes_to_read);
-                    break;
-                }
-
-                // Read full packet
-                size_t bytes_read = fread(audio_buffer, 1, bytes_to_read, mp2_file);
-                if (bytes_read == 0) break;
-
-                // Write MP2 audio packet
-                write_audio_mp2(out, audio_buffer, bytes_read);
-
-                // Track audio
-                audio_remaining -= bytes_read;
-                audio_frames_in_buffer++;
-                total_audio_bytes += bytes_read;
-            }
-        }
-
-        // Write timecode
-        write_timecode(out, timecode_ns);
-
-        // Convert to text mode
-        frame_to_text(gray_pixels, rom, bg_col, fg_col, chars);
-
-        // Write text packet (treated as I-frame)
-        write_text_packet(out, bg_col, fg_col, chars, GRID_H, GRID_W);
-
-        // Write sync packet after each frame
-        write_sync(out);
-
-        frame_num++;
-        if (frame_num % 30 == 0) {
-            // Calculate encoding speed
-            gettimeofday(&now, NULL);
-            double elapsed = (now.tv_sec - start_time.tv_sec) +
-                           (now.tv_usec - start_time.tv_usec) / 1000000.0;
-            double encoding_fps = frame_num / elapsed;
-
-            fprintf(stderr, "\rEncoded %u frames (%.1f fps)", frame_num, encoding_fps);
-            fflush(stderr);
-        }
-    }
-
-    // Write any remaining audio
-    if (mp2_file && audio_remaining > 0) {
-        while (audio_remaining > 0) {
-            // Peek at header to get actual packet size
-            long pos = ftell(mp2_file);
-            uint8_t header[4];
-            if (fread(header, 1, 4, mp2_file) != 4) break;
-            fseek(mp2_file, pos, SEEK_SET);
-
-            int actual_packet_size = get_mp2_packet_size(header);
-            size_t bytes_to_read = (actual_packet_size < audio_remaining) ? actual_packet_size : audio_remaining;
-
-            if (bytes_to_read > MP2_BUFFER_SIZE) break;
-
-            size_t bytes_read = fread(audio_buffer, 1, bytes_to_read, mp2_file);
-            if (bytes_read == 0) break;
-
-            write_audio_mp2(out, audio_buffer, bytes_read);
-            audio_remaining -= bytes_read;
-            total_audio_bytes += bytes_read;
-        }
-    }
-
-    // Final timing
-    gettimeofday(&now, NULL);
-    double total_time = (now.tv_sec - start_time.tv_sec) +
-                       (now.tv_usec - start_time.tv_usec) / 1000000.0;
-    double final_fps = frame_num / total_time;
-
-    fprintf(stderr, "\nDone! Encoded %u frames in %.2fs (%.1f fps)\n",
-            frame_num, total_time, final_fps);
-    fprintf(stderr, "Audio: %llu bytes (%.2f MB)\n",
-            (unsigned long long)total_audio_bytes,
-            total_audio_bytes / 1024.0 / 1024.0);
-
-    // Update total_frames in header
-    if (frame_num > 0) {
-        fseek(out, header_offset + 14, SEEK_SET);  // Offset to total_frames field
-        fwrite(&frame_num, sizeof(uint32_t), 1, out);
-        fprintf(stderr, "Updated total_frames in header: %u\n", frame_num);
-    }
-
-    // Update ENDT in extended header (calculate end time for last frame)
-    if (frame_num > 0) {
-        // Calculate duration: (frame_num - 1) frames * (1/fps) seconds in nanoseconds
-        uint64_t duration_ns = (uint64_t)((frame_num - 1) * 1000000000.0 / fps_float);
-        uint64_t endt_ns = duration_ns;
-
-        fseek(out, endt_offset, SEEK_SET);
-        fwrite(&endt_ns, sizeof(uint64_t), 1, out);
-        fprintf(stderr, "Updated ENDT in extended header: %llu ns (%.3f seconds)\n",
-                (unsigned long long)endt_ns, endt_ns / 1000000000.0);
-    }
-
-    // Cleanup
-    pclose(video_pipe);
-    if (mp2_file) {
-        fclose(mp2_file);
-        unlink(TEMP_AUDIO_FILE);  // Remove temporary audio file
-    }
-    fclose(out);
-    free(gray_pixels);
-    free(bg_col);
-    free(fg_col);
-    free(chars);
-    free(audio_buffer);
-    free(rom->data);
-    free(rom);
-    if (ffmpeg_version) free(ffmpeg_version);
-
-    return 0;
-}
diff --git a/video_encoder/encoder_tev.c b/video_encoder/encoder_tev.c
deleted file mode 100644
index e0ebef2..0000000
--- a/video_encoder/encoder_tev.c
+++ /dev/null
@@ -1,3052 +0,0 @@
-// Created by CuriousTorvald and Claude on 2025-08-18.
-// TEV (TSVM Enhanced Video) Encoder - YCoCg-R/ICtCp 4:2:0 16x16 Block Version
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <stddef.h>
-#include <string.h>
-#include <math.h>
-#include <zstd.h>
-#include <unistd.h>
-#include <sys/wait.h>
-#include <getopt.h>
-#include <ctype.h>
-#include <sys/time.h>
-#include <time.h>
-#include <limits.h>
-
-// TSVM Enhanced Video (TEV) format constants
-#define TEV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x45\x56"  // "\x1FTSVM TEV"
-// TEV version - dynamic based on colour space mode
-// Version 2: YCoCg-R 4:2:0 (default)
-// Version 3: ICtCp 4:2:0 (--ictcp flag)
-// version 1: 8x8 RGB
-// version 2: 16x16 Y, 8x8 Co/Cg, asymetric quantisation, optional quantiser multiplier for rate control multiplier (1.0 when unused) {current winner}
-// version 3: version 2 + internal 6-bit processing (discarded due to higher noise floor)
-
-// Block encoding modes (16x16 blocks)
-#define TEV_MODE_SKIP      0x00  // Skip block (copy from reference)
-#define TEV_MODE_INTRA     0x01  // Intra DCT coding (I-frame blocks)
-#define TEV_MODE_INTER     0x02  // Inter DCT coding with motion compensation
-#define TEV_MODE_MOTION    0x03  // Motion vector only (good prediction)
-
-// Video packet types
-#define TEV_PACKET_IFRAME      0x10  // Intra frame (keyframe)
-#define TEV_PACKET_PFRAME      0x11  // Predicted frame  
-#define TEV_PACKET_AUDIO_MP2   0x20  // MP2 audio
-#define TEV_PACKET_SUBTITLE_TC 0x31  // Subtitle packet with timecode (SSF-TC format)
-#define TEV_PACKET_SYNC        0xFF  // Sync packet
-
-// Utility macros
-static inline int CLAMP(int x, int min, int max) {
-    return x < min ? min : (x > max ? max : x);
-}
-static inline float FCLAMP(float x, float min, float max) {
-    return x < min ? min : (x > max ? max : x);
-}
-// Which preset should I be using?
-// from dataset of three videos with Q0..Q95: (real life video, low res pixel art, high res pixel art)
-// 56  96 128 192 256  Claude Opus 4.1 (with data analysis)
-// 64  96 128 192 256  ChatGPT-5 (without data analysis)
-static const int MP2_RATE_TABLE[] = {128, 160, 224, 320, 384, 384};
-
-// Valid MP2 bitrates as per MPEG-1 Layer II specification
-static const int MP2_VALID_BITRATES[] = {32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384};
-
-// Validate and return closest valid MP2 bitrate, or 0 if invalid
-static int validate_mp2_bitrate(int bitrate) {
-    for (int i = 0; i < sizeof(MP2_VALID_BITRATES) / sizeof(int); i++) {
-        if (MP2_VALID_BITRATES[i] == bitrate) {
-            return bitrate;  // Exact match
-        }
-    }
-    return 0;  // Invalid bitrate
-}
-
-// Which preset should I be using?
-// from dataset of three videos with Q0..Q95: (real life video, low res pixel art, high res pixel art)
-//  5  25  50  75  90  Claude Opus 4.1 (with data analysis)
-// 10  25  45  65  85  ChatGPT-5 (without data analysis)
-// 10  30  50  70  90  ChatGPT-5 (with data analysis)
-static const int QUALITY_Y[] =  {5, 18, 36, 54, 72, 90};
-static const int QUALITY_CO[] = {5, 18, 36, 54, 72, 90};
-
-// Encoding parameters
-#define MAX_MOTION_SEARCH 16
-int KEYFRAME_INTERVAL = 60;
-#define BLOCK_SIZE 16  // 16x16 blocks now
-#define BLOCK_SIZE_SQR 256
-#define BLOCK_SIZE_SQRF 256.f
-#define HALF_BLOCK_SIZE 8
-#define HALF_BLOCK_SIZE_SQR 64
-
-#define ZSTD_COMPRESSON_LEVEL 15
-
-static float jpeg_quality_to_mult(int q) {
-    return ((q < 50) ? 5000.f / q : 200.f - 2*q) / 100.f;
-}
-
-// Quality settings for quantisation (Y channel) - 16x16 tables
-static const uint32_t QUANT_TABLE_Y[BLOCK_SIZE_SQR] =
-    // Quality 50
-    {16, 14, 12, 11, 11, 13, 16, 20, 24, 30, 39, 48, 54, 61, 67, 73,
-     14, 13, 12, 12, 12, 15, 18, 21, 25, 33, 46, 57, 61, 65, 67, 70,
-     13, 12, 12, 13, 14, 17, 19, 23, 27, 36, 53, 66, 68, 69, 68, 67,
-     13, 13, 13, 14, 15, 18, 22, 26, 32, 41, 56, 67, 71, 74, 70, 67,
-     14, 14, 14, 15, 17, 20, 24, 30, 38, 47, 58, 68, 74, 79, 73, 67,
-     15, 15, 15, 17, 19, 22, 27, 34, 44, 55, 68, 79, 83, 85, 78, 70,
-     15, 16, 17, 20, 22, 26, 30, 38, 49, 63, 81, 94, 93, 91, 83, 74,
-     16, 18, 20, 24, 28, 33, 38, 47, 57, 73, 93, 108, 105, 101, 91, 81,
-     19, 21, 23, 29, 35, 43, 52, 60, 68, 83, 105, 121, 118, 115, 102, 89,
-     21, 24, 27, 35, 43, 53, 62, 70, 78, 91, 113, 128, 127, 125, 112, 99,
-     25, 30, 34, 43, 53, 61, 68, 76, 85, 97, 114, 127, 130, 132, 120, 108,
-     31, 38, 44, 54, 64, 71, 76, 84, 94, 105, 118, 129, 135, 138, 127, 116,
-     45, 52, 60, 69, 78, 84, 90, 97, 107, 118, 130, 139, 142, 143, 133, 122,
-     59, 68, 76, 84, 91, 97, 102, 110, 120, 129, 139, 147, 147, 146, 137, 127,
-     73, 82, 92, 98, 103, 107, 110, 117, 126, 132, 134, 136, 138, 138, 133, 127,
-     86, 98, 109, 112, 114, 116, 118, 124, 133, 135, 129, 125, 128, 130, 128, 127};
-
-// Quality settings for quantisation (X channel - 8x8)
-static const uint32_t QUANT_TABLE_C[HALF_BLOCK_SIZE_SQR] =
-    {17, 18, 24, 47, 99, 99, 99, 99,
-     18, 21, 26, 66, 99, 99, 99, 99,
-     24, 26, 56, 99, 99, 99, 99, 99,
-     47, 66, 99, 99, 99, 99, 99, 99,
-     99, 99, 99, 99, 99, 99, 99, 99,
-     99, 99, 99, 99, 99, 99, 99, 99,
-     99, 99, 99, 99, 99, 99, 99, 99,
-     99, 99, 99, 99, 99, 99, 99, 99};
-
-// Audio constants (reuse MP2 from existing system)
-#define MP2_SAMPLE_RATE 32000
-#define MP2_DEFAULT_PACKET_SIZE 1728
-
-// Default values
-#define DEFAULT_WIDTH 560
-#define DEFAULT_HEIGHT 448
-
-static void generate_random_filename(char *filename) {
-    srand(time(NULL));
-
-    const char charset[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
-    const int charset_size = sizeof(charset) - 1;
-
-    // Start with the prefix
-    strcpy(filename, "/tmp/");
-
-    // Generate 32 random characters
-    for (int i = 0; i < 32; i++) {
-        filename[5 + i] = charset[rand() % charset_size];
-    }
-
-    // Add the .mp2 extension
-    strcpy(filename + 37, ".mp2");
-    filename[41] = '\0';  // Null terminate
-}
-
-char TEMP_AUDIO_FILE[42];
-
-typedef struct __attribute__((packed)) {
-    uint8_t mode;           // Block encoding mode
-    int16_t mv_x, mv_y;     // Motion vector (1/4 pixel precision)
-    float rate_control_factor; // Rate control factor (4 bytes, little-endian)
-    uint16_t cbp;           // Coded block pattern (which channels have non-zero coeffs)
-    int16_t y_coeffs[BLOCK_SIZE_SQR];  // quantised Y DCT coefficients (16x16)
-    int16_t co_coeffs[HALF_BLOCK_SIZE_SQR];  // quantised Co DCT coefficients (8x8)
-    int16_t cg_coeffs[HALF_BLOCK_SIZE_SQR];  // quantised Cg DCT coefficients (8x8)
-} tev_block_t;
-
-// Subtitle entry structure
-typedef struct subtitle_entry {
-    int start_frame;
-    int end_frame;
-    char *text;
-    struct subtitle_entry *next;
-} subtitle_entry_t;
-
-typedef struct {
-    char *input_file;
-    char *output_file;
-    char *subtitle_file;  // SubRip (.srt) file path
-    int width;
-    int height;
-    int fps;
-    int output_fps;  // User-specified output FPS (for frame rate conversion)
-    int total_frames;
-    double duration;
-    int has_audio;
-    int has_subtitles;
-    int output_to_stdout;
-    int progressive_mode;  // 0 = interlaced (default), 1 = progressive
-    int is_ntsc_framerate; // 1 if framerate denominator is 1001, 0 otherwise
-    int qualityIndex; // -q option
-    int qualityY;
-    int qualityCo;
-    int qualityCg;
-    int verbose;
-    int disable_rcf;          // 0 = rcf enabled, 1 = disabled
-    int ictcp_mode;       // 0 = YCoCg-R (default), 1 = ICtCp colour space
-
-    // Bitrate control
-    int target_bitrate_kbps;  // Target bitrate in kbps (0 = quality mode)
-    int bitrate_mode;         // 0 = quality, 1 = bitrate, 2 = hybrid
-    float rate_control_factor; // Dynamic adjustment factor
-
-    // Frame buffers (8-bit RGB format for encoding)
-    uint8_t *current_rgb, *previous_rgb, *reference_rgb;
-    uint8_t *previous_even_field;  // Previous even field buffer for interlaced scene change detection
-
-    // YCoCg workspace
-    float *y_workspace, *co_workspace, *cg_workspace;
-    float *dct_workspace;       // DCT coefficients
-    tev_block_t *block_data;    // Encoded block data
-    uint8_t *compressed_buffer; // Zstd output
-
-    // Audio handling
-    FILE *mp2_file;
-    int mp2_packet_size;
-    int mp2_rate_index;
-    int audio_bitrate;  // Custom audio bitrate (0 = use quality table)
-    size_t audio_remaining;
-    uint8_t *mp2_buffer;
-    double audio_frames_in_buffer;
-    int target_audio_buffer_size;
-
-    // Compression context
-    ZSTD_CCtx *zstd_context;
-
-    // FFmpeg processes
-    FILE *ffmpeg_video_pipe;
-
-    // Progress tracking
-    struct timeval start_time;
-    size_t total_output_bytes;
-
-    // Statistics
-    int blocks_skip, blocks_intra, blocks_inter, blocks_motion;
-
-    // Rate control statistics
-    size_t frame_bits_accumulator;
-    size_t target_bits_per_frame;
-    float complexity_history[60];  // Rolling window for complexity
-    int complexity_history_index;
-    float average_complexity;
-
-    // Subtitle handling
-    subtitle_entry_t *subtitle_list;
-    subtitle_entry_t *current_subtitle;
-
-    // Complexity statistics collection
-    int stats_mode;           // 0 = disabled, 1 = enabled
-    float *complexity_values; // Array to store all complexity values
-    int complexity_count;     // Current count of complexity values
-    int complexity_capacity;  // Capacity of complexity_values array
-} tev_encoder_t;
-
-//////////////////////////
-// COLOUR MATHS CODES //
-//////////////////////////
-
-// RGB to YCoCg-R transform (per YCoCg-R specification with truncated division)
-static void rgb_to_ycocgr(uint8_t r, uint8_t g, uint8_t b, int *y, int *co, int *cg) {
-    *co = (int)r - (int)b;
-    int tmp = (int)b + ((*co) / 2);
-    *cg = (int)g - tmp;
-    *y = tmp + ((*cg) / 2);
-
-    // Clamp to valid ranges (YCoCg-R should be roughly -256 to +255)
-    *y = CLAMP(*y, 0, 255);
-    *co = CLAMP(*co, -256, 255);
-    *cg = CLAMP(*cg, -256, 255);
-}
-
-// YCoCg-R to RGB transform (for verification - per YCoCg-R specification)
-static void ycocgr_to_rgb(int y, int co, int cg, uint8_t *r, uint8_t *g, uint8_t *b) {
-    int tmp = y - (cg / 2);
-    *g = cg + tmp;
-    *b = tmp - (co / 2);
-    *r = *b + co;
-
-    // Clamp values
-    *r = CLAMP(*r, 0, 255);
-    *g = CLAMP(*g, 0, 255);
-    *b = CLAMP(*b, 0, 255);
-}
-
-// ---------------------- ICtCp Implementation ----------------------
-
-static inline int iround(double v) { return (int)floor(v + 0.5); }
-
-// ---------------------- sRGB gamma helpers ----------------------
-static inline double srgb_linearize(double val) {
-    // val in [0,1]
-    if (val <= 0.04045) return val / 12.92;
-    return pow((val + 0.055) / 1.055, 2.4);
-}
-static inline double srgb_unlinearize(double val) {
-    // val in [0,1]
-    if (val <= 0.0031308) return val * 12.92;
-    return 1.055 * pow(val, 1.0 / 2.4) - 0.055;
-}
-
-// -------------------------- HLG --------------------------
-// Forward HLG OETF (linear -> HLG)
-static inline double HLG_OETF(double L) {
-    // L in [0,1], relative scene-linear
-    const double a = 0.17883277;
-    const double b = 1.0 - 4.0 * a;
-    const double c = 0.5 - a * log(4.0 * a);
-
-    if (L <= 1.0/12.0)
-        return sqrt(3.0 * L);
-    else
-        return a * log(12.0 * L - b) + c;
-}
-
-// Inverse HLG OETF (HLG -> linear)
-static inline double HLG_inverse_OETF(double V) {
-    const double a = 0.17883277;
-    const double b = 1.0 - 4.0 * a;
-    const double c = 0.5 - a * log(4.0 * a);
-
-    if (V <= 0.5)
-        return (V * V) / 3.0;
-    else
-        return (exp((V - c)/a) + b) / 12.0;
-}
-
-// ---------------------- Matrices (doubles) ----------------------
-// linear RGB -> XYZ -> Rec.2100 -> LMS
-/*static const double M_RGB_TO_LMS[3][3] = {
-    {1688.0/4096.0,2146.0/4096.0, 262.0/4096.0},
-    { 683.0/4096.0,2951.0/4096.0, 462.0/4096.0},
-    {  99.0/4096.0, 309.0/4096.0,3688.0/4096.0}
-};*/
-static const double M_RGB_TO_LMS[3][3] = {
-    {0.2958564579364564, 0.6230869483219083, 0.08106989398623762},
-    {0.15627390752659093, 0.727308963512872, 0.11639736914944238},
-    {0.035141262332177715, 0.15657109121101628, 0.8080956851990795}
-};
-
-// Inverse: LMS -> linear sRGB (inverse of above)
-/*static const double M_LMS_TO_RGB[3][3] = {
-    {3.436606694333079, -2.5064521186562705, 0.06984542432319149},
-    {-0.7913295555989289, 1.983600451792291, -0.192270896193362},
-    {-0.025949899690592665, -0.09891371471172647, 1.1248636144023192}
-};*/
-static const double M_LMS_TO_RGB[3][3] = {
-    {6.1723815689243215, -5.319534979827695, 0.14699442094633924},
-    {-1.3243428148026244, 2.560286104841917, -0.2359203727576164},
-    {-0.011819739235953752, -0.26473549971186555, 1.2767952602537955}
-};
-
-// ICtCp matrix (L' M' S' -> I Ct Cp). Values are the BT.2100 integer-derived /4096 constants.
-static const double M_LMSPRIME_TO_ICTCP[3][3] = {
-    { 2048.0/4096.0,   2048.0/4096.0,     0.0          },
-    { 3625.0/4096.0, -7465.0/4096.0, 3840.0/4096.0    },
-    { 9500.0/4096.0, -9212.0/4096.0, -288.0/4096.0    }
-};
-
-// Inverse: I Ct Cp -> L' M' S'  (precomputed inverse)
-static const double M_ICTCP_TO_LMSPRIME[3][3] = {
-    { 1.0,         0.015718580108730416,  0.2095810681164055 },
-    { 1.0,        -0.015718580108730416, -0.20958106811640548 },
-    { 1.0,         1.0212710798422344, -0.6052744909924316 }
-};
-
-// ---------------------- Forward: sRGB8 -> ICtCp (doubles) ----------------------
-// Inputs: r,g,b in 0..255 sRGB (8-bit)
-// Outputs: I, Ct, Cp as doubles (nominally I in ~[0..1], Ct/Cp ranges depend on colours)
-void srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
-                       double *out_I, double *out_Ct, double *out_Cp)
-{
-    // 1) linearize sRGB to 0..1
-    double r = srgb_linearize((double)r8 / 255.0);
-    double g = srgb_linearize((double)g8 / 255.0);
-    double b = srgb_linearize((double)b8 / 255.0);
-
-    // 2) linear RGB -> LMS (single 3x3 multiply)
-    double L = M_RGB_TO_LMS[0][0]*r + M_RGB_TO_LMS[0][1]*g + M_RGB_TO_LMS[0][2]*b;
-    double M = M_RGB_TO_LMS[1][0]*r + M_RGB_TO_LMS[1][1]*g + M_RGB_TO_LMS[1][2]*b;
-    double S = M_RGB_TO_LMS[2][0]*r + M_RGB_TO_LMS[2][1]*g + M_RGB_TO_LMS[2][2]*b;
-
-    // 3) apply HLG encode (map linear LMS -> perceptual domain L',M',S')
-    double Lp = HLG_OETF(L);
-    double Mp = HLG_OETF(M);
-    double Sp = HLG_OETF(S);
-
-    // 4) L'M'S' -> ICtCp
-    double I  = M_LMSPRIME_TO_ICTCP[0][0]*Lp + M_LMSPRIME_TO_ICTCP[0][1]*Mp + M_LMSPRIME_TO_ICTCP[0][2]*Sp;
-    double Ct = M_LMSPRIME_TO_ICTCP[1][0]*Lp + M_LMSPRIME_TO_ICTCP[1][1]*Mp + M_LMSPRIME_TO_ICTCP[1][2]*Sp;
-    double Cp = M_LMSPRIME_TO_ICTCP[2][0]*Lp + M_LMSPRIME_TO_ICTCP[2][1]*Mp + M_LMSPRIME_TO_ICTCP[2][2]*Sp;
-
-    *out_I = FCLAMP(I * 255.f, 0.f, 255.f);
-    *out_Ct = FCLAMP(Ct * 255.f, -256.f, 255.f);
-    *out_Cp = FCLAMP(Cp * 255.f, -256.f, 255.f);
-}
-
-// ---------------------- Reverse: ICtCp -> sRGB8 (doubles) ----------------------
-// Inputs: I, Ct, Cp as doubles
-// Outputs: r8,g8,b8 in 0..255 (8-bit sRGB, clamped and rounded)
-void ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
-                       uint8_t *r8, uint8_t *g8, uint8_t *b8)
-{
-    double I = I8 / 255.f;
-    double Ct = Ct8 / 255.f;
-    double Cp = Cp8 / 255.f;
-
-    // 1) ICtCp -> L' M' S' (3x3 multiply)
-    double Lp = M_ICTCP_TO_LMSPRIME[0][0]*I + M_ICTCP_TO_LMSPRIME[0][1]*Ct + M_ICTCP_TO_LMSPRIME[0][2]*Cp;
-    double Mp = M_ICTCP_TO_LMSPRIME[1][0]*I + M_ICTCP_TO_LMSPRIME[1][1]*Ct + M_ICTCP_TO_LMSPRIME[1][2]*Cp;
-    double Sp = M_ICTCP_TO_LMSPRIME[2][0]*I + M_ICTCP_TO_LMSPRIME[2][1]*Ct + M_ICTCP_TO_LMSPRIME[2][2]*Cp;
-
-    // 2) HLG decode: L' -> linear LMS
-    double L = HLG_inverse_OETF(Lp);
-    double M = HLG_inverse_OETF(Mp);
-    double S = HLG_inverse_OETF(Sp);
-
-    // 3) LMS -> linear sRGB (3x3 inverse)
-    double r_lin = M_LMS_TO_RGB[0][0]*L + M_LMS_TO_RGB[0][1]*M + M_LMS_TO_RGB[0][2]*S;
-    double g_lin = M_LMS_TO_RGB[1][0]*L + M_LMS_TO_RGB[1][1]*M + M_LMS_TO_RGB[1][2]*S;
-    double b_lin = M_LMS_TO_RGB[2][0]*L + M_LMS_TO_RGB[2][1]*M + M_LMS_TO_RGB[2][2]*S;
-
-    // 4) gamma encode and convert to 0..255 with center-of-bin rounding
-    double r = srgb_unlinearize(r_lin);
-    double g = srgb_unlinearize(g_lin);
-    double b = srgb_unlinearize(b_lin);
-
-    *r8 = (uint8_t)CLAMP(iround(r * 255.0), 0, 255);
-    *g8 = (uint8_t)CLAMP(iround(g * 255.0), 0, 255);
-    *b8 = (uint8_t)CLAMP(iround(b * 255.0), 0, 255);
-}
-
-// ---------------------- Color Space Switching Functions ----------------------
-// Wrapper functions that choose between YCoCg-R and ICtCp based on encoder mode
-
-static void rgb_to_colour_space(tev_encoder_t *enc, uint8_t r, uint8_t g, uint8_t b,
-                               double *c1, double *c2, double *c3) {
-    if (enc->ictcp_mode) {
-        // Use ICtCp colour space
-        srgb8_to_ictcp_hlg(r, g, b, c1, c2, c3);
-    } else {
-        // Use YCoCg-R colour space (convert to int first, then to double)
-        int y_val, co_val, cg_val;
-        rgb_to_ycocgr(r, g, b, &y_val, &co_val, &cg_val);
-        *c1 = (double)y_val;
-        *c2 = (double)co_val;
-        *c3 = (double)cg_val;
-    }
-}
-
-static void colour_space_to_rgb(tev_encoder_t *enc, double c1, double c2, double c3,
-                               uint8_t *r, uint8_t *g, uint8_t *b) {
-    if (enc->ictcp_mode) {
-        // Use ICtCp colour space
-        ictcp_hlg_to_srgb8(c1, c2, c3, r, g, b);
-    } else {
-        // Use YCoCg-R colour space (convert from double to int first)
-        int y_val = (int)round(c1);
-        int co_val = (int)round(c2);
-        int cg_val = (int)round(c3);
-        ycocgr_to_rgb(y_val, co_val, cg_val, r, g, b);
-    }
-}
-
-////////////////////////////////////////
-// DISCRETE COSINE TRANSFORMATIONS //
-////////////////////////////////////////
-
-// Pre-calculated cosine tables
-static float dct_table_16[16][16]; // For 16x16 DCT
-static float dct_table_8[8][8];    // For 8x8 DCT
-static int tables_initialised = 0;
-
-// Initialise the pre-calculated tables
-static void init_dct_tables(void) {
-    if (tables_initialised) return;
-
-    // Pre-calculate cosine values for 16x16 DCT
-    for (int u = 0; u < 16; u++) {
-        for (int x = 0; x < 16; x++) {
-            dct_table_16[u][x] = cosf((2.0f * x + 1.0f) * u * M_PI / 32.0f);
-        }
-    }
-
-    // Pre-calculate cosine values for 8x8 DCT
-    for (int u = 0; u < 8; u++) {
-        for (int x = 0; x < 8; x++) {
-            dct_table_8[u][x] = cosf((2.0f * x + 1.0f) * u * M_PI / 16.0f);
-        }
-    }
-
-    tables_initialised = 1;
-}
-
-// 16x16 2D DCT
-// Fast separable 16x16 DCT - 8x performance improvement
-static float temp_dct_16[BLOCK_SIZE_SQR]; // Reusable temporary buffer
-
-static void dct_16x16_fast(float *input, float *output) {
-    init_dct_tables(); // Ensure tables are initialised
-
-    // First pass: Process rows (16 1D DCTs)
-    for (int row = 0; row < 16; row++) {
-        for (int u = 0; u < 16; u++) {
-            float sum = 0.0f;
-            float cu = (u == 0) ? 1.0f / sqrtf(2.0f) : 1.0f;
-
-            for (int x = 0; x < 16; x++) {
-                sum += input[row * 16 + x] * dct_table_16[u][x];
-            }
-
-            temp_dct_16[row * 16 + u] = 0.5f * cu * sum;
-        }
-    }
-
-    // Second pass: Process columns (16 1D DCTs)
-    for (int col = 0; col < 16; col++) {
-        for (int v = 0; v < 16; v++) {
-            float sum = 0.0f;
-            float cv = (v == 0) ? 1.0f / sqrtf(2.0f) : 1.0f;
-
-            for (int y = 0; y < 16; y++) {
-                sum += temp_dct_16[y * 16 + col] * dct_table_16[v][y];
-            }
-
-            output[v * 16 + col] = 0.5f * cv * sum;
-        }
-    }
-}
-
-// Fast separable 8x8 DCT - 4x performance improvement
-static float temp_dct_8[HALF_BLOCK_SIZE_SQR]; // Reusable temporary buffer
-
-static void dct_8x8_fast(float *input, float *output) {
-    init_dct_tables(); // Ensure tables are initialised
-
-    // First pass: Process rows (8 1D DCTs)
-    for (int row = 0; row < 8; row++) {
-        for (int u = 0; u < 8; u++) {
-            float sum = 0.0f;
-            float cu = (u == 0) ? 1.0f / sqrtf(2.0f) : 1.0f;
-
-            for (int x = 0; x < 8; x++) {
-                sum += input[row * 8 + x] * dct_table_8[u][x];
-            }
-
-            temp_dct_8[row * 8 + u] = 0.5f * cu * sum;
-        }
-    }
-
-    // Second pass: Process columns (8 1D DCTs)
-    for (int col = 0; col < 8; col++) {
-        for (int v = 0; v < 8; v++) {
-            float sum = 0.0f;
-            float cv = (v == 0) ? 1.0f / sqrtf(2.0f) : 1.0f;
-
-            for (int y = 0; y < 8; y++) {
-                sum += temp_dct_8[y * 8 + col] * dct_table_8[v][y];
-            }
-
-            output[v * 8 + col] = 0.5f * cv * sum;
-        }
-    }
-}
-
-// quantise DCT coefficient using quality table with rate control
-static int16_t quantise_coeff(float coeff, float quant, int is_dc, int is_chroma) {
-    if (is_dc) {
-        if (is_chroma) {
-            // Chroma DC: range -256 to +255, use lossless quantisation for testing
-            return (int16_t)roundf(coeff);
-        } else {
-            // Luma DC: range -128 to +127, use lossless quantisation for testing
-            return (int16_t)roundf(coeff);
-        }
-    } else {
-        // AC coefficients use quality table (rate control factor applied to quant table before calling)
-        float safe_quant = fmaxf(quant, 1.0f); // Prevent division by zero
-        return (int16_t)roundf(coeff / safe_quant);
-    }
-}
-
-// Extract 16x16 block from RGB frame and convert to colour space
-static void extract_colour_space_block(tev_encoder_t *enc, uint8_t *rgb_frame, int width, int height,
-                                      int block_x, int block_y,
-                                      float *c1_block, float *c2_block, float *c3_block) {
-    int start_x = block_x * BLOCK_SIZE;
-    int start_y = block_y * BLOCK_SIZE;
-
-    // Extract 16x16 primary channel block (Y for YCoCg-R, I for ICtCp)
-    for (int py = 0; py < BLOCK_SIZE; py++) {
-        for (int px = 0; px < BLOCK_SIZE; px++) {
-            int x = start_x + px;
-            int y = start_y + py;
-
-            if (x < width && y < height) {
-                int offset = (y * width + x) * 3;
-                uint8_t r = rgb_frame[offset];
-                uint8_t g = rgb_frame[offset + 1];
-                uint8_t b = rgb_frame[offset + 2];
-
-                double c1, c2, c3;
-                rgb_to_colour_space(enc, r, g, b, &c1, &c2, &c3);
-
-                c1_block[py * BLOCK_SIZE + px] = (float)c1 - 128.0f;
-            }
-        }
-    }
-
-    // Extract 8x8 chroma blocks with 4:2:0 subsampling (average 2x2 pixels)
-    for (int py = 0; py < HALF_BLOCK_SIZE; py++) {
-        for (int px = 0; px < HALF_BLOCK_SIZE; px++) {
-            int co_sum = 0, cg_sum = 0, count = 0;
-
-            // Average 2x2 block of pixels
-            for (int dy = 0; dy < 2; dy++) {
-                for (int dx = 0; dx < 2; dx++) {
-                    int x = start_x + px * 2 + dx;
-                    int y = start_y + py * 2 + dy;
-
-                    if (x < width && y < height) {
-                        int offset = (y * width + x) * 3;
-                        uint8_t r = rgb_frame[offset];
-                        uint8_t g = rgb_frame[offset + 1];
-                        uint8_t b = rgb_frame[offset + 2];
-
-                        double c1, c2, c3;
-                        rgb_to_colour_space(enc, r, g, b, &c1, &c2, &c3);
-
-                        co_sum += (int)c2;
-                        cg_sum += (int)c3;
-
-                        count++;
-                    }
-                }
-            }
-
-            if (count > 0) {
-                // Average the accumulated chroma values and store
-                c2_block[py * HALF_BLOCK_SIZE + px] = (float)(co_sum / count);
-                c3_block[py * HALF_BLOCK_SIZE + px] = (float)(cg_sum / count);
-            }
-        }
-    }
-}
-
-
-
-
-
-// Calculate spatial activity for any channel (16x16 or 8x8)
-static float calculate_spatial_activity(const float *block, int block_size) {
-    float activity = 0.0f;
-    
-    // Sum of absolute differences with neighbors (spatial activity)
-    for (int y = 0; y < block_size; y++) {
-        for (int x = 0; x < block_size; x++) {
-            float pixel = block[y * block_size + x];
-            
-            // Compare with right neighbor
-            if (x < block_size - 1) {
-                activity += fabsf(pixel - block[y * block_size + (x + 1)]);
-            }
-            
-            // Compare with bottom neighbor
-            if (y < block_size - 1) {
-                activity += fabsf(pixel - block[(y + 1) * block_size + x]);
-            }
-        }
-    }
-    
-    return activity;
-}
-
-// Calculate variance for any channel
-static float calculate_variance(const float *block, int block_size) {
-    int total_pixels = block_size * block_size;
-    
-    // Calculate mean
-    float mean = 0.0f;
-    for (int i = 0; i < total_pixels; i++) {
-        mean += block[i];
-    }
-    mean /= total_pixels;
-    
-    // Calculate variance
-    float variance = 0.0f;
-    for (int i = 0; i < total_pixels; i++) {
-        float diff = block[i] - mean;
-        variance += diff * diff;
-    }
-    variance /= total_pixels;
-    
-    return variance;
-}
-
-// Enhanced block complexity calculation including chroma information
-static float calculate_block_complexity_enhanced(const float *y_block, const float *co_block, const float *cg_block) {
-    // Luma complexity (16x16)
-    float luma_activity = calculate_spatial_activity(y_block, BLOCK_SIZE);
-    float luma_variance = calculate_variance(y_block, BLOCK_SIZE);
-    float luma_complexity = luma_activity + sqrtf(luma_variance) * 10.0f;
-    
-    // Chroma complexity (8x8 blocks, but weighted appropriately)
-    float co_activity = calculate_spatial_activity(co_block, HALF_BLOCK_SIZE);
-    float co_variance = calculate_variance(co_block, HALF_BLOCK_SIZE);
-    float co_complexity = co_activity + sqrtf(co_variance) * 10.0f;
-    
-    float cg_activity = calculate_spatial_activity(cg_block, HALF_BLOCK_SIZE);
-    float cg_variance = calculate_variance(cg_block, HALF_BLOCK_SIZE);
-    float cg_complexity = cg_activity + sqrtf(cg_variance) * 10.0f;
-    
-    // Combine complexities with appropriate weighting
-    // Luma gets primary weight, chroma gets secondary weight but significant enough to matter
-    // Scale chroma by 4 to account for 8x8 vs 16x16 size difference (64 vs 256 pixels)
-    float total_complexity = luma_complexity + 
-                           (co_complexity * 4.0f * 0.3f) + 
-                           (cg_complexity * 4.0f * 0.3f);
-    
-    return total_complexity;
-}
-
-// Legacy function for compatibility - calls enhanced version
-static float calculate_block_complexity(const float *y_block) {
-    float complexity = 0.0f;
-    
-    // Method 1: Sum of absolute differences with neighbors (spatial activity)
-    for (int y = 0; y < BLOCK_SIZE; y++) {
-        for (int x = 0; x < BLOCK_SIZE; x++) {
-            float pixel = y_block[y * BLOCK_SIZE + x];
-            
-            // Compare with right neighbor
-            if (x < BLOCK_SIZE - 1) {
-                complexity += fabsf(pixel - y_block[y * BLOCK_SIZE + (x + 1)]);
-            }
-            
-            // Compare with bottom neighbor
-            if (y < BLOCK_SIZE - 1) {
-                complexity += fabsf(pixel - y_block[(y + 1) * BLOCK_SIZE + x]);
-            }
-        }
-    }
-    
-    // Method 2: Add variance contribution
-    float mean = 0.0f;
-    for (int i = 0; i < BLOCK_SIZE_SQR; i++) {
-        mean += y_block[i];
-    }
-    mean /= BLOCK_SIZE_SQRF;
-    
-    float variance = 0.0f;
-    for (int i = 0; i < BLOCK_SIZE_SQR; i++) {
-        float diff = y_block[i] - mean;
-        variance += diff * diff;
-    }
-    variance /= BLOCK_SIZE_SQRF;
-    
-    // Combine spatial activity and variance
-    return complexity + sqrtf(variance) * 10.0f;
-}
-
-// Map complexity to rate control factor (pure per-block, no global factor)
-// Data-driven approach: rate_control_factor multiplies reconstructed coefficients in decoder
-// Higher factor = more detail preserved, lower factor = acceptable quality loss
-static float complexity_to_rate_factor(float complexity) {
-    // Handle zero/near-zero complexity (very common in sample data)
-    if (complexity <= 0.001f) {
-        return 0.7f; // Reduce detail for flat blocks (saves bits, minimal perceptual loss)
-    }
-    
-    // Parameters recalibrated for chroma-aware complexity calculation:
-    // - Median complexity now ~1400-3700 (increased due to chroma contribution)
-    // - High complexity threshold ~10000-15000 (91st percentile)
-    // - Maximum values up to ~22800 (vs ~17000 in luma-only version)
-    
-    const float median_complexity = 4447.0f;  // Target for rate_factor ≈ 1.0. e^8.4
-    const float high_complexity = 12088.0f;   // ~91st percentile threshold. e^9.4
-    
-    // Logarithmic preprocessing to handle wide dynamic range (0 to 23000+)
-    float log_complexity = logf(complexity + 1.0f);
-    float log_median = logf(median_complexity + 1.0f);
-    float log_high = logf(high_complexity + 1.0f);
-    
-    // Normalise: 0 = median complexity, 1 = high complexity threshold
-    float normalised = (log_complexity - log_median) / (log_high - log_median);
-    
-    // Sigmoid centered at median: f(0) ≈ 1.0, f(1) ≈ 1.6, f(-∞) ≈ 0.7
-    float sigmoid = 1.0f / (1.0f + expf(-4.0f * normalised));
-    float rate_factor = 0.7f + 0.9f * sigmoid; // Range: 0.7 to 1.6
-    
-    // Clamp to prevent extreme coefficient amplification/reduction
-    return FCLAMP(rate_factor, 0.7f, 1.6f);
-
-    // See also: https://www.desmos.com/calculator/awwjztvv3o
-}
-
-// Add complexity value to statistics collection
-static void add_complexity_value(tev_encoder_t *enc, float complexity) {
-    if (!enc->stats_mode) return;
-    
-    // Initialise array if needed
-    if (!enc->complexity_values) {
-        enc->complexity_capacity = 10000; // Initial capacity
-        enc->complexity_values = malloc(enc->complexity_capacity * sizeof(float));
-        if (!enc->complexity_values) {
-            fprintf(stderr, "Warning: Failed to allocate complexity statistics array\n");
-            enc->stats_mode = 0;
-            return;
-        }
-        enc->complexity_count = 0;
-    }
-    
-    // Resize array if needed
-    if (enc->complexity_count >= enc->complexity_capacity) {
-        enc->complexity_capacity *= 2;
-        float *new_array = realloc(enc->complexity_values, enc->complexity_capacity * sizeof(float));
-        if (!new_array) {
-            fprintf(stderr, "Warning: Failed to resize complexity statistics array\n");
-            return;
-        }
-        enc->complexity_values = new_array;
-    }
-    
-    enc->complexity_values[enc->complexity_count++] = complexity;
-}
-
-// Comparison function for qsort
-static int compare_float(const void *a, const void *b) {
-    float fa = *(const float*)a;
-    float fb = *(const float*)b;
-    if (fa < fb) return -1;
-    if (fa > fb) return 1;
-    return 0;
-}
-
-// Calculate seven-number summary statistics
-static void calculate_complexity_stats(tev_encoder_t *enc) {
-    if (!enc->stats_mode || enc->complexity_count == 0) return;
-    
-    printf("\n=== BLOCK COMPLEXITY STATISTICS ===\n");
-    printf("Analysed %d blocks during encoding\n\n", enc->complexity_count);
-    
-    // Sort the values to calculate percentiles
-    float *sorted_values = malloc(enc->complexity_count * sizeof(float));
-    if (!sorted_values) {
-        fprintf(stderr, "Failed to allocate memory for statistics calculation\n");
-        return;
-    }
-    
-    memcpy(sorted_values, enc->complexity_values, enc->complexity_count * sizeof(float));
-    qsort(sorted_values, enc->complexity_count, sizeof(float), compare_float);
-    
-    // Calculate seven-number summary percentiles: 2.15%, 8.87%, 25%, 50%, 75%, 91.13%, 97.85%
-    float p2_15 = sorted_values[(int)(0.0215 * (enc->complexity_count - 1))];
-    float p8_87 = sorted_values[(int)(0.0887 * (enc->complexity_count - 1))];
-    float p25 = sorted_values[(int)(0.25 * (enc->complexity_count - 1))];
-    float p50 = sorted_values[(int)(0.50 * (enc->complexity_count - 1))];
-    float p75 = sorted_values[(int)(0.75 * (enc->complexity_count - 1))];
-    float p91_13 = sorted_values[(int)(0.9113 * (enc->complexity_count - 1))];
-    float p97_85 = sorted_values[(int)(0.9785 * (enc->complexity_count - 1))];
-
-    // Print human-readable format
-    printf("Seven-Number Summary:\n");
-    printf("  2.15%% percentile:  %.6f\n", p2_15);
-    printf("  8.87%% percentile:  %.6f\n", p8_87);
-    printf("  25.0%% percentile:    %.6f\n", p25);
-    printf("  50.0%% percentile:    %.6f\n", p50);
-    printf("  75.0%% percentile:    %.6f\n", p75);
-    printf("  91.13%% percentile: %.6f\n", p91_13);
-    printf("  97.85%% percentile: %.6f\n", p97_85);
-
-    // Print CSV format for copy-pasting
-    printf("CSV Format (copy-pastable):\n");
-    printf("2.15%%,8.87%%,25.0%%,50.0%%,75.0%%,91.13%%,97.85%%\n");
-    printf("%.6f,%.6f,%.6f,%.6f,%.6f,%.6f,%.6f\n", p2_15, p8_87, p25, p50, p75, p91_13, p97_85);
-    
-    free(sorted_values);
-    printf("=====================================\n");
-}
-
-// Simple motion estimation (full search) for 16x16 blocks
-static void estimate_motion(tev_encoder_t *enc, int block_x, int block_y,
-                           int16_t *best_mv_x, int16_t *best_mv_y) {
-    int best_sad = INT_MAX;
-    *best_mv_x = 0;
-    *best_mv_y = 0;
-
-    int start_x = block_x * BLOCK_SIZE;
-    int start_y = block_y * BLOCK_SIZE;
-
-    // Diamond search pattern (much faster than full search)
-    static const int diamond_x[] = {0, -1, 1, 0, 0, -2, 2, 0, 0};
-    static const int diamond_y[] = {0, 0, 0, -1, 1, 0, 0, -2, 2};
-
-    int center_x = 0, center_y = 0;
-    int step_size = 4;  // Start with larger steps
-
-    while (step_size >= 1) {
-        int improved = 0;
-
-        for (int i = 0; i < 9; i++) {
-            int mv_x = center_x + diamond_x[i] * step_size;
-            int mv_y = center_y + diamond_y[i] * step_size;
-
-            // Check bounds
-            if (mv_x < -MAX_MOTION_SEARCH || mv_x > MAX_MOTION_SEARCH ||
-                mv_y < -MAX_MOTION_SEARCH || mv_y > MAX_MOTION_SEARCH) {
-                continue;
-            }
-
-            int ref_x = start_x - mv_x;
-            int ref_y = start_y - mv_y;
-
-            if (ref_x < 0 || ref_y < 0 ||
-                ref_x + BLOCK_SIZE > enc->width || ref_y + BLOCK_SIZE > enc->height) {
-                continue;
-            }
-
-            // Fast SAD using integer luma approximation
-            int sad = 0;
-            for (int dy = 0; dy < BLOCK_SIZE; dy += 2) {  // Sample every 2nd row for speed
-                uint8_t *cur_row = &enc->current_rgb[((start_y + dy) * enc->width + start_x) * 3];
-                uint8_t *ref_row = &enc->previous_rgb[((ref_y + dy) * enc->width + ref_x) * 3];
-
-                for (int dx = 0; dx < BLOCK_SIZE; dx += 2) {  // Sample every 2nd pixel
-                    // Fast luma approximation: (R + 2*G + B) >> 2
-                    int cur_luma = (cur_row[dx*3] + (cur_row[dx*3+1] << 1) + cur_row[dx*3+2]) >> 2;
-                    int ref_luma = (ref_row[dx*3] + (ref_row[dx*3+1] << 1) + ref_row[dx*3+2]) >> 2;
-                    sad += abs(cur_luma - ref_luma);
-                }
-            }
-
-            if (sad < best_sad) {
-                best_sad = sad;
-                *best_mv_x = mv_x;
-                *best_mv_y = mv_y;
-                center_x = mv_x;
-                center_y = mv_y;
-                improved = 1;
-            }
-        }
-
-        if (!improved) {
-            step_size >>= 1;  // Reduce step size
-        }
-    }
-}
-
-// Convert RGB block to YCoCg-R with 4:2:0 chroma subsampling
-static void convert_rgb_to_colour_space_block(tev_encoder_t *enc, const uint8_t *rgb_block,
-                                            float *c1_workspace, float *c2_workspace, float *c3_workspace) {
-    if (enc->ictcp_mode) {
-        // ICtCp mode: Convert 16x16 RGB to ICtCp (full resolution for I, 4:2:0 subsampling for CtCp)
-
-        // Convert I channel at full resolution (16x16)
-        for (int py = 0; py < BLOCK_SIZE; py++) {
-            for (int px = 0; px < BLOCK_SIZE; px++) {
-                int rgb_idx = (py * BLOCK_SIZE + px) * 3;
-                uint8_t r = rgb_block[rgb_idx];
-                uint8_t g = rgb_block[rgb_idx + 1];
-                uint8_t b = rgb_block[rgb_idx + 2];
-
-                double I, Ct, Cp;
-                srgb8_to_ictcp_hlg(r, g, b, &I, &Ct, &Cp);
-
-                // Store I at full resolution, scale to appropriate range
-                c1_workspace[py * BLOCK_SIZE + px] = (float)(I * 255.0);
-            }
-        }
-
-        // Convert Ct and Cp with 4:2:0 subsampling (8x8)
-        for (int cy = 0; cy < HALF_BLOCK_SIZE; cy++) {
-            for (int cx = 0; cx < HALF_BLOCK_SIZE; cx++) {
-                double sum_ct = 0.0, sum_cp = 0.0;
-
-                // Sample 2x2 block from RGB and average for chroma
-                for (int dy = 0; dy < 2; dy++) {
-                    for (int dx = 0; dx < 2; dx++) {
-                        int py = cy * 2 + dy;
-                        int px = cx * 2 + dx;
-                        int rgb_idx = (py * 16 + px) * 3;
-
-                        int r = rgb_block[rgb_idx];
-                        int g = rgb_block[rgb_idx + 1];
-                        int b = rgb_block[rgb_idx + 2];
-
-                        double I, Ct, Cp;
-                        srgb8_to_ictcp_hlg(r, g, b, &I, &Ct, &Cp);
-
-                        sum_ct += Ct;
-                        sum_cp += Cp;
-                    }
-                }
-
-                // Average and store subsampled chroma, scale to signed 8-bit equivalent range
-                // Apply centering to ensure chroma is balanced around 0 (like YCoCg-R)
-                double avg_ct = sum_ct / 4.0;
-                double avg_cp = sum_cp / 4.0;
-
-                // Scale and clamp to [-256, 255] range like YCoCg-R
-                c2_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)CLAMP(avg_ct * 255.0, -256, 255);
-                c3_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)CLAMP(avg_cp * 255.0, -256, 255);
-            }
-        }
-    } else {
-        // YCoCg-R mode: Original implementation
-
-        // Convert 16x16 RGB to Y (full resolution)
-        for (int py = 0; py < BLOCK_SIZE; py++) {
-            for (int px = 0; px < BLOCK_SIZE; px++) {
-                int rgb_idx = (py * BLOCK_SIZE + px) * 3;
-                int r = rgb_block[rgb_idx];
-                int g = rgb_block[rgb_idx + 1];
-                int b = rgb_block[rgb_idx + 2];
-
-                // YCoCg-R transform (per specification with truncated division)
-                int y = (r + 2*g + b) / 4;
-                c1_workspace[py * BLOCK_SIZE + px] = (float)CLAMP(y, 0, 255);
-            }
-        }
-
-        // Convert to Co and Cg with 4:2:0 subsampling (8x8)
-        for (int cy = 0; cy < HALF_BLOCK_SIZE; cy++) {
-            for (int cx = 0; cx < HALF_BLOCK_SIZE; cx++) {
-                int sum_co = 0, sum_cg = 0;
-
-                // Sample 2x2 block from RGB and average for chroma
-                for (int dy = 0; dy < 2; dy++) {
-                    for (int dx = 0; dx < 2; dx++) {
-                        int py = cy * 2 + dy;
-                        int px = cx * 2 + dx;
-                        int rgb_idx = (py * 16 + px) * 3;
-
-                        int r = rgb_block[rgb_idx];
-                        int g = rgb_block[rgb_idx + 1];
-                        int b = rgb_block[rgb_idx + 2];
-
-                        int co = r - b;
-                        int tmp = b + (co / 2);
-                        int cg = g - tmp;
-
-                        sum_co += co;
-                        sum_cg += cg;
-                    }
-                }
-
-                // Average and store subsampled chroma
-                c2_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)CLAMP(sum_co / 4, -256, 255);
-                c3_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)CLAMP(sum_cg / 4, -256, 255);
-            }
-        }
-    }
-}
-
-// Extract motion-compensated YCoCg-R block from reference frame
-static void extract_motion_compensated_block(const uint8_t *rgb_data, int width, int height,
-                                           int block_x, int block_y, int mv_x, int mv_y,
-                                           uint8_t *y_block, int8_t *co_block, int8_t *cg_block) {
-    // Extract 16x16 RGB block with motion compensation
-    uint8_t rgb_block[BLOCK_SIZE * BLOCK_SIZE * 3];
-
-    for (int dy = 0; dy < BLOCK_SIZE; dy++) {
-        for (int dx = 0; dx < BLOCK_SIZE; dx++) {
-            int cur_x = block_x + dx;
-            int cur_y = block_y + dy;
-            int ref_x = cur_x + mv_x;  // Revert to original motion compensation
-            int ref_y = cur_y + mv_y;
-
-            int rgb_idx = (dy * BLOCK_SIZE + dx) * 3;
-
-            if (ref_x >= 0 && ref_y >= 0 && ref_x < width && ref_y < height) {
-                // Copy RGB from reference position
-                int ref_offset = (ref_y * width + ref_x) * 3;
-                rgb_block[rgb_idx] = rgb_data[ref_offset];         // R
-                rgb_block[rgb_idx + 1] = rgb_data[ref_offset + 1]; // G
-                rgb_block[rgb_idx + 2] = rgb_data[ref_offset + 2]; // B
-            } else {
-                // Out of bounds - use black
-                rgb_block[rgb_idx] = 0;     // R
-                rgb_block[rgb_idx + 1] = 0; // G
-                rgb_block[rgb_idx + 2] = 0; // B
-            }
-        }
-    }
-
-    // Convert RGB block to YCoCg-R (original implementation for motion compensation)
-    // Convert 16x16 RGB to Y (full resolution)
-    for (int py = 0; py < BLOCK_SIZE; py++) {
-        for (int px = 0; px < BLOCK_SIZE; px++) {
-            int rgb_idx = (py * BLOCK_SIZE + px) * 3;
-            int r = rgb_block[rgb_idx];
-            int g = rgb_block[rgb_idx + 1];
-            int b = rgb_block[rgb_idx + 2];
-
-            // YCoCg-R transform (per specification with truncated division)
-            int y = (r + 2*g + b) / 4;
-
-            y_block[py * 16 + px] = CLAMP(y, 0, 255);
-        }
-    }
-
-    // Convert to Co and Cg with 4:2:0 subsampling (8x8)
-    for (int cy = 0; cy < HALF_BLOCK_SIZE; cy++) {
-        for (int cx = 0; cx < HALF_BLOCK_SIZE; cx++) {
-            // Sample 2x2 block from RGB and average for chroma
-            int sum_co = 0, sum_cg = 0;
-
-            for (int dy = 0; dy < 2; dy++) {
-                for (int dx = 0; dx < 2; dx++) {
-                    int py = cy * 2 + dy;
-                    int px = cx * 2 + dx;
-                    int rgb_idx = (py * 16 + px) * 3;
-
-                    int r = rgb_block[rgb_idx];
-                    int g = rgb_block[rgb_idx + 1];
-                    int b = rgb_block[rgb_idx + 2];
-
-                    int co = r - b;
-                    int tmp = b + (co / 2);
-                    int cg = g - tmp;
-
-                    sum_co += co;
-                    sum_cg += cg;
-                }
-            }
-
-            // Average and store subsampled chroma
-            co_block[cy * HALF_BLOCK_SIZE + cx] = CLAMP(sum_co / 4, -256, 255);
-            cg_block[cy * HALF_BLOCK_SIZE + cx] = CLAMP(sum_cg / 4, -256, 255);
-        }
-    }
-}
-
-// Compute motion-compensated residual for INTER mode
-static void compute_motion_residual(tev_encoder_t *enc, int block_x, int block_y, int mv_x, int mv_y) {
-    int start_x = block_x * BLOCK_SIZE;
-    int start_y = block_y * BLOCK_SIZE;
-
-    // Extract motion-compensated reference block from previous frame
-    uint8_t ref_y[BLOCK_SIZE_SQR];
-    int8_t ref_co[HALF_BLOCK_SIZE_SQR], ref_cg[HALF_BLOCK_SIZE_SQR];
-    extract_motion_compensated_block(enc->previous_rgb, enc->width, enc->height,
-                                   start_x, start_y, mv_x, mv_y,
-                                   ref_y, ref_co, ref_cg);
-
-    // Compute residuals: current - motion_compensated_reference
-    // Current is already centered (-128 to +127), reference is 0-255, so subtract and center reference
-    for (int i = 0; i < BLOCK_SIZE_SQR; i++) {
-        float ref_y_centered = (float)ref_y[i] - 128.0f;  // Center reference to match current
-        enc->y_workspace[i] = enc->y_workspace[i] - ref_y_centered;
-    }
-
-    // Chroma residuals (already centered in both current and reference)
-    for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
-        enc->co_workspace[i] = enc->co_workspace[i] - (float)ref_co[i];
-        enc->cg_workspace[i] = enc->cg_workspace[i] - (float)ref_cg[i];
-    }
-}
-
-// Calculate block complexity for rate control
-
-
-// Encode a 16x16 block
-static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_keyframe) {
-    tev_block_t *block = &enc->block_data[block_y * ((enc->width + 15) / 16) + block_x];
-
-    // Extract YCoCg-R block
-    extract_colour_space_block(enc, enc->current_rgb, enc->width, enc->height,
-                        block_x, block_y,
-                        enc->y_workspace, enc->co_workspace, enc->cg_workspace);
-
-    if (is_keyframe) {
-        // Intra coding for keyframes
-        block->mode = TEV_MODE_INTRA;
-        block->mv_x = block->mv_y = 0;
-        enc->blocks_intra++;
-    } else {
-        // Implement proper mode decision for P-frames
-        int start_x = block_x * BLOCK_SIZE;
-        int start_y = block_y * BLOCK_SIZE;
-
-        // Calculate SAD for skip mode (no motion compensation)
-        int skip_sad = 0;
-        int skip_colour_diff = 0;
-        for (int dy = 0; dy < BLOCK_SIZE; dy++) {
-            for (int dx = 0; dx < BLOCK_SIZE; dx++) {
-                int x = start_x + dx;
-                int y = start_y + dy;
-                if (x < enc->width && y < enc->height) {
-                    int cur_offset = (y * enc->width + x) * 3;
-
-                    // Compare current with previous frame (using YCoCg-R Luma calculation)
-                    int cur_luma = (enc->current_rgb[cur_offset] +
-                                   2 * enc->current_rgb[cur_offset + 1] +
-                                   enc->current_rgb[cur_offset + 2]) / 4;
-                    int prev_luma = (enc->previous_rgb[cur_offset] +
-                                    2 * enc->previous_rgb[cur_offset + 1] +
-                                    enc->previous_rgb[cur_offset + 2]) / 4;
-
-                    skip_sad += abs(cur_luma - prev_luma);
-                    
-                    // Also check for colour differences to prevent SKIP on colour changes
-                    int cur_r = enc->current_rgb[cur_offset];
-                    int cur_g = enc->current_rgb[cur_offset + 1];
-                    int cur_b = enc->current_rgb[cur_offset + 2];
-                    int prev_r = enc->previous_rgb[cur_offset];
-                    int prev_g = enc->previous_rgb[cur_offset + 1];
-                    int prev_b = enc->previous_rgb[cur_offset + 2];
-                    
-                    skip_colour_diff += abs(cur_r - prev_r) + abs(cur_g - prev_g) + abs(cur_b - prev_b);
-                }
-            }
-        }
-
-        // Try motion estimation
-        estimate_motion(enc, block_x, block_y, &block->mv_x, &block->mv_y);
-
-        // Calculate motion compensation SAD
-        int motion_sad = INT_MAX;
-        if (abs(block->mv_x) > 0 || abs(block->mv_y) > 0) {
-            motion_sad = 0;
-            for (int dy = 0; dy < BLOCK_SIZE; dy++) {
-                for (int dx = 0; dx < BLOCK_SIZE; dx++) {
-                    int cur_x = start_x + dx;
-                    int cur_y = start_y + dy;
-                    int ref_x = cur_x + block->mv_x;
-                    int ref_y = cur_y + block->mv_y;
-
-                    if (cur_x < enc->width && cur_y < enc->height &&
-                        ref_x >= 0 && ref_y >= 0 &&
-                        ref_x < enc->width && ref_y < enc->height) {
-
-                        int cur_offset = (cur_y * enc->width + cur_x) * 3;
-                        int ref_offset = (ref_y * enc->width + ref_x) * 3;
-
-                        // use YCoCg-R Luma calculation
-                        int cur_luma = (enc->current_rgb[cur_offset] +
-                                       2 * enc->current_rgb[cur_offset + 1] +
-                                       enc->current_rgb[cur_offset + 2]) / 4;
-                        int ref_luma = (enc->previous_rgb[ref_offset] +
-                                       2 * enc->previous_rgb[ref_offset + 1] +
-                                       enc->previous_rgb[ref_offset + 2]) / 4;
-
-                        motion_sad += abs(cur_luma - ref_luma);
-                    } else {
-                        motion_sad += 128; // Penalty for out-of-bounds
-                    }
-                }
-            }
-        }
-
-        // Mode decision with strict thresholds for quality
-        // Require both low luma difference AND low colour difference for SKIP
-        if (skip_sad <= 64 && skip_colour_diff <= 192) {
-            // Very small difference - skip block (copy from previous frame)
-            block->mode = TEV_MODE_SKIP;
-            block->mv_x = 0;
-            block->mv_y = 0;
-            // Even skip blocks benefit from complexity analysis for consistency
-            float block_complexity = calculate_block_complexity_enhanced(enc->y_workspace, enc->co_workspace, enc->cg_workspace);
-            add_complexity_value(enc, block_complexity);
-            block->rate_control_factor = (enc->disable_rcf) ? 1.f : complexity_to_rate_factor(block_complexity);
-            block->cbp = 0x00;  // No coefficients present
-            // Zero out DCT coefficients for consistent format
-            memset(block->y_coeffs, 0, sizeof(block->y_coeffs));
-            memset(block->co_coeffs, 0, sizeof(block->co_coeffs));
-            memset(block->cg_coeffs, 0, sizeof(block->cg_coeffs));
-            enc->blocks_skip++;
-            return; // Skip DCT encoding entirely
-        } else if (motion_sad < skip_sad && motion_sad <= 1024 &&
-                   (abs(block->mv_x) > 0 || abs(block->mv_y) > 0)) {
-            // Good motion prediction - use motion-only mode
-            block->mode = TEV_MODE_MOTION;
-            // Analyse complexity for motion blocks too
-            float block_complexity = calculate_block_complexity_enhanced(enc->y_workspace, enc->co_workspace, enc->cg_workspace);
-            add_complexity_value(enc, block_complexity);
-            block->rate_control_factor = (enc->disable_rcf) ? 1.f : complexity_to_rate_factor(block_complexity);
-            block->cbp = 0x00;  // No coefficients present
-            // Zero out DCT coefficients for consistent format
-            memset(block->y_coeffs, 0, sizeof(block->y_coeffs));
-            memset(block->co_coeffs, 0, sizeof(block->co_coeffs));
-            memset(block->cg_coeffs, 0, sizeof(block->cg_coeffs));
-            enc->blocks_motion++;
-            return; // Skip DCT encoding, just store motion vector
-        // disabling INTER mode: residual DCT is crapping out no matter what I do
-        } /*else if (motion_sad < skip_sad && (abs(block->mv_x) > 0 || abs(block->mv_y) > 0)) {
-            // Motion compensation with threshold
-            if (motion_sad <= 1024) {
-                block->mode = TEV_MODE_MOTION;
-                block->cbp = 0x00;  // No coefficients present
-                memset(block->y_coeffs, 0, sizeof(block->y_coeffs));
-                memset(block->co_coeffs, 0, sizeof(block->co_coeffs));
-                memset(block->cg_coeffs, 0, sizeof(block->cg_coeffs));
-                enc->blocks_motion++;
-                return; // Skip DCT encoding, just store motion vector
-            }
-
-            // Use INTER mode with motion vector and residuals
-            if (abs(block->mv_x) < BLOCK_SIZE && abs(block->mv_y) < BLOCK_SIZE) {
-                block->mode = TEV_MODE_INTER;
-                enc->blocks_inter++;
-            } else {
-                // Motion vector too large, fall back to INTRA
-                block->mode = TEV_MODE_INTRA;
-                block->mv_x = 0;
-                block->mv_y = 0;
-                enc->blocks_intra++;
-            }
-        }*/ else {
-            // No good motion prediction - use intra mode
-            block->mode = TEV_MODE_INTRA;
-            block->mv_x = 0;
-            block->mv_y = 0;
-            enc->blocks_intra++;
-        }
-    }
-
-    // Calculate block complexity BEFORE DCT transform for adaptive rate control
-    // Use enhanced complexity calculation that includes chroma information
-    float block_complexity = calculate_block_complexity_enhanced(enc->y_workspace, enc->co_workspace, enc->cg_workspace);
-    add_complexity_value(enc, block_complexity);
-    block->rate_control_factor = (enc->disable_rcf) ? 1.f : complexity_to_rate_factor(block_complexity);
-
-    // Apply fast DCT transform
-    dct_16x16_fast(enc->y_workspace, enc->dct_workspace);
-
-    // quantise Y coefficients (luma) using per-block rate control
-    const uint32_t *y_quant = enc->ictcp_mode ? QUANT_TABLE_Y : QUANT_TABLE_Y;
-    const float qmult_y = jpeg_quality_to_mult(enc->qualityY * block->rate_control_factor);
-    for (int i = 0; i < BLOCK_SIZE_SQR; i++) {
-        // Apply rate control factor to quantisation table (like decoder does)
-        float effective_quant = y_quant[i] * qmult_y;
-        block->y_coeffs[i] = quantise_coeff(enc->dct_workspace[i], FCLAMP(effective_quant, 1.f, 255.f), i == 0, 0);
-    }
-
-    // Apply fast DCT transform to chroma
-    dct_8x8_fast(enc->co_workspace, enc->dct_workspace);
-
-    // quantise Co coefficients (chroma - orange-blue) using per-block rate control
-    const uint32_t *co_quant = enc->ictcp_mode ? QUANT_TABLE_C : QUANT_TABLE_C;
-    const float qmult_co = jpeg_quality_to_mult(enc->qualityCo * block->rate_control_factor);
-    for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
-        // Apply rate control factor to quantisation table (like decoder does)
-        float effective_quant = co_quant[i] * qmult_co;
-        block->co_coeffs[i] = quantise_coeff(enc->dct_workspace[i], FCLAMP(effective_quant, 1.f, 255.f), i == 0, 1);
-    }
-
-    // Apply fast DCT transform to Cg
-    dct_8x8_fast(enc->cg_workspace, enc->dct_workspace);
-
-    // quantise Cg coefficients (chroma - green-magenta, qmult_cg is more aggressive like NTSC Q) using per-block rate control
-    // In ICtCp mode, Cg becomes Cp (chroma-red) which needs special quantisation table
-    const uint32_t *cg_quant = enc->ictcp_mode ? QUANT_TABLE_C : QUANT_TABLE_C;
-    const float qmult_cg = jpeg_quality_to_mult(enc->qualityCg * block->rate_control_factor);
-    for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
-        // Apply rate control factor to quantisation table (like decoder does)
-        float effective_quant = cg_quant[i] * qmult_cg;
-        block->cg_coeffs[i] = quantise_coeff(enc->dct_workspace[i], FCLAMP(effective_quant, 1.f, 255.f), i == 0, 1);
-    }
-
-    // Set CBP (simplified - always encode all channels)
-    block->cbp = 0x07;  // Y, Co, Cg all present
-}
-
-// Convert SubRip time format (HH:MM:SS,mmm) to frame number
-static int srt_time_to_frame(const char *time_str, int fps) {
-    int hours, minutes, seconds, milliseconds;
-    if (sscanf(time_str, "%d:%d:%d,%d", &hours, &minutes, &seconds, &milliseconds) != 4) {
-        return -1;
-    }
-    
-    double total_seconds = hours * 3600.0 + minutes * 60.0 + seconds + milliseconds / 1000.0;
-    return (int)(total_seconds * fps + 0.5);  // Round to nearest frame
-}
-
-// Convert SAMI milliseconds to frame number
-static int sami_ms_to_frame(int milliseconds, int fps) {
-    double seconds = milliseconds / 1000.0;
-    return (int)(seconds * fps + 0.5);  // Round to nearest frame
-}
-
-// Parse SubRip subtitle file
-static subtitle_entry_t* parse_srt_file(const char *filename, int fps) {
-    FILE *file = fopen(filename, "r");
-    if (!file) {
-        fprintf(stderr, "Failed to open subtitle file: %s\n", filename);
-        return NULL;
-    }
-    
-    subtitle_entry_t *head = NULL;
-    subtitle_entry_t *tail = NULL;
-    char line[1024];
-    int state = 0;  // 0=index, 1=time, 2=text, 3=blank
-    
-    subtitle_entry_t *current_entry = NULL;
-    char *text_buffer = NULL;
-    size_t text_buffer_size = 0;
-    
-    while (fgets(line, sizeof(line), file)) {
-        // Remove trailing newline
-        size_t len = strlen(line);
-        if (len > 0 && line[len-1] == '\n') {
-            line[len-1] = '\0';
-            len--;
-        }
-        if (len > 0 && line[len-1] == '\r') {
-            line[len-1] = '\0';
-            len--;
-        }
-        
-        if (state == 0) {  // Expecting subtitle index
-            if (strlen(line) == 0) continue;  // Skip empty lines
-            // Create new subtitle entry
-            current_entry = calloc(1, sizeof(subtitle_entry_t));
-            if (!current_entry) break;
-            state = 1;
-        } else if (state == 1) {  // Expecting time range
-            char start_time[32], end_time[32];
-            if (sscanf(line, "%31s --> %31s", start_time, end_time) == 2) {
-                current_entry->start_frame = srt_time_to_frame(start_time, fps);
-                current_entry->end_frame = srt_time_to_frame(end_time, fps);
-                
-                if (current_entry->start_frame < 0 || current_entry->end_frame < 0) {
-                    free(current_entry);
-                    current_entry = NULL;
-                    state = 3;  // Skip to next blank line
-                    continue;
-                }
-                
-                // Initialise text buffer
-                text_buffer_size = 256;
-                text_buffer = malloc(text_buffer_size);
-                if (!text_buffer) {
-                    free(current_entry);
-                    current_entry = NULL;
-                    fprintf(stderr, "Memory allocation failed while parsing subtitles\n");
-                    break;
-                }
-                text_buffer[0] = '\0';
-                state = 2;
-            } else {
-                free(current_entry);
-                current_entry = NULL;
-                state = 3;  // Skip malformed entry
-            }
-        } else if (state == 2) {  // Collecting subtitle text
-            if (strlen(line) == 0) {
-                // End of subtitle text
-                current_entry->text = strdup(text_buffer);
-                free(text_buffer);
-                text_buffer = NULL;
-                
-                // Add to list
-                if (!head) {
-                    head = current_entry;
-                    tail = current_entry;
-                } else {
-                    tail->next = current_entry;
-                    tail = current_entry;
-                }
-                current_entry = NULL;
-                state = 0;
-            } else {
-                // Append text line
-                size_t current_len = strlen(text_buffer);
-                size_t line_len = strlen(line);
-                size_t needed = current_len + line_len + 2;  // +2 for newline and null
-                
-                if (needed > text_buffer_size) {
-                    text_buffer_size = needed + 256;
-                    char *new_buffer = realloc(text_buffer, text_buffer_size);
-                    if (!new_buffer) {
-                        free(text_buffer);
-                        free(current_entry);
-                        current_entry = NULL;
-                        fprintf(stderr, "Memory allocation failed while parsing subtitles\n");
-                        break;
-                    }
-                    text_buffer = new_buffer;
-                }
-                
-                if (current_len > 0) {
-                    strcat(text_buffer, "\n");
-                }
-                strcat(text_buffer, line);
-            }
-        } else if (state == 3) {  // Skip to next blank line
-            if (strlen(line) == 0) {
-                state = 0;
-            }
-        }
-    }
-    
-    // Handle final subtitle if file doesn't end with blank line
-    if (current_entry && text_buffer) {
-        current_entry->text = strdup(text_buffer);
-        free(text_buffer);
-        
-        if (!head) {
-            head = current_entry;
-        } else {
-            tail->next = current_entry;
-        }
-    }
-    
-    //fclose(file); // why uncommenting it errors out with "Fatal error: glibc detected an invalid stdio handle"?
-    return head;
-}
-
-// Strip HTML tags from text but preserve <b> and <i> formatting tags
-static char* strip_html_tags(const char *html) {
-    if (!html) return NULL;
-    
-    size_t len = strlen(html);
-    char *result = malloc(len + 1);
-    if (!result) return NULL;
-    
-    int in_tag = 0;
-    int out_pos = 0;
-    int i = 0;
-    
-    while (i < len) {
-        if (html[i] == '<') {
-            // Check if this is a formatting tag we want to preserve
-            int preserve_tag = 0;
-            
-            // Check for <b>, </b>, <i>, </i> tags
-            if (i + 1 < len) {
-                if ((i + 2 < len && strncasecmp(&html[i], "<b>", 3) == 0) ||
-                    (i + 3 < len && strncasecmp(&html[i], "</b>", 4) == 0) ||
-                    (i + 2 < len && strncasecmp(&html[i], "<i>", 3) == 0) ||
-                    (i + 3 < len && strncasecmp(&html[i], "</i>", 4) == 0)) {
-                    preserve_tag = 1;
-                }
-            }
-            
-            if (preserve_tag) {
-                // Copy the entire tag
-                while (i < len && html[i] != '>') {
-                    result[out_pos++] = html[i++];
-                }
-                if (i < len) {
-                    result[out_pos++] = html[i++]; // Copy the '>'
-                }
-            } else {
-                // Skip non-formatting tags
-                in_tag = 1;
-                i++;
-            }
-        } else if (html[i] == '>') {
-            in_tag = 0;
-            i++;
-        } else if (!in_tag) {
-            result[out_pos++] = html[i++];
-        } else {
-            i++;
-        }
-    }
-    
-    result[out_pos] = '\0';
-    return result;
-}
-
-// Parse SAMI subtitle file
-static subtitle_entry_t* parse_smi_file(const char *filename, int fps) {
-    FILE *file = fopen(filename, "r");
-    if (!file) {
-        fprintf(stderr, "Failed to open subtitle file: %s\n", filename);
-        return NULL;
-    }
-    
-    subtitle_entry_t *head = NULL;
-    subtitle_entry_t *tail = NULL;
-    char line[2048];
-    char *content = NULL;
-    size_t content_size = 0;
-    size_t content_pos = 0;
-    
-    // Read entire file into memory for easier parsing
-    while (fgets(line, sizeof(line), file)) {
-        size_t line_len = strlen(line);
-        
-        // Expand content buffer if needed
-        if (content_pos + line_len + 1 > content_size) {
-            content_size = content_size ? content_size * 2 : 8192;
-            char *new_content = realloc(content, content_size);
-            if (!new_content) {
-                free(content);
-                fclose(file);
-                fprintf(stderr, "Memory allocation failed while parsing SAMI file\n");
-                return NULL;
-            }
-            content = new_content;
-        }
-        
-        strcpy(content + content_pos, line);
-        content_pos += line_len;
-    }
-    fclose(file);
-    
-    if (!content) return NULL;
-    
-    // Convert to lowercase for case-insensitive parsing
-    char *content_lower = malloc(strlen(content) + 1);
-    if (!content_lower) {
-        free(content);
-        return NULL;
-    }
-    
-    for (int i = 0; content[i]; i++) {
-        content_lower[i] = tolower(content[i]);
-    }
-    content_lower[strlen(content)] = '\0';
-    
-    // Find BODY section
-    char *body_start = strstr(content_lower, "<body");
-    if (!body_start) {
-        fprintf(stderr, "No BODY section found in SAMI file\n");
-        free(content);
-        free(content_lower);
-        return NULL;
-    }
-    
-    // Skip to actual body content
-    body_start = strchr(body_start, '>');
-    if (!body_start) {
-        free(content);
-        free(content_lower);
-        return NULL;
-    }
-    body_start++;
-    
-    // Calculate offset in original content
-    size_t body_offset = body_start - content_lower;
-    char *body_content = content + body_offset;
-    
-    // Parse SYNC tags
-    char *pos = content_lower + body_offset;
-    char *original_pos = body_content;
-    
-    while ((pos = strstr(pos, "<sync")) != NULL) {
-        // Find start time
-        char *start_attr = strstr(pos, "start");
-        if (!start_attr || start_attr > strstr(pos, ">")) {
-            pos++;
-            continue;
-        }
-        
-        // Parse start time
-        start_attr = strchr(start_attr, '=');
-        if (!start_attr) {
-            pos++;
-            continue;
-        }
-        start_attr++;
-        
-        // Skip whitespace and quotes
-        while (*start_attr && (*start_attr == ' ' || *start_attr == '"' || *start_attr == '\'')) {
-            start_attr++;
-        }
-        
-        int start_ms = atoi(start_attr);
-        if (start_ms < 0) {
-            pos++;
-            continue;
-        }
-        
-        // Find end of sync tag
-        char *sync_end = strchr(pos, '>');
-        if (!sync_end) {
-            pos++;
-            continue;
-        }
-        sync_end++;
-        
-        // Find next sync tag or end of body
-        char *next_sync = strstr(sync_end, "<sync");
-        char *body_end = strstr(sync_end, "</body>");
-        char *text_end = next_sync;
-        
-        if (body_end && (!next_sync || body_end < next_sync)) {
-            text_end = body_end;
-        }
-        
-        if (!text_end) {
-            // Use end of content
-            text_end = content_lower + strlen(content_lower);
-        }
-        
-        // Extract subtitle text
-        size_t text_len = text_end - sync_end;
-        if (text_len > 0) {
-            // Get text from original content (not lowercase version)
-            size_t sync_offset = sync_end - content_lower;
-            char *subtitle_text = malloc(text_len + 1);
-            if (!subtitle_text) break;
-            
-            strncpy(subtitle_text, content + sync_offset, text_len);
-            subtitle_text[text_len] = '\0';
-            
-            // Strip HTML tags and clean up text
-            char *clean_text = strip_html_tags(subtitle_text);
-            free(subtitle_text);
-            
-            if (clean_text && strlen(clean_text) > 0) {
-                // Remove leading/trailing whitespace
-                char *start = clean_text;
-                while (*start && (*start == ' ' || *start == '\t' || *start == '\n' || *start == '\r')) {
-                    start++;
-                }
-                
-                char *end = start + strlen(start) - 1;
-                while (end > start && (*end == ' ' || *end == '\t' || *end == '\n' || *end == '\r')) {
-                    *end = '\0';
-                    end--;
-                }
-                
-                if (strlen(start) > 0) {
-                    // Create subtitle entry
-                    subtitle_entry_t *entry = calloc(1, sizeof(subtitle_entry_t));
-                    if (entry) {
-                        entry->start_frame = sami_ms_to_frame(start_ms, fps);
-                        entry->text = strdup(start);
-                        
-                        // Set end frame to next subtitle start or a default duration
-                        if (next_sync) {
-                            // Parse next sync start time
-                            char *next_start = strstr(next_sync, "start");
-                            if (next_start) {
-                                next_start = strchr(next_start, '=');
-                                if (next_start) {
-                                    next_start++;
-                                    while (*next_start && (*next_start == ' ' || *next_start == '"' || *next_start == '\'')) {
-                                        next_start++;
-                                    }
-                                    int next_ms = atoi(next_start);
-                                    if (next_ms > start_ms) {
-                                        entry->end_frame = sami_ms_to_frame(next_ms, fps);
-                                    } else {
-                                        entry->end_frame = entry->start_frame + fps * 3;  // 3 second default
-                                    }
-                                }
-                            }
-                        } else {
-                            entry->end_frame = entry->start_frame + fps * 3;  // 3 second default
-                        }
-                        
-                        // Add to list
-                        if (!head) {
-                            head = entry;
-                            tail = entry;
-                        } else {
-                            tail->next = entry;
-                            tail = entry;
-                        }
-                    }
-                }
-            }
-            
-            free(clean_text);
-        }
-        
-        pos = sync_end;
-    }
-    
-    free(content);
-    free(content_lower);
-    return head;
-}
-
-// Detect subtitle file format based on extension and content
-static int detect_subtitle_format(const char *filename) {
-    // Check file extension first
-    const char *ext = strrchr(filename, '.');
-    if (ext) {
-        ext++; // Skip the dot
-        if (strcasecmp(ext, "smi") == 0 || strcasecmp(ext, "sami") == 0) {
-            return 1; // SAMI format
-        }
-        if (strcasecmp(ext, "srt") == 0) {
-            return 2; // SubRip format
-        }
-    }
-
-    // If extension is unclear, try to detect from content
-    FILE *file = fopen(filename, "r");
-    if (!file) return 0; // Default to SRT
-
-    char line[1024];
-    int has_sami_tags = 0;
-    int has_srt_format = 0;
-    int lines_checked = 0;
-
-    while (fgets(line, sizeof(line), file) && lines_checked < 20) {
-        // Convert to lowercase for checking
-        char *lower_line = malloc(strlen(line) + 1);
-        if (lower_line) {
-            for (int i = 0; line[i]; i++) {
-                lower_line[i] = tolower(line[i]);
-            }
-            lower_line[strlen(line)] = '\0';
-
-            // Check for SAMI indicators
-            if (strstr(lower_line, "<sami>") || strstr(lower_line, "<sync") ||
-                strstr(lower_line, "<body>") || strstr(lower_line, "start=")) {
-                has_sami_tags = 1;
-                free(lower_line);
-                break;
-            }
-
-            // Check for SRT indicators (time format)
-            if (strstr(lower_line, "-->")) {
-                has_srt_format = 1;
-            }
-
-            free(lower_line);
-        }
-        lines_checked++;
-    }
-
-    fclose(file);
-
-    // Return format based on detection
-    if (has_sami_tags) return 1; // SAMI
-    if (has_srt_format) return 2; // SRT
-    return 0; // Unknown
-}
-
-// Parse subtitle file (auto-detect format)
-static subtitle_entry_t* parse_subtitle_file(const char *filename, int fps) {
-    int format = detect_subtitle_format(filename);
-
-    if (format == 1) return parse_smi_file(filename, fps);
-    else if (format == 2) return parse_srt_file(filename, fps);
-    else return NULL;
-}
-
-// Free subtitle list
-static void free_subtitle_list(subtitle_entry_t *list) {
-    while (list) {
-        subtitle_entry_t *next = list->next;
-        free(list->text);
-        free(list);
-        list = next;
-    }
-}
-
-// Write SSF-TC subtitle packet to output
-static int write_subtitle_packet_tc(FILE *output, uint32_t index, uint8_t opcode, const char *text, uint64_t timecode_ns) {
-    // Calculate packet size: index (3 bytes) + timecode (8 bytes) + opcode (1 byte) + text + null terminator
-    size_t text_len = text ? strlen(text) : 0;
-    size_t packet_size = 3 + 8 + 1 + text_len + 1;
-
-    // Write packet type and size
-    uint8_t packet_type = TEV_PACKET_SUBTITLE_TC;
-    fwrite(&packet_type, 1, 1, output);
-    fwrite(&packet_size, 4, 1, output);
-
-    // Write subtitle index (24-bit, little-endian)
-    uint8_t index_bytes[3];
-    index_bytes[0] = index & 0xFF;
-    index_bytes[1] = (index >> 8) & 0xFF;
-    index_bytes[2] = (index >> 16) & 0xFF;
-    fwrite(index_bytes, 1, 3, output);
-
-    // Write timecode (64-bit, little-endian)
-    uint8_t timecode_bytes[8];
-    for (int i = 0; i < 8; i++) {
-        timecode_bytes[i] = (timecode_ns >> (i * 8)) & 0xFF;
-    }
-    fwrite(timecode_bytes, 1, 8, output);
-
-    // Write opcode
-    fwrite(&opcode, 1, 1, output);
-
-    // Write text if present
-    if (text && text_len > 0) {
-        fwrite(text, 1, text_len, output);
-    }
-
-    // Write null terminator
-    uint8_t null_term = 0x00;
-    fwrite(&null_term, 1, 1, output);
-
-    return packet_size + 5;  // packet_size + packet_type + size field
-}
-
-// Write all subtitles upfront in SSF-TC format (called before first frame)
-static int write_all_subtitles_tc(tev_encoder_t *enc, FILE *output) {
-    if (!enc->has_subtitles) return 0;
-
-    int bytes_written = 0;
-    int subtitle_count = 0;
-
-    // Convert frame timing to nanoseconds
-    // Frame time = 1e9 / fps nanoseconds
-    uint64_t frame_time_ns = (uint64_t)(1000000000.0 / enc->output_fps);
-
-    // Iterate through all subtitles and write them with timecodes
-    subtitle_entry_t *sub = enc->subtitle_list;
-    while (sub) {
-        // Calculate timecodes for show and hide events
-        uint64_t show_timecode = (uint64_t)sub->start_frame * frame_time_ns;
-        uint64_t hide_timecode = (uint64_t)sub->end_frame * frame_time_ns;
-
-        // Write show subtitle event
-        bytes_written += write_subtitle_packet_tc(output, 0, 0x01, sub->text, show_timecode);
-
-        // Write hide subtitle event
-        bytes_written += write_subtitle_packet_tc(output, 0, 0x02, NULL, hide_timecode);
-
-        subtitle_count++;
-        if (enc->verbose) {
-            printf("SSF-TC: Subtitle %d: show at %.3fs, hide at %.3fs: %.50s%s\n",
-                   subtitle_count,
-                   show_timecode / 1000000000.0,
-                   hide_timecode / 1000000000.0,
-                   sub->text, strlen(sub->text) > 50 ? "..." : "");
-        }
-
-        sub = sub->next;
-    }
-
-    if (enc->verbose && subtitle_count > 0) {
-        printf("Wrote %d SSF-TC subtitle events (%d bytes)\n", subtitle_count * 2, bytes_written);
-    }
-
-    return bytes_written;
-}
-
-// Initialise encoder
-static tev_encoder_t* init_encoder(void) {
-    tev_encoder_t *enc = calloc(1, sizeof(tev_encoder_t));
-    if (!enc) return NULL;
-
-    // set defaults
-    enc->qualityIndex = 2; // Default quality
-    enc->qualityY = QUALITY_Y[enc->qualityIndex];
-    enc->qualityCo = QUALITY_CO[enc->qualityIndex];
-    enc->qualityCg = enc->qualityCo / 2;
-    enc->mp2_packet_size = 0; // Will be detected from MP2 header
-    enc->mp2_rate_index = 0;
-    enc->audio_bitrate = 0;  // 0 = use quality table
-    enc->audio_frames_in_buffer = 0;
-    enc->target_audio_buffer_size = 4;
-    enc->width = DEFAULT_WIDTH;
-    enc->height = DEFAULT_HEIGHT;
-    enc->fps = 0;  // Will be detected from input
-    enc->output_fps = 0;  // No frame rate conversion by default
-    enc->is_ntsc_framerate = 0;  // Will be detected from input
-    enc->verbose = 0;
-    enc->disable_rcf = 1;
-    enc->subtitle_file = NULL;
-    enc->has_subtitles = 0;
-    enc->subtitle_list = NULL;
-    enc->current_subtitle = NULL;
-
-    // Rate control defaults
-    enc->target_bitrate_kbps = 0;    // 0 = quality mode
-    enc->bitrate_mode = 0;           // Quality mode by default
-    // No global rate control factor needed - per-block complexity-based control only
-    enc->frame_bits_accumulator = 0;
-    enc->target_bits_per_frame = 0;
-    enc->complexity_history_index = 0;
-    enc->average_complexity = 0.0f;
-    memset(enc->complexity_history, 0, sizeof(enc->complexity_history));
-
-    init_dct_tables();
-
-    return enc;
-}
-
-// Allocate encoder buffers
-static int alloc_encoder_buffers(tev_encoder_t *enc) {
-    // In interlaced mode, FFmpeg separatefields outputs field frames at half height
-    // In progressive mode, we work with full height frames  
-    int encoding_pixels = enc->width * enc->height;
-    
-    int blocks_x = (enc->width + 15) / 16;
-    int blocks_y = (enc->height + 15) / 16;
-    int total_blocks = blocks_x * blocks_y;
-
-    // Allocate buffers for encoding (FFmpeg provides frames at the correct resolution)
-    enc->current_rgb = malloc(encoding_pixels * 3);   // Current frame buffer from FFmpeg
-    enc->previous_rgb = malloc(encoding_pixels * 3);  // Previous frame buffer for motion estimation  
-    enc->reference_rgb = malloc(encoding_pixels * 3); // Reference frame buffer
-    enc->previous_even_field = malloc(encoding_pixels * 3);  // Previous even field for interlaced scene change
-
-    enc->y_workspace = malloc(16 * 16 * sizeof(float));
-    enc->co_workspace = malloc(8 * 8 * sizeof(float));
-    enc->cg_workspace = malloc(8 * 8 * sizeof(float));
-    enc->dct_workspace = malloc(16 * 16 * sizeof(float));
-
-    // Allocate block data
-    enc->block_data = malloc(total_blocks * sizeof(tev_block_t));
-
-    // Allocate compression buffer
-    size_t compressed_buffer_size = total_blocks * sizeof(tev_block_t) * 2;
-    enc->compressed_buffer = malloc(compressed_buffer_size);
-
-    enc->mp2_buffer = malloc(MP2_DEFAULT_PACKET_SIZE);
-
-    if (!enc->current_rgb || !enc->previous_rgb || !enc->reference_rgb ||
-        !enc->previous_even_field ||
-        !enc->y_workspace || !enc->co_workspace || !enc->cg_workspace ||
-        !enc->dct_workspace || !enc->block_data ||
-        !enc->compressed_buffer || !enc->mp2_buffer) {
-        return -1;
-    }
-
-    // Initialise Zstd compression context
-    enc->zstd_context = ZSTD_createCCtx();
-    if (!enc->zstd_context) {
-        fprintf(stderr, "Failed to initialise Zstd compression\n");
-        return 0;
-    }
-    
-    // Set reasonable compression level and memory limits
-    ZSTD_CCtx_setParameter(enc->zstd_context, ZSTD_c_compressionLevel, ZSTD_COMPRESSON_LEVEL);
-    ZSTD_CCtx_setParameter(enc->zstd_context, ZSTD_c_windowLog, 24); // 16MB window (should be plenty to hold an entire frame; interframe compression is unavailable)
-    ZSTD_CCtx_setParameter(enc->zstd_context, ZSTD_c_hashLog, 16);
-
-    // Initialise previous frame to black
-    memset(enc->previous_rgb, 0, encoding_pixels * 3);
-    memset(enc->previous_even_field, 0, encoding_pixels * 3);
-
-    return 1;
-}
-
-// Free encoder resources
-static void free_encoder(tev_encoder_t *enc) {
-    if (!enc) return;
-
-    if (enc->zstd_context) {
-        ZSTD_freeCCtx(enc->zstd_context);
-        enc->zstd_context = NULL;
-    }
-
-    if (enc->current_rgb) { free(enc->current_rgb); enc->current_rgb = NULL; }
-    if (enc->previous_rgb) { free(enc->previous_rgb); enc->previous_rgb = NULL; }
-    if (enc->reference_rgb) { free(enc->reference_rgb); enc->reference_rgb = NULL; }
-    if (enc->previous_even_field) { free(enc->previous_even_field); enc->previous_even_field = NULL; }
-    if (enc->y_workspace) { free(enc->y_workspace); enc->y_workspace = NULL; }
-    if (enc->co_workspace) { free(enc->co_workspace); enc->co_workspace = NULL; }
-    if (enc->cg_workspace) { free(enc->cg_workspace); enc->cg_workspace = NULL; }
-    if (enc->dct_workspace) { free(enc->dct_workspace); enc->dct_workspace = NULL; }
-    if (enc->block_data) { free(enc->block_data); enc->block_data = NULL; }
-    if (enc->compressed_buffer) { free(enc->compressed_buffer); enc->compressed_buffer = NULL; }
-    if (enc->mp2_buffer) { free(enc->mp2_buffer); enc->mp2_buffer = NULL; }
-    if (enc->complexity_values) { free(enc->complexity_values); enc->complexity_values = NULL; }
-    free(enc);
-}
-
-// Write TEV header
-
-static int write_tev_header(FILE *output, tev_encoder_t *enc) {
-    // Magic + version
-    fwrite(TEV_MAGIC, 1, 8, output);
-    uint8_t version = enc->ictcp_mode ? 3 : 2;  // Version 3 for ICtCp, 2 for YCoCg-R
-    fwrite(&version, 1, 1, output);
-
-    // Video parameters
-    uint16_t width = enc->width;
-    uint16_t height = enc->progressive_mode ? enc->height : enc->height * 2;
-    uint8_t fps = enc->output_fps;
-    uint32_t total_frames = enc->total_frames;
-    uint8_t qualityY = enc->qualityY;
-    uint8_t qualityCo = enc->qualityCo;
-    uint8_t qualityCg = enc->qualityCg;
-    uint8_t flags = (enc->has_audio) | (enc->has_subtitles << 1);
-    uint8_t video_flags = (enc->progressive_mode ? 0 : 1) | (enc->is_ntsc_framerate ? 2 : 0); // bit 0 = is_interlaced, bit 1 = is_ntsc_framerate
-    uint8_t reserved = 0;
-
-    fwrite(&width, 2, 1, output);
-    fwrite(&height, 2, 1, output);
-    fwrite(&fps, 1, 1, output);
-    fwrite(&total_frames, 4, 1, output);
-    fwrite(&qualityY, 1, 1, output);
-    fwrite(&qualityCo, 1, 1, output);
-    fwrite(&qualityCg, 1, 1, output);
-    fwrite(&flags, 1, 1, output);
-    fwrite(&video_flags, 1, 1, output);
-    fwrite(&reserved, 1, 1, output);
-
-    return 0;
-}
-
-// Detect scene changes by analysing frame differences
-static int detect_scene_change(tev_encoder_t *enc, int field_parity) {
-    if (!enc->current_rgb) {
-        return 0; // No current frame to compare
-    }
-    
-    // In interlaced mode, use previous even field for comparison
-    uint8_t *comparison_buffer = enc->previous_rgb;
-    if (!enc->progressive_mode && field_parity == 0) {
-        // Interlaced even field: compare to previous even field
-        if (!enc->previous_even_field) {
-            return 0; // No previous even field to compare
-        }
-        comparison_buffer = enc->previous_even_field;
-    } else {
-        // Progressive mode: use regular previous_rgb
-        if (!enc->previous_rgb) {
-            return 0; // No previous frame to compare
-        }
-        comparison_buffer = enc->previous_rgb;
-    }
-    
-    long long total_diff = 0;
-    int changed_pixels = 0;
-
-    // Sample every 4th pixel for performance (still gives good detection)
-    for (int y = 0; y < enc->height; y += 2) {
-        for (int x = 0; x < enc->width; x += 2) {
-            int offset = (y * enc->width + x) * 3;
-            
-            // Calculate colour difference
-            int r_diff = abs(enc->current_rgb[offset] - comparison_buffer[offset]);
-            int g_diff = abs(enc->current_rgb[offset + 1] - comparison_buffer[offset + 1]);
-            int b_diff = abs(enc->current_rgb[offset + 2] - comparison_buffer[offset + 2]);
-            
-            int pixel_diff = r_diff + g_diff + b_diff;
-            total_diff += pixel_diff;
-            
-            // Count significantly changed pixels (threshold of 30 per channel average)
-            if (pixel_diff > 90) {
-                changed_pixels++;
-            }
-        }
-    }
-    
-    // Calculate metrics for scene change detection
-    int sampled_pixels = (enc->height / 2) * (enc->width / 2);
-    double avg_diff = (double)total_diff / sampled_pixels;
-    double changed_ratio = (double)changed_pixels / sampled_pixels;
-
-    if (enc->verbose) {
-        printf("Scene change detection: avg_diff=%.2f\tchanged_ratio=%.4f\n", avg_diff, changed_ratio);
-    }
-
-    // Scene change thresholds - adjust for interlaced mode
-    // Interlaced fields have more natural differences due to temporal field separation
-    double threshold = 0.30;
-    
-    return changed_ratio > threshold;
-}
-
-// Encode and write a frame
-static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num, int field_parity) {
-    // In interlaced mode, only do scene change detection for even fields (field_parity = 0)
-    // to avoid false scene changes between fields of the same frame
-    int is_scene_change = 0;
-    if (enc->progressive_mode || field_parity == 0) {
-        is_scene_change = detect_scene_change(enc, field_parity);
-    }
-    int is_time_keyframe = (frame_num % KEYFRAME_INTERVAL) == 0;
-    int is_keyframe = is_time_keyframe || is_scene_change;
-    
-    // Verbose output for keyframe decisions
-    if (enc->verbose && is_keyframe) {
-        if (is_scene_change && !is_time_keyframe) {
-            printf("Frame %d: Scene change detected, inserting keyframe\n", frame_num);
-        } else if (is_time_keyframe) {
-            printf("Frame %d: Time-based keyframe (interval: %d)\n", frame_num, KEYFRAME_INTERVAL);
-        }
-    }
-    int blocks_x = (enc->width + 15) / 16;
-    int blocks_y = (enc->height + 15) / 16;
-
-    // Track frame complexity for rate control
-    float frame_complexity = 0.0f;
-    size_t frame_start_bits = enc->total_output_bytes * 8;
-
-    // Encode all blocks
-    for (int by = 0; by < blocks_y; by++) {
-        for (int bx = 0; bx < blocks_x; bx++) {
-            encode_block(enc, bx, by, is_keyframe);
-
-            // Calculate complexity for rate control (if enabled)
-            if (enc->bitrate_mode > 0) {
-                tev_block_t *block = &enc->block_data[by * blocks_x + bx];
-                if (block->mode == TEV_MODE_INTRA || block->mode == TEV_MODE_INTER) {
-                    // Sum absolute values of quantised coefficients as complexity metric
-                    for (int i = 1; i < BLOCK_SIZE_SQR; i++) frame_complexity += abs(block->y_coeffs[i]);
-                    for (int i = 1; i < HALF_BLOCK_SIZE_SQR; i++) frame_complexity += abs(block->co_coeffs[i]);
-                    for (int i = 1; i < HALF_BLOCK_SIZE_SQR; i++) frame_complexity += abs(block->cg_coeffs[i]);
-                }
-            }
-        }
-    }
-
-    // Compress block data using Zstd (compatible with TSVM decoder)
-    size_t compressed_size;
-
-    // Regular mode: use regular block data
-    size_t block_data_size = blocks_x * blocks_y * sizeof(tev_block_t);
-    compressed_size = ZSTD_compressCCtx(enc->zstd_context,
-                                       enc->compressed_buffer, block_data_size * 2,
-                                       enc->block_data, block_data_size,
-                                       ZSTD_COMPRESSON_LEVEL);
-
-    if (ZSTD_isError(compressed_size)) {
-        fprintf(stderr, "Zstd compression failed: %s\n", ZSTD_getErrorName(compressed_size));
-        return 0;
-    }
-
-    // Write frame packet header (rate control factor now per-block)
-    uint8_t packet_type = is_keyframe ? TEV_PACKET_IFRAME : TEV_PACKET_PFRAME;
-    uint32_t payload_size = compressed_size; // Rate control factor now per-block, not per-packet
-
-    fwrite(&packet_type, 1, 1, output);
-    fwrite(&payload_size, 4, 1, output);
-    fwrite(enc->compressed_buffer, 1, compressed_size, output);
-
-    if (enc->verbose) {
-        printf("perBlockComplexityBasedRateControl=enabled\n");
-    }
-
-    enc->total_output_bytes += 5 + compressed_size; // packet + size + data (rate_factor now per-block)
-
-    // No global rate control needed - per-block complexity-based control only
-
-    // Swap frame buffers for next frame
-    if (!enc->progressive_mode && field_parity == 0) {
-        // Interlaced even field: save to previous_even_field for scene change detection
-        size_t field_size = enc->width * enc->height * 3;
-        memcpy(enc->previous_even_field, enc->current_rgb, field_size);
-    }
-    
-    // Normal buffer swap for motion estimation
-    uint8_t *temp_rgb = enc->previous_rgb;
-    enc->previous_rgb = enc->current_rgb;
-    enc->current_rgb = temp_rgb;
-
-    return 1;
-}
-
-// Parse resolution string like "1024x768"
-static int parse_resolution(const char *res_str, int *width, int *height) {
-    if (!res_str) return 0;
-    if (strcmp(res_str, "cif") == 0 || strcmp(res_str, "CIF") == 0) {
-        *width = 352;
-        *height = 288;
-        return 1;
-    }
-    if (strcmp(res_str, "qcif") == 0 || strcmp(res_str, "QCIF") == 0) {
-        *width = 176;
-        *height = 144;
-        return 1;
-    }
-    if (strcmp(res_str, "half") == 0 || strcmp(res_str, "HALF") == 0) {
-        *width = DEFAULT_WIDTH >> 1;
-        *height = DEFAULT_HEIGHT >> 1;
-        return 1;
-    }
-    if (strcmp(res_str, "default") == 0 || strcmp(res_str, "DEFAULT") == 0) {
-        *width = DEFAULT_WIDTH;
-        *height = DEFAULT_HEIGHT;
-        return 1;
-    }
-    return sscanf(res_str, "%dx%d", width, height) == 2;
-}
-
-// Execute command and capture output
-static char *execute_command(const char *command) {
-    FILE *pipe = popen(command, "r");
-    if (!pipe) return NULL;
-
-    char *result = malloc(4096);
-    if (!result) {
-        pclose(pipe);
-        return NULL;
-    }
-    
-    size_t len = fread(result, 1, 4095, pipe);
-    result[len] = '\0';
-
-    pclose(pipe);
-    return result;
-}
-
-// Get video metadata using ffprobe
-static int get_video_metadata(tev_encoder_t *config) {
-    char command[1024];
-    char *output;
-
-    // Get all metadata without frame count (much faster)
-    snprintf(command, sizeof(command),
-        "ffprobe -v quiet "
-        "-show_entries stream=r_frame_rate:format=duration "
-        "-select_streams v:0 -of csv=p=0 \"%s\" 2>/dev/null; "
-        "ffprobe -v quiet -select_streams a:0 -show_entries stream=index -of csv=p=0 \"%s\" 2>/dev/null",
-        config->input_file, config->input_file);
-
-    output = execute_command(command);
-    if (!output) {
-        fprintf(stderr, "Failed to get video metadata (ffprobe failed)\n");
-        return 0;
-    }
-
-    // Parse the combined output
-    char *line = strtok(output, "\n");
-    int line_num = 0;
-    int input_is_ntsc_framerate = 0;
-
-    while (line && line_num < 2) {
-        switch (line_num) {
-            case 0: // Line format: "framerate" (e.g., "30000/1001"), (e.g., "30/1")
-                {
-                    // Parse frame rate
-                    int num, den;
-                    if (sscanf(line, "%d/%d", &num, &den) == 2) {
-                        config->fps = (den > 0) ? (int)round((float)num/(float)den) : 30;
-                        config->is_ntsc_framerate = (den == 1001 && config->output_fps == 0) ? 1 : 0; // set NTSC framerate mode only when the user did not supply fps option
-                        input_is_ntsc_framerate = (den == 1001) ? 1 : 0;
-                    } else {
-                        config->fps = (int)round(atof(line));
-                        config->is_ntsc_framerate = 0;
-                    }
-                    // Frame count will be determined during encoding
-                    config->total_frames = 0;
-                }
-                break;
-            case 1: // duration in seconds
-                config->duration = atof(line);
-                break;
-        }
-        line = strtok(NULL, "\n");
-        line_num++;
-    }
-
-    // Check for audio stream (will be on line 3 if present)
-    config->has_audio = (line && strlen(line) > 0 && atoi(line) >= 0);
-
-    free(output);
-
-    // Store input framerate for later calculations
-    float inputFramerate;
-    if (input_is_ntsc_framerate) {
-        inputFramerate = config->fps * 1000.f / 1001.f;
-    } else {
-        inputFramerate = config->fps * 1.f;
-    }
-
-    // if output FPS is unspecified, use the input rate
-    if (config->output_fps == 0) {
-        config->output_fps = config->fps;
-    }
-
-    // Frame count will be determined during encoding
-    config->total_frames = 0;
-
-    fprintf(stderr, "Video metadata:\n");
-    fprintf(stderr, "  Frames: (will be determined during encoding)\n");
-    fprintf(stderr, "  FPS: %.2f\n", inputFramerate);
-    fprintf(stderr, "  Duration: %.2fs\n", config->duration);
-    fprintf(stderr, "  Audio: %s\n", config->has_audio ? "Yes" : "No");
-    fprintf(stderr, "  Resolution: %dx%d (%s)\n", config->width, config->height, 
-            config->progressive_mode ? "progressive" : "interlaced");
-
-    return (config->fps > 0);
-}
-
-// Start FFmpeg process for video conversion with frame rate support
-static int start_video_conversion(tev_encoder_t *enc) {
-    char command[2048];
-
-    // Build FFmpeg command with potential frame rate conversion
-    if (enc->progressive_mode) {
-        if (enc->output_fps > 0 && enc->output_fps != enc->fps) {
-            // Frame rate conversion requested
-            snprintf(command, sizeof(command),
-                "ffmpeg -v error -i \"%s\" -f rawvideo -pix_fmt rgb24 "
-                "-vf \"fps=%d,scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" "
-                "-y - 2>&1",
-                enc->input_file, enc->output_fps, enc->width, enc->height, enc->width, enc->height);
-        } else {
-            // No frame rate conversion
-            snprintf(command, sizeof(command),
-                "ffmpeg -v error -i \"%s\" -f rawvideo -pix_fmt rgb24 "
-                "-vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" "
-                "-y -",
-                enc->input_file, enc->width, enc->height, enc->width, enc->height);
-        }
-    // let FFmpeg handle the interlacing
-    } else {
-        if (enc->output_fps > 0 && enc->output_fps != enc->fps) {
-            // Frame rate conversion requested
-            // filtergraph path:
-            // 1. FPS conversion
-            // 2. scale and crop to requested size
-            // 3. tinterlace weave-overwrites even and odd fields together to produce intermediate video at half framerate, full height (we're losing half the information here -- and that's on purpose)
-            // 4. separatefields separates weave-overwritten frame as two consecutive frames, at half height. Since the frame rate is halved in Step 3. and being doubled here, the final framerate is identical to given framerate
-            snprintf(command, sizeof(command),
-                "ffmpeg -v error -i \"%s\" -f rawvideo -pix_fmt rgb24 "
-                "-vf \"fps=%d,scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d,tinterlace=interleave_top:cvlpf,separatefields\" "
-                "-y - 2>&1",
-                enc->input_file, enc->output_fps, enc->width, enc->height * 2, enc->width, enc->height * 2);
-        } else {
-            // No frame rate conversion
-            // filtergraph path:
-            // 1. scale and crop to requested size
-            // 2. tinterlace weave-overwrites even and odd fields together to produce intermediate video at half framerate, full height (we're losing half the information here -- and that's on purpose)
-            // 3. separatefields separates weave-overwritten frame as two consecutive frames, at half height. Since the frame rate is halved in Step 2. and being doubled here, the final framerate is identical to the original framerate
-            snprintf(command, sizeof(command),
-                "ffmpeg -v error -i \"%s\" -f rawvideo -pix_fmt rgb24 "
-                "-vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d,tinterlace=interleave_top:cvlpf,separatefields\" "
-                "-y -",
-                enc->input_file, enc->width, enc->height * 2, enc->width, enc->height * 2);
-        }
-    }
-
-    if (enc->verbose) {
-        printf("FFmpeg command: %s\n", command);
-    }
-
-    enc->ffmpeg_video_pipe = popen(command, "r");
-    if (!enc->ffmpeg_video_pipe) {
-        fprintf(stderr, "Failed to start FFmpeg process\n");
-        return 0;
-    }
-
-    return 1;
-}
-
-// Start audio conversion
-static int start_audio_conversion(tev_encoder_t *enc) {
-    if (!enc->has_audio) return 1;
-
-    char command[2048];
-    int bitrate = (enc->audio_bitrate > 0) ? enc->audio_bitrate : MP2_RATE_TABLE[enc->qualityIndex];
-    snprintf(command, sizeof(command),
-        "ffmpeg -v quiet -i \"%s\" -acodec libtwolame -psymodel 4 -b:a %dk -ar %d -ac 2 -y \"%s\" 2>/dev/null",
-        enc->input_file, bitrate, MP2_SAMPLE_RATE, TEMP_AUDIO_FILE);
-
-    int result = system(command);
-    if (result == 0) {
-        enc->mp2_file = fopen(TEMP_AUDIO_FILE, "rb");
-        if (enc->mp2_file) {
-            fseek(enc->mp2_file, 0, SEEK_END);
-            enc->audio_remaining = ftell(enc->mp2_file);
-            fseek(enc->mp2_file, 0, SEEK_SET);
-        }
-    }
-
-    return (result == 0);
-}
-
-// Get MP2 packet size and rate index from header
-static int get_mp2_packet_size(uint8_t *header) {
-    int bitrate_index = (header[2] >> 4) & 0x0F;
-    int bitrates[] = {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384};
-    if (bitrate_index >= 15) return MP2_DEFAULT_PACKET_SIZE;
-
-    int bitrate = bitrates[bitrate_index];
-    int padding_bit = (header[2] >> 1) & 0x01;
-    if (bitrate <= 0) return MP2_DEFAULT_PACKET_SIZE;
-
-    int frame_size = (144 * bitrate * 1000) / MP2_SAMPLE_RATE + padding_bit;
-    return frame_size;
-}
-
-static int mp2_packet_size_to_rate_index(int packet_size, int is_mono) {
-    // Map packet sizes to rate indices for TEV format
-    const int mp2_frame_sizes[] = {144,216,252,288,360,432,504,576,720,864,1008,1152,1440,1728};
-    for (int i = 0; i < 14; i++) {
-        if (packet_size <= mp2_frame_sizes[i]) {
-            return i;
-        }
-    }
-    return 13; // Default to highest rate
-}
-
-// Process audio for current frame
-static int process_audio(tev_encoder_t *enc, int frame_num, FILE *output) {
-    if (!enc->has_audio || !enc->mp2_file || enc->audio_remaining <= 0) {
-        return 1;
-    }
-
-    // Initialise packet size on first frame
-    if (enc->mp2_packet_size == 0) {
-        uint8_t header[4];
-        if (fread(header, 1, 4, enc->mp2_file) != 4) return 1;
-        fseek(enc->mp2_file, 0, SEEK_SET);
-
-        enc->mp2_packet_size = get_mp2_packet_size(header);
-        int is_mono = (header[3] >> 6) == 3;
-        enc->mp2_rate_index = mp2_packet_size_to_rate_index(enc->mp2_packet_size, is_mono);
-        enc->target_audio_buffer_size = 4; // 4 audio packets in buffer
-    }
-
-    // Calculate how much audio time each frame represents (in seconds)
-    double frame_audio_time = 1.0 / enc->output_fps;
-
-    // Calculate how much audio time each MP2 packet represents
-    // MP2 frame contains 1152 samples at 32kHz = 0.036 seconds
-    double packet_audio_time = 1152.0 / MP2_SAMPLE_RATE;
-
-    // Estimate how many packets we consume per video frame
-    double packets_per_frame = frame_audio_time / packet_audio_time;
-
-    // Audio buffering strategy: maintain target buffer level
-    int packets_to_insert = 0;
-    if (frame_num == 0) {
-        // Prime buffer to target level initially
-        packets_to_insert = enc->target_audio_buffer_size;
-        enc->audio_frames_in_buffer = 0; // count starts from 0
-        if (enc->verbose) {
-            printf("Frame %d: Priming audio buffer with %d packets\n", frame_num, packets_to_insert);
-        }
-    } else {
-        // Simulate buffer consumption (fractional consumption per frame)
-        double old_buffer = enc->audio_frames_in_buffer;
-        enc->audio_frames_in_buffer -= packets_per_frame;
-
-        // Calculate how many packets we need to maintain target buffer level
-        // Only insert when buffer drops below target, and only insert enough to restore target
-        double target_level = (double)enc->target_audio_buffer_size;
-        if (enc->audio_frames_in_buffer < target_level) {
-            double deficit = target_level - enc->audio_frames_in_buffer;
-            // Insert packets to cover the deficit, but at least maintain minimum flow
-            packets_to_insert = (int)ceil(deficit);
-            // Cap at reasonable maximum to prevent excessive insertion
-            if (packets_to_insert > enc->target_audio_buffer_size) {
-                packets_to_insert = enc->target_audio_buffer_size;
-            }
-            
-            if (enc->verbose) {
-                printf("Frame %d: Buffer low (%.2f->%.2f), deficit %.2f, inserting %d packets\n", 
-                       frame_num, old_buffer, enc->audio_frames_in_buffer, deficit, packets_to_insert);
-            }
-        } else if (enc->verbose && old_buffer != enc->audio_frames_in_buffer) {
-            printf("Frame %d: Buffer sufficient (%.2f->%.2f), no packets\n", 
-                   frame_num, old_buffer, enc->audio_frames_in_buffer);
-        }
-    }
-
-    // Insert the calculated number of audio packets
-    for (int q = 0; q < packets_to_insert; q++) {
-        size_t bytes_to_read = enc->mp2_packet_size;
-        if (bytes_to_read > enc->audio_remaining) {
-            bytes_to_read = enc->audio_remaining;
-        }
-
-        size_t bytes_read = fread(enc->mp2_buffer, 1, bytes_to_read, enc->mp2_file);
-        if (bytes_read == 0) break;
-
-        // Write TEV MP2 audio packet
-        uint8_t audio_packet_type = TEV_PACKET_AUDIO_MP2;
-        uint32_t audio_len = (uint32_t)bytes_read;
-        fwrite(&audio_packet_type, 1, 1, output);
-        fwrite(&audio_len, 4, 1, output);
-        fwrite(enc->mp2_buffer, 1, bytes_read, output);
-
-        // Track audio bytes written
-        enc->total_output_bytes += 1 + 4 + bytes_read;
-        enc->audio_remaining -= bytes_read;
-        enc->audio_frames_in_buffer++;
-
-        if (frame_num == 0) {
-            enc->audio_frames_in_buffer = enc->target_audio_buffer_size / 2; // trick the buffer simulator so that it doesn't count the frame 0 priming
-        }
-
-        if (enc->verbose) {
-            printf("Audio packet %d: %zu bytes (buffer: %.2f packets)\n", 
-                   q, bytes_read, enc->audio_frames_in_buffer);
-        }
-    }
-
-    return 1;
-}
-
-// Show usage information
-static void show_usage(const char *program_name) {
-    printf("TEV YCoCg-R/ICtCp 4:2:0 Video Encoder\n");
-    printf("Usage: %s [options] -i input.mp4 -o output.mv2\n\n", program_name);
-    printf("Options:\n");
-    printf("  -i, --input FILE       Input video file\n");
-    printf("  -o, --output FILE      Output video file (use '-' for stdout)\n");
-    printf("  -s, --size WxH         Video size (default: %dx%d)\n", DEFAULT_WIDTH, DEFAULT_HEIGHT);
-    printf("  -f, --fps N            Output frames per second (enables frame rate conversion)\n");
-    printf("  -q, --quality N        Quality level 0-4 (default: 2, only decides audio rate in quantiser/lossless mode)\n");
-    printf("  -Q, --quantiser N      Quantiser level 0-100 (100: lossless, 0: potato)\n");
-//    printf("  -b, --bitrate N        Target bitrate in kbps (enables bitrate control mode; DON'T USE - NOT WORKING AS INTENDED)\n");
-    printf("  --arate N              MP2 audio bitrate in kbps (overrides quality-based audio rate)\n");
-    printf("                         Valid values: 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384\n");
-    printf("  -p, --progressive      Use progressive scan (default: interlaced)\n");
-    printf("  -S, --subtitles FILE   SubRip (.srt) or SAMI (.smi) subtitle file\n");
-    printf("  -v, --verbose          Verbose output\n");
-    printf("  -t, --test             Test mode: generate solid colour frames\n");
-    printf("  --ictcp                Use ICtCp colour space instead of YCoCg-R (generates TEV version 3)\n");
-    printf("  --enable-rcf           Enable per-block rate control (experimental)\n");
-    printf("  --enable-encode-stats  Collect and report block complexity statistics\n");
-    printf("  --help                 Show this help\n\n");
-//    printf("Rate Control Modes:\n");
-//    printf("  Quality mode (default): Fixed quantisation based on -q parameter\n");
-//    printf("  Bitrate mode (-b N):    Dynamic quantisation targeting N kbps average\n\n");
-    printf("Audio Rate by Quality:\n");
-    printf("  ");
-    for (int i = 0; i < sizeof(MP2_RATE_TABLE) / sizeof(int); i++) {
-        printf("%d: %d kbps\t", i, MP2_RATE_TABLE[i]);
-    }
-    printf("\nQuantiser Value by Quality:\n");
-    printf("  ");
-    for (int i = 0; i < sizeof(QUALITY_Y) / sizeof(int); i++) {
-        printf("%d: -Q %d  \t", i, QUALITY_Y[i]);
-    }
-    printf("\nVideo Size Keywords:");
-    printf("\n  -s cif: equal to 352x288");
-    printf("\n  -s qcif: equal to 176x144");
-    printf("\n  -s half: equal to %dx%d", DEFAULT_WIDTH >> 1, DEFAULT_HEIGHT >> 1);
-    printf("\n  -s default: equal to %dx%d", DEFAULT_WIDTH, DEFAULT_HEIGHT);
-    printf("\n\n");
-    printf("Features:\n");
-    printf("  - YCoCg-R or ICtCp 4:2:0 chroma subsampling for 50%% compression improvement\n");
-    printf("  - 16x16 Y blocks with 8x8 chroma for optimal DCT efficiency\n");
-    printf("  - Frame rate conversion with FFmpeg temporal filtering\n");
-    printf("  - Adaptive quality control with complexity-based adjustment\n");
-    printf("Examples:\n");
-    printf("  %s -i input.mp4 -o output.mv2                 # Use default setting (q=2)\n", program_name);
-    printf("  %s -i input.mkv -s cif -o output.mv2          # Encode at CIF (352x288) resolution\n", program_name);
-    printf("  %s -i input.mxf -f 15 -q 3 -p -o output.mv2   # Encode at 15 FPS progressive with higher quality\n", program_name);
-    printf("  %s -i input.webp -Q 50 -o output.mv2          # Encode at quantiser level 50\n", program_name);
-    printf("  %s -i input.flv -S input.srt -o output.mv2    # With SubRip subtitles\n", program_name);
-    printf("  %s -i input.ts -S input.smi -o output.mv2     # With SAMI subtitles\n", program_name);
-//    printf("  %s -i input.mp4 -b 800 -o output.mv2          # 800 kbps bitrate target\n", program_name);
-//    printf("  %s -i input.avi -f 15 -b 500 -o output.mv2    # 15fps @ 500 kbps\n", program_name);
-//    printf("  %s --test -b 1000 -o test.mv2                 # Test with 1000 kbps target\n", program_name);
-}
-
-
-// Cleanup encoder resources
-static void cleanup_encoder(tev_encoder_t *enc) {
-    if (!enc) return;
-
-    if (enc->ffmpeg_video_pipe) { 
-        pclose(enc->ffmpeg_video_pipe); 
-        enc->ffmpeg_video_pipe = NULL;
-    }
-    if (enc->mp2_file) {
-        fclose(enc->mp2_file);
-        enc->mp2_file = NULL;
-        unlink(TEMP_AUDIO_FILE); // Remove temporary audio file
-    }
-
-    if (enc->input_file) { free(enc->input_file); enc->input_file = NULL; }
-    if (enc->output_file) { free(enc->output_file); enc->output_file = NULL; }
-    if (enc->subtitle_file) { free(enc->subtitle_file); enc->subtitle_file = NULL; }
-    free_subtitle_list(enc->subtitle_list);
-
-    free_encoder(enc);
-}
-
-int sync_packet_count = 0;
-
-// Main function
-int main(int argc, char *argv[]) {
-    generate_random_filename(TEMP_AUDIO_FILE);
-
-    printf("Initialising encoder...\n");
-    tev_encoder_t *enc = init_encoder();
-    if (!enc) {
-        fprintf(stderr, "Failed to initialise encoder\n");
-        return 1;
-    }
-
-    int test_mode = 0;
-
-    static struct option long_options[] = {
-        {"input", required_argument, 0, 'i'},
-        {"output", required_argument, 0, 'o'},
-        {"size", required_argument, 0, 's'},
-        {"subtitle", required_argument, 0, 'S'},
-        {"subtitles", required_argument, 0, 'S'},
-        {"fps", required_argument, 0, 'f'},
-        {"quality", required_argument, 0, 'q'},
-        {"quantiser", required_argument, 0, 'Q'},
-        {"quantiser", required_argument, 0, 'Q'},
-        {"bitrate", required_argument, 0, 'b'},
-        {"arate", required_argument, 0, 1400},
-        {"progressive", no_argument, 0, 'p'},
-        {"verbose", no_argument, 0, 'v'},
-        {"test", no_argument, 0, 't'},
-        {"enable-encode-stats", no_argument, 0, 1000},
-        {"enable-rcf", no_argument, 0, 1100},
-        {"ictcp", no_argument, 0, 1300},
-        {"help", no_argument, 0, '?'},
-        {0, 0, 0, 0}
-    };
-
-    int option_index = 0;
-    int c;
-
-    while ((c = getopt_long(argc, argv, "i:o:s:S:w:h:f:q:b:Q:pvt", long_options, &option_index)) != -1) {
-        switch (c) {
-            case 'i':
-                enc->input_file = strdup(optarg);
-                break;
-            case 'o':
-                enc->output_file = strdup(optarg);
-                enc->output_to_stdout = (strcmp(optarg, "-") == 0);
-                break;
-            case 's':
-                if (!parse_resolution(optarg, &enc->width, &enc->height)) {
-                    fprintf(stderr, "Invalid resolution format: %s\n", optarg);
-                    cleanup_encoder(enc);
-                    return 1;
-                }
-                break;
-            case 'S':
-                enc->subtitle_file = strdup(optarg);
-                break;
-            case 'w':
-                enc->width = atoi(optarg);
-                break;
-            case 'h':
-                enc->height = atoi(optarg);
-                break;
-            case 'f':
-                enc->output_fps = atoi(optarg);
-                enc->is_ntsc_framerate = 0;
-                if (enc->output_fps <= 0) {
-                    fprintf(stderr, "Invalid FPS: %d\n", enc->output_fps);
-                    cleanup_encoder(enc);
-                    return 1;
-                }
-                break;
-            case 'q':
-                int qi = atoi(optarg);
-                if (qi < 0 || qi > 5) {
-                    fprintf(stderr, "Invalid quality index: %d\nUse value between 0 and 4.\n", qi);
-                    cleanup_encoder(enc);
-                    return 1;
-                }
-                enc->qualityIndex = qi;
-                enc->qualityY = QUALITY_Y[enc->qualityIndex];
-                enc->qualityCo = QUALITY_CO[enc->qualityIndex];
-                enc->qualityCg = enc->qualityCo >> 1; // bitshift instead of division so it would round up
-                break;
-            case 'b':
-                enc->target_bitrate_kbps = atoi(optarg);
-                if (enc->target_bitrate_kbps > 0) {
-                    enc->bitrate_mode = 1; // Enable bitrate control
-                }
-                break;
-            case 'p':
-                enc->progressive_mode = 1;
-                break;
-            case 'v':
-                enc->verbose = 1;
-                break;
-            case 't':
-                test_mode = 1;
-                break;
-            case 1000: // --enable-encode-stats
-                enc->stats_mode = 1;
-                break;
-             case 1100: // --enable-rcf
-                enc->disable_rcf = 0;
-                break;
-            case 1300: // --ictcp
-                enc->ictcp_mode = 1;
-                break;
-            case 1400: // --arate
-                {
-                    int bitrate = atoi(optarg);
-                    int valid_bitrate = validate_mp2_bitrate(bitrate);
-                    if (valid_bitrate == 0) {
-                        fprintf(stderr, "Error: Invalid MP2 bitrate %d. Valid values are: ", bitrate);
-                        for (int i = 0; i < sizeof(MP2_VALID_BITRATES) / sizeof(int); i++) {
-                            fprintf(stderr, "%d%s", MP2_VALID_BITRATES[i],
-                                    (i < sizeof(MP2_VALID_BITRATES) / sizeof(int) - 1) ? ", " : "\n");
-                        }
-                        cleanup_encoder(enc);
-                        return 1;
-                    }
-                    enc->audio_bitrate = valid_bitrate;
-                }
-                break;
-            case 0:
-                if (strcmp(long_options[option_index].name, "help") == 0) {
-                    show_usage(argv[0]);
-                    cleanup_encoder(enc);
-                    return 0;
-                }
-                break;
-            case 'Q':
-                enc->qualityY = CLAMP(atoi(optarg), 0, 100);
-                enc->qualityCo = enc->qualityY;
-                enc->qualityCg = (enc->qualityY == 100) ? enc->qualityY : enc->qualityCo >> 1;
-                break;
-            default:
-                show_usage(argv[0]);
-                cleanup_encoder(enc);
-                return 1;
-        }
-    }
-
-    // halve the internal representation of frame height
-    if (!enc->progressive_mode) {
-        enc->height /= 2;
-    }
-
-    if (enc->ictcp_mode) {
-        // ICtCp: Ct and Cp have different characteristics than YCoCg Co/Cg
-        // Cp channel now uses specialised quantisation table, so moderate quality is fine
-        int base_chroma_quality = enc->qualityCo;
-        enc->qualityCo = base_chroma_quality;           // Ct channel: keep original Co quantisation
-        enc->qualityCg = base_chroma_quality;           // Cp channel: same quality since Q_Cp_8 handles detail preservation
-    }
-
-    if (!test_mode && (!enc->input_file || !enc->output_file)) {
-        fprintf(stderr, "Input and output files are required (unless using --test mode)\n");
-        show_usage(argv[0]);
-        cleanup_encoder(enc);
-        return 1;
-    }
-
-    if (!enc->output_file) {
-        fprintf(stderr, "Output file is required\n");
-        show_usage(argv[0]);
-        cleanup_encoder(enc);
-        return 1;
-    }
-
-    // Handle test mode or real video
-    if (test_mode) {
-        // Test mode: generate solid colour frames
-        enc->fps = 1;
-        enc->output_fps = 1;
-        enc->total_frames = 15;
-        enc->has_audio = 0;
-        printf("Test mode: Generating 15 solid colour frames\n");
-    } else {
-        // Get video metadata and start FFmpeg processes
-        printf("Retrieving video metadata...\n");
-        if (!get_video_metadata(enc)) {
-            fprintf(stderr, "Failed to get video metadata\n");
-            cleanup_encoder(enc);
-            return 1;
-        }
-    }
-
-    // Load subtitle file if specified
-    printf("Loading subtitles...\n");
-    if (enc->subtitle_file) {
-        int format = detect_subtitle_format(enc->subtitle_file);
-        const char *format_name = (format == 1) ? "SAMI" : "SubRip";
-        
-        enc->subtitle_list = parse_subtitle_file(enc->subtitle_file, enc->output_fps);
-        if (enc->subtitle_list) {
-            enc->has_subtitles = 1;
-            enc->current_subtitle = enc->subtitle_list;
-            if (enc->verbose) {
-                printf("Loaded %s subtitles from: %s\n", format_name, enc->subtitle_file);
-            }
-        } else {
-            fprintf(stderr, "Failed to parse %s subtitle file: %s\n", format_name, enc->subtitle_file);
-            // Continue without subtitles
-        }
-    }
-
-    // Allocate buffers
-    if (!alloc_encoder_buffers(enc)) {
-        fprintf(stderr, "Failed to allocate encoder buffers\n");
-        cleanup_encoder(enc);
-        return 1;
-    }
-
-    // Start FFmpeg processes (only for real video mode)
-    if (!test_mode) {
-        // Start FFmpeg video conversion
-        if (!start_video_conversion(enc)) {
-            fprintf(stderr, "Failed to start video conversion\n");
-            cleanup_encoder(enc);
-            return 1;
-        }
-
-        // Start audio conversion (if audio present)
-        if (!start_audio_conversion(enc)) {
-            fprintf(stderr, "Warning: Audio conversion failed\n");
-            enc->has_audio = 0;
-        }
-    }
-
-    // Open output
-    FILE *output = enc->output_to_stdout ? stdout : fopen(enc->output_file, "wb");
-    if (!output) {
-        perror("Failed to open output file");
-        cleanup_encoder(enc);
-        return 1;
-    }
-
-    // Write TEV header
-    write_tev_header(output, enc);
-
-    // Write all subtitles upfront in SSF-TC format (before first frame)
-    if (enc->has_subtitles) {
-        write_all_subtitles_tc(enc, output);
-    }
-
-    gettimeofday(&enc->start_time, NULL);
-
-    printf("Encoding video with %s 4:2:0 format...\n", enc->ictcp_mode ? "ICtCp" : "YCoCg-R");
-    if (enc->output_fps != enc->fps) {
-        printf("Frame rate conversion enabled: %d fps output\n", enc->output_fps);
-    }
-    if (enc->bitrate_mode > 0) {
-        printf("Bitrate control enabled: targeting %d kbps\n", enc->target_bitrate_kbps);
-    } else {
-        printf("Quality mode: q=%d\n", enc->qualityIndex);
-        printf("Quantiser levels: %d, %d, %d\n", enc->qualityY, enc->qualityCo, enc->qualityCg);
-    }
-
-    // Process frames (read until EOF from FFmpeg, or frame limit in test mode)
-    int frame_count = 0;
-    int continue_encoding = 1;
-    while (continue_encoding) {
-        if (test_mode) {
-            // Test mode has a fixed frame count
-            if (frame_count >= enc->total_frames) {
-                continue_encoding = 0;
-                break;
-            }
-            // Generate test frame with solid colours
-            size_t rgb_size = enc->width * enc->height * 3;
-            uint8_t test_r = 0, test_g = 0, test_b = 0;
-            const char* colour_name = "unknown";
-
-            switch (frame_count) {
-                case 0: test_r = 0; test_g = 0; test_b = 0; colour_name = "black"; break;
-                case 1: test_r = 127; test_g = 127; test_b = 127; colour_name = "grey"; break;
-                case 2: test_r = 255; test_g = 255; test_b = 255; colour_name = "white"; break;
-                case 3: test_r = 127; test_g = 0; test_b = 0; colour_name = "half red"; break;
-                case 4: test_r = 127; test_g = 127; test_b = 0; colour_name = "half yellow"; break;
-                case 5: test_r = 0; test_g = 127; test_b = 0; colour_name = "half green"; break;
-                case 6: test_r = 0; test_g = 127; test_b = 127; colour_name = "half cyan"; break;
-                case 7: test_r = 0; test_g = 0; test_b = 127; colour_name = "half blue"; break;
-                case 8: test_r = 127; test_g = 0; test_b = 127; colour_name = "half magenta"; break;
-                case 9: test_r = 255; test_g = 0; test_b = 0; colour_name = "red"; break;
-                case 10: test_r = 255; test_g = 255; test_b = 0; colour_name = "yellow"; break;
-                case 11: test_r = 0; test_g = 255; test_b = 0; colour_name = "green"; break;
-                case 12: test_r = 0; test_g = 255; test_b = 255; colour_name = "cyan"; break;
-                case 13: test_r = 0; test_g = 0; test_b = 255; colour_name = "blue"; break;
-                case 14: test_r = 255; test_g = 0; test_b = 255; colour_name = "magenta"; break;
-            }
-
-            // Fill entire frame with solid colour
-            for (size_t i = 0; i < rgb_size; i += 3) {
-                enc->current_rgb[i] = test_r;
-                enc->current_rgb[i + 1] = test_g;
-                enc->current_rgb[i + 2] = test_b;
-            }
-
-            printf("Frame %d: %s (%d,%d,%d)\n", frame_count, colour_name, test_r, test_g, test_b);
-            
-            // Test YCoCg-R conversion
-            double y_test, co_test, cg_test;
-            rgb_to_colour_space(enc, test_r, test_g, test_b, &y_test, &co_test, &cg_test);
-            printf("  %s: Y=%.3f Co=%.3f Cg=%.3f\n", enc->ictcp_mode ? "ICtCp" : "YCoCg", y_test, co_test, cg_test);
-            
-            // Test reverse conversion
-            uint8_t r_rev, g_rev, b_rev;
-            colour_space_to_rgb(enc, y_test, co_test, cg_test, &r_rev, &g_rev, &b_rev);
-            printf("  Reverse: R=%d G=%d B=%d\n", r_rev, g_rev, b_rev);
-            
-        } else {
-            // Read RGB data directly from FFmpeg pipe
-            // height-halving is already done on the encoder initialisation
-            int frame_height = enc->height;
-            size_t rgb_size = enc->width * frame_height * 3;
-            size_t bytes_read = fread(enc->current_rgb, 1, rgb_size, enc->ffmpeg_video_pipe);
-            
-            if (bytes_read != rgb_size) {
-                if (enc->verbose) {
-                    printf("Frame %d: Expected %zu bytes, got %zu bytes\n", frame_count, rgb_size, bytes_read);
-                    if (feof(enc->ffmpeg_video_pipe)) {
-                        printf("FFmpeg pipe reached end of file\n");
-                    }
-                    if (ferror(enc->ffmpeg_video_pipe)) {
-                        printf("FFmpeg pipe error occurred\n");
-                    }
-                }
-                continue_encoding = 0;
-                break; // End of video or error
-            }
-            
-            // In interlaced mode, FFmpeg separatefields filter already provides field-separated frames
-            // Each frame from FFmpeg is now a single field at half height
-            // Frame parity: even frames (0,2,4...) = bottom fields, odd frames (1,3,5...) = top fields
-        }
-
-        // Process audio for this frame
-        process_audio(enc, frame_count, output);
-
-        // Note: Subtitles are now written upfront in SSF-TC format (see write_all_subtitles_tc)
-        // process_subtitles() is no longer called here
-
-        // Encode frame
-        // Pass field parity for interlaced mode, -1 for progressive mode
-        int frame_field_parity = enc->progressive_mode ? -1 : (frame_count % 2);
-        if (!encode_frame(enc, output, frame_count, frame_field_parity)) {
-            fprintf(stderr, "Failed to encode frame %d\n", frame_count);
-            break;
-        }
-        else {
-            // Write a sync packet only after a video is been coded
-            uint8_t sync_packet = TEV_PACKET_SYNC;
-            fwrite(&sync_packet, 1, 1, output);
-            sync_packet_count++;
-        }
-
-
-
-        frame_count++;
-        if (enc->verbose || frame_count % 30 == 0) {
-            struct timeval now;
-            gettimeofday(&now, NULL);
-            double elapsed = (now.tv_sec - enc->start_time.tv_sec) + 
-                           (now.tv_usec - enc->start_time.tv_usec) / 1000000.0;
-            double fps = frame_count / elapsed;
-            printf("Encoded frame %d (%.1f fps)\n", frame_count, fps);
-        }
-    }
-    
-    // Update actual frame count in encoder struct  
-    enc->total_frames = frame_count;
-
-    // Update header with actual frame count (seek back to header position)
-    if (!enc->output_to_stdout) {
-        long current_pos = ftell(output);
-        fseek(output, 14, SEEK_SET);  // Offset of total_frames field in header
-        uint32_t actual_frames = frame_count;
-        fwrite(&actual_frames, 4, 1, output);
-        fseek(output, current_pos, SEEK_SET);  // Restore position
-        if (enc->verbose) {
-            printf("Updated header with actual frame count: %d\n", frame_count);
-        }
-        fclose(output);
-    }
-    
-    // Final statistics
-    struct timeval end_time;
-    gettimeofday(&end_time, NULL);
-    double total_time = (end_time.tv_sec - enc->start_time.tv_sec) + 
-                       (end_time.tv_usec - enc->start_time.tv_usec) / 1000000.0;
-    
-    printf("\nEncoding complete!\n");
-    printf("  Frames encoded: %d\n", frame_count);
-    printf("  Framerate: %d\n", enc->output_fps);
-    printf("  Output size: %zu bytes\n", enc->total_output_bytes);
-    printf("  Encoding time: %.2fs (%.1f fps)\n", total_time, frame_count / total_time);
-    printf("  Block statistics: INTRA=%d, INTER=%d, MOTION=%d, SKIP=%d\n",
-           enc->blocks_intra, enc->blocks_inter, enc->blocks_motion, enc->blocks_skip);
-    
-    // Print complexity statistics if enabled
-    calculate_complexity_stats(enc);
-    
-    cleanup_encoder(enc);
-    return 0;
-}
diff --git a/video_encoder/estimate_affine_from_blocks.cpp b/video_encoder/estimate_affine_from_blocks.cpp
deleted file mode 100644
index 2cb7f9b..0000000
--- a/video_encoder/estimate_affine_from_blocks.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-// Affine estimation for TAV mesh warping
-// This file contains logic to estimate per-cell affine transforms from block motion
-
-#include <cmath>
-#include <cstdlib>
-#include <cstring>
-
-extern "C" {
-
-// Estimate affine transform for a mesh cell from surrounding block motion vectors
-// Uses least-squares fitting of motion vectors to affine model: [x'] = [a11 a12][x] + [tx]
-//                                                                  [y']   [a21 a22][y]   [ty]
-//
-// Returns 1 if affine improves residual by >threshold, 0 if translation-only is better
-int estimate_cell_affine(
-    const float *flow_x, const float *flow_y,
-    int width, int height,
-    int cell_x, int cell_y,      // Cell position in mesh coordinates
-    int cell_w, int cell_h,       // Cell size in pixels
-    float threshold,              // Residual improvement threshold (e.g. 0.10 = 10%)
-    short *out_tx, short *out_ty, // Translation (1/8 pixel)
-    short *out_a11, short *out_a12, // Affine matrix (1/256 fixed-point)
-    short *out_a21, short *out_a22
-) {
-    // Compute cell bounding box
-    int x_start = cell_x * cell_w;
-    int y_start = cell_y * cell_h;
-    int x_end = (cell_x + 1) * cell_w;
-    int y_end = (cell_y + 1) * cell_h;
-    if (x_end > width) x_end = width;
-    if (y_end > height) y_end = height;
-
-    // Sample motion vectors from a 4×4 grid within the cell
-    const int samples_x = 4;
-    const int samples_y = 4;
-    float sample_motion_x[16];
-    float sample_motion_y[16];
-    int sample_px[16];
-    int sample_py[16];
-    int n_samples = 0;
-
-    for (int sy = 0; sy < samples_y; sy++) {
-        for (int sx = 0; sx < samples_x; sx++) {
-            int px = x_start + (x_end - x_start) * sx / (samples_x - 1);
-            int py = y_start + (y_end - y_start) * sy / (samples_y - 1);
-
-            if (px >= width) px = width - 1;
-            if (py >= height) py = height - 1;
-
-            int idx = py * width + px;
-            sample_motion_x[n_samples] = flow_x[idx];
-            sample_motion_y[n_samples] = flow_y[idx];
-            sample_px[n_samples] = px - (x_start + x_end) / 2;  // Relative to cell center
-            sample_py[n_samples] = py - (y_start + y_end) / 2;
-            n_samples++;
-        }
-    }
-
-    // 1. Compute translation-only model (average motion)
-    float avg_dx = 0, avg_dy = 0;
-    for (int i = 0; i < n_samples; i++) {
-        avg_dx += sample_motion_x[i];
-        avg_dy += sample_motion_y[i];
-    }
-    avg_dx /= n_samples;
-    avg_dy /= n_samples;
-
-    // Translation residual
-    float trans_residual = 0;
-    for (int i = 0; i < n_samples; i++) {
-        float dx_err = sample_motion_x[i] - avg_dx;
-        float dy_err = sample_motion_y[i] - avg_dy;
-        trans_residual += dx_err * dx_err + dy_err * dy_err;
-    }
-
-    // 2. Estimate affine model using least-squares
-    // Solve: [vx] = [a11 a12][px] + [tx]
-    //        [vy]   [a21 a22][py]   [ty]
-    // Using normal equations for 2×2 affine
-
-    double sum_x = 0, sum_y = 0, sum_xx = 0, sum_yy = 0, sum_xy = 0;
-    double sum_vx = 0, sum_vy = 0, sum_vx_x = 0, sum_vx_y = 0;
-    double sum_vy_x = 0, sum_vy_y = 0;
-
-    for (int i = 0; i < n_samples; i++) {
-        double px = sample_px[i];
-        double py = sample_py[i];
-        double vx = sample_motion_x[i];
-        double vy = sample_motion_y[i];
-
-        sum_x += px;
-        sum_y += py;
-        sum_xx += px * px;
-        sum_yy += py * py;
-        sum_xy += px * py;
-        sum_vx += vx;
-        sum_vy += vy;
-        sum_vx_x += vx * px;
-        sum_vx_y += vx * py;
-        sum_vy_x += vy * px;
-        sum_vy_y += vy * py;
-    }
-
-    // Solve 2×2 system for [a11, a12, tx] and [a21, a22, ty]
-    double n = n_samples;
-    double det = n * sum_xx * sum_yy + 2 * sum_x * sum_y * sum_xy -
-                 sum_xx * sum_y * sum_y - sum_yy * sum_x * sum_x - n * sum_xy * sum_xy;
-
-    if (fabs(det) < 1e-6) {
-        // Singular matrix, fall back to translation
-        *out_tx = (short)(avg_dx * 8.0f);
-        *out_ty = (short)(avg_dy * 8.0f);
-        *out_a11 = 256;  // Identity
-        *out_a12 = 0;
-        *out_a21 = 0;
-        *out_a22 = 256;
-        return 0;  // Translation only
-    }
-
-    // Solve for affine parameters (simplified for readability)
-    double a11 = (sum_vx_x * sum_yy * n - sum_vx_y * sum_xy * n - sum_vx * sum_y * sum_y +
-                  sum_vx * sum_xy * sum_y + sum_vx_y * sum_x * sum_y - sum_vx_x * sum_y * sum_y) / det;
-    double a12 = (sum_vx_y * sum_xx * n - sum_vx_x * sum_xy * n - sum_vx * sum_x * sum_xy +
-                  sum_vx * sum_xx * sum_y + sum_vx_x * sum_x * sum_y - sum_vx_y * sum_x * sum_x) / det;
-    double tx = (sum_vx - a11 * sum_x - a12 * sum_y) / n;
-
-    double a21 = (sum_vy_x * sum_yy * n - sum_vy_y * sum_xy * n - sum_vy * sum_y * sum_y +
-                  sum_vy * sum_xy * sum_y + sum_vy_y * sum_x * sum_y - sum_vy_x * sum_y * sum_y) / det;
-    double a22 = (sum_vy_y * sum_xx * n - sum_vy_x * sum_xy * n - sum_vy * sum_x * sum_xy +
-                  sum_vy * sum_xx * sum_y + sum_vy_x * sum_x * sum_y - sum_vy_y * sum_x * sum_x) / det;
-    double ty = (sum_vy - a21 * sum_x - a22 * sum_y) / n;
-
-    // Affine residual
-    float affine_residual = 0;
-    for (int i = 0; i < n_samples; i++) {
-        double px = sample_px[i];
-        double py = sample_py[i];
-        double pred_vx = a11 * px + a12 * py + tx;
-        double pred_vy = a21 * px + a22 * py + ty;
-        double dx_err = sample_motion_x[i] - pred_vx;
-        double dy_err = sample_motion_y[i] - pred_vy;
-        affine_residual += dx_err * dx_err + dy_err * dy_err;
-    }
-
-    // Decision: Use affine if residual improves by > threshold
-    float improvement = (trans_residual - affine_residual) / (trans_residual + 1e-6f);
-
-    if (improvement > threshold) {
-        // Use affine
-        *out_tx = (short)(tx * 8.0f);
-        *out_ty = (short)(ty * 8.0f);
-        *out_a11 = (short)(a11 * 256.0);
-        *out_a12 = (short)(a12 * 256.0);
-        *out_a21 = (short)(a21 * 256.0);
-        *out_a22 = (short)(a22 * 256.0);
-        return 1;  // Affine
-    } else {
-        // Use translation
-        *out_tx = (short)(avg_dx * 8.0f);
-        *out_ty = (short)(avg_dy * 8.0f);
-        *out_a11 = 256;  // Identity
-        *out_a12 = 0;
-        *out_a21 = 0;
-        *out_a22 = 256;
-        return 0;  // Translation only
-    }
-}
-
-} // extern "C"
diff --git a/video_encoder/exponential_numeric_system.ods b/video_encoder/exponential_numeric_system.ods
deleted file mode 100644
index e76634f..0000000
Binary files a/video_encoder/exponential_numeric_system.ods and /dev/null differ
diff --git a/video_encoder/include/coefficient_compress.h b/video_encoder/include/coefficient_compress.h
deleted file mode 100644
index 07aa751..0000000
--- a/video_encoder/include/coefficient_compress.h
+++ /dev/null
@@ -1,65 +0,0 @@
-// Simple coefficient preprocessing for better compression
-// Insert right before Zstd compression
-
-#ifndef COEFFICIENT_COMPRESS_H
-#define COEFFICIENT_COMPRESS_H
-
-#include <stdint.h>
-#include <string.h>
-
-// Preprocess coefficients using significance map
-// Returns new buffer size, modifies buffer in-place if possible
-static size_t preprocess_coefficients(int16_t *coeffs, int coeff_count, uint8_t *output_buffer) {
-    // Count non-zero coefficients
-    int nonzero_count = 0;
-    for (int i = 0; i < coeff_count; i++) {
-        if (coeffs[i] != 0) nonzero_count++;
-    }
-
-    // Create significance map (1 bit per coefficient, packed into bytes)
-    int map_bytes = (coeff_count + 7) / 8;  // Round up to nearest byte
-    uint8_t *sig_map = output_buffer;
-    int16_t *values = (int16_t *)(output_buffer + map_bytes);
-
-    // Clear significance map
-    memset(sig_map, 0, map_bytes);
-
-    // Fill significance map and extract non-zero values
-    int value_idx = 0;
-    for (int i = 0; i < coeff_count; i++) {
-        if (coeffs[i] != 0) {
-            // Set bit in significance map
-            int byte_idx = i / 8;
-            int bit_idx = i % 8;
-            sig_map[byte_idx] |= (1 << bit_idx);
-
-            // Store the value
-            values[value_idx++] = coeffs[i];
-        }
-    }
-
-    return map_bytes + (nonzero_count * sizeof(int16_t));
-}
-
-// Decoder: reconstruct coefficients from significance map
-static void postprocess_coefficients(uint8_t *compressed_data, int coeff_count, int16_t *output_coeffs) {
-    int map_bytes = (coeff_count + 7) / 8;
-    uint8_t *sig_map = compressed_data;
-    int16_t *values = (int16_t *)(compressed_data + map_bytes);
-
-    // Clear output
-    memset(output_coeffs, 0, coeff_count * sizeof(int16_t));
-
-    // Reconstruct coefficients
-    int value_idx = 0;
-    for (int i = 0; i < coeff_count; i++) {
-        int byte_idx = i / 8;
-        int bit_idx = i % 8;
-
-        if (sig_map[byte_idx] & (1 << bit_idx)) {
-            output_coeffs[i] = values[value_idx++];
-        }
-    }
-}
-
-#endif // COEFFICIENT_COMPRESS_H
\ No newline at end of file
diff --git a/video_encoder/include/decoder_tad.h b/video_encoder/include/decoder_tad.h
deleted file mode 100644
index 2c5c11e..0000000
--- a/video_encoder/include/decoder_tad.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef TAD32_DECODER_H
-#define TAD32_DECODER_H
-
-#include <stdint.h>
-#include <stddef.h>
-
-// TAD32 (Terrarum Advanced Audio - PCM32f version) Decoder
-// DWT-based perceptual audio codec for TSVM
-// Shared decoder library used by both decoder_tad (standalone) and decoder_tav (video decoder)
-
-// Constants (must match encoder)
-#define TAD32_SAMPLE_RATE 32000
-#define TAD32_CHANNELS 2  // Stereo
-#define TAD_DEFAULT_CHUNK_SIZE 32768  // Default chunk size for standalone TAD files
-
-/**
- * Decode audio chunk with TAD32 codec
- *
- * @param input           Input TAD32 chunk data
- * @param input_size      Size of input buffer
- * @param pcmu8_stereo    Output PCMu8 stereo samples (interleaved L,R)
- * @param bytes_consumed  [out] Number of bytes consumed from input
- * @param samples_decoded [out] Number of samples decoded per channel
- * @return                0 on success, -1 on error
- *
- * Input format:
- *   uint16 sample_count (samples per channel)
- *   uint8  max_index (maximum quantisation index)
- *   uint32 payload_size (bytes in payload)
- *   *      payload (encoded M/S data, Zstd-compressed with EZBC)
- *
- * Output format:
- *   PCMu8 stereo interleaved (8-bit unsigned PCM, L,R pairs)
- *   Range: [0, 255] where 128 = silence
- */
-int tad32_decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_stereo,
-                       size_t *bytes_consumed, size_t *samples_decoded);
-
-#endif // TAD32_DECODER_H
diff --git a/video_encoder/include/encoder_tad.h b/video_encoder/include/encoder_tad.h
deleted file mode 100644
index 4be0aa7..0000000
--- a/video_encoder/include/encoder_tad.h
+++ /dev/null
@@ -1,63 +0,0 @@
-#ifndef TAD32_ENCODER_H
-#define TAD32_ENCODER_H
-
-#include <stdint.h>
-#include <stddef.h>
-
-// TAD32 (Terrarum Advanced Audio - PCM32f version) Encoder
-// DWT-based perceptual audio codec for TSVM
-// Alternative version: PCM32f throughout encoding, PCM8 conversion only at decoder
-
-// Constants
-#define TAD32_COEFF_SCALARS {64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f} // value only valid for CDF 9/7 with decomposition level 9. Index 0 = LL band
-#define TAD32_MIN_CHUNK_SIZE 1024       // Minimum: 1024 samples
-#define TAD32_SAMPLE_RATE 32000
-#define TAD32_CHANNELS 2  // Stereo
-#define TAD32_QUALITY_MIN 0
-#define TAD32_QUALITY_MAX 6
-#define TAD32_QUALITY_DEFAULT 3
-#define TAD32_ZSTD_LEVEL 15
-
-static inline int tad32_quality_to_max_index(int quality) {
-    static const int quality_map[6] = {21, 31, 44, 63, 89, 127};
-    if (quality < 0) quality = 0;
-    if (quality > 5) quality = 5;
-    return quality_map[quality];
-}
-
-/**
- * Encode audio chunk with TAD32 codec (PCM32f version)
- *
- * @param pcm32_stereo    Input PCM32fLE stereo samples (interleaved L,R)
- * @param num_samples     Number of samples per channel (min 1024)
- * @param max_index       Maximum quantisation index (7=3bit, 15=4bit, 31=5bit, 63=6bit, 127=7bit)
- * @param quantiser_scale Quantiser scaling factor (1.0=baseline, 2.0=2x coarser quantisation)
- *                        Higher values = more aggressive quantisation = smaller files
- * @param zstd_level      Zstd compression level (1-22). Use negative value to disable compression.
- *                        When disabled, MSB of payload_size is set to indicate uncompressed data.
- * @param output          Output buffer (must be large enough)
- * @return                Number of bytes written to output, or 0 on error
- *
- * Output format:
- *   uint16 sample_count (samples per channel)
- *   uint8  max_index (maximum quantisation index)
- *   uint32 payload_size (bytes in payload; MSB=1 indicates uncompressed)
- *   *      payload (encoded M/S data, optionally Zstd-compressed)
- */
-size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
-                          int max_index,
-                          float quantiser_scale, int zstd_level, uint8_t *output);
-
-/**
- * Print accumulated coefficient statistics
- * Only effective if TAD_COEFF_STATS environment variable is set
- */
-void tad32_print_statistics(void);
-
-/**
- * Free accumulated statistics memory
- * Should be called after tad32_print_statistics()
- */
-void tad32_free_statistics(void);
-
-#endif // TAD32_ENCODER_H
diff --git a/video_encoder/include/entropy_coder.h b/video_encoder/include/entropy_coder.h
deleted file mode 100644
index bc2f1e8..0000000
--- a/video_encoder/include/entropy_coder.h
+++ /dev/null
@@ -1,74 +0,0 @@
-// TEV Entropy Coder - Specialised for DCT coefficients
-// Replaces gzip with video-optimized compression
-#ifndef ENTROPY_CODER_H
-#define ENTROPY_CODER_H
-
-#include <stdint.h>
-#include <stdio.h>
-
-// Bit writer for variable-length codes
-typedef struct {
-    uint8_t *buffer;
-    size_t buffer_size;
-    size_t byte_pos;
-    int bit_pos;  // 0-7, next bit to write
-} bit_writer_t;
-
-// Bit reader for decoding
-typedef struct {
-    const uint8_t *buffer;
-    size_t buffer_size;
-    size_t byte_pos;
-    int bit_pos;  // 0-7, next bit to read
-} bit_reader_t;
-
-// Huffman table entry
-typedef struct {
-    uint16_t code;    // Huffman code
-    uint8_t bits;     // Code length in bits
-} huffman_entry_t;
-
-// Video entropy coder optimized for TEV coefficients
-typedef struct {
-    // Huffman tables for different coefficient types
-    huffman_entry_t y_dc_table[512];      // Y DC coefficients (-255 to +255)
-    huffman_entry_t y_ac_table[512];      // Y AC coefficients
-    huffman_entry_t c_dc_table[512];      // Chroma DC coefficients  
-    huffman_entry_t c_ac_table[512];      // Chroma AC coefficients
-    huffman_entry_t run_table[256];       // Zero run lengths (0-255)
-    
-    // Motion vector Huffman tables
-    huffman_entry_t mv_table[65];         // Motion vectors (-32 to +32)
-    
-    // Bit writer/reader
-    bit_writer_t writer;
-    bit_reader_t reader;
-} entropy_coder_t;
-
-static const huffman_entry_t BLOCK_MODE_HUFFMAN[16];
-
-void write_bits(bit_writer_t *writer, uint32_t value, int bits);
-uint32_t read_bits(bit_reader_t *reader, int bits);
-
-// Initialise entropy coder
-entropy_coder_t* entropy_coder_create(uint8_t *buffer, size_t buffer_size);
-void entropy_coder_destroy(entropy_coder_t *coder);
-
-// Encoding functions
-int encode_y_block(entropy_coder_t *coder, int16_t *y_coeffs);
-int encode_chroma_block(entropy_coder_t *coder, int16_t *chroma_coeffs, int is_cg);
-int encode_motion_vector(entropy_coder_t *coder, int16_t mv_x, int16_t mv_y);
-int encode_block_mode(entropy_coder_t *coder, uint8_t mode);
-
-// Decoding functions  
-void entropy_coder_init_reader(entropy_coder_t *coder, const uint8_t *buffer, size_t buffer_size);
-int decode_y_block(entropy_coder_t *coder, int16_t *y_coeffs);
-int decode_chroma_block(entropy_coder_t *coder, int16_t *chroma_coeffs, int is_cg);
-int decode_motion_vector(entropy_coder_t *coder, int16_t *mv_x, int16_t *mv_y);
-int decode_block_mode(entropy_coder_t *coder, uint8_t *mode);
-
-// Get compressed size
-size_t entropy_coder_get_size(entropy_coder_t *coder);
-void entropy_coder_reset(entropy_coder_t *coder);
-
-#endif // ENTROPY_CODER_H
\ No newline at end of file
diff --git a/video_encoder/include/tav_avx512.h b/video_encoder/include/tav_avx512.h
deleted file mode 100644
index be3f0cd..0000000
--- a/video_encoder/include/tav_avx512.h
+++ /dev/null
@@ -1,837 +0,0 @@
-/*
- * TAV AVX-512 Optimisations
- *
- * This file contains AVX-512 optimised versions of performance-critical functions
- * in the TAV encoder. Runtime CPU detection ensures fallback to scalar versions
- * on non-AVX-512 systems.
- *
- * Optimised functions:
- * - 1D DWT transforms (5/3, 9/7, Haar, Bior13/7, DD4)
- * - Quantisation functions
- * - RGB to YCoCg colour conversion
- * - 2D DWT gather/scatter operations
- *
- * Compile with: -mavx512f -mavx512dq -mavx512bw -mavx512vl
- */
-
-#ifndef TAV_AVX512_H
-#define TAV_AVX512_H
-
-#include <immintrin.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#include <stdio.h>
-
-// =============================================================================
-// SIMD Capability Detection
-// =============================================================================
-
-typedef enum {
-    SIMD_NONE = 0,
-    SIMD_AVX512F = 1
-} simd_level_t;
-
-// Global SIMD level (set by tav_simd_init)
-static simd_level_t g_simd_level = SIMD_NONE;
-
-// CPU feature detection
-static inline int cpu_has_avx512f(void) {
-#ifdef __AVX512F__
-    return __builtin_cpu_supports("avx512f") &&
-           __builtin_cpu_supports("avx512dq");
-#else
-    return 0;
-#endif
-}
-
-// Initialize SIMD detection (call once at startup)
-static inline void tav_simd_init(void) {
-#ifdef __AVX512F__
-    if (cpu_has_avx512f()) {
-        g_simd_level = SIMD_AVX512F;
-        fprintf(stderr, "[TAV] AVX-512 optimisations enabled\n");
-    } else {
-        g_simd_level = SIMD_NONE;
-        fprintf(stderr, "[TAV] AVX-512 not available, using scalar fallback\n");
-    }
-#else
-    g_simd_level = SIMD_NONE;
-    fprintf(stderr, "[TAV] Compiled without AVX-512 support\n");
-#endif
-}
-
-#ifdef __AVX512F__
-
-// =============================================================================
-// Helper Functions
-// =============================================================================
-
-// Horizontal sum of 16 floats
-static inline float _mm512_reduce_add_ps_compat(__m512 v) {
-    __m256 low = _mm512_castps512_ps256(v);
-    __m256 high = _mm512_extractf32x8_ps(v, 1);
-    __m256 sum256 = _mm256_add_ps(low, high);
-    __m128 sum128 = _mm_add_ps(_mm256_castps256_ps128(sum256), _mm256_extractf128_ps(sum256, 1));
-    sum128 = _mm_hadd_ps(sum128, sum128);
-    sum128 = _mm_hadd_ps(sum128, sum128);
-    return _mm_cvtss_f32(sum128);
-}
-
-// Clamp helper for vectorised operations
-static inline __m512 _mm512_clamp_ps(__m512 v, __m512 min_val, __m512 max_val) {
-    return _mm512_min_ps(_mm512_max_ps(v, min_val), max_val);
-}
-
-// =============================================================================
-// AVX-512 Optimised 1D DWT Forward Transforms
-// =============================================================================
-
-// 5/3 Reversible Forward DWT with AVX-512
-static inline void dwt_53_forward_1d_avx512(float *data, int length) {
-    if (length < 2) return;
-
-    float *temp = (float*)calloc(length, sizeof(float));
-    int half = (length + 1) / 2;
-
-    // Predict step (high-pass) - vectorised
-    // temp[half + i] = data[2*i+1] - 0.5 * (data[2*i] + data[2*i+2])
-    int i;
-    for (i = 0; i + 16 <= half; i += 16) {
-        __mmask16 valid_mask = 0xFFFF;
-
-        // Check boundary for last iteration
-        for (int j = 0; j < 16; j++) {
-            int idx = 2 * (i + j) + 1;
-            if (idx >= length) {
-                valid_mask &= ~(1 << j);
-            }
-        }
-
-        if (valid_mask == 0) break;
-
-        // Load data[2*i] - stride 2 load
-        float even_curr_vals[16], even_next_vals[16], odd_vals[16];
-
-        for (int j = 0; j < 16; j++) {
-            if (valid_mask & (1 << j)) {
-                even_curr_vals[j] = data[2 * (i + j)];
-                even_next_vals[j] = (2 * (i + j) + 2 < length) ? data[2 * (i + j) + 2] : data[2 * (i + j)];
-                odd_vals[j] = data[2 * (i + j) + 1];
-            } else {
-                even_curr_vals[j] = 0.0f;
-                even_next_vals[j] = 0.0f;
-                odd_vals[j] = 0.0f;
-            }
-        }
-
-        __m512 even_curr = _mm512_loadu_ps(even_curr_vals);
-        __m512 even_next = _mm512_loadu_ps(even_next_vals);
-        __m512 odd = _mm512_loadu_ps(odd_vals);
-
-        __m512 pred = _mm512_mul_ps(_mm512_add_ps(even_curr, even_next), _mm512_set1_ps(0.5f));
-        __m512 high = _mm512_sub_ps(odd, pred);
-
-        _mm512_mask_storeu_ps(&temp[half + i], valid_mask, high);
-    }
-
-    // Handle remaining elements
-    for (; i < half; i++) {
-        int idx = 2 * i + 1;
-        if (idx < length) {
-            float pred = 0.5f * (data[2 * i] + (2 * i + 2 < length ? data[2 * i + 2] : data[2 * i]));
-            temp[half + i] = data[idx] - pred;
-        }
-    }
-
-    // Update step (low-pass) - vectorised
-    // temp[i] = data[2*i] + 0.25 * (temp[half+i-1] + temp[half+i])
-    for (i = 0; i + 16 <= half; i += 16) {
-        __m512 even = _mm512_loadu_ps(&data[2 * i]);  // Load with stride 2 (simplified)
-
-        // Manual gather for strided load
-        float even_vals[16];
-        for (int j = 0; j < 16 && (i + j) < half; j++) {
-            even_vals[j] = data[2 * (i + j)];
-        }
-        even = _mm512_loadu_ps(even_vals);
-
-        // Load high-pass neighbours
-        float high_prev[16], high_curr[16];
-        for (int j = 0; j < 16 && (i + j) < half; j++) {
-            high_prev[j] = ((i + j) > 0) ? temp[half + (i + j) - 1] : 0.0f;
-            high_curr[j] = ((i + j) < half - 1) ? temp[half + (i + j)] : 0.0f;
-        }
-
-        __m512 hp = _mm512_loadu_ps(high_prev);
-        __m512 hc = _mm512_loadu_ps(high_curr);
-        __m512 update = _mm512_mul_ps(_mm512_add_ps(hp, hc), _mm512_set1_ps(0.25f));
-        __m512 low = _mm512_add_ps(even, update);
-
-        __mmask16 store_mask = (i + 16 <= half) ? 0xFFFF : (1 << (half - i)) - 1;
-        _mm512_mask_storeu_ps(&temp[i], store_mask, low);
-    }
-
-    // Handle remaining elements
-    for (; i < half; i++) {
-        float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) +
-                               (i < half - 1 ? temp[half + i] : 0));
-        temp[i] = data[2 * i] + update;
-    }
-
-    memcpy(data, temp, length * sizeof(float));
-    free(temp);
-}
-
-// 9/7 Irreversible Forward DWT with AVX-512
-static inline void dwt_97_forward_1d_avx512(float *data, int length) {
-    if (length < 2) return;
-
-    int half = (length + 1) / 2;
-
-    // Allocate aligned temp buffer once (64-byte align for cache lines)
-    float *temp = NULL;
-#if defined(_POSIX_C_SOURCE) || defined(_XOPEN_SOURCE)
-    if (posix_memalign((void**)&temp, 64, (size_t)length * sizeof(float)) != 0) {
-        temp = (float*)malloc((size_t)length * sizeof(float));
-    }
-#else
-    temp = (float*)aligned_alloc(64, ((size_t)length * sizeof(float) + 63) & ~63);
-    if (!temp) temp = (float*)malloc((size_t)length * sizeof(float));
-#endif
-    if (!temp) return; // allocation failure: bail out (preserve original behavior could be different)
-
-    // FAST SPLIT: interleave into temp: first half = evens, second half = odds
-    // This is simple, streaming-friendly, and much faster than per-iteration small-array gathers.
-    {
-        float *even = temp;
-        float *odd  = temp + half;
-        int i = 0;
-        // process pairs to minimize branches and memory ops
-        for (; i + 1 < length; i += 2) {
-            even[0] = data[i];
-            odd[0]  = data[i + 1];
-            ++even; ++odd;
-        }
-        if (i < length) { // odd leftover
-            even[0] = data[i];
-        }
-    }
-
-    // Lifting coefficients as vectors
-    const __m512 alpha_vec = _mm512_set1_ps(-1.586134342f);
-    const __m512 beta_vec  = _mm512_set1_ps(-0.052980118f);
-    const __m512 gamma_vec = _mm512_set1_ps(0.882911076f);
-    const __m512 delta_vec = _mm512_set1_ps(0.443506852f);
-    const __m512 K_vec     = _mm512_set1_ps(1.230174105f);
-    const __m512 invK_vec  = _mm512_set1_ps(1.0f / 1.230174105f);
-
-    // Helper variables
-    int i;
-
-    // -----------------------
-    // Step 1: Predict α
-    // d[i] += alpha * (s[i] + s[i+1])
-    // -----------------------
-    if (half > 0) {
-        // handle small or trivial cases
-        if (half == 1) {
-            if (half < length) {
-                temp[half + 0] += -1.586134342f * (temp[0] + temp[0]);
-            }
-        } else {
-            // main vectorised body: ensure s_next loads (i+1) valid -> i <= half-2
-            int limit = (half - 1);
-            int n_full = (limit / 16) * 16; // process up to n_full (multiple of 16)
-            i = 0;
-            for (; i + 32 <= n_full; i += 32) {
-                // unroll 2x (i and i+16)
-                __m512 s0 = _mm512_loadu_ps(&temp[i]);
-                __m512 s0n = _mm512_loadu_ps(&temp[i + 1]);
-                __m512 d0 = _mm512_loadu_ps(&temp[half + i]);
-                __m512 sum0 = _mm512_add_ps(s0, s0n);
-                d0 = _mm512_fmadd_ps(alpha_vec, sum0, d0);
-                _mm512_storeu_ps(&temp[half + i], d0);
-
-                __m512 s1 = _mm512_loadu_ps(&temp[i + 16]);
-                __m512 s1n = _mm512_loadu_ps(&temp[i + 17]);
-                __m512 d1 = _mm512_loadu_ps(&temp[half + i + 16]);
-                __m512 sum1 = _mm512_add_ps(s1, s1n);
-                d1 = _mm512_fmadd_ps(alpha_vec, sum1, d1);
-                _mm512_storeu_ps(&temp[half + i + 16], d1);
-            }
-            for (; i + 16 <= n_full; i += 16) {
-                __m512 s = _mm512_loadu_ps(&temp[i]);
-                __m512 sn = _mm512_loadu_ps(&temp[i + 1]);
-                __m512 d = _mm512_loadu_ps(&temp[half + i]);
-                __m512 sum = _mm512_add_ps(s, sn);
-                d = _mm512_fmadd_ps(alpha_vec, sum, d);
-                _mm512_storeu_ps(&temp[half + i], d);
-            }
-            // scalar remainder up to limit (half-2 -> last vector handled below)
-            for (; i < limit; ++i) {
-                temp[half + i] += -1.586134342f * (temp[i] + temp[i + 1]);
-            }
-            // handle last index i = half-1 (mirror)
-            int last = half - 1;
-            if (half + last < length) {
-                float s_curr = temp[last];
-                float s_next = s_curr;
-                temp[half + last] += -1.586134342f * (s_curr + s_next);
-            }
-        }
-    }
-
-    // -----------------------
-    // Step 2: Update β
-    // s[i] += beta * (d[i-1] + d[i])
-    // -----------------------
-    if (half > 0) {
-        // handle i == 0 separately (d_prev = d_curr for boundary semantics)
-        if (half >= 1) {
-            // i == 0
-            if (half + 0 < length) {
-                float d_curr0 = temp[half + 0];
-                temp[0] += -0.052980118f * (d_curr0 + d_curr0);
-            }
-        }
-
-        if (half > 1) {
-            // main vector loop starting from i = 1 to half-1 (we will write s[i] for i>=1)
-            int start = 1;
-            int limit = half; // exclusive
-            int n_elems = limit - start;
-            int n_full = (n_elems / 16) * 16;
-            i = start;
-            for (; i + 32 <= start + n_full; i += 32) {
-                // unroll 2x
-                __m512 s0 = _mm512_loadu_ps(&temp[i]);
-                __m512 dcurr0 = _mm512_loadu_ps(&temp[half + i]);
-                __m512 dprev0 = _mm512_loadu_ps(&temp[half + i - 1]);
-                __m512 sum0 = _mm512_add_ps(dprev0, dcurr0);
-                s0 = _mm512_fmadd_ps(beta_vec, sum0, s0);
-                _mm512_storeu_ps(&temp[i], s0);
-
-                __m512 s1 = _mm512_loadu_ps(&temp[i + 16]);
-                __m512 dcurr1 = _mm512_loadu_ps(&temp[half + i + 16]);
-                __m512 dprev1 = _mm512_loadu_ps(&temp[half + i + 15]);
-                __m512 sum1 = _mm512_add_ps(dprev1, dcurr1);
-                s1 = _mm512_fmadd_ps(beta_vec, sum1, s1);
-                _mm512_storeu_ps(&temp[i + 16], s1);
-            }
-            for (; i + 16 <= start + n_full; i += 16) {
-                __m512 s = _mm512_loadu_ps(&temp[i]);
-                __m512 dcurr = _mm512_loadu_ps(&temp[half + i]);
-                __m512 dprev = _mm512_loadu_ps(&temp[half + i - 1]);
-                __m512 sum = _mm512_add_ps(dprev, dcurr);
-                s = _mm512_fmadd_ps(beta_vec, sum, s);
-                _mm512_storeu_ps(&temp[i], s);
-            }
-            // scalar remainder
-            for (; i < limit; ++i) {
-                float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
-                float d_prev = (half + i - 1 < length && i > 0) ? temp[half + i - 1] : d_curr;
-                temp[i] += -0.052980118f * (d_prev + d_curr);
-            }
-        }
-    }
-
-    // -----------------------
-    // Step 3: Predict γ
-    // d[i] += gamma * (s[i] + s[i+1])
-    // -----------------------
-    if (half > 0) {
-        if (half == 1) {
-            if (half < length) {
-                temp[half + 0] += 0.882911076f * (temp[0] + temp[0]);
-            }
-        } else {
-            int limit = (half - 1);
-            int n_full = (limit / 16) * 16;
-            i = 0;
-            for (; i + 32 <= n_full; i += 32) {
-                __m512 s0 = _mm512_loadu_ps(&temp[i]);
-                __m512 s0n = _mm512_loadu_ps(&temp[i + 1]);
-                __m512 d0 = _mm512_loadu_ps(&temp[half + i]);
-                __m512 sum0 = _mm512_add_ps(s0, s0n);
-                d0 = _mm512_fmadd_ps(gamma_vec, sum0, d0);
-                _mm512_storeu_ps(&temp[half + i], d0);
-
-                __m512 s1 = _mm512_loadu_ps(&temp[i + 16]);
-                __m512 s1n = _mm512_loadu_ps(&temp[i + 17]);
-                __m512 d1 = _mm512_loadu_ps(&temp[half + i + 16]);
-                __m512 sum1 = _mm512_add_ps(s1, s1n);
-                d1 = _mm512_fmadd_ps(gamma_vec, sum1, d1);
-                _mm512_storeu_ps(&temp[half + i + 16], d1);
-            }
-            for (; i + 16 <= n_full; i += 16) {
-                __m512 s = _mm512_loadu_ps(&temp[i]);
-                __m512 sn = _mm512_loadu_ps(&temp[i + 1]);
-                __m512 d = _mm512_loadu_ps(&temp[half + i]);
-                __m512 sum = _mm512_add_ps(s, sn);
-                d = _mm512_fmadd_ps(gamma_vec, sum, d);
-                _mm512_storeu_ps(&temp[half + i], d);
-            }
-            for (; i < limit; ++i) {
-                temp[half + i] += 0.882911076f * (temp[i] + temp[i + 1]);
-            }
-            // last index mirror
-            int last = half - 1;
-            if (half + last < length) {
-                float s_curr = temp[last];
-                float s_next = s_curr;
-                temp[half + last] += 0.882911076f * (s_curr + s_next);
-            }
-        }
-    }
-
-    // -----------------------
-    // Step 4: Update δ
-    // s[i] += delta * (d[i-1] + d[i])
-    // -----------------------
-    if (half > 0) {
-        // i == 0
-        if (half >= 1) {
-            if (half + 0 < length) {
-                float d_curr0 = temp[half + 0];
-                temp[0] += 0.443506852f * (d_curr0 + d_curr0);
-            }
-        }
-
-        if (half > 1) {
-            int start = 1;
-            int limit = half; // exclusive
-            int n_elems = limit - start;
-            int n_full = (n_elems / 16) * 16;
-            i = start;
-            for (; i + 32 <= start + n_full; i += 32) {
-                __m512 s0 = _mm512_loadu_ps(&temp[i]);
-                __m512 dcurr0 = _mm512_loadu_ps(&temp[half + i]);
-                __m512 dprev0 = _mm512_loadu_ps(&temp[half + i - 1]);
-                __m512 sum0 = _mm512_add_ps(dprev0, dcurr0);
-                s0 = _mm512_fmadd_ps(delta_vec, sum0, s0);
-                _mm512_storeu_ps(&temp[i], s0);
-
-                __m512 s1 = _mm512_loadu_ps(&temp[i + 16]);
-                __m512 dcurr1 = _mm512_loadu_ps(&temp[half + i + 16]);
-                __m512 dprev1 = _mm512_loadu_ps(&temp[half + i + 15]);
-                __m512 sum1 = _mm512_add_ps(dprev1, dcurr1);
-                s1 = _mm512_fmadd_ps(delta_vec, sum1, s1);
-                _mm512_storeu_ps(&temp[i + 16], s1);
-            }
-            for (; i + 16 <= start + n_full; i += 16) {
-                __m512 s = _mm512_loadu_ps(&temp[i]);
-                __m512 dcurr = _mm512_loadu_ps(&temp[half + i]);
-                __m512 dprev = _mm512_loadu_ps(&temp[half + i - 1]);
-                __m512 sum = _mm512_add_ps(dprev, dcurr);
-                s = _mm512_fmadd_ps(delta_vec, sum, s);
-                _mm512_storeu_ps(&temp[i], s);
-            }
-            for (; i < limit; ++i) {
-                float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
-                float d_prev = (half + i - 1 < length && i > 0) ? temp[half + i - 1] : d_curr;
-                temp[i] += 0.443506852f * (d_prev + d_curr);
-            }
-        }
-    }
-
-    // -----------------------
-    // Step 5: Scaling
-    // s *= K, d *= invK
-    // -----------------------
-    // s (first half)
-    {
-        int n_full = (half / 16) * 16;
-        i = 0;
-        for (; i + 32 <= n_full; i += 32) {
-            __m512 s0 = _mm512_loadu_ps(&temp[i]);
-            s0 = _mm512_mul_ps(s0, K_vec);
-            _mm512_storeu_ps(&temp[i], s0);
-
-            __m512 s1 = _mm512_loadu_ps(&temp[i + 16]);
-            s1 = _mm512_mul_ps(s1, K_vec);
-            _mm512_storeu_ps(&temp[i + 16], s1);
-        }
-        for (; i + 16 <= n_full; i += 16) {
-            __m512 s = _mm512_loadu_ps(&temp[i]);
-            s = _mm512_mul_ps(s, K_vec);
-            _mm512_storeu_ps(&temp[i], s);
-        }
-        for (; i < half; ++i) temp[i] *= 1.230174105f;
-    }
-
-    // d (second half)
-    {
-        int dlen = length - half;
-        int n_full = (dlen / 16) * 16;
-        i = 0;
-        for (; i + 32 <= n_full; i += 32) {
-            __m512 d0 = _mm512_loadu_ps(&temp[half + i]);
-            d0 = _mm512_mul_ps(d0, invK_vec);
-            _mm512_storeu_ps(&temp[half + i], d0);
-
-            __m512 d1 = _mm512_loadu_ps(&temp[half + i + 16]);
-            d1 = _mm512_mul_ps(d1, invK_vec);
-            _mm512_storeu_ps(&temp[half + i + 16], d1);
-        }
-        for (; i + 16 <= n_full; i += 16) {
-            __m512 d = _mm512_loadu_ps(&temp[half + i]);
-            d = _mm512_mul_ps(d, invK_vec);
-            _mm512_storeu_ps(&temp[half + i], d);
-        }
-        for (; i < dlen; ++i) {
-            if (half + i < length) temp[half + i] /= 1.230174105f;
-        }
-    }
-
-    // Copy back and free
-    memcpy(data, temp, (size_t)length * sizeof(float));
-    free(temp);
-}
-
-// Haar Forward DWT with AVX-512
-static inline void dwt_haar_forward_1d_avx512(float *data, int length) {
-    if (length < 2) return;
-
-    float *temp = (float*)malloc(length * sizeof(float));
-    int half = (length + 1) / 2;
-
-    const __m512 half_vec = _mm512_set1_ps(0.5f);
-
-    // Process 16 pairs at a time
-    int i;
-    for (i = 0; i + 16 <= half; i += 16) {
-        __mmask16 valid_mask = 0xFFFF;
-
-        float even_vals[16], odd_vals[16];
-        for (int j = 0; j < 16; j++) {
-            even_vals[j] = data[2 * (i + j)];
-            if (2 * (i + j) + 1 < length) {
-                odd_vals[j] = data[2 * (i + j) + 1];
-            } else {
-                odd_vals[j] = even_vals[j];
-                valid_mask &= ~(1 << j);
-            }
-        }
-
-        __m512 even = _mm512_loadu_ps(even_vals);
-        __m512 odd = _mm512_loadu_ps(odd_vals);
-
-        // Low-pass: (even + odd) / 2
-        __m512 low = _mm512_mul_ps(_mm512_add_ps(even, odd), half_vec);
-        // High-pass: (even - odd) / 2
-        __m512 high = _mm512_mul_ps(_mm512_sub_ps(even, odd), half_vec);
-
-        _mm512_storeu_ps(&temp[i], low);
-        _mm512_mask_storeu_ps(&temp[half + i], valid_mask, high);
-    }
-
-    // Remaining scalar
-    for (; i < half; i++) {
-        if (2 * i + 1 < length) {
-            temp[i] = (data[2 * i] + data[2 * i + 1]) / 2.0f;
-            temp[half + i] = (data[2 * i] - data[2 * i + 1]) / 2.0f;
-        } else {
-            temp[i] = data[2 * i];
-            if (half + i < length) {
-                temp[half + i] = 0.0f;
-            }
-        }
-    }
-
-    memcpy(data, temp, length * sizeof(float));
-    free(temp);
-}
-
-// =============================================================================
-// AVX-512 Optimised Quantisation Functions
-// =============================================================================
-
-static inline void quantise_dwt_coefficients_avx512(
-    float *coeffs, int16_t *quantised, int size,
-    float effective_q, float dead_zone_threshold,
-    int width, int height, int decomp_levels, int is_chroma,
-    int (*get_subband_level)(int, int, int, int),
-    int (*get_subband_type)(int, int, int, int)
-) {
-    const __m512 q_vec = _mm512_set1_ps(effective_q);
-    const __m512 inv_q_vec = _mm512_set1_ps(1.0f / effective_q);
-    const __m512 half_vec = _mm512_set1_ps(0.5f);
-    const __m512 nhalf_vec = _mm512_set1_ps(-0.5f);
-    const __m512 zero_vec = _mm512_setzero_ps();
-    const __m512i min_i32 = _mm512_set1_epi32(-32768);
-    const __m512i max_i32 = _mm512_set1_epi32(32767);
-
-    int i;
-    for (i = 0; i + 16 <= size; i += 16) {
-        __m512 coeff = _mm512_loadu_ps(&coeffs[i]);
-        __m512 quant = _mm512_mul_ps(coeff, inv_q_vec);
-
-        // Dead-zone handling (simplified - full version needs per-coeff logic)
-        if (dead_zone_threshold > 0.0f && !is_chroma) {
-            __m512 threshold_vec = _mm512_set1_ps(dead_zone_threshold);
-            __m512 abs_quant = _mm512_abs_ps(quant);
-            __mmask16 dead_mask = _mm512_cmp_ps_mask(abs_quant, threshold_vec, _CMP_LE_OQ);
-            quant = _mm512_mask_blend_ps(dead_mask, quant, zero_vec);
-        }
-
-        // Manual rounding to match scalar behaviour (round away from zero)
-        // First add 0.5 or -0.5 based on sign
-        __mmask16 pos_mask = _mm512_cmp_ps_mask(quant, zero_vec, _CMP_GE_OQ);
-        __m512 round_val = _mm512_mask_blend_ps(pos_mask, nhalf_vec, half_vec);
-        quant = _mm512_add_ps(quant, round_val);
-
-        // Now truncate to int32 (this matches scalar (int32_t) cast after adding 0.5)
-        __m512i quant_i32 = _mm512_cvttps_epi32(quant);  // cvtt = truncate (round toward zero)
-        quant_i32 = _mm512_max_epi32(quant_i32, min_i32);
-        quant_i32 = _mm512_min_epi32(quant_i32, max_i32);
-
-        // Pack to int16 (AVX-512 has cvtsepi32_epi16)
-        __m256i quant_i16 = _mm512_cvtsepi32_epi16(quant_i32);
-        _mm256_storeu_si256((__m256i*)&quantised[i], quant_i16);
-    }
-
-    // Remaining scalar
-    for (; i < size; i++) {
-        float quantised_val = coeffs[i] / effective_q;
-
-        // Dead-zone (simplified)
-        if (dead_zone_threshold > 0.0f && !is_chroma) {
-            if (fabsf(quantised_val) <= dead_zone_threshold) {
-                quantised_val = 0.0f;
-            }
-        }
-
-        int32_t val = (int32_t)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f));
-        quantised[i] = (int16_t)((val < -32768) ? -32768 : (val > 32767 ? 32767 : val));
-    }
-}
-
-// Perceptual quantisation with per-coefficient weighting
-static inline void quantise_dwt_coefficients_perceptual_avx512(
-    float *coeffs, int16_t *quantised, int size,
-    float *weights,  // Pre-computed per-coefficient weights
-    float base_quantiser
-) {
-    const __m512 base_q_vec = _mm512_set1_ps(base_quantiser);
-    const __m512 half_vec = _mm512_set1_ps(0.5f);
-    const __m512 nhalf_vec = _mm512_set1_ps(-0.5f);
-    const __m512 zero_vec = _mm512_setzero_ps();
-    const __m512i min_i32 = _mm512_set1_epi32(-32768);
-    const __m512i max_i32 = _mm512_set1_epi32(32767);
-
-    int i;
-    for (i = 0; i + 16 <= size; i += 16) {
-        __m512 coeff = _mm512_loadu_ps(&coeffs[i]);
-        __m512 weight = _mm512_loadu_ps(&weights[i]);
-
-        // effective_q = base_q * weight
-        __m512 effective_q = _mm512_mul_ps(base_q_vec, weight);
-        __m512 quant = _mm512_div_ps(coeff, effective_q);
-
-        // Manual rounding to match scalar behaviour
-        __mmask16 pos_mask = _mm512_cmp_ps_mask(quant, zero_vec, _CMP_GE_OQ);
-        __m512 round_val = _mm512_mask_blend_ps(pos_mask, nhalf_vec, half_vec);
-        quant = _mm512_add_ps(quant, round_val);
-
-        // Truncate to int32 (matches scalar cast after rounding)
-        __m512i quant_i32 = _mm512_cvttps_epi32(quant);
-        quant_i32 = _mm512_max_epi32(quant_i32, min_i32);
-        quant_i32 = _mm512_min_epi32(quant_i32, max_i32);
-
-        __m256i quant_i16 = _mm512_cvtsepi32_epi16(quant_i32);
-        _mm256_storeu_si256((__m256i*)&quantised[i], quant_i16);
-    }
-
-    // Remaining scalar
-    for (; i < size; i++) {
-        float effective_q = base_quantiser * weights[i];
-        float quantised_val = coeffs[i] / effective_q;
-        int32_t val = (int32_t)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f));
-        quantised[i] = (int16_t)((val < -32768) ? -32768 : (val > 32767 ? 32767 : val));
-    }
-}
-
-// =============================================================================
-// AVX-512 Optimised Dequantisation Functions
-// =============================================================================
-
-// Basic dequantisation: quantised[i] * effective_q
-static inline void dequantise_dwt_coefficients_avx512(
-    const int16_t *quantised, float *coeffs, int size,
-    float effective_q
-) {
-    const __m512 q_vec = _mm512_set1_ps(effective_q);
-
-    int i;
-    for (i = 0; i + 16 <= size; i += 16) {
-        // Load 16 int16 values
-        __m256i quant_i16 = _mm256_loadu_si256((__m256i*)&quantised[i]);
-
-        // Convert int16 to int32
-        __m512i quant_i32 = _mm512_cvtepi16_epi32(quant_i16);
-
-        // Convert int32 to float
-        __m512 quant_f32 = _mm512_cvtepi32_ps(quant_i32);
-
-        // Multiply by quantiser
-        __m512 dequant = _mm512_mul_ps(quant_f32, q_vec);
-
-        _mm512_storeu_ps(&coeffs[i], dequant);
-    }
-
-    // Remaining scalar
-    for (; i < size; i++) {
-        coeffs[i] = (float)quantised[i] * effective_q;
-    }
-}
-
-// Perceptual dequantisation with per-coefficient weights
-static inline void dequantise_dwt_coefficients_perceptual_avx512(
-    const int16_t *quantised, float *coeffs, int size,
-    const float *weights, float base_quantiser
-) {
-    const __m512 base_q_vec = _mm512_set1_ps(base_quantiser);
-
-    int i;
-    for (i = 0; i + 16 <= size; i += 16) {
-        // Load 16 int16 values
-        __m256i quant_i16 = _mm256_loadu_si256((__m256i*)&quantised[i]);
-
-        // Convert int16 → int32 → float
-        __m512i quant_i32 = _mm512_cvtepi16_epi32(quant_i16);
-        __m512 quant_f32 = _mm512_cvtepi32_ps(quant_i32);
-
-        // Load weights
-        __m512 weight = _mm512_loadu_ps(&weights[i]);
-
-        // effective_q = base_q * weight
-        __m512 effective_q = _mm512_mul_ps(base_q_vec, weight);
-
-        // dequant = quantised * effective_q
-        __m512 dequant = _mm512_mul_ps(quant_f32, effective_q);
-
-        _mm512_storeu_ps(&coeffs[i], dequant);
-    }
-
-    // Remaining scalar
-    for (; i < size; i++) {
-        float effective_q = base_quantiser * weights[i];
-        coeffs[i] = (float)quantised[i] * effective_q;
-    }
-}
-
-// =============================================================================
-// AVX-512 Optimised RGB to YCoCg Conversion
-// =============================================================================
-
-static inline void rgb_to_ycocg_avx512(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height) {
-    const int total_pixels = width * height;
-    const __m512 half_vec = _mm512_set1_ps(0.5f);
-
-    int i;
-    // Process 16 pixels at a time (48 bytes of RGB data)
-    for (i = 0; i + 16 <= total_pixels; i += 16) {
-        // Load 16 RGB triplets (48 bytes)
-        // We need to deinterleave R, G, B channels
-
-        // Manual load and deinterleave (AVX-512 doesn't have direct RGB deinterleave)
-        float r_vals[16], g_vals[16], b_vals[16];
-        for (int j = 0; j < 16; j++) {
-            r_vals[j] = (float)rgb[(i + j) * 3 + 0];
-            g_vals[j] = (float)rgb[(i + j) * 3 + 1];
-            b_vals[j] = (float)rgb[(i + j) * 3 + 2];
-        }
-
-        __m512 r = _mm512_loadu_ps(r_vals);
-        __m512 g = _mm512_loadu_ps(g_vals);
-        __m512 b = _mm512_loadu_ps(b_vals);
-
-        // YCoCg-R transform:
-        // co = r - b
-        // tmp = b + co * 0.5
-        // cg = g - tmp
-        // y = tmp + cg * 0.5
-
-        __m512 co_vec = _mm512_sub_ps(r, b);
-        __m512 tmp = _mm512_fmadd_ps(co_vec, half_vec, b);  // tmp = b + co * 0.5
-        __m512 cg_vec = _mm512_sub_ps(g, tmp);
-        __m512 y_vec = _mm512_fmadd_ps(cg_vec, half_vec, tmp);  // y = tmp + cg * 0.5
-
-        _mm512_storeu_ps(&y[i], y_vec);
-        _mm512_storeu_ps(&co[i], co_vec);
-        _mm512_storeu_ps(&cg[i], cg_vec);
-    }
-
-    // Remaining pixels (scalar)
-    for (; i < total_pixels; i++) {
-        const float r = rgb[i * 3 + 0];
-        const float g = rgb[i * 3 + 1];
-        const float b = rgb[i * 3 + 2];
-
-        co[i] = r - b;
-        const float tmp = b + co[i] * 0.5f;
-        cg[i] = g - tmp;
-        y[i] = tmp + cg[i] * 0.5f;
-    }
-}
-
-// =============================================================================
-// AVX-512 Optimised 2D DWT with Gather/Scatter
-// =============================================================================
-
-// Optimised column extraction using gather
-static inline void dwt_2d_extract_column_avx512(
-    const float *tile_data, float *column,
-    int x, int width, int height
-) {
-    // Create gather indices for column extraction
-    // indices[i] = (i * width + x)
-
-    int y;
-    for (y = 0; y + 16 <= height; y += 16) {
-        // Build gather indices
-        int indices[16];
-        for (int j = 0; j < 16; j++) {
-            indices[j] = (y + j) * width + x;
-        }
-
-        __m512i vindex = _mm512_loadu_si512((__m512i*)indices);
-        __m512 col_data = _mm512_i32gather_ps(vindex, tile_data, 4);
-        _mm512_storeu_ps(&column[y], col_data);
-    }
-
-    // Remaining scalar
-    for (; y < height; y++) {
-        column[y] = tile_data[y * width + x];
-    }
-}
-
-// Optimised column insertion using scatter
-static inline void dwt_2d_insert_column_avx512(
-    float *tile_data, const float *column,
-    int x, int width, int height
-) {
-    int y;
-    for (y = 0; y + 16 <= height; y += 16) {
-        // Build scatter indices
-        int indices[16];
-        for (int j = 0; j < 16; j++) {
-            indices[j] = (y + j) * width + x;
-        }
-
-        __m512i vindex = _mm512_loadu_si512((__m512i*)indices);
-        __m512 col_data = _mm512_loadu_ps(&column[y]);
-        _mm512_i32scatter_ps(tile_data, vindex, col_data, 4);
-    }
-
-    // Remaining scalar
-    for (; y < height; y++) {
-        tile_data[y * width + x] = column[y];
-    }
-}
-
-#endif // __AVX512F__
-
-#endif // TAV_AVX512_H
diff --git a/video_encoder/include/tav_encoder_lib.h b/video_encoder/include/tav_encoder_lib.h
deleted file mode 100644
index e01e7f0..0000000
--- a/video_encoder/include/tav_encoder_lib.h
+++ /dev/null
@@ -1,295 +0,0 @@
-/**
- * TAV Encoder Library - Public API
- *
- * High-level interface for encoding video using the TSVM Advanced Video (TAV) codec.
- * Supports GOP-based encoding with internal multi-threading for optimal performance.
- *
- * Created by CuriousTorvald and Claude on 2025-12-03.
- */
-
-#ifndef TAV_ENCODER_LIB_H
-#define TAV_ENCODER_LIB_H
-
-#include <stdint.h>
-#include <stddef.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// =============================================================================
-// Opaque Encoder Context
-// =============================================================================
-
-/**
- * TAV encoder context - opaque to users.
- * Created with tav_encoder_create(), freed with tav_encoder_free().
- */
-typedef struct tav_encoder_context tav_encoder_context_t;
-
-// =============================================================================
-// Configuration Structures
-// =============================================================================
-
-/**
- * Video encoding parameters.
- */
-typedef struct {
-    // === Video Dimensions ===
-    int width;                    // Frame width (must be even)
-    int height;                   // Frame height (must be even)
-    int fps_num;                  // Framerate numerator (e.g., 60 for 60fps)
-    int fps_den;                  // Framerate denominator (e.g., 1 for 60/1)
-
-    // === Wavelet Configuration ===
-    int wavelet_type;             // Spatial wavelet: 0=CDF 5/3, 1=CDF 9/7 (default), 2=CDF 13/7, 16=DD-4, 255=Haar
-    int temporal_wavelet;         // Temporal wavelet: 0=Haar, 1=CDF 5/3 (default for smooth motion)
-    int decomp_levels;            // Spatial DWT levels (0=auto, typically 6)
-    int temporal_levels;          // Temporal DWT levels (0=auto, typically 2 for 8-frame GOPs)
-
-    // === Color Space ===
-    int channel_layout;           // 0=YCoCg-R (default), 1=ICtCp (for HDR/BT.2100 sources)
-    int perceptual_tuning;        // 1=enable HVS perceptual quantization (default), 0=uniform
-
-    // === GOP Configuration ===
-    int enable_temporal_dwt;      // 1=enable 3D DWT GOP encoding (default), 0=intra-only I-frames
-    int gop_size;                 // Frames per GOP (8, 16, or 24; 0=auto based on framerate)
-    int enable_two_pass;          // 1=enable two-pass with scene change detection (default), 0=single-pass
-
-    // === Quality Control ===
-    int quality_level;
-    int quantiser_y;                // Luma quantiser (0-255, indexed against QLUT)
-    int quantiser_co;               // Orange chrominance quantiser (0-255, indexed against QLUT)
-    int quantiser_cg;               // Green chrominance quantiser (0-255, indexed against QLUT)
-    float dead_zone_threshold;    // Dead-zone quantization threshold (0.0=disabled, 0.6-1.5 typical)
-
-    // === Entropy Coding ===
-    int entropy_coder;            // 0=Twobitmap (default), 1=EZBC (better for high-quality)
-    int zstd_level;               // Zstd compression level (3-22, default: 7)
-
-    // === Multi-threading ===
-    int num_threads;              // Worker threads (0=single-threaded, -1=auto, 1-16=explicit)
-
-    // === Encoder Presets ===
-    int encoder_preset;           // Preset flags: 0x01=sports (finer temporal quant), 0x02=anime (disable grain)
-
-    // === Advanced Options ===
-    int verbose;                  // 1=enable debug output, 0=quiet (default)
-    int monoblock;                // -1=auto (based on dimensions), 0=force tiled, 1=force monoblock
-
-} tav_encoder_params_t;
-
-/**
- * Initialize encoder parameters with default values.
- *
- * @param params  Parameter structure to initialize
- * @param width   Frame width
- * @param height  Frame height
- */
-void tav_encoder_params_init(tav_encoder_params_t *params, int width, int height);
-
-/**
- * Encoder output packet.
- * Contains encoded video or audio data.
- */
-typedef struct {
-    uint8_t *data;                // Packet data (owned by encoder, valid until next encode/flush)
-    size_t size;                  // Packet size in bytes
-    uint8_t packet_type;          // TAV packet type (0x10=I-frame, 0x12=GOP, 0x24=audio, etc.)
-    int frame_number;             // Frame number (for video packets)
-    int is_video;                 // 1=video packet, 0=audio packet
-} tav_encoder_packet_t;
-
-// =============================================================================
-// Encoder Lifecycle
-// =============================================================================
-
-/**
- * Create TAV encoder context.
- *
- * Allocates internal buffers, initializes thread pool (if multi-threading enabled),
- * and prepares encoder for frame submission.
- *
- * @param params  Encoder parameters (copied internally)
- * @return        Encoder context, or NULL on failure
- */
-tav_encoder_context_t *tav_encoder_create(const tav_encoder_params_t *params);
-
-/**
- * Free TAV encoder context.
- *
- * Shuts down thread pool, frees all buffers and resources.
- * Any unflushed frames in the GOP buffer will be lost.
- *
- * @param ctx  Encoder context
- */
-void tav_encoder_free(tav_encoder_context_t *ctx);
-
-/**
- * Get last error message.
- *
- * @param ctx  Encoder context
- * @return     Error message string (valid until next encode operation)
- */
-const char *tav_encoder_get_error(tav_encoder_context_t *ctx);
-
-/**
- * Get encoder parameters (with calculated values).
- * After context creation, params will contain actual values used
- * (e.g., auto-calculated decomp_levels, gop_size).
- *
- * @param ctx     Encoder context
- * @param params  Output parameters structure
- */
-void tav_encoder_get_params(tav_encoder_context_t *ctx, tav_encoder_params_t *params);
-
-/**
- * DEBUG: Validate encoder context integrity
- * Returns 1 if context appears valid, 0 otherwise
- */
-int tav_encoder_validate_context(tav_encoder_context_t *ctx);
-
-// =============================================================================
-// Video Encoding
-// =============================================================================
-
-/*
- * DEPRECATED: tav_encoder_encode_frame() and tav_encoder_flush() have been
- * removed. Use tav_encoder_encode_gop() instead, which works for both
- * single-threaded and multi-threaded modes. The CLI should buffer frames
- * and call encode_gop() when a full GOP is ready.
- */
-
-/**
- * Encode a complete GOP (Group of Pictures) directly.
- *
- * This function is STATELESS and THREAD-SAFE with separate contexts.
- * Perfect for multithreaded encoding from CLI:
- * - Each thread creates its own encoder context
- * - Each thread calls encode_gop() with a batch of frames
- * - No shared state, no locking needed
- *
- * Example multithreaded usage:
- * ```c
- * // Worker thread function
- * void* worker(void* arg) {
- *     work_item_t* item = (work_item_t*)arg;
- *
- *     // Create thread-local encoder context
- *     tav_encoder_context_t* ctx = tav_encoder_create(&shared_params);
- *
- *     // Encode this GOP
- *     tav_encoder_packet_t* packet;
- *     tav_encoder_encode_gop(ctx, item->frames, item->num_frames,
- *                            item->frame_numbers, &packet);
- *
- *     // Store packet in output queue
- *     queue_push(output_queue, packet);
- *
- *     tav_encoder_free(ctx);
- *     return NULL;
- * }
- * ```
- *
- * @param ctx            Encoder context (one per thread)
- * @param rgb_frames     Array of RGB24 frames [frame][width*height*3]
- * @param num_frames     Number of frames in GOP (1-24)
- * @param frame_numbers  Frame indices for timecodes (can be NULL)
- * @param packet         Output packet pointer
- * @return               1 if packet ready, -1 on error
- */
-int tav_encoder_encode_gop(tav_encoder_context_t *ctx,
-                            const uint8_t **rgb_frames,
-                            int num_frames,
-                            const int *frame_numbers,
-                            tav_encoder_packet_t **packet);
-
-/**
- * Free a packet returned by encode_frame(), flush(), or encode_gop().
- *
- * @param packet  Packet to free (can be NULL)
- */
-void tav_encoder_free_packet(tav_encoder_packet_t *packet);
-
-// =============================================================================
-// Audio Encoding (Optional)
-// =============================================================================
-
-/**
- * Encode audio samples (TAD codec).
- *
- * Audio is encoded synchronously and returned immediately.
- * For TAV muxing: interleave audio packets with video packets by frame PTS.
- *
- * @param ctx              Encoder context
- * @param pcm_samples      PCM32f stereo samples (interleaved: L,R,L,R,...), num_samples×2 floats
- * @param num_samples      Number of samples per channel
- * @param packet           Output packet pointer
- * @return                 1 if packet ready, -1 on error
- */
-int tav_encoder_encode_audio(tav_encoder_context_t *ctx,
-                              const float *pcm_samples,
-                              size_t num_samples,
-                              tav_encoder_packet_t **packet);
-
-// =============================================================================
-// Statistics and Info
-// =============================================================================
-
-/**
- * Get encoding statistics.
- */
-typedef struct {
-    int64_t frames_encoded;       // Total frames encoded
-    int64_t gops_encoded;         // Total GOPs encoded
-    size_t total_bytes;           // Total bytes output (video + audio)
-    size_t video_bytes;           // Video bytes
-    size_t audio_bytes;           // Audio bytes
-    double avg_bitrate_kbps;      // Average bitrate (kbps)
-    double encoding_fps;          // Encoding speed (frames/sec)
-} tav_encoder_stats_t;
-
-/**
- * Get encoding statistics.
- *
- * @param ctx    Encoder context
- * @param stats  Output statistics structure
- */
-void tav_encoder_get_stats(tav_encoder_context_t *ctx, tav_encoder_stats_t *stats);
-
-// =============================================================================
-// TAV Packet Types (for reference)
-// =============================================================================
-
-#define TAV_PACKET_IFRAME        0x10  // I-frame (intra-only, single frame)
-#define TAV_PACKET_PFRAME        0x11  // P-frame (delta from previous)
-#define TAV_PACKET_GOP_UNIFIED   0x12  // GOP unified (3D DWT, multiple frames)
-#define TAV_PACKET_AUDIO_TAD     0x24  // TAD audio (DWT-based perceptual codec)
-#define TAV_PACKET_AUDIO_PCM8    0x20  // PCM8 audio (legacy)
-#define TAV_PACKET_LOOP_START    0xF0  // Loop point start (no payload)
-#define TAV_PACKET_GOP_SYNC      0xFC  // GOP sync (frame count marker)
-#define TAV_PACKET_TIMECODE      0xFD  // Timecode metadata
-#define TAV_PACKET_SYNC          0xFF  // Sync packet (no payload)
-
-// =============================================================================
-// Tile Settings (for multi-tile mode)
-// =============================================================================
-
-#define TAV_TILE_SIZE_X 640               // Base tile width
-#define TAV_TILE_SIZE_Y 540               // Base tile height
-#define TAV_DWT_FILTER_HALF_SUPPORT 4     // For 9/7 filter (filter lengths 9,7 → L=4)
-#define TAV_TILE_MARGIN_LEVELS 3          // Use margin for 3 levels: 4 * (2^3) = 32px
-#define TAV_TILE_MARGIN (TAV_DWT_FILTER_HALF_SUPPORT * (1 << TAV_TILE_MARGIN_LEVELS))  // 32px
-#define TAV_PADDED_TILE_SIZE_X (TAV_TILE_SIZE_X + 2 * TAV_TILE_MARGIN)  // 704
-#define TAV_PADDED_TILE_SIZE_Y (TAV_TILE_SIZE_Y + 2 * TAV_TILE_MARGIN)  // 604
-
-// Monoblock threshold: D1 PAL resolution (720x576)
-// If width > 720 OR height > 576, automatically switch to tiled mode
-#define TAV_MONOBLOCK_MAX_WIDTH  720
-#define TAV_MONOBLOCK_MAX_HEIGHT 576
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // TAV_ENCODER_LIB_H
diff --git a/video_encoder/include/tav_simd_dispatch.h b/video_encoder/include/tav_simd_dispatch.h
deleted file mode 100644
index d5488e7..0000000
--- a/video_encoder/include/tav_simd_dispatch.h
+++ /dev/null
@@ -1,275 +0,0 @@
-/*
- * TAV SIMD Function Dispatcher
- *
- * This file provides runtime CPU detection and function pointer dispatch
- * for SIMD-optimized versions of performance-critical TAV encoder functions.
- *
- * Usage:
- * 1. Include this header after defining all scalar functions
- * 2. Call tav_simd_init() once at encoder initialization
- * 3. Use function pointers (e.g., dwt_53_forward_1d_ptr) throughout code
- *
- * The dispatcher will automatically select AVX-512, AVX2, or scalar versions
- * based on runtime CPU capabilities.
- */
-
-#ifndef TAV_SIMD_DISPATCH_H
-#define TAV_SIMD_DISPATCH_H
-
-#include <stdint.h>
-
-// =============================================================================
-// Function Pointer Types
-// =============================================================================
-
-// 1D DWT function pointer types
-typedef void (*dwt_1d_func_t)(float *data, int length);
-
-// Quantization function pointer types
-typedef void (*quantise_basic_func_t)(
-    float *coeffs, int16_t *quantised, int size,
-    float effective_q, float dead_zone_threshold,
-    int width, int height, int decomp_levels, int is_chroma,
-    int (*get_subband_level)(int, int, int, int),
-    int (*get_subband_type)(int, int, int, int)
-);
-
-typedef void (*quantise_perceptual_func_t)(
-    float *coeffs, int16_t *quantised, int size,
-    float *weights, float base_quantiser
-);
-
-// Color conversion function pointer type
-typedef void (*rgb_to_ycocg_func_t)(
-    const uint8_t *rgb, float *y, float *co, float *cg,
-    int width, int height
-);
-
-// 2D DWT column operations
-typedef void (*dwt_2d_column_extract_func_t)(
-    const float *tile_data, float *column,
-    int x, int width, int height
-);
-
-typedef void (*dwt_2d_column_insert_func_t)(
-    float *tile_data, const float *column,
-    int x, int width, int height
-);
-
-// =============================================================================
-// Global Function Pointers (initialized by tav_simd_init)
-// =============================================================================
-
-// DWT 1D transforms
-static dwt_1d_func_t dwt_53_forward_1d_ptr = NULL;
-static dwt_1d_func_t dwt_97_forward_1d_ptr = NULL;
-static dwt_1d_func_t dwt_haar_forward_1d_ptr = NULL;
-static dwt_1d_func_t dwt_53_inverse_1d_ptr = NULL;
-static dwt_1d_func_t dwt_haar_inverse_1d_ptr = NULL;
-
-// Quantization
-static quantise_basic_func_t quantise_dwt_coefficients_ptr = NULL;
-static quantise_perceptual_func_t quantise_dwt_coefficients_perceptual_ptr = NULL;
-
-// Color conversion
-static rgb_to_ycocg_func_t rgb_to_ycocg_ptr = NULL;
-
-// 2D DWT column operations
-static dwt_2d_column_extract_func_t dwt_2d_extract_column_ptr = NULL;
-static dwt_2d_column_insert_func_t dwt_2d_insert_column_ptr = NULL;
-
-// =============================================================================
-// SIMD Capability Detection
-// =============================================================================
-
-typedef enum {
-    SIMD_NONE = 0,
-    SIMD_AVX512F = 1,
-    SIMD_AVX2 = 2,
-    SIMD_SSE42 = 3
-} simd_level_t;
-
-static simd_level_t detected_simd_level = SIMD_NONE;
-
-static inline simd_level_t detect_simd_capabilities(void) {
-#if defined(__GNUC__) || defined(__clang__)
-    // Use GCC/Clang built-in CPU detection
-    if (!__builtin_cpu_supports("sse4.2")) {
-        return SIMD_NONE;
-    }
-
-#ifdef __AVX512F__
-    if (__builtin_cpu_supports("avx512f") &&
-        __builtin_cpu_supports("avx512dq") &&
-        __builtin_cpu_supports("avx512bw") &&
-        __builtin_cpu_supports("avx512vl")) {
-        return SIMD_AVX512F;
-    }
-#endif
-
-#ifdef __AVX2__
-    if (__builtin_cpu_supports("avx2")) {
-        return SIMD_AVX2;
-    }
-#endif
-
-    if (__builtin_cpu_supports("sse4.2")) {
-        return SIMD_SSE42;
-    }
-#endif
-
-    return SIMD_NONE;
-}
-
-// =============================================================================
-// Scalar Fallback Wrappers
-// =============================================================================
-
-// These wrappers adapt the scalar functions to match function pointer signatures
-
-static void quantise_dwt_coefficients_scalar_wrapper(
-    float *coeffs, int16_t *quantised, int size,
-    float effective_q, float dead_zone_threshold,
-    int width, int height, int decomp_levels, int is_chroma,
-    int (*get_subband_level)(int, int, int, int),
-    int (*get_subband_type)(int, int, int, int)
-);
-// Implementation provided by including encoder - just declare prototype
-
-static void quantise_dwt_coefficients_perceptual_scalar_wrapper(
-    float *coeffs, int16_t *quantised, int size,
-    float *weights, float base_quantiser
-);
-// Implementation provided by including encoder
-
-static void dwt_2d_extract_column_scalar(
-    const float *tile_data, float *column,
-    int x, int width, int height
-) {
-    for (int y = 0; y < height; y++) {
-        column[y] = tile_data[y * width + x];
-    }
-}
-
-static void dwt_2d_insert_column_scalar(
-    float *tile_data, const float *column,
-    int x, int width, int height
-) {
-    for (int y = 0; y < height; y++) {
-        tile_data[y * width + x] = column[y];
-    }
-}
-
-// =============================================================================
-// SIMD Initialization
-// =============================================================================
-
-static void tav_simd_init(void) {
-    // Detect CPU capabilities
-    detected_simd_level = detect_simd_capabilities();
-
-    const char *simd_names[] = {"None", "AVX-512", "AVX2", "SSE4.2"};
-    fprintf(stderr, "[TAV] SIMD level detected: %s\n",
-            simd_names[detected_simd_level]);
-
-#ifdef __AVX512F__
-    if (detected_simd_level == SIMD_AVX512F) {
-        fprintf(stderr, "[TAV] Using AVX-512 optimizations\n");
-
-        // DWT functions
-        extern void dwt_53_forward_1d_avx512(float *data, int length);
-        extern void dwt_97_forward_1d_avx512(float *data, int length);
-        extern void dwt_haar_forward_1d_avx512(float *data, int length);
-
-        dwt_53_forward_1d_ptr = dwt_53_forward_1d_avx512;
-        dwt_97_forward_1d_ptr = dwt_97_forward_1d_avx512;
-        dwt_haar_forward_1d_ptr = dwt_haar_forward_1d_avx512;
-
-        // Quantization
-        // Note: Need wrapper functions that match the complex signature
-        // For now, using scalar versions
-        extern void dwt_53_forward_1d(float *data, int length);
-        extern void dwt_97_forward_1d(float *data, int length);
-        extern void dwt_haar_forward_1d(float *data, int length);
-        extern void dwt_53_inverse_1d(float *data, int length);
-        extern void dwt_haar_inverse_1d(float *data, int length);
-
-        // Fallback to scalar for inverse (can optimize later)
-        dwt_53_inverse_1d_ptr = dwt_53_inverse_1d;
-        dwt_haar_inverse_1d_ptr = dwt_haar_inverse_1d;
-
-        // Color conversion
-        extern void rgb_to_ycocg_avx512(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height);
-        rgb_to_ycocg_ptr = rgb_to_ycocg_avx512;
-
-        // 2D column operations
-        extern void dwt_2d_extract_column_avx512(const float *tile_data, float *column, int x, int width, int height);
-        extern void dwt_2d_insert_column_avx512(float *tile_data, const float *column, int x, int width, int height);
-
-        dwt_2d_extract_column_ptr = dwt_2d_extract_column_avx512;
-        dwt_2d_insert_column_ptr = dwt_2d_insert_column_avx512;
-
-        // Quantization uses scalar for now (needs integration work)
-        extern void dwt_53_forward_1d(float *data, int length);
-        extern void dwt_97_forward_1d(float *data, int length);
-        extern void dwt_haar_forward_1d(float *data, int length);
-        extern void dwt_53_inverse_1d(float *data, int length);
-        extern void dwt_haar_inverse_1d(float *data, int length);
-        extern void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height);
-
-        quantise_dwt_coefficients_ptr = quantise_dwt_coefficients_scalar_wrapper;
-        quantise_dwt_coefficients_perceptual_ptr = quantise_dwt_coefficients_perceptual_scalar_wrapper;
-
-        return;
-    }
-#endif
-
-    // Fallback to scalar implementations
-    fprintf(stderr, "[TAV] Using scalar (non-SIMD) implementations\n");
-
-    extern void dwt_53_forward_1d(float *data, int length);
-    extern void dwt_97_forward_1d(float *data, int length);
-    extern void dwt_haar_forward_1d(float *data, int length);
-    extern void dwt_53_inverse_1d(float *data, int length);
-    extern void dwt_haar_inverse_1d(float *data, int length);
-    extern void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height);
-
-    dwt_53_forward_1d_ptr = dwt_53_forward_1d;
-    dwt_97_forward_1d_ptr = dwt_97_forward_1d;
-    dwt_haar_forward_1d_ptr = dwt_haar_forward_1d;
-    dwt_53_inverse_1d_ptr = dwt_53_inverse_1d;
-    dwt_haar_inverse_1d_ptr = dwt_haar_inverse_1d;
-
-    rgb_to_ycocg_ptr = rgb_to_ycocg;
-
-    dwt_2d_extract_column_ptr = dwt_2d_extract_column_scalar;
-    dwt_2d_insert_column_ptr = dwt_2d_insert_column_scalar;
-
-    quantise_dwt_coefficients_ptr = quantise_dwt_coefficients_scalar_wrapper;
-    quantise_dwt_coefficients_perceptual_ptr = quantise_dwt_coefficients_perceptual_scalar_wrapper;
-}
-
-// =============================================================================
-// Convenience Macros for Code Readability
-// =============================================================================
-
-// Use these macros in encoder code for cleaner dispatch
-#define DWT_53_FORWARD_1D(data, length) \
-    dwt_53_forward_1d_ptr((data), (length))
-
-#define DWT_97_FORWARD_1D(data, length) \
-    dwt_97_forward_1d_ptr((data), (length))
-
-#define DWT_HAAR_FORWARD_1D(data, length) \
-    dwt_haar_forward_1d_ptr((data), (length))
-
-#define RGB_TO_YCOCG(rgb, y, co, cg, width, height) \
-    rgb_to_ycocg_ptr((rgb), (y), (co), (cg), (width), (height))
-
-#define DWT_2D_EXTRACT_COLUMN(tile_data, column, x, width, height) \
-    dwt_2d_extract_column_ptr((tile_data), (column), (x), (width), (height))
-
-#define DWT_2D_INSERT_COLUMN(tile_data, column, x, width, height) \
-    dwt_2d_insert_column_ptr((tile_data), (column), (x), (width), (height))
-
-#endif // TAV_SIMD_DISPATCH_H
diff --git a/video_encoder/include/tav_video_decoder.h b/video_encoder/include/tav_video_decoder.h
deleted file mode 100644
index a8b9e58..0000000
--- a/video_encoder/include/tav_video_decoder.h
+++ /dev/null
@@ -1,78 +0,0 @@
-// Created by CuriousTorvald and Claude on 2025-12-02.
-// TAV Video Decoder Library - Shared decoding functions for TAV format
-// Can be used by both regular TAV decoder and TAV-DT decoder
-
-#ifndef TAV_VIDEO_DECODER_H
-#define TAV_VIDEO_DECODER_H
-
-#include <stdint.h>
-#include <stddef.h>
-
-// Video decoder context - opaque to users
-typedef struct tav_video_context tav_video_context_t;
-
-// Video parameters structure
-typedef struct {
-    int width;
-    int height;
-    int decomp_levels;        // Spatial DWT levels (typically 4)
-    int temporal_levels;      // Temporal DWT levels (typically 2)
-    int wavelet_filter;       // 0=CDF 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar
-    int temporal_wavelet;     // Temporal wavelet (0=CDF 5/3, 1=CDF 9/7)
-    int entropy_coder;        // 0=Twobitmap, 1=EZBC, 2=RAW
-    int channel_layout;       // 0=YCoCg-R, 1=ICtCp
-    int perceptual_tuning;    // 1=perceptual quantisation, 0=uniform
-    uint8_t quantiser_y;      // Base quantiser index for Y/I
-    uint8_t quantiser_co;     // Base quantiser index for Co/Ct
-    uint8_t quantiser_cg;     // Base quantiser index for Cg/Cp
-    uint8_t encoder_preset;   // Encoder preset flags (sports, anime, etc.)
-    int monoblock;            // 1=single tile (monoblock), 0=multi-tile
-    int no_zstd;              // 1=packets are uncompressed (Video Flags bit 4), 0=Zstd compressed
-} tav_video_params_t;
-
-// Create video decoder context
-// Returns NULL on failure
-tav_video_context_t *tav_video_create(const tav_video_params_t *params);
-
-// Free video decoder context
-void tav_video_free(tav_video_context_t *ctx);
-
-// Decode GOP_UNIFIED packet (0x12) to RGB24 frames
-// Input: compressed_data - GOP packet data (after packet type byte)
-//        compressed_size - size of compressed data
-//        gop_size - number of frames in GOP (read from packet)
-// Output: rgb_frames - array of pointers to RGB24 frame buffers (width*height*3 each)
-//         Must be pre-allocated by caller (gop_size pointers, each pointing to width*height*3 bytes)
-// Returns: 0 on success, -1 on error
-int tav_video_decode_gop(tav_video_context_t *ctx,
-                         const uint8_t *compressed_data, uint32_t compressed_size,
-                         uint8_t gop_size, uint8_t **rgb_frames);
-
-// Decode IFRAME packet (0x10) to RGB24 frame
-// Input: compressed_data - I-frame packet data (after packet type byte)
-//        packet_size - size of packet data
-// Output: rgb_frame - pointer to RGB24 frame buffer (width*height*3 bytes)
-//         Must be pre-allocated by caller
-// Returns: 0 on success, -1 on error
-int tav_video_decode_iframe(tav_video_context_t *ctx,
-                            const uint8_t *compressed_data, uint32_t packet_size,
-                            uint8_t *rgb_frame);
-
-// Decode PFRAME packet (0x11) to RGB24 frame (delta from reference)
-// Input: compressed_data - P-frame packet data (after packet type byte)
-//        packet_size - size of packet data
-// Output: rgb_frame - pointer to RGB24 frame buffer (width*height*3 bytes)
-//         Must be pre-allocated by caller
-// Returns: 0 on success, -1 on error
-// Note: Requires previous frame to be decoded first (stored internally as reference)
-int tav_video_decode_pframe(tav_video_context_t *ctx,
-                            const uint8_t *compressed_data, uint32_t packet_size,
-                            uint8_t *rgb_frame);
-
-// Get last error message
-const char *tav_video_get_error(tav_video_context_t *ctx);
-
-// Enable verbose debug output
-void tav_video_set_verbose(tav_video_context_t *ctx, int verbose);
-
-#endif // TAV_VIDEO_DECODER_H
diff --git a/video_encoder/lib/libfec/ldpc.c b/video_encoder/lib/libfec/ldpc.c
deleted file mode 100644
index ce2f2ff..0000000
--- a/video_encoder/lib/libfec/ldpc.c
+++ /dev/null
@@ -1,397 +0,0 @@
-/**
- * LDPC Rate 1/2 Codec Implementation
- *
- * LDPC for TAV-DT header protection.
- * Uses a systematic rate 1/2 code with sum-product belief propagation decoder.
- *
- * The parity-check matrix is designed for good error correction on small blocks.
- * Each parity bit is computed as XOR of multiple data bits using a pseudo-random
- * but deterministic pattern.
- *
- * Created by CuriousTorvald and Claude on 2025-12-09.
- * Updated 2025-12-17: Replaced bit-flipping with belief propagation decoder.
- */
-
-#include "ldpc.h"
-#include <string.h>
-#include <stdio.h>
-#include <math.h>
-
-// Channel LLR magnitude for hard-decision input
-// Higher value = more confidence in received bits
-// For BER ~0.01, optimal is about 4.6; we use slightly lower for robustness
-#define CHANNEL_LLR_MAG 4.0f
-
-// Clipping value to prevent numerical overflow in tanh operations
-#define LLR_CLIP 20.0f
-
-// =============================================================================
-// Parity-Check Matrix Generation
-// =============================================================================
-
-// For rate 1/2 LDPC: n = 2k bits, parity-check matrix H is (n-k) x n = k x 2k
-// We use H = [P | I_k] where P is the parity pattern matrix
-// This gives systematic encoding: c = [data | parity] where parity = P * data
-
-// Parity pattern: each parity bit j depends on data bits where pattern[j][i] = 1
-// We use a regular pattern with column weight 3 (each data bit affects 3 parity bits)
-// and row weight varies to cover the data bits well
-
-// Simple hash function for generating parity connections
-static inline uint32_t hash_mix(uint32_t a, uint32_t b) {
-    a ^= b;
-    a = (a ^ (a >> 16)) * 0x85ebca6b;
-    a = (a ^ (a >> 13)) * 0xc2b2ae35;
-    return a ^ (a >> 16);
-}
-
-// Get bit from byte array
-static inline int get_bit(const uint8_t *data, int bit_idx) {
-    return (data[bit_idx >> 3] >> (7 - (bit_idx & 7))) & 1;
-}
-
-// Set bit in byte array
-static inline void set_bit(uint8_t *data, int bit_idx, int value) {
-    int byte_idx = bit_idx >> 3;
-    int bit_pos = 7 - (bit_idx & 7);
-    if (value) {
-        data[byte_idx] |= (1 << bit_pos);
-    } else {
-        data[byte_idx] &= ~(1 << bit_pos);
-    }
-}
-
-// Flip bit in byte array
-static inline void flip_bit(uint8_t *data, int bit_idx) {
-    int byte_idx = bit_idx >> 3;
-    int bit_pos = 7 - (bit_idx & 7);
-    data[byte_idx] ^= (1 << bit_pos);
-}
-
-// Get list of data bits that affect parity bit j
-// Returns number of connected data bits, stores indices in connections[]
-// For rate 1/2: data bits are 0 to k*8-1, parity bits are k*8 to 2*k*8-1
-static int get_parity_connections(int parity_idx, int k_bits, int *connections) {
-    int count = 0;
-
-    // Use a deterministic pseudo-random pattern
-    // Each parity bit connects to approximately k_bits/3 data bits
-    // Different seeds for different parity positions ensure coverage
-
-    uint32_t seed = hash_mix(0xDEADBEEF, (uint32_t)parity_idx);
-
-    for (int i = 0; i < k_bits; i++) {
-        // Each data bit has ~3/k_bits chance of connecting to this parity bit
-        // Total connections per parity ~ 3 (column weight)
-        uint32_t h = hash_mix(seed, (uint32_t)i);
-        if ((h % (k_bits / 3 + 1)) == 0) {
-            connections[count++] = i;
-        }
-    }
-
-    // Ensure at least 2 connections per parity bit
-    if (count < 2) {
-        connections[count++] = parity_idx % k_bits;
-        connections[count++] = (parity_idx + k_bits / 2) % k_bits;
-    }
-
-    return count;
-}
-
-// Get list of parity bits affected by data bit i
-static int get_data_connections(int data_idx, int k_bits, int *connections) {
-    int count = 0;
-
-    for (int j = 0; j < k_bits; j++) {
-        int parity_conns[LDPC_MAX_DATA_BYTES * 8];
-        int n_conns = get_parity_connections(j, k_bits, parity_conns);
-
-        for (int c = 0; c < n_conns; c++) {
-            if (parity_conns[c] == data_idx) {
-                connections[count++] = j;
-                break;
-            }
-        }
-    }
-
-    return count;
-}
-
-// =============================================================================
-// Initialization
-// =============================================================================
-
-static int ldpc_initialized = 0;
-
-void ldpc_init(void) {
-    if (ldpc_initialized) return;
-    // No pre-computation needed - patterns generated on the fly
-    ldpc_initialized = 1;
-}
-
-// =============================================================================
-// Encoding
-// =============================================================================
-
-size_t ldpc_encode(const uint8_t *data, size_t data_len, uint8_t *output) {
-    if (!ldpc_initialized) ldpc_init();
-
-    if (data_len > LDPC_MAX_DATA_BYTES) {
-        data_len = LDPC_MAX_DATA_BYTES;
-    }
-
-    int k_bits = (int)(data_len * 8);  // Number of data bits
-
-    // Copy data to output (systematic encoding)
-    memcpy(output, data, data_len);
-
-    // Initialize parity bytes to zero
-    memset(output + data_len, 0, data_len);
-
-    // Compute parity bits
-    for (int j = 0; j < k_bits; j++) {
-        // Get data bits connected to parity bit j
-        int connections[LDPC_MAX_DATA_BYTES * 8];
-        int n_conns = get_parity_connections(j, k_bits, connections);
-
-        // Parity bit = XOR of connected data bits
-        int parity = 0;
-        for (int c = 0; c < n_conns; c++) {
-            parity ^= get_bit(data, connections[c]);
-        }
-
-        // Set parity bit
-        set_bit(output + data_len, j, parity);
-    }
-
-    return data_len * 2;
-}
-
-// =============================================================================
-// Decoding
-// =============================================================================
-
-int ldpc_check_syndrome(const uint8_t *codeword, size_t len) {
-    if (!ldpc_initialized) ldpc_init();
-
-    size_t data_len = len / 2;
-    int k_bits = (int)(data_len * 8);
-
-    // Check all parity equations
-    for (int j = 0; j < k_bits; j++) {
-        int connections[LDPC_MAX_DATA_BYTES * 8];
-        int n_conns = get_parity_connections(j, k_bits, connections);
-
-        // Compute syndrome bit: XOR of connected data bits XOR parity bit
-        int syndrome = get_bit(codeword + data_len, j);
-        for (int c = 0; c < n_conns; c++) {
-            syndrome ^= get_bit(codeword, connections[c]);
-        }
-
-        if (syndrome != 0) {
-            return 0;  // Syndrome non-zero: errors detected
-        }
-    }
-
-    return 1;  // Zero syndrome: valid codeword
-}
-
-// Clip LLR to prevent overflow
-static inline float clip_llr(float llr) {
-    if (llr > LLR_CLIP) return LLR_CLIP;
-    if (llr < -LLR_CLIP) return -LLR_CLIP;
-    return llr;
-}
-
-// Sign of a float (returns +1 or -1)
-static inline float sign_f(float x) {
-    return (x >= 0.0f) ? 1.0f : -1.0f;
-}
-
-int ldpc_decode(const uint8_t *encoded, size_t encoded_len, uint8_t *output) {
-    if (!ldpc_initialized) ldpc_init();
-
-    if (encoded_len < 2 || (encoded_len & 1) != 0) {
-        return -1;  // Invalid length
-    }
-
-    size_t data_len = encoded_len / 2;
-    if (data_len > LDPC_MAX_DATA_BYTES) {
-        return -1;
-    }
-
-    int k_bits = (int)(data_len * 8);
-    int n_bits = k_bits * 2;  // Total codeword bits (data + parity)
-
-    // Pre-compute the parity check matrix structure for efficiency
-    // For each check node j: which variable nodes it connects to
-    int check_to_var[LDPC_MAX_DATA_BYTES * 8][LDPC_MAX_DATA_BYTES * 8 + 1];
-    int check_degree[LDPC_MAX_DATA_BYTES * 8];
-
-    for (int j = 0; j < k_bits; j++) {
-        int connections[LDPC_MAX_DATA_BYTES * 8];
-        int n_conns = get_parity_connections(j, k_bits, connections);
-
-        // Check j connects to: data bits in connections[] + parity bit j
-        check_degree[j] = n_conns + 1;
-        for (int c = 0; c < n_conns; c++) {
-            check_to_var[j][c] = connections[c];  // Data bit index
-        }
-        check_to_var[j][n_conns] = k_bits + j;  // Parity bit index
-    }
-
-    // Initialize channel LLRs from received hard bits
-    // LLR > 0 means bit is probably 0, LLR < 0 means bit is probably 1
-    float channel_llr[LDPC_MAX_DATA_BYTES * 16];
-    for (int i = 0; i < n_bits; i++) {
-        int bit = get_bit(encoded, i);
-        channel_llr[i] = bit ? -CHANNEL_LLR_MAG : CHANNEL_LLR_MAG;
-    }
-
-    // Message arrays for BP
-    // check_to_var_msg[j][idx] = message from check j to variable check_to_var[j][idx]
-    float check_to_var_msg[LDPC_MAX_DATA_BYTES * 8][LDPC_MAX_DATA_BYTES * 8 + 1];
-
-    // Initialize check-to-variable messages to zero
-    memset(check_to_var_msg, 0, sizeof(check_to_var_msg));
-
-    // Belief Propagation iterations
-    for (int iter = 0; iter < LDPC_MAX_ITERATIONS; iter++) {
-        // Step 1: Variable-to-check messages (implicit, computed on the fly)
-        // var_to_check[v→j] = channel_llr[v] + sum of all check_to_var_msg[k][idx_v] for k != j
-
-        // Step 2: Check-to-variable messages using min-sum approximation
-        // For each check node j, for each connected variable v:
-        // check_to_var_msg[j→v] = sign * min(|incoming messages from other vars|)
-
-        for (int j = 0; j < k_bits; j++) {
-            int degree = check_degree[j];
-
-            // First, compute variable-to-check messages for all variables in this check
-            float var_to_check[LDPC_MAX_DATA_BYTES * 8 + 1];
-            for (int idx = 0; idx < degree; idx++) {
-                int v = check_to_var[j][idx];
-
-                // Sum all incoming check messages to variable v, except from check j
-                float sum = channel_llr[v];
-                for (int jj = 0; jj < k_bits; jj++) {
-                    if (jj == j) continue;
-                    // Find if check jj connects to variable v
-                    for (int idx2 = 0; idx2 < check_degree[jj]; idx2++) {
-                        if (check_to_var[jj][idx2] == v) {
-                            sum += check_to_var_msg[jj][idx2];
-                            break;
-                        }
-                    }
-                }
-                var_to_check[idx] = clip_llr(sum);
-            }
-
-            // Now compute check-to-variable messages using min-sum
-            for (int idx = 0; idx < degree; idx++) {
-                float sign_prod = 1.0f;
-                float min_abs = 1e30f;
-
-                for (int idx2 = 0; idx2 < degree; idx2++) {
-                    if (idx2 == idx) continue;
-                    float msg = var_to_check[idx2];
-                    sign_prod *= sign_f(msg);
-                    float abs_msg = fabsf(msg);
-                    if (abs_msg < min_abs) min_abs = abs_msg;
-                }
-
-                // Min-sum with scaling factor 0.75 for better performance
-                check_to_var_msg[j][idx] = clip_llr(sign_prod * min_abs * 0.75f);
-            }
-        }
-
-        // Step 3: Compute posterior LLRs and make hard decisions
-        float posterior[LDPC_MAX_DATA_BYTES * 16];
-        for (int v = 0; v < n_bits; v++) {
-            float sum = channel_llr[v];
-            // Add all incoming check-to-variable messages
-            for (int j = 0; j < k_bits; j++) {
-                for (int idx = 0; idx < check_degree[j]; idx++) {
-                    if (check_to_var[j][idx] == v) {
-                        sum += check_to_var_msg[j][idx];
-                        break;
-                    }
-                }
-            }
-            posterior[v] = sum;
-        }
-
-        // Make hard decisions
-        uint8_t decoded[LDPC_MAX_DATA_BYTES * 2];
-        memset(decoded, 0, encoded_len);
-        for (int v = 0; v < n_bits; v++) {
-            if (posterior[v] < 0) {
-                set_bit(decoded, v, 1);
-            }
-        }
-
-        // Check syndrome
-        int syndrome_count = 0;
-        for (int j = 0; j < k_bits; j++) {
-            int syn = 0;
-            for (int idx = 0; idx < check_degree[j]; idx++) {
-                syn ^= get_bit(decoded, check_to_var[j][idx]);
-            }
-            if (syn) syndrome_count++;
-        }
-
-        // If all syndromes are zero, we're done
-        if (syndrome_count == 0) {
-            memcpy(output, decoded, data_len);
-            return 0;
-        }
-
-        // Early termination if syndrome count is very small (nearly converged)
-        if (iter > 5 && syndrome_count <= 2) {
-            // Try one more iteration, if still stuck, accept
-        }
-    }
-
-    // Decoding did not converge - compute final estimate
-    float posterior[LDPC_MAX_DATA_BYTES * 16];
-    for (int v = 0; v < n_bits; v++) {
-        float sum = channel_llr[v];
-        for (int j = 0; j < k_bits; j++) {
-            for (int idx = 0; idx < check_degree[j]; idx++) {
-                if (check_to_var[j][idx] == v) {
-                    sum += check_to_var_msg[j][idx];
-                    break;
-                }
-            }
-        }
-        posterior[v] = sum;
-    }
-
-    uint8_t decoded[LDPC_MAX_DATA_BYTES * 2];
-    memset(decoded, 0, encoded_len);
-    for (int v = 0; v < n_bits; v++) {
-        if (posterior[v] < 0) {
-            set_bit(decoded, v, 1);
-        }
-    }
-
-    // Check final syndrome count
-    int final_syndromes = 0;
-    for (int j = 0; j < k_bits; j++) {
-        int syn = 0;
-        for (int idx = 0; idx < check_degree[j]; idx++) {
-            syn ^= get_bit(decoded, check_to_var[j][idx]);
-        }
-        if (syn) final_syndromes++;
-    }
-
-    // Accept if syndrome count is low enough
-    if (final_syndromes <= k_bits / 4) {
-        memcpy(output, decoded, data_len);
-        return 0;  // Soft success
-    }
-
-    // Total failure - return original data as best effort
-    memcpy(output, encoded, data_len);
-    return -1;
-}
diff --git a/video_encoder/lib/libfec/ldpc.h b/video_encoder/lib/libfec/ldpc.h
deleted file mode 100644
index d8af04f..0000000
--- a/video_encoder/lib/libfec/ldpc.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/**
- * LDPC Rate 1/2 Codec for TAV-DT
- *
- * Simple LDPC implementation for header protection in TAV-DT format.
- * Rate 1/2: k data bytes → 2k encoded bytes (doubles the size)
- *
- * Uses systematic encoding where first k bytes are data, last k bytes are parity.
- * Decoding uses iterative bit-flipping algorithm.
- *
- * Designed for small blocks (headers up to 64 bytes).
- *
- * Created by CuriousTorvald and Claude on 2025-12-09.
- */
-
-#ifndef LDPC_H
-#define LDPC_H
-
-#include <stdint.h>
-#include <stddef.h>
-
-// Maximum block size (data bytes before encoding)
-#define LDPC_MAX_DATA_BYTES 64
-
-// LDPC decoder parameters
-#define LDPC_MAX_ITERATIONS 50
-
-/**
- * Initialize LDPC codec.
- * Must be called once before using encode/decode functions.
- * Thread-safe: uses static initialization.
- */
-void ldpc_init(void);
-
-/**
- * Encode data block with LDPC rate 1/2.
- *
- * @param data      Input data bytes
- * @param data_len  Length of input data (1 to LDPC_MAX_DATA_BYTES)
- * @param output    Output buffer (must hold 2 * data_len bytes)
- * @return          Output length (2 * data_len)
- *
- * Output format: [data bytes][parity bytes]
- * The output is systematic: first data_len bytes are the original data.
- */
-size_t ldpc_encode(const uint8_t *data, size_t data_len, uint8_t *output);
-
-/**
- * Decode LDPC rate 1/2 encoded block.
- *
- * @param encoded     Input encoded data (2 * data_len bytes)
- * @param encoded_len Length of encoded data (must be even, max 2*LDPC_MAX_DATA_BYTES)
- * @param output      Output buffer for decoded data (encoded_len / 2 bytes)
- * @return            0 on success, -1 if decoding failed (too many errors)
- *
- * Uses iterative bit-flipping decoder.
- */
-int ldpc_decode(const uint8_t *encoded, size_t encoded_len, uint8_t *output);
-
-/**
- * Calculate syndrome for validation.
- *
- * @param codeword   Encoded codeword (2 * data_len bytes)
- * @param len        Length of codeword
- * @return           1 if valid (zero syndrome), 0 if errors detected
- */
-int ldpc_check_syndrome(const uint8_t *codeword, size_t len);
-
-#endif // LDPC_H
diff --git a/video_encoder/lib/libfec/ldpc_payload.c b/video_encoder/lib/libfec/ldpc_payload.c
deleted file mode 100644
index 118fdb0..0000000
--- a/video_encoder/lib/libfec/ldpc_payload.c
+++ /dev/null
@@ -1,478 +0,0 @@
-/**
- * LDPC(255,223) Codec Implementation - Enhanced Version
- *
- * This implements a high-rate LDPC code designed to compete with RS(255,223).
- *
- * Key improvements in this version:
- * - Sum-Product (Belief Propagation) decoder for optimal performance
- * - Quasi-cyclic H matrix with optimized degree distribution
- * - Layered scheduling for faster convergence
- * - Adaptive LLR initialization
- *
- * Created by CuriousTorvald and Claude on 2025-12-15.
- */
-
-#include "ldpc_payload.h"
-#include <string.h>
-#include <stdlib.h>
-#include <math.h>
-#include <stdio.h>
-
-// =============================================================================
-// Constants
-// =============================================================================
-
-#define N_BITS    (LDPC_P_BLOCK_SIZE * 8)   // 2040 total bits
-#define K_BITS    (LDPC_P_DATA_SIZE * 8)    // 1784 data bits
-#define M_BITS    (LDPC_P_PARITY_SIZE * 8)  // 256 parity bits
-
-// LLR bounds - tighter bounds help prevent numerical issues
-#define LLR_MAX  20.0f
-#define LLR_MIN -20.0f
-
-// Decoding parameters
-#define LDPC_MAX_ITER 100
-
-// =============================================================================
-// Sparse Matrix Storage
-// =============================================================================
-
-#define MAX_CHECK_DEGREE 50
-#define MAX_VAR_DEGREE   12
-
-static int ldpc_p_initialized = 0;
-
-static int check_degree[M_BITS];
-static int check_to_var[M_BITS][MAX_CHECK_DEGREE];
-static int check_to_var_idx[M_BITS][MAX_CHECK_DEGREE];
-
-static int var_degree[N_BITS];
-static int var_to_check[N_BITS][MAX_VAR_DEGREE];
-static int var_to_check_idx[N_BITS][MAX_VAR_DEGREE];
-
-// =============================================================================
-// Bit manipulation
-// =============================================================================
-
-static inline int get_bit(const uint8_t *data, int bit_idx) {
-    return (data[bit_idx >> 3] >> (7 - (bit_idx & 7))) & 1;
-}
-
-static inline void set_bit(uint8_t *data, int bit_idx, int value) {
-    int byte_idx = bit_idx >> 3;
-    int bit_pos = 7 - (bit_idx & 7);
-    if (value) {
-        data[byte_idx] |= (1 << bit_pos);
-    } else {
-        data[byte_idx] &= ~(1 << bit_pos);
-    }
-}
-
-// =============================================================================
-// H Matrix Construction - Quasi-Cyclic with Optimized Distribution
-// =============================================================================
-
-// Hash function for deterministic pseudo-random connections
-static inline uint32_t hash32(uint32_t a, uint32_t b) {
-    uint32_t h = a ^ (b * 0x9E3779B9);
-    h ^= h >> 16;
-    h *= 0x85EBCA6B;
-    h ^= h >> 13;
-    h *= 0xC2B2AE35;
-    h ^= h >> 16;
-    return h;
-}
-
-static void add_edge(int check, int var) {
-    // Check if already connected
-    for (int i = 0; i < check_degree[check]; i++) {
-        if (check_to_var[check][i] == var) return;
-    }
-
-    if (check_degree[check] >= MAX_CHECK_DEGREE || var_degree[var] >= MAX_VAR_DEGREE) {
-        return;
-    }
-
-    int cidx = check_degree[check];
-    int vidx = var_degree[var];
-
-    check_to_var[check][cidx] = var;
-    check_to_var_idx[check][cidx] = vidx;
-    check_degree[check]++;
-
-    var_to_check[var][vidx] = check;
-    var_to_check_idx[var][vidx] = cidx;
-    var_degree[var]++;
-}
-
-// Simplified cycle check - only check direct neighbors (faster)
-static int would_create_short_cycle(int v, int c) {
-    // Quick check: if v is already connected to c, skip
-    for (int i = 0; i < var_degree[v]; i++) {
-        if (var_to_check[v][i] == c) return 1;
-    }
-
-    // For speed, only do basic 4-cycle check for low-degree nodes
-    if (var_degree[v] > 4 || check_degree[c] > 20) return 0;
-
-    // Check for 4-cycles
-    for (int i = 0; i < var_degree[v]; i++) {
-        int c_prime = var_to_check[v][i];
-        for (int j = 0; j < check_degree[c_prime] && j < 15; j++) {
-            int v_prime = check_to_var[c_prime][j];
-            if (v_prime == v) continue;
-            for (int k = 0; k < var_degree[v_prime] && k < 8; k++) {
-                if (var_to_check[v_prime][k] == c) {
-                    return 1;
-                }
-            }
-        }
-    }
-    return 0;
-}
-
-// Quasi-cyclic expansion: shift value determines cyclic permutation
-static int qc_shift(int base_idx, int shift, int size) {
-    return (base_idx + shift) % size;
-}
-
-static void build_h_matrix(void) {
-    memset(check_degree, 0, sizeof(check_degree));
-    memset(var_degree, 0, sizeof(var_degree));
-
-    // ==========================================================================
-    // H matrix with staircase parity and PEG-based data connections
-    // ==========================================================================
-
-    // --- Part 1: Staircase parity structure ---
-    for (int c = 0; c < M_BITS; c++) {
-        int parity_bit = K_BITS + c;
-        add_edge(c, parity_bit);
-        if (c > 0) {
-            add_edge(c, K_BITS + c - 1);
-        }
-    }
-
-    // --- Part 2: Connect data bits using PEG approach ---
-    for (int v = 0; v < K_BITS; v++) {
-        // Target 6 connections per variable
-        int target = 6;
-
-        for (int d = 0; d < target; d++) {
-            uint32_t h = hash32((uint32_t)v * 2654435769U, (uint32_t)d * 1597334677U);
-
-            // Find best check (lowest degree)
-            int best_c = -1;
-            int best_deg = MAX_CHECK_DEGREE;
-
-            for (int attempt = 0; attempt < 16; attempt++) {
-                int c = (int)((h + attempt * 127) % M_BITS);
-
-                if (check_degree[c] < best_deg && check_degree[c] < MAX_CHECK_DEGREE - 2) {
-                    // Check not already connected
-                    int connected = 0;
-                    for (int i = 0; i < var_degree[v]; i++) {
-                        if (var_to_check[v][i] == c) { connected = 1; break; }
-                    }
-                    if (!connected) {
-                        best_deg = check_degree[c];
-                        best_c = c;
-                        if (best_deg < 30) break;  // Good enough
-                    }
-                }
-            }
-
-            if (best_c >= 0 && var_degree[v] < MAX_VAR_DEGREE - 1) {
-                add_edge(best_c, v);
-            }
-        }
-    }
-
-    // --- Part 3: Fill in low-degree variables ---
-    for (int v = 0; v < K_BITS; v++) {
-        while (var_degree[v] < 5) {
-            uint32_t h = hash32((uint32_t)v * 12345, (uint32_t)var_degree[v] * 67890);
-
-            int added = 0;
-            for (int attempt = 0; attempt < 64 && !added; attempt++) {
-                int c = (int)((h + attempt * 31) % M_BITS);
-                if (check_degree[c] < MAX_CHECK_DEGREE - 2) {
-                    int prev = var_degree[v];
-                    add_edge(c, v);
-                    if (var_degree[v] > prev) added = 1;
-                }
-            }
-            if (!added) break;
-        }
-    }
-
-    // --- Part 4: Balance check degrees ---
-    for (int c = 0; c < M_BITS; c++) {
-        int target = 35;
-        int attempts = 0;
-        while (check_degree[c] < target && attempts < 150) {
-            uint32_t h = hash32((uint32_t)c * 48271, (uint32_t)attempts * 16807);
-            int v = (int)(h % K_BITS);
-
-            if (var_degree[v] < MAX_VAR_DEGREE - 1) {
-                add_edge(c, v);
-            }
-            attempts++;
-        }
-    }
-}
-
-void ldpc_p_init(void) {
-    if (ldpc_p_initialized) return;
-    build_h_matrix();
-    ldpc_p_initialized = 1;
-}
-
-// =============================================================================
-// Syndrome Check
-// =============================================================================
-
-int ldpc_p_check_syndrome(const uint8_t *codeword) {
-    if (!ldpc_p_initialized) ldpc_p_init();
-
-    for (int c = 0; c < M_BITS; c++) {
-        int syndrome = 0;
-        for (int i = 0; i < check_degree[c]; i++) {
-            int v = check_to_var[c][i];
-            syndrome ^= get_bit(codeword, v);
-        }
-        if (syndrome != 0) {
-            return 0;
-        }
-    }
-    return 1;
-}
-
-// =============================================================================
-// Encoding
-// =============================================================================
-
-size_t ldpc_p_encode(const uint8_t *data, size_t data_len, uint8_t *output) {
-    if (!ldpc_p_initialized) ldpc_p_init();
-
-    if (data_len > LDPC_P_DATA_SIZE) {
-        data_len = LDPC_P_DATA_SIZE;
-    }
-
-    // Copy data to output and pad if necessary
-    memcpy(output, data, data_len);
-    if (data_len < LDPC_P_DATA_SIZE) {
-        memset(output + data_len, 0, LDPC_P_DATA_SIZE - data_len);
-    }
-
-    // Initialize parity bytes to zero
-    memset(output + LDPC_P_DATA_SIZE, 0, LDPC_P_PARITY_SIZE);
-
-    // Compute syndrome contribution from data bits
-    int syndrome[M_BITS];
-    for (int c = 0; c < M_BITS; c++) {
-        syndrome[c] = 0;
-        for (int i = 0; i < check_degree[c]; i++) {
-            int v = check_to_var[c][i];
-            if (v < K_BITS) {
-                syndrome[c] ^= get_bit(output, v);
-            }
-        }
-    }
-
-    // Back-substitution for parity bits (staircase structure)
-    int prev_parity = 0;
-    for (int c = 0; c < M_BITS; c++) {
-        int parity_bit = syndrome[c] ^ prev_parity;
-        set_bit(output + LDPC_P_DATA_SIZE, c, parity_bit);
-        prev_parity = parity_bit;
-    }
-
-    return LDPC_P_BLOCK_SIZE;
-}
-
-// =============================================================================
-// Min-Sum Decoder with Optimized Parameters
-// =============================================================================
-
-// Clamp LLR to valid range
-static inline float clamp_llr(float x) {
-    if (x > LLR_MAX) return LLR_MAX;
-    if (x < LLR_MIN) return LLR_MIN;
-    return x;
-}
-
-int ldpc_p_decode(uint8_t *data, size_t data_len) {
-    if (!ldpc_p_initialized) ldpc_p_init();
-
-    size_t total_len = data_len + LDPC_P_PARITY_SIZE;
-    if (total_len > LDPC_P_BLOCK_SIZE) {
-        return -1;
-    }
-
-    // Working codeword buffer
-    uint8_t codeword[LDPC_P_BLOCK_SIZE];
-    memcpy(codeword, data, total_len);
-    if (total_len < LDPC_P_BLOCK_SIZE) {
-        memset(codeword + total_len, 0, LDPC_P_BLOCK_SIZE - total_len);
-    }
-
-    // Quick check - if already valid, no decoding needed
-    if (ldpc_p_check_syndrome(codeword)) {
-        return 0;
-    }
-
-    // ==========================================================================
-    // Initialize channel LLRs
-    // ==========================================================================
-
-    float var_llr[N_BITS];
-    float llr_magnitude = 6.0f;
-
-    for (int v = 0; v < N_BITS; v++) {
-        int bit = get_bit(codeword, v);
-        var_llr[v] = bit ? -llr_magnitude : llr_magnitude;
-    }
-
-    // Message storage
-    static float c2v[M_BITS][MAX_CHECK_DEGREE];
-
-    for (int c = 0; c < M_BITS; c++) {
-        for (int i = 0; i < check_degree[c]; i++) {
-            c2v[c][i] = 0.0f;
-        }
-    }
-
-    // ==========================================================================
-    // Normalized Min-Sum Decoding with Layered Scheduling
-    // ==========================================================================
-
-    float v2c[MAX_CHECK_DEGREE];
-    const float alpha = 0.75f;  // Normalization factor
-
-    for (int iter = 0; iter < LDPC_MAX_ITER; iter++) {
-
-        // Process each check node (layer)
-        for (int c = 0; c < M_BITS; c++) {
-            int deg = check_degree[c];
-
-            // Step 1: Compute variable-to-check messages
-            for (int i = 0; i < deg; i++) {
-                int v = check_to_var[c][i];
-                v2c[i] = var_llr[v] - c2v[c][i];
-            }
-
-            // Step 2: Compute check-to-variable messages using min-sum
-            for (int i = 0; i < deg; i++) {
-                float sign_prod = 1.0f;
-                float min1 = LLR_MAX, min2 = LLR_MAX;
-
-                for (int j = 0; j < deg; j++) {
-                    if (j == i) continue;
-
-                    float val = v2c[j];
-                    if (val < 0) sign_prod = -sign_prod;
-
-                    float absval = fabsf(val);
-                    if (absval < min1) {
-                        min2 = min1;
-                        min1 = absval;
-                    } else if (absval < min2) {
-                        min2 = absval;
-                    }
-                }
-
-                // Normalized min-sum message
-                float msg_mag = alpha * min1;
-                float new_c2v = sign_prod * msg_mag;
-
-                // Update variable LLR immediately (layered approach)
-                int v = check_to_var[c][i];
-                var_llr[v] = clamp_llr(var_llr[v] - c2v[c][i] + new_c2v);
-                c2v[c][i] = new_c2v;
-            }
-        }
-
-        // Make hard decisions
-        for (int v = 0; v < N_BITS; v++) {
-            set_bit(codeword, v, var_llr[v] < 0 ? 1 : 0);
-        }
-
-        // Check if valid codeword
-        if (ldpc_p_check_syndrome(codeword)) {
-            memcpy(data, codeword, data_len);
-            return iter + 1;
-        }
-
-        // Adaptive restart at iteration milestones
-        if (iter == 25 || iter == 50 || iter == 75) {
-            float new_mag = 4.0f - (iter / 25) * 0.5f;
-            for (int v = 0; v < N_BITS; v++) {
-                int bit = get_bit(codeword, v);
-                var_llr[v] = bit ? -new_mag : new_mag;
-            }
-            for (int c = 0; c < M_BITS; c++) {
-                for (int i = 0; i < check_degree[c]; i++) {
-                    c2v[c][i] = 0.0f;
-                }
-            }
-        }
-    }
-
-    // Failed to converge
-    memcpy(data, codeword, data_len);
-    return -1;
-}
-
-// =============================================================================
-// Block-level operations
-// =============================================================================
-
-size_t ldpc_p_encode_blocks(const uint8_t *data, size_t data_len, uint8_t *output) {
-    if (!ldpc_p_initialized) ldpc_p_init();
-
-    size_t output_len = 0;
-    size_t remaining = data_len;
-    const uint8_t *src = data;
-    uint8_t *dst = output;
-
-    while (remaining > 0) {
-        size_t block_data = (remaining > LDPC_P_DATA_SIZE) ? LDPC_P_DATA_SIZE : remaining;
-        ldpc_p_encode(src, block_data, dst);
-
-        src += block_data;
-        dst += LDPC_P_BLOCK_SIZE;
-        output_len += LDPC_P_BLOCK_SIZE;
-        remaining -= block_data;
-    }
-
-    return output_len;
-}
-
-int ldpc_p_decode_blocks(uint8_t *data, size_t total_len, uint8_t *output, size_t output_len) {
-    if (!ldpc_p_initialized) ldpc_p_init();
-
-    int total_iterations = 0;
-    size_t remaining_output = output_len;
-    uint8_t *src = data;
-    uint8_t *dst = output;
-
-    while (total_len >= LDPC_P_BLOCK_SIZE && remaining_output > 0) {
-        size_t bytes_to_copy = (remaining_output > LDPC_P_DATA_SIZE) ? LDPC_P_DATA_SIZE : remaining_output;
-
-        int result = ldpc_p_decode(src, LDPC_P_DATA_SIZE);
-        if (result < 0) {
-            return -1;
-        }
-        total_iterations += result;
-
-        memcpy(dst, src, bytes_to_copy);
-
-        src += LDPC_P_BLOCK_SIZE;
-        dst += bytes_to_copy;
-        total_len -= LDPC_P_BLOCK_SIZE;
-        remaining_output -= bytes_to_copy;
-    }
-
-    return total_iterations;
-}
diff --git a/video_encoder/lib/libfec/ldpc_payload.h b/video_encoder/lib/libfec/ldpc_payload.h
deleted file mode 100644
index 4079846..0000000
--- a/video_encoder/lib/libfec/ldpc_payload.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/**
- * LDPC(255,223) Codec for TAV-DT Payloads
- *
- * Alternative to RS(255,223) with same rate (~0.875):
- * - Block size: 255 bytes (223 data + 32 parity)
- * - Uses quasi-cyclic LDPC structure for efficiency
- * - Soft-decision belief propagation decoder
- *
- * Designed as drop-in replacement for RS(255,223):
- * - Same input/output sizes
- * - Same API style
- * - Different error correction characteristics:
- *   - LDPC: Better at high BER (>1e-3), gradual degradation
- *   - RS: Better at low BER, hard threshold at 16 byte errors
- *
- * Created by CuriousTorvald and Claude on 2025-12-15.
- */
-
-#ifndef LDPC_PAYLOAD_H
-#define LDPC_PAYLOAD_H
-
-#include <stdint.h>
-#include <stddef.h>
-
-// LDPC(255,223) parameters - matches RS(255,223) for drop-in replacement
-#define LDPC_P_BLOCK_SIZE    255   // Total codeword size (bytes)
-#define LDPC_P_DATA_SIZE     223   // Data bytes per block
-#define LDPC_P_PARITY_SIZE   32    // Parity bytes per block
-
-// Decoder parameters
-#define LDPC_P_MAX_ITERATIONS 30   // Maximum BP iterations
-#define LDPC_P_EARLY_TERM     1    // Enable early termination on valid codeword
-
-/**
- * Initialize LDPC(255,223) codec.
- * Must be called once before using encode/decode functions.
- * Thread-safe: uses static initialization.
- */
-void ldpc_p_init(void);
-
-/**
- * Encode data block with LDPC(255,223).
- *
- * @param data      Input data (up to LDPC_P_DATA_SIZE bytes)
- * @param data_len  Length of input data (1 to LDPC_P_DATA_SIZE)
- * @param output    Output buffer (must hold data_len + LDPC_P_PARITY_SIZE bytes)
- *                  Format: [data][parity]
- * @return          Total output length (data_len + LDPC_P_PARITY_SIZE)
- *
- * Note: For data shorter than LDPC_P_DATA_SIZE, the encoder pads with zeros
- * internally but only outputs actual data + parity.
- */
-size_t ldpc_p_encode(const uint8_t *data, size_t data_len, uint8_t *output);
-
-/**
- * Decode and correct LDPC(255,223) encoded block.
- *
- * @param data      Buffer containing [data][parity] (modified in-place)
- * @param data_len  Length of data portion (1 to LDPC_P_DATA_SIZE)
- * @return          Number of iterations used (1-30), or -1 if uncorrectable
- *
- * On success, data buffer contains corrected data.
- * On failure, data buffer contents are undefined.
- */
-int ldpc_p_decode(uint8_t *data, size_t data_len);
-
-/**
- * Encode data with automatic block splitting.
- * For data larger than LDPC_P_DATA_SIZE, splits into multiple blocks.
- *
- * @param data        Input data
- * @param data_len    Length of input data
- * @param output      Output buffer (must hold ceil(data_len/223) * 255 bytes)
- * @return            Total output length
- */
-size_t ldpc_p_encode_blocks(const uint8_t *data, size_t data_len, uint8_t *output);
-
-/**
- * Decode data with automatic block splitting.
- *
- * @param data        Buffer containing LDPC-encoded blocks (modified in-place)
- * @param total_len   Total length of encoded data (multiple of LDPC_P_BLOCK_SIZE)
- * @param output      Output buffer for decoded data
- * @param output_len  Expected length of decoded data
- * @return            Total iterations across all blocks, or -1 if any block failed
- */
-int ldpc_p_decode_blocks(uint8_t *data, size_t total_len, uint8_t *output, size_t output_len);
-
-/**
- * Check if codeword is valid (syndrome check).
- *
- * @param codeword   Full codeword (LDPC_P_BLOCK_SIZE bytes)
- * @return           1 if valid (zero syndrome), 0 if errors detected
- */
-int ldpc_p_check_syndrome(const uint8_t *codeword);
-
-#endif // LDPC_PAYLOAD_H
diff --git a/video_encoder/lib/libfec/reed_solomon.c b/video_encoder/lib/libfec/reed_solomon.c
deleted file mode 100644
index b8256a5..0000000
--- a/video_encoder/lib/libfec/reed_solomon.c
+++ /dev/null
@@ -1,417 +0,0 @@
-/**
- * Reed-Solomon (255,223) Codec Implementation
- *
- * Standard RS code over GF(2^8) for TAV-DT forward error correction.
- *
- * Created by CuriousTorvald and Claude on 2025-12-09.
- */
-
-#include "reed_solomon.h"
-#include <string.h>
-#include <stdio.h>
-
-// =============================================================================
-// Galois Field GF(2^8) Arithmetic
-// =============================================================================
-
-// Primitive polynomial: x^8 + x^4 + x^3 + x^2 + 1 = 0x11D
-#define GF_PRIMITIVE 0x11D
-#define GF_SIZE      256
-#define GF_MAX       255
-
-// Lookup tables for GF(2^8) arithmetic
-static uint8_t gf_exp[512];  // Anti-log table (doubled for easy modular reduction)
-static uint8_t gf_log[256];  // Log table
-static uint8_t gf_generator[RS_PARITY_SIZE + 1];  // Generator polynomial coefficients
-
-static int rs_initialized = 0;
-
-// Initialize GF(2^8) exp/log tables
-static void init_gf_tables(void) {
-    uint16_t x = 1;
-
-    for (int i = 0; i < GF_MAX; i++) {
-        gf_exp[i] = (uint8_t)x;
-        gf_log[x] = (uint8_t)i;
-
-        // Multiply by alpha (primitive element = 2)
-        x <<= 1;
-        if (x & 0x100) {
-            x ^= GF_PRIMITIVE;
-        }
-    }
-
-    // Double the exp table for easy modular reduction
-    for (int i = GF_MAX; i < 512; i++) {
-        gf_exp[i] = gf_exp[i - GF_MAX];
-    }
-
-    // gf_log[0] is undefined, set to 0 for safety
-    gf_log[0] = 0;
-}
-
-// GF multiplication
-static inline uint8_t gf_mul(uint8_t a, uint8_t b) {
-    if (a == 0 || b == 0) return 0;
-    return gf_exp[gf_log[a] + gf_log[b]];
-}
-
-// GF division
-static inline uint8_t gf_div(uint8_t a, uint8_t b) {
-    if (a == 0) return 0;
-    if (b == 0) return 0;  // Division by zero - shouldn't happen
-    return gf_exp[gf_log[a] + GF_MAX - gf_log[b]];
-}
-
-// GF power
-static inline uint8_t gf_pow(uint8_t a, int n) {
-    if (n == 0) return 1;
-    if (a == 0) return 0;
-    return gf_exp[(gf_log[a] * n) % GF_MAX];
-}
-
-// GF inverse
-static inline uint8_t gf_inv(uint8_t a) {
-    if (a == 0) return 0;
-    return gf_exp[GF_MAX - gf_log[a]];
-}
-
-// =============================================================================
-// Generator Polynomial
-// =============================================================================
-
-// Build generator polynomial: g(x) = (x - alpha^0)(x - alpha^1)...(x - alpha^31)
-static void init_generator(void) {
-    // Start with g(x) = 1
-    gf_generator[0] = 1;
-    for (int i = 1; i <= RS_PARITY_SIZE; i++) {
-        gf_generator[i] = 0;
-    }
-
-    // Multiply by (x - alpha^i) for i = 0 to 31
-    for (int i = 0; i < RS_PARITY_SIZE; i++) {
-        uint8_t alpha_i = gf_exp[i];  // alpha^i
-
-        // Multiply current polynomial by (x - alpha^i)
-        for (int j = RS_PARITY_SIZE; j > 0; j--) {
-            gf_generator[j] = gf_generator[j - 1] ^ gf_mul(gf_generator[j], alpha_i);
-        }
-        gf_generator[0] = gf_mul(gf_generator[0], alpha_i);
-    }
-}
-
-// =============================================================================
-// Public API
-// =============================================================================
-
-void rs_init(void) {
-    if (rs_initialized) return;
-
-    init_gf_tables();
-    init_generator();
-    rs_initialized = 1;
-}
-
-size_t rs_encode(const uint8_t *data, size_t data_len, uint8_t *output) {
-    if (!rs_initialized) rs_init();
-
-    // Validate input
-    if (data_len > RS_DATA_SIZE) {
-        data_len = RS_DATA_SIZE;
-    }
-
-    // Copy data to output
-    memcpy(output, data, data_len);
-
-    // Initialize parity bytes to zero
-    memset(output + data_len, 0, RS_PARITY_SIZE);
-
-    // Create padded message polynomial (RS_DATA_SIZE + RS_PARITY_SIZE coefficients)
-    // Message is shifted to leave room for parity (systematic encoding)
-    uint8_t msg[RS_BLOCK_SIZE];
-    memset(msg, 0, sizeof(msg));
-    memcpy(msg, data, data_len);
-
-    // Polynomial division: compute remainder of msg(x) * x^32 / g(x)
-    uint8_t remainder[RS_PARITY_SIZE];
-    memset(remainder, 0, RS_PARITY_SIZE);
-
-    for (size_t i = 0; i < data_len; i++) {
-        uint8_t coef = msg[i] ^ remainder[0];
-
-        // Shift remainder
-        memmove(remainder, remainder + 1, RS_PARITY_SIZE - 1);
-        remainder[RS_PARITY_SIZE - 1] = 0;
-
-        // Subtract coef * g(x) from remainder
-        if (coef != 0) {
-            for (int j = 0; j < RS_PARITY_SIZE; j++) {
-                remainder[j] ^= gf_mul(gf_generator[RS_PARITY_SIZE - 1 - j], coef);
-            }
-        }
-    }
-
-    // Append parity to output
-    memcpy(output + data_len, remainder, RS_PARITY_SIZE);
-
-    return data_len + RS_PARITY_SIZE;
-}
-
-// =============================================================================
-// Berlekamp-Massey Decoder
-// =============================================================================
-
-// Compute syndromes S_i = r(alpha^i) for i = 0..31
-static void compute_syndromes(const uint8_t *r, size_t len, uint8_t *syndromes) {
-    for (int i = 0; i < RS_PARITY_SIZE; i++) {
-        syndromes[i] = 0;
-        for (size_t j = 0; j < len; j++) {
-            syndromes[i] ^= gf_mul(r[j], gf_pow(gf_exp[i], (int)(len - 1 - j)));
-        }
-    }
-}
-
-// Berlekamp-Massey algorithm to find error locator polynomial
-static int berlekamp_massey(const uint8_t *syndromes, uint8_t *sigma, int *sigma_deg) {
-    uint8_t C[RS_PARITY_SIZE + 1];  // Connection polynomial
-    uint8_t B[RS_PARITY_SIZE + 1];  // Previous connection polynomial
-    int L = 0;  // Current length of LFSR
-    int m = 1;  // Number of steps since last update
-    uint8_t b = 1;  // Previous discrepancy
-
-    // Initialize: C(x) = 1, B(x) = 1
-    memset(C, 0, sizeof(C));
-    memset(B, 0, sizeof(B));
-    C[0] = 1;
-    B[0] = 1;
-
-    for (int n = 0; n < RS_PARITY_SIZE; n++) {
-        // Compute discrepancy
-        uint8_t d = syndromes[n];
-        for (int i = 1; i <= L; i++) {
-            d ^= gf_mul(C[i], syndromes[n - i]);
-        }
-
-        if (d == 0) {
-            // No update needed
-            m++;
-        } else if (2 * L <= n) {
-            // Update both C and L
-            uint8_t T[RS_PARITY_SIZE + 1];
-            memcpy(T, C, sizeof(T));
-
-            uint8_t factor = gf_div(d, b);
-            for (int i = 0; i <= RS_PARITY_SIZE - m; i++) {
-                C[i + m] ^= gf_mul(factor, B[i]);
-            }
-
-            L = n + 1 - L;
-            memcpy(B, T, sizeof(B));
-            b = d;
-            m = 1;
-        } else {
-            // Only update C
-            uint8_t factor = gf_div(d, b);
-            for (int i = 0; i <= RS_PARITY_SIZE - m; i++) {
-                C[i + m] ^= gf_mul(factor, B[i]);
-            }
-            m++;
-        }
-    }
-
-    // Copy result
-    memcpy(sigma, C, RS_PARITY_SIZE + 1);
-    *sigma_deg = L;
-
-    return L;
-}
-
-// Chien search: find error positions (roots of sigma)
-static int chien_search(const uint8_t *sigma, int sigma_deg, size_t n, uint8_t *positions, int *num_errors) {
-    *num_errors = 0;
-
-    // Evaluate sigma(alpha^(-i)) for i = 0 to n-1
-    for (size_t i = 0; i < n; i++) {
-        uint8_t eval = 0;
-        for (int j = 0; j <= sigma_deg; j++) {
-            // sigma(alpha^(-i)) = sum of sigma[j] * alpha^(-i*j)
-            int exp = (GF_MAX - (int)((i * j) % GF_MAX)) % GF_MAX;
-            eval ^= gf_mul(sigma[j], gf_exp[exp]);
-        }
-
-        if (eval == 0) {
-            // Found a root - error at position n-1-i
-            positions[*num_errors] = (uint8_t)(n - 1 - i);
-            (*num_errors)++;
-        }
-    }
-
-    // Check if we found the expected number of errors
-    return (*num_errors == sigma_deg) ? 0 : -1;
-}
-
-// Compute formal derivative of polynomial
-static void poly_derivative(const uint8_t *poly, int deg, uint8_t *deriv) {
-    for (int i = 0; i < deg; i++) {
-        // Derivative of x^(i+1) is (i+1) * x^i
-        // In GF(2^m), coefficient is 1 if (i+1) is odd, 0 if even
-        deriv[i] = ((i + 1) & 1) ? poly[i + 1] : 0;
-    }
-}
-
-// Forney algorithm: compute error values
-static void forney(const uint8_t *syndromes, const uint8_t *sigma, int sigma_deg,
-                   const uint8_t *positions, int num_errors, size_t n, uint8_t *errors) {
-    // Compute error evaluator polynomial omega(x) = S(x) * sigma(x) mod x^2t
-    uint8_t omega[RS_PARITY_SIZE + 1];
-    memset(omega, 0, sizeof(omega));
-
-    for (int i = 0; i < RS_PARITY_SIZE; i++) {
-        for (int j = 0; j <= sigma_deg && i - j >= 0; j++) {
-            omega[i] ^= gf_mul(syndromes[i - j], sigma[j]);
-        }
-    }
-
-    // Compute formal derivative of sigma
-    uint8_t sigma_prime[RS_PARITY_SIZE];
-    poly_derivative(sigma, sigma_deg, sigma_prime);
-
-    // Compute error values using Forney formula
-    for (int i = 0; i < num_errors; i++) {
-        uint8_t pos = positions[i];
-        uint8_t Xi = gf_exp[n - 1 - pos];  // alpha^(n-1-pos)
-        uint8_t Xi_inv = gf_inv(Xi);
-
-        // Evaluate omega at Xi_inv
-        uint8_t omega_val = 0;
-        for (int j = 0; j < RS_PARITY_SIZE; j++) {
-            omega_val ^= gf_mul(omega[j], gf_pow(Xi_inv, j));
-        }
-
-        // Evaluate sigma' at Xi_inv
-        uint8_t sigma_prime_val = 0;
-        for (int j = 0; j < sigma_deg; j++) {
-            sigma_prime_val ^= gf_mul(sigma_prime[j], gf_pow(Xi_inv, j));
-        }
-
-        // Error value: e_i = Xi * omega(Xi_inv) / sigma'(Xi_inv)
-        errors[i] = gf_mul(Xi, gf_div(omega_val, sigma_prime_val));
-    }
-}
-
-int rs_decode(uint8_t *data, size_t data_len) {
-    if (!rs_initialized) rs_init();
-
-    size_t total_len = data_len + RS_PARITY_SIZE;
-    if (total_len > RS_BLOCK_SIZE) {
-        return -1;
-    }
-
-    // Compute syndromes
-    uint8_t syndromes[RS_PARITY_SIZE];
-    compute_syndromes(data, total_len, syndromes);
-
-    // Check if all syndromes are zero (no errors)
-    int has_errors = 0;
-    for (int i = 0; i < RS_PARITY_SIZE; i++) {
-        if (syndromes[i] != 0) {
-            has_errors = 1;
-            break;
-        }
-    }
-
-    if (!has_errors) {
-        return 0;  // No errors
-    }
-
-    // Find error locator polynomial using Berlekamp-Massey
-    uint8_t sigma[RS_PARITY_SIZE + 1];
-    int sigma_deg;
-    int num_errors_expected = berlekamp_massey(syndromes, sigma, &sigma_deg);
-
-    if (num_errors_expected > RS_MAX_ERRORS) {
-        return -1;  // Too many errors
-    }
-
-    // Find error positions using Chien search
-    uint8_t positions[RS_MAX_ERRORS];
-    int num_errors;
-    if (chien_search(sigma, sigma_deg, total_len, positions, &num_errors) != 0) {
-        return -1;  // Inconsistent error count
-    }
-
-    // Compute error values using Forney algorithm
-    uint8_t error_values[RS_MAX_ERRORS];
-    forney(syndromes, sigma, sigma_deg, positions, num_errors, total_len, error_values);
-
-    // Apply corrections
-    for (int i = 0; i < num_errors; i++) {
-        if (positions[i] < total_len) {
-            data[positions[i]] ^= error_values[i];
-        }
-    }
-
-    return num_errors;
-}
-
-// =============================================================================
-// Block-level operations
-// =============================================================================
-
-size_t rs_encode_blocks(const uint8_t *data, size_t data_len, uint8_t *output) {
-    if (!rs_initialized) rs_init();
-
-    size_t output_len = 0;
-    size_t remaining = data_len;
-    const uint8_t *src = data;
-    uint8_t *dst = output;
-
-    while (remaining > 0) {
-        size_t block_data = (remaining > RS_DATA_SIZE) ? RS_DATA_SIZE : remaining;
-        size_t encoded_len = rs_encode(src, block_data, dst);
-
-        // Pad to full block size for consistent block boundaries
-        if (encoded_len < RS_BLOCK_SIZE) {
-            memset(dst + encoded_len, 0, RS_BLOCK_SIZE - encoded_len);
-        }
-
-        src += block_data;
-        dst += RS_BLOCK_SIZE;
-        output_len += RS_BLOCK_SIZE;
-        remaining -= block_data;
-    }
-
-    return output_len;
-}
-
-int rs_decode_blocks(uint8_t *data, size_t total_len, uint8_t *output, size_t output_len) {
-    if (!rs_initialized) rs_init();
-
-    int total_errors = 0;
-    size_t remaining_output = output_len;
-    uint8_t *src = data;
-    uint8_t *dst = output;
-
-    while (total_len >= RS_BLOCK_SIZE && remaining_output > 0) {
-        // Always decode with full RS_DATA_SIZE since encoder pads to full blocks
-        // But only copy the bytes we actually need
-        size_t bytes_to_copy = (remaining_output > RS_DATA_SIZE) ? RS_DATA_SIZE : remaining_output;
-
-        // Decode block with full data size (modifies src in place)
-        int errors = rs_decode(src, RS_DATA_SIZE);
-        if (errors < 0) {
-            return -1;  // Uncorrectable block
-        }
-        total_errors += errors;
-
-        // Copy only the bytes we need to output
-        memcpy(dst, src, bytes_to_copy);
-
-        src += RS_BLOCK_SIZE;
-        dst += bytes_to_copy;
-        total_len -= RS_BLOCK_SIZE;
-        remaining_output -= bytes_to_copy;
-    }
-
-    return total_errors;
-}
diff --git a/video_encoder/lib/libfec/reed_solomon.h b/video_encoder/lib/libfec/reed_solomon.h
deleted file mode 100644
index 23858c7..0000000
--- a/video_encoder/lib/libfec/reed_solomon.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/**
- * Reed-Solomon (255,223) Codec for TAV-DT
- *
- * Standard RS code over GF(2^8):
- * - Block size: 255 bytes (223 data + 32 parity)
- * - Error correction: up to 16 byte errors
- * - Error detection: up to 32 byte errors
- *
- * Uses primitive polynomial: x^8 + x^4 + x^3 + x^2 + 1 (0x11D)
- * Generator polynomial: g(x) = product of (x - alpha^i) for i = 0..31
- *
- * Created by CuriousTorvald and Claude on 2025-12-09.
- */
-
-#ifndef REED_SOLOMON_H
-#define REED_SOLOMON_H
-
-#include <stdint.h>
-#include <stddef.h>
-
-// RS(255,223) parameters
-#define RS_BLOCK_SIZE     255   // Total codeword size
-#define RS_DATA_SIZE      223   // Data bytes per block
-#define RS_PARITY_SIZE    32    // Parity bytes per block (2t = 32, t = 16)
-#define RS_MAX_ERRORS     16    // Maximum correctable errors (t)
-
-/**
- * Initialize Reed-Solomon codec.
- * Must be called once before using encode/decode functions.
- * Thread-safe: uses static initialization.
- */
-void rs_init(void);
-
-/**
- * Encode data block with Reed-Solomon parity.
- *
- * @param data      Input data (up to RS_DATA_SIZE bytes)
- * @param data_len  Length of input data (1 to RS_DATA_SIZE)
- * @param output    Output buffer (must hold data_len + RS_PARITY_SIZE bytes)
- *                  Format: [data][parity]
- * @return          Total output length (data_len + RS_PARITY_SIZE)
- *
- * Note: For data shorter than RS_DATA_SIZE, the encoder pads with zeros
- * internally but only outputs actual data + parity.
- */
-size_t rs_encode(const uint8_t *data, size_t data_len, uint8_t *output);
-
-/**
- * Decode and correct Reed-Solomon encoded block.
- *
- * @param data      Buffer containing [data][parity] (modified in-place)
- * @param data_len  Length of data portion (1 to RS_DATA_SIZE)
- * @return          Number of errors corrected (0-16), or -1 if uncorrectable
- *
- * On success, data buffer contains corrected data (parity may also be corrected).
- * On failure, data buffer contents are undefined.
- */
-int rs_decode(uint8_t *data, size_t data_len);
-
-/**
- * Encode data with automatic block splitting.
- * For data larger than RS_DATA_SIZE, splits into multiple RS blocks.
- *
- * @param data        Input data
- * @param data_len    Length of input data
- * @param output      Output buffer (must hold ceil(data_len/223) * 255 bytes)
- * @return            Total output length
- */
-size_t rs_encode_blocks(const uint8_t *data, size_t data_len, uint8_t *output);
-
-/**
- * Decode data with automatic block splitting.
- *
- * @param data        Buffer containing RS-encoded blocks (modified in-place)
- * @param total_len   Total length of encoded data (multiple of RS_BLOCK_SIZE)
- * @param output      Output buffer for decoded data
- * @param output_len  Expected length of decoded data
- * @return            Total errors corrected across all blocks, or -1 if any block failed
- */
-int rs_decode_blocks(uint8_t *data, size_t total_len, uint8_t *output, size_t output_len);
-
-#endif // REED_SOLOMON_H
diff --git a/video_encoder/lib/libtaddec/decoder_tad.c b/video_encoder/lib/libtaddec/decoder_tad.c
deleted file mode 100644
index efa3f0e..0000000
--- a/video_encoder/lib/libtaddec/decoder_tad.c
+++ /dev/null
@@ -1,1192 +0,0 @@
-// Created by CuriousTorvald and Claude on 2025-10-23.
-// TAD (Terrarum Advanced Audio) Decoder - Reconstructs audio from TAD format
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <math.h>
-#include <zstd.h>
-#include <getopt.h>
-#include "encoder_tad.h"
-
-// TAD format constants (must match encoder)
-#undef TAD32_COEFF_SCALARS
-
-// Coefficient scalars for each subband (CDF 9/7 with 9 decomposition levels)
-// Index 0 = LL band, Index 1-9 = H bands (L9 to L1)
-static const float TAD32_COEFF_SCALARS[] = {64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f};
-
-// Base quantiser weight table (10 subbands: LL + 9 H bands)
-// These weights are multiplied by quantiser_scale during quantisation
-static const float BASE_QUANTISER_WEIGHTS[2][10] = {
-{ // mid channel
-    4.0f,    // LL (L9) DC
-    2.0f,    // H (L9) 31.25 hz
-    1.8f,    // H (L8) 62.5 hz
-    1.6f,    // H (L7) 125 hz
-    1.4f,    // H (L6) 250 hz
-    1.2f,    // H (L5) 500 hz
-    1.0f,    // H (L4) 1 khz
-    1.0f,    // H (L3) 2 khz
-    1.3f,    // H (L2) 4 khz
-    2.0f     // H (L1) 8 khz
-},
-{ // side channel
-    6.0f,    // LL (L9) DC
-    5.0f,    // H (L9) 31.25 hz
-    2.6f,    // H (L8) 62.5 hz
-    2.4f,    // H (L7) 125 hz
-    1.8f,    // H (L6) 250 hz
-    1.3f,    // H (L5) 500 hz
-    1.0f,    // H (L4) 1 khz
-    1.0f,    // H (L3) 2 khz
-    1.6f,    // H (L2) 4 khz
-    3.2f     // H (L1) 8 khz
-}};
-
-#define TAD_DEFAULT_CHUNK_SIZE 32768
-#define TAD_MIN_CHUNK_SIZE 1024
-#define TAD_SAMPLE_RATE 32000
-#define TAD_CHANNELS 2
-
-// Significance map methods
-#define TAD_SIGMAP_1BIT 0
-#define TAD_SIGMAP_2BIT 1
-#define TAD_SIGMAP_RLE  2
-
-// Quality levels
-#define TAD_QUALITY_MIN 0
-#define TAD_QUALITY_MAX 5
-
-static inline float FCLAMP(float x, float min, float max) {
-    return x < min ? min : (x > max ? max : x);
-}
-
-//=============================================================================
-// Spectral Interpolation for Coefficient Reconstruction
-//=============================================================================
-
-// Fast PRNG for light dithering (xorshift32)
-static inline uint32_t xorshift32(uint32_t *s) {
-    uint32_t x = *s;
-    x ^= x << 13;
-    x ^= x >> 17;
-    x ^= x << 5;
-    return *s = x;
-}
-
-static inline float urand(uint32_t *s) {
-    return (xorshift32(s) & 0xFFFFFF) / 16777216.0f;
-}
-
-static inline float tpdf(uint32_t *s) {
-    return urand(s) - urand(s);
-}
-
-// Compute RMS energy of a coefficient band
-static float compute_band_rms(const float *c, size_t len) {
-    if (len == 0) return 0.0f;
-    double sumsq = 0.0;
-    for (size_t i = 0; i < len; i++) {
-        sumsq += (double)c[i] * c[i];
-    }
-    return sqrtf((float)(sumsq / (double)len));
-}
-
-// Simplified spectral reconstruction for wavelet coefficients
-// Conservative approach: only interpolate obvious holes, add light dither
-// Avoids aggressive AR prediction that can create artifacts
-static void spectral_interpolate_band(float *c, size_t len, float Q, float lower_band_rms) {
-    if (len < 4) return;
-
-    uint32_t seed = 0x9E3779B9u ^ (uint32_t)len ^ (uint32_t)(Q * 65536.0f);
-    const float dither_amp = 0.02f * Q;  // Very light dither
-
-    // Just add ultra-light TPDF dither to reduce quantisation grain
-    // No aggressive hole filling or AR prediction that might create artifacts
-    for (size_t i = 0; i < len; i++) {
-        c[i] += tpdf(&seed) * dither_amp;
-    }
-
-    (void)lower_band_rms;  // Unused for now - conservative approach
-}
-
-//=============================================================================
-// WAV Header Writing
-//=============================================================================
-
-static void write_wav_header(FILE *output, uint32_t data_size, uint16_t channels, uint32_t sample_rate, uint16_t bits_per_sample) {
-    uint32_t byte_rate = sample_rate * channels * bits_per_sample / 8;
-    uint16_t block_align = channels * bits_per_sample / 8;
-    uint32_t chunk_size = 36 + data_size;
-
-    // RIFF header
-    fwrite("RIFF", 1, 4, output);
-    fwrite(&chunk_size, 4, 1, output);
-    fwrite("WAVE", 1, 4, output);
-
-    // fmt chunk
-    fwrite("fmt ", 1, 4, output);
-    uint32_t fmt_size = 16;
-    fwrite(&fmt_size, 4, 1, output);
-    uint16_t audio_format = 1;  // PCM
-    fwrite(&audio_format, 2, 1, output);
-    fwrite(&channels, 2, 1, output);
-    fwrite(&sample_rate, 4, 1, output);
-    fwrite(&byte_rate, 4, 1, output);
-    fwrite(&block_align, 2, 1, output);
-    fwrite(&bits_per_sample, 2, 1, output);
-
-    // data chunk header
-    fwrite("data", 1, 4, output);
-    fwrite(&data_size, 4, 1, output);
-}
-
-// Calculate DWT levels from chunk size (must be power of 2, >= 1024)
-static int calculate_dwt_levels(int chunk_size) {
-    /*if (chunk_size < TAD_MIN_CHUNK_SIZE) {
-        fprintf(stderr, "Error: Chunk size %d is below minimum %d\n", chunk_size, TAD_MIN_CHUNK_SIZE);
-        return -1;
-    }
-
-    // Calculate levels: log2(chunk_size) - 1
-    int levels = 0;
-    int size = chunk_size;
-    while (size > 1) {
-        size >>= 1;
-        levels++;
-    }
-    return levels - 2;*/
-    return 9;
-}
-
-//=============================================================================
-// Stochastic Reconstruction for Deadzoned Coefficients
-//=============================================================================
-
-// Special marker for deadzoned coefficients (must match encoder)
-#define DEADZONE_MARKER_QUANT (-128)
-
-// Deadband thresholds (must match encoder)
-static const float DEADBANDS[2][10] = {
-{ // mid channel
-    0.20f,    // LL (L9) DC
-    0.06f,    // H (L9) 31.25 hz
-    0.06f,    // H (L8) 62.5 hz
-    0.06f,    // H (L7) 125 hz
-    0.06f,    // H (L6) 250 hz
-    0.04f,    // H (L5) 500 hz
-    0.04f,    // H (L4) 1 khz
-    0.01f,    // H (L3) 2 khz
-    0.01f,    // H (L2) 4 khz
-    0.01f     // H (L1) 8 khz
-},
-{ // side channel
-    0.20f,    // LL (L9) DC
-    0.06f,    // H (L9) 31.25 hz
-    0.06f,    // H (L8) 62.5 hz
-    0.06f,    // H (L7) 125 hz
-    0.06f,    // H (L6) 250 hz
-    0.04f,    // H (L5) 500 hz
-    0.04f,    // H (L4) 1 khz
-    0.01f,    // H (L3) 2 khz
-    0.01f,    // H (L2) 4 khz
-    0.01f     // H (L1) 8 khz
-}};
-
-// Fast PRNG state (xorshift32) for stochastic reconstruction
-static uint32_t deadzone_rng_state = 0x12345678u;
-
-// Laplacian-distributed noise (better approximation than TPDF)
-// Uses inverse CDF method: X = -sign(U) * ln(1 - 2*|U|) / λ
-static float laplacian_noise(float scale) {
-    float u = urand(&deadzone_rng_state) - 0.5f;  // [-0.5, 0.5)
-    float sign = (u >= 0.0f) ? 1.0f : -1.0f;
-    float abs_u = fabsf(u);
-
-    // Avoid log(0) by clamping
-    if (abs_u >= 0.49999f) abs_u = 0.49999f;
-
-    // Inverse Laplacian CDF with λ = 1/scale
-    float x = -sign * logf(1.0f - 2.0f * abs_u) * scale;
-
-    return x;
-}
-
-//=============================================================================
-// Haar DWT Implementation (inverse only needed for decoder)
-//=============================================================================
-
-static void dwt_haar_inverse_1d(float *data, int length) {
-    if (length < 2) return;
-
-    float *temp = malloc(length * sizeof(float));
-    int half = (length + 1) / 2;
-
-    for (int i = 0; i < half; i++) {
-        if (2 * i + 1 < length) {
-            temp[2 * i] = data[i] + data[half + i];
-            temp[2 * i + 1] = data[i] - data[half + i];
-        } else {
-            temp[2 * i] = data[i];
-        }
-    }
-
-    memcpy(data, temp, length * sizeof(float));
-    free(temp);
-}
-
-// 9/7 inverse DWT (from TSVM Kotlin code)
-static void dwt_97_inverse_1d(float *data, int length) {
-    if (length < 2) return;
-
-    float *temp = malloc(length * sizeof(float));
-    int half = (length + 1) / 2;
-
-    // Split into low and high frequency components (matching TSVM layout)
-    for (int i = 0; i < half; i++) {
-        temp[i] = data[i];  // Low-pass coefficients (first half)
-    }
-    for (int i = 0; i < length / 2; i++) {
-        if (half + i < length) {
-            temp[half + i] = data[half + i];  // High-pass coefficients (second half)
-        }
-    }
-
-    // 9/7 inverse lifting coefficients from TSVM
-    const float alpha = -1.586134342f;
-    const float beta = -0.052980118f;
-    const float gamma = 0.882911076f;
-    const float delta = 0.443506852f;
-    const float K = 1.230174105f;
-
-    // Step 1: Undo scaling
-    for (int i = 0; i < half; i++) {
-        temp[i] /= K;  // Low-pass coefficients
-    }
-    for (int i = 0; i < length / 2; i++) {
-        if (half + i < length) {
-            temp[half + i] *= K;  // High-pass coefficients
-        }
-    }
-
-    // Step 2: Undo δ update
-    for (int i = 0; i < half; i++) {
-        float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
-        float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr;
-        temp[i] -= delta * (d_curr + d_prev);
-    }
-
-    // Step 3: Undo γ predict
-    for (int i = 0; i < length / 2; i++) {
-        if (half + i < length) {
-            float s_curr = temp[i];
-            float s_next = (i + 1 < half) ? temp[i + 1] : s_curr;
-            temp[half + i] -= gamma * (s_curr + s_next);
-        }
-    }
-
-    // Step 4: Undo β update
-    for (int i = 0; i < half; i++) {
-        float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
-        float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr;
-        temp[i] -= beta * (d_curr + d_prev);
-    }
-
-    // Step 5: Undo α predict
-    for (int i = 0; i < length / 2; i++) {
-        if (half + i < length) {
-            float s_curr = temp[i];
-            float s_next = (i + 1 < half) ? temp[i + 1] : s_curr;
-            temp[half + i] -= alpha * (s_curr + s_next);
-        }
-    }
-
-    // Reconstruction - interleave low and high pass
-    for (int i = 0; i < length; i++) {
-        if (i % 2 == 0) {
-            // Even positions: low-pass coefficients
-            data[i] = temp[i / 2];
-        } else {
-            // Odd positions: high-pass coefficients
-            int idx = i / 2;
-            if (half + idx < length) {
-                data[i] = temp[half + idx];
-            } else {
-                data[i] = 0.0f;
-            }
-        }
-    }
-
-    free(temp);
-}
-
-// Inverse 1D transform of Four-point interpolating Deslauriers-Dubuc (DD-4)
-static void dwt_dd4_inverse_1d(float *data, int length) {
-    if (length < 2) return;
-
-    float *temp = malloc(length * sizeof(float));
-    int half = (length + 1) / 2;
-
-    // Split into low (even) and high (odd) parts
-    for (int i = 0; i < half; i++) {
-        temp[i] = data[i];               // Even (low-pass)
-    }
-    for (int i = 0; i < length / 2; i++) {
-        temp[half + i] = data[half + i]; // Odd (high-pass)
-    }
-
-    // Undo update step: s[i] -= 0.25 * (d[i-1] + d[i])
-    for (int i = 0; i < half; i++) {
-        float d_curr = (i < length / 2) ? temp[half + i] : 0.0f;
-        float d_prev = (i > 0 && i - 1 < length / 2) ? temp[half + i - 1] : 0.0f;
-        temp[i] -= 0.25f * (d_prev + d_curr);
-    }
-
-    // Undo prediction step: d[i] += P(s[i-1], s[i], s[i+1], s[i+2])
-    for (int i = 0; i < length / 2; i++) {
-        float s_m1, s_0, s_1, s_2;
-
-        if (i > 0) s_m1 = temp[i - 1];
-        else s_m1 = temp[0];  // mirror boundary
-
-        s_0 = temp[i];
-
-        if (i + 1 < half) s_1 = temp[i + 1];
-        else s_1 = temp[half - 1];
-
-        if (i + 2 < half) s_2 = temp[i + 2];
-        else if (half > 1) s_2 = temp[half - 2];
-        else s_2 = temp[half - 1];
-
-        float prediction = (-1.0f/16.0f)*s_m1 + (9.0f/16.0f)*s_0 +
-                           (9.0f/16.0f)*s_1 + (-1.0f/16.0f)*s_2;
-
-        temp[half + i] += prediction;
-    }
-
-    // Merge evens and odds back into the original order
-    for (int i = 0; i < half; i++) {
-        data[2 * i] = temp[i];
-        if (2 * i + 1 < length)
-            data[2 * i + 1] = temp[half + i];
-    }
-
-    free(temp);
-}
-
-static void dwt_inverse_multilevel(float *data, int length, int levels) {
-    // Pre-calculate all intermediate lengths used during forward transform
-    // Forward uses: data[0..length-1], then data[0..(length+1)/2-1], etc.
-    int *lengths = malloc((levels + 1) * sizeof(int));
-    lengths[0] = length;
-    for (int i = 1; i <= levels; i++) {
-        lengths[i] = (lengths[i - 1] + 1) / 2;
-    }
-
-    // Inverse transform: apply inverse DWT using exact forward lengths in reverse order
-    // Forward applied DWT with lengths: [length, (length+1)/2, ((length+1)/2+1)/2, ...]
-    // Inverse must use same lengths in reverse: [..., ((length+1)/2+1)/2, (length+1)/2, length]
-    for (int level = levels - 1; level >= 0; level--) {
-        int current_length = lengths[level];
-//        dwt_haar_inverse_1d(data, current_length);  // THEN apply inverse
-//        dwt_dd4_inverse_1d(data, current_length);  // THEN apply inverse
-        dwt_97_inverse_1d(data, current_length);  // THEN apply inverse
-    }
-
-    free(lengths);
-}
-
-//=============================================================================
-// M/S Stereo Correlation (inverse of decorrelation)
-//=============================================================================
-
-// Uniform random in [0, 1)
-static inline float frand01(void) {
-    return (float)rand() / ((float)RAND_MAX + 1.0f);
-}
-
-// TPDF noise in [-1, +1)
-static inline float tpdf1(void) {
-    return (frand01() - frand01());
-}
-
-static void ms_correlate(const float *mid, const float *side, float *left, float *right, size_t count) {
-    for (size_t i = 0; i < count; i++) {
-        // Decode M/S → L/R
-        float m = mid[i];
-        float s = side[i];
-        left[i] = FCLAMP((m + s), -1.0f, 1.0f);
-        right[i] = FCLAMP((m - s), -1.0f, 1.0f);
-    }
-}
-
-static float signum(float x) {
-    if (x > 0.0f) return 1.0f;
-    if (x < 0.0f) return -1.0f;
-    return 0.0f;
-}
-
-static void expand_gamma(float *left, float *right, size_t count) {
-    for (size_t i = 0; i < count; i++) {
-        // decode(y) = sign(y) * |y|^(1/γ) where γ=0.5
-        float x = left[i]; float a = fabsf(x);
-        left[i] = signum(x) * a * a;
-        float y = right[i]; float b = fabsf(y);
-        right[i] = signum(y) * b * b;
-    }
-}
-
-static void expand_mu_law(float *left, float *right, size_t count) {
-    static float MU = 255.0f;
-
-    for (size_t i = 0; i < count; i++) {
-        // decode(y) = sign(y) * |y|^(1/γ) where γ=0.5
-        float x = left[i];
-        left[i] = signum(x) * (powf(1.0f + MU, fabsf(x)) - 1.0f) / MU;
-        float y = right[i];
-        right[i] = signum(y) * (powf(1.0f + MU, fabsf(y)) - 1.0f) / MU;
-    }
-}
-
-//=============================================================================
-// De-emphasis Filter
-//=============================================================================
-
-static void calculate_deemphasis_coeffs(float *b0, float *b1, float *a1) {
-    // De-emphasis factor
-    const float alpha = 0.5f;
-
-    *b0 = 1.0f;
-    *b1 = 0.0f;  // No feedforward delay
-    *a1 = -alpha;  // NEGATIVE because equation has minus sign: y = x - a1*prev_y
-}
-
-static void apply_deemphasis(float *left, float *right, size_t count) {
-    // Static state variables - persistent across chunks to prevent discontinuities
-    static float prev_x_l = 0.0f;
-    static float prev_y_l = 0.0f;
-    static float prev_x_r = 0.0f;
-    static float prev_y_r = 0.0f;
-
-    float b0, b1, a1;
-    calculate_deemphasis_coeffs(&b0, &b1, &a1);
-
-    // Left channel - use persistent state
-    for (size_t i = 0; i < count; i++) {
-        float x = left[i];
-        float y = b0 * x + b1 * prev_x_l - a1 * prev_y_l;
-        left[i] = y;
-        prev_x_l = x;
-        prev_y_l = y;
-    }
-
-    // Right channel - use persistent state
-    for (size_t i = 0; i < count; i++) {
-        float x = right[i];
-        float y = b0 * x + b1 * prev_x_r - a1 * prev_y_r;
-        right[i] = y;
-        prev_x_r = x;
-        prev_y_r = y;
-    }
-}
-
-static void pcm32f_to_pcm8(const float *fleft, const float *fright, uint8_t *left, uint8_t *right, size_t count, float dither_error[2][2]) {
-    const float b1 = 1.5f;   // 1st feedback coefficient
-    const float b2 = -0.75f; // 2nd feedback coefficient
-    const float scale = 127.5f;
-    const float bias  = 128.0f;
-
-    // Reduced dither amplitude to coordinate with coefficient-domain dithering
-    // The decoder now adds TPDF dither in coefficient domain, so we reduce
-    // sample-domain dither by ~60% to avoid doubling the noise floor
-    const float dither_scale = 0.2f;  // Reduced from 0.5 (was ±0.5 LSB, now ±0.2 LSB)
-
-    for (size_t i = 0; i < count; i++) {
-        // --- LEFT channel ---
-        float feedbackL = b1 * dither_error[0][0] + b2 * dither_error[0][1];
-        float ditherL = dither_scale * tpdf1(); // Reduced TPDF dither
-        float shapedL = fleft[i] + feedbackL + ditherL / scale;
-        shapedL = FCLAMP(shapedL, -1.0f, 1.0f);
-
-        int qL = (int)lrintf(shapedL * scale);
-        if (qL < -128) qL = -128;
-        else if (qL > 127) qL = 127;
-        left[i] = (uint8_t)(qL + bias);
-
-        float qerrL = shapedL - (float)qL / scale;
-        dither_error[0][1] = dither_error[0][0]; // shift history
-        dither_error[0][0] = qerrL;
-
-        // --- RIGHT channel ---
-        float feedbackR = b1 * dither_error[1][0] + b2 * dither_error[1][1];
-        float ditherR = dither_scale * tpdf1(); // Reduced TPDF dither
-        float shapedR = fright[i] + feedbackR + ditherR / scale;
-        shapedR = FCLAMP(shapedR, -1.0f, 1.0f);
-
-        int qR = (int)lrintf(shapedR * scale);
-        if (qR < -128) qR = -128;
-        else if (qR > 127) qR = 127;
-        right[i] = (uint8_t)(qR + bias);
-
-        float qerrR = shapedR - (float)qR / scale;
-        dither_error[1][1] = dither_error[1][0];
-        dither_error[1][0] = qerrR;
-    }
-}
-
-//=============================================================================
-// Dequantisation (inverse of quantisation)
-//=============================================================================
-
-
-#define LAMBDA_FIXED 6.0f
-
-// Lambda-based decompanding decoder (inverse of Laplacian CDF-based encoder)
-// Converts quantised index back to normalised float in [-1, 1]
-static float lambda_decompanding(int8_t quant_val, int max_index) {
-    // Handle zero
-    if (quant_val == 0) {
-        return 0.0f;
-    }
-
-    int sign = (quant_val < 0) ? -1 : 1;
-    int abs_index = abs(quant_val);
-
-    // Clamp to valid range
-    if (abs_index > max_index) abs_index = max_index;
-
-    // Map index back to normalised CDF [0, 1]
-    float normalised_cdf = (float)abs_index / max_index;
-
-    // Map from [0, 1] back to [0.5, 1.0] (CDF range for positive half)
-    float cdf = 0.5f + normalised_cdf * 0.5f;
-
-    // Inverse Laplacian CDF for x >= 0: x = -(1/λ) * ln(2*(1-F))
-    // For F in [0.5, 1.0]: x = -(1/λ) * ln(2*(1-F))
-    float abs_val = -(1.0f / LAMBDA_FIXED) * logf(2.0f * (1.0f - cdf));
-
-    // Clamp to [0, 1]
-    if (abs_val > 1.0f) abs_val = 1.0f;
-    if (abs_val < 0.0f) abs_val = 0.0f;
-
-    return sign * abs_val;
-}
-
-static void dequantise_dwt_coefficients(int channel, const int8_t *quantised, float *coeffs, size_t count, int chunk_size, int dwt_levels, int max_index, float quantiser_scale) {
-
-    // Calculate sideband boundaries dynamically
-    int first_band_size = chunk_size >> dwt_levels;
-
-    int *sideband_starts = malloc((dwt_levels + 2) * sizeof(int));
-    sideband_starts[0] = 0;
-    sideband_starts[1] = first_band_size;
-    for (int i = 2; i <= dwt_levels + 1; i++) {
-        sideband_starts[i] = sideband_starts[i-1] + (first_band_size << (i-2));
-    }
-
-    // Dequantise all coefficients with stochastic reconstruction for deadzoned values
-    for (size_t i = 0; i < count; i++) {
-        int sideband = dwt_levels;
-        for (int s = 0; s <= dwt_levels; s++) {
-            if (i < sideband_starts[s + 1]) {
-                sideband = s;
-                break;
-            }
-        }
-
-        // Check for deadzone marker
-        /*if (quantised[i] == (int8_t)0) {//DEADZONE_MARKER_QUANT) {
-            // Stochastic reconstruction: generate Laplacian noise in deadband range
-            float deadband_threshold = DEADBANDS[channel][sideband];
-
-            // Generate Laplacian-distributed noise scaled to deadband width
-            // Use scale = threshold/3 to keep ~99% of samples within [-threshold, +threshold]
-            float noise = tpdf1() * deadband_threshold / 10.0f;
-
-            // Clamp to deadband range
-            if (noise > deadband_threshold) noise = deadband_threshold;
-            if (noise < -deadband_threshold) noise = -deadband_threshold;
-
-            // Apply scalar (but not quantiser weight - noise is already in correct range)
-            coeffs[i] = noise * TAD32_COEFF_SCALARS[sideband];
-        } else {*/
-            // Normal dequantisation using lambda decompanding
-            float normalised_val = lambda_decompanding(quantised[i], max_index);
-
-            // Denormalise using the subband scalar and apply base weight + quantiser scaling
-            float weight = BASE_QUANTISER_WEIGHTS[channel][sideband] * quantiser_scale;
-            coeffs[i] = normalised_val * TAD32_COEFF_SCALARS[sideband] * weight;
-//        }
-    }
-
-    // Note: Stochastic reconstruction replaces the old spectral interpolation step
-    // No need for additional processing - deadzoned coefficients already have appropriate noise
-
-    free(sideband_starts);
-}
-
-//=============================================================================
-// Binary Tree EZBC Decoder (1D Variant for TAD)
-//=============================================================================
-
-#include <stdbool.h>
-
-// Bitstream reader for EZBC
-typedef struct {
-    const uint8_t *data;
-    size_t size;
-    size_t byte_pos;
-    uint8_t bit_pos;  // 0-7, current bit position in current byte
-} tad_bitstream_reader_t;
-
-// Block structure for 1D binary tree (same as encoder)
-typedef struct {
-    int start;
-    int length;
-} tad_decode_block_t;
-
-// Queue for block processing (same as encoder)
-typedef struct {
-    tad_decode_block_t *blocks;
-    size_t count;
-    size_t capacity;
-} tad_decode_queue_t;
-
-// Track coefficient state for refinement
-typedef struct {
-    bool significant;
-    int first_bitplane;
-} tad_decode_state_t;
-
-// Bitstream read operations
-static void tad_bitstream_reader_init(tad_bitstream_reader_t *bs, const uint8_t *data, size_t size) {
-    bs->data = data;
-    bs->size = size;
-    bs->byte_pos = 0;
-    bs->bit_pos = 0;
-}
-
-static int tad_bitstream_read_bit(tad_bitstream_reader_t *bs) {
-    if (bs->byte_pos >= bs->size) {
-        fprintf(stderr, "Error: Bitstream underflow\n");
-        return 0;
-    }
-
-    int bit = (bs->data[bs->byte_pos] >> bs->bit_pos) & 1;
-
-    bs->bit_pos++;
-    if (bs->bit_pos == 8) {
-        bs->bit_pos = 0;
-        bs->byte_pos++;
-    }
-
-    return bit;
-}
-
-static uint32_t tad_bitstream_read_bits(tad_bitstream_reader_t *bs, int num_bits) {
-    uint32_t value = 0;
-    for (int i = 0; i < num_bits; i++) {
-        value |= (tad_bitstream_read_bit(bs) << i);
-    }
-    return value;
-}
-
-// Queue operations
-static void tad_decode_queue_init(tad_decode_queue_t *q) {
-    q->capacity = 1024;
-    q->blocks = malloc(q->capacity * sizeof(tad_decode_block_t));
-    q->count = 0;
-}
-
-static void tad_decode_queue_push(tad_decode_queue_t *q, tad_decode_block_t block) {
-    if (q->count >= q->capacity) {
-        q->capacity *= 2;
-        q->blocks = realloc(q->blocks, q->capacity * sizeof(tad_decode_block_t));
-    }
-    q->blocks[q->count++] = block;
-}
-
-static void tad_decode_queue_free(tad_decode_queue_t *q) {
-    free(q->blocks);
-}
-
-// Context for recursive EZBC decoding
-typedef struct {
-    tad_bitstream_reader_t *bs;
-    int8_t *coeffs;
-    tad_decode_state_t *states;
-    int bitplane;
-    tad_decode_queue_t *next_insignificant;
-    tad_decode_queue_t *next_significant;
-} tad_decode_context_t;
-
-// Recursively decode a significant block - subdivide until size 1
-static void tad_decode_significant_block_recursive(tad_decode_context_t *ctx, tad_decode_block_t block) {
-    // If size 1: read sign bit and reconstruct value
-    if (block.length == 1) {
-        int idx = block.start;
-        int sign_bit = tad_bitstream_read_bit(ctx->bs);
-
-        // Reconstruct absolute value from bitplane
-        int abs_val = 1 << ctx->bitplane;
-
-        // Apply sign
-        ctx->coeffs[idx] = sign_bit ? -abs_val : abs_val;
-
-        ctx->states[idx].significant = true;
-        ctx->states[idx].first_bitplane = ctx->bitplane;
-        tad_decode_queue_push(ctx->next_significant, block);
-        return;
-    }
-
-    // Block is > 1: subdivide into left and right halves
-    int mid = block.length / 2;
-    if (mid == 0) mid = 1;
-
-    // Process left child
-    tad_decode_block_t left = {block.start, mid};
-    int left_sig = tad_bitstream_read_bit(ctx->bs);
-    if (left_sig) {
-        tad_decode_significant_block_recursive(ctx, left);
-    } else {
-        tad_decode_queue_push(ctx->next_insignificant, left);
-    }
-
-    // Process right child (if exists)
-    if (block.length > mid) {
-        tad_decode_block_t right = {block.start + mid, block.length - mid};
-        int right_sig = tad_bitstream_read_bit(ctx->bs);
-        if (right_sig) {
-            tad_decode_significant_block_recursive(ctx, right);
-        } else {
-            tad_decode_queue_push(ctx->next_insignificant, right);
-        }
-    }
-}
-
-// Binary tree EZBC decoding for a single channel (1D variant)
-static int tad_decode_channel_ezbc(const uint8_t *input, size_t input_size, int8_t *coeffs, size_t *bytes_consumed) {
-    tad_bitstream_reader_t bs;
-    tad_bitstream_reader_init(&bs, input, input_size);
-
-    // Read header: MSB bitplane and length
-    int msb_bitplane = tad_bitstream_read_bits(&bs, 8);
-    uint32_t count = tad_bitstream_read_bits(&bs, 16);
-
-    // Initialise coefficient array to zero
-    memset(coeffs, 0, count * sizeof(int8_t));
-
-    // Track coefficient significance
-    tad_decode_state_t *states = calloc(count, sizeof(tad_decode_state_t));
-
-    // Initialise queues
-    tad_decode_queue_t insignificant_queue, next_insignificant;
-    tad_decode_queue_t significant_queue, next_significant;
-
-    tad_decode_queue_init(&insignificant_queue);
-    tad_decode_queue_init(&next_insignificant);
-    tad_decode_queue_init(&significant_queue);
-    tad_decode_queue_init(&next_significant);
-
-    // Start with root block as insignificant
-    tad_decode_block_t root = {0, (int)count};
-    tad_decode_queue_push(&insignificant_queue, root);
-
-    // Process bitplanes from MSB to LSB
-    for (int bitplane = msb_bitplane; bitplane >= 0; bitplane--) {
-        // Process insignificant blocks
-        for (size_t i = 0; i < insignificant_queue.count; i++) {
-            tad_decode_block_t block = insignificant_queue.blocks[i];
-
-            int sig = tad_bitstream_read_bit(&bs);
-            if (sig == 0) {
-                // Still insignificant
-                tad_decode_queue_push(&next_insignificant, block);
-            } else {
-                // Became significant: recursively decode
-                tad_decode_context_t ctx = {
-                    .bs = &bs,
-                    .coeffs = coeffs,
-                    .states = states,
-                    .bitplane = bitplane,
-                    .next_insignificant = &next_insignificant,
-                    .next_significant = &next_significant
-                };
-                tad_decode_significant_block_recursive(&ctx, block);
-            }
-        }
-
-        // Refinement pass: read next bit for already-significant coefficients
-        for (size_t i = 0; i < significant_queue.count; i++) {
-            tad_decode_block_t block = significant_queue.blocks[i];
-            int idx = block.start;
-
-            int bit = tad_bitstream_read_bit(&bs);
-
-            // Add this bit to the coefficient's magnitude
-            if (bit) {
-                int sign = (coeffs[idx] < 0) ? -1 : 1;
-                int abs_val = abs(coeffs[idx]);
-                abs_val |= (1 << bitplane);
-                coeffs[idx] = sign * abs_val;
-            }
-
-            // Add to next_significant so it continues being refined
-            tad_decode_queue_push(&next_significant, block);
-        }
-
-        // Swap queues for next bitplane
-        tad_decode_queue_t temp_insig = insignificant_queue;
-        insignificant_queue = next_insignificant;
-        next_insignificant = temp_insig;
-        next_insignificant.count = 0;
-
-        tad_decode_queue_t temp_sig = significant_queue;
-        significant_queue = next_significant;
-        next_significant = temp_sig;
-        next_significant.count = 0;
-    }
-
-    // Cleanup
-    tad_decode_queue_free(&insignificant_queue);
-    tad_decode_queue_free(&next_insignificant);
-    tad_decode_queue_free(&significant_queue);
-    tad_decode_queue_free(&next_significant);
-    free(states);
-
-    // Calculate bytes consumed
-    *bytes_consumed = bs.byte_pos + (bs.bit_pos > 0 ? 1 : 0);
-
-    return 0;  // Success
-}
-
-//=============================================================================
-// Chunk Decoding
-//=============================================================================
-
-// Public API: TAD32 chunk decoder (can be used by both standalone decoder and TAV decoder)
-int tad32_decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_stereo,
-                       size_t *bytes_consumed, size_t *samples_decoded) {
-    const uint8_t *read_ptr = input;
-
-    // Read chunk header
-    uint16_t sample_count = *((const uint16_t*)read_ptr);
-    read_ptr += sizeof(uint16_t);
-
-    uint8_t max_index = *read_ptr;
-    read_ptr += sizeof(uint8_t);
-
-    uint32_t payload_size_field = *((const uint32_t*)read_ptr);
-    read_ptr += sizeof(uint32_t);
-
-    // Check MSB for uncompressed flag
-    int is_uncompressed = (payload_size_field & 0x80000000) != 0;
-    uint32_t payload_size = payload_size_field & 0x7FFFFFFF;
-
-    // Calculate DWT levels from sample count
-    int dwt_levels = calculate_dwt_levels(sample_count);
-    if (dwt_levels < 0) {
-        fprintf(stderr, "Error: Invalid sample count %u\n", sample_count);
-        return -1;
-    }
-
-    // Decompress Zstd (or use raw data if uncompressed)
-    uint8_t *decompressed = NULL;
-    size_t actual_size;
-    int should_free_decompressed;
-
-    if (is_uncompressed) {
-        // Data is not compressed - use directly
-        decompressed = (uint8_t *)read_ptr;  // Cast away const, won't modify
-        actual_size = payload_size;
-        should_free_decompressed = 0;
-    } else {
-        // Normal Zstd decompression
-        // Estimate decompressed size (generous upper bound)
-        size_t decompressed_size = sample_count * 4 * sizeof(int8_t);
-        decompressed = malloc(decompressed_size);
-
-        actual_size = ZSTD_decompress(decompressed, decompressed_size, read_ptr, payload_size);
-
-        if (ZSTD_isError(actual_size)) {
-            fprintf(stderr, "Error: Zstd decompression failed: %s\n", ZSTD_getErrorName(actual_size));
-            free(decompressed);
-            return -1;
-        }
-        should_free_decompressed = 1;
-    }
-
-    read_ptr += payload_size;
-    *bytes_consumed = read_ptr - input;
-    *samples_decoded = sample_count;
-
-    // Allocate working buffers
-    int8_t *quant_mid = malloc(sample_count * sizeof(int8_t));
-    int8_t *quant_side = malloc(sample_count * sizeof(int8_t));
-    float *dwt_mid = malloc(sample_count * sizeof(float));
-    float *dwt_side = malloc(sample_count * sizeof(float));
-    float *pcm32_left = malloc(sample_count * sizeof(float));
-    float *pcm32_right = malloc(sample_count * sizeof(float));
-    uint8_t *pcm8_left = malloc(sample_count * sizeof(uint8_t));
-    uint8_t *pcm8_right = malloc(sample_count * sizeof(uint8_t));
-
-    // Decode Mid/Side using binary tree EZBC - FIXED!
-    size_t mid_bytes_consumed = 0;
-    size_t side_bytes_consumed = 0;
-
-    // Decode Mid channel
-    int result = tad_decode_channel_ezbc(decompressed, actual_size, quant_mid, &mid_bytes_consumed);
-    if (result != 0) {
-        fprintf(stderr, "Error: EZBC decoding failed for Mid channel\n");
-        if (should_free_decompressed) free(decompressed);
-        free(quant_mid); free(quant_side); free(dwt_mid); free(dwt_side);
-        free(pcm32_left); free(pcm32_right); free(pcm8_left); free(pcm8_right);
-        return -1;
-    }
-
-    // Decode Side channel (starts after Mid channel data)
-    result = tad_decode_channel_ezbc(decompressed + mid_bytes_consumed,
-                                      actual_size - mid_bytes_consumed,
-                                      quant_side, &side_bytes_consumed);
-    if (result != 0) {
-        fprintf(stderr, "Error: EZBC decoding failed for Side channel\n");
-        if (should_free_decompressed) free(decompressed);
-        free(quant_mid); free(quant_side); free(dwt_mid); free(dwt_side);
-        free(pcm32_left); free(pcm32_right); free(pcm8_left); free(pcm8_right);
-        return -1;
-    }
-
-    // Dequantise with quantiser scaling and spectral interpolation
-    // Use quantiser_scale = 1.0f for baseline (must match encoder)
-    float quantiser_scale = 1.0f;
-    dequantise_dwt_coefficients(0, quant_mid, dwt_mid, sample_count, sample_count, dwt_levels, max_index, quantiser_scale);
-    dequantise_dwt_coefficients(1, quant_side, dwt_side, sample_count, sample_count, dwt_levels, max_index, quantiser_scale);
-
-    // Inverse DWT
-    dwt_inverse_multilevel(dwt_mid, sample_count, dwt_levels);
-    dwt_inverse_multilevel(dwt_side, sample_count, dwt_levels);
-
-    float err[2][2] = {{0,0},{0,0}};
-
-    // M/S to L/R correlation
-    ms_correlate(dwt_mid, dwt_side, pcm32_left, pcm32_right, sample_count);
-
-    // expand dynamic range
-    expand_gamma(pcm32_left, pcm32_right, sample_count);
-//    expand_mu_law(pcm32_left, pcm32_right, sample_count);
-
-    // Apply de-emphasis filter (AFTER gamma expansion, BEFORE PCM32f to PCM8)
-    apply_deemphasis(pcm32_left, pcm32_right, sample_count);
-
-    // dither to 8-bit
-    pcm32f_to_pcm8(pcm32_left, pcm32_right, pcm8_left, pcm8_right, sample_count, err);
-
-    // Interleave stereo output (PCMu8)
-    for (size_t i = 0; i < sample_count; i++) {
-        pcmu8_stereo[i * 2] = pcm8_left[i];
-        pcmu8_stereo[i * 2 + 1] = pcm8_right[i];
-    }
-
-    // Cleanup
-    free(quant_mid); free(quant_side); free(dwt_mid); free(dwt_side);
-    free(pcm32_left); free(pcm32_right); free(pcm8_left); free(pcm8_right);
-    if (should_free_decompressed && decompressed) free(decompressed);
-
-    return 0;
-}
-
-//=============================================================================
-// Main Decoder
-//=============================================================================
-
-#ifndef TAD_DECODER_LIB  // Only compile main() when building standalone decoder
-static void print_usage(const char *prog_name) {
-    printf("Usage: %s -i <input> [options]\n", prog_name);
-    printf("Options:\n");
-    printf("  -i <file>       Input TAD file\n");
-    printf("  -o <file>       Output file (optional, auto-generated from input)\n");
-    printf("                  Default: input_qNN.wav (or .pcm with --raw-pcm)\n");
-    printf("  --raw-pcm       Output raw PCMu8 instead of WAV file\n");
-    printf("  -v              Verbose output\n");
-    printf("  -h, --help      Show this help\n");
-    printf("Default output: WAV file (8-bit unsigned PCM, stereo @ 32000 Hz)\n");
-    printf("With --raw-pcm: PCMu8 raw file (8-bit unsigned stereo @ 32000 Hz)\n");
-}
-
-int main(int argc, char *argv[]) {
-    char *input_file = NULL;
-    char *output_file = NULL;
-    int verbose = 0;
-    int raw_pcm = 0;
-
-    static struct option long_options[] = {
-        {"raw-pcm", no_argument, 0, 'r'},
-        {"help", no_argument, 0, 'h'},
-        {0, 0, 0, 0}
-    };
-
-    int opt;
-    int option_index = 0;
-    while ((opt = getopt_long(argc, argv, "i:o:vh", long_options, &option_index)) != -1) {
-        switch (opt) {
-            case 'i':
-                input_file = optarg;
-                break;
-            case 'o':
-                output_file = optarg;
-                break;
-            case 'r':
-                raw_pcm = 1;
-                break;
-            case 'v':
-                verbose = 1;
-                break;
-            case 'h':
-                print_usage(argv[0]);
-                return 0;
-            default:
-                print_usage(argv[0]);
-                return 1;
-        }
-    }
-
-    if (!input_file) {
-        fprintf(stderr, "Error: Input file is required\n");
-        print_usage(argv[0]);
-        return 1;
-    }
-
-    // Generate output filename if not provided
-    if (!output_file) {
-        size_t input_len = strlen(input_file);
-        output_file = malloc(input_len + 32);  // Extra space for extension
-
-        // Find the last directory separator
-        const char *basename_start = strrchr(input_file, '/');
-        if (!basename_start) basename_start = strrchr(input_file, '\\');
-        basename_start = basename_start ? basename_start + 1 : input_file;
-
-        // Copy directory part
-        size_t dir_len = basename_start - input_file;
-        strncpy(output_file, input_file, dir_len);
-
-        // Find the .tad extension
-        const char *ext = strrchr(basename_start, '.');
-        if (ext && strcmp(ext, ".tad") == 0) {
-            // Copy basename without .tad
-            size_t name_len = ext - basename_start;
-            strncpy(output_file + dir_len, basename_start, name_len);
-            output_file[dir_len + name_len] = '\0';
-        } else {
-            // No .tad extension, copy entire basename
-            strcpy(output_file + dir_len, basename_start);
-        }
-
-        // Append appropriate extension
-        strcat(output_file, raw_pcm ? ".pcm" : ".wav");
-
-        if (verbose) {
-            printf("Auto-generated output path: %s\n", output_file);
-        }
-    }
-
-    if (verbose) {
-        printf("Input: %s\n", input_file);
-        printf("Output: %s\n", output_file);
-    }
-
-    // Open input file
-    FILE *input = fopen(input_file, "rb");
-    if (!input) {
-        fprintf(stderr, "Error: Could not open input file: %s\n", input_file);
-        return 1;
-    }
-
-    // Get file size
-    fseek(input, 0, SEEK_END);
-    size_t input_size = ftell(input);
-    fseek(input, 0, SEEK_SET);
-
-    // Read entire file into memory
-    uint8_t *input_data = malloc(input_size);
-    fread(input_data, 1, input_size, input);
-    fclose(input);
-
-    // Open output file
-    FILE *output = fopen(output_file, "wb");
-    if (!output) {
-        fprintf(stderr, "Error: Could not open output file: %s\n", output_file);
-        free(input_data);
-        return 1;
-    }
-
-    // Write placeholder WAV header if not in raw PCM mode
-    if (!raw_pcm) {
-        write_wav_header(output, 0, TAD_CHANNELS, TAD_SAMPLE_RATE, 8);
-    }
-
-    // Decode chunks
-    size_t offset = 0;
-    size_t chunk_count = 0;
-    size_t total_samples = 0;
-    // Allocate buffer for maximum chunk size (can handle variable sizes up to default)
-    uint8_t *chunk_output = malloc(TAD_DEFAULT_CHUNK_SIZE * TAD_CHANNELS);
-
-    while (offset < input_size) {
-        size_t bytes_consumed, samples_decoded;
-        int result = tad32_decode_chunk(input_data + offset, input_size - offset,
-                                        chunk_output, &bytes_consumed, &samples_decoded);
-
-        if (result != 0) {
-            fprintf(stderr, "Error: Chunk decoding failed at offset %zu\n", offset);
-            free(input_data);
-            free(chunk_output);
-            fclose(output);
-            return 1;
-        }
-
-        // Write decoded chunk (only the actual samples)
-        fwrite(chunk_output, TAD_CHANNELS, samples_decoded, output);
-
-        offset += bytes_consumed;
-        total_samples += samples_decoded;
-        chunk_count++;
-
-        if (verbose && (chunk_count % 10 == 0)) {
-            printf("Decoded chunk %zu (offset %zu/%zu, %zu samples)\r", chunk_count, offset, input_size, samples_decoded);
-            fflush(stdout);
-        }
-    }
-
-    if (verbose) {
-        printf("\nDecoding complete!\n");
-        printf("Decoded %zu chunks\n", chunk_count);
-        printf("Total samples: %zu (%.2f seconds)\n",
-               total_samples,
-               total_samples / (double)TAD_SAMPLE_RATE);
-    }
-
-    // Update WAV header with correct size if not in raw PCM mode
-    if (!raw_pcm) {
-        uint32_t data_size = total_samples * TAD_CHANNELS;
-        fseek(output, 0, SEEK_SET);
-        write_wav_header(output, data_size, TAD_CHANNELS, TAD_SAMPLE_RATE, 8);
-    }
-
-    // Cleanup
-    free(input_data);
-    free(chunk_output);
-    fclose(output);
-
-    printf("Output written to: %s\n", output_file);
-    if (raw_pcm) {
-        printf("Format: PCMu8 stereo @ %d Hz (raw PCM)\n", TAD_SAMPLE_RATE);
-    } else {
-        printf("Format: WAV file (8-bit unsigned PCM, stereo @ %d Hz)\n", TAD_SAMPLE_RATE);
-    }
-
-    return 0;
-}
-#endif  // TAD_DECODER_LIB
diff --git a/video_encoder/lib/libtadenc/encoder_tad.c b/video_encoder/lib/libtadenc/encoder_tad.c
deleted file mode 100644
index cdd65ed..0000000
--- a/video_encoder/lib/libtadenc/encoder_tad.c
+++ /dev/null
@@ -1,1291 +0,0 @@
-// Created by CuriousTorvald and Claude on 2025-10-24.
-// TAD32 (Terrarum Advanced Audio - PCM32f version) Encoder Library
-// Alternative version: PCM32f throughout encoding, PCM8 conversion only at decoder
-// This file contains only the encoding functions for comparison testing
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <math.h>
-#include <zstd.h>
-#include "encoder_tad.h"
-
-// Undefine the macro version from header and define as array
-#undef TAD32_COEFF_SCALARS
-
-// Coefficient scalars for each subband (CDF 9/7 with 9 decomposition levels)
-// Index 0 = LL band, Index 1-9 = H bands (L9 to L1)
-static const float TAD32_COEFF_SCALARS[] = {64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f};
-
-// Base quantiser weight table (10 subbands: LL + 9 H bands)
-// These weights are multiplied by quantiser_scale during quantisation
-static const float BASE_QUANTISER_WEIGHTS[2][10] = {
-{ // mid channel
-    4.0f,    // LL (L9) DC
-    2.0f,    // H (L9) 31.25 hz
-    1.8f,    // H (L8) 62.5 hz
-    1.6f,    // H (L7) 125 hz
-    1.4f,    // H (L6) 250 hz
-    1.2f,    // H (L5) 500 hz
-    1.0f,    // H (L4) 1 khz
-    1.0f,    // H (L3) 2 khz
-    1.3f,    // H (L2) 4 khz
-    2.0f     // H (L1) 8 khz
-},
-{ // side channel
-    6.0f,    // LL (L9) DC
-    5.0f,    // H (L9) 31.25 hz
-    2.6f,    // H (L8) 62.5 hz
-    2.4f,    // H (L7) 125 hz
-    1.8f,    // H (L6) 250 hz
-    1.3f,    // H (L5) 500 hz
-    1.0f,    // H (L4) 1 khz
-    1.0f,    // H (L3) 2 khz
-    1.6f,    // H (L2) 4 khz
-    3.2f     // H (L1) 8 khz
-}};
-
-// target: before quantisation
-static const float DEADBANDS[2][10] = {
-{ // mid channel
-    0.20f,    // LL (L9) DC
-    0.06f,    // H (L9) 31.25 hz
-    0.06f,    // H (L8) 62.5 hz
-    0.06f,    // H (L7) 125 hz
-    0.06f,    // H (L6) 250 hz
-    0.04f,    // H (L5) 500 hz
-    0.04f,    // H (L4) 1 khz
-    0.01f,    // H (L3) 2 khz
-    0.01f,    // H (L2) 4 khz
-    0.01f     // H (L1) 8 khz
-},
-{ // side channel
-    0.20f,    // LL (L9) DC
-    0.06f,    // H (L9) 31.25 hz
-    0.06f,    // H (L8) 62.5 hz
-    0.06f,    // H (L7) 125 hz
-    0.06f,    // H (L6) 250 hz
-    0.04f,    // H (L5) 500 hz
-    0.04f,    // H (L4) 1 khz
-    0.01f,    // H (L3) 2 khz
-    0.01f,    // H (L2) 4 khz
-    0.01f     // H (L1) 8 khz
-}};
-
-static inline float FCLAMP(float x, float min, float max) {
-    return x < min ? min : (x > max ? max : x);
-}
-
-// Calculate DWT levels from chunk size
-static int calculate_dwt_levels(int chunk_size) {
-    /*if (chunk_size < TAD32_MIN_CHUNK_SIZE) {
-        fprintf(stderr, "Error: Chunk size %d is below minimum %d\n", chunk_size, TAD32_MIN_CHUNK_SIZE);
-        return -1;
-    }
-
-    int levels = 0;
-    int size = chunk_size;
-    while (size > 1) {
-        size >>= 1;
-        levels++;
-    }
-
-    // For non-power-of-2, we need to add 1 to levels
-    int pow2 = 1 << levels;
-    if (pow2 < chunk_size) {
-        levels++;
-    }
-
-    return levels - 2;*/  // Maximum decomposition
-
-    return 9;
-}
-
-// Special marker for deadzoned coefficients (will be reconstructed with noise on decode)
-#define DEADZONE_MARKER_FLOAT (-999.0f)  // Unmistakable marker in float domain
-#define DEADZONE_MARKER_QUANT (-128)     // Maps to this in quantised domain (int8 minimum)
-
-// Perceptual epsilon - coefficients below this are truly zero (inaudible)
-#define EPSILON_PERCEPTUAL 0.001f
-
-static void apply_coeff_deadzone(int channel, float *coeffs, size_t num_samples) {
-    // Apply deadzonning to each DWT subband using frequency-dependent thresholds
-    // Instead of zeroing, mark small coefficients for stochastic reconstruction
-
-    const int dwt_levels = 9;  // Fixed to match encoder
-
-    // Calculate subband boundaries (same logic as decoder)
-    const int first_band_size = num_samples >> dwt_levels;
-    int sideband_starts[11];  // dwt_levels + 2
-    sideband_starts[0] = 0;
-    sideband_starts[1] = first_band_size;
-    for (int i = 2; i <= dwt_levels + 1; i++) {
-        sideband_starts[i] = sideband_starts[i - 1] + (first_band_size << (i - 2));
-    }
-
-    // Apply deadzone threshold to each coefficient
-    for (size_t i = 0; i < num_samples; i++) {
-        // Determine which subband this coefficient belongs to
-        int sideband = dwt_levels;  // Default to highest frequency
-        for (int s = 0; s <= dwt_levels; s++) {
-            if (i < (size_t)sideband_starts[s + 1]) {
-                sideband = s;
-                break;
-            }
-        }
-
-        // Get threshold for this subband and channel
-        float threshold = DEADBANDS[channel][sideband];
-        float abs_coeff = fabsf(coeffs[i]);
-
-        // If coefficient is within deadband AND perceptually non-zero, mark it
-        if (abs_coeff > EPSILON_PERCEPTUAL && abs_coeff < threshold) {
-            // Mark for stochastic reconstruction (decoder will add noise)
-            coeffs[i] = 0.0f;//DEADZONE_MARKER_FLOAT;
-        }
-        // If below perceptual epsilon, truly zero it
-        else if (abs_coeff <= EPSILON_PERCEPTUAL) {
-            coeffs[i] = 0.0f;
-        }
-        // Otherwise keep coefficient unchanged
-    }
-}
-
-//=============================================================================
-// DD-4 DWT Implementation
-//=============================================================================
-
-// Four-point interpolating Deslauriers-Dubuc (DD-4) wavelet forward 1D transform
-static void dwt_dd4_forward_1d(float *data, int length) {
-    if (length < 2) return;
-
-    float *temp = malloc(length * sizeof(float));
-    int half = (length + 1) / 2;
-
-    // Split into even/odd samples
-    for (int i = 0; i < half; i++) {
-        temp[i] = data[2 * i];           // Even (low)
-    }
-    for (int i = 0; i < length / 2; i++) {
-        temp[half + i] = data[2 * i + 1]; // Odd (high)
-    }
-
-    // DD-4 forward prediction step with four-point kernel
-    for (int i = 0; i < length / 2; i++) {
-        float s_m1, s_0, s_1, s_2;
-
-        if (i > 0) s_m1 = temp[i - 1];
-        else s_m1 = temp[0]; // Mirror boundary
-
-        s_0 = temp[i];
-
-        if (i + 1 < half) s_1 = temp[i + 1];
-        else s_1 = temp[half - 1];
-
-        if (i + 2 < half) s_2 = temp[i + 2];
-        else if (half > 1) s_2 = temp[half - 2];
-        else s_2 = temp[half - 1];
-
-        float prediction = (-1.0f/16.0f) * s_m1 + (9.0f/16.0f) * s_0 +
-                          (9.0f/16.0f) * s_1 + (-1.0f/16.0f) * s_2;
-
-        temp[half + i] -= prediction;
-    }
-
-    // DD-4 update step
-    for (int i = 0; i < half; i++) {
-        float d_curr = (i < length / 2) ? temp[half + i] : 0.0f;
-        float d_prev = (i > 0 && i - 1 < length / 2) ? temp[half + i - 1] : 0.0f;
-        temp[i] += 0.25f * (d_prev + d_curr);
-    }
-
-    memcpy(data, temp, length * sizeof(float));
-    free(temp);
-}
-
-// 1D DWT using lifting scheme for 9/7 irreversible filter
-static void dwt_97_forward_1d(float *data, int length) {
-    if (length < 2) return;
-
-    float *temp = malloc(length * sizeof(float));
-    int half = (length + 1) / 2;  // Handle odd lengths properly
-
-    // Split into even/odd samples
-    for (int i = 0; i < half; i++) {
-        temp[i] = data[2 * i];           // Even (low)
-    }
-    for (int i = 0; i < length / 2; i++) {
-        temp[half + i] = data[2 * i + 1]; // Odd (high)
-    }
-
-    // JPEG2000 9/7 forward lifting steps (corrected to match decoder)
-    const float alpha = -1.586134342f;
-    const float beta = -0.052980118f;
-    const float gamma = 0.882911076f;
-    const float delta = 0.443506852f;
-    const float K = 1.230174105f;
-
-    // Step 1: Predict α - d[i] += α * (s[i] + s[i+1])
-    for (int i = 0; i < length / 2; i++) {
-        if (half + i < length) {
-            float s_curr = temp[i];
-            float s_next = (i + 1 < half) ? temp[i + 1] : s_curr;
-            temp[half + i] += alpha * (s_curr + s_next);
-        }
-    }
-
-    // Step 2: Update β - s[i] += β * (d[i-1] + d[i])
-    for (int i = 0; i < half; i++) {
-        float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
-        float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr;
-        temp[i] += beta * (d_prev + d_curr);
-    }
-
-    // Step 3: Predict γ - d[i] += γ * (s[i] + s[i+1])
-    for (int i = 0; i < length / 2; i++) {
-        if (half + i < length) {
-            float s_curr = temp[i];
-            float s_next = (i + 1 < half) ? temp[i + 1] : s_curr;
-            temp[half + i] += gamma * (s_curr + s_next);
-        }
-    }
-
-    // Step 4: Update δ - s[i] += δ * (d[i-1] + d[i])
-    for (int i = 0; i < half; i++) {
-        float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
-        float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr;
-        temp[i] += delta * (d_prev + d_curr);
-    }
-
-    // Step 5: Scaling - s[i] *= K, d[i] /= K
-    for (int i = 0; i < half; i++) {
-        temp[i] *= K;  // Low-pass coefficients
-    }
-    for (int i = 0; i < length / 2; i++) {
-        if (half + i < length) {
-            temp[half + i] /= K;  // High-pass coefficients
-        }
-    }
-
-    memcpy(data, temp, length * sizeof(float));
-    free(temp);
-}
-
-// Apply multi-level DWT (using DD-4 wavelet)
-static void dwt_forward_multilevel(float *data, int length, int levels) {
-    int current_length = length;
-    for (int level = 0; level < levels; level++) {
-//        dwt_dd4_forward_1d(data, current_length);
-        dwt_97_forward_1d(data, current_length);
-        current_length = (current_length + 1) / 2;
-    }
-}
-
-//=============================================================================
-// Pre-emphasis Filter
-//=============================================================================
-
-static void calculate_preemphasis_coeffs(float *b0, float *b1, float *a1) {
-    // Simple first-order digital pre-emphasis
-    // Corner frequency ≈ 1200 Hz (chosen for 32 kHz codec)
-    // Provides ~6 dB/octave boost above corner
-
-    // Pre-emphasis factor (0.95 = gentle, 0.90 = moderate, 0.85 = aggressive)
-    const float alpha = 0.5f;  // Gentle boost suitable for music
-
-    *b0 = 1.0f;
-    *b1 = -alpha;
-    *a1 = 0.0f;  // No feedback
-}
-
-// emphasis at alpha=0.5 shifts quantisation crackles to lower frequency which MIGHT be more preferable
-static void apply_preemphasis(float *left, float *right, size_t count) {
-    // Static state variables - persistent across chunks to prevent discontinuities
-    static float prev_x_l = 0.0f;
-    static float prev_y_l = 0.0f;
-    static float prev_x_r = 0.0f;
-    static float prev_y_r = 0.0f;
-
-    float b0, b1, a1;
-    calculate_preemphasis_coeffs(&b0, &b1, &a1);
-
-    // Left channel - use persistent state
-    for (size_t i = 0; i < count; i++) {
-        float x = left[i];
-        float y = b0 * x + b1 * prev_x_l - a1 * prev_y_l;
-        left[i] = y;
-        prev_x_l = x;
-        prev_y_l = y;
-    }
-
-    // Right channel - use persistent state
-    for (size_t i = 0; i < count; i++) {
-        float x = right[i];
-        float y = b0 * x + b1 * prev_x_r - a1 * prev_y_r;
-        right[i] = y;
-        prev_x_r = x;
-        prev_y_r = y;
-    }
-}
-
-//=============================================================================
-// M/S Stereo Decorrelation (PCM32f version)
-//=============================================================================
-
-static void ms_decorrelate(const float *left, const float *right, float *mid, float *side, size_t count) {
-    for (size_t i = 0; i < count; i++) {
-        // Mid = (L + R) / 2, Side = (L - R) / 2
-        float l = left[i];
-        float r = right[i];
-        mid[i] = (l + r) / 2.0f;
-        side[i] = (l - r) / 2.0f;
-    }
-}
-
-static float signum(float x) {
-    if (x > 0.0f) return 1.0f;
-    if (x < 0.0f) return -1.0f;
-    return 0.0f;
-}
-
-static void compress_gamma(float *left, float *right, size_t count) {
-    for (size_t i = 0; i < count; i++) {
-        // encode(x) = sign(x) * |x|^γ where γ=0.5
-        float x = left[i];
-        left[i] = signum(x) * powf(fabsf(x), 0.5f);
-        float y = right[i];
-        right[i] = signum(y) * powf(fabsf(y), 0.5f);
-    }
-}
-
-static void compress_mu_law(float *left, float *right, size_t count) {
-    static float MU = 255.0f;
-
-    for (size_t i = 0; i < count; i++) {
-        // encode(x) = sign(x) * |x|^γ where γ=0.5
-        float x = left[i];
-        left[i] = signum(x) * logf(1.0f + MU * fabsf(x)) / logf(1.0f + MU);
-        float y = right[i];
-        right[i] = signum(y) * logf(1.0f + MU * fabsf(y)) / logf(1.0f + MU);
-    }
-}
-
-//=============================================================================
-// Quantisation with Frequency-Dependent Weighting
-//=============================================================================
-
-#define LAMBDA_FIXED 6.0f
-
-// Lambda-based companding encoder (based on Laplacian distribution CDF)
-// val must be normalised to [-1,1]
-// Returns quantised index in range [-127, +127]
-static int8_t lambda_companding(float val, int max_index) {
-    // Handle zero
-    if (fabsf(val) < 1e-9f) {
-        return 0;
-    }
-
-    int sign = (val < 0) ? -1 : 1;
-    float abs_val = fabsf(val);
-
-    // Clamp to [0, 1]
-    if (abs_val > 1.0f) abs_val = 1.0f;
-
-
-    // Laplacian CDF for x >= 0: F(x) = 1 - 0.5 * exp(-λ*x)
-    // Map to [0.5, 1.0] range (half of CDF for positive values)
-    float cdf = 1.0f - 0.5f * expf(-LAMBDA_FIXED * abs_val);
-
-    // Map CDF from [0.5, 1.0] to [0, 1] for positive half
-    float normalised_cdf = (cdf - 0.5f) * 2.0f;
-
-    // Quantise to index
-    int index = (int)roundf(normalised_cdf * max_index);
-
-    // Clamp index to valid range [0, max_index]
-    if (index < 0) index = 0;
-    if (index > max_index) index = max_index;
-
-    return (int8_t)(sign * index);
-}
-
-static void quantise_dwt_coefficients(int channel, const float *coeffs, int8_t *quantised, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int max_index, int *current_subband_index, float quantiser_scale) {
-    int first_band_size = chunk_size >> dwt_levels;
-
-    int *sideband_starts = malloc((dwt_levels + 2) * sizeof(int));
-    sideband_starts[0] = 0;
-    sideband_starts[1] = first_band_size;
-    for (int i = 2; i <= dwt_levels + 1; i++) {
-        sideband_starts[i] = sideband_starts[i-1] + (first_band_size << (i-2));
-    }
-
-    for (size_t i = 0; i < count; i++) {
-        int sideband = dwt_levels;
-        for (int s = 0; s <= dwt_levels; s++) {
-            if (i < (size_t)sideband_starts[s + 1]) {
-                sideband = s;
-                break;
-            }
-        }
-
-        // Store subband index (LL=0, H1=1, H2=2, ..., H9=9 for dwt_levels=9)
-        if (current_subband_index != NULL) {
-            current_subband_index[i] = sideband;
-        }
-
-        // Check for deadzone marker (special handling)
-        /*if (coeffs[i] == DEADZONE_MARKER_FLOAT) {
-            // Map to special quantised marker for stochastic reconstruction
-            quantised[i] = (int8_t)DEADZONE_MARKER_QUANT;
-        } else {*/
-            // Normal quantisation
-            float weight = BASE_QUANTISER_WEIGHTS[channel][sideband] * quantiser_scale;
-            float val = (coeffs[i] / (TAD32_COEFF_SCALARS[sideband] * weight)); // val is normalised to [-1,1]
-            int8_t quant_val = lambda_companding(val, max_index);
-            quantised[i] = quant_val;
-//        }
-    }
-
-    free(sideband_starts);
-}
-
-//=============================================================================
-// Coefficient Statistics
-//=============================================================================
-
-static int compare_float(const void *a, const void *b) {
-    float fa = *(const float*)a;
-    float fb = *(const float*)b;
-    if (fa < fb) return -1;
-    if (fa > fb) return 1;
-    return 0;
-}
-
-typedef struct {
-    float min;
-    float q1;
-    float median;
-    float q3;
-    float max;
-    float lambda;  // Laplacian distribution parameter (1/b, where b is scale)
-} CoeffStats;
-
-typedef struct {
-    float *data;
-    size_t count;
-    size_t capacity;
-} CoeffAccumulator;
-
-typedef struct {
-    int8_t *data;
-    size_t count;
-    size_t capacity;
-} QuantAccumulator;
-
-// Global accumulators for statistics
-static CoeffAccumulator *mid_accumulators = NULL;
-static CoeffAccumulator *side_accumulators = NULL;
-static QuantAccumulator *mid_quant_accumulators = NULL;
-static QuantAccumulator *side_quant_accumulators = NULL;
-static int num_subbands = 0;
-static int stats_initialised = 0;
-static int stats_dwt_levels = 0;
-
-static void init_statistics(int dwt_levels) {
-    if (stats_initialised) return;
-
-    num_subbands = dwt_levels + 1;
-    stats_dwt_levels = dwt_levels;
-
-    mid_accumulators = calloc(num_subbands, sizeof(CoeffAccumulator));
-    side_accumulators = calloc(num_subbands, sizeof(CoeffAccumulator));
-    mid_quant_accumulators = calloc(num_subbands, sizeof(QuantAccumulator));
-    side_quant_accumulators = calloc(num_subbands, sizeof(QuantAccumulator));
-
-    for (int i = 0; i < num_subbands; i++) {
-        mid_accumulators[i].capacity = 1024;
-        mid_accumulators[i].data = malloc(mid_accumulators[i].capacity * sizeof(float));
-        mid_accumulators[i].count = 0;
-
-        side_accumulators[i].capacity = 1024;
-        side_accumulators[i].data = malloc(side_accumulators[i].capacity * sizeof(float));
-        side_accumulators[i].count = 0;
-
-        mid_quant_accumulators[i].capacity = 1024;
-        mid_quant_accumulators[i].data = malloc(mid_quant_accumulators[i].capacity * sizeof(int8_t));
-        mid_quant_accumulators[i].count = 0;
-
-        side_quant_accumulators[i].capacity = 1024;
-        side_quant_accumulators[i].data = malloc(side_quant_accumulators[i].capacity * sizeof(int8_t));
-        side_quant_accumulators[i].count = 0;
-    }
-
-    stats_initialised = 1;
-}
-
-static void accumulate_coefficients(const float *coeffs, int dwt_levels, int chunk_size, CoeffAccumulator *accumulators) {
-    int first_band_size = chunk_size >> dwt_levels;
-
-    int *sideband_starts = malloc((dwt_levels + 2) * sizeof(int));
-    sideband_starts[0] = 0;
-    sideband_starts[1] = first_band_size;
-    for (int i = 2; i <= dwt_levels + 1; i++) {
-        sideband_starts[i] = sideband_starts[i-1] + (first_band_size << (i-2));
-    }
-
-    for (int s = 0; s <= dwt_levels; s++) {
-        size_t start = sideband_starts[s];
-        size_t end = sideband_starts[s + 1];
-        size_t band_size = end - start;
-
-        // Expand capacity if needed
-        while (accumulators[s].count + band_size > accumulators[s].capacity) {
-            accumulators[s].capacity *= 2;
-            accumulators[s].data = realloc(accumulators[s].data,
-                                          accumulators[s].capacity * sizeof(float));
-        }
-
-        // Copy coefficients
-        memcpy(accumulators[s].data + accumulators[s].count,
-               coeffs + start, band_size * sizeof(float));
-        accumulators[s].count += band_size;
-    }
-
-    free(sideband_starts);
-}
-
-static void accumulate_quantised(const int8_t *quant, int dwt_levels, int chunk_size, QuantAccumulator *accumulators) {
-    int first_band_size = chunk_size >> dwt_levels;
-
-    int *sideband_starts = malloc((dwt_levels + 2) * sizeof(int));
-    sideband_starts[0] = 0;
-    sideband_starts[1] = first_band_size;
-    for (int i = 2; i <= dwt_levels + 1; i++) {
-        sideband_starts[i] = sideband_starts[i-1] + (first_band_size << (i-2));
-    }
-
-    for (int s = 0; s <= dwt_levels; s++) {
-        size_t start = sideband_starts[s];
-        size_t end = sideband_starts[s + 1];
-        size_t band_size = end - start;
-
-        // Expand capacity if needed
-        while (accumulators[s].count + band_size > accumulators[s].capacity) {
-            accumulators[s].capacity *= 2;
-            accumulators[s].data = realloc(accumulators[s].data,
-                                          accumulators[s].capacity * sizeof(int8_t));
-        }
-
-        // Copy coefficients
-        memcpy(accumulators[s].data + accumulators[s].count,
-               quant + start, band_size * sizeof(int8_t));
-        accumulators[s].count += band_size;
-    }
-
-    free(sideband_starts);
-}
-
-static void calculate_coeff_stats(const float *coeffs, size_t count, CoeffStats *stats) {
-    if (count == 0) {
-        stats->min = stats->q1 = stats->median = stats->q3 = stats->max = 0.0f;
-        stats->lambda = 0.0f;
-        return;
-    }
-
-    // Copy coefficients for sorting
-    float *sorted = malloc(count * sizeof(float));
-    memcpy(sorted, coeffs, count * sizeof(float));
-    qsort(sorted, count, sizeof(float), compare_float);
-
-    stats->min = sorted[0];
-    stats->max = sorted[count - 1];
-    stats->median = sorted[count / 2];
-    stats->q1 = sorted[count / 4];
-    stats->q3 = sorted[(3 * count) / 4];
-
-    free(sorted);
-
-    // Estimate Laplacian distribution parameter λ = 1/b
-    // For Laplacian centered at μ=0, MLE gives: b = mean(|x|)
-    // Therefore: λ = 1/b = 1/mean(|x|)
-    double sum_abs = 0.0;
-    for (size_t i = 0; i < count; i++) {
-        sum_abs += fabs(coeffs[i]);
-    }
-    double mean_abs = sum_abs / count;
-    stats->lambda = (mean_abs > 1e-9) ? (1.0f / mean_abs) : 0.0f;
-}
-
-#define HISTOGRAM_BINS 40
-#define HISTOGRAM_WIDTH 60
-
-static void print_histogram(const float *coeffs, size_t count, const char *title) {
-    if (count == 0) return;
-
-    // Find min/max
-    float min_val = coeffs[0];
-    float max_val = coeffs[0];
-    for (size_t i = 1; i < count; i++) {
-        if (coeffs[i] < min_val) min_val = coeffs[i];
-        if (coeffs[i] > max_val) max_val = coeffs[i];
-    }
-
-    // Handle case where all values are the same
-    if (fabsf(max_val - min_val) < 1e-9f) {
-        fprintf(stderr, "  %s: All values are %.3f\n", title, min_val);
-        return;
-    }
-
-    // Create histogram bins
-    size_t bins[HISTOGRAM_BINS] = {0};
-    float bin_width = (max_val - min_val) / HISTOGRAM_BINS;
-
-    for (size_t i = 0; i < count; i++) {
-        int bin = (int)((coeffs[i] - min_val) / bin_width);
-        if (bin >= HISTOGRAM_BINS) bin = HISTOGRAM_BINS - 1;
-        if (bin < 0) bin = 0;
-        bins[bin]++;
-    }
-
-    // Find max bin count for scaling
-    size_t max_bin = 0;
-    for (int i = 0; i < HISTOGRAM_BINS; i++) {
-        if (bins[i] > max_bin) max_bin = bins[i];
-    }
-
-    // Print histogram
-    fprintf(stderr, "  %s Histogram (range: %.3f to %.3f):\n", title, min_val, max_val);
-
-    // Print top 20 bins to keep output manageable
-    for (int i = 0; i < HISTOGRAM_BINS; i++) {
-        float bin_start = min_val + i * bin_width;
-        float bin_end = bin_start + bin_width;
-        int bar_width = (int)((bins[i] * HISTOGRAM_WIDTH) / max_bin);
-
-        // Only print bins with significant content (> 1% of max)
-        if (bins[i] > max_bin / 100) {
-            fprintf(stderr, "  %8.3f-%8.3f [%7zu]: ", bin_start, bin_end, bins[i]);
-            for (int j = 0; j < bar_width; j++) {
-                fprintf(stderr, "█");
-            }
-            fprintf(stderr, "\n");
-        }
-    }
-    fprintf(stderr, "\n");
-}
-
-typedef struct {
-    int8_t value;
-    size_t count;
-    float percentage;
-} ValueFrequency;
-
-static int compare_value_frequency(const void *a, const void *b) {
-    const ValueFrequency *va = (const ValueFrequency*)a;
-    const ValueFrequency *vb = (const ValueFrequency*)b;
-    // Sort by count descending
-    if (vb->count > va->count) return 1;
-    if (vb->count < va->count) return -1;
-    return 0;
-}
-
-static void print_top5_quantised_values(const int8_t *quant, size_t count, const char *title) {
-    if (count == 0) {
-        fprintf(stderr, "  %s: No data\n", title);
-        return;
-    }
-
-    // For int8_t range is at most 256, so we can use direct indexing
-    // Map from [-128, 127] to [0, 255]
-    size_t freq[256] = {0};
-
-    for (size_t i = 0; i < count; i++) {
-        int idx = (int)quant[i] + 128;
-        freq[idx]++;
-    }
-
-    // Find all unique values with their frequencies
-    ValueFrequency values[256];
-    int unique_count = 0;
-    for (int i = 0; i < 256; i++) {
-        if (freq[i] > 0) {
-            values[unique_count].value = (int8_t)(i - 128);
-            values[unique_count].count = freq[i];
-            values[unique_count].percentage = (float)(freq[i] * 100.0) / count;
-            unique_count++;
-        }
-    }
-
-    // Sort by frequency
-    qsort(values, unique_count, sizeof(ValueFrequency), compare_value_frequency);
-
-    // Print top 10
-    fprintf(stderr, "  %s Top 100 Values:\n", title);
-    int print_count = (unique_count < 100) ? unique_count : 100;
-    for (int i = 0; i < print_count; i++) {
-        fprintf(stderr, "    %6d: %8zu occurrences (%5.2f%%)\n",
-                values[i].value, values[i].count, values[i].percentage);
-    }
-    fprintf(stderr, "\n");
-}
-
-void tad32_print_statistics(void) {
-    if (!stats_initialised) return;
-
-    fprintf(stderr, "\n=== TAD Coefficient Statistics (before quantisation) ===\n");
-
-    // Print Mid channel statistics
-    fprintf(stderr, "\nMid Channel:\n");
-    fprintf(stderr, "%-12s %10s %10s %10s %10s %10s %10s %10s\n",
-            "Subband", "Samples", "Min", "Q1", "Median", "Q3", "Max", "Lambda");
-    fprintf(stderr, "----------------------------------------------------------------------------------------\n");
-
-    for (int s = 0; s < num_subbands; s++) {
-        CoeffStats stats;
-        calculate_coeff_stats(mid_accumulators[s].data, mid_accumulators[s].count, &stats);
-
-        char band_name[16];
-        if (s == 0) {
-            snprintf(band_name, sizeof(band_name), "LL (L%d)", stats_dwt_levels);
-        } else {
-            snprintf(band_name, sizeof(band_name), "H (L%d)", stats_dwt_levels - s + 1);
-        }
-
-        fprintf(stderr, "%-12s %10zu %10.3f %10.3f %10.3f %10.3f %10.3f %10.3f\n",
-                band_name, mid_accumulators[s].count,
-                stats.min, stats.q1, stats.median, stats.q3, stats.max, stats.lambda);
-    }
-
-    // Print Mid channel histograms
-    fprintf(stderr, "\nMid Channel Histograms:\n");
-    for (int s = 0; s < num_subbands; s++) {
-        char band_name[32];
-        if (s == 0) {
-            snprintf(band_name, sizeof(band_name), "LL (L%d)", stats_dwt_levels);
-        } else {
-            snprintf(band_name, sizeof(band_name), "H (L%d)", stats_dwt_levels - s + 1);
-        }
-        print_histogram(mid_accumulators[s].data, mid_accumulators[s].count, band_name);
-    }
-
-    // Print Side channel statistics
-    fprintf(stderr, "\nSide Channel:\n");
-    fprintf(stderr, "%-12s %10s %10s %10s %10s %10s %10s %10s\n",
-            "Subband", "Samples", "Min", "Q1", "Median", "Q3", "Max", "Lambda");
-    fprintf(stderr, "----------------------------------------------------------------------------------------\n");
-
-    for (int s = 0; s < num_subbands; s++) {
-        CoeffStats stats;
-        calculate_coeff_stats(side_accumulators[s].data, side_accumulators[s].count, &stats);
-
-        char band_name[16];
-        if (s == 0) {
-            snprintf(band_name, sizeof(band_name), "LL (L%d)", stats_dwt_levels);
-        } else {
-            snprintf(band_name, sizeof(band_name), "H (L%d)", stats_dwt_levels - s + 1);
-        }
-
-        fprintf(stderr, "%-12s %10zu %10.3f %10.3f %10.3f %10.3f %10.3f %10.3f\n",
-                band_name, side_accumulators[s].count,
-                stats.min, stats.q1, stats.median, stats.q3, stats.max, stats.lambda);
-    }
-
-    // Print Side channel histograms
-    fprintf(stderr, "\nSide Channel Histograms:\n");
-    for (int s = 0; s < num_subbands; s++) {
-        char band_name[32];
-        if (s == 0) {
-            snprintf(band_name, sizeof(band_name), "LL (L%d)", stats_dwt_levels);
-        } else {
-            snprintf(band_name, sizeof(band_name), "H (L%d)", stats_dwt_levels - s + 1);
-        }
-        print_histogram(side_accumulators[s].data, side_accumulators[s].count, band_name);
-    }
-
-    // Print quantised values statistics
-    fprintf(stderr, "\n=== TAD Quantised Values Statistics (after quantisation) ===\n");
-
-    // Print Mid channel quantised values
-    fprintf(stderr, "\nMid Channel Quantised Values:\n");
-    for (int s = 0; s < num_subbands; s++) {
-        char band_name[32];
-        if (s == 0) {
-            snprintf(band_name, sizeof(band_name), "LL (L%d)", stats_dwt_levels);
-        } else {
-            snprintf(band_name, sizeof(band_name), "H (L%d)", stats_dwt_levels - s + 1);
-        }
-        print_top5_quantised_values(mid_quant_accumulators[s].data, mid_quant_accumulators[s].count, band_name);
-    }
-
-    // Print Side channel quantised values
-    fprintf(stderr, "\nSide Channel Quantised Values:\n");
-    for (int s = 0; s < num_subbands; s++) {
-        char band_name[32];
-        if (s == 0) {
-            snprintf(band_name, sizeof(band_name), "LL (L%d)", stats_dwt_levels);
-        } else {
-            snprintf(band_name, sizeof(band_name), "H (L%d)", stats_dwt_levels - s + 1);
-        }
-        print_top5_quantised_values(side_quant_accumulators[s].data, side_quant_accumulators[s].count, band_name);
-    }
-
-    fprintf(stderr, "\n");
-}
-
-void tad32_free_statistics(void) {
-    if (!stats_initialised) return;
-
-    for (int i = 0; i < num_subbands; i++) {
-        free(mid_accumulators[i].data);
-        free(side_accumulators[i].data);
-        free(mid_quant_accumulators[i].data);
-        free(side_quant_accumulators[i].data);
-    }
-    free(mid_accumulators);
-    free(side_accumulators);
-    free(mid_quant_accumulators);
-    free(side_quant_accumulators);
-
-    mid_accumulators = NULL;
-    side_accumulators = NULL;
-    mid_quant_accumulators = NULL;
-    side_quant_accumulators = NULL;
-    stats_initialised = 0;
-}
-
-//=============================================================================
-// Binary Tree EZBC (1D Variant for TAD)
-//=============================================================================
-
-#include <stdbool.h>
-
-// Bitstream writer for EZBC
-typedef struct {
-    uint8_t *data;
-    size_t capacity;
-    size_t byte_pos;
-    uint8_t bit_pos;  // 0-7, current bit position in current byte
-} tad_bitstream_t;
-
-// Block structure for 1D binary tree
-typedef struct {
-    int start;    // Start index in 1D array
-    int length;   // Block length
-} tad_block_t;
-
-// Queue for block processing
-typedef struct {
-    tad_block_t *blocks;
-    size_t count;
-    size_t capacity;
-} tad_block_queue_t;
-
-// Track coefficient state for refinement
-typedef struct {
-    bool significant;     // Has been marked significant
-    int first_bitplane;   // Bitplane where it became significant
-} tad_coeff_state_t;
-
-// Bitstream operations
-static void tad_bitstream_init(tad_bitstream_t *bs, size_t initial_capacity) {
-    bs->capacity = initial_capacity;
-    bs->data = calloc(1, initial_capacity);
-    bs->byte_pos = 0;
-    bs->bit_pos = 0;
-}
-
-static void tad_bitstream_write_bit(tad_bitstream_t *bs, int bit) {
-    // Grow if needed
-    if (bs->byte_pos >= bs->capacity) {
-        bs->capacity *= 2;
-        bs->data = realloc(bs->data, bs->capacity);
-        // Clear new memory
-        memset(bs->data + bs->byte_pos, 0, bs->capacity - bs->byte_pos);
-    }
-
-    if (bit) {
-        bs->data[bs->byte_pos] |= (1 << bs->bit_pos);
-    }
-
-    bs->bit_pos++;
-    if (bs->bit_pos == 8) {
-        bs->bit_pos = 0;
-        bs->byte_pos++;
-    }
-}
-
-static void tad_bitstream_write_bits(tad_bitstream_t *bs, uint32_t value, int num_bits) {
-    for (int i = 0; i < num_bits; i++) {
-        tad_bitstream_write_bit(bs, (value >> i) & 1);
-    }
-}
-
-static size_t tad_bitstream_size(tad_bitstream_t *bs) {
-    return bs->byte_pos + (bs->bit_pos > 0 ? 1 : 0);
-}
-
-static void tad_bitstream_free(tad_bitstream_t *bs) {
-    free(bs->data);
-}
-
-// Block queue operations
-static void tad_queue_init(tad_block_queue_t *q) {
-    q->capacity = 1024;
-    q->blocks = malloc(q->capacity * sizeof(tad_block_t));
-    q->count = 0;
-}
-
-static void tad_queue_push(tad_block_queue_t *q, tad_block_t block) {
-    if (q->count >= q->capacity) {
-        q->capacity *= 2;
-        q->blocks = realloc(q->blocks, q->capacity * sizeof(tad_block_t));
-    }
-    q->blocks[q->count++] = block;
-}
-
-static void tad_queue_free(tad_block_queue_t *q) {
-    free(q->blocks);
-}
-
-// Check if all coefficients in block have |coeff| < threshold
-static bool tad_is_zero_block(int8_t *coeffs, const tad_block_t *block, int threshold) {
-    for (int i = block->start; i < block->start + block->length; i++) {
-        if (abs(coeffs[i]) >= threshold) {
-            return false;
-        }
-    }
-    return true;
-}
-
-// Find maximum absolute coefficient value
-static int tad_find_max_abs(int8_t *coeffs, size_t count) {
-    int max_abs = 0;
-    for (size_t i = 0; i < count; i++) {
-        int abs_val = abs(coeffs[i]);
-        if (abs_val > max_abs) {
-            max_abs = abs_val;
-        }
-    }
-    return max_abs;
-}
-
-// Get MSB position (bitplane number)
-static int tad_get_msb_bitplane(int value) {
-    if (value == 0) return 0;
-    int bitplane = 0;
-    while (value > 1) {
-        value >>= 1;
-        bitplane++;
-    }
-    return bitplane;
-}
-
-// Context for recursive EZBC processing
-typedef struct {
-    tad_bitstream_t *bs;
-    int8_t *coeffs;
-    tad_coeff_state_t *states;
-    int length;
-    int bitplane;
-    int threshold;
-    tad_block_queue_t *next_insignificant;
-    tad_block_queue_t *next_significant;
-    int *sign_count;
-} tad_ezbc_context_t;
-
-// Recursively process a significant block - subdivide until size 1
-static void tad_process_significant_block_recursive(tad_ezbc_context_t *ctx, tad_block_t block) {
-    // If size 1: emit sign bit and add to significant queue
-    if (block.length == 1) {
-        int idx = block.start;
-        tad_bitstream_write_bit(ctx->bs, ctx->coeffs[idx] < 0 ? 1 : 0);
-        (*ctx->sign_count)++;
-        ctx->states[idx].significant = true;
-        ctx->states[idx].first_bitplane = ctx->bitplane;
-        tad_queue_push(ctx->next_significant, block);
-        return;
-    }
-
-    // Block is > 1: subdivide into left and right halves
-    int mid = block.length / 2;
-    if (mid == 0) mid = 1;
-
-    // Process left child
-    tad_block_t left = {block.start, mid};
-    if (!tad_is_zero_block(ctx->coeffs, &left, ctx->threshold)) {
-        tad_bitstream_write_bit(ctx->bs, 1);  // Significant
-        tad_process_significant_block_recursive(ctx, left);
-    } else {
-        tad_bitstream_write_bit(ctx->bs, 0);  // Insignificant
-        tad_queue_push(ctx->next_insignificant, left);
-    }
-
-    // Process right child (if exists)
-    if (block.length > mid) {
-        tad_block_t right = {block.start + mid, block.length - mid};
-        if (!tad_is_zero_block(ctx->coeffs, &right, ctx->threshold)) {
-            tad_bitstream_write_bit(ctx->bs, 1);
-            tad_process_significant_block_recursive(ctx, right);
-        } else {
-            tad_bitstream_write_bit(ctx->bs, 0);
-            tad_queue_push(ctx->next_insignificant, right);
-        }
-    }
-}
-
-// Binary tree EZBC encoding for a single channel (1D variant)
-// Made non-static for testing
-size_t tad_encode_channel_ezbc(int8_t *coeffs, size_t count, uint8_t **output) {
-    tad_bitstream_t bs;
-    tad_bitstream_init(&bs, count / 4);  // Initial guess
-
-    // Track coefficient significance
-    tad_coeff_state_t *states = calloc(count, sizeof(tad_coeff_state_t));
-
-    // Find maximum value to determine MSB bitplane
-    int max_abs = tad_find_max_abs(coeffs, count);
-    int msb_bitplane = tad_get_msb_bitplane(max_abs);
-
-    // Write header: MSB bitplane and length
-    tad_bitstream_write_bits(&bs, msb_bitplane, 8);
-    tad_bitstream_write_bits(&bs, (uint32_t)count, 16);
-
-    // Initialise queues
-    tad_block_queue_t insignificant_queue, next_insignificant;
-    tad_block_queue_t significant_queue, next_significant;
-
-    tad_queue_init(&insignificant_queue);
-    tad_queue_init(&next_insignificant);
-    tad_queue_init(&significant_queue);
-    tad_queue_init(&next_significant);
-
-    // Start with root block as insignificant
-    tad_block_t root = {0, (int)count};
-    tad_queue_push(&insignificant_queue, root);
-
-    // Process bitplanes from MSB to LSB
-    for (int bitplane = msb_bitplane; bitplane >= 0; bitplane--) {
-        int threshold = 1 << bitplane;
-
-        // Process insignificant blocks - check if they become significant
-        for (size_t i = 0; i < insignificant_queue.count; i++) {
-            tad_block_t block = insignificant_queue.blocks[i];
-
-            if (tad_is_zero_block(coeffs, &block, threshold)) {
-                // Still insignificant: emit 0
-                tad_bitstream_write_bit(&bs, 0);
-                // Keep in insignificant queue for next bitplane
-                tad_queue_push(&next_insignificant, block);
-            } else {
-                // Became significant: emit 1
-                tad_bitstream_write_bit(&bs, 1);
-
-                // Use recursive subdivision
-                int sign_count = 0;
-                tad_ezbc_context_t ctx = {
-                    .bs = &bs,
-                    .coeffs = coeffs,
-                    .states = states,
-                    .length = (int)count,
-                    .bitplane = bitplane,
-                    .threshold = threshold,
-                    .next_insignificant = &next_insignificant,
-                    .next_significant = &next_significant,
-                    .sign_count = &sign_count
-                };
-                tad_process_significant_block_recursive(&ctx, block);
-            }
-        }
-
-        // Refinement pass: emit next bit for already-significant coefficients
-        for (size_t i = 0; i < significant_queue.count; i++) {
-            tad_block_t block = significant_queue.blocks[i];
-            int idx = block.start;
-
-            // Emit refinement bit (bit at position 'bitplane')
-            int bit = (abs(coeffs[idx]) >> bitplane) & 1;
-            tad_bitstream_write_bit(&bs, bit);
-
-            // Add to next_significant so it continues being refined
-            tad_queue_push(&next_significant, block);
-        }
-
-        // Swap queues for next bitplane
-        tad_block_queue_t temp_insig = insignificant_queue;
-        insignificant_queue = next_insignificant;
-        next_insignificant = temp_insig;
-        next_insignificant.count = 0;  // Clear for reuse
-
-        tad_block_queue_t temp_sig = significant_queue;
-        significant_queue = next_significant;
-        next_significant = temp_sig;
-        next_significant.count = 0;  // Clear for reuse
-    }
-
-    // Cleanup queues
-    tad_queue_free(&insignificant_queue);
-    tad_queue_free(&next_insignificant);
-    tad_queue_free(&significant_queue);
-    tad_queue_free(&next_significant);
-    free(states);
-
-    // Copy bitstream to output
-    size_t output_size = tad_bitstream_size(&bs);
-    *output = malloc(output_size);
-    memcpy(*output, bs.data, output_size);
-    tad_bitstream_free(&bs);
-
-    return output_size;
-}
-
-//=============================================================================
-// Public API: Chunk Encoding
-//=============================================================================
-
-size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
-                          int max_index,
-                          float quantiser_scale, int zstd_level, uint8_t *output) {
-    // Calculate DWT levels from chunk size
-    int dwt_levels = calculate_dwt_levels(num_samples);
-    if (dwt_levels < 0) {
-        fprintf(stderr, "Error: Invalid chunk size %zu\n", num_samples);
-        return 0;
-    }
-
-    // Allocate working buffers (PCM32f throughout, int32 coefficients)
-    float *pcm32_left = malloc(num_samples * sizeof(float));
-    float *pcm32_right = malloc(num_samples * sizeof(float));
-    float *pcm32_mid = malloc(num_samples * sizeof(float));
-    float *pcm32_side = malloc(num_samples * sizeof(float));
-
-    float *dwt_mid = malloc(num_samples * sizeof(float));
-    float *dwt_side = malloc(num_samples * sizeof(float));
-
-    int8_t *quant_mid = malloc(num_samples * sizeof(int8_t));
-    int8_t *quant_side = malloc(num_samples * sizeof(int8_t));
-
-    // Step 1: Deinterleave stereo
-    for (size_t i = 0; i < num_samples; i++) {
-        pcm32_left[i] = pcm32_stereo[i * 2];
-        pcm32_right[i] = pcm32_stereo[i * 2 + 1];
-    }
-
-    // Step 1.1: Apply pre-emphasis filter (BEFORE gamma compression)
-    apply_preemphasis(pcm32_left, pcm32_right, num_samples);
-
-    // Step 1.2: Compress dynamic range
-    compress_gamma(pcm32_left, pcm32_right, num_samples);
-//    compress_mu_law(pcm32_left, pcm32_right, num_samples);
-
-    // Step 2: M/S decorrelation
-    ms_decorrelate(pcm32_left, pcm32_right, pcm32_mid, pcm32_side, num_samples);
-
-    // Step 3: Convert to float and apply DWT
-    for (size_t i = 0; i < num_samples; i++) {
-        dwt_mid[i] = pcm32_mid[i];
-        dwt_side[i] = pcm32_side[i];
-    }
-
-    dwt_forward_multilevel(dwt_mid, num_samples, dwt_levels);
-    dwt_forward_multilevel(dwt_side, num_samples, dwt_levels);
-
-    // Step 3.5: Accumulate coefficient statistics if enabled
-    static int stats_enabled = -1;
-    if (stats_enabled == -1) {
-        stats_enabled = getenv("TAD_COEFF_STATS") != NULL;
-        if (stats_enabled) {
-            init_statistics(dwt_levels);
-        }
-    }
-    if (stats_enabled) {
-        accumulate_coefficients(dwt_mid, dwt_levels, num_samples, mid_accumulators);
-        accumulate_coefficients(dwt_side, dwt_levels, num_samples, side_accumulators);
-    }
-
-//    apply_coeff_deadzone(0, dwt_mid, num_samples);
-//    apply_coeff_deadzone(1, dwt_side, num_samples);
-
-    // Step 4: Quantise with frequency-dependent weights and quantiser scaling
-    quantise_dwt_coefficients(0, dwt_mid, quant_mid, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale);
-    quantise_dwt_coefficients(1, dwt_side, quant_side, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale);
-
-    // Step 4.5: Accumulate quantised coefficient statistics if enabled
-    if (stats_enabled) {
-        accumulate_quantised(quant_mid, dwt_levels, num_samples, mid_quant_accumulators);
-        accumulate_quantised(quant_side, dwt_levels, num_samples, side_quant_accumulators);
-    }
-
-    // Step 5: Encode with binary tree EZBC (1D variant) - FIXED!
-    uint8_t *mid_ezbc = NULL;
-    uint8_t *side_ezbc = NULL;
-
-    size_t mid_size = tad_encode_channel_ezbc(quant_mid, num_samples, &mid_ezbc);
-    size_t side_size = tad_encode_channel_ezbc(quant_side, num_samples, &side_ezbc);
-
-    // Concatenate EZBC outputs
-    size_t uncompressed_size = mid_size + side_size;
-    uint8_t *temp_buffer = malloc(uncompressed_size);
-    memcpy(temp_buffer, mid_ezbc, mid_size);
-    memcpy(temp_buffer + mid_size, side_ezbc, side_size);
-
-    free(mid_ezbc);
-    free(side_ezbc);
-
-    // Step 6: Zstd compression (or bypass if zstd_level < 0)
-    uint8_t *write_ptr = output;
-
-    // Write chunk header
-    *((uint16_t*)write_ptr) = (uint16_t)num_samples;
-    write_ptr += sizeof(uint16_t);
-
-    *write_ptr = (uint8_t)max_index;
-    write_ptr += sizeof(uint8_t);
-
-    uint32_t *payload_size_ptr = (uint32_t*)write_ptr;
-    write_ptr += sizeof(uint32_t);
-
-    size_t payload_size;
-    int is_uncompressed = 0;
-
-    if (zstd_level < 0) {
-        // Bypass Zstd compression - use raw EZBC data
-        payload_size = uncompressed_size;
-        memcpy(write_ptr, temp_buffer, payload_size);
-        is_uncompressed = 1;
-    } else {
-        // Normal Zstd compression
-        int effective_level = (zstd_level > 0) ? zstd_level : TAD32_ZSTD_LEVEL;
-        size_t zstd_bound = ZSTD_compressBound(uncompressed_size);
-        uint8_t *zstd_buffer = malloc(zstd_bound);
-
-        payload_size = ZSTD_compress(zstd_buffer, zstd_bound, temp_buffer, uncompressed_size, effective_level);
-
-        if (ZSTD_isError(payload_size)) {
-            fprintf(stderr, "Error: Zstd compression failed: %s\n", ZSTD_getErrorName(payload_size));
-            free(zstd_buffer);
-            free(pcm32_left); free(pcm32_right);
-            free(pcm32_mid); free(pcm32_side); free(dwt_mid); free(dwt_side);
-            free(quant_mid); free(quant_side); free(temp_buffer);
-            return 0;
-        }
-
-        memcpy(write_ptr, zstd_buffer, payload_size);
-        free(zstd_buffer);
-    }
-
-    // Set payload size (MSB=1 indicates uncompressed data)
-    uint32_t size_field = (uint32_t)payload_size;
-    if (is_uncompressed) {
-        size_field |= 0x80000000;
-    }
-    *payload_size_ptr = size_field;
-    write_ptr += payload_size;
-
-    // Cleanup
-    free(pcm32_left); free(pcm32_right);
-    free(pcm32_mid); free(pcm32_side); free(dwt_mid); free(dwt_side);
-    free(quant_mid); free(quant_side); free(temp_buffer);
-
-    return write_ptr - output;
-}
diff --git a/video_encoder/lib/libtavdec/tav_video_decoder.c b/video_encoder/lib/libtavdec/tav_video_decoder.c
deleted file mode 100644
index bae2f6f..0000000
--- a/video_encoder/lib/libtavdec/tav_video_decoder.c
+++ /dev/null
@@ -1,1913 +0,0 @@
-// Created by CuriousTorvald and Claude on 2025-12-02.
-// TAV Video Decoder Library - Shared decoding functions for TAV format
-// Can be used by both regular TAV decoder and TAV-DT decoder
-
-#include "tav_video_decoder.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#include <zstd.h>
-
-//=============================================================================
-// Internal Constants and Macros
-//=============================================================================
-
-#define CLAMP(x, min, max) ((x) < (min) ? (min) : ((x) > (max) ? (max) : (x)))
-
-// Perceptual quantisation constants (match TSVM)
-static const float ANISOTROPY_MULT[] = {2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f};
-static const float ANISOTROPY_BIAS[] = {0.4f, 0.2f, 0.1f, 0.0f, 0.0f, 0.0f};
-static const float ANISOTROPY_MULT_CHROMA[] = {7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f};
-static const float ANISOTROPY_BIAS_CHROMA[] = {1.0f, 0.8f, 0.6f, 0.4f, 0.2f, 0.0f, 0.0f};
-static const float FOUR_PIXEL_DETAILER = 0.88f;
-static const float TWO_PIXEL_DETAILER = 0.92f;
-
-// Quantisation Lookup Table (matches TSVM exactly)
-static const int QLUT[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096};
-
-//=============================================================================
-// Internal Structures
-//=============================================================================
-
-// DWT subband information
-typedef struct {
-    int level;              // Decomposition level (1 to decompLevels)
-    int subband_type;       // 0=LL, 1=LH, 2=HL, 3=HH
-    int coeff_start;        // Starting index in linear coefficient array
-    int coeff_count;        // Number of coefficients in this subband
-} dwt_subband_info_t;
-
-// EZBC Block structure for quadtree
-typedef struct {
-    int x, y;
-    int width, height;
-} ezbc_block_t;
-
-// EZBC bitstream reader state
-typedef struct {
-    const uint8_t *data;
-    size_t size;
-    size_t byte_pos;
-    int bit_pos;
-} ezbc_bitreader_t;
-
-// EZBC block queues (simple dynamic arrays)
-typedef struct {
-    ezbc_block_t *blocks;
-    int count;
-    int capacity;
-} ezbc_block_queue_t;
-
-// Video decoder context (opaque to users)
-struct tav_video_context {
-    tav_video_params_t params;
-
-    // Working buffers
-    float *dwt_buffer_y;
-    float *dwt_buffer_co;
-    float *dwt_buffer_cg;
-    float *reference_ycocg_y;   // For P-frame delta accumulation
-    float *reference_ycocg_co;
-    float *reference_ycocg_cg;
-
-    // Error message buffer
-    char error_msg[256];
-
-    // Debug flag
-    int verbose;
-};
-
-//=============================================================================
-// DWT Subband Layout Calculation (matches TSVM)
-//=============================================================================
-
-static int calculate_subband_layout(int width, int height, int decomp_levels, dwt_subband_info_t *subbands) {
-    int subband_count = 0;
-
-    // generate division series
-    int widths[decomp_levels + 1]; widths[0] = width;
-    int heights[decomp_levels + 1]; heights[0] = height;
-
-    for (int i = 1; i < decomp_levels + 1; i++) {
-        widths[i] = (int)roundf(widths[i - 1] / 2.0f);
-        heights[i] = (int)roundf(heights[i - 1] / 2.0f);
-    }
-
-    // LL subband at maximum decomposition level
-    int ll_width = widths[decomp_levels];
-    int ll_height = heights[decomp_levels];
-    subbands[subband_count++] = (dwt_subband_info_t){decomp_levels, 0, 0, ll_width * ll_height};
-    int coeff_offset = ll_width * ll_height;
-
-    // LH, HL, HH subbands for each level from max down to 1
-    for (int level = decomp_levels; level >= 1; level--) {
-        int level_width = widths[decomp_levels - level + 1];
-        int level_height = heights[decomp_levels - level + 1];
-        const int subband_size = level_width * level_height;
-
-        // LH subband
-        subbands[subband_count++] = (dwt_subband_info_t){level, 1, coeff_offset, subband_size};
-        coeff_offset += subband_size;
-
-        // HL subband
-        subbands[subband_count++] = (dwt_subband_info_t){level, 2, coeff_offset, subband_size};
-        coeff_offset += subband_size;
-
-        // HH subband
-        subbands[subband_count++] = (dwt_subband_info_t){level, 3, coeff_offset, subband_size};
-        coeff_offset += subband_size;
-    }
-
-    return subband_count;
-}
-
-//=============================================================================
-// Perceptual Quantisation Model (matches TSVM exactly)
-//=============================================================================
-
-static int tav_derive_encoder_qindex(int q_index, int q_y_global) {
-    if (q_index > 0) return q_index - 1;
-    if (q_y_global >= 60) return 0;
-    else if (q_y_global >= 42) return 1;
-    else if (q_y_global >= 25) return 2;
-    else if (q_y_global >= 12) return 3;
-    else if (q_y_global >= 6) return 4;
-    else if (q_y_global >= 2) return 5;
-    else return 5;
-}
-
-static float perceptual_model3_LH(float level) {
-    const float H4 = 1.2f;
-    const float K = 2.0f;
-    const float K12 = K * 12.0f;
-    const float x = level;
-
-    const float Lx = H4 - ((K + 1.0f) / 15.0f) * (x - 4.0f);
-    const float C3 = -1.0f / 45.0f * (K12 + 92.0f);
-    const float G3x = (-x / 180.0f) * (K12 + 5.0f * x * x - 60.0f * x + 252.0f) - C3 + H4;
-
-    return (level >= 4.0f) ? Lx : G3x;
-}
-
-static float perceptual_model3_HL(int quality, float LH) {
-    return LH * ANISOTROPY_MULT[quality] + ANISOTROPY_BIAS[quality];
-}
-
-static float lerp(float x, float y, float a) {
-    return x * (1.0f - a) + y * a;
-}
-
-static float perceptual_model3_HH(float LH, float HL, float level) {
-    const float Kx = (sqrtf(level) - 1.0f) * 0.5f + 0.5f;
-    return lerp(LH, HL, Kx);
-}
-
-static float perceptual_model3_LL(float level) {
-    const float n = perceptual_model3_LH(level);
-    const float m = perceptual_model3_LH(level - 1.0f) / n;
-    return n / m;
-}
-
-static float perceptual_model3_chroma_basecurve(int quality, float level) {
-    return 1.0f - (1.0f / (0.5f * quality * quality + 1.0f)) * (level - 4.0f);
-}
-
-static float get_perceptual_weight(int q_index, int q_y_global, int level0, int subband_type,
-                                  int is_chroma, int max_levels) {
-    // Convert to perceptual level (1-6 scale)
-    const float level = 1.0f + ((level0 - 1.0f) / (max_levels - 1.0f)) * 5.0f;
-    const int quality_level = tav_derive_encoder_qindex(q_index, q_y_global);
-
-    if (!is_chroma) {
-        // LUMA CHANNEL
-        if (subband_type == 0) {
-            return perceptual_model3_LL(level);
-        }
-
-        const float LH = perceptual_model3_LH(level);
-        if (subband_type == 1) {
-            return LH;
-        }
-
-        const float HL = perceptual_model3_HL(quality_level, LH);
-        if (subband_type == 2) {
-            float detailer = 1.0f;
-            if (level >= 1.8f && level <= 2.2f) detailer = TWO_PIXEL_DETAILER;
-            else if (level >= 2.8f && level <= 3.2f) detailer = FOUR_PIXEL_DETAILER;
-            return HL * detailer;
-        } else {
-            // HH subband
-            float detailer = 1.0f;
-            if (level >= 1.8f && level <= 2.2f) detailer = TWO_PIXEL_DETAILER;
-            else if (level >= 2.8f && level <= 3.2f) detailer = FOUR_PIXEL_DETAILER;
-            return perceptual_model3_HH(LH, HL, level) * detailer;
-        }
-    } else {
-        // CHROMA CHANNELS
-        const float base = perceptual_model3_chroma_basecurve(quality_level, level - 1);
-        if (subband_type == 0) {
-            return 1.0f;
-        } else if (subband_type == 1) {
-            return fmaxf(base, 1.0f);
-        } else if (subband_type == 2) {
-            return fmaxf(base * ANISOTROPY_MULT_CHROMA[quality_level], 1.0f);
-        } else {
-            return fmaxf(base * ANISOTROPY_MULT_CHROMA[quality_level] + ANISOTROPY_BIAS_CHROMA[quality_level], 1.0f);
-        }
-    }
-}
-
-static void dequantise_dwt_subbands_perceptual(int q_index, int q_y_global, const int16_t *quantised,
-                                              float *dequantised, int width, int height, int decomp_levels,
-                                              float base_quantiser, int is_chroma) {
-    dwt_subband_info_t subbands[32]; // Max possible subbands
-    const int subband_count = calculate_subband_layout(width, height, decomp_levels, subbands);
-
-    const int coeff_count = width * height;
-    memset(dequantised, 0, coeff_count * sizeof(float));
-
-    // Apply perceptual weighting to each subband
-    for (int s = 0; s < subband_count; s++) {
-        const dwt_subband_info_t *subband = &subbands[s];
-        const float weight = get_perceptual_weight(q_index, q_y_global, subband->level,
-                                                  subband->subband_type, is_chroma, decomp_levels);
-        const float effective_quantiser = base_quantiser * weight;
-
-        // Apply linear dequantisation with perceptual weights
-        for (int i = 0; i < subband->coeff_count; i++) {
-            const int idx = subband->coeff_start + i;
-            if (idx < coeff_count) {
-                const float untruncated = quantised[idx] * effective_quantiser;
-                dequantised[idx] = untruncated;
-            }
-        }
-    }
-}
-
-//=============================================================================
-// Grain Synthesis (matches TSVM exactly)
-//=============================================================================
-
-// Deterministic RNG for grain synthesis (matches encoder)
-static inline uint32_t tav_grain_synthesis_rng(uint32_t frame, uint32_t band, uint32_t x, uint32_t y) {
-    uint32_t key = frame * 0x9e3779b9u ^ band * 0x7f4a7c15u ^ (y << 16) ^ x;
-    uint32_t hash = key;
-    hash = hash ^ (hash >> 16);
-    hash = hash * 0x7feb352du;
-    hash = hash ^ (hash >> 15);
-    hash = hash * 0x846ca68bu;
-    hash = hash ^ (hash >> 16);
-    return hash;
-}
-
-// Generate triangular noise from uint32 RNG (returns value in range [-1.0, 1.0])
-static inline float tav_grain_triangular_noise(uint32_t rng_val) {
-    float u1 = (rng_val & 0xFFFFu) / 65535.0f;
-    float u2 = ((rng_val >> 16) & 0xFFFFu) / 65535.0f;
-    return (u1 + u2) - 1.0f;
-}
-
-// Apply grain synthesis from DWT coefficients (decoder subtracts noise)
-static void apply_grain_synthesis(float *coeffs, int width, int height,
-                                 int decomp_levels, int frame_num, int q_y_global, uint8_t encoder_preset) {
-    // Anime preset: completely disable grain synthesis
-    if (encoder_preset & 0x02) {
-        return;
-    }
-
-    dwt_subband_info_t subbands[32];
-    const int subband_count = calculate_subband_layout(width, height, decomp_levels, subbands);
-
-    // Noise amplitude (matches Kotlin)
-    const float noise_amplitude = (q_y_global < 32 ? q_y_global : 32) * 0.4f;
-
-    // Process each subband (skip LL band which is level 0)
-    for (int s = 0; s < subband_count; s++) {
-        const dwt_subband_info_t *subband = &subbands[s];
-        if (subband->level == 0) continue;
-
-        uint32_t band = subband->level + subband->subband_type * 31 + 16777619;
-
-        for (int i = 0; i < subband->coeff_count; i++) {
-            const int idx = subband->coeff_start + i;
-            if (idx < width * height) {
-                int y = idx / width;
-                int x = idx % width;
-
-                uint32_t rng_val = tav_grain_synthesis_rng(frame_num, band, x, y);
-                float noise = tav_grain_triangular_noise(rng_val);
-
-                coeffs[idx] -= noise * noise_amplitude;
-            }
-        }
-    }
-}
-
-//=============================================================================
-// Significance Map Postprocessing (2-bit map format)
-//=============================================================================
-
-// Helper: Extract 2-bit code from bit-packed array
-static inline int get_twobit_code(const uint8_t *map_data, int map_bytes, int coeff_idx) {
-    int bit_pos = coeff_idx * 2;
-    int byte_idx = bit_pos / 8;
-    int bit_offset = bit_pos % 8;
-
-    uint8_t byte0 = map_data[byte_idx];
-    int code = (byte0 >> bit_offset) & 0x03;
-
-    // Handle byte boundary crossing
-    if (bit_offset == 7 && byte_idx + 1 < map_bytes) {
-        uint8_t byte1 = map_data[byte_idx + 1];
-        code = ((byte0 >> 7) & 0x01) | ((byte1 << 1) & 0x02);
-    }
-
-    return code;
-}
-
-// Decoder: reconstruct coefficients from 2-bit map format (entropyCoder=0)
-static void postprocess_coefficients_twobit(uint8_t *compressed_data, int coeff_count,
-                                           int16_t *output_y, int16_t *output_co, int16_t *output_cg) {
-    int map_bytes = (coeff_count * 2 + 7) / 8;
-
-    uint8_t *y_map = compressed_data;
-    uint8_t *co_map = compressed_data + map_bytes;
-    uint8_t *cg_map = compressed_data + map_bytes * 2;
-
-    // Count "other" values (code 11) for each channel
-    int y_others = 0, co_others = 0, cg_others = 0;
-    for (int i = 0; i < coeff_count; i++) {
-        if (get_twobit_code(y_map, map_bytes, i) == 3) y_others++;
-        if (get_twobit_code(co_map, map_bytes, i) == 3) co_others++;
-        if (get_twobit_code(cg_map, map_bytes, i) == 3) cg_others++;
-    }
-
-    // Value array offsets (after all maps)
-    uint8_t *value_ptr = compressed_data + map_bytes * 3;
-    int16_t *y_values = (int16_t *)value_ptr;
-    int16_t *co_values = (int16_t *)(value_ptr + y_others * 2);
-    int16_t *cg_values = (int16_t *)(value_ptr + y_others * 2 + co_others * 2);
-
-    // Reconstruct coefficients
-    int y_value_idx = 0, co_value_idx = 0, cg_value_idx = 0;
-
-    for (int i = 0; i < coeff_count; i++) {
-        // Y channel
-        int y_code = get_twobit_code(y_map, map_bytes, i);
-        switch (y_code) {
-            case 0: output_y[i] = 0; break;
-            case 1: output_y[i] = 1; break;
-            case 2: output_y[i] = -1; break;
-            case 3: output_y[i] = y_values[y_value_idx++]; break;
-        }
-
-        // Co channel
-        int co_code = get_twobit_code(co_map, map_bytes, i);
-        switch (co_code) {
-            case 0: output_co[i] = 0; break;
-            case 1: output_co[i] = 1; break;
-            case 2: output_co[i] = -1; break;
-            case 3: output_co[i] = co_values[co_value_idx++]; break;
-        }
-
-        // Cg channel
-        int cg_code = get_twobit_code(cg_map, map_bytes, i);
-        switch (cg_code) {
-            case 0: output_cg[i] = 0; break;
-            case 1: output_cg[i] = 1; break;
-            case 2: output_cg[i] = -1; break;
-            case 3: output_cg[i] = cg_values[cg_value_idx++]; break;
-        }
-    }
-}
-
-//=============================================================================
-// EZBC (Embedded Zero Block Coding) Decoder
-//=============================================================================
-
-// Read N bits from EZBC bitstream (LSB-first within each byte)
-static int ezbc_read_bits(ezbc_bitreader_t *reader, int num_bits) {
-    int result = 0;
-    for (int i = 0; i < num_bits; i++) {
-        if (reader->byte_pos >= reader->size) {
-            return result;
-        }
-
-        const int bit = (reader->data[reader->byte_pos] >> reader->bit_pos) & 1;
-        result |= (bit << i);
-
-        reader->bit_pos++;
-        if (reader->bit_pos == 8) {
-            reader->bit_pos = 0;
-            reader->byte_pos++;
-        }
-    }
-    return result;
-}
-
-static void ezbc_queue_init(ezbc_block_queue_t *q) {
-    q->capacity = 256;
-    q->count = 0;
-    q->blocks = malloc(q->capacity * sizeof(ezbc_block_t));
-}
-
-static void ezbc_queue_free(ezbc_block_queue_t *q) {
-    free(q->blocks);
-    q->blocks = NULL;
-    q->count = 0;
-}
-
-static void ezbc_queue_add(ezbc_block_queue_t *q, ezbc_block_t block) {
-    if (q->count >= q->capacity) {
-        q->capacity *= 2;
-        q->blocks = realloc(q->blocks, q->capacity * sizeof(ezbc_block_t));
-    }
-    q->blocks[q->count++] = block;
-}
-
-// Forward declaration
-static int ezbc_process_significant_block_recursive(
-    ezbc_bitreader_t *reader, ezbc_block_t block, int bitplane, int threshold,
-    int16_t *output, int width, int8_t *significant, int *first_bitplane,
-    ezbc_block_queue_t *next_significant, ezbc_block_queue_t *next_insignificant);
-
-// EZBC recursive block decoder (matches Kotlin implementation)
-static int ezbc_process_significant_block_recursive(
-    ezbc_bitreader_t *reader, ezbc_block_t block, int bitplane, int threshold,
-    int16_t *output, int width, int8_t *significant, int *first_bitplane,
-    ezbc_block_queue_t *next_significant, ezbc_block_queue_t *next_insignificant) {
-
-    int sign_bits_read = 0;
-
-    // If 1x1 block: read sign bit and add to significant queue
-    if (block.width == 1 && block.height == 1) {
-        const int idx = block.y * width + block.x;
-        const int sign_bit = ezbc_read_bits(reader, 1);
-        sign_bits_read++;
-
-        output[idx] = sign_bit ? -threshold : threshold;
-        significant[idx] = 1;
-        first_bitplane[idx] = bitplane;
-        ezbc_queue_add(next_significant, block);
-        return sign_bits_read;
-    }
-
-    // Block is > 1x1: subdivide and recursively process children
-    int mid_x = block.width / 2;
-    int mid_y = block.height / 2;
-    if (mid_x == 0) mid_x = 1;
-    if (mid_y == 0) mid_y = 1;
-
-    // Top-left child
-    ezbc_block_t tl = {block.x, block.y, mid_x, mid_y};
-    const int tl_flag = ezbc_read_bits(reader, 1);
-    if (tl_flag) {
-        sign_bits_read += ezbc_process_significant_block_recursive(
-            reader, tl, bitplane, threshold, output, width, significant, first_bitplane,
-            next_significant, next_insignificant);
-    } else {
-        ezbc_queue_add(next_insignificant, tl);
-    }
-
-    // Top-right child (if exists)
-    if (block.width > mid_x) {
-        ezbc_block_t tr = {block.x + mid_x, block.y, block.width - mid_x, mid_y};
-        const int tr_flag = ezbc_read_bits(reader, 1);
-        if (tr_flag) {
-            sign_bits_read += ezbc_process_significant_block_recursive(
-                reader, tr, bitplane, threshold, output, width, significant, first_bitplane,
-                next_significant, next_insignificant);
-        } else {
-            ezbc_queue_add(next_insignificant, tr);
-        }
-    }
-
-    // Bottom-left child (if exists)
-    if (block.height > mid_y) {
-        ezbc_block_t bl = {block.x, block.y + mid_y, mid_x, block.height - mid_y};
-        const int bl_flag = ezbc_read_bits(reader, 1);
-        if (bl_flag) {
-            sign_bits_read += ezbc_process_significant_block_recursive(
-                reader, bl, bitplane, threshold, output, width, significant, first_bitplane,
-                next_significant, next_insignificant);
-        } else {
-            ezbc_queue_add(next_insignificant, bl);
-        }
-    }
-
-    // Bottom-right child (if exists)
-    if (block.width > mid_x && block.height > mid_y) {
-        ezbc_block_t br = {block.x + mid_x, block.y + mid_y, block.width - mid_x, block.height - mid_y};
-        const int br_flag = ezbc_read_bits(reader, 1);
-        if (br_flag) {
-            sign_bits_read += ezbc_process_significant_block_recursive(
-                reader, br, bitplane, threshold, output, width, significant, first_bitplane,
-                next_significant, next_insignificant);
-        } else {
-            ezbc_queue_add(next_insignificant, br);
-        }
-    }
-
-    return sign_bits_read;
-}
-
-// Decode a single channel with EZBC
-static void decode_channel_ezbc(const uint8_t *ezbc_data, size_t offset, size_t size,
-                               int16_t *output, int expected_count) {
-    ezbc_bitreader_t reader = {ezbc_data, offset + size, offset, 0};
-
-    // Read header: MSB bitplane (8 bits), width (16 bits), height (16 bits)
-    const int msb_bitplane = ezbc_read_bits(&reader, 8);
-    const int width = ezbc_read_bits(&reader, 16);
-    const int height = ezbc_read_bits(&reader, 16);
-
-    const int actual_count = width * height;
-    if (actual_count > expected_count) {
-        memset(output, 0, expected_count * sizeof(int16_t));
-        return;
-    }
-
-    expected_count = actual_count;
-
-    // Initialise output and state tracking
-    memset(output, 0, expected_count * sizeof(int16_t));
-    int8_t *significant = calloc(expected_count, sizeof(int8_t));
-    int *first_bitplane = calloc(expected_count, sizeof(int));
-
-    // Initialise queues
-    ezbc_block_queue_t insignificant, next_insignificant, significant_queue, next_significant;
-    ezbc_queue_init(&insignificant);
-    ezbc_queue_init(&next_insignificant);
-    ezbc_queue_init(&significant_queue);
-    ezbc_queue_init(&next_significant);
-
-    // Start with root block
-    ezbc_block_t root = {0, 0, width, height};
-    ezbc_queue_add(&insignificant, root);
-
-    // Process bitplanes from MSB to LSB
-    for (int bitplane = msb_bitplane; bitplane >= 0; bitplane--) {
-        const int threshold = 1 << bitplane;
-
-        // Process insignificant blocks
-        for (int i = 0; i < insignificant.count; i++) {
-            const int flag = ezbc_read_bits(&reader, 1);
-
-            if (flag == 0) {
-                ezbc_queue_add(&next_insignificant, insignificant.blocks[i]);
-            } else {
-                ezbc_process_significant_block_recursive(
-                    &reader, insignificant.blocks[i], bitplane, threshold,
-                    output, width, significant, first_bitplane,
-                    &next_significant, &next_insignificant);
-            }
-        }
-
-        // Process significant 1x1 blocks (refinement)
-        for (int i = 0; i < significant_queue.count; i++) {
-            ezbc_block_t block = significant_queue.blocks[i];
-            const int idx = block.y * width + block.x;
-            const int refine_bit = ezbc_read_bits(&reader, 1);
-
-            if (refine_bit) {
-                const int bit_value = 1 << bitplane;
-                if (output[idx] < 0) {
-                    output[idx] -= bit_value;
-                } else {
-                    output[idx] += bit_value;
-                }
-            }
-
-            ezbc_queue_add(&next_significant, block);
-        }
-
-        // Swap queues
-        ezbc_block_queue_t temp_insig = insignificant;
-        insignificant = next_insignificant;
-        next_insignificant = temp_insig;
-        next_insignificant.count = 0;
-
-        ezbc_block_queue_t temp_sig = significant_queue;
-        significant_queue = next_significant;
-        next_significant = temp_sig;
-        next_significant.count = 0;
-    }
-
-    // Cleanup
-    free(significant);
-    free(first_bitplane);
-    ezbc_queue_free(&insignificant);
-    ezbc_queue_free(&next_insignificant);
-    ezbc_queue_free(&significant_queue);
-    ezbc_queue_free(&next_significant);
-}
-
-// Helper: peek at EZBC header to get dimensions without decoding
-static int ezbc_peek_dimensions(const uint8_t *compressed_data, int channel_layout,
-                                 int *out_width, int *out_height) {
-    const int has_y = (channel_layout & 0x04) == 0;
-
-    if (!has_y) {
-        return -1;
-    }
-
-    const uint32_t size = ((uint32_t)compressed_data[0]) |
-                         ((uint32_t)compressed_data[1] << 8) |
-                         ((uint32_t)compressed_data[2] << 16) |
-                         ((uint32_t)compressed_data[3] << 24);
-
-    if (size < 6) {
-        return -1;
-    }
-
-    const uint8_t *ezbc_data = compressed_data + 4;
-
-    ezbc_bitreader_t reader;
-    reader.data = ezbc_data;
-    reader.size = size;
-    reader.byte_pos = 0;
-    reader.bit_pos = 0;
-
-    ezbc_read_bits(&reader, 8);  // Skip MSB bitplane
-    *out_width = ezbc_read_bits(&reader, 16);
-    *out_height = ezbc_read_bits(&reader, 16);
-
-    return 0;
-}
-
-// EZBC postprocessing for single frames
-static void postprocess_coefficients_ezbc(uint8_t *compressed_data, int coeff_count,
-                                          int16_t *output_y, int16_t *output_co, int16_t *output_cg,
-                                          int channel_layout) {
-    const int has_y = (channel_layout & 0x04) == 0;
-    const int has_co = (channel_layout & 0x02) == 0;
-    const int has_cg = (channel_layout & 0x02) == 0;
-
-    int offset = 0;
-
-    // Decode Y channel
-    if (has_y && output_y) {
-        const uint32_t size = ((uint32_t)compressed_data[offset + 0]) |
-                             ((uint32_t)compressed_data[offset + 1] << 8) |
-                             ((uint32_t)compressed_data[offset + 2] << 16) |
-                             ((uint32_t)compressed_data[offset + 3] << 24);
-        offset += 4;
-        decode_channel_ezbc(compressed_data, offset, size, output_y, coeff_count);
-        offset += size;
-    }
-
-    // Decode Co channel
-    if (has_co && output_co) {
-        const uint32_t size = ((uint32_t)compressed_data[offset + 0]) |
-                             ((uint32_t)compressed_data[offset + 1] << 8) |
-                             ((uint32_t)compressed_data[offset + 2] << 16) |
-                             ((uint32_t)compressed_data[offset + 3] << 24);
-        offset += 4;
-        decode_channel_ezbc(compressed_data, offset, size, output_co, coeff_count);
-        offset += size;
-    }
-
-    // Decode Cg channel
-    if (has_cg && output_cg) {
-        const uint32_t size = ((uint32_t)compressed_data[offset + 0]) |
-                             ((uint32_t)compressed_data[offset + 1] << 8) |
-                             ((uint32_t)compressed_data[offset + 2] << 16) |
-                             ((uint32_t)compressed_data[offset + 3] << 24);
-        offset += 4;
-        decode_channel_ezbc(compressed_data, offset, size, output_cg, coeff_count);
-        offset += size;
-    }
-}
-
-//=============================================================================
-// DWT Inverse Transforms (matches TSVM)
-//=============================================================================
-
-// 9/7 inverse DWT (from TSVM Kotlin code)
-static void dwt_97_inverse_1d(float *data, int length) {
-    if (length < 2) return;
-
-    float *temp = malloc(length * sizeof(float));
-    int half = (length + 1) / 2;
-
-    // Split into low and high frequency components
-    for (int i = 0; i < half; i++) {
-        temp[i] = data[i];
-    }
-    for (int i = 0; i < length / 2; i++) {
-        if (half + i < length) {
-            temp[half + i] = data[half + i];
-        }
-    }
-
-    // 9/7 inverse lifting coefficients
-    const float alpha = -1.586134342f;
-    const float beta = -0.052980118f;
-    const float gamma = 0.882911076f;
-    const float delta = 0.443506852f;
-    const float K = 1.230174105f;
-
-    // Step 1: Undo scaling
-    for (int i = 0; i < half; i++) {
-        temp[i] /= K;
-    }
-    for (int i = 0; i < length / 2; i++) {
-        if (half + i < length) {
-            temp[half + i] *= K;
-        }
-    }
-
-    // Step 2: Undo δ update
-    for (int i = 0; i < half; i++) {
-        float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
-        float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr;
-        temp[i] -= delta * (d_curr + d_prev);
-    }
-
-    // Step 3: Undo γ predict
-    for (int i = 0; i < length / 2; i++) {
-        if (half + i < length) {
-            float s_curr = temp[i];
-            float s_next = (i + 1 < half) ? temp[i + 1] : s_curr;
-            temp[half + i] -= gamma * (s_curr + s_next);
-        }
-    }
-
-    // Step 4: Undo β update
-    for (int i = 0; i < half; i++) {
-        float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
-        float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr;
-        temp[i] -= beta * (d_curr + d_prev);
-    }
-
-    // Step 5: Undo α predict
-    for (int i = 0; i < length / 2; i++) {
-        if (half + i < length) {
-            float s_curr = temp[i];
-            float s_next = (i + 1 < half) ? temp[i + 1] : s_curr;
-            temp[half + i] -= alpha * (s_curr + s_next);
-        }
-    }
-
-    // Reconstruction - interleave low and high pass
-    for (int i = 0; i < length; i++) {
-        if (i % 2 == 0) {
-            data[i] = temp[i / 2];
-        } else {
-            int idx = i / 2;
-            if (half + idx < length) {
-                data[i] = temp[half + idx];
-            } else {
-                data[i] = 0.0f;
-            }
-        }
-    }
-
-    free(temp);
-}
-
-// 5/3 inverse DWT using lifting scheme
-static void dwt_53_inverse_1d(float *data, int length) {
-    if (length < 2) return;
-
-    float *temp = malloc(length * sizeof(float));
-    int half = (length + 1) / 2;
-
-    memcpy(temp, data, length * sizeof(float));
-
-    // Undo update step
-    for (int i = 0; i < half; i++) {
-        float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) +
-                               (i < half - 1 ? temp[half + i] : 0));
-        temp[i] -= update;
-    }
-
-    // Undo predict step and interleave
-    for (int i = 0; i < half; i++) {
-        data[2 * i] = temp[i];
-        int idx = 2 * i + 1;
-        if (idx < length) {
-            float pred = 0.5f * (temp[i] + (i < half - 1 ? temp[i + 1] : temp[i]));
-            data[idx] = temp[half + i] + pred;
-        }
-    }
-
-    free(temp);
-}
-
-// CDF 13/7 inverse DWT
-static void dwt_cdf137_inverse_1d(float *data, int length) {
-    if (length < 2) return;
-
-    const float K = 1.230174105f;  // Same scaling factor as encoder
-
-    float *temp = malloc(sizeof(float) * length);
-    int half = (length + 1) / 2;
-
-    int nE = half;
-    int nO = length / 2;
-
-    float *even = temp;
-    float *odd  = temp + nE;
-
-    // Load L and H
-    for (int i = 0; i < nE; i++) {
-        even[i] = data[i];
-    }
-    for (int i = 0; i < nO; i++) {
-        odd[i] = data[half + i];
-    }
-
-    // Undo scaling (reverse of encoder: low-pass /= K, high-pass *= K)
-    for (int i = 0; i < nE; i++) {
-        even[i] /= K;
-    }
-    for (int i = 0; i < nO; i++) {
-        odd[i] *= K;
-    }
-
-    // Inverse update: undo even[i] += 0.25 * (odd[i-1] + odd[i])
-    for (int i = 0; i < nE; i++) {
-        float oddPrev = (i > 0) ? odd[i - 1] : 0.0f;
-        float oddCurr = (i < nE - 1 && i < nO) ? odd[i] : 0.0f;
-        even[i] -= 0.25f * (oddPrev + oddCurr);
-    }
-
-    // Inverse predict: undo odd[i] = data[odd] - 0.5 * (even[i] + even[i+1])
-    for (int i = 0; i < nO; i++) {
-        float evenLeft = even[i];
-        float evenRight = (i + 1 < nE) ? even[i + 1] : even[i];
-        odd[i] += 0.5f * (evenLeft + evenRight);
-    }
-
-    // Interleave
-    for (int i = 0; i < nO; i++) {
-        data[2 * i]     = even[i];
-        data[2 * i + 1] = odd[i];
-    }
-    if (nE > nO) {
-        data[2 * nO] = even[nO];
-    }
-
-    free(temp);
-}
-
-// DD-4 inverse DWT
-static void dwt_dd4_inverse_1d(float *data, int length) {
-    if (length < 2) return;
-
-    float *temp = malloc(length * sizeof(float));
-    int half = (length + 1) / 2;
-
-    memcpy(temp, data, length * sizeof(float));
-
-    // DD-4 inverse lifting
-    for (int i = 0; i < half; i++) {
-        float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) +
-                               (i < half - 1 ? temp[half + i] : 0));
-        temp[i] -= update;
-    }
-
-    for (int i = 0; i < half; i++) {
-        data[2 * i] = temp[i];
-        int idx = 2 * i + 1;
-        if (idx < length) {
-            float pred = 0.5f * (temp[i] + (i < half - 1 ? temp[i + 1] : temp[i]));
-            data[idx] = temp[half + i] + pred;
-        }
-    }
-
-    free(temp);
-}
-
-// Haar inverse DWT
-static void dwt_haar_inverse_1d(float *data, int length) {
-    if (length < 2) return;
-
-    float *temp = malloc(length * sizeof(float));
-    const int half = (length + 1) / 2;
-
-    for (int i = 0; i < half; i++) {
-        if (2 * i + 1 < length) {
-            temp[2 * i] = data[i] + data[half + i];
-            temp[2 * i + 1] = data[i] - data[half + i];
-        } else {
-            temp[2 * i] = data[i];
-        }
-    }
-
-    for (int i = 0; i < length; i++) {
-        data[i] = temp[i];
-    }
-
-    free(temp);
-}
-
-// Multi-level inverse DWT
-static void apply_inverse_dwt_multilevel(float *data, int width, int height, int levels, int filter_type) {
-    int max_size = (width > height) ? width : height;
-    float *temp_row = malloc(max_size * sizeof(float));
-    float *temp_col = malloc(max_size * sizeof(float));
-
-    // Pre-calculate exact sequence of widths/heights
-    int *widths = malloc((levels + 1) * sizeof(int));
-    int *heights = malloc((levels + 1) * sizeof(int));
-
-    widths[0] = width;
-    heights[0] = height;
-    for (int i = 1; i <= levels; i++) {
-        widths[i] = (widths[i - 1] + 1) / 2;
-        heights[i] = (heights[i - 1] + 1) / 2;
-    }
-
-    // Apply inverse transforms
-    for (int level = levels - 1; level >= 0; level--) {
-        int current_width = widths[level];
-        int current_height = heights[level];
-
-        if (current_width < 1 || current_height < 1) continue;
-        if (current_width == 1 && current_height == 1) continue;
-
-        // Column inverse transform first (vertical)
-        for (int x = 0; x < current_width; x++) {
-            for (int y = 0; y < current_height; y++) {
-                temp_col[y] = data[y * width + x];
-            }
-
-            if (filter_type == 0) {
-                dwt_53_inverse_1d(temp_col, current_height);
-            } else if (filter_type == 1) {
-                dwt_97_inverse_1d(temp_col, current_height);
-            } else if (filter_type == 2) {
-                dwt_cdf137_inverse_1d(temp_col, current_height);
-            } else if (filter_type == 16) {
-                dwt_dd4_inverse_1d(temp_col, current_height);
-            } else if (filter_type == 255) {
-                dwt_haar_inverse_1d(temp_col, current_height);
-            }
-
-            for (int y = 0; y < current_height; y++) {
-                data[y * width + x] = temp_col[y];
-            }
-        }
-
-        // Row inverse transform second (horizontal)
-        for (int y = 0; y < current_height; y++) {
-            for (int x = 0; x < current_width; x++) {
-                temp_row[x] = data[y * width + x];
-            }
-
-            if (filter_type == 0) {
-                dwt_53_inverse_1d(temp_row, current_width);
-            } else if (filter_type == 1) {
-                dwt_97_inverse_1d(temp_row, current_width);
-            } else if (filter_type == 2) {
-                dwt_cdf137_inverse_1d(temp_row, current_width);
-            } else if (filter_type == 16) {
-                dwt_dd4_inverse_1d(temp_row, current_width);
-            } else if (filter_type == 255) {
-                dwt_haar_inverse_1d(temp_row, current_width);
-            }
-
-            for (int x = 0; x < current_width; x++) {
-                data[y * width + x] = temp_row[x];
-            }
-        }
-    }
-
-    free(widths);
-    free(heights);
-    free(temp_row);
-    free(temp_col);
-}
-
-//=============================================================================
-// Temporal DWT Functions
-//=============================================================================
-
-// Get temporal subband level for a given frame index in a GOP
-static int get_temporal_subband_level(int frame_idx, int num_frames, int temporal_levels) {
-    for (int level = 0; level < temporal_levels; level++) {
-        int frames_at_this_level = num_frames >> (temporal_levels - level);
-        if (frame_idx < frames_at_this_level) {
-            return level;
-        }
-    }
-    return temporal_levels;
-}
-
-// Calculate temporal quantiser scale for a given temporal subband level
-static float get_temporal_quantiser_scale(uint8_t encoder_preset, int temporal_level) {
-    const float BETA = (encoder_preset & 0x01) ? 0.0f : 0.6f;
-    const float KAPPA = (encoder_preset & 0x01) ? 1.0f : 1.14f;
-    return powf(2.0f, BETA * powf(temporal_level, KAPPA));
-}
-
-// Apply inverse 3D DWT to GOP data (spatial + temporal)
-static void apply_inverse_3d_dwt(float **gop_y, float **gop_co, float **gop_cg,
-                                int width, int height, int gop_size,
-                                int spatial_levels, int temporal_levels, int filter_type,
-                                int temporal_wavelet) {
-    // Step 1: Apply inverse 2D spatial DWT to each frame
-    for (int t = 0; t < gop_size; t++) {
-        apply_inverse_dwt_multilevel(gop_y[t], width, height, spatial_levels, filter_type);
-        apply_inverse_dwt_multilevel(gop_co[t], width, height, spatial_levels, filter_type);
-        apply_inverse_dwt_multilevel(gop_cg[t], width, height, spatial_levels, filter_type);
-    }
-
-    // Step 2: Apply inverse temporal DWT
-    if (gop_size < 2) return;
-
-    int *temporal_lengths = malloc((temporal_levels + 1) * sizeof(int));
-    temporal_lengths[0] = gop_size;
-    for (int i = 1; i <= temporal_levels; i++) {
-        temporal_lengths[i] = (temporal_lengths[i - 1] + 1) / 2;
-    }
-
-    float *temporal_line = malloc(gop_size * sizeof(float));
-    for (int y = 0; y < height; y++) {
-        for (int x = 0; x < width; x++) {
-            const int pixel_idx = y * width + x;
-
-            // Process Y channel
-            for (int t = 0; t < gop_size; t++) {
-                temporal_line[t] = gop_y[t][pixel_idx];
-            }
-            for (int level = temporal_levels - 1; level >= 0; level--) {
-                const int level_frames = temporal_lengths[level];
-                if (level_frames >= 2) {
-                    if (temporal_wavelet == 255) {
-                        dwt_haar_inverse_1d(temporal_line, level_frames);
-                    } else {
-                        dwt_53_inverse_1d(temporal_line, level_frames);
-                    }
-                }
-            }
-            for (int t = 0; t < gop_size; t++) {
-                gop_y[t][pixel_idx] = temporal_line[t];
-            }
-
-            // Process Co channel
-            for (int t = 0; t < gop_size; t++) {
-                temporal_line[t] = gop_co[t][pixel_idx];
-            }
-            for (int level = temporal_levels - 1; level >= 0; level--) {
-                const int level_frames = temporal_lengths[level];
-                if (level_frames >= 2) {
-                    if (temporal_wavelet == 255) {
-                        dwt_haar_inverse_1d(temporal_line, level_frames);
-                    } else {
-                        dwt_53_inverse_1d(temporal_line, level_frames);
-                    }
-                }
-            }
-            for (int t = 0; t < gop_size; t++) {
-                gop_co[t][pixel_idx] = temporal_line[t];
-            }
-
-            // Process Cg channel
-            for (int t = 0; t < gop_size; t++) {
-                temporal_line[t] = gop_cg[t][pixel_idx];
-            }
-            for (int level = temporal_levels - 1; level >= 0; level--) {
-                const int level_frames = temporal_lengths[level];
-                if (level_frames >= 2) {
-                    if (temporal_wavelet == 255) {
-                        dwt_haar_inverse_1d(temporal_line, level_frames);
-                    } else {
-                        dwt_53_inverse_1d(temporal_line, level_frames);
-                    }
-                }
-            }
-            for (int t = 0; t < gop_size; t++) {
-                gop_cg[t][pixel_idx] = temporal_line[t];
-            }
-        }
-    }
-
-    free(temporal_line);
-    free(temporal_lengths);
-}
-
-//=============================================================================
-// GOP Postprocessing Functions
-//=============================================================================
-
-// Postprocess GOP unified block (2-bit map format)
-static int16_t ***postprocess_gop_unified(const uint8_t *decompressed_data, size_t data_size,
-                                         int gop_size, int num_pixels, int channel_layout) {
-    const int map_bytes_per_frame = (num_pixels * 2 + 7) / 8;
-
-    const int has_y = (channel_layout & 0x04) == 0;
-    const int has_co = (channel_layout & 0x02) == 0;
-    const int has_cg = (channel_layout & 0x02) == 0;
-
-    int read_ptr = 0;
-    const int y_maps_start = has_y ? read_ptr : -1;
-    if (has_y) read_ptr += map_bytes_per_frame * gop_size;
-
-    const int co_maps_start = has_co ? read_ptr : -1;
-    if (has_co) read_ptr += map_bytes_per_frame * gop_size;
-
-    const int cg_maps_start = has_cg ? read_ptr : -1;
-    if (has_cg) read_ptr += map_bytes_per_frame * gop_size;
-
-    // Count "other" values
-    int y_other_count = 0, co_other_count = 0, cg_other_count = 0;
-
-    for (int frame = 0; frame < gop_size; frame++) {
-        const int frame_map_offset = frame * map_bytes_per_frame;
-        for (int i = 0; i < num_pixels; i++) {
-            const int bit_pos = i * 2;
-            const int byte_idx = bit_pos / 8;
-            const int bit_offset = bit_pos % 8;
-
-            if (has_y && y_maps_start + frame_map_offset + byte_idx < (int)data_size) {
-                int code = (decompressed_data[y_maps_start + frame_map_offset + byte_idx] >> bit_offset) & 0x03;
-                if (bit_offset == 7 && byte_idx + 1 < map_bytes_per_frame) {
-                    const int next_byte = decompressed_data[y_maps_start + frame_map_offset + byte_idx + 1] & 0xFF;
-                    code = (code & 0x01) | ((next_byte & 0x01) << 1);
-                }
-                if (code == 3) y_other_count++;
-            }
-            if (has_co && co_maps_start + frame_map_offset + byte_idx < (int)data_size) {
-                int code = (decompressed_data[co_maps_start + frame_map_offset + byte_idx] >> bit_offset) & 0x03;
-                if (bit_offset == 7 && byte_idx + 1 < map_bytes_per_frame) {
-                    const int next_byte = decompressed_data[co_maps_start + frame_map_offset + byte_idx + 1] & 0xFF;
-                    code = (code & 0x01) | ((next_byte & 0x01) << 1);
-                }
-                if (code == 3) co_other_count++;
-            }
-            if (has_cg && cg_maps_start + frame_map_offset + byte_idx < (int)data_size) {
-                int code = (decompressed_data[cg_maps_start + frame_map_offset + byte_idx] >> bit_offset) & 0x03;
-                if (bit_offset == 7 && byte_idx + 1 < map_bytes_per_frame) {
-                    const int next_byte = decompressed_data[cg_maps_start + frame_map_offset + byte_idx + 1] & 0xFF;
-                    code = (code & 0x01) | ((next_byte & 0x01) << 1);
-                }
-                if (code == 3) cg_other_count++;
-            }
-        }
-    }
-
-    const int y_values_start = read_ptr;
-    read_ptr += y_other_count * 2;
-
-    const int co_values_start = read_ptr;
-    read_ptr += co_other_count * 2;
-
-    const int cg_values_start = read_ptr;
-
-    // Allocate output arrays
-    int16_t ***output = malloc(gop_size * sizeof(int16_t **));
-    for (int t = 0; t < gop_size; t++) {
-        output[t] = malloc(3 * sizeof(int16_t *));
-        output[t][0] = calloc(num_pixels, sizeof(int16_t));
-        output[t][1] = calloc(num_pixels, sizeof(int16_t));
-        output[t][2] = calloc(num_pixels, sizeof(int16_t));
-    }
-
-    int y_value_idx = 0, co_value_idx = 0, cg_value_idx = 0;
-
-    for (int frame = 0; frame < gop_size; frame++) {
-        const int frame_map_offset = frame * map_bytes_per_frame;
-        for (int i = 0; i < num_pixels; i++) {
-            const int bit_pos = i * 2;
-            const int byte_idx = bit_pos / 8;
-            const int bit_offset = bit_pos % 8;
-
-            // Decode Y
-            if (has_y && y_maps_start + frame_map_offset + byte_idx < (int)data_size) {
-                int code = (decompressed_data[y_maps_start + frame_map_offset + byte_idx] >> bit_offset) & 0x03;
-                if (bit_offset == 7 && byte_idx + 1 < map_bytes_per_frame) {
-                    const int next_byte = decompressed_data[y_maps_start + frame_map_offset + byte_idx + 1] & 0xFF;
-                    code = (code & 0x01) | ((next_byte & 0x01) << 1);
-                }
-                if (code == 0) {
-                    output[frame][0][i] = 0;
-                } else if (code == 1) {
-                    output[frame][0][i] = 1;
-                } else if (code == 2) {
-                    output[frame][0][i] = -1;
-                } else {
-                    const int val_offset = y_values_start + y_value_idx * 2;
-                    y_value_idx++;
-                    if (val_offset + 1 < (int)data_size) {
-                        const int lo = decompressed_data[val_offset] & 0xFF;
-                        const int hi = (int8_t)decompressed_data[val_offset + 1];
-                        output[frame][0][i] = (int16_t)((hi << 8) | lo);
-                    } else {
-                        output[frame][0][i] = 0;
-                    }
-                }
-            }
-
-            // Decode Co
-            if (has_co && co_maps_start + frame_map_offset + byte_idx < (int)data_size) {
-                int code = (decompressed_data[co_maps_start + frame_map_offset + byte_idx] >> bit_offset) & 0x03;
-                if (bit_offset == 7 && byte_idx + 1 < map_bytes_per_frame) {
-                    const int next_byte = decompressed_data[co_maps_start + frame_map_offset + byte_idx + 1] & 0xFF;
-                    code = (code & 0x01) | ((next_byte & 0x01) << 1);
-                }
-                if (code == 0) {
-                    output[frame][1][i] = 0;
-                } else if (code == 1) {
-                    output[frame][1][i] = 1;
-                } else if (code == 2) {
-                    output[frame][1][i] = -1;
-                } else {
-                    const int val_offset = co_values_start + co_value_idx * 2;
-                    co_value_idx++;
-                    if (val_offset + 1 < (int)data_size) {
-                        const int lo = decompressed_data[val_offset] & 0xFF;
-                        const int hi = (int8_t)decompressed_data[val_offset + 1];
-                        output[frame][1][i] = (int16_t)((hi << 8) | lo);
-                    } else {
-                        output[frame][1][i] = 0;
-                    }
-                }
-            }
-
-            // Decode Cg
-            if (has_cg && cg_maps_start + frame_map_offset + byte_idx < (int)data_size) {
-                int code = (decompressed_data[cg_maps_start + frame_map_offset + byte_idx] >> bit_offset) & 0x03;
-                if (bit_offset == 7 && byte_idx + 1 < map_bytes_per_frame) {
-                    const int next_byte = decompressed_data[cg_maps_start + frame_map_offset + byte_idx + 1] & 0xFF;
-                    code = (code & 0x01) | ((next_byte & 0x01) << 1);
-                }
-                if (code == 0) {
-                    output[frame][2][i] = 0;
-                } else if (code == 1) {
-                    output[frame][2][i] = 1;
-                } else if (code == 2) {
-                    output[frame][2][i] = -1;
-                } else {
-                    const int val_offset = cg_values_start + cg_value_idx * 2;
-                    cg_value_idx++;
-                    if (val_offset + 1 < (int)data_size) {
-                        const int lo = decompressed_data[val_offset] & 0xFF;
-                        const int hi = (int8_t)decompressed_data[val_offset + 1];
-                        output[frame][2][i] = (int16_t)((hi << 8) | lo);
-                    } else {
-                        output[frame][2][i] = 0;
-                    }
-                }
-            }
-        }
-    }
-
-    return output;
-}
-
-// Postprocess GOP RAW format
-static int16_t ***postprocess_gop_raw(const uint8_t *decompressed_data, size_t data_size,
-                                     int gop_size, int num_pixels, int channel_layout) {
-    const int has_y = (channel_layout & 0x04) == 0;
-    const int has_co = (channel_layout & 0x02) == 0;
-    const int has_cg = (channel_layout & 0x02) == 0;
-
-    int16_t ***output = malloc(gop_size * sizeof(int16_t **));
-    for (int t = 0; t < gop_size; t++) {
-        output[t] = malloc(3 * sizeof(int16_t *));
-        output[t][0] = calloc(num_pixels, sizeof(int16_t));
-        output[t][1] = calloc(num_pixels, sizeof(int16_t));
-        output[t][2] = calloc(num_pixels, sizeof(int16_t));
-    }
-
-    int offset = 0;
-
-    if (has_y) {
-        const int channel_size = gop_size * num_pixels * sizeof(int16_t);
-        if (offset + channel_size > (int)data_size) {
-            goto error_cleanup;
-        }
-        const int16_t *y_data = (const int16_t *)(decompressed_data + offset);
-        for (int t = 0; t < gop_size; t++) {
-            memcpy(output[t][0], y_data + t * num_pixels, num_pixels * sizeof(int16_t));
-        }
-        offset += channel_size;
-    }
-
-    if (has_co) {
-        const int channel_size = gop_size * num_pixels * sizeof(int16_t);
-        if (offset + channel_size > (int)data_size) {
-            goto error_cleanup;
-        }
-        const int16_t *co_data = (const int16_t *)(decompressed_data + offset);
-        for (int t = 0; t < gop_size; t++) {
-            memcpy(output[t][1], co_data + t * num_pixels, num_pixels * sizeof(int16_t));
-        }
-        offset += channel_size;
-    }
-
-    if (has_cg) {
-        const int channel_size = gop_size * num_pixels * sizeof(int16_t);
-        if (offset + channel_size > (int)data_size) {
-            goto error_cleanup;
-        }
-        const int16_t *cg_data = (const int16_t *)(decompressed_data + offset);
-        for (int t = 0; t < gop_size; t++) {
-            memcpy(output[t][2], cg_data + t * num_pixels, num_pixels * sizeof(int16_t));
-        }
-        offset += channel_size;
-    }
-
-    return output;
-
-error_cleanup:
-    for (int t = 0; t < gop_size; t++) {
-        free(output[t][0]);
-        free(output[t][1]);
-        free(output[t][2]);
-        free(output[t]);
-    }
-    free(output);
-    return NULL;
-}
-
-// Postprocess GOP EZBC format
-static int16_t ***postprocess_gop_ezbc(const uint8_t *decompressed_data, size_t data_size,
-                                      int gop_size, int num_pixels, int channel_layout,
-                                      int *out_width, int *out_height) {
-    int actual_width = 0, actual_height = 0;
-    int actual_pixels = num_pixels;
-
-    if (data_size >= 8) {
-        const uint32_t first_frame_size = ((uint32_t)decompressed_data[0]) |
-                                         ((uint32_t)decompressed_data[1] << 8) |
-                                         ((uint32_t)decompressed_data[2] << 16) |
-                                         ((uint32_t)decompressed_data[3] << 24);
-
-        if (4 + first_frame_size <= data_size) {
-            if (ezbc_peek_dimensions(decompressed_data + 4, channel_layout,
-                                     &actual_width, &actual_height) == 0) {
-                actual_pixels = actual_width * actual_height;
-            }
-        }
-    }
-
-    if (actual_width == 0 || actual_height == 0) {
-        actual_width = (int)sqrt(num_pixels);
-        actual_height = num_pixels / actual_width;
-        actual_pixels = actual_width * actual_height;
-    }
-
-    if (out_width) *out_width = actual_width;
-    if (out_height) *out_height = actual_height;
-
-    int16_t ***output = malloc(gop_size * sizeof(int16_t **));
-    for (int t = 0; t < gop_size; t++) {
-        output[t] = malloc(3 * sizeof(int16_t *));
-        output[t][0] = calloc(actual_pixels, sizeof(int16_t));
-        output[t][1] = calloc(actual_pixels, sizeof(int16_t));
-        output[t][2] = calloc(actual_pixels, sizeof(int16_t));
-    }
-
-    int offset = 0;
-
-    for (int t = 0; t < gop_size; t++) {
-        if (offset + 4 > (int)data_size) {
-            goto error_cleanup;
-        }
-
-        const uint32_t frame_size = ((uint32_t)decompressed_data[offset + 0]) |
-                                   ((uint32_t)decompressed_data[offset + 1] << 8) |
-                                   ((uint32_t)decompressed_data[offset + 2] << 16) |
-                                   ((uint32_t)decompressed_data[offset + 3] << 24);
-        offset += 4;
-
-        if (offset + frame_size > data_size) {
-            goto error_cleanup;
-        }
-
-        postprocess_coefficients_ezbc(
-            (uint8_t *)(decompressed_data + offset), actual_pixels,
-            output[t][0], output[t][1], output[t][2],
-            channel_layout);
-
-        offset += frame_size;
-    }
-
-    return output;
-
-error_cleanup:
-    for (int t = 0; t < gop_size; t++) {
-        free(output[t][0]);
-        free(output[t][1]);
-        free(output[t][2]);
-        free(output[t]);
-    }
-    free(output);
-    return NULL;
-}
-
-//=============================================================================
-// Color Conversion
-//=============================================================================
-
-static void ycocgr_to_rgb(float y, float co, float cg, uint8_t *r, uint8_t *g, uint8_t *b) {
-    float tmp = y - cg / 2.0f;
-    float g_val = cg + tmp;
-    float b_val = tmp - co / 2.0f;
-    float r_val = co + b_val;
-
-    *r = CLAMP(roundf(r_val), 0, 255);
-    *g = CLAMP(roundf(g_val), 0, 255);
-    *b = CLAMP(roundf(b_val), 0, 255);
-}
-
-static void ictcp_to_rgb(float i, float ct, float cp, uint8_t *r, uint8_t *g, uint8_t *b) {
-    float l = i + 0.008609f * ct;
-    float m = i - 0.008609f * ct;
-    float s = i + 0.560031f * cp;
-
-    l = powf(fmaxf(l, 0.0f), 1.0f / 0.1593f);
-    m = powf(fmaxf(m, 0.0f), 1.0f / 0.1593f);
-    s = powf(fmaxf(s, 0.0f), 1.0f / 0.1593f);
-
-    float r_val = 5.432622f * l - 4.679910f * m + 0.247288f * s;
-    float g_val = -1.106160f * l + 2.311198f * m - 0.205038f * s;
-    float b_val = 0.028262f * l - 0.195689f * m + 1.167427f * s;
-
-    *r = CLAMP((int)(r_val * 255.0f + 0.5f), 0, 255);
-    *g = CLAMP((int)(g_val * 255.0f + 0.5f), 0, 255);
-    *b = CLAMP((int)(b_val * 255.0f + 0.5f), 0, 255);
-}
-
-//=============================================================================
-// Public API Implementation
-//=============================================================================
-
-tav_video_context_t *tav_video_create(const tav_video_params_t *params) {
-    if (!params) return NULL;
-
-    tav_video_context_t *ctx = calloc(1, sizeof(tav_video_context_t));
-    if (!ctx) return NULL;
-
-    ctx->params = *params;
-    ctx->verbose = 0;
-
-    const int buffer_size = params->width * params->height;
-
-    // Allocate working buffers
-    ctx->dwt_buffer_y = calloc(buffer_size, sizeof(float));
-    ctx->dwt_buffer_co = calloc(buffer_size, sizeof(float));
-    ctx->dwt_buffer_cg = calloc(buffer_size, sizeof(float));
-    ctx->reference_ycocg_y = calloc(buffer_size, sizeof(float));
-    ctx->reference_ycocg_co = calloc(buffer_size, sizeof(float));
-    ctx->reference_ycocg_cg = calloc(buffer_size, sizeof(float));
-
-    if (!ctx->dwt_buffer_y || !ctx->dwt_buffer_co || !ctx->dwt_buffer_cg ||
-        !ctx->reference_ycocg_y || !ctx->reference_ycocg_co || !ctx->reference_ycocg_cg) {
-        tav_video_free(ctx);
-        return NULL;
-    }
-
-    snprintf(ctx->error_msg, sizeof(ctx->error_msg), "No error");
-    return ctx;
-}
-
-void tav_video_free(tav_video_context_t *ctx) {
-    if (!ctx) return;
-
-    free(ctx->dwt_buffer_y);
-    free(ctx->dwt_buffer_co);
-    free(ctx->dwt_buffer_cg);
-    free(ctx->reference_ycocg_y);
-    free(ctx->reference_ycocg_co);
-    free(ctx->reference_ycocg_cg);
-    free(ctx);
-}
-
-int tav_video_decode_gop(tav_video_context_t *ctx,
-                         const uint8_t *compressed_data, uint32_t compressed_size,
-                         uint8_t gop_size, uint8_t **rgb_frames) {
-    if (!ctx || !compressed_data || !rgb_frames) {
-        if (ctx) snprintf(ctx->error_msg, sizeof(ctx->error_msg), "Invalid parameters");
-        return -1;
-    }
-
-    const int width = ctx->params.width;
-    const int height = ctx->params.height;
-    const int num_pixels = width * height;
-
-    // Decompress with Zstd (or use raw data if no_zstd flag is set)
-    uint8_t *decompressed_data;
-    size_t decompressed_size;
-    int should_free_data;
-
-    if (ctx->params.no_zstd) {
-        // No Zstd compression - use data directly
-        decompressed_data = (uint8_t *)compressed_data;  // Cast away const, won't modify
-        decompressed_size = compressed_size;
-        should_free_data = 0;
-    } else {
-        // Normal Zstd decompression
-        const size_t decompressed_bound = ZSTD_getFrameContentSize(compressed_data, compressed_size);
-        if (ZSTD_isError(decompressed_bound)) {
-            snprintf(ctx->error_msg, sizeof(ctx->error_msg), "Zstd decompression failed");
-            return -1;
-        }
-
-        decompressed_data = malloc(decompressed_bound);
-        if (!decompressed_data) {
-            snprintf(ctx->error_msg, sizeof(ctx->error_msg), "Memory allocation failed");
-            return -1;
-        }
-
-        decompressed_size = ZSTD_decompress(decompressed_data, decompressed_bound,
-                                            compressed_data, compressed_size);
-        if (ZSTD_isError(decompressed_size)) {
-            free(decompressed_data);
-            snprintf(ctx->error_msg, sizeof(ctx->error_msg), "Zstd decompression failed");
-            return -1;
-        }
-        should_free_data = 1;
-    }
-
-    // Postprocess GOP data based on entropy coder type
-    int16_t ***gop_coeffs = NULL;
-    int actual_width = width;
-    int actual_height = height;
-
-    if (ctx->params.entropy_coder == 0) {
-        gop_coeffs = postprocess_gop_unified(decompressed_data, decompressed_size, gop_size, num_pixels, ctx->params.channel_layout);
-    } else if (ctx->params.entropy_coder == 1) {
-        gop_coeffs = postprocess_gop_ezbc(decompressed_data, decompressed_size, gop_size, num_pixels, ctx->params.channel_layout, &actual_width, &actual_height);
-    } else if (ctx->params.entropy_coder == 2) {
-        gop_coeffs = postprocess_gop_raw(decompressed_data, decompressed_size, gop_size, num_pixels, ctx->params.channel_layout);
-    }
-
-    if (should_free_data) {
-        free(decompressed_data);
-    }
-
-    if (!gop_coeffs) {
-        snprintf(ctx->error_msg, sizeof(ctx->error_msg), "GOP postprocessing failed");
-        return -1;
-    }
-
-    // Use actual dimensions from EZBC data (may differ from params for interlaced content)
-    int final_width = width;
-    int final_height = height;
-    int final_num_pixels = num_pixels;
-
-    if (actual_width != 0 && actual_height != 0) {
-        if (actual_width != width || actual_height != height) {
-            if (ctx->verbose) {
-                fprintf(stderr, "Warning: EZBC dimensions (%dx%d) differ from params (%dx%d), using EZBC dimensions\n",
-                        actual_width, actual_height, width, height);
-            }
-        }
-        final_width = actual_width;
-        final_height = actual_height;
-        final_num_pixels = actual_width * actual_height;
-    }
-
-    // Allocate GOP float buffers for 3D DWT using actual dimensions
-    float **gop_y = malloc(gop_size * sizeof(float *));
-    float **gop_co = malloc(gop_size * sizeof(float *));
-    float **gop_cg = malloc(gop_size * sizeof(float *));
-
-    for (int t = 0; t < gop_size; t++) {
-        gop_y[t] = calloc(final_num_pixels, sizeof(float));
-        gop_co[t] = calloc(final_num_pixels, sizeof(float));
-        gop_cg[t] = calloc(final_num_pixels, sizeof(float));
-    }
-
-    // Dequantise each frame
-    for (int t = 0; t < gop_size; t++) {
-        const int temporal_level = get_temporal_subband_level(t, gop_size, ctx->params.temporal_levels);
-        const float temporal_scale = get_temporal_quantiser_scale(ctx->params.encoder_preset, temporal_level);
-
-        const float base_q_y =  roundf(QLUT[ctx->params.quantiser_y] * temporal_scale);
-        const float base_q_co = roundf(QLUT[ctx->params.quantiser_co] * temporal_scale);
-        const float base_q_cg = roundf(QLUT[ctx->params.quantiser_cg] * temporal_scale);
-
-        if (ctx->params.perceptual_tuning) {
-            dequantise_dwt_subbands_perceptual(0, QLUT[ctx->params.quantiser_y],
-                                              gop_coeffs[t][0], gop_y[t], final_width, final_height,
-                                              ctx->params.decomp_levels, base_q_y, 0);
-            dequantise_dwt_subbands_perceptual(0, QLUT[ctx->params.quantiser_y],
-                                              gop_coeffs[t][1], gop_co[t], final_width, final_height,
-                                              ctx->params.decomp_levels, base_q_co, 1);
-            dequantise_dwt_subbands_perceptual(0, QLUT[ctx->params.quantiser_y],
-                                              gop_coeffs[t][2], gop_cg[t], final_width, final_height,
-                                              ctx->params.decomp_levels, base_q_cg, 1);
-        } else {
-            // Uniform dequantisation
-            for (int i = 0; i < final_num_pixels; i++) {
-                gop_y[t][i] = gop_coeffs[t][0][i] * base_q_y;
-                gop_co[t][i] = gop_coeffs[t][1][i] * base_q_co;
-                gop_cg[t][i] = gop_coeffs[t][2][i] * base_q_cg;
-            }
-        }
-
-        // Apply grain synthesis to Y channel ONLY (use final dimensions to match allocated buffer)
-        // Note: Grain synthesis is NOT applied to chroma channels
-        apply_grain_synthesis(gop_y[t], final_width, final_height, ctx->params.decomp_levels, t,
-                            QLUT[ctx->params.quantiser_y], ctx->params.encoder_preset);
-    }
-
-    // Free quantised coefficients
-    for (int t = 0; t < gop_size; t++) {
-        free(gop_coeffs[t][0]);
-        free(gop_coeffs[t][1]);
-        free(gop_coeffs[t][2]);
-        free(gop_coeffs[t]);
-    }
-    free(gop_coeffs);
-
-    // Apply inverse 3D DWT
-    apply_inverse_3d_dwt(gop_y, gop_co, gop_cg, final_width, final_height, gop_size,
-                        ctx->params.decomp_levels, ctx->params.temporal_levels,
-                        ctx->params.wavelet_filter, ctx->params.temporal_wavelet);
-
-    // Convert to RGB and write to output frames
-    for (int t = 0; t < gop_size; t++) {
-        for (int y = 0; y < final_height; y++) {
-            for (int x = 0; x < final_width; x++) {
-                const int idx = y * final_width + x;
-                const int rgb_idx = (y * final_width + x) * 3;
-
-                if (ctx->params.channel_layout == 0) {
-                    ycocgr_to_rgb(gop_y[t][idx], gop_co[t][idx], gop_cg[t][idx],
-                                 &rgb_frames[t][rgb_idx], &rgb_frames[t][rgb_idx + 1], &rgb_frames[t][rgb_idx + 2]);
-                } else {
-                    ictcp_to_rgb(gop_y[t][idx], gop_co[t][idx], gop_cg[t][idx],
-                                &rgb_frames[t][rgb_idx], &rgb_frames[t][rgb_idx + 1], &rgb_frames[t][rgb_idx + 2]);
-                }
-            }
-        }
-    }
-
-    // Free GOP buffers
-    for (int t = 0; t < gop_size; t++) {
-        free(gop_y[t]);
-        free(gop_co[t]);
-        free(gop_cg[t]);
-    }
-    free(gop_y);
-    free(gop_co);
-    free(gop_cg);
-
-    return 0;
-}
-
-int tav_video_decode_iframe(tav_video_context_t *ctx,
-                            const uint8_t *compressed_data, uint32_t packet_size,
-                            uint8_t *rgb_frame) {
-    if (!ctx || !compressed_data || !rgb_frame) {
-        if (ctx) snprintf(ctx->error_msg, sizeof(ctx->error_msg), "Invalid parameters");
-        return -1;
-    }
-
-    const int width = ctx->params.width;
-    const int height = ctx->params.height;
-    const int num_pixels = width * height;
-
-    // Decompress (or use raw data if no_zstd flag is set)
-    uint8_t *decompressed_data;
-    size_t decompressed_size;
-    int should_free_data;
-
-    if (ctx->params.no_zstd) {
-        // No Zstd compression - use data directly
-        decompressed_data = (uint8_t *)compressed_data;
-        decompressed_size = packet_size;
-        should_free_data = 0;
-    } else {
-        // Normal Zstd decompression
-        const size_t decompressed_bound = ZSTD_getFrameContentSize(compressed_data, packet_size);
-        if (ZSTD_isError(decompressed_bound)) {
-            snprintf(ctx->error_msg, sizeof(ctx->error_msg), "Zstd decompression failed");
-            return -1;
-        }
-
-        decompressed_data = malloc(decompressed_bound);
-        decompressed_size = ZSTD_decompress(decompressed_data, decompressed_bound,
-                                            compressed_data, packet_size);
-        if (ZSTD_isError(decompressed_size)) {
-            free(decompressed_data);
-            snprintf(ctx->error_msg, sizeof(ctx->error_msg), "Zstd decompression failed");
-            return -1;
-        }
-        should_free_data = 1;
-    }
-
-    // Allocate coefficient buffers
-    int16_t *coeffs_y = calloc(num_pixels, sizeof(int16_t));
-    int16_t *coeffs_co = calloc(num_pixels, sizeof(int16_t));
-    int16_t *coeffs_cg = calloc(num_pixels, sizeof(int16_t));
-
-    // Skip 4-byte tile header: [mode][qY_override][qCo_override][qCg_override]
-    // The tile header is written by the encoder before the EZBC/twobit data
-    uint8_t *coeff_data = decompressed_data + 4;
-
-    // Postprocess based on entropy coder
-    if (ctx->params.entropy_coder == 0) {
-        postprocess_coefficients_twobit(coeff_data, num_pixels, coeffs_y, coeffs_co, coeffs_cg);
-    } else if (ctx->params.entropy_coder == 1) {
-        postprocess_coefficients_ezbc(coeff_data, num_pixels, coeffs_y, coeffs_co, coeffs_cg, ctx->params.channel_layout);
-    }
-
-    if (should_free_data) {
-        free(decompressed_data);
-    }
-
-    // Dequantise
-    const float base_q_y = QLUT[ctx->params.quantiser_y];
-    const float base_q_co = QLUT[ctx->params.quantiser_co];
-    const float base_q_cg = QLUT[ctx->params.quantiser_cg];
-
-    if (ctx->params.perceptual_tuning) {
-        dequantise_dwt_subbands_perceptual(0, QLUT[ctx->params.quantiser_y],
-                                          coeffs_y, ctx->dwt_buffer_y, width, height,
-                                          ctx->params.decomp_levels, base_q_y, 0);
-        dequantise_dwt_subbands_perceptual(0, QLUT[ctx->params.quantiser_y],
-                                          coeffs_co, ctx->dwt_buffer_co, width, height,
-                                          ctx->params.decomp_levels, base_q_co, 1);
-        dequantise_dwt_subbands_perceptual(0, QLUT[ctx->params.quantiser_y],
-                                          coeffs_cg, ctx->dwt_buffer_cg, width, height,
-                                          ctx->params.decomp_levels, base_q_cg, 1);
-    } else {
-        for (int i = 0; i < num_pixels; i++) {
-            ctx->dwt_buffer_y[i] = coeffs_y[i] * base_q_y;
-            ctx->dwt_buffer_co[i] = coeffs_co[i] * base_q_co;
-            ctx->dwt_buffer_cg[i] = coeffs_cg[i] * base_q_cg;
-        }
-    }
-
-    free(coeffs_y);
-    free(coeffs_co);
-    free(coeffs_cg);
-
-    // Apply grain synthesis to Y channel only (not applied to chroma)
-    apply_grain_synthesis(ctx->dwt_buffer_y, width, height, ctx->params.decomp_levels, 0,
-                        QLUT[ctx->params.quantiser_y], ctx->params.encoder_preset);
-
-    // Apply inverse DWT
-    apply_inverse_dwt_multilevel(ctx->dwt_buffer_y, width, height, ctx->params.decomp_levels, ctx->params.wavelet_filter);
-    apply_inverse_dwt_multilevel(ctx->dwt_buffer_co, width, height, ctx->params.decomp_levels, ctx->params.wavelet_filter);
-    apply_inverse_dwt_multilevel(ctx->dwt_buffer_cg, width, height, ctx->params.decomp_levels, ctx->params.wavelet_filter);
-
-    // Store as reference for P-frames
-    memcpy(ctx->reference_ycocg_y, ctx->dwt_buffer_y, num_pixels * sizeof(float));
-    memcpy(ctx->reference_ycocg_co, ctx->dwt_buffer_co, num_pixels * sizeof(float));
-    memcpy(ctx->reference_ycocg_cg, ctx->dwt_buffer_cg, num_pixels * sizeof(float));
-
-    // Convert to RGB
-    for (int y = 0; y < height; y++) {
-        for (int x = 0; x < width; x++) {
-            const int idx = y * width + x;
-            const int rgb_idx = (y * width + x) * 3;
-
-            if (ctx->params.channel_layout == 0) {
-                ycocgr_to_rgb(ctx->dwt_buffer_y[idx], ctx->dwt_buffer_co[idx], ctx->dwt_buffer_cg[idx],
-                             &rgb_frame[rgb_idx], &rgb_frame[rgb_idx + 1], &rgb_frame[rgb_idx + 2]);
-            } else {
-                ictcp_to_rgb(ctx->dwt_buffer_y[idx], ctx->dwt_buffer_co[idx], ctx->dwt_buffer_cg[idx],
-                            &rgb_frame[rgb_idx], &rgb_frame[rgb_idx + 1], &rgb_frame[rgb_idx + 2]);
-            }
-        }
-    }
-
-    return 0;
-}
-
-int tav_video_decode_pframe(tav_video_context_t *ctx,
-                            const uint8_t *compressed_data, uint32_t packet_size,
-                            uint8_t *rgb_frame) {
-    if (!ctx || !compressed_data || !rgb_frame) {
-        if (ctx) snprintf(ctx->error_msg, sizeof(ctx->error_msg), "Invalid parameters");
-        return -1;
-    }
-
-    const int width = ctx->params.width;
-    const int height = ctx->params.height;
-    const int num_pixels = width * height;
-
-    // Decompress (or use raw data if no_zstd flag is set)
-    uint8_t *decompressed_data;
-    size_t decompressed_size;
-    int should_free_data;
-
-    if (ctx->params.no_zstd) {
-        // No Zstd compression - use data directly
-        decompressed_data = (uint8_t *)compressed_data;
-        decompressed_size = packet_size;
-        should_free_data = 0;
-    } else {
-        // Normal Zstd decompression
-        const size_t decompressed_bound = ZSTD_getFrameContentSize(compressed_data, packet_size);
-        if (ZSTD_isError(decompressed_bound)) {
-            snprintf(ctx->error_msg, sizeof(ctx->error_msg), "Zstd decompression failed");
-            return -1;
-        }
-
-        decompressed_data = malloc(decompressed_bound);
-        decompressed_size = ZSTD_decompress(decompressed_data, decompressed_bound,
-                                            compressed_data, packet_size);
-        if (ZSTD_isError(decompressed_size)) {
-            free(decompressed_data);
-            snprintf(ctx->error_msg, sizeof(ctx->error_msg), "Zstd decompression failed");
-            return -1;
-        }
-        should_free_data = 1;
-    }
-
-    // Allocate coefficient buffers
-    int16_t *coeffs_y = calloc(num_pixels, sizeof(int16_t));
-    int16_t *coeffs_co = calloc(num_pixels, sizeof(int16_t));
-    int16_t *coeffs_cg = calloc(num_pixels, sizeof(int16_t));
-
-    // Skip 4-byte tile header: [mode][qY_override][qCo_override][qCg_override]
-    // The tile header is written by the encoder before the EZBC/twobit data
-    uint8_t *coeff_data = decompressed_data + 4;
-
-    // Postprocess
-    if (ctx->params.entropy_coder == 0) {
-        postprocess_coefficients_twobit(coeff_data, num_pixels, coeffs_y, coeffs_co, coeffs_cg);
-    } else if (ctx->params.entropy_coder == 1) {
-        postprocess_coefficients_ezbc(coeff_data, num_pixels, coeffs_y, coeffs_co, coeffs_cg, ctx->params.channel_layout);
-    }
-
-    if (should_free_data) {
-        free(decompressed_data);
-    }
-
-    // Dequantise
-    const float base_q_y = QLUT[ctx->params.quantiser_y];
-    const float base_q_co = QLUT[ctx->params.quantiser_co];
-    const float base_q_cg = QLUT[ctx->params.quantiser_cg];
-
-    if (ctx->params.perceptual_tuning) {
-        dequantise_dwt_subbands_perceptual(0, QLUT[ctx->params.quantiser_y],
-                                          coeffs_y, ctx->dwt_buffer_y, width, height,
-                                          ctx->params.decomp_levels, base_q_y, 0);
-        dequantise_dwt_subbands_perceptual(0, QLUT[ctx->params.quantiser_y],
-                                          coeffs_co, ctx->dwt_buffer_co, width, height,
-                                          ctx->params.decomp_levels, base_q_co, 1);
-        dequantise_dwt_subbands_perceptual(0, QLUT[ctx->params.quantiser_y],
-                                          coeffs_cg, ctx->dwt_buffer_cg, width, height,
-                                          ctx->params.decomp_levels, base_q_cg, 1);
-    } else {
-        for (int i = 0; i < num_pixels; i++) {
-            ctx->dwt_buffer_y[i] = coeffs_y[i] * base_q_y;
-            ctx->dwt_buffer_co[i] = coeffs_co[i] * base_q_co;
-            ctx->dwt_buffer_cg[i] = coeffs_cg[i] * base_q_cg;
-        }
-    }
-
-    free(coeffs_y);
-    free(coeffs_co);
-    free(coeffs_cg);
-
-    // Apply grain synthesis to Y channel only (not applied to chroma)
-    apply_grain_synthesis(ctx->dwt_buffer_y, width, height, ctx->params.decomp_levels, 0,
-                        QLUT[ctx->params.quantiser_y], ctx->params.encoder_preset);
-
-    // Apply inverse DWT
-    apply_inverse_dwt_multilevel(ctx->dwt_buffer_y, width, height, ctx->params.decomp_levels, ctx->params.wavelet_filter);
-    apply_inverse_dwt_multilevel(ctx->dwt_buffer_co, width, height, ctx->params.decomp_levels, ctx->params.wavelet_filter);
-    apply_inverse_dwt_multilevel(ctx->dwt_buffer_cg, width, height, ctx->params.decomp_levels, ctx->params.wavelet_filter);
-
-    // Add to reference frame (delta mode)
-    for (int i = 0; i < num_pixels; i++) {
-        ctx->dwt_buffer_y[i] += ctx->reference_ycocg_y[i];
-        ctx->dwt_buffer_co[i] += ctx->reference_ycocg_co[i];
-        ctx->dwt_buffer_cg[i] += ctx->reference_ycocg_cg[i];
-    }
-
-    // Store as new reference
-    memcpy(ctx->reference_ycocg_y, ctx->dwt_buffer_y, num_pixels * sizeof(float));
-    memcpy(ctx->reference_ycocg_co, ctx->dwt_buffer_co, num_pixels * sizeof(float));
-    memcpy(ctx->reference_ycocg_cg, ctx->dwt_buffer_cg, num_pixels * sizeof(float));
-
-    // Convert to RGB
-    for (int y = 0; y < height; y++) {
-        for (int x = 0; x < width; x++) {
-            const int idx = y * width + x;
-            const int rgb_idx = (y * width + x) * 3;
-
-            if (ctx->params.channel_layout == 0) {
-                ycocgr_to_rgb(ctx->dwt_buffer_y[idx], ctx->dwt_buffer_co[idx], ctx->dwt_buffer_cg[idx],
-                             &rgb_frame[rgb_idx], &rgb_frame[rgb_idx + 1], &rgb_frame[rgb_idx + 2]);
-            } else {
-                ictcp_to_rgb(ctx->dwt_buffer_y[idx], ctx->dwt_buffer_co[idx], ctx->dwt_buffer_cg[idx],
-                            &rgb_frame[rgb_idx], &rgb_frame[rgb_idx + 1], &rgb_frame[rgb_idx + 2]);
-            }
-        }
-    }
-
-    return 0;
-}
-
-const char *tav_video_get_error(tav_video_context_t *ctx) {
-    if (!ctx) return "Invalid context";
-    return ctx->error_msg;
-}
-
-void tav_video_set_verbose(tav_video_context_t *ctx, int verbose) {
-    if (ctx) ctx->verbose = verbose;
-}
diff --git a/video_encoder/lib/libtavenc/tav_encoder_color.c b/video_encoder/lib/libtavenc/tav_encoder_color.c
deleted file mode 100644
index 74e976e..0000000
--- a/video_encoder/lib/libtavenc/tav_encoder_color.c
+++ /dev/null
@@ -1,255 +0,0 @@
-/**
- * TAV Encoder - Color Space Conversion Library
- *
- * Provides RGB <-> YCoCg-R and RGB <-> ICtCp color space conversions
- * for the TSVM Advanced Video (TAV) encoder.
- *
- * Extracted from encoder_tav.c as part of library refactoring.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <math.h>
-
-// =============================================================================
-// Utility Functions
-// =============================================================================
-
-static inline int CLAMP(int x, int min, int max) {
-    return x < min ? min : (x > max ? max : x);
-}
-
-static inline float FCLAMP(float x, float min, float max) {
-    return x < min ? min : (x > max ? max : x);
-}
-
-static inline int iround(double v) {
-    return (int)floor(v + 0.5);
-}
-
-// =============================================================================
-// sRGB Gamma Helpers
-// =============================================================================
-
-static inline double srgb_linearise(double val) {
-    if (val <= 0.04045) return val / 12.92;
-    return pow((val + 0.055) / 1.055, 2.4);
-}
-
-static inline double srgb_unlinearise(double val) {
-    if (val <= 0.0031308) return 12.92 * val;
-    return 1.055 * pow(val, 1.0/2.4) - 0.055;
-}
-
-// =============================================================================
-// HLG (Hybrid Log-Gamma) Transfer Functions
-// =============================================================================
-
-static inline double HLG_OETF(double E) {
-    const double a = 0.17883277;
-    const double b = 0.28466892;  // 1 - 4*a
-    const double c = 0.55991073;  // 0.5 - a*ln(4*a)
-
-    if (E <= 1.0/12.0) return sqrt(3.0 * E);
-    return a * log(12.0 * E - b) + c;
-}
-
-static inline double HLG_EOTF(double Ep) {
-    const double a = 0.17883277;
-    const double b = 0.28466892;
-    const double c = 0.55991073;
-
-    if (Ep <= 0.5) {
-        double val = Ep * Ep / 3.0;
-        return val;
-    }
-    double val = (exp((Ep - c) / a) + b) / 12.0;
-    return val;
-}
-
-// =============================================================================
-// Color Space Transformation Matrices
-// =============================================================================
-
-// BT.2100 RGB -> LMS matrix
-static const double M_RGB_TO_LMS[3][3] = {
-    {1688.0/4096, 2146.0/4096,  262.0/4096},
-    { 683.0/4096, 2951.0/4096,  462.0/4096},
-    {  99.0/4096,  309.0/4096, 3688.0/4096}
-};
-
-// LMS -> RGB inverse matrix
-static const double M_LMS_TO_RGB[3][3] = {
-    { 6.1723815689243215, -5.319534979827695,   0.14699442094633924},
-    {-1.3243428148026244,  2.560286104841917,  -0.2359203727576164},
-    {-0.011819739235953752, -0.26473549971186555, 1.2767952602537955}
-};
-
-// ICtCp matrix (L' M' S' -> I Ct Cp) - BT.2100 constants
-static const double M_LMSPRIME_TO_ICTCP[3][3] = {
-    { 2048.0/4096.0,   2048.0/4096.0,     0.0          },
-    { 3625.0/4096.0,  -7465.0/4096.0,  3840.0/4096.0   },
-    { 9500.0/4096.0,  -9212.0/4096.0,  -288.0/4096.0   }
-};
-
-// ICtCp -> L' M' S' inverse matrix
-static const double M_ICTCP_TO_LMSPRIME[3][3] = {
-    { 1.0,   0.015718580108730416,   0.2095810681164055 },
-    { 1.0,  -0.015718580108730416,  -0.20958106811640548},
-    { 1.0,   1.0212710798422344,    -0.6052744909924316 }
-};
-
-// =============================================================================
-// YCoCg-R Color Space Conversion
-// =============================================================================
-
-/**
- * Convert RGB24 to YCoCg-R color space for a full frame.
- *
- * YCoCg-R is a reversible color transform optimized for compression:
- * - Y  = luma (G + (R-B)/2)
- * - Co = orange chrominance (R - B)
- * - Cg = green chrominance (G - (R+B)/2)
- *
- * @param rgb    Input RGB24 data (planar: RRRR...GGGG...BBBB...)
- * @param y      Output luma channel
- * @param co     Output orange chrominance
- * @param cg     Output green chrominance
- * @param width  Frame width
- * @param height Frame height
- */
-void tav_rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg,
-                      int width, int height)
-{
-    const int total_pixels = width * height;
-
-    // Process 4 pixels at a time for better cache utilization
-    int i = 0;
-    const int simd_end = (total_pixels / 4) * 4;
-
-    // Vectorized processing for groups of 4 pixels
-    for (i = 0; i < simd_end; i += 4) {
-        const uint8_t *rgb_ptr = &rgb[i * 3];
-
-        // Process 4 pixels simultaneously with loop unrolling
-        for (int j = 0; j < 4; j++) {
-            const int idx = i + j;
-            const float r = rgb_ptr[j * 3 + 0];
-            const float g = rgb_ptr[j * 3 + 1];
-            const float b = rgb_ptr[j * 3 + 2];
-
-            // YCoCg-R transform
-            co[idx] = r - b;
-            const float tmp = b + co[idx] * 0.5f;
-            cg[idx] = g - tmp;
-            y[idx] = tmp + cg[idx] * 0.5f;
-        }
-    }
-
-    // Handle remaining pixels (1-3 pixels)
-    for (; i < total_pixels; i++) {
-        const float r = rgb[i * 3 + 0];
-        const float g = rgb[i * 3 + 1];
-        const float b = rgb[i * 3 + 2];
-
-        co[i] = r - b;
-        const float tmp = b + co[i] * 0.5f;
-        cg[i] = g - tmp;
-        y[i] = tmp + cg[i] * 0.5f;
-    }
-}
-
-// =============================================================================
-// ICtCp Color Space Conversion (HDR-capable)
-// =============================================================================
-
-/**
- * Convert sRGB8 to ICtCp color space using HLG transfer function.
- *
- * ICtCp is a perceptually uniform color space designed for HDR content:
- * - I  = intensity (luma)
- * - Ct = tritanope (blue-yellow)
- * - Cp = protanope (red-green)
- *
- * Uses BT.2100 ICtCp with HLG OETF for better perceptual uniformity.
- *
- * @param r8     Input red component (0-255)
- * @param g8     Input green component (0-255)
- * @param b8     Input blue component (0-255)
- * @param out_I  Output intensity (0-255)
- * @param out_Ct Output tritanope (0-255, centered at 127.5)
- * @param out_Cp Output protanope (0-255, centered at 127.5)
- */
-void tav_srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
-                             double *out_I, double *out_Ct, double *out_Cp)
-{
-    // 1) Linearize sRGB to 0..1
-    double r = srgb_linearise((double)r8 / 255.0);
-    double g = srgb_linearise((double)g8 / 255.0);
-    double b = srgb_linearise((double)b8 / 255.0);
-
-    // 2) Linear RGB -> LMS (3x3 multiply)
-    double L = M_RGB_TO_LMS[0][0]*r + M_RGB_TO_LMS[0][1]*g + M_RGB_TO_LMS[0][2]*b;
-    double M = M_RGB_TO_LMS[1][0]*r + M_RGB_TO_LMS[1][1]*g + M_RGB_TO_LMS[1][2]*b;
-    double S = M_RGB_TO_LMS[2][0]*r + M_RGB_TO_LMS[2][1]*g + M_RGB_TO_LMS[2][2]*b;
-
-    // 3) Apply HLG OETF (Hybrid Log-Gamma)
-    double Lp = HLG_OETF(L);
-    double Mp = HLG_OETF(M);
-    double Sp = HLG_OETF(S);
-
-    // 4) L'M'S' -> ICtCp
-    double I  = M_LMSPRIME_TO_ICTCP[0][0]*Lp + M_LMSPRIME_TO_ICTCP[0][1]*Mp + M_LMSPRIME_TO_ICTCP[0][2]*Sp;
-    double Ct = M_LMSPRIME_TO_ICTCP[1][0]*Lp + M_LMSPRIME_TO_ICTCP[1][1]*Mp + M_LMSPRIME_TO_ICTCP[1][2]*Sp;
-    double Cp = M_LMSPRIME_TO_ICTCP[2][0]*Lp + M_LMSPRIME_TO_ICTCP[2][1]*Mp + M_LMSPRIME_TO_ICTCP[2][2]*Sp;
-
-    // 5) Scale and offset to 0-255 range
-    *out_I = FCLAMP(I * 255.0, 0.0, 255.0);
-    *out_Ct = FCLAMP(Ct * 255.0 + 127.5, 0.0, 255.0);
-    *out_Cp = FCLAMP(Cp * 255.0 + 127.5, 0.0, 255.0);
-}
-
-/**
- * Convert ICtCp back to sRGB8 using HLG inverse transfer function.
- *
- * @param I8  Input intensity (0-255)
- * @param Ct8 Input tritanope (0-255, centered at 127.5)
- * @param Cp8 Input protanope (0-255, centered at 127.5)
- * @param r8  Output red component (0-255)
- * @param g8  Output green component (0-255)
- * @param b8  Output blue component (0-255)
- */
-void tav_ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
-                             uint8_t *r8, uint8_t *g8, uint8_t *b8)
-{
-    // 1) Denormalize from 0-255 range
-    double I = I8 / 255.0;
-    double Ct = (Ct8 - 127.5) / 255.0;
-    double Cp = (Cp8 - 127.5) / 255.0;
-
-    // 2) ICtCp -> L' M' S' (3x3 inverse multiply)
-    double Lp = M_ICTCP_TO_LMSPRIME[0][0]*I + M_ICTCP_TO_LMSPRIME[0][1]*Ct + M_ICTCP_TO_LMSPRIME[0][2]*Cp;
-    double Mp = M_ICTCP_TO_LMSPRIME[1][0]*I + M_ICTCP_TO_LMSPRIME[1][1]*Ct + M_ICTCP_TO_LMSPRIME[1][2]*Cp;
-    double Sp = M_ICTCP_TO_LMSPRIME[2][0]*I + M_ICTCP_TO_LMSPRIME[2][1]*Ct + M_ICTCP_TO_LMSPRIME[2][2]*Cp;
-
-    // 3) Apply HLG inverse EOTF
-    double L = HLG_EOTF(Lp);
-    double M = HLG_EOTF(Mp);
-    double S = HLG_EOTF(Sp);
-
-    // 4) LMS -> linear sRGB (3x3 inverse multiply)
-    double r_lin = M_LMS_TO_RGB[0][0]*L + M_LMS_TO_RGB[0][1]*M + M_LMS_TO_RGB[0][2]*S;
-    double g_lin = M_LMS_TO_RGB[1][0]*L + M_LMS_TO_RGB[1][1]*M + M_LMS_TO_RGB[1][2]*S;
-    double b_lin = M_LMS_TO_RGB[2][0]*L + M_LMS_TO_RGB[2][1]*M + M_LMS_TO_RGB[2][2]*S;
-
-    // 5) Apply sRGB gamma and convert to 0-255 with rounding
-    double r = srgb_unlinearise(r_lin);
-    double g = srgb_unlinearise(g_lin);
-    double b = srgb_unlinearise(b_lin);
-
-    *r8 = (uint8_t)iround(FCLAMP(r * 255.0, 0.0, 255.0));
-    *g8 = (uint8_t)iround(FCLAMP(g * 255.0, 0.0, 255.0));
-    *b8 = (uint8_t)iround(FCLAMP(b * 255.0, 0.0, 255.0));
-}
diff --git a/video_encoder/lib/libtavenc/tav_encoder_color.h b/video_encoder/lib/libtavenc/tav_encoder_color.h
deleted file mode 100644
index e2ccd7c..0000000
--- a/video_encoder/lib/libtavenc/tav_encoder_color.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/**
- * TAV Encoder - Color Space Conversion Library
- *
- * Public API for RGB <-> YCoCg-R and RGB <-> ICtCp color space conversions.
- */
-
-#ifndef TAV_ENCODER_COLOR_H
-#define TAV_ENCODER_COLOR_H
-
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// =============================================================================
-// YCoCg-R Color Space Conversion
-// =============================================================================
-
-/**
- * Convert RGB24 to YCoCg-R color space for a full frame.
- *
- * @param rgb    Input RGB24 data (interleaved: RGBRGBRGB...)
- * @param y      Output luma channel
- * @param co     Output orange chrominance
- * @param cg     Output green chrominance
- * @param width  Frame width
- * @param height Frame height
- */
-void tav_rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg,
-                      int width, int height);
-
-// =============================================================================
-// ICtCp Color Space Conversion (HDR-capable)
-// =============================================================================
-
-/**
- * Convert sRGB8 to ICtCp color space using HLG transfer function.
- *
- * @param r8     Input red component (0-255)
- * @param g8     Input green component (0-255)
- * @param b8     Input blue component (0-255)
- * @param out_I  Output intensity (0-255)
- * @param out_Ct Output tritanope (0-255, centered at 127.5)
- * @param out_Cp Output protanope (0-255, centered at 127.5)
- */
-void tav_srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
-                             double *out_I, double *out_Ct, double *out_Cp);
-
-/**
- * Convert ICtCp back to sRGB8 using HLG inverse transfer function.
- *
- * @param I8  Input intensity (0-255)
- * @param Ct8 Input tritanope (0-255, centered at 127.5)
- * @param Cp8 Input protanope (0-255, centered at 127.5)
- * @param r8  Output red component (0-255)
- * @param g8  Output green component (0-255)
- * @param b8  Output blue component (0-255)
- */
-void tav_ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
-                             uint8_t *r8, uint8_t *g8, uint8_t *b8);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // TAV_ENCODER_COLOR_H
diff --git a/video_encoder/lib/libtavenc/tav_encoder_dwt.c b/video_encoder/lib/libtavenc/tav_encoder_dwt.c
deleted file mode 100644
index 3c5105b..0000000
--- a/video_encoder/lib/libtavenc/tav_encoder_dwt.c
+++ /dev/null
@@ -1,619 +0,0 @@
-/**
- * TAV Encoder - Discrete Wavelet Transform (DWT) Library
- *
- * Provides multi-resolution wavelet decomposition for video compression.
- * Supports multiple wavelet types: CDF 5/3, 9/7, 13/7, DD-4, and Haar.
- *
- * Extracted from encoder_tav.c as part of library refactoring.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <math.h>
-
-// =============================================================================
-// Wavelet Type Constants
-// =============================================================================
-
-#define WAVELET_5_3_REVERSIBLE 0       // CDF 5/3 - Lossless capable
-#define WAVELET_9_7_IRREVERSIBLE 1     // CDF 9/7 - Higher compression (default)
-#define WAVELET_BIORTHOGONAL_13_7 2    // Biorthogonal 13/7
-#define WAVELET_DD4 16                 // Deslauriers-Dubuc 4-point interpolating
-#define WAVELET_HAAR 255               // Haar - Simplest wavelet
-
-// =============================================================================
-// 1D Forward DWT Transforms
-// =============================================================================
-
-/**
- * CDF 5/3 reversible wavelet forward 1D transform (lossless capable).
- *
- * Uses lifting scheme with predict and update steps.
- * Output layout: [LL...LL, HH...HH] (low-pass, then high-pass)
- *
- * @param data   In/out signal data (modified in-place)
- * @param length Signal length (handles non-power-of-2)
- */
-static void dwt_53_forward_1d(float *data, int length) {
-    if (length < 2) return;
-
-    float *temp = calloc(length, sizeof(float));
-    int half = (length + 1) / 2;
-
-    // Predict step (high-pass)
-    for (int i = 0; i < half; i++) {
-        int idx = 2 * i + 1;
-        if (idx < length) {
-            float pred = 0.5f * (data[2 * i] + (2 * i + 2 < length ? data[2 * i + 2] : data[2 * i]));
-            temp[half + i] = data[idx] - pred;
-        }
-    }
-
-    // Update step (low-pass)
-    for (int i = 0; i < half; i++) {
-        float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) +
-                               (i < half - 1 ? temp[half + i] : 0));
-        temp[i] = data[2 * i] + update;
-    }
-
-    memcpy(data, temp, length * sizeof(float));
-    free(temp);
-}
-
-/**
- * CDF 9/7 irreversible wavelet forward 1D transform (JPEG 2000 standard).
- *
- * Five-step lifting scheme with scaling for optimal compression.
- * Output layout: [LL...LL, HH...HH]
- *
- * @param data   In/out signal data
- * @param length Signal length
- */
-static void dwt_97_forward_1d(float *data, int length) {
-    if (length < 2) return;
-
-    float *temp = malloc(length * sizeof(float));
-    int half = (length + 1) / 2;
-
-    // Split into even/odd samples
-    for (int i = 0; i < half; i++) {
-        temp[i] = data[2 * i];           // Even (low)
-    }
-    for (int i = 0; i < length / 2; i++) {
-        temp[half + i] = data[2 * i + 1]; // Odd (high)
-    }
-
-    // JPEG2000 9/7 lifting coefficients
-    const float alpha = -1.586134342f;
-    const float beta = -0.052980118f;
-    const float gamma = 0.882911076f;
-    const float delta = 0.443506852f;
-    const float K = 1.230174105f;
-
-    // Step 1: Predict α
-    for (int i = 0; i < length / 2; i++) {
-        if (half + i < length) {
-            float s_curr = temp[i];
-            float s_next = (i + 1 < half) ? temp[i + 1] : s_curr;
-            temp[half + i] += alpha * (s_curr + s_next);
-        }
-    }
-
-    // Step 2: Update β
-    for (int i = 0; i < half; i++) {
-        float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
-        float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr;
-        temp[i] += beta * (d_prev + d_curr);
-    }
-
-    // Step 3: Predict γ
-    for (int i = 0; i < length / 2; i++) {
-        if (half + i < length) {
-            float s_curr = temp[i];
-            float s_next = (i + 1 < half) ? temp[i + 1] : s_curr;
-            temp[half + i] += gamma * (s_curr + s_next);
-        }
-    }
-
-    // Step 4: Update δ
-    for (int i = 0; i < half; i++) {
-        float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
-        float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr;
-        temp[i] += delta * (d_prev + d_curr);
-    }
-
-    // Step 5: Scaling
-    for (int i = 0; i < half; i++) {
-        temp[i] *= K;
-    }
-    for (int i = 0; i < length / 2; i++) {
-        if (half + i < length) {
-            temp[half + i] /= K;
-        }
-    }
-
-    memcpy(data, temp, length * sizeof(float));
-    free(temp);
-}
-
-/**
- * CDF 9/7 integer-reversible wavelet forward 1D (fixed-point lifting).
- *
- * Same structure as 9/7 irreversible but uses integer arithmetic.
- *
- * @param data   In/out signal data
- * @param length Signal length
- */
-static void dwt_97_iint_forward_1d(float *data, int length) {
-    if (length < 2) return;
-    float *temp = malloc(length * sizeof(float));
-    int half = (length + 1) / 2;
-
-    for (int i = 0; i < half; ++i) temp[i] = data[2*i];
-    for (int i = 0; i < length/2; ++i) temp[half + i] = data[2*i + 1];
-
-    const int SHIFT = 16;
-    const int64_t ROUND = 1LL << (SHIFT - 1);
-    const int64_t A = -103949;  // α
-    const int64_t B = -3472;    // β
-    const int64_t G = 57862;    // γ
-    const int64_t D = 29066;    // δ
-    const int64_t K_FP  = 80542;  // ≈ 1.230174105 * 2^16
-    const int64_t Ki_FP = 53283;  // ≈ (1/1.230174105) * 2^16
-
-    #define RN(x) (((x)>=0)?(((x)+ROUND)>>SHIFT):(-((-(x)+ROUND)>>SHIFT)))
-
-    // Predict α
-    for (int i = 0; i < length/2; ++i) {
-        int s = temp[i];
-        int sn = (i+1<half)? temp[i+1] : s;
-        temp[half+i] += RN(A * (int64_t)(s + sn));
-    }
-
-    // Update β
-    for (int i = 0; i < half; ++i) {
-        int d = (half+i<length)? temp[half+i]:0;
-        int dp = (i>0 && half+i-1<length)? temp[half+i-1]:d;
-        temp[i] += RN(B * (int64_t)(dp + d));
-    }
-
-    // Predict γ
-    for (int i = 0; i < length/2; ++i) {
-        int s = temp[i];
-        int sn = (i+1<half)? temp[i+1]:s;
-        temp[half+i] += RN(G * (int64_t)(s + sn));
-    }
-
-    // Update δ
-    for (int i = 0; i < half; ++i) {
-        int d = (half+i<length)? temp[half+i]:0;
-        int dp = (i>0 && half+i-1<length)? temp[half+i-1]:d;
-        temp[i] += RN(D * (int64_t)(dp + d));
-    }
-
-    // Scaling
-    for (int i = 0; i < half; ++i) {
-        temp[i] = (((int64_t)temp[i] * K_FP  + ROUND) >> SHIFT);
-    }
-    for (int i = 0; i < length/2; ++i) {
-        if (half + i < length) {
-            temp[half + i] = (((int64_t)temp[half + i] * Ki_FP + ROUND) >> SHIFT);
-        }
-    }
-
-    memcpy(data, temp, length * sizeof(float));
-    free(temp);
-    #undef RN
-}
-
-/**
- * Deslauriers-Dubuc 4-point interpolating wavelet forward 1D (DD-4).
- *
- * Uses four-sample prediction kernel: w[-1]=-1/16, w[0]=9/16, w[1]=9/16, w[2]=-1/16
- * Good for smooth signals and still images.
- *
- * @param data   In/out signal data
- * @param length Signal length
- */
-static void dwt_dd4_forward_1d(float *data, int length) {
-    if (length < 2) return;
-
-    float *temp = malloc(length * sizeof(float));
-    int half = (length + 1) / 2;
-
-    // Split into even/odd samples
-    for (int i = 0; i < half; i++) {
-        temp[i] = data[2 * i];
-    }
-    for (int i = 0; i < length / 2; i++) {
-        temp[half + i] = data[2 * i + 1];
-    }
-
-    // DD-4 prediction step with four-point kernel
-    for (int i = 0; i < length / 2; i++) {
-        // Get four neighbouring even samples with symmetric boundary extension
-        float s_m1, s_0, s_1, s_2;
-
-        s_m1 = (i > 0) ? temp[i - 1] : temp[0];
-        s_0 = temp[i];
-        s_1 = (i + 1 < half) ? temp[i + 1] : temp[half - 1];
-        s_2 = (i + 2 < half) ? temp[i + 2] : ((half > 1) ? temp[half - 2] : temp[half - 1]);
-
-        float prediction = (-1.0f/16.0f) * s_m1 + (9.0f/16.0f) * s_0 +
-                          (9.0f/16.0f) * s_1 + (-1.0f/16.0f) * s_2;
-
-        temp[half + i] -= prediction;
-    }
-
-    // DD-4 update step
-    for (int i = 0; i < half; i++) {
-        float d_curr = (i < length / 2) ? temp[half + i] : 0.0f;
-        float d_prev = (i > 0 && i - 1 < length / 2) ? temp[half + i - 1] : 0.0f;
-        temp[i] += 0.25f * (d_prev + d_curr);
-    }
-
-    memcpy(data, temp, length * sizeof(float));
-    free(temp);
-}
-
-/**
- * Biorthogonal 13/7 wavelet forward 1D.
- *
- * Analysis filters: Low-pass (13 taps), High-pass (7 taps)
- * Simplified implementation using 5/3 structure with scaling.
- *
- * @param data   In/out signal data
- * @param length Signal length
- */
-static void dwt_bior137_forward_1d(float *data, int length) {
-    if (length < 2) return;
-
-    const float K = 1.230174105f;
-
-    float *temp = malloc(length * sizeof(float));
-    int half = (length + 1) / 2;
-
-    // Predict step (high-pass)
-    for (int i = 0; i < half; i++) {
-        int idx = 2 * i + 1;
-        if (idx < length) {
-            float left = data[2 * i];
-            float right = (2 * i + 2 < length) ? data[2 * i + 2] : data[2 * i];
-            float prediction = 0.5f * (left + right);
-            temp[half + i] = data[idx] - prediction;
-        }
-    }
-
-    // Update step (low-pass)
-    for (int i = 0; i < half; i++) {
-        float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) +
-                               (i < half - 1 ? temp[half + i] : 0));
-        temp[i] = data[2 * i] + update;
-    }
-
-    // Scaling
-    for (int i = 0; i < half; i++) {
-        temp[i] *= K;
-    }
-    for (int i = 0; i < length / 2; i++) {
-        if (half + i < length) {
-            temp[half + i] /= K;
-        }
-    }
-
-    memcpy(data, temp, length * sizeof(float));
-    free(temp);
-}
-
-/**
- * Haar wavelet forward 1D transform.
- *
- * The simplest wavelet: averages (low-pass) and differences (high-pass).
- * Useful for temporal DWT in GOPs.
- *
- * @param data   In/out signal data
- * @param length Signal length
- */
-static void dwt_haar_forward_1d(float *data, int length) {
-    if (length < 2) return;
-
-    float *temp = malloc(length * sizeof(float));
-    int half = (length + 1) / 2;
-
-    for (int i = 0; i < half; i++) {
-        if (2 * i + 1 < length) {
-            temp[i] = (data[2 * i] + data[2 * i + 1]) / 2.0f;
-            temp[half + i] = (data[2 * i] - data[2 * i + 1]) / 2.0f;
-        } else {
-            temp[i] = data[2 * i];
-            if (half + i < length) {
-                temp[half + i] = 0.0f;
-            }
-        }
-    }
-
-    memcpy(data, temp, length * sizeof(float));
-    free(temp);
-}
-
-// =============================================================================
-// 1D Inverse DWT Transforms
-// =============================================================================
-
-/**
- * CDF 5/3 reversible wavelet inverse 1D transform.
- *
- * Reverses dwt_53_forward_1d() transform exactly.
- *
- * @param data   In/out coefficient data
- * @param length Signal length
- */
-static void dwt_53_inverse_1d(float *data, int length) {
-    if (length < 2) return;
-
-    float *temp = malloc(length * sizeof(float));
-    int half = (length + 1) / 2;
-
-    // Copy low-pass and high-pass coefficients
-    memcpy(temp, data, length * sizeof(float));
-
-    // Undo update step
-    for (int i = 0; i < half; i++) {
-        float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) +
-                               (i < half - 1 ? temp[half + i] : 0));
-        temp[i] -= update;
-    }
-
-    // Undo predict step
-    for (int i = 0; i < half; i++) {
-        int idx = 2 * i + 1;
-        if (idx < length) {
-            float pred = 0.5f * (temp[i] + ((i + 1 < half) ? temp[i + 1] : temp[i]));
-            data[2 * i] = temp[i];
-            data[idx] = temp[half + i] + pred;
-        } else {
-            data[2 * i] = temp[i];
-        }
-    }
-
-    free(temp);
-}
-
-/**
- * Haar wavelet inverse 1D transform.
- *
- * Reverses dwt_haar_forward_1d() transform.
- *
- * @param data   In/out coefficient data
- * @param length Signal length
- */
-static void dwt_haar_inverse_1d(float *data, int length) {
-    if (length < 2) return;
-
-    float *temp = malloc(length * sizeof(float));
-    int half = (length + 1) / 2;
-
-    // Reconstruct from averages and differences
-    for (int i = 0; i < half; i++) {
-        if (2 * i + 1 < length) {
-            temp[2 * i] = data[i] + data[half + i];
-            temp[2 * i + 1] = data[i] - data[half + i];
-        } else {
-            temp[2 * i] = data[i];
-        }
-    }
-
-    memcpy(data, temp, length * sizeof(float));
-    free(temp);
-}
-
-// =============================================================================
-// 2D DWT Transform
-// =============================================================================
-
-/**
- * Apply 2D forward DWT to a frame (in-place).
- *
- * Applies separable 1D transforms: horizontal (rows), then vertical (columns).
- * Supports multi-level decomposition.
- *
- * @param data        In/out 2D image data (row-major, width stride)
- * @param width       Image width
- * @param height      Image height
- * @param levels      Number of decomposition levels
- * @param filter_type Wavelet type (WAVELET_* constant)
- */
-void tav_dwt_2d_forward(float *data, int width, int height, int levels, int filter_type) {
-    const int max_size = (width > height) ? width : height;
-    float *temp_row = malloc(max_size * sizeof(float));
-    float *temp_col = malloc(max_size * sizeof(float));
-
-    // Pre-calculate dimensions for each level
-    int *widths = malloc((levels + 1) * sizeof(int));
-    int *heights = malloc((levels + 1) * sizeof(int));
-    widths[0] = width;
-    heights[0] = height;
-    for (int i = 1; i <= levels; i++) {
-        widths[i] = (widths[i - 1] + 1) / 2;
-        heights[i] = (heights[i - 1] + 1) / 2;
-    }
-
-    // Apply multi-level decomposition
-    for (int level = 0; level < levels; level++) {
-        int current_width = widths[level];
-        int current_height = heights[level];
-        if (current_width < 1 || current_height < 1) break;
-
-        // Row transform (horizontal)
-        for (int y = 0; y < current_height; y++) {
-            // Extract row
-            for (int x = 0; x < current_width; x++) {
-                temp_row[x] = data[y * width + x];
-            }
-
-            // Apply 1D DWT
-            switch (filter_type) {
-                case WAVELET_5_3_REVERSIBLE:
-                    dwt_53_forward_1d(temp_row, current_width);
-                    break;
-                case WAVELET_9_7_IRREVERSIBLE:
-                    dwt_97_forward_1d(temp_row, current_width);
-                    break;
-                case WAVELET_BIORTHOGONAL_13_7:
-                    dwt_bior137_forward_1d(temp_row, current_width);
-                    break;
-                case WAVELET_DD4:
-                    dwt_dd4_forward_1d(temp_row, current_width);
-                    break;
-                case WAVELET_HAAR:
-                    dwt_haar_forward_1d(temp_row, current_width);
-                    break;
-            }
-
-            // Write back
-            for (int x = 0; x < current_width; x++) {
-                data[y * width + x] = temp_row[x];
-            }
-        }
-
-        // Column transform (vertical)
-        for (int x = 0; x < current_width; x++) {
-            // Extract column
-            for (int y = 0; y < current_height; y++) {
-                temp_col[y] = data[y * width + x];
-            }
-
-            // Apply 1D DWT
-            switch (filter_type) {
-                case WAVELET_5_3_REVERSIBLE:
-                    dwt_53_forward_1d(temp_col, current_height);
-                    break;
-                case WAVELET_9_7_IRREVERSIBLE:
-                    dwt_97_forward_1d(temp_col, current_height);
-                    break;
-                case WAVELET_BIORTHOGONAL_13_7:
-                    dwt_bior137_forward_1d(temp_col, current_height);
-                    break;
-                case WAVELET_DD4:
-                    dwt_dd4_forward_1d(temp_col, current_height);
-                    break;
-                case WAVELET_HAAR:
-                    dwt_haar_forward_1d(temp_col, current_height);
-                    break;
-            }
-
-            // Write back
-            for (int y = 0; y < current_height; y++) {
-                data[y * width + x] = temp_col[y];
-            }
-        }
-    }
-
-    free(widths);
-    free(heights);
-    free(temp_row);
-    free(temp_col);
-}
-
-// =============================================================================
-// 3D DWT Transform (Temporal + Spatial)
-// =============================================================================
-
-/**
- * Apply 3D forward DWT to a GOP (group of pictures).
- *
- * First applies temporal DWT across frames at each spatial location,
- * then applies 2D spatial DWT to each resulting temporal subband.
- *
- * @param gop_data        Array of frame pointers [num_frames][width*height]
- * @param width           Frame width
- * @param height          Frame height
- * @param num_frames      Number of frames in GOP
- * @param spatial_levels  Number of 2D spatial decomposition levels
- * @param temporal_levels Number of 1D temporal decomposition levels
- * @param spatial_filter  Wavelet type for spatial transform
- * @param temporal_filter Wavelet type for temporal transform (0=Haar, 1=5/3)
- */
-void tav_dwt_3d_forward(float **gop_data, int width, int height, int num_frames,
-                        int spatial_levels, int temporal_levels,
-                        int spatial_filter, int temporal_filter) {
-    if (num_frames < 2 || width < 2 || height < 2) return;
-
-    float *temporal_line = malloc(num_frames * sizeof(float));
-
-    // Pre-calculate temporal lengths for non-power-of-2 GOPs
-    int *temporal_lengths = malloc((temporal_levels + 1) * sizeof(int));
-    temporal_lengths[0] = num_frames;
-    for (int i = 1; i <= temporal_levels; i++) {
-        temporal_lengths[i] = (temporal_lengths[i - 1] + 1) / 2;
-    }
-
-    // Step 1: Apply temporal DWT across frames
-    for (int y = 0; y < height; y++) {
-        for (int x = 0; x < width; x++) {
-            int pixel_idx = y * width + x;
-
-            // Extract temporal signal
-            for (int t = 0; t < num_frames; t++) {
-                temporal_line[t] = gop_data[t][pixel_idx];
-            }
-
-            // Apply temporal DWT with multiple levels
-            for (int level = 0; level < temporal_levels; level++) {
-                int level_frames = temporal_lengths[level];
-                if (level_frames >= 2) {
-                    if (temporal_filter == 255) {
-                        // Haar temporal (default)
-                        dwt_haar_forward_1d(temporal_line, level_frames);
-                    } else if (temporal_filter == 0) {
-                        // CDF 5/3 temporal
-                        dwt_53_forward_1d(temporal_line, level_frames);
-                    } else {
-                        // Fallback to Haar for unsupported wavelets
-                        dwt_haar_forward_1d(temporal_line, level_frames);
-                    }
-                }
-            }
-
-            // Write back temporal coefficients
-            for (int t = 0; t < num_frames; t++) {
-                gop_data[t][pixel_idx] = temporal_line[t];
-            }
-        }
-    }
-
-    free(temporal_lengths);
-    free(temporal_line);
-
-    // Step 2: Apply 2D spatial DWT to each temporal subband
-    for (int t = 0; t < num_frames; t++) {
-        tav_dwt_2d_forward(gop_data[t], width, height, spatial_levels, spatial_filter);
-    }
-}
-
-// =============================================================================
-// Utility Functions
-// =============================================================================
-
-/**
- * Calculate recommended number of decomposition levels for given dimensions.
- *
- * @param width  Image width
- * @param height Image height
- * @return       Recommended number of levels (1-6)
- */
-int tav_dwt_calculate_levels(int width, int height) {
-    int levels = 0;
-    int min_size = (width < height) ? width : height;
-
-    // Keep halving until we reach minimum size
-    while (min_size >= 32) {
-        min_size /= 2;
-        levels++;
-    }
-
-    // Cap at reasonable maximum
-    return (levels > 6) ? 6 : levels;
-}
diff --git a/video_encoder/lib/libtavenc/tav_encoder_dwt.h b/video_encoder/lib/libtavenc/tav_encoder_dwt.h
deleted file mode 100644
index 99de36a..0000000
--- a/video_encoder/lib/libtavenc/tav_encoder_dwt.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/**
- * TAV Encoder - Discrete Wavelet Transform Library
- *
- * Public API for multi-resolution wavelet decomposition.
- * Supports multiple wavelet types: CDF 5/3, 9/7, 13/7, DD-4, Haar
- */
-
-#ifndef TAV_ENCODER_DWT_H
-#define TAV_ENCODER_DWT_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// =============================================================================
-// Wavelet Type Constants
-// =============================================================================
-
-#define WAVELET_5_3_REVERSIBLE 0      // CDF 5/3 reversible (lossless capable)
-#define WAVELET_9_7_IRREVERSIBLE 1    // CDF 9/7 JPEG2000 (default, best compression)
-#define WAVELET_BIORTHOGONAL_13_7 2   // CDF 13/7 experimental
-#define WAVELET_DD4 16                // Deslauriers-Dubuc 4-point interpolating
-#define WAVELET_HAAR 255              // Haar (demonstration only)
-
-// =============================================================================
-// 2D Discrete Wavelet Transform
-// =============================================================================
-
-/**
- * Apply 2D wavelet transform to spatial data.
- *
- * Uses separable 1D transforms: apply horizontal rows, then vertical columns.
- * Multi-level decomposition creates frequency subbands: LL, LH, HL, HH.
- *
- * @param data         Input/output data array (modified in-place)
- * @param width        Frame width
- * @param height       Frame height
- * @param levels       Number of decomposition levels (0 = auto-calculate)
- * @param filter_type  Wavelet type (WAVELET_* constants)
- */
-void tav_dwt_2d_forward(float *data, int width, int height,
-                        int levels, int filter_type);
-
-// =============================================================================
-// 3D Discrete Wavelet Transform (GOP Temporal + Spatial)
-// =============================================================================
-
-/**
- * Apply 3D wavelet transform to group-of-pictures (GOP).
- *
- * Process:
- * 1. Apply temporal 1D DWT across frames at each spatial position
- * 2. Apply spatial 2D DWT to each temporal subband frame
- *
- * @param gop_data         Array of frame pointers [num_frames]
- * @param width            Frame width
- * @param height           Frame height
- * @param num_frames       Number of frames in GOP
- * @param spatial_levels   Spatial decomposition levels (0 = auto)
- * @param temporal_levels  Temporal decomposition levels
- * @param spatial_filter   Wavelet type for spatial transform
- * @param temporal_filter  Wavelet type for temporal transform
- */
-void tav_dwt_3d_forward(float **gop_data, int width, int height, int num_frames,
-                        int spatial_levels, int temporal_levels,
-                        int spatial_filter, int temporal_filter);
-
-// =============================================================================
-// Utility Functions
-// =============================================================================
-
-/**
- * Calculate optimal number of decomposition levels for given dimensions.
- *
- * Uses formula: floor(log2(min(width, height))) - 1
- * Ensures at least 2x2 low-pass subband remains after decomposition.
- *
- * @param width   Frame width
- * @param height  Frame height
- * @return        Recommended number of levels
- */
-int tav_dwt_calculate_levels(int width, int height);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // TAV_ENCODER_DWT_H
diff --git a/video_encoder/lib/libtavenc/tav_encoder_ezbc.c b/video_encoder/lib/libtavenc/tav_encoder_ezbc.c
deleted file mode 100644
index 174c942..0000000
--- a/video_encoder/lib/libtavenc/tav_encoder_ezbc.c
+++ /dev/null
@@ -1,415 +0,0 @@
-/**
- * TAV Encoder - EZBC (Embedded Zero Block Coding) Library
- *
- * Implements binary tree embedded zero block coding for efficient storage
- * of sparse wavelet coefficients. Exploits coefficient sparsity through
- * hierarchical significance testing and progressive bitplane encoding.
- *
- * Extracted from encoder_tav.c as part of library refactoring.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <stdbool.h>
-#include <math.h>
-
-// =============================================================================
-// EZBC Structures
-// =============================================================================
-
-/**
- * Bitstream writer for bit-level encoding.
- */
-typedef struct {
-    uint8_t *data;
-    size_t capacity;
-    size_t byte_pos;
-    uint8_t bit_pos;  // 0-7, current bit position in current byte
-} bitstream_t;
-
-/**
- * Block structure for EZBC quadtree decomposition.
- */
-typedef struct {
-    int x, y;           // Top-left position in 2D coefficient array
-    int width, height;  // Block dimensions
-} ezbc_block_t;
-
-/**
- * Queue for EZBC block processing.
- */
-typedef struct {
-    ezbc_block_t *blocks;
-    size_t count;
-    size_t capacity;
-} block_queue_t;
-
-/**
- * Track coefficient state for refinement.
- */
-typedef struct {
-    bool significant;     // Has been marked significant
-    int first_bitplane;   // Bitplane where it became significant
-} coeff_state_t;
-
-/**
- * EZBC encoding context for recursive processing.
- */
-typedef struct {
-    bitstream_t *bs;
-    int16_t *coeffs;
-    coeff_state_t *states;
-    int width;
-    int height;
-    int bitplane;
-    int threshold;
-    block_queue_t *next_insignificant;
-    block_queue_t *next_significant;
-    int *sign_count;
-} ezbc_context_t;
-
-// =============================================================================
-// Bitstream Operations
-// =============================================================================
-
-/**
- * Initialize bitstream with initial capacity.
- */
-static void bitstream_init(bitstream_t *bs, size_t initial_capacity) {
-    // Ensure minimum capacity to avoid issues with zero-size allocations
-    if (initial_capacity < 64) initial_capacity = 64;
-    bs->capacity = initial_capacity;
-    bs->data = calloc(1, initial_capacity);
-    if (!bs->data) {
-        fprintf(stderr, "ERROR: Failed to allocate bitstream buffer of size %zu\n", initial_capacity);
-        exit(1);
-    }
-    bs->byte_pos = 0;
-    bs->bit_pos = 0;
-}
-
-/**
- * Write a single bit to bitstream.
- */
-static void bitstream_write_bit(bitstream_t *bs, int bit) {
-    // Grow if needed
-    if (bs->byte_pos >= bs->capacity) {
-        size_t old_capacity = bs->capacity;
-        bs->capacity *= 2;
-        bs->data = realloc(bs->data, bs->capacity);
-        // Clear only the newly allocated memory region
-        memset(bs->data + old_capacity, 0, bs->capacity - old_capacity);
-    }
-
-    if (bit) {
-        bs->data[bs->byte_pos] |= (1 << bs->bit_pos);
-    }
-
-    bs->bit_pos++;
-    if (bs->bit_pos == 8) {
-        bs->bit_pos = 0;
-        bs->byte_pos++;
-    }
-}
-
-/**
- * Write multiple bits to bitstream (LSB first).
- */
-static void bitstream_write_bits(bitstream_t *bs, uint32_t value, int num_bits) {
-    for (int i = 0; i < num_bits; i++) {
-        bitstream_write_bit(bs, (value >> i) & 1);
-    }
-}
-
-/**
- * Get current bitstream size in bytes.
- */
-static size_t bitstream_size(bitstream_t *bs) {
-    return bs->byte_pos + (bs->bit_pos > 0 ? 1 : 0);
-}
-
-/**
- * Free bitstream buffer.
- */
-static void bitstream_free(bitstream_t *bs) {
-    free(bs->data);
-}
-
-// =============================================================================
-// Block Queue Operations
-// =============================================================================
-
-/**
- * Initialize block queue with initial capacity.
- */
-static void queue_init(block_queue_t *q) {
-    q->capacity = 1024;
-    q->blocks = malloc(q->capacity * sizeof(ezbc_block_t));
-    q->count = 0;
-}
-
-/**
- * Push block onto queue, growing if needed.
- */
-static void queue_push(block_queue_t *q, ezbc_block_t block) {
-    if (q->count >= q->capacity) {
-        q->capacity *= 2;
-        q->blocks = realloc(q->blocks, q->capacity * sizeof(ezbc_block_t));
-    }
-    q->blocks[q->count++] = block;
-}
-
-/**
- * Free block queue.
- */
-static void queue_free(block_queue_t *q) {
-    free(q->blocks);
-}
-
-// =============================================================================
-// EZBC Helper Functions
-// =============================================================================
-
-/**
- * Check if all coefficients in block have |coeff| < threshold.
- */
-static bool is_zero_block_ezbc(int16_t *coeffs, int width, int height,
-                                const ezbc_block_t *block, int threshold) {
-    for (int y = block->y; y < block->y + block->height && y < height; y++) {
-        for (int x = block->x; x < block->x + block->width && x < width; x++) {
-            int idx = y * width + x;
-            if (abs(coeffs[idx]) >= threshold) {
-                return false;
-            }
-        }
-    }
-    return true;
-}
-
-/**
- * Find maximum absolute value in coefficient array.
- */
-static int find_max_abs_ezbc(int16_t *coeffs, size_t count) {
-    int max_abs = 0;
-    for (size_t i = 0; i < count; i++) {
-        int abs_val = abs(coeffs[i]);
-        if (abs_val > max_abs) {
-            max_abs = abs_val;
-        }
-    }
-    return max_abs;
-}
-
-/**
- * Get MSB position (bitplane number).
- * Returns floor(log2(value)), i.e., the position of the highest set bit.
- */
-static int get_msb_bitplane(int value) {
-    if (value == 0) return 0;
-    int bitplane = 0;
-    while (value > 1) {
-        value >>= 1;
-        bitplane++;
-    }
-    return bitplane;
-}
-
-/**
- * Recursively process a significant block - subdivide until 1x1.
- */
-static void process_significant_block_recursive(ezbc_context_t *ctx, ezbc_block_t block) {
-    // If 1x1 block: emit sign bit and add to significant queue
-    if (block.width == 1 && block.height == 1) {
-        int idx = block.y * ctx->width + block.x;
-        bitstream_write_bit(ctx->bs, ctx->coeffs[idx] < 0 ? 1 : 0);
-        (*ctx->sign_count)++;
-        ctx->states[idx].significant = true;
-        ctx->states[idx].first_bitplane = ctx->bitplane;
-        queue_push(ctx->next_significant, block);
-        return;
-    }
-
-    // Block is > 1x1: subdivide into children and recursively process each
-    int mid_x = block.width / 2;
-    int mid_y = block.height / 2;
-    if (mid_x == 0) mid_x = 1;
-    if (mid_y == 0) mid_y = 1;
-
-    // Process top-left child
-    ezbc_block_t tl = {block.x, block.y, mid_x, mid_y};
-    if (!is_zero_block_ezbc(ctx->coeffs, ctx->width, ctx->height, &tl, ctx->threshold)) {
-        bitstream_write_bit(ctx->bs, 1);  // Significant
-        process_significant_block_recursive(ctx, tl);
-    } else {
-        bitstream_write_bit(ctx->bs, 0);  // Insignificant
-        queue_push(ctx->next_insignificant, tl);
-    }
-
-    // Process top-right child (if exists)
-    if (block.width > mid_x) {
-        ezbc_block_t tr = {block.x + mid_x, block.y, block.width - mid_x, mid_y};
-        if (!is_zero_block_ezbc(ctx->coeffs, ctx->width, ctx->height, &tr, ctx->threshold)) {
-            bitstream_write_bit(ctx->bs, 1);
-            process_significant_block_recursive(ctx, tr);
-        } else {
-            bitstream_write_bit(ctx->bs, 0);
-            queue_push(ctx->next_insignificant, tr);
-        }
-    }
-
-    // Process bottom-left child (if exists)
-    if (block.height > mid_y) {
-        ezbc_block_t bl = {block.x, block.y + mid_y, mid_x, block.height - mid_y};
-        if (!is_zero_block_ezbc(ctx->coeffs, ctx->width, ctx->height, &bl, ctx->threshold)) {
-            bitstream_write_bit(ctx->bs, 1);
-            process_significant_block_recursive(ctx, bl);
-        } else {
-            bitstream_write_bit(ctx->bs, 0);
-            queue_push(ctx->next_insignificant, bl);
-        }
-    }
-
-    // Process bottom-right child (if exists)
-    if (block.width > mid_x && block.height > mid_y) {
-        ezbc_block_t br = {block.x + mid_x, block.y + mid_y, block.width - mid_x, block.height - mid_y};
-        if (!is_zero_block_ezbc(ctx->coeffs, ctx->width, ctx->height, &br, ctx->threshold)) {
-            bitstream_write_bit(ctx->bs, 1);
-            process_significant_block_recursive(ctx, br);
-        } else {
-            bitstream_write_bit(ctx->bs, 0);
-            queue_push(ctx->next_insignificant, br);
-        }
-    }
-}
-
-// =============================================================================
-// Main EZBC Encoding Function
-// =============================================================================
-
-/**
- * EZBC encoding for a single channel.
- *
- * Uses two separate queues for insignificant blocks and significant 1x1 blocks.
- * Encodes coefficients progressively from MSB to LSB bitplane.
- *
- * Algorithm:
- * 1. Find MSB bitplane from maximum absolute coefficient value
- * 2. Write header: MSB bitplane, width, height
- * 3. For each bitplane from MSB to 0:
- *    a. Process insignificant blocks: check if they become significant
- *    b. For newly significant blocks: recursively subdivide until 1x1
- *    c. Emit sign bits for newly significant 1x1 coefficients
- *    d. Process already-significant coefficients: emit refinement bits
- * 4. Return encoded bitstream
- *
- * @param coeffs  Input quantized coefficients (int16_t array)
- * @param count   Number of coefficients
- * @param width   Frame width
- * @param height  Frame height
- * @param output  Output buffer pointer (allocated by this function)
- * @return        Encoded size in bytes
- */
-size_t tav_encode_channel_ezbc(int16_t *coeffs, size_t count, int width, int height,
-                                uint8_t **output) {
-    bitstream_t bs;
-    bitstream_init(&bs, count / 4);  // Initial guess
-
-    // Track coefficient significance
-    coeff_state_t *states = calloc(count, sizeof(coeff_state_t));
-
-    // Find maximum value to determine MSB bitplane
-    int max_abs = find_max_abs_ezbc(coeffs, count);
-    int msb_bitplane = get_msb_bitplane(max_abs);
-
-    // Write header: MSB bitplane and dimensions
-    bitstream_write_bits(&bs, msb_bitplane, 8);
-    bitstream_write_bits(&bs, width, 16);
-    bitstream_write_bits(&bs, height, 16);
-
-    // Initialise two queues: insignificant blocks and significant 1x1 blocks
-    block_queue_t insignificant_queue, next_insignificant;
-    block_queue_t significant_queue, next_significant;
-
-    queue_init(&insignificant_queue);
-    queue_init(&next_insignificant);
-    queue_init(&significant_queue);
-    queue_init(&next_significant);
-
-    // Start with root block as insignificant
-    ezbc_block_t root = {0, 0, width, height};
-    queue_push(&insignificant_queue, root);
-
-    // Process bitplanes from MSB to LSB
-    for (int bitplane = msb_bitplane; bitplane >= 0; bitplane--) {
-        int threshold = 1 << bitplane;
-
-        int sign_bits_this_bitplane = 0;
-
-        // Process insignificant blocks - check if they become significant
-        for (size_t i = 0; i < insignificant_queue.count; i++) {
-            ezbc_block_t block = insignificant_queue.blocks[i];
-
-            // Check if this block has any coefficient >= threshold
-            if (is_zero_block_ezbc(coeffs, width, height, &block, threshold)) {
-                // Still insignificant: emit 0
-                bitstream_write_bit(&bs, 0);
-                // Keep in insignificant queue for next bitplane
-                queue_push(&next_insignificant, block);
-            } else {
-                // Became significant: emit 1
-                bitstream_write_bit(&bs, 1);
-
-                // Use recursive subdivision to process this block and all children
-                ezbc_context_t ctx = {
-                    .bs = &bs,
-                    .coeffs = coeffs,
-                    .states = states,
-                    .width = width,
-                    .height = height,
-                    .bitplane = bitplane,
-                    .threshold = threshold,
-                    .next_insignificant = &next_insignificant,
-                    .next_significant = &next_significant,
-                    .sign_count = &sign_bits_this_bitplane
-                };
-                process_significant_block_recursive(&ctx, block);
-            }
-        }
-
-        // Process significant 1x1 blocks - emit refinement bits
-        for (size_t i = 0; i < significant_queue.count; i++) {
-            ezbc_block_t block = significant_queue.blocks[i];
-            int idx = block.y * width + block.x;
-            int abs_val = abs(coeffs[idx]);
-
-            // Emit refinement bit at current bitplane
-            int bit = (abs_val >> bitplane) & 1;
-            bitstream_write_bit(&bs, bit);
-
-            // Keep in significant queue for next bitplane
-            queue_push(&next_significant, block);
-        }
-
-        // Swap queues for next bitplane
-        queue_free(&insignificant_queue);
-        queue_free(&significant_queue);
-        insignificant_queue = next_insignificant;
-        significant_queue = next_significant;
-        queue_init(&next_insignificant);
-        queue_init(&next_significant);
-    }
-
-    // Free all queues
-    queue_free(&insignificant_queue);
-    queue_free(&significant_queue);
-    queue_free(&next_insignificant);
-    queue_free(&next_significant);
-    free(states);
-
-    size_t final_size = bitstream_size(&bs);
-    *output = bs.data;
-
-    return final_size;
-}
diff --git a/video_encoder/lib/libtavenc/tav_encoder_ezbc.h b/video_encoder/lib/libtavenc/tav_encoder_ezbc.h
deleted file mode 100644
index 3fcb82e..0000000
--- a/video_encoder/lib/libtavenc/tav_encoder_ezbc.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/**
- * TAV Encoder - EZBC (Embedded Zero Block Coding) Library
- *
- * Public API for EZBC entropy coding of wavelet coefficients.
- */
-
-#ifndef TAV_ENCODER_EZBC_H
-#define TAV_ENCODER_EZBC_H
-
-#include <stdint.h>
-#include <stddef.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// =============================================================================
-// EZBC Encoding
-// =============================================================================
-
-/**
- * EZBC encoding for a single channel.
- *
- * Implements binary tree embedded zero block coding for efficient storage
- * of sparse wavelet coefficients. Exploits coefficient sparsity through
- * hierarchical significance testing and progressive bitplane encoding.
- *
- * Algorithm:
- * 1. Find MSB bitplane from maximum absolute coefficient value
- * 2. Write header: MSB bitplane (8 bits), width (16 bits), height (16 bits)
- * 3. For each bitplane from MSB to 0:
- *    a. Process insignificant blocks: check if they become significant
- *       - Emit 0 if still insignificant, 1 if became significant
- *    b. For newly significant blocks: recursively subdivide until 1x1
- *       - Emit tree structure: 1=child is significant, 0=child insignificant
- *    c. Emit sign bits for newly significant 1x1 coefficients (1=negative, 0=positive)
- *    d. Process already-significant coefficients: emit refinement bits
- *       - Emit bit at current bitplane for progressive reconstruction
- * 4. Return encoded bitstream
- *
- * Benefits:
- * - Exploits coefficient sparsity (typical: 86.9% zeros in luma, 97.8% in chroma)
- * - Progressive refinement from MSB to LSB
- * - Spatial clustering through quadtree decomposition
- * - No additional entropy coding needed (bitstream is already compressed)
- *
- * @param coeffs  Input quantized coefficients (int16_t array)
- * @param count   Number of coefficients (width × height)
- * @param width   Frame width (must match coefficient array layout)
- * @param height  Frame height (must match coefficient array layout)
- * @param output  Output buffer pointer (allocated by this function, caller must free)
- * @return        Encoded size in bytes (including header)
- */
-size_t tav_encode_channel_ezbc(int16_t *coeffs, size_t count, int width, int height,
-                                uint8_t **output);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // TAV_ENCODER_EZBC_H
diff --git a/video_encoder/lib/libtavenc/tav_encoder_lib.c b/video_encoder/lib/libtavenc/tav_encoder_lib.c
deleted file mode 100644
index 77266ba..0000000
--- a/video_encoder/lib/libtavenc/tav_encoder_lib.c
+++ /dev/null
@@ -1,1528 +0,0 @@
-/**
- * TAV Encoder Library - Main Implementation
- *
- * High-level API for encoding video using TAV codec with GOP-based
- * multi-threaded encoding.
- *
- * Based on encoder_tav.c - extracted into library form.
- */
-
-#include "tav_encoder_lib.h"
-#include "tav_encoder_color.h"
-#include "tav_encoder_dwt.h"
-#include "tav_encoder_quantize.h"
-#include "tav_encoder_ezbc.h"
-#include "tav_encoder_utils.h"
-#include "tav_encoder_tile.h"
-#include "encoder_tad.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdint.h>
-#include <threads.h>
-#include <time.h>
-#include <zstd.h>
-
-// =============================================================================
-// Internal Constants
-// =============================================================================
-
-#define ENCODER_VERSION "TAV Encoder Library v1.0"
-#define MAX_ERROR_MESSAGE 256
-
-#define GOP_SIZE_MAX 24
-
-// GOP status values
-#define GOP_STATUS_EMPTY      0
-#define GOP_STATUS_FILLING    1
-#define GOP_STATUS_READY      2
-#define GOP_STATUS_ENCODING   3
-#define GOP_STATUS_COMPLETE   4
-
-// Quality to quantizer mapping (indices into QLUT)
-static const int QLUT[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096};
-
-static const int QUALITY_Y[] = {79, 47, 23, 11, 5, 2};   // Quality levels 0-5
-static const int QUALITY_CO[] = {123, 108, 91, 76, 59, 29};
-static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39};
-static const float DEAD_ZONE_THRESHOLD[] = {1.5f, 1.5f, 1.2f, 1.1f, 0.8f, 0.6f, 0.0f};
-
-// Channel layout definitions (from TAV specification)
-#define CHANNEL_LAYOUT_YCOCG     0
-#define CHANNEL_LAYOUT_YCOCG_A   1
-#define CHANNEL_LAYOUT_Y_ONLY    2
-#define CHANNEL_LAYOUT_Y_A       3
-#define CHANNEL_LAYOUT_COCG      4
-#define CHANNEL_LAYOUT_COCG_A    5
-
-// Channel layout configuration
-typedef struct {
-    int layout_id;
-    int num_channels;
-    const char *channels[4];
-    int has_y, has_co, has_cg, has_alpha;
-} channel_layout_config_t;
-
-static const channel_layout_config_t channel_layouts[] = {
-    {CHANNEL_LAYOUT_YCOCG,   3, {"Y",  "Co", "Cg", NULL}, 1, 1, 1, 0},  // 0: Y-Co-Cg
-    {CHANNEL_LAYOUT_YCOCG_A, 4, {"Y",  "Co", "Cg", "A"}, 1, 1, 1, 1},   // 1: Y-Co-Cg-A
-    {CHANNEL_LAYOUT_Y_ONLY,  1, {"Y",  NULL, NULL, NULL}, 1, 0, 0, 0},  // 2: Y only
-    {CHANNEL_LAYOUT_Y_A,     2, {"Y",  NULL, NULL, "A"}, 1, 0, 0, 1},   // 3: Y-A
-    {CHANNEL_LAYOUT_COCG,    2, {NULL, "Co", "Cg", NULL}, 0, 1, 1, 0},  // 4: Co-Cg
-    {CHANNEL_LAYOUT_COCG_A,  3, {NULL, "Co", "Cg", "A"}, 0, 1, 1, 1}    // 5: Co-Cg-A
-};
-
-// Coefficient preprocessing modes
-typedef enum {
-    PREPROCESS_TWOBITMAP = 0,  // Twobit-plane significance map (default, best compression)
-    PREPROCESS_EZBC = 1,       // EZBC embedded zero block coding
-    PREPROCESS_RAW = 2         // No preprocessing - raw coefficients
-} preprocess_mode_t;
-
-// =============================================================================
-// Internal Structures
-// =============================================================================
-
-// Compatibility structure for extracted modules
-// The quantization and DWT modules expect a tav_encoder_t structure
-// with certain fields. This minimal structure provides those fields.
-struct tav_encoder_s {
-    int quality_level;           // For perceptual quantization
-    int *widths;                 // Subband widths array (per decomposition level)
-    int *heights;                // Subband heights array (per decomposition level)
-    int decomp_levels;           // Number of spatial DWT decomposition levels
-    float dead_zone_threshold;   // Dead-zone quantization threshold
-    int encoder_preset;          // Preset flags (sports mode, etc.)
-    int temporal_decomp_levels;  // Temporal DWT levels
-    int verbose;                 // Verbose output flag
-    int frame_count;             // Current frame number for encoding
-    float adjusted_quantiser_y_float;  // For bitrate control (if needed)
-    float dither_accumulator;    // Dither accumulator for bitrate mode
-    int width;                   // Frame width
-    int height;                  // Frame height
-    int perceptual_tuning;       // 1 = perceptual quantization, 0 = uniform
-};
-
-// GOP slot for circular buffering
-typedef struct gop_slot {
-    // Status
-    volatile int status;          // GOP_STATUS_* values
-    int gop_index;                // Sequential GOP number
-
-    // Input data
-    uint8_t **rgb_frames;         // [frame][width*height*3] RGB data
-    int num_frames;               // Number of frames in this GOP
-    int *frame_numbers;           // Original frame indices (for timecodes)
-    int width, height;            // Frame dimensions
-
-    // Audio data
-    float *pcm_samples;           // Stereo PCM32f samples (L,R,L,R,...)
-    size_t num_audio_samples;     // Samples per channel
-
-    // Output data (filled by worker thread)
-    tav_encoder_packet_t *packets;     // Array of output packets
-    int num_packets;                   // Number of packets in this GOP
-
-    // Error handling
-    int encoding_failed;
-    char error_message[MAX_ERROR_MESSAGE];
-
-    // Synchronization
-    mtx_t mutex;
-    cnd_t status_changed;
-} gop_slot_t;
-
-// Thread-local worker context
-typedef struct thread_worker_context {
-    int thread_id;
-    struct thread_pool *pool;
-
-    // Thread-local work buffers (reused across GOPs)
-    float **work_y_frames;        // [max_gop_size][max_pixels]
-    float **work_co_frames;
-    float **work_cg_frames;
-    int16_t **quantised_y;
-    int16_t **quantised_co;
-    int16_t **quantised_cg;
-    uint8_t *compression_buffer;
-    size_t compression_buffer_size;
-    ZSTD_CCtx *zstd_ctx;
-
-    // Buffer sizing
-    int max_gop_frames;
-    size_t max_frame_pixels;
-} thread_worker_context_t;
-
-// Thread pool structure
-typedef struct thread_pool {
-    int num_threads;
-    thrd_t *worker_threads;
-
-    // Circular buffer of GOP slots
-    gop_slot_t *slots;
-    int num_slots;                // 2 * num_threads
-    int slot_capacity;            // Max frames per GOP
-
-    // Producer state (frame submission)
-    int next_slot_to_fill;
-    int total_gops_produced;
-    int producer_finished;        // 1 when no more frames
-
-    // Job queue for workers
-    int *job_queue;
-    int job_queue_head;
-    int job_queue_tail;
-    int job_queue_size;
-    int job_queue_capacity;
-    mtx_t job_queue_mutex;
-    cnd_t job_available;
-    cnd_t slot_available;
-
-    // Shutdown signal
-    int shutdown;
-
-    // Shared encoder context (read-only)
-    struct tav_encoder_context *shared_ctx;
-} thread_pool_t;
-
-// Main encoder context (opaque to API users)
-struct tav_encoder_context {
-    // Configuration (from params)
-    int width, height;
-    int fps_num, fps_den;
-    int wavelet_type;
-    int temporal_wavelet;
-    int decomp_levels;
-    int temporal_levels;
-    int channel_layout;
-    int perceptual_tuning;
-    int enable_temporal_dwt;
-    int gop_size;
-    int enable_two_pass;
-    int quality_level;
-    float dead_zone_threshold;
-    int entropy_coder;
-    int zstd_level;
-    int num_threads;
-    int encoder_preset;
-    int verbose;
-    int monoblock;
-
-    // Tile configuration (derived from monoblock and dimensions)
-    int tiles_x, tiles_y;         // Number of tiles in x/y directions
-
-    // Derived quantizer values (QLUT indices)
-    int quantiser_y, quantiser_co, quantiser_cg;
-
-    // Compatibility encoder for modules (quantization, DWT)
-    tav_encoder_t *compat_enc;
-
-    // Thread pool (NULL if single-threaded)
-    thread_pool_t *pool;
-
-    // Single-threaded GOP buffer
-    uint8_t **gop_rgb_frames;     // [frame][pixel*3]
-    int gop_frame_count;
-    int64_t *gop_frame_pts;       // Presentation timestamps
-
-    // TAD audio quality mapping
-    int tad_max_index;
-
-    // Error handling
-    char error_message[MAX_ERROR_MESSAGE];
-
-    // Statistics
-    int64_t frames_encoded;
-    int64_t gops_encoded;
-    size_t total_bytes;
-    size_t video_bytes;
-    size_t audio_bytes;
-    time_t start_time;
-};
-
-// =============================================================================
-// Forward Declarations
-// =============================================================================
-
-static int encode_gop_intra_only(tav_encoder_context_t *ctx, gop_slot_t *slot);
-static int encode_gop_unified(tav_encoder_context_t *ctx, gop_slot_t *slot);
-static int worker_thread_main(void *arg);
-static void free_gop_slot(gop_slot_t *slot);
-
-static tav_encoder_t *create_compat_encoder(tav_encoder_context_t *ctx);
-static void free_compat_encoder(tav_encoder_t *enc);
-
-static size_t preprocess_coefficients_ezbc(int16_t *coeffs_y, int16_t *coeffs_co, int16_t *coeffs_cg, int16_t *coeffs_alpha,
-                                           int coeff_count, int width, int height, int channel_layout,
-                                           uint8_t *output_buffer);
-static size_t preprocess_gop_unified(preprocess_mode_t preprocess_mode, int16_t **quant_y, int16_t **quant_co, int16_t **quant_cg,
-                                     int num_frames, int num_pixels, int width, int height, int channel_layout,
-                                     uint8_t *output_buffer);
-static void rgb_to_colour_space_frame(tav_encoder_context_t *ctx, const uint8_t *rgb,
-                                     float *c1, float *c2, float *c3,
-                                     int width, int height);
-
-// =============================================================================
-// Parameter Initialization
-// =============================================================================
-
-void tav_encoder_params_init(tav_encoder_params_t *params, int width, int height) {
-    memset(params, 0, sizeof(tav_encoder_params_t));
-
-    // Video dimensions
-    params->width = width;
-    params->height = height;
-    params->fps_num = 60;
-    params->fps_den = 1;
-
-    // Wavelet defaults
-    params->wavelet_type = 1;          // CDF 9/7 (best compression)
-    params->temporal_wavelet = 255;    // Always Haar
-    params->decomp_levels = 0;         // Auto-calculate
-    params->temporal_levels = 2;       // Always 2
-
-    // Color space
-    params->channel_layout = 0;        // YCoCg-R
-    params->perceptual_tuning = 1;     // Enable HVS model
-
-    // GOP settings
-    params->enable_temporal_dwt = 1;   // Enable 3D DWT GOP encoding
-    params->gop_size = 24;             // always 24
-    params->enable_two_pass = 1;       // Enable scene change detection
-
-    // Quality defaults (level 3 = balanced)
-    params->quality_level = 3;
-    params->quantiser_y = QUALITY_Y[3];    // 11 - quantiser index
-    params->quantiser_co = QUALITY_CO[3];  // 76 - quantiser index
-    params->quantiser_cg = QUALITY_CG[3];  // 99 - quantiser index
-    params->dead_zone_threshold = DEAD_ZONE_THRESHOLD[3];  // 1.1 for Q3
-
-    // Compression
-    params->entropy_coder = 1;         // EZBC as default
-    params->zstd_level = 7;            // Balanced compression/speed
-
-    // Threading
-    params->num_threads = 0;           // Single-threaded (multi-threading not yet implemented)
-
-    // Encoder presets
-    params->encoder_preset = 0;        // None
-
-    // Advanced
-    params->verbose = 0;
-    params->monoblock = 1;            // -1=auto (based on dimensions), 0=force tiled, 1=force monoblock
-    // monoblock: default to forced monoblock mode because tiling is not working very well...
-}
-
-// =============================================================================
-// Encoder Creation
-// =============================================================================
-
-tav_encoder_context_t *tav_encoder_create(const tav_encoder_params_t *params) {
-    if (!params) {
-        return NULL;
-    }
-
-    // Validate parameters
-    if (params->width <= 0 || params->height <= 0) {
-        fprintf(stderr, "ERROR: Invalid dimensions %dx%d\n", params->width, params->height);
-        return NULL;
-    }
-
-    if (params->width % 2 != 0 || params->height % 2 != 0) {
-        fprintf(stderr, "ERROR: Dimensions must be even (got %dx%d)\n", params->width, params->height);
-        return NULL;
-    }
-
-    // Allocate context
-    tav_encoder_context_t *ctx = calloc(1, sizeof(tav_encoder_context_t));
-    if (!ctx) {
-        fprintf(stderr, "ERROR: Failed to allocate encoder context\n");
-        return NULL;
-    }
-
-    // Copy configuration
-    ctx->width = params->width;
-    ctx->height = params->height;
-    ctx->fps_num = params->fps_num;
-    ctx->fps_den = params->fps_den;
-    ctx->wavelet_type = params->wavelet_type;
-    ctx->temporal_wavelet = params->temporal_wavelet;
-    ctx->decomp_levels = params->decomp_levels;
-    ctx->temporal_levels = params->temporal_levels;
-    ctx->channel_layout = params->channel_layout;
-    ctx->perceptual_tuning = params->perceptual_tuning;
-    ctx->enable_temporal_dwt = params->enable_temporal_dwt;
-    ctx->gop_size = params->gop_size;
-    ctx->enable_two_pass = params->enable_two_pass;
-    ctx->quality_level = params->quality_level;  // CRITICAL: Was missing, caused quality_level=0
-    ctx->quantiser_y = params->quantiser_y;
-    ctx->quantiser_co = params->quantiser_co;
-    ctx->quantiser_cg = params->quantiser_cg;
-    ctx->dead_zone_threshold = params->dead_zone_threshold;
-    ctx->entropy_coder = params->entropy_coder;
-    ctx->zstd_level = params->zstd_level;
-    ctx->num_threads = params->num_threads;
-    ctx->encoder_preset = params->encoder_preset;
-    ctx->verbose = params->verbose;
-    ctx->monoblock = params->monoblock;
-
-    // quantiser_y/co/cg already contain quantiser indices (0-255)
-    // Clamp to valid range
-    if (ctx->quantiser_y < 0) ctx->quantiser_y = 0;
-    if (ctx->quantiser_y > 255) ctx->quantiser_y = 255;
-    if (ctx->quantiser_co < 0) ctx->quantiser_co = 0;
-    if (ctx->quantiser_co > 255) ctx->quantiser_co = 255;
-    if (ctx->quantiser_cg < 0) ctx->quantiser_cg = 0;
-    if (ctx->quantiser_cg > 255) ctx->quantiser_cg = 255;
-
-    // Copy quantiser indices for encoding
-    ctx->quantiser_y = ctx->quantiser_y;
-    ctx->quantiser_co = ctx->quantiser_co;
-    ctx->quantiser_cg = ctx->quantiser_cg;
-
-    // Force EZBC entropy coder (Twobitmap is deprecated)
-    ctx->entropy_coder = 1;
-    // Force Haar temporal
-    ctx->temporal_wavelet = 255;
-    // Force temporal level 2
-    ctx->temporal_levels = 2;
-
-    // Handle monoblock mode:
-    // -1 = auto (select based on dimensions), 0 = force tiled, 1 = force monoblock
-    if (ctx->monoblock == -1) {
-        // Auto mode: use monoblock for <= D1 PAL, tiled for larger
-        if (ctx->width > TAV_MONOBLOCK_MAX_WIDTH || ctx->height > TAV_MONOBLOCK_MAX_HEIGHT) {
-            ctx->monoblock = 0;
-            if (ctx->verbose) {
-                printf("Auto-selected Padded Tiling mode: %dx%d exceeds D1 PAL threshold (%dx%d)\n",
-                       ctx->width, ctx->height, TAV_MONOBLOCK_MAX_WIDTH, TAV_MONOBLOCK_MAX_HEIGHT);
-            }
-        } else {
-            ctx->monoblock = 1;
-            if (ctx->verbose) {
-                printf("Auto-selected Monoblock mode: %dx%d within D1 PAL threshold\n",
-                       ctx->width, ctx->height);
-            }
-        }
-    } else if (ctx->monoblock == 0) {
-        if (ctx->verbose) {
-            printf("Forced Padded Tiling mode (--tiled)\n");
-        }
-    } else {
-        // monoblock == 1: force monoblock even for large dimensions
-        if (ctx->verbose) {
-            printf("Forced Monoblock mode (--monoblock)\n");
-        }
-    }
-
-    // Calculate tile dimensions based on monoblock setting
-    if (ctx->monoblock) {
-        // Monoblock mode: single tile covering entire frame
-        ctx->tiles_x = 1;
-        ctx->tiles_y = 1;
-    } else {
-        // Padded Tiling mode: multiple tiles of TILE_SIZE_X × TILE_SIZE_Y
-        ctx->tiles_x = (ctx->width + TAV_TILE_SIZE_X - 1) / TAV_TILE_SIZE_X;
-        ctx->tiles_y = (ctx->height + TAV_TILE_SIZE_Y - 1) / TAV_TILE_SIZE_Y;
-        if (ctx->verbose) {
-            printf("Padded Tiling mode: %dx%d tiles (%d total)\n",
-                   ctx->tiles_x, ctx->tiles_y, ctx->tiles_x * ctx->tiles_y);
-        }
-    }
-
-    // Calculate decomp levels if auto (0)
-    // For multi-tile mode, use tile size as the basis; for monoblock, use frame size
-    if (ctx->decomp_levels == 0) {
-        int levels = 0;
-        int min_dim;
-        if (ctx->monoblock) {
-            min_dim = (ctx->width < ctx->height) ? ctx->width : ctx->height;
-        } else {
-            // For tiled mode, calculate based on tile size
-            min_dim = (TAV_TILE_SIZE_X < TAV_TILE_SIZE_Y) ? TAV_TILE_SIZE_X : TAV_TILE_SIZE_Y;
-        }
-        // Keep halving until we reach minimum size
-        while (min_dim >= 32) {
-            min_dim /= 2;
-            levels++;
-        }
-        // Cap at 6 levels maximum
-        ctx->decomp_levels = (levels > 6) ? 6 : levels;
-    }
-
-    if (ctx->gop_size <= 0) {
-        ctx->gop_size = 24;
-    }
-
-    // Auto-select temporal wavelet if still at default (255=Haar) and temporal DWT enabled
-    if (ctx->enable_temporal_dwt && ctx->temporal_wavelet == 255) {
-        int num_pixels = ctx->width * ctx->height;
-        int use_pure_haar = 0;
-
-        // Smart preset based on resolution and quality
-        // For large videos with reasonable quality, use Haar (better compression)
-        // For smaller videos or low quality, use Haar with sports mode (better motion preservation)
-        if ((num_pixels >= 820000 && ctx->quantiser_y <= 29) ||
-            (num_pixels >= 500000 && ctx->quantiser_y <= 14) ||
-            (num_pixels >= 340000 && ctx->quantiser_y <= 7) ||
-            (num_pixels >= 260000 && ctx->quantiser_y <= 3)) {
-            use_pure_haar = 1;
-        }
-
-        if (use_pure_haar) {
-            ctx->temporal_wavelet = 255;  // Keep Haar
-            if (ctx->verbose) {
-                printf("Auto-selected Haar temporal wavelet (resolution: %dx%d = %d pixels, quantiser_y = %d)\n",
-                       ctx->width, ctx->height, num_pixels, ctx->quantiser_y);
-            }
-        } else {
-            ctx->temporal_wavelet = 255;  // Keep Haar
-            ctx->encoder_preset |= 1; // Enable Sports mode
-            if (ctx->verbose) {
-                printf("Auto-selected Haar temporal wavelet with sports mode (resolution: %dx%d = %d pixels, quantiser_y = %d)\n",
-                       ctx->width, ctx->height, num_pixels, ctx->quantiser_y);
-            }
-        }
-    }
-
-    // Determine thread count
-    if (ctx->num_threads < 0) {
-        // Auto-detect: use system thread count
-        ctx->num_threads = 4;  // Conservative default (TODO: detect actual CPU count)
-    } else if (ctx->num_threads == 0) {
-        ctx->num_threads = 0;  // Single-threaded
-    }
-
-    // Allocate single-threaded GOP buffer if not using threading
-    if (ctx->num_threads == 0) {
-        ctx->gop_rgb_frames = calloc(ctx->gop_size, sizeof(uint8_t *));
-        ctx->gop_frame_pts = calloc(ctx->gop_size, sizeof(int64_t));
-        if (!ctx->gop_rgb_frames || !ctx->gop_frame_pts) {
-            snprintf(ctx->error_message, MAX_ERROR_MESSAGE,
-                     "Failed to allocate GOP buffers");
-            tav_encoder_free(ctx);
-            return NULL;
-        }
-
-        size_t frame_size = ctx->width * ctx->height * 3;
-        for (int i = 0; i < ctx->gop_size; i++) {
-            ctx->gop_rgb_frames[i] = malloc(frame_size);
-            if (!ctx->gop_rgb_frames[i]) {
-                snprintf(ctx->error_message, MAX_ERROR_MESSAGE,
-                         "Failed to allocate GOP frame buffer %d", i);
-                tav_encoder_free(ctx);
-                return NULL;
-            }
-        }
-    }
-
-    // Set TAD audio quality mapping (from quantiser_y)
-    ctx->tad_max_index = tad32_quality_to_max_index(ctx->quantiser_y);
-
-    // Initialize statistics
-    ctx->start_time = time(NULL);
-
-    // Create compatibility encoder for extracted modules
-    ctx->compat_enc = create_compat_encoder(ctx);
-    if (!ctx->compat_enc) {
-        snprintf(ctx->error_message, MAX_ERROR_MESSAGE,
-                 "Failed to create compatibility encoder");
-        tav_encoder_free(ctx);
-        return NULL;
-    }
-
-    if (ctx->verbose) {
-        printf("%s created:\n", ENCODER_VERSION);
-        printf("  Resolution: %dx%d @ %d/%d fps\n",
-               ctx->width, ctx->height, ctx->fps_num, ctx->fps_den);
-        printf("  Tiling: %s (%dx%d tiles)\n",
-               ctx->monoblock ? "Monoblock" : "Padded Tiling",
-               ctx->tiles_x, ctx->tiles_y);
-        printf("  GOP size: %d frames\n", ctx->gop_size);
-        printf("  Wavelet: %d (spatial), %d (temporal)\n",
-               ctx->wavelet_type, ctx->temporal_wavelet);
-        printf("  DWT levels: %d (spatial), %d (temporal)\n",
-               ctx->decomp_levels, ctx->temporal_levels);
-        printf("  Quality: Y=%d, Co=%d, Cg=%d\n",
-               ctx->quantiser_y, ctx->quantiser_co, ctx->quantiser_cg);
-        printf("  Threads: %d\n", ctx->num_threads);
-    }
-
-    return ctx;
-}
-
-// =============================================================================
-// Encoder Cleanup
-// =============================================================================
-
-void tav_encoder_free(tav_encoder_context_t *ctx) {
-    if (!ctx) return;
-
-    // Free single-threaded GOP buffers
-    if (ctx->gop_rgb_frames) {
-        for (int i = 0; i < ctx->gop_size; i++) {
-            free(ctx->gop_rgb_frames[i]);
-        }
-        free(ctx->gop_rgb_frames);
-    }
-    free(ctx->gop_frame_pts);
-
-    // Free compatibility encoder
-    free_compat_encoder(ctx->compat_enc);
-
-    // TODO: Shutdown thread pool if exists
-
-    free(ctx);
-}
-
-// =============================================================================
-// Error Handling
-// =============================================================================
-
-const char *tav_encoder_get_error(tav_encoder_context_t *ctx) {
-    if (!ctx) return "Invalid encoder context";
-    return ctx->error_message[0] ? ctx->error_message : NULL;
-}
-
-void tav_encoder_get_params(tav_encoder_context_t *ctx, tav_encoder_params_t *params) {
-    if (!ctx || !params) return;
-
-    params->width = ctx->width;
-    params->height = ctx->height;
-    params->fps_num = ctx->fps_num;
-    params->fps_den = ctx->fps_den;
-    params->wavelet_type = ctx->wavelet_type;
-    params->temporal_wavelet = ctx->temporal_wavelet;
-    params->decomp_levels = ctx->decomp_levels;           // Calculated value
-    params->temporal_levels = ctx->temporal_levels;       // Calculated value
-    params->channel_layout = ctx->channel_layout;
-    params->perceptual_tuning = ctx->perceptual_tuning;
-    params->enable_temporal_dwt = ctx->enable_temporal_dwt;
-    params->gop_size = ctx->gop_size;                     // Calculated value
-    params->enable_two_pass = ctx->enable_two_pass;
-    params->quantiser_y = ctx->quantiser_y;
-    params->quantiser_co = ctx->quantiser_co;
-    params->quantiser_cg = ctx->quantiser_cg;
-    params->dead_zone_threshold = ctx->dead_zone_threshold;
-    params->entropy_coder = ctx->entropy_coder;           // Forced to 1 (EZBC)
-    params->zstd_level = ctx->zstd_level;
-    params->num_threads = ctx->num_threads;
-    params->encoder_preset = ctx->encoder_preset;
-    params->verbose = ctx->verbose;
-    params->monoblock = ctx->monoblock;
-}
-
-int tav_encoder_validate_context(tav_encoder_context_t *ctx) {
-    if (!ctx) return 0;
-
-    // Basic sanity checks
-    if (ctx->width < 16 || ctx->width > 16777215) return 0;
-    if (ctx->height < 16 || ctx->height > 16777215) return 0;
-    if (ctx->gop_size < 1 || ctx->gop_size > GOP_SIZE_MAX) return 0;
-
-    return 1;
-}
-
-// =============================================================================
-// Statistics
-// =============================================================================
-
-void tav_encoder_get_stats(tav_encoder_context_t *ctx, tav_encoder_stats_t *stats) {
-    if (!ctx || !stats) return;
-
-    memset(stats, 0, sizeof(tav_encoder_stats_t));
-
-    stats->frames_encoded = ctx->frames_encoded;
-    stats->gops_encoded = ctx->gops_encoded;
-    stats->total_bytes = ctx->total_bytes;
-    stats->video_bytes = ctx->video_bytes;
-    stats->audio_bytes = ctx->audio_bytes;
-
-    // Calculate average bitrate
-    time_t elapsed = time(NULL) - ctx->start_time;
-    if (elapsed > 0) {
-        double seconds = (double)ctx->frames_encoded / ((double)ctx->fps_num / ctx->fps_den);
-        if (seconds > 0) {
-            stats->avg_bitrate_kbps = (ctx->total_bytes * 8.0) / (seconds * 1000.0);
-        }
-    }
-
-    // Calculate encoding speed
-    if (elapsed > 0) {
-        stats->encoding_fps = (double)ctx->frames_encoded / elapsed;
-    }
-}
-
-// =============================================================================
-// Flush Encoder - DEPRECATED, CLI handles partial GOPs directly
-// =============================================================================
-
-void tav_encoder_free_packet(tav_encoder_packet_t *packet) {
-    if (!packet) return;
-
-    if (packet->data) {
-        free(packet->data);
-    }
-    free(packet);
-}
-
-// =============================================================================
-// GOP-Level Encoding (Thread-Safe)
-// =============================================================================
-
-int tav_encoder_encode_gop(tav_encoder_context_t *ctx,
-                            const uint8_t **rgb_frames,
-                            int num_frames,
-                            const int *frame_numbers,
-                            tav_encoder_packet_t **packet) {
-    if (!ctx || !rgb_frames || !packet) {
-        if (ctx) {
-            snprintf(ctx->error_message, MAX_ERROR_MESSAGE, "Invalid parameters");
-        }
-        return -1;
-    }
-
-    if (num_frames < 1 || num_frames > GOP_SIZE_MAX) {
-        snprintf(ctx->error_message, MAX_ERROR_MESSAGE,
-                 "Invalid GOP size: %d (must be 1-%d)", num_frames, GOP_SIZE_MAX);
-        return -1;
-    }
-
-    *packet = NULL;
-
-    // Create temporary GOP slot
-    gop_slot_t slot = {0};
-
-    // Allocate array of frame pointers (casting away const for internal use)
-    slot.rgb_frames = tav_malloc(num_frames * sizeof(uint8_t*));
-    for (int i = 0; i < num_frames; i++) {
-        slot.rgb_frames[i] = (uint8_t*)rgb_frames[i];  // Cast away const
-    }
-
-    slot.num_frames = num_frames;
-    slot.width = ctx->width;
-    slot.height = ctx->height;
-
-    // Copy or generate frame numbers
-    slot.frame_numbers = tav_calloc(num_frames, sizeof(int));
-    if (frame_numbers) {
-        memcpy(slot.frame_numbers, frame_numbers, num_frames * sizeof(int));
-    } else {
-        // Generate sequential frame numbers if not provided
-        for (int i = 0; i < num_frames; i++) {
-            slot.frame_numbers[i] = i;
-        }
-    }
-
-    // Encode GOP
-    int result;
-    if (ctx->enable_temporal_dwt && num_frames > 1) {
-        result = encode_gop_unified(ctx, &slot);
-    } else {
-        result = encode_gop_intra_only(ctx, &slot);
-    }
-
-    // Cleanup temporary allocations
-    free(slot.rgb_frames);
-    free(slot.frame_numbers);
-
-    if (result < 0) {
-        // Error message already set by encoding function
-        return -1;
-    }
-
-    // Extract packet from slot
-    if (slot.num_packets > 0) {
-        *packet = &slot.packets[0];
-    } else {
-        snprintf(ctx->error_message, MAX_ERROR_MESSAGE, "Encoding produced no packets");
-        return -1;
-    }
-
-    // NOTE: Statistics NOT updated here - caller manages that
-    // This function is stateless for multithreading
-
-    return 1;  // Packet ready
-}
-
-// =============================================================================
-// Audio Encoding
-// =============================================================================
-
-int tav_encoder_encode_audio(tav_encoder_context_t *ctx,
-                              const float *pcm_samples,
-                              size_t num_samples,
-                              tav_encoder_packet_t **packet) {
-    if (!ctx || !pcm_samples || !packet) {
-        if (ctx) {
-            snprintf(ctx->error_message, MAX_ERROR_MESSAGE, "Invalid parameters");
-        }
-        return -1;
-    }
-
-    *packet = NULL;
-
-    // Validate chunk size
-    if (num_samples < TAD32_MIN_CHUNK_SIZE) {
-        snprintf(ctx->error_message, MAX_ERROR_MESSAGE,
-                 "Audio chunk too small (%zu < %d)", num_samples, TAD32_MIN_CHUNK_SIZE);
-        return -1;
-    }
-
-    // Allocate output buffer (conservative estimate: 4 bytes per sample)
-    size_t output_capacity = num_samples * 4 + 1024;
-    uint8_t *tad_data = malloc(output_capacity);
-    if (!tad_data) {
-        snprintf(ctx->error_message, MAX_ERROR_MESSAGE,
-                 "Failed to allocate TAD output buffer");
-        return -1;
-    }
-
-    // Encode audio with TAD encoder (use same zstd_level as video)
-    size_t tad_size = tad32_encode_chunk(pcm_samples, num_samples,
-                                         ctx->tad_max_index, 1.0f,
-                                         ctx->zstd_level, tad_data);
-    if (tad_size == 0) {
-        free(tad_data);
-        snprintf(ctx->error_message, MAX_ERROR_MESSAGE,
-                 "TAD audio encoding failed");
-        return -1;
-    }
-
-    // Create packet
-    tav_encoder_packet_t *pkt = calloc(1, sizeof(tav_encoder_packet_t));
-    if (!pkt) {
-        free(tad_data);
-        snprintf(ctx->error_message, MAX_ERROR_MESSAGE,
-                 "Failed to allocate packet");
-        return -1;
-    }
-
-    pkt->data = tad_data;
-    pkt->size = tad_size;
-    pkt->packet_type = TAV_PACKET_AUDIO_TAD;
-    pkt->frame_number = -1;  // Audio doesn't have frame number
-    pkt->is_video = 0;
-
-    *packet = pkt;
-
-    ctx->audio_bytes += tad_size;
-    ctx->total_bytes += tad_size;
-
-    return 1;  // Packet ready
-}
-
-// =============================================================================
-// Compatibility Encoder Helpers
-// =============================================================================
-
-/**
- * Create compatibility encoder structure for extracted modules.
- * Calculates subband widths/heights arrays needed by quantization module.
- */
-static tav_encoder_t *create_compat_encoder(tav_encoder_context_t *ctx) {
-    tav_encoder_t *enc = calloc(1, sizeof(tav_encoder_t));
-    if (!enc) return NULL;
-
-    // Copy basic fields
-    enc->quality_level = ctx->quality_level;
-    enc->dead_zone_threshold = ctx->dead_zone_threshold;
-    enc->encoder_preset = ctx->encoder_preset;
-    enc->temporal_decomp_levels = ctx->temporal_levels;
-    enc->verbose = ctx->verbose;
-    enc->perceptual_tuning = ctx->perceptual_tuning;
-
-    // Copy frame dimensions (needed by quantisation functions)
-    enc->width = ctx->width;
-    enc->height = ctx->height;
-    enc->decomp_levels = ctx->decomp_levels;
-    enc->frame_count = 0;  // Will be updated during encoding
-
-    // Calculate subband widths and heights arrays
-    // These are needed by the perceptual quantization module
-    int max_levels = ctx->decomp_levels + 1;
-    enc->widths = calloc(max_levels, sizeof(int));
-    enc->heights = calloc(max_levels, sizeof(int));
-
-    if (!enc->widths || !enc->heights) {
-        free(enc->widths);
-        free(enc->heights);
-        free(enc);
-        return NULL;
-    }
-
-    // Level 0 is full resolution
-    int w = ctx->width;
-    int h = ctx->height;
-
-    for (int level = 0; level < max_levels; level++) {
-        enc->widths[level] = w;
-        enc->heights[level] = h;
-        w = (w + 1) / 2;  // Next level is half resolution (rounded up)
-        h = (h + 1) / 2;
-    }
-
-    return enc;
-}
-
-/**
- * Free compatibility encoder structure.
- */
-static void free_compat_encoder(tav_encoder_t *enc) {
-    if (!enc) return;
-    free(enc->widths);
-    free(enc->heights);
-    free(enc);
-}
-
-// =============================================================================
-// GOP Encoding Implementation
-// =============================================================================
-
-/**
- * Convert RGB frame to color space (YCoCg-R or ICtCp).
- * Helper function for GOP encoding.
- */
-static void rgb_to_colour_space_frame(tav_encoder_context_t *ctx, const uint8_t *rgb,
-                                     float *c1, float *c2, float *c3,
-                                     int width, int height) {
-    int num_pixels = width * height;
-
-    if (ctx->channel_layout == 1) {  // ICtCp mode
-        // Use color module function for ICtCp conversion
-        for (int i = 0; i < num_pixels; i++) {
-            double I, Ct, Cp;
-            tav_srgb8_to_ictcp_hlg(rgb[i*3], rgb[i*3+1], rgb[i*3+2], &I, &Ct, &Cp);
-            c1[i] = (float)I;
-            c2[i] = (float)Ct;
-            c3[i] = (float)Cp;
-        }
-    } else {  // YCoCg-R mode (default)
-        tav_rgb_to_ycocg(rgb, c1, c2, c3, width, height);
-    }
-}
-
-/**
- * Preprocess coefficients using EZBC encoding (single frame).
- * Based on encoder_tav.c:preprocess_coefficients_ezbc().
- * NOTE: EZBC encoder allocates its own output buffer, which we copy to output_buffer.
- */
-static size_t preprocess_coefficients_ezbc(int16_t *coeffs_y, int16_t *coeffs_co, int16_t *coeffs_cg, int16_t *coeffs_alpha,
-                                           int coeff_count, int width, int height, int channel_layout,
-                                           uint8_t *output_buffer) {
-    const channel_layout_config_t *config = &channel_layouts[channel_layout];
-    size_t total_size = 0;
-    uint8_t *write_ptr = output_buffer;
-
-    // Encode each active channel separately with EZBC
-    int16_t *channel_coeffs[4] = {coeffs_y, coeffs_co, coeffs_cg, coeffs_alpha};
-    int channel_active[4] = {config->has_y, config->has_co, config->has_cg, config->has_alpha};
-
-    for (int ch = 0; ch < 4; ch++) {
-        if (!channel_active[ch] || !channel_coeffs[ch]) continue;
-
-        // EZBC encoder allocates output buffer
-        uint8_t *ezbc_output = NULL;
-        size_t encoded_size = tav_encode_channel_ezbc(
-            channel_coeffs[ch], coeff_count, width, height,
-            &ezbc_output  // Double pointer - EZBC allocates memory
-        );
-
-        if (encoded_size == 0 || !ezbc_output) {
-            continue;  // Skip channel if encoding failed
-        }
-
-        // Write channel size header (4 bytes)
-        *((uint32_t*)write_ptr) = (uint32_t)encoded_size;
-        write_ptr += sizeof(uint32_t);
-
-        // Copy EZBC output to our buffer
-        memcpy(write_ptr, ezbc_output, encoded_size);
-        write_ptr += encoded_size;
-        total_size += sizeof(uint32_t) + encoded_size;
-
-        // Free EZBC-allocated buffer
-        free(ezbc_output);
-    }
-
-    return total_size;
-}
-
-/**
- * Unified GOP preprocessing function.
- * Handles twobitmap, EZBC, and raw coefficient modes.
- * Based on encoder_tav.c:preprocess_gop_unified().
- */
-static size_t preprocess_gop_unified(preprocess_mode_t preprocess_mode, int16_t **quant_y, int16_t **quant_co, int16_t **quant_cg,
-                                     int num_frames, int num_pixels, int width, int height, int channel_layout,
-                                     uint8_t *output_buffer) {
-    const channel_layout_config_t *config = &channel_layouts[channel_layout];
-
-    // Raw mode: just concatenate all coefficients
-    if (preprocess_mode == PREPROCESS_RAW) {
-        size_t offset = 0;
-
-        // Copy all Y frames
-        if (config->has_y && quant_y) {
-            for (int frame = 0; frame < num_frames; frame++) {
-                if (quant_y[frame]) {
-                    memcpy(output_buffer + offset, quant_y[frame], num_pixels * sizeof(int16_t));
-                    offset += num_pixels * sizeof(int16_t);
-                }
-            }
-        }
-
-        // Copy all Co frames
-        if (config->has_co && quant_co) {
-            for (int frame = 0; frame < num_frames; frame++) {
-                if (quant_co[frame]) {
-                    memcpy(output_buffer + offset, quant_co[frame], num_pixels * sizeof(int16_t));
-                    offset += num_pixels * sizeof(int16_t);
-                }
-            }
-        }
-
-        // Copy all Cg frames
-        if (config->has_cg && quant_cg) {
-            for (int frame = 0; frame < num_frames; frame++) {
-                if (quant_cg[frame]) {
-                    memcpy(output_buffer + offset, quant_cg[frame], num_pixels * sizeof(int16_t));
-                    offset += num_pixels * sizeof(int16_t);
-                }
-            }
-        }
-
-        return offset;
-    }
-
-    // EZBC mode: encode each frame separately with EZBC
-    if (preprocess_mode == PREPROCESS_EZBC) {
-        size_t total_size = 0;
-        uint8_t *write_ptr = output_buffer;
-
-        for (int frame = 0; frame < num_frames; frame++) {
-            // Encode this frame with EZBC
-            size_t frame_size = preprocess_coefficients_ezbc(
-                quant_y ? quant_y[frame] : NULL,
-                quant_co ? quant_co[frame] : NULL,
-                quant_cg ? quant_cg[frame] : NULL,
-                NULL,  // No alpha in GOP mode
-                num_pixels, width, height, channel_layout,
-                write_ptr + sizeof(uint32_t)  // Leave space for size header
-            );
-
-            // Write frame size header
-            *((uint32_t*)write_ptr) = (uint32_t)frame_size;
-            write_ptr += sizeof(uint32_t) + frame_size;
-            total_size += sizeof(uint32_t) + frame_size;
-        }
-
-        return total_size;
-    }
-
-    // Twobit-map mode: original unified GOP preprocessing
-    const int map_bytes_per_frame = (num_pixels * 2 + 7) / 8;  // 2 bits per coefficient
-
-    // Count "other" values (not 0, +1, or -1) for each channel across ALL frames
-    int other_count_y = 0, other_count_co = 0, other_count_cg = 0;
-
-    for (int frame = 0; frame < num_frames; frame++) {
-        if (config->has_y && quant_y && quant_y[frame]) {
-            for (int i = 0; i < num_pixels; i++) {
-                int16_t val = quant_y[frame][i];
-                if (val != 0 && val != 1 && val != -1) other_count_y++;
-            }
-        }
-        if (config->has_co && quant_co && quant_co[frame]) {
-            for (int i = 0; i < num_pixels; i++) {
-                int16_t val = quant_co[frame][i];
-                if (val != 0 && val != 1 && val != -1) other_count_co++;
-            }
-        }
-        if (config->has_cg && quant_cg && quant_cg[frame]) {
-            for (int i = 0; i < num_pixels; i++) {
-                int16_t val = quant_cg[frame][i];
-                if (val != 0 && val != 1 && val != -1) other_count_cg++;
-            }
-        }
-    }
-
-    // Calculate buffer layout
-    uint8_t *write_ptr = output_buffer;
-
-    // Significance maps: grouped by channel (all Y frames, then all Co frames, then all Cg frames)
-    uint8_t *y_maps_start = write_ptr;
-    if (config->has_y) write_ptr += map_bytes_per_frame * num_frames;
-
-    uint8_t *co_maps_start = write_ptr;
-    if (config->has_co) write_ptr += map_bytes_per_frame * num_frames;
-
-    uint8_t *cg_maps_start = write_ptr;
-    if (config->has_cg) write_ptr += map_bytes_per_frame * num_frames;
-
-    // Value arrays: grouped by channel
-    int16_t *y_values = (int16_t *)write_ptr;
-    if (config->has_y) write_ptr += other_count_y * sizeof(int16_t);
-
-    int16_t *co_values = (int16_t *)write_ptr;
-    if (config->has_co) write_ptr += other_count_co * sizeof(int16_t);
-
-    int16_t *cg_values = (int16_t *)write_ptr;
-    if (config->has_cg) write_ptr += other_count_cg * sizeof(int16_t);
-
-    // Clear all map bytes
-    size_t total_map_bytes = 0;
-    if (config->has_y) total_map_bytes += map_bytes_per_frame * num_frames;
-    if (config->has_co) total_map_bytes += map_bytes_per_frame * num_frames;
-    if (config->has_cg) total_map_bytes += map_bytes_per_frame * num_frames;
-    memset(output_buffer, 0, total_map_bytes);
-
-    // Process each frame and fill maps/values
-    int y_value_idx = 0, co_value_idx = 0, cg_value_idx = 0;
-
-    for (int frame = 0; frame < num_frames; frame++) {
-        uint8_t *y_map = y_maps_start + frame * map_bytes_per_frame;
-        uint8_t *co_map = co_maps_start + frame * map_bytes_per_frame;
-        uint8_t *cg_map = cg_maps_start + frame * map_bytes_per_frame;
-
-        for (int i = 0; i < num_pixels; i++) {
-            size_t bit_pos = i * 2;
-            size_t byte_idx = bit_pos / 8;
-            size_t bit_offset = bit_pos % 8;
-
-            // Process Y channel
-            if (config->has_y && quant_y && quant_y[frame]) {
-                int16_t val = quant_y[frame][i];
-                uint8_t code;
-
-                if (val == 0) code = 0;       // 00
-                else if (val == 1) code = 1;  // 01
-                else if (val == -1) code = 2; // 10
-                else {
-                    code = 3;  // 11
-                    y_values[y_value_idx++] = val;
-                }
-
-                y_map[byte_idx] |= (code << bit_offset);
-                if (bit_offset == 7 && byte_idx + 1 < (size_t)map_bytes_per_frame) {
-                    y_map[byte_idx + 1] |= (code >> 1);
-                }
-            }
-
-            // Process Co channel
-            if (config->has_co && quant_co && quant_co[frame]) {
-                int16_t val = quant_co[frame][i];
-                uint8_t code;
-
-                if (val == 0) code = 0;
-                else if (val == 1) code = 1;
-                else if (val == -1) code = 2;
-                else {
-                    code = 3;
-                    co_values[co_value_idx++] = val;
-                }
-
-                co_map[byte_idx] |= (code << bit_offset);
-                if (bit_offset == 7 && byte_idx + 1 < (size_t)map_bytes_per_frame) {
-                    co_map[byte_idx + 1] |= (code >> 1);
-                }
-            }
-
-            // Process Cg channel
-            if (config->has_cg && quant_cg && quant_cg[frame]) {
-                int16_t val = quant_cg[frame][i];
-                uint8_t code;
-
-                if (val == 0) code = 0;
-                else if (val == 1) code = 1;
-                else if (val == -1) code = 2;
-                else {
-                    code = 3;
-                    cg_values[cg_value_idx++] = val;
-                }
-
-                cg_map[byte_idx] |= (code << bit_offset);
-                if (bit_offset == 7 && byte_idx + 1 < (size_t)map_bytes_per_frame) {
-                    cg_map[byte_idx + 1] |= (code >> 1);
-                }
-            }
-        }
-    }
-
-    // Return total size
-    return (size_t)(write_ptr - output_buffer);
-}
-
-/**
- * Encode single-frame I-frame (intra-only mode).
- * Uses 2D DWT on individual frame.
- */
-static int encode_gop_intra_only(tav_encoder_context_t *ctx, gop_slot_t *slot) {
-    const int width = slot->width;
-    const int height = slot->height;
-    const int num_pixels = width * height;
-    const int num_frames = slot->num_frames;
-
-    if (num_frames != 1) {
-        snprintf(slot->error_message, MAX_ERROR_MESSAGE,
-                 "encode_gop_intra_only called with %d frames (expected 1)", num_frames);
-        return -1;
-    }
-
-    // Step 1: RGB to YCoCg-R (or ICtCp) for full frame
-    float *frame_y = tav_calloc(num_pixels, sizeof(float));
-    float *frame_co = tav_calloc(num_pixels, sizeof(float));
-    float *frame_cg = tav_calloc(num_pixels, sizeof(float));
-
-    rgb_to_colour_space_frame(ctx, slot->rgb_frames[0], frame_y, frame_co, frame_cg, width, height);
-
-    // Get quantiser values from QLUT indices
-    int base_quantiser_y = QLUT[ctx->quantiser_y];
-    int base_quantiser_co = QLUT[ctx->quantiser_co];
-    int base_quantiser_cg = QLUT[ctx->quantiser_cg];
-
-    // Allocate preprocess buffer for all tiles
-    // For tiled mode: num_tiles * (4-byte header + max_tile_coeff_size * 3 * sizeof(int16_t))
-    // For monoblock: just the frame
-    const int tile_coeff_count = ctx->monoblock ? num_pixels : (TAV_PADDED_TILE_SIZE_X * TAV_PADDED_TILE_SIZE_Y);
-    const int num_tiles = ctx->tiles_x * ctx->tiles_y;
-    size_t preprocess_capacity = num_tiles * (4 + tile_coeff_count * 3 * sizeof(int16_t) * 2);  // Conservative with EZBC overhead
-    uint8_t *preprocess_buffer = tav_malloc(preprocess_capacity);
-    size_t preprocess_offset = 0;
-
-    if (ctx->monoblock) {
-        // ======================================================================
-        // Monoblock mode: process entire frame as single tile
-        // ======================================================================
-        int16_t *quant_y = tav_calloc(num_pixels, sizeof(int16_t));
-        int16_t *quant_co = tav_calloc(num_pixels, sizeof(int16_t));
-        int16_t *quant_cg = tav_calloc(num_pixels, sizeof(int16_t));
-
-        // Write tile header: [mode(1)][qY_override(1)][qCo_override(1)][qCg_override(1)]
-        preprocess_buffer[preprocess_offset++] = 0x01;  // TAV_MODE_INTRA
-        preprocess_buffer[preprocess_offset++] = 0;     // qY override (0 = use header)
-        preprocess_buffer[preprocess_offset++] = 0;     // qCo override
-        preprocess_buffer[preprocess_offset++] = 0;     // qCg override
-
-        // Apply 2D DWT to full frame
-        tav_dwt_2d_forward(frame_y, width, height, ctx->decomp_levels, ctx->wavelet_type);
-        tav_dwt_2d_forward(frame_co, width, height, ctx->decomp_levels, ctx->wavelet_type);
-        tav_dwt_2d_forward(frame_cg, width, height, ctx->decomp_levels, ctx->wavelet_type);
-
-        // Quantize
-        if (ctx->perceptual_tuning) {
-            tav_quantise_perceptual(ctx->compat_enc, frame_y, quant_y, num_pixels,
-                                   base_quantiser_y, ctx->dead_zone_threshold, width, height, ctx->decomp_levels, 0, 0);
-            tav_quantise_perceptual(ctx->compat_enc, frame_co, quant_co, num_pixels,
-                                   base_quantiser_co, ctx->dead_zone_threshold, width, height, ctx->decomp_levels, 1, 0);
-            tav_quantise_perceptual(ctx->compat_enc, frame_cg, quant_cg, num_pixels,
-                                   base_quantiser_cg, ctx->dead_zone_threshold, width, height, ctx->decomp_levels, 1, 0);
-        } else {
-            tav_quantise_uniform(frame_y, quant_y, num_pixels, base_quantiser_y,
-                                ctx->dead_zone_threshold, width, height,
-                                ctx->decomp_levels, 0);
-            tav_quantise_uniform(frame_co, quant_co, num_pixels, base_quantiser_co,
-                                ctx->dead_zone_threshold, width, height,
-                                ctx->decomp_levels, 1);
-            tav_quantise_uniform(frame_cg, quant_cg, num_pixels, base_quantiser_cg,
-                                ctx->dead_zone_threshold, width, height,
-                                ctx->decomp_levels, 1);
-        }
-
-        // EZBC encode
-        size_t tile_size = preprocess_coefficients_ezbc(
-            quant_y, quant_co, quant_cg, NULL,
-            num_pixels, width, height, ctx->channel_layout,
-            preprocess_buffer + preprocess_offset
-        );
-        preprocess_offset += tile_size;
-
-        free(quant_y); free(quant_co); free(quant_cg);
-
-    } else {
-        // ======================================================================
-        // Tiled mode: process each tile independently
-        // ======================================================================
-        const int padded_pixels = TAV_PADDED_TILE_SIZE_X * TAV_PADDED_TILE_SIZE_Y;
-
-        // Allocate reusable tile buffers
-        float *tile_y = tav_calloc(padded_pixels, sizeof(float));
-        float *tile_co = tav_calloc(padded_pixels, sizeof(float));
-        float *tile_cg = tav_calloc(padded_pixels, sizeof(float));
-        int16_t *quant_y = tav_calloc(padded_pixels, sizeof(int16_t));
-        int16_t *quant_co = tav_calloc(padded_pixels, sizeof(int16_t));
-        int16_t *quant_cg = tav_calloc(padded_pixels, sizeof(int16_t));
-
-        for (int tile_y_idx = 0; tile_y_idx < ctx->tiles_y; tile_y_idx++) {
-            for (int tile_x_idx = 0; tile_x_idx < ctx->tiles_x; tile_x_idx++) {
-                // Write tile header: [mode(1)][qY_override(1)][qCo_override(1)][qCg_override(1)]
-                preprocess_buffer[preprocess_offset++] = 0x01;  // TAV_MODE_INTRA
-                preprocess_buffer[preprocess_offset++] = 0;     // qY override (0 = use header)
-                preprocess_buffer[preprocess_offset++] = 0;     // qCo override
-                preprocess_buffer[preprocess_offset++] = 0;     // qCg override
-
-                // Extract padded tile from full frame
-                tav_extract_padded_tile(frame_y, frame_co, frame_cg,
-                                       width, height,
-                                       tile_x_idx, tile_y_idx,
-                                       tile_y, tile_co, tile_cg);
-
-                // Apply 2D DWT to padded tile
-                tav_dwt_2d_forward_padded_tile(tile_y, ctx->decomp_levels, ctx->wavelet_type);
-                tav_dwt_2d_forward_padded_tile(tile_co, ctx->decomp_levels, ctx->wavelet_type);
-                tav_dwt_2d_forward_padded_tile(tile_cg, ctx->decomp_levels, ctx->wavelet_type);
-
-                // Quantize tile coefficients
-                if (ctx->perceptual_tuning) {
-                    tav_quantise_perceptual(ctx->compat_enc, tile_y, quant_y, padded_pixels,
-                                           base_quantiser_y, ctx->dead_zone_threshold,
-                                           TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y,
-                                           ctx->decomp_levels, 0, 0);
-                    tav_quantise_perceptual(ctx->compat_enc, tile_co, quant_co, padded_pixels,
-                                           base_quantiser_co, ctx->dead_zone_threshold,
-                                           TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y,
-                                           ctx->decomp_levels, 1, 0);
-                    tav_quantise_perceptual(ctx->compat_enc, tile_cg, quant_cg, padded_pixels,
-                                           base_quantiser_cg, ctx->dead_zone_threshold,
-                                           TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y,
-                                           ctx->decomp_levels, 1, 0);
-                } else {
-                    tav_quantise_uniform(tile_y, quant_y, padded_pixels, base_quantiser_y,
-                                        ctx->dead_zone_threshold,
-                                        TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y,
-                                        ctx->decomp_levels, 0);
-                    tav_quantise_uniform(tile_co, quant_co, padded_pixels, base_quantiser_co,
-                                        ctx->dead_zone_threshold,
-                                        TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y,
-                                        ctx->decomp_levels, 1);
-                    tav_quantise_uniform(tile_cg, quant_cg, padded_pixels, base_quantiser_cg,
-                                        ctx->dead_zone_threshold,
-                                        TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y,
-                                        ctx->decomp_levels, 1);
-                }
-
-                // EZBC encode tile
-                size_t tile_size = preprocess_coefficients_ezbc(
-                    quant_y, quant_co, quant_cg, NULL,
-                    padded_pixels, TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y,
-                    ctx->channel_layout,
-                    preprocess_buffer + preprocess_offset
-                );
-                preprocess_offset += tile_size;
-
-                // Clear tile buffers for next iteration
-                memset(tile_y, 0, padded_pixels * sizeof(float));
-                memset(tile_co, 0, padded_pixels * sizeof(float));
-                memset(tile_cg, 0, padded_pixels * sizeof(float));
-            }
-        }
-
-        free(tile_y); free(tile_co); free(tile_cg);
-        free(quant_y); free(quant_co); free(quant_cg);
-    }
-
-    // Free full-frame YCoCg buffers
-    free(frame_y); free(frame_co); free(frame_cg);
-
-    // Step 5: Zstd compress all tile data (or bypass if zstd_level < 0)
-    size_t output_size;
-    uint8_t *output_buffer;
-    int is_uncompressed = 0;
-
-    if (ctx->zstd_level < 0) {
-        // Bypass Zstd compression - use raw data
-        output_size = preprocess_offset;
-        output_buffer = preprocess_buffer;  // Transfer ownership
-        is_uncompressed = 1;
-    } else {
-        // Normal Zstd compression
-        size_t compressed_bound = ZSTD_compressBound(preprocess_offset);
-        output_buffer = tav_malloc(compressed_bound);
-
-        output_size = ZSTD_compress(
-            output_buffer, compressed_bound,
-            preprocess_buffer, preprocess_offset,
-            ctx->zstd_level
-        );
-
-        free(preprocess_buffer);
-
-        if (ZSTD_isError(output_size)) {
-            free(output_buffer);
-            snprintf(slot->error_message, MAX_ERROR_MESSAGE,
-                     "Zstd compression failed: %s", ZSTD_getErrorName(output_size));
-            return -1;
-        }
-    }
-
-    // Step 6: Format I-frame packet
-    // Packet format: [type(1)][size(4)][data(N)]
-    // Size field MSB: 0=compressed, 1=uncompressed
-    size_t packet_size = 1 + 4 + output_size;
-    tav_encoder_packet_t *pkt = calloc(1, sizeof(tav_encoder_packet_t));
-    pkt->data = malloc(packet_size);
-    pkt->size = packet_size;
-    pkt->packet_type = TAV_PACKET_IFRAME;
-    pkt->frame_number = slot->frame_numbers[0];
-    pkt->is_video = 1;
-
-    uint8_t *write_ptr = pkt->data;
-    *write_ptr++ = TAV_PACKET_IFRAME;
-    uint32_t size_field = (uint32_t)output_size;
-    memcpy(write_ptr, &size_field, 4);
-    write_ptr += 4;
-    memcpy(write_ptr, output_buffer, output_size);
-
-    // Store packet in slot
-    slot->packets = pkt;
-    slot->num_packets = 1;
-
-    free(output_buffer);
-
-    return 0;  // Success
-}
-
-/**
- * Encode multi-frame GOP using 3D DWT (unified mode).
- * Uses temporal + spatial DWT for optimal compression.
- */
-static int encode_gop_unified(tav_encoder_context_t *ctx, gop_slot_t *slot) {
-    const int width = slot->width;
-    const int height = slot->height;
-    const int num_pixels = width * height;
-    const int num_frames = slot->num_frames;
-
-    // Allocate work buffers for all frames
-    float **work_y = tav_calloc(num_frames, sizeof(float*));
-    float **work_co = tav_calloc(num_frames, sizeof(float*));
-    float **work_cg = tav_calloc(num_frames, sizeof(float*));
-    int16_t **quant_y = tav_calloc(num_frames, sizeof(int16_t*));
-    int16_t **quant_co = tav_calloc(num_frames, sizeof(int16_t*));
-    int16_t **quant_cg = tav_calloc(num_frames, sizeof(int16_t*));
-
-    for (int i = 0; i < num_frames; i++) {
-        work_y[i] = tav_calloc(num_pixels, sizeof(float));
-        work_co[i] = tav_calloc(num_pixels, sizeof(float));
-        work_cg[i] = tav_calloc(num_pixels, sizeof(float));
-        quant_y[i] = tav_calloc(num_pixels, sizeof(int16_t));
-        quant_co[i] = tav_calloc(num_pixels, sizeof(int16_t));
-        quant_cg[i] = tav_calloc(num_pixels, sizeof(int16_t));
-    }
-
-    // Step 1: RGB to YCoCg-R for all frames
-    for (int frame = 0; frame < num_frames; frame++) {
-        rgb_to_colour_space_frame(ctx, slot->rgb_frames[frame],
-                                  work_y[frame], work_co[frame], work_cg[frame],
-                                  width, height);
-    }
-
-    // Step 2: Apply 3D DWT (temporal + spatial)
-    tav_dwt_3d_forward(work_y, width, height, num_frames,
-                      ctx->decomp_levels, ctx->temporal_levels,
-                      ctx->wavelet_type, ctx->temporal_wavelet);
-    tav_dwt_3d_forward(work_co, width, height, num_frames,
-                      ctx->decomp_levels, ctx->temporal_levels,
-                      ctx->wavelet_type, ctx->temporal_wavelet);
-    tav_dwt_3d_forward(work_cg, width, height, num_frames,
-                      ctx->decomp_levels, ctx->temporal_levels,
-                      ctx->wavelet_type, ctx->temporal_wavelet);
-
-    // Step 3: Quantize 3D coefficients
-    // ctx->quantiser_y/co/cg contain QLUT indices, lookup actual quantiser values
-    int base_quantiser_y = QLUT[ctx->quantiser_y];
-    int base_quantiser_co = QLUT[ctx->quantiser_co];
-    int base_quantiser_cg = QLUT[ctx->quantiser_cg];
-
-    // Use perceptual or uniform quantization based on user setting
-    if (ctx->verbose) {
-        fprintf(stderr, "[DEBUG] GOP quantization: decomp_levels=%d, base_q_y=%d, perceptual=%d, preset=0x%02x\n",
-                ctx->compat_enc->decomp_levels, base_quantiser_y, ctx->compat_enc->perceptual_tuning, ctx->compat_enc->encoder_preset);
-    }
-
-    tav_quantise_3d_dwt(ctx->compat_enc, work_y, quant_y, num_frames, num_pixels,
-                       base_quantiser_y, 0);
-    tav_quantise_3d_dwt(ctx->compat_enc, work_co, quant_co, num_frames, num_pixels,
-                       base_quantiser_co, 1);
-    tav_quantise_3d_dwt(ctx->compat_enc, work_cg, quant_cg, num_frames, num_pixels,
-                       base_quantiser_cg, 1);
-
-    // Step 4: Unified GOP preprocessing (EZBC only)
-    size_t preprocess_capacity = num_pixels * num_frames * 3 * sizeof(int16_t) + 65536;
-    uint8_t *preprocess_buffer = tav_malloc(preprocess_capacity);
-
-    size_t preprocessed_size = preprocess_gop_unified(
-        PREPROCESS_EZBC, quant_y, quant_co, quant_cg,
-        num_frames, num_pixels, width, height, ctx->channel_layout,
-        preprocess_buffer
-    );
-
-    // Step 5: Zstd compress (or bypass if zstd_level < 0)
-    size_t output_size;
-    uint8_t *output_buffer;
-
-    if (ctx->zstd_level < 0) {
-        // Bypass Zstd compression - use raw preprocessed data
-        output_size = preprocessed_size;
-        output_buffer = preprocess_buffer;  // Transfer ownership
-    } else {
-        // Normal Zstd compression
-        size_t compressed_bound = ZSTD_compressBound(preprocessed_size);
-        output_buffer = tav_malloc(compressed_bound);
-
-        output_size = ZSTD_compress(
-            output_buffer, compressed_bound,
-            preprocess_buffer, preprocessed_size,
-            ctx->zstd_level
-        );
-
-        free(preprocess_buffer);
-
-        if (ZSTD_isError(output_size)) {
-            // Cleanup and return error
-            for (int i = 0; i < num_frames; i++) {
-                free(work_y[i]); free(work_co[i]); free(work_cg[i]);
-                free(quant_y[i]); free(quant_co[i]); free(quant_cg[i]);
-            }
-            free(work_y); free(work_co); free(work_cg);
-            free(quant_y); free(quant_co); free(quant_cg);
-            free(output_buffer);
-            snprintf(slot->error_message, MAX_ERROR_MESSAGE,
-                     "Zstd compression failed: %s", ZSTD_getErrorName(output_size));
-            return -1;
-        }
-    }
-
-    // Step 6: Format GOP unified packet
-    // Packet format: [type(1)][gop_size(1)][size(4)][data(N)]
-    size_t packet_size = 1 + 1 + 4 + output_size;
-    tav_encoder_packet_t *pkt = calloc(1, sizeof(tav_encoder_packet_t));
-    pkt->data = malloc(packet_size);
-    pkt->size = packet_size;
-    pkt->packet_type = TAV_PACKET_GOP_UNIFIED;
-    pkt->frame_number = slot->frame_numbers[0];  // First frame in GOP
-    pkt->is_video = 1;
-
-    uint8_t *write_ptr = pkt->data;
-    *write_ptr++ = TAV_PACKET_GOP_UNIFIED;
-    *write_ptr++ = (uint8_t)num_frames;
-    uint32_t size_field = (uint32_t)output_size;
-    memcpy(write_ptr, &size_field, 4);
-    write_ptr += 4;
-    memcpy(write_ptr, output_buffer, output_size);
-
-    // Store packet in slot
-    slot->packets = pkt;
-    slot->num_packets = 1;
-
-    // Cleanup
-    for (int i = 0; i < num_frames; i++) {
-        free(work_y[i]); free(work_co[i]); free(work_cg[i]);
-        free(quant_y[i]); free(quant_co[i]); free(quant_cg[i]);
-    }
-    free(work_y); free(work_co); free(work_cg);
-    free(quant_y); free(quant_co); free(quant_cg);
-    free(output_buffer);
-
-    return 0;  // Success
-}
diff --git a/video_encoder/lib/libtavenc/tav_encoder_quantize.c b/video_encoder/lib/libtavenc/tav_encoder_quantize.c
deleted file mode 100644
index ed0e56f..0000000
--- a/video_encoder/lib/libtavenc/tav_encoder_quantize.c
+++ /dev/null
@@ -1,635 +0,0 @@
-/**
- * TAV Encoder - Quantization Library
- *
- * Provides DWT coefficient quantization with perceptual weighting based on
- * the Human Visual System (HVS). Implements separable 3D quantization for
- * temporal GOP encoding.
- *
- * Extracted from encoder_tav.c as part of library refactoring.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <math.h>
-
-// Forward declaration of encoder context (defined in main encoder)
-typedef struct tav_encoder_s tav_encoder_t;
-
-// =============================================================================
-// Utility Functions
-// =============================================================================
-
-static inline int CLAMP(int x, int min, int max) {
-    return x < min ? min : (x > max ? max : x);
-}
-
-static inline float FCLAMP(float x, float min, float max) {
-    return x < min ? min : (x > max ? max : x);
-}
-
-// =============================================================================
-// Constants for Perceptual Model
-// =============================================================================
-
-// Dead-zone quantization scaling factors (applied selectively to luma only)
-#define DEAD_ZONE_FINEST_SCALE 1.0f      // Full dead-zone for finest level
-#define DEAD_ZONE_FINE_SCALE 0.5f        // Reduced dead-zone for second-finest level
-
-// Anisotropy parameters for horizontal vs vertical detail quantization
-// Index by quality level (0-5)
-static const float ANISOTROPY_MULT[] = {5.1f, 3.8f, 2.7f, 2.0f, 1.5f, 1.2f, 1.0f};
-static const float ANISOTROPY_BIAS[] = {0.4f, 0.3f, 0.2f, 0.1f, 0.0f, 0.0f, 0.0f};
-
-// Chroma-specific anisotropy (more aggressive quantization)
-static const float ANISOTROPY_MULT_CHROMA[] = {7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f};
-static const float ANISOTROPY_BIAS_CHROMA[] = {1.0f, 0.8f, 0.6f, 0.4f, 0.2f, 0.0f, 0.0f};
-
-// Detail preservation factors for 2-pixel and 4-pixel structures
-#define FOUR_PIXEL_DETAILER 0.88f
-#define TWO_PIXEL_DETAILER  0.92f
-
-// =============================================================================
-// Subband Analysis Helper Functions
-// =============================================================================
-
-/**
- * Get decomposition level for coefficient at 2D spatial position.
- * Returns: level (1=finest to decomp_levels=coarsest, 0 for LL)
- */
-static int get_subband_level_2d(int x, int y, int width, int height, int decomp_levels) {
-    // Recursively determine which level this coefficient belongs to
-    // by checking which quadrant it's in at each level
-
-    for (int level = 1; level <= decomp_levels; level++) {
-        int half_w = width >> 1;
-        int half_h = height >> 1;
-
-        // Check if in top-left quadrant (LL - contains finer levels)
-        if (x < half_w && y < half_h) {
-            // Continue to finer level
-            width = half_w;
-            height = half_h;
-            continue;
-        }
-
-        // In one of the detail bands (LH, HL, HH) at this level
-        return level;
-    }
-
-    // Reached LL subband at coarsest level
-    return 0;
-}
-
-/**
- * Get subband type for coefficient at 2D spatial position.
- * Returns: 0=LL, 1=LH, 2=HL, 3=HH
- */
-static int get_subband_type_2d(int x, int y, int width, int height, int decomp_levels) {
-    // Recursively determine which subband this coefficient belongs to
-
-    for (int level = 1; level <= decomp_levels; level++) {
-        int half_w = width >> 1;
-        int half_h = height >> 1;
-
-        // Check if in top-left quadrant (LL - contains finer levels)
-        if (x < half_w && y < half_h) {
-            // Continue to finer level
-            width = half_w;
-            height = half_h;
-            continue;
-        }
-
-        // Determine which detail band at this level
-        if (x >= half_w && y < half_h) {
-            return 1; // LH (top-right)
-        } else if (x < half_w && y >= half_h) {
-            return 2; // HL (bottom-left)
-        } else {
-            return 3; // HH (bottom-right)
-        }
-    }
-
-    // Reached LL subband at coarsest level
-    return 0;
-}
-
-/**
- * Legacy functions - convert linear index to 2D coords.
- */
-static int get_subband_level(int linear_idx, int width, int height, int decomp_levels) {
-    int x = linear_idx % width;
-    int y = linear_idx / width;
-    return get_subband_level_2d(x, y, width, height, decomp_levels);
-}
-
-static int get_subband_type(int linear_idx, int width, int height, int decomp_levels) {
-    int x = linear_idx % width;
-    int y = linear_idx / width;
-    return get_subband_type_2d(x, y, width, height, decomp_levels);
-}
-
-/**
- * Get temporal subband level for frame index in GOP.
- * After temporal DWT with N levels, frames are organized as:
- * - Frames 0...num_frames/(2^N) = tL...L (N low-passes, coarsest)
- * - Remaining frames are temporal high-pass subbands at various levels
- *
- * Returns: 0 for coarsest (tLL), temporal_levels for finest (tHH)
- */
-static int get_temporal_subband_level(int frame_idx, int num_frames, int temporal_levels) {
-    // Check each level boundary from coarsest to finest
-    for (int level = 0; level < temporal_levels; level++) {
-        int frames_at_this_level = num_frames >> (temporal_levels - level);
-        if (frame_idx < frames_at_this_level) {
-            return level;
-        }
-    }
-
-    // Finest level (first decomposition's high-pass)
-    return temporal_levels;
-}
-
-// =============================================================================
-// Perceptual Model Functions (HVS-based weighting)
-// =============================================================================
-
-// Linear interpolation helper
-static float lerp(float x, float y, float a) {
-    return x * (1.f - a) + y * a;
-}
-
-/**
- * Perceptual model for LH subband (horizontal details).
- * Human eyes are more sensitive to horizontal details than vertical.
- * Curve: https://www.desmos.com/calculator/mjlpwqm8ge
- *
- * @param quality  Quality level (0-5)
- * @param level    Normalized decomposition level (1.0-6.0)
- * @return         Perceptual weight multiplier
- */
-static float perceptual_model3_LH(int quality, float level) {
-    float H4 = 1.2f;
-    float K = 2.f; // using fixed value for fixed curve; quantiser will scale it up anyway
-    float K12 = K * 12.f;
-    float x = level;
-
-    float Lx = H4 - ((K + 1.f) / 15.f) * (x - 4.f);
-    float C3 = -1.f / 45.f * (K12 + 92);
-    float G3x = (-x / 180.f) * (K12 + 5*x*x - 60*x + 252) - C3 + H4;
-
-    return (level >= 4) ? Lx : G3x;
-}
-
-/**
- * Perceptual model for HL subband (vertical details).
- * Derived from LH with anisotropy compensation.
- *
- * @param quality  Quality level (0-5)
- * @param LH       LH subband weight
- * @return         Perceptual weight multiplier
- */
-static float perceptual_model3_HL(int quality, float LH) {
-    return fmaf(LH, ANISOTROPY_MULT[quality], ANISOTROPY_BIAS[quality]);
-}
-
-/**
- * Perceptual model for HH subband (diagonal details).
- * Interpolates between LH and HL based on level.
- *
- * @param LH     LH subband weight
- * @param HL     HL subband weight
- * @param level  Normalized decomposition level
- * @return       Perceptual weight multiplier
- */
-static float perceptual_model3_HH(float LH, float HL, float level) {
-    float Kx = fmaf((sqrtf(level) - 1.f), 0.5f, 0.5f);
-    return lerp(LH, HL, Kx);
-}
-
-/**
- * Perceptual model for LL subband (low-frequency baseband).
- * Contains most image energy, preserve carefully.
- *
- * @param quality  Quality level (0-5)
- * @param level    Normalized decomposition level
- * @return         Perceptual weight multiplier
- */
-static float perceptual_model3_LL(int quality, float level) {
-    float n = perceptual_model3_LH(quality, level);
-    float m = perceptual_model3_LH(quality, level - 1) / n;
-
-    return n / m;
-}
-
-/**
- * Chroma-specific perceptual model base curve.
- * Less critical for human perception, more aggressive quantization.
- *
- * @param quality  Quality level (0-5)
- * @param level    Normalized decomposition level
- * @return         Perceptual weight multiplier
- */
-static float perceptual_model3_chroma_basecurve(int quality, float level) {
-    return 1.0f - (1.0f / (0.5f * quality * quality + 1.0f)) * (level - 4.0f);
-}
-
-/**
- * Get perceptual weight for a specific subband and level.
- * Implements HVS-optimized frequency weighting.
- *
- * NOTE: This function requires enc->quality_level field from encoder context.
- *
- * @param enc           Encoder context (for quality_level)
- * @param level0        Decomposition level (1-based: 1=finest, decomp_levels=coarsest)
- * @param subband_type  Subband type (0=LL, 1=LH, 2=HL, 3=HH)
- * @param is_chroma     1 for chroma channels, 0 for luma
- * @param max_levels    Maximum decomposition levels
- * @return              Perceptual weight multiplier (≥1.0)
- */
-static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels);
-
-/**
- * Get perceptual weight for coefficient at linear index position.
- * Maps linear coefficient index to DWT subband layout.
- *
- * NOTE: This function requires enc->widths[]/enc->heights[] arrays from encoder context.
- *
- * @param enc             Encoder context (for widths/heights arrays and quality_level)
- * @param linear_idx      Linear coefficient index
- * @param width           Frame width
- * @param height          Frame height
- * @param decomp_levels   Number of decomposition levels
- * @param is_chroma       1 for chroma channels, 0 for luma
- * @return                Perceptual weight multiplier (≥1.0)
- */
-static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma);
-
-// =============================================================================
-// Quantization Functions
-// =============================================================================
-
-/**
- * Quantize DWT coefficients with uniform quantization and optional dead-zone.
- *
- * This is the basic quantization function without perceptual weighting.
- * Dead-zone quantization is applied selectively to luma channel only:
- * - HH1 (finest diagonal): full dead-zone
- * - LH1/HL1/HH2: half dead-zone
- * - Coarser levels: no dead-zone (preserve structure)
- *
- * @param coeffs               Input DWT coefficients (float)
- * @param quantised            Output quantized coefficients (int16_t)
- * @param size                 Number of coefficients
- * @param quantiser            Base quantizer value (1-4096)
- * @param dead_zone_threshold  Dead-zone threshold (0.0 = disabled)
- * @param width                Frame width
- * @param height               Frame height
- * @param decomp_levels        Number of decomposition levels
- * @param is_chroma            1 for chroma channels, 0 for luma
- */
-void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
-                          float dead_zone_threshold, int width, int height,
-                          int decomp_levels, int is_chroma);
-
-/**
- * Quantize DWT coefficients with per-coefficient perceptual weighting.
- *
- * Applies HVS-optimized frequency weighting to each coefficient based on its
- * position in the DWT subband tree. Implements the full perceptual model with
- * dead-zone quantization for luma.
- *
- * NOTE: This function requires encoder context fields:
- * - enc->widths[]/enc->heights[] for subband layout
- * - enc->quality_level for perceptual model
- * - enc->dead_zone_threshold for dead-zone quantization
- *
- * @param enc             Encoder context
- * @param coeffs          Input DWT coefficients (float)
- * @param quantised       Output quantized coefficients (int16_t)
- * @param size            Number of coefficients
- * @param base_quantiser  Base quantizer value (before perceptual weighting)
- * @param dead_zone_threshold  Dead-zone threshold (0.0 = disabled)
- * @param width           Frame width
- * @param height          Frame height
- * @param decomp_levels   Number of decomposition levels
- * @param is_chroma       1 for chroma channels, 0 for luma
- * @param frame_count     Current frame number (for any frame-dependent logic)
- */
-void tav_quantise_perceptual(tav_encoder_t *enc,
-                              float *coeffs, int16_t *quantised, int size,
-                              int base_quantiser, float dead_zone_threshold, int width, int height,
-                              int decomp_levels, int is_chroma, int frame_count);
-
-/**
- * Quantize 3D DWT coefficients with SEPARABLE temporal-spatial quantization.
- *
- * After 3D DWT (temporal + spatial), GOP coefficients have this structure:
- * - Temporal DWT applied first → temporal subbands at different levels
- * - Spatial 2D DWT applied to each temporal subband
- *
- * Quantization strategy:
- * 1. Compute temporal base quantizer: tH_base(level) = Qbase * 2^(beta*level^kappa)
- *    - tLL (level 0): coarsest temporal → smallest quantizer
- *    - tHH (highest level): finest temporal → largest quantizer
- * 2. Apply spatial perceptual weighting to tH_base
- * 3. Final quantizer: Q_effective = tH_base × spatial_weight
- *
- * NOTE: This function requires encoder context fields:
- * - enc->encoder_preset for sports mode detection
- * - enc->temporal_decomp_levels for temporal level calculation
- * - enc->verbose for debug output
- * - Plus all fields needed by tav_quantise_perceptual()
- *
- * @param enc             Encoder context
- * @param gop_coeffs      GOP coefficients [frame][pixel] (temporal subbands)
- * @param quantised       Output quantized coefficients [frame][pixel]
- * @param num_frames      Number of temporal subband frames
- * @param spatial_size    Number of spatial coefficients per frame
- * @param base_quantiser  Base quantizer value (before temporal/spatial scaling)
- * @param is_chroma       1 for chroma channels, 0 for luma
- */
-void tav_quantise_3d_dwt(tav_encoder_t *enc,
-                         float **gop_coeffs, int16_t **quantised, int num_frames,
-                         int spatial_size, int base_quantiser, int is_chroma);
-
-/**
- * Convert floating-point quantizer to integer with dithering (for bitrate mode).
- *
- * Implements Floyd-Steinberg style error diffusion to avoid quantization
- * artifacts when converting float quantizer values to integers for rate control.
- *
- * NOTE: This function requires encoder context fields:
- * - enc->adjusted_quantiser_y_float (current float quantizer)
- * - enc->dither_accumulator (accumulated error, modified by this function)
- *
- * @param enc  Encoder context
- * @return     Integer quantizer value (0-254)
- */
-int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc);
-
-// =============================================================================
-// Perceptual Weight Implementation (requires encoder context)
-// =============================================================================
-
-// NOTE: This implementation requires encoder context (enc->quality_level)
-// Struct definition will be in encoder header when integrated
-
-#ifndef TAV_ENCODER_QUANTIZE_INTERNAL
-// Forward declare structure access - will be properly defined when integrated
-struct tav_encoder_s {
-    int quality_level;
-    int *widths;
-    int *heights;
-    int decomp_levels;
-    float dead_zone_threshold;
-    int encoder_preset;
-    int temporal_decomp_levels;
-    int verbose;
-    int frame_count;
-    float adjusted_quantiser_y_float;
-    float dither_accumulator;
-    int width;
-    int height;
-    int perceptual_tuning;
-};
-#endif
-
-static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels) {
-    // Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity
-
-    float level = 1.0f + ((level0 - 1.0f) / (max_levels - 1.0f)) * 5.0f;
-
-    // strategy: more horizontal detail
-    if (!is_chroma) {
-        // LL subband - contains most image energy, preserve carefully
-        if (subband_type == 0)
-            return perceptual_model3_LL(enc->quality_level, level);
-
-        // LH subband - horizontal details (human eyes more sensitive)
-        float LH = perceptual_model3_LH(enc->quality_level, level);
-        if (subband_type == 1)
-            return LH;
-
-        // HL subband - vertical details
-        float HL = perceptual_model3_HL(enc->quality_level, LH);
-        if (subband_type == 2)
-            return HL * (2.2f >= level && level >= 1.8f ? TWO_PIXEL_DETAILER : 3.2f >= level && level >= 2.8f ? FOUR_PIXEL_DETAILER : 1.0f);
-
-        // HH subband - diagonal details
-        else return perceptual_model3_HH(LH, HL, level) * (2.2f >= level && level >= 1.8f ? TWO_PIXEL_DETAILER : 3.2f >= level && level >= 2.8f ? FOUR_PIXEL_DETAILER : 1.0f);
-    } else {
-        // CHROMA CHANNELS: Less critical for human perception, more aggressive quantisation
-        float base = perceptual_model3_chroma_basecurve(enc->quality_level, level - 1);
-
-        if (subband_type == 0) { // LL chroma - still important but less than luma
-            return 1.0f;
-        } else if (subband_type == 1) { // LH chroma - horizontal chroma details
-            return FCLAMP(base, 1.0f, 100.0f);
-        } else if (subband_type == 2) { // HL chroma - vertical chroma details (even less critical)
-            return FCLAMP(base * ANISOTROPY_MULT_CHROMA[enc->quality_level], 1.0f, 100.0f);
-        } else { // HH chroma - diagonal chroma details (most aggressive)
-            return FCLAMP(base * ANISOTROPY_MULT_CHROMA[enc->quality_level] + ANISOTROPY_BIAS_CHROMA[enc->quality_level], 1.0f, 100.0f);
-        }
-    }
-}
-
-static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma) {
-    // If perceptual tuning is disabled, use uniform quantization (weight = 1.0)
-    if (!enc->perceptual_tuning) {
-        return 1.0f;
-    }
-
-    // Map linear coefficient index to DWT subband using same layout as decoder
-    int offset = 0;
-
-    // First: LL subband at maximum decomposition level
-    int ll_width = enc->widths[decomp_levels];
-    int ll_height = enc->heights[decomp_levels];
-    int ll_size = ll_width * ll_height;
-
-    if (linear_idx < offset + ll_size) {
-        // LL subband at maximum level - use get_perceptual_weight for consistency
-        return get_perceptual_weight(enc, decomp_levels, 0, is_chroma, decomp_levels);
-    }
-    offset += ll_size;
-
-    // Then: LH, HL, HH subbands for each level from max down to 1
-    for (int level = decomp_levels; level >= 1; level--) {
-        int level_width = enc->widths[decomp_levels - level + 1];
-        int level_height = enc->heights[decomp_levels - level + 1];
-        const int subband_size = level_width * level_height;
-
-        // LH subband (horizontal details)
-        if (linear_idx < offset + subband_size) {
-            return get_perceptual_weight(enc, level, 1, is_chroma, decomp_levels);
-        }
-        offset += subband_size;
-
-        // HL subband (vertical details)
-        if (linear_idx < offset + subband_size) {
-            return get_perceptual_weight(enc, level, 2, is_chroma, decomp_levels);
-        }
-        offset += subband_size;
-
-        // HH subband (diagonal details)
-        if (linear_idx < offset + subband_size) {
-            return get_perceptual_weight(enc, level, 3, is_chroma, decomp_levels);
-        }
-        offset += subband_size;
-    }
-
-    // Fallback for out-of-bounds indices
-    return 1.0f;
-}
-
-// =============================================================================
-// Quantization Function Implementations
-// =============================================================================
-
-void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
-                          float dead_zone_threshold, int width, int height,
-                          int decomp_levels, int is_chroma) {
-    float effective_q = quantiser;
-    effective_q = FCLAMP(effective_q, 1.0f, 4096.0f);
-
-    // Scalar implementation (AVX-512 version would go in separate optimized module)
-    for (int i = 0; i < size; i++) {
-        float quantised_val = coeffs[i] / effective_q;
-
-        // Apply dead-zone quantisation ONLY to luma channel and specific subbands
-        if (dead_zone_threshold > 0.0f && !is_chroma) {
-            int level = get_subband_level(i, width, height, decomp_levels);
-            int subband_type = get_subband_type(i, width, height, decomp_levels);
-            float level_threshold = 0.0f;
-
-            if (level == 1) {
-                // Finest level
-                if (subband_type == 3) {
-                    // HH1: full dead-zone
-                    level_threshold = dead_zone_threshold * DEAD_ZONE_FINEST_SCALE;
-                } else if (subband_type == 1 || subband_type == 2) {
-                    // LH1, HL1: half dead-zone
-                    level_threshold = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
-                }
-            } else if (level == 2) {
-                // Second-finest level
-                if (subband_type == 3) {
-                    // HH2: half dead-zone
-                    level_threshold = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
-                }
-            }
-
-            if (fabsf(quantised_val) <= level_threshold) {
-                quantised_val = 0.0f;
-            }
-        }
-
-        quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767);
-    }
-}
-
-void tav_quantise_perceptual(tav_encoder_t *enc,
-                              float *coeffs, int16_t *quantised, int size,
-                              int base_quantiser, float dead_zone_threshold, int width, int height,
-                              int decomp_levels, int is_chroma, int frame_count) {
-    float effective_base_q = base_quantiser;
-    effective_base_q = FCLAMP(effective_base_q, 1.0f, 4096.0f);
-
-    for (int i = 0; i < size; i++) {
-        // Apply perceptual weight based on coefficient's position in DWT layout
-        float weight = get_perceptual_weight_for_position(enc, i, width, height, decomp_levels, is_chroma);
-        float effective_q = effective_base_q * weight;
-        float quantised_val = coeffs[i] / effective_q;
-
-        // Apply dead-zone quantisation ONLY to luma channel
-        if (dead_zone_threshold > 0.0f && !is_chroma) {
-            int level = get_subband_level(i, width, height, decomp_levels);
-            int subband_type = get_subband_type(i, width, height, decomp_levels);
-            float level_threshold = 0.0f;
-
-            if (level == 1) {
-                if (subband_type == 3) {
-                    level_threshold = dead_zone_threshold * DEAD_ZONE_FINEST_SCALE;
-                } else if (subband_type == 1 || subband_type == 2) {
-                    level_threshold = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
-                }
-            } else if (level == 2) {
-                if (subband_type == 3) {
-                    level_threshold = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
-                }
-            }
-
-            if (fabsf(quantised_val) <= level_threshold) {
-                quantised_val = 0.0f;
-            }
-        }
-
-        quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767);
-    }
-}
-
-void tav_quantise_3d_dwt(tav_encoder_t *enc,
-                         float **gop_coeffs, int16_t **quantised, int num_frames,
-                         int spatial_size, int base_quantiser, int is_chroma) {
-    // Sports preset: use finer temporal quantisation (less aggressive)
-    const float BETA = (enc->encoder_preset & 0x01) ? 0.0f : 0.6f;
-    const float KAPPA = (enc->encoder_preset & 0x01) ? 1.0f : 1.14f;
-
-    // Process each temporal subband independently (separable approach)
-    for (int t = 0; t < num_frames; t++) {
-        // Step 1: Determine temporal subband level
-        int temporal_level = get_temporal_subband_level(t, num_frames, enc->temporal_decomp_levels);
-
-        // Step 2: Compute temporal base quantiser using exponential scaling
-        float temporal_scale = powf(2.0f, BETA * powf(temporal_level, KAPPA));
-        float temporal_quantiser = base_quantiser * temporal_scale;
-
-        int temporal_base_quantiser = (int)roundf(temporal_quantiser);
-        temporal_base_quantiser = CLAMP(temporal_base_quantiser, 1, 255);
-
-        // Step 3: Apply spatial quantisation within this temporal subband
-        // Check if perceptual tuning is enabled (stored in encoder_preset bit 1)
-        // NOTE: perceptual_tuning field is NOT in tav_encoder_s, so we check context flag
-        // For now, just use perceptual (this will be controlled by caller disabling)
-        tav_quantise_perceptual(
-            enc,
-            gop_coeffs[t],           // Input: spatial coefficients for this temporal subband
-            quantised[t],            // Output: quantised spatial coefficients
-            spatial_size,            // Number of spatial coefficients
-            temporal_base_quantiser, // Temporally-scaled base quantiser
-            enc->dead_zone_threshold, // Dead zone threshold
-            enc->width,              // Frame width
-            enc->height,             // Frame height
-            enc->decomp_levels,      // Spatial decomposition levels
-            is_chroma,               // Is chroma channel
-            enc->frame_count + t     // Frame number
-        );
-
-        /*if (enc->verbose && (t == 0 || t == num_frames - 1)) {
-            printf("  Temporal subband %d: level=%d, tH_base=%d\n",
-                   t, temporal_level, temporal_base_quantiser);
-        }*/
-    }
-}
-
-int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc) {
-    float qy_float = enc->adjusted_quantiser_y_float;
-
-    // Add accumulated dithering error
-    float qy_with_error = qy_float + enc->dither_accumulator;
-
-    // Round to nearest integer
-    int qy_int = (int)(qy_with_error + 0.5f);
-
-    // Calculate quantisation error and accumulate for next frame
-    // This is Floyd-Steinberg style error diffusion
-    float quantisation_error = qy_with_error - (float)qy_int;
-    enc->dither_accumulator = quantisation_error * 0.5f; // Diffuse 50% of error to next frame
-
-    // Clamp to valid range
-    qy_int = CLAMP(qy_int, 0, 254);
-
-    return qy_int;
-}
diff --git a/video_encoder/lib/libtavenc/tav_encoder_quantize.h b/video_encoder/lib/libtavenc/tav_encoder_quantize.h
deleted file mode 100644
index 4c5b46a..0000000
--- a/video_encoder/lib/libtavenc/tav_encoder_quantize.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/**
- * TAV Encoder - Quantization Library
- *
- * Public API for DWT coefficient quantization with perceptual weighting.
- */
-
-#ifndef TAV_ENCODER_QUANTIZE_H
-#define TAV_ENCODER_QUANTIZE_H
-
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Forward declaration of encoder context (defined in main encoder)
-typedef struct tav_encoder_s tav_encoder_t;
-
-// =============================================================================
-// Uniform Quantization
-// =============================================================================
-
-/**
- * Quantize DWT coefficients with uniform quantization and optional dead-zone.
- *
- * This is the basic quantization function without perceptual weighting.
- * Dead-zone quantization is applied selectively to luma channel only:
- * - HH1 (finest diagonal): full dead-zone
- * - LH1/HL1/HH2: half dead-zone
- * - Coarser levels: no dead-zone (preserve structure)
- *
- * @param coeffs               Input DWT coefficients (float)
- * @param quantised            Output quantized coefficients (int16_t)
- * @param size                 Number of coefficients
- * @param quantiser            Base quantizer value (1-4096)
- * @param dead_zone_threshold  Dead-zone threshold (0.0 = disabled)
- * @param width                Frame width
- * @param height               Frame height
- * @param decomp_levels        Number of decomposition levels
- * @param is_chroma            1 for chroma channels, 0 for luma
- */
-void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
-                          float dead_zone_threshold, int width, int height,
-                          int decomp_levels, int is_chroma);
-
-// =============================================================================
-// Perceptual Quantization
-// =============================================================================
-
-/**
- * Quantize DWT coefficients with per-coefficient perceptual weighting.
- *
- * Applies HVS-optimized frequency weighting to each coefficient based on its
- * position in the DWT subband tree. Implements the full perceptual model with
- * dead-zone quantization for luma.
- *
- * NOTE: This function requires encoder context fields:
- * - enc->widths[]/enc->heights[] for subband layout
- * - enc->quality_level for perceptual model
- * - enc->dead_zone_threshold for dead-zone quantization
- *
- * @param enc             Encoder context
- * @param coeffs          Input DWT coefficients (float)
- * @param quantised       Output quantized coefficients (int16_t)
- * @param size            Number of coefficients
- * @param base_quantiser  Base quantizer value (before perceptual weighting)
- * @param dead_zone_threshold  Dead-zone threshold (0.0 = disabled)
- * @param width           Frame width
- * @param height          Frame height
- * @param decomp_levels   Number of decomposition levels
- * @param is_chroma       1 for chroma channels, 0 for luma
- * @param frame_count     Current frame number (for any frame-dependent logic)
- */
-void tav_quantise_perceptual(tav_encoder_t *enc,
-                              float *coeffs, int16_t *quantised, int size,
-                              int base_quantiser, float dead_zone_threshold, int width, int height,
-                              int decomp_levels, int is_chroma, int frame_count);
-
-// =============================================================================
-// 3D GOP Quantization
-// =============================================================================
-
-/**
- * Quantize 3D DWT coefficients with SEPARABLE temporal-spatial quantization.
- *
- * After 3D DWT (temporal + spatial), GOP coefficients have this structure:
- * - Temporal DWT applied first → temporal subbands at different levels
- * - Spatial 2D DWT applied to each temporal subband
- *
- * Quantization strategy:
- * 1. Compute temporal base quantizer: tH_base(level) = Qbase * 2^(beta*level^kappa)
- *    - tLL (level 0): coarsest temporal → smallest quantizer
- *    - tHH (highest level): finest temporal → largest quantizer
- * 2. Apply spatial perceptual weighting to tH_base
- * 3. Final quantizer: Q_effective = tH_base × spatial_weight
- *
- * NOTE: This function requires encoder context fields:
- * - enc->encoder_preset for sports mode detection
- * - enc->temporal_decomp_levels for temporal level calculation
- * - enc->verbose for debug output
- * - Plus all fields needed by tav_quantise_perceptual()
- *
- * @param enc             Encoder context
- * @param gop_coeffs      GOP coefficients [frame][pixel] (temporal subbands)
- * @param quantised       Output quantized coefficients [frame][pixel]
- * @param num_frames      Number of temporal subband frames
- * @param spatial_size    Number of spatial coefficients per frame
- * @param base_quantiser  Base quantizer value (before temporal/spatial scaling)
- * @param is_chroma       1 for chroma channels, 0 for luma
- */
-void tav_quantise_3d_dwt(tav_encoder_t *enc,
-                         float **gop_coeffs, int16_t **quantised, int num_frames,
-                         int spatial_size, int base_quantiser, int is_chroma);
-
-// =============================================================================
-// Rate Control
-// =============================================================================
-
-/**
- * Convert floating-point quantizer to integer with dithering (for bitrate mode).
- *
- * Implements Floyd-Steinberg style error diffusion to avoid quantization
- * artifacts when converting float quantizer values to integers for rate control.
- *
- * NOTE: This function requires encoder context fields:
- * - enc->adjusted_quantiser_y_float (current float quantizer)
- * - enc->dither_accumulator (accumulated error, modified by this function)
- *
- * @param enc  Encoder context
- * @return     Integer quantizer value (0-254)
- */
-int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // TAV_ENCODER_QUANTIZE_H
diff --git a/video_encoder/lib/libtavenc/tav_encoder_tile.c b/video_encoder/lib/libtavenc/tav_encoder_tile.c
deleted file mode 100644
index b63b362..0000000
--- a/video_encoder/lib/libtavenc/tav_encoder_tile.c
+++ /dev/null
@@ -1,159 +0,0 @@
-/**
- * TAV Encoder Library - Tile Processing Implementation
- */
-
-#include "tav_encoder_tile.h"
-#include "tav_encoder_dwt.h"
-#include <string.h>
-#include <stdlib.h>
-
-#define CLAMP(x, min, max) ((x) < (min) ? (min) : ((x) > (max) ? (max) : (x)))
-
-void tav_extract_padded_tile(const float *frame_y, const float *frame_co, const float *frame_cg,
-                             int frame_width, int frame_height,
-                             int tile_x, int tile_y,
-                             float *padded_y, float *padded_co, float *padded_cg) {
-    const int core_start_x = tile_x * TAV_TILE_SIZE_X;
-    const int core_start_y = tile_y * TAV_TILE_SIZE_Y;
-
-    // Process row by row with bulk copying for core region where possible
-    for (int py = 0; py < TAV_PADDED_TILE_SIZE_Y; py++) {
-        // Map padded row to source image row
-        int src_y = core_start_y + py - TAV_TILE_MARGIN;
-
-        // Handle vertical boundary conditions with mirroring
-        if (src_y < 0) {
-            src_y = -src_y;
-        } else if (src_y >= frame_height) {
-            src_y = frame_height - 1 - (src_y - frame_height);
-        }
-        src_y = CLAMP(src_y, 0, frame_height - 1);
-
-        // Calculate source and destination row offsets
-        const int padded_row_offset = py * TAV_PADDED_TILE_SIZE_X;
-        const int src_row_offset = src_y * frame_width;
-
-        // Margin boundaries in padded tile
-        const int core_start_px = TAV_TILE_MARGIN;
-        const int core_end_px = TAV_TILE_MARGIN + TAV_TILE_SIZE_X;
-
-        // Check if core region is entirely within frame bounds
-        const int core_src_start_x = core_start_x;
-        const int core_src_end_x = core_start_x + TAV_TILE_SIZE_X;
-
-        if (core_src_start_x >= 0 && core_src_end_x <= frame_width) {
-            // Bulk copy core region in one operation
-            const int src_core_offset = src_row_offset + core_src_start_x;
-
-            memcpy(&padded_y[padded_row_offset + core_start_px],
-                   &frame_y[src_core_offset],
-                   TAV_TILE_SIZE_X * sizeof(float));
-            memcpy(&padded_co[padded_row_offset + core_start_px],
-                   &frame_co[src_core_offset],
-                   TAV_TILE_SIZE_X * sizeof(float));
-            memcpy(&padded_cg[padded_row_offset + core_start_px],
-                   &frame_cg[src_core_offset],
-                   TAV_TILE_SIZE_X * sizeof(float));
-
-            // Handle left margin pixels individually
-            for (int px = 0; px < core_start_px; px++) {
-                int src_x = core_start_x + px - TAV_TILE_MARGIN;
-                if (src_x < 0) src_x = -src_x;
-                src_x = CLAMP(src_x, 0, frame_width - 1);
-
-                int src_idx = src_row_offset + src_x;
-                int padded_idx = padded_row_offset + px;
-
-                padded_y[padded_idx] = frame_y[src_idx];
-                padded_co[padded_idx] = frame_co[src_idx];
-                padded_cg[padded_idx] = frame_cg[src_idx];
-            }
-
-            // Handle right margin pixels individually
-            for (int px = core_end_px; px < TAV_PADDED_TILE_SIZE_X; px++) {
-                int src_x = core_start_x + px - TAV_TILE_MARGIN;
-                if (src_x >= frame_width) {
-                    src_x = frame_width - 1 - (src_x - frame_width);
-                }
-                src_x = CLAMP(src_x, 0, frame_width - 1);
-
-                int src_idx = src_row_offset + src_x;
-                int padded_idx = padded_row_offset + px;
-
-                padded_y[padded_idx] = frame_y[src_idx];
-                padded_co[padded_idx] = frame_co[src_idx];
-                padded_cg[padded_idx] = frame_cg[src_idx];
-            }
-        } else {
-            // Fallback: process entire row pixel by pixel (for edge tiles)
-            for (int px = 0; px < TAV_PADDED_TILE_SIZE_X; px++) {
-                int src_x = core_start_x + px - TAV_TILE_MARGIN;
-
-                // Handle horizontal boundary conditions with mirroring
-                if (src_x < 0) {
-                    src_x = -src_x;
-                } else if (src_x >= frame_width) {
-                    src_x = frame_width - 1 - (src_x - frame_width);
-                }
-                src_x = CLAMP(src_x, 0, frame_width - 1);
-
-                int src_idx = src_row_offset + src_x;
-                int padded_idx = padded_row_offset + px;
-
-                padded_y[padded_idx] = frame_y[src_idx];
-                padded_co[padded_idx] = frame_co[src_idx];
-                padded_cg[padded_idx] = frame_cg[src_idx];
-            }
-        }
-    }
-}
-
-// Use existing 2D DWT from tav_encoder_dwt.c
-// For padded tiles, we simply call the existing function with tile dimensions
-
-void tav_dwt_2d_forward_padded_tile(float *tile_data, int levels, int filter_type) {
-    // Use the existing 2D DWT with padded tile dimensions
-    tav_dwt_2d_forward(tile_data, TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y,
-                       levels, filter_type);
-}
-
-void tav_dwt_2d_inverse_padded_tile(float *tile_data, int levels, int filter_type) {
-    // Note: Inverse transform not yet implemented in library for arbitrary dimensions
-    // For now, this is a placeholder - decoder uses different code path
-    (void)tile_data;
-    (void)levels;
-    (void)filter_type;
-}
-
-void tav_crop_tile_margins(const float *padded_data, float *core_data) {
-    for (int y = 0; y < TAV_TILE_SIZE_Y; y++) {
-        const int padded_row = (y + TAV_TILE_MARGIN) * TAV_PADDED_TILE_SIZE_X + TAV_TILE_MARGIN;
-        const int core_row = y * TAV_TILE_SIZE_X;
-        memcpy(&core_data[core_row], &padded_data[padded_row], TAV_TILE_SIZE_X * sizeof(float));
-    }
-}
-
-void tav_crop_tile_margins_edge(const float *padded_data, float *core_data,
-                                int actual_width, int actual_height) {
-    for (int y = 0; y < actual_height; y++) {
-        const int padded_row = (y + TAV_TILE_MARGIN) * TAV_PADDED_TILE_SIZE_X + TAV_TILE_MARGIN;
-        const int core_row = y * actual_width;
-        memcpy(&core_data[core_row], &padded_data[padded_row], actual_width * sizeof(float));
-    }
-}
-
-void tav_get_tile_dimensions(int frame_width, int frame_height,
-                             int tile_x, int tile_y,
-                             int *tile_width, int *tile_height) {
-    // Calculate the starting position of this tile
-    int start_x = tile_x * TAV_TILE_SIZE_X;
-    int start_y = tile_y * TAV_TILE_SIZE_Y;
-
-    // Calculate how much of the frame is left from this starting position
-    int remaining_width = frame_width - start_x;
-    int remaining_height = frame_height - start_y;
-
-    // Tile width is the minimum of standard tile size and remaining width
-    *tile_width = (remaining_width < TAV_TILE_SIZE_X) ? remaining_width : TAV_TILE_SIZE_X;
-    *tile_height = (remaining_height < TAV_TILE_SIZE_Y) ? remaining_height : TAV_TILE_SIZE_Y;
-}
diff --git a/video_encoder/lib/libtavenc/tav_encoder_tile.h b/video_encoder/lib/libtavenc/tav_encoder_tile.h
deleted file mode 100644
index de7e839..0000000
--- a/video_encoder/lib/libtavenc/tav_encoder_tile.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/**
- * TAV Encoder Library - Tile Processing
- *
- * Functions for padded tile extraction and DWT processing.
- * Used when video dimensions exceed monoblock threshold (720x576).
- */
-
-#ifndef TAV_ENCODER_TILE_H
-#define TAV_ENCODER_TILE_H
-
-#include <stdint.h>
-#include <stddef.h>
-#include "../../include/tav_encoder_lib.h"
-
-// Tile dimensions (from header)
-// TAV_TILE_SIZE_X = 640, TAV_TILE_SIZE_Y = 540
-// TAV_PADDED_TILE_SIZE_X = 704, TAV_PADDED_TILE_SIZE_Y = 604
-// TAV_TILE_MARGIN = 32
-
-/**
- * Extract a padded tile from full-frame YCoCg buffers.
- *
- * Extracts a tile at position (tile_x, tile_y) with TAV_TILE_MARGIN pixels
- * of padding on all sides for seamless DWT processing. Uses symmetric
- * extension (mirroring) at frame boundaries.
- *
- * @param frame_y       Full frame Y channel
- * @param frame_co      Full frame Co channel
- * @param frame_cg      Full frame Cg channel
- * @param frame_width   Full frame width
- * @param frame_height  Full frame height
- * @param tile_x        Tile X index (0-based)
- * @param tile_y        Tile Y index (0-based)
- * @param padded_y      Output: Padded tile Y (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y floats)
- * @param padded_co     Output: Padded tile Co
- * @param padded_cg     Output: Padded tile Cg
- */
-void tav_extract_padded_tile(const float *frame_y, const float *frame_co, const float *frame_cg,
-                             int frame_width, int frame_height,
-                             int tile_x, int tile_y,
-                             float *padded_y, float *padded_co, float *padded_cg);
-
-/**
- * Apply 2D DWT forward transform to a padded tile.
- *
- * Uses fixed PADDED_TILE_SIZE dimensions (704x604) for optimal performance.
- *
- * @param tile_data     Tile data (modified in-place)
- * @param levels        Number of decomposition levels
- * @param filter_type   Wavelet filter type (0=CDF 5/3, 1=CDF 9/7, etc.)
- */
-void tav_dwt_2d_forward_padded_tile(float *tile_data, int levels, int filter_type);
-
-/**
- * Apply 2D DWT inverse transform to a padded tile.
- *
- * @param tile_data     Tile data (modified in-place)
- * @param levels        Number of decomposition levels
- * @param filter_type   Wavelet filter type
- */
-void tav_dwt_2d_inverse_padded_tile(float *tile_data, int levels, int filter_type);
-
-/**
- * Crop a padded tile to its core region (removing margins).
- *
- * Extracts the central TAV_TILE_SIZE_X × TAV_TILE_SIZE_Y region from a padded tile.
- *
- * @param padded_data   Padded tile (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y)
- * @param core_data     Output: Core tile (TILE_SIZE_X * TILE_SIZE_Y)
- */
-void tav_crop_tile_margins(const float *padded_data, float *core_data);
-
-/**
- * Crop a padded tile to actual dimensions for edge tiles.
- *
- * For tiles at the right/bottom edges of a frame, the actual tile may be
- * smaller than TILE_SIZE_X × TILE_SIZE_Y. This function handles that case.
- *
- * @param padded_data   Padded tile (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y)
- * @param core_data     Output: Core tile data
- * @param actual_width  Actual tile width (may be < TILE_SIZE_X for edge tiles)
- * @param actual_height Actual tile height (may be < TILE_SIZE_Y for edge tiles)
- */
-void tav_crop_tile_margins_edge(const float *padded_data, float *core_data,
-                                int actual_width, int actual_height);
-
-/**
- * Calculate actual tile dimensions for a given tile position.
- *
- * Edge tiles may be smaller than the standard tile size.
- *
- * @param frame_width   Full frame width
- * @param frame_height  Full frame height
- * @param tile_x        Tile X index
- * @param tile_y        Tile Y index
- * @param tile_width    Output: Actual tile width
- * @param tile_height   Output: Actual tile height
- */
-void tav_get_tile_dimensions(int frame_width, int frame_height,
-                             int tile_x, int tile_y,
-                             int *tile_width, int *tile_height);
-
-#endif // TAV_ENCODER_TILE_H
diff --git a/video_encoder/lib/libtavenc/tav_encoder_utils.c b/video_encoder/lib/libtavenc/tav_encoder_utils.c
deleted file mode 100644
index 235efc5..0000000
--- a/video_encoder/lib/libtavenc/tav_encoder_utils.c
+++ /dev/null
@@ -1,441 +0,0 @@
-/**
- * TAV Encoder - Utilities Library
- *
- * Common utility functions and helpers used across the encoder.
- * Includes math utilities, clamping, filename generation, etc.
- *
- * Extracted from encoder_tav.c as part of library refactoring.
- */
-
-#define _POSIX_C_SOURCE 200112L
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <time.h>
-#include <math.h>
-
-// =============================================================================
-// Math Utilities
-// =============================================================================
-
-/**
- * Clamp integer value to range [min, max].
- */
-int tav_clamp_int(int x, int min, int max) {
-    return x < min ? min : (x > max ? max : x);
-}
-
-/**
- * Clamp float value to range [min, max].
- */
-float tav_clamp_float(float x, float min, float max) {
-    return x < min ? min : (x > max ? max : x);
-}
-
-/**
- * Clamp double value to range [min, max].
- */
-double tav_clamp_double(double x, double min, double max) {
-    return x < min ? min : (x > max ? max : x);
-}
-
-/**
- * Round double to nearest integer.
- */
-int tav_iround(double v) {
-    return (int)floor(v + 0.5);
-}
-
-/**
- * Linear interpolation between two values.
- * @param a  Start value (when t=0)
- * @param b  End value (when t=1)
- * @param t  Interpolation factor (0.0 to 1.0)
- * @return   Interpolated value
- */
-float tav_lerp(float a, float b, float t) {
-    return a * (1.0f - t) + b * t;
-}
-
-/**
- * Double precision linear interpolation.
- */
-double tav_lerp_double(double a, double b, double t) {
-    return a * (1.0 - t) + b * t;
-}
-
-/**
- * Get minimum of two integers.
- */
-int tav_min_int(int a, int b) {
-    return a < b ? a : b;
-}
-
-/**
- * Get maximum of two integers.
- */
-int tav_max_int(int a, int b) {
-    return a > b ? a : b;
-}
-
-/**
- * Get minimum of two floats.
- */
-float tav_min_float(float a, float b) {
-    return a < b ? a : b;
-}
-
-/**
- * Get maximum of two floats.
- */
-float tav_max_float(float a, float b) {
-    return a > b ? a : b;
-}
-
-/**
- * Compute absolute value of integer.
- */
-int tav_abs_int(int x) {
-    return x < 0 ? -x : x;
-}
-
-/**
- * Compute absolute value of float.
- */
-float tav_abs_float(float x) {
-    return x < 0.0f ? -x : x;
-}
-
-/**
- * Sign function: returns -1, 0, or 1.
- */
-int tav_sign(int x) {
-    return (x > 0) - (x < 0);
-}
-
-/**
- * Check if integer is power of 2.
- */
-int tav_is_power_of_2(int x) {
-    return x > 0 && (x & (x - 1)) == 0;
-}
-
-/**
- * Round up to next power of 2.
- */
-int tav_next_power_of_2(int x) {
-    if (x <= 0) return 1;
-    x--;
-    x |= x >> 1;
-    x |= x >> 2;
-    x |= x >> 4;
-    x |= x >> 8;
-    x |= x >> 16;
-    return x + 1;
-}
-
-/**
- * Compute floor of log2(x).
- * Returns -1 for x <= 0.
- */
-int tav_floor_log2(int x) {
-    if (x <= 0) return -1;
-    int log = 0;
-    while (x > 1) {
-        x >>= 1;
-        log++;
-    }
-    return log;
-}
-
-/**
- * Compute ceil of log2(x).
- * Returns -1 for x <= 0.
- */
-int tav_ceil_log2(int x) {
-    if (x <= 0) return -1;
-    if (x == 1) return 0;
-    int log = tav_floor_log2(x);
-    // Check if x is power of 2
-    if ((1 << log) == x) {
-        return log;
-    }
-    return log + 1;
-}
-
-// =============================================================================
-// Random Filename Generation
-// =============================================================================
-
-/**
- * Generate a random temporary filename with .mp2 extension.
- * Format: /tmp/[32 random chars].mp2
- *
- * @param filename  Output buffer (must be at least 42 bytes)
- */
-void tav_generate_random_filename(char *filename) {
-    static int seeded = 0;
-    if (!seeded) {
-        srand(time(NULL));
-        seeded = 1;
-    }
-
-    const char charset[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
-    const int charset_size = sizeof(charset) - 1;
-
-    // Start with the prefix
-    strcpy(filename, "/tmp/");
-
-    // Generate 32 random characters
-    for (int i = 0; i < 32; i++) {
-        filename[5 + i] = charset[rand() % charset_size];
-    }
-
-    // Add the .mp2 extension
-    strcpy(filename + 37, ".mp2");
-    filename[41] = '\0';  // Null terminate
-}
-
-/**
- * Generate a random temporary filename with custom extension.
- * Format: /tmp/[32 random chars].[ext]
- *
- * @param filename  Output buffer (must be large enough for path + extension)
- * @param ext       File extension (without leading dot, e.g., "tmp", "wav")
- */
-void tav_generate_random_filename_ext(char *filename, const char *ext) {
-    static int seeded = 0;
-    if (!seeded) {
-        srand(time(NULL));
-        seeded = 1;
-    }
-
-    const char charset[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
-    const int charset_size = sizeof(charset) - 1;
-
-    // Start with the prefix
-    strcpy(filename, "/tmp/");
-
-    // Generate 32 random characters
-    for (int i = 0; i < 32; i++) {
-        filename[5 + i] = charset[rand() % charset_size];
-    }
-
-    // Add the extension
-    filename[37] = '.';
-    strcpy(filename + 38, ext);
-}
-
-// =============================================================================
-// Memory Utilities
-// =============================================================================
-
-/**
- * Safe malloc with error checking.
- * Exits program on allocation failure.
- */
-void *tav_malloc(size_t size) {
-    void *ptr = malloc(size);
-    if (!ptr && size > 0) {
-        fprintf(stderr, "ERROR: Failed to allocate %zu bytes\n", size);
-        exit(1);
-    }
-    return ptr;
-}
-
-/**
- * Safe calloc with error checking.
- * Exits program on allocation failure.
- */
-void *tav_calloc(size_t count, size_t size) {
-    void *ptr = calloc(count, size);
-    if (!ptr && count > 0 && size > 0) {
-        fprintf(stderr, "ERROR: Failed to allocate %zu elements of %zu bytes\n", count, size);
-        exit(1);
-    }
-    return ptr;
-}
-
-/**
- * Safe realloc with error checking.
- * Exits program on allocation failure.
- */
-void *tav_realloc(void *ptr, size_t size) {
-    void *new_ptr = realloc(ptr, size);
-    if (!new_ptr && size > 0) {
-        fprintf(stderr, "ERROR: Failed to reallocate to %zu bytes\n", size);
-        exit(1);
-    }
-    return new_ptr;
-}
-
-/**
- * Allocate aligned memory.
- * Returns NULL on failure.
- */
-void *tav_aligned_alloc(size_t alignment, size_t size) {
-    // Ensure alignment is power of 2
-    if (!tav_is_power_of_2(alignment)) {
-        fprintf(stderr, "ERROR: Alignment must be power of 2, got %zu\n", alignment);
-        return NULL;
-    }
-
-#ifdef _WIN32
-    return _aligned_malloc(size, alignment);
-#else
-    void *ptr = NULL;
-    if (posix_memalign(&ptr, alignment, size) != 0) {
-        return NULL;
-    }
-    return ptr;
-#endif
-}
-
-/**
- * Free aligned memory.
- */
-void tav_aligned_free(void *ptr) {
-#ifdef _WIN32
-    _aligned_free(ptr);
-#else
-    free(ptr);
-#endif
-}
-
-// =============================================================================
-// Array Utilities
-// =============================================================================
-
-/**
- * Fill integer array with constant value.
- */
-void tav_array_fill_int(int *array, size_t count, int value) {
-    for (size_t i = 0; i < count; i++) {
-        array[i] = value;
-    }
-}
-
-/**
- * Fill float array with constant value.
- */
-void tav_array_fill_float(float *array, size_t count, float value) {
-    for (size_t i = 0; i < count; i++) {
-        array[i] = value;
-    }
-}
-
-/**
- * Copy integer array.
- */
-void tav_array_copy_int(int *dst, const int *src, size_t count) {
-    memcpy(dst, src, count * sizeof(int));
-}
-
-/**
- * Copy float array.
- */
-void tav_array_copy_float(float *dst, const float *src, size_t count) {
-    memcpy(dst, src, count * sizeof(float));
-}
-
-/**
- * Find maximum value in integer array.
- */
-int tav_array_max_int(const int *array, size_t count) {
-    if (count == 0) return 0;
-    int max_val = array[0];
-    for (size_t i = 1; i < count; i++) {
-        if (array[i] > max_val) {
-            max_val = array[i];
-        }
-    }
-    return max_val;
-}
-
-/**
- * Find minimum value in integer array.
- */
-int tav_array_min_int(const int *array, size_t count) {
-    if (count == 0) return 0;
-    int min_val = array[0];
-    for (size_t i = 1; i < count; i++) {
-        if (array[i] < min_val) {
-            min_val = array[i];
-        }
-    }
-    return min_val;
-}
-
-/**
- * Find maximum absolute value in float array.
- */
-float tav_array_max_abs_float(const float *array, size_t count) {
-    if (count == 0) return 0.0f;
-    float max_abs = fabsf(array[0]);
-    for (size_t i = 1; i < count; i++) {
-        float abs_val = fabsf(array[i]);
-        if (abs_val > max_abs) {
-            max_abs = abs_val;
-        }
-    }
-    return max_abs;
-}
-
-/**
- * Compute sum of integer array.
- */
-long long tav_array_sum_int(const int *array, size_t count) {
-    long long sum = 0;
-    for (size_t i = 0; i < count; i++) {
-        sum += array[i];
-    }
-    return sum;
-}
-
-/**
- * Compute sum of float array.
- */
-double tav_array_sum_float(const float *array, size_t count) {
-    double sum = 0.0;
-    for (size_t i = 0; i < count; i++) {
-        sum += array[i];
-    }
-    return sum;
-}
-
-/**
- * Compute mean of float array.
- */
-float tav_array_mean_float(const float *array, size_t count) {
-    if (count == 0) return 0.0f;
-    return (float)(tav_array_sum_float(array, count) / count);
-}
-
-/**
- * Swap two integer values.
- */
-void tav_swap_int(int *a, int *b) {
-    int temp = *a;
-    *a = *b;
-    *b = temp;
-}
-
-/**
- * Swap two float values.
- */
-void tav_swap_float(float *a, float *b) {
-    float temp = *a;
-    *a = *b;
-    *b = temp;
-}
-
-/**
- * Swap two pointer values.
- */
-void tav_swap_ptr(void **a, void **b) {
-    void *temp = *a;
-    *a = *b;
-    *b = temp;
-}
diff --git a/video_encoder/lib/libtavenc/tav_encoder_utils.h b/video_encoder/lib/libtavenc/tav_encoder_utils.h
deleted file mode 100644
index 4dc54da..0000000
--- a/video_encoder/lib/libtavenc/tav_encoder_utils.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/**
- * TAV Encoder - Utilities Library
- *
- * Public API for common utility functions and helpers.
- */
-
-#ifndef TAV_ENCODER_UTILS_H
-#define TAV_ENCODER_UTILS_H
-
-#include <stddef.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// =============================================================================
-// Math Utilities
-// =============================================================================
-
-/** Clamp integer value to range [min, max] */
-int tav_clamp_int(int x, int min, int max);
-
-/** Clamp float value to range [min, max] */
-float tav_clamp_float(float x, float min, float max);
-
-/** Clamp double value to range [min, max] */
-double tav_clamp_double(double x, double min, double max);
-
-/** Round double to nearest integer */
-int tav_iround(double v);
-
-/** Linear interpolation between two floats */
-float tav_lerp(float a, float b, float t);
-
-/** Linear interpolation between two doubles */
-double tav_lerp_double(double a, double b, double t);
-
-/** Get minimum of two integers */
-int tav_min_int(int a, int b);
-
-/** Get maximum of two integers */
-int tav_max_int(int a, int b);
-
-/** Get minimum of two floats */
-float tav_min_float(float a, float b);
-
-/** Get maximum of two floats */
-float tav_max_float(float a, float b);
-
-/** Compute absolute value of integer */
-int tav_abs_int(int x);
-
-/** Compute absolute value of float */
-float tav_abs_float(float x);
-
-/** Sign function: returns -1, 0, or 1 */
-int tav_sign(int x);
-
-/** Check if integer is power of 2 */
-int tav_is_power_of_2(int x);
-
-/** Round up to next power of 2 */
-int tav_next_power_of_2(int x);
-
-/** Compute floor of log2(x) */
-int tav_floor_log2(int x);
-
-/** Compute ceil of log2(x) */
-int tav_ceil_log2(int x);
-
-// =============================================================================
-// Random Filename Generation
-// =============================================================================
-
-/**
- * Generate a random temporary filename with .mp2 extension.
- * Format: /tmp/[32 random chars].mp2
- *
- * @param filename  Output buffer (must be at least 42 bytes)
- */
-void tav_generate_random_filename(char *filename);
-
-/**
- * Generate a random temporary filename with custom extension.
- * Format: /tmp/[32 random chars].[ext]
- *
- * @param filename  Output buffer (must be large enough)
- * @param ext       File extension (without leading dot)
- */
-void tav_generate_random_filename_ext(char *filename, const char *ext);
-
-// =============================================================================
-// Memory Utilities
-// =============================================================================
-
-/** Safe malloc with error checking (exits on failure) */
-void *tav_malloc(size_t size);
-
-/** Safe calloc with error checking (exits on failure) */
-void *tav_calloc(size_t count, size_t size);
-
-/** Safe realloc with error checking (exits on failure) */
-void *tav_realloc(void *ptr, size_t size);
-
-/** Allocate aligned memory (returns NULL on failure) */
-void *tav_aligned_alloc(size_t alignment, size_t size);
-
-/** Free aligned memory */
-void tav_aligned_free(void *ptr);
-
-// =============================================================================
-// Array Utilities
-// =============================================================================
-
-/** Fill integer array with constant value */
-void tav_array_fill_int(int *array, size_t count, int value);
-
-/** Fill float array with constant value */
-void tav_array_fill_float(float *array, size_t count, float value);
-
-/** Copy integer array */
-void tav_array_copy_int(int *dst, const int *src, size_t count);
-
-/** Copy float array */
-void tav_array_copy_float(float *dst, const float *src, size_t count);
-
-/** Find maximum value in integer array */
-int tav_array_max_int(const int *array, size_t count);
-
-/** Find minimum value in integer array */
-int tav_array_min_int(const int *array, size_t count);
-
-/** Find maximum absolute value in float array */
-float tav_array_max_abs_float(const float *array, size_t count);
-
-/** Compute sum of integer array */
-long long tav_array_sum_int(const int *array, size_t count);
-
-/** Compute sum of float array */
-double tav_array_sum_float(const float *array, size_t count);
-
-/** Compute mean of float array */
-float tav_array_mean_float(const float *array, size_t count);
-
-/** Swap two integer values */
-void tav_swap_int(int *a, int *b);
-
-/** Swap two float values */
-void tav_swap_float(float *a, float *b);
-
-/** Swap two pointer values */
-void tav_swap_ptr(void **a, void **b);
-
-// =============================================================================
-// Convenience Macros (for backward compatibility)
-// =============================================================================
-
-#define CLAMP(x, min, max)  tav_clamp_int(x, min, max)
-#define FCLAMP(x, min, max) tav_clamp_float(x, min, max)
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // TAV_ENCODER_UTILS_H
diff --git a/video_encoder/range_coder.c b/video_encoder/range_coder.c
deleted file mode 100644
index 08e49d1..0000000
--- a/video_encoder/range_coder.c
+++ /dev/null
@@ -1,152 +0,0 @@
-// Simple range coder for TAD audio codec
-// Based on range coding with Laplacian probability model
-
-#include "range_coder.h"
-#include <string.h>
-#include <math.h>
-
-#define TOP_VALUE 0xFFFFFFFFU
-#define BOTTOM_VALUE 0x00FFFFFF
-
-static inline void range_encoder_put_byte(RangeEncoder *enc, uint8_t byte) {
-    if (enc->buffer_pos < enc->buffer_capacity) {
-        enc->buffer[enc->buffer_pos++] = byte;
-    }
-}
-
-static inline uint8_t range_decoder_get_byte(RangeDecoder *dec) {
-    if (dec->buffer_pos < dec->buffer_size) {
-        return dec->buffer[dec->buffer_pos++];
-    }
-    return 0;
-}
-
-static void range_encoder_renormalise(RangeEncoder *enc) {
-    while (enc->range <= BOTTOM_VALUE) {
-        range_encoder_put_byte(enc, (enc->low >> 24) & 0xFF);
-        enc->low <<= 8;
-        enc->range <<= 8;
-    }
-}
-
-static void range_decoder_renormalise(RangeDecoder *dec) {
-    while (dec->range <= BOTTOM_VALUE) {
-        dec->code = (dec->code << 8) | range_decoder_get_byte(dec);
-        dec->low <<= 8;
-        dec->range <<= 8;
-    }
-}
-
-void range_encoder_init(RangeEncoder *enc, uint8_t *buffer, size_t capacity) {
-    enc->low = 0;
-    enc->range = TOP_VALUE;
-    enc->buffer = buffer;
-    enc->buffer_pos = 0;
-    enc->buffer_capacity = capacity;
-}
-
-// Calculate Laplacian CDF for a given value
-// CDF(x) = 0.5 * exp(λx) for x < 0
-// CDF(x) = 1 - 0.5 * exp(-λx) for x ≥ 0
-static inline double laplacian_cdf(int16_t value, float lambda) {
-    if (value < 0) {
-        return 0.5 * exp(lambda * value);
-    } else {
-        return 1.0 - 0.5 * exp(-lambda * value);
-    }
-}
-
-void range_encode_int16_laplacian(RangeEncoder *enc, int16_t value, int16_t max_abs_value, float lambda) {
-    // Clamp to valid range
-    if (value < -max_abs_value) value = -max_abs_value;
-    if (value > max_abs_value) value = max_abs_value;
-
-    // Calculate cumulative probabilities using Laplacian distribution
-    // We need CDF at value and value+1 to get the probability mass for this symbol
-    double cdf_low = (value == -max_abs_value) ? 0.0 : laplacian_cdf(value - 1, lambda);
-    double cdf_high = laplacian_cdf(value, lambda);
-
-    // Normalise to get cumulative counts in range [0, SCALE]
-    const uint32_t SCALE = 0x10000;  // 65536 for precision
-    uint32_t cum_low = (uint32_t)(cdf_low * SCALE);
-    uint32_t cum_high = (uint32_t)(cdf_high * SCALE);
-
-    // Ensure we have at least 1 unit of probability
-    if (cum_high <= cum_low) cum_high = cum_low + 1;
-    if (cum_high > SCALE) cum_high = SCALE;
-
-    // Encode using cumulative probabilities
-    uint64_t range_64 = (uint64_t)enc->range;
-    enc->low += (uint32_t)((range_64 * cum_low) / SCALE);
-    enc->range = (uint32_t)((range_64 * (cum_high - cum_low)) / SCALE);
-
-    range_encoder_renormalise(enc);
-}
-
-size_t range_encoder_finish(RangeEncoder *enc) {
-    // Flush remaining bytes
-    for (int i = 0; i < 4; i++) {
-        range_encoder_put_byte(enc, (enc->low >> 24) & 0xFF);
-        enc->low <<= 8;
-    }
-    return enc->buffer_pos;
-}
-
-void range_decoder_init(RangeDecoder *dec, const uint8_t *buffer, size_t size) {
-    dec->low = 0;
-    dec->range = TOP_VALUE;
-    dec->code = 0;
-    dec->buffer = buffer;
-    dec->buffer_pos = 0;
-    dec->buffer_size = size;
-
-    // Read initial bytes into code
-    for (int i = 0; i < 4; i++) {
-        dec->code = (dec->code << 8) | range_decoder_get_byte(dec);
-    }
-}
-
-int16_t range_decode_int16_laplacian(RangeDecoder *dec, int16_t max_abs_value, float lambda) {
-    const uint32_t SCALE = 0x10000;  // Must match encoder
-
-    // Calculate current position in probability space
-    uint64_t range_64 = (uint64_t)dec->range;
-    uint32_t cum_freq = (uint32_t)(((uint64_t)(dec->code - dec->low) * SCALE) / range_64);
-
-    // Binary search to find symbol whose CDF range contains cum_freq
-    int16_t low = -max_abs_value;
-    int16_t high = max_abs_value;
-    int16_t value = 0;
-
-    while (low <= high) {
-        int16_t mid = (low + high) / 2;
-
-        double cdf_low = (mid == -max_abs_value) ? 0.0 : laplacian_cdf(mid - 1, lambda);
-        double cdf_high = laplacian_cdf(mid, lambda);
-
-        uint32_t cum_low = (uint32_t)(cdf_low * SCALE);
-        uint32_t cum_high = (uint32_t)(cdf_high * SCALE);
-
-        if (cum_high <= cum_low) cum_high = cum_low + 1;
-
-        if (cum_freq >= cum_low && cum_freq < cum_high) {
-            // Found the symbol
-            value = mid;
-
-            // Update decoder state
-            dec->low += (uint32_t)((range_64 * cum_low) / SCALE);
-            dec->range = (uint32_t)((range_64 * (cum_high - cum_low)) / SCALE);
-
-            range_decoder_renormalise(dec);
-            return value;
-        } else if (cum_freq < cum_low) {
-            high = mid - 1;
-        } else {
-            low = mid + 1;
-        }
-    }
-
-    // Fallback: shouldn't happen with correct encoding
-    range_decoder_renormalise(dec);
-    return value;
-}
diff --git a/video_encoder/range_coder.h b/video_encoder/range_coder.h
deleted file mode 100644
index b9832f1..0000000
--- a/video_encoder/range_coder.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef RANGE_CODER_H
-#define RANGE_CODER_H
-
-#include <stdint.h>
-#include <stddef.h>
-
-// Simple range coder for signed 16-bit integers
-// Uses adaptive frequency model for better compression
-
-typedef struct {
-    uint32_t low;
-    uint32_t range;
-    uint8_t *buffer;
-    size_t buffer_pos;
-    size_t buffer_capacity;
-} RangeEncoder;
-
-typedef struct {
-    uint32_t low;
-    uint32_t range;
-    uint32_t code;
-    const uint8_t *buffer;
-    size_t buffer_pos;
-    size_t buffer_size;
-} RangeDecoder;
-
-// Initialise encoder
-void range_encoder_init(RangeEncoder *enc, uint8_t *buffer, size_t capacity);
-
-// Encode a signed 16-bit value with Laplacian distribution (λ=5.0, μ=0)
-void range_encode_int16_laplacian(RangeEncoder *enc, int16_t value, int16_t max_abs_value, float lambda);
-
-// Finalise encoding and return bytes written
-size_t range_encoder_finish(RangeEncoder *enc);
-
-// Initialise decoder
-void range_decoder_init(RangeDecoder *dec, const uint8_t *buffer, size_t size);
-
-// Decode a signed 16-bit value with Laplacian distribution (λ=5.0, μ=0)
-int16_t range_decode_int16_laplacian(RangeDecoder *dec, int16_t max_abs_value, float lambda);
-
-#endif // RANGE_CODER_H
diff --git a/video_encoder/src/decoder_tav.c b/video_encoder/src/decoder_tav.c
deleted file mode 100644
index 1ee3c99..0000000
--- a/video_encoder/src/decoder_tav.c
+++ /dev/null
@@ -1,2330 +0,0 @@
-/**
- * TAV Decoder CLI - Reference Implementation using libtavdec and libtaddec
- *
- * Complete reference decoder with all features:
- * - Full command-line argument support
- * - TAV file format parsing (header + packets)
- * - Video decoding via libtavdec (I-frames, GOPs)
- * - Audio decoding via libtaddec (TAD32 to PCMu8)
- * - FFmpeg integration for output (FFV1/rawvideo + audio muxing)
- * - Progress reporting and statistics
- *
- * This is the official CLI implementation using libtavdec/libtaddec libraries.
- * Reduced from ~3,500 lines monolithic to ~1,000 lines while preserving all features.
- *
- * Created by CuriousTorvald and Claude on 2025-12-07.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <getopt.h>
-#include <time.h>
-#include <unistd.h>
-#include <sys/wait.h>
-#include <signal.h>
-#include <pthread.h>
-#include <limits.h>
-
-#include "tav_video_decoder.h"
-#include "decoder_tad.h"
-
-// =============================================================================
-// Constants
-// =============================================================================
-
-#define DECODER_VENDOR_STRING "Decoder-TAV 20251223 (reference)"
-#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56"  // "\x1FTSVMTAV"
-#define TAP_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x50"  // "\x1FTSVMTAP" (still picture)
-#define MAX_PATH 4096
-
-// TAV packet types
-#define TAV_PACKET_IFRAME          0x10
-#define TAV_PACKET_PFRAME          0x11
-#define TAV_PACKET_GOP_UNIFIED     0x12
-#define TAV_PACKET_AUDIO_MP2       0x20
-#define TAV_PACKET_AUDIO_PCM8      0x21
-#define TAV_PACKET_AUDIO_TAD       0x24
-#define TAV_PACKET_SUBTITLE        0x30
-#define TAV_PACKET_SUBTITLE_TC     0x31
-#define TAV_PACKET_AUDIO_TRACK     0x40
-#define TAV_PACKET_EXTENDED_HDR    0xEF
-#define TAV_PACKET_SCREEN_MASK     0xF2
-#define TAV_PACKET_GOP_SYNC        0xFC
-#define TAV_PACKET_TIMECODE        0xFD
-#define TAV_PACKET_SYNC_NTSC       0xFE
-#define TAV_PACKET_SYNC            0xFF
-
-// Threading constants
-#define MAX_DECODE_THREADS 16
-#define DECODE_SLOT_PENDING     0
-#define DECODE_SLOT_PROCESSING  1
-#define DECODE_SLOT_DONE        2
-
-// =============================================================================
-// GOP Decode Job Structure (for multithreading)
-// =============================================================================
-
-typedef struct {
-    int job_id;
-    volatile int status;  // DECODE_SLOT_*
-
-    // Input (compressed data read from file)
-    uint8_t *compressed_data;
-    uint32_t compressed_size;
-    int gop_size;
-
-    // Output (decoded frames)
-    uint8_t **frames;
-    int frames_allocated;
-    int decode_result;
-
-} gop_decode_job_t;
-
-// =============================================================================
-// Audio Decode Job Structure (for multithreading)
-// =============================================================================
-
-typedef struct {
-    long file_offset;          // File position for reading
-    uint32_t payload_size;     // Size of compressed audio data
-    uint16_t sample_count;     // Expected sample count
-    uint8_t packet_type;       // TAD_PACKET_AUDIO_TAD or TAD_PACKET_AUDIO_PCM8
-
-    // Output (decoded PCM data)
-    uint8_t *decoded_pcm;      // Stereo PCMu8 output
-    size_t decoded_samples;    // Actual samples decoded
-    volatile int status;       // DECODE_SLOT_*
-} audio_decode_job_t;
-
-// =============================================================================
-// TAV Header Structure (32 bytes)
-// =============================================================================
-
-typedef struct {
-    uint8_t magic[8];
-    uint8_t version;
-    uint16_t width;
-    uint16_t height;
-    uint8_t fps;
-    uint32_t total_frames;
-    uint8_t wavelet_filter;
-    uint8_t decomp_levels;
-    uint8_t quantiser_y;
-    uint8_t quantiser_co;
-    uint8_t quantiser_cg;
-    uint8_t extra_flags;
-    uint8_t video_flags;
-    uint8_t encoder_quality;
-    uint8_t channel_layout;
-    uint8_t entropy_coder;
-    uint8_t encoder_preset;
-    uint8_t reserved;
-    uint8_t device_orientation;
-    uint8_t file_role;
-} __attribute__((packed)) tav_header_t;
-
-// =============================================================================
-// Decoder Context
-// =============================================================================
-
-typedef struct {
-    // Input/output
-    char *input_file;
-    char *output_file;
-    FILE *input_fp;
-
-    // TAV header info
-    tav_header_t header;
-    int perceptual_mode;
-    int interlaced;        // 1 if video is interlaced (from video_flags bit 0)
-    int decode_height;     // Actual decode height (half of header.height when interlaced)
-
-    // Video decoder context
-    tav_video_context_t *video_ctx;
-
-    // FFmpeg integration
-    pid_t ffmpeg_pid;
-    FILE *video_pipe;
-    char *audio_temp_file;
-    FILE *audio_temp_fp;
-
-    // Frame buffers
-    uint8_t **gop_frames;
-    int gop_frames_allocated;
-
-    // Statistics
-    uint64_t frames_decoded;
-    uint64_t gops_decoded;
-    uint64_t audio_samples_decoded;
-    uint64_t bytes_read;
-    time_t start_time;
-
-    // Options
-    int verbose;
-    int decode_limit;       // Max frames to decode (0=all)
-    int output_raw;         // Output raw video instead of FFV1
-    int no_audio;           // Skip audio decoding
-    int dump_packets;       // Debug: dump packet info
-
-    // Still image (TAP) mode
-    int is_still_image;     // 1 if input is a still picture (TAP format)
-    int output_tga;         // 1 for TGA output, 0 for PNG (default)
-
-    // Extended framerate support (XFPS)
-    int fps_num;            // Framerate numerator (from header or XFPS extended header)
-    int fps_den;            // Framerate denominator (1 for standard, 1001 for NTSC, or from XFPS)
-
-    // Threading support (video decoding)
-    int num_threads;
-    int num_slots;
-    gop_decode_job_t *slots;
-    tav_video_context_t **worker_video_ctx;  // Per-thread decoder contexts
-    pthread_t *worker_threads;
-    pthread_mutex_t mutex;
-    pthread_cond_t cond_job_available;
-    pthread_cond_t cond_slot_free;
-    volatile int threads_should_exit;
-    volatile int next_write_slot;      // Next slot to write to FFmpeg
-    volatile int next_read_slot;       // Next slot for reading from file
-    volatile int jobs_submitted;
-    volatile int jobs_completed;
-
-    // Audio decoding (pass 1 multithreading)
-    audio_decode_job_t *audio_jobs;
-    int audio_job_count;
-    int audio_job_capacity;
-    pthread_t *audio_worker_threads;
-    int num_audio_threads;
-    pthread_mutex_t audio_mutex;
-    pthread_cond_t audio_cond_job_available;
-    volatile int audio_threads_should_exit;
-    volatile int next_audio_job;       // Next job for worker threads to process
-    volatile int next_audio_write;     // Next job to write to temp file
-
-} decoder_context_t;
-
-// =============================================================================
-// TAV Header Parsing
-// =============================================================================
-
-static int read_tav_header(decoder_context_t *ctx) {
-    // Read raw header bytes
-    uint8_t header_bytes[32];
-    if (fread(header_bytes, 1, 32, ctx->input_fp) != 32) {
-        fprintf(stderr, "Error: Failed to read TAV header\n");
-        return -1;
-    }
-
-    // Verify magic (accept both TAV and TAP)
-    if (memcmp(header_bytes, TAV_MAGIC, 8) == 0) {
-        ctx->is_still_image = 0;
-    } else if (memcmp(header_bytes, TAP_MAGIC, 8) == 0) {
-        ctx->is_still_image = 1;
-    } else {
-        fprintf(stderr, "Error: Invalid TAV/TAP magic (not a TAV/TAP file)\n");
-        return -1;
-    }
-
-    // Parse header fields manually (avoid packing issues)
-    memcpy(ctx->header.magic, header_bytes, 8);
-    ctx->header.version = header_bytes[8];
-    ctx->header.width = header_bytes[9] | (header_bytes[10] << 8);
-    ctx->header.height = header_bytes[11] | (header_bytes[12] << 8);
-    ctx->header.fps = header_bytes[13];
-    ctx->header.total_frames = header_bytes[14] | (header_bytes[15] << 8) |
-                               (header_bytes[16] << 16) | (header_bytes[17] << 24);
-    ctx->header.wavelet_filter = header_bytes[18];
-    ctx->header.decomp_levels = header_bytes[19];
-    ctx->header.quantiser_y = header_bytes[20];
-    ctx->header.quantiser_co = header_bytes[21];
-    ctx->header.quantiser_cg = header_bytes[22];
-    ctx->header.extra_flags = header_bytes[23];
-    ctx->header.video_flags = header_bytes[24];
-    ctx->header.encoder_quality = header_bytes[25];
-    ctx->header.channel_layout = header_bytes[26];
-    ctx->header.entropy_coder = header_bytes[27];
-    ctx->header.encoder_preset = header_bytes[28];
-    ctx->header.reserved = header_bytes[29];
-    ctx->header.device_orientation = header_bytes[30];
-    ctx->header.file_role = header_bytes[31];
-
-    ctx->bytes_read += 32;
-
-    // Determine perceptual mode from version
-    // Versions 5, 6, 13, 14 = perceptual; 3, 4, 11, 12 = uniform
-    int base_version = ctx->header.version & 0x07;  // Remove temporal wavelet flag
-    ctx->perceptual_mode = (base_version == 5 || base_version == 6);
-
-    // Detect interlaced mode from video_flags bit 0
-    ctx->interlaced = (ctx->header.video_flags & 0x01) ? 1 : 0;
-
-    // Calculate decode height: half of header height for interlaced video
-    // The header stores the full display height, but encoded frames are half-height
-    if (ctx->interlaced) {
-        ctx->decode_height = ctx->header.height / 2;
-    } else {
-        ctx->decode_height = ctx->header.height;
-    }
-
-    // Initialize fps_num and fps_den from header
-    // If header.fps == 0xFF, the actual framerate is in the XFPS extended header entry
-    // If header.fps == 0x00, this is a still image
-    // Otherwise, fps_num = header.fps and fps_den is 1 (or 1001 for NTSC if video_flags bit 1 is set)
-    if (ctx->header.fps == 0xFF) {
-        // Will be set from XFPS extended header
-        ctx->fps_num = 0;
-        ctx->fps_den = 1;
-    } else if (ctx->header.fps == 0x00) {
-        // Still image
-        ctx->fps_num = 0;
-        ctx->fps_den = 1;
-    } else {
-        ctx->fps_num = ctx->header.fps;
-        ctx->fps_den = (ctx->header.video_flags & 0x02) ? 1001 : 1;
-    }
-
-    if (ctx->verbose) {
-        printf("=== %s Header ===\n", ctx->is_still_image ? "TAP" : "TAV");
-        printf("  Format: %s\n", ctx->is_still_image ? "Still Picture" : "Video");
-        printf("  Version: %d\n", ctx->header.version);
-        printf("  Resolution: %dx%d\n", ctx->header.width, ctx->header.height);
-        if (ctx->interlaced) {
-            printf("  Interlaced: yes (decode height: %d)\n", ctx->decode_height);
-        }
-        if (!ctx->is_still_image) {
-            if (ctx->header.fps == 0xFF) {
-                printf("  FPS: (extended - see XFPS)\n");
-            } else {
-                printf("  FPS: %d\n", ctx->header.fps);
-            }
-            printf("  Total frames: %u\n", ctx->header.total_frames);
-        }
-        printf("  Wavelet filter: %d\n", ctx->header.wavelet_filter);
-        printf("  Decomp levels: %d\n", ctx->header.decomp_levels);
-        printf("  Quantisers: Y=%d, Co=%d, Cg=%d\n",
-               ctx->header.quantiser_y, ctx->header.quantiser_co, ctx->header.quantiser_cg);
-        printf("  Perceptual mode: %s\n", ctx->perceptual_mode ? "yes" : "no");
-        printf("  Entropy coder: %s\n", ctx->header.entropy_coder ? "EZBC" : "Twobitmap");
-        printf("  Encoder preset: 0x%02X\n", ctx->header.encoder_preset);
-        if (!ctx->is_still_image) {
-            printf("  Has audio: %s\n", (ctx->header.extra_flags & 0x01) ? "yes" : "no");
-        }
-        printf("==================\n\n");
-    }
-
-    return 0;
-}
-
-/**
- * Scan for XFPS extended header entry if header.fps == 0xFF.
- * Must be called after read_tav_header() while file position is at start of packets.
- * Will restore file position after scanning.
- */
-static void scan_for_xfps(decoder_context_t *ctx) {
-    if (ctx->header.fps != 0xFF) {
-        // No need to scan for XFPS
-        return;
-    }
-
-    long start_pos = ftell(ctx->input_fp);
-
-    // Scan packets looking for extended header
-    while (!feof(ctx->input_fp)) {
-        uint8_t packet_type;
-        if (fread(&packet_type, 1, 1, ctx->input_fp) != 1) break;
-
-        if (packet_type == TAV_PACKET_EXTENDED_HDR) {
-            // Parse extended header looking for XFPS
-            uint16_t num_pairs;
-            if (fread(&num_pairs, 2, 1, ctx->input_fp) != 1) break;
-
-            for (int i = 0; i < num_pairs; i++) {
-                char key[5] = {0};
-                uint8_t value_type;
-
-                if (fread(key, 1, 4, ctx->input_fp) != 4) break;
-                if (fread(&value_type, 1, 1, ctx->input_fp) != 1) break;
-
-                if (value_type == 0x10) {  // Bytes type
-                    uint16_t length;
-                    if (fread(&length, 2, 1, ctx->input_fp) != 1) break;
-
-                    if (strncmp(key, "XFPS", 4) == 0 && length < 32) {
-                        // Found XFPS - parse it
-                        char xfps_str[32] = {0};
-                        if (fread(xfps_str, 1, length, ctx->input_fp) != length) break;
-                        xfps_str[length] = '\0';
-
-                        int num, den;
-                        if (sscanf(xfps_str, "%d/%d", &num, &den) == 2) {
-                            ctx->fps_num = num;
-                            ctx->fps_den = den;
-                            if (ctx->verbose) {
-                                printf("  XFPS: %d/%d (%.3f fps)\n", num, den, (double)num / den);
-                            }
-                        }
-                        // Found XFPS, done scanning
-                        goto done;
-                    } else {
-                        // Skip this value
-                        fseek(ctx->input_fp, length, SEEK_CUR);
-                    }
-                } else if (value_type == 0x04) {  // Int64
-                    fseek(ctx->input_fp, 8, SEEK_CUR);
-                } else if (value_type <= 0x04) {  // Other int types
-                    int sizes[] = {2, 3, 4, 6, 8};
-                    fseek(ctx->input_fp, sizes[value_type], SEEK_CUR);
-                }
-            }
-            // Extended header parsed, done scanning (XFPS not found)
-            break;
-        } else if (packet_type == TAV_PACKET_TIMECODE) {
-            fseek(ctx->input_fp, 8, SEEK_CUR);
-        } else if (packet_type == TAV_PACKET_SYNC || packet_type == TAV_PACKET_SYNC_NTSC) {
-            // No payload
-        } else {
-            // Reached a non-metadata packet, stop scanning
-            break;
-        }
-    }
-
-done:
-    // Restore file position
-    fseek(ctx->input_fp, start_pos, SEEK_SET);
-}
-
-// =============================================================================
-// FFmpeg Integration
-// =============================================================================
-
-static int spawn_ffmpeg(decoder_context_t *ctx) {
-    int video_pipe_fd[2];
-
-    // Create pipe for video data
-    if (pipe(video_pipe_fd) < 0) {
-        fprintf(stderr, "Error: Failed to create video pipe\n");
-        return -1;
-    }
-
-    ctx->ffmpeg_pid = fork();
-
-    if (ctx->ffmpeg_pid < 0) {
-        fprintf(stderr, "Error: Failed to fork FFmpeg process\n");
-        close(video_pipe_fd[0]);
-        close(video_pipe_fd[1]);
-        return -1;
-    }
-
-    if (ctx->ffmpeg_pid == 0) {
-        // Child process - execute FFmpeg
-        close(video_pipe_fd[1]);  // Close write end
-
-        // For interlaced video: input is half-height fields, output is full-height interlaced
-        // For progressive video: input and output are both full-height
-        char video_size[32];
-        char framerate[32];
-        snprintf(video_size, sizeof(video_size), "%dx%d", ctx->header.width, ctx->decode_height);
-        // Use fps_num/fps_den for extended framerates (XFPS)
-        if (ctx->fps_den == 1) {
-            snprintf(framerate, sizeof(framerate), "%d", ctx->fps_num);
-        } else {
-            snprintf(framerate, sizeof(framerate), "%d/%d", ctx->fps_num, ctx->fps_den);
-        }
-
-        // Redirect video pipe to fd 3
-        dup2(video_pipe_fd[0], 3);
-        close(video_pipe_fd[0]);
-
-        if (ctx->interlaced) {
-            // Interlaced mode: merge separate fields into interlaced frames
-            // setfield=tff marks each frame as top-field, weave combines consecutive fields
-            // into full-height interlaced frames at half framerate
-            if (ctx->output_raw) {
-                // Raw video output (no compression)
-                execl("/usr/bin/ffmpeg", "ffmpeg",
-                      "-f", "rawvideo",
-                      "-pixel_format", "rgb24",
-                      "-video_size", video_size,
-                      "-framerate", framerate,
-                      "-i", "pipe:3",
-                      "-f", "u8",
-                      "-ar", "32000",
-                      "-ac", "2",
-                      "-i", ctx->audio_temp_file,
-                      "-vf", "setfield=tff,weave",
-                      "-field_order", "tt",
-                      "-c:v", "rawvideo",
-                      "-pixel_format", "rgb24",
-                      "-c:a", "pcm_u8",
-                      "-f", "matroska",
-                      ctx->output_file,
-                      "-y",
-                      "-v", "warning",
-                      (char*)NULL);
-            } else {
-                // FFV1 output (lossless compression) with interlaced flag
-                execl("/usr/bin/ffmpeg", "ffmpeg",
-                      "-f", "rawvideo",
-                      "-pixel_format", "rgb24",
-                      "-video_size", video_size,
-                      "-framerate", framerate,
-                      "-i", "pipe:3",
-                      "-f", "u8",
-                      "-ar", "32000",
-                      "-ac", "2",
-                      "-i", ctx->audio_temp_file,
-                      "-vf", "setfield=tff,weave",
-                      "-field_order", "tt",
-                      "-color_range", "2",
-                      "-c:v", "ffv1",
-                      "-level", "3",
-                      "-coder", "1",
-                      "-context", "1",
-                      "-g", "1",
-                      "-slices", "24",
-                      "-slicecrc", "1",
-                      "-pixel_format", "rgb24",
-                      "-color_range", "2",
-                      "-c:a", "pcm_u8",
-                      "-f", "matroska",
-                      ctx->output_file,
-                      "-y",
-                      "-v", "warning",
-                      (char*)NULL);
-            }
-        } else {
-            // Progressive mode - simple passthrough
-            if (ctx->output_raw) {
-                // Raw video output (no compression)
-                execl("/usr/bin/ffmpeg", "ffmpeg",
-                      "-f", "rawvideo",
-                      "-pixel_format", "rgb24",
-                      "-video_size", video_size,
-                      "-framerate", framerate,
-                      "-i", "pipe:3",
-                      "-f", "u8",
-                      "-ar", "32000",
-                      "-ac", "2",
-                      "-i", ctx->audio_temp_file,
-                      "-c:v", "rawvideo",
-                      "-pixel_format", "rgb24",
-                      "-c:a", "pcm_u8",
-                      "-f", "matroska",
-                      ctx->output_file,
-                      "-y",
-                      "-v", "warning",
-                      (char*)NULL);
-            } else {
-                // FFV1 output (lossless compression)
-                execl("/usr/bin/ffmpeg", "ffmpeg",
-                      "-f", "rawvideo",
-                      "-pixel_format", "rgb24",
-                      "-video_size", video_size,
-                      "-framerate", framerate,
-                      "-i", "pipe:3",
-                      "-f", "u8",
-                      "-ar", "32000",
-                      "-ac", "2",
-                      "-i", ctx->audio_temp_file,
-                      "-color_range", "2",
-                      "-c:v", "ffv1",
-                      "-level", "3",
-                      "-coder", "1",
-                      "-context", "1",
-                      "-g", "1",
-                      "-slices", "24",
-                      "-slicecrc", "1",
-                      "-pixel_format", "rgb24",
-                      "-color_range", "2",
-                      "-c:a", "pcm_u8",
-                      "-f", "matroska",
-                      ctx->output_file,
-                      "-y",
-                      "-v", "warning",
-                      (char*)NULL);
-            }
-        }
-
-        fprintf(stderr, "Error: Failed to execute FFmpeg\n");
-        exit(1);
-    }
-
-    // Parent process
-    close(video_pipe_fd[0]);  // Close read end
-
-    ctx->video_pipe = fdopen(video_pipe_fd[1], "wb");
-    if (!ctx->video_pipe) {
-        fprintf(stderr, "Error: Failed to open video pipe for writing\n");
-        kill(ctx->ffmpeg_pid, SIGTERM);
-        return -1;
-    }
-
-    return 0;
-}
-
-// =============================================================================
-// Multithreading Support
-// =============================================================================
-
-// Worker thread function - decodes GOPs in parallel
-static void *decoder_worker_thread(void *arg) {
-    decoder_context_t *ctx = (decoder_context_t *)arg;
-
-    // Get thread index by finding our thread ID in the array
-    int thread_idx = -1;
-    pthread_t self = pthread_self();
-    for (int i = 0; i < ctx->num_threads; i++) {
-        if (pthread_equal(ctx->worker_threads[i], self)) {
-            thread_idx = i;
-            break;
-        }
-    }
-    if (thread_idx < 0) thread_idx = 0;  // Fallback
-
-    tav_video_context_t *my_video_ctx = ctx->worker_video_ctx[thread_idx];
-
-    while (1) {
-        pthread_mutex_lock(&ctx->mutex);
-
-        // Find a pending slot to work on
-        int slot_idx = -1;
-        while (slot_idx < 0 && !ctx->threads_should_exit) {
-            for (int i = 0; i < ctx->num_slots; i++) {
-                if (ctx->slots[i].status == DECODE_SLOT_PENDING &&
-                    ctx->slots[i].compressed_data != NULL) {
-                    slot_idx = i;
-                    ctx->slots[i].status = DECODE_SLOT_PROCESSING;
-                    break;
-                }
-            }
-            if (slot_idx < 0 && !ctx->threads_should_exit) {
-                pthread_cond_wait(&ctx->cond_job_available, &ctx->mutex);
-            }
-        }
-
-        if (ctx->threads_should_exit && slot_idx < 0) {
-            pthread_mutex_unlock(&ctx->mutex);
-            break;
-        }
-
-        pthread_mutex_unlock(&ctx->mutex);
-
-        if (slot_idx < 0) continue;
-
-        gop_decode_job_t *job = &ctx->slots[slot_idx];
-
-        // Decode GOP using our thread's decoder context
-        job->decode_result = tav_video_decode_gop(
-            my_video_ctx,
-            job->compressed_data,
-            job->compressed_size,
-            job->gop_size,
-            job->frames
-        );
-
-        // Free compressed data after decoding
-        free(job->compressed_data);
-        job->compressed_data = NULL;
-
-        // Mark as done
-        pthread_mutex_lock(&ctx->mutex);
-        job->status = DECODE_SLOT_DONE;
-        ctx->jobs_completed++;
-        pthread_cond_broadcast(&ctx->cond_slot_free);
-        pthread_mutex_unlock(&ctx->mutex);
-    }
-
-    return NULL;
-}
-
-static int init_decoder_threads(decoder_context_t *ctx) {
-    if (ctx->num_threads <= 0) {
-        return 0;  // Single-threaded mode
-    }
-
-    // Limit threads
-    if (ctx->num_threads > MAX_DECODE_THREADS) {
-        ctx->num_threads = MAX_DECODE_THREADS;
-    }
-
-    // Number of slots = threads + 2 for pipelining
-    ctx->num_slots = ctx->num_threads + 2;
-
-    // Allocate slots
-    ctx->slots = calloc(ctx->num_slots, sizeof(gop_decode_job_t));
-    if (!ctx->slots) {
-        fprintf(stderr, "Error: Failed to allocate decode slots\n");
-        return -1;
-    }
-
-    // Pre-allocate frame buffers for each slot (assuming max GOP size of 32)
-    // Use decode_height for interlaced video (half of header height)
-    size_t frame_size = ctx->header.width * ctx->decode_height * 3;
-    int max_gop_size = 32;
-
-    for (int i = 0; i < ctx->num_slots; i++) {
-        ctx->slots[i].job_id = -1;
-        ctx->slots[i].status = DECODE_SLOT_DONE;  // Available
-        ctx->slots[i].frames = malloc(max_gop_size * sizeof(uint8_t*));
-        if (!ctx->slots[i].frames) {
-            fprintf(stderr, "Error: Failed to allocate frame pointers for slot %d\n", i);
-            return -1;
-        }
-        for (int j = 0; j < max_gop_size; j++) {
-            ctx->slots[i].frames[j] = malloc(frame_size);
-            if (!ctx->slots[i].frames[j]) {
-                fprintf(stderr, "Error: Failed to allocate frame buffer for slot %d frame %d\n", i, j);
-                return -1;
-            }
-        }
-        ctx->slots[i].frames_allocated = max_gop_size;
-    }
-
-    // Create per-thread video decoder contexts
-    ctx->worker_video_ctx = malloc(ctx->num_threads * sizeof(tav_video_context_t*));
-    if (!ctx->worker_video_ctx) {
-        fprintf(stderr, "Error: Failed to allocate worker video contexts\n");
-        return -1;
-    }
-
-    tav_video_params_t video_params = {
-        .width = ctx->header.width,
-        .height = ctx->decode_height,  // Use decode_height for interlaced video
-        .decomp_levels = ctx->header.decomp_levels,
-        .temporal_levels = 2,
-        .wavelet_filter = ctx->header.wavelet_filter,
-        .temporal_wavelet = 255,
-        .entropy_coder = ctx->header.entropy_coder,
-        .channel_layout = ctx->header.channel_layout,
-        .perceptual_tuning = ctx->perceptual_mode,
-        .quantiser_y = ctx->header.quantiser_y,
-        .quantiser_co = ctx->header.quantiser_co,
-        .quantiser_cg = ctx->header.quantiser_cg,
-        .encoder_preset = ctx->header.encoder_preset,
-        .monoblock = 1
-    };
-
-    for (int i = 0; i < ctx->num_threads; i++) {
-        ctx->worker_video_ctx[i] = tav_video_create(&video_params);
-        if (!ctx->worker_video_ctx[i]) {
-            fprintf(stderr, "Error: Failed to create video context for thread %d\n", i);
-            return -1;
-        }
-    }
-
-    // Initialize synchronization primitives
-    pthread_mutex_init(&ctx->mutex, NULL);
-    pthread_cond_init(&ctx->cond_job_available, NULL);
-    pthread_cond_init(&ctx->cond_slot_free, NULL);
-    ctx->threads_should_exit = 0;
-    ctx->next_write_slot = 0;
-    ctx->next_read_slot = 0;
-    ctx->jobs_submitted = 0;
-    ctx->jobs_completed = 0;
-
-    // Create worker threads
-    ctx->worker_threads = malloc(ctx->num_threads * sizeof(pthread_t));
-    if (!ctx->worker_threads) {
-        fprintf(stderr, "Error: Failed to allocate worker threads\n");
-        return -1;
-    }
-
-    for (int i = 0; i < ctx->num_threads; i++) {
-        if (pthread_create(&ctx->worker_threads[i], NULL, decoder_worker_thread, ctx) != 0) {
-            fprintf(stderr, "Error: Failed to create worker thread %d\n", i);
-            return -1;
-        }
-    }
-
-    if (ctx->verbose) {
-        printf("Initialized %d decoder worker threads with %d slots\n",
-               ctx->num_threads, ctx->num_slots);
-    }
-
-    return 0;
-}
-
-static void cleanup_decoder_threads(decoder_context_t *ctx) {
-    if (ctx->num_threads <= 0) return;
-
-    // Signal threads to exit
-    pthread_mutex_lock(&ctx->mutex);
-    ctx->threads_should_exit = 1;
-    pthread_cond_broadcast(&ctx->cond_job_available);
-    pthread_mutex_unlock(&ctx->mutex);
-
-    // Wait for threads to finish
-    for (int i = 0; i < ctx->num_threads; i++) {
-        pthread_join(ctx->worker_threads[i], NULL);
-    }
-    free(ctx->worker_threads);
-    ctx->worker_threads = NULL;
-
-    // Free per-thread video contexts
-    for (int i = 0; i < ctx->num_threads; i++) {
-        tav_video_free(ctx->worker_video_ctx[i]);
-    }
-    free(ctx->worker_video_ctx);
-    ctx->worker_video_ctx = NULL;
-
-    // Free slots
-    for (int i = 0; i < ctx->num_slots; i++) {
-        if (ctx->slots[i].frames) {
-            for (int j = 0; j < ctx->slots[i].frames_allocated; j++) {
-                free(ctx->slots[i].frames[j]);
-            }
-            free(ctx->slots[i].frames);
-        }
-        if (ctx->slots[i].compressed_data) {
-            free(ctx->slots[i].compressed_data);
-        }
-    }
-    free(ctx->slots);
-    ctx->slots = NULL;
-
-    // Destroy sync primitives
-    pthread_mutex_destroy(&ctx->mutex);
-    pthread_cond_destroy(&ctx->cond_job_available);
-    pthread_cond_destroy(&ctx->cond_slot_free);
-}
-
-// =============================================================================
-// Frame Buffer Management
-// =============================================================================
-
-static int allocate_gop_frames(decoder_context_t *ctx, int gop_size) {
-    if (ctx->gop_frames_allocated >= gop_size) {
-        return 0;  // Already have enough
-    }
-
-    // Free existing if any
-    if (ctx->gop_frames) {
-        for (int i = 0; i < ctx->gop_frames_allocated; i++) {
-            free(ctx->gop_frames[i]);
-        }
-        free(ctx->gop_frames);
-    }
-
-    // Allocate new
-    ctx->gop_frames = malloc(gop_size * sizeof(uint8_t*));
-    if (!ctx->gop_frames) {
-        return -1;
-    }
-
-    size_t frame_size = ctx->header.width * ctx->header.height * 3;
-    for (int i = 0; i < gop_size; i++) {
-        ctx->gop_frames[i] = malloc(frame_size);
-        if (!ctx->gop_frames[i]) {
-            // Cleanup on failure
-            for (int j = 0; j < i; j++) {
-                free(ctx->gop_frames[j]);
-            }
-            free(ctx->gop_frames);
-            ctx->gop_frames = NULL;
-            return -1;
-        }
-    }
-
-    ctx->gop_frames_allocated = gop_size;
-    return 0;
-}
-
-// =============================================================================
-// Still Image Output (TAP format)
-// =============================================================================
-
-/**
- * Write RGB24 frame to TGA file.
- * TGA format: uncompressed true-color image (type 2).
- */
-static int write_tga_file(const char *filename, const uint8_t *rgb_data,
-                          int width, int height) {
-    FILE *fp = fopen(filename, "wb");
-    if (!fp) {
-        fprintf(stderr, "Error: Cannot create TGA file: %s\n", filename);
-        return -1;
-    }
-
-    // TGA header (18 bytes)
-    uint8_t header[18] = {0};
-    header[2] = 2;  // Uncompressed true-color
-    header[12] = width & 0xFF;
-    header[13] = (width >> 8) & 0xFF;
-    header[14] = height & 0xFF;
-    header[15] = (height >> 8) & 0xFF;
-    header[16] = 24;  // Bits per pixel
-    header[17] = 0x20;  // Top-left origin
-
-    fwrite(header, 1, 18, fp);
-
-    // Write pixel data (convert RGB to BGR, flip vertically)
-    for (int y = 0; y < height; y++) {
-        for (int x = 0; x < width; x++) {
-            int src_idx = (y * width + x) * 3;
-            uint8_t bgr[3] = {
-                rgb_data[src_idx + 2],  // B
-                rgb_data[src_idx + 1],  // G
-                rgb_data[src_idx + 0]   // R
-            };
-            fwrite(bgr, 1, 3, fp);
-        }
-    }
-
-    fclose(fp);
-    return 0;
-}
-
-/**
- * Write RGB24 frame to PNG file using FFmpeg.
- */
-static int write_png_file(const char *filename, const uint8_t *rgb_data,
-                          int width, int height) {
-    char cmd[MAX_PATH * 2];
-    snprintf(cmd, sizeof(cmd),
-             "ffmpeg -hide_banner -v quiet -f rawvideo -pix_fmt rgb24 "
-             "-s %dx%d -i pipe:0 -y \"%s\"",
-             width, height, filename);
-
-    FILE *fp = popen(cmd, "w");
-    if (!fp) {
-        fprintf(stderr, "Error: Cannot start FFmpeg for PNG output\n");
-        return -1;
-    }
-
-    size_t frame_size = width * height * 3;
-    if (fwrite(rgb_data, 1, frame_size, fp) != frame_size) {
-        fprintf(stderr, "Error: Failed to write frame data to FFmpeg\n");
-        pclose(fp);
-        return -1;
-    }
-
-    int result = pclose(fp);
-    if (result != 0) {
-        fprintf(stderr, "Error: FFmpeg failed to write PNG file\n");
-        return -1;
-    }
-
-    return 0;
-}
-
-/**
- * Write decoded still image to file (PNG or TGA).
- */
-static int write_still_image(decoder_context_t *ctx, const uint8_t *rgb_data) {
-    int width = ctx->header.width;
-    int height = ctx->decode_height;
-
-    if (ctx->output_tga) {
-        return write_tga_file(ctx->output_file, rgb_data, width, height);
-    } else {
-        return write_png_file(ctx->output_file, rgb_data, width, height);
-    }
-}
-
-// =============================================================================
-// Packet Processing
-// =============================================================================
-
-static int process_gop_packet(decoder_context_t *ctx) {
-    // Read GOP size (1 byte)
-    uint8_t gop_size;
-    if (fread(&gop_size, 1, 1, ctx->input_fp) != 1) {
-        fprintf(stderr, "Error: Failed to read GOP size\n");
-        return -1;
-    }
-    ctx->bytes_read++;
-
-    // Read compressed size (4 bytes)
-    uint32_t compressed_size;
-    if (fread(&compressed_size, 4, 1, ctx->input_fp) != 1) {
-        fprintf(stderr, "Error: Failed to read GOP compressed size\n");
-        return -1;
-    }
-    ctx->bytes_read += 4;
-
-    if (ctx->dump_packets) {
-        printf("  GOP: %d frames, %u bytes compressed\n", gop_size, compressed_size);
-    }
-
-    // Allocate frame buffers
-    if (allocate_gop_frames(ctx, gop_size) < 0) {
-        fprintf(stderr, "Error: Failed to allocate GOP frame buffers\n");
-        return -1;
-    }
-
-    // Read compressed data
-    uint8_t *compressed_data = malloc(compressed_size);
-    if (!compressed_data) {
-        fprintf(stderr, "Error: Failed to allocate compressed data buffer\n");
-        return -1;
-    }
-
-    if (fread(compressed_data, 1, compressed_size, ctx->input_fp) != compressed_size) {
-        fprintf(stderr, "Error: Failed to read GOP compressed data\n");
-        free(compressed_data);
-        return -1;
-    }
-    ctx->bytes_read += compressed_size;
-
-    // Decode GOP using library
-    int result = tav_video_decode_gop(ctx->video_ctx, compressed_data, compressed_size,
-                                       gop_size, ctx->gop_frames);
-    free(compressed_data);
-
-    if (result < 0) {
-        fprintf(stderr, "Error: GOP decode failed: %s\n", tav_video_get_error(ctx->video_ctx));
-        return -1;
-    }
-
-    // Write frames to FFmpeg
-    size_t frame_size = ctx->header.width * ctx->header.height * 3;
-    for (int i = 0; i < gop_size; i++) {
-        if (ctx->video_pipe) {
-            fwrite(ctx->gop_frames[i], 1, frame_size, ctx->video_pipe);
-        }
-        ctx->frames_decoded++;
-
-        // Check decode limit
-        if (ctx->decode_limit > 0 && ctx->frames_decoded >= (uint64_t)ctx->decode_limit) {
-            break;
-        }
-    }
-
-    ctx->gops_decoded++;
-    return 0;
-}
-
-static int process_iframe_packet(decoder_context_t *ctx) {
-    // Read compressed size (4 bytes)
-    uint32_t compressed_size;
-    if (fread(&compressed_size, 4, 1, ctx->input_fp) != 1) {
-        fprintf(stderr, "Error: Failed to read I-frame compressed size\n");
-        return -1;
-    }
-    ctx->bytes_read += 4;
-
-    if (ctx->dump_packets) {
-        printf("  I-frame: %u bytes compressed\n", compressed_size);
-    }
-
-    // Allocate frame buffer
-    if (allocate_gop_frames(ctx, 1) < 0) {
-        fprintf(stderr, "Error: Failed to allocate I-frame buffer\n");
-        return -1;
-    }
-
-    // Read compressed data
-    uint8_t *compressed_data = malloc(compressed_size);
-    if (!compressed_data) {
-        fprintf(stderr, "Error: Failed to allocate compressed data buffer\n");
-        return -1;
-    }
-
-    if (fread(compressed_data, 1, compressed_size, ctx->input_fp) != compressed_size) {
-        fprintf(stderr, "Error: Failed to read I-frame compressed data\n");
-        free(compressed_data);
-        return -1;
-    }
-    ctx->bytes_read += compressed_size;
-
-    // Decode I-frame using library
-    if (ctx->dump_packets) {
-        printf("  Calling tav_video_decode_iframe(%p, %p, %u, %p)\n",
-               (void*)ctx->video_ctx, (void*)compressed_data, compressed_size, (void*)ctx->gop_frames[0]);
-    }
-
-    int result = tav_video_decode_iframe(ctx->video_ctx, compressed_data, compressed_size,
-                                          ctx->gop_frames[0]);
-    free(compressed_data);
-
-    if (result < 0) {
-        fprintf(stderr, "Error: I-frame decode failed: %s\n", tav_video_get_error(ctx->video_ctx));
-        return -1;
-    }
-
-    if (ctx->dump_packets) {
-        printf("  I-frame decoded successfully\n");
-    }
-
-    // Write frame to FFmpeg
-    if (ctx->video_pipe) {
-        size_t frame_size = ctx->header.width * ctx->header.height * 3;
-        fwrite(ctx->gop_frames[0], 1, frame_size, ctx->video_pipe);
-    }
-
-    ctx->frames_decoded++;
-    return 0;
-}
-
-static int process_audio_tad_packet(decoder_context_t *ctx) {
-    // TAD packet format:
-    // [sample_count(2)][payload_size+7(4)][sample_count(2)][quant_index(1)][compressed_size(4)][compressed_data]
-
-    // Read outer header
-    uint16_t sample_count;
-    uint32_t payload_size_plus_7;
-
-    if (fread(&sample_count, 2, 1, ctx->input_fp) != 1) return -1;
-    if (fread(&payload_size_plus_7, 4, 1, ctx->input_fp) != 1) return -1;
-    ctx->bytes_read += 6;
-
-    if (ctx->dump_packets) {
-        printf("  TAD audio: %u samples, %u bytes payload\n", sample_count, payload_size_plus_7);
-    }
-
-    if (ctx->no_audio) {
-        // Skip audio data
-        fseek(ctx->input_fp, payload_size_plus_7, SEEK_CUR);
-        ctx->bytes_read += payload_size_plus_7;
-        return 0;
-    }
-
-    // Read TAD chunk data (includes inner header)
-    uint8_t *tad_data = malloc(payload_size_plus_7);
-    if (!tad_data) return -1;
-
-    if (fread(tad_data, 1, payload_size_plus_7, ctx->input_fp) != payload_size_plus_7) {
-        free(tad_data);
-        return -1;
-    }
-    ctx->bytes_read += payload_size_plus_7;
-
-    // Allocate output buffer (stereo PCMu8)
-    uint8_t *pcm_output = malloc(sample_count * 2);
-    if (!pcm_output) {
-        free(tad_data);
-        return -1;
-    }
-
-    // Decode TAD using library
-    size_t bytes_consumed = 0;
-    size_t samples_decoded = 0;
-
-    int result = tad32_decode_chunk(tad_data, payload_size_plus_7,
-                                    pcm_output, &bytes_consumed, &samples_decoded);
-    free(tad_data);
-
-    if (result == 0 && samples_decoded > 0) {
-        // Write PCMu8 to audio temp file
-        if (ctx->audio_temp_fp) {
-            fwrite(pcm_output, 1, samples_decoded * 2, ctx->audio_temp_fp);
-        }
-        ctx->audio_samples_decoded += samples_decoded;
-    }
-
-    free(pcm_output);
-    return 0;
-}
-
-static int process_audio_pcm8_packet(decoder_context_t *ctx) {
-    // PCM8 packet format: [size(4)][pcm_data]
-    uint32_t pcm_size;
-    if (fread(&pcm_size, 4, 1, ctx->input_fp) != 1) return -1;
-    ctx->bytes_read += 4;
-
-    if (ctx->dump_packets) {
-        printf("  PCM8 audio: %u bytes\n", pcm_size);
-    }
-
-    if (ctx->no_audio) {
-        fseek(ctx->input_fp, pcm_size, SEEK_CUR);
-        ctx->bytes_read += pcm_size;
-        return 0;
-    }
-
-    // Read and write PCM data directly
-    uint8_t *pcm_data = malloc(pcm_size);
-    if (!pcm_data) return -1;
-
-    if (fread(pcm_data, 1, pcm_size, ctx->input_fp) != pcm_size) {
-        free(pcm_data);
-        return -1;
-    }
-    ctx->bytes_read += pcm_size;
-
-    if (ctx->audio_temp_fp) {
-        fwrite(pcm_data, 1, pcm_size, ctx->audio_temp_fp);
-    }
-
-    ctx->audio_samples_decoded += pcm_size / 2;  // Stereo
-    free(pcm_data);
-    return 0;
-}
-
-static int skip_packet_with_size(decoder_context_t *ctx, const char *name) {
-    uint32_t size;
-    if (fread(&size, 4, 1, ctx->input_fp) != 1) return -1;
-    ctx->bytes_read += 4;
-
-    if (ctx->dump_packets) {
-        printf("  %s: %u bytes (skipped)\n", name, size);
-    }
-
-    fseek(ctx->input_fp, size, SEEK_CUR);
-    ctx->bytes_read += size;
-    return 0;
-}
-
-static int process_packet(decoder_context_t *ctx) {
-    uint8_t packet_type;
-
-    if (fread(&packet_type, 1, 1, ctx->input_fp) != 1) {
-        return -1;  // EOF
-    }
-    ctx->bytes_read++;
-
-    if (ctx->dump_packets) {
-        printf("Packet 0x%02X at offset %lu\n", packet_type, ctx->bytes_read - 1);
-    }
-
-    switch (packet_type) {
-        case TAV_PACKET_GOP_UNIFIED:
-            return process_gop_packet(ctx);
-
-        case TAV_PACKET_IFRAME:
-            return process_iframe_packet(ctx);
-
-        case TAV_PACKET_PFRAME:
-            // P-frame not commonly used in TAV, skip for now
-            return skip_packet_with_size(ctx, "P-frame");
-
-        case TAV_PACKET_AUDIO_TAD:
-            return process_audio_tad_packet(ctx);
-
-        case TAV_PACKET_AUDIO_PCM8:
-            return process_audio_pcm8_packet(ctx);
-
-        case TAV_PACKET_AUDIO_MP2:
-        case TAV_PACKET_AUDIO_TRACK:
-            return skip_packet_with_size(ctx, "Audio track");
-
-        case TAV_PACKET_SUBTITLE:
-        case TAV_PACKET_SUBTITLE_TC:
-            return skip_packet_with_size(ctx, "Subtitle");
-
-        case TAV_PACKET_EXTENDED_HDR: {
-            // Extended header format: [num_pairs(2)][key-value pairs...]
-            // Each KV pair: [key(4)][type(1)][value...]
-            uint16_t num_pairs;
-            if (fread(&num_pairs, 2, 1, ctx->input_fp) != 1) return -1;
-            ctx->bytes_read += 2;
-
-            if (ctx->dump_packets) {
-                printf("  Extended header: %u key-value pairs\n", num_pairs);
-            }
-
-            // Skip key-value pairs
-            for (int i = 0; i < num_pairs; i++) {
-                uint8_t kv_header[5];  // key(4) + type(1)
-                if (fread(kv_header, 1, 5, ctx->input_fp) != 5) return 0;
-                ctx->bytes_read += 5;
-
-                uint8_t value_type = kv_header[4];
-                if (value_type == 0x04) {  // Int64
-                    uint64_t value;
-                    if (fread(&value, 8, 1, ctx->input_fp) != 1) return 0;
-                    ctx->bytes_read += 8;
-                } else if (value_type == 0x10) {  // Bytes
-                    uint16_t length;
-                    if (fread(&length, 2, 1, ctx->input_fp) != 1) return 0;
-                    ctx->bytes_read += 2;
-                    fseek(ctx->input_fp, length, SEEK_CUR);
-                    ctx->bytes_read += length;
-                } else if (value_type <= 0x04) {  // Int types
-                    int sizes[] = {2, 3, 4, 6, 8};  // Int16, Int24, Int32, Int48, Int64
-                    fseek(ctx->input_fp, sizes[value_type], SEEK_CUR);
-                    ctx->bytes_read += sizes[value_type];
-                }
-            }
-            return 0;
-        }
-
-        case TAV_PACKET_SCREEN_MASK: {
-            // Screen mask: 4 bytes (top, bottom, left, right)
-            uint8_t mask[4];
-            if (fread(mask, 1, 4, ctx->input_fp) != 4) return -1;
-            ctx->bytes_read += 4;
-            if (ctx->dump_packets) {
-                printf("  Screen mask: T=%d B=%d L=%d R=%d\n", mask[0], mask[1], mask[2], mask[3]);
-            }
-            return 0;
-        }
-
-        case TAV_PACKET_GOP_SYNC: {
-            // GOP sync: 1 byte (frame count)
-            uint8_t frame_count;
-            if (fread(&frame_count, 1, 1, ctx->input_fp) != 1) return -1;
-            ctx->bytes_read++;
-            if (ctx->dump_packets) {
-                printf("  GOP sync: %d frames\n", frame_count);
-            }
-            return 0;
-        }
-
-        case TAV_PACKET_TIMECODE: {
-            // Timecode: 8 bytes (nanoseconds)
-            uint64_t timecode_ns;
-            if (fread(&timecode_ns, 8, 1, ctx->input_fp) != 1) return -1;
-            ctx->bytes_read += 8;
-            if (ctx->dump_packets) {
-                printf("  Timecode: %.3f sec\n", timecode_ns / 1000000000.0);
-            }
-            return 0;
-        }
-
-        case TAV_PACKET_SYNC_NTSC:
-        case TAV_PACKET_SYNC:
-            // Sync packets: no payload
-            if (ctx->dump_packets) {
-                printf("  Sync packet\n");
-            }
-            return 0;
-
-        default:
-            if (ctx->verbose) {
-                fprintf(stderr, "Warning: Unknown packet type 0x%02X, attempting to skip\n", packet_type);
-            }
-            // Try to skip by reading size
-            uint32_t size;
-            if (fread(&size, 4, 1, ctx->input_fp) != 1) return 0;  // May be EOF
-            ctx->bytes_read += 4;
-            if (size < 1000000) {  // Sanity check
-                fseek(ctx->input_fp, size, SEEK_CUR);
-                ctx->bytes_read += size;
-            }
-            return 0;
-    }
-}
-
-// =============================================================================
-// Multithreaded Video Decoding (Pass 2)
-// =============================================================================
-
-// Read a single GOP packet without decoding - for multithreaded submission
-static int read_gop_packet_mt(decoder_context_t *ctx, int slot_idx) {
-    gop_decode_job_t *job = &ctx->slots[slot_idx];
-
-    // Read GOP size (1 byte)
-    uint8_t gop_size;
-    if (fread(&gop_size, 1, 1, ctx->input_fp) != 1) {
-        return -1;
-    }
-    ctx->bytes_read++;
-
-    // Read compressed size (4 bytes)
-    uint32_t compressed_size;
-    if (fread(&compressed_size, 4, 1, ctx->input_fp) != 1) {
-        return -1;
-    }
-    ctx->bytes_read += 4;
-
-    // Read compressed data
-    uint8_t *compressed_data = malloc(compressed_size);
-    if (!compressed_data) {
-        fprintf(stderr, "Error: Failed to allocate compressed data buffer\n");
-        return -1;
-    }
-
-    if (fread(compressed_data, 1, compressed_size, ctx->input_fp) != compressed_size) {
-        free(compressed_data);
-        return -1;
-    }
-    ctx->bytes_read += compressed_size;
-
-    // Fill job
-    job->compressed_data = compressed_data;
-    job->compressed_size = compressed_size;
-    job->gop_size = gop_size;
-    job->decode_result = 0;
-
-    return gop_size;
-}
-
-// Multithreaded pass 2 decoding loop
-static int decode_video_pass2_mt(decoder_context_t *ctx) {
-    size_t frame_size = ctx->header.width * ctx->header.height * 3;
-    int done = 0;
-    int job_counter = 0;
-
-    while (!done) {
-        // Try to submit new jobs to any free slots
-        pthread_mutex_lock(&ctx->mutex);
-
-        // Find a free slot
-        int free_slot = -1;
-        for (int i = 0; i < ctx->num_slots; i++) {
-            if (ctx->slots[i].status == DECODE_SLOT_DONE &&
-                ctx->slots[i].compressed_data == NULL) {
-                free_slot = i;
-                break;
-            }
-        }
-
-        pthread_mutex_unlock(&ctx->mutex);
-
-        if (free_slot >= 0) {
-            // Read next packet
-            uint8_t packet_type;
-            if (fread(&packet_type, 1, 1, ctx->input_fp) != 1) {
-                // EOF
-                done = 1;
-            } else {
-                ctx->bytes_read++;
-
-                if (packet_type == TAV_PACKET_GOP_UNIFIED) {
-                    // Read GOP and submit to slot
-                    int gop_size = read_gop_packet_mt(ctx, free_slot);
-                    if (gop_size > 0) {
-                        pthread_mutex_lock(&ctx->mutex);
-                        ctx->slots[free_slot].job_id = job_counter++;
-                        ctx->slots[free_slot].status = DECODE_SLOT_PENDING;
-                        ctx->jobs_submitted++;
-                        pthread_cond_broadcast(&ctx->cond_job_available);
-                        pthread_mutex_unlock(&ctx->mutex);
-                    } else {
-                        done = 1;
-                    }
-                } else if (packet_type == TAV_PACKET_IFRAME) {
-                    // For I-frames, decode synchronously (they're rare)
-                    process_iframe_packet(ctx);
-                } else {
-                    // Skip other packets (audio already extracted in Pass 1)
-                    switch (packet_type) {
-                        case TAV_PACKET_AUDIO_TAD: {
-                            // TAD format: [sample_count(2)][payload_size+7(4)][data...]
-                            uint16_t sample_count;
-                            uint32_t payload_size;
-                            if (fread(&sample_count, 2, 1, ctx->input_fp) != 1) { done = 1; break; }
-                            if (fread(&payload_size, 4, 1, ctx->input_fp) != 1) { done = 1; break; }
-                            ctx->bytes_read += 6;
-                            fseek(ctx->input_fp, payload_size, SEEK_CUR);
-                            ctx->bytes_read += payload_size;
-                            break;
-                        }
-                        case TAV_PACKET_AUDIO_PCM8:
-                        case TAV_PACKET_AUDIO_MP2:
-                        case TAV_PACKET_AUDIO_TRACK:
-                        case TAV_PACKET_SUBTITLE:
-                        case TAV_PACKET_SUBTITLE_TC:
-                        case TAV_PACKET_PFRAME: {
-                            uint32_t size;
-                            if (fread(&size, 4, 1, ctx->input_fp) != 1) { done = 1; break; }
-                            ctx->bytes_read += 4;
-                            fseek(ctx->input_fp, size, SEEK_CUR);
-                            ctx->bytes_read += size;
-                            break;
-                        }
-                        case TAV_PACKET_SCREEN_MASK:
-                            fseek(ctx->input_fp, 4, SEEK_CUR);
-                            ctx->bytes_read += 4;
-                            break;
-                        case TAV_PACKET_GOP_SYNC:
-                            fseek(ctx->input_fp, 1, SEEK_CUR);
-                            ctx->bytes_read += 1;
-                            break;
-                        case TAV_PACKET_TIMECODE:
-                            fseek(ctx->input_fp, 8, SEEK_CUR);
-                            ctx->bytes_read += 8;
-                            break;
-                        case TAV_PACKET_EXTENDED_HDR: {
-                            // Skip extended header
-                            uint16_t num_pairs;
-                            if (fread(&num_pairs, 2, 1, ctx->input_fp) != 1) { done = 1; break; }
-                            ctx->bytes_read += 2;
-                            for (int i = 0; i < num_pairs; i++) {
-                                uint8_t kv_header[5];
-                                if (fread(kv_header, 1, 5, ctx->input_fp) != 5) break;
-                                ctx->bytes_read += 5;
-                                uint8_t value_type = kv_header[4];
-                                if (value_type == 0x04) {
-                                    fseek(ctx->input_fp, 8, SEEK_CUR);
-                                    ctx->bytes_read += 8;
-                                } else if (value_type == 0x10) {
-                                    uint16_t length;
-                                    if (fread(&length, 2, 1, ctx->input_fp) != 1) break;
-                                    ctx->bytes_read += 2;
-                                    fseek(ctx->input_fp, length, SEEK_CUR);
-                                    ctx->bytes_read += length;
-                                } else if (value_type <= 0x04) {
-                                    int sizes[] = {2, 3, 4, 6, 8};
-                                    fseek(ctx->input_fp, sizes[value_type], SEEK_CUR);
-                                    ctx->bytes_read += sizes[value_type];
-                                }
-                            }
-                            break;
-                        }
-                        case TAV_PACKET_SYNC_NTSC:
-                        case TAV_PACKET_SYNC:
-                            // No payload
-                            break;
-                        default:
-                            // Unknown packet, try to skip
-                            {
-                                uint32_t size;
-                                if (fread(&size, 4, 1, ctx->input_fp) == 1 && size < 1000000) {
-                                    fseek(ctx->input_fp, size, SEEK_CUR);
-                                    ctx->bytes_read += 4 + size;
-                                }
-                            }
-                            break;
-                    }
-                }
-            }
-        }
-
-        // Write completed jobs in order
-        pthread_mutex_lock(&ctx->mutex);
-        while (1) {
-            // Find the next job to write (by job_id order)
-            int write_slot = -1;
-            int min_job_id = INT32_MAX;
-            for (int i = 0; i < ctx->num_slots; i++) {
-                if (ctx->slots[i].status == DECODE_SLOT_DONE &&
-                    ctx->slots[i].job_id >= 0 &&
-                    ctx->slots[i].job_id < min_job_id) {
-                    // Check if this is the next expected job
-                    if (ctx->slots[i].job_id == ctx->next_write_slot) {
-                        write_slot = i;
-                        break;
-                    }
-                    min_job_id = ctx->slots[i].job_id;
-                }
-            }
-
-            if (write_slot < 0) {
-                // No jobs ready in order, wait if there are pending jobs
-                if (!done && ctx->jobs_submitted > ctx->next_write_slot) {
-                    // Wait for job to complete
-                    pthread_cond_wait(&ctx->cond_slot_free, &ctx->mutex);
-                    continue;
-                }
-                break;
-            }
-
-            pthread_mutex_unlock(&ctx->mutex);
-
-            // Write frames to FFmpeg
-            gop_decode_job_t *job = &ctx->slots[write_slot];
-            if (job->decode_result >= 0) {
-                for (int i = 0; i < job->gop_size; i++) {
-                    if (ctx->video_pipe) {
-                        fwrite(job->frames[i], 1, frame_size, ctx->video_pipe);
-                    }
-                    ctx->frames_decoded++;
-
-                    if (ctx->decode_limit > 0 && ctx->frames_decoded >= (uint64_t)ctx->decode_limit) {
-                        done = 1;
-                        break;
-                    }
-                }
-                ctx->gops_decoded++;
-            }
-
-            // Mark slot as free
-            pthread_mutex_lock(&ctx->mutex);
-            job->job_id = -1;
-            ctx->next_write_slot++;
-            pthread_mutex_unlock(&ctx->mutex);
-
-            // Progress
-            time_t elapsed = time(NULL) - ctx->start_time;
-            double fps = elapsed > 0 ? (double)ctx->frames_decoded / elapsed : 0.0;
-            printf("\rFrames: %lu | GOPs: %lu | %.1f fps",
-                   ctx->frames_decoded, ctx->gops_decoded, fps);
-            fflush(stdout);
-
-            pthread_mutex_lock(&ctx->mutex);
-        }
-        pthread_mutex_unlock(&ctx->mutex);
-
-        // Check decode limit
-        if (ctx->decode_limit > 0 && ctx->frames_decoded >= (uint64_t)ctx->decode_limit) {
-            done = 1;
-        }
-    }
-
-    // Wait for remaining jobs to complete
-    pthread_mutex_lock(&ctx->mutex);
-    while (ctx->jobs_completed < ctx->jobs_submitted) {
-        pthread_cond_wait(&ctx->cond_slot_free, &ctx->mutex);
-    }
-
-    // Write any remaining completed jobs
-    while (1) {
-        int write_slot = -1;
-        for (int i = 0; i < ctx->num_slots; i++) {
-            if (ctx->slots[i].status == DECODE_SLOT_DONE &&
-                ctx->slots[i].job_id == ctx->next_write_slot) {
-                write_slot = i;
-                break;
-            }
-        }
-
-        if (write_slot < 0) break;
-
-        pthread_mutex_unlock(&ctx->mutex);
-
-        gop_decode_job_t *job = &ctx->slots[write_slot];
-        if (job->decode_result >= 0) {
-            for (int i = 0; i < job->gop_size; i++) {
-                if (ctx->video_pipe) {
-                    fwrite(job->frames[i], 1, frame_size, ctx->video_pipe);
-                }
-                ctx->frames_decoded++;
-            }
-            ctx->gops_decoded++;
-        }
-
-        pthread_mutex_lock(&ctx->mutex);
-        job->job_id = -1;
-        ctx->next_write_slot++;
-
-        time_t elapsed = time(NULL) - ctx->start_time;
-        double fps = elapsed > 0 ? (double)ctx->frames_decoded / elapsed : 0.0;
-        printf("\rFrames: %lu | GOPs: %lu | %.1f fps",
-               ctx->frames_decoded, ctx->gops_decoded, fps);
-        fflush(stdout);
-    }
-    pthread_mutex_unlock(&ctx->mutex);
-
-    printf("\n");
-    return 0;
-}
-
-// =============================================================================
-// Multithreaded Audio Extraction (Pass 1)
-// =============================================================================
-
-// Audio worker thread - decodes audio packets in parallel
-static void *audio_worker_thread(void *arg) {
-    decoder_context_t *ctx = (decoder_context_t*)arg;
-    FILE *input_fp = fopen(ctx->input_file, "rb");
-    if (!input_fp) {
-        return NULL;
-    }
-
-    while (1) {
-        pthread_mutex_lock(&ctx->audio_mutex);
-
-        // Wait for job or exit signal
-        while (ctx->next_audio_job >= ctx->audio_job_count && !ctx->audio_threads_should_exit) {
-            pthread_cond_wait(&ctx->audio_cond_job_available, &ctx->audio_mutex);
-        }
-
-        if (ctx->audio_threads_should_exit) {
-            pthread_mutex_unlock(&ctx->audio_mutex);
-            break;
-        }
-
-        // Get next job
-        int job_idx = ctx->next_audio_job++;
-        pthread_mutex_unlock(&ctx->audio_mutex);
-
-        if (job_idx >= ctx->audio_job_count) break;
-
-        audio_decode_job_t *job = &ctx->audio_jobs[job_idx];
-        job->status = DECODE_SLOT_PROCESSING;
-
-        // Seek to packet location
-        fseek(input_fp, job->file_offset, SEEK_SET);
-
-        if (job->packet_type == TAV_PACKET_AUDIO_TAD) {
-            // Read TAD packet data
-            uint8_t *tad_data = malloc(job->payload_size);
-            if (tad_data && fread(tad_data, 1, job->payload_size, input_fp) == job->payload_size) {
-                // Allocate output buffer
-                job->decoded_pcm = malloc(job->sample_count * 2);
-                if (job->decoded_pcm) {
-                    size_t bytes_consumed = 0;
-                    int result = tad32_decode_chunk(tad_data, job->payload_size,
-                                                    job->decoded_pcm, &bytes_consumed,
-                                                    &job->decoded_samples);
-                    if (result != 0) {
-                        free(job->decoded_pcm);
-                        job->decoded_pcm = NULL;
-                        job->decoded_samples = 0;
-                    }
-                }
-                free(tad_data);
-            }
-        } else if (job->packet_type == TAV_PACKET_AUDIO_PCM8) {
-            // Read PCM8 data directly
-            job->decoded_pcm = malloc(job->payload_size);
-            if (job->decoded_pcm && fread(job->decoded_pcm, 1, job->payload_size, input_fp) == job->payload_size) {
-                job->decoded_samples = job->payload_size / 2;  // Stereo
-            } else {
-                free(job->decoded_pcm);
-                job->decoded_pcm = NULL;
-                job->decoded_samples = 0;
-            }
-        }
-
-        job->status = DECODE_SLOT_DONE;
-    }
-
-    fclose(input_fp);
-    return NULL;
-}
-
-// Scan file and collect all audio packet metadata
-static int collect_audio_packets(decoder_context_t *ctx) {
-    long current_pos = ftell(ctx->input_fp);
-
-    ctx->audio_job_capacity = 1024;
-    ctx->audio_jobs = malloc(ctx->audio_job_capacity * sizeof(audio_decode_job_t));
-    if (!ctx->audio_jobs) return -1;
-    ctx->audio_job_count = 0;
-
-    // Scan through file
-    while (1) {
-        uint8_t packet_type;
-
-        if (fread(&packet_type, 1, 1, ctx->input_fp) != 1) break;
-
-        if (packet_type == TAV_PACKET_AUDIO_TAD) {
-            // TAD packet: [sample_count(2)][payload_size+7(4)][payload...]
-            uint16_t sample_count;
-            uint32_t payload_size_plus_7;
-
-            if (fread(&sample_count, 2, 1, ctx->input_fp) != 1) break;
-            if (fread(&payload_size_plus_7, 4, 1, ctx->input_fp) != 1) break;
-
-            // Grow array if needed
-            if (ctx->audio_job_count >= ctx->audio_job_capacity) {
-                ctx->audio_job_capacity *= 2;
-                ctx->audio_jobs = realloc(ctx->audio_jobs,
-                    ctx->audio_job_capacity * sizeof(audio_decode_job_t));
-                if (!ctx->audio_jobs) return -1;
-            }
-
-            // Add job
-            audio_decode_job_t *job = &ctx->audio_jobs[ctx->audio_job_count++];
-            job->file_offset = ftell(ctx->input_fp);
-            job->payload_size = payload_size_plus_7;
-            job->sample_count = sample_count;
-            job->packet_type = TAV_PACKET_AUDIO_TAD;
-            job->decoded_pcm = NULL;
-            job->decoded_samples = 0;
-            job->status = DECODE_SLOT_PENDING;
-
-            fseek(ctx->input_fp, payload_size_plus_7, SEEK_CUR);
-
-        } else if (packet_type == TAV_PACKET_AUDIO_PCM8) {
-            // PCM8 packet: [size(4)][pcm_data]
-            uint32_t pcm_size;
-            if (fread(&pcm_size, 4, 1, ctx->input_fp) != 1) break;
-
-            // Grow array if needed
-            if (ctx->audio_job_count >= ctx->audio_job_capacity) {
-                ctx->audio_job_capacity *= 2;
-                ctx->audio_jobs = realloc(ctx->audio_jobs,
-                    ctx->audio_job_capacity * sizeof(audio_decode_job_t));
-                if (!ctx->audio_jobs) return -1;
-            }
-
-            // Add job
-            audio_decode_job_t *job = &ctx->audio_jobs[ctx->audio_job_count++];
-            job->file_offset = ftell(ctx->input_fp);
-            job->payload_size = pcm_size;
-            job->sample_count = pcm_size / 2;
-            job->packet_type = TAV_PACKET_AUDIO_PCM8;
-            job->decoded_pcm = NULL;
-            job->decoded_samples = 0;
-            job->status = DECODE_SLOT_PENDING;
-
-            fseek(ctx->input_fp, pcm_size, SEEK_CUR);
-
-        } else {
-            // Skip other packet types
-            if (packet_type == TAV_PACKET_GOP_UNIFIED) {
-                uint8_t gop_size;
-                uint32_t compressed_size;
-                if (fread(&gop_size, 1, 1, ctx->input_fp) != 1) break;
-                if (fread(&compressed_size, 4, 1, ctx->input_fp) != 1) break;
-                fseek(ctx->input_fp, compressed_size, SEEK_CUR);
-            } else if (packet_type == TAV_PACKET_IFRAME) {
-                uint32_t compressed_size;
-                if (fread(&compressed_size, 4, 1, ctx->input_fp) != 1) break;
-                fseek(ctx->input_fp, compressed_size, SEEK_CUR);
-            } else if (packet_type == TAV_PACKET_EXTENDED_HDR) {
-                uint16_t num_pairs;
-                if (fread(&num_pairs, 2, 1, ctx->input_fp) != 1) break;
-                for (int i = 0; i < num_pairs; i++) {
-                    uint8_t kv_header[5];
-                    if (fread(kv_header, 1, 5, ctx->input_fp) != 5) break;
-                    uint8_t value_type = kv_header[4];
-                    if (value_type == 0x04) {
-                        fseek(ctx->input_fp, 8, SEEK_CUR);
-                    } else if (value_type == 0x10) {
-                        uint16_t length;
-                        if (fread(&length, 2, 1, ctx->input_fp) != 1) break;
-                        fseek(ctx->input_fp, length, SEEK_CUR);
-                    } else if (value_type <= 0x04) {
-                        int sizes[] = {2, 3, 4, 6, 8};
-                        fseek(ctx->input_fp, sizes[value_type], SEEK_CUR);
-                    }
-                }
-            } else if (packet_type == TAV_PACKET_SCREEN_MASK) {
-                fseek(ctx->input_fp, 4, SEEK_CUR);
-            } else if (packet_type == TAV_PACKET_GOP_SYNC) {
-                fseek(ctx->input_fp, 1, SEEK_CUR);
-            } else if (packet_type == TAV_PACKET_TIMECODE) {
-                fseek(ctx->input_fp, 8, SEEK_CUR);
-            } else if (packet_type == TAV_PACKET_SYNC_NTSC ||
-                       packet_type == TAV_PACKET_SYNC) {
-                // No payload
-            } else {
-                // Unknown packet - try to skip by reading size
-                uint32_t size;
-                if (fread(&size, 4, 1, ctx->input_fp) != 1) break;
-                if (size < 1000000) {
-                    fseek(ctx->input_fp, size, SEEK_CUR);
-                } else {
-                    break;  // Likely corrupt
-                }
-            }
-        }
-    }
-
-    // Restore file position
-    fseek(ctx->input_fp, current_pos, SEEK_SET);
-    return 0;
-}
-
-// Extract audio using multiple threads
-static int extract_audio_mt(decoder_context_t *ctx) {
-    // Collect all audio packet metadata
-    if (collect_audio_packets(ctx) < 0) {
-        fprintf(stderr, "Error: Failed to collect audio packets\n");
-        return -1;
-    }
-
-    if (ctx->audio_job_count == 0) {
-        // No audio packets found
-        return 0;
-    }
-
-    if (ctx->verbose) {
-        printf("  Found %d audio packets\n", ctx->audio_job_count);
-    }
-
-    // Initialize audio threading
-    ctx->num_audio_threads = ctx->num_threads > 0 ? ctx->num_threads : 1;
-    ctx->next_audio_job = 0;
-    ctx->next_audio_write = 0;
-    ctx->audio_threads_should_exit = 0;
-
-    pthread_mutex_init(&ctx->audio_mutex, NULL);
-    pthread_cond_init(&ctx->audio_cond_job_available, NULL);
-
-    // Create worker threads
-    ctx->audio_worker_threads = malloc(ctx->num_audio_threads * sizeof(pthread_t));
-    if (!ctx->audio_worker_threads) return -1;
-
-    for (int i = 0; i < ctx->num_audio_threads; i++) {
-        if (pthread_create(&ctx->audio_worker_threads[i], NULL,
-                          audio_worker_thread, ctx) != 0) {
-            fprintf(stderr, "Error: Failed to create audio worker thread %d\n", i);
-            return -1;
-        }
-    }
-
-    // Signal all jobs available
-    pthread_mutex_lock(&ctx->audio_mutex);
-    pthread_cond_broadcast(&ctx->audio_cond_job_available);
-    pthread_mutex_unlock(&ctx->audio_mutex);
-
-    // Write decoded audio in order
-    for (int i = 0; i < ctx->audio_job_count; i++) {
-        audio_decode_job_t *job = &ctx->audio_jobs[i];
-
-        // Wait for this job to complete
-        while (job->status != DECODE_SLOT_DONE) {
-            usleep(100);
-        }
-
-        // Write to temp file
-        if (job->decoded_pcm && job->decoded_samples > 0 && ctx->audio_temp_fp) {
-            fwrite(job->decoded_pcm, 1, job->decoded_samples * 2, ctx->audio_temp_fp);
-            ctx->audio_samples_decoded += job->decoded_samples;
-        }
-    }
-
-    // Signal threads to exit
-    pthread_mutex_lock(&ctx->audio_mutex);
-    ctx->audio_threads_should_exit = 1;
-    pthread_cond_broadcast(&ctx->audio_cond_job_available);
-    pthread_mutex_unlock(&ctx->audio_mutex);
-
-    // Wait for threads to finish
-    for (int i = 0; i < ctx->num_audio_threads; i++) {
-        pthread_join(ctx->audio_worker_threads[i], NULL);
-    }
-
-    // Cleanup
-    for (int i = 0; i < ctx->audio_job_count; i++) {
-        if (ctx->audio_jobs[i].decoded_pcm) {
-            free(ctx->audio_jobs[i].decoded_pcm);
-        }
-    }
-    free(ctx->audio_jobs);
-    free(ctx->audio_worker_threads);
-
-    pthread_mutex_destroy(&ctx->audio_mutex);
-    pthread_cond_destroy(&ctx->audio_cond_job_available);
-
-    return 0;
-}
-
-// =============================================================================
-// Main Decoding Loop
-// =============================================================================
-
-static int decode_video(decoder_context_t *ctx) {
-    printf("Decoding...\n");
-    ctx->start_time = time(NULL);
-
-    // Special path for still images (TAP format) - output directly to PNG/TGA
-    if (ctx->is_still_image) {
-        printf("Decoding still picture...\n");
-
-        // Allocate frame buffer for single frame
-        if (allocate_gop_frames(ctx, 1) < 0) {
-            fprintf(stderr, "Error: Failed to allocate frame buffer\n");
-            return -1;
-        }
-
-        // Process packets until we get the first frame
-        int found_frame = 0;
-        while (!found_frame && process_packet(ctx) == 0) {
-            if (ctx->frames_decoded > 0) {
-                found_frame = 1;
-            }
-        }
-
-        if (!found_frame || ctx->frames_decoded == 0) {
-            fprintf(stderr, "Error: No video frame found in TAP file\n");
-            return -1;
-        }
-
-        // Write the decoded frame to output file
-        printf("Writing %s...\n", ctx->output_tga ? "TGA" : "PNG");
-        if (write_still_image(ctx, ctx->gop_frames[0]) < 0) {
-            fprintf(stderr, "Error: Failed to write output image\n");
-            return -1;
-        }
-
-        printf("Successfully decoded still picture\n");
-        return 0;
-    }
-
-    // Two-pass approach for proper audio/video muxing:
-    // Pass 1: Extract all audio to temp file
-    // Pass 2: Spawn FFmpeg with complete audio, decode video
-
-    long data_start = ftell(ctx->input_fp);
-
-    // Pass 1: Audio extraction
-    if (!ctx->no_audio) {
-        printf("Pass 1: Extracting audio");
-        if (ctx->num_threads > 0) {
-            printf(" (%d threads)...\n", ctx->num_threads);
-            if (extract_audio_mt(ctx) < 0) {
-                fprintf(stderr, "Error: Multithreaded audio extraction failed\n");
-                return -1;
-            }
-        } else {
-            printf("...\n");
-            // Fallback to single-threaded
-            while (process_packet(ctx) == 0) {
-                // Check decode limit
-                if (ctx->decode_limit > 0 && ctx->frames_decoded >= (uint64_t)ctx->decode_limit) {
-                    break;
-                }
-            }
-        }
-
-        // Close and flush audio file
-        if (ctx->audio_temp_fp) {
-            fclose(ctx->audio_temp_fp);
-            ctx->audio_temp_fp = NULL;
-        }
-
-        printf("  Audio samples: %lu\n", ctx->audio_samples_decoded);
-    }
-
-    // Reset for pass 2
-    fseek(ctx->input_fp, data_start, SEEK_SET);
-    ctx->frames_decoded = 0;
-    ctx->gops_decoded = 0;
-    ctx->bytes_read = 32;  // Header already read
-
-    // Spawn FFmpeg with complete audio
-    printf("Pass 2: Decoding video and muxing...\n");
-    if (spawn_ffmpeg(ctx) < 0) {
-        return -1;
-    }
-
-    // Initialize decoder threads if multithreaded mode
-    if (ctx->num_threads > 0) {
-        if (init_decoder_threads(ctx) < 0) {
-            fprintf(stderr, "Error: Failed to initialize decoder threads\n");
-            return -1;
-        }
-        printf("  Using %d decoder threads\n", ctx->num_threads);
-    }
-
-    // Pass 2: Video decoding
-    if (ctx->num_threads > 0) {
-        // Multithreaded decode
-        int result = decode_video_pass2_mt(ctx);
-        cleanup_decoder_threads(ctx);
-        return result;
-    } else {
-        // Single-threaded decode
-        uint64_t last_reported = 0;
-        while (process_packet(ctx) == 0) {
-            // Progress reporting - show when frames were decoded
-            if (ctx->frames_decoded != last_reported) {
-                time_t elapsed = time(NULL) - ctx->start_time;
-                double fps = elapsed > 0 ? (double)ctx->frames_decoded / elapsed : 0.0;
-                printf("\rFrames: %lu | GOPs: %lu | %.1f fps",
-                       ctx->frames_decoded, ctx->gops_decoded, fps);
-                fflush(stdout);
-                last_reported = ctx->frames_decoded;
-            }
-
-            // Check decode limit
-            if (ctx->decode_limit > 0 && ctx->frames_decoded >= (uint64_t)ctx->decode_limit) {
-                break;
-            }
-        }
-
-        printf("\n");
-        return 0;
-    }
-}
-
-// =============================================================================
-// Usage and Main
-// =============================================================================
-
-// Generate output filename by replacing extension with .mkv
-static char *generate_output_filename(const char *input_file) {
-    size_t len = strlen(input_file);
-    char *output = malloc(len + 5);  // Worst case: add ".mkv" + null
-    if (!output) return NULL;
-
-    strcpy(output, input_file);
-
-    // Find last dot in filename (not in path)
-    char *last_dot = strrchr(output, '.');
-    char *last_slash = strrchr(output, '/');
-
-    // Only replace if dot is after last slash (i.e., in filename, not path)
-    if (last_dot && (!last_slash || last_dot > last_slash)) {
-        strcpy(last_dot, ".mkv");
-    } else {
-        // No extension found, append .mkv
-        strcat(output, ".mkv");
-    }
-
-    return output;
-}
-
-/**
- * Get number of available CPU cores.
- * Returns the number of online processors, or 1 on error.
- */
-static int get_available_cpus(void) {
-#ifdef _SC_NPROCESSORS_ONLN
-    long nproc = sysconf(_SC_NPROCESSORS_ONLN);
-    if (nproc > 0) {
-        return (int)nproc;
-    }
-#endif
-    return 1;  // Fallback to single core
-}
-
-/**
- * Get default thread count (cap at 8)
- */
-static int get_default_thread_count(void) {
-    int available = get_available_cpus();
-    return available < 8 ? available : 8;
-}
-
-static void print_usage(const char *program) {
-    printf("TAV/TAP Decoder - TSVM Advanced Video/Picture Codec (Reference Implementation)\n");
-    printf("\nUsage: %s -i input.tav [-o output.mkv] [options]\n\n", program);
-    printf("Required:\n");
-    printf("  -i, --input FILE         Input TAV (video) or TAP (still image) file\n");
-    printf("\nOptional:\n");
-    printf("  -o, --output FILE        Output file (default: input with .mkv/.png extension)\n");
-    printf("  --raw                    Output raw video (no FFV1 compression)\n");
-    printf("  --no-audio               Skip audio decoding\n");
-    printf("  --decode-limit N         Decode only first N frames\n");
-    printf("  --dump-packets           Debug: print packet info\n");
-    printf("  -t, --threads N          Number of decoder threads (0=single-threaded, default)\n");
-    printf("  -v, --verbose            Verbose output\n");
-    printf("  --help                   Show this help\n");
-    printf("\nStill Image (TAP) Options:\n");
-    printf("  --tga                    Output TGA format instead of PNG (for TAP files)\n");
-    printf("\nExamples:\n");
-    printf("  %s -i video.tav                      # Output: video.mkv\n", program);
-    printf("  %s -i video.tav -o custom.mkv\n", program);
-    printf("  %s -i video.tav --verbose --decode-limit 100\n", program);
-    printf("  %s -i image.tap                      # Output: image.png\n", program);
-    printf("  %s -i image.tap --tga -o out.tga     # Output: out.tga\n", program);
-}
-
-int main(int argc, char *argv[]) {
-    printf("TAV Decoder - %s\n", DECODER_VENDOR_STRING);
-    printf("Using libtavdec + libtaddec\n\n");
-
-    decoder_context_t ctx = {0};
-
-    // Initialize threading
-    ctx.num_threads = get_default_thread_count();
-
-    // Command-line options
-    static struct option long_options[] = {
-        {"input",        required_argument, 0, 'i'},
-        {"output",       required_argument, 0, 'o'},
-        {"verbose",      no_argument,       0, 'v'},
-        {"threads",      required_argument, 0, 't'},
-        {"raw",          no_argument,       0, 1001},
-        {"no-audio",     no_argument,       0, 1002},
-        {"decode-limit", required_argument, 0, 1003},
-        {"dump-packets", no_argument,       0, 1004},
-        {"tga",          no_argument,       0, 1005},
-        {"help",         no_argument,       0, 'h'},
-        {0, 0, 0, 0}
-    };
-
-    int c, option_index = 0;
-    while ((c = getopt_long(argc, argv, "i:o:t:vh", long_options, &option_index)) != -1) {
-        switch (c) {
-            case 'i':
-                ctx.input_file = strdup(optarg);
-                break;
-            case 'o':
-                ctx.output_file = strdup(optarg);
-                break;
-            case 'v':
-                ctx.verbose = 1;
-                break;
-            case 't': {  // --threads
-                int threads = atoi(optarg);
-                if (threads < 0) {
-                    fprintf(stderr, "Error: Thread count must be positive\n");
-                    return 1;
-                }
-                // Both 0 and 1 mean single-threaded (use value 0 internally)
-                ctx.num_threads = (threads <= 1) ? 0 : threads;
-                break;
-            }
-            case 1001:
-                ctx.output_raw = 1;
-                break;
-            case 1002:
-                ctx.no_audio = 1;
-                break;
-            case 1003:
-                ctx.decode_limit = atoi(optarg);
-                break;
-            case 1004:
-                ctx.dump_packets = 1;
-                break;
-            case 1005:  // --tga
-                ctx.output_tga = 1;
-                break;
-            case 'h':
-            case '?':
-            default:
-                print_usage(argv[0]);
-                return (c == 'h' || c == '?') ? 0 : 1;
-        }
-    }
-
-    // Validate arguments
-    if (!ctx.input_file) {
-        fprintf(stderr, "Error: Input file is required\n\n");
-        print_usage(argv[0]);
-        return 1;
-    }
-
-    // Generate output filename if not provided
-    if (!ctx.output_file) {
-        ctx.output_file = generate_output_filename(ctx.input_file);
-        if (!ctx.output_file) {
-            fprintf(stderr, "Error: Failed to generate output filename\n");
-            return 1;
-        }
-    }
-
-    // Open input file
-    ctx.input_fp = fopen(ctx.input_file, "rb");
-    if (!ctx.input_fp) {
-        fprintf(stderr, "Error: Cannot open input file: %s\n", ctx.input_file);
-        return 1;
-    }
-
-    // Read and parse header
-    if (read_tav_header(&ctx) < 0) {
-        fclose(ctx.input_fp);
-        return 1;
-    }
-
-    // Scan for XFPS if header.fps == 0xFF
-    scan_for_xfps(&ctx);
-
-    // Handle still image (TAP) mode
-    if (ctx.is_still_image) {
-        printf("Detected still picture (TAP format)\n");
-
-        // Force single-threaded mode (override user option)
-        if (ctx.num_threads > 0) {
-            printf("  Disabling multithreading for still image\n");
-            ctx.num_threads = 0;
-        }
-
-        // Disable audio for still images
-        ctx.no_audio = 1;
-
-        // Bypass grain synthesis (set anime preset bit)
-        // Bit 1 of encoder_preset disables grain synthesis
-        ctx.header.encoder_preset |= 0x02;
-
-        // Set decode limit to 1 frame
-        ctx.decode_limit = 1;
-
-        // Update output filename to use .png or .tga if it ends with .mkv (auto-generated)
-        if (ctx.output_file) {
-            char *last_dot = strrchr(ctx.output_file, '.');
-            if (last_dot && strcmp(last_dot, ".mkv") == 0) {
-                const char *new_ext = ctx.output_tga ? ".tga" : ".png";
-                strcpy(last_dot, new_ext);
-            }
-        }
-
-        printf("  Output format: %s\n", ctx.output_tga ? "TGA" : "PNG");
-    }
-
-    // Create audio temp file
-    char temp_audio_file[256];
-    snprintf(temp_audio_file, sizeof(temp_audio_file), "/tmp/tav_dec_audio_%d.pcm", getpid());
-    ctx.audio_temp_file = strdup(temp_audio_file);
-
-    if (!ctx.no_audio) {
-        ctx.audio_temp_fp = fopen(ctx.audio_temp_file, "wb");
-        if (!ctx.audio_temp_fp) {
-            fprintf(stderr, "Error: Cannot create audio temp file: %s\n", ctx.audio_temp_file);
-            fclose(ctx.input_fp);
-            return 1;
-        }
-    }
-
-    // Initialize video decoder
-    tav_video_params_t video_params = {
-        .width = ctx.header.width,
-        .height = ctx.header.height,
-        .decomp_levels = ctx.header.decomp_levels,
-        .temporal_levels = 2,  // Default
-        .wavelet_filter = ctx.header.wavelet_filter,
-        .temporal_wavelet = 255,  // Haar
-        .entropy_coder = ctx.header.entropy_coder,
-        .channel_layout = ctx.header.channel_layout,
-        .perceptual_tuning = ctx.perceptual_mode,
-        .quantiser_y = ctx.header.quantiser_y,
-        .quantiser_co = ctx.header.quantiser_co,
-        .quantiser_cg = ctx.header.quantiser_cg,
-        .encoder_preset = ctx.header.encoder_preset,
-        .monoblock = 1
-    };
-
-    ctx.video_ctx = tav_video_create(&video_params);
-    if (!ctx.video_ctx) {
-        fprintf(stderr, "Error: Failed to create video decoder context\n");
-        fclose(ctx.input_fp);
-        if (ctx.audio_temp_fp) fclose(ctx.audio_temp_fp);
-        return 1;
-    }
-
-    tav_video_set_verbose(ctx.video_ctx, ctx.verbose);
-
-    printf("Input: %s\n", ctx.input_file);
-    printf("Output: %s\n", ctx.output_file);
-    if (ctx.is_still_image) {
-        printf("Resolution: %dx%d (still picture)\n", ctx.header.width, ctx.header.height);
-    } else {
-        printf("Resolution: %dx%d @ %d fps\n", ctx.header.width, ctx.header.height, ctx.header.fps);
-    }
-    printf("\n");
-
-    // Decode
-    int result = decode_video(&ctx);
-
-    // Cleanup FFmpeg
-    if (ctx.video_pipe) {
-        fclose(ctx.video_pipe);
-        waitpid(ctx.ffmpeg_pid, NULL, 0);
-    }
-
-    // Cleanup
-    if (ctx.video_ctx) {
-        tav_video_free(ctx.video_ctx);
-    }
-
-    if (ctx.gop_frames) {
-        for (int i = 0; i < ctx.gop_frames_allocated; i++) {
-            free(ctx.gop_frames[i]);
-        }
-        free(ctx.gop_frames);
-    }
-
-    fclose(ctx.input_fp);
-
-    // Remove temp audio file
-    if (ctx.audio_temp_file) {
-        unlink(ctx.audio_temp_file);
-        free(ctx.audio_temp_file);
-    }
-
-    // Statistics
-    time_t total_time = time(NULL) - ctx.start_time;
-    double avg_fps = total_time > 0 ? (double)ctx.frames_decoded / total_time : 0.0;
-
-    if (ctx.is_still_image) {
-        printf("\n=== Decoding Complete ===\n");
-        printf("  Still picture decoded successfully\n");
-        printf("  Bytes read: %lu\n", ctx.bytes_read);
-        printf("  Time taken: %ld seconds\n", total_time);
-        printf("=========================\n");
-    } else {
-        printf("\n=== Decoding Complete ===\n");
-        printf("  Frames decoded: %lu\n", ctx.frames_decoded);
-        printf("  GOPs decoded: %lu\n", ctx.gops_decoded);
-        printf("  Audio samples: %lu\n", ctx.audio_samples_decoded);
-        printf("  Bytes read: %lu\n", ctx.bytes_read);
-        printf("  Decoding speed: %.1f fps\n", avg_fps);
-        printf("  Time taken: %ld seconds\n", total_time);
-        printf("=========================\n");
-    }
-
-    if (result < 0) {
-        fprintf(stderr, "Decoding failed\n");
-        free(ctx.input_file);
-        free(ctx.output_file);
-        return 1;
-    }
-
-    printf("\nOutput written to: %s\n", ctx.output_file);
-
-    free(ctx.input_file);
-    free(ctx.output_file);
-
-    return 0;
-}
diff --git a/video_encoder/src/decoder_tav_dt.c b/video_encoder/src/decoder_tav_dt.c
deleted file mode 100644
index d1e1cca..0000000
--- a/video_encoder/src/decoder_tav_dt.c
+++ /dev/null
@@ -1,2180 +0,0 @@
-/**
- * TAV-DT Decoder - Digital Tape Format Decoder
- *
- * Decodes TAV-DT format with forward error correction.
- *
- * TAV-DT is a packetised streaming format designed for digital tape/broadcast:
- * - Fixed dimensions: 720x480 (NTSC) or 720x576 (PAL)
- * - 16-frame GOPs with 9/7 spatial wavelet, Haar temporal
- * - Mandatory TAD audio
- * - LDPC rate 1/2 for headers, Reed-Solomon (255,223) for payloads
- *
- * Packet structure (revised 2025-12-17):
- * - Main packet: sync(4) NOT LDPC + header 28 bytes (224 bits) -> 56 bytes LDPC (448 bits, rate 224/448)
- *   Header layout: fps(1) + flags(1) + reserved(2) + size(4) + timecode(8) + offset(4) + reserved(4) + crc(4)
- *   CRC covers bytes 0-23 (everything except CRC itself)
- * - TAD subpacket: header 14 bytes (112 bits) -> 28 bytes LDPC (224 bits, rate 112/224), + RS-encoded payload
- *   Layout: sample_count(2) + quant_bits(1) + compressed_size(4) + rs_block_count(3) + crc(4)
- * - TAV subpacket: sync(4) NOT LDPC + header 14 bytes (112 bits) -> 28 bytes LDPC (224 bits, rate 112/224), + RS payload
- *   Header layout: gop_size(1) + reserved(2) + compressed_size(4) + rs_block_count(3) + crc(4)
- * - No packet type bytes - always audio then video
- *
- * Features (revised 2025-12-17):
- * - Soft Sync Recovery: Attempts to recover corrupted headers using known values
- * - Stage 3 recovery uses TAV subpacket sync pattern (0xA3F7C91E) to locate video data
- * - Sync patterns are NOT LDPC-coded for reliable packet boundary detection
- *
- * Created by CuriousTorvald and Claude on 2025-12-09.
- * Revised 2025-12-17 for separate sync patterns, power-of-two LDPC sizes, and subpacket CRCs.
- */
-
-#define _POSIX_C_SOURCE 200809L
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <getopt.h>
-#include <unistd.h>
-#include <sys/wait.h>
-#include <signal.h>
-#include <time.h>
-#include <pthread.h>
-
-#include "tav_video_decoder.h"
-#include "decoder_tad.h"
-#include "reed_solomon.h"
-#include "ldpc.h"
-#include "ldpc_payload.h"
-
-// FEC mode for payloads (must match encoder setting)
-#define FEC_MODE_RS   0    // Reed-Solomon (255,223) - default
-#define FEC_MODE_LDPC 1    // LDPC (255,223) - experimental
-
-// =============================================================================
-// Constants
-// =============================================================================
-
-// TAV-DT sync patterns (big endian)
-#define TAV_DT_SYNC_NTSC  0xE3537A1F
-#define TAV_DT_SYNC_PAL   0xD193A745
-
-// TAV-DT dimensions
-#define DT_WIDTH          720
-#define DT_HEIGHT_NTSC    480
-#define DT_HEIGHT_PAL     576
-
-// Fixed parameters
-#define DT_SPATIAL_LEVELS  4
-#define DT_TEMPORAL_LEVELS 2
-
-// Header sizes (before LDPC encoding) - revised 2025-12-17
-// Sync patterns are written separately (not LDPC-coded)
-// Main header: 28 bytes raw (224 bits) -> 56 bytes LDPC (448 bits, rate 224/448)
-// Subpacket headers: 14 bytes raw (112 bits) -> 28 bytes LDPC (224 bits, rate 112/224)
-#define DT_MAIN_HEADER_SIZE   28   // fps(1) + flags(1) + reserved(2) + size(4) + timecode(8) + offset(4) + reserved(4) + crc(4)
-#define DT_TAD_HEADER_SIZE    14   // sample_count(2) + quant_bits(1) + compressed_size(4) + rs_block_count(3) + crc(4)
-#define DT_TAV_HEADER_SIZE    14   // gop_size(1) + reserved(2) + compressed_size(4) + rs_block_count(3) + crc(4)
-
-// TAV subpacket sync pattern (big endian)
-#define TAV_SUBPACKET_SYNC    0xA3F7C91E
-
-// Quality level to quantiser mapping (must match encoder)
-static const int QUALITY_Y[]  = {79, 47, 23, 11, 5, 2};
-static const int QUALITY_CO[] = {123, 108, 91, 76, 59, 29};
-static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39};
-
-#define MAX_PATH 4096
-#define MAX_DECODE_THREADS 16
-
-// Subpacket decode result codes (for detailed statistics tracking)
-#define DECODE_OK               0   // Success
-#define DECODE_ERR_HEADER_CRC  -1   // Header CRC validation failed
-#define DECODE_ERR_PAYLOAD_FEC -2   // Payload FEC decode failed (unrecoverable)
-#define DECODE_ERR_TRUNCATED   -3   // Data truncated or other error
-
-// =============================================================================
-// Multithreading Structures
-// =============================================================================
-
-#define DECODE_SLOT_EMPTY      0
-#define DECODE_SLOT_PENDING    1
-#define DECODE_SLOT_PROCESSING 2
-#define DECODE_SLOT_DONE       3
-
-// GOP decode job structure
-typedef struct {
-    // Input
-    uint8_t *compressed_data;      // Raw GOP data to decode (owned by job)
-    size_t compressed_size;
-    int gop_size;                  // Number of frames in this GOP
-    int job_id;                    // Sequential job ID for ordering output
-
-    // Output
-    uint8_t **rgb_frames;          // Decoded RGB24 frames [gop_size]
-    size_t frame_size;             // Size of each frame in bytes
-    int decode_result;             // 0 = success, -1 = error
-
-    // Status
-    volatile int status;           // DECODE_SLOT_EMPTY, PENDING, or DONE
-} gop_decode_job_t;
-
-/**
- * Get number of available CPUs.
- */
-static int get_available_cpus(void) {
-#ifdef _SC_NPROCESSORS_ONLN
-    long nproc = sysconf(_SC_NPROCESSORS_ONLN);
-    if (nproc > 0) {
-        return (int)nproc;
-    }
-#endif
-    return 1;  // Fallback to single core
-}
-
-/**
- * Get default thread count (cap at 8)
- */
-static int get_default_thread_count(void) {
-    int available = get_available_cpus();
-    return available < 8 ? available : 8;
-}
-
-// =============================================================================
-// CRC-32
-// =============================================================================
-
-static uint32_t crc32_table[256];
-static int crc32_initialized = 0;
-
-static void init_crc32_table(void) {
-    if (crc32_initialized) return;
-    for (uint32_t i = 0; i < 256; i++) {
-        uint32_t crc = i;
-        for (int j = 0; j < 8; j++) {
-            if (crc & 1) {
-                crc = (crc >> 1) ^ 0xEDB88320;
-            } else {
-                crc >>= 1;
-            }
-        }
-        crc32_table[i] = crc;
-    }
-    crc32_initialized = 1;
-}
-
-static uint32_t calculate_crc32(const uint8_t *data, size_t length) {
-    init_crc32_table();
-    uint32_t crc = 0xFFFFFFFF;
-    for (size_t i = 0; i < length; i++) {
-        crc = (crc >> 8) ^ crc32_table[(crc ^ data[i]) & 0xFF];
-    }
-    return crc ^ 0xFFFFFFFF;
-}
-
-// =============================================================================
-// Decoder Context
-// =============================================================================
-
-typedef struct {
-    // Input/output
-    char *input_file;
-    char *output_file;
-    FILE *input_fp;
-
-    // FFmpeg integration
-    pid_t ffmpeg_pid;
-    FILE *video_pipe;
-    char audio_temp_file[MAX_PATH];
-    FILE *audio_temp_fp;
-    char video_temp_file[MAX_PATH];
-    FILE *video_temp_fp;
-
-    // Video parameters (derived from first packet)
-    int width;
-    int height;
-    int framerate;
-    int is_interlaced;
-    int is_ntsc_framerate;
-    int quality_index;
-    int is_pal;
-
-    // Video decoder context
-    tav_video_context_t *video_ctx;
-
-    // Statistics
-    uint64_t packets_processed;
-    uint64_t frames_decoded;
-    uint64_t bytes_read;
-    uint64_t crc_errors;
-    uint64_t fec_corrections;
-    uint64_t sync_losses;
-
-    // Detailed packet outcome statistics
-    // Fully decoded (both audio and video present)
-    uint64_t stat_fully_decoded_intact;          // (a) Both A/V intact, no recovery needed
-    uint64_t stat_fully_decoded_recovered;       // (b) Header recovered by soft sync recovery
-
-    // Partially decoded (only audio or only video)
-    uint64_t stat_partial_video_ok_audio_hdr_damaged;       // (c) Video OK, audio lost - header CRC fail
-    uint64_t stat_partial_video_ok_audio_payload_damaged;   // (d) Video OK, audio lost - FEC fail
-    uint64_t stat_partial_video_rec_audio_hdr_damaged;      // (e) Video recovered, audio lost - header CRC fail
-    uint64_t stat_partial_video_rec_audio_payload_damaged;  // (f) Video recovered, audio lost - FEC fail
-    uint64_t stat_partial_audio_ok_video_hdr_damaged;       // (g) Audio OK, video lost - header CRC fail
-    uint64_t stat_partial_audio_ok_video_payload_damaged;   // (h) Audio OK, video lost - FEC fail
-
-    // Packets lost (no audio or video)
-    uint64_t stat_lost_sync_recovery_failure;    // (j) Main header sync/CRC unrecoverable
-    uint64_t stat_lost_both_headers_damaged;     // (k) Has sync but both subpacket headers damaged
-    uint64_t stat_lost_payloads_damaged;         // (l) All headers OK but both payloads unrecoverable
-
-    // Options
-    int verbose;
-    int dump_mode;  // Just dump packets, don't decode
-    int fec_mode;   // FEC_MODE_RS or FEC_MODE_LDPC (must match encoder)
-
-    // Multithreading
-    int num_threads;
-    int num_slots;
-    gop_decode_job_t *slots;
-    tav_video_context_t **worker_video_ctx;  // Per-thread decoder contexts
-    pthread_t *worker_threads;
-    pthread_mutex_t mutex;
-    pthread_cond_t cond_job_available;
-    pthread_cond_t cond_slot_free;
-    volatile int threads_should_exit;
-    volatile int next_write_slot;      // Next slot to write to output
-    volatile int jobs_submitted;
-    volatile int jobs_completed;
-
-    // Timing
-    time_t start_time;
-
-    // Error concealment
-    uint8_t *freeze_frame;         // Last good video frame for error concealment
-    size_t freeze_frame_size;
-    uint64_t last_timecode_ns;     // Last processed timecode
-    uint64_t audio_samples_written; // Total audio samples written
-    uint64_t video_frames_written; // Total video frames written (for sync check)
-
-    // Soft Sync Recovery state
-    uint8_t last_valid_framerate;
-    uint8_t last_valid_flags;
-    uint64_t last_valid_timecode_ns;
-    int packets_since_valid_sync;
-    int soft_sync_recoveries;      // Statistics counter
-    int current_packet_recovered;  // Flag: was current packet's main header recovered?
-} dt_decoder_t;
-
-// =============================================================================
-// Utility Functions
-// =============================================================================
-
-static void print_usage(const char *program) {
-    printf("TAV-DT Decoder - Digital Tape Format with FEC\n");
-    printf("\nUsage: %s -i input.tavdt -o output.mkv [options]\n\n", program);
-    printf("Required:\n");
-    printf("  -i, --input FILE     Input TAV-DT file\n");
-    printf("  -o, --output FILE    Output video file (FFV1/MKV)\n");
-    printf("\nOptions:\n");
-    printf("  -t, --threads N      Number of decoder threads (default: min(8, available CPUs))\n");
-    printf("                       0 or 1 = single-threaded, 2-16 = multithreaded\n");
-    printf("  --ldpc-payload       Use LDPC(255,223) instead of RS(255,223) for payloads\n");
-    printf("                       (must match encoder setting)\n");
-    printf("  --dump               Dump packet info without decoding\n");
-    printf("  -v, --verbose        Verbose output\n");
-    printf("  --help               Show this help\n");
-}
-
-// =============================================================================
-// FEC Block Decoding (RS or LDPC based on mode)
-// =============================================================================
-
-static int decode_fec_blocks(uint8_t *data, size_t total_len, uint8_t *output, size_t output_len, int fec_mode) {
-    if (fec_mode == FEC_MODE_LDPC) {
-        // Use LDPC(255,223) decoding
-        return ldpc_p_decode_blocks(data, total_len, output, output_len);
-    } else {
-        // Use RS(255,223) decoding (default)
-        return rs_decode_blocks(data, total_len, output, output_len);
-    }
-}
-
-static void generate_random_filename(char *filename, size_t size) {
-    static int seeded = 0;
-    if (!seeded) {
-        srand((unsigned int)time(NULL));
-        seeded = 1;
-    }
-
-    const char charset[] = "0123456789abcdefghijklmnopqrstuvwxyz";
-    snprintf(filename, size, "/tmp/tavdt_dec_");
-    size_t prefix_len = strlen(filename);
-    for (int i = 0; i < 16; i++) {
-        filename[prefix_len + i] = charset[rand() % (sizeof(charset) - 1)];
-    }
-    filename[prefix_len + 16] = '\0';
-}
-
-// =============================================================================
-// Error Concealment Functions
-// =============================================================================
-
-/**
- * Write silent audio samples for error concealment.
- * Generates PCMu8 silence (value 128) for the specified number of stereo samples.
- */
-static int write_silent_audio(dt_decoder_t *dec, size_t num_samples) {
-    if (!dec->audio_temp_fp || num_samples == 0) {
-        return 0;
-    }
-
-    // PCMu8 silence is value 128 (0x80)
-    uint8_t *silence = malloc(num_samples * 2);
-    if (!silence) {
-        fprintf(stderr, "Warning: Cannot allocate silence buffer\n");
-        return -1;
-    }
-
-    memset(silence, 128, num_samples * 2);
-    fwrite(silence, 1, num_samples * 2, dec->audio_temp_fp);
-    free(silence);
-
-    dec->audio_samples_written += num_samples;
-
-    if (dec->verbose) {
-        printf("  Error concealment: Wrote %zu samples of silent audio\n", num_samples);
-    }
-
-    return 0;
-}
-
-/**
- * Write frozen video frame(s) for error concealment.
- * Repeats the last good frame or writes black frame if no freeze frame exists.
- */
-static int write_frozen_frames(dt_decoder_t *dec, int num_frames) {
-    if (!dec->video_temp_fp || num_frames <= 0) {
-        return 0;
-    }
-
-    int internal_height = dec->is_interlaced ? dec->height / 2 : dec->height;
-    size_t frame_size = dec->width * internal_height * 3;
-
-    // If no freeze frame exists, create a black frame
-    if (!dec->freeze_frame) {
-        dec->freeze_frame = calloc(1, frame_size);
-        if (!dec->freeze_frame) {
-            fprintf(stderr, "Warning: Cannot allocate freeze frame buffer\n");
-            return -1;
-        }
-        dec->freeze_frame_size = frame_size;
-        if (dec->verbose) {
-            printf("  Error concealment: Using black frame (no reference frame available)\n");
-        }
-    }
-
-    // Write the freeze frame multiple times
-    for (int i = 0; i < num_frames; i++) {
-        fwrite(dec->freeze_frame, 1, dec->freeze_frame_size, dec->video_temp_fp);
-        dec->video_frames_written++;
-        dec->frames_decoded++;
-    }
-
-    if (dec->verbose) {
-        printf("  Error concealment: Wrote %d frozen frame(s)\n", num_frames);
-    }
-
-    return 0;
-}
-
-/**
- * Update the freeze frame buffer with the last successfully decoded frame.
- */
-static int update_freeze_frame(dt_decoder_t *dec, const uint8_t *frame_data, size_t frame_size) {
-    if (!frame_data || frame_size == 0) {
-        return -1;
-    }
-
-    // Allocate or reallocate freeze frame buffer
-    if (!dec->freeze_frame || dec->freeze_frame_size != frame_size) {
-        free(dec->freeze_frame);
-        dec->freeze_frame = malloc(frame_size);
-        if (!dec->freeze_frame) {
-            fprintf(stderr, "Warning: Cannot allocate freeze frame buffer\n");
-            dec->freeze_frame_size = 0;
-            return -1;
-        }
-        dec->freeze_frame_size = frame_size;
-    }
-
-    memcpy(dec->freeze_frame, frame_data, frame_size);
-    return 0;
-}
-
-// =============================================================================
-// Sync Pattern Search
-// =============================================================================
-
-static int find_sync_pattern(dt_decoder_t *dec) {
-    uint8_t sync_bytes[4] = {0};
-    uint8_t byte;
-
-    // NTSC and PAL sync patterns as byte arrays (big endian)
-    const uint8_t ntsc_sync[4] = {0xE3, 0x53, 0x7A, 0x1F};
-    const uint8_t pal_sync[4] = {0xD1, 0x93, 0xA7, 0x45};
-
-    // Sliding window search
-    while (fread(&byte, 1, 1, dec->input_fp) == 1) {
-        dec->bytes_read++;
-
-        // Shift window
-        sync_bytes[0] = sync_bytes[1];
-        sync_bytes[1] = sync_bytes[2];
-        sync_bytes[2] = sync_bytes[3];
-        sync_bytes[3] = byte;
-
-        // Check NTSC sync
-        if (memcmp(sync_bytes, ntsc_sync, 4) == 0) {
-            dec->is_pal = 0;
-            // Seek back to start of sync pattern
-            fseek(dec->input_fp, -4, SEEK_CUR);
-            dec->bytes_read -= 4;
-            return 0;
-        }
-
-        // Check PAL sync
-        if (memcmp(sync_bytes, pal_sync, 4) == 0) {
-            dec->is_pal = 1;
-            // Seek back to start of sync pattern
-            fseek(dec->input_fp, -4, SEEK_CUR);
-            dec->bytes_read -= 4;
-            return 0;
-        }
-    }
-
-    return -1;  // EOF
-}
-
-// =============================================================================
-// Header Decoding
-// =============================================================================
-
-typedef struct {
-    uint32_t sync_pattern;
-    uint8_t framerate;
-    uint8_t flags;
-    uint16_t reserved;
-    uint32_t packet_size;
-    uint64_t timecode_ns;      // Now at offset 12 (moved before CRC)
-    uint32_t offset_to_video;  // Now at offset 20 (moved before CRC)
-    uint32_t crc32;            // Now at offset 24 (last field)
-} dt_packet_header_t;
-
-// Soft Sync Recovery state
-typedef struct {
-    uint8_t last_framerate;
-    uint8_t last_flags;
-    uint64_t last_timecode_ns;
-    int packets_since_valid;
-    int is_initialized;
-} soft_sync_state_t;
-
-/**
- * Attempt Soft Sync Recovery on a header with CRC mismatch.
- * Returns 1 if recovery succeeded, 0 if failed.
- *
- * Stage 1: Substitute known sync pattern, zero-fill reserved, recalculate CRC
- * Stage 2: Also substitute framerate, flags, timecode with last known values
- */
-static int attempt_soft_sync_recovery(dt_decoder_t *dec, uint8_t *decoded_header,
-                                       dt_packet_header_t *header __attribute__((unused))) {
-    // CRC is at offset 24-27 (revised layout 2025-12-17, sync is separate)
-    uint32_t stored_crc;
-    memcpy(&stored_crc, decoded_header + 24, 4);
-
-    // === Stage 1 ===
-    // Zero-fill reserved fields (sync is now read separately, not in LDPC header)
-    // Layout: fps(1) + flags(1) + reserved(2) + size(4) + timecode(8) + offset(4) + reserved(4) + crc(4)
-    uint8_t recovery_header[DT_MAIN_HEADER_SIZE];
-    memcpy(recovery_header, decoded_header, DT_MAIN_HEADER_SIZE);
-
-    // Zero-fill reserved fields (offset 2-3 and 20-23)
-    recovery_header[2] = 0;
-    recovery_header[3] = 0;
-    recovery_header[20] = 0;
-    recovery_header[21] = 0;
-    recovery_header[22] = 0;
-    recovery_header[23] = 0;
-
-    // Recalculate CRC over bytes 0-23
-    uint32_t calculated_crc = calculate_crc32(recovery_header, 24);
-    if (calculated_crc == stored_crc) {
-        if (dec->verbose) {
-            printf("  Soft Sync Recovery Stage 1: SUCCESS (reserved fields corrected)\n");
-        }
-        // Use recovered header
-        memcpy(decoded_header, recovery_header, DT_MAIN_HEADER_SIZE);
-        dec->soft_sync_recoveries++;
-        return 1;
-    }
-
-    // === Stage 2 ===
-    // Also substitute framerate, flags, and timecode with last known values
-    if (dec->packets_processed > 0) {
-        recovery_header[0] = dec->last_valid_framerate;
-        recovery_header[1] = dec->last_valid_flags;
-
-        // Calculate expected timecode based on last known timecode + GOP duration
-        // GOP duration = 16 frames / framerate
-        uint64_t gop_duration_ns = (16ULL * 1000000000ULL) / dec->framerate;
-        uint64_t expected_timecode = dec->last_valid_timecode_ns +
-                                     (dec->packets_since_valid_sync + 1) * gop_duration_ns;
-
-        // Write expected timecode to bytes 8-15
-        memcpy(recovery_header + 8, &expected_timecode, 8);
-
-        // Recalculate CRC
-        calculated_crc = calculate_crc32(recovery_header, 24);
-        if (calculated_crc == stored_crc) {
-            if (dec->verbose) {
-                printf("  Soft Sync Recovery Stage 2: SUCCESS (reserved/fps/flags/timecode corrected)\n");
-                printf("    Reconstructed timecode: %.3f s\n", expected_timecode / 1000000000.0);
-            }
-            // Use recovered header
-            memcpy(decoded_header, recovery_header, DT_MAIN_HEADER_SIZE);
-            dec->soft_sync_recoveries++;
-            return 1;
-        }
-    }
-
-    // === Stage 3 ===
-    // Note: Stage 3 (searching for TAV subpacket sync 0xA3F7C91E) would require
-    // file seeking and is implemented separately in find_sync_pattern fallback.
-    // This function only handles header data recovery.
-
-    if (dec->verbose) {
-        fprintf(stderr, "  Soft Sync Recovery: FAILED (all stages exhausted)\n");
-    }
-    return 0;
-}
-
-static int read_and_decode_header(dt_decoder_t *dec, dt_packet_header_t *header) {
-    // Read sync pattern first (4 bytes, NOT LDPC-coded)
-    uint8_t sync_bytes[4];
-    size_t bytes_read = fread(sync_bytes, 1, 4, dec->input_fp);
-    if (bytes_read < 4) return -1;
-    dec->bytes_read += 4;
-
-    header->sync_pattern = ((uint32_t)sync_bytes[0] << 24) | ((uint32_t)sync_bytes[1] << 16) |
-                           ((uint32_t)sync_bytes[2] << 8) | sync_bytes[3];
-
-    // Verify sync pattern early
-    int sync_valid = (header->sync_pattern == TAV_DT_SYNC_NTSC || header->sync_pattern == TAV_DT_SYNC_PAL);
-    if (!sync_valid && dec->verbose) {
-        fprintf(stderr, "Warning: Invalid sync pattern 0x%08X\n", header->sync_pattern);
-    }
-
-    // Read LDPC-encoded header (56 bytes = 28 bytes * 2, rate 224/448 bits)
-    uint8_t encoded_header[DT_MAIN_HEADER_SIZE * 2];
-    bytes_read = fread(encoded_header, 1, DT_MAIN_HEADER_SIZE * 2, dec->input_fp);
-    if (bytes_read < DT_MAIN_HEADER_SIZE * 2) return -1;
-    dec->bytes_read += DT_MAIN_HEADER_SIZE * 2;
-
-    // LDPC decode header (56 bytes -> 28 bytes)
-    uint8_t decoded_header[DT_MAIN_HEADER_SIZE];
-    int ldpc_result = ldpc_decode(encoded_header, DT_MAIN_HEADER_SIZE * 2, decoded_header);
-    if (ldpc_result < 0) {
-        if (dec->verbose) {
-            fprintf(stderr, "Warning: LDPC decode failed for main header\n");
-        }
-        // Try to use raw data anyway (first half)
-        memcpy(decoded_header, encoded_header, DT_MAIN_HEADER_SIZE);
-    } else if (ldpc_result > 0) {
-        dec->fec_corrections++;
-    }
-
-    // Parse header fields (revised layout 2025-12-17)
-    // Layout: fps(1) + flags(1) + reserved(2) + size(4) + timecode(8) + offset(4) + reserved(4) + crc(4) = 28 bytes
-    // (sync is read separately above)
-    header->framerate = decoded_header[0];
-    header->flags = decoded_header[1];
-    header->reserved = decoded_header[2] | ((uint16_t)decoded_header[3] << 8);
-    memcpy(&header->packet_size, decoded_header + 4, 4);
-    memcpy(&header->timecode_ns, decoded_header + 8, 8);       // At offset 8
-    memcpy(&header->offset_to_video, decoded_header + 16, 4);  // At offset 16
-    // Reserved at offset 20-23 (ignored)
-    memcpy(&header->crc32, decoded_header + 24, 4);            // At offset 24
-
-    // Verify CRC-32 (covers bytes 0-23: fps + flags + reserved + size + timecode + offset + reserved)
-    uint32_t calculated_crc = calculate_crc32(decoded_header, 24);
-    int crc_valid = (calculated_crc == header->crc32);
-
-    if (!crc_valid) {
-        if (dec->verbose) {
-            fprintf(stderr, "Warning: CRC mismatch (expected 0x%08X, got 0x%08X)\n",
-                   header->crc32, calculated_crc);
-        }
-
-        // Attempt Soft Sync Recovery
-        if (dec->packets_processed > 0 && attempt_soft_sync_recovery(dec, decoded_header, header)) {
-            // Re-parse header from recovered data (revised layout 2025-12-17)
-            header->framerate = decoded_header[0];
-            header->flags = decoded_header[1];
-            header->reserved = decoded_header[2] | ((uint16_t)decoded_header[3] << 8);
-            memcpy(&header->packet_size, decoded_header + 4, 4);
-            memcpy(&header->timecode_ns, decoded_header + 8, 8);
-            memcpy(&header->offset_to_video, decoded_header + 16, 4);
-            // Reserved at offset 20-23 (ignored)
-            memcpy(&header->crc32, decoded_header + 24, 4);
-            crc_valid = 1;  // Recovery succeeded
-            dec->current_packet_recovered = 1;  // Track for detailed statistics
-        } else {
-            dec->crc_errors++;
-            dec->packets_since_valid_sync++;
-
-            // Per spec: If CRC is unmatched after all soft recovery stages, packet MUST be discarded
-            if (dec->verbose) {
-                fprintf(stderr, "Warning: Packet discarded due to unrecoverable CRC error\n");
-            }
-            dec->sync_losses++;
-            dec->stat_lost_sync_recovery_failure++;  // (j) Sync recovery failure
-            return -2;
-        }
-    }
-
-    if (!sync_valid) {
-        dec->sync_losses++;
-        dec->stat_lost_sync_recovery_failure++;  // (j) Sync recovery failure
-        return -2;
-    }
-
-    // CRC is valid - update soft sync recovery state
-    dec->last_valid_framerate = header->framerate;
-    dec->last_valid_flags = header->flags;
-    dec->last_valid_timecode_ns = header->timecode_ns;
-    dec->packets_since_valid_sync = 0;
-
-    // Update decoder state from first packet
-    if (dec->packets_processed == 0) {
-        dec->width = DT_WIDTH;
-        dec->height = (header->sync_pattern == TAV_DT_SYNC_PAL) ? DT_HEIGHT_PAL : DT_HEIGHT_NTSC;
-        dec->framerate = header->framerate;
-        dec->is_interlaced = header->flags & 0x01;
-        dec->is_ntsc_framerate = header->flags & 0x02;
-        dec->quality_index = (header->flags >> 4) & 0x0F;
-        if (dec->quality_index > 5) dec->quality_index = 5;
-
-        if (dec->verbose) {
-            printf("=== TAV-DT Stream Info ===\n");
-            printf("  Format: %s %s\n",
-                   (header->sync_pattern == TAV_DT_SYNC_NTSC) ? "NTSC" : "PAL",
-                   dec->is_interlaced ? "interlaced" : "progressive");
-            printf("  Resolution: %dx%d\n", dec->width, dec->height);
-            printf("  Framerate: %d fps%s\n", dec->framerate,
-                   dec->is_ntsc_framerate ? " (NTSC)" : "");
-            printf("  Quality index: %d\n", dec->quality_index);
-            printf("==========================\n\n");
-        }
-    }
-
-    return 0;
-}
-
-// =============================================================================
-// Multithreading Support
-// =============================================================================
-
-/**
- * Worker thread function for parallel GOP decoding
- */
-static void *decoder_worker_thread(void *arg) {
-    dt_decoder_t *dec = (dt_decoder_t *)arg;
-    int thread_id = -1;
-
-    // Find our thread ID
-    for (int i = 0; i < dec->num_threads; i++) {
-        if (pthread_equal(dec->worker_threads[i], pthread_self())) {
-            thread_id = i;
-            break;
-        }
-    }
-
-    if (thread_id < 0) {
-        fprintf(stderr, "Error: Worker thread couldn't find its ID\n");
-        return NULL;
-    }
-
-    tav_video_context_t *video_ctx = dec->worker_video_ctx[thread_id];
-
-    while (1) {
-        pthread_mutex_lock(&dec->mutex);
-
-        // Look for a pending job and claim it
-        int job_idx = -1;
-        for (int i = 0; i < dec->num_slots; i++) {
-            if (dec->slots[i].status == DECODE_SLOT_PENDING) {
-                job_idx = i;
-                dec->slots[i].status = DECODE_SLOT_PROCESSING;  // Claim it - prevents other threads from picking it
-                break;
-            }
-        }
-
-        if (job_idx < 0) {
-            // No jobs available, check if we should exit
-            if (dec->threads_should_exit) {
-                pthread_mutex_unlock(&dec->mutex);
-                break;
-            }
-
-            // Wait for a job
-            pthread_cond_wait(&dec->cond_job_available, &dec->mutex);
-            pthread_mutex_unlock(&dec->mutex);
-            continue;
-        }
-
-        pthread_mutex_unlock(&dec->mutex);
-
-        // Decode this GOP
-        gop_decode_job_t *job = &dec->slots[job_idx];
-
-        // The compressed data format: [type(1)][gop_size(1)][size(4)][zstd_data]
-        const uint8_t *zstd_data = job->compressed_data + 6;
-        size_t zstd_size = job->compressed_size > 6 ? job->compressed_size - 6 : 0;
-
-        job->decode_result = tav_video_decode_gop(video_ctx, zstd_data, zstd_size,
-                                                   job->gop_size, job->rgb_frames);
-
-        // Mark as done
-        pthread_mutex_lock(&dec->mutex);
-        job->status = DECODE_SLOT_DONE;
-        dec->jobs_completed++;
-        pthread_cond_broadcast(&dec->cond_slot_free);
-        pthread_mutex_unlock(&dec->mutex);
-    }
-
-    return NULL;
-}
-
-/**
- * Initialize decoder threads
- */
-static int init_decoder_threads(dt_decoder_t *dec) {
-    if (dec->num_threads <= 1) {
-        return 0;  // Single-threaded, nothing to initialize
-    }
-
-    dec->num_slots = dec->num_threads + 2;  // Pipeline with lookahead
-    dec->slots = calloc(dec->num_slots, sizeof(gop_decode_job_t));
-    if (!dec->slots) {
-        fprintf(stderr, "Error: Cannot allocate decode slots\n");
-        return -1;
-    }
-
-    // Initialize slots
-    for (int i = 0; i < dec->num_slots; i++) {
-        dec->slots[i].status = DECODE_SLOT_EMPTY;
-        dec->slots[i].job_id = -1;
-        dec->slots[i].rgb_frames = NULL;
-        dec->slots[i].compressed_data = NULL;
-    }
-
-    // Create per-thread video decoder contexts
-    dec->worker_video_ctx = calloc(dec->num_threads, sizeof(tav_video_context_t*));
-    if (!dec->worker_video_ctx) {
-        free(dec->slots);
-        return -1;
-    }
-
-    tav_video_params_t vparams;
-    vparams.width = dec->width;
-    vparams.height = dec->is_interlaced ? dec->height / 2 : dec->height;
-    vparams.decomp_levels = DT_SPATIAL_LEVELS;
-    vparams.temporal_levels = DT_TEMPORAL_LEVELS;
-    vparams.wavelet_filter = 1;     // CDF 9/7
-    vparams.temporal_wavelet = 255; // Haar
-    vparams.entropy_coder = 1;      // EZBC
-    vparams.channel_layout = 0;     // YCoCg-R
-    vparams.perceptual_tuning = 1;
-    vparams.quantiser_y = QUALITY_Y[dec->quality_index];
-    vparams.quantiser_co = QUALITY_CO[dec->quality_index];
-    vparams.quantiser_cg = QUALITY_CG[dec->quality_index];
-    vparams.encoder_preset = 0x01;  // Sports
-    vparams.monoblock = 1;
-
-    for (int i = 0; i < dec->num_threads; i++) {
-        dec->worker_video_ctx[i] = tav_video_create(&vparams);
-        if (!dec->worker_video_ctx[i]) {
-            fprintf(stderr, "Error: Cannot create video decoder for thread %d\n", i);
-            return -1;
-        }
-    }
-
-    // Initialize threading primitives
-    pthread_mutex_init(&dec->mutex, NULL);
-    pthread_cond_init(&dec->cond_job_available, NULL);
-    pthread_cond_init(&dec->cond_slot_free, NULL);
-    dec->threads_should_exit = 0;
-    dec->next_write_slot = 0;
-    dec->jobs_submitted = 0;
-    dec->jobs_completed = 0;
-
-    // Create worker threads
-    dec->worker_threads = calloc(dec->num_threads, sizeof(pthread_t));
-    if (!dec->worker_threads) {
-        return -1;
-    }
-
-    for (int i = 0; i < dec->num_threads; i++) {
-        if (pthread_create(&dec->worker_threads[i], NULL, decoder_worker_thread, dec) != 0) {
-            fprintf(stderr, "Error: Cannot create worker thread %d\n", i);
-            return -1;
-        }
-    }
-
-    if (dec->verbose) {
-        printf("Initialized %d decoder threads\n", dec->num_threads);
-    }
-
-    return 0;
-}
-
-/**
- * Cleanup decoder threads
- */
-static void cleanup_decoder_threads(dt_decoder_t *dec) {
-    if (dec->num_threads <= 1) {
-        return;
-    }
-
-    // Signal threads to exit
-    pthread_mutex_lock(&dec->mutex);
-    dec->threads_should_exit = 1;
-    pthread_cond_broadcast(&dec->cond_job_available);
-    pthread_mutex_unlock(&dec->mutex);
-
-    // Wait for threads
-    if (dec->worker_threads) {
-        for (int i = 0; i < dec->num_threads; i++) {
-            pthread_join(dec->worker_threads[i], NULL);
-        }
-        free(dec->worker_threads);
-    }
-
-    // Free video contexts
-    if (dec->worker_video_ctx) {
-        for (int i = 0; i < dec->num_threads; i++) {
-            if (dec->worker_video_ctx[i]) {
-                tav_video_free(dec->worker_video_ctx[i]);
-            }
-        }
-        free(dec->worker_video_ctx);
-    }
-
-    // Free slots
-    if (dec->slots) {
-        for (int i = 0; i < dec->num_slots; i++) {
-            if (dec->slots[i].rgb_frames) {
-                for (int f = 0; f < dec->slots[i].gop_size; f++) {
-                    free(dec->slots[i].rgb_frames[f]);
-                }
-                free(dec->slots[i].rgb_frames);
-            }
-            if (dec->slots[i].compressed_data) {
-                free(dec->slots[i].compressed_data);
-            }
-        }
-        free(dec->slots);
-    }
-
-    pthread_mutex_destroy(&dec->mutex);
-    pthread_cond_destroy(&dec->cond_job_available);
-    pthread_cond_destroy(&dec->cond_slot_free);
-}
-
-// =============================================================================
-// Subpacket Decoding
-// =============================================================================
-
-static int decode_audio_subpacket(dt_decoder_t *dec, const uint8_t *data, size_t data_len,
-                                   size_t *consumed, size_t *samples_written) {
-    *samples_written = 0;
-
-    // Minimum: 28 byte LDPC header (14 bytes * 2, rate 112/224 bits)
-    if (data_len < DT_TAD_HEADER_SIZE * 2) return -1;
-
-    size_t offset = 0;
-
-    // LDPC decode TAD header (28 bytes -> 14 bytes)
-    uint8_t decoded_tad_header[DT_TAD_HEADER_SIZE];
-    int ldpc_result = ldpc_decode(data + offset, DT_TAD_HEADER_SIZE * 2, decoded_tad_header);
-    if (ldpc_result < 0) {
-        if (dec->verbose) {
-            fprintf(stderr, "Warning: LDPC decode failed for TAD header\n");
-        }
-        memcpy(decoded_tad_header, data + offset, DT_TAD_HEADER_SIZE);
-    } else if (ldpc_result > 0) {
-        dec->fec_corrections++;
-    }
-    offset += DT_TAD_HEADER_SIZE * 2;
-
-    // Parse TAD header (revised layout 2025-12-17)
-    // Layout: sample_count(2) + quant_bits(1) + compressed_size(4) + rs_block_count(3) + crc(4) = 14 bytes
-    uint16_t sample_count;
-    uint8_t quant_bits;
-    uint32_t compressed_size;
-    uint32_t rs_block_count;
-    uint32_t stored_crc;
-
-    memcpy(&sample_count, decoded_tad_header, 2);
-    quant_bits = decoded_tad_header[2];
-    memcpy(&compressed_size, decoded_tad_header + 3, 4);
-    // uint24 rs_block_count (little endian) at offset 7-9
-    rs_block_count = decoded_tad_header[7] |
-                     ((uint32_t)decoded_tad_header[8] << 8) |
-                     ((uint32_t)decoded_tad_header[9] << 16);
-    memcpy(&stored_crc, decoded_tad_header + 10, 4);
-
-    // Verify CRC-32 (covers bytes 0-9)
-    uint32_t calculated_crc = calculate_crc32(decoded_tad_header, 10);
-    int tad_header_valid = (calculated_crc == stored_crc);
-    if (!tad_header_valid) {
-        if (dec->verbose) {
-            fprintf(stderr, "Warning: TAD header CRC mismatch (expected 0x%08X, got 0x%08X) - skipping audio\n",
-                   stored_crc, calculated_crc);
-        }
-        dec->crc_errors++;
-        // Cannot trust header data - skip audio decoding entirely
-        // Error concealment will insert silence
-        *consumed = offset;
-        return DECODE_ERR_HEADER_CRC;
-    }
-
-    if (dec->verbose) {
-        printf("  TAD: samples=%u, quant_bits=%u, compressed=%u, rs_blocks=%u\n",
-               sample_count, quant_bits, compressed_size, rs_block_count);
-    }
-
-    // Calculate RS payload size
-    size_t rs_total = rs_block_count * RS_BLOCK_SIZE;
-
-    // Sanity check: compressed_size must not exceed RS payload capacity
-    // RS(255,223) means 223 data bytes per 255-byte block
-    size_t max_data_size = (rs_block_count * RS_DATA_SIZE);
-    if (compressed_size > max_data_size) {
-        if (dec->verbose) {
-            fprintf(stderr, "Warning: TAD compressed_size (%u) exceeds RS capacity (%zu) - skipping audio\n",
-                   compressed_size, max_data_size);
-        }
-        *consumed = offset;
-        return DECODE_ERR_HEADER_CRC;  // Header data invalid
-    }
-
-    // Handle empty audio packet (no samples in this GOP)
-    if (compressed_size == 0 || rs_block_count == 0 || sample_count == 0) {
-        *consumed = offset;
-        return DECODE_OK;  // Successfully processed empty audio packet
-    }
-
-    if (offset + rs_total > data_len) {
-        if (dec->verbose) {
-            fprintf(stderr, "Warning: Audio packet truncated\n");
-        }
-        *consumed = data_len;
-        return DECODE_ERR_TRUNCATED;
-    }
-
-    // RS decode payload
-    uint8_t *rs_data = malloc(rs_total);
-    if (!rs_data) return DECODE_ERR_TRUNCATED;
-    memcpy(rs_data, data + offset, rs_total);
-
-    uint8_t *decoded_payload = malloc(compressed_size);
-    if (!decoded_payload) {
-        free(rs_data);
-        return DECODE_ERR_TRUNCATED;
-    }
-
-    int fec_result = decode_fec_blocks(rs_data, rs_total, decoded_payload, compressed_size, dec->fec_mode);
-    if (fec_result < 0) {
-        if (dec->verbose) {
-            fprintf(stderr, "Warning: FEC decode failed for audio - UNRECOVERABLE\n");
-        }
-        free(rs_data);
-        free(decoded_payload);
-        *consumed = offset + rs_total;
-        return DECODE_ERR_PAYLOAD_FEC;
-    } else if (fec_result > 0) {
-        dec->fec_corrections += fec_result;
-    }
-
-    // decoded_payload already contains the full TAD chunk format:
-    // [sample_count(2)][max_index(1)][payload_size(4)][zstd_data]
-    // No need to rebuild the header - pass it directly to the TAD decoder
-
-    // Read the actual sample count from the TAD chunk header (not the wrapper header)
-    // The wrapper header sample_count might be incorrect or 0 in some cases
-    uint16_t tad_chunk_sample_count;
-    memcpy(&tad_chunk_sample_count, decoded_payload, 2);
-
-    // Decode TAD to PCMu8 - allocate based on TAD chunk's sample count
-    uint8_t *pcmu8_output = malloc(tad_chunk_sample_count * 2);
-    if (!pcmu8_output) {
-        free(rs_data);
-        free(decoded_payload);
-        return -1;
-    }
-
-    size_t bytes_consumed_tad, samples_decoded;
-    int tad_result = tad32_decode_chunk(decoded_payload, compressed_size, pcmu8_output,
-                                         &bytes_consumed_tad, &samples_decoded);
-
-    if (tad_result == 0 && samples_decoded > 0 && dec->audio_temp_fp) {
-        fwrite(pcmu8_output, 1, samples_decoded * 2, dec->audio_temp_fp);
-        *samples_written = samples_decoded;
-        dec->audio_samples_written += samples_decoded;
-    } else {
-        if (dec->verbose) {
-            fprintf(stderr, "Warning: TAD decode failed - UNRECOVERABLE\n");
-        }
-        free(pcmu8_output);
-        free(rs_data);
-        free(decoded_payload);
-        *consumed = offset + rs_total;
-        return DECODE_ERR_PAYLOAD_FEC;  // Treat TAD decode failure as payload error
-    }
-
-    free(pcmu8_output);
-    free(rs_data);
-    free(decoded_payload);
-
-    offset += rs_total;
-    *consumed = offset;
-
-    return DECODE_OK;
-}
-
-/**
- * Multithreaded video decoding - submit GOP to worker pool
- */
-static int decode_video_subpacket_mt(dt_decoder_t *dec, const uint8_t *data, size_t data_len,
-                                      size_t *consumed, int *frames_written) {
-    *frames_written = 0;
-
-    // Minimum: 4 byte sync + 28 byte LDPC header (14 bytes * 2, rate 112/224 bits)
-    if (data_len < 4 + DT_TAV_HEADER_SIZE * 2) return -1;
-
-    size_t offset = 0;
-
-    // Read TAV sync pattern (4 bytes, NOT LDPC-coded)
-    uint32_t subpacket_sync = ((uint32_t)data[offset] << 24) |
-                              ((uint32_t)data[offset + 1] << 16) |
-                              ((uint32_t)data[offset + 2] << 8) |
-                              data[offset + 3];
-    offset += 4;
-
-    // Verify sync pattern early
-    int sync_valid = (subpacket_sync == TAV_SUBPACKET_SYNC);
-    if (!sync_valid && dec->verbose) {
-        fprintf(stderr, "Warning: TAV subpacket sync mismatch (MT) (expected 0x%08X, got 0x%08X)\n",
-               TAV_SUBPACKET_SYNC, subpacket_sync);
-    }
-
-    // LDPC decode TAV header (28 bytes -> 14 bytes)
-    uint8_t decoded_tav_header[DT_TAV_HEADER_SIZE];
-    int ldpc_result = ldpc_decode(data + offset, DT_TAV_HEADER_SIZE * 2, decoded_tav_header);
-    if (ldpc_result < 0) {
-        if (dec->verbose) {
-            fprintf(stderr, "Warning: LDPC decode failed for TAV header\n");
-        }
-        memcpy(decoded_tav_header, data + offset, DT_TAV_HEADER_SIZE);
-    } else if (ldpc_result > 0) {
-        dec->fec_corrections++;
-    }
-    offset += DT_TAV_HEADER_SIZE * 2;
-
-    // Parse TAV header (revised layout 2025-12-17)
-    // Layout: gop_size(1) + reserved(2) + compressed_size(4) + rs_block_count(3) + crc(4) = 14 bytes
-    // (sync is read separately above)
-    uint8_t gop_size = decoded_tav_header[0];
-    // Reserved at offset 1-2 (ignored)
-    uint32_t compressed_size;
-    uint32_t rs_block_count;
-    uint32_t stored_crc;
-
-    memcpy(&compressed_size, decoded_tav_header + 3, 4);
-    rs_block_count = decoded_tav_header[7] |
-                     ((uint32_t)decoded_tav_header[8] << 8) |
-                     ((uint32_t)decoded_tav_header[9] << 16);
-    memcpy(&stored_crc, decoded_tav_header + 10, 4);
-
-    // Verify CRC-32 (covers bytes 0-9)
-    uint32_t calculated_crc = calculate_crc32(decoded_tav_header, 10);
-    int crc_valid = (calculated_crc == stored_crc);
-
-    if (!crc_valid) {
-        if (dec->verbose) {
-            fprintf(stderr, "Warning: TAV header CRC mismatch (MT) (expected 0x%08X, got 0x%08X) - skipping video\n",
-                   stored_crc, calculated_crc);
-        }
-        dec->crc_errors++;
-        // Cannot trust header data - skip video decoding entirely
-        *consumed = offset;
-        return DECODE_ERR_HEADER_CRC;
-    }
-
-    // Calculate RS payload size
-    size_t rs_total = rs_block_count * RS_BLOCK_SIZE;
-
-    // Sanity check: compressed_size must not exceed RS payload capacity
-    size_t max_data_size = (rs_block_count * RS_DATA_SIZE);
-    if (compressed_size > max_data_size) {
-        if (dec->verbose) {
-            fprintf(stderr, "Warning: TAV compressed_size (MT) (%u) exceeds RS capacity (%zu) - skipping video\n",
-                   compressed_size, max_data_size);
-        }
-        *consumed = offset;
-        return DECODE_ERR_HEADER_CRC;  // Header data invalid
-    }
-
-    if (offset + rs_total > data_len) {
-        *consumed = data_len;
-        return DECODE_ERR_TRUNCATED;
-    }
-
-    // RS decode payload
-    uint8_t *rs_data = malloc(rs_total);
-    if (!rs_data) return DECODE_ERR_TRUNCATED;
-    memcpy(rs_data, data + offset, rs_total);
-
-    uint8_t *decoded_payload = malloc(compressed_size);
-    if (!decoded_payload) {
-        free(rs_data);
-        return DECODE_ERR_TRUNCATED;
-    }
-
-    int fec_result = decode_fec_blocks(rs_data, rs_total, decoded_payload, compressed_size, dec->fec_mode);
-    if (fec_result < 0) {
-        if (dec->verbose) {
-            fprintf(stderr, "Warning: FEC decode failed for video (MT) - UNRECOVERABLE\n");
-        }
-        free(rs_data);
-        free(decoded_payload);
-        *consumed = offset + rs_total;
-        return DECODE_ERR_PAYLOAD_FEC;
-    } else if (fec_result > 0) {
-        dec->fec_corrections += fec_result;
-    }
-    free(rs_data);
-
-    // Lazy initialization of multithreading (after first packet header is known)
-    if (!dec->worker_threads && dec->num_threads > 1) {
-        if (init_decoder_threads(dec) != 0) {
-            fprintf(stderr, "Error: Cannot initialize decoder threads, falling back to single-threaded\n");
-            dec->num_threads = 1;
-            // Fall back to single-threaded decoding for this packet
-            free(decoded_payload);
-            *consumed = offset + rs_total;
-            return DECODE_ERR_TRUNCATED;  // Treat as truncated/other error
-        }
-        if (dec->verbose) {
-            printf("Initialized multithreaded decoding: %d threads\n", dec->num_threads);
-        }
-    }
-
-    // Find an empty slot
-    int slot_idx = -1;
-    pthread_mutex_lock(&dec->mutex);
-
-    while (slot_idx < 0) {
-        // Try to write completed GOPs first
-        for (int i = 0; i < dec->num_slots; i++) {
-            if (dec->slots[i].status == DECODE_SLOT_DONE &&
-                dec->slots[i].job_id == dec->next_write_slot) {
-
-                gop_decode_job_t *job = &dec->slots[i];
-                pthread_mutex_unlock(&dec->mutex);
-
-                // Write frames to temp file and update freeze frame
-                if (job->decode_result == 0 && dec->video_temp_fp) {
-                    for (int f = 0; f < job->gop_size; f++) {
-                        fwrite(job->rgb_frames[f], 1, job->frame_size, dec->video_temp_fp);
-                        update_freeze_frame(dec, job->rgb_frames[f], job->frame_size);
-                        dec->frames_decoded++;
-                        dec->video_frames_written++;
-                    }
-                }
-
-                pthread_mutex_lock(&dec->mutex);
-
-                // Free job resources while holding mutex
-                for (int f = 0; f < job->gop_size; f++) {
-                    free(job->rgb_frames[f]);
-                }
-                free(job->rgb_frames);
-                free(job->compressed_data);
-
-                job->status = DECODE_SLOT_EMPTY;
-                job->rgb_frames = NULL;
-                job->compressed_data = NULL;
-                dec->next_write_slot++;
-                break;
-            }
-        }
-
-        // Look for empty slot
-        for (int i = 0; i < dec->num_slots; i++) {
-            if (dec->slots[i].status == DECODE_SLOT_EMPTY) {
-                slot_idx = i;
-                break;
-            }
-        }
-
-        if (slot_idx < 0) {
-            // Wait for a slot to become available
-            pthread_cond_wait(&dec->cond_slot_free, &dec->mutex);
-        }
-    }
-
-    // Fill the slot
-    gop_decode_job_t *job = &dec->slots[slot_idx];
-
-    int internal_height = dec->is_interlaced ? dec->height / 2 : dec->height;
-    size_t frame_size = dec->width * internal_height * 3;
-
-    job->compressed_data = decoded_payload;  // Transfer ownership
-    job->compressed_size = compressed_size;
-    job->gop_size = gop_size;
-    job->job_id = dec->jobs_submitted++;
-    job->frame_size = frame_size;
-    job->decode_result = -1;
-
-    // Allocate frame buffers
-    job->rgb_frames = malloc(gop_size * sizeof(uint8_t*));
-    for (int i = 0; i < gop_size; i++) {
-        job->rgb_frames[i] = malloc(frame_size);
-    }
-
-    // Submit job
-    job->status = DECODE_SLOT_PENDING;
-    pthread_cond_broadcast(&dec->cond_job_available);
-    pthread_mutex_unlock(&dec->mutex);
-
-    offset += rs_total;
-    *consumed = offset;
-    *frames_written = gop_size;  // Optimistic - assume decode will succeed
-
-    return DECODE_OK;  // Success - job submitted
-}
-
-static int decode_video_subpacket(dt_decoder_t *dec, const uint8_t *data, size_t data_len,
-                                   size_t *consumed, int *frames_written) {
-    *frames_written = 0;
-
-    // Minimum: 4 byte sync + 28 byte LDPC header (14 bytes * 2, rate 112/224 bits)
-    if (data_len < 4 + DT_TAV_HEADER_SIZE * 2) return -1;
-
-    size_t offset = 0;
-
-    // Read TAV sync pattern (4 bytes, NOT LDPC-coded)
-    uint32_t subpacket_sync = ((uint32_t)data[offset] << 24) |
-                              ((uint32_t)data[offset + 1] << 16) |
-                              ((uint32_t)data[offset + 2] << 8) |
-                              data[offset + 3];
-    offset += 4;
-
-    // Verify sync pattern early
-    int sync_valid = (subpacket_sync == TAV_SUBPACKET_SYNC);
-    if (!sync_valid && dec->verbose) {
-        fprintf(stderr, "Warning: TAV subpacket sync mismatch (expected 0x%08X, got 0x%08X)\n",
-               TAV_SUBPACKET_SYNC, subpacket_sync);
-    }
-
-    // LDPC decode TAV header (28 bytes -> 14 bytes)
-    uint8_t decoded_tav_header[DT_TAV_HEADER_SIZE];
-    int ldpc_result = ldpc_decode(data + offset, DT_TAV_HEADER_SIZE * 2, decoded_tav_header);
-    if (ldpc_result < 0) {
-        if (dec->verbose) {
-            fprintf(stderr, "Warning: LDPC decode failed for TAV header\n");
-        }
-        memcpy(decoded_tav_header, data + offset, DT_TAV_HEADER_SIZE);
-    } else if (ldpc_result > 0) {
-        dec->fec_corrections++;
-    }
-    offset += DT_TAV_HEADER_SIZE * 2;
-
-    // Parse TAV header (revised layout 2025-12-17)
-    // Layout: gop_size(1) + reserved(2) + compressed_size(4) + rs_block_count(3) + crc(4) = 14 bytes
-    // (sync is read separately above)
-    uint8_t gop_size = decoded_tav_header[0];
-    // Reserved at offset 1-2 (ignored)
-    uint32_t compressed_size;
-    uint32_t rs_block_count;
-    uint32_t stored_crc;
-
-    memcpy(&compressed_size, decoded_tav_header + 3, 4);
-    // uint24 rs_block_count (little endian) at offset 7-9
-    rs_block_count = decoded_tav_header[7] |
-                     ((uint32_t)decoded_tav_header[8] << 8) |
-                     ((uint32_t)decoded_tav_header[9] << 16);
-    memcpy(&stored_crc, decoded_tav_header + 10, 4);
-
-    // Verify CRC-32 (covers bytes 0-9)
-    uint32_t calculated_crc = calculate_crc32(decoded_tav_header, 10);
-    int crc_valid = (calculated_crc == stored_crc);
-
-    if (!crc_valid) {
-        if (dec->verbose) {
-            fprintf(stderr, "Warning: TAV header CRC mismatch (expected 0x%08X, got 0x%08X) - skipping video\n",
-                   stored_crc, calculated_crc);
-        }
-        dec->crc_errors++;
-        // Cannot trust header data - skip video decoding entirely
-        // Error concealment will use freeze frame
-        *consumed = offset;
-        return DECODE_ERR_HEADER_CRC;
-    }
-
-    if (dec->verbose) {
-        printf("  TAV: gop_size=%u, compressed=%u, rs_blocks=%u\n",
-               gop_size, compressed_size, rs_block_count);
-    }
-
-    // Calculate RS payload size
-    size_t rs_total = rs_block_count * RS_BLOCK_SIZE;
-
-    // Sanity check: compressed_size must not exceed RS payload capacity
-    size_t max_data_size = (rs_block_count * RS_DATA_SIZE);
-    if (compressed_size > max_data_size) {
-        if (dec->verbose) {
-            fprintf(stderr, "Warning: TAV compressed_size (%u) exceeds RS capacity (%zu) - skipping video\n",
-                   compressed_size, max_data_size);
-        }
-        *consumed = offset;
-        return DECODE_ERR_HEADER_CRC;  // Header data invalid
-    }
-
-    if (offset + rs_total > data_len) {
-        if (dec->verbose) {
-            fprintf(stderr, "Warning: Video packet truncated\n");
-        }
-        *consumed = data_len;
-        return DECODE_ERR_TRUNCATED;
-    }
-
-    // RS decode payload
-    uint8_t *rs_data = malloc(rs_total);
-    if (!rs_data) return DECODE_ERR_TRUNCATED;
-    memcpy(rs_data, data + offset, rs_total);
-
-    uint8_t *decoded_payload = malloc(compressed_size);
-    if (!decoded_payload) {
-        free(rs_data);
-        return DECODE_ERR_TRUNCATED;
-    }
-
-    int fec_result = decode_fec_blocks(rs_data, rs_total, decoded_payload, compressed_size, dec->fec_mode);
-    if (fec_result < 0) {
-        if (dec->verbose) {
-            fprintf(stderr, "Warning: FEC decode failed for video - UNRECOVERABLE\n");
-        }
-        free(rs_data);
-        free(decoded_payload);
-        *consumed = offset + rs_total;
-        return DECODE_ERR_PAYLOAD_FEC;
-    } else if (fec_result > 0) {
-        dec->fec_corrections += fec_result;
-    }
-
-    // Initialize video decoder if needed
-    if (!dec->video_ctx) {
-        tav_video_params_t vparams;
-        vparams.width = dec->width;
-        vparams.height = dec->is_interlaced ? dec->height / 2 : dec->height;
-        vparams.decomp_levels = DT_SPATIAL_LEVELS;
-        vparams.temporal_levels = DT_TEMPORAL_LEVELS;
-        vparams.wavelet_filter = 1;     // CDF 9/7
-        vparams.temporal_wavelet = 255; // Haar
-        vparams.entropy_coder = 1;      // EZBC
-        vparams.channel_layout = 0;     // YCoCg-R
-        vparams.perceptual_tuning = 1;
-        vparams.quantiser_y = QUALITY_Y[dec->quality_index];
-        vparams.quantiser_co = QUALITY_CO[dec->quality_index];
-        vparams.quantiser_cg = QUALITY_CG[dec->quality_index];
-        vparams.encoder_preset = 0x01;  // Sports
-        vparams.monoblock = 1;
-
-        dec->video_ctx = tav_video_create(&vparams);
-        if (!dec->video_ctx) {
-            fprintf(stderr, "Error: Cannot create video decoder\n");
-            free(rs_data);
-            free(decoded_payload);
-            return -1;
-        }
-        if (dec->verbose) {
-            tav_video_set_verbose(dec->video_ctx, 1);
-        }
-    }
-
-    // Allocate frame buffers
-    int internal_height = dec->is_interlaced ? dec->height / 2 : dec->height;
-    size_t frame_size = dec->width * internal_height * 3;
-    uint8_t **rgb_frames = malloc(gop_size * sizeof(uint8_t *));
-    for (int i = 0; i < gop_size; i++) {
-        rgb_frames[i] = malloc(frame_size);
-    }
-
-    // Decode GOP
-    // The encoder packet format is [type(1)][gop_size(1)][size(4)][zstd_data]
-    // Skip the 6-byte header to get to the raw Zstd-compressed data
-    const uint8_t *zstd_data = decoded_payload + 6;
-    size_t zstd_size = compressed_size > 6 ? compressed_size - 6 : 0;
-
-    // Debug: check packet header
-    if (dec->verbose && decoded_payload) {
-        fprintf(stderr, "DEBUG: Video packet header: type=0x%02x gop=%d size=%u (total=%u, zstd=%zu)\n",
-                decoded_payload[0], decoded_payload[1],
-                *(uint32_t*)(decoded_payload + 2), (unsigned)compressed_size, zstd_size);
-        fprintf(stderr, "DEBUG: First 16 bytes of zstd data: ");
-        for (int i = 0; i < 16 && i < (int)zstd_size; i++) {
-            fprintf(stderr, "%02x ", zstd_data[i]);
-        }
-        fprintf(stderr, "\n");
-    }
-
-    int decode_result = tav_video_decode_gop(dec->video_ctx, zstd_data, zstd_size,
-                                              gop_size, rgb_frames);
-
-    if (decode_result == 0) {
-        // Write frames to video temp file and update freeze frame
-        for (int i = 0; i < gop_size; i++) {
-            if (dec->video_temp_fp) {
-                fwrite(rgb_frames[i], 1, frame_size, dec->video_temp_fp);
-            }
-            // Update freeze frame with last successfully decoded frame
-            update_freeze_frame(dec, rgb_frames[i], frame_size);
-            dec->frames_decoded++;
-            dec->video_frames_written++;
-        }
-        *frames_written = gop_size;
-    } else {
-        if (dec->verbose) {
-            const char *err = tav_video_get_error(dec->video_ctx);
-            fprintf(stderr, "Warning: Video decode failed: %s - UNRECOVERABLE\n", err ? err : "unknown error");
-        }
-        // Cleanup and return error
-        for (int i = 0; i < gop_size; i++) {
-            free(rgb_frames[i]);
-        }
-        free(rgb_frames);
-        free(rs_data);
-        free(decoded_payload);
-        *consumed = offset + rs_total;
-        return -1;  // Unrecoverable - video decode failed
-    }
-
-    // Cleanup
-    for (int i = 0; i < gop_size; i++) {
-        free(rgb_frames[i]);
-    }
-    free(rgb_frames);
-    free(rs_data);
-    free(decoded_payload);
-
-    offset += rs_total;
-    *consumed = offset;
-
-    return DECODE_OK;
-}
-
-// =============================================================================
-// FFmpeg Output
-// =============================================================================
-
-// Mux decoded video and audio temp files into final output
-static int mux_output(dt_decoder_t *dec) {
-    if (!dec->output_file) {
-        if (dec->verbose) {
-            printf("No output file specified, skipping mux\n");
-        }
-        return 0;
-    }
-
-    if (dec->frames_decoded == 0) {
-        fprintf(stderr, "Warning: No frames decoded, skipping mux\n");
-        return -1;
-    }
-
-    if (dec->verbose) {
-        printf("Muxing output to %s...\n", dec->output_file);
-    }
-
-    int internal_height = dec->is_interlaced ? dec->height / 2 : dec->height;
-    char video_size[32];
-    char framerate[16];
-    snprintf(video_size, sizeof(video_size), "%dx%d", dec->width, internal_height);
-    snprintf(framerate, sizeof(framerate), "%d", dec->framerate);
-
-    pid_t pid = fork();
-    if (pid < 0) {
-        fprintf(stderr, "Error: Failed to fork for FFmpeg\n");
-        return -1;
-    }
-
-    if (pid == 0) {
-        // Child process - execute FFmpeg
-        execl("/usr/bin/ffmpeg", "ffmpeg",
-              "-f", "rawvideo",
-              "-pixel_format", "rgb24",
-              "-video_size", video_size,
-              "-framerate", framerate,
-              "-i", dec->video_temp_file,
-              "-f", "u8",
-              "-ar", "32000",
-              "-ac", "2",
-              "-i", dec->audio_temp_file,
-              "-c:v", "ffv1",
-              "-level", "3",
-              "-coder", "1",
-              "-context", "1",
-              "-g", "1",
-              "-slices", "24",
-              "-slicecrc", "1",
-              "-pixel_format", "rgb24",
-              "-c:a", "pcm_u8",
-              "-f", "matroska",
-              dec->output_file,
-              "-y",
-              "-v", "warning",
-              (char*)NULL);
-
-        fprintf(stderr, "Error: Failed to execute FFmpeg\n");
-        exit(1);
-    } else {
-        // Parent process - wait for FFmpeg
-        int status;
-        waitpid(pid, &status, 0);
-        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
-            if (dec->verbose) {
-                printf("Output written to %s\n", dec->output_file);
-            }
-            return 0;
-        } else {
-            fprintf(stderr, "Warning: FFmpeg mux failed (status %d)\n", WEXITSTATUS(status));
-            return -1;
-        }
-    }
-}
-
-// Spawn FFmpeg for streaming output (unused in current implementation)
-static int spawn_ffmpeg(dt_decoder_t *dec) {
-    int video_pipe_fd[2];
-
-    if (pipe(video_pipe_fd) < 0) {
-        fprintf(stderr, "Error: Failed to create video pipe\n");
-        return -1;
-    }
-
-    dec->ffmpeg_pid = fork();
-
-    if (dec->ffmpeg_pid < 0) {
-        fprintf(stderr, "Error: Failed to fork FFmpeg process\n");
-        close(video_pipe_fd[0]);
-        close(video_pipe_fd[1]);
-        return -1;
-    }
-
-    if (dec->ffmpeg_pid == 0) {
-        // Child process - execute FFmpeg
-        close(video_pipe_fd[1]);
-
-        int internal_height = dec->is_interlaced ? dec->height / 2 : dec->height;
-        char video_size[32];
-        char framerate[16];
-        snprintf(video_size, sizeof(video_size), "%dx%d", dec->width, internal_height);
-        snprintf(framerate, sizeof(framerate), "%d", dec->framerate);
-
-        dup2(video_pipe_fd[0], 3);
-        close(video_pipe_fd[0]);
-
-        execl("/usr/bin/ffmpeg", "ffmpeg",
-              "-f", "rawvideo",
-              "-pixel_format", "rgb24",
-              "-video_size", video_size,
-              "-framerate", framerate,
-              "-i", "pipe:3",
-              "-f", "u8",
-              "-ar", "32000",
-              "-ac", "2",
-              "-i", dec->audio_temp_file,
-              "-c:v", "ffv1",
-              "-level", "3",
-              "-coder", "1",
-              "-context", "1",
-              "-g", "1",
-              "-slices", "24",
-              "-slicecrc", "1",
-              "-pixel_format", "rgb24",
-              "-c:a", "pcm_u8",
-              "-f", "matroska",
-              dec->output_file,
-              "-y",
-              "-v", "warning",
-              (char*)NULL);
-
-        fprintf(stderr, "Error: Failed to execute FFmpeg\n");
-        exit(1);
-    } else {
-        close(video_pipe_fd[0]);
-        dec->video_pipe = fdopen(video_pipe_fd[1], "wb");
-        if (!dec->video_pipe) {
-            fprintf(stderr, "Error: Failed to open video pipe\n");
-            kill(dec->ffmpeg_pid, SIGTERM);
-            return -1;
-        }
-    }
-
-    return 0;
-}
-
-// =============================================================================
-// Multithreading Support
-// =============================================================================
-// Main Decoding Loop
-// =============================================================================
-
-static int process_packet(dt_decoder_t *dec) {
-    dt_packet_header_t header;
-
-    // Find and read header
-    if (find_sync_pattern(dec) != 0) {
-        return -1;  // EOF
-    }
-
-    if (read_and_decode_header(dec, &header) != 0) {
-        // Try to recover
-        return 0;  // Continue
-    }
-
-    if (dec->verbose) {
-        double timecode_sec = header.timecode_ns / 1000000000.0;
-        printf("Packet %lu: timecode=%.3fs, size=%u, offset_to_video=%u\n",
-               dec->packets_processed + 1, timecode_sec, header.packet_size, header.offset_to_video);
-    }
-
-    // Calculate expected samples/frames based on timecode
-    // TAD audio is 32000 Hz stereo, GOP size varies
-    uint64_t timecode_delta_ns = 0;
-    size_t expected_audio_samples = 0;
-    int expected_video_frames = 0;
-    int timecode_valid = 0;
-
-    if (dec->packets_processed > 0) {
-        // Sanity check: detect obviously garbage timecodes (corrupted header data)
-        // A timecode is "garbage" if it's impossibly large (> 24 hours) or if it went backwards
-        // Large forward jumps are OK - they indicate lost packets and should be trusted
-        uint64_t max_reasonable_timecode_ns = 86400ULL * 1000000000ULL;  // 24 hours
-
-        uint64_t reconstructed_timecode_ns = 0;
-        int use_reconstructed = 0;
-        uint64_t gop_duration_ns = (16ULL * 1000000000ULL) / dec->framerate;
-
-        if (header.timecode_ns > max_reasonable_timecode_ns) {
-            // Timecode is garbage (e.g., 9007208.588s = 104 days) - reconstruct
-            reconstructed_timecode_ns = dec->last_timecode_ns + gop_duration_ns;
-            timecode_delta_ns = gop_duration_ns;
-            use_reconstructed = 1;
-
-            if (dec->verbose) {
-                double corrupted_tc = header.timecode_ns / 1000000000.0;
-                double reconstructed_tc = reconstructed_timecode_ns / 1000000000.0;
-                fprintf(stderr, "Warning: Timecode garbage (%.3fs), reconstructed as %.3fs based on GOP size\n",
-                        corrupted_tc, reconstructed_tc);
-            }
-        } else if (header.timecode_ns > dec->last_timecode_ns) {
-            // Valid timecode moving forward - trust it (even with large jumps from lost packets)
-            timecode_delta_ns = header.timecode_ns - dec->last_timecode_ns;
-        } else if (header.timecode_ns == dec->last_timecode_ns) {
-            // Duplicate timecode - corrupted, reconstruct
-            reconstructed_timecode_ns = dec->last_timecode_ns + gop_duration_ns;
-            timecode_delta_ns = gop_duration_ns;
-            use_reconstructed = 1;
-
-            if (dec->verbose) {
-                fprintf(stderr, "Warning: Duplicate timecode detected, reconstructed based on GOP size\n");
-            }
-        } else {
-            // Timecode went backwards - corrupted, reconstruct
-            reconstructed_timecode_ns = dec->last_timecode_ns + gop_duration_ns;
-            timecode_delta_ns = gop_duration_ns;
-            use_reconstructed = 1;
-
-            if (dec->verbose) {
-                fprintf(stderr, "Warning: Timecode went backwards, reconstructed based on GOP size\n");
-            }
-        }
-
-        // Calculate expected samples/frames from (possibly reconstructed) timecode delta
-        // NOTE: These variables are currently unused - cumulative logic below uses absolute timecodes
-        expected_audio_samples = (timecode_delta_ns * 64000) / 1000000000ULL;  // 32kHz stereo = 64000 samples/sec
-        expected_video_frames = (int)((timecode_delta_ns * dec->framerate) / 1000000000ULL);
-        timecode_valid = 1;
-
-        // Store which timecode to use for next packet
-        if (use_reconstructed) {
-            // Override header timecode with reconstructed value
-            header.timecode_ns = reconstructed_timecode_ns;
-        }
-    }
-
-    // Error concealment: Insert gaps BEFORE decoding current packet
-    // This ensures concealment data appears in the correct timeline position
-
-    // Also handle first packet - if timecode > 0, insert concealment for missed initial data
-    if (dec->packets_processed == 0 && header.timecode_ns > 0) {
-        // First packet but timecode is not 0 - we missed the beginning
-        // Audio: 32000 Hz stereo = 64000 total samples per second (L+R combined)
-        uint64_t expected_cumulative_audio = (header.timecode_ns * 64000ULL) / 1000000000ULL;
-        uint64_t expected_cumulative_video = (header.timecode_ns * (uint64_t)dec->framerate) / 1000000000ULL;
-
-        if (dec->verbose) {
-            printf("  FIRST PACKET CONCEALMENT: timecode=%.3fs, inserting %lu silent samples + %lu frozen frames\n",
-                   header.timecode_ns / 1000000000.0, expected_cumulative_audio, expected_cumulative_video);
-        }
-
-        if (expected_cumulative_audio > 0) {
-            write_silent_audio(dec, expected_cumulative_audio);
-        }
-        if (expected_cumulative_video > 0) {
-            write_frozen_frames(dec, (int)expected_cumulative_video);
-        }
-    }
-
-    if (dec->packets_processed > 0 && timecode_valid) {
-        // Save cumulative counts BEFORE decoding this packet
-        uint64_t cumulative_audio_before = dec->audio_samples_written;
-        uint64_t cumulative_video_before = dec->video_frames_written;
-
-        // Calculate expected CUMULATIVE samples/frames at this timecode
-        // Audio: 32000 Hz stereo = 64000 total samples per second (L+R combined)
-        uint64_t expected_cumulative_audio = (header.timecode_ns * 64000ULL) / 1000000000ULL;
-        uint64_t expected_cumulative_video = (header.timecode_ns * (uint64_t)dec->framerate) / 1000000000ULL;
-
-        // Calculate gap between expected and actual (BEFORE this packet)
-        size_t audio_gap = 0;
-        int video_gap = 0;
-
-        if (expected_cumulative_audio > cumulative_audio_before) {
-            audio_gap = expected_cumulative_audio - cumulative_audio_before;
-        }
-
-        if (expected_cumulative_video > cumulative_video_before) {
-            video_gap = expected_cumulative_video - cumulative_video_before;
-        }
-
-        // Insert concealment data FIRST (fills gap from lost packets)
-        if (audio_gap > 0 || video_gap > 0) {
-            if (dec->verbose) {
-                if (audio_gap > 0 && video_gap > 0) {
-                    printf("  ERROR CONCEALMENT: Inserting %zu silent samples + %d frozen frames\n",
-                           audio_gap, video_gap);
-                    printf("    (Expected: %lu samples/%lu frames, Actual: %lu samples/%lu frames)\n",
-                           expected_cumulative_audio, expected_cumulative_video,
-                           cumulative_audio_before, cumulative_video_before);
-                } else if (audio_gap > 0) {
-                    printf("  ERROR CONCEALMENT: Inserting %zu silent samples\n", audio_gap);
-                    printf("    (Expected: %lu samples, Actual: %lu samples)\n",
-                           expected_cumulative_audio, cumulative_audio_before);
-                } else {
-                    printf("  ERROR CONCEALMENT: Inserting %d frozen frames\n", video_gap);
-                    printf("    (Expected: %lu frames, Actual: %lu frames)\n",
-                           expected_cumulative_video, cumulative_video_before);
-                }
-            }
-
-            if (audio_gap > 0) {
-                write_silent_audio(dec, audio_gap);
-            }
-            if (video_gap > 0) {
-                write_frozen_frames(dec, video_gap);
-            }
-        }
-    }
-
-    // NOW decode current packet (writes AFTER concealment)
-    // Read packet payload (contains both TAD and TAV subpackets)
-    uint8_t *packet_data = malloc(header.packet_size);
-    if (!packet_data) return -1;
-
-    size_t bytes_read = fread(packet_data, 1, header.packet_size, dec->input_fp);
-    if (bytes_read < header.packet_size) {
-        if (dec->verbose) {
-            fprintf(stderr, "Warning: Incomplete packet (got %zu, expected %u)\n",
-                   bytes_read, header.packet_size);
-        }
-        free(packet_data);
-        return -1;
-    }
-    dec->bytes_read += bytes_read;
-
-    // Decode audio and video
-    size_t audio_samples_written = 0;
-    int video_frames_written = 0;
-    int audio_result = DECODE_OK;
-    int video_result = DECODE_OK;
-
-    // Process TAD subpacket (audio comes first, no type byte)
-    size_t tad_consumed = 0;
-    if (header.offset_to_video > 0) {
-        audio_result = decode_audio_subpacket(dec, packet_data, header.offset_to_video,
-                                              &tad_consumed, &audio_samples_written);
-    }
-
-    // Process TAV subpacket (video comes after audio)
-    if (header.offset_to_video < header.packet_size) {
-        size_t tav_consumed = 0;
-        if (dec->num_threads > 1) {
-            video_result = decode_video_subpacket_mt(dec, packet_data + header.offset_to_video,
-                                                     header.packet_size - header.offset_to_video,
-                                                     &tav_consumed, &video_frames_written);
-        } else {
-            video_result = decode_video_subpacket(dec, packet_data + header.offset_to_video,
-                                                  header.packet_size - header.offset_to_video,
-                                                  &tav_consumed, &video_frames_written);
-        }
-    }
-
-    // Categorize packet outcome for detailed statistics
-    int audio_ok = (audio_result == DECODE_OK);
-    int video_ok = (video_result == DECODE_OK);
-    int audio_hdr_fail = (audio_result == DECODE_ERR_HEADER_CRC);
-    int video_hdr_fail = (video_result == DECODE_ERR_HEADER_CRC);
-
-    if (audio_ok && video_ok) {
-        // Fully decoded
-        if (dec->current_packet_recovered) {
-            dec->stat_fully_decoded_recovered++;   // (b)
-        } else {
-            dec->stat_fully_decoded_intact++;      // (a)
-        }
-    } else if (video_ok && !audio_ok) {
-        // Video OK, audio lost
-        if (dec->current_packet_recovered) {
-            // Video recovered by soft sync
-            if (audio_hdr_fail) {
-                dec->stat_partial_video_rec_audio_hdr_damaged++;      // (e)
-            } else {
-                dec->stat_partial_video_rec_audio_payload_damaged++;  // (f)
-            }
-        } else {
-            // Video intact
-            if (audio_hdr_fail) {
-                dec->stat_partial_video_ok_audio_hdr_damaged++;       // (c)
-            } else {
-                dec->stat_partial_video_ok_audio_payload_damaged++;   // (d)
-            }
-        }
-    } else if (audio_ok && !video_ok) {
-        // Audio OK, video lost
-        if (video_hdr_fail) {
-            dec->stat_partial_audio_ok_video_hdr_damaged++;   // (g)
-        } else {
-            dec->stat_partial_audio_ok_video_payload_damaged++;  // (h)
-        }
-    } else {
-        // Both audio and video lost
-        if (audio_hdr_fail && video_hdr_fail) {
-            dec->stat_lost_both_headers_damaged++;   // (k)
-        } else {
-            dec->stat_lost_payloads_damaged++;       // (l)
-        }
-    }
-
-    // Update timecode tracking
-    dec->last_timecode_ns = header.timecode_ns;
-    dec->packets_processed++;
-    dec->current_packet_recovered = 0;  // Reset for next packet
-
-    if (!dec->verbose && dec->packets_processed % 10 == 0) {
-        fprintf(stderr, "\rDecoding packet %lu, frames: %lu...",
-               dec->packets_processed, dec->frames_decoded);
-    }
-
-    free(packet_data);
-    return 0;
-}
-
-static int run_decoder(dt_decoder_t *dec) {
-    // Open input file
-    dec->input_fp = fopen(dec->input_file, "rb");
-    if (!dec->input_fp) {
-        fprintf(stderr, "Error: Cannot open input file: %s\n", dec->input_file);
-        return -1;
-    }
-
-    // Create temp file for audio
-    generate_random_filename(dec->audio_temp_file, sizeof(dec->audio_temp_file));
-    dec->audio_temp_fp = fopen(dec->audio_temp_file, "wb");
-    if (!dec->audio_temp_fp) {
-        fprintf(stderr, "Warning: Cannot create temp audio file, audio will be skipped\n");
-    }
-
-    // Create temp file for video
-    generate_random_filename(dec->video_temp_file, sizeof(dec->video_temp_file));
-    dec->video_temp_fp = fopen(dec->video_temp_file, "wb");
-    if (!dec->video_temp_fp) {
-        fprintf(stderr, "Warning: Cannot create temp video file, video will be skipped\n");
-    }
-
-    // Note: Multithreading will be initialized lazily after reading first packet header
-    // (need to know dimensions and quality settings first)
-
-    // Decode all packets
-    if (dec->verbose) {
-        printf("Decoding TAV-DT stream...\n");
-    }
-
-    // Decode all packets, writing to temp files
-    while (process_packet(dec) == 0) {
-        // Progress is shown in process_packet
-    }
-
-    // Flush remaining GOPs in multithreaded mode
-    if (dec->num_threads > 1) {
-        pthread_mutex_lock(&dec->mutex);
-
-        // Write all remaining completed GOPs in order
-        while (dec->next_write_slot < dec->jobs_submitted) {
-            int found = -1;
-            for (int i = 0; i < dec->num_slots; i++) {
-                if (dec->slots[i].status == DECODE_SLOT_DONE &&
-                    dec->slots[i].job_id == dec->next_write_slot) {
-                    found = i;
-                    break;
-                }
-            }
-
-            if (found >= 0) {
-                gop_decode_job_t *job = &dec->slots[found];
-                pthread_mutex_unlock(&dec->mutex);
-
-                // Write frames and update freeze frame
-                if (job->decode_result == 0 && dec->video_temp_fp) {
-                    for (int f = 0; f < job->gop_size; f++) {
-                        fwrite(job->rgb_frames[f], 1, job->frame_size, dec->video_temp_fp);
-                        update_freeze_frame(dec, job->rgb_frames[f], job->frame_size);
-                        dec->frames_decoded++;
-                        dec->video_frames_written++;
-                    }
-                }
-
-                pthread_mutex_lock(&dec->mutex);
-
-                // Free resources while holding mutex
-                for (int f = 0; f < job->gop_size; f++) {
-                    free(job->rgb_frames[f]);
-                }
-                free(job->rgb_frames);
-                free(job->compressed_data);
-
-                job->status = DECODE_SLOT_EMPTY;
-                job->rgb_frames = NULL;
-                job->compressed_data = NULL;
-                dec->next_write_slot++;
-            } else {
-                // Wait for the GOP to complete
-                pthread_cond_wait(&dec->cond_slot_free, &dec->mutex);
-            }
-        }
-
-        pthread_mutex_unlock(&dec->mutex);
-
-        // Cleanup threads
-        cleanup_decoder_threads(dec);
-    }
-
-    // Close temp files for reading by FFmpeg
-    if (dec->audio_temp_fp) {
-        fclose(dec->audio_temp_fp);
-        dec->audio_temp_fp = NULL;
-    }
-    if (dec->video_temp_fp) {
-        fclose(dec->video_temp_fp);
-        dec->video_temp_fp = NULL;
-    }
-
-    fprintf(stderr, "\n");
-
-    // Calculate summary totals
-    uint64_t fully_decoded = dec->stat_fully_decoded_intact + dec->stat_fully_decoded_recovered;
-    uint64_t partially_decoded = dec->stat_partial_video_ok_audio_hdr_damaged +
-                                  dec->stat_partial_video_ok_audio_payload_damaged +
-                                  dec->stat_partial_video_rec_audio_hdr_damaged +
-                                  dec->stat_partial_video_rec_audio_payload_damaged +
-                                  dec->stat_partial_audio_ok_video_hdr_damaged +
-                                  dec->stat_partial_audio_ok_video_payload_damaged;
-    uint64_t packets_lost = dec->stat_lost_sync_recovery_failure +
-                            dec->stat_lost_both_headers_damaged +
-                            dec->stat_lost_payloads_damaged;
-
-    printf("\n=== Decoding Statistics ===\n");
-    printf("Total packets: %lu\n", dec->packets_processed + packets_lost);
-    printf("Frames decoded: %lu\n", dec->frames_decoded);
-    printf("Bytes read: %lu\n", dec->bytes_read);
-    printf("FEC corrections: %lu\n", dec->fec_corrections);
-    printf("\n--- Packet Outcome Breakdown ---\n");
-
-    printf("Packets fully decoded (audio and video present): %lu\n", fully_decoded);
-    printf("  (a) Packets intact: %lu\n", dec->stat_fully_decoded_intact);
-    printf("  (b) Recovered by soft sync recovery: %lu\n", dec->stat_fully_decoded_recovered);
-
-    printf("Packets partially decoded (audio or video only): %lu\n", partially_decoded);
-    printf("  (c) Intact video, audio lost due to subpacket header damage: %lu\n",
-           dec->stat_partial_video_ok_audio_hdr_damaged);
-    printf("  (d) Intact video, audio lost due to payload damage: %lu\n",
-           dec->stat_partial_video_ok_audio_payload_damaged);
-    printf("  (e) Video resync'd by soft sync recovery, audio lost due to subpacket header damage: %lu\n",
-           dec->stat_partial_video_rec_audio_hdr_damaged);
-    printf("  (f) Video resync'd by soft sync recovery, audio lost due to payload damage: %lu\n",
-           dec->stat_partial_video_rec_audio_payload_damaged);
-    printf("  (g) Intact audio, video lost due to subpacket header damage: %lu\n",
-           dec->stat_partial_audio_ok_video_hdr_damaged);
-    printf("  (h) Intact audio, video lost due to payload damage: %lu\n",
-           dec->stat_partial_audio_ok_video_payload_damaged);
-
-    printf("Packets lost (no audio and video): %lu\n", packets_lost);
-    printf("  (j) Sync recovery failure: %lu\n", dec->stat_lost_sync_recovery_failure);
-    printf("  (k) Has sync but both subpacket headers damaged: %lu\n", dec->stat_lost_both_headers_damaged);
-    printf("  (l) All headers intact but damaged payload: %lu\n", dec->stat_lost_payloads_damaged);
-
-    printf("===========================\n");
-
-    // Mux output files
-    mux_output(dec);
-
-    // Cleanup
-    if (dec->video_ctx) {
-        tav_video_free(dec->video_ctx);
-    }
-    if (dec->video_pipe) {
-        fclose(dec->video_pipe);
-        waitpid(dec->ffmpeg_pid, NULL, 0);
-    }
-    if (dec->input_fp) {
-        fclose(dec->input_fp);
-    }
-    if (dec->freeze_frame) {
-        free(dec->freeze_frame);
-        dec->freeze_frame = NULL;
-    }
-
-    // Remove temp files
-    unlink(dec->audio_temp_file);
-    unlink(dec->video_temp_file);
-
-    return 0;
-}
-
-// =============================================================================
-// Main
-// =============================================================================
-
-// Generate output filename by replacing extension with .mkv
-static char *generate_output_filename(const char *input_file) {
-    size_t len = strlen(input_file);
-    char *output = malloc(len + 5);  // Worst case: add ".mkv" + null
-    if (!output) return NULL;
-
-    strcpy(output, input_file);
-
-    // Find last dot in filename (not in path)
-    char *last_dot = strrchr(output, '.');
-    char *last_slash = strrchr(output, '/');
-
-    // Only replace if dot is after last slash (i.e., in filename, not path)
-    if (last_dot && (!last_slash || last_dot > last_slash)) {
-        strcpy(last_dot, ".mkv");
-    } else {
-        // No extension found, append .mkv
-        strcat(output, ".mkv");
-    }
-
-    return output;
-}
-
-int main(int argc, char **argv) {
-    dt_decoder_t dec;
-    memset(&dec, 0, sizeof(dec));
-
-    // Default thread count
-    dec.num_threads = 0;//get_default_thread_count();
-
-    // Initialize FEC libraries
-    rs_init();
-    ldpc_init();
-    ldpc_p_init();  // LDPC payload codec
-
-    static struct option long_options[] = {
-        {"input",        required_argument, 0, 'i'},
-        {"output",       required_argument, 0, 'o'},
-        {"threads",      required_argument, 0, 't'},
-        {"ldpc-payload", no_argument,       0, 'D'},
-        {"dump",         no_argument,       0, 'd'},
-        {"verbose",      no_argument,       0, 'v'},
-        {"help",         no_argument,       0, 'h'},
-        {0, 0, 0, 0}
-    };
-
-    int opt;
-    while ((opt = getopt_long(argc, argv, "i:o:t:dvh", long_options, NULL)) != -1) {
-        switch (opt) {
-            case 'i':
-                dec.input_file = optarg;
-                break;
-            case 'o':
-                dec.output_file = optarg;
-                break;
-            case 't': {
-                int threads = atoi(optarg);
-                if (threads < 0) {
-                    fprintf(stderr, "Error: Thread count must be positive\n");
-                    return 1;
-                }
-                // Both 0 and 1 mean single-threaded (use value 0 internally)
-                dec.num_threads = (threads <= 1) ? 0 : threads;
-                if (dec.num_threads > MAX_DECODE_THREADS) dec.num_threads = MAX_DECODE_THREADS;
-                break;
-            }
-            case 'D':
-                dec.fec_mode = FEC_MODE_LDPC;
-                break;
-            case 'd':
-                dec.dump_mode = 1;
-                break;
-            case 'v':
-                dec.verbose = 1;
-                break;
-            case 'h':
-            default:
-                print_usage(argv[0]);
-                return opt == 'h' ? 0 : 1;
-        }
-    }
-
-    // Validate arguments
-    if (!dec.input_file) {
-        fprintf(stderr, "Error: Input and output files are required\n");
-        print_usage(argv[0]);
-        return 1;
-    }
-
-    // Generate output filename if not provided
-    if (!dec.output_file) {
-        dec.output_file = generate_output_filename(dec.input_file);
-        if (!dec.output_file) {
-            fprintf(stderr, "Error: Failed to generate output filename\n");
-            return 1;
-        }
-    }
-
-    return run_decoder(&dec);
-}
diff --git a/video_encoder/src/encoder_tad_standalone.c b/video_encoder/src/encoder_tad_standalone.c
deleted file mode 100644
index d053129..0000000
--- a/video_encoder/src/encoder_tad_standalone.c
+++ /dev/null
@@ -1,344 +0,0 @@
-// Created by CuriousTorvald and Claude on 2025-10-24.
-// TAD32 (Terrarum Advanced Audio - PCM32 version) Encoder - Standalone program
-// Alternative version: PCM32 throughout encoding, PCM8 conversion only at decoder
-// Uses encoder_tad32.c library for encoding functions
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <getopt.h>
-#include <math.h>
-#include <time.h>
-#include "encoder_tad.h"
-
-#define ENCODER_VENDOR_STRING "Encoder-TAD32 (PCM32f version) 20251107"
-
-// TAD32 format constants
-#define TAD32_DEFAULT_CHUNK_SIZE 32768  // Using a prime number to force the worst condition
-
-// Temporary file for FFmpeg PCM extraction
-char TEMP_PCM_FILE[42];
-
-static void generate_random_filename(char *filename) {
-    srand(time(NULL));
-
-    const char charset[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
-    const int charset_size = sizeof(charset) - 1;
-
-    // Start with the prefix
-    strcpy(filename, "/tmp/");
-
-    // Generate 32 random characters
-    for (int i = 0; i < 32; i++) {
-        filename[5 + i] = charset[rand() % charset_size];
-    }
-
-    // Add the extension
-    strcpy(filename + 37, ".tad");
-    filename[41] = '\0';  // Null terminate
-}
-
-//=============================================================================
-// Main Encoder
-//=============================================================================
-
-static void print_usage(const char *prog_name) {
-    printf("Usage: %s -i <input> [options]\n", prog_name);
-    printf("Options:\n");
-    printf("  -i <file>       Input audio file (any format supported by FFmpeg)\n");
-    printf("  -o <file>       Output TAD32 file (optional, auto-generated as input.qN.tad)\n");
-    printf("  -q <level>      Quality level (0-5, default: %d)\n", TAD32_QUALITY_DEFAULT);
-    printf("                  0 = lowest quality/smallest (max_index=31)\n");
-    printf("                  1 = low quality (max_index=35)\n");
-    printf("                  2 = medium quality (max_index=39)\n");
-    printf("                  3 = good quality (max_index=47) [DEFAULT]\n");
-    printf("                  4 = high quality (max_index=56)\n");
-    printf("                  5 = very high quality/largest (max_index=89)\n");
-    printf("  -v              Verbose output\n");
-    printf("  -h, --help      Show this help\n");
-    printf("\nVersion: %s\n", ENCODER_VENDOR_STRING);
-    printf("Note: This is the PCM32 alternative version for comparison testing.\n");
-    printf("      PCM32 is processed throughout encoding; PCM8 conversion happens at decoder.\n");
-}
-
-int main(int argc, char *argv[]) {
-    generate_random_filename(TEMP_PCM_FILE);
-
-    char *input_file = NULL;
-    char *output_file = NULL;
-    int quality = TAD32_QUALITY_DEFAULT;  // Default quality level (0-5)
-    float quantiser_scale = 1.0f;  // Default quantiser scaling
-    int verbose = 0;
-
-    // Parse command line arguments
-    static struct option long_options[] = {
-        {"help", no_argument, 0, 'h'},
-        {0, 0, 0, 0}
-    };
-
-    int opt;
-    int option_index = 0;
-    while ((opt = getopt_long(argc, argv, "i:o:q:s:vh", long_options, &option_index)) != -1) {
-        switch (opt) {
-            case 'i':
-                input_file = optarg;
-                break;
-            case 'o':
-                output_file = optarg;
-                break;
-            case 'q':
-                quality = atoi(optarg);
-                if (quality < TAD32_QUALITY_MIN || quality > TAD32_QUALITY_MAX) {
-                    fprintf(stderr, "Error: Quality must be in range %d-%d\n", TAD32_QUALITY_MIN, TAD32_QUALITY_MAX);
-                    return 1;
-                }
-                break;
-            case 's':
-                quantiser_scale = atof(optarg);
-                if (quantiser_scale < 0.5f || quantiser_scale > 4.0f) {
-                    fprintf(stderr, "Error: Quantiser scale must be in range 0.5-4.0\n");
-                    return 1;
-                }
-                break;
-            case 'v':
-                verbose = 1;
-                break;
-            case 'h':
-                print_usage(argv[0]);
-                return 0;
-            default:
-                print_usage(argv[0]);
-                return 1;
-        }
-    }
-
-    if (!input_file) {
-        fprintf(stderr, "Error: Input file is required\n");
-        print_usage(argv[0]);
-        return 1;
-    }
-
-    // Convert quality (0-5) to max_index for quantisation
-    int max_index = tad32_quality_to_max_index(quality);
-
-    // Generate output filename if not provided
-    if (!output_file) {
-        // Allocate space for output filename
-        size_t input_len = strlen(input_file);
-        output_file = malloc(input_len + 32);  // Extra space for .qNN.tad
-
-        // Find the last directory separator
-        const char *basename_start = strrchr(input_file, '/');
-        if (!basename_start) basename_start = strrchr(input_file, '\\');
-        basename_start = basename_start ? basename_start + 1 : input_file;
-
-        // Copy directory part
-        size_t dir_len = basename_start - input_file;
-        strncpy(output_file, input_file, dir_len);
-
-        // Find the extension (last dot after basename)
-        const char *ext = strrchr(basename_start, '.');
-        if (ext && ext > basename_start) {
-            // Copy basename without extension
-            size_t name_len = ext - basename_start;
-            strncpy(output_file + dir_len, basename_start, name_len);
-            output_file[dir_len + name_len] = '\0';
-        } else {
-            // No extension, copy entire basename
-            strcpy(output_file + dir_len, basename_start);
-        }
-
-        // Append .qNN.tad (use quality level for filename)
-        sprintf(output_file + strlen(output_file), ".q%d.tad", quality);
-
-        if (verbose) {
-            printf("Auto-generated output path: %s\n", output_file);
-        }
-    }
-
-    if (verbose) {
-        printf("%s\n", ENCODER_VENDOR_STRING);
-        printf("Input: %s\n", input_file);
-        printf("Output: %s\n", output_file);
-        printf("Quality level: %d (max_index=%d)\n", quality, max_index);
-        printf("Quantiser scale: %.2f\n", quantiser_scale);
-    }
-
-    // Detect original sample rate for high-quality resampling
-    char sample_rate_str[32] = "48000";  // Default fallback
-    char detect_cmd[2048];
-    snprintf(detect_cmd, sizeof(detect_cmd),
-        "ffprobe -v error -select_streams a:0 -show_entries stream=sample_rate "
-        "-of default=noprint_wrappers=1:nokey=1 \"%s\" 2>/dev/null",
-        input_file);
-
-    FILE *probe = popen(detect_cmd, "r");
-    if (probe) {
-        if (fgets(sample_rate_str, sizeof(sample_rate_str), probe)) {
-            // Remove newline
-            sample_rate_str[strcspn(sample_rate_str, "\n")] = 0;
-        }
-        pclose(probe);
-    }
-
-    int original_rate = atoi(sample_rate_str);
-    if (original_rate <= 0 || original_rate > 192000) {
-        original_rate = 48000;  // Fallback
-    }
-
-    if (verbose) {
-        printf("Detected original sample rate: %d Hz\n", original_rate);
-        printf("Extracting and resampling audio to %d Hz...\n", TAD32_SAMPLE_RATE);
-    }
-
-    // Extract and resample in two passes for better quality
-    // Pass 1: Extract at original sample rate
-    char temp_original_pcm[256];
-    snprintf(temp_original_pcm, sizeof(temp_original_pcm), "%s.orig", TEMP_PCM_FILE);
-
-    char ffmpeg_cmd[2048];
-    snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
-        "ffmpeg -hide_banner -v error -i \"%s\" -f f32le -acodec pcm_f32le -ac %d -y \"%s\" 2>&1",
-        input_file, TAD32_CHANNELS, temp_original_pcm);
-
-    int result = system(ffmpeg_cmd);
-    if (result != 0) {
-        fprintf(stderr, "Error: FFmpeg extraction failed\n");
-        return 1;
-    }
-
-    // Pass 2: Resample to 32kHz with high-quality SoXR resampler and highpass filter
-    snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
-        "ffmpeg -hide_banner -v error -f f32le -ar %d -ac %d -i \"%s\" "
-        "-f f32le -acodec pcm_f32le -ar %d -ac %d "
-        "-af \"aresample=resampler=soxr:precision=28:cutoff=0.99:dither_scale=0,highpass=f=16\" "
-        "-y \"%s\" 2>&1",
-        original_rate, TAD32_CHANNELS, temp_original_pcm, TAD32_SAMPLE_RATE, TAD32_CHANNELS, TEMP_PCM_FILE);
-
-    result = system(ffmpeg_cmd);
-    remove(temp_original_pcm);  // Clean up intermediate file
-
-    if (result != 0) {
-        fprintf(stderr, "Error: FFmpeg resampling failed\n");
-        return 1;
-    }
-
-    // Open PCM file
-    FILE *pcm_file = fopen(TEMP_PCM_FILE, "rb");
-    if (!pcm_file) {
-        fprintf(stderr, "Error: Could not open temporary PCM file\n");
-        return 1;
-    }
-
-    // Get file size
-    fseek(pcm_file, 0, SEEK_END);
-    size_t pcm_size = ftell(pcm_file);
-    fseek(pcm_file, 0, SEEK_SET);
-
-    size_t total_samples = pcm_size / (TAD32_CHANNELS * sizeof(float));
-
-    // Pad to even sample count
-    if (total_samples % 2 == 1) {
-        total_samples++;
-        if (verbose) {
-            printf("Odd sample count detected, padding with one zero sample\n");
-        }
-    }
-
-    size_t num_chunks = (total_samples + TAD32_DEFAULT_CHUNK_SIZE - 1) / TAD32_DEFAULT_CHUNK_SIZE;
-
-    if (verbose) {
-        printf("Total samples: %zu (%.2f seconds)\n", total_samples,
-               (double)total_samples / TAD32_SAMPLE_RATE);
-        printf("Chunks: %zu (chunk size: %d samples)\n", num_chunks, TAD32_DEFAULT_CHUNK_SIZE);
-    }
-
-    // Open output file
-    FILE *output = fopen(output_file, "wb");
-    if (!output) {
-        fprintf(stderr, "Error: Could not open output file\n");
-        fclose(pcm_file);
-        return 1;
-    }
-
-    // Process chunks using linked TAD32 encoder library
-    size_t total_output_size = 0;
-    float *chunk_buffer = malloc(TAD32_DEFAULT_CHUNK_SIZE * TAD32_CHANNELS * sizeof(float));
-    uint8_t *output_buffer = malloc(TAD32_DEFAULT_CHUNK_SIZE * 4 * sizeof(float));  // Generous buffer
-
-    for (size_t chunk_idx = 0; chunk_idx < num_chunks; chunk_idx++) {
-        size_t chunk_samples = TAD32_DEFAULT_CHUNK_SIZE;
-        size_t remaining = total_samples - (chunk_idx * TAD32_DEFAULT_CHUNK_SIZE);
-
-        if (remaining < TAD32_DEFAULT_CHUNK_SIZE) {
-            chunk_samples = remaining;
-        }
-
-        // Read chunk
-        size_t samples_read = fread(chunk_buffer, TAD32_CHANNELS * sizeof(float),
-                                   chunk_samples, pcm_file);
-        (void)samples_read;  // Unused, but kept for compatibility
-
-        // Pad with zeros if necessary
-        if (chunk_samples < TAD32_DEFAULT_CHUNK_SIZE) {
-            memset(&chunk_buffer[chunk_samples * TAD32_CHANNELS], 0,
-                   (TAD32_DEFAULT_CHUNK_SIZE - chunk_samples) * TAD32_CHANNELS * sizeof(float));
-        }
-
-        // Encode chunk using linked tad32_encode_chunk() from encoder_tad32.c
-        size_t encoded_size = tad32_encode_chunk(chunk_buffer, TAD32_DEFAULT_CHUNK_SIZE,
-                                                 max_index,
-                                                 quantiser_scale, TAD32_ZSTD_LEVEL, output_buffer);
-
-        if (encoded_size == 0) {
-            fprintf(stderr, "Error: Chunk encoding failed at chunk %zu\n", chunk_idx);
-            free(chunk_buffer);
-            free(output_buffer);
-            fclose(pcm_file);
-            fclose(output);
-            return 1;
-        }
-
-        // Write chunk to output
-        fwrite(output_buffer, 1, encoded_size, output);
-        total_output_size += encoded_size;
-
-        if (verbose && (chunk_idx % 10 == 0 || chunk_idx == num_chunks - 1)) {
-            printf("Processed chunk %zu/%zu (%.1f%%)\r", chunk_idx + 1, num_chunks,
-                   (chunk_idx + 1) * 100.0 / num_chunks);
-            fflush(stdout);
-        }
-    }
-
-    if (verbose) {
-        printf("\n");
-    }
-
-    // Print coefficient statistics if enabled
-    tad32_print_statistics();
-    tad32_free_statistics();
-
-    // Cleanup
-    free(chunk_buffer);
-    free(output_buffer);
-    fclose(pcm_file);
-    fclose(output);
-    remove(TEMP_PCM_FILE);
-
-    // Print statistics
-    size_t pcmu8_size = total_samples * TAD32_CHANNELS;  // PCMu8 baseline
-    float compression_ratio = (float)pcmu8_size / total_output_size;
-
-    printf("Encoding complete!\n");
-    printf("PCMu8 size: %zu bytes\n", pcmu8_size);
-    printf("TAD32 size: %zu bytes\n", total_output_size);
-    printf("Compression ratio: %.2f:1 (%.1f%% of PCMu8)\n",
-           compression_ratio, (total_output_size * 100.0) / pcmu8_size);
-
-    if (compression_ratio < 1.8) {
-        printf("Warning: Compression ratio below 2:1 target. Try lower quantisation bits or different settings.\n");
-    }
-
-    return 0;
-}
diff --git a/video_encoder/src/encoder_tav.c b/video_encoder/src/encoder_tav.c
deleted file mode 100644
index 5ec53af..0000000
--- a/video_encoder/src/encoder_tav.c
+++ /dev/null
@@ -1,3796 +0,0 @@
-/**
- * TAV Encoder CLI - Reference Implementation using libtavenc
- *
- * Complete reference encoder with all features from the original encoder:
- * - Full command-line argument support
- * - All encoder presets (sports, anime)
- * - Scene change detection (two-pass encoding)
- * - Multi-threading support
- * - FFmpeg integration for frame reading
- * - TAV file format writing with all packet types
- * - TAD audio encoding integration
- * - Subtitle and font ROM support
- *
- * This is the official CLI implementation using libtavenc library.
- * Reduced from 14,000 lines to ~1,600 lines while preserving all features.
- *
- * Created by CuriousTorvald and Claude on 2025-12-03-04.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <getopt.h>
-#include <time.h>
-#include <unistd.h>
-#include <sys/stat.h>
-#include <pthread.h>
-#include <math.h>
-#include <float.h>
-#include <limits.h>
-
-#include "tav_encoder_lib.h"
-#include "encoder_tad.h"
-
-// =============================================================================
-// Multithreading Structures
-// =============================================================================
-
-#define GOP_SLOT_EMPTY 0
-#define GOP_SLOT_READY 1
-#define GOP_SLOT_ENCODING 2
-#define GOP_SLOT_COMPLETE 3
-
-typedef struct gop_job {
-    // Slot state
-    volatile int status;
-
-    // Input data (owned by job)
-    uint8_t **rgb_frames;        // Array of frame pointers [num_frames]
-    int num_frames;              // Frames in this GOP
-    int *frame_numbers;          // Frame indices for timecodes
-    int gop_index;               // Sequential GOP number
-
-    // Audio data (owned by job)
-    float *audio_samples;        // Stereo PCM32f for this GOP
-    size_t num_audio_samples;    // Samples per channel
-
-    // Output data (filled by worker, owned by job)
-    tav_encoder_packet_t *packet; // Encoded video packet
-    int success;                  // 1 if encoding succeeded
-
-    // Encoder params (copy for thread safety)
-    tav_encoder_params_t params;
-} gop_job_t;
-
-// =============================================================================
-// Constants and Globals
-// =============================================================================
-
-#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56"  // "\x1FTSVMTAV"
-#define TAP_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x50"  // "\x1FTSVMTAP" (still picture)
-#define MAX_PATH 4096
-#define TEMP_AUDIO_FILE_SIZE 42
-#define TEMP_PCM_FILE_SIZE 42
-#define AUDIO_SAMPLE_RATE 32000  // TAD audio sample rate
-#define MAX_SUBTITLE_LENGTH 2048
-#define TAV_PACKET_SUBTITLE_TC 0x31  // Subtitle packet with timecode (SSF-TC format)
-#define TAV_PACKET_SSF 0x30          // SSF packet (for font ROM)
-#define TAV_PACKET_EXTENDED_HDR 0xEF // Extended header packet
-#define FONTROM_OPCODE_LOW 0x80      // Low font ROM opcode
-#define FONTROM_OPCODE_HIGH 0x81     // High font ROM opcode
-#define MAX_FONTROM_SIZE 1920        // Max font ROM size in bytes
-
-#define DEFAULT_WIDTH 560  // TSVM default
-#define DEFAULT_HEIGHT 448 // TSVM default
-
-// Quality level to quantiser mapping (must match library tables)
-static const int QUALITY_Y[] = {79, 47, 23, 11, 5, 2};   // Quality levels 0-5
-static const int QUALITY_CO[] = {123, 108, 91, 76, 59, 29};
-static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39};
-static const float DEAD_ZONE_THRESHOLD[] = {1.5f, 1.5f, 1.2f, 1.1f, 0.8f, 0.6f, 0.0f};
-
-static char TEMP_AUDIO_FILE[TEMP_AUDIO_FILE_SIZE];
-static char TEMP_PCM_FILE[TEMP_PCM_FILE_SIZE];
-
-// =============================================================================
-// Two-Pass Scene Change Detection Constants
-// =============================================================================
-
-// Fixed analysis resolution for scene change detection (performance-independent of source size)
-#define ANALYSIS_WIDTH 128
-#define ANALYSIS_HEIGHT 128
-#define ANALYSIS_DWT_LEVELS 3        // 3-level Haar DWT for analysis
-
-// Adaptive threshold parameters
-#define ANALYSIS_MOVING_WINDOW 30    // Moving average window (30 frames = ~1 second at 30fps)
-#define ANALYSIS_STDDEV_MULTIPLIER 1.4  // Standard deviation multiplier for adaptive threshold
-#define ANALYSIS_LL_DIFF_MIN_THRESHOLD 1.5  // Minimum absolute threshold for LL_diff
-#define ANALYSIS_HB_RATIO_THRESHOLD 0.4     // Highband energy ratio threshold
-#define ANALYSIS_HB_ENERGY_MULTIPLIER 1.4   // Energy spike multiplier (1.4× mean to trigger)
-#define ANALYSIS_FADE_THRESHOLD 50.0        // Brightness change threshold over 5 frames
-
-// GOP size constraints for two-pass mode
-#define ANALYSIS_GOP_MIN_SIZE 10      // Minimum GOP size for two-pass mode
-#define ANALYSIS_GOP_MAX_SIZE 24      // Maximum GOP size for two-pass mode
-
-// =============================================================================
-// Two-Pass Scene Change Detection Structures
-// =============================================================================
-
-// Frame analysis metrics for two-pass scene change detection
-typedef struct frame_analysis {
-    int frame_number;
-
-    // Wavelet-based metrics (3-level Haar on fixed-size analysis buffer)
-    double ll_diff;              // L1 distance between consecutive LL bands
-    double ll_mean;              // Mean brightness (LL band average)
-    double ll_variance;          // Contrast estimate (LL band variance)
-
-    double highband_energy;      // Sum of absolute values in LH/HL/HH bands
-    double total_energy;         // Total energy (all bands)
-    double highband_ratio;       // highband_energy / total_energy
-
-    // Per-band entropies (Shannon entropy of coefficient magnitudes)
-//    double entropy_ll;
-//    double entropy_lh[ANALYSIS_DWT_LEVELS];
-//    double entropy_hl[ANALYSIS_DWT_LEVELS];
-//    double entropy_hh[ANALYSIS_DWT_LEVELS];
-
-    // Texture change indicators
-    double zero_crossing_rate;   // Zero crossing rate in highbands
-
-    // Detection results
-    int is_scene_change;         // Final scene change flag
-    double scene_change_score;   // Composite score for debugging
-} frame_analysis_t;
-
-// GOP boundary list for two-pass encoding
-typedef struct gop_boundary {
-    int start_frame;
-    int end_frame;
-    int num_frames;
-    struct gop_boundary *next;
-} gop_boundary_t;
-
-// =============================================================================
-// Subtitle Structures
-// =============================================================================
-
-typedef struct subtitle_entry {
-    int start_frame;
-    int end_frame;
-    uint64_t start_time_ns;   // Start time in nanoseconds
-    uint64_t end_time_ns;     // End time in nanoseconds
-    char *text;
-    struct subtitle_entry *next;
-} subtitle_entry_t;
-
-// =============================================================================
-// CLI Context
-// =============================================================================
-
-typedef struct {
-    // Input/output
-    char *input_file;
-    char *output_file;
-    FILE *output_fp;
-
-    // Video parameters (from library params)
-    tav_encoder_params_t enc_params;
-
-    // FFmpeg subprocess
-    FILE *ffmpeg_pipe;
-    int original_width, original_height;
-    int original_fps_num, original_fps_den;
-
-    // Encoding state
-    int64_t frame_count;
-    int64_t gop_count;
-    size_t total_bytes;
-    time_t start_time;
-
-    // GOP frame buffer (for tav_encoder_encode_gop())
-    uint8_t **gop_frames;         // Array of frame pointers [gop_size]
-    int gop_frame_count;          // Number of frames in current GOP
-    int *gop_frame_numbers;       // Frame numbers for timecodes [gop_size]
-
-    // CLI options
-    int verbose;
-    int encode_limit;  // Max frames to encode (0=all)
-    char *subtitle_file;
-    char *fontrom_low;
-    char *fontrom_high;
-    int separate_audio_track;
-    int use_native_audio;  // PCM8 instead of TAD
-    int interlaced;        // Interlaced mode (half-height internally, full height in header)
-    int header_height;     // Height to write to header (may differ from enc_params.height when interlaced)
-
-    // Framerate conversion
-    int target_fps_num;    // Target output framerate numerator (0 = no conversion)
-    int target_fps_den;    // Target output framerate denominator
-
-    // Audio encoding
-    int has_audio;
-    int audio_quality;           // TAD quality level (0-5)
-    FILE *pcm_file;              // Extracted PCM32f audio file
-    float *audio_buffer;         // Audio sample buffer (per-frame)
-    size_t audio_buffer_size;    // Buffer size in samples per channel
-    int samples_per_frame;       // Audio samples per video frame
-    size_t audio_remaining;      // Remaining bytes in PCM file
-    float *gop_audio_buffer;     // GOP audio accumulation buffer
-    size_t gop_audio_samples;    // Accumulated audio samples for current GOP
-
-    // Subtitle processing
-    subtitle_entry_t *subtitles;
-
-    // Extended Header support
-    char *ffmpeg_version;        // FFmpeg version string (first line of "ffmpeg -version")
-    uint64_t creation_time_us;   // Creation time in microseconds since UNIX Epoch (UTC)
-    long extended_header_offset; // File offset for updating ENDT value at end
-    int suppress_xhdr;           // If 1, don't write Extended Header
-
-    // Multithreading
-    int num_threads;             // 0 = single-threaded, 1+ = num worker threads
-    gop_job_t *gop_jobs;         // Array of GOP job slots [num_threads]
-    pthread_t *worker_threads;   // Array of worker thread handles [num_threads]
-    pthread_mutex_t job_mutex;   // Mutex for job slot access
-    pthread_cond_t job_ready;    // Signal when a job slot is ready for encoding
-    pthread_cond_t job_complete; // Signal when a job slot is complete
-    volatile int shutdown_workers; // 1 when workers should exit
-
-    // Still image (TAP) mode
-    int is_still_image;          // 1 if input is a still image (outputs TAP format)
-
-    // Two-pass scene change detection
-    int two_pass_mode;                    // 1 = two-pass enabled, 0 = disabled
-    frame_analysis_t *frame_analyses;     // Array of frame analyses from first pass
-    int frame_analyses_count;             // Number of frames analysed
-    int frame_analyses_capacity;          // Allocated capacity
-    gop_boundary_t *gop_boundaries;       // Linked list of GOP boundaries
-    gop_boundary_t *current_gop_boundary; // Current GOP being encoded
-
-} cli_context_t;
-
-// =============================================================================
-// Utility Functions
-// =============================================================================
-
-static void generate_random_filename(char *filename) {
-    static int seeded = 0;
-    if (!seeded) {
-        srand(time(NULL));
-        seeded = 1;
-    }
-
-    const char charset[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
-    const int charset_size = sizeof(charset) - 1;
-
-    strcpy(filename, "/tmp/");
-    for (int i = 0; i < 32; i++) {
-        filename[5 + i] = charset[rand() % charset_size];
-    }
-    filename[37] = '\0';
-}
-
-/**
- * Execute command and capture its output.
- * Returns dynamically allocated string that caller must free(), or NULL on error.
- */
-static char* execute_command(const char* command) {
-    FILE* pipe = popen(command, "r");
-    if (!pipe) return NULL;
-
-    size_t buffer_size = 4096;
-    char* buffer = malloc(buffer_size);
-    if (!buffer) {
-        pclose(pipe);
-        return NULL;
-    }
-
-    size_t total_size = 0;
-    size_t bytes_read;
-
-    while ((bytes_read = fread(buffer + total_size, 1, buffer_size - total_size - 1, pipe)) > 0) {
-        total_size += bytes_read;
-        if (total_size + 1 >= buffer_size) {
-            buffer_size *= 2;
-            char* new_buffer = realloc(buffer, buffer_size);
-            if (!new_buffer) {
-                free(buffer);
-                pclose(pipe);
-                return NULL;
-            }
-            buffer = new_buffer;
-        }
-    }
-
-    buffer[total_size] = '\0';
-    pclose(pipe);
-    return buffer;
-}
-
-/**
- * Get FFmpeg version string (first line of "ffmpeg -version").
- * Returns dynamically allocated string that caller must free(), or NULL on error.
- */
-static char* get_ffmpeg_version(void) {
-    char *output = execute_command("ffmpeg -version 2>&1 | head -1");
-    if (!output) return NULL;
-
-    // Trim trailing newline/carriage return
-    size_t len = strlen(output);
-    while (len > 0 && (output[len-1] == '\n' || output[len-1] == '\r')) {
-        output[len-1] = '\0';
-        len--;
-    }
-
-    return output;  // Caller must free
-}
-
-/**
- * Get number of available CPU cores.
- * Returns the number of online processors, or 1 on error.
- */
-static int get_available_cpus(void) {
-#ifdef _SC_NPROCESSORS_ONLN
-    long nproc = sysconf(_SC_NPROCESSORS_ONLN);
-    if (nproc > 0) {
-        return (int)nproc;
-    }
-#endif
-    return 1;  // Fallback to single core
-}
-
-/**
- * Get default thread count (cap at 8)
- */
-static int get_default_thread_count(void) {
-    int available = get_available_cpus();
-    return available < 8 ? available : 8;
-}
-
-static void print_usage(const char *program) {
-    printf("TAV Encoder - TSVM Advanced Video Codec (Reference Implementation)\n");
-    printf("\nUsage: %s -i input.mp4 -o output.tav [options]\n\n", program);
-    printf("Required:\n");
-    printf("  -i, --input FILE         Input video file\n");
-    printf("  -o, --output FILE        Output TAV file\n");
-    printf("\nVideo Options:\n");
-    printf("  -s, --size WxH           Frame size (using %dx%d if omitted)\n", DEFAULT_WIDTH, DEFAULT_HEIGHT);
-    printf("  -f, --fps NUM/DEN        Output Framerate (e.g., 60/1, 30000/1001)\n");
-    printf("  -q, --quality N          Quality level 0-5 (default: 3)\n");
-    printf("  -Q, --quantiser Y,Co,Cg  Custom quantisers (advanced)\n");
-    printf("  -w, --wavelet N          Spatial wavelet: 0=5/3, 1=9/7 (default), 2=13/7, 16=DD-4, 255=Haar\n");
-    printf("  --temporal-wavelet N     Temporal wavelet: 0=Haar (default), 1=CDF 5/3\n");
-    printf("  -c, --colour-space N     Colour space: 0=YCoCg-R (default), 1=ICtCp\n");
-    printf("  --decomp-levels N        Spatial DWT levels (0=auto, default: 6)\n");
-//    printf("  --temporal-levels N      Temporal DWT levels (0=auto, default: 2)\n");
-    printf("\nGOP Options:\n");
-    printf("  --temporal-dwt           Enable 3D DWT GOP encoding (default)\n");
-    printf("  --intra-only             Disable temporal compression (I-frames only)\n");
-    printf("  --gop-size N             GOP size 8/16/24 (default: 24)\n");
-//    printf("  --single-pass            Disable scene change detection\n");
-    printf("\nPerformance:\n");
-    printf("  -t, --threads N          Parallel encoding threads (default: min(8, available CPUs))\n");
-    printf("                           0 or 1 = single-threaded, 2-16 = multithreaded\n");
-    printf("                           Each thread encodes one GOP independently\n");
-//    printf("\nTiling:\n");
-//    printf("  --monoblock              Force single-tile mode (auto-disabled for > %dx%d)\n",
-//           TAV_MONOBLOCK_MAX_WIDTH, TAV_MONOBLOCK_MAX_HEIGHT);
-//    printf("  --tiled                  Force multi-tile mode (Padded Tiling)\n");
-    printf("\nCompression:\n");
-    printf("  --zstd-level N           Zstd level 3-22 (default: 7)\n");
-    printf("  --no-perceptual-tuning   Disable HVS perceptual quantization\n");
-    printf("  --no-dead-zone           Disable dead-zone quantization\n");
-    printf("  --dead-zone-threshold N  Dead-zone threshold. Defaults by quality level:\n");
-    printf("                           0=1.5, 1=1.5, 2=1.2, 3=1.1, 4=0.8, 5=0.6\n");
-    printf("\nEncoder Presets:\n");
-    printf("  --preset-sports          Sports mode (finer temporal quantization)\n");
-    printf("  --preset-anime           Anime mode (disable grain)\n");
-    printf("\nAudio:\n");
-    printf("  --tad-audio              Use TAD audio codec (default)\n");
-    printf("  --pcm8-audio             Use TSVM-native PCM8 audio\n");
-    printf("  --audio-quality N        TAD audio quality 0-5 (default: matches video -q)\n");
-    printf("  --no-audio               Disable audio encoding\n");
-    printf("  --separate-audio-track   Multiplex audio as separate track\n");
-    printf("\nMisc:\n");
-    printf("  --encode-limit N         Encode only first N frames\n");
-    printf("  --subtitle FILE          Add subtitle track (.srt)\n");
-    printf("  --fontrom-low FILE       Font ROM for low ASCII (.chr)\n");
-    printf("  --fontrom-high FILE      Font ROM for high ASCII (.chr)\n");
-    printf("  --suppress-xhdr          Suppress Extended Header packet (enabled by default)\n");
-    printf("  --interlaced             Enable interlaced video mode (half-height encoding)\n");
-    printf("  -v, --verbose            Verbose output\n");
-    printf("  --help                   Show this help\n");
-    printf("\nExamples:\n");
-    printf("  # Basic encoding\n");
-    printf("  %s -i video.mp4 -o out.tav -q 3\n\n", program);
-    printf("  # High quality with CDF 5/3 wavelet\n");
-    printf("  %s -i video.mp4 -o out.tav -q 5 -w 0\n\n", program);
-    printf("  # Sports mode with larger GOP\n");
-    printf("  %s -i video.mp4 -o out.tav --preset-sports --gop-size 24\n\n", program);
-    printf("  # Advanced: separate quantiser per channel\n");
-    printf("  %s -i video.mp4 -o out.tav -Q 3,5,6\n\n", program);
-    printf("  # Multithreaded encoding with 4 threads\n");
-    printf("  %s -i video.mp4 -o out.tav -t 4 -q 3\n", program);
-}
-
-// =============================================================================
-// FFmpeg Integration
-// =============================================================================
-
-/**
- * Probe video file to get resolution and framerate using FFmpeg.
- */
-static int get_video_info(const char *input_file, int *width, int *height,
-                         int *fps_num, int *fps_den) {
-    char cmd[MAX_PATH * 2];
-    snprintf(cmd, sizeof(cmd),
-             "ffprobe -v error -select_streams v:0 "
-             "-show_entries stream=width,height,r_frame_rate "
-             "-of default=noprint_wrappers=1:nokey=1 \"%s\"",
-             input_file);
-
-    FILE *fp = popen(cmd, "r");
-    if (!fp) {
-        fprintf(stderr, "Error: Failed to run ffprobe\n");
-        return -1;
-    }
-
-    if (fscanf(fp, "%d\n%d\n", width, height) != 2) {
-        fprintf(stderr, "Error: Failed to parse video dimensions\n");
-        pclose(fp);
-        return -1;
-    }
-
-    char fps_str[64];
-    if (fgets(fps_str, sizeof(fps_str), fp) == NULL) {
-        fprintf(stderr, "Error: Failed to parse framerate\n");
-        pclose(fp);
-        return -1;
-    }
-
-    // Parse framerate (format: "num/den" or "num")
-    if (sscanf(fps_str, "%d/%d", fps_num, fps_den) != 2) {
-        if (sscanf(fps_str, "%d", fps_num) == 1) {
-            *fps_den = 1;
-        } else {
-            fprintf(stderr, "Error: Failed to parse framerate: %s\n", fps_str);
-            pclose(fp);
-            return -1;
-        }
-    }
-
-    pclose(fp);
-    return 0;
-}
-
-/**
- * Check if input file is a still image (not a video).
- * Uses FFmpeg to check if the input has a video stream with frames.
- * Returns 1 if still image, 0 if video, -1 on error.
- */
-static int is_input_still_image(const char *input_file) {
-    char cmd[MAX_PATH * 2];
-
-    // Check for common image extensions first (quick path)
-    const char *ext = strrchr(input_file, '.');
-    if (ext) {
-        const char *image_exts[] = {
-            ".png", ".jpg", ".jpeg", ".bmp", ".tga", ".gif", ".tiff", ".tif",
-            ".webp", ".ppm", ".pgm", ".pbm", ".pnm", ".exr", ".hdr",
-            ".PNG", ".JPG", ".JPEG", ".BMP", ".TGA", ".GIF", ".TIFF", ".TIF",
-            ".WEBP", ".PPM", ".PGM", ".PBM", ".PNM", ".EXR", ".HDR",
-            NULL
-        };
-        for (int i = 0; image_exts[i]; i++) {
-            if (strcmp(ext, image_exts[i]) == 0) {
-                return 1;  // Known image extension
-            }
-        }
-
-        if (strcmp(ext, ".webm") == 0 || strcmp(ext, ".WEBM") == 0) {
-            return 0;  // Known video extension
-        }
-    }
-
-    // Use ffprobe to check if it's a single-frame input
-    // For still images, nb_frames will be "1" or "N/A" and duration will be very short or N/A
-    snprintf(cmd, sizeof(cmd),
-             "ffprobe -v error -select_streams v:0 "
-             "-show_entries stream=nb_frames,duration "
-             "-of default=noprint_wrappers=1:nokey=1 \"%s\" 2>/dev/null",
-             input_file);
-
-    FILE *fp = popen(cmd, "r");
-    if (!fp) {
-        return -1;
-    }
-
-    char nb_frames_str[64] = {0};
-    char duration_str[64] = {0};
-
-    if (fgets(nb_frames_str, sizeof(nb_frames_str), fp) != NULL) {
-        fgets(duration_str, sizeof(duration_str), fp);
-    }
-    pclose(fp);
-
-    // Check if nb_frames is exactly "1" or "N/A"
-    // Also check if duration is very short (< 0.1 seconds) or N/A
-    if (nb_frames_str[0]) {
-        // Remove trailing newline
-        char *nl = strchr(nb_frames_str, '\n');
-        if (nl) *nl = '\0';
-        nl = strchr(duration_str, '\n');
-        if (nl) *nl = '\0';
-
-        // Still image if nb_frames is "1" or "N/A"
-        if (strcmp(nb_frames_str, "1") == 0 ||
-            strcmp(nb_frames_str, "N/A") == 0) {
-            return 1;
-        }
-
-        // Also check for very short duration (might be a single frame)
-        if (duration_str[0] && strcmp(duration_str, "N/A") != 0) {
-            double duration = atof(duration_str);
-            if (duration > 0 && duration < 0.1) {
-                return 1;  // Very short, likely a single frame
-            }
-        }
-    }
-
-    return 0;  // Assume video
-}
-
-/**
- * Open FFmpeg pipe for reading RGB24 frames.
- *
- * When interlaced=1:
- *   - full_height is the full display height (written to header)
- *   - FFmpeg outputs half-height frames via tinterlace+separatefields
- *   - Filtergraph: scale/crop to full size, then tinterlace weave halves
- *     framerate, then separatefields restores framerate at half height
- *
- * Framerate conversion:
- *   - If target_fps > source_fps: uses minterpolate for motion interpolation
- *   - If target_fps < source_fps: uses fps filter for frame dropping
- *   - If target_fps == source_fps: no fps filter applied
- */
-static FILE* open_ffmpeg_pipe(const char *input_file, int width, int height,
-                              int interlaced, int full_height,
-                              int target_fps_num, int target_fps_den,
-                              int source_fps_num, int source_fps_den) {
-    char cmd[MAX_PATH * 2];
-    char fps_filter[128] = "";
-
-    // Build fps filter string if conversion is requested (applied first)
-    if (target_fps_num > 0 && target_fps_den > 0 &&
-        source_fps_num > 0 && source_fps_den > 0) {
-        // Compare framerates: target/1 vs source/1 -> target * source_den vs source * target_den
-        double target_rate = (double)target_fps_num / (double)source_fps_den;
-        double source_rate = (double)source_fps_num / (double)target_fps_den;
-
-        if (target_rate > source_rate) {
-            // Upsampling: use motion interpolation
-            snprintf(fps_filter, sizeof(fps_filter), "minterpolate=fps=%d/%d,",
-                     target_fps_num, target_fps_den);
-        } else if (target_rate < source_rate) {
-            // Downsampling: use fps filter
-            snprintf(fps_filter, sizeof(fps_filter), "fps=%d/%d,",
-                     target_fps_num, target_fps_den);
-        }
-        // If equal, fps_filter remains empty (no conversion needed)
-    }
-
-    if (interlaced) {
-        // Interlaced mode filtergraph:
-        // 1. fps filter (if conversion requested) - applied first
-        // 2. scale and crop to full size (width x full_height)
-        // 3. tinterlace interleave_top:cvlpf - weave fields, halves framerate
-        // 4. separatefields - separate into half-height frames, doubles framerate back
-        // Final output: width x (full_height/2) at target framerate
-        snprintf(cmd, sizeof(cmd),
-                 "ffmpeg -hide_banner -v quiet -i \"%s\" -f rawvideo -pix_fmt rgb24 -vf "
-                 "\"%sscale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d,"
-                 "tinterlace=interleave_top:cvlpf,separatefields\" -",
-                 input_file, fps_filter, width, full_height, width, full_height);
-    } else {
-        // Progressive mode - optional fps conversion, then scale and crop
-        snprintf(cmd, sizeof(cmd),
-                 "ffmpeg -hide_banner -v quiet -i \"%s\" -f rawvideo -pix_fmt rgb24 -vf "
-                 "\"%sscale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" -",
-                 input_file, fps_filter, width, height, width, height);
-    }
-
-    FILE *fp = popen(cmd, "r");
-    if (!fp) {
-        fprintf(stderr, "Error: Failed to start FFmpeg\n");
-        return NULL;
-    }
-
-    return fp;
-}
-
-/**
- * Read one RGB24 frame from FFmpeg pipe.
- * Returns 1 on success, 0 on EOF, -1 on error.
- */
-static int read_rgb_frame(FILE *fp, uint8_t *rgb_frame, size_t frame_size) {
-    size_t bytes_read = fread(rgb_frame, 1, frame_size, fp);
-
-    if (bytes_read == 0) {
-        return feof(fp) ? 0 : -1;  // EOF or error
-    }
-
-    if (bytes_read != frame_size) {
-        fprintf(stderr, "Warning: Incomplete frame read (%zu/%zu bytes)\n",
-                bytes_read, frame_size);
-        return -1;
-    }
-
-    return 1;
-}
-
-// =============================================================================
-// TAV File Format Writing
-// =============================================================================
-
-/**
- * Write TAV/TAP file header.
- *
- * When interlaced mode is enabled:
- *   - header_height should be the full display height (e.g., 448)
- *   - params->height is the internal encoding height (e.g., 224)
- *   - video_flags bit 0 is set to indicate interlaced
- *
- * When is_still_image is set:
- *   - Writes TAP magic instead of TAV
- *   - FPS is set to 0
- *   - Total frames is set to 0xFFFFFFFF
- */
-static int write_tav_header(FILE *fp, const tav_encoder_params_t *params,
-                            int has_audio, int has_subtitles,
-                            int interlaced, int header_height,
-                            int is_still_image) {
-    // Magic (8 bytes: \x1FTSVMTAV or \x1FTSVMTAP)
-    if (is_still_image) {
-        fwrite(TAP_MAGIC, 1, 8, fp);
-    } else {
-        fwrite(TAV_MAGIC, 1, 8, fp);
-    }
-
-    // Version (1 byte) - calculate based on params
-    // Version encoding (monoblock mode always used):
-    //   3 = YCoCg-R monoblock uniform
-    //   4 = ICtCp monoblock uniform
-    //   5 = YCoCg-R monoblock perceptual
-    //   6 = ICtCp monoblock perceptual
-    //   Add 8 if using CDF 5/3 temporal wavelet
-    uint8_t version;
-    if (params->monoblock) {
-        if (params->perceptual_tuning) {
-            // Monoblock perceptual: version 5 (YCoCg-R) or 6 (ICtCp)
-            version = params->channel_layout ? 6 : 5;
-        } else {
-            // Monoblock uniform: version 3 (YCoCg-R) or 4 (ICtCp)
-            version = params->channel_layout ? 4 : 3;
-        }
-    } else {
-        if (params->perceptual_tuning) {
-            // Tiled perceptual: version 7 (YCoCg-R) or 8 (ICtCp)
-            version = params->channel_layout ? 7 : 8;
-        } else {
-            // Tiled uniform: version 1 (YCoCg-R) or 2 (ICtCp)
-            version = params->channel_layout ? 1 : 2;
-        }
-    }
-    // Add 8 if using CDF 5/3 temporal wavelet
-    if (params->enable_temporal_dwt && params->temporal_wavelet == 0) {
-        version += 8;
-    }
-    fputc(version, fp);
-
-    // Width (uint16_t, 2 bytes)
-    // Write 0 if width exceeds 65535 (extended dimensions will be in XDIM)
-    uint16_t width = (params->width > 65535) ? 0 : (uint16_t)params->width;
-    fwrite(&width, sizeof(uint16_t), 1, fp);
-
-    // Height (uint16_t, 2 bytes)
-    // For interlaced mode, write the full display height (header_height)
-    // For progressive mode, write params->height
-    // Write 0 if height exceeds 65535 (extended dimensions will be in XDIM)
-    int actual_height = interlaced ? header_height : params->height;
-    uint16_t height = (actual_height > 65535) ? 0 : (uint16_t)actual_height;
-    fwrite(&height, sizeof(uint16_t), 1, fp);
-
-    // FPS (uint8_t, 1 byte)
-    // - 0x00 for still images
-    // - 0xFF if fps_num > 254 or fps_den is not 1 or 1001 (use XFPS extended header)
-    // - otherwise fps_num
-    uint8_t fps;
-    if (is_still_image) {
-        fps = 0;
-    } else if (params->fps_num > 254 ||
-               (params->fps_den != 1 && params->fps_den != 1001)) {
-        fps = 0xFF;  // Extended framerate in XFPS
-    } else {
-        fps = (uint8_t)params->fps_num;
-    }
-    fputc(fps, fp);
-
-    // Total frames (uint32_t, 4 bytes)
-    // For still images: 0xFFFFFFFF
-    // For video: 0 (will be updated later)
-    uint32_t total_frames = is_still_image ? 0xFFFFFFFF : 0;
-    fwrite(&total_frames, sizeof(uint32_t), 1, fp);
-
-    // Wavelet filter (uint8_t, 1 byte)
-    fputc((uint8_t)params->wavelet_type, fp);
-
-    // Decomp levels (uint8_t, 1 byte)
-    fputc((uint8_t)params->decomp_levels, fp);
-
-    // Quantisers (3 bytes: Y, Co, Cg)
-    fputc((uint8_t)params->quantiser_y, fp);
-    fputc((uint8_t)params->quantiser_co, fp);
-    fputc((uint8_t)params->quantiser_cg, fp);
-
-    // Extra flags (uint8_t, 1 byte)
-    uint8_t extra_flags = 0;
-    if (has_audio) extra_flags |= 0x01;        // Bit 0: has audio
-    if (has_subtitles) extra_flags |= 0x02;     // Bit 1: has subtitles
-    fputc(extra_flags, fp);
-
-    // Video flags (uint8_t, 1 byte)
-    // Bit 0 = interlaced, Bit 1 = NTSC framerate, Bit 2 = lossless, etc.
-    uint8_t video_flags = 0;
-    if (interlaced) video_flags |= 0x01;  // Bit 0: interlaced
-    fputc(video_flags, fp);
-
-    // Quality level (uint8_t, 1 byte)
-    uint8_t quality_level = params->quality_level + 1;
-    fputc(quality_level, fp);
-
-    // Channel layout (uint8_t, 1 byte)
-    fputc((uint8_t)params->channel_layout, fp);
-
-    // Entropy coder (uint8_t, 1 byte): 0=Twobitmap, 1=EZBC
-    fputc((uint8_t)params->entropy_coder, fp);
-
-    // Encoder preset (uint8_t, 1 byte)
-    fputc((uint8_t)params->encoder_preset, fp);
-
-    // Reserved (uint8_t, 1 byte)
-    fputc(0, fp);
-
-    // Device orientation (uint8_t, 1 byte)
-    fputc(0, fp);
-
-    // File role (uint8_t, 1 byte)
-    fputc(0, fp);
-
-    return 0;
-}
-
-/**
- * Write Extended Header packet (0xEF) with metadata.
- * Returns the file offset of the ENDT value for later update, or -1 on error.
- */
-static long write_extended_header(cli_context_t *cli, int width, int height) {
-    FILE *fp = cli->output_fp;
-
-    // Write packet type (0xEF)
-    uint8_t packet_type = TAV_PACKET_EXTENDED_HDR;
-    if (fwrite(&packet_type, 1, 1, fp) != 1) return -1;
-
-    // Count key-value pairs: BGNT, ENDT, CDAT, VNDR, optionally FMPG, XDIM, XFPS
-    int has_xdim = (width > 65535 || height > 65535);
-    int has_xfps = (cli->enc_params.fps_num > 254 ||
-                    (cli->enc_params.fps_den != 1 && cli->enc_params.fps_den != 1001));
-    uint16_t num_pairs = 4;  // BGNT, ENDT, CDAT, VNDR
-    if (cli->ffmpeg_version) num_pairs++;  // FMPG
-    if (has_xdim) num_pairs++;  // XDIM
-    if (has_xfps) num_pairs++;  // XFPS
-    if (fwrite(&num_pairs, sizeof(uint16_t), 1, fp) != 1) return -1;
-
-    // Helper macros for writing key-value pairs
-    #define WRITE_KV_UINT64(key_str, value) do { \
-        if (fwrite(key_str, 1, 4, fp) != 4) return -1; \
-        uint8_t value_type = 0x04; /* Uint64 */ \
-        if (fwrite(&value_type, 1, 1, fp) != 1) return -1; \
-        uint64_t val = (value); \
-        if (fwrite(&val, sizeof(uint64_t), 1, fp) != 1) return -1; \
-    } while(0)
-
-    #define WRITE_KV_BYTES(key_str, data, len) do { \
-        if (fwrite(key_str, 1, 4, fp) != 4) return -1; \
-        uint8_t value_type = 0x10; /* Bytes */ \
-        if (fwrite(&value_type, 1, 1, fp) != 1) return -1; \
-        uint16_t length = (len); \
-        if (fwrite(&length, sizeof(uint16_t), 1, fp) != 1) return -1; \
-        if (fwrite((data), 1, (len), fp) != (len)) return -1; \
-    } while(0)
-
-    // BGNT: Video begin time (0 nanoseconds for frame 0)
-    WRITE_KV_UINT64("BGNT", 0ULL);
-
-    // ENDT: Video end time (placeholder, will be updated at end)
-    // Save the file offset of the ENDT value (after key + type byte)
-    long endt_offset = ftell(fp) + 4 + 1;  // 4 bytes for "ENDT", 1 byte for type
-    WRITE_KV_UINT64("ENDT", 0ULL);
-
-    // CDAT: Creation time in microseconds since UNIX Epoch (UTC)
-    WRITE_KV_UINT64("CDAT", cli->creation_time_us);
-
-    // VNDR: Encoder name and version
-    const char *vendor_str = "Encoder-TAV 20260121 (reference)";
-    WRITE_KV_BYTES("VNDR", vendor_str, strlen(vendor_str));
-
-    // FMPG: FFmpeg version (if available)
-    if (cli->ffmpeg_version) {
-        WRITE_KV_BYTES("FMPG", cli->ffmpeg_version, strlen(cli->ffmpeg_version));
-    }
-
-    // XDIM: Extended dimensions (if width or height exceeds 65535)
-    if (has_xdim) {
-        char xdim_str[32];
-        snprintf(xdim_str, sizeof(xdim_str), "%d,%d", width, height);
-        WRITE_KV_BYTES("XDIM", xdim_str, strlen(xdim_str));
-    }
-
-    // XFPS: Extended framerate (if fps_num > 254 or fps_den is not 1 or 1001)
-    if (has_xfps) {
-        char xfps_str[32];
-        snprintf(xfps_str, sizeof(xfps_str), "%d/%d",
-                 cli->enc_params.fps_num, cli->enc_params.fps_den);
-        WRITE_KV_BYTES("XFPS", xfps_str, strlen(xfps_str));
-    }
-
-    #undef WRITE_KV_UINT64
-    #undef WRITE_KV_BYTES
-
-    return endt_offset;
-}
-
-/**
- * Update ENDT value in Extended Header.
- * Seeks to the stored offset and updates the uint64_t ENDT value.
- */
-static int update_extended_header_endt(FILE *fp, long endt_offset, uint64_t end_time_ns) {
-    if (endt_offset < 0) return -1;  // Extended Header not written
-
-    long current_pos = ftell(fp);
-    if (current_pos < 0) return -1;
-
-    // Seek to ENDT value offset
-    if (fseek(fp, endt_offset, SEEK_SET) != 0) return -1;
-
-    // Write ENDT value
-    if (fwrite(&end_time_ns, sizeof(uint64_t), 1, fp) != 1) {
-        fseek(fp, current_pos, SEEK_SET);
-        return -1;
-    }
-
-    // Restore file position
-    if (fseek(fp, current_pos, SEEK_SET) != 0) return -1;
-
-    return 0;
-}
-
-/**
- * Update total frames in header.
- * Seeks back to offset 14 and updates the uint32_t total_frames field.
- */
-static int update_total_frames(FILE *fp, uint32_t total_frames) {
-    long current_pos = ftell(fp);
-    if (current_pos < 0) {
-        return -1;
-    }
-
-    // Seek to total_frames field (offset 14: magic(8) + version(1) + width(2) + height(2) + fps(1))
-    if (fseek(fp, 14, SEEK_SET) != 0) {
-        return -1;
-    }
-
-    // Write total frames
-    fwrite(&total_frames, sizeof(uint32_t), 1, fp);
-
-    // Seek back to original position
-    if (fseek(fp, current_pos, SEEK_SET) != 0) {
-        return -1;
-    }
-
-    return 0;
-}
-
-/**
- * Write TAV packet to file.
- */
-static int write_tav_packet(FILE *fp, const tav_encoder_packet_t *packet) {
-    if (!packet || !packet->data) {
-        return -1;
-    }
-
-    // Packet is already formatted: [type(1)][size(4)][data(N)]
-    // Or: [type(1)][gop_size(1)][size(4)][data(N)] for GOP packets
-    size_t written = fwrite(packet->data, 1, packet->size, fp);
-
-    if (written != packet->size) {
-        fprintf(stderr, "Error: Failed to write packet (%zu/%zu bytes)\n",
-                written, packet->size);
-        return -1;
-    }
-
-    return 0;
-}
-
-/**
- * Write timecode packet.
- * Format: [type(1)][timecode_ns(8)] where timecode_ns is uint64_t in nanoseconds
- */
-static int write_timecode_packet(FILE *fp, int64_t frame_number, int fps_num, int fps_den) {
-    uint8_t packet[9];
-    packet[0] = TAV_PACKET_TIMECODE;
-
-    // Convert frame number to nanoseconds
-    // timecode_ns = (frame_number * fps_den * 1000000000) / fps_num
-    uint64_t timecode_ns = ((uint64_t)frame_number * (uint64_t)fps_den * 1000000000ULL) / (uint64_t)fps_num;
-    memcpy(packet + 1, &timecode_ns, 8);
-
-    fwrite(packet, 1, 9, fp);
-    return 0;
-}
-
-/**
- * Write GOP sync packet.
- * Format: [type(1)][frame_count(1)]
- */
-static int write_gop_sync_packet(FILE *fp, int frame_count) {
-    uint8_t packet[2];
-    packet[0] = TAV_PACKET_GOP_SYNC;
-    packet[1] = (uint8_t)frame_count;
-
-    fwrite(packet, 1, 2, fp);
-    return 0;
-}
-
-/**
- * Write sync packet (0xFF) for intra-only mode.
- * Format: [type(1)] (no payload)
- */
-static int write_sync_packet(FILE *fp) {
-    uint8_t packet = TAV_PACKET_SYNC;
-    fwrite(&packet, 1, 1, fp);
-    return 0;
-}
-
-// =============================================================================
-// Audio Encoding Functions
-// =============================================================================
-
-/**
- * Extract audio from video file to PCM32f stereo at 32kHz.
- * Uses FFmpeg with high-quality resampling and highpass filter.
- */
-static int extract_audio_to_file(const char *input_file, const char *output_file) {
-    char cmd[MAX_PATH * 2];
-    snprintf(cmd, sizeof(cmd),
-             "ffmpeg -hide_banner -v quiet -i \"%s\" -f f32le -acodec pcm_f32le -ar %d -ac 2 "
-             "-af \"aresample=resampler=soxr:precision=28:cutoff=0.99:dither_scale=0,highpass=f=16\" "
-             "-y \"%s\" 2>/dev/null",
-             input_file, AUDIO_SAMPLE_RATE, output_file);
-
-    int result = system(cmd);
-    if (result != 0) {
-        fprintf(stderr, "Warning: FFmpeg audio extraction failed\n");
-        return 0;
-    }
-
-    // Check if output file exists and has content
-    struct stat st;
-    if (stat(output_file, &st) != 0 || st.st_size == 0) {
-        return 0;
-    }
-
-    return 1;
-}
-
-/**
- * Read audio samples for one frame from PCM file.
- * Returns number of samples actually read.
- */
-static size_t read_audio_samples(cli_context_t *cli, float *buffer, size_t samples_to_read) {
-    if (!cli->pcm_file || cli->audio_remaining == 0) {
-        return 0;
-    }
-
-    // Calculate bytes to read (stereo float32)
-    size_t bytes_to_read = samples_to_read * 2 * sizeof(float);
-    if (bytes_to_read > cli->audio_remaining) {
-        bytes_to_read = cli->audio_remaining;
-        samples_to_read = bytes_to_read / (2 * sizeof(float));
-    }
-
-    size_t bytes_read = fread(buffer, 1, bytes_to_read, cli->pcm_file);
-    cli->audio_remaining -= bytes_read;
-
-    return bytes_read / (2 * sizeof(float));
-}
-
-/**
- * Encode and write TAD audio packet.
- * Format per terranmon.txt:
- *   uint8  Packet Type (0x24)
- *   <header for decoding packet>
- *   uint16 Sample Count
- *   uint32 Compressed Size + 7
- *   <header for decoding TAD chunk>
- *   uint16 Sample Count
- *   uint8  Quantiser Bits
- *   uint32 Compressed Size
- *   *      Zstd-compressed TAD
- */
-static int write_audio_packet(FILE *fp, cli_context_t *cli, float *pcm_samples, size_t num_samples) {
-    if (num_samples == 0) {
-        return 0;
-    }
-
-    // Allocate buffer for TAD-encoded data
-    size_t max_output_size = num_samples * 4 * sizeof(float) + 1024;
-    uint8_t *tad_buffer = malloc(max_output_size);
-    if (!tad_buffer) {
-        fprintf(stderr, "Error: Cannot allocate TAD buffer\n");
-        return -1;
-    }
-
-    // Encode with TAD (returns: sample_count(2) + max_index(1) + payload_size(4) + payload)
-    int max_index = tad32_quality_to_max_index(cli->audio_quality);
-    size_t tad_chunk_size = tad32_encode_chunk(pcm_samples, num_samples, max_index, 1.0f,
-                                               cli->enc_params.zstd_level, tad_buffer);
-
-    if (tad_chunk_size == 0) {
-        fprintf(stderr, "Error: TAD encoding failed\n");
-        free(tad_buffer);
-        return -1;
-    }
-
-    // Extract TAD chunk header
-    uint16_t sample_count;
-    uint8_t quantiser_bits;
-    uint32_t compressed_size;
-    memcpy(&sample_count, tad_buffer, 2);
-    memcpy(&quantiser_bits, tad_buffer + 2, 1);
-    memcpy(&compressed_size, tad_buffer + 3, 4);
-
-    // Write TAV packet header
-    fputc(TAV_PACKET_AUDIO_TAD, fp);                        // Packet type (0x24)
-    fwrite(&sample_count, 2, 1, fp);                         // Sample count
-    uint32_t packet_payload_size = compressed_size + 7;      // TAD chunk size
-    fwrite(&packet_payload_size, 4, 1, fp);                  // Compressed size + 7
-
-    // Write TAD chunk (sample_count, quantiser_bits, compressed_size, payload)
-    fwrite(tad_buffer, 1, tad_chunk_size, fp);
-
-    free(tad_buffer);
-    return 1 + 2 + 4 + tad_chunk_size;  // Total bytes written
-}
-
-// =============================================================================
-// Subtitle Functions
-// =============================================================================
-
-/**
- * Convert SRT timestamp to nanoseconds.
- * Format: "HH:MM:SS,mmm" (e.g., "00:01:23,456")
- */
-static uint64_t srt_time_to_ns(const char *time_str) {
-    int hours = 0, minutes = 0, seconds = 0, milliseconds = 0;
-    if (sscanf(time_str, "%d:%d:%d,%d", &hours, &minutes, &seconds, &milliseconds) != 4) {
-        return 0;
-    }
-
-    uint64_t total_ns = 0;
-    total_ns += (uint64_t)hours * 3600ULL * 1000000000ULL;
-    total_ns += (uint64_t)minutes * 60ULL * 1000000000ULL;
-    total_ns += (uint64_t)seconds * 1000000000ULL;
-    total_ns += (uint64_t)milliseconds * 1000000ULL;
-
-    return total_ns;
-}
-
-/**
- * Parse SRT subtitle file.
- * Returns linked list of subtitle entries, or NULL on error.
- */
-static subtitle_entry_t* parse_srt_file(const char *filename) {
-    FILE *file = fopen(filename, "r");
-    if (!file) {
-        fprintf(stderr, "Failed to open subtitle file: %s\n", filename);
-        return NULL;
-    }
-
-    subtitle_entry_t *head = NULL;
-    subtitle_entry_t *tail = NULL;
-    char line[1024];
-    int state = 0;  // 0=index, 1=time, 2=text, 3=blank
-
-    subtitle_entry_t *current_entry = NULL;
-    char *text_buffer = NULL;
-    size_t text_buffer_size = 0;
-
-    while (fgets(line, sizeof(line), file)) {
-        // Remove trailing newline/carriage return
-        size_t len = strlen(line);
-        while (len > 0 && (line[len-1] == '\n' || line[len-1] == '\r')) {
-            line[--len] = '\0';
-        }
-
-        if (state == 0) {  // Expecting subtitle index
-            if (strlen(line) == 0) continue;  // Skip empty lines
-            current_entry = calloc(1, sizeof(subtitle_entry_t));
-            if (!current_entry) break;
-            state = 1;
-        } else if (state == 1) {  // Expecting time range
-            char start_time[32], end_time[32];
-            if (sscanf(line, "%31s --> %31s", start_time, end_time) == 2) {
-                current_entry->start_time_ns = srt_time_to_ns(start_time);
-                current_entry->end_time_ns = srt_time_to_ns(end_time);
-
-                if (current_entry->start_time_ns == 0 && current_entry->end_time_ns == 0) {
-                    free(current_entry);
-                    current_entry = NULL;
-                    state = 3;  // Skip to next blank line
-                    continue;
-                }
-
-                // Initialize text buffer
-                text_buffer_size = 256;
-                text_buffer = malloc(text_buffer_size);
-                if (!text_buffer) {
-                    free(current_entry);
-                    current_entry = NULL;
-                    break;
-                }
-                text_buffer[0] = '\0';
-                state = 2;
-            } else {
-                free(current_entry);
-                current_entry = NULL;
-                state = 3;  // Skip malformed entry
-            }
-        } else if (state == 2) {  // Collecting subtitle text
-            if (strlen(line) == 0) {
-                // End of subtitle text
-                current_entry->text = strdup(text_buffer);
-                free(text_buffer);
-                text_buffer = NULL;
-
-                // Add to list
-                if (!head) {
-                    head = current_entry;
-                    tail = current_entry;
-                } else {
-                    tail->next = current_entry;
-                    tail = current_entry;
-                }
-                current_entry = NULL;
-                state = 0;
-            } else {
-                // Append text line
-                size_t current_len = strlen(text_buffer);
-                size_t line_len = strlen(line);
-                size_t needed = current_len + line_len + 2;  // +2 for newline and null
-
-                if (needed > text_buffer_size) {
-                    text_buffer_size = needed + 256;
-                    char *new_buffer = realloc(text_buffer, text_buffer_size);
-                    if (!new_buffer) {
-                        free(text_buffer);
-                        free(current_entry);
-                        current_entry = NULL;
-                        break;
-                    }
-                    text_buffer = new_buffer;
-                }
-
-                if (current_len > 0) {
-                    strcat(text_buffer, "\n");
-                }
-                strcat(text_buffer, line);
-            }
-        } else if (state == 3) {  // Skipping to next blank line
-            if (strlen(line) == 0) {
-                state = 0;
-            }
-        }
-    }
-
-    // Handle last subtitle if file ended while collecting text
-    if (state == 2 && current_entry && text_buffer) {
-        current_entry->text = strdup(text_buffer);
-        free(text_buffer);
-        text_buffer = NULL;
-
-        // Add to list
-        if (!head) {
-            head = current_entry;
-            tail = current_entry;
-        } else {
-            tail->next = current_entry;
-            tail = current_entry;
-        }
-        current_entry = NULL;
-    } else if (current_entry) {
-        // Cleanup any incomplete entry
-        free(current_entry);
-        if (text_buffer) free(text_buffer);
-    }
-
-    fclose(file);
-    return head;
-}
-
-/**
- * Free subtitle list.
- */
-static void free_subtitle_list(subtitle_entry_t *list) {
-    while (list) {
-        subtitle_entry_t *next = list->next;
-        free(list->text);
-        free(list);
-        list = next;
-    }
-}
-
-// =============================================================================
-// Two-Pass Scene Change Detection Functions
-// =============================================================================
-
-// 1D Haar forward transform (in-place)
-static void haar_forward_1d(float *data, int length) {
-    if (length < 2) return;
-
-    int half = length / 2;
-    float *temp = malloc(length * sizeof(float));
-
-    for (int i = 0; i < half; i++) {
-        float a = data[2 * i];
-        float b = data[2 * i + 1];
-        temp[i] = (a + b) * 0.5f;       // Low-pass (average)
-        temp[half + i] = (a - b) * 0.5f; // High-pass (difference)
-    }
-
-    memcpy(data, temp, length * sizeof(float));
-    free(temp);
-}
-
-// 2D Haar forward transform for analysis (works on ANALYSIS_WIDTH x ANALYSIS_HEIGHT buffer)
-static void analysis_haar_2d_forward(float *data, int width, int height, int levels) {
-    float *temp = malloc((width > height ? width : height) * sizeof(float));
-
-    // Generate division series for levels
-    int widths[levels + 1];
-    int heights[levels + 1];
-    widths[0] = width;
-    heights[0] = height;
-
-    for (int i = 1; i <= levels; i++) {
-        widths[i] = (int)roundf(widths[i - 1] / 2.0f);
-        heights[i] = (int)roundf(heights[i - 1] / 2.0f);
-    }
-
-    for (int level = 0; level < levels; level++) {
-        int current_width = widths[level];
-        int current_height = heights[level];
-
-        if (current_width < 2 || current_height < 2) break;
-
-        // Horizontal pass
-        for (int y = 0; y < current_height; y++) {
-            for (int x = 0; x < current_width; x++) {
-                temp[x] = data[y * width + x];
-            }
-            haar_forward_1d(temp, current_width);
-            for (int x = 0; x < current_width; x++) {
-                data[y * width + x] = temp[x];
-            }
-        }
-
-        // Vertical pass
-        for (int x = 0; x < current_width; x++) {
-            for (int y = 0; y < current_height; y++) {
-                temp[y] = data[y * width + x];
-            }
-            haar_forward_1d(temp, current_height);
-            for (int y = 0; y < current_height; y++) {
-                data[y * width + x] = temp[y];
-            }
-        }
-    }
-
-    free(temp);
-}
-
-// Bilinear resize RGB frame to fixed 128x128 grayscale analysis buffer
-static float* resize_frame_to_analysis(const uint8_t *rgb_frame, int src_width, int src_height) {
-    float *gray = malloc(ANALYSIS_WIDTH * ANALYSIS_HEIGHT * sizeof(float));
-
-    float x_ratio = (float)(src_width - 1) / (ANALYSIS_WIDTH - 1);
-    float y_ratio = (float)(src_height - 1) / (ANALYSIS_HEIGHT - 1);
-
-    for (int y = 0; y < ANALYSIS_HEIGHT; y++) {
-        for (int x = 0; x < ANALYSIS_WIDTH; x++) {
-            float src_x = x * x_ratio;
-            float src_y = y * y_ratio;
-
-            int x0 = (int)src_x;
-            int y0 = (int)src_y;
-            int x1 = x0 + 1 < src_width ? x0 + 1 : x0;
-            int y1 = y0 + 1 < src_height ? y0 + 1 : y0;
-
-            float x_frac = src_x - x0;
-            float y_frac = src_y - y0;
-
-            // Get grayscale values at four corners
-            int idx00 = (y0 * src_width + x0) * 3;
-            int idx01 = (y0 * src_width + x1) * 3;
-            int idx10 = (y1 * src_width + x0) * 3;
-            int idx11 = (y1 * src_width + x1) * 3;
-
-            float g00 = 0.299f * rgb_frame[idx00] + 0.587f * rgb_frame[idx00 + 1] + 0.114f * rgb_frame[idx00 + 2];
-            float g01 = 0.299f * rgb_frame[idx01] + 0.587f * rgb_frame[idx01 + 1] + 0.114f * rgb_frame[idx01 + 2];
-            float g10 = 0.299f * rgb_frame[idx10] + 0.587f * rgb_frame[idx10 + 1] + 0.114f * rgb_frame[idx10 + 2];
-            float g11 = 0.299f * rgb_frame[idx11] + 0.587f * rgb_frame[idx11 + 1] + 0.114f * rgb_frame[idx11 + 2];
-
-            // Bilinear interpolation
-            float top = g00 * (1 - x_frac) + g01 * x_frac;
-            float bottom = g10 * (1 - x_frac) + g11 * x_frac;
-            gray[y * ANALYSIS_WIDTH + x] = top * (1 - y_frac) + bottom * y_frac;
-        }
-    }
-
-    return gray;
-}
-
-// Calculate Shannon entropy of coefficient magnitudes
-/*static double calculate_shannon_entropy(const float *coeffs, int count) {
-    if (count == 0) return 0.0;
-
-    // Build histogram of coefficient magnitudes (use 256 bins)
-    #define HIST_BINS 256
-    int histogram[HIST_BINS] = {0};
-
-    // Find min/max for normalisation
-    float min_val = FLT_MAX, max_val = -FLT_MAX;
-    for (int i = 0; i < count; i++) {
-        float abs_val = fabsf(coeffs[i]);
-        if (abs_val < min_val) min_val = abs_val;
-        if (abs_val > max_val) max_val = abs_val;
-    }
-
-    // Avoid division by zero
-    float range = max_val - min_val;
-    if (range < 1e-6) return 0.0;
-
-    // Build histogram
-    for (int i = 0; i < count; i++) {
-        float abs_val = fabsf(coeffs[i]);
-        int bin = (int)((abs_val - min_val) / range * (HIST_BINS - 1));
-        bin = bin < 0 ? 0 : (bin >= HIST_BINS ? HIST_BINS - 1 : bin);
-        histogram[bin]++;
-    }
-
-    // Calculate entropy: H = -sum(p_i * log2(p_i))
-    double entropy = 0.0;
-    for (int i = 0; i < HIST_BINS; i++) {
-        if (histogram[i] > 0) {
-            double p = (double)histogram[i] / count;
-            entropy -= p * log2(p);
-        }
-    }
-
-    return entropy;
-    #undef HIST_BINS
-}*/
-
-// Extract subband from DWT coefficients (helper for entropy calculation)
-/*static void extract_subband(const float *dwt_data, int width, int height, int level,
-                           int band, float *output, int *out_count) {
-    // band: 0=LL, 1=LH, 2=HL, 3=HH
-    // For level L, subbands are in top-left quadrant of size (width>>L, height>>L)
-
-    // Generate division series
-    int widths[10]; widths[0] = width;
-    int heights[10]; heights[0] = height;
-
-    for (int i = 1; i < 10; i++) {
-        widths[i] = (int)roundf(widths[i - 1] / 2.0f);
-        heights[i] = (int)roundf(heights[i - 1] / 2.0f);
-    }
-
-    int level_width = widths[level];
-    int level_height = heights[level];
-    int half_width = level_width / 2;
-    int half_height = level_height / 2;
-
-    if (half_width < 1 || half_height < 1) {
-        *out_count = 0;
-        return;
-    }
-
-    int count = 0;
-    int offset_x = (band & 1) ? half_width : 0;   // LH, HH have x offset
-    int offset_y = (band & 2) ? half_height : 0;  // HL, HH have y offset
-
-    for (int y = 0; y < half_height; y++) {
-        for (int x = 0; x < half_width; x++) {
-            int src_x = offset_x + x;
-            int src_y = offset_y + y;
-            output[count++] = dwt_data[src_y * width + src_x];
-        }
-    }
-
-    *out_count = count;
-}*/
-
-// Compute comprehensive frame analysis metrics
-static void compute_frame_metrics(const float *dwt_current, const float *dwt_previous,
-                                  frame_analysis_t *metrics) {
-    int width = ANALYSIS_WIDTH;
-    int height = ANALYSIS_HEIGHT;
-    int num_pixels = width * height;
-    int levels = ANALYSIS_DWT_LEVELS;
-
-    // Generate division series
-    int widths[levels + 1]; widths[0] = width;
-    int heights[levels + 1]; heights[0] = height;
-
-    for (int i = 1; i <= levels; i++) {
-        widths[i] = (int)roundf(widths[i - 1] / 2.0f);
-        heights[i] = (int)roundf(heights[i - 1] / 2.0f);
-    }
-
-    // Initialise metrics
-    memset(metrics, 0, sizeof(frame_analysis_t));
-
-    // Extract LL band (approximation coefficients)
-    int ll_width = widths[levels];
-    int ll_height = heights[levels];
-    int ll_count = ll_width * ll_height;
-
-    if (ll_count <= 0) return;
-
-    // Metric 1: LL band statistics (mean, variance)
-    double ll_sum = 0.0, ll_sum_sq = 0.0;
-    for (int i = 0; i < ll_count; i++) {
-        float val = dwt_current[i];
-        ll_sum += val;
-        ll_sum_sq += val * val;
-    }
-    metrics->ll_mean = ll_sum / ll_count;
-    double ll_var = (ll_sum_sq / ll_count) - (metrics->ll_mean * metrics->ll_mean);
-    metrics->ll_variance = ll_var > 0 ? ll_var : 0;
-
-    // Metric 2: LL_diff (L1 distance between consecutive frames)
-    if (dwt_previous) {
-        double diff_sum = 0.0;
-        for (int i = 0; i < ll_count; i++) {
-            diff_sum += fabs(dwt_current[i] - dwt_previous[i]);
-        }
-        metrics->ll_diff = diff_sum / ll_count;
-    }
-
-    // Metric 3: Highband energy and ratio
-    double total_energy = 0.0, highband_energy = 0.0;
-    for (int i = 0; i < num_pixels; i++) {
-        float abs_val = fabsf(dwt_current[i]);
-        total_energy += abs_val;
-        if (i >= ll_count) {  // All coefficients except LL band
-            highband_energy += abs_val;
-        }
-    }
-    metrics->total_energy = total_energy;
-    metrics->highband_energy = highband_energy;
-    metrics->highband_ratio = total_energy > 0 ? (highband_energy / total_energy) : 0;
-
-    // Metric 4: Per-band entropies
-    /*float *subband_buffer = malloc(num_pixels * sizeof(float));
-    int subband_count;
-
-    // LL band entropy
-    extract_subband(dwt_current, width, height, levels, 0, subband_buffer, &subband_count);
-    metrics->entropy_ll = calculate_shannon_entropy(subband_buffer, subband_count);
-
-    // High-frequency bands entropy (LH, HL, HH for each level)
-    for (int level = 0; level < levels && level < ANALYSIS_DWT_LEVELS; level++) {
-        // LH band
-        extract_subband(dwt_current, width, height, level, 1, subband_buffer, &subband_count);
-        metrics->entropy_lh[level] = calculate_shannon_entropy(subband_buffer, subband_count);
-
-        // HL band
-        extract_subband(dwt_current, width, height, level, 2, subband_buffer, &subband_count);
-        metrics->entropy_hl[level] = calculate_shannon_entropy(subband_buffer, subband_count);
-
-        // HH band
-        extract_subband(dwt_current, width, height, level, 3, subband_buffer, &subband_count);
-        metrics->entropy_hh[level] = calculate_shannon_entropy(subband_buffer, subband_count);
-    }*/
-
-    // Metric 5: Zero crossing rate in highbands (texture change indicator)
-    int zero_crossings = 0;
-    int highband_coeffs = num_pixels - ll_count;
-    if (highband_coeffs > 1) {
-        for (int i = ll_count; i < num_pixels - 1; i++) {
-            if ((dwt_current[i] > 0 && dwt_current[i + 1] < 0) ||
-                (dwt_current[i] < 0 && dwt_current[i + 1] > 0)) {
-                zero_crossings++;
-            }
-        }
-        metrics->zero_crossing_rate = (double)zero_crossings / highband_coeffs;
-    }
-
-    //free(subband_buffer);
-}
-
-// Hybrid scene change detector with adaptive thresholds
-// Returns 1 if scene change detected, 0 otherwise
-static int detect_scene_change_wavelet(int frame_number,
-                                      const frame_analysis_t *metrics_history,
-                                      int history_count,
-                                      const frame_analysis_t *current_metrics,
-                                      int verbose) {
-    if (history_count < 2) return 0;  // Need history for adaptive thresholds
-
-    // Calculate moving statistics for LL_diff (mean and stddev)
-    int window_size = history_count < ANALYSIS_MOVING_WINDOW ? history_count : ANALYSIS_MOVING_WINDOW;
-    int start_idx = history_count - window_size;
-
-    double ll_diff_sum = 0.0, ll_diff_sum_sq = 0.0;
-    for (int i = start_idx; i < history_count; i++) {
-        double val = metrics_history[i].ll_diff;
-        ll_diff_sum += val;
-        ll_diff_sum_sq += val * val;
-    }
-
-    double ll_diff_mean = ll_diff_sum / window_size;
-    double ll_diff_variance = (ll_diff_sum_sq / window_size) - (ll_diff_mean * ll_diff_mean);
-    double ll_diff_stddev = ll_diff_variance > 0 ? sqrt(ll_diff_variance) : 0;
-
-    // Adaptive threshold: mean + k*stddev (with minimum absolute threshold)
-    double ll_diff_threshold = ll_diff_mean + ANALYSIS_STDDEV_MULTIPLIER * ll_diff_stddev;
-    if (ll_diff_threshold < ANALYSIS_LL_DIFF_MIN_THRESHOLD) {
-        ll_diff_threshold = ANALYSIS_LL_DIFF_MIN_THRESHOLD;
-    }
-
-    // Detection rule 1: Hard cut or fast fade (LL_diff spike)
-    // Normalise LL_diff by LL_mean to handle exposure/lighting changes
-    double normalised_ll_diff = current_metrics->ll_mean > 1.0 ?
-        current_metrics->ll_diff / current_metrics->ll_mean : current_metrics->ll_diff;
-    double normalised_threshold = current_metrics->ll_mean > 1.0 ?
-        ll_diff_threshold / current_metrics->ll_mean : ll_diff_threshold;
-
-    if (normalised_ll_diff > normalised_threshold) {
-        if (verbose) {
-            printf("  Scene change detected frame %d: Normalised LL_diff=%.4f > threshold=%.4f (raw: %.2f > %.2f)\n",
-                   frame_number + 1, normalised_ll_diff, normalised_threshold,
-                   current_metrics->ll_diff, ll_diff_threshold);
-        }
-        return 1;
-    }
-
-    // Detection rule 2: Structural change (high-frequency energy spike)
-    double hb_ratio_threshold = ANALYSIS_HB_RATIO_THRESHOLD;
-
-    // Calculate average highband energy from history
-    double hb_energy_sum = 0.0;
-    for (int i = start_idx; i < history_count; i++) {
-        hb_energy_sum += metrics_history[i].highband_energy;
-    }
-    double hb_energy_mean = hb_energy_sum / window_size;
-    double hb_energy_threshold = hb_energy_mean * ANALYSIS_HB_ENERGY_MULTIPLIER;
-
-    // Check if highband spike is detected
-    if (current_metrics->highband_ratio > hb_ratio_threshold &&
-        current_metrics->highband_energy > hb_energy_threshold) {
-
-        // Calculate confidence: how much does it exceed threshold?
-        double ratio_confidence = current_metrics->highband_ratio / hb_ratio_threshold;
-        double energy_confidence = current_metrics->highband_energy / hb_energy_threshold;
-        double min_confidence = ratio_confidence < energy_confidence ? ratio_confidence : energy_confidence;
-
-        // High confidence (>1.3x threshold): Skip persistence check (likely hard cut)
-        if (min_confidence > 1.3) {
-            if (verbose) {
-                printf("  Scene change detected frame %d: HB_ratio=%.3f > %.3f AND HB_energy=%.1f > %.1f (high confidence: %.2fx)\n",
-                       frame_number + 1, current_metrics->highband_ratio, hb_ratio_threshold,
-                       current_metrics->highband_energy, hb_energy_threshold, min_confidence);
-            }
-            return 1;
-        }
-
-        // Borderline detection: Check persistence to avoid single-frame flashes
-        if (history_count >= 1) {
-            const frame_analysis_t *prev_metrics = &metrics_history[history_count - 1];
-            if (prev_metrics->highband_ratio > hb_ratio_threshold * 0.6 ||
-                prev_metrics->highband_energy > hb_energy_threshold * 0.6) {
-                if (verbose) {
-                    printf("  Scene change detected frame %d: HB_ratio=%.3f > %.3f AND HB_energy=%.1f > %.1f (persistent)\n",
-                           frame_number + 1, current_metrics->highband_ratio, hb_ratio_threshold,
-                           current_metrics->highband_energy, hb_energy_threshold);
-                }
-                return 1;
-            }
-        }
-    }
-
-    // Detection rule 3: Gradual transition (slow LL_mean change over several frames)
-    // Check if LL_mean changed significantly over last 5 frames
-    if (history_count >= 5) {
-        double ll_mean_5_frames_ago = metrics_history[history_count - 5].ll_mean;
-        double ll_mean_change = fabs(current_metrics->ll_mean - ll_mean_5_frames_ago);
-
-        if (ll_mean_change > ANALYSIS_FADE_THRESHOLD) {
-            if (verbose) {
-                printf("  Scene change detected frame %d: Gradual fade - LL_mean change=%.2f over 5 frames (threshold=%.1f)\n",
-                       frame_number + 1, ll_mean_change, ANALYSIS_FADE_THRESHOLD);
-            }
-            return 1;
-        }
-    }
-
-    return 0;  // No scene change detected
-}
-
-// Split a scene into evenly-sized GOPs
-// Returns linked list of GOP boundaries for the scene
-static gop_boundary_t* split_scene_into_gops(int scene_start, int scene_end,
-                                             int min_gop_size, int max_gop_size,
-                                             gop_boundary_t **tail_ptr, int verbose) {
-    int scene_length = scene_end - scene_start + 1;
-
-    if (scene_length < min_gop_size) {
-        // Scene too short, make it a single GOP
-        gop_boundary_t *boundary = malloc(sizeof(gop_boundary_t));
-        boundary->start_frame = scene_start;
-        boundary->end_frame = scene_end;
-        boundary->num_frames = scene_length;
-        boundary->next = NULL;
-        *tail_ptr = boundary;
-        return boundary;
-    }
-
-    // Calculate optimal number of GOPs for this scene
-    int num_gops = (scene_length + max_gop_size - 1) / max_gop_size;  // ceil(scene_length / max_gop_size)
-
-    // Make sure each GOP is at least min_gop_size
-    if (scene_length / num_gops < min_gop_size) {
-        num_gops = scene_length / min_gop_size;
-    }
-
-    if (num_gops < 1) num_gops = 1;
-
-    // Calculate base GOP size and remainder for even distribution
-    int base_gop_size = scene_length / num_gops;
-    int remainder = scene_length % num_gops;
-
-    gop_boundary_t *head = NULL;
-    gop_boundary_t *tail = NULL;
-    int current_frame = scene_start;
-
-    for (int i = 0; i < num_gops; i++) {
-        // Distribute remainder frames evenly across GOPs
-        int gop_size = base_gop_size + (i < remainder ? 1 : 0);
-
-        gop_boundary_t *boundary = malloc(sizeof(gop_boundary_t));
-        boundary->start_frame = current_frame;
-        boundary->end_frame = current_frame + gop_size - 1;
-        boundary->num_frames = gop_size;
-        boundary->next = NULL;
-
-        if (tail) {
-            tail->next = boundary;
-            tail = boundary;
-        } else {
-            head = tail = boundary;
-        }
-
-        if (verbose) {
-            printf("    GOP: frames %d-%d (length %d)\n",
-                   boundary->start_frame, boundary->end_frame, boundary->num_frames);
-        }
-
-        current_frame += gop_size;
-    }
-
-    *tail_ptr = tail;
-    return head;
-}
-
-// Build GOP boundaries from frame analysis data
-// First detects scene boundaries, then splits each scene into evenly-sized GOPs
-static gop_boundary_t* build_gop_boundaries(const frame_analysis_t *analyses, int num_frames,
-                                           int min_gop_size, int max_gop_size, int verbose) {
-    if (num_frames < min_gop_size) return NULL;
-
-    // Step 1: Detect scene boundaries (actual hard cuts only)
-    int *scene_boundaries = malloc((num_frames + 1) * sizeof(int));
-    int num_scenes = 0;
-    scene_boundaries[num_scenes++] = 0;  // First scene starts at frame 0
-
-    for (int i = 1; i < num_frames; i++) {
-        if (analyses[i].is_scene_change) {
-            scene_boundaries[num_scenes++] = i;
-            if (verbose) {
-                printf("  Scene boundary candidate at frame %d\n", i);
-            }
-        }
-    }
-    scene_boundaries[num_scenes++] = num_frames;  // End of last scene
-
-    // Step 1.5: Merge tiny scenes (< min_gop_size) with adjacent scenes
-    // This prevents false positives from creating 1-frame GOPs
-    int *merged_boundaries = malloc((num_scenes + 1) * sizeof(int));
-    int num_merged = 0;
-    merged_boundaries[num_merged++] = scene_boundaries[0];  // Always keep first boundary
-
-    for (int s = 1; s < num_scenes; s++) {
-        int scene_length = scene_boundaries[s] - scene_boundaries[s - 1];
-
-        // If this scene is too short, skip this boundary (merge with next scene)
-        if (scene_length >= min_gop_size || s == num_scenes - 1) {
-            merged_boundaries[num_merged++] = scene_boundaries[s];
-        } else if (verbose) {
-            printf("  Merging tiny scene at frame %d (length %d)\n",
-                   scene_boundaries[s - 1], scene_length);
-        }
-    }
-
-    // Replace original boundaries with merged ones
-    free(scene_boundaries);
-    scene_boundaries = merged_boundaries;
-    num_scenes = num_merged;
-
-    if (verbose) {
-        printf("  After merging: %d scenes\n", num_scenes - 1);
-    }
-
-    // Step 2: Split each scene into evenly-sized GOPs
-    gop_boundary_t *head = NULL;
-    gop_boundary_t *tail = NULL;
-
-    for (int s = 0; s < num_scenes - 1; s++) {
-        int scene_start = scene_boundaries[s];
-        int scene_end = scene_boundaries[s + 1] - 1;
-        int scene_length = scene_end - scene_start + 1;
-
-        if (verbose) {
-            printf("  Scene %d: frames %d-%d (length %d)\n",
-                   s + 1, scene_start, scene_end, scene_length);
-        }
-
-        // Split scene into evenly-sized GOPs
-        gop_boundary_t *scene_tail = NULL;
-        gop_boundary_t *scene_gops = split_scene_into_gops(scene_start, scene_end,
-                                                           min_gop_size, max_gop_size,
-                                                           &scene_tail, verbose);
-
-        // Link to main GOP list
-        if (head == NULL) {
-            head = scene_gops;
-            tail = scene_tail;
-        } else {
-            tail->next = scene_gops;
-            tail = scene_tail;
-        }
-    }
-
-    free(scene_boundaries);
-    return head;
-}
-
-// Free GOP boundary list
-static void free_gop_boundaries(gop_boundary_t *head) {
-    while (head) {
-        gop_boundary_t *next = head->next;
-        free(head);
-        head = next;
-    }
-}
-
-// First pass: Analyse all frames and build GOP boundaries
-// Returns 0 on success, -1 on error
-static int two_pass_first_pass(cli_context_t *cli) {
-    printf("=== Two-Pass Encoding: First Pass (Scene Analysis) ===\n");
-    printf("  Using fixed 128x128 analysis resolution for all video sizes\n");
-
-    // Allocate analysis array (estimate: 10000 frames max for in-memory storage)
-    cli->frame_analyses_capacity = 10000;
-    cli->frame_analyses = malloc(cli->frame_analyses_capacity * sizeof(frame_analysis_t));
-    cli->frame_analyses_count = 0;
-
-    if (!cli->frame_analyses) {
-        fprintf(stderr, "Error: Failed to allocate frame analysis buffer\n");
-        return -1;
-    }
-
-    // Open FFmpeg pipe for first pass
-    char ffmpeg_cmd[MAX_PATH * 2];
-    if (cli->interlaced) {
-        snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
-                 "ffmpeg -loglevel error -i \"%s\" -f rawvideo -pix_fmt rgb24 "
-                 "-vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d,"
-                 "tinterlace=interleave_top:cvlpf,separatefields\" -",
-                 cli->input_file, cli->enc_params.width, cli->header_height,
-                 cli->enc_params.width, cli->header_height);
-    } else {
-        snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
-                 "ffmpeg -loglevel error -i \"%s\" -f rawvideo -pix_fmt rgb24 "
-                 "-vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" -",
-                 cli->input_file, cli->enc_params.width, cli->enc_params.height,
-                 cli->enc_params.width, cli->enc_params.height);
-    }
-
-    FILE *ffmpeg_pipe = popen(ffmpeg_cmd, "r");
-    if (!ffmpeg_pipe) {
-        fprintf(stderr, "Error: Failed to open FFmpeg pipe for first pass\n");
-        free(cli->frame_analyses);
-        cli->frame_analyses = NULL;
-        return -1;
-    }
-
-    size_t frame_rgb_size = cli->enc_params.width * cli->enc_params.height * 3;
-    uint8_t *frame_rgb = malloc(frame_rgb_size);
-    float *prev_dwt = NULL;
-
-    int frame_num = 0;
-    size_t bytes_read;
-    while ((bytes_read = fread(frame_rgb, 1, frame_rgb_size, ffmpeg_pipe)) == frame_rgb_size) {
-        // Honor encode limit BEFORE processing
-        if (cli->encode_limit > 0 && frame_num >= cli->encode_limit) {
-            break;
-        }
-
-        // Resize to fixed 128x128 grayscale
-        float *gray = resize_frame_to_analysis(frame_rgb, cli->enc_params.width, cli->enc_params.height);
-
-        // Apply 3-level Haar DWT
-        analysis_haar_2d_forward(gray, ANALYSIS_WIDTH, ANALYSIS_HEIGHT, ANALYSIS_DWT_LEVELS);
-
-        // Compute metrics
-        frame_analysis_t metrics;
-        compute_frame_metrics(gray, prev_dwt, &metrics);
-
-        // Set frame number AFTER compute_frame_metrics (which does memset)
-        metrics.frame_number = frame_num;
-
-        // Detect scene change using hybrid detector
-        if (frame_num > 0) {
-            metrics.is_scene_change = detect_scene_change_wavelet(
-                frame_num,
-                cli->frame_analyses,
-                cli->frame_analyses_count,
-                &metrics,
-                cli->verbose
-            );
-        } else {
-            metrics.is_scene_change = 0;  // First frame is always start of first GOP
-        }
-
-        // Store analysis
-        if (cli->frame_analyses_count >= cli->frame_analyses_capacity) {
-            // Expand array
-            cli->frame_analyses_capacity *= 2;
-            cli->frame_analyses = realloc(cli->frame_analyses,
-                                         cli->frame_analyses_capacity * sizeof(frame_analysis_t));
-            if (!cli->frame_analyses) {
-                fprintf(stderr, "Error: Failed to reallocate analysis buffer\n");
-                free(gray);
-                if (prev_dwt) free(prev_dwt);
-                free(frame_rgb);
-                pclose(ffmpeg_pipe);
-                return -1;
-            }
-        }
-
-        cli->frame_analyses[cli->frame_analyses_count++] = metrics;
-
-        // Update previous DWT
-        if (prev_dwt) free(prev_dwt);
-        prev_dwt = gray;
-
-        frame_num++;
-
-        if (frame_num % 100 == 0) {
-            printf("  Analysed %d frames...\r", frame_num);
-            fflush(stdout);
-        }
-    }
-
-    printf("\n  Analysed %d frames total\n", frame_num);
-
-    free(frame_rgb);
-    if (prev_dwt) free(prev_dwt);
-    pclose(ffmpeg_pipe);
-
-    // Build GOP boundaries
-    printf("  Building GOP boundaries...\n");
-    cli->gop_boundaries = build_gop_boundaries(
-        cli->frame_analyses,
-        cli->frame_analyses_count,
-        ANALYSIS_GOP_MIN_SIZE,
-        ANALYSIS_GOP_MAX_SIZE,
-        cli->verbose
-    );
-
-    // Count and print GOP statistics
-    int num_gops = 0;
-    int total_gop_frames = 0;
-    int min_gop = INT_MAX, max_gop = 0;
-    gop_boundary_t *gop = cli->gop_boundaries;
-    while (gop) {
-        num_gops++;
-        total_gop_frames += gop->num_frames;
-        if (gop->num_frames < min_gop) min_gop = gop->num_frames;
-        if (gop->num_frames > max_gop) max_gop = gop->num_frames;
-        gop = gop->next;
-    }
-
-    printf("  GOP Statistics:\n");
-    printf("    Total GOPs: %d\n", num_gops);
-    if (num_gops > 0) {
-        printf("    Average GOP size: %.1f frames\n", (double)total_gop_frames / num_gops);
-        printf("    Min GOP size: %d frames\n", min_gop);
-        printf("    Max GOP size: %d frames\n", max_gop);
-    }
-
-    printf("=== First Pass Complete ===\n\n");
-
-    return 0;
-}
-
-/**
- * Write subtitle packet in SSF-TC format.
- * Packet structure:
- *   uint8  Packet Type (0x31)
- *   uint32 Packet Size
- *   uint24 Subtitle Index (little-endian, always 0 for now)
- *   uint64 Timecode (nanoseconds, little-endian)
- *   uint8  Opcode (0x01=show, 0x02=hide)
- *   char[] Text (null-terminated, empty for hide)
- */
-static int write_subtitle_packet(FILE *fp, uint64_t timecode_ns, uint8_t opcode, const char *text) {
-    // Calculate packet size: index (3) + timecode (8) + opcode (1) + text + null
-    size_t text_len = text ? strlen(text) : 0;
-    size_t packet_size = 3 + 8 + 1 + text_len + 1;
-
-    // Write packet type and size
-    fputc(TAV_PACKET_SUBTITLE_TC, fp);
-    uint32_t size32 = (uint32_t)packet_size;
-    fwrite(&size32, 4, 1, fp);
-
-    // Write subtitle index (24-bit, little-endian) - always 0
-    uint8_t index_bytes[3] = {0, 0, 0};
-    fwrite(index_bytes, 3, 1, fp);
-
-    // Write timecode (64-bit, little-endian)
-    uint8_t timecode_bytes[8];
-    for (int i = 0; i < 8; i++) {
-        timecode_bytes[i] = (timecode_ns >> (i * 8)) & 0xFF;
-    }
-    fwrite(timecode_bytes, 8, 1, fp);
-
-    // Write opcode
-    fputc(opcode, fp);
-
-    // Write text if present
-    if (text && text_len > 0) {
-        fwrite(text, 1, text_len, fp);
-    }
-
-    // Write null terminator
-    fputc(0, fp);
-
-    return 1 + 4 + (int)packet_size;  // Total bytes written
-}
-
-/**
- * Write all subtitles upfront in SSF-TC format.
- * Each subtitle generates two packets: show and hide events.
- */
-static int write_all_subtitles(FILE *fp, subtitle_entry_t *subtitles, int verbose) {
-    if (!subtitles) return 0;
-
-    int bytes_written = 0;
-    int subtitle_count = 0;
-
-    subtitle_entry_t *sub = subtitles;
-    while (sub) {
-        // Write show subtitle event (opcode 0x01)
-        bytes_written += write_subtitle_packet(fp, sub->start_time_ns, 0x01, sub->text);
-
-        // Write hide subtitle event (opcode 0x02)
-        bytes_written += write_subtitle_packet(fp, sub->end_time_ns, 0x02, NULL);
-
-        subtitle_count++;
-        if (verbose) {
-            printf("  Subtitle %d: show at %.3fs, hide at %.3fs: %.50s%s\n",
-                   subtitle_count,
-                   sub->start_time_ns / 1000000000.0,
-                   sub->end_time_ns / 1000000000.0,
-                   sub->text, strlen(sub->text) > 50 ? "..." : "");
-        }
-
-        sub = sub->next;
-    }
-
-    if (verbose && subtitle_count > 0) {
-        printf("Wrote %d SSF-TC subtitle events (%d bytes)\n", subtitle_count * 2, bytes_written);
-    }
-
-    return bytes_written;
-}
-
-// =============================================================================
-// Font ROM Functions
-// =============================================================================
-
-/**
- * Write font ROM packet in SSF format.
- * Packet structure:
- *   uint8  Packet Type (0x30 - SSF)
- *   uint32 Packet Size
- *   uint24 Index (3 bytes, always 0 for font ROM)
- *   uint8  Opcode (0x80=low font ROM, 0x81=high font ROM)
- *   uint16 Payload Length
- *   uint8[] Font data (up to 1920 bytes)
- *   uint8  Terminator (0x00)
- */
-static int write_fontrom_packet(FILE *fp, const char *filename, uint8_t opcode, int verbose) {
-    if (!filename || !fp) return 0;
-
-    FILE *rom_file = fopen(filename, "rb");
-    if (!rom_file) {
-        fprintf(stderr, "Warning: Could not open font ROM file: %s\n", filename);
-        return -1;
-    }
-
-    // Get file size
-    fseek(rom_file, 0, SEEK_END);
-    long file_size = ftell(rom_file);
-    fseek(rom_file, 0, SEEK_SET);
-
-    if (file_size > MAX_FONTROM_SIZE) {
-        fprintf(stderr, "Warning: Font ROM file too large (max %d bytes): %s\n", MAX_FONTROM_SIZE, filename);
-        fclose(rom_file);
-        return -1;
-    }
-
-    // Read font data
-    uint8_t *font_data = malloc(file_size);
-    if (!font_data) {
-        fprintf(stderr, "Error: Could not allocate memory for font ROM\n");
-        fclose(rom_file);
-        return -1;
-    }
-
-    size_t bytes_read = fread(font_data, 1, file_size, rom_file);
-    fclose(rom_file);
-
-    if (bytes_read != (size_t)file_size) {
-        fprintf(stderr, "Warning: Could not read entire font ROM file: %s\n", filename);
-        free(font_data);
-        return -1;
-    }
-
-    // Calculate packet size: index(3) + opcode(1) + length(2) + data + terminator(1)
-    uint32_t packet_size = 3 + 1 + 2 + file_size + 1;
-
-    // Write packet type (0x30 - SSF)
-    fputc(TAV_PACKET_SSF, fp);
-
-    // Write packet size (uint32, little-endian)
-    fputc(packet_size & 0xFF, fp);
-    fputc((packet_size >> 8) & 0xFF, fp);
-    fputc((packet_size >> 16) & 0xFF, fp);
-    fputc((packet_size >> 24) & 0xFF, fp);
-
-    // Write index (3 bytes, always 0 for font ROM)
-    fputc(0, fp);
-    fputc(0, fp);
-    fputc(0, fp);
-
-    // Write opcode
-    fputc(opcode, fp);
-
-    // Write payload length (uint16, little-endian)
-    uint16_t payload_len = (uint16_t)file_size;
-    fputc(payload_len & 0xFF, fp);
-    fputc((payload_len >> 8) & 0xFF, fp);
-
-    // Write font data
-    fwrite(font_data, 1, file_size, fp);
-
-    // Write terminator
-    fputc(0x00, fp);
-
-    free(font_data);
-
-    if (verbose) {
-        printf("  Font ROM uploaded: %s (%ld bytes, opcode 0x%02X)\n", filename, file_size, opcode);
-    }
-
-    return 1 + 4 + (int)packet_size;  // Total bytes written
-}
-
-// =============================================================================
-// Worker Thread Functions
-// =============================================================================
-
-/**
- * Worker thread context - passed to worker_thread_main.
- */
-typedef struct {
-    cli_context_t *cli;
-    int thread_id;
-} worker_context_t;
-
-/**
- * Worker thread main function.
- * Continuously picks up jobs from the job pool and encodes them.
- */
-static void *worker_thread_main(void *arg) {
-    worker_context_t *wctx = (worker_context_t *)arg;
-    cli_context_t *cli = wctx->cli;
-    (void)wctx->thread_id;  // Unused but kept for debugging
-
-    while (1) {
-        pthread_mutex_lock(&cli->job_mutex);
-
-        // Wait for a job or shutdown signal
-        while (!cli->shutdown_workers) {
-            // Look for a job slot that is ready to encode
-            int found_job = -1;
-            for (int i = 0; i < cli->num_threads; i++) {
-                if (cli->gop_jobs[i].status == GOP_SLOT_READY) {
-                    cli->gop_jobs[i].status = GOP_SLOT_ENCODING;
-                    found_job = i;
-                    break;
-                }
-            }
-
-            if (found_job >= 0) {
-                pthread_mutex_unlock(&cli->job_mutex);
-
-                // Encode this GOP
-                gop_job_t *job = &cli->gop_jobs[found_job];
-
-                // Create thread-local encoder context
-                tav_encoder_context_t *ctx = tav_encoder_create(&job->params);
-                if (!ctx) {
-                    fprintf(stderr, "Failed to create encoder for GOP %d\n", job->gop_index);
-                    job->success = 0;
-                } else {
-                    // Encode GOP
-                    int result = tav_encoder_encode_gop(ctx,
-                                                        (const uint8_t **)job->rgb_frames,
-                                                        job->num_frames,
-                                                        job->frame_numbers,
-                                                        &job->packet);
-                    job->success = (result == 1 && job->packet != NULL);
-                    tav_encoder_free(ctx);
-                }
-
-                // Mark job as complete (reacquire lock for next iteration)
-                pthread_mutex_lock(&cli->job_mutex);
-                job->status = GOP_SLOT_COMPLETE;
-                pthread_cond_broadcast(&cli->job_complete);
-                // Keep lock held for next iteration of inner while loop
-                continue;  // Look for more jobs
-            }
-
-            // No job found, wait for signal
-            pthread_cond_wait(&cli->job_ready, &cli->job_mutex);
-        }
-
-        pthread_mutex_unlock(&cli->job_mutex);
-        break;  // Shutdown
-    }
-
-    free(wctx);
-    return NULL;
-}
-
-/**
- * Initialize multithreading resources.
- * Returns 0 on success, -1 on failure.
- */
-static int init_threading(cli_context_t *cli) {
-    if (cli->num_threads <= 0) {
-        return 0;  // Single-threaded mode
-    }
-
-    // Initialize mutex and condition variables
-    if (pthread_mutex_init(&cli->job_mutex, NULL) != 0) {
-        fprintf(stderr, "Error: Failed to initialize job mutex\n");
-        return -1;
-    }
-    if (pthread_cond_init(&cli->job_ready, NULL) != 0) {
-        fprintf(stderr, "Error: Failed to initialize job_ready cond\n");
-        pthread_mutex_destroy(&cli->job_mutex);
-        return -1;
-    }
-    if (pthread_cond_init(&cli->job_complete, NULL) != 0) {
-        fprintf(stderr, "Error: Failed to initialize job_complete cond\n");
-        pthread_cond_destroy(&cli->job_ready);
-        pthread_mutex_destroy(&cli->job_mutex);
-        return -1;
-    }
-
-    // Allocate job slots (one per thread)
-    cli->gop_jobs = calloc(cli->num_threads, sizeof(gop_job_t));
-    if (!cli->gop_jobs) {
-        fprintf(stderr, "Error: Failed to allocate job slots\n");
-        pthread_cond_destroy(&cli->job_complete);
-        pthread_cond_destroy(&cli->job_ready);
-        pthread_mutex_destroy(&cli->job_mutex);
-        return -1;
-    }
-
-    // Allocate worker thread handles
-    cli->worker_threads = malloc(cli->num_threads * sizeof(pthread_t));
-    if (!cli->worker_threads) {
-        fprintf(stderr, "Error: Failed to allocate thread handles\n");
-        free(cli->gop_jobs);
-        pthread_cond_destroy(&cli->job_complete);
-        pthread_cond_destroy(&cli->job_ready);
-        pthread_mutex_destroy(&cli->job_mutex);
-        return -1;
-    }
-
-    // Start worker threads
-    cli->shutdown_workers = 0;
-    for (int i = 0; i < cli->num_threads; i++) {
-        worker_context_t *wctx = malloc(sizeof(worker_context_t));
-        if (!wctx) {
-            fprintf(stderr, "Error: Failed to allocate worker context\n");
-            cli->shutdown_workers = 1;
-            pthread_cond_broadcast(&cli->job_ready);
-            for (int j = 0; j < i; j++) {
-                pthread_join(cli->worker_threads[j], NULL);
-            }
-            free(cli->worker_threads);
-            free(cli->gop_jobs);
-            pthread_cond_destroy(&cli->job_complete);
-            pthread_cond_destroy(&cli->job_ready);
-            pthread_mutex_destroy(&cli->job_mutex);
-            return -1;
-        }
-        wctx->cli = cli;
-        wctx->thread_id = i;
-
-        if (pthread_create(&cli->worker_threads[i], NULL, worker_thread_main, wctx) != 0) {
-            fprintf(stderr, "Error: Failed to create worker thread %d\n", i);
-            free(wctx);
-            cli->shutdown_workers = 1;
-            pthread_cond_broadcast(&cli->job_ready);
-            for (int j = 0; j < i; j++) {
-                pthread_join(cli->worker_threads[j], NULL);
-            }
-            free(cli->worker_threads);
-            free(cli->gop_jobs);
-            pthread_cond_destroy(&cli->job_complete);
-            pthread_cond_destroy(&cli->job_ready);
-            pthread_mutex_destroy(&cli->job_mutex);
-            return -1;
-        }
-    }
-
-    printf("Started %d worker threads for parallel GOP encoding\n", cli->num_threads);
-    return 0;
-}
-
-/**
- * Shutdown multithreading resources.
- */
-static void shutdown_threading(cli_context_t *cli) {
-    if (cli->num_threads <= 0) {
-        return;
-    }
-
-    // Signal workers to shutdown
-    pthread_mutex_lock(&cli->job_mutex);
-    cli->shutdown_workers = 1;
-    pthread_cond_broadcast(&cli->job_ready);
-    pthread_mutex_unlock(&cli->job_mutex);
-
-    // Wait for all workers to finish
-    for (int i = 0; i < cli->num_threads; i++) {
-        pthread_join(cli->worker_threads[i], NULL);
-    }
-
-    // Free job slots (and any remaining resources)
-    if (cli->gop_jobs) {
-        for (int i = 0; i < cli->num_threads; i++) {
-            if (cli->gop_jobs[i].packet) {
-                tav_encoder_free_packet(cli->gop_jobs[i].packet);
-            }
-            // Note: rgb_frames should already be freed by now
-        }
-        free(cli->gop_jobs);
-        cli->gop_jobs = NULL;
-    }
-
-    if (cli->worker_threads) {
-        free(cli->worker_threads);
-        cli->worker_threads = NULL;
-    }
-
-    pthread_cond_destroy(&cli->job_complete);
-    pthread_cond_destroy(&cli->job_ready);
-    pthread_mutex_destroy(&cli->job_mutex);
-}
-
-// =============================================================================
-// Multithreaded Encoding Loop
-// =============================================================================
-
-/**
- * Multithreaded video encoding function.
- * Uses worker threads to encode GOPs in parallel.
- */
-static int encode_video_mt(cli_context_t *cli) {
-    printf("Opening FFmpeg pipe...\n");
-    cli->ffmpeg_pipe = open_ffmpeg_pipe(cli->input_file,
-                                        cli->enc_params.width,
-                                        cli->enc_params.height,
-                                        cli->interlaced,
-                                        cli->header_height,
-                                        cli->target_fps_num,
-                                        cli->target_fps_den,
-                                        cli->original_fps_num,
-                                        cli->original_fps_den);
-    if (!cli->ffmpeg_pipe) {
-        return -1;
-    }
-
-    // Create temporary encoder to get calculated params (decomp_levels, etc.)
-    printf("Creating encoder context...\n");
-    tav_encoder_context_t *ctx = tav_encoder_create(&cli->enc_params);
-    if (!ctx) {
-        fprintf(stderr, "Error: %s\n", "Failed to create encoder");
-        pclose(cli->ffmpeg_pipe);
-        return -1;
-    }
-    tav_encoder_get_params(ctx, &cli->enc_params);
-    tav_encoder_free(ctx);
-    ctx = NULL;
-
-    // Initialize threading
-    if (init_threading(cli) < 0) {
-        pclose(cli->ffmpeg_pipe);
-        return -1;
-    }
-
-    // Allocate per-job frame buffers
-    size_t frame_size = cli->enc_params.width * cli->enc_params.height * 3;
-    int gop_size = cli->enc_params.gop_size;
-    if (!cli->enc_params.enable_temporal_dwt) {
-        gop_size = 1;
-    }
-
-    // In two-pass mode, use max GOP size for buffer since GOPs have variable sizes
-    int buffer_gop_size = cli->two_pass_mode ? ANALYSIS_GOP_MAX_SIZE : gop_size;
-
-    // Allocate frame buffers for each job slot
-    for (int slot = 0; slot < cli->num_threads; slot++) {
-        cli->gop_jobs[slot].rgb_frames = malloc(buffer_gop_size * sizeof(uint8_t*));
-        cli->gop_jobs[slot].frame_numbers = malloc(buffer_gop_size * sizeof(int));
-        if (!cli->gop_jobs[slot].rgb_frames || !cli->gop_jobs[slot].frame_numbers) {
-            fprintf(stderr, "Error: Failed to allocate job slot %d buffers\n", slot);
-            shutdown_threading(cli);
-            pclose(cli->ffmpeg_pipe);
-            return -1;
-        }
-        for (int f = 0; f < buffer_gop_size; f++) {
-            cli->gop_jobs[slot].rgb_frames[f] = malloc(frame_size);
-            if (!cli->gop_jobs[slot].rgb_frames[f]) {
-                fprintf(stderr, "Error: Failed to allocate frame buffer for slot %d\n", slot);
-                shutdown_threading(cli);
-                pclose(cli->ffmpeg_pipe);
-                return -1;
-            }
-        }
-        // Copy encoder params for thread safety
-        cli->gop_jobs[slot].params = cli->enc_params;
-        cli->gop_jobs[slot].status = GOP_SLOT_EMPTY;
-        cli->gop_jobs[slot].num_frames = 0;
-    }
-
-    // Allocate audio buffers if needed
-    if (cli->has_audio) {
-        size_t max_gop_audio = buffer_gop_size * cli->samples_per_frame * 2;
-        cli->gop_audio_buffer = malloc(max_gop_audio * sizeof(float));
-        cli->gop_audio_samples = 0;
-        if (!cli->gop_audio_buffer) {
-            fprintf(stderr, "Error: Failed to allocate GOP audio buffer\n");
-            shutdown_threading(cli);
-            pclose(cli->ffmpeg_pipe);
-            return -1;
-        }
-
-        // Allocate per-job audio buffers
-        for (int slot = 0; slot < cli->num_threads; slot++) {
-            cli->gop_jobs[slot].audio_samples = malloc(max_gop_audio * sizeof(float));
-            if (!cli->gop_jobs[slot].audio_samples) {
-                fprintf(stderr, "Error: Failed to allocate audio buffer for slot %d\n", slot);
-                shutdown_threading(cli);
-                pclose(cli->ffmpeg_pipe);
-                return -1;
-            }
-        }
-    }
-
-    // Temporary frame buffer for reading
-    uint8_t *rgb_frame = malloc(frame_size);
-    if (!rgb_frame) {
-        fprintf(stderr, "Error: Failed to allocate frame buffer\n");
-        shutdown_threading(cli);
-        pclose(cli->ffmpeg_pipe);
-        return -1;
-    }
-
-    // Write TAV/TAP header
-    write_tav_header(cli->output_fp, &cli->enc_params, cli->has_audio, cli->subtitles != NULL,
-                     cli->interlaced, cli->header_height, cli->is_still_image);
-
-    // Write Extended Header (unless suppressed)
-    // For interlaced mode, use header_height for XDIM if needed
-    int xhdr_height = cli->interlaced ? cli->header_height : cli->enc_params.height;
-    if (!cli->suppress_xhdr) {
-        cli->extended_header_offset = write_extended_header(cli, cli->enc_params.width, xhdr_height);
-        if (cli->extended_header_offset < 0) {
-            fprintf(stderr, "Warning: Failed to write Extended Header\n");
-        }
-    }
-
-    // Write subtitles upfront
-    if (cli->subtitles) {
-        printf("Writing subtitles...\n");
-        write_all_subtitles(cli->output_fp, cli->subtitles, cli->verbose);
-    }
-
-    // Write font ROMs if provided
-    if (cli->fontrom_low) {
-        printf("Uploading low font ROM...\n");
-        write_fontrom_packet(cli->output_fp, cli->fontrom_low, FONTROM_OPCODE_LOW, cli->verbose);
-    }
-    if (cli->fontrom_high) {
-        printf("Uploading high font ROM...\n");
-        write_fontrom_packet(cli->output_fp, cli->fontrom_high, FONTROM_OPCODE_HIGH, cli->verbose);
-    }
-
-    printf("Encoding frames with %d threads...\n", cli->num_threads);
-    cli->start_time = time(NULL);
-
-    int current_slot = 0;           // Slot being filled
-    int next_gop_to_write = 0;      // GOP index that should be written next
-    int current_gop_index = 0;      // Current GOP index being assembled
-    int frames_in_current_gop = 0;  // Frames accumulated in current slot
-    int encoding_error = 0;
-    int eof_reached = 0;
-
-    while (!encoding_error) {
-        // Step 1: Try to write any completed GOPs in order
-        pthread_mutex_lock(&cli->job_mutex);
-        while (!encoding_error) {
-            // Find the slot with the next GOP to write
-            int found = -1;
-            for (int i = 0; i < cli->num_threads; i++) {
-                if (cli->gop_jobs[i].status == GOP_SLOT_COMPLETE &&
-                    cli->gop_jobs[i].gop_index == next_gop_to_write) {
-                    found = i;
-                    break;
-                }
-            }
-
-            if (found < 0) break;  // No complete GOP ready to write
-
-            gop_job_t *job = &cli->gop_jobs[found];
-            pthread_mutex_unlock(&cli->job_mutex);
-
-            // Write this GOP
-            if (job->success && job->packet) {
-                // Write TIMECODE
-                write_timecode_packet(cli->output_fp, job->frame_numbers[0],
-                                     cli->enc_params.fps_num, cli->enc_params.fps_den);
-
-                // Write AUDIO for this GOP
-                if (cli->has_audio && job->num_audio_samples > 0) {
-                    write_audio_packet(cli->output_fp, cli, job->audio_samples, job->num_audio_samples);
-                }
-
-                // Write VIDEO packet
-                write_tav_packet(cli->output_fp, job->packet);
-                cli->total_bytes += job->packet->size;
-                cli->gop_count++;
-
-                // Write sync packet
-                if (job->packet->packet_type == TAV_PACKET_GOP_UNIFIED) {
-                    // For 3D-DWT mode, write GOP_SYNC (0xFC) with frame count
-                    int frames_in_gop = job->packet->data[1];
-                    write_gop_sync_packet(cli->output_fp, frames_in_gop);
-                } else if (job->packet->packet_type == TAV_PACKET_IFRAME) {
-                    // For intra-only mode, write SYNC (0xFF) with no payload
-                    write_sync_packet(cli->output_fp);
-                }
-
-                tav_encoder_free_packet(job->packet);
-                job->packet = NULL;
-            } else {
-                fprintf(stderr, "Error: GOP %d encoding failed\n", job->gop_index);
-                encoding_error = 1;
-            }
-
-            // Mark slot as empty
-            pthread_mutex_lock(&cli->job_mutex);
-            job->status = GOP_SLOT_EMPTY;
-            job->num_frames = 0;
-            next_gop_to_write++;
-
-            // Progress
-            if (cli->verbose || cli->frame_count % 60 == 0) {
-                time_t elapsed = time(NULL) - cli->start_time;
-                double fps = elapsed > 0 ? (double)cli->frame_count / elapsed : 0.0;
-                double bitrate = elapsed > 0 ?
-                    (cli->total_bytes * 8.0) / (cli->frame_count / ((double)cli->enc_params.fps_num / cli->enc_params.fps_den)) / 1000.0 : 0.0;
-
-                printf("\rFrame %ld | GOPs: %ld | %.1f fps | %.1f kbps | %zu KB    ",
-                       cli->frame_count, cli->gop_count, fps, bitrate,
-                       cli->total_bytes / 1024);
-                fflush(stdout);
-            }
-        }
-        pthread_mutex_unlock(&cli->job_mutex);
-
-        if (encoding_error || eof_reached) break;
-
-        // Step 2: Fill current GOP slot
-        gop_job_t *slot = &cli->gop_jobs[current_slot];
-
-        // Wait for slot to be empty (writing completed GOPs along the way)
-        pthread_mutex_lock(&cli->job_mutex);
-        while (slot->status != GOP_SLOT_EMPTY && !cli->shutdown_workers) {
-            // While waiting, check if we can write any completed GOPs
-            int wrote_something = 0;
-            for (int i = 0; i < cli->num_threads; i++) {
-                if (cli->gop_jobs[i].status == GOP_SLOT_COMPLETE &&
-                    cli->gop_jobs[i].gop_index == next_gop_to_write) {
-                    gop_job_t *job = &cli->gop_jobs[i];
-                    pthread_mutex_unlock(&cli->job_mutex);
-
-                    // Write this GOP
-                    if (job->success && job->packet) {
-                        write_timecode_packet(cli->output_fp, job->frame_numbers[0],
-                                             cli->enc_params.fps_num, cli->enc_params.fps_den);
-                        if (cli->has_audio && job->num_audio_samples > 0) {
-                            write_audio_packet(cli->output_fp, cli, job->audio_samples, job->num_audio_samples);
-                        }
-                        write_tav_packet(cli->output_fp, job->packet);
-                        cli->total_bytes += job->packet->size;
-                        cli->gop_count++;
-
-                        if (job->packet->packet_type == TAV_PACKET_GOP_UNIFIED) {
-                            write_gop_sync_packet(cli->output_fp, job->packet->data[1]);
-                        } else if (job->packet->packet_type == TAV_PACKET_IFRAME) {
-                            write_sync_packet(cli->output_fp);
-                        }
-
-                        tav_encoder_free_packet(job->packet);
-                        job->packet = NULL;
-
-                        // Progress
-                        time_t elapsed = time(NULL) - cli->start_time;
-                        double fps = elapsed > 0 ? (double)cli->frame_count / elapsed : 0.0;
-                        printf("\rFrame %ld | GOPs: %ld | %.1f fps | %zu KB    ",
-                               cli->frame_count, cli->gop_count, fps, cli->total_bytes / 1024);
-                        fflush(stdout);
-                    }
-
-                    pthread_mutex_lock(&cli->job_mutex);
-                    job->status = GOP_SLOT_EMPTY;
-                    job->num_frames = 0;
-                    next_gop_to_write++;
-                    wrote_something = 1;
-                    break;
-                }
-            }
-            if (!wrote_something) {
-                pthread_cond_wait(&cli->job_complete, &cli->job_mutex);
-            }
-        }
-        pthread_mutex_unlock(&cli->job_mutex);
-
-        // Reset audio accumulator only when starting a fresh GOP
-        if (frames_in_current_gop == 0) {
-            slot->num_audio_samples = 0;
-        }
-
-        // Read frame from FFmpeg
-        if (cli->encode_limit > 0 && cli->frame_count >= cli->encode_limit) {
-            eof_reached = 1;
-        } else {
-            int result = read_rgb_frame(cli->ffmpeg_pipe, rgb_frame, frame_size);
-            if (result == 0) {
-                eof_reached = 1;
-            } else if (result < 0) {
-                fprintf(stderr, "Error reading frame\n");
-                encoding_error = 1;
-            } else {
-                // Copy frame to slot buffer
-                memcpy(slot->rgb_frames[frames_in_current_gop], rgb_frame, frame_size);
-                slot->frame_numbers[frames_in_current_gop] = (int)cli->frame_count;
-                frames_in_current_gop++;
-                cli->frame_count++;
-
-                // Accumulate audio
-                if (cli->has_audio && cli->audio_buffer) {
-                    size_t samples_read = read_audio_samples(cli, cli->audio_buffer, cli->samples_per_frame);
-                    if (samples_read > 0) {
-                        memcpy(slot->audio_samples + slot->num_audio_samples * 2,
-                               cli->audio_buffer,
-                               samples_read * 2 * sizeof(float));
-                        slot->num_audio_samples += samples_read;
-                    }
-                }
-
-                // Determine current GOP size for two-pass mode
-                int current_gop_size = gop_size;
-                if (cli->two_pass_mode && cli->current_gop_boundary) {
-                    current_gop_size = cli->current_gop_boundary->num_frames;
-                }
-
-                // Check if GOP is complete
-                if (frames_in_current_gop >= current_gop_size) {
-                    slot->num_frames = frames_in_current_gop;
-                    slot->gop_index = current_gop_index;
-
-                    // Submit GOP to worker threads
-                    pthread_mutex_lock(&cli->job_mutex);
-                    slot->status = GOP_SLOT_READY;
-                    pthread_cond_broadcast(&cli->job_ready);
-                    pthread_mutex_unlock(&cli->job_mutex);
-
-                    // Advance to next GOP boundary (two-pass mode)
-                    if (cli->two_pass_mode && cli->current_gop_boundary) {
-                        cli->current_gop_boundary = cli->current_gop_boundary->next;
-                    }
-
-                    // Move to next slot
-                    current_slot = (current_slot + 1) % cli->num_threads;
-                    current_gop_index++;
-                    frames_in_current_gop = 0;
-
-                    // Note: audio reset moved to after we confirm slot is empty
-                }
-            }
-        }
-    }
-
-    // Handle partial GOP at end
-    if (!encoding_error && frames_in_current_gop > 0) {
-        printf("\nEncoding final partial GOP (%d frames)...\n", frames_in_current_gop);
-
-        gop_job_t *slot = &cli->gop_jobs[current_slot];
-        slot->num_frames = frames_in_current_gop;
-        slot->gop_index = current_gop_index;
-
-        pthread_mutex_lock(&cli->job_mutex);
-        slot->status = GOP_SLOT_READY;
-        pthread_cond_broadcast(&cli->job_ready);
-        pthread_mutex_unlock(&cli->job_mutex);
-
-        current_gop_index++;
-    }
-
-    // Wait for all remaining GOPs to complete and write them
-    while (!encoding_error && next_gop_to_write < current_gop_index) {
-        pthread_mutex_lock(&cli->job_mutex);
-
-        // Find slot with next GOP to write
-        int found = -1;
-        while (found < 0 && !encoding_error) {
-            for (int i = 0; i < cli->num_threads; i++) {
-                if (cli->gop_jobs[i].status == GOP_SLOT_COMPLETE &&
-                    cli->gop_jobs[i].gop_index == next_gop_to_write) {
-                    found = i;
-                    break;
-                }
-            }
-            if (found < 0) {
-                pthread_cond_wait(&cli->job_complete, &cli->job_mutex);
-            }
-        }
-
-        if (found >= 0) {
-            gop_job_t *job = &cli->gop_jobs[found];
-            pthread_mutex_unlock(&cli->job_mutex);
-
-            if (job->success && job->packet) {
-                write_timecode_packet(cli->output_fp, job->frame_numbers[0],
-                                     cli->enc_params.fps_num, cli->enc_params.fps_den);
-                if (cli->has_audio && job->num_audio_samples > 0) {
-                    write_audio_packet(cli->output_fp, cli, job->audio_samples, job->num_audio_samples);
-                }
-                write_tav_packet(cli->output_fp, job->packet);
-                cli->total_bytes += job->packet->size;
-                cli->gop_count++;
-
-                if (job->packet->packet_type == TAV_PACKET_GOP_UNIFIED) {
-                    write_gop_sync_packet(cli->output_fp, job->packet->data[1]);
-                } else if (job->packet->packet_type == TAV_PACKET_IFRAME) {
-                    write_sync_packet(cli->output_fp);
-                }
-
-                tav_encoder_free_packet(job->packet);
-                job->packet = NULL;
-            }
-
-            pthread_mutex_lock(&cli->job_mutex);
-            job->status = GOP_SLOT_EMPTY;
-            next_gop_to_write++;
-            pthread_mutex_unlock(&cli->job_mutex);
-        } else {
-            pthread_mutex_unlock(&cli->job_mutex);
-        }
-    }
-
-    printf("\n");
-
-    // Update total frames in header (skip for still images - already set to 0xFFFFFFFF)
-    if (!cli->is_still_image) {
-        update_total_frames(cli->output_fp, (uint32_t)cli->frame_count);
-    }
-
-    // Update ENDT in Extended Header (skip for still images)
-    if (!cli->is_still_image && !cli->suppress_xhdr && cli->extended_header_offset >= 0) {
-        // Calculate end time in nanoseconds
-        uint64_t end_time_ns = (uint64_t)cli->frame_count * 1000000000ULL * cli->enc_params.fps_den / cli->enc_params.fps_num;
-        update_extended_header_endt(cli->output_fp, cli->extended_header_offset, end_time_ns);
-    }
-
-    // Free per-job frame buffers (must be done before shutdown_threading)
-    for (int slot = 0; slot < cli->num_threads; slot++) {
-        if (cli->gop_jobs[slot].rgb_frames) {
-            for (int f = 0; f < buffer_gop_size; f++) {
-                free(cli->gop_jobs[slot].rgb_frames[f]);
-            }
-            free(cli->gop_jobs[slot].rgb_frames);
-            cli->gop_jobs[slot].rgb_frames = NULL;
-        }
-        free(cli->gop_jobs[slot].frame_numbers);
-        cli->gop_jobs[slot].frame_numbers = NULL;
-        free(cli->gop_jobs[slot].audio_samples);
-        cli->gop_jobs[slot].audio_samples = NULL;
-    }
-
-    // Cleanup
-    free(rgb_frame);
-    shutdown_threading(cli);
-    pclose(cli->ffmpeg_pipe);
-
-    // Cleanup audio
-    if (cli->audio_buffer) {
-        free(cli->audio_buffer);
-        cli->audio_buffer = NULL;
-    }
-    if (cli->gop_audio_buffer) {
-        free(cli->gop_audio_buffer);
-        cli->gop_audio_buffer = NULL;
-    }
-    if (cli->pcm_file) {
-        fclose(cli->pcm_file);
-        cli->pcm_file = NULL;
-    }
-    if (cli->has_audio) {
-        unlink(TEMP_PCM_FILE);
-    }
-
-    // Final statistics
-    time_t total_time = time(NULL) - cli->start_time;
-    double avg_fps = total_time > 0 ? (double)cli->frame_count / total_time : 0.0;
-    double duration = (double)cli->frame_count / ((double)cli->enc_params.fps_num / cli->enc_params.fps_den);
-    double avg_bitrate = duration > 0 ? (cli->total_bytes * 8.0) / duration / 1000.0 : 0.0;
-
-    printf("\nEncoding complete! (multithreaded, %d threads)\n", cli->num_threads);
-    printf("  Frames encoded: %ld\n", cli->frame_count);
-    printf("  GOPs encoded: %ld\n", cli->gop_count);
-    printf("  Total size: %.2f MB\n", cli->total_bytes / (1024.0 * 1024.0));
-    printf("  Duration: %.2f seconds\n", duration);
-    printf("  Average bitrate: %.1f kbps\n", avg_bitrate);
-    printf("  Encoding speed: %.1f fps\n", avg_fps);
-    printf("  Time taken: %ld seconds\n", total_time);
-
-    return encoding_error ? -1 : 0;
-}
-
-// =============================================================================
-// Single-Threaded Encoding Loop
-// =============================================================================
-
-static int encode_video(cli_context_t *cli) {
-    // Dispatch to multithreaded version if threads > 0
-    if (cli->num_threads > 0) {
-        return encode_video_mt(cli);
-    }
-
-    printf("Opening FFmpeg pipe...\n");
-    cli->ffmpeg_pipe = open_ffmpeg_pipe(cli->input_file,
-                                        cli->enc_params.width,
-                                        cli->enc_params.height,
-                                        cli->interlaced,
-                                        cli->header_height,
-                                        cli->target_fps_num,
-                                        cli->target_fps_den,
-                                        cli->original_fps_num,
-                                        cli->original_fps_den);
-    if (!cli->ffmpeg_pipe) {
-        return -1;
-    }
-
-    // Create encoder
-    printf("Creating encoder context...\n");
-    tav_encoder_context_t *ctx = tav_encoder_create(&cli->enc_params);
-    if (!ctx) {
-        fprintf(stderr, "Error: %s\n", "Failed to create encoder");
-        pclose(cli->ffmpeg_pipe);
-        return -1;
-    }
-
-    // Get actual encoder params (with calculated values like decomp_levels)
-    tav_encoder_get_params(ctx, &cli->enc_params);
-
-    // NOW allocate GOP audio buffer with correct gop_size
-    if (cli->has_audio) {
-        size_t max_gop_audio = cli->enc_params.gop_size * cli->samples_per_frame * 2;
-        cli->gop_audio_buffer = malloc(max_gop_audio * sizeof(float));
-        cli->gop_audio_samples = 0;
-
-        if (!cli->gop_audio_buffer) {
-            fprintf(stderr, "Error: Failed to allocate GOP audio buffer\n");
-            tav_encoder_free(ctx);
-            pclose(cli->ffmpeg_pipe);
-            return -1;
-        }
-
-        if (cli->verbose) {
-            printf("  GOP audio buffer: %zu samples (%zu bytes)\n",
-                   max_gop_audio / 2, max_gop_audio * sizeof(float));
-        }
-    }
-
-    // Allocate GOP frame buffer for tav_encoder_encode_gop()
-    size_t frame_size = cli->enc_params.width * cli->enc_params.height * 3;
-    int gop_size = cli->enc_params.gop_size;
-
-    // In intra-only mode, encode each frame immediately (GOP size = 1)
-    if (!cli->enc_params.enable_temporal_dwt) {
-        gop_size = 1;
-    }
-
-    // In two-pass mode, use max GOP size for buffer since GOPs have variable sizes
-    int buffer_gop_size = cli->two_pass_mode ? ANALYSIS_GOP_MAX_SIZE : gop_size;
-
-    cli->gop_frames = malloc(buffer_gop_size * sizeof(uint8_t*));
-    cli->gop_frame_numbers = malloc(buffer_gop_size * sizeof(int));
-    cli->gop_frame_count = 0;
-
-    if (!cli->gop_frames || !cli->gop_frame_numbers) {
-        fprintf(stderr, "Error: Failed to allocate GOP frame buffer\n");
-        tav_encoder_free(ctx);
-        pclose(cli->ffmpeg_pipe);
-        return -1;
-    }
-
-    for (int i = 0; i < buffer_gop_size; i++) {
-        cli->gop_frames[i] = malloc(frame_size);
-        if (!cli->gop_frames[i]) {
-            fprintf(stderr, "Error: Failed to allocate GOP frame %d\n", i);
-            for (int j = 0; j < i; j++) free(cli->gop_frames[j]);
-            free(cli->gop_frames);
-            free(cli->gop_frame_numbers);
-            tav_encoder_free(ctx);
-            pclose(cli->ffmpeg_pipe);
-            return -1;
-        }
-    }
-
-    if (cli->verbose) {
-        printf("  GOP frame buffer: %d frames x %zu bytes = %zu KB%s\n",
-               buffer_gop_size, frame_size, (buffer_gop_size * frame_size) / 1024,
-               cli->two_pass_mode ? " (two-pass mode)" : "");
-    }
-
-    // Temporary frame buffer for reading from FFmpeg
-    uint8_t *rgb_frame = malloc(frame_size);
-    if (!rgb_frame) {
-        fprintf(stderr, "Error: Failed to allocate frame buffer\n");
-        for (int i = 0; i < buffer_gop_size; i++) free(cli->gop_frames[i]);
-        free(cli->gop_frames);
-        free(cli->gop_frame_numbers);
-        tav_encoder_free(ctx);
-        pclose(cli->ffmpeg_pipe);
-        return -1;
-    }
-
-    // Write TAV/TAP header (with actual encoder params)
-    write_tav_header(cli->output_fp, &cli->enc_params, cli->has_audio, cli->subtitles != NULL,
-                     cli->interlaced, cli->header_height, cli->is_still_image);
-
-    // Write Extended Header (unless suppressed)
-    // For interlaced mode, use header_height for XDIM if needed
-    int xhdr_height_st = cli->interlaced ? cli->header_height : cli->enc_params.height;
-    if (!cli->suppress_xhdr) {
-        cli->extended_header_offset = write_extended_header(cli, cli->enc_params.width, xhdr_height_st);
-        if (cli->extended_header_offset < 0) {
-            fprintf(stderr, "Warning: Failed to write Extended Header\n");
-        }
-    }
-
-    // Write subtitles upfront (SSF-TC format)
-    if (cli->subtitles) {
-        printf("Writing subtitles...\n");
-        write_all_subtitles(cli->output_fp, cli->subtitles, cli->verbose);
-    }
-
-    // Write font ROMs if provided
-    if (cli->fontrom_low) {
-        printf("Uploading low font ROM...\n");
-        write_fontrom_packet(cli->output_fp, cli->fontrom_low, FONTROM_OPCODE_LOW, cli->verbose);
-    }
-    if (cli->fontrom_high) {
-        printf("Uploading high font ROM...\n");
-        write_fontrom_packet(cli->output_fp, cli->fontrom_high, FONTROM_OPCODE_HIGH, cli->verbose);
-    }
-
-    // Encoding loop using tav_encoder_encode_gop()
-    printf("Encoding frames...\n");
-    cli->start_time = time(NULL);
-
-    tav_encoder_packet_t *packet = NULL;
-    int encoding_error = 0;
-
-    while (1) {
-        // Check encode limit
-        if (cli->encode_limit > 0 && cli->frame_count >= cli->encode_limit) {
-            break;
-        }
-
-        // Read frame from FFmpeg
-        int result = read_rgb_frame(cli->ffmpeg_pipe, rgb_frame, frame_size);
-        if (result == 0) {
-            break;  // EOF
-        } else if (result < 0) {
-            fprintf(stderr, "Error reading frame\n");
-            encoding_error = 1;
-            break;
-        }
-
-        // Copy frame to GOP buffer
-        memcpy(cli->gop_frames[cli->gop_frame_count], rgb_frame, frame_size);
-        cli->gop_frame_numbers[cli->gop_frame_count] = (int)cli->frame_count;
-        cli->gop_frame_count++;
-
-        // Accumulate audio samples for this frame (will write when GOP completes)
-        if (cli->has_audio && cli->audio_buffer && cli->gop_audio_buffer) {
-            size_t samples_read = read_audio_samples(cli, cli->audio_buffer, cli->samples_per_frame);
-            if (samples_read > 0) {
-                // Append to GOP audio buffer (samples_read is per-channel count, stereo interleaved)
-                memcpy(cli->gop_audio_buffer + cli->gop_audio_samples * 2,
-                       cli->audio_buffer,
-                       samples_read * 2 * sizeof(float));
-                cli->gop_audio_samples += samples_read;
-            }
-        }
-
-        cli->frame_count++;
-
-        // Determine current GOP size for two-pass mode
-        int current_gop_size = gop_size;
-        if (cli->two_pass_mode && cli->current_gop_boundary) {
-            current_gop_size = cli->current_gop_boundary->num_frames;
-        }
-
-        // Check if GOP is full (either reached fixed size or two-pass boundary)
-        if (cli->gop_frame_count >= current_gop_size) {
-            // Encode complete GOP
-            result = tav_encoder_encode_gop(ctx,
-                                            (const uint8_t**)cli->gop_frames,
-                                            cli->gop_frame_count,
-                                            cli->gop_frame_numbers,
-                                            &packet);
-
-            if (result < 0) {
-                fprintf(stderr, "Error: %s\n", tav_encoder_get_error(ctx));
-                encoding_error = 1;
-                break;
-            }
-
-            if (packet) {
-                // GOP is complete - write in correct order: TIMECODE, AUDIO, VIDEO, GOP_SYNC
-
-                // 1. Write timecode before GOP (use first frame number in GOP)
-                write_timecode_packet(cli->output_fp, cli->gop_frame_numbers[0],
-                                     cli->enc_params.fps_num, cli->enc_params.fps_den);
-
-                // 2. Write accumulated audio for this GOP as single TAD packet
-                if (cli->has_audio && cli->gop_audio_samples > 0) {
-                    write_audio_packet(cli->output_fp, cli, cli->gop_audio_buffer, cli->gop_audio_samples);
-                    cli->gop_audio_samples = 0;  // Reset for next GOP
-                }
-
-                // 3. Write video GOP packet
-                write_tav_packet(cli->output_fp, packet);
-                cli->total_bytes += packet->size;
-                cli->gop_count++;
-
-                // 4. Write sync packet after video packets
-                if (packet->packet_type == TAV_PACKET_GOP_UNIFIED) {
-                    int frames_in_gop = packet->data[1];
-                    write_gop_sync_packet(cli->output_fp, frames_in_gop);
-                } else if (packet->packet_type == TAV_PACKET_IFRAME) {
-                    write_sync_packet(cli->output_fp);
-                }
-
-                tav_encoder_free_packet(packet);
-                packet = NULL;
-            }
-
-            // Reset GOP buffer
-            cli->gop_frame_count = 0;
-
-            // Advance to next GOP boundary (two-pass mode)
-            if (cli->two_pass_mode && cli->current_gop_boundary) {
-                cli->current_gop_boundary = cli->current_gop_boundary->next;
-            }
-
-            // Progress
-            if (cli->verbose || cli->frame_count % 60 == 0) {
-                time_t elapsed = time(NULL) - cli->start_time;
-                double fps = elapsed > 0 ? (double)cli->frame_count / elapsed : 0.0;
-                double bitrate = elapsed > 0 ?
-                    (cli->total_bytes * 8.0) / (cli->frame_count / ((double)cli->enc_params.fps_num / cli->enc_params.fps_den)) / 1000.0 : 0.0;
-
-                printf("\rFrame %ld/%ld | GOPs: %ld | %.1f fps | %.1f kbps | %zu KB",
-                       cli->frame_count,
-                       cli->encode_limit > 0 ? cli->encode_limit : 0L,
-                       cli->gop_count, fps, bitrate,
-                       cli->total_bytes / 1024);
-                fflush(stdout);
-            }
-        }
-    }
-
-    printf("\n");
-
-    // Encode remaining frames in GOP buffer (partial GOP)
-    if (!encoding_error && cli->gop_frame_count > 0) {
-        printf("Encoding final partial GOP (%d frames)...\n", cli->gop_frame_count);
-
-        int result = tav_encoder_encode_gop(ctx,
-                                            (const uint8_t**)cli->gop_frames,
-                                            cli->gop_frame_count,
-                                            cli->gop_frame_numbers,
-                                            &packet);
-
-        if (result < 0) {
-            fprintf(stderr, "Error encoding final GOP: %s\n", tav_encoder_get_error(ctx));
-        } else if (packet) {
-            // Write remaining packets in correct order: TIMECODE, AUDIO, VIDEO, GOP_SYNC
-
-            // 1. Write timecode
-            write_timecode_packet(cli->output_fp, cli->gop_frame_numbers[0],
-                                 cli->enc_params.fps_num, cli->enc_params.fps_den);
-
-            // 2. Write any remaining accumulated audio for this GOP
-            if (cli->has_audio && cli->gop_audio_samples > 0) {
-                write_audio_packet(cli->output_fp, cli, cli->gop_audio_buffer, cli->gop_audio_samples);
-                cli->gop_audio_samples = 0;
-            }
-
-            // 3. Write video packet
-            write_tav_packet(cli->output_fp, packet);
-            cli->total_bytes += packet->size;
-            cli->gop_count++;
-
-            // 4. Write sync packet after video packets
-            if (packet->packet_type == TAV_PACKET_GOP_UNIFIED) {
-                int frames_in_gop = packet->data[1];
-                write_gop_sync_packet(cli->output_fp, frames_in_gop);
-            } else if (packet->packet_type == TAV_PACKET_IFRAME) {
-                write_sync_packet(cli->output_fp);
-            }
-
-            tav_encoder_free_packet(packet);
-        }
-    }
-
-    // Update total frames in header (skip for still images - already set to 0xFFFFFFFF)
-    if (!cli->is_still_image) {
-        update_total_frames(cli->output_fp, (uint32_t)cli->frame_count);
-    }
-
-    // Update ENDT in Extended Header (skip for still images)
-    if (!cli->is_still_image && !cli->suppress_xhdr && cli->extended_header_offset >= 0) {
-        // Calculate end time in nanoseconds
-        uint64_t end_time_ns = (uint64_t)cli->frame_count * 1000000000ULL * cli->enc_params.fps_den / cli->enc_params.fps_num;
-        update_extended_header_endt(cli->output_fp, cli->extended_header_offset, end_time_ns);
-    }
-
-    // Cleanup
-    free(rgb_frame);
-    tav_encoder_free(ctx);
-    pclose(cli->ffmpeg_pipe);
-
-    // Cleanup GOP frame buffer
-    if (cli->gop_frames) {
-        for (int i = 0; i < gop_size; i++) {
-            free(cli->gop_frames[i]);
-        }
-        free(cli->gop_frames);
-        cli->gop_frames = NULL;
-    }
-    if (cli->gop_frame_numbers) {
-        free(cli->gop_frame_numbers);
-        cli->gop_frame_numbers = NULL;
-    }
-
-    // Cleanup audio resources
-    if (cli->audio_buffer) {
-        free(cli->audio_buffer);
-        cli->audio_buffer = NULL;
-    }
-    if (cli->gop_audio_buffer) {
-        free(cli->gop_audio_buffer);
-        cli->gop_audio_buffer = NULL;
-    }
-    if (cli->pcm_file) {
-        fclose(cli->pcm_file);
-        cli->pcm_file = NULL;
-    }
-    // Remove temporary audio file
-    if (cli->has_audio) {
-        unlink(TEMP_PCM_FILE);
-    }
-
-    // Final statistics
-    time_t total_time = time(NULL) - cli->start_time;
-    double avg_fps = total_time > 0 ? (double)cli->frame_count / total_time : 0.0;
-    double duration = (double)cli->frame_count / ((double)cli->enc_params.fps_num / cli->enc_params.fps_den);
-    double avg_bitrate = duration > 0 ? (cli->total_bytes * 8.0) / duration / 1000.0 : 0.0;
-
-    printf("\nEncoding complete!\n");
-    printf("  Frames encoded: %ld\n", cli->frame_count);
-    printf("  GOPs encoded: %ld\n", cli->gop_count);
-    printf("  Total size: %.2f MB\n", cli->total_bytes / (1024.0 * 1024.0));
-    printf("  Duration: %.2f seconds\n", duration);
-    printf("  Average bitrate: %.1f kbps\n", avg_bitrate);
-    printf("  Encoding speed: %.1f fps\n", avg_fps);
-    printf("  Time taken: %ld seconds\n", total_time);
-
-    return 0;
-}
-
-// =============================================================================
-// Main
-// =============================================================================
-
-// Parse resolution string like "1024x768" with keyword recognition
-static int parse_resolution(const char *res_str, int *width, int *height) {
-    if (!res_str) return 0;
-    if (strcmp(res_str, "cif") == 0 || strcmp(res_str, "CIF") == 0) {
-        *width = 352;
-        *height = 288;
-        return 1;
-    }
-    if (strcmp(res_str, "qcif") == 0 || strcmp(res_str, "QCIF") == 0) {
-        *width = 176;
-        *height = 144;
-        return 1;
-    }
-    if (strcmp(res_str, "vga") == 0 || strcmp(res_str, "VGA") == 0) {
-        *width = 640;
-        *height = 480;
-        return 1;
-    }
-    if (strcmp(res_str, "d1") == 0 || strcmp(res_str, "D1") == 0) {
-        *width = 720;
-        *height = 480;
-        return 1;
-    }
-    if (strcmp(res_str, "d1pal") == 0 || strcmp(res_str, "D1PAL") == 0) {
-        *width = 720;
-        *height = 576;
-        return 1;
-    }
-    if (strcmp(res_str, "960h") == 0 || strcmp(res_str, "960H") == 0) {
-        *width = 960;
-        *height = 576;
-        return 1;
-    }
-    // HD-ish resolutions
-    if (strcmp(res_str, "540p") == 0 || strcmp(res_str, "540P") == 0 || strcmp(res_str, "qHD") == 0) {
-        *width = 960;
-        *height = 540;
-        return 1;
-    }
-    if (strcmp(res_str, "720p") == 0 || strcmp(res_str, "720P") == 0 || strcmp(res_str, "wxga") == 0 || strcmp(res_str, "WXGA") == 0) {
-        *width = 1280;
-        *height = 720;
-        return 1;
-    }
-    if (strcmp(res_str, "800p") == 0 || strcmp(res_str, "800P") == 0) {
-        *width = 1280;
-        *height = 800;
-        return 1;
-    }
-    if (strcmp(res_str, "900p") == 0 || strcmp(res_str, "900P") == 0) {
-        *width = 1600;
-        *height = 900;
-        return 1;
-    }
-    if (strcmp(res_str, "960p") == 0 || strcmp(res_str, "960P") == 0 || strcmp(res_str, "wsxga") == 0 || strcmp(res_str, "WSXGA") == 0) {
-        *width = 1706;
-        *height = 960;
-        return 1;
-    }
-    if (strcmp(res_str, "1080p") == 0 || strcmp(res_str, "1080P") == 0 || strcmp(res_str, "fhd") == 0 || strcmp(res_str, "FHD") == 0 || strcmp(res_str, "wuxga") == 0 || strcmp(res_str, "WUXGA") == 0) {
-        *width = 1920;
-        *height = 1080;
-        return 1;
-    }
-    if (strcmp(res_str, "1440p") == 0 || strcmp(res_str, "1440P") == 0 || strcmp(res_str, "wqhd") == 0 || strcmp(res_str, "WQHD") == 0) {
-        *width = 2560;
-        *height = 1440;
-        return 1;
-    }
-    if (strcmp(res_str, "4k") == 0 || strcmp(res_str, "4K") == 0 || strcmp(res_str, "2160p") == 0 || strcmp(res_str, "2160p") == 0 || strcmp(res_str, "uhd") == 0 || strcmp(res_str, "UHD") == 0) {
-        *width = 3840;
-        *height = 2160;
-        return 1;
-    }
-    // 4K Univisium
-    if (strcmp(res_str, "4ku") == 0 || strcmp(res_str, "4KU") == 0) {
-        *width = 4096;
-        *height = 2048;
-        return 1;
-    }
-    // 3K Univisium
-    if (strcmp(res_str, "3ku") == 0 || strcmp(res_str, "3KU") == 0) {
-        *width = 3072;
-        *height = 1536;
-        return 1;
-    }
-    // 2K Univisium
-    if (strcmp(res_str, "2ku") == 0 || strcmp(res_str, "2KU") == 0) {
-        *width = 2048;
-        *height = 1024;
-        return 1;
-    }
-    // 1K Univisium
-    if (strcmp(res_str, "1ku") == 0 || strcmp(res_str, "1KU") == 0) {
-        *width = 1024;
-        *height = 512;
-        return 1;
-    }
-    // 4K DCI
-    if (strcmp(res_str, "4kdci") == 0 || strcmp(res_str, "4KDCI") == 0 || strcmp(res_str, "4k_dci") == 0 || strcmp(res_str, "4K_DCI") == 0 || strcmp(res_str, "4k-dci") == 0 || strcmp(res_str, "4K-DCI") == 0) {
-        *width = 4096;
-        *height = 2160;
-        return 1;
-    }
-    // 2.5K DCI
-    if (strcmp(res_str, "2.5kdci") == 0 || strcmp(res_str, "2.5KDCI") == 0 || strcmp(res_str, "2.5k_dci") == 0 || strcmp(res_str, "2.5K_DCI") == 0 || strcmp(res_str, "2.5k-dci") == 0 || strcmp(res_str, "2.5K-DCI") == 0 ||
-        strcmp(res_str, "2,5kdci") == 0 || strcmp(res_str, "2,5KDCI") == 0 || strcmp(res_str, "2,5k_dci") == 0 || strcmp(res_str, "2,5K_DCI") == 0 || strcmp(res_str, "2,5k-dci") == 0 || strcmp(res_str, "2,5K-DCI") == 0) {
-        *width = 2560;
-        *height = 1350;
-        return 1;
-    }
-    // 2K DCI
-    if (strcmp(res_str, "2kdci") == 0 || strcmp(res_str, "2KDCI") == 0 || strcmp(res_str, "2k_dci") == 0 || strcmp(res_str, "2K_DCI") == 0 || strcmp(res_str, "2k-dci") == 0 || strcmp(res_str, "2K-DCI") == 0) {
-        *width = 2048;
-        *height = 1080;
-        return 1;
-    }
-    // 1K DCI
-    if (strcmp(res_str, "1kdci") == 0 || strcmp(res_str, "1KDCI") == 0 || strcmp(res_str, "1k_dci") == 0 || strcmp(res_str, "1K_DCI") == 0 || strcmp(res_str, "1k-dci") == 0 || strcmp(res_str, "1K-DCI") == 0) {
-        *width = 1024;
-        *height = 540;
-        return 1;
-    }
-    if (strcmp(res_str, "half") == 0 || strcmp(res_str, "HALF") == 0) {
-        *width = 280;
-        *height = 224;
-        return 1;
-    }
-    if (strcmp(res_str, "full") == 0 || strcmp(res_str, "FULL") == 0 || strcmp(res_str, "tsvm") == 0 || strcmp(res_str, "TSVM") == 0) {
-        *width = 560;
-        *height = 448;
-        return 1;
-    }
-    if (strcmp(res_str, "default") == 0 || strcmp(res_str, "DEFAULT") == 0) {
-        *width = DEFAULT_WIDTH;
-        *height = DEFAULT_HEIGHT;
-        return 1;
-    }
-    return sscanf(res_str, "%dx%d", width, height) == 2;
-}
-
-int main(int argc, char *argv[]) {
-    // Generate temp file names
-    generate_random_filename(TEMP_AUDIO_FILE);
-    generate_random_filename(TEMP_PCM_FILE);
-    strcpy(TEMP_PCM_FILE + 37, ".pcm");
-    strcpy(TEMP_AUDIO_FILE + 37, ".mp2");
-
-    printf("TAV Encoder - TSVM Advanced Video Codec (Reference Implementation)\n");
-    printf("Using libtavenc v1.0 - Complete feature set with all encoder presets\n\n");
-
-    // Initialize CLI context
-    cli_context_t cli = {0};
-
-    // Initialize encoder params with defaults
-    tav_encoder_params_init(&cli.enc_params, DEFAULT_WIDTH, DEFAULT_HEIGHT);
-
-    // Force EZBC entropy coder (Twobitmap is deprecated)
-    cli.enc_params.entropy_coder = 1;  // Always use EZBC
-
-    // Ensure two-pass scene detection is enabled by default
-    cli.enc_params.enable_two_pass = 1;
-
-    // Initialize audio defaults
-    cli.has_audio = 1;              // Enabled by default
-    cli.audio_quality = -1;         // Will match video quality if not specified
-    cli.use_native_audio = 0;       // TAD by default
-
-    // Initialize threading
-    cli.num_threads = get_default_thread_count();
-
-    // Command-line options
-    static struct option long_options[] = {
-        {"input", required_argument, 0, 'i'},
-        {"output", required_argument, 0, 'o'},
-        {"size", required_argument, 0, 's'},
-        {"fps", required_argument, 0, 'f'},
-        {"quality", required_argument, 0, 'q'},
-        {"quantiser", required_argument, 0, 'Q'},
-        {"wavelet", required_argument, 0, 'w'},
-        {"temporal-wavelet", required_argument, 0, 1021},
-        {"colour-space", required_argument, 0, 'c'},
-        {"verbose", no_argument, 0, 'v'},
-        {"intra-only", no_argument, 0, 1001},
-        {"temporal-dwt", no_argument, 0, 1002},
-        {"gop-size", required_argument, 0, 1003},
-        {"single-pass", no_argument, 0, 1004},
-        {"zstd-level", required_argument, 0, 1005},
-        {"no-perceptual-tuning", no_argument, 0, 1006},
-        {"no-dead-zone", no_argument, 0, 1007},
-        {"dead-zone-threshold", required_argument, 0, 1023},
-        {"decomp-levels", required_argument, 0, 1024},
-        {"temporal-levels", required_argument, 0, 1025},
-        {"encode-limit", required_argument, 0, 1009},
-        {"subtitle", required_argument, 0, 1010},
-        {"fontrom-low", required_argument, 0, 1011},
-        {"fontrom-high", required_argument, 0, 1012},
-        {"tad-audio", no_argument, 0, 1013},
-        {"pcm8-audio", no_argument, 0, 1014},
-        {"separate-audio-track", no_argument, 0, 1015},
-        {"audio-quality", required_argument, 0, 1016},
-        {"no-audio", no_argument, 0, 1017},
-        {"preset-sports", no_argument, 0, 1026},
-        {"preset-anime", no_argument, 0, 1027},
-        {"monoblock", no_argument, 0, 1028},
-        {"tiled", no_argument, 0, 1029},
-        {"suppress-xhdr", no_argument, 0, 1030},
-        {"threads", required_argument, 0, 't'},
-        {"interlaced", no_argument, 0, 1031},
-        {"help", no_argument, 0, '?'},
-        {0, 0, 0, 0}
-    };
-
-
-    // Probe video to get resolution and framerate
-    int need_probe_dimensions = 0;
-    int need_probe_fps = 1;
-
-    int c, option_index = 0;
-    while ((c = getopt_long(argc, argv, "i:o:s:f:q:Q:w:c:t:v?", long_options, &option_index)) != -1) {
-        switch (c) {
-            case 'i':
-                cli.input_file = strdup(optarg);
-                break;
-            case 'o':
-                cli.output_file = strdup(optarg);
-                break;
-            case 's': {
-                if (strcmp(optarg, "original") == 0 || strcmp(optarg, "ORIGINAL") == 0) {
-                    need_probe_dimensions = 1;
-                    break;
-                }
-                if (!parse_resolution(optarg, &cli.enc_params.width, &cli.enc_params.height)) {
-                    fprintf(stderr, "Invalid resolution format: %s\n", optarg);
-                    return 1;
-                }
-                break;
-            }
-            case 'f': {
-                int num, den = 1;
-                if (sscanf(optarg, "%d/%d", &num, &den) < 1) {
-                    fprintf(stderr, "Error: Invalid fps format. Use NUM or NUM/DEN\n");
-                    return 1;
-                }
-                // Keep need_probe_fps = 1 so we always probe source fps
-                // (needed for minterpolate vs fps filter decision)
-                cli.target_fps_num = num;
-                cli.target_fps_den = den;
-                cli.enc_params.fps_num = num;
-                cli.enc_params.fps_den = den;
-                break;
-            }
-            case 'q': {
-                int q = atoi(optarg);
-                if (q < 0 || q > 5) {
-                    fprintf(stderr, "Error: Quality must be 0-5\n");
-                    return 1;
-                }
-                // Convert quality level to quantiser indices
-                cli.enc_params.quality_level = q;
-                cli.enc_params.quantiser_y = QUALITY_Y[q];
-                cli.enc_params.quantiser_co = QUALITY_CO[q];
-                cli.enc_params.quantiser_cg = QUALITY_CG[q];
-                cli.enc_params.dead_zone_threshold = DEAD_ZONE_THRESHOLD[q];
-                break;
-            }
-            case 'Q': {
-                int y, co, cg;
-                if (sscanf(optarg, "%d,%d,%d", &y, &co, &cg) != 3) {
-                    fprintf(stderr, "Error: Invalid quantiser format. Use Y,Co,Cg\n");
-                    return 1;
-                }
-                cli.enc_params.quantiser_y = y;
-                cli.enc_params.quantiser_co = co;
-                cli.enc_params.quantiser_cg = cg;
-                break;
-            }
-            case 'w':
-                cli.enc_params.wavelet_type = atoi(optarg);
-                break;
-            case 'c':
-                cli.enc_params.channel_layout = atoi(optarg);
-                break;
-            case 'v':
-                cli.verbose = 1;
-                cli.enc_params.verbose = 1;
-                break;
-            case 1001:  // --intra-only
-                cli.enc_params.enable_temporal_dwt = 0;
-                break;
-            case 1002:  // --temporal-dwt
-                cli.enc_params.enable_temporal_dwt = 1;
-                break;
-            case 1003:  // --gop-size
-                cli.enc_params.gop_size = atoi(optarg);
-                break;
-            case 1004:  // --single-pass
-                cli.enc_params.enable_two_pass = 0;
-                break;
-            case 1005:  // --zstd-level
-                cli.enc_params.zstd_level = atoi(optarg);
-                break;
-            case 1006:  // --no-perceptual-tuning
-                cli.enc_params.perceptual_tuning = 0;
-                break;
-            case 1007:  // --no-dead-zone
-                cli.enc_params.dead_zone_threshold = 0.0;
-                break;
-            case 1009:  // --encode-limit
-                cli.encode_limit = atoi(optarg);
-                break;
-            case 1010:  // --subtitle
-                cli.subtitle_file = strdup(optarg);
-                break;
-            case 1011:  // --fontrom-low
-                cli.fontrom_low = strdup(optarg);
-                break;
-            case 1012:  // --fontrom-high
-                cli.fontrom_high = strdup(optarg);
-                break;
-            case 1013:  // --tad-audio
-                cli.use_native_audio = 0;
-                break;
-            case 1014:  // --pcm8-audio
-                cli.use_native_audio = 1;
-                break;
-            case 1015:  // --separate-audio-track
-                cli.separate_audio_track = 1;
-                break;
-            case 1016:  // --audio-quality
-                cli.audio_quality = atoi(optarg);
-                if (cli.audio_quality < 0 || cli.audio_quality > 5) {
-                    fprintf(stderr, "Error: Audio quality must be 0-5\n");
-                    return 1;
-                }
-                break;
-            case 1017:  // --no-audio
-                cli.has_audio = 0;
-                break;
-            case 1021:  // --temporal-wavelet
-                cli.enc_params.temporal_wavelet = atoi(optarg);
-                break;
-            case 1023:  // --dead-zone-threshold
-                cli.enc_params.dead_zone_threshold = atof(optarg);
-                break;
-            case 1024:  // --decomp-levels
-                cli.enc_params.decomp_levels = atoi(optarg);
-                break;
-            case 1025:  // --temporal-levels
-                cli.enc_params.temporal_levels = atoi(optarg);
-                break;
-            case 1026:  // --preset-sports
-                cli.enc_params.encoder_preset |= 0x01;
-                break;
-            case 1027:  // --preset-anime
-                cli.enc_params.encoder_preset |= 0x02;
-                break;
-            case 1028:  // --monoblock
-                cli.enc_params.monoblock = 1;
-                break;
-            case 1029:  // --tiled
-                cli.enc_params.monoblock = 0;
-                break;
-            case 1030:  // --suppress-xhdr
-                cli.suppress_xhdr = 1;
-                break;
-            case 1031:  // --interlaced
-                cli.interlaced = 1;
-                break;
-            case 't': {  // --threads
-                int threads = atoi(optarg);
-                if (threads < 0) {
-                    fprintf(stderr, "Error: Thread count must be positive\n");
-                    return 1;
-                }
-                // Both 0 and 1 mean single-threaded (use value 0 internally)
-                cli.num_threads = (threads <= 1) ? 0 : threads;
-                break;
-            }
-            case '?':
-            default:
-                print_usage(argv[0]);
-                return (c == '?') ? 0 : 1;
-        }
-    }
-
-    // Validate required arguments
-    if (!cli.input_file || !cli.output_file) {
-        fprintf(stderr, "Error: Input and output files are required\n\n");
-        print_usage(argv[0]);
-        return 1;
-    }
-
-    // Detect still images (TAP mode)
-    int still_image_check = is_input_still_image(cli.input_file);
-    if (still_image_check > 0) {
-        cli.is_still_image = 1;
-        printf("Detected still image - encoding as TAP format\n");
-
-        // Force single-threaded mode for still images (override user option)
-        if (cli.num_threads > 0) {
-            printf("  Disabling multithreading for still image\n");
-            cli.num_threads = 0;
-        }
-
-        // Force intra-only mode (no temporal DWT)
-        cli.enc_params.enable_temporal_dwt = 0;
-
-        // Disable audio for still images by default
-        if (cli.has_audio) {
-            printf("  Disabling audio for still image\n");
-            cli.has_audio = 0;
-        }
-
-        // Force encode limit to 1 frame
-        cli.encode_limit = 1;
-    }
-
-    if (need_probe_dimensions || need_probe_fps) {
-        printf("Probing input file...\n");
-        if (get_video_info(cli.input_file,
-                          &cli.original_width, &cli.original_height,
-                          &cli.original_fps_num, &cli.original_fps_den) < 0) {
-            return 1;
-        }
-
-        // Use probed dimensions if not specified by -s
-        if (need_probe_dimensions) {
-            cli.enc_params.width = cli.original_width;
-            cli.enc_params.height = cli.original_height;
-            printf("  Resolution: %dx%d\n", cli.original_width, cli.original_height);
-        }
-
-        // Always print source framerate
-        printf("  Framerate: %d/%d\n", cli.original_fps_num, cli.original_fps_den);
-
-        // Use probed framerate if not specified by -f
-        if (cli.target_fps_num == 0) {
-            cli.enc_params.fps_num = cli.original_fps_num;
-            cli.enc_params.fps_den = cli.original_fps_den;
-        }
-    }
-
-    // Handle interlaced mode: store full height for header, use half-height internally
-    if (cli.interlaced) {
-        // Store full height for the header
-        cli.header_height = cli.enc_params.height;
-        // Use half-height internally (FFmpeg will output half-height frames)
-        cli.enc_params.height = cli.enc_params.height / 2;
-        printf("Interlaced mode: header=%dx%d, internal=%dx%d\n",
-               cli.enc_params.width, cli.header_height,
-               cli.enc_params.width, cli.enc_params.height);
-    } else {
-        // Progressive mode: header_height equals internal height
-        cli.header_height = cli.enc_params.height;
-    }
-
-    // Report fps conversion if enabled
-    if (cli.target_fps_num > 0 && cli.original_fps_num > 0) {
-        long long target_rate = (long long)cli.target_fps_num * cli.original_fps_den;
-        long long source_rate = (long long)cli.original_fps_num * cli.target_fps_den;
-
-        if (target_rate > source_rate) {
-            printf("Framerate conversion: %d/%d -> %d/%d (minterpolate)\n",
-                   cli.original_fps_num, cli.original_fps_den,
-                   cli.target_fps_num, cli.target_fps_den);
-        } else if (target_rate < source_rate) {
-            printf("Framerate conversion: %d/%d -> %d/%d (fps)\n",
-                   cli.original_fps_num, cli.original_fps_den,
-                   cli.target_fps_num, cli.target_fps_den);
-        }
-        // If equal, no message needed (no conversion)
-    } else if (cli.target_fps_num > 0) {
-        printf("Output framerate: %d/%d\n", cli.target_fps_num, cli.target_fps_den);
-    }
-
-    // Set audio quality to match video quality if not specified
-    if (cli.audio_quality < 0) {
-        cli.audio_quality = cli.enc_params.quality_level;  // Match luma quality
-    }
-
-    // Extract audio if enabled
-    if (cli.has_audio && !cli.use_native_audio) {
-        printf("Extracting audio...\n");
-        if (extract_audio_to_file(cli.input_file, TEMP_PCM_FILE)) {
-            cli.pcm_file = fopen(TEMP_PCM_FILE, "rb");
-            if (cli.pcm_file) {
-                fseek(cli.pcm_file, 0, SEEK_END);
-                cli.audio_remaining = ftell(cli.pcm_file);
-                fseek(cli.pcm_file, 0, SEEK_SET);
-
-                // Calculate samples per frame (accounting for fractional fps via fps_den)
-                cli.samples_per_frame = (AUDIO_SAMPLE_RATE * cli.enc_params.fps_den + cli.enc_params.fps_num - 1) / cli.enc_params.fps_num;
-
-                // Allocate per-frame audio buffer
-                cli.audio_buffer_size = cli.samples_per_frame * 2;  // Stereo
-                cli.audio_buffer = malloc(cli.audio_buffer_size * sizeof(float));
-
-                // Note: GOP audio buffer will be allocated in encode_video() after encoder creation
-                // when we know the actual GOP size
-
-                printf("  Audio: TAD quality %d, %d samples/frame\n",
-                       cli.audio_quality, cli.samples_per_frame);
-            } else {
-                fprintf(stderr, "Warning: Failed to open extracted audio, encoding without audio\n");
-                cli.has_audio = 0;
-            }
-        } else {
-            fprintf(stderr, "Warning: No audio stream found or extraction failed\n");
-            cli.has_audio = 0;
-        }
-    }
-
-    // Parse subtitle file if provided
-    if (cli.subtitle_file) {
-        printf("Parsing subtitles: %s\n", cli.subtitle_file);
-        cli.subtitles = parse_srt_file(cli.subtitle_file);
-        if (cli.subtitles) {
-            // Count subtitles
-            int count = 0;
-            subtitle_entry_t *sub = cli.subtitles;
-            while (sub) {
-                count++;
-                sub = sub->next;
-            }
-            printf("  Loaded %d subtitles\n", count);
-        } else {
-            fprintf(stderr, "Warning: Failed to parse subtitle file\n");
-        }
-    }
-
-    // Initialize Extended Header metadata
-    cli.ffmpeg_version = get_ffmpeg_version();  // May return NULL if FFmpeg not found
-    struct timespec ts;
-    if (clock_gettime(CLOCK_REALTIME, &ts) == 0) {
-        cli.creation_time_us = (uint64_t)ts.tv_sec * 1000000ULL + (uint64_t)ts.tv_nsec / 1000ULL;
-    } else {
-        // Fallback to time() if clock_gettime fails
-        cli.creation_time_us = (uint64_t)time(NULL) * 1000000ULL;
-    }
-
-    // Open output file
-    cli.output_fp = fopen(cli.output_file, "wb");
-    if (!cli.output_fp) {
-        fprintf(stderr, "Error: Failed to open output file: %s\n", cli.output_file);
-        return 1;
-    }
-
-    // Two-pass scene change detection (if enabled and temporal DWT is active)
-    if (cli.enc_params.enable_two_pass && cli.enc_params.enable_temporal_dwt && !cli.is_still_image) {
-        if (two_pass_first_pass(&cli) == 0) {
-            cli.two_pass_mode = 1;
-            cli.current_gop_boundary = cli.gop_boundaries;  // Start at first GOP
-            printf("Two-pass mode: Using adaptive GOP sizes based on scene detection\n");
-        } else {
-            fprintf(stderr, "Warning: Two-pass analysis failed, falling back to single-pass\n");
-            cli.two_pass_mode = 0;
-        }
-    } else {
-        cli.two_pass_mode = 0;
-        if (cli.enc_params.enable_two_pass && !cli.enc_params.enable_temporal_dwt) {
-            printf("Note: Two-pass mode requires temporal DWT (disabled in intra-only mode)\n");
-        }
-    }
-
-    // Encode video
-    int result = encode_video(&cli);
-
-    // Print output file before cleanup frees the string
-    if (result >= 0) {
-        printf("\nOutput written to: %s\n", cli.output_file);
-    }
-
-    // Cleanup
-    fclose(cli.output_fp);
-    free(cli.input_file);
-    free(cli.output_file);
-    if (cli.subtitle_file) {
-        free(cli.subtitle_file);
-    }
-    if (cli.subtitles) {
-        free_subtitle_list(cli.subtitles);
-    }
-    if (cli.fontrom_low) {
-        free(cli.fontrom_low);
-    }
-    if (cli.fontrom_high) {
-        free(cli.fontrom_high);
-    }
-    if (cli.ffmpeg_version) {
-        free(cli.ffmpeg_version);
-    }
-
-    // Cleanup two-pass data structures
-    if (cli.frame_analyses) {
-        free(cli.frame_analyses);
-    }
-    if (cli.gop_boundaries) {
-        free_gop_boundaries(cli.gop_boundaries);
-    }
-
-    if (result < 0) {
-        fprintf(stderr, "Encoding failed\n");
-        return 1;
-    }
-
-    return 0;
-}
diff --git a/video_encoder/src/encoder_tav_dt.c b/video_encoder/src/encoder_tav_dt.c
deleted file mode 100644
index ad3fee4..0000000
--- a/video_encoder/src/encoder_tav_dt.c
+++ /dev/null
@@ -1,1502 +0,0 @@
-/**
- * TAV-DT Encoder - Digital Tape Format Encoder
- *
- * Encodes video to TAV-DT format with forward error correction.
- *
- * TAV-DT is a packetised streaming format designed for digital tape/broadcast:
- * - Fixed dimensions: 720x480 (NTSC) or 720x576 (PAL)
- * - 16-frame GOPs with 9/7 spatial wavelet, Haar temporal
- * - Mandatory TAD audio
- * - LDPC rate 1/2 for headers, Reed-Solomon (255,223) for payloads
- *
- * Packet structure (revised 2025-12-17):
- * - Main header: 32 bytes raw (256 bits) -> 64 bytes LDPC encoded (512 bits, rate 256/512)
- *   Layout: sync(4) + fps(1) + flags(1) + reserved(2) + size(4) + timecode(8) + offset(4) + reserved(4) + crc(4)
- *   CRC covers bytes 0-27 (everything except CRC itself)
- * - TAD subpacket: header 16 bytes raw (128 bits) -> 32 bytes LDPC (256 bits, rate 128/256), + RS-encoded payload
- *   Layout: sample_count(2) + quant_bits(1) + reserved(2) + compressed_size(4) + rs_block_count(3) + crc(4)
- * - TAV subpacket: header 16 bytes raw (128 bits) -> 32 bytes LDPC (256 bits, rate 128/256), + RS-encoded payload
- *   Layout: sync(4) + gop_size(1) + compressed_size(4) + rs_block_count(3) + crc(4)
- * - No packet type bytes - always audio then video
- *
- * Created by CuriousTorvald and Claude on 2025-12-09.
- * Revised 2025-12-17 for power-of-two header sizes, subpacket CRCs, and TAV subpacket sync.
- */
-
-#define _POSIX_C_SOURCE 200809L
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <getopt.h>
-#include <unistd.h>
-#include <sys/wait.h>
-#include <time.h>
-#include <math.h>
-#include <pthread.h>
-
-#include "tav_encoder_lib.h"
-#include "encoder_tad.h"
-#include "reed_solomon.h"
-#include "ldpc.h"
-#include "ldpc_payload.h"
-
-// FEC mode for payloads (stored in flags byte bit 2)
-#define FEC_MODE_RS   0    // Reed-Solomon (255,223) - default
-#define FEC_MODE_LDPC 1    // LDPC (255,223) - experimental
-
-// =============================================================================
-// Constants
-// =============================================================================
-
-// TAV-DT sync patterns (big endian)
-#define TAV_DT_SYNC_NTSC  0xE3537A1F
-#define TAV_DT_SYNC_PAL   0xD193A745
-
-// TAV-DT dimensions
-#define DT_WIDTH          720
-#define DT_HEIGHT_NTSC    480
-#define DT_HEIGHT_PAL     576
-
-// Fixed parameters
-#define DT_GOP_SIZE        16
-#define DT_SPATIAL_LEVELS  4
-#define DT_TEMPORAL_LEVELS 2
-
-#define DT_MAIN_HEADER_SIZE   28   // fps(1) + flags(1) + reserved(2) + size(4) + timecode(8) + offset(4) + reserved(4) + crc(4)
-#define DT_TAD_HEADER_SIZE    14   // sample_count(2) + quant_bits(1) + compressed_size(4) + rs_block_count(3) + crc(4)
-#define DT_TAV_HEADER_SIZE    14   // gop_size(1) + reserved(2) + compressed_size(4) + rs_block_count(3) + crc(4)
-
-// TAV subpacket sync pattern (big endian)
-#define TAV_SUBPACKET_SYNC    0xA3F7C91E
-
-// Quality level to quantiser mapping
-static const int QUALITY_Y[]  = {79, 47, 23, 11, 5, 2};
-static const int QUALITY_CO[] = {123, 108, 91, 76, 59, 29};
-static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39};
-
-// Audio samples per GOP (32kHz / framerate * gop_size)
-#define AUDIO_SAMPLE_RATE 32000
-
-// =============================================================================
-// Multithreading Structures
-// =============================================================================
-
-#define GOP_SLOT_EMPTY     0
-#define GOP_SLOT_READY     1
-#define GOP_SLOT_ENCODING  2
-#define GOP_SLOT_COMPLETE  3
-
-typedef struct {
-    // Input frames (copied from main thread)
-    uint8_t **rgb_frames;     // Frame data pointers [gop_size]
-    int *frame_numbers;       // Frame number array [gop_size]
-    int num_frames;           // Actual number of frames in this GOP
-    int gop_index;            // Sequential GOP index for ordering output
-
-    // Audio samples for this GOP
-    float *audio_samples;     // Interleaved stereo samples
-    size_t audio_sample_count;
-
-    // Output
-    tav_encoder_packet_t *packet;  // Encoded video packet
-    uint8_t *tad_output;           // Encoded audio data
-    size_t tad_size;               // Encoded audio size
-    int success;                   // 1 if encoding succeeded
-
-    // Encoder params (copy for thread safety)
-    tav_encoder_params_t params;
-
-    // Slot status
-    volatile int status;
-} gop_job_t;
-
-/**
- * Get number of available CPUs.
- */
-static int get_available_cpus(void) {
-#ifdef _SC_NPROCESSORS_ONLN
-    long nproc = sysconf(_SC_NPROCESSORS_ONLN);
-    if (nproc > 0) {
-        return (int)nproc;
-    }
-#endif
-    return 1;  // Fallback to single core
-}
-
-/**
- * Get default thread count (cap at 8)
- */
-static int get_default_thread_count(void) {
-    int available = get_available_cpus();
-    return available < 8 ? available : 8;
-}
-
-// =============================================================================
-// CRC-32
-// =============================================================================
-
-static uint32_t crc32_table[256];
-static int crc32_initialized = 0;
-
-static void init_crc32_table(void) {
-    if (crc32_initialized) return;
-    for (uint32_t i = 0; i < 256; i++) {
-        uint32_t crc = i;
-        for (int j = 0; j < 8; j++) {
-            if (crc & 1) {
-                crc = (crc >> 1) ^ 0xEDB88320;
-            } else {
-                crc >>= 1;
-            }
-        }
-        crc32_table[i] = crc;
-    }
-    crc32_initialized = 1;
-}
-
-static uint32_t calculate_crc32(const uint8_t *data, size_t length) {
-    init_crc32_table();
-    uint32_t crc = 0xFFFFFFFF;
-    for (size_t i = 0; i < length; i++) {
-        crc = (crc >> 8) ^ crc32_table[(crc ^ data[i]) & 0xFF];
-    }
-    return crc ^ 0xFFFFFFFF;
-}
-
-// =============================================================================
-// Encoder Context
-// =============================================================================
-
-typedef struct {
-    // Input/output
-    char *input_file;
-    char *output_file;
-    FILE *output_fp;
-
-    // Video encoder context
-    tav_encoder_context_t *video_ctx;
-
-    // Video parameters
-    int width;
-    int height;
-    int fps_num;
-    int fps_den;
-    int target_fps_num;   // Target output framerate numerator (0 = no conversion)
-    int target_fps_den;   // Target output framerate denominator
-    int original_fps_num; // Source framerate (always probed)
-    int original_fps_den;
-    int is_interlaced;
-    int is_pal;
-    int quality_index;
-
-    // Frame buffers
-    uint8_t **gop_frames;
-    int gop_frame_count;
-
-    // Audio buffer
-    float *audio_buffer;
-    size_t audio_buffer_samples;
-    size_t audio_buffer_capacity;
-
-    // Timecode
-    uint64_t current_timecode_ns;
-    int frame_number;
-
-    // Statistics
-    uint64_t packets_written;
-    uint64_t bytes_written;
-    uint64_t frames_encoded;
-
-    // Options
-    int verbose;
-    int encode_limit;
-    int fec_mode;                // FEC_MODE_RS or FEC_MODE_LDPC for payloads
-
-    // Multithreading
-    int num_threads;             // 0 = single-threaded, 1+ = num worker threads
-    gop_job_t *gop_jobs;         // Array of GOP job slots [num_threads]
-    pthread_t *worker_threads;   // Array of worker thread handles [num_threads]
-    pthread_mutex_t job_mutex;   // Mutex for job slot access
-    pthread_cond_t job_ready;    // Signal when a job slot is ready for encoding
-    pthread_cond_t job_complete; // Signal when a job slot is complete
-    volatile int shutdown_workers; // 1 when workers should exit
-
-    // Encoder params (template for worker threads)
-    tav_encoder_params_t enc_params;
-} dt_encoder_t;
-
-// =============================================================================
-// Utility Functions
-// =============================================================================
-
-static void print_usage(const char *program) {
-    printf("TAV-DT Encoder - Digital Tape Format with FEC\n");
-    printf("\nUsage: %s -i input.mp4 -o output.tavdt [options]\n\n", program);
-    printf("Required:\n");
-    printf("  -i, --input FILE     Input video file (via FFmpeg)\n");
-    printf("  -o, --output FILE    Output TAV-DT file\n");
-    printf("\nOptions:\n");
-    printf("  -q, --quality N      Quality level 0-5 (default: 3)\n");
-    printf("  -f, --fps NUM/DEN    Output framerate (e.g., 30/1, 24000/1001)\n");
-    printf("  --ntsc               Force NTSC format (720x480, default)\n");
-    printf("  --pal                Force PAL format (720x576)\n");
-    printf("  --interlaced         Interlaced output\n");
-    printf("  --ldpc-payload       Use LDPC(255,223) instead of RS(255,223) for payloads\n");
-    printf("                       (experimental: better at high error rates)\n");
-    printf("  --encode-limit N     Encode only N frames (for testing)\n");
-    printf("  -t, --threads N      Parallel encoding threads (default: min(8, available CPUs))\n");
-    printf("                       0 or 1 = single-threaded, 2-16 = multithreaded\n");
-    printf("  -v, --verbose        Verbose output\n");
-    printf("  -h, --help           Show this help\n");
-}
-
-// =============================================================================
-// FEC Block Encoding (RS or LDPC based on mode)
-// =============================================================================
-
-static size_t encode_fec_blocks(const uint8_t *data, size_t data_len, uint8_t *output, int fec_mode) {
-    if (fec_mode == FEC_MODE_LDPC) {
-        // Use LDPC(255,223) encoding
-        return ldpc_p_encode_blocks(data, data_len, output);
-    } else {
-        // Use RS(255,223) encoding (default)
-        size_t output_len = 0;
-        size_t remaining = data_len;
-        const uint8_t *src = data;
-        uint8_t *dst = output;
-
-        while (remaining > 0) {
-            size_t block_data = (remaining > RS_DATA_SIZE) ? RS_DATA_SIZE : remaining;
-            size_t encoded_len = rs_encode(src, block_data, dst);
-
-            // Pad to full block size for consistent block boundaries
-            if (encoded_len < RS_BLOCK_SIZE) {
-                memset(dst + encoded_len, 0, RS_BLOCK_SIZE - encoded_len);
-            }
-
-            src += block_data;
-            dst += RS_BLOCK_SIZE;
-            output_len += RS_BLOCK_SIZE;
-            remaining -= block_data;
-        }
-
-        return output_len;
-    }
-}
-
-// =============================================================================
-// Packet Writing
-// =============================================================================
-
-static int write_packet(dt_encoder_t *enc, uint64_t timecode_ns,
-                        const uint8_t *tad_data, size_t tad_size,
-                        const uint8_t *tav_data, size_t tav_size,
-                        int gop_size, uint16_t audio_samples, uint8_t audio_quant_bits) {
-
-    // Calculate RS block counts
-    uint32_t tad_rs_blocks = (tad_size + RS_DATA_SIZE - 1) / RS_DATA_SIZE;
-    uint32_t tav_rs_blocks = (tav_size + RS_DATA_SIZE - 1) / RS_DATA_SIZE;
-
-    // Calculate sizes
-    size_t tad_rs_size = tad_rs_blocks * RS_BLOCK_SIZE;
-    size_t tav_rs_size = tav_rs_blocks * RS_BLOCK_SIZE;
-
-    // Subpacket sizes: LDPC header + RS payload (TAV includes sync)
-    size_t tad_subpacket_size = DT_TAD_HEADER_SIZE * 2 + tad_rs_size;        // 28 + RS
-    size_t tav_subpacket_size = 4 + DT_TAV_HEADER_SIZE * 2 + tav_rs_size;    // sync(4) + 28 + RS
-
-    uint32_t offset_to_video = tad_subpacket_size;  // Offset from after main header to TAV sync
-    uint32_t packet_size = tad_subpacket_size + tav_subpacket_size;
-
-    // Build main header (28 bytes raw = 224 bits, sync written separately)
-    // Layout: fps(1) + flags(1) + reserved(2) + size(4) + timecode(8) + offset(4) + reserved(4) + crc(4)
-    // CRC is calculated over bytes 0-23 (everything except CRC itself)
-    uint8_t master_sync[4];
-    uint8_t header[DT_MAIN_HEADER_SIZE];  // 28 bytes
-    memset(header, 0, DT_MAIN_HEADER_SIZE);
-
-    // Write sync pattern in big-endian (network byte order)
-    uint32_t sync = enc->is_pal ? TAV_DT_SYNC_PAL : TAV_DT_SYNC_NTSC;
-    master_sync[0] = (sync >> 24) & 0xFF;
-    master_sync[1] = (sync >> 16) & 0xFF;
-    master_sync[2] = (sync >> 8) & 0xFF;
-    master_sync[3] = sync & 0xFF;
-
-    // FPS byte: encode framerate
-    uint8_t fps_byte;
-    if (enc->fps_den == 1) fps_byte = enc->fps_num;
-    else if (enc->fps_den == 1001) fps_byte = enc->fps_num / 1000;
-    else fps_byte = enc->fps_num / enc->fps_den;
-    header[0] = fps_byte;
-
-    // Flags byte
-    uint8_t flags = 0;
-    flags |= (enc->is_interlaced ? 0x01 : 0x00);
-    flags |= (enc->fps_den == 1001 ? 0x02 : 0x00);
-    flags |= ((enc->fec_mode & 0x01) << 2);  // FEC mode in bit 2
-    flags |= (enc->quality_index & 0x0F) << 4;
-    header[1] = flags;
-
-    // Reserved (2 bytes) at offset 2-3
-    header[2] = 0;
-    header[3] = 0;
-
-    // Packet size (4 bytes) at offset 4-7
-    memcpy(header + 4, &packet_size, 4);
-
-    // Timecode (8 bytes) at offset 8-15
-    memcpy(header + 8, &timecode_ns, 8);
-
-    // Offset to video (4 bytes) at offset 16-20
-    memcpy(header + 16, &offset_to_video, 4);
-
-    // Reserved (4 bytes) at offset 20-24
-    // Already zero from memset
-
-    // CRC-32 (4 bytes) at offset 24-27, calculated over bytes 0-23
-    uint32_t crc = calculate_crc32(header, 24);
-    memcpy(header + 24, &crc, 4);
-
-    // LDPC encode main header (28 -> 56 bytes, rate 224/448 bits)
-    uint8_t ldpc_header[DT_MAIN_HEADER_SIZE * 2];
-    ldpc_encode(header, DT_MAIN_HEADER_SIZE, ldpc_header);
-
-    // Build TAD subpacket header (14 bytes raw = 112 bits)
-    // Layout: sample_count(2) + quant_bits(1) + compressed_size(4) + rs_block_count(3) + crc(4)
-    uint8_t tad_header[DT_TAD_HEADER_SIZE];  // 14 bytes
-    memset(tad_header, 0, DT_TAD_HEADER_SIZE);
-
-    memcpy(tad_header + 0, &audio_samples, 2);
-    tad_header[2] = audio_quant_bits;
-    uint32_t tad_compressed_size = tad_size;
-    memcpy(tad_header + 3, &tad_compressed_size, 4);
-    // RS block count as uint24 at offset 7-9
-    tad_header[7] = tad_rs_blocks & 0xFF;
-    tad_header[8] = (tad_rs_blocks >> 8) & 0xFF;
-    tad_header[9] = (tad_rs_blocks >> 16) & 0xFF;
-    // CRC-32 (4 bytes) at offset 12-15, calculated over bytes 0-9
-    uint32_t tad_crc = calculate_crc32(tad_header, 10);
-    memcpy(tad_header + 10, &tad_crc, 4);
-
-    // LDPC encode TAD header (14 -> 28 bytes, rate 112/224 bits)
-    uint8_t ldpc_tad_header[DT_TAD_HEADER_SIZE * 2];
-    ldpc_encode(tad_header, DT_TAD_HEADER_SIZE, ldpc_tad_header);
-
-    // Build TAV subpacket header (14 bytes raw = 112 bits)
-    // Layout: sync(4) + gop_size(1) + compressed_size(4) + rs_block_count(3) + crc(4)
-    uint8_t tav_sync[4];
-    uint8_t tav_header[DT_TAV_HEADER_SIZE];  // 14 bytes
-    memset(tav_header, 0, DT_TAV_HEADER_SIZE);
-
-    // Write TAV subpacket sync pattern in big-endian
-    tav_sync[0] = (TAV_SUBPACKET_SYNC >> 24) & 0xFF;
-    tav_sync[1] = (TAV_SUBPACKET_SYNC >> 16) & 0xFF;
-    tav_sync[2] = (TAV_SUBPACKET_SYNC >> 8) & 0xFF;
-    tav_sync[3] = TAV_SUBPACKET_SYNC & 0xFF;
-
-    tav_header[0] = gop_size;
-    uint32_t tav_compressed_size = tav_size;
-    memcpy(tav_header + 3, &tav_compressed_size, 4);
-    // RS block count as uint24 at offset 7-9
-    tav_header[7] = tav_rs_blocks & 0xFF;
-    tav_header[8] = (tav_rs_blocks >> 8) & 0xFF;
-    tav_header[9] = (tav_rs_blocks >> 16) & 0xFF;
-    // CRC-32 (4 bytes) at offset 12-15, calculated over bytes 0-11
-    uint32_t tav_crc = calculate_crc32(tav_header, 10);
-    memcpy(tav_header + 10, &tav_crc, 4);
-
-    // LDPC encode TAV header (14 -> 28 bytes, rate 112/224 bits)
-    uint8_t ldpc_tav_header[DT_TAV_HEADER_SIZE * 2];
-    ldpc_encode(tav_header, DT_TAV_HEADER_SIZE, ldpc_tav_header);
-
-    // FEC encode payloads (RS or LDPC based on mode)
-    uint8_t *tad_rs_data = malloc(tad_rs_size);
-    uint8_t *tav_rs_data = malloc(tav_rs_size);
-
-    encode_fec_blocks(tad_data, tad_size, tad_rs_data, enc->fec_mode);
-    encode_fec_blocks(tav_data, tav_size, tav_rs_data, enc->fec_mode);
-
-    // Write everything
-    // Sync patterns are written separately (not LDPC-coded) per spec
-    fwrite(master_sync, 1, 4, enc->output_fp);               // Main sync (4 bytes)
-    fwrite(ldpc_header, 1, DT_MAIN_HEADER_SIZE * 2, enc->output_fp);  // LDPC header (56 bytes)
-    fwrite(ldpc_tad_header, 1, DT_TAD_HEADER_SIZE * 2, enc->output_fp);  // TAD LDPC header (28 bytes)
-    fwrite(tad_rs_data, 1, tad_rs_size, enc->output_fp);     // TAD RS payload
-    fwrite(tav_sync, 1, 4, enc->output_fp);                  // TAV sync (4 bytes)
-    fwrite(ldpc_tav_header, 1, DT_TAV_HEADER_SIZE * 2, enc->output_fp);  // TAV LDPC header (28 bytes)
-    fwrite(tav_rs_data, 1, tav_rs_size, enc->output_fp);     // TAV RS payload
-
-    size_t total_written = 4 + DT_MAIN_HEADER_SIZE * 2 + tad_subpacket_size + 4 + tav_subpacket_size;
-
-    if (enc->verbose) {
-        printf("GOP %lu: %d frames, header=%zu tad=%zu tav=%zu total=%zu bytes\n",
-               enc->packets_written + 1, gop_size,
-               (size_t)(DT_MAIN_HEADER_SIZE * 2), tad_subpacket_size, tav_subpacket_size, total_written);
-    }
-
-    free(tad_rs_data);
-    free(tav_rs_data);
-
-    enc->packets_written++;
-    enc->bytes_written += total_written;
-
-    return 0;
-}
-
-// =============================================================================
-// FFmpeg Integration
-// =============================================================================
-
-static FILE *spawn_ffmpeg_video(dt_encoder_t *enc, pid_t *pid) {
-    int pipefd[2];
-    if (pipe(pipefd) < 0) {
-        fprintf(stderr, "Error: Failed to create pipe\n");
-        return NULL;
-    }
-
-    *pid = fork();
-    if (*pid < 0) {
-        fprintf(stderr, "Error: Failed to fork\n");
-        close(pipefd[0]);
-        close(pipefd[1]);
-        return NULL;
-    }
-
-    if (*pid == 0) {
-        // Child process
-        close(pipefd[0]);
-        dup2(pipefd[1], STDOUT_FILENO);
-        close(pipefd[1]);
-
-        char video_size[32];
-        snprintf(video_size, sizeof(video_size), "%dx%d", enc->width, enc->height);
-
-        // Build fps filter prefix if conversion is requested
-        char fps_filter[128] = "";
-        if (enc->target_fps_num > 0 && enc->target_fps_den > 0 &&
-            enc->original_fps_num > 0 && enc->original_fps_den > 0) {
-            // Compare framerates
-            long long target_rate = (long long)enc->target_fps_num * enc->original_fps_den;
-            long long source_rate = (long long)enc->original_fps_num * enc->target_fps_den;
-
-            if (target_rate > source_rate) {
-                // Upsampling: use motion interpolation
-                snprintf(fps_filter, sizeof(fps_filter), "minterpolate=fps=%d/%d,",
-                         enc->target_fps_num, enc->target_fps_den);
-            } else if (target_rate < source_rate) {
-                // Downsampling: use fps filter
-                snprintf(fps_filter, sizeof(fps_filter), "fps=%d/%d,",
-                         enc->target_fps_num, enc->target_fps_den);
-            }
-            // If equal, fps_filter remains empty
-        }
-
-        // Use same filtergraph as reference TAV encoder
-        char vf[320];
-        snprintf(vf, sizeof(vf),
-                 "%sscale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d%s",
-                 fps_filter,
-                 enc->width, enc->height, enc->width, enc->height,
-                 enc->is_interlaced ? ",setfield=tff" : "");
-
-        execlp("ffmpeg", "ffmpeg",
-               "-hide_banner",
-               "-i", enc->input_file,
-               "-vf", vf,
-               "-pix_fmt", "rgb24",
-               "-f", "rawvideo",
-               "-an",
-               "-v", "warning",
-               "-",
-               (char*)NULL);
-
-        fprintf(stderr, "Error: Failed to execute FFmpeg\n");
-        exit(1);
-    }
-
-    close(pipefd[1]);
-    return fdopen(pipefd[0], "rb");
-}
-
-static FILE *spawn_ffmpeg_audio(dt_encoder_t *enc, pid_t *pid) {
-    int pipefd[2];
-    if (pipe(pipefd) < 0) {
-        fprintf(stderr, "Error: Failed to create pipe\n");
-        return NULL;
-    }
-
-    *pid = fork();
-    if (*pid < 0) {
-        fprintf(stderr, "Error: Failed to fork\n");
-        close(pipefd[0]);
-        close(pipefd[1]);
-        return NULL;
-    }
-
-    if (*pid == 0) {
-        // Child process
-        close(pipefd[0]);
-        dup2(pipefd[1], STDOUT_FILENO);
-        close(pipefd[1]);
-
-        execlp("ffmpeg", "ffmpeg",
-               "-i", enc->input_file,
-               "-f", "f32le",
-               "-acodec", "pcm_f32le",
-               "-ar", "32000",
-               "-ac", "2",
-               "-vn",
-               "-v", "warning",
-               "-",
-               (char*)NULL);
-
-        fprintf(stderr, "Error: Failed to execute FFmpeg\n");
-        exit(1);
-    }
-
-    close(pipefd[1]);
-    return fdopen(pipefd[0], "rb");
-}
-
-// =============================================================================
-// Multithreading Support
-// =============================================================================
-
-/**
- * Worker thread context - passed to worker_thread_main.
- */
-typedef struct {
-    dt_encoder_t *enc;
-    int thread_id;
-} worker_context_t;
-
-/**
- * Worker thread main function.
- * Continuously picks up jobs from the job pool and encodes them.
- */
-static void *worker_thread_main(void *arg) {
-    worker_context_t *wctx = (worker_context_t *)arg;
-    dt_encoder_t *enc = wctx->enc;
-    (void)wctx->thread_id;  // Unused but kept for debugging
-
-    while (1) {
-        pthread_mutex_lock(&enc->job_mutex);
-
-        // Wait for a job or shutdown signal
-        while (!enc->shutdown_workers) {
-            // Look for a job slot that is ready to encode
-            int found_job = -1;
-            for (int i = 0; i < enc->num_threads; i++) {
-                if (enc->gop_jobs[i].status == GOP_SLOT_READY) {
-                    enc->gop_jobs[i].status = GOP_SLOT_ENCODING;
-                    found_job = i;
-                    break;
-                }
-            }
-
-            if (found_job >= 0) {
-                pthread_mutex_unlock(&enc->job_mutex);
-
-                // Encode this GOP
-                gop_job_t *job = &enc->gop_jobs[found_job];
-
-                // Create thread-local encoder context
-                tav_encoder_context_t *ctx = tav_encoder_create(&job->params);
-                if (!ctx) {
-                    fprintf(stderr, "Failed to create encoder for GOP %d\n", job->gop_index);
-                    job->success = 0;
-                } else {
-                    // Encode video GOP
-                    int result = tav_encoder_encode_gop(ctx,
-                                                         (const uint8_t **)job->rgb_frames,
-                                                         job->num_frames, job->frame_numbers,
-                                                         &job->packet);
-                    job->success = (result >= 0 && job->packet != NULL);
-
-                    // Encode audio
-                    if (job->success && job->audio_sample_count > 0) {
-                        int max_index = tad32_quality_to_max_index(enc->quality_index);
-                        job->tad_size = tad32_encode_chunk(job->audio_samples, job->audio_sample_count,
-                                                           max_index, 1.0f, enc->enc_params.zstd_level,
-                                                           job->tad_output);
-                    }
-
-                    tav_encoder_free(ctx);
-                }
-
-                // Mark job as complete (reacquire lock for next iteration)
-                pthread_mutex_lock(&enc->job_mutex);
-                job->status = GOP_SLOT_COMPLETE;
-                pthread_cond_broadcast(&enc->job_complete);
-                // Keep lock held for next iteration of inner while loop
-                continue;  // Look for more jobs
-            }
-
-            // No job found, wait for signal
-            pthread_cond_wait(&enc->job_ready, &enc->job_mutex);
-        }
-
-        pthread_mutex_unlock(&enc->job_mutex);
-        break;  // Shutdown
-    }
-
-    free(wctx);
-    return NULL;
-}
-
-/**
- * Initialize multithreading resources.
- * Returns 0 on success, -1 on failure.
- */
-static int init_threading(dt_encoder_t *enc) {
-    if (enc->num_threads <= 0) {
-        return 0;  // Single-threaded mode
-    }
-
-    // Initialize mutex and condition variables
-    if (pthread_mutex_init(&enc->job_mutex, NULL) != 0) {
-        fprintf(stderr, "Error: Failed to initialize job mutex\n");
-        return -1;
-    }
-    if (pthread_cond_init(&enc->job_ready, NULL) != 0) {
-        fprintf(stderr, "Error: Failed to initialize job_ready cond\n");
-        pthread_mutex_destroy(&enc->job_mutex);
-        return -1;
-    }
-    if (pthread_cond_init(&enc->job_complete, NULL) != 0) {
-        fprintf(stderr, "Error: Failed to initialize job_complete cond\n");
-        pthread_cond_destroy(&enc->job_ready);
-        pthread_mutex_destroy(&enc->job_mutex);
-        return -1;
-    }
-
-    // Allocate job slots (one per thread)
-    enc->gop_jobs = calloc(enc->num_threads, sizeof(gop_job_t));
-    if (!enc->gop_jobs) {
-        fprintf(stderr, "Error: Failed to allocate job slots\n");
-        pthread_cond_destroy(&enc->job_complete);
-        pthread_cond_destroy(&enc->job_ready);
-        pthread_mutex_destroy(&enc->job_mutex);
-        return -1;
-    }
-
-    // Allocate worker thread handles
-    enc->worker_threads = malloc(enc->num_threads * sizeof(pthread_t));
-    if (!enc->worker_threads) {
-        fprintf(stderr, "Error: Failed to allocate thread handles\n");
-        free(enc->gop_jobs);
-        pthread_cond_destroy(&enc->job_complete);
-        pthread_cond_destroy(&enc->job_ready);
-        pthread_mutex_destroy(&enc->job_mutex);
-        return -1;
-    }
-
-    // Start worker threads
-    enc->shutdown_workers = 0;
-    for (int i = 0; i < enc->num_threads; i++) {
-        worker_context_t *wctx = malloc(sizeof(worker_context_t));
-        if (!wctx) {
-            fprintf(stderr, "Error: Failed to allocate worker context\n");
-            enc->shutdown_workers = 1;
-            pthread_cond_broadcast(&enc->job_ready);
-            for (int j = 0; j < i; j++) {
-                pthread_join(enc->worker_threads[j], NULL);
-            }
-            free(enc->worker_threads);
-            free(enc->gop_jobs);
-            pthread_cond_destroy(&enc->job_complete);
-            pthread_cond_destroy(&enc->job_ready);
-            pthread_mutex_destroy(&enc->job_mutex);
-            return -1;
-        }
-        wctx->enc = enc;
-        wctx->thread_id = i;
-
-        if (pthread_create(&enc->worker_threads[i], NULL, worker_thread_main, wctx) != 0) {
-            fprintf(stderr, "Error: Failed to create worker thread %d\n", i);
-            free(wctx);
-            enc->shutdown_workers = 1;
-            pthread_cond_broadcast(&enc->job_ready);
-            for (int j = 0; j < i; j++) {
-                pthread_join(enc->worker_threads[j], NULL);
-            }
-            free(enc->worker_threads);
-            free(enc->gop_jobs);
-            pthread_cond_destroy(&enc->job_complete);
-            pthread_cond_destroy(&enc->job_ready);
-            pthread_mutex_destroy(&enc->job_mutex);
-            return -1;
-        }
-    }
-
-    printf("Started %d worker threads for parallel GOP encoding\n", enc->num_threads);
-    return 0;
-}
-
-/**
- * Shutdown multithreading resources.
- */
-static void shutdown_threading(dt_encoder_t *enc) {
-    if (enc->num_threads <= 0) {
-        return;
-    }
-
-    // Signal workers to shutdown
-    pthread_mutex_lock(&enc->job_mutex);
-    enc->shutdown_workers = 1;
-    pthread_cond_broadcast(&enc->job_ready);
-    pthread_mutex_unlock(&enc->job_mutex);
-
-    // Wait for all workers to finish
-    for (int i = 0; i < enc->num_threads; i++) {
-        pthread_join(enc->worker_threads[i], NULL);
-    }
-
-    // Free job slots (and any remaining resources)
-    if (enc->gop_jobs) {
-        for (int i = 0; i < enc->num_threads; i++) {
-            if (enc->gop_jobs[i].packet) {
-                tav_encoder_free_packet(enc->gop_jobs[i].packet);
-            }
-        }
-        free(enc->gop_jobs);
-        enc->gop_jobs = NULL;
-    }
-
-    if (enc->worker_threads) {
-        free(enc->worker_threads);
-        enc->worker_threads = NULL;
-    }
-
-    pthread_cond_destroy(&enc->job_complete);
-    pthread_cond_destroy(&enc->job_ready);
-    pthread_mutex_destroy(&enc->job_mutex);
-}
-
-// =============================================================================
-// Main Encoding Loop
-// =============================================================================
-
-// Single-threaded encoding loop
-static int run_encoder_st(dt_encoder_t *enc, FILE *video_pipe, FILE *audio_pipe,
-                          pid_t video_pid __attribute__((unused)),
-                          pid_t audio_pid __attribute__((unused))) {
-    size_t frame_size = enc->width * enc->height * 3;
-    double gop_duration = (double)DT_GOP_SIZE * enc->fps_den / enc->fps_num;
-    size_t audio_samples_per_gop = (size_t)(AUDIO_SAMPLE_RATE * gop_duration) + 1024;
-
-    // TAD output buffer
-    size_t tad_buffer_size = audio_samples_per_gop * 2;
-    uint8_t *tad_output = malloc(tad_buffer_size);
-
-    enc->frame_number = 0;
-    enc->gop_frame_count = 0;
-    enc->current_timecode_ns = 0;
-
-    clock_t start_time = clock();
-
-    while (1) {
-        if (enc->encode_limit > 0 && enc->frame_number >= enc->encode_limit) {
-            break;
-        }
-
-        size_t bytes_read = fread(enc->gop_frames[enc->gop_frame_count], 1, frame_size, video_pipe);
-        if (bytes_read < frame_size) {
-            break;
-        }
-
-        enc->gop_frame_count++;
-        enc->frame_number++;
-
-        // Read corresponding audio
-        double frame_duration = (double)enc->fps_den / enc->fps_num;
-        size_t audio_samples_per_frame = (size_t)(AUDIO_SAMPLE_RATE * frame_duration);
-        size_t audio_bytes = audio_samples_per_frame * 2 * sizeof(float);
-
-        if (enc->audio_buffer_samples + audio_samples_per_frame > enc->audio_buffer_capacity) {
-            size_t new_capacity = enc->audio_buffer_capacity * 2;
-            float *new_buffer = realloc(enc->audio_buffer, new_capacity * 2 * sizeof(float));
-            if (new_buffer) {
-                enc->audio_buffer = new_buffer;
-                enc->audio_buffer_capacity = new_capacity;
-            }
-        }
-
-        size_t audio_read = fread(enc->audio_buffer + enc->audio_buffer_samples * 2,
-                                  1, audio_bytes, audio_pipe);
-        enc->audio_buffer_samples += audio_read / (2 * sizeof(float));
-
-        // Encode GOP when full
-        if (enc->gop_frame_count >= DT_GOP_SIZE) {
-            tav_encoder_packet_t *video_packet = NULL;
-            int frame_numbers[DT_GOP_SIZE];
-            for (int i = 0; i < DT_GOP_SIZE; i++) {
-                frame_numbers[i] = enc->frame_number - DT_GOP_SIZE + i;
-            }
-
-            int result = tav_encoder_encode_gop(enc->video_ctx,
-                                                 (const uint8_t **)enc->gop_frames,
-                                                 DT_GOP_SIZE, frame_numbers, &video_packet);
-
-            if (result < 0 || !video_packet) {
-                fprintf(stderr, "Error: Video encoding failed\n");
-                break;
-            }
-
-            int max_index = tad32_quality_to_max_index(enc->quality_index);
-            size_t tad_size = tad32_encode_chunk(enc->audio_buffer, enc->audio_buffer_samples,
-                                                  max_index, 1.0f, enc->enc_params.zstd_level,
-                                                  tad_output);
-
-            write_packet(enc, enc->current_timecode_ns,
-                         tad_output, tad_size,
-                         video_packet->data, video_packet->size,
-                         DT_GOP_SIZE, (uint16_t)enc->audio_buffer_samples, max_index);
-
-            enc->current_timecode_ns += (uint64_t)(gop_duration * 1e9);
-            enc->frames_encoded += DT_GOP_SIZE;
-            enc->gop_frame_count = 0;
-            enc->audio_buffer_samples = 0;
-
-            tav_encoder_free_packet(video_packet);
-
-            // Display progress
-            clock_t now = clock();
-            double elapsed = (double)(now - start_time) / CLOCKS_PER_SEC;
-            double fps = elapsed > 0 ? (double)enc->frame_number / elapsed : 0.0;
-            double duration = (double)enc->frame_number * enc->fps_den / enc->fps_num;
-            double bitrate = duration > 0 ? (ftell(enc->output_fp) * 8.0) / duration / 1000.0 : 0.0;
-            long gop_count = enc->frame_number / DT_GOP_SIZE;
-            size_t total_kb = ftell(enc->output_fp) / 1024;
-
-            printf("\rFrame %d | GOPs: %ld | %.1f fps | %.1f kbps | %zu KB    ",
-                   enc->frame_number, gop_count, fps, bitrate, total_kb);
-            fflush(stdout);
-        }
-    }
-
-    // Handle partial final GOP
-    if (enc->gop_frame_count > 0) {
-        tav_encoder_packet_t *video_packet = NULL;
-        int *frame_numbers = malloc(enc->gop_frame_count * sizeof(int));
-        for (int i = 0; i < enc->gop_frame_count; i++) {
-            frame_numbers[i] = enc->frame_number - enc->gop_frame_count + i;
-        }
-
-        int result = tav_encoder_encode_gop(enc->video_ctx,
-                                             (const uint8_t **)enc->gop_frames,
-                                             enc->gop_frame_count, frame_numbers, &video_packet);
-
-        if (result >= 0 && video_packet) {
-            int max_index = tad32_quality_to_max_index(enc->quality_index);
-            size_t tad_size = tad32_encode_chunk(enc->audio_buffer, enc->audio_buffer_samples,
-                                                  max_index, 1.0f, enc->enc_params.zstd_level,
-                                                  tad_output);
-
-            write_packet(enc, enc->current_timecode_ns,
-                         tad_output, tad_size,
-                         video_packet->data, video_packet->size,
-                         enc->gop_frame_count, (uint16_t)enc->audio_buffer_samples, max_index);
-
-            enc->frames_encoded += enc->gop_frame_count;
-            tav_encoder_free_packet(video_packet);
-        }
-        free(frame_numbers);
-    }
-
-    free(tad_output);
-    return 0;
-}
-
-// Multithreaded encoding loop
-static int run_encoder_mt(dt_encoder_t *enc, FILE *video_pipe, FILE *audio_pipe,
-                          pid_t video_pid __attribute__((unused)),
-                          pid_t audio_pid __attribute__((unused))) {
-    size_t frame_size = enc->width * enc->height * 3;
-    double gop_duration = (double)DT_GOP_SIZE * enc->fps_den / enc->fps_num;
-    // Calculate audio buffer size with generous padding to handle FFmpeg's audio delivery
-    // FFmpeg may deliver all audio for a GOP in the first read, so we need space for:
-    // 1. The expected GOP audio: AUDIO_SAMPLE_RATE * gop_duration
-    // 2. Worst-case per-frame variations: DT_GOP_SIZE * samples_per_frame
-    size_t expected_samples = (size_t)(AUDIO_SAMPLE_RATE * gop_duration);
-    size_t samples_per_frame = (size_t)(AUDIO_SAMPLE_RATE * enc->fps_den / enc->fps_num) + 1;
-    size_t audio_samples_per_gop = expected_samples + (DT_GOP_SIZE * samples_per_frame);
-    size_t tad_buffer_size = audio_samples_per_gop * 2;
-
-    // Initialize threading
-    if (init_threading(enc) < 0) {
-        return -1;
-    }
-
-    // Allocate per-slot frame buffers and audio buffers
-    for (int slot = 0; slot < enc->num_threads; slot++) {
-        enc->gop_jobs[slot].rgb_frames = malloc(DT_GOP_SIZE * sizeof(uint8_t*));
-        enc->gop_jobs[slot].frame_numbers = malloc(DT_GOP_SIZE * sizeof(int));
-        enc->gop_jobs[slot].audio_samples = malloc(audio_samples_per_gop * 2 * sizeof(float));
-        enc->gop_jobs[slot].tad_output = malloc(tad_buffer_size);
-
-        if (!enc->gop_jobs[slot].rgb_frames || !enc->gop_jobs[slot].frame_numbers ||
-            !enc->gop_jobs[slot].audio_samples || !enc->gop_jobs[slot].tad_output) {
-            fprintf(stderr, "Error: Failed to allocate job slot %d buffers\n", slot);
-            shutdown_threading(enc);
-            return -1;
-        }
-
-        for (int f = 0; f < DT_GOP_SIZE; f++) {
-            enc->gop_jobs[slot].rgb_frames[f] = malloc(frame_size);
-            if (!enc->gop_jobs[slot].rgb_frames[f]) {
-                fprintf(stderr, "Error: Failed to allocate frame buffer for slot %d\n", slot);
-                shutdown_threading(enc);
-                return -1;
-            }
-        }
-
-        // Copy encoder params for thread safety
-        enc->gop_jobs[slot].params = enc->enc_params;
-        enc->gop_jobs[slot].status = GOP_SLOT_EMPTY;
-        enc->gop_jobs[slot].num_frames = 0;
-        enc->gop_jobs[slot].audio_sample_count = 0;
-        enc->gop_jobs[slot].tad_size = 0;
-        enc->gop_jobs[slot].packet = NULL;
-        enc->gop_jobs[slot].success = 0;
-    }
-
-    printf("Encoding frames with %d threads...\n", enc->num_threads);
-    clock_t start_time = clock();
-
-    int current_slot = 0;
-    int next_gop_to_write = 0;
-    int current_gop_index = 0;
-    int frames_in_current_gop = 0;
-    int encoding_error = 0;
-    int eof_reached = 0;
-    enc->current_timecode_ns = 0;
-
-    while (!encoding_error && !eof_reached) {
-        // Step 1: Try to write any completed GOPs in order
-        pthread_mutex_lock(&enc->job_mutex);
-        while (!encoding_error) {
-            int found = -1;
-            for (int i = 0; i < enc->num_threads; i++) {
-                if (enc->gop_jobs[i].status == GOP_SLOT_COMPLETE &&
-                    enc->gop_jobs[i].gop_index == next_gop_to_write) {
-                    found = i;
-                    break;
-                }
-            }
-
-            if (found < 0) break;
-
-            gop_job_t *job = &enc->gop_jobs[found];
-            pthread_mutex_unlock(&enc->job_mutex);
-
-            // Write this GOP
-            if (job->success && job->packet) {
-                int max_index = tad32_quality_to_max_index(enc->quality_index);
-                write_packet(enc, enc->current_timecode_ns,
-                             job->tad_output, job->tad_size,
-                             job->packet->data, job->packet->size,
-                             job->num_frames, (uint16_t)job->audio_sample_count, max_index);
-
-                enc->current_timecode_ns += (uint64_t)(gop_duration * 1e9);
-                enc->frames_encoded += job->num_frames;
-
-                tav_encoder_free_packet(job->packet);
-                job->packet = NULL;
-
-                // Display progress
-                clock_t now = clock();
-                double elapsed = (double)(now - start_time) / CLOCKS_PER_SEC;
-                double fps = elapsed > 0 ? (double)enc->frames_encoded / elapsed : 0.0;
-                double duration = (double)enc->frames_encoded * enc->fps_den / enc->fps_num;
-                double bitrate = duration > 0 ? (ftell(enc->output_fp) * 8.0) / duration / 1000.0 : 0.0;
-                long gop_count = enc->frames_encoded / DT_GOP_SIZE;
-                size_t total_kb = ftell(enc->output_fp) / 1024;
-
-                printf("\rFrame %lu | GOPs: %ld | %.1f fps | %.1f kbps | %zu KB    ",
-                       enc->frames_encoded, gop_count, fps, bitrate, total_kb);
-                fflush(stdout);
-            }
-
-            pthread_mutex_lock(&enc->job_mutex);
-            job->status = GOP_SLOT_EMPTY;
-            job->num_frames = 0;
-            job->audio_sample_count = 0;
-            job->tad_size = 0;
-            next_gop_to_write++;
-        }
-        pthread_mutex_unlock(&enc->job_mutex);
-
-        if (encoding_error || eof_reached) break;
-
-        // Step 2: Fill current slot with frames
-        gop_job_t *slot = &enc->gop_jobs[current_slot];
-
-        // Wait for slot to be empty
-        pthread_mutex_lock(&enc->job_mutex);
-        while (slot->status != GOP_SLOT_EMPTY && !enc->shutdown_workers) {
-            // While waiting, check if we can write any completed GOPs
-            int wrote_something = 0;
-            for (int i = 0; i < enc->num_threads; i++) {
-                if (enc->gop_jobs[i].status == GOP_SLOT_COMPLETE &&
-                    enc->gop_jobs[i].gop_index == next_gop_to_write) {
-                    gop_job_t *job = &enc->gop_jobs[i];
-                    pthread_mutex_unlock(&enc->job_mutex);
-
-                    if (job->success && job->packet) {
-                        int max_index = tad32_quality_to_max_index(enc->quality_index);
-                        write_packet(enc, enc->current_timecode_ns,
-                                     job->tad_output, job->tad_size,
-                                     job->packet->data, job->packet->size,
-                                     job->num_frames, (uint16_t)job->audio_sample_count, max_index);
-
-                        enc->current_timecode_ns += (uint64_t)(gop_duration * 1e9);
-                        enc->frames_encoded += job->num_frames;
-
-                        tav_encoder_free_packet(job->packet);
-                        job->packet = NULL;
-                    }
-
-                    pthread_mutex_lock(&enc->job_mutex);
-                    job->status = GOP_SLOT_EMPTY;
-                    job->num_frames = 0;
-                    job->audio_sample_count = 0;
-                    job->tad_size = 0;
-                    next_gop_to_write++;
-                    wrote_something = 1;
-                    break;
-                }
-            }
-            if (!wrote_something) {
-                pthread_cond_wait(&enc->job_complete, &enc->job_mutex);
-            }
-        }
-        pthread_mutex_unlock(&enc->job_mutex);
-
-        // Reset audio accumulator only when starting a fresh GOP
-        if (frames_in_current_gop == 0) {
-            slot->audio_sample_count = 0;
-        }
-
-        // Read frames into the slot
-        while (frames_in_current_gop < DT_GOP_SIZE && !eof_reached) {
-            if (enc->encode_limit > 0 && enc->frame_number >= enc->encode_limit) {
-                eof_reached = 1;
-                break;
-            }
-
-            size_t bytes_read = fread(slot->rgb_frames[frames_in_current_gop], 1, frame_size, video_pipe);
-            if (bytes_read < frame_size) {
-                eof_reached = 1;
-                break;
-            }
-
-            slot->frame_numbers[frames_in_current_gop] = enc->frame_number;
-            enc->frame_number++;
-            frames_in_current_gop++;
-
-            // Read corresponding audio - read whatever is available up to buffer capacity
-            // Note: FFmpeg may buffer audio, so the first read might get multiple frames worth
-            size_t audio_buffer_capacity_samples = audio_samples_per_gop;
-            size_t audio_space_remaining = audio_buffer_capacity_samples - slot->audio_sample_count;
-
-            if (audio_space_remaining > 0) {
-                // Read up to the remaining buffer space
-                size_t max_read_bytes = audio_space_remaining * 2 * sizeof(float);
-                size_t audio_read = fread(slot->audio_samples + slot->audio_sample_count * 2,
-                                          1, max_read_bytes, audio_pipe);
-                slot->audio_sample_count += audio_read / (2 * sizeof(float));
-            }
-
-            // Submit GOP when full
-            if (frames_in_current_gop >= DT_GOP_SIZE) {
-                slot->num_frames = frames_in_current_gop;
-                slot->gop_index = current_gop_index;
-
-                pthread_mutex_lock(&enc->job_mutex);
-                slot->status = GOP_SLOT_READY;
-                pthread_cond_broadcast(&enc->job_ready);
-                pthread_mutex_unlock(&enc->job_mutex);
-
-                current_slot = (current_slot + 1) % enc->num_threads;
-                current_gop_index++;
-                frames_in_current_gop = 0;
-                break;  // Exit frame-reading loop to wait for next available slot
-            }
-        }
-    }
-
-    // Submit any partial GOP at EOF
-    if (frames_in_current_gop > 0) {
-        gop_job_t *slot = &enc->gop_jobs[current_slot];
-        slot->num_frames = frames_in_current_gop;
-        slot->gop_index = current_gop_index;
-
-        pthread_mutex_lock(&enc->job_mutex);
-        slot->status = GOP_SLOT_READY;
-        pthread_cond_broadcast(&enc->job_ready);
-        pthread_mutex_unlock(&enc->job_mutex);
-
-        current_gop_index++;
-    }
-
-    // Wait for all remaining GOPs to complete and write them
-    while (!encoding_error && next_gop_to_write < current_gop_index) {
-        pthread_mutex_lock(&enc->job_mutex);
-
-        int found = -1;
-        while (found < 0 && !encoding_error) {
-            for (int i = 0; i < enc->num_threads; i++) {
-                if (enc->gop_jobs[i].status == GOP_SLOT_COMPLETE &&
-                    enc->gop_jobs[i].gop_index == next_gop_to_write) {
-                    found = i;
-                    break;
-                }
-            }
-            if (found < 0) {
-                pthread_cond_wait(&enc->job_complete, &enc->job_mutex);
-            }
-        }
-
-        if (found >= 0) {
-            gop_job_t *job = &enc->gop_jobs[found];
-            pthread_mutex_unlock(&enc->job_mutex);
-
-            if (job->success && job->packet) {
-                int max_index = tad32_quality_to_max_index(enc->quality_index);
-                write_packet(enc, enc->current_timecode_ns,
-                             job->tad_output, job->tad_size,
-                             job->packet->data, job->packet->size,
-                             job->num_frames, (uint16_t)job->audio_sample_count, max_index);
-
-                enc->current_timecode_ns += (uint64_t)(gop_duration * 1e9);
-                enc->frames_encoded += job->num_frames;
-
-                tav_encoder_free_packet(job->packet);
-                job->packet = NULL;
-            }
-
-            pthread_mutex_lock(&enc->job_mutex);
-            job->status = GOP_SLOT_EMPTY;
-            job->num_frames = 0;
-            job->audio_sample_count = 0;
-            job->tad_size = 0;
-            next_gop_to_write++;
-            pthread_mutex_unlock(&enc->job_mutex);
-        } else {
-            pthread_mutex_unlock(&enc->job_mutex);
-        }
-    }
-
-    // Free per-slot buffers before shutdown
-    for (int slot = 0; slot < enc->num_threads; slot++) {
-        if (enc->gop_jobs[slot].rgb_frames) {
-            for (int f = 0; f < DT_GOP_SIZE; f++) {
-                free(enc->gop_jobs[slot].rgb_frames[f]);
-            }
-            free(enc->gop_jobs[slot].rgb_frames);
-        }
-        free(enc->gop_jobs[slot].frame_numbers);
-        free(enc->gop_jobs[slot].audio_samples);
-        free(enc->gop_jobs[slot].tad_output);
-    }
-
-    shutdown_threading(enc);
-
-    return encoding_error ? -1 : 0;
-}
-
-static int run_encoder(dt_encoder_t *enc) {
-    // Open output file
-    enc->output_fp = fopen(enc->output_file, "wb");
-    if (!enc->output_fp) {
-        fprintf(stderr, "Error: Cannot create output file: %s\n", enc->output_file);
-        return -1;
-    }
-
-    // Set up video encoder params
-    tav_encoder_params_init(&enc->enc_params, enc->width, enc->height);
-    enc->enc_params.fps_num = enc->fps_num;
-    enc->enc_params.fps_den = enc->fps_den;
-    enc->enc_params.wavelet_type = 1;           // CDF 9/7
-    enc->enc_params.temporal_wavelet = 255;     // Haar
-    enc->enc_params.decomp_levels = DT_SPATIAL_LEVELS;
-    enc->enc_params.temporal_levels = DT_TEMPORAL_LEVELS;
-    enc->enc_params.enable_temporal_dwt = 1;
-    enc->enc_params.gop_size = DT_GOP_SIZE;
-    enc->enc_params.quality_level = enc->quality_index;
-    enc->enc_params.quantiser_y = QUALITY_Y[enc->quality_index];
-    enc->enc_params.quantiser_co = QUALITY_CO[enc->quality_index];
-    enc->enc_params.quantiser_cg = QUALITY_CG[enc->quality_index];
-    enc->enc_params.entropy_coder = 1;          // EZBC
-    enc->enc_params.encoder_preset = 0x01;      // Sports mode
-    enc->enc_params.monoblock = 1;              // Force monoblock
-    enc->enc_params.verbose = enc->verbose;
-    enc->enc_params.zstd_level = -1; // disable Zstd
-
-    // For single-threaded mode, create a context to validate params
-    enc->video_ctx = tav_encoder_create(&enc->enc_params);
-    if (!enc->video_ctx) {
-        fprintf(stderr, "Error: Cannot create video encoder\n");
-        fclose(enc->output_fp);
-        return -1;
-    }
-
-    printf("Forced Monoblock mode (--monoblock)\n");
-
-    // Get actual parameters (may have been adjusted)
-    tav_encoder_get_params(enc->video_ctx, &enc->enc_params);
-
-    if (enc->verbose) {
-        printf("Auto-selected Haar temporal wavelet with sports mode (resolution: %dx%d = %d pixels, quantiser_y = %d)\n",
-               enc->width, enc->height, enc->width * enc->height, enc->enc_params.quantiser_y);
-    }
-
-    // Spawn FFmpeg for video
-    pid_t video_pid;
-    FILE *video_pipe = spawn_ffmpeg_video(enc, &video_pid);
-    if (!video_pipe) {
-        tav_encoder_free(enc->video_ctx);
-        fclose(enc->output_fp);
-        return -1;
-    }
-
-    // Spawn FFmpeg for audio
-    pid_t audio_pid;
-    FILE *audio_pipe = spawn_ffmpeg_audio(enc, &audio_pid);
-    if (!audio_pipe) {
-        fclose(video_pipe);
-        waitpid(video_pid, NULL, 0);
-        tav_encoder_free(enc->video_ctx);
-        fclose(enc->output_fp);
-        return -1;
-    }
-
-    // Allocate frame buffers for single-threaded mode
-    size_t frame_size = enc->width * enc->height * 3;
-    enc->gop_frames = malloc(DT_GOP_SIZE * sizeof(uint8_t *));
-    for (int i = 0; i < DT_GOP_SIZE; i++) {
-        enc->gop_frames[i] = malloc(frame_size);
-    }
-
-    // Audio buffer (enough for one GOP worth of audio)
-    double gop_duration = (double)DT_GOP_SIZE * enc->fps_den / enc->fps_num;
-    size_t audio_samples_per_gop = (size_t)(AUDIO_SAMPLE_RATE * gop_duration) + 1024;
-    enc->audio_buffer = malloc(audio_samples_per_gop * 2 * sizeof(float));
-    enc->audio_buffer_capacity = audio_samples_per_gop;
-    enc->audio_buffer_samples = 0;
-
-    clock_t start_time = clock();
-
-    // Run encoding
-    if (enc->num_threads > 0) {
-        printf("Multithreaded mode: %d threads\n", enc->num_threads);
-        run_encoder_mt(enc, video_pipe, audio_pipe, video_pid, audio_pid);
-    } else {
-        printf("Single-threaded mode\n");
-        run_encoder_st(enc, video_pipe, audio_pipe, video_pid, audio_pid);
-    }
-
-    clock_t end_time = clock();
-    double elapsed = (double)(end_time - start_time) / CLOCKS_PER_SEC;
-
-    // Print statistics
-    printf("\nEncoding complete%s:\n", enc->num_threads > 0 ? " (multithreaded)" : "");
-    printf("  Frames: %lu\n", enc->frames_encoded);
-    printf("  GOPs: %lu\n", enc->packets_written);
-    printf("  Output size: %lu bytes (%.2f MB)\n", enc->bytes_written, enc->bytes_written / 1048576.0);
-    printf("  Encoding speed: %.1f fps\n", enc->frames_encoded / elapsed);
-    if (enc->frames_encoded > 0) {
-        printf("  Bitrate: %.1f kbps\n",
-               enc->bytes_written * 8.0 / (enc->frames_encoded * enc->fps_den / enc->fps_num) / 1000.0);
-    }
-
-    // Cleanup
-    free(enc->audio_buffer);
-    for (int i = 0; i < DT_GOP_SIZE; i++) {
-        free(enc->gop_frames[i]);
-    }
-    free(enc->gop_frames);
-
-    fclose(video_pipe);
-    fclose(audio_pipe);
-    waitpid(video_pid, NULL, 0);
-    waitpid(audio_pid, NULL, 0);
-
-    tav_encoder_free(enc->video_ctx);
-    fclose(enc->output_fp);
-
-    return 0;
-}
-
-// =============================================================================
-// Main
-// =============================================================================
-
-int main(int argc, char **argv) {
-    dt_encoder_t enc;
-    memset(&enc, 0, sizeof(enc));
-
-    // Defaults
-    enc.width = DT_WIDTH;
-    enc.height = DT_HEIGHT_NTSC;
-    enc.fps_num = 24;
-    enc.fps_den = 1;
-    enc.quality_index = 3;
-    enc.is_pal = 0;
-    enc.is_interlaced = 0;
-    enc.num_threads = get_default_thread_count();  // Default: min(8, available CPUs)
-
-    // Initialize FEC libraries
-    rs_init();
-    ldpc_init();
-    ldpc_p_init();  // LDPC payload codec
-
-    static struct option long_options[] = {
-        {"input",        required_argument, 0, 'i'},
-        {"output",       required_argument, 0, 'o'},
-        {"quality",      required_argument, 0, 'q'},
-        {"fps",          required_argument, 0, 'f'},
-        {"threads",      required_argument, 0, 't'},
-        {"ntsc",         no_argument,       0, 'N'},
-        {"pal",          no_argument,       0, 'P'},
-        {"interlaced",   no_argument,       0, 'I'},
-        {"ldpc-payload", no_argument,       0, 'D'},
-        {"encode-limit", required_argument, 0, 'L'},
-        {"verbose",      no_argument,       0, 'v'},
-        {"help",         no_argument,       0, 'h'},
-        {0, 0, 0, 0}
-    };
-
-    int opt;
-    while ((opt = getopt_long(argc, argv, "i:o:q:f:t:vhNPI", long_options, NULL)) != -1) {
-        switch (opt) {
-            case 'i':
-                enc.input_file = optarg;
-                break;
-            case 'o':
-                enc.output_file = optarg;
-                break;
-            case 'q':
-                enc.quality_index = atoi(optarg);
-                if (enc.quality_index < 0) enc.quality_index = 0;
-                if (enc.quality_index > 5) enc.quality_index = 5;
-                break;
-            case 'f': {
-                int num, den = 1;
-                if (sscanf(optarg, "%d/%d", &num, &den) < 1) {
-                    fprintf(stderr, "Error: Invalid fps format. Use NUM or NUM/DEN\n");
-                    return 1;
-                }
-                enc.target_fps_num = num;
-                enc.target_fps_den = den;
-                enc.fps_num = num;
-                enc.fps_den = den;
-                break;
-            }
-            case 't': {
-                int threads = atoi(optarg);
-                if (threads < 0) {
-                    fprintf(stderr, "Error: Thread count must be positive\n");
-                    return 1;
-                }
-                // Both 0 and 1 mean single-threaded (use value 0 internally)
-                enc.num_threads = (threads <= 1) ? 0 : threads;
-                if (enc.num_threads > 16) enc.num_threads = 16;  // Cap at 16
-                break;
-            }
-            case 'N':
-                enc.is_pal = 0;
-                enc.height = DT_HEIGHT_NTSC;
-                break;
-            case 'P':
-                enc.is_pal = 1;
-                enc.height = DT_HEIGHT_PAL;
-                break;
-            case 'I':
-                enc.is_interlaced = 1;
-                break;
-            case 'D':
-                enc.fec_mode = FEC_MODE_LDPC;
-                break;
-            case 'L':
-                enc.encode_limit = atoi(optarg);
-                break;
-            case 'v':
-                enc.verbose = 1;
-                break;
-            case 'h':
-                print_usage(argv[0]);
-                return 0;
-            default:
-                print_usage(argv[0]);
-                return 1;
-        }
-    }
-
-    if (!enc.input_file || !enc.output_file) {
-        fprintf(stderr, "Error: Input and output files are required\n");
-        print_usage(argv[0]);
-        return 1;
-    }
-
-    // Probe input file for framerate (always probe to get original fps)
-    enc.original_fps_num = 24;
-    enc.original_fps_den = 1;
-    char probe_cmd[4096];
-    snprintf(probe_cmd, sizeof(probe_cmd),
-             "ffprobe -v error -select_streams v:0 -show_entries stream=r_frame_rate -of default=nw=1:nk=1 '%s'",
-             enc.input_file);
-
-    FILE *probe = popen(probe_cmd, "r");
-    if (probe) {
-        char line[256];
-        if (fgets(line, sizeof(line), probe)) {
-            if (sscanf(line, "%d/%d", &enc.original_fps_num, &enc.original_fps_den) != 2) {
-                enc.original_fps_num = 24;
-                enc.original_fps_den = 1;
-            }
-        }
-        pclose(probe);
-    }
-
-    // If user didn't specify target fps, use probed fps
-    if (enc.target_fps_num == 0) {
-        enc.fps_num = enc.original_fps_num;
-        enc.fps_den = enc.original_fps_den;
-    }
-
-    printf("\nTAV-DT Encoder (Revised Spec 2025-12-11)\n");
-    printf("  Format: %s %s\n", enc.is_pal ? "PAL" : "NTSC",
-           enc.is_interlaced ? "interlaced" : "progressive");
-    printf("  Resolution: %dx%d (internal: %dx%d)\n", enc.width, enc.height,
-           enc.width, enc.is_interlaced ? enc.height / 2 : enc.height);
-    printf("  Source framerate: %d/%d\n", enc.original_fps_num, enc.original_fps_den);
-
-    // Report fps conversion if enabled
-    if (enc.target_fps_num > 0) {
-        long long target_rate = (long long)enc.target_fps_num * enc.original_fps_den;
-        long long source_rate = (long long)enc.original_fps_num * enc.target_fps_den;
-
-        if (target_rate > source_rate) {
-            printf("  Framerate conversion: %d/%d -> %d/%d (minterpolate)\n",
-                   enc.original_fps_num, enc.original_fps_den,
-                   enc.target_fps_num, enc.target_fps_den);
-        } else if (target_rate < source_rate) {
-            printf("  Framerate conversion: %d/%d -> %d/%d (fps)\n",
-                   enc.original_fps_num, enc.original_fps_den,
-                   enc.target_fps_num, enc.target_fps_den);
-        }
-        // If equal, no conversion message needed
-    }
-    printf("  Quality: %d\n", enc.quality_index);
-    printf("  GOP size: %d\n", DT_GOP_SIZE);
-    printf("  Payload FEC: %s\n", enc.fec_mode == FEC_MODE_LDPC ? "LDPC(255,223)" : "RS(255,223)");
-    printf("  Threads: %d%s\n", enc.num_threads > 0 ? enc.num_threads : 1,
-           enc.num_threads > 0 ? " (multithreaded)" : " (single-threaded)");
-    printf("  Header sizes: main=%dB tad=%dB tav=%dB (after LDPC)\n",
-           DT_MAIN_HEADER_SIZE * 2, DT_TAD_HEADER_SIZE * 2, DT_TAV_HEADER_SIZE * 2);
-
-    return run_encoder(&enc);
-}
diff --git a/video_encoder/tav_inspector.c b/video_encoder/tav_inspector.c
deleted file mode 100644
index b51367b..0000000
--- a/video_encoder/tav_inspector.c
+++ /dev/null
@@ -1,1307 +0,0 @@
-// TAV Packet Inspector - Comprehensive packet analysis tool for TAV files
-// to compile: gcc -o tav_inspector tav_inspector.c lib/libfec.a -lzstd -lm
-// Created by CuriousTorvald and Claude on 2025-10-14
-// Updated 2025-12-02: Added TAV-DT (Digital Tape) format support
-// Updated 2025-12-17: Updated for revised TAV-DT spec (sync outside LDPC, LDPC 1/2 headers)
-#include <stdio.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include <getopt.h>
-#include <zstd.h>
-#include "lib/libfec/ldpc.h"
-
-// TAV-DT sync patterns (big endian)
-#define TAV_DT_SYNC_NTSC  0xE3537A1F  // 720x480
-#define TAV_DT_SYNC_PAL   0xD193A745  // 720x576
-#define TAV_DT_SYNC_TAV   0xA3F7C91E  // TAV subpacket sync
-
-// TAV-DT header sizes (revised spec 2025-12-11)
-// Sync patterns are written separately (NOT LDPC-coded)
-#define DT_MAIN_HEADER_RAW    28   // fps(1) + flags(1) + reserved(2) + size(4) + timecode(8) + offset(4) + reserved(4) + crc(4)
-#define DT_MAIN_HEADER_LDPC   56   // After LDPC 1/2 encoding
-#define DT_TAD_HEADER_RAW     14   // sample_count(2) + quant_bits(1) + compressed_size(4) + rs_block_count(3) + crc(4)
-#define DT_TAD_HEADER_LDPC    28   // After LDPC 1/2 encoding
-#define DT_TAV_HEADER_RAW     14   // gop_size(1) + reserved(2) + compressed_size(4) + rs_block_count(3) + crc(4)
-#define DT_TAV_HEADER_LDPC    28   // After LDPC 1/2 encoding
-
-// RS(255,223) parameters
-#define RS_DATA_LEN   223
-#define RS_PARITY_LEN 32
-
-// Frame mode constants (from TAV spec)
-#define FRAME_MODE_SKIP  0x00
-#define FRAME_MODE_INTRA 0x01
-#define FRAME_MODE_DELTA 0x02
-
-// Packet type constants
-#define TAV_PACKET_IFRAME         0x10
-#define TAV_PACKET_PFRAME         0x11
-#define TAV_PACKET_GOP_UNIFIED    0x12  // Unified 3D DWT GOP (all frames in single block)
-#define TAV_PACKET_GOP_UNIFIED_MOTION    0x13
-#define TAV_PACKET_PFRAME_RESIDUAL 0x14  // P-frame with MPEG-style residual coding (block motion compensation)
-#define TAV_PACKET_BFRAME_RESIDUAL 0x15  // B-frame with MPEG-style residual coding (bidirectional prediction)
-#define TAV_PACKET_PFRAME_ADAPTIVE 0x16  // P-frame with adaptive quad-tree block partitioning
-#define TAV_PACKET_BFRAME_ADAPTIVE 0x17  // B-frame with adaptive quad-tree block partitioning (bidirectional prediction)
-#define TAV_PACKET_AUDIO_MP2      0x20
-#define TAV_PACKET_AUDIO_PCM8     0x21
-#define TAV_PACKET_AUDIO_TAD      0x24
-#define TAV_PACKET_SUBTITLE       0x30  // Legacy SSF (frame-locked), also used for Font ROM upload
-#define TAV_PACKET_SUBTITLE_TC    0x31  // SSF-TC (timecode-based)
-#define TAV_PACKET_VIDEOTEX       0x3F  // Videotex (text-mode video)
-#define TAV_PACKET_AUDIO_TRACK    0x40
-#define TAV_PACKET_VIDEO_CH2_I    0x70
-#define TAV_PACKET_VIDEO_CH2_P    0x71
-#define TAV_PACKET_VIDEO_CH3_I    0x72
-#define TAV_PACKET_VIDEO_CH3_P    0x73
-#define TAV_PACKET_VIDEO_CH4_I    0x74
-#define TAV_PACKET_VIDEO_CH4_P    0x75
-#define TAV_PACKET_VIDEO_CH5_I    0x76
-#define TAV_PACKET_VIDEO_CH5_P    0x77
-#define TAV_PACKET_VIDEO_CH6_I    0x78
-#define TAV_PACKET_VIDEO_CH6_P    0x79
-#define TAV_PACKET_VIDEO_CH7_I    0x7A
-#define TAV_PACKET_VIDEO_CH7_P    0x7B
-#define TAV_PACKET_VIDEO_CH8_I    0x7C
-#define TAV_PACKET_VIDEO_CH8_P    0x7D
-#define TAV_PACKET_VIDEO_CH9_I    0x7E
-#define TAV_PACKET_VIDEO_CH9_P    0x7F
-#define TAV_PACKET_EXIF           0xE0
-#define TAV_PACKET_ID3V1          0xE1
-#define TAV_PACKET_ID3V2          0xE2
-#define TAV_PACKET_VORBIS_COMMENT 0xE3
-#define TAV_PACKET_CD_TEXT        0xE4
-#define TAV_PACKET_EXTENDED_HDR   0xEF
-#define TAV_PACKET_LOOP_START     0xF0
-#define TAV_PACKET_LOOP_END       0xF1
-#define TAV_PACKET_SCREEN_MASK    0xF2
-#define TAV_PACKET_GOP_SYNC       0xFC  // GOP sync packet (N frames decoded)
-#define TAV_PACKET_TIMECODE       0xFD
-#define TAV_PACKET_SYNC_NTSC      0xFE
-#define TAV_PACKET_SYNC           0xFF
-#define TAV_PACKET_NOOP           0x00
-
-// Statistics structure
-typedef struct {
-    int iframe_count;
-    int pframe_count;
-    int pframe_intra_count;
-    int pframe_delta_count;
-    int pframe_skip_count;
-    int gop_unified_count;
-    int gop_unified_motion_count;
-    int gop_sync_count;
-    int total_gop_frames;
-    int audio_count;
-    int audio_mp2_count;
-    int audio_pcm8_count;
-    int audio_tad_count;
-    int audio_track_count;
-    int subtitle_count;
-    int videotex_count;
-    int timecode_count;
-    int sync_count;
-    int sync_ntsc_count;
-    int extended_header_count;
-    int metadata_count;
-    int loop_point_count;
-    int mux_video_count;
-    int unknown_count;
-    uint64_t total_video_bytes;
-    uint64_t total_audio_bytes;
-    uint64_t audio_mp2_bytes;
-    uint64_t audio_pcm8_bytes;
-    uint64_t audio_tad_bytes;
-    uint64_t audio_track_bytes;
-    uint64_t videotex_bytes;
-} packet_stats_t;
-
-// Display options
-typedef struct {
-    int show_all;
-    int show_video;
-    int show_audio;
-    int show_subtitles;
-    int show_timecode;
-    int show_metadata;
-    int show_sync;
-    int show_extended;
-    int verbose;
-    int summary_only;
-} display_options_t;
-
-// Helper to read data from either file or DT payload buffer
-static size_t read_packet_data(void *dest, size_t size, size_t count, FILE *fp,
-                               uint8_t *payload, uint32_t payload_size, uint32_t *offset) {
-    if (payload) {
-        // DT mode: read from payload buffer
-        size_t bytes_to_read = size * count;
-        if (*offset + bytes_to_read > payload_size) {
-            return 0;  // Not enough data
-        }
-        memcpy(dest, payload + *offset, bytes_to_read);
-        *offset += bytes_to_read;
-        return count;
-    } else {
-        // TAV mode: read from file
-        return fread(dest, size, count, fp);
-    }
-}
-
-const char* get_packet_type_name(uint8_t type) {
-    switch (type) {
-        case TAV_PACKET_IFRAME: return "I-FRAME";
-        case TAV_PACKET_PFRAME: return "P-FRAME";
-        case TAV_PACKET_GOP_UNIFIED: return "GOP (3D DWT Unified)";
-        case TAV_PACKET_GOP_UNIFIED_MOTION: return "GOP (3D DWT Unified with Motion Data)";
-        case TAV_PACKET_PFRAME_RESIDUAL: return "P-FRAME (residual)";
-        case TAV_PACKET_BFRAME_RESIDUAL: return "B-FRAME (residual)";
-        case TAV_PACKET_PFRAME_ADAPTIVE: return "P-FRAME (quadtree)";
-        case TAV_PACKET_BFRAME_ADAPTIVE: return "B-FRAME (quadtree)";
-        case TAV_PACKET_AUDIO_MP2: return "AUDIO MP2";
-        case TAV_PACKET_AUDIO_PCM8: return "AUDIO PCM8 (zstd)";
-        case TAV_PACKET_AUDIO_TAD: return "AUDIO TAD (zstd)";
-        case TAV_PACKET_SUBTITLE: return "SUBTITLE (SSF frame-locked)";
-        case TAV_PACKET_SUBTITLE_TC: return "SUBTITLE (SSF-TC timecoded)";
-        case TAV_PACKET_VIDEOTEX: return "VIDEOTEX (text-mode video)";
-        case TAV_PACKET_AUDIO_TRACK: return "AUDIO TRACK (Separate MP2)";
-        case TAV_PACKET_EXIF: return "METADATA (EXIF)";
-        case TAV_PACKET_ID3V1: return "METADATA (ID3v1)";
-        case TAV_PACKET_ID3V2: return "METADATA (ID3v2)";
-        case TAV_PACKET_VORBIS_COMMENT: return "METADATA (Vorbis)";
-        case TAV_PACKET_CD_TEXT: return "METADATA (CD-Text)";
-        case TAV_PACKET_EXTENDED_HDR: return "EXTENDED HEADER";
-        case TAV_PACKET_LOOP_START: return "LOOP START";
-        case TAV_PACKET_LOOP_END: return "LOOP END";
-        case TAV_PACKET_SCREEN_MASK: return "SCREEN MASK";
-        case TAV_PACKET_GOP_SYNC: return "GOP SYNC";
-        case TAV_PACKET_TIMECODE: return "TIMECODE";
-        case TAV_PACKET_SYNC_NTSC: return "SYNC (NTSC)";
-        case TAV_PACKET_SYNC: return "SYNC";
-        case TAV_PACKET_NOOP: return "NO-OP";
-        default:
-            if (type >= 0x70 && type <= 0x7F) {
-                return "MUX VIDEO";
-            }
-            return "UNKNOWN";
-    }
-}
-
-int should_display_packet(uint8_t type, display_options_t *opts) {
-    if (opts->show_all) return 1;
-
-    if (opts->show_video && (type == TAV_PACKET_IFRAME || type == TAV_PACKET_PFRAME ||
-        type == TAV_PACKET_GOP_UNIFIED || type == TAV_PACKET_GOP_SYNC ||
-        (type >= 0x70 && type <= 0x7F))) return 1;
-    if (opts->show_audio && (type == TAV_PACKET_AUDIO_MP2 || type == TAV_PACKET_AUDIO_PCM8 ||
-        type == TAV_PACKET_AUDIO_TAD || type == TAV_PACKET_AUDIO_TRACK)) return 1;
-    if (opts->show_subtitles && (type == TAV_PACKET_SUBTITLE || type == TAV_PACKET_SUBTITLE_TC)) return 1;
-    if (opts->show_timecode && type == TAV_PACKET_TIMECODE) return 1;
-    if (opts->show_metadata && (type >= 0xE0 && type <= 0xE4)) return 1;
-    if (opts->show_sync && (type == TAV_PACKET_SYNC || type == TAV_PACKET_SYNC_NTSC)) return 1;
-    if (opts->show_extended && type == TAV_PACKET_EXTENDED_HDR) return 1;
-
-    return 0;
-}
-
-void print_subtitle_packet(FILE *fp, uint32_t size, int is_timecoded, int verbose) {
-    if (!verbose) {
-        fseek(fp, size, SEEK_CUR);
-        return;
-    }
-
-    // Read 24-bit index
-    uint32_t index = 0;
-    for (int i = 0; i < 3; i++) {
-        uint8_t byte;
-        if (fread(&byte, 1, 1, fp) != 1) return;
-        index |= (byte << (i * 8));
-    }
-
-    // Read timecode if SSF-TC (0x31)
-    uint64_t timecode_ns = 0;
-    int header_size = 4;  // 3 bytes index + 1 byte opcode
-    if (is_timecoded) {
-        uint8_t timecode_bytes[8];
-        if (fread(timecode_bytes, 1, 8, fp) != 8) return;
-        for (int i = 0; i < 8; i++) {
-            timecode_ns |= ((uint64_t)timecode_bytes[i]) << (i * 8);
-        }
-        header_size += 8;  // Add 8 bytes for timecode
-    }
-
-    uint8_t opcode;
-    if (fread(&opcode, 1, 1, fp) != 1) return;
-
-    printf(" [Index=%u", index);
-    if (is_timecoded) {
-        printf(", Time=%.3fs", timecode_ns / 1000000000.0);
-    }
-    printf(", Opcode=0x%02X", opcode);
-
-    switch (opcode) {
-        case 0x01: printf(" (SHOW)"); break;
-        case 0x02: printf(" (HIDE)"); break;
-        case 0x03: printf(" (MOVE)"); break;
-        case 0x80: printf(" (UPLOAD LOW FONT)"); break;
-        case 0x81: printf(" (UPLOAD HIGH FONT)"); break;
-        default:
-            if (opcode >= 0x10 && opcode <= 0x2F) printf(" (SHOW LANG)");
-            else if (opcode >= 0x30 && opcode <= 0x41) printf(" (REVEAL)");
-            break;
-    }
-    printf("]");
-
-    // Read and display text content for SHOW commands
-    int remaining = size - header_size;  // Already read index + timecode (if any) + opcode
-    if ((opcode == 0x01 || (opcode >= 0x10 && opcode <= 0x2F) || (opcode >= 0x30 && opcode <= 0x41)) && remaining > 0) {
-        char *text = malloc(remaining + 1);
-        if (text && fread(text, 1, remaining, fp) == remaining) {
-            text[remaining] = '\0';
-
-            // Truncate long text for display
-            /*if (remaining > 60) {
-                text[57] = '.';
-                text[58] = '.';
-                text[59] = '.';
-                text[60] = '\0';
-            }*/
-
-            // Clean up newlines and control characters for display
-            for (int i = 0; text[i]; i++) {
-                if (text[i] == '\n' || text[i] == '\r' || text[i] == '\t') {
-                    text[i] = ' ';
-                }
-            }
-
-            printf(" Text: \"%s\"", text);
-            free(text);
-        } else {
-            free(text);
-            fseek(fp, remaining, SEEK_CUR);
-        }
-    } else {
-        // Skip remaining payload for other opcodes
-        fseek(fp, remaining, SEEK_CUR);
-    }
-}
-
-void print_extended_header(FILE *fp, int verbose) {
-    uint16_t num_pairs;
-    if (fread(&num_pairs, sizeof(uint16_t), 1, fp) != 1) {
-        printf("ERROR: Failed to read KV pair count\n");
-        return;
-    }
-
-    printf(" - %u key-value pairs", num_pairs);
-    if (verbose) {
-        printf(":\n");
-    }
-
-    for (int i = 0; i < num_pairs; i++) {
-        char key[5] = {0};
-        uint8_t value_type;
-
-        if (fread(key, 1, 4, fp) != 4 || fread(&value_type, 1, 1, fp) != 1) {
-            if (verbose) printf("    ERROR: Failed to read KV pair %d\n", i);
-            break;
-        }
-
-        if (verbose) {
-            const char *value_type_str = "Unknown";
-            switch (value_type) {
-                case 0x00: value_type_str = "Int16"; break;
-                case 0x01: value_type_str = "Int24"; break;
-                case 0x02: value_type_str = "Int32"; break;
-                case 0x03: value_type_str = "Int48"; break;
-                case 0x04: value_type_str = "Int64"; break;
-                case 0x10: value_type_str = "Bytes"; break;
-            }
-
-            printf("    %.4s (type: %s (0x%02X)): ", key, value_type_str, value_type);
-        }
-
-
-        if (value_type == 0x04) {  // Uint64
-            uint64_t value;
-            if (fread(&value, sizeof(uint64_t), 1, fp) != 1) {
-                if (verbose) printf("ERROR reading value\n");
-                break;
-            }
-
-            if (verbose) {
-                if (strcmp(key, "CDAT") == 0) {
-                    time_t time_sec = value / 1000000ULL; // microseconds
-                    struct tm *time_info = gmtime(&time_sec);
-                    if (time_info) {
-                        char time_str[64];
-                        strftime(time_str, sizeof(time_str), "%a %b %d %H:%M:%S %Y UTC", time_info);
-                        printf("%s", time_str);
-                    }
-                } else {
-                    printf("%.6f seconds", value / 1000000000.0); // nanoseconds
-                }
-            }
-        } else if (value_type == 0x10) {  // Bytes
-            uint16_t length;
-            if (fread(&length, sizeof(uint16_t), 1, fp) != 1) {
-                if (verbose) printf("ERROR reading length\n");
-                break;
-            }
-
-            char *data = malloc(length + 1);
-            if (fread(data, 1, length, fp) != length) {
-                if (verbose) printf("ERROR reading data\n");
-                free(data);
-                break;
-            }
-
-            if (verbose) {
-                data[length] = '\0';
-
-                // Special handling for XFPS: show parsed framerate
-                if (strncmp(key, "XFPS", 4) == 0) {
-                    int num, den;
-                    if (sscanf(data, "%d/%d", &num, &den) == 2) {
-                        printf("%d/%d (%.3f fps)", num, den, (double)num / den);
-                    } else {
-                        printf("\"%s\"", data);
-                    }
-                } else {
-                    printf("\"%s\"", data);
-                }
-            }
-            free(data);
-        } else {
-            if (verbose) printf("Unknown type");
-        }
-
-        if (verbose && i < num_pairs - 1) {
-            printf("\n");
-        }
-    }
-}
-
-// Frame info structure
-typedef struct {
-    int mode;              // 0=SKIP, 1=INTRA, 2=DELTA, -1=error
-    uint8_t quantiser;     // Quantiser override (0xFF = default)
-} frame_info_t;
-
-// Read frame mode and quantiser from compressed frame data
-// Works for both I-frames and P-frames
-frame_info_t get_frame_info(FILE *fp, uint32_t compressed_size) {
-    frame_info_t info = {-1, 0xFF};
-
-    // Read compressed data
-    uint8_t *compressed_data = malloc(compressed_size);
-    if (!compressed_data) {
-        fseek(fp, compressed_size, SEEK_CUR);
-        return info;
-    }
-
-    if (fread(compressed_data, 1, compressed_size, fp) != compressed_size) {
-        free(compressed_data);
-        return info;
-    }
-
-    // Allocate buffer for decompression
-    // TAV frames are at most ~1.5MB decompressed, use 2MB to be safe
-    size_t const decompress_size = 2 * 1024 * 1024;  // 2MB
-    uint8_t *decompressed_data = malloc(decompress_size);
-    if (!decompressed_data) {
-        free(compressed_data);
-        return info;
-    }
-
-    // Decompress
-    size_t actual_size = ZSTD_decompress(decompressed_data, decompress_size, compressed_data, compressed_size);
-    free(compressed_data);
-
-    if (ZSTD_isError(actual_size) || actual_size < 2) {
-        free(decompressed_data);
-        return info;
-    }
-
-    // Read mode byte (first byte of decompressed data)
-    info.mode = decompressed_data[0];
-
-    // Read quantiser override (second byte) if mode is not SKIP
-    if (info.mode != FRAME_MODE_SKIP && actual_size >= 2) {
-        info.quantiser = decompressed_data[1];
-    }
-
-    free(decompressed_data);
-    return info;
-}
-
-void print_help(const char *program_name) {
-    printf("TAV Packet Inspector - Comprehensive packet analysis tool\n");
-    printf("Usage: %s [options] <tav_file>\n\n", program_name);
-    printf("Options:\n");
-    printf("  -a, --all          Show all packets (default)\n");
-    printf("  -v, --video        Show video packets only\n");
-    printf("  -u, --audio        Show audio packets only\n");
-    printf("  -s, --subtitles    Show subtitle packets only\n");
-    printf("  -t, --timecode     Show timecode packets only\n");
-    printf("  -m, --metadata     Show metadata packets only\n");
-    printf("  -x, --extended     Show extended header only\n");
-    printf("  -S, --sync         Show sync packets\n");
-    printf("  --summary          Show summary statistics only\n");
-    printf("  -h, --help         Show this help\n\n");
-    printf("Examples:\n");
-    printf("  %s video.mv3                    # Show all packets\n", program_name);
-    printf("  %s -v video.mv3                 # Show video packets only\n", program_name);
-    printf("  %s -V video.mv3                 # Verbose output\n", program_name);
-    printf("  %s --summary video.mv3          # Statistics only\n", program_name);
-}
-
-int main(int argc, char *argv[]) {
-    display_options_t opts = {0};
-    opts.show_all = 1;  // Default: show all
-
-    // Track absolute frame number
-    int current_frame = 0;
-
-    static struct option long_options[] = {
-        {"all", no_argument, 0, 'a'},
-        {"video", no_argument, 0, 'v'},
-        {"audio", no_argument, 0, 'u'},
-        {"subtitles", no_argument, 0, 's'},
-        {"timecode", no_argument, 0, 't'},
-        {"metadata", no_argument, 0, 'm'},
-        {"extended", no_argument, 0, 'x'},
-        {"sync", no_argument, 0, 'S'},
-        {"summary", no_argument, 0, 1000},
-        {"help", no_argument, 0, 'h'},
-        {0, 0, 0, 0}
-    };
-
-    int c;
-    while ((c = getopt_long(argc, argv, "avustmxSVh", long_options, NULL)) != -1) {
-        switch (c) {
-            case 'a': opts.show_all = 1; break;
-            case 'v': opts.show_video = 1; opts.show_all = 0; break;
-            case 'u': opts.show_audio = 1; opts.show_all = 0; break;
-            case 's': opts.show_subtitles = 1; opts.show_all = 0; break;
-            case 't': opts.show_timecode = 1; opts.show_all = 0; break;
-            case 'm': opts.show_metadata = 1; opts.show_all = 0; break;
-            case 'x': opts.show_extended = 1; opts.show_all = 0; break;
-            case 'S': opts.show_sync = 1; opts.show_all = 0; break;
-            case 1000: opts.summary_only = 1; break;
-            case 'h':
-                print_help(argv[0]);
-                return 0;
-            default:
-                print_help(argv[0]);
-                return 1;
-        }
-    }
-
-    opts.verbose = 1;
-
-    if (optind >= argc) {
-        fprintf(stderr, "Error: No input file specified\n\n");
-        print_help(argv[0]);
-        return 1;
-    }
-
-    const char *filename = argv[optind];
-    FILE *fp = fopen(filename, "rb");
-    if (!fp) {
-        fprintf(stderr, "Error: Cannot open file %s\n", filename);
-        return 1;
-    }
-
-    // Parse and display header
-    if (!opts.summary_only) {
-        printf("TAV Packet Inspector\n");
-        printf("File: %s\n", filename);
-        printf("==================================================\n\n");
-    }
-
-    // Detect format: TAV (with magic) or TAV-DT (with sync pattern)
-    uint8_t header[32];
-    int is_dt_format = 0;
-    uint16_t dt_width = 0, dt_height = 0;
-    uint8_t dt_framerate = 0;
-    uint8_t dt_quality = 0;
-    int dt_is_interlaced = 0;
-    int dt_is_ntsc_framerate = 0;
-
-    // Read first 4 bytes to check format
-    uint8_t format_check[4];
-    if (fread(format_check, 1, 4, fp) != 4) {
-        fprintf(stderr, "Error: Failed to read file header\n");
-        fclose(fp);
-        return 1;
-    }
-
-    // Check if it's a TAV-DT sync pattern
-    uint32_t sync = (format_check[0] << 24) | (format_check[1] << 16) |
-                    (format_check[2] << 8) | format_check[3];
-
-    if (sync == TAV_DT_SYNC_NTSC || sync == TAV_DT_SYNC_PAL) {
-        // TAV-DT format detected (revised spec 2025-12-11)
-        // Structure: sync(4) + LDPC_header(56) + TAD_LDPC_header(28) + TAD_RS_payload + TAV_sync(4) + TAV_LDPC_header(28) + TAV_RS_payload
-        is_dt_format = 1;
-        dt_width = 720;
-        dt_height = (sync == TAV_DT_SYNC_NTSC) ? 480 : 576;
-
-        // Initialize LDPC decoder
-        ldpc_init();
-
-        // Read LDPC-coded main header (56 bytes)
-        uint8_t ldpc_header[DT_MAIN_HEADER_LDPC];
-        if (fread(ldpc_header, 1, DT_MAIN_HEADER_LDPC, fp) != DT_MAIN_HEADER_LDPC) {
-            fprintf(stderr, "Error: Failed to read TAV-DT LDPC header\n");
-            fclose(fp);
-            return 1;
-        }
-
-        // Decode LDPC to get raw header (28 bytes)
-        uint8_t raw_header[DT_MAIN_HEADER_RAW];
-        int ldpc_result = ldpc_decode(ldpc_header, DT_MAIN_HEADER_LDPC, raw_header);
-
-        // Parse raw header fields:
-        // fps(1) + flags(1) + reserved(2) + size(4) + timecode(8) + offset(4) + reserved(4) + crc(4)
-        dt_framerate = raw_header[0];
-        uint8_t flags = raw_header[1];
-        dt_is_interlaced = flags & 0x01;
-        dt_is_ntsc_framerate = flags & 0x02;
-        dt_quality = (flags >> 4) & 0x0F;
-
-        uint32_t packet_size = raw_header[4] | (raw_header[5] << 8) |
-                               (raw_header[6] << 16) | (raw_header[7] << 24);
-        uint64_t timecode_ns = 0;
-        for (int i = 0; i < 8; i++) {
-            timecode_ns |= ((uint64_t)raw_header[8 + i]) << (i * 8);
-        }
-        uint32_t offset_to_video = raw_header[16] | (raw_header[17] << 8) |
-                                   (raw_header[18] << 16) | (raw_header[19] << 24);
-        uint32_t header_crc = raw_header[24] | (raw_header[25] << 8) |
-                              (raw_header[26] << 16) | (raw_header[27] << 24);
-
-        // Rewind to start of first packet so the loop can process it
-        fseek(fp, -(4 + DT_MAIN_HEADER_LDPC), SEEK_CUR);
-
-        if (!opts.summary_only) {
-            printf("TAV-DT Header (Digital Tape Streaming Format, revised 2025-12-11):\n");
-            printf("  Format:           %s %s\n",
-                   (sync == TAV_DT_SYNC_NTSC) ? "NTSC" : "PAL",
-                   dt_is_interlaced ? "interlaced" : "progressive");
-            printf("  Resolution:       %dx%d\n", dt_width, dt_height);
-            printf("  Frame rate:       %d fps", dt_framerate);
-            if (dt_is_ntsc_framerate) printf(" (NTSC)");
-            printf("\n");
-            printf("  Quality index:    %d (0-5)\n", dt_quality);
-            printf("  LDPC decode:      %s\n", ldpc_result == 0 ? "OK" : "FAILED (using best-effort)");
-            printf("  Header CRC:       0x%08X\n", header_crc);
-            printf("  First packet:     %u bytes, timecode=%.3fs, video_offset=%u\n",
-                   packet_size, timecode_ns / 1000000000.0, offset_to_video);
-            printf("  Packet structure:\n");
-            printf("    Main sync:      4 bytes (not LDPC)\n");
-            printf("    Main header:    %d bytes LDPC (%d bytes raw)\n", DT_MAIN_HEADER_LDPC, DT_MAIN_HEADER_RAW);
-            printf("    TAD header:     %d bytes LDPC (%d bytes raw)\n", DT_TAD_HEADER_LDPC, DT_TAD_HEADER_RAW);
-            printf("    TAD payload:    RS(255,223) coded\n");
-            printf("    TAV sync:       4 bytes (not LDPC)\n");
-            printf("    TAV header:     %d bytes LDPC (%d bytes raw)\n", DT_TAV_HEADER_LDPC, DT_TAV_HEADER_RAW);
-            printf("    TAV payload:    RS(255,223) coded\n");
-            printf("  Wavelet:          1 (CDF 9/7, fixed for DT)\n");
-            printf("  Decomp levels:    4 spatial + 2 temporal (fixed for DT)\n");
-            printf("  Entropy coder:    EZBC (fixed for DT)\n");
-            printf("  Channel layout:   YCoCg-R (fixed for DT)\n");
-            printf("\n");
-        }
-    } else {
-        // Regular TAV format - rewind and read full header
-        rewind(fp);
-        if (fread(header, 1, 32, fp) != 32) {
-            fprintf(stderr, "Error: Failed to read TAV header\n");
-            fclose(fp);
-            return 1;
-        }
-
-        // Verify magic number
-        const char *magic = "\x1F\x54\x53\x56\x4D\x54\x41\x56";  // "\x1FTSVM TAV"
-        if (memcmp(header, magic, 8) != 0) {
-            fprintf(stderr, "Error: Invalid TAV magic number\n");
-            fclose(fp);
-            return 1;
-        }
-    }
-
-    if (!opts.summary_only && !is_dt_format) {
-        // Parse header fields (TAV format only)
-        uint8_t version = header[8];
-        uint8_t base_version = (version > 8) ? (version - 8) : version;
-        uint8_t temporal_motion_coder = (version > 8) ? 1 : 0;
-        uint16_t width = *((uint16_t*)&header[9]);
-        uint16_t height = *((uint16_t*)&header[11]);
-        uint8_t fps = header[13];
-        uint32_t total_frames = *((uint32_t*)&header[14]);
-        uint8_t wavelet = header[18];
-        uint8_t decomp_levels = header[19];
-        uint8_t quant_y = header[20];
-        uint8_t quant_co = header[21];
-        uint8_t quant_cg = header[22];
-        uint8_t extra_flags = header[23];
-        uint8_t video_flags = header[24];
-        uint8_t quality = header[25];
-        uint8_t channel_layout = header[26];
-        uint8_t entropy_coder = header[27];
-        uint8_t encoder_preset = header[28];
-
-static const int QLUT[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096};
-static const char* CLAYOUT[] = {"Luma-Chroma", "Luma-Chroma-Alpha", "Luma", "Luma-Alpha", "Chroma", "Chroma-Alpha"};
-
-        int is_monoblock = (3 <= base_version && base_version <= 6);
-        int is_perceptual = (5 <= base_version && base_version <= 8);
-
-static const char* VERDESC[] = {"null", "YCoCg tiled, uniform", "ICtCp tiled, uniform", "YCoCg monoblock, uniform", "ICtCp monoblock, uniform", "YCoCg monoblock, perceptual", "ICtCp monoblock, perceptual", "YCoCg tiled, perceptual", "ICtCp tiled, perceptual"};
-static const char* TEMPORAL_WAVELET[] = {"Haar", "CDF 5/3"};
-
-        printf("TAV Header:\n");
-        printf("  Version:          %d (base: %d - %s, temporal: %s)\n",
-               version, base_version, VERDESC[base_version], TEMPORAL_WAVELET[temporal_motion_coder]);
-        printf("  Resolution:       %dx%d\n", width, height);
-        if (fps == 0xFF) {
-            printf("  Frame rate:       (extended - see XFPS in extended header)\n");
-        } else if (fps == 0) {
-            printf("  Frame rate:       (still image)\n");
-        } else {
-            printf("  Frame rate:       %d fps", fps);
-            if (video_flags & 0x02) printf(" (NTSC)");
-            printf("\n");
-        }
-        printf("  Total frames:     %u\n", total_frames);
-        printf("  Wavelet:          %d", wavelet);
-        const char *wavelet_names[] = {"LGT 5/3", "CDF 9/7", "CDF 13/7", "Reserved", "Reserved",
-                                       "Reserved", "Reserved", "Reserved", "Reserved",
-                                       "Reserved", "Reserved", "Reserved", "Reserved",
-                                       "Reserved", "Reserved", "Reserved", "DD-4"};
-        if (wavelet < 17) printf(" (%s)", wavelet_names[wavelet == 16 ? 16 : (wavelet > 16 ? wavelet : wavelet)]);
-        if (wavelet == 255) printf(" (Haar)");
-        printf("\n");
-        printf("  Decomp levels:    %d\n", decomp_levels);
-        printf("  Quantisers:       Y=%d, Co=%d, Cg=%d (Index=%d,%d,%d)\n", QLUT[quant_y], QLUT[quant_co], QLUT[quant_cg], quant_y, quant_co, quant_cg);
-        if (quality > 0)
-            printf("  Quality:          %d\n", quality - 1);
-        else
-            printf("  Quality:          n/a\n");
-        printf("  Channel layout:   %s\n", CLAYOUT[channel_layout]);
-        printf("  Entropy coder:    %s\n", entropy_coder == 0 ? "Twobit-map" : "EZBC");
-        printf("  Encoder preset:   ");
-        if (encoder_preset == 0) {
-            printf("Default\n");
-        } else {
-            int first = 1;
-            if (encoder_preset & 0x01) {
-                printf("%sSports", first ? "" : ", ");
-                first = 0;
-            }
-            if (encoder_preset & 0x02) {
-                printf("%sAnime", first ? "" : ", ");
-                first = 0;
-            }
-            printf("\n");
-        }
-        printf("  Flags:\n");
-        printf("    Has audio:      %s\n", (extra_flags & 0x01) ? "Yes" : "No");
-        printf("    Has subtitles:  %s\n", (extra_flags & 0x02) ? "Yes" : "No");
-        printf("    Progressive:    %s\n", (video_flags & 0x01) ? "No (interlaced)" : "Yes");
-        printf("    Lossless:       %s\n", (video_flags & 0x04) ? "Yes" : "No");
-        if (extra_flags & 0x04) printf("    Progressive TX: Enabled\n");
-        if (extra_flags & 0x08) printf("    ROI encoding:   Enabled\n");
-        printf("\nPackets:\n");
-        printf("==================================================\n");
-    }
-
-    packet_stats_t stats = {0};
-    int packet_num = 0;
-
-    while (!feof(fp)) {
-        long packet_offset = ftell(fp);
-        uint8_t packet_type;
-        uint8_t *packet_payload = NULL;
-        uint32_t payload_size = 0;
-        uint32_t payload_offset = 1;  // Start at 1 to skip packet type byte in DT mode
-
-        if (is_dt_format) {
-            // TAV-DT (revised spec 2025-12-11):
-            // Structure: sync(4) + LDPC_main_header(56) + LDPC_tad_header(28) + TAD_RS_payload + TAV_sync(4) + LDPC_tav_header(28) + TAV_RS_payload
-
-            // Read main sync (4 bytes)
-            uint8_t sync_bytes[4];
-            if (fread(sync_bytes, 1, 4, fp) != 4) break;
-
-            uint32_t sync_check = (sync_bytes[0] << 24) | (sync_bytes[1] << 16) |
-                                  (sync_bytes[2] << 8) | sync_bytes[3];
-
-            // Verify sync pattern
-            if (sync_check != TAV_DT_SYNC_NTSC && sync_check != TAV_DT_SYNC_PAL) {
-                if (!opts.summary_only) {
-                    fprintf(stderr, "Warning: Invalid sync pattern 0x%08X at offset 0x%lX\n",
-                           sync_check, packet_offset);
-                }
-                break;
-            }
-
-            // Read and decode main LDPC header (56 bytes -> 28 bytes raw)
-            uint8_t ldpc_main[DT_MAIN_HEADER_LDPC];
-            if (fread(ldpc_main, 1, DT_MAIN_HEADER_LDPC, fp) != DT_MAIN_HEADER_LDPC) break;
-
-            uint8_t raw_main[DT_MAIN_HEADER_RAW];
-            int main_ldpc_ok = (ldpc_decode(ldpc_main, DT_MAIN_HEADER_LDPC, raw_main) == 0);
-
-            // Parse main header: fps(1) + flags(1) + reserved(2) + size(4) + timecode(8) + offset(4) + reserved(4) + crc(4)
-            uint32_t packet_size_total = raw_main[4] | (raw_main[5] << 8) |
-                                         (raw_main[6] << 16) | (raw_main[7] << 24);
-            uint64_t timecode_ns = 0;
-            for (int i = 0; i < 8; i++) {
-                timecode_ns |= ((uint64_t)raw_main[8 + i]) << (i * 8);
-            }
-            uint32_t offset_to_video = raw_main[16] | (raw_main[17] << 8) |
-                                       (raw_main[18] << 16) | (raw_main[19] << 24);
-
-            // Read and decode TAD LDPC header (28 bytes -> 14 bytes raw)
-            uint8_t ldpc_tad[DT_TAD_HEADER_LDPC];
-            if (fread(ldpc_tad, 1, DT_TAD_HEADER_LDPC, fp) != DT_TAD_HEADER_LDPC) break;
-
-            uint8_t raw_tad[DT_TAD_HEADER_RAW];
-            int tad_ldpc_ok = (ldpc_decode(ldpc_tad, DT_TAD_HEADER_LDPC, raw_tad) == 0);
-
-            // Parse TAD header: sample_count(2) + quant_bits(1) + compressed_size(4) + rs_block_count(3) + crc(4)
-            uint16_t tad_sample_count = raw_tad[0] | (raw_tad[1] << 8);
-            uint8_t tad_quant_bits = raw_tad[2];
-            uint32_t tad_compressed_size = raw_tad[3] | (raw_tad[4] << 8) |
-                                           (raw_tad[5] << 16) | (raw_tad[6] << 24);
-            uint32_t tad_rs_blocks = raw_tad[7] | (raw_tad[8] << 8) | (raw_tad[9] << 16);
-
-            // Calculate TAD RS payload size
-            uint32_t tad_rs_size = tad_rs_blocks * 255;
-
-            // Skip TAD RS payload
-            fseek(fp, tad_rs_size, SEEK_CUR);
-
-            // Read TAV sync (4 bytes)
-            uint8_t tav_sync_bytes[4];
-            if (fread(tav_sync_bytes, 1, 4, fp) != 4) break;
-
-            uint32_t tav_sync_check = (tav_sync_bytes[0] << 24) | (tav_sync_bytes[1] << 16) |
-                                      (tav_sync_bytes[2] << 8) | tav_sync_bytes[3];
-
-            int tav_sync_ok = (tav_sync_check == TAV_DT_SYNC_TAV);
-
-            // Read and decode TAV LDPC header (28 bytes -> 14 bytes raw)
-            uint8_t ldpc_tav[DT_TAV_HEADER_LDPC];
-            if (fread(ldpc_tav, 1, DT_TAV_HEADER_LDPC, fp) != DT_TAV_HEADER_LDPC) break;
-
-            uint8_t raw_tav[DT_TAV_HEADER_RAW];
-            int tav_ldpc_ok = (ldpc_decode(ldpc_tav, DT_TAV_HEADER_LDPC, raw_tav) == 0);
-
-            // Parse TAV header: gop_size(1) + reserved(2) + compressed_size(4) + rs_block_count(3) + crc(4)
-            uint8_t tav_gop_size = raw_tav[0];
-            uint32_t tav_compressed_size = raw_tav[3] | (raw_tav[4] << 8) |
-                                           (raw_tav[5] << 16) | (raw_tav[6] << 24);
-            uint32_t tav_rs_blocks = raw_tav[7] | (raw_tav[8] << 8) | (raw_tav[9] << 16);
-
-            // Calculate TAV RS payload size
-            uint32_t tav_rs_size = tav_rs_blocks * 255;
-
-            // Skip TAV RS payload
-            fseek(fp, tav_rs_size, SEEK_CUR);
-
-            // For display, create a synthetic payload with the TAV inner packet
-            // The inspector will show this as a GOP packet
-            payload_size = tav_compressed_size + 16;  // Approximate
-            packet_payload = NULL;  // Don't read actual payload for now
-
-            // Set packet type to GOP unified
-            packet_type = TAV_PACKET_GOP_UNIFIED;
-
-            // Display DT packet info
-            if (!opts.summary_only && should_display_packet(packet_type, &opts)) {
-                printf("Packet %d (offset 0x%lX): TAV-DT Packet\n", packet_num, packet_offset);
-                printf("  Main header:  LDPC %s, timecode=%.3fs, size=%u\n",
-                       main_ldpc_ok ? "OK" : "ERR", timecode_ns / 1000000000.0, packet_size_total);
-                printf("  TAD subpkt:   LDPC %s, samples=%u, Q=%u, RS blocks=%u (%u bytes)\n",
-                       tad_ldpc_ok ? "OK" : "ERR", tad_sample_count, tad_quant_bits, tad_rs_blocks, tad_rs_size);
-                printf("  TAV subpkt:   sync %s, LDPC %s, GOP=%u, RS blocks=%u (%u bytes)\n",
-                       tav_sync_ok ? "OK" : "ERR", tav_ldpc_ok ? "OK" : "ERR",
-                       tav_gop_size, tav_rs_blocks, tav_rs_size);
-            }
-
-            // Update stats
-            stats.gop_unified_count++;
-            stats.total_gop_frames += tav_gop_size;
-            stats.audio_tad_count++;
-            stats.audio_tad_bytes += tad_compressed_size;
-            stats.total_audio_bytes += tad_compressed_size;
-            stats.total_video_bytes += tav_compressed_size;
-
-            packet_num++;
-            continue;  // Skip the normal packet processing
-        } else {
-            // Regular TAV: Read packet type directly
-            if (fread(&packet_type, 1, 1, fp) != 1) break;
-        }
-
-        int display = should_display_packet(packet_type, &opts);
-
-        if (!opts.summary_only && display) {
-            printf("Packet %d (offset 0x%lX): Type 0x%02X (%s)",
-                   packet_num, packet_offset, packet_type, get_packet_type_name(packet_type));
-            if (is_dt_format) {
-                printf(" [DT payload: %u bytes]", payload_size);
-            }
-        }
-
-        switch (packet_type) {
-            case TAV_PACKET_EXTENDED_HDR: {
-                stats.extended_header_count++;
-                if (!opts.summary_only && display) {
-                    print_extended_header(fp, opts.verbose);
-                } else {
-                    // Skip extended header
-                    uint16_t num_pairs;
-                    fread(&num_pairs, sizeof(uint16_t), 1, fp);
-                    for (int i = 0; i < num_pairs; i++) {
-                        fseek(fp, 5, SEEK_CUR);  // key + type
-                        uint8_t type;
-                        fseek(fp, -1, SEEK_CUR);
-                        fread(&type, 1, 1, fp);
-                        if (type == 0x04) fseek(fp, 8, SEEK_CUR);
-                        else if (type == 0x10) {
-                            uint16_t len;
-                            fread(&len, 2, 1, fp);
-                            fseek(fp, len, SEEK_CUR);
-                        }
-                    }
-                }
-                break;
-            }
-
-            case TAV_PACKET_TIMECODE: {
-                stats.timecode_count++;
-                uint64_t timecode_ns;
-                if (read_packet_data(&timecode_ns, sizeof(uint64_t), 1, fp, packet_payload, payload_size, &payload_offset) != 1) break;
-
-                if (!opts.summary_only && display) {
-                    double timecode_sec = timecode_ns / 1000000000.0;
-                    printf(" - %.6f seconds (Frame %d)", timecode_sec, current_frame);
-                }
-                break;
-            }
-
-            case TAV_PACKET_GOP_UNIFIED: case TAV_PACKET_GOP_UNIFIED_MOTION: {
-                // Unified GOP packet: [gop_size][motion_vectors...][compressed_size][data]
-                uint8_t gop_size;
-                if (read_packet_data(&gop_size, 1, 1, fp, packet_payload, payload_size, &payload_offset) != 1) break;
-
-                // Read motion vectors
-                uint32_t size0 = 0;
-                if (packet_type == TAV_PACKET_GOP_UNIFIED_MOTION) {
-                    if (read_packet_data(&size0, sizeof(uint32_t), 1, fp, packet_payload, payload_size, &payload_offset) != 1) { break; }
-                    stats.total_video_bytes += size0;
-                    stats.gop_unified_motion_count++;
-                    if (!packet_payload) fseek(fp, size0, SEEK_CUR);
-                    else payload_offset += size0;
-                }
-
-                // Read compressed data size
-                uint32_t size1;
-                if (read_packet_data(&size1, sizeof(uint32_t), 1, fp, packet_payload, payload_size, &payload_offset) != 1) { break; }
-                stats.total_video_bytes += size1;
-                if (!packet_payload) fseek(fp, size1, SEEK_CUR);
-                // else: data is already in payload buffer, skip ahead
-                else payload_offset += size1;
-
-
-                stats.total_gop_frames += gop_size;
-                if (packet_type == TAV_PACKET_GOP_UNIFIED) {
-                    stats.gop_unified_count++;
-                }
-
-                if (!opts.summary_only && display) {
-                    printf(" - GOP size=%u, data size=%u bytes (%.2f bytes/frame)",
-                           gop_size, (size0 + size1), (double)(size0 + size1) / gop_size);
-                }
-
-                break;
-            }
-
-            case TAV_PACKET_GOP_SYNC: {
-                // GOP sync packet: [frame_count]
-                uint8_t frame_count;
-                if (read_packet_data(&frame_count, 1, 1, fp, packet_payload, payload_size, &payload_offset) != 1) break;
-
-                stats.gop_sync_count++;
-                current_frame += frame_count;  // Advance frame counter
-
-                if (!opts.summary_only && display) {
-                    printf(" - %u frames decoded from GOP block", frame_count);
-                }
-                break;
-            }
-
-            case TAV_PACKET_IFRAME:
-            case TAV_PACKET_PFRAME:
-            case TAV_PACKET_VIDEO_CH2_I:
-            case TAV_PACKET_VIDEO_CH2_P:
-            case TAV_PACKET_VIDEO_CH3_I:
-            case TAV_PACKET_VIDEO_CH3_P:
-            case TAV_PACKET_VIDEO_CH4_I:
-            case TAV_PACKET_VIDEO_CH4_P:
-            case TAV_PACKET_VIDEO_CH5_I:
-            case TAV_PACKET_VIDEO_CH5_P:
-            case TAV_PACKET_VIDEO_CH6_I:
-            case TAV_PACKET_VIDEO_CH6_P:
-            case TAV_PACKET_VIDEO_CH7_I:
-            case TAV_PACKET_VIDEO_CH7_P:
-            case TAV_PACKET_VIDEO_CH8_I:
-            case TAV_PACKET_VIDEO_CH8_P:
-            case TAV_PACKET_VIDEO_CH9_I:
-            case TAV_PACKET_VIDEO_CH9_P: {
-                uint32_t size;
-                if (fread(&size, sizeof(uint32_t), 1, fp) != 1) break;
-                stats.total_video_bytes += size;
-
-                // Get frame info (mode and quantiser) for both I-frames and P-frames
-                frame_info_t frame_info = get_frame_info(fp, size);
-
-                if (packet_type == TAV_PACKET_PFRAME ||
-                    (packet_type >= 0x71 && packet_type <= 0x7F && (packet_type & 1))) {
-                    // This is a P-frame (main or multiplexed)
-                    if (packet_type == TAV_PACKET_PFRAME) {
-                        stats.pframe_count++;
-                        if (frame_info.mode == FRAME_MODE_INTRA) stats.pframe_intra_count++;
-                        else if (frame_info.mode == FRAME_MODE_DELTA) stats.pframe_delta_count++;
-                        else if (frame_info.mode == FRAME_MODE_SKIP) stats.pframe_skip_count++;
-                        current_frame++;  // Increment for P-frame
-                    } else {
-                        stats.mux_video_count++;
-                    }
-                } else {
-                    // I-frame
-                    if (packet_type == TAV_PACKET_IFRAME) {
-                        stats.iframe_count++;
-                        current_frame++;  // Increment for I-frame
-                    } else {
-                        stats.mux_video_count++;
-                    }
-                }
-
-                if (!opts.summary_only && display) {
-                    printf(" - size=%u bytes", size);
-
-                    // Show frame mode (for both I-frames and P-frames)
-                    if (frame_info.mode >= 0) {
-                        if (frame_info.mode == FRAME_MODE_SKIP) printf(" [SKIP]");
-                        else if (frame_info.mode == FRAME_MODE_DELTA) printf(" [DELTA]");
-                        else if (frame_info.mode == FRAME_MODE_INTRA) printf(" [INTRA]");
-
-                        // Show quantiser override if not default
-                        if (frame_info.mode != FRAME_MODE_SKIP) {
-                            if (frame_info.quantiser != 0xFF) {
-                                printf(" [Q=%u]", frame_info.quantiser);
-                            }
-                        }
-                    }
-
-                    if (packet_type >= 0x70 && packet_type <= 0x7F) {
-                        int channel = ((packet_type - 0x70) / 2) + 2;
-                        printf(" (Channel %d)", channel);
-                    }
-                }
-                break;
-            }
-
-            case TAV_PACKET_AUDIO_MP2: {
-                stats.audio_count++;
-                stats.audio_mp2_count++;
-                uint32_t size;
-                if (fread(&size, sizeof(uint32_t), 1, fp) != 1) break;
-                stats.total_audio_bytes += size;
-                stats.audio_mp2_bytes += size;
-
-                if (!opts.summary_only && display) {
-                    printf(" - size=%u bytes", size);
-                }
-                fseek(fp, size, SEEK_CUR);
-                break;
-            }
-
-            case TAV_PACKET_AUDIO_PCM8: {
-                stats.audio_count++;
-                stats.audio_pcm8_count++;
-                uint32_t size;
-                if (fread(&size, sizeof(uint32_t), 1, fp) != 1) break;
-                stats.total_audio_bytes += size;
-                stats.audio_pcm8_bytes += size;
-
-                if (!opts.summary_only && display) {
-                    printf(" - size=%u bytes (zstd compressed)", size);
-                }
-                fseek(fp, size, SEEK_CUR);
-                break;
-            }
-
-            case TAV_PACKET_AUDIO_TAD: {
-                stats.audio_count++;
-                stats.audio_tad_count++;
-
-                // Read sample count
-                uint16_t sample_count0;
-                if (read_packet_data(&sample_count0, sizeof(uint16_t), 1, fp, packet_payload, payload_size, &payload_offset) != 1) break;
-
-                // Read payload_size + 7
-                uint32_t payload_size_plus_7;
-                if (read_packet_data(&payload_size_plus_7, sizeof(uint32_t), 1, fp, packet_payload, payload_size, &payload_offset) != 1) break;
-
-                // Read sample count
-                uint16_t sample_count;
-                if (read_packet_data(&sample_count, sizeof(uint16_t), 1, fp, packet_payload, payload_size, &payload_offset) != 1) break;
-
-                // Read quantiser index
-                uint8_t quantiser;
-                if (read_packet_data(&quantiser, sizeof(uint8_t), 1, fp, packet_payload, payload_size, &payload_offset) != 1) break;
-
-                // Read compressed size
-                uint32_t compressed_size;
-                if (read_packet_data(&compressed_size, sizeof(uint32_t), 1, fp, packet_payload, payload_size, &payload_offset) != 1) break;
-
-                stats.total_audio_bytes += compressed_size;
-                stats.audio_tad_bytes += compressed_size;
-
-                if (!opts.summary_only && display) {
-                    printf(" - samples=%u, size=%u bytes, quantiser=%u steps (index %u)",
-                           sample_count, compressed_size, quantiser * 2 + 1, quantiser);
-                }
-
-                // Skip compressed data
-                if (!packet_payload) fseek(fp, compressed_size, SEEK_CUR);
-                else payload_offset += compressed_size;
-                break;
-            }
-
-            case TAV_PACKET_AUDIO_TRACK: {
-                stats.audio_count++;
-                stats.audio_track_count++;
-                uint32_t size;
-                if (fread(&size, sizeof(uint32_t), 1, fp) != 1) break;
-                stats.total_audio_bytes += size;
-                stats.audio_track_bytes += size;
-
-                if (!opts.summary_only && display) {
-                    printf(" - size=%u bytes (separate track)", size);
-                }
-                fseek(fp, size, SEEK_CUR);
-                break;
-            }
-
-            case TAV_PACKET_SUBTITLE:
-            case TAV_PACKET_SUBTITLE_TC: {
-                stats.subtitle_count++;
-                uint32_t size;
-                if (fread(&size, sizeof(uint32_t), 1, fp) != 1) break;
-
-                if (!opts.summary_only && display) {
-                    printf(" - size=%u bytes", size);
-                    print_subtitle_packet(fp, size, packet_type == TAV_PACKET_SUBTITLE_TC, opts.verbose);
-                } else {
-                    fseek(fp, size, SEEK_CUR);
-                }
-                break;
-            }
-
-            case TAV_PACKET_VIDEOTEX: {
-                stats.videotex_count++;
-                uint32_t size;
-                if (fread(&size, sizeof(uint32_t), 1, fp) != 1) break;
-                stats.videotex_bytes += size;
-
-                if (!opts.summary_only && display) {
-                    // Read compressed data
-                    uint8_t *compressed_data = malloc(size);
-                    if (compressed_data && fread(compressed_data, 1, size, fp) == size) {
-                        // Allocate decompression buffer (max 2 + 80*32*3 = 7682 bytes)
-                        size_t const decompress_size = 8192;
-                        uint8_t *decompressed_data = malloc(decompress_size);
-                        if (decompressed_data) {
-                            size_t actual_size = ZSTD_decompress(decompressed_data, decompress_size,
-                                                                compressed_data, size);
-                            if (!ZSTD_isError(actual_size) && actual_size >= 2) {
-                                uint8_t rows = decompressed_data[0];
-                                uint8_t cols = decompressed_data[1];
-                                printf(" - size=%u bytes (decompressed: %zu bytes, grid: %ux%u, ratio: %.2f:1)",
-                                       size, actual_size, cols, rows, (double)actual_size / size);
-                            } else {
-                                printf(" - size=%u bytes (decompression failed)", size);
-                            }
-                            free(decompressed_data);
-                        } else {
-                            printf(" - size=%u bytes", size);
-                        }
-                        free(compressed_data);
-                    } else {
-                        printf(" - size=%u bytes", size);
-                        fseek(fp, size, SEEK_CUR);
-                    }
-                } else {
-                    fseek(fp, size, SEEK_CUR);
-                }
-                break;
-            }
-
-            case TAV_PACKET_EXIF:
-            case TAV_PACKET_ID3V1:
-            case TAV_PACKET_ID3V2:
-            case TAV_PACKET_VORBIS_COMMENT:
-            case TAV_PACKET_CD_TEXT: {
-                stats.metadata_count++;
-                uint32_t size;
-                if (fread(&size, sizeof(uint32_t), 1, fp) != 1) break;
-
-                if (!opts.summary_only && display) {
-                    printf(" - size=%u bytes", size);
-                }
-                fseek(fp, size, SEEK_CUR);
-                break;
-            }
-
-            case TAV_PACKET_LOOP_START:
-            case TAV_PACKET_LOOP_END:
-                stats.loop_point_count++;
-                if (!opts.summary_only && display) {
-                    printf(" (no payload)");
-                }
-                break;
-
-            case TAV_PACKET_SCREEN_MASK:
-                uint32_t frame_number;
-                if (fread(&frame_number, sizeof(uint32_t), 1, fp) != 1) break;
-                uint16_t top;
-                if (fread(&top, sizeof(uint16_t), 1, fp) != 1) break;
-                uint16_t right;
-                if (fread(&right, sizeof(uint16_t), 1, fp) != 1) break;
-                uint16_t bottom;
-                if (fread(&bottom, sizeof(uint16_t), 1, fp) != 1) break;
-                uint16_t left;
-                if (fread(&left, sizeof(uint16_t), 1, fp) != 1) break;
-
-                if (!opts.summary_only && display) {
-                    printf(" - Frame=%u [top=%u, right=%u, bottom=%u, left=%u]", frame_number, top, right, bottom, left);
-                }
-                break;
-
-            case TAV_PACKET_SYNC:
-                stats.sync_count++;
-                break;
-
-            case TAV_PACKET_SYNC_NTSC:
-                stats.sync_ntsc_count++;
-                break;
-
-            case TAV_PACKET_NOOP:
-                // Silent no-op
-                break;
-
-            default:
-                stats.unknown_count++;
-                if (!opts.summary_only && display) {
-                    printf(" (UNKNOWN)");
-                }
-                break;
-        }
-
-        if (!opts.summary_only && display) {
-            printf("\n");
-        }
-
-        // Free DT packet payload if allocated
-        if (packet_payload) {
-            free(packet_payload);
-        }
-
-        packet_num++;
-    }
-
-    fclose(fp);
-
-    // Print summary
-    printf("\n==================================================\n");
-    printf("Summary Statistics:\n");
-    printf("==================================================\n");
-    printf("Total packets:        %d\n", packet_num);
-    printf("\nVideo:\n");
-    printf("  I-frames:           %d\n", stats.iframe_count);
-    printf("  P-frames:           %d", stats.pframe_count);
-    if (stats.pframe_count > 0) {
-        printf(" (INTRA: %d, DELTA: %d, SKIP: %d",
-               stats.pframe_intra_count, stats.pframe_delta_count, stats.pframe_skip_count);
-        int known_modes = stats.pframe_intra_count + stats.pframe_delta_count + stats.pframe_skip_count;
-        if (known_modes < stats.pframe_count) {
-            printf(", Unknown: %d", stats.pframe_count - known_modes);
-        }
-        printf(")");
-    }
-    printf("\n");
-    if (stats.gop_unified_count + stats.gop_unified_motion_count > 0) {
-        printf("  3D GOP packets:     %d (total frames: %d, avg %.1f frames/GOP)\n",
-               (stats.gop_unified_count + stats.gop_unified_motion_count), stats.total_gop_frames,
-               (double)stats.total_gop_frames / (stats.gop_unified_count + stats.gop_unified_motion_count));
-        printf("  GOP sync packets:   %d\n", stats.gop_sync_count);
-    }
-    printf("  Mux video:          %d\n", stats.mux_video_count);
-    printf("  Total video bytes:  %llu (%.2f MB)\n",
-           (unsigned long long)stats.total_video_bytes,
-           stats.total_video_bytes / 1024.0 / 1024.0);
-    printf("\nAudio:\n");
-    printf("  Total packets:      %d\n", stats.audio_count);
-    if (stats.audio_mp2_count > 0) {
-        printf("    MP2:              %d packets, %llu bytes (%.2f MB)\n",
-               stats.audio_mp2_count,
-               (unsigned long long)stats.audio_mp2_bytes,
-               stats.audio_mp2_bytes / 1024.0 / 1024.0);
-    }
-    if (stats.audio_pcm8_count > 0) {
-        printf("    PCM8 (zstd):      %d packets, %llu bytes (%.2f MB)\n",
-               stats.audio_pcm8_count,
-               (unsigned long long)stats.audio_pcm8_bytes,
-               stats.audio_pcm8_bytes / 1024.0 / 1024.0);
-    }
-    if (stats.audio_tad_count > 0) {
-        printf("    TAD32 (zstd):     %d packets, %llu bytes (%.2f MB)\n",
-               stats.audio_tad_count,
-               (unsigned long long)stats.audio_tad_bytes,
-               stats.audio_tad_bytes / 1024.0 / 1024.0);
-    }
-    if (stats.audio_track_count > 0) {
-        printf("    Separate track:   %d packets, %llu bytes (%.2f MB)\n",
-               stats.audio_track_count,
-               (unsigned long long)stats.audio_track_bytes,
-               stats.audio_track_bytes / 1024.0 / 1024.0);
-    }
-    printf("  Total audio bytes:  %llu (%.2f MB)\n",
-           (unsigned long long)stats.total_audio_bytes,
-           stats.total_audio_bytes / 1024.0 / 1024.0);
-    printf("\nOther:\n");
-    printf("  Timecodes:          %d\n", stats.timecode_count);
-    printf("  Subtitles:          %d\n", stats.subtitle_count);
-    if (stats.videotex_count > 0) {
-        printf("  Videotex frames:    %d (%llu bytes, %.2f MB)\n",
-               stats.videotex_count,
-               (unsigned long long)stats.videotex_bytes,
-               stats.videotex_bytes / 1024.0 / 1024.0);
-    }
-    printf("  Extended headers:   %d\n", stats.extended_header_count);
-    printf("  Metadata packets:   %d\n", stats.metadata_count);
-    printf("  Loop points:        %d\n", stats.loop_point_count);
-    printf("  Sync packets:       %d\n", stats.sync_count);
-    printf("  NTSC sync packets:  %d\n", stats.sync_ntsc_count);
-    printf("  Unknown packets:    %d\n", stats.unknown_count);
-
-    return 0;
-}
diff --git a/video_encoder/tav_visualise_coefficients.c b/video_encoder/tav_visualise_coefficients.c
deleted file mode 100644
index 70dfb5a..0000000
--- a/video_encoder/tav_visualise_coefficients.c
+++ /dev/null
@@ -1,294 +0,0 @@
-// Visualise DWT Coefficients as Image
-// Converts .bin coefficient file to PPM image with logarithmic color mapping
-// Usage: ./visualise_coefficients <input.bin> <output.ppm> <width> <height>
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <math.h>
-
-// Logarithmic color mapping for coefficient visualisation
-// Zero: Black (#000000)
-// Positive: Red to Yellow (#FF0000 to #FFFF00) - logarithmic
-// Negative: Blue to Cyan (#0000FF to #00FFFF) - logarithmic
-typedef struct {
-    uint8_t r, g, b;
-} rgb_t;
-
-static rgb_t map_coefficient_to_color(int16_t coeff) {
-    rgb_t color = {0, 0, 0};
-
-    if (coeff == 0) {
-        // Zero: pure black
-        return color;
-    }
-
-    if (coeff == 1) {
-        // +1: Light green #55FF55
-        color.r = 0x55;
-        color.g = 0xFF;
-        color.b = 0x55;
-        return color;
-    }
-
-    if (coeff == -1) {
-        // -1: Dark green #005500
-        color.r = 0x00;
-        color.g = 0x55;
-        color.b = 0x00;
-        return color;
-    }
-
-    if (coeff > 0) {
-        // Positive: Red (#FF0000) to Yellow (#FFFF00)
-        // Logarithmic mapping: log2(1) = 0, log2(32767) ≈ 14.99
-        double log_val = log2((double)coeff);
-        double log_max = log2(32767.0);
-        double normalised = log_val / log_max;  // 0.0 to 1.0
-
-        color.r = 255;
-        color.g = (uint8_t)(normalised * 255.0);
-        color.b = 0;
-    } else {
-        // Negative: Blue (#0000FF) to Cyan (#00FFFF)
-        // Logarithmic mapping: log2(1) = 0, log2(32768) = 15
-        double log_val = log2((double)(-coeff));
-        double log_max = log2(32768.0);
-        double normalised = log_val / log_max;  // 0.0 to 1.0
-
-        color.r = 0;
-        color.g = (uint8_t)(normalised * 255.0);
-        color.b = 255;
-    }
-
-    return color;
-}
-
-int main(int argc, char *argv[]) {
-    if (argc != 5) {
-        printf("Usage: %s <input.bin> <output.ppm> <width> <height>\n", argv[0]);
-        printf("Example: %s frame_060.tavframe.y.bin output.ppm 560 448\n", argv[0]);
-        return 1;
-    }
-
-    const char *input_file = argv[1];
-    const char *output_file = argv[2];
-    int width = atoi(argv[3]);
-    int height = atoi(argv[4]);
-
-    if (width <= 0 || height <= 0) {
-        printf("Error: Invalid dimensions %dx%d\n", width, height);
-        return 1;
-    }
-
-    size_t expected_count = width * height;
-
-    // Load coefficient file
-    FILE *fp_in = fopen(input_file, "rb");
-    if (!fp_in) {
-        printf("Error: Cannot open %s\n", input_file);
-        return 1;
-    }
-
-    // Get file size
-    fseek(fp_in, 0, SEEK_END);
-    long file_size = ftell(fp_in);
-    fseek(fp_in, 0, SEEK_SET);
-
-    size_t coeff_count = file_size / sizeof(int16_t);
-
-    if (coeff_count != expected_count) {
-        printf("Warning: File contains %zu coefficients, expected %zu (%dx%d)\n",
-               coeff_count, expected_count, width, height);
-    }
-
-    // Allocate coefficient buffer
-    int16_t *coeffs = malloc(expected_count * sizeof(int16_t));
-    if (!coeffs) {
-        printf("Error: Memory allocation failed\n");
-        fclose(fp_in);
-        return 1;
-    }
-
-    // Read coefficients
-    size_t read_count = fread(coeffs, sizeof(int16_t), expected_count, fp_in);
-    fclose(fp_in);
-
-    if (read_count != expected_count) {
-        printf("Error: Read %zu coefficients, expected %zu\n", read_count, expected_count);
-        free(coeffs);
-        return 1;
-    }
-
-    // Analyse coefficient distribution - Overall and per-subband
-    size_t zeros = 0, ones = 0, positives = 0, negatives = 0;
-    int16_t min_val = INT16_MAX, max_val = INT16_MIN;
-
-    // Calculate overall statistics
-    for (size_t i = 0; i < expected_count; i++) {
-        if (coeffs[i] == 0) zeros++;
-        else if (coeffs[i] == 1 || coeffs[i] == -1) ones++;
-        else if (coeffs[i] > 0) positives++;
-        else negatives++;
-
-        if (coeffs[i] < min_val) min_val = coeffs[i];
-        if (coeffs[i] > max_val) max_val = coeffs[i];
-    }
-
-    printf("Overall coefficient statistics:\n");
-    printf("  Total: %zu\n", expected_count);
-    printf("  Zeros: %zu (%.1f%%)\n", zeros, 100.0 * zeros / expected_count);
-    printf("  Ones: %zu (%.1f%%)\n", ones, 100.0 * ones / expected_count);
-    printf("  Positives: %zu (%.1f%%)\n", positives, 100.0 * positives / expected_count);
-    printf("  Negatives: %zu (%.1f%%)\n", negatives, 100.0 * negatives / expected_count);
-    printf("  Range: [%d, %d]\n\n", min_val, max_val);
-
-    // Per-subband statistics using 2D spatial layout
-    // The coefficients are stored in 2D spatial arrangement like the PPM image
-    int num_levels = 6;
-
-    // Helper macro to get coefficient from 2D position
-    #define GET_COEFF(x, y) coeffs[(y) * width + (x)]
-
-    // Calculate subband dimensions for each level
-    int level_w[7], level_h[7];  // level_w[1] = width/2, level_w[6] = width/64
-    for (int i = 1; i <= num_levels; i++) {
-        level_w[i] = width / (1 << i);
-        level_h[i] = height / (1 << i);
-    }
-
-    // LL6 subband (top-left corner)
-    {
-        int ll_w = level_w[6], ll_h = level_h[6];
-        size_t ll_zeros = 0, ll_ones = 0, ll_pos = 0, ll_neg = 0;
-        int16_t ll_min = INT16_MAX, ll_max = INT16_MIN;
-
-        for (int y = 0; y < ll_h; y++) {
-            for (int x = 0; x < ll_w; x++) {
-                int16_t val = GET_COEFF(x, y);
-                if (val == 0) ll_zeros++;
-                else if (val == 1 || val == -1) ll_ones++;
-                else if (val > 0) ll_pos++;
-                else ll_neg++;
-                if (val < ll_min) ll_min = val;
-                if (val > ll_max) ll_max = val;
-            }
-        }
-
-        size_t ll_total = ll_w * ll_h;
-        printf("LL%d subband (%dx%d):\n", num_levels, ll_w, ll_h);
-        printf("  Total: %zu\n", ll_total);
-        printf("  Zeros: %zu (%.1f%%)\n", ll_zeros, 100.0 * ll_zeros / ll_total);
-        printf("  Ones: %zu (%.1f%%)\n", ll_ones, 100.0 * ll_ones / ll_total);
-        printf("  Positives: %zu (%.1f%%)\n", ll_pos, 100.0 * ll_pos / ll_total);
-        printf("  Negatives: %zu (%.1f%%)\n", ll_neg, 100.0 * ll_neg / ll_total);
-        printf("  Range: [%d, %d]\n\n", ll_min, ll_max);
-    }
-
-    // Process each level from deepest (6) to finest (1)
-    for (int level = num_levels; level >= 1; level--) {
-        int half_w = level_w[level];
-        int half_h = level_h[level];
-
-        // LH subband (horizontal high-pass) - right of LL region
-        size_t lh_zeros = 0, lh_ones = 0, lh_pos = 0, lh_neg = 0;
-        int16_t lh_min = INT16_MAX, lh_max = INT16_MIN;
-        int lh_x0 = half_w, lh_y0 = 0;
-        int lh_x1 = half_w * 2, lh_y1 = half_h;
-
-        for (int y = lh_y0; y < lh_y1; y++) {
-            for (int x = lh_x0; x < lh_x1; x++) {
-                int16_t val = GET_COEFF(x, y);
-                if (val == 0) lh_zeros++;
-                else if (val == 1 || val == -1) lh_ones++;
-                else if (val > 0) lh_pos++;
-                else lh_neg++;
-                if (val < lh_min) lh_min = val;
-                if (val > lh_max) lh_max = val;
-            }
-        }
-
-        // HL subband (vertical high-pass) - below LL region
-        size_t hl_zeros = 0, hl_ones = 0, hl_pos = 0, hl_neg = 0;
-        int16_t hl_min = INT16_MAX, hl_max = INT16_MIN;
-        int hl_x0 = 0, hl_y0 = half_h;
-        int hl_x1 = half_w, hl_y1 = half_h * 2;
-
-        for (int y = hl_y0; y < hl_y1; y++) {
-            for (int x = hl_x0; x < hl_x1; x++) {
-                int16_t val = GET_COEFF(x, y);
-                if (val == 0) hl_zeros++;
-                else if (val == 1 || val == -1) hl_ones++;
-                else if (val > 0) hl_pos++;
-                else hl_neg++;
-                if (val < hl_min) hl_min = val;
-                if (val > hl_max) hl_max = val;
-            }
-        }
-
-        // HH subband (diagonal high-pass) - bottom-right of LL region
-        size_t hh_zeros = 0, hh_ones = 0, hh_pos = 0, hh_neg = 0;
-        int16_t hh_min = INT16_MAX, hh_max = INT16_MIN;
-        int hh_x0 = half_w, hh_y0 = half_h;
-        int hh_x1 = half_w * 2, hh_y1 = half_h * 2;
-
-        for (int y = hh_y0; y < hh_y1; y++) {
-            for (int x = hh_x0; x < hh_x1; x++) {
-                int16_t val = GET_COEFF(x, y);
-                if (val == 0) hh_zeros++;
-                else if (val == 1 || val == -1) hh_ones++;
-                else if (val > 0) hh_pos++;
-                else hh_neg++;
-                if (val < hh_min) hh_min = val;
-                if (val > hh_max) hh_max = val;
-            }
-        }
-
-        size_t sub_total = half_w * half_h;
-        printf("Level %d subbands (%dx%d each):\n", level, half_w, half_h);
-        printf("  LH%d: Total=%zu, Zeros=%zu (%.1f%%), Ones=%zu (%.1f%%), Pos=%zu (%.1f%%), Neg=%zu (%.1f%%), Range=[%d,%d]\n",
-               level, sub_total, lh_zeros, 100.0*lh_zeros/sub_total, lh_ones, 100.0*lh_ones/sub_total,
-               lh_pos, 100.0*lh_pos/sub_total, lh_neg, 100.0*lh_neg/sub_total, lh_min, lh_max);
-        printf("  HL%d: Total=%zu, Zeros=%zu (%.1f%%), Ones=%zu (%.1f%%), Pos=%zu (%.1f%%), Neg=%zu (%.1f%%), Range=[%d,%d]\n",
-               level, sub_total, hl_zeros, 100.0*hl_zeros/sub_total, hl_ones, 100.0*hl_ones/sub_total,
-               hl_pos, 100.0*hl_pos/sub_total, hl_neg, 100.0*hl_neg/sub_total, hl_min, hl_max);
-        printf("  HH%d: Total=%zu, Zeros=%zu (%.1f%%), Ones=%zu (%.1f%%), Pos=%zu (%.1f%%), Neg=%zu (%.1f%%), Range=[%d,%d]\n\n",
-               level, sub_total, hh_zeros, 100.0*hh_zeros/sub_total, hh_ones, 100.0*hh_ones/sub_total,
-               hh_pos, 100.0*hh_pos/sub_total, hh_neg, 100.0*hh_neg/sub_total, hh_min, hh_max);
-    }
-
-    #undef GET_COEFF
-
-    // Write PPM image
-    FILE *fp_out = fopen(output_file, "wb");
-    if (!fp_out) {
-        printf("Error: Cannot create %s\n", output_file);
-        free(coeffs);
-        return 1;
-    }
-
-    // PPM header
-    fprintf(fp_out, "P6\n%d %d\n255\n", width, height);
-
-    // Write pixel data
-    for (int y = 0; y < height; y++) {
-        for (int x = 0; x < width; x++) {
-            size_t idx = y * width + x;
-            rgb_t color = map_coefficient_to_color(coeffs[idx]);
-            fwrite(&color, 3, 1, fp_out);
-        }
-    }
-
-    fclose(fp_out);
-    free(coeffs);
-
-    printf("\nWrote %dx%d image to %s\n", width, height, output_file);
-    printf("Color mapping:\n");
-    printf("  Black:  Zero coefficients\n");
-    printf("  Light Green (#55FF55): +1 coefficients\n");
-    printf("  Dark Green (#00AA00): -1 coefficients\n");
-    printf("  Red→Yellow: Positive coefficients > +1 (logarithmic)\n");
-    printf("  Blue→Cyan: Negative coefficients < -1 (logarithmic)\n");
-
-    return 0;
-}
diff --git a/video_encoder/tavdt_noise_injector.c b/video_encoder/tavdt_noise_injector.c
deleted file mode 100644
index 447b03c..0000000
--- a/video_encoder/tavdt_noise_injector.c
+++ /dev/null
@@ -1,402 +0,0 @@
-// TAV-DT Noise Injector - Simulates satellite transmission channel noise
-// Models QPSK over Ku-band satellite with AWGN and burst interference
-// to compile: gcc -O2 -o tavdt_noise_injector tavdt_noise_injector.c -lm
-// Created by CuriousTorvald and Claude on 2025-12-14
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <math.h>
-#include <getopt.h>
-#include <time.h>
-
-// Buffer size for streaming processing
-#define BUFFER_SIZE (1024 * 1024)  // 1 MB chunks
-
-// Default TAV-DT bitrate for timing calculations (~2 Mbps)
-#define DEFAULT_BITRATE_BPS 2000000.0
-
-// Global bitrate (can be overridden by --bitrate)
-static double g_bitrate_bps = DEFAULT_BITRATE_BPS;
-
-// Burst noise parameters
-#define BURST_LENGTH_MEAN   100.0
-#define BURST_LENGTH_STDDEV  30.0
-#define BURST_LENGTH_MIN     10
-
-//=============================================================================
-// PRNG Functions (xorshift64)
-//=============================================================================
-
-static uint64_t xorshift64(uint64_t *state) {
-    uint64_t x = *state;
-    x ^= x << 13;
-    x ^= x >> 7;
-    x ^= x << 17;
-    return *state = x;
-}
-
-// Returns uniform random in [0, 1)
-static double rand_uniform(uint64_t *state) {
-    return (double)xorshift64(state) / (double)UINT64_MAX;
-}
-
-// Box-Muller transform for Gaussian random numbers
-static double gaussian_rand(uint64_t *state, double mean, double stddev) {
-    double u1 = rand_uniform(state);
-    double u2 = rand_uniform(state);
-
-    // Avoid log(0)
-    if (u1 < 1e-15) u1 = 1e-15;
-
-    double z = sqrt(-2.0 * log(u1)) * cos(2.0 * M_PI * u2);
-    return mean + stddev * z;
-}
-
-//=============================================================================
-// BER Calculation
-//=============================================================================
-
-// Calculate BER from SNR in dB for QPSK modulation
-// BER = 0.5 * erfc(sqrt(Eb/N0))
-// For QPSK, Eb/N0 = SNR (2 bits per symbol)
-static double snr_to_ber(double snr_db) {
-    double snr_linear = pow(10.0, snr_db / 10.0);
-    double eb_n0 = snr_linear;
-    return 0.5 * erfc(sqrt(eb_n0));
-}
-
-//=============================================================================
-// Burst State Management
-//=============================================================================
-
-typedef struct {
-    double current_time_sec;       // Elapsed playback time
-    double next_burst_time;        // When next burst occurs
-    int burst_bytes_remaining;     // Bytes left in current burst (0 = no active burst)
-    double burst_interval;         // Mean interval between bursts (60.0 / bursts_per_minute)
-    double burst_ber;              // BER during burst
-    int burst_count;               // Total bursts applied
-    int total_burst_bytes;         // Total bytes affected by bursts
-    int verbose;                   // Verbose output flag
-} burst_state_t;
-
-static void burst_state_init(burst_state_t *state, double bursts_per_minute,
-                             double burst_ber, int verbose, uint64_t *seed) {
-    state->current_time_sec = 0.0;
-    state->burst_bytes_remaining = 0;
-    state->burst_ber = burst_ber;
-    state->burst_count = 0;
-    state->total_burst_bytes = 0;
-    state->verbose = verbose;
-
-    if (bursts_per_minute > 0) {
-        state->burst_interval = 60.0 / bursts_per_minute;
-        // Schedule first burst using exponential distribution
-        state->next_burst_time = -state->burst_interval * log(rand_uniform(seed));
-    } else {
-        state->burst_interval = 0;
-        state->next_burst_time = 1e30;  // Never burst
-    }
-}
-
-static void burst_state_advance_time(burst_state_t *state, double delta_sec, uint64_t *seed) {
-    double end_time = state->current_time_sec + delta_sec;
-
-    // Check if any bursts should occur during this time span
-    while (state->burst_interval > 0 && state->next_burst_time < end_time) {
-        // A burst should start during this chunk
-        if (state->burst_bytes_remaining == 0) {
-            double length = gaussian_rand(seed, BURST_LENGTH_MEAN, BURST_LENGTH_STDDEV);
-            state->burst_bytes_remaining = (int)fmax(BURST_LENGTH_MIN, length);
-            state->burst_count++;
-
-            if (state->verbose) {
-                fprintf(stderr, "  [burst] time %.2fs, %d bytes\n",
-                        state->next_burst_time, state->burst_bytes_remaining);
-            }
-        }
-
-        // Schedule next burst
-        double wait = -state->burst_interval * log(rand_uniform(seed));
-        if (wait < 0.001) wait = 0.001;  // Minimum 1ms between bursts
-        state->next_burst_time += wait;
-    }
-
-    state->current_time_sec = end_time;
-}
-
-//=============================================================================
-// Noise Application Functions
-//=============================================================================
-
-// Apply AWGN-based bit errors to buffer
-// Returns number of bits flipped
-static int apply_background_noise(uint8_t *data, size_t len, double ber, uint64_t *seed) {
-    int bits_flipped = 0;
-
-    // Optimization: if BER is extremely low, use probability-based skipping
-    if (ber < 1e-10) {
-        return 0;  // Effectively no errors at this BER
-    }
-
-    for (size_t i = 0; i < len; i++) {
-        for (int bit = 0; bit < 8; bit++) {
-            if (rand_uniform(seed) < ber) {
-                data[i] ^= (1 << bit);
-                bits_flipped++;
-            }
-        }
-    }
-
-    return bits_flipped;
-}
-
-// Apply burst noise to buffer (checks/updates burst state)
-// Returns number of bits flipped
-static int apply_burst_noise(uint8_t *data, size_t len, burst_state_t *state, uint64_t *seed) {
-    int bits_flipped = 0;
-
-    if (state->burst_bytes_remaining <= 0) {
-        return 0;
-    }
-
-    // Apply burst BER to bytes while burst is active
-    size_t burst_bytes = (size_t)state->burst_bytes_remaining;
-    if (burst_bytes > len) {
-        burst_bytes = len;
-    }
-
-    for (size_t i = 0; i < burst_bytes; i++) {
-        for (int bit = 0; bit < 8; bit++) {
-            if (rand_uniform(seed) < state->burst_ber) {
-                data[i] ^= (1 << bit);
-                bits_flipped++;
-            }
-        }
-    }
-
-    state->total_burst_bytes += burst_bytes;
-    state->burst_bytes_remaining -= burst_bytes;
-
-    return bits_flipped;
-}
-
-//=============================================================================
-// Byte Position to Time Conversion
-//=============================================================================
-
-// Convert byte position to approximate playback time based on bitrate
-static double bytes_to_time(size_t byte_pos) {
-    return (double)(byte_pos * 8) / g_bitrate_bps;
-}
-
-//=============================================================================
-// Main Program
-//=============================================================================
-
-static void print_usage(const char *prog) {
-    fprintf(stderr, "TAV-DT Noise Injector v1.0\n");
-    fprintf(stderr, "Simulates QPSK satellite transmission channel noise\n\n");
-    fprintf(stderr, "Usage: %s -i input.tavdt -o output.tavdt --snr N [options]\n\n", prog);
-    fprintf(stderr, "Required:\n");
-    fprintf(stderr, "  -i, --input FILE     Input TAV-DT file\n");
-    fprintf(stderr, "  -o, --output FILE    Output corrupted file\n");
-    fprintf(stderr, "  --snr N              Signal-to-noise ratio in dB (0-30)\n");
-    fprintf(stderr, "\nOptional:\n");
-    fprintf(stderr, "  --burst N            Burst events per minute (default: 0)\n");
-    fprintf(stderr, "  --burst-ber N        BER during burst events (default: 0.5)\n");
-    fprintf(stderr, "  --bitrate N          Stream bitrate in Mbps for timing (default: 2.0)\n");
-    fprintf(stderr, "  --seed N             RNG seed for reproducibility\n");
-    fprintf(stderr, "  -v, --verbose        Show detailed progress\n");
-    fprintf(stderr, "  -h, --help           Show this help\n");
-    fprintf(stderr, "\nSNR Reference:\n");
-    fprintf(stderr, "   0 dB: Worst case (BER ~7.9e-2, 1 in 13 bits)\n");
-    fprintf(stderr, "   6 dB: Poor but working (BER ~2.4e-3)\n");
-    fprintf(stderr, "   9 dB: Typical working (BER ~1.9e-4)\n");
-    fprintf(stderr, "  12 dB: Good condition (BER ~3.8e-6)\n");
-    fprintf(stderr, "  30 dB: Near-perfect (BER ~2.9e-16)\n");
-}
-
-int main(int argc, char *argv[]) {
-    const char *input_file = NULL;
-    const char *output_file = NULL;
-    double snr_db = -1;
-    double bursts_per_minute = 0;
-    double burst_ber = 0.5;
-    uint64_t seed = 0;
-    int seed_provided = 0;
-    int verbose = 0;
-
-    static struct option long_options[] = {
-        {"input",     required_argument, 0, 'i'},
-        {"output",    required_argument, 0, 'o'},
-        {"snr",       required_argument, 0, 's'},
-        {"burst",     required_argument, 0, 'b'},
-        {"burst-ber", required_argument, 0, 'B'},
-        {"bitrate",   required_argument, 0, 'r'},
-        {"seed",      required_argument, 0, 'S'},
-        {"verbose",   no_argument,       0, 'v'},
-        {"help",      no_argument,       0, 'h'},
-        {0, 0, 0, 0}
-    };
-
-    int opt;
-    while ((opt = getopt_long(argc, argv, "i:o:vh", long_options, NULL)) != -1) {
-        switch (opt) {
-            case 'i':
-                input_file = optarg;
-                break;
-            case 'o':
-                output_file = optarg;
-                break;
-            case 's':
-                snr_db = atof(optarg);
-                break;
-            case 'b':
-                bursts_per_minute = atof(optarg);
-                break;
-            case 'B':
-                burst_ber = atof(optarg);
-                break;
-            case 'r':
-                g_bitrate_bps = atof(optarg) * 1000000.0;  // Convert Mbps to bps
-                break;
-            case 'S':
-                seed = strtoull(optarg, NULL, 10);
-                seed_provided = 1;
-                break;
-            case 'v':
-                verbose = 1;
-                break;
-            case 'h':
-            default:
-                print_usage(argv[0]);
-                return opt == 'h' ? 0 : 1;
-        }
-    }
-
-    // Validate arguments
-    if (!input_file || !output_file || snr_db < 0) {
-        fprintf(stderr, "Error: Missing required arguments\n\n");
-        print_usage(argv[0]);
-        return 1;
-    }
-
-    if (burst_ber < 0 || burst_ber > 1) {
-        fprintf(stderr, "Error: --burst-ber must be between 0 and 1\n");
-        return 1;
-    }
-
-    // Initialize RNG
-    if (!seed_provided) {
-        seed = (uint64_t)time(NULL) ^ ((uint64_t)clock() << 32);
-    }
-    // Ensure seed is not zero (xorshift64 requirement)
-    if (seed == 0) seed = 0x853c49e6748fea9bULL;
-    // Warm up the generator (small seeds produce poor initial values)
-    for (int i = 0; i < 10; i++) xorshift64(&seed);
-
-    // Calculate BER from SNR
-    double ber = snr_to_ber(snr_db);
-
-    // Open files
-    FILE *in_fp = fopen(input_file, "rb");
-    if (!in_fp) {
-        fprintf(stderr, "Error: Cannot open input file: %s\n", input_file);
-        return 1;
-    }
-
-    FILE *out_fp = fopen(output_file, "wb");
-    if (!out_fp) {
-        fprintf(stderr, "Error: Cannot open output file: %s\n", output_file);
-        fclose(in_fp);
-        return 1;
-    }
-
-    // Print header info
-    fprintf(stderr, "TAV-DT Noise Injector v1.0\n");
-    fprintf(stderr, "Input:  %s\n", input_file);
-    fprintf(stderr, "Output: %s\n", output_file);
-    fprintf(stderr, "SNR:    %.1f dB (BER: %.2e)\n", snr_db, ber);
-    if (bursts_per_minute > 0) {
-        fprintf(stderr, "Burst:  %.1f events/minute (burst BER: %.2f)\n",
-                bursts_per_minute, burst_ber);
-    } else {
-        fprintf(stderr, "Burst:  disabled\n");
-    }
-    if (seed_provided) {
-        fprintf(stderr, "Seed:   %llu\n", (unsigned long long)seed);
-    }
-    fprintf(stderr, "\n");
-
-    // Initialize burst state
-    burst_state_t burst;
-    burst_state_init(&burst, bursts_per_minute, burst_ber, verbose, &seed);
-
-    // Allocate buffer for streaming processing
-    uint8_t *buffer = malloc(BUFFER_SIZE);
-    if (!buffer) {
-        fprintf(stderr, "Error: Cannot allocate buffer\n");
-        fclose(in_fp);
-        fclose(out_fp);
-        return 1;
-    }
-
-    // Processing statistics
-    long long total_bytes = 0;
-    long long bits_flipped_bg = 0;
-    long long bits_flipped_burst = 0;
-    int chunk_count = 0;
-
-    // Process file in chunks
-    size_t bytes_read;
-    while ((bytes_read = fread(buffer, 1, BUFFER_SIZE, in_fp)) > 0) {
-        // Calculate time delta for this chunk (for burst scheduling)
-        double delta_sec = bytes_to_time(bytes_read);
-        burst_state_advance_time(&burst, delta_sec, &seed);
-
-        // Apply noise to chunk
-        bits_flipped_bg += apply_background_noise(buffer, bytes_read, ber, &seed);
-        bits_flipped_burst += apply_burst_noise(buffer, bytes_read, &burst, &seed);
-
-        // Write corrupted chunk
-        fwrite(buffer, 1, bytes_read, out_fp);
-
-        total_bytes += bytes_read;
-        chunk_count++;
-
-        if (verbose && chunk_count % 10 == 0) {
-            double time_pos = bytes_to_time(total_bytes);
-            fprintf(stderr, "\rProcessed %.1f MB (%.1f sec)...",
-                    total_bytes / (1024.0 * 1024.0), time_pos);
-        }
-    }
-
-    if (verbose) {
-        fprintf(stderr, "\r                                        \r");
-    }
-
-    // Clean up
-    free(buffer);
-    fclose(in_fp);
-    fclose(out_fp);
-
-    // Print summary
-    double duration_sec = bytes_to_time(total_bytes);
-    long long total_bits = total_bytes * 8;
-
-    fprintf(stderr, "Complete.\n");
-    fprintf(stderr, "  Total bytes: %lld (%.1f sec @ ~%.1f Mbps)\n",
-            total_bytes, duration_sec, g_bitrate_bps / 1000000.0);
-    fprintf(stderr, "  Background bits flipped: %lld (%.4f%%)\n",
-            bits_flipped_bg, 100.0 * bits_flipped_bg / total_bits);
-    if (bursts_per_minute > 0) {
-        fprintf(stderr, "  Burst events: %d (%d bytes total)\n",
-                burst.burst_count, burst.total_burst_bytes);
-        fprintf(stderr, "  Burst bits flipped: %lld\n", bits_flipped_burst);
-    }
-
-    return 0;
-}
diff --git a/video_encoder/test_mesh_roundtrip.cpp b/video_encoder/test_mesh_roundtrip.cpp
deleted file mode 100644
index b385f0d..0000000
--- a/video_encoder/test_mesh_roundtrip.cpp
+++ /dev/null
@@ -1,328 +0,0 @@
-// Test mesh warp round-trip consistency
-// Warps a frame forward, then backward, and checks if we get the original back
-// This is critical for MC-lifting invertibility
-
-#include <opencv2/opencv.hpp>
-#include <cstdlib>
-#include <cstring>
-#include <cmath>
-#include <cstdio>
-#include <ctime>
-
-// Include the mesh functions from encoder
-extern "C" {
-    void estimate_motion_optical_flow(
-        const unsigned char *frame1_rgb, const unsigned char *frame2_rgb,
-        int width, int height,
-        float **out_flow_x, float **out_flow_y
-    );
-
-    void build_mesh_from_flow(
-        const float *flow_x, const float *flow_y,
-        int width, int height,
-        int mesh_w, int mesh_h,
-        int16_t *mesh_dx, int16_t *mesh_dy
-    );
-
-    void smooth_mesh_laplacian(
-        int16_t *mesh_dx, int16_t *mesh_dy,
-        int mesh_width, int mesh_height,
-        float smoothness, int iterations
-    );
-}
-
-// Mesh warp with bilinear interpolation (translation only)
-static void apply_mesh_warp_rgb(
-    const cv::Mat &src,
-    cv::Mat &dst,
-    const int16_t *mesh_dx,
-    const int16_t *mesh_dy,
-    int mesh_w, int mesh_h
-) {
-    int width = src.cols;
-    int height = src.rows;
-    int cell_w = width / mesh_w;
-    int cell_h = height / mesh_h;
-
-    dst = cv::Mat(height, width, CV_8UC3);
-
-    for (int y = 0; y < height; y++) {
-        for (int x = 0; x < width; x++) {
-            int cell_x = x / cell_w;
-            int cell_y = y / cell_h;
-
-            cell_x = std::min(cell_x, mesh_w - 2);
-            cell_y = std::min(cell_y, mesh_h - 2);
-
-            int idx_00 = cell_y * mesh_w + cell_x;
-            int idx_10 = idx_00 + 1;
-            int idx_01 = (cell_y + 1) * mesh_w + cell_x;
-            int idx_11 = idx_01 + 1;
-
-            float cp_x0 = cell_x * cell_w + cell_w / 2.0f;
-            float cp_y0 = cell_y * cell_h + cell_h / 2.0f;
-            float cp_x1 = (cell_x + 1) * cell_w + cell_w / 2.0f;
-            float cp_y1 = (cell_y + 1) * cell_h + cell_h / 2.0f;
-
-            float alpha = (x - cp_x0) / (cp_x1 - cp_x0);
-            float beta = (y - cp_y0) / (cp_y1 - cp_y0);
-            alpha = std::max(0.0f, std::min(1.0f, alpha));
-            beta = std::max(0.0f, std::min(1.0f, beta));
-
-            float dx = (1 - alpha) * (1 - beta) * (mesh_dx[idx_00] / 8.0f) +
-                       alpha * (1 - beta) * (mesh_dx[idx_10] / 8.0f) +
-                       (1 - alpha) * beta * (mesh_dx[idx_01] / 8.0f) +
-                       alpha * beta * (mesh_dx[idx_11] / 8.0f);
-
-            float dy = (1 - alpha) * (1 - beta) * (mesh_dy[idx_00] / 8.0f) +
-                       alpha * (1 - beta) * (mesh_dy[idx_10] / 8.0f) +
-                       (1 - alpha) * beta * (mesh_dy[idx_01] / 8.0f) +
-                       alpha * beta * (mesh_dy[idx_11] / 8.0f);
-
-            float src_x = x + dx;
-            float src_y = y + dy;
-
-            int sx0 = (int)floorf(src_x);
-            int sy0 = (int)floorf(src_y);
-            int sx1 = sx0 + 1;
-            int sy1 = sy0 + 1;
-
-            sx0 = std::max(0, std::min(width - 1, sx0));
-            sy0 = std::max(0, std::min(height - 1, sy0));
-            sx1 = std::max(0, std::min(width - 1, sx1));
-            sy1 = std::max(0, std::min(height - 1, sy1));
-
-            float fx = src_x - sx0;
-            float fy = src_y - sy0;
-
-            for (int c = 0; c < 3; c++) {
-                float val_00 = src.at<cv::Vec3b>(sy0, sx0)[c];
-                float val_10 = src.at<cv::Vec3b>(sy0, sx1)[c];
-                float val_01 = src.at<cv::Vec3b>(sy1, sx0)[c];
-                float val_11 = src.at<cv::Vec3b>(sy1, sx1)[c];
-
-                float val = (1 - fx) * (1 - fy) * val_00 +
-                            fx * (1 - fy) * val_10 +
-                            (1 - fx) * fy * val_01 +
-                            fx * fy * val_11;
-
-                dst.at<cv::Vec3b>(y, x)[c] = (unsigned char)std::max(0.0f, std::min(255.0f, val));
-            }
-        }
-    }
-}
-
-int main(int argc, char** argv) {
-    const char* video_file = (argc > 1) ? argv[1] : "test_video.mp4";
-    int num_tests = (argc > 2) ? atoi(argv[2]) : 5;
-
-    printf("Opening video: %s\n", video_file);
-    cv::VideoCapture cap(video_file);
-
-    if (!cap.isOpened()) {
-        fprintf(stderr, "Error: Cannot open video file\n");
-        return 1;
-    }
-
-    int total_frames = (int)cap.get(cv::CAP_PROP_FRAME_COUNT);
-    int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
-    int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);
-
-    printf("Video: %dx%d, %d frames\n", width, height, total_frames);
-
-    // Mesh dimensions (32×32 cells)
-    int mesh_cell_size = 32;
-    int mesh_w = (width + mesh_cell_size - 1) / mesh_cell_size;
-    int mesh_h = (height + mesh_cell_size - 1) / mesh_cell_size;
-    if (mesh_w < 2) mesh_w = 2;
-    if (mesh_h < 2) mesh_h = 2;
-
-    printf("Mesh: %dx%d (approx %dx%d px cells)\n\n",
-           mesh_w, mesh_h, width / mesh_w, height / mesh_h);
-
-    float smoothness = 0.5f;
-    int smooth_iterations = 8;
-
-    srand(time(NULL));
-
-    double total_forward_psnr = 0.0;
-    double total_roundtrip_psnr = 0.0;
-    double total_half_roundtrip_psnr = 0.0;
-
-    for (int test = 0; test < num_tests; test++) {
-        int frame_num = 5 + rand() % (total_frames - 10);
-
-        printf("[Test %d/%d] Frame pair %d → %d\n", test + 1, num_tests, frame_num - 1, frame_num);
-
-        cap.set(cv::CAP_PROP_POS_FRAMES, frame_num - 1);
-        cv::Mat frame0, frame1;
-        cap >> frame0;
-        cap >> frame1;
-
-        if (frame0.empty() || frame1.empty()) {
-            fprintf(stderr, "Error reading frames\n");
-            continue;
-        }
-
-        cv::Mat frame0_rgb, frame1_rgb;
-        cv::cvtColor(frame0, frame0_rgb, cv::COLOR_BGR2RGB);
-        cv::cvtColor(frame1, frame1_rgb, cv::COLOR_BGR2RGB);
-
-        // Compute mesh (F0 → F1)
-        float *flow_x = nullptr, *flow_y = nullptr;
-        estimate_motion_optical_flow(frame0_rgb.data, frame1_rgb.data,
-                                     width, height, &flow_x, &flow_y);
-
-        int16_t *mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
-        int16_t *mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
-        build_mesh_from_flow(flow_x, flow_y, width, height, mesh_w, mesh_h, mesh_dx, mesh_dy);
-        smooth_mesh_laplacian(mesh_dx, mesh_dy, mesh_w, mesh_h, smoothness, smooth_iterations);
-
-        // Create inverted mesh
-        int16_t *inv_mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
-        int16_t *inv_mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
-        for (int i = 0; i < mesh_w * mesh_h; i++) {
-            inv_mesh_dx[i] = -mesh_dx[i];
-            inv_mesh_dy[i] = -mesh_dy[i];
-        }
-
-        // Create half-mesh for symmetric lifting test
-        int16_t *half_mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
-        int16_t *half_mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
-        int16_t *neg_half_mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
-        int16_t *neg_half_mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
-        for (int i = 0; i < mesh_w * mesh_h; i++) {
-            half_mesh_dx[i] = mesh_dx[i] / 2;
-            half_mesh_dy[i] = mesh_dy[i] / 2;
-            neg_half_mesh_dx[i] = -half_mesh_dx[i];
-            neg_half_mesh_dy[i] = -half_mesh_dy[i];
-        }
-
-        // TEST 1: Full forward warp quality (F0 → F1)
-        cv::Mat warped_forward;
-        apply_mesh_warp_rgb(frame0, warped_forward, mesh_dx, mesh_dy, mesh_w, mesh_h);
-
-        double forward_mse = 0.0;
-        for (int y = 0; y < height; y++) {
-            for (int x = 0; x < width; x++) {
-                for (int c = 0; c < 3; c++) {
-                    double diff = (double)warped_forward.at<cv::Vec3b>(y, x)[c] -
-                                 (double)frame1.at<cv::Vec3b>(y, x)[c];
-                    forward_mse += diff * diff;
-                }
-            }
-        }
-        forward_mse /= (width * height * 3);
-        double forward_psnr = (forward_mse > 0) ? 10.0 * log10(255.0 * 255.0 / forward_mse) : 999.0;
-        total_forward_psnr += forward_psnr;
-
-        // TEST 2: Full round-trip (F0 → forward → backward → F0')
-        cv::Mat roundtrip;
-        apply_mesh_warp_rgb(warped_forward, roundtrip, inv_mesh_dx, inv_mesh_dy, mesh_w, mesh_h);
-
-        double roundtrip_mse = 0.0;
-        for (int y = 0; y < height; y++) {
-            for (int x = 0; x < width; x++) {
-                for (int c = 0; c < 3; c++) {
-                    double diff = (double)roundtrip.at<cv::Vec3b>(y, x)[c] -
-                                 (double)frame0.at<cv::Vec3b>(y, x)[c];
-                    roundtrip_mse += diff * diff;
-                }
-            }
-        }
-        roundtrip_mse /= (width * height * 3);
-        double roundtrip_psnr = (roundtrip_mse > 0) ? 10.0 * log10(255.0 * 255.0 / roundtrip_mse) : 999.0;
-        total_roundtrip_psnr += roundtrip_psnr;
-
-        // TEST 3: Half-step symmetric round-trip (MC-lifting style)
-        // F0 → +½mesh, then → -½mesh (should return to F0)
-        cv::Mat half_forward, half_roundtrip;
-        apply_mesh_warp_rgb(frame0, half_forward, half_mesh_dx, half_mesh_dy, mesh_w, mesh_h);
-        apply_mesh_warp_rgb(half_forward, half_roundtrip, neg_half_mesh_dx, neg_half_mesh_dy, mesh_w, mesh_h);
-
-        double half_roundtrip_mse = 0.0;
-        for (int y = 0; y < height; y++) {
-            for (int x = 0; x < width; x++) {
-                for (int c = 0; c < 3; c++) {
-                    double diff = (double)half_roundtrip.at<cv::Vec3b>(y, x)[c] -
-                                 (double)frame0.at<cv::Vec3b>(y, x)[c];
-                    half_roundtrip_mse += diff * diff;
-                }
-            }
-        }
-        half_roundtrip_mse /= (width * height * 3);
-        double half_roundtrip_psnr = (half_roundtrip_mse > 0) ? 10.0 * log10(255.0 * 255.0 / half_roundtrip_mse) : 999.0;
-        total_half_roundtrip_psnr += half_roundtrip_psnr;
-
-        printf("  Forward warp (F0→F1):       PSNR = %.2f dB\n", forward_psnr);
-        printf("  Full round-trip (F0→F0'):   PSNR = %.2f dB\n", roundtrip_psnr);
-        printf("  Half round-trip (±½mesh):   PSNR = %.2f dB\n", half_roundtrip_psnr);
-
-        // Compute motion stats
-        float avg_motion = 0.0f, max_motion = 0.0f;
-        for (int i = 0; i < mesh_w * mesh_h; i++) {
-            float dx = mesh_dx[i] / 8.0f;
-            float dy = mesh_dy[i] / 8.0f;
-            float motion = sqrtf(dx * dx + dy * dy);
-            avg_motion += motion;
-            if (motion > max_motion) max_motion = motion;
-        }
-        avg_motion /= (mesh_w * mesh_h);
-        printf("  Motion: avg=%.2f px, max=%.2f px\n\n", avg_motion, max_motion);
-
-        // Save visualisation for worst case
-        if (test == 0 || roundtrip_psnr < 30.0) {
-            char filename[256];
-            sprintf(filename, "roundtrip_%04d_original.png", frame_num);
-            cv::imwrite(filename, frame0);
-            sprintf(filename, "roundtrip_%04d_forward.png", frame_num);
-            cv::imwrite(filename, warped_forward);
-            sprintf(filename, "roundtrip_%04d_roundtrip.png", frame_num);
-            cv::imwrite(filename, roundtrip);
-
-            // Difference images
-            cv::Mat diff_roundtrip = cv::Mat::zeros(height, width, CV_8UC3);
-            for (int y = 0; y < height; y++) {
-                for (int x = 0; x < width; x++) {
-                    for (int c = 0; c < 3; c++) {
-                        int diff = abs((int)roundtrip.at<cv::Vec3b>(y, x)[c] -
-                                      (int)frame0.at<cv::Vec3b>(y, x)[c]);
-                        diff_roundtrip.at<cv::Vec3b>(y, x)[c] = std::min(diff * 5, 255);
-                    }
-                }
-            }
-            sprintf(filename, "roundtrip_%04d_diff.png", frame_num);
-            cv::imwrite(filename, diff_roundtrip);
-            printf("  Saved visualisation: roundtrip_%04d_*.png\n\n", frame_num);
-        }
-
-        free(flow_x);
-        free(flow_y);
-        free(mesh_dx);
-        free(mesh_dy);
-        free(inv_mesh_dx);
-        free(inv_mesh_dy);
-        free(half_mesh_dx);
-        free(half_mesh_dy);
-        free(neg_half_mesh_dx);
-        free(neg_half_mesh_dy);
-    }
-
-    printf("===========================================\n");
-    printf("Average Results (%d tests):\n", num_tests);
-    printf("  Forward warp quality:       %.2f dB\n", total_forward_psnr / num_tests);
-    printf("  Full round-trip error:      %.2f dB\n", total_roundtrip_psnr / num_tests);
-    printf("  Half round-trip error:      %.2f dB\n", total_half_roundtrip_psnr / num_tests);
-    printf("===========================================\n\n");
-
-    if (total_roundtrip_psnr / num_tests < 35.0) {
-        printf("WARNING: Round-trip PSNR < 35 dB indicates poor invertibility!\n");
-        printf("This will cause MC-lifting to accumulate errors and hurt compression.\n");
-        printf("Bilinear interpolation artifacts are likely the culprit.\n");
-    } else {
-        printf("Round-trip consistency looks acceptable (>35 dB).\n");
-    }
-
-    cap.release();
-    return 0;
-}
diff --git a/video_encoder/test_mesh_warp.cpp b/video_encoder/test_mesh_warp.cpp
deleted file mode 100644
index 775c02b..0000000
--- a/video_encoder/test_mesh_warp.cpp
+++ /dev/null
@@ -1,422 +0,0 @@
-// Visual unit test for mesh warping with hierarchical block matching and affine estimation
-// Picks 5 random frames from test_video.mp4, warps prev frame to current frame using mesh,
-// and saves both warped and target frames for visual comparison
-// Now includes: hierarchical diamond search, Laplacian smoothing, and selective affine transforms
-
-#include <opencv2/opencv.hpp>
-#include <opencv2/video/tracking.hpp>
-#include <cstdlib>
-#include <cstring>
-#include <cmath>
-#include <cstdio>
-#include <ctime>
-
-// Include the mesh functions from encoder
-extern "C" {
-    void estimate_motion_optical_flow(
-        const unsigned char *frame1_rgb, const unsigned char *frame2_rgb,
-        int width, int height,
-        float **out_flow_x, float **out_flow_y
-    );
-
-    void build_mesh_from_flow(
-        const float *flow_x, const float *flow_y,
-        int width, int height,
-        int mesh_w, int mesh_h,
-        int16_t *mesh_dx, int16_t *mesh_dy
-    );
-
-    void smooth_mesh_laplacian(
-        int16_t *mesh_dx, int16_t *mesh_dy,
-        int mesh_width, int mesh_height,
-        float smoothness, int iterations
-    );
-
-    int estimate_cell_affine(
-        const float *flow_x, const float *flow_y,
-        int width, int height,
-        int cell_x, int cell_y,
-        int cell_w, int cell_h,
-        float threshold,
-        int16_t *out_tx, int16_t *out_ty,
-        int16_t *out_a11, int16_t *out_a12,
-        int16_t *out_a21, int16_t *out_a22
-    );
-}
-
-// Mesh warp with bilinear interpolation and optional affine support
-static void apply_mesh_warp_rgb(
-    const cv::Mat &src,          // Input BGR image
-    cv::Mat &dst,                 // Output warped BGR image
-    const int16_t *mesh_dx,       // Mesh motion vectors (1/8 pixel)
-    const int16_t *mesh_dy,
-    const uint8_t *affine_mask,   // 1=affine, 0=translation
-    const int16_t *affine_a11,
-    const int16_t *affine_a12,
-    const int16_t *affine_a21,
-    const int16_t *affine_a22,
-    int mesh_w, int mesh_h
-) {
-    int width = src.cols;
-    int height = src.rows;
-    int cell_w = width / mesh_w;
-    int cell_h = height / mesh_h;
-
-    dst = cv::Mat(height, width, CV_8UC3);
-
-    for (int y = 0; y < height; y++) {
-        for (int x = 0; x < width; x++) {
-            int cell_x = x / cell_w;
-            int cell_y = y / cell_h;
-
-            // Clamp to valid mesh range
-            cell_x = std::min(cell_x, mesh_w - 2);
-            cell_y = std::min(cell_y, mesh_h - 2);
-
-            // Four corner control points
-            int idx_00 = cell_y * mesh_w + cell_x;
-            int idx_10 = idx_00 + 1;
-            int idx_01 = (cell_y + 1) * mesh_w + cell_x;
-            int idx_11 = idx_01 + 1;
-
-            // Control point positions
-            float cp_x0 = cell_x * cell_w + cell_w / 2.0f;
-            float cp_y0 = cell_y * cell_h + cell_h / 2.0f;
-            float cp_x1 = (cell_x + 1) * cell_w + cell_w / 2.0f;
-            float cp_y1 = (cell_y + 1) * cell_h + cell_h / 2.0f;
-
-            // Local coordinates
-            float alpha = (x - cp_x0) / (cp_x1 - cp_x0);
-            float beta = (y - cp_y0) / (cp_y1 - cp_y0);
-            alpha = std::max(0.0f, std::min(1.0f, alpha));
-            beta = std::max(0.0f, std::min(1.0f, beta));
-
-            // Bilinear interpolation of motion vectors
-            float dx = (1 - alpha) * (1 - beta) * (mesh_dx[idx_00] / 8.0f) +
-                       alpha * (1 - beta) * (mesh_dx[idx_10] / 8.0f) +
-                       (1 - alpha) * beta * (mesh_dx[idx_01] / 8.0f) +
-                       alpha * beta * (mesh_dx[idx_11] / 8.0f);
-
-            float dy = (1 - alpha) * (1 - beta) * (mesh_dy[idx_00] / 8.0f) +
-                       alpha * (1 - beta) * (mesh_dy[idx_10] / 8.0f) +
-                       (1 - alpha) * beta * (mesh_dy[idx_01] / 8.0f) +
-                       alpha * beta * (mesh_dy[idx_11] / 8.0f);
-
-            // Check if we're using affine in this cell
-            // For simplicity, just use the top-left corner's affine parameters
-            int cell_idx = cell_y * mesh_w + cell_x;
-            if (affine_mask && affine_mask[cell_idx]) {
-                // Apply affine transform
-                // Compute position relative to cell center
-                float rel_x = x - (cell_x * cell_w + cell_w / 2.0f);
-                float rel_y = y - (cell_y * cell_h + cell_h / 2.0f);
-
-                float a11 = affine_a11[cell_idx] / 256.0f;
-                float a12 = affine_a12[cell_idx] / 256.0f;
-                float a21 = affine_a21[cell_idx] / 256.0f;
-                float a22 = affine_a22[cell_idx] / 256.0f;
-
-                // Affine warp: [x'] = [a11 a12][x] + [dx]
-                //               [y']   [a21 a22][y]   [dy]
-                dx = a11 * rel_x + a12 * rel_y + dx;
-                dy = a21 * rel_x + a22 * rel_y + dy;
-            }
-
-            // Source coordinates (inverse warp)
-            float src_x = x + dx;
-            float src_y = y + dy;
-
-            // Bilinear interpolation
-            int sx0 = (int)floorf(src_x);
-            int sy0 = (int)floorf(src_y);
-            int sx1 = sx0 + 1;
-            int sy1 = sy0 + 1;
-
-            sx0 = std::max(0, std::min(width - 1, sx0));
-            sy0 = std::max(0, std::min(height - 1, sy0));
-            sx1 = std::max(0, std::min(width - 1, sx1));
-            sy1 = std::max(0, std::min(height - 1, sy1));
-
-            float fx = src_x - sx0;
-            float fy = src_y - sy0;
-
-            // Interpolate each channel
-            for (int c = 0; c < 3; c++) {
-                float val_00 = src.at<cv::Vec3b>(sy0, sx0)[c];
-                float val_10 = src.at<cv::Vec3b>(sy0, sx1)[c];
-                float val_01 = src.at<cv::Vec3b>(sy1, sx0)[c];
-                float val_11 = src.at<cv::Vec3b>(sy1, sx1)[c];
-
-                float val = (1 - fx) * (1 - fy) * val_00 +
-                            fx * (1 - fy) * val_10 +
-                            (1 - fx) * fy * val_01 +
-                            fx * fy * val_11;
-
-                dst.at<cv::Vec3b>(y, x)[c] = (unsigned char)std::max(0.0f, std::min(255.0f, val));
-            }
-        }
-    }
-}
-
-// Create visualisation overlay showing affine cells
-static void create_affine_overlay(
-    cv::Mat &img,
-    const uint8_t *affine_mask,
-    int mesh_w, int mesh_h
-) {
-    int width = img.cols;
-    int height = img.rows;
-    int cell_w = width / mesh_w;
-    int cell_h = height / mesh_h;
-
-    for (int my = 0; my < mesh_h; my++) {
-        for (int mx = 0; mx < mesh_w; mx++) {
-            int idx = my * mesh_w + mx;
-
-            if (affine_mask[idx]) {
-                // Draw green rectangle for affine cells
-                int x0 = mx * cell_w;
-                int y0 = my * cell_h;
-                int x1 = (mx + 1) * cell_w;
-                int y1 = (my + 1) * cell_h;
-
-                cv::rectangle(img,
-                             cv::Point(x0, y0),
-                             cv::Point(x1, y1),
-                             cv::Scalar(0, 255, 0), 1);
-            }
-        }
-    }
-}
-
-int main(int argc, char** argv) {
-    const char* video_file = (argc > 1) ? argv[1] : "test_video.mp4";
-    int num_test_frames = (argc > 2) ? atoi(argv[2]) : 5;
-
-    printf("Opening video: %s\n", video_file);
-    cv::VideoCapture cap(video_file);
-
-    if (!cap.isOpened()) {
-        fprintf(stderr, "Error: Cannot open video file %s\n", video_file);
-        return 1;
-    }
-
-    int total_frames = (int)cap.get(cv::CAP_PROP_FRAME_COUNT);
-    int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
-    int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);
-
-    printf("Video: %dx%d, %d frames\n", width, height, total_frames);
-
-    if (total_frames < 10) {
-        fprintf(stderr, "Error: Video too short (need at least 10 frames)\n");
-        return 1;
-    }
-
-    // Calculate mesh dimensions (32×32 pixel cells, matches current encoder)
-    int mesh_cell_size = 32;
-    int mesh_w = (width + mesh_cell_size - 1) / mesh_cell_size;
-    int mesh_h = (height + mesh_cell_size - 1) / mesh_cell_size;
-    if (mesh_w < 2) mesh_w = 2;
-    if (mesh_h < 2) mesh_h = 2;
-
-    printf("Mesh: %dx%d (approx %dx%d px cells)\n",
-           mesh_w, mesh_h, width / mesh_w, height / mesh_h);
-
-    // Encoder parameters (match current encoder_tav.c settings)
-    float smoothness = 0.5f;      // Mesh smoothness weight
-    int smooth_iterations = 8;     // Smoothing iterations
-    float affine_threshold = 0.40f; // 40% improvement required for affine
-
-    printf("Settings: smoothness=%.2f, iterations=%d, affine_threshold=%.0f%%\n",
-           smoothness, smooth_iterations, affine_threshold * 100.0f);
-
-    // Seed random number generator
-    srand(time(NULL));
-
-    // Pick random frames (avoid first and last 5 frames)
-    printf("\nTesting %d random frame pairs:\n", num_test_frames);
-    for (int test = 0; test < num_test_frames; test++) {
-        // Pick random frame (ensure we have a previous frame)
-        int frame_num = 5 + rand() % (total_frames - 10);
-
-        printf("\n[Test %d/%d] Warping frame %d → frame %d (inverse warp)\n",
-               test + 1, num_test_frames, frame_num - 1, frame_num);
-
-        // Read previous frame (source for warping)
-        cap.set(cv::CAP_PROP_POS_FRAMES, frame_num - 1);
-
-        cv::Mat prev_frame;
-        cap >> prev_frame;
-        if (prev_frame.empty()) {
-            fprintf(stderr, "Error reading frame %d\n", frame_num - 1);
-            continue;
-        }
-
-        // Read current frame (target to match)
-        cv::Mat curr_frame;
-        cap >> curr_frame;
-        if (curr_frame.empty()) {
-            fprintf(stderr, "Error reading frame %d\n", frame_num);
-            continue;
-        }
-
-        // Convert to RGB for block matching
-        cv::Mat prev_rgb, curr_rgb;
-        cv::cvtColor(prev_frame, prev_rgb, cv::COLOR_BGR2RGB);
-        cv::cvtColor(curr_frame, curr_rgb, cv::COLOR_BGR2RGB);
-
-        // Compute hierarchical block matching (replaces optical flow)
-        printf("  Computing hierarchical block matching...\n");
-        float *flow_x = nullptr, *flow_y = nullptr;
-        estimate_motion_optical_flow(
-            prev_rgb.data, curr_rgb.data,
-            width, height,
-            &flow_x, &flow_y
-        );
-
-        // Build mesh from flow
-        printf("  Building mesh from block matches...\n");
-        int16_t *mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
-        int16_t *mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
-        build_mesh_from_flow(flow_x, flow_y, width, height, mesh_w, mesh_h, mesh_dx, mesh_dy);
-
-        // Apply Laplacian smoothing
-        printf("  Applying Laplacian smoothing (%d iterations, %.2f weight)...\n",
-               smooth_iterations, smoothness);
-        smooth_mesh_laplacian(mesh_dx, mesh_dy, mesh_w, mesh_h, smoothness, smooth_iterations);
-
-        // Estimate selective per-cell affine transforms
-        printf("  Estimating selective affine transforms (threshold=%.0f%%)...\n",
-               affine_threshold * 100.0f);
-        uint8_t *affine_mask = (uint8_t*)calloc(mesh_w * mesh_h, sizeof(uint8_t));
-        int16_t *affine_a11 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
-        int16_t *affine_a12 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
-        int16_t *affine_a21 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
-        int16_t *affine_a22 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
-
-        int cell_w = width / mesh_w;
-        int cell_h = height / mesh_h;
-        int affine_count = 0;
-
-        for (int cy = 0; cy < mesh_h; cy++) {
-            for (int cx = 0; cx < mesh_w; cx++) {
-                int cell_idx = cy * mesh_w + cx;
-
-                int16_t tx, ty, a11, a12, a21, a22;
-                int use_affine = estimate_cell_affine(
-                    flow_x, flow_y,
-                    width, height,
-                    cx, cy, cell_w, cell_h,
-                    affine_threshold,
-                    &tx, &ty, &a11, &a12, &a21, &a22
-                );
-
-                affine_mask[cell_idx] = use_affine ? 1 : 0;
-                mesh_dx[cell_idx] = tx;
-                mesh_dy[cell_idx] = ty;
-                affine_a11[cell_idx] = a11;
-                affine_a12[cell_idx] = a12;
-                affine_a21[cell_idx] = a21;
-                affine_a22[cell_idx] = a22;
-
-                if (use_affine) affine_count++;
-            }
-        }
-
-        printf("  Affine usage: %d/%d cells (%.1f%%)\n",
-               affine_count, mesh_w * mesh_h,
-               100.0f * affine_count / (mesh_w * mesh_h));
-
-        // Warp previous frame to current frame
-        printf("  Warping frame with mesh + affine...\n");
-        cv::Mat warped;
-        apply_mesh_warp_rgb(prev_frame, warped, mesh_dx, mesh_dy,
-                           affine_mask, affine_a11, affine_a12, affine_a21, affine_a22,
-                           mesh_w, mesh_h);
-
-        // Create visualisation with affine overlay
-        cv::Mat warped_viz = warped.clone();
-        create_affine_overlay(warped_viz, affine_mask, mesh_w, mesh_h);
-
-        // Compute MSE between warped and target
-        double mse = 0.0;
-        for (int y = 0; y < height; y++) {
-            for (int x = 0; x < width; x++) {
-                for (int c = 0; c < 3; c++) {
-                    double diff = (double)warped.at<cv::Vec3b>(y, x)[c] -
-                                 (double)curr_frame.at<cv::Vec3b>(y, x)[c];
-                    mse += diff * diff;
-                }
-            }
-        }
-        mse /= (width * height * 3);
-        double psnr = (mse > 0) ? 10.0 * log10(255.0 * 255.0 / mse) : 999.0;
-        printf("  Warp quality: MSE=%.2f, PSNR=%.2f dB\n", mse, psnr);
-
-        // Save images
-        char filename[256];
-        sprintf(filename, "test_mesh_frame_%04d_source.png", frame_num - 1);
-        cv::imwrite(filename, prev_frame);
-        printf("  Saved source: %s\n", filename);
-
-        sprintf(filename, "test_mesh_frame_%04d_warped.png", frame_num);
-        cv::imwrite(filename, warped);
-        printf("  Saved warped: %s\n", filename);
-
-        sprintf(filename, "test_mesh_frame_%04d_warped_viz.png", frame_num);
-        cv::imwrite(filename, warped_viz);
-        printf("  Saved warped+viz (green=affine): %s\n", filename);
-
-        sprintf(filename, "test_mesh_frame_%04d_target.png", frame_num);
-        cv::imwrite(filename, curr_frame);
-        printf("  Saved target: %s\n", filename);
-
-        // Compute difference image
-        cv::Mat diff_img = cv::Mat::zeros(height, width, CV_8UC3);
-        for (int y = 0; y < height; y++) {
-            for (int x = 0; x < width; x++) {
-                for (int c = 0; c < 3; c++) {
-                    int diff = abs((int)warped.at<cv::Vec3b>(y, x)[c] -
-                                  (int)curr_frame.at<cv::Vec3b>(y, x)[c]);
-                    diff_img.at<cv::Vec3b>(y, x)[c] = std::min(diff * 3, 255); // Amplify for visibility
-                }
-            }
-        }
-        sprintf(filename, "test_mesh_frame_%04d_diff.png", frame_num);
-        cv::imwrite(filename, diff_img);
-        printf("  Saved difference (amplified 3x): %s\n", filename);
-
-        // Compute motion statistics
-        float max_motion = 0.0f, avg_motion = 0.0f;
-        for (int i = 0; i < mesh_w * mesh_h; i++) {
-            float dx = mesh_dx[i] / 8.0f;
-            float dy = mesh_dy[i] / 8.0f;
-            float motion = sqrtf(dx * dx + dy * dy);
-            avg_motion += motion;
-            if (motion > max_motion) max_motion = motion;
-        }
-        avg_motion /= (mesh_w * mesh_h);
-        printf("  Motion: avg=%.2f px, max=%.2f px\n", avg_motion, max_motion);
-
-        // Cleanup
-        free(flow_x);
-        free(flow_y);
-        free(mesh_dx);
-        free(mesh_dy);
-        free(affine_mask);
-        free(affine_a11);
-        free(affine_a12);
-        free(affine_a21);
-        free(affine_a22);
-    }
-
-    printf("\nDone! Check output images:\n");
-    printf("  *_source.png: Original frame before warping\n");
-    printf("  *_warped.png: Warped frame (should match target)\n");
-    printf("  *_warped_viz.png: Warped with green overlay showing affine cells\n");
-    printf("  *_target.png: Target frame to match\n");
-    printf("  *_diff.png: Difference image (should be mostly black if warp is good)\n");
-
-    cap.release();
-    return 0;
-}