video_decoder removed; fix video regression and updated to no-zstd

2026-06-06 05:28:31 +09:00 · 2026-05-10 05:56:56 +09:00
parent b27ef0dbf9
commit 2cdd731c3b
63 changed files with 127 additions and 31850 deletions
--- a/.idea/libraries/badlogicgames_gdx.xml
+++ b/.idea/libraries/badlogicgames_gdx.xml
@@ -0,0 +1,11 @@
 <component name="libraryTable">
  <library name="badlogicgames.gdx" type="repository">
    <properties maven-id="com.badlogicgames.gdx:gdx:1.12.1" />
    <CLASSES>
      <root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/gdx/gdx/1.12.1/gdx-1.12.1.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/gdx/gdx-jnigen-loader/2.3.1/gdx-jnigen-loader-2.3.1.jar!/" />
    </CLASSES>
    <JAVADOC />
    <SOURCES />
  </library>
 </component>
--- a/.idea/libraries/badlogicgames_gdx_backend_lwjgl3.xml
+++ b/.idea/libraries/badlogicgames_gdx_backend_lwjgl3.xml
@@ -0,0 +1,62 @@
 <component name="libraryTable">
  <library name="badlogicgames.gdx.backend.lwjgl3" type="repository">
    <properties maven-id="com.badlogicgames.gdx:gdx-backend-lwjgl3:1.12.1" />
    <CLASSES>
      <root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/gdx/gdx-backend-lwjgl3/1.12.1/gdx-backend-lwjgl3-1.12.1.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/gdx/gdx/1.12.1/gdx-1.12.1.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/gdx/gdx-jnigen-loader/2.3.1/gdx-jnigen-loader-2.3.1.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-linux.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-linux-arm32.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-linux-arm64.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-macos.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-macos-arm64.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-windows.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-windows-x86.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-linux.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-linux-arm32.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-linux-arm64.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-macos.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-macos-arm64.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-windows.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-windows-x86.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-linux.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-linux-arm32.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-linux-arm64.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-macos.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-macos-arm64.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-windows.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-windows-x86.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-linux.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-linux-arm32.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-linux-arm64.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-macos.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-macos-arm64.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-windows.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-windows-x86.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-linux.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-linux-arm32.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-linux-arm64.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-macos.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-macos-arm64.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-windows.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-windows-x86.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-linux.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-linux-arm32.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-linux-arm64.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-macos.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-macos-arm64.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-windows.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-windows-x86.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/jlayer/jlayer/1.0.1-gdx/jlayer-1.0.1-gdx.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/jcraft/jorbis/0.0.17/jorbis-0.0.17.jar!/" />
    </CLASSES>
    <JAVADOC />
    <SOURCES />
  </library>
 </component>
--- a/2taud.sh
+++ b/2taud.sh
@@ -1,8 +1,8 @@
 #!/usr/bin/env fish
-for f in *.mod; python3 mod2taud.py $f assets/disk0/(basename $f .mod).taud; end
+for f in *.mod; python3 mod2taud.py $f assets/disk0/home/music/(basename $f .mod).taud; end
-for f in *.s3m; python3 s3m2taud.py $f assets/disk0/(basename $f .s3m).taud; end
+for f in *.s3m; python3 s3m2taud.py $f assets/disk0/home/music/(basename $f .s3m).taud; end
-for f in *.it; python3 it2taud.py $f assets/disk0/(basename $f .it).taud; end
+for f in *.it; python3 it2taud.py $f assets/disk0/home/music/(basename $f .it).taud; end
-for f in *.xm; python3 xm2taud.py $f assets/disk0/(basename $f .xm).taud; end
+for f in *.xm; python3 xm2taud.py $f assets/disk0/home/music/(basename $f .xm).taud; end
-for f in *.mon; python3 mon2taud.py $f assets/disk0/(basename $f .mon).taud; end
+for f in *.mon; python3 mon2taud.py $f assets/disk0/home/music/(basename $f .mon).taud; end
-for f in *.MON; python3 mon2taud.py $f assets/disk0/(basename $f .MON).taud; end
+for f in *.MON; python3 mon2taud.py $f assets/disk0/home/music/(basename $f .MON).taud; end
--- a/TerranBASICexecutable/TerranBASICexecutable.iml
+++ b/TerranBASICexecutable/TerranBASICexecutable.iml
@@ -10,5 +10,7 @@
    <orderEntry type="module" module-name="tsvm_core" />
    <orderEntry type="library" name="TerranVirtualDisk" level="project" />
    <orderEntry type="library" name="lib" level="project" />
    <orderEntry type="library" name="badlogicgames.gdx" level="project" />
    <orderEntry type="library" name="badlogicgames.gdx.backend.lwjgl3" level="project" />
  </component>
 </module>
--- a/assets/disk0/tvdos/bin/playtad.js
+++ b/assets/disk0/tvdos/bin/playtad.js
@@ -1,7 +1,9 @@
 const SND_BASE_ADDR = audio.getBaseAddr()
 const SND_MEM_ADDR = audio.getMemAddr()
-const TAD_INPUT_ADDR = SND_MEM_ADDR - 262144  // TAD input buffer (matches TAV packet 0x24)
+// tadInputBin lives at audio-local offset 917504 and tadDecodedBin at 983040
-const TAD_DECODED_ADDR = SND_MEM_ADDR - 262144 + 65536  // TAD decoded buffer
+// (post-bef85f6 memory map; the old 262144 offset now hits the enlarged sampleBin).
 const TAD_INPUT_ADDR = SND_MEM_ADDR - 917504  // TAD input buffer (matches TAV packet 0x24)
 const TAD_DECODED_ADDR = SND_MEM_ADDR - 983040  // TAD decoded buffer
 if (!SND_BASE_ADDR) return 10
--- a/assets/disk0/tvdos/bin/playtav.js
+++ b/assets/disk0/tvdos/bin/playtav.js
@@ -1746,7 +1746,9 @@ try {
                    tadInitialised = true
                }
-                seqread.readBytes(payloadLen, SND_MEM_ADDR - 262144)
+                // tadInputBin lives at audio-local offset 917504 (post-bef85f6 memory map);
                // the previous 262144 offset now points into the enlarged sampleBin.
                seqread.readBytes(payloadLen, SND_MEM_ADDR - 917504)
                audio.tadDecode()
                audio.tadUploadDecoded(AUDIO_DEVICE, sampleLen)
            }
--- a/tsvm_core/src/net/torvald/tsvm/AudioJSR223Delegate.kt
+++ b/tsvm_core/src/net/torvald/tsvm/AudioJSR223Delegate.kt
@@ -275,6 +275,7 @@ class AudioJSR223Delegate(private val vm: VM) {
    // while the following code does work, it was decided that MP3 is "too new" for tsvm and thus removed.
    /*
    js-mp3
    https://github.com/soundbus-technologies/js-mp3
--- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
+++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
@@ -5433,6 +5433,18 @@ class GraphicsJSR223Delegate(private val vm: VM) {
    private val TAV_QLUT = intArrayOf(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096)
    // Zstd frames begin with the bytes 0x28 0xB5 0x2F 0xFD (the frame magic in
    // little-endian order). Newer TAV files default to no Zstd (Video Flags bit 4),
    // so sniffing the magic lets one decoder path handle compressed and raw payloads.
    private fun tavDecompressIfZstd(data: ByteArray): ByteArray {
        // Too short to carry the 4-byte magic: hand the payload back untouched.
        if (data.size < 4) return data
        val looksLikeZstd = data[0] == 0x28.toByte() && data[1] == 0xB5.toByte() &&
                data[2] == 0x2F.toByte() && data[3] == 0xFD.toByte()
        if (!looksLikeZstd) return data
        return ZstdInputStream(ByteArrayInputStream(data)).use { stream -> stream.readBytes() }
    }
    // New tavDecode function that accepts compressed data and decompresses internally
    fun tavDecodeCompressed(compressedDataPtr: Long, compressedSize: Int, currentRGBAddr: Long, prevRGBAddr: Long,
                            width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int,
@@ -5445,12 +5457,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
        }
        return try {
-            // Decompress using Zstd
+            // Decompress with Zstd if the payload starts with the Zstd frame magic;
-            val bais = ByteArrayInputStream(compressedData)
+            // otherwise pass through (TAV files written without --zstd-level).
-            val zis = ZstdInputStream(bais)
+            val decompressedData = tavDecompressIfZstd(compressedData)
            val decompressedData = zis.readBytes()
            zis.close()
            bais.close()
            // Allocate buffer for decompressed data
            val decompressedBuffer = vm.malloc(decompressedData.size)
@@ -6725,9 +6734,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
        )
        val decompressedData = try {
-            ZstdInputStream(java.io.ByteArrayInputStream(compressedData)).use { zstd ->
+            // Decompress with Zstd if the payload starts with the Zstd frame magic;
-                zstd.readBytes()
+            // otherwise pass through (TAV files written without --zstd-level).
-            }
+            tavDecompressIfZstd(compressedData)
        } catch (e: Exception) {
            println("ERROR: Zstd decompression failed: ${e.message}")
            return arrayOf(0, dbgOut)
--- a/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt
+++ b/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt
@@ -911,24 +911,32 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                            ((tadInputBin[offset++].toUint()) shl 8)
                    )
            val maxIndex = tadInputBin[offset++].toUint()
-            val payloadSize = (
+            val payloadSizeField = (
                    (tadInputBin[offset++].toUint()) or
                            ((tadInputBin[offset++].toUint()) shl 8) or
                            ((tadInputBin[offset++].toUint()) shl 16) or
                            ((tadInputBin[offset++].toUint()) shl 24)
                    )
-            // Decompress payload
+            // MSB of payload size = 1 means the payload is stored uncompressed (no Zstd).
            val payloadIsRaw = (payloadSizeField and 0x80000000.toInt()) != 0
            val payloadSize = payloadSizeField and 0x7FFFFFFF
            // Read payload bytes
            val compressed = ByteArray(payloadSize)
            UnsafeHelper.memcpyRaw(null, tadInputBin.ptr + offset, compressed, UnsafeHelper.getArrayOffset(compressed), payloadSize.toLong())
-            val payload: ByteArray = try {
+            val payload: ByteArray = if (payloadIsRaw) {
-                ZstdInputStream(ByteArrayInputStream(compressed)).use { zstd ->
+                compressed
-                    zstd.readBytes()
+            } else {
                try {
                    ZstdInputStream(ByteArrayInputStream(compressed)).use { zstd ->
                        zstd.readBytes()
                    }
                } catch (e: Exception) {
                    println("ERROR: Zstd decompression failed: ${e.message}")
                    return
                }
            } catch (e: Exception) {
                println("ERROR: Zstd decompression failed: ${e.message}")
                return
            }
            // Decode using binary tree EZBC - FIXED!
--- a/tsvm_core/tsvm_core.iml
+++ b/tsvm_core/tsvm_core.iml
@@ -12,5 +12,7 @@
    <orderEntry type="library" name="jetbrains.kotlin.reflect" level="project" />
    <orderEntry type="library" name="jetbrains.kotlin.test" level="project" />
    <orderEntry type="library" name="lib" level="project" />
    <orderEntry type="library" name="badlogicgames.gdx" level="project" />
    <orderEntry type="library" name="badlogicgames.gdx.backend.lwjgl3" level="project" />
  </component>
 </module>
--- a/tsvm_executable.iml
+++ b/tsvm_executable.iml
@@ -10,5 +10,7 @@
    <orderEntry type="library" name="TerranVirtualDisk" level="project" />
    <orderEntry type="module" module-name="tsvm_core" />
    <orderEntry type="library" name="lib" level="project" />
    <orderEntry type="library" name="badlogicgames.gdx" level="project" />
    <orderEntry type="library" name="badlogicgames.gdx.backend.lwjgl3" level="project" />
  </component>
 </module>
--- a/video_encoder/Makefile
+++ b/video_encoder/Makefile
@@ -1,221 +0,0 @@
 # Created by CuriousTorvald and Claude on 2025-08-17.
 # Makefile for the TSVM Advanced Video (TAV) and Advanced Audio (TAD) encoders, decoders and libraries
 CC = gcc
 CXX = g++
 CFLAGS = -std=c99 -Wall -Wextra -Ofast -D_GNU_SOURCE -march=native -mavx512f -mavx512dq -mavx512bw -mavx512vl -Iinclude
 CXXFLAGS = -std=c++11 -Wall -Wextra -Ofast -D_GNU_SOURCE -march=native -mavx512f -mavx512dq -mavx512bw -mavx512vl -Iinclude
 DBGFLAGS =
 PREFIX = /usr/local
 # Zstd flags (use pkg-config if available, fallback for cross-platform compatibility)
 ZSTD_CFLAGS = $(shell pkg-config --cflags libzstd 2>/dev/null || echo "")
 ZSTD_LIBS = $(shell pkg-config --libs libzstd 2>/dev/null || echo "-lzstd")
 LIBS = -lm $(ZSTD_LIBS)
 # =============================================================================
 # Library Object Files
 # =============================================================================
 # libtavenc - TAV encoder library
 LIBTAVENC_OBJ = lib/libtavenc/tav_encoder_lib.o \
                lib/libtavenc/tav_encoder_color.o \
                lib/libtavenc/tav_encoder_dwt.o \
                lib/libtavenc/tav_encoder_quantize.o \
                lib/libtavenc/tav_encoder_ezbc.o \
                lib/libtavenc/tav_encoder_utils.o \
                lib/libtavenc/tav_encoder_tile.o
 # libtavdec - TAV decoder library
 LIBTAVDEC_OBJ = lib/libtavdec/tav_video_decoder.o
 # libtadenc - TAD encoder library
 LIBTADENC_OBJ = lib/libtadenc/encoder_tad.o
 # libtaddec - TAD decoder library
 LIBTADDEC_OBJ = lib/libtaddec/decoder_tad.o
 # libfec - Forward Error Correction library (LDPC + Reed-Solomon)
 LIBFEC_OBJ = lib/libfec/ldpc.o lib/libfec/reed_solomon.o lib/libfec/ldpc_payload.o
 # =============================================================================
 # Targets
 # =============================================================================
 # Source files and targets
 TARGETS = libs encoder_tav_ref decoder_tav_ref tav_inspector tad tav_dt
 LIBRARIES = lib/libtavenc.a lib/libtavdec.a lib/libtadenc.a lib/libtaddec.a lib/libfec.a
 TAV_TARGETS = encoder_tav_ref decoder_tav_ref tav_inspector
 TAD_TARGETS = encoder_tad decoder_tad
 DT_TARGETS = encoder_tav_dt decoder_tav_dt tavdt_noise_injector
 # Build all encoders (default)
 all: clean $(TARGETS)
 # Build all libraries
 libs: $(LIBRARIES)
 # Reference encoder using libtavenc (replaces old monolithic encoder)
 encoder_tav_ref: src/encoder_tav.c lib/libtavenc.a lib/libtadenc.a
 	rm -f encoder_tav_ref
 	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -o encoder_tav_ref src/encoder_tav.c lib/libtavenc.a lib/libtadenc.a $(LIBS)
 	@echo ""
 	@echo "Reference encoder built: encoder_tav_ref"
 	@echo "This is the official reference implementation with all features"
 # Reference decoder using libtavdec (replaces old monolithic decoder)
 decoder_tav_ref: src/decoder_tav.c lib/libtavdec.a lib/libtaddec.a
 	rm -f decoder_tav_ref
 	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -o decoder_tav_ref src/decoder_tav.c lib/libtavdec.a lib/libtaddec.a $(LIBS)
 	@echo ""
 	@echo "Reference decoder built: decoder_tav_ref"
 	@echo "This is the official reference implementation with all features"
 tav_inspector: tav_inspector.c lib/libfec.a
 	rm -f tav_inspector
 	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Ilib/libfec -o tav_inspector $< lib/libfec.a $(LIBS)
 tav: $(TAV_TARGETS)
 # Build TAD (TSVM Advanced Audio) tools
 encoder_tad: src/encoder_tad_standalone.c lib/libtadenc/encoder_tad.c include/encoder_tad.h
 	rm -f encoder_tad encoder_tad_standalone.o encoder_tad.o
 	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c lib/libtadenc/encoder_tad.c -o encoder_tad.o
 	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c src/encoder_tad_standalone.c -o encoder_tad_standalone.o
 	$(CC) $(DBGFLAGS) -o encoder_tad encoder_tad_standalone.o encoder_tad.o $(LIBS)
 decoder_tad: lib/libtaddec/decoder_tad.c
 	rm -f decoder_tad
 	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -o decoder_tad $< $(LIBS)
 # Build all TAD tools
 tad: $(TAD_TARGETS)
 # =============================================================================
 # Library Build Rules
 # =============================================================================
 # Compile library object files
 lib/libtavenc/%.o: lib/libtavenc/%.c
 	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c $< -o $@
 lib/libtavdec/%.o: lib/libtavdec/%.c
 	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c $< -o $@
 lib/libtadenc/%.o: lib/libtadenc/%.c
 	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c $< -o $@
 lib/libtaddec/%.o: lib/libtaddec/%.c
 	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -DTAD_DECODER_LIB -c $< -o $@
 lib/libfec/%.o: lib/libfec/%.c
 	$(CC) $(CFLAGS) -Ilib/libfec -c $< -o $@
 # Build static libraries
 lib/libtavenc.a: $(LIBTAVENC_OBJ)
 	ar rcs $@ $^
 lib/libtavdec.a: $(LIBTAVDEC_OBJ)
 	ar rcs $@ $^
 lib/libtadenc.a: $(LIBTADENC_OBJ)
 	ar rcs $@ $^
 lib/libtaddec.a: $(LIBTADDEC_OBJ)
 	ar rcs $@ $^
 lib/libfec.a: $(LIBFEC_OBJ)
 	ar rcs $@ $^
 # =============================================================================
 # TAV-DT (Digital Tape) Encoder/Decoder
 # =============================================================================
 # TAV-DT encoder with FEC (multithreaded)
 encoder_tav_dt: src/encoder_tav_dt.c lib/libtavenc.a lib/libtadenc.a lib/libfec.a
 	rm -f encoder_tav_dt
 	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -Ilib/libfec -o encoder_tav_dt src/encoder_tav_dt.c lib/libtavenc.a lib/libtadenc.a lib/libfec.a $(LIBS) -lpthread
 	@echo ""
 	@echo "TAV-DT encoder built: encoder_tav_dt"
 	@echo "Digital Tape format with LDPC and Reed-Solomon FEC (multithreaded)"
 # TAV-DT decoder with FEC (multithreaded)
 decoder_tav_dt: src/decoder_tav_dt.c lib/libtavdec.a lib/libtaddec.a lib/libfec.a
 	rm -f decoder_tav_dt
 	$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -Ilib/libfec -o decoder_tav_dt src/decoder_tav_dt.c lib/libtavdec.a lib/libtaddec.a lib/libfec.a $(LIBS) -lpthread
 	@echo ""
 	@echo "TAV-DT decoder built: decoder_tav_dt"
 	@echo "Digital Tape format with LDPC and Reed-Solomon FEC (multithreaded)"
 # TAV-DT noise injector (channel simulator)
 tavdt_noise_injector: tavdt_noise_injector.c
 	rm -f tavdt_noise_injector
 	$(CC) -std=c99 -Wall -Ofast -D_GNU_SOURCE -o tavdt_noise_injector tavdt_noise_injector.c -lm
 	@echo ""
 	@echo "TAV-DT noise injector built: tavdt_noise_injector"
 	@echo "Simulates QPSK satellite channel noise (AWGN + burst)"
 # Build all TAV-DT tools
 tav_dt: $(DT_TARGETS)
 # Build with debug symbols
 debug: CFLAGS += -g -DDEBUG -fsanitize=address -fno-omit-frame-pointer
 debug: DBGFLAGS += -fsanitize=address -fno-omit-frame-pointer
 debug: clean $(TARGETS)
 # Clean build artifacts
 clean:
 	rm -f $(TARGETS) $(TAD_TARGETS) $(DT_TARGETS) $(LIBRARIES) *.o lib/*/*.o
 # Install (copy to PATH)
 install: $(TARGETS)
 	cp encoder_tav_ref $(PREFIX)/bin/
 	cp decoder_tav_ref $(PREFIX)/bin/
 	cp encoder_tad $(PREFIX)/bin/
 	cp decoder_tad $(PREFIX)/bin/
 	cp encoder_tav_dt $(PREFIX)/bin/
 	cp decoder_tav_dt $(PREFIX)/bin/
 	cp tav_inspector $(PREFIX)/bin/
 # Check for required dependencies
 check-deps:
 	@echo "Checking dependencies..."
 	@pkg-config --exists libzstd || (echo "Error: libzstd-dev not found. Install libzstd-dev or equivalent" && exit 1)
 	@echo "All dependencies found."
 # Help
 help:
 	@echo "TSVM Advanced Video (TAV) and Audio (TAD) Encoders"
 	@echo ""
 	@echo "Targets:"
 	@echo "  all          - Build video encoders (default)"
 	@echo "  libs         - Build all codec libraries (.a files)"
 	@echo "  tav          - Build the TAV advanced video encoder"
 	@echo "  tav_dt       - Build all TAV-DT (Digital Tape) tools with FEC"
 	@echo "  tavdt_noise_injector - Build TAV-DT channel noise simulator"
 	@echo "  tad          - Build all TAD audio tools (encoder, decoder)"
 	@echo "  encoder_tad  - Build TAD audio encoder"
 	@echo "  decoder_tad  - Build TAD audio decoder"
 	@echo "  tests        - Build test programs"
 	@echo "  debug        - Build with debug symbols"
 	@echo "  clean        - Remove build artifacts"
 	@echo "  install      - Install to /usr/local/bin"
 	@echo "  check-deps   - Check for required dependencies"
 	@echo "  help         - Show this help"
 	@echo ""
 	@echo "Libraries:"
 	@echo "  lib/libtavenc.a  - TAV encoder library"
 	@echo "  lib/libtavdec.a  - TAV decoder library"
 	@echo "  lib/libtadenc.a  - TAD encoder library"
 	@echo "  lib/libtaddec.a  - TAD decoder library"
 	@echo "  lib/libfec.a     - Forward Error Correction library (LDPC + RS)"
 	@echo ""
 	@echo "Usage:"
 	@echo "  make               # Build video encoders"
 	@echo "  make libs          # Build all libraries"
 	@echo "  make tav           # Build TAV encoder"
 	@echo "  make tav_dt        # Build TAV-DT encoder/decoder with FEC"
 	@echo "  make tad           # Build all TAD audio tools"
 	@echo "  sudo make install  # Install all encoders"
 .PHONY: all libs clean install check-deps help debug tad tav_dt tests
--- a/video_encoder/TAD_README.md
+++ b/video_encoder/TAD_README.md
@@ -1,350 +0,0 @@
 # TAD - TSVM Advanced Audio Codec
 A perceptually-optimised wavelet-based audio codec designed for resource-constrained systems, featuring CDF 9/7 wavelets, EZBC sparse coding, and sophisticated perceptual quantisation.
 ## Overview
 TAD (TSVM Advanced Audio) is a modern audio codec built on discrete wavelet transform (DWT) using Cohen-Daubechies-Feauveau (CDF) 9/7 biorthogonal wavelets. It combines perceptual quantisation, advanced entropy coding, and careful optimisation for resource-constrained systems.
 ### Key Advantages
 - **Perceptual optimisation**: HVS-aware quantisation preserves audio quality where it matters
 - **Efficient sparse coding**: EZBC encoding exploits coefficient sparsity (86.9% zeros in typical content)
 - **Variable chunk sizes**: Supports any chunk size ≥1024 samples, including non-power-of-2
 - **Stereo decorrelation**: Mid/Side encoding exploits stereo correlation for better compression
 - **Hardware-friendly**: Designed for efficient decoding on resource-constrained platforms
 ## Features
 ### Compression Technology
 - **CDF 9/7 Biorthogonal Wavelets**
  - 9-level fixed decomposition for all chunk sizes
  - Lifting scheme implementation for efficient computation
  - Optimal frequency discrimination for audio signals
 - **Pre-processing**
  - First-order IIR pre-emphasis filter (α=0.5) shifts quantisation noise to lower frequencies, where it is less objectionable to listeners
  - Gamma companding (γ=0.5) for dynamic range compression before quantisation
  - Mid/Side stereo transformation exploits stereo correlation
  - Lambda companding (λ=6.0) with Laplacian CDF mapping for full bit utilisation
 - **Perceptual Quantisation**
  - Channel-specific (Mid/Side) frequency-dependent weights
  - Subband-aware quantisation preserves perceptually important frequencies
 - **EZBC Encoding**
  - Binary tree embedded zero block coding
  - Exploits coefficient sparsity (86.9% Mid, 97.8% Side typical)
  - Progressive refinement structure
  - Spatial clustering of non-zero coefficients
 - **Entropy Coding**
  - Zstandard compression (level 7) on concatenated EZBC bitstreams
  - Cross-channel compression optimisation
  - Optional Zstd bypass for debugging
 ### Audio Format
 - **Sample Rate**: 32 kHz (TSVM audio hardware native format)
 - **Channels**: Stereo (L/R input, Mid/Side internal representation)
 - **Chunk Sizes**: Variable, any size ≥1024 samples (including non-power-of-2)
 - **Bit Depth**: 32-bit float internal, 8-bit unsigned PCM output with noise-shaped dithering
 - **Bandwidth**: Full 0-16 kHz frequency range preserved
 ### Quality Levels
 Six quality levels (0-5) provide a wide range of compression/quality trade-offs:
 - **Level 0**: Lowest quality, smallest file size
 - **Level 3**: Default, balanced quality/compression (2.51:1 vs PCMu8)
 - **Level 5**: Highest quality, largest file size
 Quality levels are designed to be synchronised with the TAV video codec for unified encoding.
 ## Building
 ### Prerequisites
 - C compiler (GCC/Clang)
 - Zstandard library (libzstd)
 - Math library (libm)
 ### Compilation
 ```bash
 # Build TAD encoder/decoder
 make tad
 # Build all tools
 make all
 # Clean build artifacts
 make clean
 ```
 ### Build Targets
 - `encoder_tad` - Standalone audio encoder with FFmpeg calls
 - `decoder_tad` - Standalone audio decoder
 ## Usage
 ### Basic Encoding
 Encoding requires the FFmpeg executable to be installed on your system.
 ```bash
 # Default encoding (quality level 3)
 ./encoder_tad -i input.mp3 -o output.tad
 # Specify quality level (0-5)
 ./encoder_tad -i input.m4a -o output.tad -q 0    # Lowest quality
 ./encoder_tad -i input.ogg -o output.tad -q 5    # Highest quality
 # Disable Zstd compression (for debugging)
 ./encoder_tad -i input.opus -o output.tad --no-zstd
 # Verbose output with statistics
 ./encoder_tad -i input.flac -o output.tad -v
 ```
 ### Decoding
 ```bash
 # Decode to PCMu8
 ./decoder_tad -i input.tad -o output.pcm --raw-pcm
 # Decode to WAV
 ./decoder_tad -i input.tad -o output.wav
 ```
 ### Input Formats
 TAD encoder accepts any audio format supported by FFmpeg:
 - Audio files: WAV, MP3, FLAC, OGG, AAC, etc.
 - Video files with audio streams: MP4, MKV, AVI, etc.
 - Raw PCM formats
 Audio is automatically resampled to 32 kHz stereo if necessary.
 ## Technical Architecture
 ### Encoder Pipeline
 1. **Input Processing**
   - FFmpeg demuxing and audio stream extraction
   - Resampling to 32 kHz stereo
   - Conversion to PCM32f
 2. **Pre-emphasis Filter**
   - First-order IIR filter with α=0.5
   - Shifts quantisation noise toward lower frequencies
   - Improves perceptual quality
 3. **Gamma Companding**
   - Dynamic range compression with γ=0.5
   - Applied independently to each sample
   - Reduces quantisation error for low-amplitude signals
 4. **Stereo Decorrelation**
   - Left/Right to Mid/Side transformation
   - Mid = (L + R) / 2
   - Side = (L - R) / 2
   - Exploits stereo correlation for better compression
 5. **9-Level CDF 9/7 DWT**
   - Fixed 9 decomposition levels for all chunk sizes
   - Forward lifting scheme implementation
   - Correct length tracking for non-power-of-2 sizes
 6. **Perceptual Quantisation**
   - Channel-specific (Mid/Side) subband weights
   - Lambda companding with λ=6.0
   - Laplacian CDF mapping: `sign(x) * floor(λ * log(1 + |x|/λ))`
   - Quantised to int8 coefficients
 7. **EZBC Encoding**
   - Binary tree structure per channel
   - Progressive refinement by bitplanes
   - Zero block coding exploits sparsity
   - Independent bitstreams for Mid and Side
 8. **Zstd Compression**
   - Level 7 compression on concatenated `[Mid_bitstream][Side_bitstream]`
   - Cross-channel optimisation opportunities
   - Adaptive compression based on content
 ### Decoder Pipeline
 1. **Container Parsing**
   - TAD packet identification (type 0x24)
   - Chunk size extraction
   - Compressed data boundaries
 2. **Zstd Decompression**
   - Decompress concatenated bitstreams
   - Split into Mid and Side EZBC streams
 3. **EZBC Decoding**
   - Binary tree decoder per channel
   - Reconstruct quantised int8 coefficients
   - Progressive refinement reconstruction
 4. **Lambda Decompanding**
   - Inverse Laplacian CDF with channel-specific weights
   - Reconstruct float32 DWT coefficients
   - Apply subband-specific perceptual weights
 5. **9-Level Inverse CDF 9/7 DWT**
   - Inverse lifting scheme implementation
   - Correct length tracking for non-power-of-2 chunk sizes
   - Pre-calculated length sequence from forward transform
 6. **Mid/Side to Left/Right**
   - L = Mid + Side
   - R = Mid - Side
   - Reconstruct stereo channels
 7. **Gamma Decompanding**
   - Inverse gamma with γ⁻¹=2.0
   - Restore original dynamic range
 8. **De-emphasis Filter**
   - Reverse pre-emphasis with α=0.5
   - Remove frequency shaping
   - Restore flat frequency response
 9. **PCM32f to PCM8u Conversion**
   - Noise-shaped dithering for 8-bit output
   - Clamping to valid range
   - Final output format
 ### Wavelet Implementation
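 The CDF 9/7 wavelet follows a **two-stage lifting scheme**, i.e. two predict/update pairs (α/β, then γ/δ) followed by K normalisation. The snippet below shows the first pair; the second pair repeats the same pattern with γ and δ: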
 ```c
 // Forward Transform: Predict → Update
 // Predict step (generate high-pass)
 temp[half + i] = data[odd] - α * (data[even_left] + data[even_right]);
 // Update step (generate low-pass)
 temp[i] = data[even] + β * (temp[half + i - 1] + temp[half + i]);
 // Normalization (K factor)
 temp[i] *= K;
 temp[half + i] /= K;
 // Inverse Transform: Denormalize → Undo Update → Undo Predict (reversed order)
 temp[i] /= K;
 temp[half + i] *= K;
 temp[i] -= β * (temp[half + i - 1] + temp[half + i]);
 data[odd] = temp[half + i] + α * (temp[i] + temp[i + 1]);
 data[even] = temp[i];
 ```
 **CDF 9/7 Coefficients**:
 - α = -1.586134342
 - β = -0.052980118
 - γ = +0.882911075
 - δ = +0.443506852
 - K = 1.230174105
 ### Non-Power-of-2 Chunk Size Handling
 Critical implementation detail for variable chunk sizes:
 ```c
 // Pre-calculate exact length sequence from forward transform
 int lengths[MAX_LEVELS + 1];
 lengths[0] = chunk_size;
 for (int i = 1; i <= levels; i++) {
    lengths[i] = (lengths[i - 1] + 1) / 2;
 }
 // Apply inverse DWT using lengths[level] for each level
 // NEVER use simple doubling (length *= 2) - incorrect for non-power-of-2!
 ```
 Incorrect length tracking causes mirrored subband artefacts in decoded audio.
 ### Perceptual Quantisation Weights
 Channel-specific weights for Mid (channel 0) and Side (channel 1):
 ```c
 // Base quantiser weights per subband (9 levels + approximation)
 float BASE_QUANTISER_WEIGHTS[2][10] = {
    // Mid channel (0)
    {4.0f, 2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f, 1.0f, 1.3f, 2.0f},
    // Side channel (1)
    {6.0f, 5.0f, 2.6f, 2.4f, 1.8f, 1.3f, 1.0f, 1.0f, 1.6f, 3.2f}
 };
 // During dequantisation:
 float weight = BASE_QUANTISER_WEIGHTS[channel][subband] * quantiser_scale;
 coeffs[i] = normalised_val * TAD32_COEFF_SCALARS[subband] * weight;
 ```
 Different weights for Mid and Side channels reflect perceptual importance of frequency bands in each channel. DC frequency has highest weight (4.0 Mid, 6.0 Side) due to energy concentration.
 ## Performance Characteristics
 ### Compression Efficiency
 - **Target Compression**: 2:1 against PCMu8 baseline (4:1 against PCM16LE input)
 - **Achieved Compression**: 2.51:1 against PCMu8 at quality level 3
 - **Audio Quality**: Preserves full 0-16 kHz bandwidth
 - **Coefficient Sparsity**: 86.9% zeros in Mid channel, 97.8% in Side channel (typical)
 - **EZBC Benefits**: Exploits sparsity, progressive refinement, spatial clustering
 ### Computational Complexity
 - **Encoding**: O(n log n) per chunk for DWT, O(n) for EZBC encoding
 - **Decoding**: O(n log n) per chunk for inverse DWT, O(n) for EZBC decoding
 - **Memory**: O(n) working memory for chunk processing
 ### Quality Characteristics
 - **Frequency Response**: Flat 0-16 kHz within perceptual limits
 - **Dynamic Range**: Preserved through gamma companding
 - **Stereo Imaging**: Maintained through Mid/Side decorrelation
 - **Perceptual Quality**: Optimised for human auditory system characteristics
 ## Integration with TAV
 TAD is designed as an includable API for TAV video encoder integration:
 - **Variable Chunk Sizes**: Audio chunks can match video GOP boundaries (e.g., 32016 samples for 1-second TAV GOP)
 - **Unified Quality Levels**: TAD quality 0-5 synchronised with TAV quality 0-5
 - **Embedded Packets**: TAV embeds TAD-compressed audio using packet type 0x24
 - **Shared Container**: Single .tav file contains both video and audio streams
 ### TAV Integration Example
 ```c
 // TAD handles non-power-of-2 chunk size correctly
 tad_encode_chunk(audio_buffer, audio_samples_per_gop, output_buffer, &output_size);
 // TAV embeds TAD packet
 tav_write_packet(TAV_PACKET_AUDIO, output_buffer, output_size);
 ```
 ## Format Specification
 For complete packet structure and bitstream format details, refer to `format documentation.txt`.
 ### Key Packet Types
 - `0x24`: TAD audio packet (used in standalone .tad files and embedded in .tav files)
 ## Related Projects
 - **TAV** (TSVM Advanced Video): Wavelet-based video codec with integrated TAD audio
 - **TSVM**: Target virtual machine platform for TAD playback
 ## Licence
 MIT.
--- a/video_encoder/TAV_README.md
+++ b/video_encoder/TAV_README.md
@@ -1,261 +0,0 @@
 # TAV - TSVM Advanced Video Codec
 A perceptually-optimised wavelet-based video codec designed for resource-constrained systems, featuring multiple wavelet types, temporal 3D DWT, and sophisticated compression techniques.
 ## Overview
 TAV (TSVM Advanced Video) is a modern video codec built on discrete wavelet transformation (DWT). It combines cutting-edge compression techniques with careful optimisation for resource-constrained systems.
 ### Key Advantages
 - **No blocking artefacts**: Large-tile DWT encoding with padding eliminates DCT block boundaries
 - **No colour banding**: Wavelets spread gradients across scales, preventing banding in the first place
 - **Perceptual optimisation**: HVS-aware quantisation preserves visual quality where it matters
 - **Temporal coherence**: 3D DWT with GOP encoding exploits inter-frame similarity
 - **Efficient sparse coding**: EZBC encoding exploits coefficient sparsity for 16-18% additional compression
 - **Hardware-friendly**: Designed for efficient decoding on resource-constrained platforms
 ## Features
 ### Compression Technology
 - **Wavelet Types**
  - **5/3 Reversible** (JPEG 2000 standard): Lossless-capable, good for archival
  - **9/7 Irreversible** (default): Best overall compression, CDF 9/7 variant
 - **Spatial Encoding**
  - Large-tile encoding with padding, with optional single-tile mode (no blocking artefacts)
  - 6-level DWT decomposition for deep frequency analysis
  - Perceptual quantisation with HVS-optimised coefficient scaling
  - YCoCg-R colour space with anisotropic chroma quantisation
 - **Temporal Encoding** (3D DWT Mode)
  - Group-of-pictures (GOP) encoding with adaptive size (typically 20 frames)
  - Unified EZBC encoding across temporal dimension
  - Adaptive GOP boundaries with scene change detection
 - **EZBC Encoding**
  - Binary tree embedded zero block coding exploits coefficient sparsity
  - Progressive refinement structure with bitplane encoding
  - Concatenated channel layout for cross-channel compression optimisation
  - Typical sparsity: 86.9% (Y), 97.8% (Co), 99.5% (Cg)
  - 16-18% compression improvement over naive coefficient encoding
 ### Audio Integration
 TAV seamlessly integrates with the TAD (TSVM Advanced Audio) codec for synchronised audio/video encoding:
 - Variable chunk sizes match video GOP boundaries
 - Embedded TAD packets (type 0x24) with Zstd compression
 - Unified container format
 ## Building
 ### Prerequisites
 - C compiler (GCC/Clang)
 - Zstandard library
 - OpenCV 4 library (only used by experimental motion estimation feature)
 ### Compilation
 ```bash
 # Build TAV encoder/decoder
 make tav
 # Build all tools including TAD audio codec
 make all
 # Clean build artefacts
 make clean
 ```
 ### Build Targets
 - `encoder_tav` - Main video encoder
 - `decoder_tav` - Standalone video decoder
 - `tav_inspector` - Packet analysis and debugging tool
 ## Usage
 ### Basic Encoding
 Encoding requires FFmpeg executable installed in your system.
 ```bash
 # Default encoding (CDF 9/7 wavelet, quality level 3)
 ./encoder_tav -i input.mp4 -o output.tav
 # Quality levels (0-5)
 ./encoder_tav -i input.avi -q 0 -o output.tav    # Lowest quality, smallest file
 ./encoder_tav -i input.mkv -q 5 -o output.tav    # Highest quality, largest file
 ```
 ### Intra-only Encoding
 ```bash
 # Enable Intra-only encoding
 ./encoder_tav -i input.mp4 --intra-only -o output.tav
 ```
 ### Decoding and Inspection
 ```bash
 # Decode TAV to raw video
 ./decoder_tav -i input.tav -o output.mkv
 # Inspect packet structure (debugging)
 ./tav_inspector input.tav -v
 ```
 ### Frame Limiting
 ```bash
 # Encode only first N frames (useful for testing)
 ./encoder_tav -i input.mp4 -o output.tav --encode-limit 100
 ```
 ## Technical Architecture
 ### Encoder Pipeline
 1. **Input Processing**
   - FFmpeg demuxing and frame extraction
   - RGB to YCoCg-R colour space conversion
   - Resolution validation and padding
 2. **DWT Transform**
   - Spatial: 6-level decomposition per frame
   - Temporal: 1D DWT across GOP frames (3D DWT mode)
   - Lifting scheme implementation for all wavelets
 3. **Perceptual Quantisation**
   - HVS-based subband weights
   - Anisotropic chroma quantisation (YCoCg-R specific)
   - Quality-dependent quantisation matrices
 4. **EZBC Encoding**
   - Binary tree embedded zero block coding per channel
   - Progressive refinement by bitplanes
   - Concatenated bitstream layout: `[Y_bitstream][Co_bitstream][Cg_bitstream]`
   - Cross-channel compression optimisation
 5. **Entropy Coding**
   - Zstandard compression (level 7) on concatenated EZBC bitstreams
   - Cross-channel compression opportunities
   - Adaptive compression based on GOP structure
 ### Decoder Pipeline
 1. **Container Parsing**
   - Packet type identification (0x00-0xFF)
   - Timecode synchronisation
   - GOP boundary detection
 2. **Entropy Decoding**
   - Zstd decompression of concatenated bitstreams
   - EZBC binary tree decoding per channel
   - Progressive coefficient reconstruction
 3. **Inverse Quantisation**
   - Perceptual weight application
   - Subband-specific scaling
   - Coefficient reconstruction from sparse representation
 4. **Inverse DWT**
   - Temporal: 1D inverse DWT across frames (3D DWT mode)
   - Spatial: 6-level inverse wavelet reconstruction
 5. **Output Conversion**
   - YCoCg-R to RGB colour space
   - Clamping and dithering
   - Frame buffering for display
 ### Wavelet Implementation
 All wavelets follow a **lifting scheme** pattern with symmetric boundary extension:
 ```c
 // Forward Transform: Predict → Update
 temp[half + i] = data[odd] - predict(data[even]);  // High-pass
 temp[i] = data[even] + update(temp[half]);         // Low-pass
 // Inverse Transform: Undo Update → Undo Predict (reversed order)
 data[even] = temp[i] - update(temp[half]);         // Undo low-pass
 data[odd] = temp[half + i] + predict(data[even]);  // Undo high-pass
 ```
 **Critical**: Forward and inverse transforms must use identical coefficient indexing and exactly reverse operations to avoid grid artefacts.
 ### Coefficient Layout
 TAV uses **2D Spatial Layout** in memory for each decomposition level:
 ```
 [LL] [LH] [HL] [HH] [LH] [HL] [HH] ...
 └── Level 0 ──┘ └─── Level 1 ───┘
 ```
 - `LL`: Low-pass (approximation) - progressively smaller with each level
 - `LH`, `HL`, `HH`: High-pass subbands (horizontal, vertical, diagonal detail)
 ## Performance Characteristics
 ### Compression Efficiency
 - **Sparsity Exploitation**: Typical quantised coefficient sparsity
  - Y channel: 86.9% zeros
  - Co channel: 97.8% zeros
  - Cg channel: 99.5% zeros
 - **EZBC Benefits**: 16-18% compression improvement over naive coefficient encoding through sparsity exploitation
 - **Temporal Coherence**: Additional 15-25% improvement with 3D DWT (content-dependent)
 ### Computational Complexity
 - **Encoding**: O(n log n) per frame for spatial DWT
 - **Decoding**: O(n log n) per frame, optimised lifting scheme implementation
 - **Memory**: Single-tile encoding requires O(w × h) working memory
 ### Quality Characteristics
 - **No blocking artefacts**: Wavelet-based encoding is inherently smooth
 - **Perceptual optimisation**: Better subjective quality than bitrate-equivalent DCT codecs
 - **Scalability**: 6 quality levels (0-5) provide wide range of bitrate/quality trade-offs
 - **Temporal stability**: 3D DWT mode reduces flickering and temporal artefacts
 ## Format Specification
 For complete packet structure and bitstream format details, refer to `format documentation.txt`.
 ### Key Packet Types
 - `0x00`: Metadata and initialisation
 - `0x01`: I-frame (intra-coded frame)
 - `0x12`: GOP unified packet (3D DWT mode)
 - `0x24`: Embedded TAD audio
 - `0xFC`: GOP synchronisation
 - `0xFD`: Timecode
 ## Debugging Tools
 ### TAV Inspector
 Analyse TAV packet structure and decode individual frames:
 ```bash
 # Verbose packet analysis
 ./tav_inspector input.tav -v
 # Extract specific frame ranges
 ./tav_inspector input.tav --frame-range 100-200
 ```
 ## Related Projects
 - **TAD** (TSVM Advanced Audio): Perceptual audio codec using CDF 9/7 wavelets
 - **TSVM**: Target virtual machine platform for TAV playback
 ## Licence
 MIT.
--- a/video_encoder/create_ucf_payload.c
+++ b/video_encoder/create_ucf_payload.c
@@ -1,424 +0,0 @@
 /**
 * TAV+UCF Payload Writer for TAV Files
 * Creates a TAV header-only (32 bytes) + UCF cue file (4064 bytes) for concatenated TAV files
 * Total output size: 4096 bytes (32 + 4064)
 * Usage: ./create_ucf_payload input.tav output.ucf [track_names.txt]
 */
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdint.h>
 // Size in bytes of the fixed TAV file header written by write_tav_header_only().
 #define TAV_HEADER_SIZE 32
 // Size in bytes reserved for the UCF cue data that follows the TAV header.
 #define UCF_SIZE 4064
 // Total size of the generated payload (32 + 4064 = 4096 bytes). It is added to every
 // cue offset because the payload is meant to be prepended to the scanned file.
 #define TAV_OFFSET_BIAS (TAV_HEADER_SIZE + UCF_SIZE)
 // First 7 bytes of the 8-byte file magic; the 8th byte ('V' or 'P') is checked
 // separately by find_tav_headers().
 #define TAV_MAGIC "\x1FTSVMTA"  // Matches both TAV and TAP
 // Leading fields of a TAV header, packed so that no padding is inserted between members.
 // NOTE(review): this type is not referenced by any other definition in this file.
 typedef struct {
    uint8_t magic[8];        // 8-byte file magic
    uint8_t version;         // format version
    uint16_t width;          // frame width
    uint16_t height;         // frame height
    uint8_t fps;             // frames per second
    uint32_t total_frames;   // total frame count
    // ... rest of header fields
 } __attribute__((packed)) TAVHeader;
 // Emit the fixed TAV_HEADER_SIZE-byte "header-only" TAV header (File Role = 1) to `out`.
 // The header announces that no video packets follow and that a UCF payload comes next.
 static void write_tav_header_only(FILE *out) {
    // Bytes 0..26 of the header in on-disk order; everything after them starts out as zero.
    static const uint8_t leading_fields[] = {
        0x1F, 'T', 'S', 'V', 'M', 'T', 'A', 'V',  // [0..7]   magic "\x1FTSVMTAV"
        5,                                        // [8]      version 5 (YCoCg-R perceptual)
        0x30, 0x02,                               // [9..10]  width 560, little-endian
        0xC0, 0x01,                               // [11..12] height 448, little-endian
        30,                                       // [13]     FPS
        0xFF, 0xFF, 0xFF, 0xFF,                   // [14..17] total frames 0xFFFFFFFF (still image marker / not applicable)
        1,                                        // [18]     wavelet filter type 1 (9/7 irreversible, default)
        6,                                        // [19]     decomposition levels
        0xFF, 0xFF, 0xFF,                         // [20..22] quantiser indices Y, Co, Cg (not applicable for header-only)
        0x80,                                     // [23]     extra feature flags: bit 7 = has no actual packets
        0,                                        // [24]     video flags
        0,                                        // [25]     encoder quality level
        0                                         // [26]     channel layout 0 (Y-Co-Cg)
    };
    uint8_t header[TAV_HEADER_SIZE];
    memset(header, 0, sizeof(header));            // reserved bytes 27..30 stay zero
    memcpy(header, leading_fields, sizeof(leading_fields));
    header[31] = 1;                               // file role 1: header-only, UCF payload follows
    fwrite(header, 1, sizeof(header), out);
 }
 // Write the 16-byte UCF header to `out`:
 //   magic "\x1FTSVMUCF" (8 bytes), version 1 (1 byte), cue count (uint16),
 //   cue file size (uint32, always TAV_OFFSET_BIAS), reserved zero byte.
 // `num_cues` is the number of cue elements the caller will write afterwards.
 // Multi-byte fields are serialised byte by byte as little-endian, so the output no
 // longer depends on the byte order of the host (identical bytes on little-endian hosts).
 static void write_ucf_header(FILE *out, uint16_t num_cues) {
    const uint32_t cue_file_size = TAV_OFFSET_BIAS;
    const uint8_t header[16] = {
        0x1F, 'T', 'S', 'V', 'M', 'U', 'C', 'F',   // magic
        1,                                         // version
        (uint8_t)(num_cues & 0xFF),                // cue count, low byte first
        (uint8_t)(num_cues >> 8),
        (uint8_t)(cue_file_size & 0xFF),           // cue file size, low byte first
        (uint8_t)((cue_file_size >> 8) & 0xFF),
        (uint8_t)((cue_file_size >> 16) & 0xFF),
        (uint8_t)((cue_file_size >> 24) & 0xFF),
        0                                          // reserved
    };
    fwrite(header, 1, sizeof(header), out);
 }
 // Write one UCF cue element to `out`:
 //   addressing mode (1 byte), name length (uint16), name bytes (no terminator),
 //   48-bit offset.
 // `offset` is the position of the track inside the scanned input file; TAV_OFFSET_BIAS
 // is added before writing. `name` must be a NUL-terminated string; only the first
 // 65535 bytes are written because the length field is 16 bits wide.
 // Multi-byte fields are serialised byte by byte as little-endian so the result does not
 // depend on host byte order (the bytes are unchanged on little-endian hosts).
 static void write_cue_element(FILE *out, uint64_t offset, const char *name) {
    // NOTE(review): the original comment describes the flags as 0x20 | 0x01 | 0x02, which
    // would be 0x23, yet the value written has always been 0x22 - confirm against the UCF
    // specification. The value is intentionally left unchanged here.
    const uint8_t addressing_mode = 0x22;  // 0x20 (human) | 0x01 (machine) | 0x02 (internal)
    const uint16_t name_len = (uint16_t)strlen(name);
    // Offset with 4KB bias
    const uint64_t biased_offset = offset + TAV_OFFSET_BIAS;
    const uint8_t prefix[3] = {
        addressing_mode,
        (uint8_t)(name_len & 0xFF),
        (uint8_t)(name_len >> 8)
    };
    // 48-bit (6-byte) offset, least significant byte first
    uint8_t offset_le[6];
    for (int i = 0; i < 6; i++) {
        offset_le[i] = (uint8_t)((biased_offset >> (8 * i)) & 0xFF);
    }
    fwrite(prefix, 1, sizeof(prefix), out);
    fwrite(name, 1, name_len, out);
    fwrite(offset_le, 1, sizeof(offset_le), out);
 }
 // Read track names from file (newline-delimited)
 static char **read_track_names(const char *filename, int *count_out) {
    FILE *f = fopen(filename, "r");
    if (!f) {
        return NULL;
    }
    char **names = NULL;
    int count = 0;
    int capacity = 16;
    char line[256];
    names = malloc(capacity * sizeof(char *));
    if (!names) {
        fclose(f);
        return NULL;
    }
    while (fgets(line, sizeof(line), f)) {
        // Remove trailing newline
        size_t len = strlen(line);
        if (len > 0 && line[len - 1] == '\n') {
            line[len - 1] = '\0';
            len--;
        }
        if (len > 0 && line[len - 1] == '\r') {
            line[len - 1] = '\0';
            len--;
        }
        // Skip empty lines
        if (len == 0) {
            continue;
        }
        // Expand capacity if needed
        if (count >= capacity) {
            capacity *= 2;
            char **new_names = realloc(names, capacity * sizeof(char *));
            if (!new_names) {
                // Cleanup on failure
                for (int i = 0; i < count; i++) {
                    free(names[i]);
                }
                free(names);
                fclose(f);
                return NULL;
            }
            names = new_names;
        }
        // Allocate and copy name
        names[count] = strdup(line);
        if (!names[count]) {
            // Cleanup on failure
            for (int i = 0; i < count; i++) {
                free(names[i]);
            }
            free(names);
            fclose(f);
            return NULL;
        }
        count++;
    }
    fclose(f);
    *count_out = count;
    return names;
 }
 // Find all TAV headers in the file (with smart packet-wise skipping).
 //
 // in:          input stream; it is rewound to offset 0 before scanning.
 // offsets_out: on success receives a malloc'd array with the byte offset of every
 //              header found (caller frees it); untouched when -1 is returned.
 // Returns the number of headers found, or -1 on allocation or positioning failure.
 //
 // After a header is found the scanner walks the packet stream that follows it
 // (type byte + little-endian uint32 payload size + payload; 0xFE/0xFF are single-byte
 // packets) until it meets a 0x1F byte or EOF; outside a packet stream it advances one
 // byte at a time.
 //
 // Fixes over the previous version: offsets are printed with specifiers that match
 // their 64-bit type, the payload size is decoded explicitly as little-endian instead
 // of relying on host byte order, the packet advance is computed in 64 bits so a
 // payload size near 0xFFFFFFFF cannot wrap, and ftell() failure is reported.
 // NOTE(review): fseek()/ftell() use `long`, so files beyond LONG_MAX bytes are not
 // handled on platforms where long is 32 bits.
 static int find_tav_headers(FILE *in, uint64_t **offsets_out) {
    int count = 0;
    int capacity = 16;
    uint64_t *offsets = (uint64_t *)malloc(capacity * sizeof(uint64_t));
    if (!offsets) {
        fprintf(stderr, "Error: Memory allocation failed\n");
        return -1;
    }
    // Seek to beginning
    fseek(in, 0, SEEK_SET);
    uint8_t magic[8];
    while (1) {
        // Remember current position before reading
        long here = ftell(in);
        if (here < 0) {
            fprintf(stderr, "Error: Cannot determine position in input file\n");
            free(offsets);
            return -1;
        }
        uint64_t pos = (uint64_t)here;
        // Try to read magic
        if (fread(magic, 1, 8, in) != 8) {
            // End of file
            break;
        }
        // Check for TAV magic signature
        if (memcmp(magic, TAV_MAGIC, 7) != 0 || (magic[7] != 'V' && magic[7] != 'P')) {
            // Move forward by 1 byte for next search
            fseek(in, (long)(pos + 1), SEEK_SET);
            continue;
        }
        // Found TAV header
        if (count >= capacity) {
            uint64_t *new_offsets = (uint64_t *)realloc(offsets, (size_t)capacity * 2 * sizeof(uint64_t));
            if (!new_offsets) {
                fprintf(stderr, "Error: Memory reallocation failed\n");
                free(offsets);
                return -1;
            }
            offsets = new_offsets;
            capacity *= 2;
        }
        offsets[count++] = pos;
        printf("Found TAV header at offset: 0x%llX (%llu)\n",
               (unsigned long long)pos, (unsigned long long)pos);
        // Skip past this header (32 bytes total)
        uint64_t packet_pos = pos + 32;
        fseek(in, (long)packet_pos, SEEK_SET);
        // Smart packet-wise skipping
        while (1) {
            uint8_t packet_type;
            if (fread(&packet_type, 1, 1, in) != 1) {
                // End of file
                break;
            }
            // Check if this is the start of next TAV file (0x1F is prohibited as packet type)
            if (packet_type == 0x1F) {
                // Rewind 1 byte to re-read as magic at the top of outer loop
                fseek(in, (long)packet_pos, SEEK_SET);
                break;
            }
            // Sync packets (0xFE, 0xFF) have no payload size - they're single-byte packets
            if (packet_type == 0xFE || packet_type == 0xFF) {
                packet_pos += 1;
                fseek(in, (long)packet_pos, SEEK_SET);
                continue;
            }
            // Read payload size (uint32, little-endian)
            uint8_t size_le[4];
            if (fread(size_le, 1, 4, in) != 4) {
                // End of file
                break;
            }
            uint32_t payload_size = (uint32_t)size_le[0]
                                  | ((uint32_t)size_le[1] << 8)
                                  | ((uint32_t)size_le[2] << 16)
                                  | ((uint32_t)size_le[3] << 24);
            // Skip packet: 1 byte (type) + 4 bytes (size) + payload_size, in 64-bit arithmetic
            packet_pos += 5 + (uint64_t)payload_size;
            fseek(in, (long)packet_pos, SEEK_SET);
        }
    }
    *offsets_out = offsets;
    return count;
 }
 // Release a track-name array returned by read_track_names(); a NULL array is ignored.
 static void free_track_names(char **names, int count) {
    if (!names) {
        return;
    }
    for (int i = 0; i < count; i++) {
        free(names[i]);
    }
    free(names);
 }
 // Entry point: create_ucf_payload <input.tav> <output.ucf> [track_names.txt]
 //
 // Scans the input for TAV headers, then writes a header-only TAV header followed by a
 // UCF header, one cue element per header found, and zero padding up to
 // TAV_HEADER_SIZE + UCF_SIZE bytes. Tracks without a custom name are called "Track N".
 // Returns 0 on success and 1 on any error.
 //
 // Changes over the previous version:
 //  - 64-bit offsets are printed with matching format specifiers;
 //  - cue data that does not fit into the reserved area is now an error (the output
 //    file is removed) instead of silently producing an oversized payload while
 //    reporting a 4096-byte file;
 //  - the padding no longer depends on a heap allocation that was skipped on failure;
 //  - the repeated track-name cleanup lives in free_track_names().
 int main(int argc, char *argv[]) {
    if (argc < 3 || argc > 4) {
        fprintf(stderr, "Usage: %s <input.tav> <output.ucf> [track_names.txt]\n", argv[0]);
        fprintf(stderr, "Creates a 4KB UCF payload for concatenated TAV file\n");
        fprintf(stderr, "  track_names.txt: Optional file with track names (one per line)\n");
        return 1;
    }
    const char *input_path = argv[1];
    const char *output_path = argv[2];
    const char *names_path = (argc == 4) ? argv[3] : NULL;
    // Read track names if provided; failure is not fatal, default names are used instead
    char **track_names = NULL;
    int num_names = 0;
    if (names_path) {
        track_names = read_track_names(names_path, &num_names);
        if (track_names) {
            printf("Loaded %d track name(s) from '%s'\n", num_names, names_path);
        } else {
            fprintf(stderr, "Warning: Could not read track names from '%s', using defaults\n", names_path);
        }
    }
    // Open input file
    FILE *in = fopen(input_path, "rb");
    if (!in) {
        fprintf(stderr, "Error: Cannot open input file '%s'\n", input_path);
        free_track_names(track_names, num_names);
        return 1;
    }
    // Find all TAV headers
    uint64_t *offsets = NULL;
    int num_tracks = find_tav_headers(in, &offsets);
    fclose(in);
    if (num_tracks < 0) {
        fprintf(stderr, "Error: Failed to scan input file\n");
        free_track_names(track_names, num_names);
        return 1;
    }
    if (num_tracks == 0) {
        fprintf(stderr, "Error: No TAV headers found in input file\n");
        free(offsets);
        free_track_names(track_names, num_names);
        return 1;
    }
    printf("\nFound %d TAV header(s)\n", num_tracks);
    // Create output UCF file
    FILE *out = fopen(output_path, "wb");
    if (!out) {
        fprintf(stderr, "Error: Cannot create output file '%s'\n", output_path);
        free(offsets);
        free_track_names(track_names, num_names);
        return 1;
    }
    // Write TAV header-only payload (File Role = 1)
    write_tav_header_only(out);
    printf("Written TAV header-only payload (%d bytes)\n", TAV_HEADER_SIZE);
    // Write UCF header; a count above 65535 cannot fit the cue area and is rejected below
    write_ucf_header(out, (uint16_t)num_tracks);
    // Write cue elements
    for (int i = 0; i < num_tracks; i++) {
        char default_name[32];
        const char *name;
        // Use custom name if available, otherwise generate default
        if (track_names && i < num_names) {
            name = track_names[i];
        } else {
            snprintf(default_name, sizeof(default_name), "Track %d", i + 1);
            name = default_name;
        }
        write_cue_element(out, offsets[i], name);
        printf("Written cue element: '%s' at offset 0x%llX (biased: 0x%llX)\n",
               name, (unsigned long long)offsets[i],
               (unsigned long long)(offsets[i] + TAV_OFFSET_BIAS));
    }
    // Check how much of the fixed-size payload has been used and pad the rest with zeros
    const long target_size = TAV_HEADER_SIZE + UCF_SIZE;
    const long current_pos = ftell(out);
    int failed = 0;
    if (current_pos < 0) {
        fprintf(stderr, "Error: Cannot determine size of output file '%s'\n", output_path);
        failed = 1;
    } else if (current_pos > target_size) {
        // Cue offsets are biased by exactly target_size, so a larger payload would be wrong
        fprintf(stderr, "Error: Cue data needs %ld bytes but the payload is limited to %ld bytes; use fewer tracks or shorter names\n",
                current_pos, target_size);
        failed = 1;
    } else {
        static const uint8_t zeros[256] = {0};
        size_t remaining = (size_t)(target_size - current_pos);
        while (remaining > 0 && !failed) {
            size_t chunk = remaining < sizeof(zeros) ? remaining : sizeof(zeros);
            if (fwrite(zeros, 1, chunk, out) != chunk) {
                fprintf(stderr, "Error: Failed to write to output file '%s'\n", output_path);
                failed = 1;
            }
            remaining -= chunk;
        }
    }
    if (fclose(out) != 0 && !failed) {
        fprintf(stderr, "Error: Failed to write to output file '%s'\n", output_path);
        failed = 1;
    }
    free(offsets);
    // Clean up track names
    free_track_names(track_names, num_names);
    if (failed) {
        // Do not leave a malformed payload behind
        remove(output_path);
        return 1;
    }
    printf("\nTAV+UCF payload created successfully: %s\n", output_path);
    printf("File size: %zu bytes (TAV header: %d + UCF: %d)\n",
           (size_t)(TAV_HEADER_SIZE + UCF_SIZE), TAV_HEADER_SIZE, UCF_SIZE);
    printf("\nTo create seekable TAV file, prepend this payload to your concatenated TAV file:\n");
    printf("  cat %s input.tav > output_seekable.tav\n", output_path);
    return 0;
 }
--- a/video_encoder/encoder_ipf1d.c
+++ b/video_encoder/encoder_ipf1d.c
@@ -1,935 +0,0 @@
 #define _GNU_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
 #include <math.h>
 #include <zlib.h>
 #include <unistd.h>
 #include <sys/wait.h>
 #include <getopt.h>
 #include <sys/time.h>
 // TVDOS Movie format constants
 // The eight magic bytes are 0x1F 'T' 'S' 'V' 'M' 'M' 'O' 'V'; there is no space between "TSVM" and "MOV".
 #define TVDOS_MAGIC "\x1F\x54\x53\x56\x4D\x4D\x4F\x56"  // "\x1FTSVMMOV"
 #define IPF_BLOCK_SIZE 12
 // iPF1-delta opcodes
 #define SKIP_OP  0x00
 #define PATCH_OP 0x01
 #define REPEAT_OP 0x02
 #define END_OP   0xFF
 // Video packet types
 // Each of these expands to TWO comma-separated byte values, so it can only be used
 // where a comma-separated list is valid (for example inside an array initialiser).
 #define IPF1_PACKET_TYPE 0x04, 0x00      // iPF Type 1 (4 + 0)
 #define IPF1_DELTA_PACKET_TYPE 0x04, 0x02 // iPF Type 1 delta
 #define SYNC_PACKET_TYPE 0xFF, 0xFF      // Sync packet
 // Audio constants
 #define MP2_SAMPLE_RATE 32000             // Hz; passed to ffmpeg as -ar and used for frame-size maths
 #define MP2_DEFAULT_PACKET_SIZE 0x240     // fallback frame size (576 bytes) when the bitrate index is unusable
 #define MP2_PACKET_TYPE_BASE 0x11
 // Default values
 #define DEFAULT_WIDTH 560
 #define DEFAULT_HEIGHT 448
 // Fixed path of the intermediate MP2 file written by ffmpeg in start_audio_conversion()
 #define TEMP_AUDIO_FILE "/tmp/tvdos_temp_audio.mp2"
 // All state of one encoding run: user options, probed metadata, work buffers,
 // audio bookkeeping, child-process pipes and progress counters.
 typedef struct {
    char *input_file;          // path handed to ffprobe/ffmpeg
    char *output_file;
    int width;                 // target frame size used for ffmpeg scale/crop
    int height;
    int fps;                   // filled in by get_video_metadata()
    int total_frames;          // filled in by get_video_metadata()
    double duration;           // seconds, filled in by get_video_metadata() when ffprobe answers
    int has_audio;             // non-zero when an audio stream was found and converted
    int output_to_stdout;
    // Internal buffers
    uint8_t *previous_ipf_frame;
    uint8_t *current_ipf_frame;
    uint8_t *delta_buffer;
    uint8_t *rgb_buffer;
    uint8_t *compressed_buffer;
    uint8_t *mp2_buffer;
    size_t frame_buffer_size;
    // Audio handling
    FILE *mp2_file;            // temporary MP2 file opened by start_audio_conversion()
    int mp2_packet_size;
    int mp2_rate_index;
    size_t audio_remaining;    // initialised to the byte size of the MP2 file
    int audio_frames_in_buffer;
    int target_audio_buffer_size;
    // FFmpeg processes
    FILE *ffmpeg_video_pipe;   // popen() stream delivering raw rgb24 frames
    FILE *ffmpeg_audio_pipe;
    // Progress tracking
    struct timeval start_time;
    struct timeval last_progress_time;
    size_t total_output_bytes;
    // Dithering mode
    int dither_mode;
 } encoder_config_t;
 // CORRECTED YCoCg conversion matching Kotlin implementation
 typedef struct {
    float y, co, cg;
 } ycocg_t;
 static ycocg_t rgb_to_ycocg_correct(uint8_t r, uint8_t g, uint8_t b, float ditherThreshold) {
    ycocg_t result;
    float rf = floor((ditherThreshold / 15.0 + r / 255.0) * 15.0) / 15.0;
    float gf = floor((ditherThreshold / 15.0 + g / 255.0) * 15.0) / 15.0;
    float bf = floor((ditherThreshold / 15.0 + b / 255.0) * 15.0) / 15.0;
    // CORRECTED: Match Kotlin implementation exactly
    float co = rf - bf;           // co = r - b    [-1..1]
    float tmp = bf + co / 2.0f;   // tmp = b + co/2
    float cg = gf - tmp;          // cg = g - tmp  [-1..1]
    float y = tmp + cg / 2.0f;    // y = tmp + cg/2 [0..1]
    result.y = y;
    result.co = co;
    result.cg = cg;
    return result;
 }
 static int quantise_4bit_y(float value) {
    // Y quantisation: round(y * 15)
    return (int)round(fmaxf(0.0f, fminf(15.0f, value * 15.0f)));
 }
 static int chroma_to_four_bits(float f) {
    // CORRECTED: Match Kotlin chromaToFourBits function exactly
    // return (round(f * 8) + 7).coerceIn(0..15)
    int result = (int)round(f * 8.0f) + 7;
    return fmaxf(0, fminf(15, result));
 }
 // Parse a "<width>x<height>" string such as "1024x768".
 // Returns 1 when both numbers were read, 0 otherwise (including a NULL string).
 // *width may already have been written when only the first number could be parsed.
 static int parse_resolution(const char *res_str, int *width, int *height) {
    int fields_read = 0;
    if (res_str != NULL) {
        fields_read = sscanf(res_str, "%dx%d", width, height);
    }
    return fields_read == 2;
 }
 // Execute command and capture output
 static char *execute_command(const char *command) {
    FILE *pipe = popen(command, "r");
    if (!pipe) return NULL;
    char *result = malloc(4096);
    size_t len = fread(result, 1, 4095, pipe);
    result[len] = '\0';
    pclose(pipe);
    return result;
 }
 // Query ffprobe for frame count, frame rate, duration and audio presence of
 // config->input_file and store the answers in `config`.
 // Returns 1 when a positive frame count and a positive fps are known, otherwise 0.
 // A summary is printed to stderr unless one of the first two probes could not be run.
 static int get_video_metadata(encoder_config_t *config) {
    char probe_cmd[1024];
    char *reply;
    // --- frame count (mandatory) ---
    snprintf(probe_cmd, sizeof(probe_cmd),
        "ffprobe -v quiet -select_streams v:0 -count_frames -show_entries stream=nb_read_frames -of csv=p=0 \"%s\"",
        config->input_file);
    reply = execute_command(probe_cmd);
    if (reply == NULL) {
        fprintf(stderr, "Failed to get frame count\n");
        return 0;
    }
    config->total_frames = atoi(reply);
    free(reply);
    // --- frame rate (mandatory) ---
    snprintf(probe_cmd, sizeof(probe_cmd),
        "ffprobe -v quiet -select_streams v:0 -show_entries stream=r_frame_rate -of csv=p=0 \"%s\"",
        config->input_file);
    reply = execute_command(probe_cmd);
    if (reply == NULL) {
        fprintf(stderr, "Failed to get frame rate\n");
        return 0;
    }
    // The rate is either a fraction ("30/1") or a plain number ("29.97").
    // Note the fraction form uses integer division (30000/1001 gives 29) while the
    // plain form is rounded to the nearest integer.
    int rate_num, rate_den;
    if (sscanf(reply, "%d/%d", &rate_num, &rate_den) != 2) {
        config->fps = (int)round(atof(reply));
    } else if (rate_den > 0) {
        config->fps = rate_num / rate_den;
    } else {
        config->fps = 30;
    }
    free(reply);
    // --- duration (optional; left untouched when ffprobe cannot be run) ---
    snprintf(probe_cmd, sizeof(probe_cmd),
        "ffprobe -v quiet -show_entries format=duration -of csv=p=0 \"%s\"",
        config->input_file);
    reply = execute_command(probe_cmd);
    if (reply != NULL) {
        config->duration = atof(reply);
        free(reply);
    }
    // --- audio presence: any non-empty answer with a non-negative stream index ---
    snprintf(probe_cmd, sizeof(probe_cmd),
        "ffprobe -v quiet -select_streams a:0 -show_entries stream=index -of csv=p=0 \"%s\"",
        config->input_file);
    reply = execute_command(probe_cmd);
    config->has_audio = 0;
    if (reply != NULL) {
        config->has_audio = (strlen(reply) > 0 && atoi(reply) >= 0);
        free(reply);
    }
    // Fall back to duration * fps when ffprobe did not report a usable frame count
    if (config->total_frames <= 0 && config->duration > 0) {
        config->total_frames = (int)(config->duration * config->fps);
    }
    fprintf(stderr, "Video metadata:\n");
    fprintf(stderr, "  Frames: %d\n", config->total_frames);
    fprintf(stderr, "  FPS: %d\n", config->fps);
    fprintf(stderr, "  Duration: %.2fs\n", config->duration);
    fprintf(stderr, "  Audio: %s\n", config->has_audio ? "Yes" : "No");
    fprintf(stderr, "  Resolution: %dx%d\n", config->width, config->height);
    if (config->total_frames <= 0) {
        return 0;
    }
    return config->fps > 0;
 }
 // Launch ffmpeg so that it streams the input as raw rgb24 frames, scaled to cover and
 // then cropped to config->width x config->height, into config->ffmpeg_video_pipe.
 // Returns 1 when the pipe was opened, 0 otherwise.
 static int start_video_conversion(encoder_config_t *config) {
    const int out_w = config->width;
    const int out_h = config->height;
    char ffmpeg_cmd[2048];
    snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
        "ffmpeg -i \"%s\" -f rawvideo -pix_fmt rgb24 -vf scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d -y - 2>/dev/null",
        config->input_file, out_w, out_h, out_w, out_h);
    FILE *frames = popen(ffmpeg_cmd, "r");
    config->ffmpeg_video_pipe = frames;
    return frames != NULL;
 }
 // Convert the input's audio track to an MP2 file (TEMP_AUDIO_FILE) by running
 // FFmpeg synchronously, then open that file and record its size in
 // config->audio_remaining.
 // Audio is optional: any failure (command too long, FFmpeg error, file cannot
 // be opened or measured) only clears config->has_audio and prints a warning.
 // Always returns 1.
 static int start_audio_conversion(encoder_config_t *config) {
    if (!config->has_audio) return 1;
    char command[2048];
    int written = snprintf(command, sizeof(command),
        "ffmpeg -i \"%s\" -acodec libtwolame -psymodel 4 -b:a 192k -ar %d -ac 2 -y \"%s\" 2>/dev/null",
        config->input_file, MP2_SAMPLE_RATE, TEMP_AUDIO_FILE);
    // Never hand a truncated command line to the shell.
    int command_ok = (written >= 0 && (size_t)written < sizeof(command));
    if (command_ok && system(command) == 0) {
        FILE *mp2 = fopen(TEMP_AUDIO_FILE, "rb");
        if (mp2) {
            // Measure the file; ftell() reports -1 on failure, which must not
            // be stored as the remaining byte count.
            long size = -1;
            if (fseek(mp2, 0, SEEK_END) == 0) {
                size = ftell(mp2);
            }
            if (size >= 0 && fseek(mp2, 0, SEEK_SET) == 0) {
                config->mp2_file = mp2;
                config->audio_remaining = size;
                return 1;
            }
            fclose(mp2);
        }
    }
    fprintf(stderr, "Warning: Failed to convert audio, proceeding without audio\n");
    config->has_audio = 0;
    return 1;
 }
 // Append `value` at *ptr as a little-endian base-128 varint: seven payload
 // bits per byte, least significant group first, with bit 7 set on every byte
 // except the last. *ptr is advanced past the bytes written.
 static void write_varint(uint8_t **ptr, uint32_t value) {
    uint8_t *cursor = *ptr;
    for (; value > 0x7F; value >>= 7) {
        *cursor++ = (uint8_t)(0x80 | (value & 0x7F));
    }
    // Final group: fits in seven bits, continuation bit clear
    *cursor++ = (uint8_t)(value & 0x7F);
    *ptr = cursor;
 }
 // Compute the byte size of an MP2 packet from its header.
 // The upper four bits of header[2] select a bitrate (kbit/s) from the table
 // below and bit 1 of header[2] is added as a padding byte. Table entries that
 // are not positive fall back to MP2_DEFAULT_PACKET_SIZE.
 static int get_mp2_packet_size(uint8_t *header) {
    static const int kbps_by_index[16] = {
        0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, -1
    };
    const int kbps = kbps_by_index[(header[2] >> 4) & 0xF];
    if (kbps <= 0) {
        return MP2_DEFAULT_PACKET_SIZE;
    }
    const int padding = (header[2] >> 1) & 0x1;
    return (144 * kbps * 1000) / MP2_SAMPLE_RATE + padding;
 }
 // Map an MP2 packet size in bytes to the rate index stored in audio packet
 // headers. Known sizes map to even indices 0, 2, ..., 26 in table order; any
 // other size maps to 14. Mono streams use the following odd index.
 static int mp2_packet_size_to_rate_index(int packet_size, int is_mono) {
    static const int known_sizes[] = {
        144, 216, 252, 288, 360, 432, 504, 576, 720, 864, 1008, 1152, 1440, 1728
    };
    const int known_count = (int)(sizeof(known_sizes) / sizeof(known_sizes[0]));
    int rate_index = 14;  // fallback for unrecognised sizes
    for (int slot = 0; slot < known_count; slot++) {
        if (known_sizes[slot] == packet_size) {
            rate_index = slot * 2;
            break;
        }
    }
    return is_mono ? rate_index + 1 : rate_index;
 }
 // Compress src[0..src_len) into dst (capacity dst_max) in a single deflate()
 // call, using the gzip container (windowBits 15 + 16).
 // Returns the number of bytes produced, or 0 if initialisation fails or the
 // stream does not finish in that one call.
 static size_t gzip_compress(uint8_t *src, size_t src_len, uint8_t *dst, size_t dst_max) {
    size_t produced = 0;
    z_stream zs = {0};
    zs.next_in = src;
    zs.avail_in = src_len;
    zs.next_out = dst;
    zs.avail_out = dst_max;
    if (deflateInit2(&zs, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY) != Z_OK) {
        return 0;
    }
    // Anything other than Z_STREAM_END means the whole input was not emitted
    if (deflate(&zs, Z_FINISH) == Z_STREAM_END) {
        produced = zs.total_out;
    }
    deflateEnd(&zs);
    return produced;
 }
 // Four 4x4 ordered-dither threshold tables, stored row-major.
 // Each entry is (rank + 0.5) / 16 for the rank listed below.
 #define BAYER_T(rank) (((rank) + 0.5f) / 16.0f)
 static const float bayerKernels[4][16] = {
    { // Pattern 0
        BAYER_T(0),  BAYER_T(8),  BAYER_T(2),  BAYER_T(10),
        BAYER_T(12), BAYER_T(4),  BAYER_T(14), BAYER_T(6),
        BAYER_T(3),  BAYER_T(11), BAYER_T(1),  BAYER_T(9),
        BAYER_T(15), BAYER_T(7),  BAYER_T(13), BAYER_T(5)
    },
    { // Pattern 1
        BAYER_T(8),  BAYER_T(2),  BAYER_T(10), BAYER_T(0),
        BAYER_T(4),  BAYER_T(14), BAYER_T(6),  BAYER_T(12),
        BAYER_T(11), BAYER_T(1),  BAYER_T(9),  BAYER_T(3),
        BAYER_T(7),  BAYER_T(13), BAYER_T(5),  BAYER_T(15)
    },
    { // Pattern 2
        BAYER_T(7),  BAYER_T(13), BAYER_T(5),  BAYER_T(15),
        BAYER_T(8),  BAYER_T(2),  BAYER_T(10), BAYER_T(0),
        BAYER_T(4),  BAYER_T(14), BAYER_T(6),  BAYER_T(12),
        BAYER_T(11), BAYER_T(1),  BAYER_T(9),  BAYER_T(3)
    },
    { // Pattern 3
        BAYER_T(15), BAYER_T(7),  BAYER_T(13), BAYER_T(5),
        BAYER_T(0),  BAYER_T(8),  BAYER_T(2),  BAYER_T(10),
        BAYER_T(12), BAYER_T(4),  BAYER_T(14), BAYER_T(6),
        BAYER_T(3),  BAYER_T(11), BAYER_T(1),  BAYER_T(9)
    }
 };
 #undef BAYER_T
 // Encode one 4x4 pixel block of an interleaved RGB image into a 12-byte iPF1
 // block at `output`.
 //   rgb_data, width, height, channels : source image; R, G, B are the first
 //                                       three bytes of each pixel
 //   block_x, block_y                  : block coordinates (pixel origin * 4)
 //   pattern                           : negative = threshold 0.0, otherwise
 //                                       bayerKernels[pattern % 4] is used
 // Cells that fall outside the image are encoded from a zero YCoCg sample.
 // Layout: bytes 0-1 Co, bytes 2-3 Cg (one 4-bit value per 2x2 quadrant, taken
 // from the quadrant's averaged chroma), bytes 4-11 luma (one 4-bit value per
 // cell).
 static void encode_ipf1_block_correct(uint8_t *rgb_data, int width, int height, int block_x, int block_y,
                                     int channels, int pattern, uint8_t *output) {
    // Top-left cell of each 2x2 chroma quadrant, in output order
    static const int quad_origin[4] = {0, 2, 8, 10};
    // Cell stored in the low nibble of output bytes 4..11; the high nibble
    // holds the cell immediately to its right
    static const int luma_low_cell[8] = {0, 4, 2, 6, 8, 12, 10, 14};

    int luma[16];
    float co[16];  // unquantised, so the quadrant average keeps full precision
    float cg[16];

    // Convert every cell (row-major) to YCoCg and quantise its luma
    for (int cell = 0; cell < 16; cell++) {
        const int px = cell % 4;
        const int py = cell / 4;
        const int src_x = block_x * 4 + px;
        const int src_y = block_y * 4 + py;
        ycocg_t sample;
        if (src_x < width && src_y < height) {
            const float threshold = (pattern < 0) ? 0.0f : bayerKernels[pattern % 4][4 * py + px];
            const int pixel_offset = (src_y * width + src_x) * channels;
            sample = rgb_to_ycocg_correct(rgb_data[pixel_offset],
                                          rgb_data[pixel_offset + 1],
                                          rgb_data[pixel_offset + 2],
                                          threshold);
        } else {
            sample = (ycocg_t){0.0f, 0.0f, 0.0f};
        }
        luma[cell] = quantise_4bit_y(sample.y);
        co[cell] = sample.co;
        cg[cell] = sample.cg;
    }

    // Average chroma over each 2x2 quadrant, then quantise to four bits
    int co_q[4];
    int cg_q[4];
    for (int q = 0; q < 4; q++) {
        const int o = quad_origin[q];
        co_q[q] = chroma_to_four_bits((co[o] + co[o + 1] + co[o + 4] + co[o + 5]) / 4.0f);
    }
    for (int q = 0; q < 4; q++) {
        const int o = quad_origin[q];
        cg_q[q] = chroma_to_four_bits((cg[o] + cg[o + 1] + cg[o + 4] + cg[o + 5]) / 4.0f);
    }

    // Chroma bytes: second quadrant of each pair goes in the high nibble
    output[0] = ((co_q[1] << 4) | co_q[0]);
    output[1] = ((co_q[3] << 4) | co_q[2]);
    output[2] = ((cg_q[1] << 4) | cg_q[0]);
    output[3] = ((cg_q[3] << 4) | cg_q[2]);

    // Luma bytes: cell pairs (1|0) (5|4) (3|2) (7|6) (9|8) (13|12) (11|10) (15|14)
    for (int k = 0; k < 8; k++) {
        const int low = luma_low_cell[k];
        output[4 + k] = ((luma[low + 1] << 4) | luma[low]);
    }
 }
 // Weighted difference score for one pair of 4-bit values.
 // Returns delta * weight, multiplied by 1.5 when the mean of v1 and v2 lies
 // below 4 or above 11, and by 1.0 otherwise.
 static double contrast_weight(int v1, int v2, int delta, int weight) {
    const double mean = (v1 + v2) / 2.0;
    const int weighted = delta * weight;
    if (mean < 4 || mean > 11) {
        return weighted * 1.5;
    }
    return weighted * 1.0;
 }
 // Decide whether two 12-byte iPF1 blocks differ enough to be re-sent.
 // Every 4-bit field of the two blocks is compared; chroma nibbles (bytes 0-3)
 // are scored with weight 3 and luma nibbles (bytes 4-11) with weight 2 via
 // contrast_weight(). Returns non-zero when the summed score exceeds 4.0.
 static int is_significantly_different(uint8_t *block_a, uint8_t *block_b) {
    double total = 0.0;

    // Chroma: Co in bytes 0-1, Cg in bytes 2-3; low nibble first, then high
    for (int i = 0; i < 4; i++) {
        const int a_lo = block_a[i] & 0xF;
        const int b_lo = block_b[i] & 0xF;
        const int a_hi = (block_a[i] >> 4) & 0xF;
        const int b_hi = (block_b[i] >> 4) & 0xF;
        total += contrast_weight(a_lo, b_lo, abs(a_lo - b_lo), 3);
        total += contrast_weight(a_hi, b_hi, abs(a_hi - b_hi), 3);
    }

    // Luma: bytes 4-11; high nibble first, then low
    for (int i = 4; i < 12; i++) {
        const int a_hi = (block_a[i] >> 4) & 0xF;
        const int b_hi = (block_b[i] >> 4) & 0xF;
        const int a_lo = block_a[i] & 0xF;
        const int b_lo = block_b[i] & 0xF;
        total += contrast_weight(a_hi, b_hi, abs(a_hi - b_hi), 2);
        total += contrast_weight(a_lo, b_lo, abs(a_lo - b_lo), 2);
    }

    return total > 4.0;
 }
 // Encode a whole RGB image as consecutive iPF1 blocks in `ipf_buffer`.
 // The image is tiled into 4x4 blocks (dimensions rounded up); blocks are
 // written row by row, IPF_BLOCK_SIZE bytes each, using the given dither
 // pattern for every block.
 static void encode_ipf1_frame(uint8_t *rgb_data, int width, int height, int channels, int pattern,
                             uint8_t *ipf_buffer) {
    const int cols = (width + 3) / 4;
    const int rows = (height + 3) / 4;
    uint8_t *dest = ipf_buffer;
    for (int by = 0; by < rows; by++) {
        for (int bx = 0; bx < cols; bx++, dest += IPF_BLOCK_SIZE) {
            encode_ipf1_block_correct(rgb_data, width, height, bx, by, channels, pattern, dest);
        }
    }
 }
 // Build an iPF1-delta stream describing how `current_frame` differs from
 // `previous_frame` (both are arrays of IPF_BLOCK_SIZE-byte blocks for a
 // width x height image tiled in 4x4 blocks).
 //
 // Output written to delta_buffer:
 //   SKIP_OP  varint(n)                 - n consecutive blocks left unchanged
 //   PATCH_OP varint(n) n * block bytes - n consecutive replacement blocks
 //   END_OP                             - terminator; a trailing run of
 //                                        unchanged blocks is not emitted
 // Returns the number of bytes written. The caller must supply a buffer large
 // enough for the worst case.
 //
 // A run of patched blocks is contiguous in current_frame, so it is copied
 // straight from there; no scratch allocation (and no allocation failure
 // path) is needed.
 static size_t encode_ipf1_delta(uint8_t *previous_frame, uint8_t *current_frame, 
                               int width, int height, uint8_t *delta_buffer) {
    int blocks_per_row = (width + 3) / 4;
    int blocks_per_col = (height + 3) / 4;
    int total_blocks = blocks_per_row * blocks_per_col;
    uint8_t *output_ptr = delta_buffer;
    int skip_count = 0;
    int patch_start = 0;   // index of the first block in the pending patch run
    int patch_count = 0;   // length of the pending patch run
    for (int block_index = 0; block_index < total_blocks; block_index++) {
        uint8_t *prev_block = previous_frame + block_index * IPF_BLOCK_SIZE;
        uint8_t *curr_block = current_frame + block_index * IPF_BLOCK_SIZE;
        if (is_significantly_different(prev_block, curr_block)) {
            // A changed block ends any pending skip run
            if (skip_count > 0) {
                *output_ptr++ = SKIP_OP;
                write_varint(&output_ptr, skip_count);
                skip_count = 0;
            }
            if (patch_count == 0) {
                patch_start = block_index;
            }
            patch_count++;
        } else {
            // An unchanged block ends any pending patch run
            if (patch_count > 0) {
                size_t run_bytes = (size_t)patch_count * IPF_BLOCK_SIZE;
                *output_ptr++ = PATCH_OP;
                write_varint(&output_ptr, patch_count);
                memcpy(output_ptr, current_frame + (size_t)patch_start * IPF_BLOCK_SIZE, run_bytes);
                output_ptr += run_bytes;
                patch_count = 0;
            }
            skip_count++;
        }
    }
    // Flush a patch run that reaches the end of the frame
    if (patch_count > 0) {
        size_t run_bytes = (size_t)patch_count * IPF_BLOCK_SIZE;
        *output_ptr++ = PATCH_OP;
        write_varint(&output_ptr, patch_count);
        memcpy(output_ptr, current_frame + (size_t)patch_start * IPF_BLOCK_SIZE, run_bytes);
        output_ptr += run_bytes;
    }
    *output_ptr++ = END_OP;
    return output_ptr - delta_buffer;
 }
 // Store the current time of day in *tv (via gettimeofday) and return the same
 // instant as fractional seconds.
 static double get_current_time_sec(struct timeval *tv) {
    gettimeofday(tv, NULL);
    const double fraction = tv->tv_usec / 1000000.0;
    return tv->tv_sec + fraction;
 }
 // Print an FFmpeg-style status line to stderr, at most once per second.
 // The line starts with '\r' so it overwrites the previous one. It reports the
 // frame number, encoding rate, bytes written so far, position in the video,
 // output bitrate and speed relative to real time.
 // config->last_progress_time is updated whenever a line is printed.
 static void display_progress(encoder_config_t *config, int frame_num) {
    struct timeval now;
    const double now_sec = get_current_time_sec(&now);

    // Throttle: do nothing until a full second has passed since the last line
    const double previous_sec =
        config->last_progress_time.tv_sec + config->last_progress_time.tv_usec / 1000000.0;
    if (now_sec - previous_sec < 1.0) {
        return;
    }
    config->last_progress_time = now;

    // Wall-clock time spent encoding versus position reached in the video
    const double begin_sec = config->start_time.tv_sec + config->start_time.tv_usec / 1000000.0;
    const double elapsed = now_sec - begin_sec;
    const double video_pos = (double)frame_num / config->fps;
    const double encode_fps = frame_num / elapsed;
    double speed = 0.0;
    double kbits_per_sec = 0.0;
    if (elapsed > 0) {
        speed = video_pos / elapsed;
        kbits_per_sec = (config->total_output_bytes * 8.0 / 1024.0) / elapsed;
    }

    // Human-readable output size
    char size_text[32];
    if (config->total_output_bytes < 1024) {
        snprintf(size_text, sizeof(size_text), "%zuB", config->total_output_bytes);
    } else if (config->total_output_bytes < 1024 * 1024) {
        snprintf(size_text, sizeof(size_text), "%.1fkB", config->total_output_bytes / 1024.0);
    } else {
        snprintf(size_text, sizeof(size_text), "%.1fMB", config->total_output_bytes / (1024.0 * 1024.0));
    }

    // Split the video position into HH:MM:SS.xx
    const int hh = (int)(video_pos / 3600);
    const int mm = (int)((video_pos - hh * 3600) / 60);
    const double ss = video_pos - hh * 3600 - mm * 60;

    fprintf(stderr, "\rframe=%d fps=%.1f size=%s time=%02d:%02d:%05.2f bitrate=%.1fkbits/s speed=%4.2fx",
            frame_num, encode_fps, size_text, hh, mm, ss, kbits_per_sec, speed);
    fflush(stderr);
 }
 // Write the MP2 audio packets that accompany video frame `frame_num`.
 //   config    : encoder state; reads has_audio, mp2_file, fps and
 //               target_audio_buffer_size, and updates audio_remaining,
 //               mp2_packet_size, mp2_rate_index, audio_frames_in_buffer and
 //               total_output_bytes
 //   frame_num : 1-based frame number; frame 1 primes the buffer
 //   output    : destination stream
 // Each packet is a 2-byte header {mp2_rate_index, MP2_PACKET_TYPE_BASE}
 // followed by up to mp2_packet_size bytes read from the MP2 file.
 // Every path returns 1, including "no audio" and read failures.
 static int process_audio(encoder_config_t *config, int frame_num, FILE *output) {
    // Nothing to do without an audio stream or once the file is exhausted
    if (!config->has_audio || !config->mp2_file || config->audio_remaining <= 0) {
        return 1;
    }
    // Initialise packet size on first frame
    if (config->mp2_packet_size == 0) {
        uint8_t header[4];
        if (fread(header, 1, 4, config->mp2_file) != 4) return 1;
        // Rewind so the header bytes are emitted as part of the first packet
        fseek(config->mp2_file, 0, SEEK_SET);
        config->mp2_packet_size = get_mp2_packet_size(header);
        // The top two bits of header[3] equal to 3 are treated as mono
        int is_mono = (header[3] >> 6) == 3;
        config->mp2_rate_index = mp2_packet_size_to_rate_index(config->mp2_packet_size, is_mono);
    }
    // Calculate how much audio time each frame represents (in seconds)
    double frame_audio_time = 1.0 / config->fps;
    // Calculate how much audio time each MP2 packet represents
    // (1152 samples per packet divided by MP2_SAMPLE_RATE)
    double packet_audio_time = 1152.0 / MP2_SAMPLE_RATE;
    // Estimate how many packets we consume per video frame
    double packets_per_frame = frame_audio_time / packet_audio_time;
    // Only insert audio when the simulated buffer level is 2 packets or fewer
    // Initialise with 2 packets on first frame to prime the buffer
    int packets_to_insert = 0;
    if (frame_num == 1) {
        packets_to_insert = 2;
        config->audio_frames_in_buffer = 2;
    } else {
        // Simulate buffer consumption (packets consumed per frame).
        // The estimate is rounded up, so consumption is never underestimated.
        config->audio_frames_in_buffer -= (int)ceil(packets_per_frame);
        // Only insert packets when buffer gets low (<= 2 packets)
        if (config->audio_frames_in_buffer <= 2) {
            // Refill towards the target level, but always insert at least one
            packets_to_insert = config->target_audio_buffer_size - config->audio_frames_in_buffer;
            packets_to_insert = (packets_to_insert > 0) ? packets_to_insert : 1;
        }
    }
    // Insert the calculated number of audio packets
    for (int q = 0; q < packets_to_insert; q++) {
        // The last packet of the file may be shorter than a full packet
        size_t bytes_to_read = config->mp2_packet_size;
        if (bytes_to_read > config->audio_remaining) {
            bytes_to_read = config->audio_remaining;
        }
        size_t bytes_read = fread(config->mp2_buffer, 1, bytes_to_read, config->mp2_file);
        if (bytes_read == 0) break;
        uint8_t audio_packet_type[2] = {config->mp2_rate_index, MP2_PACKET_TYPE_BASE};
        fwrite(audio_packet_type, 1, 2, output);
        fwrite(config->mp2_buffer, 1, bytes_read, output);
        // Track audio bytes written
        config->total_output_bytes += 2 + bytes_read;
        config->audio_remaining -= bytes_read;
        config->audio_frames_in_buffer++;
    }
    return 1;
 }
 // Write a 16-bit value to `output` as two bytes, least significant first.
 static void tvdos_put_u16_le(FILE *output, unsigned int value) {
    uint8_t bytes[2] = { (uint8_t)(value & 0xFF), (uint8_t)((value >> 8) & 0xFF) };
    fwrite(bytes, 1, 2, output);
 }
 // Write a 32-bit value to `output` as four bytes, least significant first.
 static void tvdos_put_u32_le(FILE *output, uint32_t value) {
    uint8_t bytes[4] = {
        (uint8_t)(value & 0xFF), (uint8_t)((value >> 8) & 0xFF),
        (uint8_t)((value >> 16) & 0xFF), (uint8_t)((value >> 24) & 0xFF)
    };
    fwrite(bytes, 1, 4, output);
 }
 // Write the 32-byte TVDOS movie header:
 //   8 bytes  TVDOS_MAGIC
 //   2 bytes  width, 2 bytes height, 2 bytes fps
 //   4 bytes  total frame count
 //   2 bytes  0x00FF
 //   2 bytes  audio queue info (0 when there is no audio)
 //  10 bytes  reserved zeros
 // Multi-byte fields are emitted explicitly in little-endian order, so the
 // file is identical whatever the host byte order is (previously the first
 // bytes of the in-memory int were written, which is only correct on
 // little-endian hosts).
 // Side effect: stores the computed audio queue size in
 // config->target_audio_buffer_size.
 static void write_tvdos_header(encoder_config_t *config, FILE *output) {
    fwrite(TVDOS_MAGIC, 1, 8, output);
    tvdos_put_u16_le(output, (unsigned int)config->width);
    tvdos_put_u16_le(output, (unsigned int)config->height);
    tvdos_put_u16_le(output, (unsigned int)config->fps);
    tvdos_put_u32_le(output, (uint32_t)config->total_frames);
    uint16_t unused = 0x00FF;
    tvdos_put_u16_le(output, unused);
    // Bytes of audio per video frame, estimated from the sample rate
    int audio_sample_size = 2 * (((MP2_SAMPLE_RATE / config->fps) + 1));
    int audio_queue_size = config->has_audio ?
        (int)ceil(audio_sample_size / 2304.0) + 1 : 0;
    // Low 12 bits: default packet size / 4; top 4 bits: queue size
    uint16_t audio_queue_info = config->has_audio ?
        (MP2_DEFAULT_PACKET_SIZE >> 2) | (audio_queue_size << 12) : 0x0000;
    tvdos_put_u16_le(output, audio_queue_info);
    // Store target buffer size for audio timing
    config->target_audio_buffer_size = audio_queue_size;
    uint8_t reserved[10] = {0};
    fwrite(reserved, 1, 10, output);
 }
 // Allocate a zero-filled encoder configuration and set the default output
 // resolution. Returns NULL when the allocation fails.
 static encoder_config_t *init_encoder_config() {
    encoder_config_t *fresh = calloc(1, sizeof(*fresh));
    if (fresh != NULL) {
        fresh->width = DEFAULT_WIDTH;
        fresh->height = DEFAULT_HEIGHT;
    }
    return fresh;
 }
 // Allocate all working buffers for the configured resolution:
 //   rgb_buffer                              width * height * 3 bytes
 //   previous_ipf_frame / current_ipf_frame  one iPF1 frame each
 //   delta_buffer / compressed_buffer        two iPF1 frames each
 //   mp2_buffer                              2048 bytes
 // config->frame_buffer_size is set to the size of one iPF1 frame.
 // Returns 1 when every allocation succeeded, 0 otherwise (including a
 // non-positive or overflowing resolution). Buffers that were allocated are
 // left in config for cleanup_config() to release.
 static int allocate_buffers(encoder_config_t *config) {
    if (config->width <= 0 || config->height <= 0) {
        return 0;
    }
    // Do the size arithmetic in size_t so large resolutions cannot overflow int
    const size_t blocks_x = ((size_t)config->width + 3) / 4;
    const size_t blocks_y = ((size_t)config->height + 3) / 4;
    const size_t pixels = (size_t)config->width * (size_t)config->height;
    if (pixels > SIZE_MAX / 3) {
        return 0;
    }
    const size_t frame_bytes = blocks_x * blocks_y * IPF_BLOCK_SIZE;
    config->frame_buffer_size = frame_bytes;
    config->rgb_buffer = malloc(pixels * 3);
    config->previous_ipf_frame = malloc(frame_bytes);
    config->current_ipf_frame = malloc(frame_bytes);
    config->delta_buffer = malloc(frame_bytes * 2);
    config->compressed_buffer = malloc(frame_bytes * 2);
    config->mp2_buffer = malloc(2048);
    return (config->rgb_buffer && config->previous_ipf_frame && 
            config->current_ipf_frame && config->delta_buffer && 
            config->compressed_buffer && config->mp2_buffer);
 }
 // Encode one video frame and append its packets to `output`.
 // Packet order per frame: audio packets, one video packet, one sync packet.
 //   config      : encoder state (buffers, pipe, counters)
 //   frame_num   : 1-based frame number
 //   is_keyframe : non-zero forces a full iPF1 frame instead of a delta
 //   output      : destination stream
 // Returns 1 when a frame was written, 0 at end of the FFmpeg stream, -1 on a
 // read or compression error.
 // NOTE(review): after a delta frame the reference buffer becomes the full
 // current encoding, including blocks that were skipped as "not significantly
 // different"; presumably a decoder keeps its old blocks there, so small
 // differences could accumulate until the next keyframe - confirm.
 static int process_frame(encoder_config_t *config, int frame_num, int is_keyframe, FILE *output) {
    // Read RGB data from FFmpeg pipe first
    size_t rgb_size = config->width * config->height * 3;
    if (fread(config->rgb_buffer, 1, rgb_size, config->ffmpeg_video_pipe) != rgb_size) {
        // A short read at EOF is the normal end of the video
        if (feof(config->ffmpeg_video_pipe)) return 0;
        return -1;
    }
    // Step 1: Process audio FIRST (matches working file pattern)
    if (!process_audio(config, frame_num, output)) {
        return -1;
    }
    // Step 2: Encode and write video
    int pattern;
    switch (config->dither_mode) {
        case 0: pattern = -1; break;  // No dithering
        case 1: pattern = 0; break;   // Static pattern
        case 2: pattern = frame_num % 4; break;  // Dynamic pattern
        default: pattern = 0; break;  // Fallback to static
    }
    encode_ipf1_frame(config->rgb_buffer, config->width, config->height, 3, pattern,
                     config->current_ipf_frame);
    // Determine if we should use delta encoding
    int use_delta = 0;
    size_t data_size = config->frame_buffer_size;
    uint8_t *frame_data = config->current_ipf_frame;
    if (frame_num > 1 && !is_keyframe) {
        size_t delta_size = encode_ipf1_delta(config->previous_ipf_frame, 
                                            config->current_ipf_frame,
                                            config->width, config->height,
                                            config->delta_buffer);
        // Use the delta only when it is below 57.6% of a full frame
        if (delta_size < config->frame_buffer_size * 0.576) {
            use_delta = 1;
            data_size = delta_size;
            frame_data = config->delta_buffer;
        }
    }
    // Compress the frame data using gzip
    size_t compressed_size = gzip_compress(frame_data, data_size, 
                                          config->compressed_buffer, 
                                          config->frame_buffer_size * 2);
    if (compressed_size == 0) {
        fprintf(stderr, "Gzip compression failed\n");
        return -1;
    }
    // Write video packet: 2-byte type, 4-byte compressed size, payload
    if (use_delta) {
        uint8_t packet_type[2] = {IPF1_DELTA_PACKET_TYPE};
        fwrite(packet_type, 1, 2, output);
    } else {
        uint8_t packet_type[2] = {IPF1_PACKET_TYPE};
        fwrite(packet_type, 1, 2, output);
    }
    // NOTE(review): the size is written in host byte order; the variable name
    // suggests little-endian is intended - confirm on big-endian hosts
    uint32_t size_le = compressed_size;
    fwrite(&size_le, 4, 1, output);
    fwrite(config->compressed_buffer, 1, compressed_size, output);
    // Step 3: Write sync packet AFTER video (matches working file pattern)
    uint8_t sync[2] = {SYNC_PACKET_TYPE};
    fwrite(sync, 1, 2, output);
    // Track video bytes written (packet type + size + compressed data + sync)
    config->total_output_bytes += 2 + 4 + compressed_size + 2;
    // Swap frame buffers so this frame becomes the reference for the next delta
    uint8_t *temp = config->previous_ipf_frame;
    config->previous_ipf_frame = config->current_ipf_frame;
    config->current_ipf_frame = temp;
    // Display progress
    display_progress(config, frame_num);
    return 1;
 }
 // Release everything owned by `config`: close the FFmpeg pipe and the MP2
 // file if open, free all buffers and strings, delete the temporary audio
 // file, and free the structure itself. Passing NULL is a no-op.
 static void cleanup_config(encoder_config_t *config) {
    if (config == NULL) {
        return;
    }
    if (config->ffmpeg_video_pipe != NULL) {
        pclose(config->ffmpeg_video_pipe);
    }
    if (config->mp2_file != NULL) {
        fclose(config->mp2_file);
    }

    // free(NULL) is harmless, so unallocated members need no special casing
    void *owned[] = {
        config->input_file,
        config->output_file,
        config->rgb_buffer,
        config->previous_ipf_frame,
        config->current_ipf_frame,
        config->delta_buffer,
        config->compressed_buffer,
        config->mp2_buffer
    };
    for (size_t i = 0; i < sizeof(owned) / sizeof(owned[0]); i++) {
        free(owned[i]);
    }

    // Remove temporary audio file
    unlink(TEMP_AUDIO_FILE);
    free(config);
 }
 // Print the command-line help text to stdout. `program_name` is substituted
 // into the usage line and the examples.
 static void print_usage(const char *program_name) {
    fputs("TVDOS Movie Encoder\n\n", stdout);
    printf("Usage: %s [options] input_video\n\n", program_name);

    fputs("Options:\n", stdout);
    fputs("  -o, --output FILE    Output TVDOS movie file (default: stdout)\n", stdout);
    fputs("  -s, --size WxH       Video resolution (default: 560x448)\n", stdout);
    fputs("  -d, --dither MODE    Dithering mode (default: 1)\n", stdout);
    fputs("                         0: No dithering\n", stdout);
    fputs("                         1: Static pattern\n", stdout);
    fputs("                         2: Dynamic pattern (better quality, larger files)\n", stdout);
    fputs("  -h, --help           Show this help message\n\n", stdout);

    fputs("Examples:\n", stdout);
    printf("  %s input.mp4 -o output.mov\n", program_name);
    printf("  %s input.avi -s 1024x768 -o output.mov\n", program_name);
    printf("  yt-dlp -o - \"https://youtube.com/watch?v=VIDEO_ID\" | ffmpeg -i pipe:0 -c copy temp.mp4 && %s temp.mp4 -o youtube_video.mov && rm temp.mp4\n", program_name);
 }
 // Program entry point: parse options, probe the input, start the FFmpeg
 // helpers, write the TVDOS header and then encode frame by frame.
 // Exit status: 0 on success (including -h), 1 on a usage error, a set-up
 // failure, an encoding error or a failed write to the output.
 int main(int argc, char *argv[]) {
    encoder_config_t *config = init_encoder_config();
    if (!config) {
        fprintf(stderr, "Failed to initialise encoder\n");
        return 1;
    }
    config->output_to_stdout = 1; // Default to stdout
    config->dither_mode = 1; // Default to static dithering
    // Parse command line arguments
    static struct option long_options[] = {
        {"output", required_argument, 0, 'o'},
        {"size", required_argument, 0, 's'},
        {"dither", required_argument, 0, 'd'},
        {"help", no_argument, 0, 'h'},
        {0, 0, 0, 0}
    };
    int c;
    while ((c = getopt_long(argc, argv, "o:s:d:h", long_options, NULL)) != -1) {
        switch (c) {
            case 'o':
                // A repeated -o replaces the earlier value instead of leaking it
                free(config->output_file);
                config->output_file = strdup(optarg);
                if (!config->output_file) {
                    fprintf(stderr, "Out of memory\n");
                    cleanup_config(config);
                    return 1;
                }
                config->output_to_stdout = 0;
                break;
            case 's':
                if (!parse_resolution(optarg, &config->width, &config->height)) {
                    fprintf(stderr, "Invalid resolution format: %s\n", optarg);
                    cleanup_config(config);
                    return 1;
                }
                break;
            case 'd':
                config->dither_mode = atoi(optarg);
                if (config->dither_mode < 0 || config->dither_mode > 2) {
                    fprintf(stderr, "Invalid dither mode: %s (must be 0, 1, or 2)\n", optarg);
                    cleanup_config(config);
                    return 1;
                }
                break;
            case 'h':
                print_usage(argv[0]);
                cleanup_config(config);
                return 0;
            default:
                print_usage(argv[0]);
                cleanup_config(config);
                return 1;
        }
    }
    if (optind >= argc) {
        fprintf(stderr, "Error: Input video file required\n\n");
        print_usage(argv[0]);
        cleanup_config(config);
        return 1;
    }
    config->input_file = strdup(argv[optind]);
    if (!config->input_file) {
        fprintf(stderr, "Out of memory\n");
        cleanup_config(config);
        return 1;
    }
    // Get video metadata
    if (!get_video_metadata(config)) {
        fprintf(stderr, "Failed to analyze video metadata\n");
        cleanup_config(config);
        return 1;
    }
    // Allocate buffers
    if (!allocate_buffers(config)) {
        fprintf(stderr, "Failed to allocate memory buffers\n");
        cleanup_config(config);
        return 1;
    }
    // Start video conversion
    if (!start_video_conversion(config)) {
        fprintf(stderr, "Failed to start video conversion\n");
        cleanup_config(config);
        return 1;
    }
    // Start audio conversion
    if (!start_audio_conversion(config)) {
        fprintf(stderr, "Failed to start audio conversion\n");
        cleanup_config(config);
        return 1;
    }
    // Open output
    FILE *output = config->output_to_stdout ? stdout : fopen(config->output_file, "wb");
    if (!output) {
        fprintf(stderr, "Failed to open output file\n");
        cleanup_config(config);
        return 1;
    }
    // Write TVDOS header
    write_tvdos_header(config, output);
    // Initialise progress tracking
    gettimeofday(&config->start_time, NULL);
    config->last_progress_time = config->start_time;
    config->total_output_bytes = 8 + 2 + 2 + 2 + 4 + 2 + 2 + 10; // TVDOS header size
    // Process frames with correct order: Audio -> Video -> Sync
    int exit_code = 0;
    for (int frame = 1; frame <= config->total_frames; frame++) {
        // The first frame and every 30th frame are forced to be full frames
        int is_keyframe = (frame == 1) || (frame % 30 == 0);
        int result = process_frame(config, frame, is_keyframe, output);
        if (result == 0) {
            fprintf(stderr, "End of video at frame %d\n", frame);
            break;
        }
        if (result < 0) {
            // A read or compression error must not be reported as success
            fprintf(stderr, "\nError: failed to encode frame %d\n", frame);
            exit_code = 1;
            break;
        }
    }
    // Final progress update and newline
    fprintf(stderr, "\n");
    // Individual fwrite() calls are not checked, so detect write errors here
    if (fflush(output) != 0 || ferror(output)) {
        fprintf(stderr, "Error: failed to write output\n");
        exit_code = 1;
    }
    if (!config->output_to_stdout) {
        if (fclose(output) != 0 && exit_code == 0) {
            fprintf(stderr, "Error: failed to write output\n");
            exit_code = 1;
        }
        if (exit_code == 0) {
            fprintf(stderr, "Encoding complete: %s\n", config->output_file);
        }
    }
    cleanup_config(config);
    return exit_code;
 }
--- a/video_encoder/encoder_tav_opencv.cpp
+++ b/video_encoder/encoder_tav_opencv.cpp
@@ -1,183 +0,0 @@
 // Created by CuriousTorvald and Claude on 2025-10-17
 // MPEG-style bidirectional block motion compensation for TAV encoder
 // Simplified: dense Farneback optical flow sampled at block centres, fixed-size blocks, bilinear sub-pixel warping
 #include <opencv2/opencv.hpp>
 #include <cmath>
 #include <cstdlib>
 #include <cstring>
 #include <vector>
 extern "C" {
 // Dense optical flow estimation using Farneback algorithm
 // Computes flow at every pixel, then samples at block centers for motion vectors
 // Much more spatially coherent than independent block matching
 void estimate_optical_flow_motion(
    const float *current_y,    // Current frame Y channel (width×height)
    const float *reference_y,  // Reference frame Y channel
    int width, int height,
    int block_size,            // Block size (e.g., 16)
    int16_t *mvs_x,           // Output: motion vectors X (in 1/4-pixel units)
    int16_t *mvs_y            // Output: motion vectors Y (in 1/4-pixel units)
 ) {
    // Convert float Y channels to 8-bit grayscale for OpenCV
    cv::Mat cur_gray(height, width, CV_8UC1);
    cv::Mat ref_gray(height, width, CV_8UC1);
    // Detect if Y is in [0,1] range and scale to [0,255] if needed
    float y_min = current_y[0], y_max = current_y[0];
    for (int i = 1; i < width * height; i++) {
        if (current_y[i] < y_min) y_min = current_y[i];
        if (current_y[i] > y_max) y_max = current_y[i];
    }
    float scale = (y_max <= 1.1f) ? 255.0f : 1.0f;
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            int idx = y * width + x;
            cur_gray.at<uint8_t>(y, x) = (uint8_t)std::round(std::max(0.0f, std::min(255.0f, current_y[idx] * scale)));
            ref_gray.at<uint8_t>(y, x) = (uint8_t)std::round(std::max(0.0f, std::min(255.0f, reference_y[idx] * scale)));
        }
    }
    // Compute dense optical flow using Farneback algorithm
    // IMPORTANT: We need BACKWARD flow (current → reference) for motion compensation
    // This tells us where to PULL pixels FROM in the reference frame
    cv::Mat flow;
    cv::calcOpticalFlowFarneback(
        cur_gray,      // Current frame (source)
        ref_gray,      // Reference frame (destination)
        flow,          // Output flow (2-channel float: dx, dy per pixel)
        0.5,           // pyr_scale: pyramid scale (0.5 = each layer is half size)
        3,             // levels: number of pyramid levels
        20,            // winsize: averaging window size
        3,             // iterations: number of iterations at each pyramid level
        5,             // poly_n: size of pixel neighborhood (5 or 7)
        1.2,           // poly_sigma: standard deviation of Gaussian for polynomial expansion
        0              // flags: 0 = normal, OPTFLOW_USE_INITIAL_FLOW = use input flow as initial estimate
    );
    // Sample flow at block centers to get motion vectors
    int num_blocks_x = (width + block_size - 1) / block_size;
    int num_blocks_y = (height + block_size - 1) / block_size;
    for (int by = 0; by < num_blocks_y; by++) {
        for (int bx = 0; bx < num_blocks_x; bx++) {
            int block_idx = by * num_blocks_x + bx;
            // Block center position
            int center_x = bx * block_size + block_size / 2;
            int center_y = by * block_size + block_size / 2;
            // Clamp to frame boundaries
            if (center_x >= width) center_x = width - 1;
            if (center_y >= height) center_y = height - 1;
            // Get flow at block center
            cv::Point2f flow_vec = flow.at<cv::Point2f>(center_y, center_x);
            // Convert to 1/4-pixel units and store
            // Flow is in pixels, positive = motion to the right/down
            mvs_x[block_idx] = (int16_t)std::round(flow_vec.x * 4.0f);
            mvs_y[block_idx] = (int16_t)std::round(flow_vec.y * 4.0f);
        }
    }
 }
 // Block-wise backward warp with bilinear sampling.
 // Every destination pixel in a block is fetched from `src` at
 // (x - mv_x, y - mv_y), where the block's motion vector is given in
 // 1/4-pixel units. Sample positions are clamped to the frame, and blocks are
 // indexed row-major over ceil(width / block_size) x ceil(height / block_size).
 void warp_block_motion(
    const float *src,          // Source frame
    int width, int height,
    const int16_t *mvs_x,      // Motion vectors X (1/4-pixel units)
    const int16_t *mvs_y,      // Motion vectors Y (1/4-pixel units)
    int block_size,            // Block size (e.g., 16)
    float *dst                 // Output warped frame
 ) {
    const int blocks_across = (width + block_size - 1) / block_size;
    const int blocks_down = (height + block_size - 1) / block_size;
    const float last_col = (float)(width - 1);
    const float last_row = (float)(height - 1);

    for (int by = 0; by < blocks_down; by++) {
        const int y_begin = by * block_size;
        const int y_limit = std::min(y_begin + block_size, height);

        for (int bx = 0; bx < blocks_across; bx++) {
            const int x_begin = bx * block_size;
            const int x_limit = std::min(x_begin + block_size, width);

            // Displacement of this block, converted from quarter pixels
            const int block_idx = by * blocks_across + bx;
            const float shift_x = mvs_x[block_idx] / 4.0f;
            const float shift_y = mvs_y[block_idx] / 4.0f;

            for (int y = y_begin; y < y_limit; y++) {
                // Vertical sample position and weights are shared by the row
                const float sample_y = std::max(0.0f, std::min(last_row, y - shift_y));
                const int row0 = (int)sample_y;
                const int row1 = std::min(row0 + 1, height - 1);
                const float wy = sample_y - row0;

                for (int x = x_begin; x < x_limit; x++) {
                    const float sample_x = std::max(0.0f, std::min(last_col, x - shift_x));
                    const int col0 = (int)sample_x;
                    const int col1 = std::min(col0 + 1, width - 1);
                    const float wx = sample_x - col0;

                    // Blend horizontally on both rows, then vertically
                    const float upper = (1.0f - wx) * src[row0 * width + col0] + wx * src[row0 * width + col1];
                    const float lower = (1.0f - wx) * src[row1 * width + col0] + wx * src[row1 * width + col1];
                    dst[y * width + x] = (1.0f - wy) * upper + wy * lower;
                }
            }
        }
    }
 }
 // Bidirectional motion compensation for MC-EZBC predict step
 // Implements: prediction = 0.5 * (warp(f0, MV_fwd) + warp(f1, MV_bwd))
 //
 // Both frames are warped into temporary buffers before `prediction` is
 // written. The temporaries are std::vector so they are released on every
 // exit path. Does nothing when width or height is not positive.
 void warp_bidirectional(
    const float *f0, const float *f1,
    int width, int height,
    const int16_t *mvs_fwd_x, const int16_t *mvs_fwd_y,  // F0 → F1
    const int16_t *mvs_bwd_x, const int16_t *mvs_bwd_y,  // F1 → F0
    int block_size,
    float *prediction          // Output: 0.5 * (warped_f0 + warped_f1)
 ) {
    if (width <= 0 || height <= 0) {
        return;
    }
    // Count pixels in size_t so large frames cannot overflow int
    const std::size_t num_pixels = static_cast<std::size_t>(width) * static_cast<std::size_t>(height);
    std::vector<float> warped_f0(num_pixels);
    std::vector<float> warped_f1(num_pixels);
    // Warp f0 forward using forward MVs
    warp_block_motion(f0, width, height, mvs_fwd_x, mvs_fwd_y, block_size, warped_f0.data());
    // Warp f1 backward using backward MVs
    warp_block_motion(f1, width, height, mvs_bwd_x, mvs_bwd_y, block_size, warped_f1.data());
    // Average the two warped frames
    for (std::size_t i = 0; i < num_pixels; i++) {
        prediction[i] = 0.5f * (warped_f0[i] + warped_f1[i]);
    }
 }
 } // extern "C"
--- a/video_encoder/encoder_tav_text.c
+++ b/video_encoder/encoder_tav_text.c
@@ -1,795 +0,0 @@
 /*
 encoder_tav_text.c
 Text-based video encoder for TSVM using custom font ROMs
 Outputs Videotex files with custom header and packet type 0x3F (text mode)
 File structure:
  - Videotex header (32 bytes): magic "\x1FTSVMTAV", version, grid dims, fps, total_frames, flags
  - Extended header packet (0xEF): BGNT, ENDT, CDAT, VNDR, FMPG
  - Font ROM packets (0x30): lowrom and highrom (1920 bytes each)
  - Per-frame sequence: [audio 0x20], [timecode 0xFD], [videotex 0x3F], [sync 0xFF]
 Videotex packet structure (0x3F): Zstd([rows][cols][fg-array][bg-array][char-array])
  - rows: uint8 (32)
  - cols: uint8 (80)
  - fg-array: rows*cols bytes (foreground colors, 0xF0=black, 0xFE=white)
  - bg-array: rows*cols bytes (background colors, 0xF0=black, 0xFE=white)
  - char-array: rows*cols bytes (glyph indices 0-255)
 Total uncompressed size: 2 + (80*32*3) = 7682 bytes
 Separated arrays compress much better (fg/bg are just 0xF0/0xFE runs)
 Video size: 80×32 characters (560×448 pixels with 7×14 font)
 Audio: MP2 encoding at 224 kbps, 32 kHz stereo (packet 0x20)
 Each text frame is treated as an I-frame with sync packet
 Usage:
  gcc -Ofast -std=c11 -Wall encoder_tav_text.c -o encoder_tav_text -lm -lzstd
  ./encoder_tav_text -i video.mp4 -f font.chr -o output.mv3
 */
 #define _POSIX_C_SOURCE 200809L
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
 #include <math.h>
 #include <zstd.h>
 #include <unistd.h>
 #include <time.h>
 #include <sys/time.h>
 #define ENCODER_VENDOR_STRING "Encoder-TAV-Text 20251121 (videotex)"
 #define CHAR_W 7
 #define CHAR_H 14
 #define GRID_W 80
 #define GRID_H 32
 #define PIXEL_W (GRID_W * CHAR_W)  // 560
 #define PIXEL_H (GRID_H * CHAR_H)  // 448
 #define PATCH_SZ (CHAR_W * CHAR_H)
 #define SAMPLE_RATE 32000
 #define MP2_DEFAULT_PACKET_SIZE 1152
 // TAV packet types
 #define PACKET_TIMECODE 0xFD
 #define PACKET_SYNC 0xFF
 #define PACKET_AUDIO_MP2 0x20
 #define PACKET_SSF 0x30
 #define PACKET_TEXT 0x3F
 #define PACKET_EXTENDED_HDR 0xEF
 // SSF opcodes for font ROM
 #define SSF_OPCODE_LOWROM 0x80
 #define SSF_OPCODE_HIGHROM 0x81
 // Font ROM size constants
 #define FONTROM_PADDED_SIZE 1920
 #define GLYPHS_PER_ROM 128
 // Color mapping (4-bit RGB to TSVM palette)
 #define COLOR_BLACK 0xF0
 #define COLOR_WHITE 0xFE
 // Generate a random path for the temporary audio file.
 // Writes "/tmp/" + 32 random alphanumeric characters + ".mp2" into `filename`,
 // which must have room for 42 bytes (41 characters plus the terminating NUL).
 static void generate_random_filename(char *filename) {
    // time() has one-second resolution, so the process ID is mixed into the seed;
    // otherwise two encoders started within the same second would pick the same
    // path and overwrite each other's audio.
    srand((unsigned int)time(NULL) ^ ((unsigned int)getpid() << 16));
    static const char charset[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    const int charset_size = (int)sizeof(charset) - 1;
    char *cursor = filename;
    // Directory prefix
    memcpy(cursor, "/tmp/", 5);
    cursor += 5;
    // 32 random characters
    for (int i = 0; i < 32; i++) {
        *cursor++ = charset[rand() % charset_size];
    }
    // Extension; the 5-byte copy includes the terminating NUL (index 41)
    memcpy(cursor, ".mp2", 5);
 }
 char TEMP_AUDIO_FILE[42];
 // Global flag to disable inverted character matching
 int g_no_invert_char = 0;
 // Font ROM unpacked for glyph matching: `count` glyphs stored back to back in
 // `data`, PATCH_SZ bytes per glyph.
 typedef struct {
    uint8_t *data;     // Binary glyph data (PATCH_SZ bytes per glyph)
    int count;         // Number of glyphs
 } FontROM;
 // Query FFmpeg for its version banner.
 // Runs `ffmpeg -version` through the shell and returns the first line of output
 // (trailing newline removed) in a malloc'd 256-byte buffer that the caller frees.
 // Returns NULL if the pipe cannot be opened, the allocation fails, or nothing is read.
 char *get_ffmpeg_version(void) {
    FILE *proc = popen("ffmpeg -version 2>&1 | head -1", "r");
    if (proc == NULL) return NULL;
    char *line = malloc(256);
    if (line != NULL && fgets(line, 256, proc) == NULL) {
        // Nothing could be read: discard the buffer
        free(line);
        line = NULL;
    }
    pclose(proc);
    if (line == NULL) return NULL;
    // Strip the trailing newline, if any
    size_t line_len = strlen(line);
    if (line_len > 0 && line[line_len - 1] == '\n') {
        line[line_len - 1] = '\0';
    }
    return line;
 }
 // Detect video FPS using ffprobe
 float detect_fps(const char *video_path) {
    char cmd[1024];
    snprintf(cmd, sizeof(cmd),
             "ffprobe -v error -select_streams v:0 -show_entries stream=r_frame_rate "
             "-of default=noprint_wrappers=1:nokey=1 \"%s\" 2>/dev/null",
             video_path);
    FILE *pipe = popen(cmd, "r");
    if (!pipe) return 30.0f; // fallback
    char fps_str[64] = {0};
    if (fgets(fps_str, sizeof(fps_str), pipe)) {
        // Parse fraction like "30/1" or "24000/1001"
        int num = 0, den = 1;
        if (sscanf(fps_str, "%d/%d", &num, &den) == 2 && den > 0) {
            pclose(pipe);
            return (float)num / (float)den;
        }
    }
    pclose(pipe);
    return 30.0f; // fallback
 }
 // Load a font ROM file: CHAR_H bytes per glyph (one byte per pixel row), no header.
 // Each glyph is unpacked to PATCH_SZ bytes holding 0 or 1 per pixel.
 // Returns a heap-allocated FontROM (free rom->data, then rom), or NULL when the
 // file cannot be opened, sized or read, or when an allocation fails.
 FontROM *load_font_rom(const char *path) {
    FILE *f = fopen(path, "rb");
    if (!f) return NULL;
    // Determine the file size; bail out if the stream is not seekable
    long size = -1;
    if (fseek(f, 0, SEEK_END) == 0) size = ftell(f);
    if (size < 0 || fseek(f, 0, SEEK_SET) != 0) {
        fclose(f);
        return NULL;
    }
    if (size % CHAR_H != 0) {
        fprintf(stderr, "Warning: ROM size not divisible by 14 (got %ld bytes)\n", size);
    }
    int glyph_count = (int)(size / CHAR_H);
    FontROM *rom = malloc(sizeof(FontROM));
    if (!rom) {
        fclose(f);
        return NULL;
    }
    rom->count = glyph_count;
    rom->data = malloc((size_t)glyph_count * PATCH_SZ);
    // malloc(0) may legitimately return NULL, so only a non-empty ROM is an error
    if (!rom->data && glyph_count > 0) {
        free(rom);
        fclose(f);
        return NULL;
    }
    // Read and unpack glyphs
    for (int g = 0; g < glyph_count; g++) {
        uint8_t row_bytes[CHAR_H];
        if (fread(row_bytes, CHAR_H, 1, f) != 1) {
            free(rom->data);
            free(rom);
            fclose(f);
            return NULL;
        }
        // Unpack bits to binary pixels
        uint8_t *glyph = &rom->data[(size_t)g * PATCH_SZ];
        for (int row = 0; row < CHAR_H; row++) {
            for (int col = 0; col < CHAR_W; col++) {
                // Bit 6 = leftmost, bit 0 = rightmost
                glyph[row * CHAR_W + col] = (row_bytes[row] >> (6 - col)) & 1;
            }
        }
    }
    fclose(f);
    fprintf(stderr, "Loaded font ROM: %d glyphs\n", glyph_count);
    return rom;
 }
 // Pick the glyph whose rendering is closest (sum of squared differences) to a
 // grayscale patch of PATCH_SZ pixels.
 // Every glyph is scored as drawn (set pixels = 255) and, unless g_no_invert_char
 // is set, with its pixels inverted (set pixels = 0). The first strictly smaller
 // error wins. Returns the glyph index; *out_bg / *out_fg receive the colours
 // (black on white is reported for an inverted match).
 int find_best_glyph(const uint8_t *patch, const FontROM *rom, uint8_t *out_bg, uint8_t *out_fg) {
    int winner = 0;
    int winner_inverted = 0;
    float lowest = INFINITY;
    const uint8_t *glyph = rom->data;
    for (int g = 0; g < rom->count; g++, glyph += PATCH_SZ) {
        // As drawn: glyph 1 = fg, glyph 0 = bg
        float sse_plain = 0;
        for (int i = 0; i < PATCH_SZ; i++) {
            int delta = patch[i] - (glyph[i] ? 255 : 0);
            sse_plain += delta * delta;
        }
        if (sse_plain < lowest) {
            lowest = sse_plain;
            winner = g;
            winner_inverted = 0;
        }
        // Inverted: glyph 0 = fg, glyph 1 = bg (skipped with --no-invert-char)
        if (g_no_invert_char) continue;
        float sse_flipped = 0;
        for (int i = 0; i < PATCH_SZ; i++) {
            int delta = patch[i] - (glyph[i] ? 0 : 255);
            sse_flipped += delta * delta;
        }
        if (sse_flipped < lowest) {
            lowest = sse_flipped;
            winner = g;
            winner_inverted = 1;
        }
    }
    *out_bg = winner_inverted ? COLOR_WHITE : COLOR_BLACK;
    *out_fg = winner_inverted ? COLOR_BLACK : COLOR_WHITE;
    return winner;
 }
 // Convert one grayscale frame (PIXEL_W × PIXEL_H, row-major) into a text grid.
 // For each of the GRID_W × GRID_H cells the CHAR_W × CHAR_H pixel patch is
 // matched against the font; the glyph index and colours are stored at the
 // cell's row-major index in chars / bg_col / fg_col.
 void frame_to_text(const uint8_t *pixels, const FontROM *rom,
                   uint8_t *bg_col, uint8_t *fg_col, uint8_t *chars) {
    uint8_t cell_pixels[PATCH_SZ];
    for (int cell = 0; cell < GRID_W * GRID_H; cell++) {
        const int cell_row = cell / GRID_W;
        const int cell_col = cell % GRID_W;
        // Top-left pixel of this cell inside the frame
        const uint8_t *src = pixels + (cell_row * CHAR_H) * PIXEL_W + cell_col * CHAR_W;
        // Gather the patch one pixel row at a time
        for (int line = 0; line < CHAR_H; line++) {
            memcpy(cell_pixels + line * CHAR_W, src + line * PIXEL_W, CHAR_W);
        }
        // Best-matching glyph and its colours
        chars[cell] = find_best_glyph(cell_pixels, rom, &bg_col[cell], &fg_col[cell]);
    }
 }
 // Current wall-clock time in nanoseconds since the UNIX epoch.
 // Uses gettimeofday(), so the value advances in microsecond steps.
 uint64_t get_current_time_ns(void) {
    struct timeval now;
    gettimeofday(&now, NULL);
    const uint64_t seconds_ns = (uint64_t)now.tv_sec * 1000000000ULL;
    const uint64_t micros_ns = (uint64_t)now.tv_usec * 1000ULL;
    return seconds_ns + micros_ns;
 }
 // Compute the size in bytes of an MP2 packet from its header.
 // Only header[2] is inspected: bits 7-4 = bitrate index, bits 3-2 = sampling
 // frequency index, bit 1 = padding. Returns MP2_DEFAULT_PACKET_SIZE when the
 // bitrate index or the sampling frequency index has no usable table entry.
 int get_mp2_packet_size(uint8_t *header) {
    static const int kbps_table[15] = {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384};
    static const int hz_table[4] = {44100, 48000, 32000, 0};
    const uint8_t flags = header[2];
    const int bitrate_index = (flags >> 4) & 0x0F;
    if (bitrate_index >= 15) return MP2_DEFAULT_PACKET_SIZE;
    const int kbps = kbps_table[bitrate_index];
    const int hz = hz_table[(flags >> 2) & 0x03];
    if (kbps == 0 || hz == 0) return MP2_DEFAULT_PACKET_SIZE;
    const int padding = (flags >> 1) & 0x01;
    return (144 * kbps * 1000) / hz + padding;
 }
 // Write the 32-byte Videotex file header (same layout as a TAV header).
 // Offset: field
 //    0: magic "\x1FTSVMTAV" (8 bytes)
 //    8: version (1)
 //    9: grid width in characters  (uint16, GRID_W)
 //   11: grid height in characters (uint16, GRID_H)
 //   13: fps (uint8)
 //   14: total_frames (uint32) - main() patches this field once encoding is done
 //   18: wavelet filter type, decomposition levels, quantiser Y/Co/Cg (all 0)
 //   23: feature flags: bit 0 = has audio, bit 1 = has subtitle (Videotex is classified as subtitles)
 //   24: video flags: bit 7 = has no video
 //   25: encoder quality level (0)
 //   26: channel layout (0x02 = Y only)
 //   27: entropy coder (0), 28-29: reserved, 30: device orientation, 31: file role
 // Multi-byte fields are emitted in host byte order.
 void write_videotex_header(FILE *f, uint8_t fps, uint32_t total_frames) {
    uint8_t hdr[32] = {0};
    const uint16_t cols = GRID_W;
    const uint16_t rows = GRID_H;
    memcpy(&hdr[0], "\x1FTSVMTAV", 8);
    hdr[8] = 1;                                   // version
    memcpy(&hdr[9], &cols, sizeof(uint16_t));     // cols = 80
    memcpy(&hdr[11], &rows, sizeof(uint16_t));    // rows = 32
    hdr[13] = fps;
    memcpy(&hdr[14], &total_frames, sizeof(uint32_t));
    // hdr[18..22]: wavelet/decomposition/quantiser fields stay zero
    hdr[23] = 0x03;                               // feature flags
    hdr[24] = 0x80;                               // video flags
    // hdr[25]: encoder quality level stays zero
    hdr[26] = 0x02;                               // channel layout: Y only
    // hdr[27..31]: entropy coder, reserved, orientation and role stay zero
    fwrite(hdr, 1, sizeof(hdr), f);
 }
 // Emit one extended-header pair: 4-byte key, type byte 0x04 (Uint64), 8-byte value.
 static void ext_hdr_put_u64(FILE *f, const char *key, uint64_t value) {
    const uint8_t value_type = 0x04;
    fwrite(key, 1, 4, f);
    fwrite(&value_type, 1, 1, f);
    fwrite(&value, sizeof(uint64_t), 1, f);
 }
 // Emit one extended-header pair: 4-byte key, type byte 0x10 (Bytes), uint16 length, payload.
 static void ext_hdr_put_bytes(FILE *f, const char *key, const void *data, size_t len) {
    const uint8_t value_type = 0x10;
    const uint16_t length = len;
    fwrite(key, 1, 4, f);
    fwrite(&value_type, 1, 1, f);
    fwrite(&length, sizeof(uint16_t), 1, f);
    fwrite(data, 1, len, f);
 }
 // Write the extended header packet carrying the metadata pairs
 // BGNT, ENDT, CDAT, VNDR and, when ffmpeg_version is non-NULL, FMPG.
 // Returns the file offset of the ENDT value so the caller can patch it later.
 long write_extended_header(FILE *f, uint64_t creation_time_ns, const char *ffmpeg_version) {
    fputc(PACKET_EXTENDED_HDR, f);
    // Pair count: FMPG is optional
    uint16_t num_pairs = 4;
    if (ffmpeg_version) num_pairs = 5;
    fwrite(&num_pairs, sizeof(uint16_t), 1, f);
    // BGNT: video begin time (0 for frame 0)
    ext_hdr_put_u64(f, "BGNT", 0ULL);
    // ENDT: video end time; written as a placeholder and remembered by position
    const long endt_pair_offset = ftell(f);
    ext_hdr_put_u64(f, "ENDT", 0ULL);
    // CDAT: creation time in nanoseconds since the UNIX epoch
    ext_hdr_put_u64(f, "CDAT", creation_time_ns);
    // VNDR: encoder name and version
    const char *vendor_str = ENCODER_VENDOR_STRING;
    ext_hdr_put_bytes(f, "VNDR", vendor_str, strlen(vendor_str));
    // FMPG: FFmpeg version (if available)
    if (ffmpeg_version) {
        ext_hdr_put_bytes(f, "FMPG", ffmpeg_version, strlen(ffmpeg_version));
    }
    // The value follows the 4-byte key and the 1-byte type
    return endt_pair_offset + 4 + 1;
 }
 // Write a font ROM upload as an SSF packet (type 0x30).
 // Packet structure:
 //   [type:0x30][size:uint32][index:uint24][opcode:uint8][length:uint16][data][terminator:0x00]
 // The ROM data is zero-padded to FONTROM_PADDED_SIZE bytes; input longer than
 // that is truncated with a warning instead of overrunning the padding buffer.
 // opcode selects the target ROM (0x80 = lowrom, 0x81 = highrom).
 void write_fontrom_packet(FILE *f, const uint8_t *rom_data, size_t data_size, uint8_t opcode) {
    if (data_size > FONTROM_PADDED_SIZE) {
        fprintf(stderr, "Warning: font ROM data truncated from %zu to %d bytes\n",
                data_size, FONTROM_PADDED_SIZE);
        data_size = FONTROM_PADDED_SIZE;
    }
    // Zero-initialised buffer on the stack: no allocation that could fail
    uint8_t padded_data[FONTROM_PADDED_SIZE] = {0};
    if (data_size > 0) {
        memcpy(padded_data, rom_data, data_size);
    }
    // Payload = index (3) + opcode (1) + length (2) + data + terminator (1)
    uint32_t packet_size = 3 + 1 + 2 + FONTROM_PADDED_SIZE + 1;
    // Write packet type and size
    fputc(PACKET_SSF, f);
    fwrite(&packet_size, sizeof(uint32_t), 1, f);
    // Index (3 bytes, always 0 for font ROM)
    fputc(0, f);
    fputc(0, f);
    fputc(0, f);
    // Opcode (0x80=lowrom, 0x81=highrom)
    fputc(opcode, f);
    // Payload length (uint16, host byte order)
    uint16_t payload_len = FONTROM_PADDED_SIZE;
    fwrite(&payload_len, sizeof(uint16_t), 1, f);
    // Font data (padded to FONTROM_PADDED_SIZE bytes)
    fwrite(padded_data, 1, FONTROM_PADDED_SIZE, f);
    // Terminator
    fputc(0x00, f);
    fprintf(stderr, "Font ROM uploaded: %zu bytes (padded to %d), opcode 0x%02X\n",
            data_size, FONTROM_PADDED_SIZE, opcode);
 }
 // Write a timecode packet: the type byte followed by the 8-byte timestamp
 // in nanoseconds (host byte order).
 void write_timecode(FILE *f, uint64_t timecode_ns) {
    uint8_t packet[1 + sizeof(uint64_t)];
    packet[0] = PACKET_TIMECODE;
    memcpy(&packet[1], &timecode_ns, sizeof(uint64_t));
    fwrite(packet, 1, sizeof(packet), f);
 }
 // Write a sync packet, which consists of the type byte only.
 void write_sync(FILE *f) {
    const uint8_t marker = PACKET_SYNC;
    fwrite(&marker, 1, 1, f);
 }
 // Write an MP2 audio packet: type byte, payload size (uint32, host byte order),
 // then `size` bytes of MP2 data.
 void write_audio_mp2(FILE *f, const uint8_t *data, uint32_t size) {
    uint8_t prefix[1 + sizeof(uint32_t)];
    prefix[0] = PACKET_AUDIO_MP2;
    memcpy(&prefix[1], &size, sizeof(uint32_t));
    fwrite(prefix, 1, sizeof(prefix), f);
    fwrite(data, 1, size, f);
 }
 // Write one text (Videotex) packet: [type][compressed size:uint32][Zstd data].
 // The uncompressed payload is [rows][cols][fg-array][bg-array][char-array];
 // keeping the three arrays separate compresses much better because fg/bg are
 // mostly runs of 0xF0/0xFE. rows and cols are stored as single bytes.
 // Terminates the process with exit(1) on allocation or compression failure.
 void write_text_packet(FILE *f, const uint8_t *bg_col, const uint8_t *fg_col,
                       const uint8_t *chars, int rows, int cols) {
    size_t grid_size = (size_t)rows * (size_t)cols;
    size_t uncompressed_size = 2 + grid_size * 3;
    uint8_t *uncompressed = malloc(uncompressed_size);
    if (!uncompressed) {
        fprintf(stderr, "Out of memory while building text packet\n");
        exit(1);
    }
    uncompressed[0] = (uint8_t)rows;
    uncompressed[1] = (uint8_t)cols;
    // Copy arrays in order: foreground, background, characters
    memcpy(&uncompressed[2], fg_col, grid_size);
    memcpy(&uncompressed[2 + grid_size], bg_col, grid_size);
    memcpy(&uncompressed[2 + grid_size * 2], chars, grid_size);
    // Compress with Zstd (level 3)
    size_t max_compressed = ZSTD_compressBound(uncompressed_size);
    uint8_t *compressed = malloc(max_compressed);
    if (!compressed) {
        fprintf(stderr, "Out of memory while building text packet\n");
        free(uncompressed);
        exit(1);
    }
    size_t compressed_size = ZSTD_compress(compressed, max_compressed,
                                           uncompressed, uncompressed_size, 3);
    if (ZSTD_isError(compressed_size)) {
        fprintf(stderr, "Zstd compression error\n");
        exit(1);
    }
    // Write packet: [type][size][data]
    fputc(PACKET_TEXT, f);
    uint32_t size32 = (uint32_t)compressed_size;
    fwrite(&size32, 4, 1, f);
    fwrite(compressed, compressed_size, 1, f);
    free(compressed);
    free(uncompressed);
 }
 // Entry point of the text-mode encoder.
 // Parses -i <video>, -f <font>, -o <output> and the optional --no-invert-char,
 // reads grayscale frames from an FFmpeg pipe, converts each frame to a glyph grid
 // and writes header, font ROM, and per-frame audio/timecode/text/sync packets.
 // total_frames and ENDT are patched into the headers once the frame count is known.
 // Returns 0 on success, 1 on usage, I/O or allocation errors.
 int main(int argc, char **argv) {
    if (argc < 7) {
        fprintf(stderr, "Usage: %s -i <video> -f <font.chr> -o <output.tav> [--no-invert-char]\n", argv[0]);
        return 1;
    }
    const char *input_video = NULL;
    const char *font_path = NULL;
    const char *output_path = NULL;
    for (int i = 1; i < argc; i++) {
        if (strcmp(argv[i], "-i") == 0 && i+1 < argc) input_video = argv[++i];
        else if (strcmp(argv[i], "-f") == 0 && i+1 < argc) font_path = argv[++i];
        else if (strcmp(argv[i], "-o") == 0 && i+1 < argc) output_path = argv[++i];
        else if (strcmp(argv[i], "--no-invert-char") == 0) g_no_invert_char = 1;
    }
    if (!input_video || !font_path || !output_path) {
        fprintf(stderr, "Missing required arguments\n");
        return 1;
    }
    if (g_no_invert_char) {
        fprintf(stderr, "Inverted character matching disabled\n");
    }
    // Generate random temp filename for audio
    generate_random_filename(TEMP_AUDIO_FILE);
    // Capture creation time and FFmpeg version for extended header
    uint64_t creation_time_ns = get_current_time_ns();
    char *ffmpeg_version = get_ffmpeg_version();
    // Detect video FPS
    float fps_float = detect_fps(input_video);
    // Every duration below divides by fps_float, so a non-positive rate is replaced
    // by the same 30 fps fallback detect_fps uses.
    if (!(fps_float > 0.0f)) fps_float = 30.0f;
    // The header stores FPS in one byte: round to nearest and clamp so the
    // float-to-uint8 conversion is always in range.
    float fps_rounded = fps_float + 0.5f;
    uint8_t fps = (fps_rounded >= 255.0f) ? 255 : (uint8_t)fps_rounded;
    fprintf(stderr, "Detected FPS: %.2f (using %d in TAV header)\n", fps_float, fps);
    // Load font ROM
    FontROM *rom = load_font_rom(font_path);
    if (!rom) {
        fprintf(stderr, "Failed to load font ROM: %s\n", font_path);
        free(ffmpeg_version);
        return 1;
    }
    // Open FFmpeg pipe for grayscale frames at 560×448
    char ffmpeg_cmd[1024];
    snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
             "ffmpeg -i \"%s\" -vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" "
             "-f rawvideo -pix_fmt gray - 2>/dev/null",
             input_video, PIXEL_W, PIXEL_H, PIXEL_W, PIXEL_H);
    fprintf(stderr, "Opening video stream...\n");
    FILE *video_pipe = popen(ffmpeg_cmd, "r");
    if (!video_pipe) {
        fprintf(stderr, "Failed to open FFmpeg pipe\n");
        free(rom->data);
        free(rom);
        free(ffmpeg_version);
        return 1;
    }
    // Extract MP2 audio to temporary file using libtwolame
    fprintf(stderr, "Extracting MP2 audio...\n");
    char audio_cmd[1024];
    snprintf(audio_cmd, sizeof(audio_cmd),
             "ffmpeg -v quiet -i \"%s\" -acodec libtwolame -psymodel 4 -b:a 224k -ar %d -ac 2 -y \"%s\" 2>/dev/null",
             input_video, SAMPLE_RATE, TEMP_AUDIO_FILE);
    int audio_result = system(audio_cmd);
    if (audio_result != 0) {
        fprintf(stderr, "Warning: Audio extraction failed, continuing without audio\n");
    }
    // Open MP2 file for reading
    FILE *mp2_file = NULL;
    long audio_remaining = 0;
    if (audio_result == 0) {
        mp2_file = fopen(TEMP_AUDIO_FILE, "rb");
        if (mp2_file) {
            fseek(mp2_file, 0, SEEK_END);
            audio_remaining = ftell(mp2_file);
            fseek(mp2_file, 0, SEEK_SET);
            fprintf(stderr, "Audio ready: %ld bytes\n", audio_remaining);
        }
    }
    // Open output file
    FILE *out = fopen(output_path, "wb");
    if (!out) {
        fprintf(stderr, "Failed to open output file\n");
        pclose(video_pipe);
        if (mp2_file) fclose(mp2_file);
        unlink(TEMP_AUDIO_FILE);  // Do not leave the temporary audio behind
        free(rom->data);
        free(rom);
        free(ffmpeg_version);
        return 1;
    }
    // Write Videotex header with placeholder total_frames (will update at end)
    long header_offset = ftell(out);
    write_videotex_header(out, fps, 0);
    // Write extended header packet (before first timecode)
    long endt_offset = write_extended_header(out, creation_time_ns, ffmpeg_version);
    // Upload font ROM to TSVM (split into lowrom and highrom)
    fprintf(stderr, "Uploading font ROM to TSVM...\n");
    FILE *rom_file = fopen(font_path, "rb");
    if (rom_file) {
        fseek(rom_file, 0, SEEK_END);
        long rom_size = ftell(rom_file);
        fseek(rom_file, 0, SEEK_SET);
        uint8_t *raw_rom = (rom_size > 0) ? malloc((size_t)rom_size) : NULL;
        if (raw_rom && fread(raw_rom, 1, (size_t)rom_size, rom_file) == (size_t)rom_size) {
            size_t rom_bytes = (size_t)rom_size;
            // Split into lowrom and highrom
            size_t bytes_per_half = (GLYPHS_PER_ROM * 14); // 128 glyphs × 14 bytes = 1792
            // Write lowrom (first 128 glyphs)
            if (rom_bytes >= bytes_per_half) {
                write_fontrom_packet(out, raw_rom, bytes_per_half, SSF_OPCODE_LOWROM);
            }
            // Write highrom (second 128 glyphs)
            if (rom_bytes >= bytes_per_half * 2) {
                write_fontrom_packet(out, raw_rom + bytes_per_half, bytes_per_half, SSF_OPCODE_HIGHROM);
            } else if (rom_bytes > bytes_per_half) {
                // Partial highrom
                write_fontrom_packet(out, raw_rom + bytes_per_half, rom_bytes - bytes_per_half, SSF_OPCODE_HIGHROM);
            }
        }
        // Released on every path, including a short read
        free(raw_rom);
        fclose(rom_file);
    }
    // Allocate buffers
    size_t frame_size = PIXEL_W * PIXEL_H;
    uint8_t *gray_pixels = malloc(frame_size);
    uint8_t *bg_col = malloc(GRID_W * GRID_H);
    uint8_t *fg_col = malloc(GRID_W * GRID_H);
    uint8_t *chars = malloc(GRID_W * GRID_H);
    // Audio buffer for MP2 packets
    #define MP2_BUFFER_SIZE 2048
    uint8_t *audio_buffer = malloc(MP2_BUFFER_SIZE);
    if (!gray_pixels || !bg_col || !fg_col || !chars || !audio_buffer) {
        fprintf(stderr, "Failed to allocate encoder buffers\n");
        free(gray_pixels);
        free(bg_col);
        free(fg_col);
        free(chars);
        free(audio_buffer);
        pclose(video_pipe);
        if (mp2_file) fclose(mp2_file);
        unlink(TEMP_AUDIO_FILE);
        fclose(out);
        free(rom->data);
        free(rom);
        free(ffmpeg_version);
        return 1;
    }
    uint32_t frame_num = 0;
    uint64_t total_audio_bytes = 0;
    // Audio timing calculation
    double frame_audio_time = 1.0 / fps_float;  // Time per video frame
    double packet_audio_time = (double)MP2_DEFAULT_PACKET_SIZE / SAMPLE_RATE;  // Time per audio packet
    double packets_per_frame = frame_audio_time / packet_audio_time;
    double audio_frames_in_buffer = 0.0;  // Simulated audio buffer level
    fprintf(stderr, "Encoding text-mode video (%dx%d chars, %dx%d pixels)...\n",
            GRID_W, GRID_H, PIXEL_W, PIXEL_H);
    // Track encoding start time
    struct timeval start_time, now;
    gettimeofday(&start_time, NULL);
    // Read and process frames
    while (fread(gray_pixels, 1, frame_size, video_pipe) == frame_size) {
        // Calculate timecode in nanoseconds
        uint64_t timecode_ns = (uint64_t)(frame_num * 1000000000.0 / fps_float);
        // Write audio packets for this frame (based on timing)
        if (mp2_file && audio_remaining > 0) {
            // Simulate buffer consumption
            audio_frames_in_buffer -= packets_per_frame;
            // Calculate how many packets we need to maintain buffer
            double target_level = fmax(packets_per_frame, 2.0);
            int packets_to_insert = 0;
            if (audio_frames_in_buffer < target_level) {
                double deficit = target_level - audio_frames_in_buffer;
                packets_to_insert = (int)ceil(deficit);
            }
            // Insert the calculated number of audio packets
            for (int q = 0; q < packets_to_insert; q++) {
                // Peek at header to get actual packet size
                long pos = ftell(mp2_file);
                uint8_t header[4];
                if (fread(header, 1, 4, mp2_file) != 4) break;
                fseek(mp2_file, pos, SEEK_SET);  // Rewind to re-read with full packet
                int actual_packet_size = get_mp2_packet_size(header);
                size_t bytes_to_read = actual_packet_size;
                // Clamp to remaining audio (audio_remaining never goes negative here)
                if (bytes_to_read > (size_t)audio_remaining) {
                    bytes_to_read = (size_t)audio_remaining;
                }
                // Sanity check
                if (bytes_to_read > MP2_BUFFER_SIZE) {
                    fprintf(stderr, "ERROR: MP2 packet size %zu exceeds buffer\n", bytes_to_read);
                    break;
                }
                // Read full packet
                size_t bytes_read = fread(audio_buffer, 1, bytes_to_read, mp2_file);
                if (bytes_read == 0) break;
                // Write MP2 audio packet
                write_audio_mp2(out, audio_buffer, (uint32_t)bytes_read);
                // Track audio
                audio_remaining -= bytes_read;
                audio_frames_in_buffer++;
                total_audio_bytes += bytes_read;
            }
        }
        // Write timecode
        write_timecode(out, timecode_ns);
        // Convert to text mode
        frame_to_text(gray_pixels, rom, bg_col, fg_col, chars);
        // Write text packet (treated as I-frame)
        write_text_packet(out, bg_col, fg_col, chars, GRID_H, GRID_W);
        // Write sync packet after each frame
        write_sync(out);
        frame_num++;
        if (frame_num % 30 == 0) {
            // Calculate encoding speed
            gettimeofday(&now, NULL);
            double elapsed = (now.tv_sec - start_time.tv_sec) +
                           (now.tv_usec - start_time.tv_usec) / 1000000.0;
            double encoding_fps = frame_num / elapsed;
            fprintf(stderr, "\rEncoded %u frames (%.1f fps)", frame_num, encoding_fps);
            fflush(stderr);
        }
    }
    // Write any remaining audio
    if (mp2_file && audio_remaining > 0) {
        while (audio_remaining > 0) {
            // Peek at header to get actual packet size
            long pos = ftell(mp2_file);
            uint8_t header[4];
            if (fread(header, 1, 4, mp2_file) != 4) break;
            fseek(mp2_file, pos, SEEK_SET);
            int actual_packet_size = get_mp2_packet_size(header);
            size_t bytes_to_read = (actual_packet_size < audio_remaining) ? actual_packet_size : audio_remaining;
            if (bytes_to_read > MP2_BUFFER_SIZE) break;
            size_t bytes_read = fread(audio_buffer, 1, bytes_to_read, mp2_file);
            if (bytes_read == 0) break;
            write_audio_mp2(out, audio_buffer, (uint32_t)bytes_read);
            audio_remaining -= bytes_read;
            total_audio_bytes += bytes_read;
        }
    }
    // Final timing
    gettimeofday(&now, NULL);
    double total_time = (now.tv_sec - start_time.tv_sec) +
                       (now.tv_usec - start_time.tv_usec) / 1000000.0;
    double final_fps = frame_num / total_time;
    fprintf(stderr, "\nDone! Encoded %u frames in %.2fs (%.1f fps)\n",
            frame_num, total_time, final_fps);
    fprintf(stderr, "Audio: %llu bytes (%.2f MB)\n",
            (unsigned long long)total_audio_bytes,
            total_audio_bytes / 1024.0 / 1024.0);
    // Update total_frames in header
    if (frame_num > 0) {
        fseek(out, header_offset + 14, SEEK_SET);  // Offset to total_frames field
        fwrite(&frame_num, sizeof(uint32_t), 1, out);
        fprintf(stderr, "Updated total_frames in header: %u\n", frame_num);
    }
    // Update ENDT in extended header (calculate end time for last frame)
    if (frame_num > 0) {
        // Calculate duration: (frame_num - 1) frames * (1/fps) seconds in nanoseconds
        uint64_t duration_ns = (uint64_t)((frame_num - 1) * 1000000000.0 / fps_float);
        uint64_t endt_ns = duration_ns;
        fseek(out, endt_offset, SEEK_SET);
        fwrite(&endt_ns, sizeof(uint64_t), 1, out);
        fprintf(stderr, "Updated ENDT in extended header: %llu ns (%.3f seconds)\n",
                (unsigned long long)endt_ns, endt_ns / 1000000000.0);
    }
    // Cleanup
    pclose(video_pipe);
    if (mp2_file) {
        fclose(mp2_file);
    }
    // Remove the temporary audio file even when it could not be opened or the
    // extraction failed half-way; unlink() on a missing file is harmless.
    unlink(TEMP_AUDIO_FILE);
    fclose(out);
    free(gray_pixels);
    free(bg_col);
    free(fg_col);
    free(chars);
    free(audio_buffer);
    free(rom->data);
    free(rom);
    free(ffmpeg_version);
    return 0;
 }
--- a/video_encoder/encoder_tev.c
+++ b/video_encoder/encoder_tev.c
--- a/video_encoder/estimate_affine_from_blocks.cpp
+++ b/video_encoder/estimate_affine_from_blocks.cpp
@@ -1,169 +0,0 @@
 // Affine estimation for TAV mesh warping
 // This file contains logic to estimate per-cell affine transforms from block motion
 #include <cmath>
 #include <cstdlib>
 #include <cstring>
 extern "C" {
 // Solve the 3×3 least-squares normal equations for one row of the affine model
 //   v = coef_x * px + coef_y * py + offset
 // using Cramer's rule on
 //   [sum_xx sum_xy sum_x] [coef_x]   [sum_v_x]
 //   [sum_xy sum_yy sum_y] [coef_y] = [sum_v_y]
 //   [sum_x  sum_y  n    ] [offset]   [sum_v  ]
 // `det` is the determinant of that matrix and must be non-zero.
 static void solve_affine_row(
    double n, double sum_x, double sum_y,
    double sum_xx, double sum_yy, double sum_xy,
    double sum_v, double sum_v_x, double sum_v_y,
    double det,
    double *coef_x, double *coef_y, double *offset
 ) {
    *coef_x = (sum_v_x * (n * sum_yy - sum_y * sum_y)
             - sum_xy * (n * sum_v_y - sum_y * sum_v)
             + sum_x * (sum_v_y * sum_y - sum_yy * sum_v)) / det;
    *coef_y = (sum_xx * (n * sum_v_y - sum_y * sum_v)
             - sum_v_x * (n * sum_xy - sum_x * sum_y)
             + sum_x * (sum_xy * sum_v - sum_x * sum_v_y)) / det;
    *offset = (sum_v - *coef_x * sum_x - *coef_y * sum_y) / n;
 }
 // Scale a value to fixed point and truncate toward zero, saturating at the
 // limits of a 16-bit short instead of overflowing; NaN maps to 0.
 static short to_fixed_short(double value, double scale) {
    double scaled = value * scale;
    if (scaled != scaled) return 0;
    if (scaled >= 32767.0) return 32767;
    if (scaled <= -32768.0) return -32768;
    return (short)scaled;
 }
 // Estimate an affine transform for a mesh cell from the motion field inside it.
 // Motion is sampled on a 4×4 grid spanning the cell and fitted by least squares to
 //   [vx] = [a11 a12][px] + [tx]
 //   [vy]   [a21 a22][py]   [ty]
 // where (px, py) is the sample position relative to the cell centre.
 //
 // Outputs: translation in 1/8 pixel units, matrix entries in 1/256 fixed point.
 // Returns 1 and writes the fitted parameters when the affine fit lowers the
 // squared residual of the translation-only model by more than `threshold`
 // (relative, e.g. 0.10 = 10%). Otherwise returns 0 and writes the average motion
 // as translation with the matrix outputs set to 256, 0, 0, 256.
 int estimate_cell_affine(
    const float *flow_x, const float *flow_y,
    int width, int height,
    int cell_x, int cell_y,      // Cell position in mesh coordinates
    int cell_w, int cell_h,       // Cell size in pixels
    float threshold,              // Residual improvement threshold (e.g. 0.10 = 10%)
    short *out_tx, short *out_ty, // Translation (1/8 pixel)
    short *out_a11, short *out_a12, // Affine matrix (1/256 fixed-point)
    short *out_a21, short *out_a22
 ) {
    // Compute cell bounding box
    int x_start = cell_x * cell_w;
    int y_start = cell_y * cell_h;
    int x_end = (cell_x + 1) * cell_w;
    int y_end = (cell_y + 1) * cell_h;
    if (x_end > width) x_end = width;
    if (y_end > height) y_end = height;
    // Sample motion vectors from a 4×4 grid within the cell
    const int samples_x = 4;
    const int samples_y = 4;
    float sample_motion_x[16];
    float sample_motion_y[16];
    int sample_px[16];
    int sample_py[16];
    int n_samples = 0;
    for (int sy = 0; sy < samples_y; sy++) {
        for (int sx = 0; sx < samples_x; sx++) {
            int px = x_start + (x_end - x_start) * sx / (samples_x - 1);
            int py = y_start + (y_end - y_start) * sy / (samples_y - 1);
            if (px >= width) px = width - 1;
            if (py >= height) py = height - 1;
            int idx = py * width + px;
            sample_motion_x[n_samples] = flow_x[idx];
            sample_motion_y[n_samples] = flow_y[idx];
            sample_px[n_samples] = px - (x_start + x_end) / 2;  // Relative to cell center
            sample_py[n_samples] = py - (y_start + y_end) / 2;
            n_samples++;
        }
    }
    // 1. Translation-only model (average motion) and its residual
    float avg_dx = 0, avg_dy = 0;
    for (int i = 0; i < n_samples; i++) {
        avg_dx += sample_motion_x[i];
        avg_dy += sample_motion_y[i];
    }
    avg_dx /= n_samples;
    avg_dy /= n_samples;
    float trans_residual = 0;
    for (int i = 0; i < n_samples; i++) {
        float dx_err = sample_motion_x[i] - avg_dx;
        float dy_err = sample_motion_y[i] - avg_dy;
        trans_residual += dx_err * dx_err + dy_err * dy_err;
    }
    // 2. Accumulate the sums for the least-squares normal equations
    double sum_x = 0, sum_y = 0, sum_xx = 0, sum_yy = 0, sum_xy = 0;
    double sum_vx = 0, sum_vy = 0, sum_vx_x = 0, sum_vx_y = 0;
    double sum_vy_x = 0, sum_vy_y = 0;
    for (int i = 0; i < n_samples; i++) {
        double px = sample_px[i];
        double py = sample_py[i];
        double vx = sample_motion_x[i];
        double vy = sample_motion_y[i];
        sum_x += px;
        sum_y += py;
        sum_xx += px * px;
        sum_yy += py * py;
        sum_xy += px * py;
        sum_vx += vx;
        sum_vy += vy;
        sum_vx_x += vx * px;
        sum_vx_y += vx * py;
        sum_vy_x += vy * px;
        sum_vy_y += vy * py;
    }
    double n = n_samples;
    double det = n * sum_xx * sum_yy + 2 * sum_x * sum_y * sum_xy -
                 sum_xx * sum_y * sum_y - sum_yy * sum_x * sum_x - n * sum_xy * sum_xy;
    if (fabs(det) < 1e-6) {
        // Singular matrix, fall back to translation
        *out_tx = to_fixed_short(avg_dx, 8.0);
        *out_ty = to_fixed_short(avg_dy, 8.0);
        *out_a11 = 256;  // Identity
        *out_a12 = 0;
        *out_a21 = 0;
        *out_a22 = 256;
        return 0;  // Translation only
    }
    // Both rows share the same matrix; only the right-hand side differs.
    // The sample grid is not exactly symmetric around the cell centre (integer
    // division), so sum_x / sum_y are generally non-zero and every cross term
    // of the Cramer expansion matters.
    double a11, a12, tx;
    double a21, a22, ty;
    solve_affine_row(n, sum_x, sum_y, sum_xx, sum_yy, sum_xy,
                     sum_vx, sum_vx_x, sum_vx_y, det, &a11, &a12, &tx);
    solve_affine_row(n, sum_x, sum_y, sum_xx, sum_yy, sum_xy,
                     sum_vy, sum_vy_x, sum_vy_y, det, &a21, &a22, &ty);
    // Affine residual
    float affine_residual = 0;
    for (int i = 0; i < n_samples; i++) {
        double px = sample_px[i];
        double py = sample_py[i];
        double pred_vx = a11 * px + a12 * py + tx;
        double pred_vy = a21 * px + a22 * py + ty;
        double dx_err = sample_motion_x[i] - pred_vx;
        double dy_err = sample_motion_y[i] - pred_vy;
        affine_residual += dx_err * dx_err + dy_err * dy_err;
    }
    // Decision: Use affine if residual improves by > threshold
    // (the small epsilon keeps the ratio defined for a perfectly uniform field)
    float improvement = (trans_residual - affine_residual) / (trans_residual + 1e-6f);
    if (improvement > threshold) {
        // Use affine
        *out_tx = to_fixed_short(tx, 8.0);
        *out_ty = to_fixed_short(ty, 8.0);
        *out_a11 = to_fixed_short(a11, 256.0);
        *out_a12 = to_fixed_short(a12, 256.0);
        *out_a21 = to_fixed_short(a21, 256.0);
        *out_a22 = to_fixed_short(a22, 256.0);
        return 1;  // Affine
    } else {
        // Use translation
        *out_tx = to_fixed_short(avg_dx, 8.0);
        *out_ty = to_fixed_short(avg_dy, 8.0);
        *out_a11 = 256;  // Identity
        *out_a12 = 0;
        *out_a21 = 0;
        *out_a22 = 256;
        return 0;  // Translation only
    }
 }
 } // extern "C"
--- a/video_encoder/exponential_numeric_system.ods
+++ b/video_encoder/exponential_numeric_system.ods
--- a/video_encoder/include/coefficient_compress.h
+++ b/video_encoder/include/coefficient_compress.h
@@ -1,65 +0,0 @@
 // Simple coefficient preprocessing for better compression
 // Insert right before Zstd compression
 #ifndef COEFFICIENT_COMPRESS_H
 #define COEFFICIENT_COMPRESS_H
 #include <stdint.h>
 #include <string.h>
 // Pack coefficients as a significance map followed by the non-zero values.
 //
 // Layout written to output_buffer:
 //   (coeff_count + 7) / 8 bytes : bitmap; bit (i % 8) of byte (i / 8) is set
 //                                 when coeffs[i] != 0
 //   nonzero * sizeof(int16_t)   : the non-zero coefficients in index order,
 //                                 stored in native byte order
 //
 // coeffs         input coefficients (only read)
 // coeff_count    number of entries in coeffs; values <= 0 write nothing
 // output_buffer  destination; worst case needs
 //                (coeff_count + 7) / 8 + coeff_count * sizeof(int16_t) bytes
 // Returns the number of bytes written to output_buffer.
 static size_t preprocess_coefficients(int16_t *coeffs, int coeff_count, uint8_t *output_buffer) {
    if (coeff_count <= 0) return 0;
    const size_t map_bytes = ((size_t)coeff_count + 7) / 8;  // round up to whole bytes
    uint8_t *sig_map = output_buffer;
    // The value area starts right after the bitmap and is therefore only
    // byte-aligned (map_bytes may be odd); write it with memcpy rather than
    // through an int16_t pointer.
    uint8_t *values = output_buffer + map_bytes;
    memset(sig_map, 0, map_bytes);
    size_t nonzero_count = 0;
    for (int i = 0; i < coeff_count; i++) {
        if (coeffs[i] == 0) continue;
        sig_map[i / 8] |= (uint8_t)(1u << (i % 8));
        memcpy(values + nonzero_count * sizeof(int16_t), &coeffs[i], sizeof(int16_t));
        nonzero_count++;
    }
    return map_bytes + nonzero_count * sizeof(int16_t);
 }
 // Decoder: rebuild a coefficient array from the significance-map layout
 // produced by preprocess_coefficients().
 //
 // compressed_data  (coeff_count + 7) / 8 bitmap bytes followed by one
 //                  native-endian int16_t per set bit, in index order
 // coeff_count      number of coefficients to reconstruct; values <= 0 are a no-op
 // output_coeffs    receives coeff_count values; positions whose bit is clear
 //                  are set to 0
 static void postprocess_coefficients(uint8_t *compressed_data, int coeff_count, int16_t *output_coeffs) {
    if (coeff_count <= 0) return;
    const size_t map_bytes = ((size_t)coeff_count + 7) / 8;
    const uint8_t *sig_map = compressed_data;
    // The value area is only byte-aligned (map_bytes may be odd), so read it
    // with memcpy rather than through an int16_t pointer.
    const uint8_t *values = compressed_data + map_bytes;
    memset(output_coeffs, 0, (size_t)coeff_count * sizeof(int16_t));
    size_t value_idx = 0;
    for (int i = 0; i < coeff_count; i++) {
        if (sig_map[i / 8] & (1u << (i % 8))) {
            memcpy(&output_coeffs[i], values + value_idx * sizeof(int16_t), sizeof(int16_t));
            value_idx++;
        }
    }
 }
 #endif // COEFFICIENT_COMPRESS_H
--- a/video_encoder/include/decoder_tad.h
+++ b/video_encoder/include/decoder_tad.h
@@ -1,39 +0,0 @@
 #ifndef TAD32_DECODER_H
 #define TAD32_DECODER_H
 #include <stdint.h>
 #include <stddef.h>
 // TAD32 (Terrarum Advanced Audio - PCM32f version) Decoder
 // DWT-based perceptual audio codec for TSVM
 // Shared decoder library used by both decoder_tad (standalone) and decoder_tav (video decoder)
 // Constants (must match encoder)
 #define TAD32_SAMPLE_RATE 32000
 #define TAD32_CHANNELS 2  // Stereo
 #define TAD_DEFAULT_CHUNK_SIZE 32768  // Default chunk size for standalone TAD files
 /**
 * Decode audio chunk with TAD32 codec
 *
 * @param input           Input TAD32 chunk data
 * @param input_size      Size of input buffer
 * @param pcmu8_stereo    Output PCMu8 stereo samples (interleaved L,R)
 * @param bytes_consumed  [out] Number of bytes consumed from input
 * @param samples_decoded [out] Number of samples decoded per channel
 * @return                0 on success, -1 on error
 *
 * Input format:
 *   uint16 sample_count (samples per channel)
 *   uint8  max_index (maximum quantisation index)
 *   uint32 payload_size (bytes in payload)
 *   *      payload (encoded M/S data, Zstd-compressed with EZBC)
 *
 * Output format:
 *   PCMu8 stereo interleaved (8-bit unsigned PCM, L,R pairs)
 *   Range: [0, 255] where 128 = silence
 */
 int tad32_decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_stereo,
                       size_t *bytes_consumed, size_t *samples_decoded);
 #endif // TAD32_DECODER_H
--- a/video_encoder/include/encoder_tad.h
+++ b/video_encoder/include/encoder_tad.h
@@ -1,63 +0,0 @@
 #ifndef TAD32_ENCODER_H
 #define TAD32_ENCODER_H
 #include <stdint.h>
 #include <stddef.h>
 // TAD32 (Terrarum Advanced Audio - PCM32f version) Encoder
 // DWT-based perceptual audio codec for TSVM
 // Alternative version: PCM32f throughout encoding, PCM8 conversion only at decoder
 // Constants
 #define TAD32_COEFF_SCALARS {64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f} // value only valid for CDF 9/7 with decomposition level 9. Index 0 = LL band
 #define TAD32_MIN_CHUNK_SIZE 1024       // Minimum: 1024 samples
 #define TAD32_SAMPLE_RATE 32000
 #define TAD32_CHANNELS 2  // Stereo
 #define TAD32_QUALITY_MIN 0
 #define TAD32_QUALITY_MAX 6
 #define TAD32_QUALITY_DEFAULT 3
 #define TAD32_ZSTD_LEVEL 15
 // Translate an encoder quality level into the maximum quantisation index.
 // Levels 0..5 map to 21, 31, 44, 63, 89, 127; anything below 0 is treated as
 // level 0 and anything above 5 as level 5.
 // NOTE(review): TAD32_QUALITY_MAX is defined as 6 in this header, yet level 6
 // is clamped to 5 here - confirm which of the two is intended.
 static inline int tad32_quality_to_max_index(int quality) {
    static const int max_index_by_level[6] = {21, 31, 44, 63, 89, 127};
    const int level = (quality < 0) ? 0 : ((quality > 5) ? 5 : quality);
    return max_index_by_level[level];
 }
 /**
 * Encode audio chunk with TAD32 codec (PCM32f version)
 *
 * @param pcm32_stereo    Input PCM32fLE stereo samples (interleaved L,R)
 * @param num_samples     Number of samples per channel (min 1024)
 * @param max_index       Maximum quantisation index (7=3bit, 15=4bit, 31=5bit, 63=6bit, 127=7bit)
 * @param quantiser_scale Quantiser scaling factor (1.0=baseline, 2.0=2x coarser quantisation)
 *                        Higher values = more aggressive quantisation = smaller files
 * @param zstd_level      Zstd compression level (1-22). Use negative value to disable compression.
 *                        When disabled, MSB of payload_size is set to indicate uncompressed data.
 * @param output          Output buffer (must be large enough)
 * @return                Number of bytes written to output, or 0 on error
 *
 * Output format:
 *   uint16 sample_count (samples per channel)
 *   uint8  max_index (maximum quantisation index)
 *   uint32 payload_size (bytes in payload; MSB=1 indicates uncompressed)
 *   *      payload (encoded M/S data, optionally Zstd-compressed)
 */
 size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
                          int max_index,
                          float quantiser_scale, int zstd_level, uint8_t *output);
 /**
 * Print accumulated coefficient statistics
 * Only effective if TAD_COEFF_STATS environment variable is set
 */
 void tad32_print_statistics(void);
 /**
 * Free accumulated statistics memory
 * Should be called after tad32_print_statistics()
 */
 void tad32_free_statistics(void);
 #endif // TAD32_ENCODER_H
--- a/video_encoder/include/entropy_coder.h
+++ b/video_encoder/include/entropy_coder.h
@@ -1,74 +0,0 @@
 // TEV Entropy Coder - Specialised for DCT coefficients
 // Replaces gzip with video-optimized compression
 #ifndef ENTROPY_CODER_H
 #define ENTROPY_CODER_H
 #include <stdint.h>
 #include <stdio.h>
 // Bit writer for variable-length codes
 typedef struct {
    uint8_t *buffer;
    size_t buffer_size;
    size_t byte_pos;
    int bit_pos;  // 0-7, next bit to write
 } bit_writer_t;
 // Bit reader for decoding
 typedef struct {
    const uint8_t *buffer;
    size_t buffer_size;
    size_t byte_pos;
    int bit_pos;  // 0-7, next bit to read
 } bit_reader_t;
 // Huffman table entry
 typedef struct {
    uint16_t code;    // Huffman code
    uint8_t bits;     // Code length in bits
 } huffman_entry_t;
 // Video entropy coder optimized for TEV coefficients.
 // One instance bundles the Huffman tables for every symbol class together
 // with a bit writer and a bit reader, so it holds both encode and decode state.
 typedef struct {
    // Huffman tables for different coefficient types
    huffman_entry_t y_dc_table[512];      // Y DC coefficients (-255 to +255)
    huffman_entry_t y_ac_table[512];      // Y AC coefficients
    huffman_entry_t c_dc_table[512];      // Chroma DC coefficients  
    huffman_entry_t c_ac_table[512];      // Chroma AC coefficients
    huffman_entry_t run_table[256];       // Zero run lengths (0-255)
    // Motion vector Huffman tables
    // 65 entries: one per value in the stated -32..+32 range.
    huffman_entry_t mv_table[65];         // Motion vectors (-32 to +32)
    // Bit writer/reader
    bit_writer_t writer;                  // output cursor used while encoding
    bit_reader_t reader;                  // input cursor used while decoding
 } entropy_coder_t;
 // 16-entry Huffman table; presumably the codes used by encode_block_mode() /
 // decode_block_mode() - TODO confirm.
 // NOTE(review): this is a `static const` array declared in a header without an
 // initialiser. In C that is a tentative definition with internal linkage, so
 // any translation unit that includes this header and does not provide its own
 // initialised definition ends up with a private all-zero table; in C++ an
 // uninitialised const object does not compile. Confirm where the real table
 // is defined.
 static const huffman_entry_t BLOCK_MODE_HUFFMAN[16];
 void write_bits(bit_writer_t *writer, uint32_t value, int bits);
 uint32_t read_bits(bit_reader_t *reader, int bits);
 // Initialise entropy coder
 entropy_coder_t* entropy_coder_create(uint8_t *buffer, size_t buffer_size);
 void entropy_coder_destroy(entropy_coder_t *coder);
 // Encoding functions
 int encode_y_block(entropy_coder_t *coder, int16_t *y_coeffs);
 int encode_chroma_block(entropy_coder_t *coder, int16_t *chroma_coeffs, int is_cg);
 int encode_motion_vector(entropy_coder_t *coder, int16_t mv_x, int16_t mv_y);
 int encode_block_mode(entropy_coder_t *coder, uint8_t mode);
 // Decoding functions  
 void entropy_coder_init_reader(entropy_coder_t *coder, const uint8_t *buffer, size_t buffer_size);
 int decode_y_block(entropy_coder_t *coder, int16_t *y_coeffs);
 int decode_chroma_block(entropy_coder_t *coder, int16_t *chroma_coeffs, int is_cg);
 int decode_motion_vector(entropy_coder_t *coder, int16_t *mv_x, int16_t *mv_y);
 int decode_block_mode(entropy_coder_t *coder, uint8_t *mode);
 // Get compressed size
 size_t entropy_coder_get_size(entropy_coder_t *coder);
 void entropy_coder_reset(entropy_coder_t *coder);
 #endif // ENTROPY_CODER_H
--- a/video_encoder/include/tav_avx512.h
+++ b/video_encoder/include/tav_avx512.h
@@ -1,837 +0,0 @@
 /*
 * TAV AVX-512 Optimisations
 *
 * This file contains AVX-512 optimised versions of performance-critical functions
 * in the TAV encoder. Runtime CPU detection ensures fallback to scalar versions
 * on non-AVX-512 systems.
 *
 * Optimised functions:
 * - 1D DWT transforms (5/3, 9/7, Haar, Bior13/7, DD4)
 * - Quantisation functions
 * - RGB to YCoCg colour conversion
 * - 2D DWT gather/scatter operations
 *
 * Compile with: -mavx512f -mavx512dq -mavx512bw -mavx512vl
 */
 #ifndef TAV_AVX512_H
 #define TAV_AVX512_H
 #include <immintrin.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #include <math.h>
 #include <stdio.h>
 // =============================================================================
 // SIMD Capability Detection
 // =============================================================================
 // SIMD capability levels; tav_simd_init() stores one of these in g_simd_level.
 typedef enum {
    SIMD_NONE = 0,       // no AVX-512 path selected
    SIMD_AVX512F = 1     // AVX-512 path selected
 } simd_level_t;
 // Global SIMD level (set by tav_simd_init)
 // Declared `static` in this header, so every translation unit that includes
 // the header owns a separate copy; tav_simd_init() only updates the copy of
 // the unit it is called from.
 static simd_level_t g_simd_level = SIMD_NONE;
 // Runtime CPU feature check for the AVX-512 subsets used by this header.
 // Returns 1 only when the build targets AVX-512F (__AVX512F__ is defined) and
 // the running CPU reports both "avx512f" and "avx512dq"; returns 0 otherwise.
 static inline int cpu_has_avx512f(void) {
    int supported = 0;
 #ifdef __AVX512F__
    if (__builtin_cpu_supports("avx512f") && __builtin_cpu_supports("avx512dq")) {
        supported = 1;
    }
 #endif
    return supported;
 }
 // One-time SIMD setup: records the selected level in g_simd_level and prints
 // a single status line to stderr describing the outcome.
 // Builds without __AVX512F__ always select SIMD_NONE; otherwise the result of
 // cpu_has_avx512f() decides between SIMD_AVX512F and SIMD_NONE.
 static inline void tav_simd_init(void) {
 #ifdef __AVX512F__
    const int have_avx512 = cpu_has_avx512f();
    g_simd_level = have_avx512 ? SIMD_AVX512F : SIMD_NONE;
    fputs(have_avx512 ? "[TAV] AVX-512 optimisations enabled\n"
                      : "[TAV] AVX-512 not available, using scalar fallback\n",
          stderr);
 #else
    g_simd_level = SIMD_NONE;
    fputs("[TAV] Compiled without AVX-512 support\n", stderr);
 #endif
 }
 #ifdef __AVX512F__
 // =============================================================================
 // Helper Functions
 // =============================================================================
 // Horizontal sum of the 16 float lanes of v.
 // Reduction order: lower 256 bits + upper 256 bits, then lower 128 + upper 128
 // of that result, then two horizontal adds; lane 0 of the final vector is
 // returned.
 static inline float _mm512_reduce_add_ps_compat(__m512 v) {
    const __m256 folded8 = _mm256_add_ps(_mm512_castps512_ps256(v),
                                         _mm512_extractf32x8_ps(v, 1));
    const __m128 folded4 = _mm_add_ps(_mm256_castps256_ps128(folded8),
                                      _mm256_extractf128_ps(folded8, 1));
    const __m128 folded2 = _mm_hadd_ps(folded4, folded4);
    return _mm_cvtss_f32(_mm_hadd_ps(folded2, folded2));
 }
 // Lane-wise clamp: first raises each lane of v to at least min_val, then
 // limits the result to at most max_val.
 static inline __m512 _mm512_clamp_ps(__m512 v, __m512 min_val, __m512 max_val) {
    const __m512 raised = _mm512_max_ps(v, min_val);
    return _mm512_min_ps(raised, max_val);
 }
 // =============================================================================
 // AVX-512 Optimised 1D DWT Forward Transforms
 // =============================================================================
 // 5/3 forward lifting DWT (1-D, in place) using AVX-512 arithmetic.
 //
 // On return data[0 .. half-1] holds the low-pass band and
 // data[half .. length-1] the high-pass band, with half = (length + 1) / 2.
 //   predict: high[i] = data[2i+1] - 0.5  * (data[2i] + data[2i+2])
 //            (data[2i] is reused when 2i+2 is past the end)
 //   update:  low[i]  = data[2i]   + 0.25 * (high[i-1] + high[i])
 //            (high[i-1] is taken as 0 for i == 0, high[i] as 0 for i == half-1)
 // Inputs shorter than 2 samples are left untouched. If the scratch buffer
 // cannot be allocated the input is also left untouched (the allocation result
 // used to be dereferenced unchecked).
 static inline void dwt_53_forward_1d_avx512(float *data, int length) {
    if (length < 2) return;
    // Zero-filled scratch buffer: entries never written below stay 0.
    float *temp = (float*)calloc((size_t)length, sizeof(float));
    if (!temp) return;
    const int half = (length + 1) / 2;
    const __m512 half_vec = _mm512_set1_ps(0.5f);
    const __m512 quarter_vec = _mm512_set1_ps(0.25f);
    int i;
    // Predict step (high-pass), 16 outputs per iteration.
    for (i = 0; i + 16 <= half; i += 16) {
        __mmask16 valid_mask = 0xFFFF;
        float even_curr_vals[16], even_next_vals[16], odd_vals[16];
        // Stride-2 gather into contiguous staging arrays; a lane whose odd
        // sample lies past the end is zeroed and masked out of the store.
        for (int j = 0; j < 16; j++) {
            const int e = 2 * (i + j);
            if (e + 1 < length) {
                even_curr_vals[j] = data[e];
                even_next_vals[j] = (e + 2 < length) ? data[e + 2] : data[e];
                odd_vals[j] = data[e + 1];
            } else {
                valid_mask &= (__mmask16)~(1u << j);
                even_curr_vals[j] = 0.0f;
                even_next_vals[j] = 0.0f;
                odd_vals[j] = 0.0f;
            }
        }
        if (valid_mask == 0) break;
        const __m512 even_curr = _mm512_loadu_ps(even_curr_vals);
        const __m512 even_next = _mm512_loadu_ps(even_next_vals);
        const __m512 odd = _mm512_loadu_ps(odd_vals);
        const __m512 pred = _mm512_mul_ps(_mm512_add_ps(even_curr, even_next), half_vec);
        _mm512_mask_storeu_ps(&temp[half + i], valid_mask, _mm512_sub_ps(odd, pred));
    }
    // Predict step, scalar remainder.
    for (; i < half; i++) {
        const int idx = 2 * i + 1;
        if (idx < length) {
            const float pred = 0.5f * (data[2 * i] + (2 * i + 2 < length ? data[2 * i + 2] : data[2 * i]));
            temp[half + i] = data[idx] - pred;
        }
    }
    // Update step (low-pass), 16 outputs per iteration. The loop condition
    // guarantees i + j < half for every lane, so no per-lane bound or store
    // mask is needed (the previous dead 16-float load of data was dropped).
    for (i = 0; i + 16 <= half; i += 16) {
        float even_vals[16], high_prev[16], high_curr[16];
        for (int j = 0; j < 16; j++) {
            const int k = i + j;
            even_vals[j] = data[2 * k];
            high_prev[j] = (k > 0) ? temp[half + k - 1] : 0.0f;
            high_curr[j] = (k < half - 1) ? temp[half + k] : 0.0f;
        }
        const __m512 even = _mm512_loadu_ps(even_vals);
        const __m512 hp = _mm512_loadu_ps(high_prev);
        const __m512 hc = _mm512_loadu_ps(high_curr);
        const __m512 update = _mm512_mul_ps(_mm512_add_ps(hp, hc), quarter_vec);
        _mm512_storeu_ps(&temp[i], _mm512_add_ps(even, update));
    }
    // Update step, scalar remainder.
    for (; i < half; i++) {
        const float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) +
                                      (i < half - 1 ? temp[half + i] : 0));
        temp[i] = data[2 * i] + update;
    }
    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
 }
 // 9/7 lifting "predict" pass over a split buffer:
 //   d[i] += coeff * (s[i] + s[i+1]),  s = temp[0 .. half-1], d = temp[half .. length-1]
 // The sample after the last s is mirrored (s[half] := s[half-1]).
 static inline void dwt_97_predict_step_avx512(float *temp, int half, int length, float coeff) {
    if (half == 1) {
        if (half < length) {
            temp[half] += coeff * (temp[0] + temp[0]);
        }
        return;
    }
    const __m512 coeff_vec = _mm512_set1_ps(coeff);
    const int limit = half - 1;             // s[i+1] must exist, so i <= half - 2
    const int n_full = (limit / 16) * 16;   // part handled 16 lanes at a time
    int i = 0;
    for (; i + 16 <= n_full; i += 16) {
        const __m512 s = _mm512_loadu_ps(&temp[i]);
        const __m512 s_next = _mm512_loadu_ps(&temp[i + 1]);
        __m512 d = _mm512_loadu_ps(&temp[half + i]);
        d = _mm512_fmadd_ps(coeff_vec, _mm512_add_ps(s, s_next), d);
        _mm512_storeu_ps(&temp[half + i], d);
    }
    // scalar remainder up to (but excluding) the mirrored last index
    for (; i < limit; ++i) {
        temp[half + i] += coeff * (temp[i] + temp[i + 1]);
    }
    // last index i = half - 1: only present when d[half-1] exists (even length)
    const int last = half - 1;
    if (half + last < length) {
        temp[half + last] += coeff * (temp[last] + temp[last]);
    }
 }
 // 9/7 lifting "update" pass over a split buffer:
 //   s[i] += coeff * (d[i-1] + d[i])
 // For i == 0 the missing d[-1] is replaced by d[0]; a d[i] that lies past the
 // end of the buffer (last s of an odd-length signal) is taken as 0.
 static inline void dwt_97_update_step_avx512(float *temp, int half, int length, float coeff) {
    if (half < length) {
        const float d0 = temp[half];
        temp[0] += coeff * (d0 + d0);
    }
    if (half <= 1) return;
    const __m512 coeff_vec = _mm512_set1_ps(coeff);
    const int start = 1;
    // The vector body loads d[i .. i+15], so it must be sized by the number of
    // detail samples (length - half), not by the number of s samples. Sizing it
    // by half - 1 made the last dcurr load read temp[length] for odd lengths
    // with (half - 1) % 16 == 0 (33, 65, 97, ...).
    const int d_count = length - half;
    const int n_full = ((d_count - start) / 16) * 16;
    int i = start;
    for (; i + 16 <= start + n_full; i += 16) {
        __m512 s = _mm512_loadu_ps(&temp[i]);
        const __m512 d_curr = _mm512_loadu_ps(&temp[half + i]);
        const __m512 d_prev = _mm512_loadu_ps(&temp[half + i - 1]);
        s = _mm512_fmadd_ps(coeff_vec, _mm512_add_ps(d_prev, d_curr), s);
        _mm512_storeu_ps(&temp[i], s);
    }
    // scalar remainder, with explicit bounds handling for the final sample
    for (; i < half; ++i) {
        const float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
        const float d_prev = (half + i - 1 < length && i > 0) ? temp[half + i - 1] : d_curr;
        temp[i] += coeff * (d_prev + d_curr);
    }
 }
 // 9/7 Irreversible Forward DWT with AVX-512 (1-D, in place).
 //
 // Splits data into even samples (s, first half) and odd samples (d, second
 // half), applies the four lifting passes alpha/beta/gamma/delta, scales s by K
 // and d by 1/K, and copies the result back, so that on return
 // data[0 .. half-1] is the low-pass band and data[half .. length-1] the
 // high-pass band (half = (length + 1) / 2).
 // Inputs shorter than 2 samples, or a failed scratch allocation, leave data
 // untouched.
 static inline void dwt_97_forward_1d_avx512(float *data, int length) {
    if (length < 2) return;
    const int half = (length + 1) / 2;
    // Scratch buffer, 64-byte aligned when the platform allows it.
    float *temp = NULL;
 #if defined(_POSIX_C_SOURCE) || defined(_XOPEN_SOURCE)
    if (posix_memalign((void**)&temp, 64, (size_t)length * sizeof(float)) != 0) {
        temp = (float*)malloc((size_t)length * sizeof(float));
    }
 #else
    temp = (float*)aligned_alloc(64, ((size_t)length * sizeof(float) + 63) & ~63);
    if (!temp) temp = (float*)malloc((size_t)length * sizeof(float));
 #endif
    if (!temp) return;  // allocation failure: leave the input as it is
    // Split: temp[0 .. half-1] = even samples, temp[half ..] = odd samples.
    {
        float *even = temp;
        float *odd = temp + half;
        int k = 0;
        for (; k + 1 < length; k += 2) {
            *even++ = data[k];
            *odd++ = data[k + 1];
        }
        if (k < length) {  // odd length: one trailing even sample
            *even = data[k];
        }
    }
    // Lifting passes
    dwt_97_predict_step_avx512(temp, half, length, -1.586134342f);  // alpha
    dwt_97_update_step_avx512(temp, half, length, -0.052980118f);   // beta
    dwt_97_predict_step_avx512(temp, half, length, 0.882911076f);   // gamma
    dwt_97_update_step_avx512(temp, half, length, 0.443506852f);    // delta
    // Scaling: s *= K
    {
        const __m512 K_vec = _mm512_set1_ps(1.230174105f);
        const int n_full = (half / 16) * 16;
        int i = 0;
        for (; i + 16 <= n_full; i += 16) {
            _mm512_storeu_ps(&temp[i], _mm512_mul_ps(_mm512_loadu_ps(&temp[i]), K_vec));
        }
        for (; i < half; ++i) temp[i] *= 1.230174105f;
    }
    // Scaling: d *= 1/K (the scalar tail divides by K, as before)
    {
        const __m512 invK_vec = _mm512_set1_ps(1.0f / 1.230174105f);
        const int dlen = length - half;
        const int n_full = (dlen / 16) * 16;
        int i = 0;
        for (; i + 16 <= n_full; i += 16) {
            _mm512_storeu_ps(&temp[half + i], _mm512_mul_ps(_mm512_loadu_ps(&temp[half + i]), invK_vec));
        }
        for (; i < dlen; ++i) {
            if (half + i < length) temp[half + i] /= 1.230174105f;
        }
    }
    // Copy back and free
    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
 }
 // Haar Forward DWT with AVX-512 (1-D, in place).
 //
 // For each pair (data[2i], data[2i+1]):
 //   low[i]  = (even + odd) / 2   -> data[i]
 //   high[i] = (even - odd) / 2   -> data[half + i],  half = (length + 1) / 2
 // For an odd length the unpaired last sample is copied to the low band as is.
 // Inputs shorter than 2 samples are left untouched. If the scratch buffer
 // cannot be allocated the input is also left untouched (the allocation result
 // used to be dereferenced unchecked).
 static inline void dwt_haar_forward_1d_avx512(float *data, int length) {
    if (length < 2) return;
    float *temp = (float*)malloc((size_t)length * sizeof(float));
    if (!temp) return;
    const int half = (length + 1) / 2;
    const __m512 half_vec = _mm512_set1_ps(0.5f);
    // Process 16 pairs at a time
    int i;
    for (i = 0; i + 16 <= half; i += 16) {
        __mmask16 valid_mask = 0xFFFF;
        float even_vals[16], odd_vals[16];
        for (int j = 0; j < 16; j++) {
            const int e = 2 * (i + j);
            even_vals[j] = data[e];
            if (e + 1 < length) {
                odd_vals[j] = data[e + 1];
            } else {
                // Unpaired sample: odd := even makes low == even; the
                // matching high slot does not exist, so mask it out.
                odd_vals[j] = even_vals[j];
                valid_mask &= (__mmask16)~(1u << j);
            }
        }
        const __m512 even = _mm512_loadu_ps(even_vals);
        const __m512 odd = _mm512_loadu_ps(odd_vals);
        const __m512 low = _mm512_mul_ps(_mm512_add_ps(even, odd), half_vec);
        const __m512 high = _mm512_mul_ps(_mm512_sub_ps(even, odd), half_vec);
        _mm512_storeu_ps(&temp[i], low);
        _mm512_mask_storeu_ps(&temp[half + i], valid_mask, high);
    }
    // Remaining scalar
    for (; i < half; i++) {
        if (2 * i + 1 < length) {
            temp[i] = (data[2 * i] + data[2 * i + 1]) / 2.0f;
            temp[half + i] = (data[2 * i] - data[2 * i + 1]) / 2.0f;
        } else {
            temp[i] = data[2 * i];
            if (half + i < length) {
                temp[half + i] = 0.0f;
            }
        }
    }
    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
 }
 // =============================================================================
 // AVX-512 Optimised Quantisation Functions
 // =============================================================================
 // Uniform quantisation of DWT coefficients to int16 with an optional dead zone.
 //
 // coeffs               input coefficients (size entries)
 // quantised            output, size entries
 // effective_q          quantiser step; each coefficient is scaled by 1/effective_q
 // dead_zone_threshold  when > 0 and is_chroma is 0, scaled values with
 //                      |value| <= threshold become 0
 // width, height, decomp_levels, get_subband_level, get_subband_type
 //                      accepted for interface compatibility; not used by this
 //                      simplified implementation
 //
 // Values are rounded half away from zero and saturated to [-32768, 32767].
 // The saturation is applied in the float domain before the integer
 // conversion: converting an out-of-range float yields 0x80000000 in the
 // vector path (so large positive values used to come out as -32768) and is
 // undefined in the scalar cast. NaN quantises to -32768 in both paths.
 // NOTE(review): the vector path multiplies by 1/effective_q while the scalar
 // tail divides by effective_q; the two can differ in the last bit.
 static inline void quantise_dwt_coefficients_avx512(
    float *coeffs, int16_t *quantised, int size,
    float effective_q, float dead_zone_threshold,
    int width, int height, int decomp_levels, int is_chroma,
    int (*get_subband_level)(int, int, int, int),
    int (*get_subband_type)(int, int, int, int)
 ) {
    (void)width;
    (void)height;
    (void)decomp_levels;
    (void)get_subband_level;
    (void)get_subband_type;
    const int use_dead_zone = (dead_zone_threshold > 0.0f) && !is_chroma;
    const __m512 inv_q_vec = _mm512_set1_ps(1.0f / effective_q);
    const __m512 threshold_vec = _mm512_set1_ps(dead_zone_threshold);
    const __m512 half_vec = _mm512_set1_ps(0.5f);
    const __m512 nhalf_vec = _mm512_set1_ps(-0.5f);
    const __m512 zero_vec = _mm512_setzero_ps();
    const __m512 min_f32 = _mm512_set1_ps(-32768.0f);
    const __m512 max_f32 = _mm512_set1_ps(32767.0f);
    int i;
    for (i = 0; i + 16 <= size; i += 16) {
        __m512 quant = _mm512_mul_ps(_mm512_loadu_ps(&coeffs[i]), inv_q_vec);
        // Dead-zone handling (simplified - full version needs per-coeff logic)
        if (use_dead_zone) {
            const __mmask16 dead_mask =
                _mm512_cmp_ps_mask(_mm512_abs_ps(quant), threshold_vec, _CMP_LE_OQ);
            quant = _mm512_mask_blend_ps(dead_mask, quant, zero_vec);
        }
        // Round half away from zero: add +0.5 / -0.5 by sign, then truncate.
        const __mmask16 pos_mask = _mm512_cmp_ps_mask(quant, zero_vec, _CMP_GE_OQ);
        quant = _mm512_add_ps(quant, _mm512_mask_blend_ps(pos_mask, nhalf_vec, half_vec));
        // Saturate before converting; max_ps returns its second operand for
        // NaN input, so NaN becomes -32768.
        quant = _mm512_min_ps(_mm512_max_ps(quant, min_f32), max_f32);
        const __m512i quant_i32 = _mm512_cvttps_epi32(quant);
        _mm256_storeu_si256((__m256i*)&quantised[i], _mm512_cvtsepi32_epi16(quant_i32));
    }
    // Remaining scalar
    for (; i < size; i++) {
        float quantised_val = coeffs[i] / effective_q;
        if (use_dead_zone && fabsf(quantised_val) <= dead_zone_threshold) {
            quantised_val = 0.0f;
        }
        float rounded = quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f);
        if (!(rounded >= -32768.0f)) {
            rounded = -32768.0f;   // also catches NaN
        } else if (rounded > 32767.0f) {
            rounded = 32767.0f;
        }
        quantised[i] = (int16_t)(int32_t)rounded;
    }
 }
 // Perceptual quantisation with per-coefficient weighting.
 //
 // coeffs          input coefficients (size entries)
 // quantised       output, size entries
 // weights         pre-computed per-coefficient weights (size entries)
 // base_quantiser  base step; the step for entry i is base_quantiser * weights[i]
 //
 // Each coefficient is divided by its step, rounded half away from zero and
 // saturated to [-32768, 32767]. The saturation is applied in the float domain
 // before the integer conversion: converting an out-of-range float yields
 // 0x80000000 in the vector path (so large positive values used to come out as
 // -32768) and is undefined in the scalar cast. NaN quantises to -32768 in
 // both paths.
 static inline void quantise_dwt_coefficients_perceptual_avx512(
    float *coeffs, int16_t *quantised, int size,
    float *weights,  // Pre-computed per-coefficient weights
    float base_quantiser
 ) {
    const __m512 base_q_vec = _mm512_set1_ps(base_quantiser);
    const __m512 half_vec = _mm512_set1_ps(0.5f);
    const __m512 nhalf_vec = _mm512_set1_ps(-0.5f);
    const __m512 zero_vec = _mm512_setzero_ps();
    const __m512 min_f32 = _mm512_set1_ps(-32768.0f);
    const __m512 max_f32 = _mm512_set1_ps(32767.0f);
    int i;
    for (i = 0; i + 16 <= size; i += 16) {
        const __m512 coeff = _mm512_loadu_ps(&coeffs[i]);
        // step = base_q * weight
        const __m512 step = _mm512_mul_ps(base_q_vec, _mm512_loadu_ps(&weights[i]));
        __m512 quant = _mm512_div_ps(coeff, step);
        // Round half away from zero: add +0.5 / -0.5 by sign, then truncate.
        const __mmask16 pos_mask = _mm512_cmp_ps_mask(quant, zero_vec, _CMP_GE_OQ);
        quant = _mm512_add_ps(quant, _mm512_mask_blend_ps(pos_mask, nhalf_vec, half_vec));
        // Saturate before converting; max_ps returns its second operand for
        // NaN input, so NaN becomes -32768.
        quant = _mm512_min_ps(_mm512_max_ps(quant, min_f32), max_f32);
        const __m512i quant_i32 = _mm512_cvttps_epi32(quant);
        _mm256_storeu_si256((__m256i*)&quantised[i], _mm512_cvtsepi32_epi16(quant_i32));
    }
    // Remaining scalar
    for (; i < size; i++) {
        const float step = base_quantiser * weights[i];
        const float quantised_val = coeffs[i] / step;
        float rounded = quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f);
        if (!(rounded >= -32768.0f)) {
            rounded = -32768.0f;   // also catches NaN
        } else if (rounded > 32767.0f) {
            rounded = 32767.0f;
        }
        quantised[i] = (int16_t)(int32_t)rounded;
    }
 }
 // =============================================================================
 // AVX-512 Optimised Dequantisation Functions
 // =============================================================================
 // Basic dequantisation: coeffs[i] = (float)quantised[i] * effective_q
 // for every i in [0, size).
 //
 //   quantised    input int16 levels (only read)
 //   coeffs       output floats, one per level
 //   size         number of elements
 //   effective_q  quantiser step applied to every element
 static inline void dequantise_dwt_coefficients_avx512(
    const int16_t *quantised, float *coeffs, int size,
    float effective_q
 ) {
    const __m512 scale = _mm512_set1_ps(effective_q);
    int pos = 0;
    // Vector body: 16 levels per iteration
    while (size - pos >= 16) {
        // 16 x int16 -> 16 x int32 (sign-extended) -> 16 x float
        const __m256i packed = _mm256_loadu_si256((const __m256i*)(quantised + pos));
        const __m512 levels = _mm512_cvtepi32_ps(_mm512_cvtepi16_epi32(packed));
        _mm512_storeu_ps(coeffs + pos, _mm512_mul_ps(levels, scale));
        pos += 16;
    }
    // Scalar tail for the last (size % 16) elements
    while (pos < size) {
        coeffs[pos] = (float)quantised[pos] * effective_q;
        pos++;
    }
 }
 // Perceptual dequantisation with per-coefficient weights:
 //   coeffs[i] = (float)quantised[i] * (base_quantiser * weights[i])
 // for every i in [0, size).
 //
 //   quantised       input int16 levels (only read)
 //   coeffs          output floats, one per level
 //   size            number of elements
 //   weights         per-coefficient weights, indexed like quantised
 //   base_quantiser  scalar quantiser multiplied by each weight
 static inline void dequantise_dwt_coefficients_perceptual_avx512(
    const int16_t *quantised, float *coeffs, int size,
    const float *weights, float base_quantiser
 ) {
    const __m512 base = _mm512_set1_ps(base_quantiser);
    int pos = 0;
    // Vector body: 16 levels per iteration
    while (size - pos >= 16) {
        // Per-lane step = base_quantiser * weight
        const __m512 step = _mm512_mul_ps(base, _mm512_loadu_ps(weights + pos));
        // 16 x int16 -> 16 x int32 (sign-extended) -> 16 x float
        const __m256i packed = _mm256_loadu_si256((const __m256i*)(quantised + pos));
        const __m512 levels = _mm512_cvtepi32_ps(_mm512_cvtepi16_epi32(packed));
        _mm512_storeu_ps(coeffs + pos, _mm512_mul_ps(levels, step));
        pos += 16;
    }
    // Scalar tail for the last (size % 16) elements
    while (pos < size) {
        const float step = base_quantiser * weights[pos];
        coeffs[pos] = (float)quantised[pos] * step;
        pos++;
    }
 }
 // =============================================================================
 // AVX-512 Optimised RGB to YCoCg Conversion
 // =============================================================================
 // Converts interleaved 8-bit RGB into three float planes using the lifting steps
 //   co  = r - b
 //   tmp = b + co * 0.5
 //   cg  = g - tmp
 //   y   = tmp + cg * 0.5
 //
 //   rgb            width*height R,G,B byte triplets (only read)
 //   y, co, cg      output planes, width*height floats each
 //   width, height  frame dimensions; only their product is used
 static inline void rgb_to_ycocg_avx512(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height) {
    const int pixel_count = width * height;
    const __m512 one_half = _mm512_set1_ps(0.5f);
    int px = 0;
    // Vector body: 16 pixels (48 bytes of RGB data) per iteration
    while (pixel_count - px >= 16) {
        // Deinterleave the channels through small stack buffers, widening each
        // byte to float on the way
        float red[16], green[16], blue[16];
        for (int lane = 0; lane < 16; lane++) {
            const uint8_t *triplet = &rgb[(px + lane) * 3];
            red[lane] = (float)triplet[0];
            green[lane] = (float)triplet[1];
            blue[lane] = (float)triplet[2];
        }
        const __m512 r = _mm512_loadu_ps(red);
        const __m512 g = _mm512_loadu_ps(green);
        const __m512 b = _mm512_loadu_ps(blue);
        const __m512 chroma_o = _mm512_sub_ps(r, b);
        const __m512 mid = _mm512_fmadd_ps(chroma_o, one_half, b);    // b + co * 0.5
        const __m512 chroma_g = _mm512_sub_ps(g, mid);
        const __m512 luma = _mm512_fmadd_ps(chroma_g, one_half, mid); // tmp + cg * 0.5
        _mm512_storeu_ps(y + px, luma);
        _mm512_storeu_ps(co + px, chroma_o);
        _mm512_storeu_ps(cg + px, chroma_g);
        px += 16;
    }
    // Scalar tail for the last (pixel_count % 16) pixels
    while (px < pixel_count) {
        const uint8_t *triplet = &rgb[px * 3];
        const float r = triplet[0];
        const float g = triplet[1];
        const float b = triplet[2];
        const float chroma_o = r - b;
        co[px] = chroma_o;
        const float mid = b + chroma_o * 0.5f;
        const float chroma_g = g - mid;
        cg[px] = chroma_g;
        y[px] = mid + chroma_g * 0.5f;
        px++;
    }
 }
 // =============================================================================
 // AVX-512 Optimised 2D DWT with Gather/Scatter
 // =============================================================================
 // Optimised column extraction using gather.
 // Copies column x of a row-major tile (width floats per row, height rows)
 // into column[0 .. height).
 static inline void dwt_2d_extract_column_avx512(
    const float *tile_data, float *column,
    int x, int width, int height
 ) {
    int row = 0;
    // Vector body: 16 rows per gather
    while (height - row >= 16) {
        // Element offsets of column x in 16 consecutive rows
        int offsets[16];
        for (int lane = 0; lane < 16; lane++) {
            offsets[lane] = (row + lane) * width + x;
        }
        const __m512i vindex = _mm512_loadu_si512((__m512i*)offsets);
        // Scale 4: offsets count floats, the gather addresses bytes
        _mm512_storeu_ps(&column[row], _mm512_i32gather_ps(vindex, tile_data, 4));
        row += 16;
    }
    // Scalar tail for the last (height % 16) rows
    while (row < height) {
        column[row] = tile_data[row * width + x];
        row++;
    }
 }
 // Optimised column insertion using scatter.
 // Writes column[0 .. height) into column x of a row-major tile
 // (width floats per row, height rows).
 static inline void dwt_2d_insert_column_avx512(
    float *tile_data, const float *column,
    int x, int width, int height
 ) {
    int row = 0;
    // Vector body: 16 rows per scatter
    while (height - row >= 16) {
        // Element offsets of column x in 16 consecutive rows
        int offsets[16];
        for (int lane = 0; lane < 16; lane++) {
            offsets[lane] = (row + lane) * width + x;
        }
        const __m512i vindex = _mm512_loadu_si512((__m512i*)offsets);
        const __m512 values = _mm512_loadu_ps(&column[row]);
        // Scale 4: offsets count floats, the scatter addresses bytes
        _mm512_i32scatter_ps(tile_data, vindex, values, 4);
        row += 16;
    }
    // Scalar tail for the last (height % 16) rows
    while (row < height) {
        tile_data[row * width + x] = column[row];
        row++;
    }
 }
 #endif // __AVX512F__
 #endif // TAV_AVX512_H
--- a/video_encoder/include/tav_encoder_lib.h
+++ b/video_encoder/include/tav_encoder_lib.h
@@ -1,295 +0,0 @@
 /**
 * TAV Encoder Library - Public API
 *
 * High-level interface for encoding video using the TSVM Advanced Video (TAV) codec.
 * Supports GOP-based encoding with internal multi-threading for optimal performance.
 *
 * Created by CuriousTorvald and Claude on 2025-12-03.
 */
 #ifndef TAV_ENCODER_LIB_H
 #define TAV_ENCODER_LIB_H
 #include <stdint.h>
 #include <stddef.h>
 #ifdef __cplusplus
 extern "C" {
 #endif
 // =============================================================================
 // Opaque Encoder Context
 // =============================================================================
 /**
 * TAV encoder context - opaque to users.
 * Created with tav_encoder_create(), freed with tav_encoder_free().
 */
 typedef struct tav_encoder_context tav_encoder_context_t;
 // =============================================================================
 // Configuration Structures
 // =============================================================================
 /**
 * Video encoding parameters.
 *
 * Fill with tav_encoder_params_init() and pass to tav_encoder_create(), which
 * copies the structure. tav_encoder_get_params() returns the values actually
 * in use, including any that were auto-calculated from a 0 / "auto" setting.
 */
 typedef struct {
    // === Video Dimensions ===
    int width;                    // Frame width (must be even)
    int height;                   // Frame height (must be even)
    int fps_num;                  // Framerate numerator (e.g., 60 for 60fps)
    int fps_den;                  // Framerate denominator (e.g., 1 for 60/1)
    // === Wavelet Configuration ===
    int wavelet_type;             // Spatial wavelet: 0=CDF 5/3, 1=CDF 9/7 (default), 2=CDF 13/7, 16=DD-4, 255=Haar
    // NOTE(review): tav_video_decoder.h documents its temporal_wavelet field as
    // 0=CDF 5/3, 1=CDF 9/7 - confirm which mapping is correct.
    int temporal_wavelet;         // Temporal wavelet: 0=Haar, 1=CDF 5/3 (default for smooth motion)
    int decomp_levels;            // Spatial DWT levels (0=auto, typically 6)
    int temporal_levels;          // Temporal DWT levels (0=auto, typically 2 for 8-frame GOPs)
    // === Color Space ===
    int channel_layout;           // 0=YCoCg-R (default), 1=ICtCp (for HDR/BT.2100 sources)
    int perceptual_tuning;        // 1=enable HVS perceptual quantization (default), 0=uniform
    // === GOP Configuration ===
    int enable_temporal_dwt;      // 1=enable 3D DWT GOP encoding (default), 0=intra-only I-frames
    int gop_size;                 // Frames per GOP (8, 16, or 24; 0=auto based on framerate)
    int enable_two_pass;          // 1=enable two-pass with scene change detection (default), 0=single-pass
    // === Quality Control ===
    int quality_level;            // NOTE(review): undocumented in this header - presumably an overall quality preset; confirm range and how it relates to the quantiser_* fields
    int quantiser_y;                // Luma quantiser (0-255, indexed against QLUT)
    int quantiser_co;               // Orange chrominance quantiser (0-255, indexed against QLUT)
    int quantiser_cg;               // Green chrominance quantiser (0-255, indexed against QLUT)
    float dead_zone_threshold;    // Dead-zone quantization threshold (0.0=disabled, 0.6-1.5 typical)
    // === Entropy Coding ===
    int entropy_coder;            // 0=Twobitmap (default), 1=EZBC (better for high-quality)
    int zstd_level;               // Zstd compression level (3-22, default: 7)
    // === Multi-threading ===
    int num_threads;              // Worker threads (0=single-threaded, -1=auto, 1-16=explicit)
    // === Encoder Presets ===
    int encoder_preset;           // Preset flags: 0x01=sports (finer temporal quant), 0x02=anime (disable grain)
    // === Advanced Options ===
    int verbose;                  // 1=enable debug output, 0=quiet (default)
    int monoblock;                // -1=auto (based on dimensions), 0=force tiled, 1=force monoblock
 } tav_encoder_params_t;
 /**
 * Initialize encoder parameters with default values.
 *
 * @param params  Parameter structure to initialize
 * @param width   Frame width
 * @param height  Frame height
 */
 void tav_encoder_params_init(tav_encoder_params_t *params, int width, int height);
 /**
 * Encoder output packet.
 * Contains encoded video or audio data.
 *
 * Handed back through the `packet` output argument of
 * tav_encoder_encode_gop() and tav_encoder_encode_audio().
 */
 typedef struct {
    // NOTE(review): this header also lists flush() as removed and provides
    // tav_encoder_free_packet() - confirm the real lifetime/ownership of data.
    uint8_t *data;                // Packet data (owned by encoder, valid until next encode/flush)
    size_t size;                  // Packet size in bytes
    uint8_t packet_type;          // TAV packet type (0x10=I-frame, 0x12=GOP, 0x24=audio, etc.); see the TAV_PACKET_* constants in this header
    int frame_number;             // Frame number (for video packets)
    int is_video;                 // 1=video packet, 0=audio packet
 } tav_encoder_packet_t;
 // =============================================================================
 // Encoder Lifecycle
 // =============================================================================
 /**
 * Create TAV encoder context.
 *
 * Allocates internal buffers, initializes thread pool (if multi-threading enabled),
 * and prepares encoder for frame submission.
 *
 * @param params  Encoder parameters (copied internally)
 * @return        Encoder context, or NULL on failure
 */
 tav_encoder_context_t *tav_encoder_create(const tav_encoder_params_t *params);
 /**
 * Free TAV encoder context.
 *
 * Shuts down thread pool, frees all buffers and resources.
 * Any unflushed frames in the GOP buffer will be lost.
 *
 * @param ctx  Encoder context
 */
 void tav_encoder_free(tav_encoder_context_t *ctx);
 /**
 * Get last error message.
 *
 * @param ctx  Encoder context
 * @return     Error message string (valid until next encode operation)
 */
 const char *tav_encoder_get_error(tav_encoder_context_t *ctx);
 /**
 * Get encoder parameters (with calculated values).
 * After context creation, params will contain actual values used
 * (e.g., auto-calculated decomp_levels, gop_size).
 *
 * @param ctx     Encoder context
 * @param params  Output parameters structure
 */
 void tav_encoder_get_params(tav_encoder_context_t *ctx, tav_encoder_params_t *params);
 /**
 * DEBUG: Validate encoder context integrity
 * Returns 1 if context appears valid, 0 otherwise
 */
 int tav_encoder_validate_context(tav_encoder_context_t *ctx);
 // =============================================================================
 // Video Encoding
 // =============================================================================
 /*
 * REMOVED: tav_encoder_encode_frame() and tav_encoder_flush() have been
 * removed. Use tav_encoder_encode_gop() instead, which works for both
 * single-threaded and multi-threaded modes. The CLI should buffer frames
 * and call encode_gop() when a full GOP is ready.
 */
 /**
 * Encode a complete GOP (Group of Pictures) directly.
 *
 * This function is STATELESS and THREAD-SAFE with separate contexts.
 * Perfect for multithreaded encoding from CLI:
 * - Each thread creates its own encoder context
 * - Each thread calls encode_gop() with a batch of frames
 * - No shared state, no locking needed
 *
 * Example multithreaded usage:
 * ```c
 * // Worker thread function
 * void* worker(void* arg) {
 *     work_item_t* item = (work_item_t*)arg;
 *
 *     // Create thread-local encoder context
 *     tav_encoder_context_t* ctx = tav_encoder_create(&shared_params);
 *
 *     // Encode this GOP
 *     tav_encoder_packet_t* packet;
 *     tav_encoder_encode_gop(ctx, item->frames, item->num_frames,
 *                            item->frame_numbers, &packet);
 *
 *     // Store packet in output queue
 *     queue_push(output_queue, packet);
 *
 *     tav_encoder_free(ctx);
 *     return NULL;
 * }
 * ```
 *
 * @param ctx            Encoder context (one per thread)
 * @param rgb_frames     Array of RGB24 frames [frame][width*height*3]
 * @param num_frames     Number of frames in GOP (1-24)
 * @param frame_numbers  Frame indices for timecodes (can be NULL)
 * @param packet         Output packet pointer
 * @return               1 if packet ready, -1 on error
 */
 int tav_encoder_encode_gop(tav_encoder_context_t *ctx,
                            const uint8_t **rgb_frames,
                            int num_frames,
                            const int *frame_numbers,
                            tav_encoder_packet_t **packet);
 /**
 * Free a packet returned by tav_encoder_encode_gop().
 *
 * @param packet  Packet to free (can be NULL)
 */
 void tav_encoder_free_packet(tav_encoder_packet_t *packet);
 // =============================================================================
 // Audio Encoding (Optional)
 // =============================================================================
 /**
 * Encode audio samples (TAD codec).
 *
 * Audio is encoded synchronously and returned immediately.
 * For TAV muxing: interleave audio packets with video packets by frame PTS.
 *
 * @param ctx              Encoder context
 * @param pcm_samples      PCM32f stereo samples (interleaved: L,R,L,R,...), num_samples×2 floats
 * @param num_samples      Number of samples per channel
 * @param packet           Output packet pointer
 * @return                 1 if packet ready, -1 on error
 */
 int tav_encoder_encode_audio(tav_encoder_context_t *ctx,
                              const float *pcm_samples,
                              size_t num_samples,
                              tav_encoder_packet_t **packet);
 // =============================================================================
 // Statistics and Info
 // =============================================================================
 /**
 * Encoding statistics, filled in by tav_encoder_get_stats().
 */
 typedef struct {
    int64_t frames_encoded;       // Total frames encoded
    int64_t gops_encoded;         // Total GOPs encoded
    size_t total_bytes;           // Total bytes output (video + audio)
    size_t video_bytes;           // Video bytes
    size_t audio_bytes;           // Audio bytes
    double avg_bitrate_kbps;      // Average bitrate (kbps)
    double encoding_fps;          // Encoding speed (frames/sec)
 } tav_encoder_stats_t;
 /**
 * Get encoding statistics.
 *
 * @param ctx    Encoder context
 * @param stats  Output statistics structure
 */
 void tav_encoder_get_stats(tav_encoder_context_t *ctx, tav_encoder_stats_t *stats);
 // =============================================================================
 // TAV Packet Types (for reference)
 // =============================================================================
 #define TAV_PACKET_IFRAME        0x10  // I-frame (intra-only, single frame)
 #define TAV_PACKET_PFRAME        0x11  // P-frame (delta from previous)
 #define TAV_PACKET_GOP_UNIFIED   0x12  // GOP unified (3D DWT, multiple frames)
 #define TAV_PACKET_AUDIO_TAD     0x24  // TAD audio (DWT-based perceptual codec)
 #define TAV_PACKET_AUDIO_PCM8    0x20  // PCM8 audio (legacy)
 #define TAV_PACKET_LOOP_START    0xF0  // Loop point start (no payload)
 #define TAV_PACKET_GOP_SYNC      0xFC  // GOP sync (frame count marker)
 #define TAV_PACKET_TIMECODE      0xFD  // Timecode metadata
 #define TAV_PACKET_SYNC          0xFF  // Sync packet (no payload)
 // =============================================================================
 // Tile Settings (for multi-tile mode)
 // =============================================================================
 #define TAV_TILE_SIZE_X 640               // Base tile width
 #define TAV_TILE_SIZE_Y 540               // Base tile height
 #define TAV_DWT_FILTER_HALF_SUPPORT 4     // For 9/7 filter (filter lengths 9,7 → L=4)
 #define TAV_TILE_MARGIN_LEVELS 3          // Use margin for 3 levels: 4 * (2^3) = 32px
 #define TAV_TILE_MARGIN (TAV_DWT_FILTER_HALF_SUPPORT * (1 << TAV_TILE_MARGIN_LEVELS))  // 32px
 #define TAV_PADDED_TILE_SIZE_X (TAV_TILE_SIZE_X + 2 * TAV_TILE_MARGIN)  // 704
 #define TAV_PADDED_TILE_SIZE_Y (TAV_TILE_SIZE_Y + 2 * TAV_TILE_MARGIN)  // 604
 // Monoblock threshold: D1 PAL resolution (720x576)
 // If width > 720 OR height > 576, automatically switch to tiled mode
 #define TAV_MONOBLOCK_MAX_WIDTH  720
 #define TAV_MONOBLOCK_MAX_HEIGHT 576
 #ifdef __cplusplus
 }
 #endif
 #endif // TAV_ENCODER_LIB_H
--- a/video_encoder/include/tav_simd_dispatch.h
+++ b/video_encoder/include/tav_simd_dispatch.h
@@ -1,275 +0,0 @@
 /*
 * TAV SIMD Function Dispatcher
 *
 * This file provides runtime CPU detection and function pointer dispatch
 * for SIMD-optimized versions of performance-critical TAV encoder functions.
 *
 * Usage:
 * 1. Include this header after defining all scalar functions
 * 2. Call tav_simd_init() once at encoder initialization
 * 3. Use function pointers (e.g., dwt_53_forward_1d_ptr) throughout code
 *
 * The dispatcher will automatically select AVX-512, AVX2, or scalar versions
 * based on runtime CPU capabilities.
 */
 #ifndef TAV_SIMD_DISPATCH_H
 #define TAV_SIMD_DISPATCH_H
 #include <stdint.h>
 // =============================================================================
 // Function Pointer Types
 // =============================================================================
 // 1D DWT function pointer types
 typedef void (*dwt_1d_func_t)(float *data, int length);
 // Quantization function pointer types
 typedef void (*quantise_basic_func_t)(
    float *coeffs, int16_t *quantised, int size,
    float effective_q, float dead_zone_threshold,
    int width, int height, int decomp_levels, int is_chroma,
    int (*get_subband_level)(int, int, int, int),
    int (*get_subband_type)(int, int, int, int)
 );
 typedef void (*quantise_perceptual_func_t)(
    float *coeffs, int16_t *quantised, int size,
    float *weights, float base_quantiser
 );
 // Color conversion function pointer type
 typedef void (*rgb_to_ycocg_func_t)(
    const uint8_t *rgb, float *y, float *co, float *cg,
    int width, int height
 );
 // 2D DWT column operations
 typedef void (*dwt_2d_column_extract_func_t)(
    const float *tile_data, float *column,
    int x, int width, int height
 );
 typedef void (*dwt_2d_column_insert_func_t)(
    float *tile_data, const float *column,
    int x, int width, int height
 );
 // =============================================================================
 // Global Function Pointers (initialized by tav_simd_init)
 // =============================================================================
 // DWT 1D transforms
 static dwt_1d_func_t dwt_53_forward_1d_ptr = NULL;
 static dwt_1d_func_t dwt_97_forward_1d_ptr = NULL;
 static dwt_1d_func_t dwt_haar_forward_1d_ptr = NULL;
 static dwt_1d_func_t dwt_53_inverse_1d_ptr = NULL;
 static dwt_1d_func_t dwt_haar_inverse_1d_ptr = NULL;
 // Quantization
 static quantise_basic_func_t quantise_dwt_coefficients_ptr = NULL;
 static quantise_perceptual_func_t quantise_dwt_coefficients_perceptual_ptr = NULL;
 // Color conversion
 static rgb_to_ycocg_func_t rgb_to_ycocg_ptr = NULL;
 // 2D DWT column operations
 static dwt_2d_column_extract_func_t dwt_2d_extract_column_ptr = NULL;
 static dwt_2d_column_insert_func_t dwt_2d_insert_column_ptr = NULL;
 // =============================================================================
 // SIMD Capability Detection
 // =============================================================================
 // SIMD capability levels reported by detect_simd_capabilities().
 // The numeric values index the simd_names[] table in tav_simd_init(); they are
 // not ordered by capability.
 typedef enum {
    SIMD_NONE = 0,
    SIMD_AVX512F = 1,
    SIMD_AVX2 = 2,
    SIMD_SSE42 = 3
 } simd_level_t;
 // Level stored by tav_simd_init(); SIMD_NONE until it has run.
 static simd_level_t detected_simd_level = SIMD_NONE;
 // Reports the best SIMD level that is both compiled in and supported by the
 // CPU this code is running on.
 //
 // Returns SIMD_AVX512F only when the build defines __AVX512F__ and the CPU
 // reports avx512f, avx512dq, avx512bw and avx512vl; SIMD_AVX2 only when the
 // build defines __AVX2__ and the CPU reports avx2; otherwise SIMD_SSE42 when
 // the CPU reports sse4.2; SIMD_NONE in every other case.
 static inline simd_level_t detect_simd_capabilities(void) {
 #if (defined(__GNUC__) || defined(__clang__)) && \
     (defined(__x86_64__) || defined(__i386__))
    // __builtin_cpu_supports() with x86 feature names is only usable when
    // GCC/Clang target x86; on any other target this whole branch is skipped
    // so the header still compiles and reports SIMD_NONE.
    if (!__builtin_cpu_supports("sse4.2")) {
        return SIMD_NONE;
    }
 #ifdef __AVX512F__
    if (__builtin_cpu_supports("avx512f") &&
        __builtin_cpu_supports("avx512dq") &&
        __builtin_cpu_supports("avx512bw") &&
        __builtin_cpu_supports("avx512vl")) {
        return SIMD_AVX512F;
    }
 #endif
 #ifdef __AVX2__
    if (__builtin_cpu_supports("avx2")) {
        return SIMD_AVX2;
    }
 #endif
    // SSE4.2 support was already confirmed by the first check
    return SIMD_SSE42;
 #else
    return SIMD_NONE;
 #endif
 }
 // =============================================================================
 // Scalar Fallback Wrappers
 // =============================================================================
 // These wrappers adapt the scalar functions to match function pointer signatures
 static void quantise_dwt_coefficients_scalar_wrapper(
    float *coeffs, int16_t *quantised, int size,
    float effective_q, float dead_zone_threshold,
    int width, int height, int decomp_levels, int is_chroma,
    int (*get_subband_level)(int, int, int, int),
    int (*get_subband_type)(int, int, int, int)
 );
 // Implementation provided by including encoder - just declare prototype
 static void quantise_dwt_coefficients_perceptual_scalar_wrapper(
    float *coeffs, int16_t *quantised, int size,
    float *weights, float base_quantiser
 );
 // Implementation provided by including encoder
 // Scalar fallback: copies column x of a row-major tile (width floats per row,
 // height rows) into column[0 .. height). Does nothing when height <= 0.
 static void dwt_2d_extract_column_scalar(
    const float *tile_data, float *column,
    int x, int width, int height
 ) {
    int row = 0;
    while (row < height) {
        column[row] = tile_data[x + row * width];
        row++;
    }
 }
 // Scalar fallback: writes column[0 .. height) into column x of a row-major
 // tile (width floats per row, height rows). Does nothing when height <= 0.
 static void dwt_2d_insert_column_scalar(
    float *tile_data, const float *column,
    int x, int width, int height
 ) {
    int row = 0;
    while (row < height) {
        tile_data[x + row * width] = column[row];
        row++;
    }
 }
 // =============================================================================
 // SIMD Initialization
 // =============================================================================
 // Detects the SIMD level, stores it in detected_simd_level, logs it to stderr
 // and points every dispatch function pointer at an implementation.
 //
 // Only the AVX-512 level has a dedicated path, and only when the build defines
 // __AVX512F__: forward DWT, RGB->YCoCg and the 2D column helpers use the
 // *_avx512 functions, while inverse DWT and both quantisers stay scalar.
 // Every other detected level (including AVX2 and SSE4.2) uses the scalar set.
 static void tav_simd_init(void) {
    // Scalar implementations. They are expected to be defined before this
    // header is included (see the usage notes at the top of the file).
    extern void dwt_53_forward_1d(float *data, int length);
    extern void dwt_97_forward_1d(float *data, int length);
    extern void dwt_haar_forward_1d(float *data, int length);
    extern void dwt_53_inverse_1d(float *data, int length);
    extern void dwt_haar_inverse_1d(float *data, int length);
    extern void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height);

    // Detect CPU capabilities and report them
    detected_simd_level = detect_simd_capabilities();
    const char *simd_names[] = {"None", "AVX-512", "AVX2", "SSE4.2"};
    fprintf(stderr, "[TAV] SIMD level detected: %s\n",
            simd_names[detected_simd_level]);
 #ifdef __AVX512F__
    if (detected_simd_level == SIMD_AVX512F) {
        extern void dwt_53_forward_1d_avx512(float *data, int length);
        extern void dwt_97_forward_1d_avx512(float *data, int length);
        extern void dwt_haar_forward_1d_avx512(float *data, int length);
        extern void rgb_to_ycocg_avx512(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height);
        extern void dwt_2d_extract_column_avx512(const float *tile_data, float *column, int x, int width, int height);
        extern void dwt_2d_insert_column_avx512(float *tile_data, const float *column, int x, int width, int height);

        fprintf(stderr, "[TAV] Using AVX-512 optimizations\n");
        // Forward DWT
        dwt_53_forward_1d_ptr = dwt_53_forward_1d_avx512;
        dwt_97_forward_1d_ptr = dwt_97_forward_1d_avx512;
        dwt_haar_forward_1d_ptr = dwt_haar_forward_1d_avx512;
        // Inverse DWT has no AVX-512 version yet: scalar fallback
        dwt_53_inverse_1d_ptr = dwt_53_inverse_1d;
        dwt_haar_inverse_1d_ptr = dwt_haar_inverse_1d;
        // Color conversion
        rgb_to_ycocg_ptr = rgb_to_ycocg_avx512;
        // 2D column operations
        dwt_2d_extract_column_ptr = dwt_2d_extract_column_avx512;
        dwt_2d_insert_column_ptr = dwt_2d_insert_column_avx512;
        // Quantization still goes through the scalar wrappers (the AVX-512
        // quantisers do not match the dispatch signature)
        quantise_dwt_coefficients_ptr = quantise_dwt_coefficients_scalar_wrapper;
        quantise_dwt_coefficients_perceptual_ptr = quantise_dwt_coefficients_perceptual_scalar_wrapper;
        return;
    }
 #endif
    // Everything else: scalar implementations across the board
    fprintf(stderr, "[TAV] Using scalar (non-SIMD) implementations\n");
    dwt_53_forward_1d_ptr = dwt_53_forward_1d;
    dwt_97_forward_1d_ptr = dwt_97_forward_1d;
    dwt_haar_forward_1d_ptr = dwt_haar_forward_1d;
    dwt_53_inverse_1d_ptr = dwt_53_inverse_1d;
    dwt_haar_inverse_1d_ptr = dwt_haar_inverse_1d;
    rgb_to_ycocg_ptr = rgb_to_ycocg;
    dwt_2d_extract_column_ptr = dwt_2d_extract_column_scalar;
    dwt_2d_insert_column_ptr = dwt_2d_insert_column_scalar;
    quantise_dwt_coefficients_ptr = quantise_dwt_coefficients_scalar_wrapper;
    quantise_dwt_coefficients_perceptual_ptr = quantise_dwt_coefficients_perceptual_scalar_wrapper;
 }
 // =============================================================================
 // Convenience Macros for Code Readability
 // =============================================================================
 // Use these macros in encoder code for cleaner dispatch
 #define DWT_53_FORWARD_1D(data, length) \
    dwt_53_forward_1d_ptr((data), (length))
 #define DWT_97_FORWARD_1D(data, length) \
    dwt_97_forward_1d_ptr((data), (length))
 #define DWT_HAAR_FORWARD_1D(data, length) \
    dwt_haar_forward_1d_ptr((data), (length))
 #define RGB_TO_YCOCG(rgb, y, co, cg, width, height) \
    rgb_to_ycocg_ptr((rgb), (y), (co), (cg), (width), (height))
 #define DWT_2D_EXTRACT_COLUMN(tile_data, column, x, width, height) \
    dwt_2d_extract_column_ptr((tile_data), (column), (x), (width), (height))
 #define DWT_2D_INSERT_COLUMN(tile_data, column, x, width, height) \
    dwt_2d_insert_column_ptr((tile_data), (column), (x), (width), (height))
 #endif // TAV_SIMD_DISPATCH_H
--- a/video_encoder/include/tav_video_decoder.h
+++ b/video_encoder/include/tav_video_decoder.h
@@ -1,78 +0,0 @@
 // Created by CuriousTorvald and Claude on 2025-12-02.
 // TAV Video Decoder Library - Shared decoding functions for TAV format
 // Can be used by both regular TAV decoder and TAV-DT decoder
 #ifndef TAV_VIDEO_DECODER_H
 #define TAV_VIDEO_DECODER_H
 #include <stdint.h>
 #include <stddef.h>
 // Video decoder context - opaque to users
 typedef struct tav_video_context tav_video_context_t;
 // Video parameters structure
 // Describes the stream to be decoded; passed to tav_video_create().
 typedef struct {
    int width;
    int height;
    int decomp_levels;        // Spatial DWT levels (typically 4)
    int temporal_levels;      // Temporal DWT levels (typically 2)
    int wavelet_filter;       // 0=CDF 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar
    // NOTE(review): tav_encoder_lib.h documents its temporal_wavelet field as
    // 0=Haar, 1=CDF 5/3 - confirm which mapping is correct.
    int temporal_wavelet;     // Temporal wavelet (0=CDF 5/3, 1=CDF 9/7)
    int entropy_coder;        // 0=Twobitmap, 1=EZBC, 2=RAW
    int channel_layout;       // 0=YCoCg-R, 1=ICtCp
    int perceptual_tuning;    // 1=perceptual quantisation, 0=uniform
    uint8_t quantiser_y;      // Base quantiser index for Y/I
    uint8_t quantiser_co;     // Base quantiser index for Co/Ct
    uint8_t quantiser_cg;     // Base quantiser index for Cg/Cp
    uint8_t encoder_preset;   // Encoder preset flags (sports, anime, etc.)
    int monoblock;            // 1=single tile (monoblock), 0=multi-tile
    int no_zstd;              // 1=packets are uncompressed (Video Flags bit 4), 0=Zstd compressed
 } tav_video_params_t;
 // Create video decoder context
 // Returns NULL on failure
 tav_video_context_t *tav_video_create(const tav_video_params_t *params);
 // Free video decoder context
 void tav_video_free(tav_video_context_t *ctx);
 // Decode GOP_UNIFIED packet (0x12) to RGB24 frames
 // Input: compressed_data - GOP packet data (after packet type byte)
 //        compressed_size - size of compressed data
 //        gop_size - number of frames in GOP (read from packet)
 // Output: rgb_frames - array of pointers to RGB24 frame buffers (width*height*3 each)
 //         Must be pre-allocated by caller (gop_size pointers, each pointing to width*height*3 bytes)
 // Returns: 0 on success, -1 on error
 int tav_video_decode_gop(tav_video_context_t *ctx,
                         const uint8_t *compressed_data, uint32_t compressed_size,
                         uint8_t gop_size, uint8_t **rgb_frames);
 // Decode IFRAME packet (0x10) to RGB24 frame
 // Input: compressed_data - I-frame packet data (after packet type byte)
 //        packet_size - size of packet data
 // Output: rgb_frame - pointer to RGB24 frame buffer (width*height*3 bytes)
 //         Must be pre-allocated by caller
 // Returns: 0 on success, -1 on error
 int tav_video_decode_iframe(tav_video_context_t *ctx,
                            const uint8_t *compressed_data, uint32_t packet_size,
                            uint8_t *rgb_frame);
 // Decode PFRAME packet (0x11) to RGB24 frame (delta from reference)
 // Input: compressed_data - P-frame packet data (after packet type byte)
 //        packet_size - size of packet data
 // Output: rgb_frame - pointer to RGB24 frame buffer (width*height*3 bytes)
 //         Must be pre-allocated by caller
 // Returns: 0 on success, -1 on error
 // Note: Requires previous frame to be decoded first (stored internally as reference)
 int tav_video_decode_pframe(tav_video_context_t *ctx,
                            const uint8_t *compressed_data, uint32_t packet_size,
                            uint8_t *rgb_frame);
 // Get last error message
 const char *tav_video_get_error(tav_video_context_t *ctx);
 // Enable verbose debug output
 void tav_video_set_verbose(tav_video_context_t *ctx, int verbose);
 #endif // TAV_VIDEO_DECODER_H
--- a/video_encoder/lib/libfec/ldpc.c
+++ b/video_encoder/lib/libfec/ldpc.c
@@ -1,397 +0,0 @@
 /**
 * LDPC Rate 1/2 Codec Implementation
 *
 * LDPC for TAV-DT header protection.
 * Uses a systematic rate 1/2 code with sum-product belief propagation decoder.
 *
 * The parity-check matrix is designed for good error correction on small blocks.
 * Each parity bit is computed as XOR of multiple data bits using a pseudo-random
 * but deterministic pattern.
 *
 * Created by CuriousTorvald and Claude on 2025-12-09.
 * Updated 2025-12-17: Replaced bit-flipping with belief propagation decoder.
 */
 #include "ldpc.h"
 #include <string.h>
 #include <stdio.h>
 #include <math.h>
 // Channel LLR magnitude for hard-decision input
 // Higher value = more confidence in received bits
 // For BER ~0.01, optimal is about 4.6; we use slightly lower for robustness
 #define CHANNEL_LLR_MAG 4.0f
 // Clipping value to prevent numerical overflow in tanh operations
 #define LLR_CLIP 20.0f
 // =============================================================================
 // Parity-Check Matrix Generation
 // =============================================================================
 // For rate 1/2 LDPC: n = 2k bits, parity-check matrix H is (n-k) x n = k x 2k
 // We use H = [P | I_k] where P is the parity pattern matrix
 // This gives systematic encoding: c = [data | parity] where parity = P * data
 // Parity pattern: each parity bit j depends on data bits where pattern[j][i] = 1
 // We use a regular pattern with column weight 3 (each data bit affects 3 parity bits)
 // and row weight varies to cover the data bits well
 // Mix two 32-bit words into a single pseudo-random 32-bit value.
 // The parity-connection pattern is derived from this, so the exact constants
 // and shift amounts define the code and must not change.
 static inline uint32_t hash_mix(uint32_t a, uint32_t b) {
    uint32_t mixed = a ^ b;
    mixed ^= mixed >> 16;
    mixed *= 0x85ebca6b;
    mixed ^= mixed >> 13;
    mixed *= 0xc2b2ae35;
    mixed ^= mixed >> 16;
    return mixed;
 }
 // Read bit `bit_idx` of a byte array (MSB-first: bit 0 is the top bit of data[0]).
 // Returns 0 or 1.
 static inline int get_bit(const uint8_t *data, int bit_idx) {
    const uint8_t byte = data[bit_idx >> 3];
    const int shift = 7 - (bit_idx & 7);
    return (byte >> shift) & 1;
 }
 // Write bit `bit_idx` of a byte array (MSB-first ordering).
 // Any non-zero `value` sets the bit; zero clears it.
 static inline void set_bit(uint8_t *data, int bit_idx, int value) {
    uint8_t *target = &data[bit_idx >> 3];
    const int mask = 1 << (7 - (bit_idx & 7));
    *target = (uint8_t)(value ? (*target | mask) : (*target & ~mask));
 }
 // Invert bit `bit_idx` of a byte array (MSB-first ordering).
 static inline void flip_bit(uint8_t *data, int bit_idx) {
    // 0x80 >> k selects the same bit as 1 << (7 - k) for k in 0..7
    data[bit_idx >> 3] ^= (uint8_t)(0x80 >> (bit_idx & 7));
 }
 // List the data bits that feed parity bit `parity_idx`.
 //
 // parity_idx  - index of the parity bit (0 .. k_bits-1)
 // k_bits      - number of data bits in the block (must be > 0)
 // connections - output array; receives the connected data-bit indices in
 //               ascending order (worst case k_bits entries)
 // Returns the number of indices written.
 //
 // The pattern is deterministic: a data bit i is connected when
 // hash_mix(seed, i) is a multiple of (k_bits/3 + 1), i.e. with probability of
 // roughly 3/k_bits, so a parity bit has about 3 connections on average.
 // This function defines the code itself; encoder and decoder must agree on it.
 static int get_parity_connections(int parity_idx, int k_bits, int *connections) {
    // Per-parity seed so that each parity bit gets a different pattern
    const uint32_t seed = hash_mix(0xDEADBEEF, (uint32_t)parity_idx);
    int n_found = 0;
    int data_bit = 0;
    while (data_bit < k_bits) {
        const uint32_t h = hash_mix(seed, (uint32_t)data_bit);
        if ((h % (k_bits / 3 + 1)) == 0) {
            connections[n_found] = data_bit;
            n_found++;
        }
        data_bit++;
    }
    // Fewer than two hash hits: append two fixed positions half a block apart.
    // They are appended without checking against an entry already found.
    if (n_found < 2) {
        connections[n_found] = parity_idx % k_bits;
        connections[n_found + 1] = (parity_idx + k_bits / 2) % k_bits;
        n_found += 2;
    }
    return n_found;
 }
 // List the parity bits whose equation includes data bit `data_idx`.
 //
 // This is the inverse view of get_parity_connections(): every parity row is
 // regenerated and scanned, so the cost is proportional to k_bits rows.
 // connections receives the parity indices in ascending order; the return value
 // is how many were written.
 static int get_data_connections(int data_idx, int k_bits, int *connections) {
    int row[LDPC_MAX_DATA_BYTES * 8];
    int n_hits = 0;
    for (int parity = 0; parity < k_bits; parity++) {
        const int row_len = get_parity_connections(parity, k_bits, row);
        int pos = 0;
        while (pos < row_len && row[pos] != data_idx) {
            pos++;
        }
        // pos stopped early only if data_idx appears in this parity row
        if (pos < row_len) {
            connections[n_hits] = parity;
            n_hits++;
        }
    }
    return n_hits;
 }
 // =============================================================================
 // Initialization
 // =============================================================================
 // Set once ldpc_init() has run.
 static int ldpc_initialized = 0;
 // One-time codec setup. Connection patterns are generated on demand, so there
 // is nothing to precompute: the call only records that it has happened.
 void ldpc_init(void) {
    if (!ldpc_initialized) {
        ldpc_initialized = 1;
    }
 }
 // =============================================================================
 // Encoding
 // =============================================================================
 // Systematic rate-1/2 encode: output = [data bytes][parity bytes].
 //
 // data     - input bytes
 // data_len - number of input bytes; values above LDPC_MAX_DATA_BYTES are
 //            clamped to LDPC_MAX_DATA_BYTES (the excess input is ignored)
 // output   - receives 2 * (clamped data_len) bytes
 // Returns the number of bytes written to output.
 size_t ldpc_encode(const uint8_t *data, size_t data_len, uint8_t *output) {
    if (!ldpc_initialized) ldpc_init();
    const size_t k_bytes = (data_len > LDPC_MAX_DATA_BYTES) ? (size_t)LDPC_MAX_DATA_BYTES : data_len;
    const int k_bits = (int)(k_bytes * 8);
    uint8_t *parity_out = output + k_bytes;
    // Systematic part first, then a cleared parity area to fill bit by bit
    memcpy(output, data, k_bytes);
    memset(parity_out, 0, k_bytes);
    int taps[LDPC_MAX_DATA_BYTES * 8];
    for (int p = 0; p < k_bits; p++) {
        const int n_taps = get_parity_connections(p, k_bits, taps);
        // Parity bit p is the XOR of the data bits it is connected to
        int acc = 0;
        for (int t = 0; t < n_taps; t++) {
            acc ^= get_bit(data, taps[t]);
        }
        set_bit(parity_out, p, acc);
    }
    return k_bytes * 2;
 }
 // =============================================================================
 // Decoding
 // =============================================================================
 // Check whether `codeword` satisfies every parity equation.
 //
 // codeword - [data bytes][parity bytes], as produced by ldpc_encode()
 // len      - total codeword length in bytes
 // Returns 1 when the syndrome is all-zero (valid codeword), 0 otherwise.
 //
 // Lengths that ldpc_decode() rejects (shorter than 2, odd, or with more than
 // LDPC_MAX_DATA_BYTES data bytes) are reported as invalid here too. Without
 // the upper bound, the fixed-size connections[] buffer below could be
 // overrun, because a row may hold up to k_bits entries.
 int ldpc_check_syndrome(const uint8_t *codeword, size_t len) {
    if (!ldpc_initialized) ldpc_init();
    if (len < 2 || (len & 1) != 0) {
        return 0;  // Not a rate-1/2 codeword
    }
    size_t data_len = len / 2;
    if (data_len > LDPC_MAX_DATA_BYTES) {
        return 0;  // Larger than the codec supports
    }
    int k_bits = (int)(data_len * 8);
    // Check all parity equations
    for (int j = 0; j < k_bits; j++) {
        int connections[LDPC_MAX_DATA_BYTES * 8];
        int n_conns = get_parity_connections(j, k_bits, connections);
        // Syndrome bit: XOR of connected data bits XOR the stored parity bit
        int syndrome = get_bit(codeword + data_len, j);
        for (int c = 0; c < n_conns; c++) {
            syndrome ^= get_bit(codeword, connections[c]);
        }
        if (syndrome != 0) {
            return 0;  // Syndrome non-zero: errors detected
        }
    }
    return 1;  // Zero syndrome: valid codeword
 }
 // Saturate an LLR to the range [-LLR_CLIP, +LLR_CLIP].
 static inline float clip_llr(float llr) {
    const float capped = (llr > LLR_CLIP) ? LLR_CLIP : llr;
    return (capped < -LLR_CLIP) ? -LLR_CLIP : capped;
 }
 // Sign of a float as +1.0f or -1.0f; zero counts as positive.
 static inline float sign_f(float x) {
    if (x >= 0.0f) {
        return 1.0f;
    }
    return -1.0f;
 }
 int ldpc_decode(const uint8_t *encoded, size_t encoded_len, uint8_t *output) {
    if (!ldpc_initialized) ldpc_init();
    if (encoded_len < 2 || (encoded_len & 1) != 0) {
        return -1;  // Invalid length
    }
    size_t data_len = encoded_len / 2;
    if (data_len > LDPC_MAX_DATA_BYTES) {
        return -1;
    }
    int k_bits = (int)(data_len * 8);
    int n_bits = k_bits * 2;  // Total codeword bits (data + parity)
    // Pre-compute the parity check matrix structure for efficiency
    // For each check node j: which variable nodes it connects to
    int check_to_var[LDPC_MAX_DATA_BYTES * 8][LDPC_MAX_DATA_BYTES * 8 + 1];
    int check_degree[LDPC_MAX_DATA_BYTES * 8];
    for (int j = 0; j < k_bits; j++) {
        int connections[LDPC_MAX_DATA_BYTES * 8];
        int n_conns = get_parity_connections(j, k_bits, connections);
        // Check j connects to: data bits in connections[] + parity bit j
        check_degree[j] = n_conns + 1;
        for (int c = 0; c < n_conns; c++) {
            check_to_var[j][c] = connections[c];  // Data bit index
        }
        check_to_var[j][n_conns] = k_bits + j;  // Parity bit index
    }
    // Initialize channel LLRs from received hard bits
    // LLR > 0 means bit is probably 0, LLR < 0 means bit is probably 1
    float channel_llr[LDPC_MAX_DATA_BYTES * 16];
    for (int i = 0; i < n_bits; i++) {
        int bit = get_bit(encoded, i);
        channel_llr[i] = bit ? -CHANNEL_LLR_MAG : CHANNEL_LLR_MAG;
    }
    // Message arrays for BP
    // check_to_var_msg[j][idx] = message from check j to variable check_to_var[j][idx]
    float check_to_var_msg[LDPC_MAX_DATA_BYTES * 8][LDPC_MAX_DATA_BYTES * 8 + 1];
    // Initialize check-to-variable messages to zero
    memset(check_to_var_msg, 0, sizeof(check_to_var_msg));
    // Belief Propagation iterations
    for (int iter = 0; iter < LDPC_MAX_ITERATIONS; iter++) {
        // Step 1: Variable-to-check messages (implicit, computed on the fly)
        // var_to_check[v→j] = channel_llr[v] + sum of all check_to_var_msg[k][idx_v] for k != j
        // Step 2: Check-to-variable messages using min-sum approximation
        // For each check node j, for each connected variable v:
        // check_to_var_msg[j→v] = sign * min(|incoming messages from other vars|)
        for (int j = 0; j < k_bits; j++) {
            int degree = check_degree[j];
            // First, compute variable-to-check messages for all variables in this check
            float var_to_check[LDPC_MAX_DATA_BYTES * 8 + 1];
            for (int idx = 0; idx < degree; idx++) {
                int v = check_to_var[j][idx];
                // Sum all incoming check messages to variable v, except from check j
                float sum = channel_llr[v];
                for (int jj = 0; jj < k_bits; jj++) {
                    if (jj == j) continue;
                    // Find if check jj connects to variable v
                    for (int idx2 = 0; idx2 < check_degree[jj]; idx2++) {
                        if (check_to_var[jj][idx2] == v) {
                            sum += check_to_var_msg[jj][idx2];
                            break;
                        }
                    }
                }
                var_to_check[idx] = clip_llr(sum);
            }
            // Now compute check-to-variable messages using min-sum
            for (int idx = 0; idx < degree; idx++) {
                float sign_prod = 1.0f;
                float min_abs = 1e30f;
                for (int idx2 = 0; idx2 < degree; idx2++) {
                    if (idx2 == idx) continue;
                    float msg = var_to_check[idx2];
                    sign_prod *= sign_f(msg);
                    float abs_msg = fabsf(msg);
                    if (abs_msg < min_abs) min_abs = abs_msg;
                }
                // Min-sum with scaling factor 0.75 for better performance
                check_to_var_msg[j][idx] = clip_llr(sign_prod * min_abs * 0.75f);
            }
        }
        // Step 3: Compute posterior LLRs and make hard decisions
        float posterior[LDPC_MAX_DATA_BYTES * 16];
        for (int v = 0; v < n_bits; v++) {
            float sum = channel_llr[v];
            // Add all incoming check-to-variable messages
            for (int j = 0; j < k_bits; j++) {
                for (int idx = 0; idx < check_degree[j]; idx++) {
                    if (check_to_var[j][idx] == v) {
                        sum += check_to_var_msg[j][idx];
                        break;
                    }
                }
            }
            posterior[v] = sum;
        }
        // Make hard decisions
        uint8_t decoded[LDPC_MAX_DATA_BYTES * 2];
        memset(decoded, 0, encoded_len);
        for (int v = 0; v < n_bits; v++) {
            if (posterior[v] < 0) {
                set_bit(decoded, v, 1);
            }
        }
        // Check syndrome
        int syndrome_count = 0;
        for (int j = 0; j < k_bits; j++) {
            int syn = 0;
            for (int idx = 0; idx < check_degree[j]; idx++) {
                syn ^= get_bit(decoded, check_to_var[j][idx]);
            }
            if (syn) syndrome_count++;
        }
        // If all syndromes are zero, we're done
        if (syndrome_count == 0) {
            memcpy(output, decoded, data_len);
            return 0;
        }
        // Early termination if syndrome count is very small (nearly converged)
        if (iter > 5 && syndrome_count <= 2) {
            // Try one more iteration, if still stuck, accept
        }
    }
    // Decoding did not converge - compute final estimate
    float posterior[LDPC_MAX_DATA_BYTES * 16];
    for (int v = 0; v < n_bits; v++) {
        float sum = channel_llr[v];
        for (int j = 0; j < k_bits; j++) {
            for (int idx = 0; idx < check_degree[j]; idx++) {
                if (check_to_var[j][idx] == v) {
                    sum += check_to_var_msg[j][idx];
                    break;
                }
            }
        }
        posterior[v] = sum;
    }
    uint8_t decoded[LDPC_MAX_DATA_BYTES * 2];
    memset(decoded, 0, encoded_len);
    for (int v = 0; v < n_bits; v++) {
        if (posterior[v] < 0) {
            set_bit(decoded, v, 1);
        }
    }
    // Check final syndrome count
    int final_syndromes = 0;
    for (int j = 0; j < k_bits; j++) {
        int syn = 0;
        for (int idx = 0; idx < check_degree[j]; idx++) {
            syn ^= get_bit(decoded, check_to_var[j][idx]);
        }
        if (syn) final_syndromes++;
    }
    // Accept if syndrome count is low enough
    if (final_syndromes <= k_bits / 4) {
        memcpy(output, decoded, data_len);
        return 0;  // Soft success
    }
    // Total failure - return original data as best effort
    memcpy(output, encoded, data_len);
    return -1;
 }
--- a/video_encoder/lib/libfec/ldpc.h
+++ b/video_encoder/lib/libfec/ldpc.h
@@ -1,68 +0,0 @@
 /**
 * LDPC Rate 1/2 Codec for TAV-DT
 *
 * Simple LDPC implementation for header protection in TAV-DT format.
 * Rate 1/2: k data bytes → 2k encoded bytes (doubles the size)
 *
 * Uses systematic encoding where first k bytes are data, last k bytes are parity.
 * Decoding uses iterative belief propagation (scaled min-sum).
 *
 * Designed for small blocks (headers up to 64 bytes).
 *
 * Created by CuriousTorvald and Claude on 2025-12-09.
 */
 #ifndef LDPC_H
 #define LDPC_H
 #include <stdint.h>
 #include <stddef.h>
 // Maximum block size (data bytes before encoding)
 #define LDPC_MAX_DATA_BYTES 64
 // LDPC decoder parameters
 #define LDPC_MAX_ITERATIONS 50
 /**
 * Initialize LDPC codec.
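 * Calling it explicitly is optional: ldpc_encode(), ldpc_decode() and
 * ldpc_check_syndrome() call it themselves on first use.
 * The current implementation only sets an internal flag (a plain static int,
 * no locking); no tables are built.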
 */
 void ldpc_init(void);
 /**
 * Encode data block with LDPC rate 1/2.
 *
 * @param data      Input data bytes
 * @param data_len  Length of input data (1 to LDPC_MAX_DATA_BYTES)
 * @param output    Output buffer (must hold 2 * data_len bytes)
 * @return          Output length (2 * data_len)
 *
 * Output format: [data bytes][parity bytes]
 * The output is systematic: first data_len bytes are the original data.
 */
 size_t ldpc_encode(const uint8_t *data, size_t data_len, uint8_t *output);
 /**
 * Decode LDPC rate 1/2 encoded block.
 *
 * @param encoded     Input encoded data (2 * data_len bytes)
 * @param encoded_len Length of encoded data (must be even, max 2*LDPC_MAX_DATA_BYTES)
 * @param output      Output buffer for decoded data (encoded_len / 2 bytes)
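 * @return            0 on success, -1 if decoding failed (invalid length or too many errors).
 *                    Note: 0 is also returned when the decoder did not fully converge
 *                    but only a small fraction of parity checks remain unsatisfied,
 *                    so callers needing certainty should validate the data separately.
 *
 * Uses an iterative belief-propagation (scaled min-sum) decoder.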
 */
 int ldpc_decode(const uint8_t *encoded, size_t encoded_len, uint8_t *output);
 /**
 * Calculate syndrome for validation.
 *
 * @param codeword   Encoded codeword (2 * data_len bytes)
 * @param len        Length of codeword
 * @return           1 if valid (zero syndrome), 0 if errors detected
 */
 int ldpc_check_syndrome(const uint8_t *codeword, size_t len);
 #endif // LDPC_H
--- a/video_encoder/lib/libfec/ldpc_payload.c
+++ b/video_encoder/lib/libfec/ldpc_payload.c
@@ -1,478 +0,0 @@
 /**
 * LDPC(255,223) Codec Implementation - Enhanced Version
 *
 * This implements a high-rate LDPC code designed to compete with RS(255,223).
 *
 * Key properties of this version:
 * - Normalized min-sum decoder (an approximation of sum-product belief propagation)
 * - H matrix with staircase parity and hash-driven, degree-balanced data connections
 * - Layered scheduling for faster convergence
 * - Periodic LLR re-initialization ("adaptive restart") when decoding stalls
 *
 * Created by CuriousTorvald and Claude on 2025-12-15.
 */
 #include "ldpc_payload.h"
 #include <string.h>
 #include <stdlib.h>
 #include <math.h>
 #include <stdio.h>
 // =============================================================================
 // Constants
 // =============================================================================
 #define N_BITS    (LDPC_P_BLOCK_SIZE * 8)   // 2040 total bits
 #define K_BITS    (LDPC_P_DATA_SIZE * 8)    // 1784 data bits
 #define M_BITS    (LDPC_P_PARITY_SIZE * 8)  // 256 parity bits
 // LLR bounds - tighter bounds help prevent numerical issues
 #define LLR_MAX  20.0f
 #define LLR_MIN -20.0f
 // Decoding parameters
 #define LDPC_MAX_ITER 100
 // =============================================================================
 // Sparse Matrix Storage
 // =============================================================================
 #define MAX_CHECK_DEGREE 50
 #define MAX_VAR_DEGREE   12
 // Set once build_h_matrix() has populated the tables below.
 static int ldpc_p_initialized = 0;
 // Sparse H matrix stored as adjacency lists in both directions.
 // check_degree[c]         - number of variables attached to check c
 // check_to_var[c][i]      - i-th variable (codeword bit index) of check c
 // check_to_var_idx[c][i]  - slot that this same edge occupies in the
 //                           variable's own list (var_to_check[v][...])
 static int check_degree[M_BITS];
 static int check_to_var[M_BITS][MAX_CHECK_DEGREE];
 static int check_to_var_idx[M_BITS][MAX_CHECK_DEGREE];
 // var_degree[v]           - number of checks attached to variable v
 // var_to_check[v][k]      - k-th check of variable v
 // var_to_check_idx[v][k]  - slot that this same edge occupies in the
 //                           check's own list (check_to_var[c][...])
 static int var_degree[N_BITS];
 static int var_to_check[N_BITS][MAX_VAR_DEGREE];
 static int var_to_check_idx[N_BITS][MAX_VAR_DEGREE];
 // =============================================================================
 // Bit manipulation
 // =============================================================================
 // Read one bit (MSB-first within each byte); returns 0 or 1.
 static inline int get_bit(const uint8_t *data, int bit_idx) {
    const int within = bit_idx & 7;
    const uint8_t byte = data[bit_idx >> 3];
    return (byte >> (7 - within)) & 1;
 }
 // Write one bit (MSB-first within each byte): non-zero `value` sets, zero clears.
 static inline void set_bit(uint8_t *data, int bit_idx, int value) {
    const int mask = 1 << (7 - (bit_idx & 7));
    uint8_t *cell = data + (bit_idx >> 3);
    if (!value) {
        *cell &= ~mask;
        return;
    }
    *cell |= mask;
 }
 // =============================================================================
 // H Matrix Construction - Quasi-Cyclic with Optimized Distribution
 // =============================================================================
 // Deterministic 32-bit mixing hash used to pick pseudo-random graph edges.
 // The constants and shifts define the H matrix and must stay as they are.
 static inline uint32_t hash32(uint32_t a, uint32_t b) {
    uint32_t x = (b * 0x9E3779B9) ^ a;
    x = (x ^ (x >> 16)) * 0x85EBCA6B;
    x = (x ^ (x >> 13)) * 0xC2B2AE35;
    return x ^ (x >> 16);
 }
 // Connect check node `check` and variable node `var` in both adjacency tables.
 // The call is a silent no-op when the edge already exists or when either node
 // has reached its degree limit (MAX_CHECK_DEGREE / MAX_VAR_DEGREE).
 static void add_edge(int check, int var) {
    const int check_slot = check_degree[check];
    const int var_slot = var_degree[var];
    // Duplicate edge?
    for (int s = 0; s < check_slot; s++) {
        if (check_to_var[check][s] == var) return;
    }
    // No room left on one of the two nodes
    if (check_slot >= MAX_CHECK_DEGREE || var_slot >= MAX_VAR_DEGREE) {
        return;
    }
    // Record the edge from both sides, each side remembering the other's slot
    check_to_var[check][check_slot] = var;
    check_to_var_idx[check][check_slot] = var_slot;
    var_to_check[var][var_slot] = check;
    var_to_check_idx[var][var_slot] = check_slot;
    check_degree[check] = check_slot + 1;
    var_degree[var] = var_slot + 1;
 }
 // Cheap test for whether adding edge (v, c) is undesirable.
 // Returns 1 if v is already attached to c, or if a bounded search finds a
 // 4-cycle v - c' - v' - c; returns 0 otherwise.
 // The search is skipped entirely (result 0) for variables of degree > 4 or
 // checks of degree > 20, and it inspects at most the first 15 variables of
 // each neighbouring check and the first 8 checks of each of those variables.
 static int would_create_short_cycle(int v, int c) {
    const int v_deg = var_degree[v];
    // Already connected
    for (int i = 0; i < v_deg; i++) {
        if (var_to_check[v][i] == c) return 1;
    }
    // Only low-degree nodes get the 4-cycle search
    if (v_deg > 4 || check_degree[c] > 20) return 0;
    for (int i = 0; i < v_deg; i++) {
        const int other_check = var_to_check[v][i];
        int row_limit = check_degree[other_check];
        if (row_limit > 15) row_limit = 15;
        for (int j = 0; j < row_limit; j++) {
            const int other_var = check_to_var[other_check][j];
            if (other_var == v) continue;
            int col_limit = var_degree[other_var];
            if (col_limit > 8) col_limit = 8;
            for (int k = 0; k < col_limit; k++) {
                if (var_to_check[other_var][k] == c) return 1;
            }
        }
    }
    return 0;
 }
 // Cyclic shift helper: position of `base_idx` after shifting by `shift`
 // inside a block of `size` entries.
 static int qc_shift(int base_idx, int shift, int size) {
    const int shifted = base_idx + shift;
    return shifted % size;
 }
 // Populate the global adjacency tables with the parity-check matrix H.
 //
 // The construction is fully deterministic (hash-driven), so encoder and
 // decoder always derive the same matrix. It is order-dependent: each step
 // looks at the degrees produced by the previous ones, so the statements
 // below must not be reordered.
 static void build_h_matrix(void) {
    // Start from an empty graph (the edge tables are only read up to the degrees)
    memset(check_degree, 0, sizeof(check_degree));
    memset(var_degree, 0, sizeof(var_degree));
    // ==========================================================================
    // H matrix with staircase parity and greedy, hash-driven data connections
    // ==========================================================================
    // --- Part 1: Staircase parity structure ---
    // Check c touches parity bit c and (for c > 0) parity bit c-1, which is what
    // lets the encoder solve the parity bits by simple back-substitution.
    for (int c = 0; c < M_BITS; c++) {
        int parity_bit = K_BITS + c;
        add_edge(c, parity_bit);
        if (c > 0) {
            add_edge(c, K_BITS + c - 1);
        }
    }
    // --- Part 2: Connect data bits, preferring lightly loaded checks ---
    for (int v = 0; v < K_BITS; v++) {
        // Target 6 connections per variable
        int target = 6;
        for (int d = 0; d < target; d++) {
            uint32_t h = hash32((uint32_t)v * 2654435769U, (uint32_t)d * 1597334677U);
            // Probe up to 16 hashed candidates and keep the lowest-degree check
            // that is not yet connected to v
            int best_c = -1;
            int best_deg = MAX_CHECK_DEGREE;
            for (int attempt = 0; attempt < 16; attempt++) {
                int c = (int)((h + attempt * 127) % M_BITS);
                if (check_degree[c] < best_deg && check_degree[c] < MAX_CHECK_DEGREE - 2) {
                    // Check not already connected
                    int connected = 0;
                    for (int i = 0; i < var_degree[v]; i++) {
                        if (var_to_check[v][i] == c) { connected = 1; break; }
                    }
                    if (!connected) {
                        best_deg = check_degree[c];
                        best_c = c;
                        if (best_deg < 30) break;  // Good enough
                    }
                }
            }
            // Leave one slot of headroom below MAX_VAR_DEGREE
            if (best_c >= 0 && var_degree[v] < MAX_VAR_DEGREE - 1) {
                add_edge(best_c, v);
            }
        }
    }
    // --- Part 3: Fill in low-degree variables ---
    // Bring every data bit up to at least 5 checks where possible
    for (int v = 0; v < K_BITS; v++) {
        while (var_degree[v] < 5) {
            uint32_t h = hash32((uint32_t)v * 12345, (uint32_t)var_degree[v] * 67890);
            int added = 0;
            for (int attempt = 0; attempt < 64 && !added; attempt++) {
                int c = (int)((h + attempt * 31) % M_BITS);
                if (check_degree[c] < MAX_CHECK_DEGREE - 2) {
                    // add_edge() may refuse (duplicate/full); detect success
                    // by watching the variable's degree
                    int prev = var_degree[v];
                    add_edge(c, v);
                    if (var_degree[v] > prev) added = 1;
                }
            }
            // Give up on this variable if none of the 64 candidates worked
            if (!added) break;
        }
    }
    // --- Part 4: Balance check degrees ---
    // Top up each check towards degree 35, trying at most 150 hashed data bits
    for (int c = 0; c < M_BITS; c++) {
        int target = 35;
        int attempts = 0;
        while (check_degree[c] < target && attempts < 150) {
            uint32_t h = hash32((uint32_t)c * 48271, (uint32_t)attempts * 16807);
            int v = (int)(h % K_BITS);
            if (var_degree[v] < MAX_VAR_DEGREE - 1) {
                add_edge(c, v);
            }
            attempts++;
        }
    }
 }
 // Build the H-matrix tables on the first call; later calls do nothing.
 // The guard is a plain flag with no locking.
 void ldpc_p_init(void) {
    if (!ldpc_p_initialized) {
        build_h_matrix();
        ldpc_p_initialized = 1;
    }
 }
 // =============================================================================
 // Syndrome Check
 // =============================================================================
 // Verify a full LDPC_P_BLOCK_SIZE-byte codeword against every parity check.
 // Returns 1 if all checks are satisfied (zero syndrome), 0 at the first
 // violated check.
 int ldpc_p_check_syndrome(const uint8_t *codeword) {
    if (!ldpc_p_initialized) ldpc_p_init();
    for (int c = 0; c < M_BITS; c++) {
        const int *row = check_to_var[c];
        const int row_len = check_degree[c];
        int parity = 0;
        for (int e = 0; e < row_len; e++) {
            parity ^= get_bit(codeword, row[e]);
        }
        if (parity) return 0;
    }
    return 1;
 }
 // =============================================================================
 // Encoding
 // =============================================================================
 // Encode one block.
 //
 // data     - input bytes
 // data_len - number of input bytes; values above LDPC_P_DATA_SIZE are clamped
 // output   - receives a full LDPC_P_BLOCK_SIZE-byte codeword laid out as
 //            [data, zero-padded to LDPC_P_DATA_SIZE][LDPC_P_PARITY_SIZE parity]
 // Returns LDPC_P_BLOCK_SIZE (always).
 size_t ldpc_p_encode(const uint8_t *data, size_t data_len, uint8_t *output) {
    if (!ldpc_p_initialized) ldpc_p_init();
    const size_t payload = (data_len > LDPC_P_DATA_SIZE) ? (size_t)LDPC_P_DATA_SIZE : data_len;
    // Data first; everything after it (padding and parity area) starts as zero
    memcpy(output, data, payload);
    memset(output + payload, 0, LDPC_P_BLOCK_SIZE - payload);
    uint8_t *parity_out = output + LDPC_P_DATA_SIZE;
    // Staircase back-substitution: check c contains parity bits c and c-1, so
    // parity[c] = (XOR of the check's data bits) ^ parity[c-1].
    int running_parity = 0;
    for (int c = 0; c < M_BITS; c++) {
        int data_xor = 0;
        for (int e = 0; e < check_degree[c]; e++) {
            const int v = check_to_var[c][e];
            if (v < K_BITS) {
                data_xor ^= get_bit(output, v);
            }
        }
        running_parity ^= data_xor;
        set_bit(parity_out, c, running_parity);
    }
    return LDPC_P_BLOCK_SIZE;
 }
 // =============================================================================
 // Min-Sum Decoder with Optimized Parameters
 // =============================================================================
 // Saturate an LLR to the range [LLR_MIN, LLR_MAX].
 static inline float clamp_llr(float x) {
    const float capped = (x > LLR_MAX) ? LLR_MAX : x;
    return (capped < LLR_MIN) ? LLR_MIN : capped;
 }
 int ldpc_p_decode(uint8_t *data, size_t data_len) {
    if (!ldpc_p_initialized) ldpc_p_init();
    size_t total_len = data_len + LDPC_P_PARITY_SIZE;
    if (total_len > LDPC_P_BLOCK_SIZE) {
        return -1;
    }
    // Working codeword buffer
    uint8_t codeword[LDPC_P_BLOCK_SIZE];
    memcpy(codeword, data, total_len);
    if (total_len < LDPC_P_BLOCK_SIZE) {
        memset(codeword + total_len, 0, LDPC_P_BLOCK_SIZE - total_len);
    }
    // Quick check - if already valid, no decoding needed
    if (ldpc_p_check_syndrome(codeword)) {
        return 0;
    }
    // ==========================================================================
    // Initialize channel LLRs
    // ==========================================================================
    float var_llr[N_BITS];
    float llr_magnitude = 6.0f;
    for (int v = 0; v < N_BITS; v++) {
        int bit = get_bit(codeword, v);
        var_llr[v] = bit ? -llr_magnitude : llr_magnitude;
    }
    // Message storage
    static float c2v[M_BITS][MAX_CHECK_DEGREE];
    for (int c = 0; c < M_BITS; c++) {
        for (int i = 0; i < check_degree[c]; i++) {
            c2v[c][i] = 0.0f;
        }
    }
    // ==========================================================================
    // Normalized Min-Sum Decoding with Layered Scheduling
    // ==========================================================================
    float v2c[MAX_CHECK_DEGREE];
    const float alpha = 0.75f;  // Normalization factor
    for (int iter = 0; iter < LDPC_MAX_ITER; iter++) {
        // Process each check node (layer)
        for (int c = 0; c < M_BITS; c++) {
            int deg = check_degree[c];
            // Step 1: Compute variable-to-check messages
            for (int i = 0; i < deg; i++) {
                int v = check_to_var[c][i];
                v2c[i] = var_llr[v] - c2v[c][i];
            }
            // Step 2: Compute check-to-variable messages using min-sum
            for (int i = 0; i < deg; i++) {
                float sign_prod = 1.0f;
                float min1 = LLR_MAX, min2 = LLR_MAX;
                for (int j = 0; j < deg; j++) {
                    if (j == i) continue;
                    float val = v2c[j];
                    if (val < 0) sign_prod = -sign_prod;
                    float absval = fabsf(val);
                    if (absval < min1) {
                        min2 = min1;
                        min1 = absval;
                    } else if (absval < min2) {
                        min2 = absval;
                    }
                }
                // Normalized min-sum message
                float msg_mag = alpha * min1;
                float new_c2v = sign_prod * msg_mag;
                // Update variable LLR immediately (layered approach)
                int v = check_to_var[c][i];
                var_llr[v] = clamp_llr(var_llr[v] - c2v[c][i] + new_c2v);
                c2v[c][i] = new_c2v;
            }
        }
        // Make hard decisions
        for (int v = 0; v < N_BITS; v++) {
            set_bit(codeword, v, var_llr[v] < 0 ? 1 : 0);
        }
        // Check if valid codeword
        if (ldpc_p_check_syndrome(codeword)) {
            memcpy(data, codeword, data_len);
            return iter + 1;
        }
        // Adaptive restart at iteration milestones
        if (iter == 25 || iter == 50 || iter == 75) {
            float new_mag = 4.0f - (iter / 25) * 0.5f;
            for (int v = 0; v < N_BITS; v++) {
                int bit = get_bit(codeword, v);
                var_llr[v] = bit ? -new_mag : new_mag;
            }
            for (int c = 0; c < M_BITS; c++) {
                for (int i = 0; i < check_degree[c]; i++) {
                    c2v[c][i] = 0.0f;
                }
            }
        }
    }
    // Failed to converge
    memcpy(data, codeword, data_len);
    return -1;
 }
 // =============================================================================
 // Block-level operations
 // =============================================================================
 // Encode an arbitrary-length buffer as consecutive LDPC_P_BLOCK_SIZE-byte blocks.
 //
 // Each block carries up to LDPC_P_DATA_SIZE input bytes; a short final chunk
 // is zero-padded by ldpc_p_encode(). output must have room for one full block
 // per started chunk. Returns the number of bytes written (0 for empty input).
 size_t ldpc_p_encode_blocks(const uint8_t *data, size_t data_len, uint8_t *output) {
    if (!ldpc_p_initialized) ldpc_p_init();
    size_t produced = 0;
    size_t consumed = 0;
    while (consumed < data_len) {
        size_t chunk = data_len - consumed;
        if (chunk > LDPC_P_DATA_SIZE) {
            chunk = LDPC_P_DATA_SIZE;
        }
        ldpc_p_encode(data + consumed, chunk, output + produced);
        consumed += chunk;
        produced += LDPC_P_BLOCK_SIZE;
    }
    return produced;
 }
 // Decode a run of full LDPC_P_BLOCK_SIZE-byte blocks.
 //
 // data       - encoded blocks; each block is corrected in place
 // total_len  - number of encoded bytes available (a trailing partial block
 //              is ignored)
 // output     - receives the recovered data bytes
 // output_len - number of data bytes wanted; decoding stops once they have
 //              been produced or the input runs out of whole blocks
 //
 // Returns the sum of the per-block iteration counts, or -1 as soon as any
 // block fails to decode (earlier blocks have already been copied to output).
 int ldpc_p_decode_blocks(uint8_t *data, size_t total_len, uint8_t *output, size_t output_len) {
    if (!ldpc_p_initialized) ldpc_p_init();
    int iteration_sum = 0;
    size_t offset = 0;   // bytes of encoded input consumed so far
    size_t written = 0;  // bytes of decoded output produced so far
    while (total_len - offset >= LDPC_P_BLOCK_SIZE && written < output_len) {
        uint8_t *block = data + offset;
        const int iters = ldpc_p_decode(block, LDPC_P_DATA_SIZE);
        if (iters < 0) {
            return -1;
        }
        iteration_sum += iters;
        // The last block may contribute fewer than LDPC_P_DATA_SIZE bytes
        size_t want = output_len - written;
        if (want > LDPC_P_DATA_SIZE) {
            want = LDPC_P_DATA_SIZE;
        }
        memcpy(output + written, block, want);
        written += want;
        offset += LDPC_P_BLOCK_SIZE;
    }
    return iteration_sum;
 }
--- a/video_encoder/lib/libfec/ldpc_payload.h
+++ b/video_encoder/lib/libfec/ldpc_payload.h
@@ -1,97 +0,0 @@
 /**
 * LDPC(255,223) Codec for TAV-DT Payloads
 *
 * Alternative to RS(255,223) with same rate (~0.875):
 * - Block size: 255 bytes (223 data + 32 parity)
 * - Uses quasi-cyclic LDPC structure for efficiency
 * - Soft-decision belief propagation decoder
 *
 * Designed as drop-in replacement for RS(255,223):
 * - Same input/output sizes
 * - Same API style
 * - Different error correction characteristics:
 *   - LDPC: Better at high BER (>1e-3), gradual degradation
 *   - RS: Better at low BER, hard threshold at 16 byte errors
 *
 * Created by CuriousTorvald and Claude on 2025-12-15.
 */
 #ifndef LDPC_PAYLOAD_H
 #define LDPC_PAYLOAD_H
 #include <stdint.h>
 #include <stddef.h>
 // LDPC(255,223) parameters - matches RS(255,223) for drop-in replacement
 #define LDPC_P_BLOCK_SIZE    255   // Total codeword size (bytes)
 #define LDPC_P_DATA_SIZE     223   // Data bytes per block
 #define LDPC_P_PARITY_SIZE   32    // Parity bytes per block
 // Decoder parameters
 #define LDPC_P_MAX_ITERATIONS 30   // Maximum BP iterations
 #define LDPC_P_EARLY_TERM     1    // Enable early termination on valid codeword
 /**
 * Initialize LDPC(255,223) codec.
 * Must be called once before using encode/decode functions.
 * Thread-safe: uses static initialization.
 */
 void ldpc_p_init(void);
 /**
 * Encode data block with LDPC(255,223).
 *
 * @param data      Input data (up to LDPC_P_DATA_SIZE bytes)
 * @param data_len  Length of input data (1 to LDPC_P_DATA_SIZE)
 * @param output    Output buffer (must hold data_len + LDPC_P_PARITY_SIZE bytes)
 *                  Format: [data][parity]
 * @return          Total output length (data_len + LDPC_P_PARITY_SIZE)
 *
 * Note: For data shorter than LDPC_P_DATA_SIZE, the encoder pads with zeros
 * internally but only outputs actual data + parity.
 */
 size_t ldpc_p_encode(const uint8_t *data, size_t data_len, uint8_t *output);
 /**
 * Decode and correct LDPC(255,223) encoded block.
 *
 * @param data      Buffer containing [data][parity] (modified in-place)
 * @param data_len  Length of data portion (1 to LDPC_P_DATA_SIZE)
 * @return          Number of iterations used (1-30), or -1 if uncorrectable
 *
 * On success, data buffer contains corrected data.
 * On failure, data buffer contents are undefined.
 */
 int ldpc_p_decode(uint8_t *data, size_t data_len);
 /**
 * Encode data with automatic block splitting.
 * For data larger than LDPC_P_DATA_SIZE, splits into multiple blocks.
 *
 * @param data        Input data
 * @param data_len    Length of input data
 * @param output      Output buffer (must hold ceil(data_len/223) * 255 bytes)
 * @return            Total output length
 */
 size_t ldpc_p_encode_blocks(const uint8_t *data, size_t data_len, uint8_t *output);
 /**
 * Decode data with automatic block splitting.
 *
 * @param data        Buffer containing LDPC-encoded blocks (modified in-place)
 * @param total_len   Total length of encoded data (multiple of LDPC_P_BLOCK_SIZE)
 * @param output      Output buffer for decoded data
 * @param output_len  Expected length of decoded data
 * @return            Total iterations across all blocks, or -1 if any block failed
 */
 int ldpc_p_decode_blocks(uint8_t *data, size_t total_len, uint8_t *output, size_t output_len);
 /**
 * Check if codeword is valid (syndrome check).
 *
 * @param codeword   Full codeword (LDPC_P_BLOCK_SIZE bytes)
 * @return           1 if valid (zero syndrome), 0 if errors detected
 */
 int ldpc_p_check_syndrome(const uint8_t *codeword);
 #endif // LDPC_PAYLOAD_H
--- a/video_encoder/lib/libfec/reed_solomon.c
+++ b/video_encoder/lib/libfec/reed_solomon.c
@@ -1,417 +0,0 @@
 /**
 * Reed-Solomon (255,223) Codec Implementation
 *
 * Standard RS code over GF(2^8) for TAV-DT forward error correction.
 *
 * Created by CuriousTorvald and Claude on 2025-12-09.
 */
 #include "reed_solomon.h"
 #include <string.h>
 #include <stdio.h>
 // =============================================================================
 // Galois Field GF(2^8) Arithmetic
 // =============================================================================
 // Primitive polynomial: x^8 + x^4 + x^3 + x^2 + 1 = 0x11D
 // (XORed into the running value whenever a left shift sets bit 8).
 #define GF_PRIMITIVE 0x11D
 #define GF_SIZE      256
 // Number of exp-table entries generated before the table repeats; exponents are
 // reduced modulo this value.
 #define GF_MAX       255
 // Lookup tables for GF(2^8) arithmetic (filled by init_gf_tables / init_generator)
 static uint8_t gf_exp[512];  // Anti-log table (doubled for easy modular reduction)
 static uint8_t gf_log[256];  // Log table
 // Index k holds the coefficient of x^k (gf_generator[RS_PARITY_SIZE] is the leading term).
 static uint8_t gf_generator[RS_PARITY_SIZE + 1];  // Generator polynomial coefficients
 // Set to 1 by rs_init(); a plain int with no synchronization.
 static int rs_initialized = 0;
 // Build the GF(2^8) exp/log tables by repeatedly multiplying by alpha (= 2).
 static void init_gf_tables(void) {
    uint16_t elem = 1;
    int power = 0;
    while (power < GF_MAX) {
        gf_exp[power] = (uint8_t)elem;
        gf_log[elem] = (uint8_t)power;
        // Next power of alpha: shift left, reduce by the primitive polynomial on overflow.
        elem <<= 1;
        if (elem & 0x100) {
            elem ^= GF_PRIMITIVE;
        }
        power++;
    }
    // Mirror the table so that a sum of two logs can index it without a modulo.
    for (int idx = GF_MAX; idx < 512; idx++) {
        gf_exp[idx] = gf_exp[idx - GF_MAX];
    }
    // log(0) does not exist; store 0 so the entry is at least defined.
    gf_log[0] = 0;
 }
 // GF(2^8) product via log/anti-log tables; zero operands short-circuit to 0.
 static inline uint8_t gf_mul(uint8_t a, uint8_t b) {
    if (a != 0 && b != 0) {
        // Sum of logs is at most 508, which the doubled exp table covers.
        const int log_sum = gf_log[a] + gf_log[b];
        return gf_exp[log_sum];
    }
    return 0;
 }
 // GF(2^8) quotient a / b. Returns 0 when a is 0, and also when b is 0
 // (division by zero is not reported; callers are expected not to trigger it).
 static inline uint8_t gf_div(uint8_t a, uint8_t b) {
    if (a == 0 || b == 0) {
        return 0;
    }
    // Adding GF_MAX keeps the log difference non-negative.
    const int log_diff = gf_log[a] + GF_MAX - gf_log[b];
    return gf_exp[log_diff];
 }
 // GF(2^8) power a^n.
 // n == 0 yields 1 (also for a == 0); otherwise a == 0 yields 0.
 // A negative n is handled as the corresponding power of the inverse: the exponent
 // is reduced into 0..GF_MAX-1 instead of producing a negative table index.
 static inline uint8_t gf_pow(uint8_t a, int n) {
    if (n == 0) return 1;
    if (a == 0) return 0;
    // Reduce n first so the product stays small, then fold C's possibly negative
    // remainder back into the valid index range.
    int e = (gf_log[a] * (n % GF_MAX)) % GF_MAX;
    if (e < 0) {
        e += GF_MAX;
    }
    return gf_exp[e];
 }
 // GF(2^8) multiplicative inverse; 0 has no inverse and maps to 0.
 static inline uint8_t gf_inv(uint8_t a) {
    return (a == 0) ? 0 : gf_exp[GF_MAX - gf_log[a]];
 }
 // =============================================================================
 // Generator Polynomial
 // =============================================================================
 // Build g(x) = (x - alpha^0)(x - alpha^1)...(x - alpha^(RS_PARITY_SIZE-1)).
 // gf_generator[k] ends up holding the coefficient of x^k.
 static void init_generator(void) {
    // g(x) = 1
    memset(gf_generator, 0, sizeof(gf_generator));
    gf_generator[0] = 1;
    for (int root = 0; root < RS_PARITY_SIZE; root++) {
        const uint8_t alpha_pow = gf_exp[root];
        // In-place multiply by (x - alpha^root), highest coefficient first so that
        // each step still sees the not-yet-updated lower coefficient.
        int k = RS_PARITY_SIZE;
        while (k > 0) {
            gf_generator[k] = gf_generator[k - 1] ^ gf_mul(gf_generator[k], alpha_pow);
            k--;
        }
        gf_generator[0] = gf_mul(gf_generator[0], alpha_pow);
    }
 }
 // =============================================================================
 // Public API
 // =============================================================================
 // Build the field tables and generator polynomial on the first call; later calls
 // are no-ops. The guard flag is a plain static int (no locking).
 void rs_init(void) {
    if (!rs_initialized) {
        init_gf_tables();
        init_generator();
        rs_initialized = 1;
    }
 }
 // Systematic RS encode: writes [data][parity] to `output`.
 // data_len values above RS_DATA_SIZE are clamped to RS_DATA_SIZE.
 // Returns the (clamped) data length plus RS_PARITY_SIZE.
 size_t rs_encode(const uint8_t *data, size_t data_len, uint8_t *output) {
    if (!rs_initialized) rs_init();
    const size_t msg_len = (data_len > RS_DATA_SIZE) ? RS_DATA_SIZE : data_len;
    // Message bytes pass through unchanged.
    memcpy(output, data, msg_len);
    // Parity = remainder of msg(x) * x^RS_PARITY_SIZE divided by g(x), computed with
    // a shift register whose slot 0 holds the highest-order coefficient.
    uint8_t parity[RS_PARITY_SIZE];
    memset(parity, 0, sizeof(parity));
    for (size_t pos = 0; pos < msg_len; pos++) {
        const uint8_t feedback = data[pos] ^ parity[0];
        memmove(parity, parity + 1, RS_PARITY_SIZE - 1);
        parity[RS_PARITY_SIZE - 1] = 0;
        if (feedback == 0) {
            continue;
        }
        for (int k = 0; k < RS_PARITY_SIZE; k++) {
            parity[k] ^= gf_mul(gf_generator[RS_PARITY_SIZE - 1 - k], feedback);
        }
    }
    memcpy(output + msg_len, parity, RS_PARITY_SIZE);
    return msg_len + RS_PARITY_SIZE;
 }
 // =============================================================================
 // Berlekamp-Massey Decoder
 // =============================================================================
 // Syndromes S_i = r(alpha^i), i = 0..RS_PARITY_SIZE-1, where r[0] is the
 // highest-order coefficient of the received polynomial (r[j] multiplies x^(len-1-j)).
 static void compute_syndromes(const uint8_t *r, size_t len, uint8_t *syndromes) {
    for (int s = 0; s < RS_PARITY_SIZE; s++) {
        const uint8_t root = gf_exp[s];
        uint8_t acc = 0;
        for (size_t k = 0; k < len; k++) {
            const int power = (int)(len - 1 - k);
            acc ^= gf_mul(r[k], gf_pow(root, power));
        }
        syndromes[s] = acc;
    }
 }
 // Berlekamp-Massey: derive the error locator polynomial from the syndromes.
 // Writes RS_PARITY_SIZE + 1 coefficients to `sigma` (sigma[0] is the constant term),
 // stores the LFSR length in *sigma_deg and also returns it.
 static int berlekamp_massey(const uint8_t *syndromes, uint8_t *sigma, int *sigma_deg) {
    uint8_t conn[RS_PARITY_SIZE + 1];  // current connection polynomial
    uint8_t prev[RS_PARITY_SIZE + 1];  // connection polynomial before the last length change
    memset(conn, 0, sizeof(conn));
    memset(prev, 0, sizeof(prev));
    conn[0] = 1;
    prev[0] = 1;
    int lfsr_len = 0;
    int shift = 1;          // steps since the last length change
    uint8_t prev_disc = 1;  // discrepancy at the last length change
    for (int step = 0; step < RS_PARITY_SIZE; step++) {
        // Discrepancy between the LFSR prediction and the actual syndrome.
        uint8_t disc = syndromes[step];
        for (int k = 1; k <= lfsr_len; k++) {
            disc ^= gf_mul(conn[k], syndromes[step - k]);
        }
        if (disc == 0) {
            shift++;
            continue;
        }
        const uint8_t scale = gf_div(disc, prev_disc);
        const int grows = (2 * lfsr_len <= step);
        uint8_t saved[RS_PARITY_SIZE + 1];
        if (grows) {
            // Keep the pre-update polynomial; it becomes the new `prev`.
            memcpy(saved, conn, sizeof(saved));
        }
        // conn(x) -= scale * x^shift * prev(x)
        for (int k = 0; k + shift <= RS_PARITY_SIZE; k++) {
            conn[k + shift] ^= gf_mul(scale, prev[k]);
        }
        if (grows) {
            lfsr_len = step + 1 - lfsr_len;
            memcpy(prev, saved, sizeof(prev));
            prev_disc = disc;
            shift = 1;
        } else {
            shift++;
        }
    }
    memcpy(sigma, conn, RS_PARITY_SIZE + 1);
    *sigma_deg = lfsr_len;
    return lfsr_len;
 }
 // Chien search: evaluate sigma at alpha^(-i) for i = 0..n-1 and record a buffer
 // position (n - 1 - i) for every root found.
 // Returns 0 when the number of roots equals sigma_deg, -1 otherwise.
 static int chien_search(const uint8_t *sigma, int sigma_deg, size_t n, uint8_t *positions, int *num_errors) {
    int found = 0;
    for (size_t i = 0; i < n; i++) {
        uint8_t value = 0;
        for (int k = 0; k <= sigma_deg; k++) {
            // alpha^(-i*k), with the exponent folded into 0..GF_MAX-1
            const int reduced = (int)((i * k) % GF_MAX);
            const int neg_exp = (GF_MAX - reduced) % GF_MAX;
            value ^= gf_mul(sigma[k], gf_exp[neg_exp]);
        }
        if (value != 0) {
            continue;
        }
        positions[found] = (uint8_t)(n - 1 - i);
        found++;
    }
    *num_errors = found;
    return (found == sigma_deg) ? 0 : -1;
 }
 // Formal derivative of `poly` (degree `deg`, poly[k] = coefficient of x^k).
 // Writes `deg` coefficients to `deriv`. In characteristic 2 the factor k reduces
 // to k mod 2, so only odd-power terms survive.
 static void poly_derivative(const uint8_t *poly, int deg, uint8_t *deriv) {
    for (int k = 1; k <= deg; k++) {
        if (k % 2 != 0) {
            deriv[k - 1] = poly[k];
        } else {
            deriv[k - 1] = 0;
        }
    }
 }
 // Forney algorithm: compute the error magnitude for each located position.
 // `positions` holds buffer indices as produced by chien_search for a codeword of
 // length n; errors[i] receives the value to XOR into data[positions[i]].
 static void forney(const uint8_t *syndromes, const uint8_t *sigma, int sigma_deg,
                   const uint8_t *positions, int num_errors, size_t n, uint8_t *errors) {
    // Error evaluator omega(x) = S(x) * sigma(x) mod x^RS_PARITY_SIZE
    uint8_t omega[RS_PARITY_SIZE + 1];
    memset(omega, 0, sizeof(omega));
    for (int deg = 0; deg < RS_PARITY_SIZE; deg++) {
        uint8_t coeff = 0;
        for (int k = 0; k <= sigma_deg && k <= deg; k++) {
            coeff ^= gf_mul(syndromes[deg - k], sigma[k]);
        }
        omega[deg] = coeff;
    }
    uint8_t sigma_prime[RS_PARITY_SIZE];
    poly_derivative(sigma, sigma_deg, sigma_prime);
    for (int e = 0; e < num_errors; e++) {
        const uint8_t where = positions[e];
        const uint8_t locator = gf_exp[n - 1 - where];  // alpha^(n-1-pos)
        const uint8_t locator_inv = gf_inv(locator);
        // omega(locator^-1)
        uint8_t numerator = 0;
        for (int k = 0; k < RS_PARITY_SIZE; k++) {
            numerator ^= gf_mul(omega[k], gf_pow(locator_inv, k));
        }
        // sigma'(locator^-1)
        uint8_t denominator = 0;
        for (int k = 0; k < sigma_deg; k++) {
            denominator ^= gf_mul(sigma_prime[k], gf_pow(locator_inv, k));
        }
        // e = X * omega(X^-1) / sigma'(X^-1); gf_div yields 0 for a zero denominator.
        errors[e] = gf_mul(locator, gf_div(numerator, denominator));
    }
 }
 // Correct a [data][parity] block in place.
 // Returns the number of byte corrections applied (0 when the syndromes are all
 // zero), or -1 when data_len + RS_PARITY_SIZE exceeds RS_BLOCK_SIZE, when more than
 // RS_MAX_ERRORS errors are indicated, or when the root count is inconsistent.
 // The buffer is left unmodified on every -1 path.
 int rs_decode(uint8_t *data, size_t data_len) {
    if (!rs_initialized) rs_init();
    const size_t total_len = data_len + RS_PARITY_SIZE;
    if (total_len > RS_BLOCK_SIZE) {
        return -1;
    }
    uint8_t syndromes[RS_PARITY_SIZE];
    compute_syndromes(data, total_len, syndromes);
    // A clean codeword has every syndrome equal to zero.
    int dirty = 0;
    for (int k = 0; k < RS_PARITY_SIZE && !dirty; k++) {
        dirty = (syndromes[k] != 0);
    }
    if (!dirty) {
        return 0;
    }
    // Error locator polynomial
    uint8_t sigma[RS_PARITY_SIZE + 1];
    int sigma_deg;
    if (berlekamp_massey(syndromes, sigma, &sigma_deg) > RS_MAX_ERRORS) {
        return -1;
    }
    // Error positions
    uint8_t positions[RS_MAX_ERRORS];
    int num_errors;
    if (chien_search(sigma, sigma_deg, total_len, positions, &num_errors) != 0) {
        return -1;
    }
    // Error magnitudes
    uint8_t error_values[RS_MAX_ERRORS];
    forney(syndromes, sigma, sigma_deg, positions, num_errors, total_len, error_values);
    for (int k = 0; k < num_errors; k++) {
        const uint8_t where = positions[k];
        if (where >= total_len) {
            continue;
        }
        data[where] ^= error_values[k];
    }
    return num_errors;
 }
 // =============================================================================
 // Block-level operations
 // =============================================================================
 // Encode `data` as a sequence of fixed-size RS blocks. Each chunk of up to
 // RS_DATA_SIZE bytes is encoded into its own RS_BLOCK_SIZE-byte slot; a short final
 // chunk is followed by zero padding so that every slot is fully written.
 // Returns the total number of bytes written (a multiple of RS_BLOCK_SIZE).
 size_t rs_encode_blocks(const uint8_t *data, size_t data_len, uint8_t *output) {
    if (!rs_initialized) rs_init();
    size_t written = 0;
    size_t offset = 0;
    while (offset < data_len) {
        size_t chunk = data_len - offset;
        if (chunk > RS_DATA_SIZE) {
            chunk = RS_DATA_SIZE;
        }
        uint8_t *slot = output + written;
        const size_t used = rs_encode(data + offset, chunk, slot);
        if (used < RS_BLOCK_SIZE) {
            memset(slot + used, 0, RS_BLOCK_SIZE - used);
        }
        offset += chunk;
        written += RS_BLOCK_SIZE;
    }
    return written;
 }
 // Decode consecutive RS_BLOCK_SIZE-byte blocks from `data` (corrected in place) and
 // copy the leading data bytes of each block to `output` until `output_len` bytes
 // have been produced.
 //
 // Returns the total number of corrected byte errors, or -1 when a block is
 // uncorrectable or when `total_len` does not contain enough whole blocks to
 // produce `output_len` bytes (previously that case returned success with the
 // tail of `output` left unwritten).
 int rs_decode_blocks(uint8_t *data, size_t total_len, uint8_t *output, size_t output_len) {
    if (!rs_initialized) rs_init();
    int total_errors = 0;
    size_t remaining_output = output_len;
    uint8_t *src = data;
    uint8_t *dst = output;
    while (total_len >= RS_BLOCK_SIZE && remaining_output > 0) {
        // Blocks are always stored at full size, so decode with RS_DATA_SIZE and
        // copy out only as many bytes as are still wanted.
        size_t bytes_to_copy = (remaining_output > RS_DATA_SIZE) ? RS_DATA_SIZE : remaining_output;
        int errors = rs_decode(src, RS_DATA_SIZE);
        if (errors < 0) {
            return -1;  // Uncorrectable block
        }
        total_errors += errors;
        memcpy(dst, src, bytes_to_copy);
        src += RS_BLOCK_SIZE;
        dst += bytes_to_copy;
        total_len -= RS_BLOCK_SIZE;
        remaining_output -= bytes_to_copy;
    }
    if (remaining_output > 0) {
        // Input ended before the requested amount of data was recovered.
        return -1;
    }
    return total_errors;
 }
--- a/video_encoder/lib/libfec/reed_solomon.h
+++ b/video_encoder/lib/libfec/reed_solomon.h
@@ -1,82 +0,0 @@
 /**
 * Reed-Solomon (255,223) Codec for TAV-DT
 *
 * Standard RS code over GF(2^8):
 * - Block size: 255 bytes (223 data + 32 parity)
 * - Error correction: up to 16 byte errors
 * - Error detection: up to 32 byte errors
 *
 * Uses primitive polynomial: x^8 + x^4 + x^3 + x^2 + 1 (0x11D)
 * Generator polynomial: g(x) = product of (x - alpha^i) for i = 0..31
 *
 * Created by CuriousTorvald and Claude on 2025-12-09.
 */
 #ifndef REED_SOLOMON_H
 #define REED_SOLOMON_H
 #include <stdint.h>
 #include <stddef.h>
 // RS(255,223) parameters
 #define RS_BLOCK_SIZE     255   // Total codeword size
 #define RS_DATA_SIZE      223   // Data bytes per block
 #define RS_PARITY_SIZE    32    // Parity bytes per block (2t = 32, t = 16)
 #define RS_MAX_ERRORS     16    // Maximum correctable errors (t)
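 /**
 * Initialize Reed-Solomon codec (GF(2^8) tables and generator polynomial).
 * Safe to call repeatedly; the encode/decode functions also call it on first use.
 * Not synchronized: initialization is guarded by a plain static flag, so call it
 * once from a single thread before using the codec from multiple threads.
 */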
 void rs_init(void);
 /**
 * Encode data block with Reed-Solomon parity.
 *
 * @param data      Input data (up to RS_DATA_SIZE bytes)
 * @param data_len  Length of input data (1 to RS_DATA_SIZE)
 * @param output    Output buffer (must hold data_len + RS_PARITY_SIZE bytes)
 *                  Format: [data][parity]
 * @return          Total output length (data_len + RS_PARITY_SIZE)
 *
 * Note: For data shorter than RS_DATA_SIZE, the encoder pads with zeros
 * internally but only outputs actual data + parity.
 */
 size_t rs_encode(const uint8_t *data, size_t data_len, uint8_t *output);
 /**
 * Decode and correct Reed-Solomon encoded block.
 *
 * @param data      Buffer containing [data][parity] (modified in-place)
 * @param data_len  Length of data portion (1 to RS_DATA_SIZE)
 * @return          Number of errors corrected (0-16), or -1 if uncorrectable
 *
 * On success, data buffer contains corrected data (parity may also be corrected).
 * On failure, data buffer contents are undefined.
 */
 int rs_decode(uint8_t *data, size_t data_len);
 /**
 * Encode data with automatic block splitting.
 * For data larger than RS_DATA_SIZE, splits into multiple RS blocks.
 *
 * @param data        Input data
 * @param data_len    Length of input data
 * @param output      Output buffer (must hold ceil(data_len/223) * 255 bytes)
 * @return            Total output length
 */
 size_t rs_encode_blocks(const uint8_t *data, size_t data_len, uint8_t *output);
 /**
 * Decode data with automatic block splitting.
 *
 * @param data        Buffer containing RS-encoded blocks (modified in-place)
 * @param total_len   Total length of encoded data (multiple of RS_BLOCK_SIZE)
 * @param output      Output buffer for decoded data
 * @param output_len  Expected length of decoded data
 * @return            Total errors corrected across all blocks, or -1 if any block failed
 */
 int rs_decode_blocks(uint8_t *data, size_t total_len, uint8_t *output, size_t output_len);
 #endif // REED_SOLOMON_H
--- a/video_encoder/lib/libtaddec/decoder_tad.c
+++ b/video_encoder/lib/libtaddec/decoder_tad.c
--- a/video_encoder/lib/libtadenc/encoder_tad.c
+++ b/video_encoder/lib/libtadenc/encoder_tad.c
--- a/video_encoder/lib/libtavdec/tav_video_decoder.c
+++ b/video_encoder/lib/libtavdec/tav_video_decoder.c
--- a/video_encoder/lib/libtavenc/tav_encoder_color.c
+++ b/video_encoder/lib/libtavenc/tav_encoder_color.c
@@ -1,255 +0,0 @@
 /**
 * TAV Encoder - Color Space Conversion Library
 *
 * Provides RGB <-> YCoCg-R and RGB <-> ICtCp color space conversions
 * for the TSVM Advanced Video (TAV) encoder.
 *
 * Extracted from encoder_tav.c as part of library refactoring.
 */
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
 #include <math.h>
 // =============================================================================
 // Utility Functions
 // =============================================================================
 // Limit x to the inclusive range [min, max]; the lower bound is checked first.
 static inline int CLAMP(int x, int min, int max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
 }
 // Single-precision clamp of x to [min, max]; the lower bound is checked first.
 // Arguments wider than float are narrowed at the call.
 static inline float FCLAMP(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
 }
 static inline int iround(double v) {
    return (int)floor(v + 0.5);
 }
 // =============================================================================
 // sRGB Gamma Helpers
 // =============================================================================
 static inline double srgb_linearise(double val) {
    if (val <= 0.04045) return val / 12.92;
    return pow((val + 0.055) / 1.055, 2.4);
 }
 static inline double srgb_unlinearise(double val) {
    if (val <= 0.0031308) return 12.92 * val;
    return 1.055 * pow(val, 1.0/2.4) - 0.055;
 }
 // =============================================================================
 // HLG (Hybrid Log-Gamma) Transfer Functions
 // =============================================================================
 static inline double HLG_OETF(double E) {
    const double a = 0.17883277;
    const double b = 0.28466892;  // 1 - 4*a
    const double c = 0.55991073;  // 0.5 - a*ln(4*a)
    if (E <= 1.0/12.0) return sqrt(3.0 * E);
    return a * log(12.0 * E - b) + c;
 }
 static inline double HLG_EOTF(double Ep) {
    const double a = 0.17883277;
    const double b = 0.28466892;
    const double c = 0.55991073;
    if (Ep <= 0.5) {
        double val = Ep * Ep / 3.0;
        return val;
    }
    double val = (exp((Ep - c) / a) + b) / 12.0;
    return val;
 }
 // =============================================================================
 // Color Space Transformation Matrices
 // =============================================================================
 // All matrices below are row-major: row i produces output component i, and the
 // three columns multiply the three input components in order.

 // BT.2100 RGB -> LMS matrix
 // (applied to linear-light RGB; coefficients are integers over 4096)
 static const double M_RGB_TO_LMS[3][3] = {
    {1688.0/4096, 2146.0/4096,  262.0/4096},
    { 683.0/4096, 2951.0/4096,  462.0/4096},
    {  99.0/4096,  309.0/4096, 3688.0/4096}
 };
 // LMS -> RGB inverse matrix
 // (precomputed decimal inverse of M_RGB_TO_LMS)
 static const double M_LMS_TO_RGB[3][3] = {
    { 6.1723815689243215, -5.319534979827695,   0.14699442094633924},
    {-1.3243428148026244,  2.560286104841917,  -0.2359203727576164},
    {-0.011819739235953752, -0.26473549971186555, 1.2767952602537955}
 };
 // ICtCp matrix (L' M' S' -> I Ct Cp) - BT.2100 constants
 // NOTE(review): these coefficients look like the HLG variant of the BT.2100
 // ICtCp matrix - confirm against the recommendation.
 static const double M_LMSPRIME_TO_ICTCP[3][3] = {
    { 2048.0/4096.0,   2048.0/4096.0,     0.0          },
    { 3625.0/4096.0,  -7465.0/4096.0,  3840.0/4096.0   },
    { 9500.0/4096.0,  -9212.0/4096.0,  -288.0/4096.0   }
 };
 // ICtCp -> L' M' S' inverse matrix
 // (precomputed decimal inverse of M_LMSPRIME_TO_ICTCP)
 static const double M_ICTCP_TO_LMSPRIME[3][3] = {
    { 1.0,   0.015718580108730416,   0.2095810681164055 },
    { 1.0,  -0.015718580108730416,  -0.20958106811640548},
    { 1.0,   1.0212710798422344,    -0.6052744909924316 }
 };
 // =============================================================================
 // YCoCg-R Color Space Conversion
 // =============================================================================
 /**
 * Convert an RGB24 frame to YCoCg-R planes.
 *
 * Each pixel is transformed with the lifting steps
 *   Co = R - B
 *   t  = B + Co/2
 *   Cg = G - t
 *   Y  = t + Cg/2
 * evaluated in single-precision float.
 *
 * @param rgb    Input RGB24 data (interleaved: RGBRGBRGB..., 3 bytes per pixel)
 * @param y      Output luma plane (width*height floats)
 * @param co     Output orange chrominance plane (width*height floats)
 * @param cg     Output green chrominance plane (width*height floats)
 * @param width  Frame width
 * @param height Frame height
 */
 void tav_rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg,
                      int width, int height)
 {
    const int pixel_count = width * height;
    const uint8_t *px = rgb;
    for (int p = 0; p < pixel_count; p++, px += 3) {
        const float red = px[0];
        const float green = px[1];
        const float blue = px[2];
        const float orange = red - blue;
        const float half_mix = blue + orange * 0.5f;
        const float green_diff = green - half_mix;
        co[p] = orange;
        cg[p] = green_diff;
        y[p] = half_mix + green_diff * 0.5f;
    }
 }
 // =============================================================================
 // ICtCp Color Space Conversion (HDR-capable)
 // =============================================================================
 /**
 * Convert one sRGB8 pixel to ICtCp using the HLG transfer function.
 *
 * Pipeline: sRGB decode -> M_RGB_TO_LMS -> HLG_OETF per channel ->
 * M_LMSPRIME_TO_ICTCP -> scale by 255 (chroma offset by 127.5) -> clamp to 0..255.
 * The clamp goes through FCLAMP, so the stored values carry float precision.
 *
 * @param r8     Input red component (0-255)
 * @param g8     Input green component (0-255)
 * @param b8     Input blue component (0-255)
 * @param out_I  Output intensity (0-255)
 * @param out_Ct Output tritanope / blue-yellow axis (0-255, centered at 127.5)
 * @param out_Cp Output protanope / red-green axis (0-255, centered at 127.5)
 */
 void tav_srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
                             double *out_I, double *out_Ct, double *out_Cp)
 {
    // sRGB code values -> linear light in 0..1
    const double lin_r = srgb_linearise((double)r8 / 255.0);
    const double lin_g = srgb_linearise((double)g8 / 255.0);
    const double lin_b = srgb_linearise((double)b8 / 255.0);
    // Linear RGB -> LMS, then HLG OETF on each cone response
    double lms_prime[3];
    for (int row = 0; row < 3; row++) {
        const double *m = M_RGB_TO_LMS[row];
        lms_prime[row] = HLG_OETF(m[0]*lin_r + m[1]*lin_g + m[2]*lin_b);
    }
    // L'M'S' -> ICtCp
    double ictcp[3];
    for (int row = 0; row < 3; row++) {
        const double *m = M_LMSPRIME_TO_ICTCP[row];
        ictcp[row] = m[0]*lms_prime[0] + m[1]*lms_prime[1] + m[2]*lms_prime[2];
    }
    // Map to the 0..255 output range; chroma is re-centered on 127.5
    *out_I = FCLAMP(ictcp[0] * 255.0, 0.0, 255.0);
    *out_Ct = FCLAMP(ictcp[1] * 255.0 + 127.5, 0.0, 255.0);
    *out_Cp = FCLAMP(ictcp[2] * 255.0 + 127.5, 0.0, 255.0);
 }
 /**
 * Convert one ICtCp sample back to sRGB8 (inverse of the HLG-based forward path).
 *
 * Pipeline: undo the 0..255 scaling -> M_ICTCP_TO_LMSPRIME -> HLG_EOTF per channel ->
 * M_LMS_TO_RGB -> sRGB encode -> scale by 255, clamp (via FCLAMP, float precision)
 * and round to the nearest integer.
 *
 * @param I8  Input intensity (0-255)
 * @param Ct8 Input tritanope (0-255, centered at 127.5)
 * @param Cp8 Input protanope (0-255, centered at 127.5)
 * @param r8  Output red component (0-255)
 * @param g8  Output green component (0-255)
 * @param b8  Output blue component (0-255)
 */
 void tav_ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
                             uint8_t *r8, uint8_t *g8, uint8_t *b8)
 {
    // Back to normalized units; chroma becomes signed around zero
    const double in_i = I8 / 255.0;
    const double in_ct = (Ct8 - 127.5) / 255.0;
    const double in_cp = (Cp8 - 127.5) / 255.0;
    // ICtCp -> L'M'S', then inverse HLG on each channel to get linear LMS
    double lms[3];
    for (int row = 0; row < 3; row++) {
        const double *m = M_ICTCP_TO_LMSPRIME[row];
        lms[row] = HLG_EOTF(m[0]*in_i + m[1]*in_ct + m[2]*in_cp);
    }
    // LMS -> linear RGB, then sRGB encoding
    double encoded[3];
    for (int row = 0; row < 3; row++) {
        const double *m = M_LMS_TO_RGB[row];
        encoded[row] = srgb_unlinearise(m[0]*lms[0] + m[1]*lms[1] + m[2]*lms[2]);
    }
    // Scale to 0..255, clamp, round
    *r8 = (uint8_t)iround(FCLAMP(encoded[0] * 255.0, 0.0, 255.0));
    *g8 = (uint8_t)iround(FCLAMP(encoded[1] * 255.0, 0.0, 255.0));
    *b8 = (uint8_t)iround(FCLAMP(encoded[2] * 255.0, 0.0, 255.0));
 }
--- a/video_encoder/lib/libtavenc/tav_encoder_color.h
+++ b/video_encoder/lib/libtavenc/tav_encoder_color.h
@@ -1,67 +0,0 @@
 /**
 * TAV Encoder - Color Space Conversion Library
 *
 * Public API for RGB <-> YCoCg-R and RGB <-> ICtCp color space conversions.
 */
 #ifndef TAV_ENCODER_COLOR_H
 #define TAV_ENCODER_COLOR_H
 #include <stdint.h>
 #ifdef __cplusplus
 extern "C" {
 #endif
 // =============================================================================
 // YCoCg-R Color Space Conversion
 // =============================================================================
 /**
 * Convert RGB24 to YCoCg-R color space for a full frame.
 *
 * @param rgb    Input RGB24 data (interleaved: RGBRGBRGB...)
 * @param y      Output luma channel
 * @param co     Output orange chrominance
 * @param cg     Output green chrominance
 * @param width  Frame width
 * @param height Frame height
 */
 void tav_rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg,
                      int width, int height);
 // =============================================================================
 // ICtCp Color Space Conversion (HDR-capable)
 // =============================================================================
 /**
 * Convert sRGB8 to ICtCp color space using HLG transfer function.
 *
 * @param r8     Input red component (0-255)
 * @param g8     Input green component (0-255)
 * @param b8     Input blue component (0-255)
 * @param out_I  Output intensity (0-255)
 * @param out_Ct Output tritanope (0-255, centered at 127.5)
 * @param out_Cp Output protanope (0-255, centered at 127.5)
 */
 void tav_srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
                             double *out_I, double *out_Ct, double *out_Cp);
 /**
 * Convert ICtCp back to sRGB8 using HLG inverse transfer function.
 *
 * @param I8  Input intensity (0-255)
 * @param Ct8 Input tritanope (0-255, centered at 127.5)
 * @param Cp8 Input protanope (0-255, centered at 127.5)
 * @param r8  Output red component (0-255)
 * @param g8  Output green component (0-255)
 * @param b8  Output blue component (0-255)
 */
 void tav_ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
                             uint8_t *r8, uint8_t *g8, uint8_t *b8);
 #ifdef __cplusplus
 }
 #endif
 #endif // TAV_ENCODER_COLOR_H
--- a/video_encoder/lib/libtavenc/tav_encoder_dwt.c
+++ b/video_encoder/lib/libtavenc/tav_encoder_dwt.c
@@ -1,619 +0,0 @@
 /**
 * TAV Encoder - Discrete Wavelet Transform (DWT) Library
 *
 * Provides multi-resolution wavelet decomposition for video compression.
 * Supports multiple wavelet types: CDF 5/3, 9/7, 13/7, DD-4, and Haar.
 *
 * Extracted from encoder_tav.c as part of library refactoring.
 */
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
 #include <math.h>
 // =============================================================================
 // Wavelet Type Constants
 // =============================================================================
 #define WAVELET_5_3_REVERSIBLE 0       // CDF 5/3 - Lossless capable
 #define WAVELET_9_7_IRREVERSIBLE 1     // CDF 9/7 - Higher compression (default)
 #define WAVELET_BIORTHOGONAL_13_7 2    // Biorthogonal 13/7
 #define WAVELET_DD4 16                 // Deslauriers-Dubuc 4-point interpolating
 #define WAVELET_HAAR 255               // Haar - Simplest wavelet
 // =============================================================================
 // 1D Forward DWT Transforms
 // =============================================================================
 /**
 * CDF 5/3 wavelet forward 1D transform (floating-point lifting).
 *
 * A predict step turns the odd samples into high-pass (detail) coefficients
 * and an update step turns the even samples into low-pass coefficients.
 * Output layout: ceil(length/2) low-pass values followed by floor(length/2)
 * high-pass values.
 *
 * Boundary handling (mirrored by dwt_53_inverse_1d()):
 *  - predict: a missing right neighbour is replaced by the left neighbour;
 *  - update:  the first low-pass sample has no previous detail term and the
 *             last low-pass sample has no current detail term.
 *
 * Signals shorter than 2 samples are left untouched. The process is
 * terminated with an error message if the scratch buffer cannot be allocated.
 *
 * @param data   In/out signal data (modified in-place)
 * @param length Signal length (handles non-power-of-2)
 */
 static void dwt_53_forward_1d(float *data, int length) {
    if (length < 2) return;
    const int half = (length + 1) / 2;  // number of low-pass outputs
    const int num_high = length / 2;    // number of high-pass outputs
    float *temp = (float *)malloc((size_t)length * sizeof(float));
    if (!temp) {
        fprintf(stderr, "ERROR: Failed to allocate DWT scratch buffer of %d floats\n", length);
        exit(1);
    }
    // Predict step (high-pass): detail = odd sample - mean of its even neighbours
    for (int i = 0; i < num_high; i++) {
        const float left = data[2 * i];
        const float right = (2 * i + 2 < length) ? data[2 * i + 2] : left;
        temp[half + i] = data[2 * i + 1] - 0.5f * (left + right);
    }
    // Update step (low-pass): even sample + quarter of the adjacent details
    for (int i = 0; i < half; i++) {
        const float d_prev = (i > 0) ? temp[half + i - 1] : 0.0f;
        const float d_curr = (i < half - 1) ? temp[half + i] : 0.0f;
        temp[i] = data[2 * i] + 0.25f * (d_prev + d_curr);
    }
    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
 }
 /**
 * CDF 9/7 irreversible wavelet forward 1D transform (JPEG 2000 coefficients).
 *
 * The signal is split into even (low) and odd (high) halves, then four
 * lifting steps (predict alpha, update beta, predict gamma, update delta)
 * and a final scaling by K (low) and 1/K (high) are applied.
 * Output layout: ceil(length/2) low-pass values followed by floor(length/2)
 * high-pass values.
 *
 * Boundary handling: a missing next low sample is replaced by the current
 * one; in the update steps a missing current detail counts as 0 and, for the
 * first sample, the previous detail is replaced by the current one.
 *
 * Signals shorter than 2 samples are left untouched. The process is
 * terminated with an error message if the scratch buffer cannot be allocated.
 *
 * @param data   In/out signal data
 * @param length Signal length
 */
 static void dwt_97_forward_1d(float *data, int length) {
    if (length < 2) return;
    // JPEG2000 9/7 lifting coefficients
    const float alpha = -1.586134342f;
    const float beta = -0.052980118f;
    const float gamma = 0.882911076f;
    const float delta = 0.443506852f;
    const float K = 1.230174105f;
    const int half = (length + 1) / 2;  // number of even (low) samples
    const int num_high = length / 2;    // number of odd (high) samples
    float *temp = (float *)malloc((size_t)length * sizeof(float));
    if (!temp) {
        fprintf(stderr, "ERROR: Failed to allocate DWT scratch buffer of %d floats\n", length);
        exit(1);
    }
    float *low = temp;
    float *high = temp + half;
    // Split into even/odd samples
    for (int i = 0; i < half; i++) {
        low[i] = data[2 * i];
    }
    for (int i = 0; i < num_high; i++) {
        high[i] = data[2 * i + 1];
    }
    // Step 1: Predict alpha
    for (int i = 0; i < num_high; i++) {
        const float s_curr = low[i];
        const float s_next = (i + 1 < half) ? low[i + 1] : s_curr;
        high[i] += alpha * (s_curr + s_next);
    }
    // Step 2: Update beta
    for (int i = 0; i < half; i++) {
        const float d_curr = (i < num_high) ? high[i] : 0.0f;
        const float d_prev = (i > 0) ? high[i - 1] : d_curr;
        low[i] += beta * (d_prev + d_curr);
    }
    // Step 3: Predict gamma
    for (int i = 0; i < num_high; i++) {
        const float s_curr = low[i];
        const float s_next = (i + 1 < half) ? low[i + 1] : s_curr;
        high[i] += gamma * (s_curr + s_next);
    }
    // Step 4: Update delta
    for (int i = 0; i < half; i++) {
        const float d_curr = (i < num_high) ? high[i] : 0.0f;
        const float d_prev = (i > 0) ? high[i - 1] : d_curr;
        low[i] += delta * (d_prev + d_curr);
    }
    // Step 5: Scaling
    for (int i = 0; i < half; i++) {
        low[i] *= K;
    }
    for (int i = 0; i < num_high; i++) {
        high[i] /= K;
    }
    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
 }
 /**
 * CDF 9/7 wavelet forward 1D using 16-bit fixed-point lifting coefficients.
 *
 * Same step order as dwt_97_forward_1d() (split, predict, update, predict,
 * update, scale), but each lifting term is computed in 64-bit integer
 * arithmetic from samples converted to int, then added back into the float
 * buffer.
 * Output layout: ceil(length/2) low-pass values followed by floor(length/2)
 * high-pass values.
 *
 * NOTE(review): the original header calls this "integer-reversible"; no
 * matching inverse is visible in this part of the file -- confirm before
 * relying on exact reconstruction.
 *
 * @param data   In/out signal data (float storage; values are converted to
 *               int inside the lifting steps, discarding any fraction)
 * @param length Signal length (signals shorter than 2 are left untouched)
 */
 static void dwt_97_iint_forward_1d(float *data, int length) {
    if (length < 2) return;
    // NOTE(review): allocation result is not checked before use
    float *temp = malloc(length * sizeof(float));
    int half = (length + 1) / 2;
    // Split: even samples to temp[0..half), odd samples to temp[half..length)
    for (int i = 0; i < half; ++i) temp[i] = data[2*i];
    for (int i = 0; i < length/2; ++i) temp[half + i] = data[2*i + 1];
    // Fixed-point format: coefficients are scaled by 2^SHIFT
    const int SHIFT = 16;
    const int64_t ROUND = 1LL << (SHIFT - 1);  // one half in fixed-point, for rounding
    const int64_t A = -103949;  // alpha (predict 1), fixed-point
    const int64_t B = -3472;    // beta  (update 1), fixed-point
    const int64_t G = 57862;    // gamma (predict 2), fixed-point
    const int64_t D = 29066;    // delta (update 2), fixed-point
    const int64_t K_FP  = 80542;  // ≈ 1.230174105 * 2^16
    const int64_t Ki_FP = 53283;  // ≈ (1/1.230174105) * 2^16
    // RN(x): drop the SHIFT fractional bits with rounding applied to the
    // magnitude, so negative products are rounded symmetrically to positive ones
    #define RN(x) (((x)>=0)?(((x)+ROUND)>>SHIFT):(-((-(x)+ROUND)>>SHIFT)))
    // Predict alpha: a missing next low sample is replaced by the current one
    for (int i = 0; i < length/2; ++i) {
        int s = temp[i];  // float -> int conversion drops the fraction
        int sn = (i+1<half)? temp[i+1] : s;
        temp[half+i] += RN(A * (int64_t)(s + sn));
    }
    // Update beta: missing current detail counts as 0; first sample reuses the current detail
    for (int i = 0; i < half; ++i) {
        int d = (half+i<length)? temp[half+i]:0;
        int dp = (i>0 && half+i-1<length)? temp[half+i-1]:d;
        temp[i] += RN(B * (int64_t)(dp + d));
    }
    // Predict gamma
    for (int i = 0; i < length/2; ++i) {
        int s = temp[i];
        int sn = (i+1<half)? temp[i+1]:s;
        temp[half+i] += RN(G * (int64_t)(s + sn));
    }
    // Update delta
    for (int i = 0; i < half; ++i) {
        int d = (half+i<length)? temp[half+i]:0;
        int dp = (i>0 && half+i-1<length)? temp[half+i-1]:d;
        temp[i] += RN(D * (int64_t)(dp + d));
    }
    // Scaling: low band times K, high band times 1/K.
    // Unlike RN(), this uses a plain add-and-shift, so negative values go
    // through a right shift of a negative integer.
    for (int i = 0; i < half; ++i) {
        temp[i] = (((int64_t)temp[i] * K_FP  + ROUND) >> SHIFT);
    }
    for (int i = 0; i < length/2; ++i) {
        if (half + i < length) {
            temp[half + i] = (((int64_t)temp[half + i] * Ki_FP + ROUND) >> SHIFT);
        }
    }
    memcpy(data, temp, length * sizeof(float));
    free(temp);
    #undef RN
 }
 /**
 * Deslauriers-Dubuc 4-point interpolating wavelet forward 1D (DD-4).
 *
 * Predict: each odd sample minus a four-tap interpolation of the even
 * samples with weights -1/16, 9/16, 9/16, -1/16.
 * Update: each even sample plus a quarter of its two adjacent details.
 * Output layout: ceil(length/2) low-pass values followed by floor(length/2)
 * high-pass values.
 *
 * Boundary handling in the predict step: the sample before the first even
 * sample and the one after the last are replaced by the first / last even
 * sample; a sample two past the end is replaced by the second-to-last even
 * sample (or the last one when only one exists). In the update step, missing
 * detail coefficients count as 0.
 *
 * Signals shorter than 2 samples are left untouched. The process is
 * terminated with an error message if the scratch buffer cannot be allocated.
 *
 * @param data   In/out signal data
 * @param length Signal length
 */
 static void dwt_dd4_forward_1d(float *data, int length) {
    if (length < 2) return;
    const int half = (length + 1) / 2;  // number of even (low) samples
    const int num_high = length / 2;    // number of odd (high) samples
    float *temp = (float *)malloc((size_t)length * sizeof(float));
    if (!temp) {
        fprintf(stderr, "ERROR: Failed to allocate DWT scratch buffer of %d floats\n", length);
        exit(1);
    }
    float *low = temp;
    float *high = temp + half;
    // Split into even/odd samples
    for (int i = 0; i < half; i++) {
        low[i] = data[2 * i];
    }
    for (int i = 0; i < num_high; i++) {
        high[i] = data[2 * i + 1];
    }
    // DD-4 prediction step with four-point kernel
    for (int i = 0; i < num_high; i++) {
        const float s_m1 = (i > 0) ? low[i - 1] : low[0];
        const float s_0 = low[i];
        const float s_1 = (i + 1 < half) ? low[i + 1] : low[half - 1];
        const float s_2 = (i + 2 < half) ? low[i + 2]
                                         : ((half > 1) ? low[half - 2] : low[half - 1]);
        const float prediction = (-1.0f/16.0f) * s_m1 + (9.0f/16.0f) * s_0 +
                                 (9.0f/16.0f) * s_1 + (-1.0f/16.0f) * s_2;
        high[i] -= prediction;
    }
    // DD-4 update step
    for (int i = 0; i < half; i++) {
        const float d_curr = (i < num_high) ? high[i] : 0.0f;
        const float d_prev = (i > 0) ? high[i - 1] : 0.0f;
        low[i] += 0.25f * (d_prev + d_curr);
    }
    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
 }
 /**
 * "Biorthogonal 13/7" wavelet forward 1D (simplified).
 *
 * This is not a full 13/7 filter bank: it runs the same predict/update
 * lifting as dwt_53_forward_1d() and then scales the low band by K and the
 * high band by 1/K.
 * Output layout: ceil(length/2) low-pass values followed by floor(length/2)
 * high-pass values.
 *
 * Signals shorter than 2 samples are left untouched. The process is
 * terminated with an error message if the scratch buffer cannot be allocated.
 *
 * @param data   In/out signal data
 * @param length Signal length
 */
 static void dwt_bior137_forward_1d(float *data, int length) {
    if (length < 2) return;
    const float K = 1.230174105f;
    const int half = (length + 1) / 2;  // number of low-pass outputs
    const int num_high = length / 2;    // number of high-pass outputs
    float *temp = (float *)malloc((size_t)length * sizeof(float));
    if (!temp) {
        fprintf(stderr, "ERROR: Failed to allocate DWT scratch buffer of %d floats\n", length);
        exit(1);
    }
    // Predict step (high-pass); a missing right neighbour is replaced by the left one
    for (int i = 0; i < num_high; i++) {
        const float left = data[2 * i];
        const float right = (2 * i + 2 < length) ? data[2 * i + 2] : left;
        const float prediction = 0.5f * (left + right);
        temp[half + i] = data[2 * i + 1] - prediction;
    }
    // Update step (low-pass); edge samples use only the detail terms that exist
    for (int i = 0; i < half; i++) {
        const float d_prev = (i > 0) ? temp[half + i - 1] : 0.0f;
        const float d_curr = (i < half - 1) ? temp[half + i] : 0.0f;
        temp[i] = data[2 * i] + 0.25f * (d_prev + d_curr);
    }
    // Scaling
    for (int i = 0; i < half; i++) {
        temp[i] *= K;
    }
    for (int i = 0; i < num_high; i++) {
        temp[half + i] /= K;
    }
    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
 }
 /**
 * Haar wavelet forward 1D transform.
 *
 * Each pair (a, b) becomes an average (a + b) / 2 in the low band and a
 * half-difference (a - b) / 2 in the high band. With an odd length the
 * unpaired last sample is copied unchanged to the end of the low band.
 * Output layout: ceil(length/2) low-pass values followed by floor(length/2)
 * high-pass values.
 *
 * Signals shorter than 2 samples are left untouched. The process is
 * terminated with an error message if the scratch buffer cannot be allocated.
 *
 * @param data   In/out signal data
 * @param length Signal length
 */
 static void dwt_haar_forward_1d(float *data, int length) {
    if (length < 2) return;
    const int half = (length + 1) / 2;  // number of low-pass outputs
    const int num_pairs = length / 2;   // number of complete sample pairs
    float *temp = (float *)malloc((size_t)length * sizeof(float));
    if (!temp) {
        fprintf(stderr, "ERROR: Failed to allocate DWT scratch buffer of %d floats\n", length);
        exit(1);
    }
    for (int i = 0; i < num_pairs; i++) {
        const float a = data[2 * i];
        const float b = data[2 * i + 1];
        temp[i] = (a + b) / 2.0f;
        temp[half + i] = (a - b) / 2.0f;
    }
    // Odd length: the trailing sample has no partner and passes through
    if (num_pairs < half) {
        temp[half - 1] = data[length - 1];
    }
    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
 }
 // =============================================================================
 // 1D Inverse DWT Transforms
 // =============================================================================
 /**
 * CDF 5/3 wavelet inverse 1D transform.
 *
 * Undoes dwt_53_forward_1d(): first subtracts the update term from the
 * low-pass coefficients to recover the even samples, then adds the
 * prediction back to the high-pass coefficients to recover the odd samples,
 * using the same boundary rules as the forward transform.
 * Input layout: ceil(length/2) low-pass values followed by floor(length/2)
 * high-pass values; output is the interleaved signal.
 *
 * Signals shorter than 2 samples are left untouched. The process is
 * terminated with an error message if the scratch buffer cannot be allocated.
 *
 * @param data   In/out coefficient data
 * @param length Signal length
 */
 static void dwt_53_inverse_1d(float *data, int length) {
    if (length < 2) return;
    const int half = (length + 1) / 2;  // number of low-pass coefficients
    const int num_high = length / 2;    // number of high-pass coefficients
    float *temp = (float *)malloc((size_t)length * sizeof(float));
    if (!temp) {
        fprintf(stderr, "ERROR: Failed to allocate DWT scratch buffer of %d floats\n", length);
        exit(1);
    }
    // Work on a copy so the interleaved result can be written straight to data
    memcpy(temp, data, (size_t)length * sizeof(float));
    // Undo update step: recover all even samples first
    for (int i = 0; i < half; i++) {
        const float d_prev = (i > 0) ? temp[half + i - 1] : 0.0f;
        const float d_curr = (i < half - 1) ? temp[half + i] : 0.0f;
        temp[i] -= 0.25f * (d_prev + d_curr);
    }
    // Undo predict step: interleave even samples and reconstructed odd samples
    for (int i = 0; i < half; i++) {
        data[2 * i] = temp[i];
        if (i < num_high) {
            const float right = (i + 1 < half) ? temp[i + 1] : temp[i];
            data[2 * i + 1] = temp[half + i] + 0.5f * (temp[i] + right);
        }
    }
    free(temp);
 }
 /**
 * Haar wavelet inverse 1D transform.
 *
 * Undoes dwt_haar_forward_1d(): for each (average, half-difference) pair the
 * original samples are average + difference and average - difference. With
 * an odd length the last low-pass value is copied through unchanged.
 *
 * Signals shorter than 2 samples are left untouched. The process is
 * terminated with an error message if the scratch buffer cannot be allocated.
 *
 * @param data   In/out coefficient data
 * @param length Signal length
 */
 static void dwt_haar_inverse_1d(float *data, int length) {
    if (length < 2) return;
    const int half = (length + 1) / 2;  // number of low-pass coefficients
    const int num_pairs = length / 2;   // number of high-pass coefficients
    float *temp = (float *)malloc((size_t)length * sizeof(float));
    if (!temp) {
        fprintf(stderr, "ERROR: Failed to allocate DWT scratch buffer of %d floats\n", length);
        exit(1);
    }
    // Reconstruct from averages and differences
    for (int i = 0; i < num_pairs; i++) {
        const float avg = data[i];
        const float diff = data[half + i];
        temp[2 * i] = avg + diff;
        temp[2 * i + 1] = avg - diff;
    }
    // Odd length: the unpaired trailing sample was stored as-is
    if (num_pairs < half) {
        temp[length - 1] = data[half - 1];
    }
    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
 }
 // =============================================================================
 // 2D DWT Transform
 // =============================================================================
 /**
 * Run one forward 1D DWT of the requested wavelet type on a line, in place.
 * Unrecognised filter types leave the line unchanged.
 */
 static void dwt_forward_1d_dispatch(float *line, int length, int filter_type) {
    switch (filter_type) {
        case WAVELET_5_3_REVERSIBLE:
            dwt_53_forward_1d(line, length);
            break;
        case WAVELET_9_7_IRREVERSIBLE:
            dwt_97_forward_1d(line, length);
            break;
        case WAVELET_BIORTHOGONAL_13_7:
            dwt_bior137_forward_1d(line, length);
            break;
        case WAVELET_DD4:
            dwt_dd4_forward_1d(line, length);
            break;
        case WAVELET_HAAR:
            dwt_haar_forward_1d(line, length);
            break;
        default:
            break;
    }
 }
 /**
 * Apply 2D forward DWT to a frame (in-place).
 *
 * Applies separable 1D transforms: horizontal (rows), then vertical (columns).
 * Each further level repeats this on the top-left region whose size is the
 * previous region's size halved and rounded up; the row stride stays `width`.
 *
 * Nothing is done when data is NULL, when width or height is below 1, or
 * when levels is below 1 (there is no automatic level selection here; use
 * tav_dwt_calculate_levels() to choose a value). An unrecognised filter_type
 * leaves the data unchanged. The process is terminated with an error message
 * if the column scratch buffer cannot be allocated.
 *
 * @param data        In/out 2D image data (row-major, width stride)
 * @param width       Image width
 * @param height      Image height
 * @param levels      Number of decomposition levels
 * @param filter_type Wavelet type (WAVELET_* constant)
 */
 void tav_dwt_2d_forward(float *data, int width, int height, int levels, int filter_type) {
    if (!data || width < 1 || height < 1 || levels < 1) return;
    // Rows are contiguous and are transformed in place; only columns need a scratch line
    float *column = (float *)malloc((size_t)height * sizeof(float));
    if (!column) {
        fprintf(stderr, "ERROR: Failed to allocate DWT column buffer of %d floats\n", height);
        exit(1);
    }
    int current_width = width;
    int current_height = height;
    for (int level = 0; level < levels; level++) {
        // Row transform (horizontal)
        for (int y = 0; y < current_height; y++) {
            dwt_forward_1d_dispatch(data + (size_t)y * (size_t)width, current_width, filter_type);
        }
        // Column transform (vertical)
        for (int x = 0; x < current_width; x++) {
            for (int y = 0; y < current_height; y++) {
                column[y] = data[(size_t)y * (size_t)width + (size_t)x];
            }
            dwt_forward_1d_dispatch(column, current_height, filter_type);
            for (int y = 0; y < current_height; y++) {
                data[(size_t)y * (size_t)width + (size_t)x] = column[y];
            }
        }
        // The next level works on the low-pass quadrant (sizes rounded up)
        current_width = (current_width + 1) / 2;
        current_height = (current_height + 1) / 2;
    }
    free(column);
 }
 // =============================================================================
 // 3D DWT Transform (Temporal + Spatial)
 // =============================================================================
 /**
 * Apply 3D forward DWT to a GOP (group of pictures).
 *
 * First applies temporal DWT across frames at each spatial location,
 * then applies 2D spatial DWT to each resulting temporal subband.
 * Each temporal level works on the first ceil(n/2) coefficients left by the
 * previous level, so non-power-of-2 GOP sizes are supported; levels stop
 * once fewer than 2 coefficients remain.
 *
 * The call returns without touching any frame -- the spatial transform
 * included -- when gop_data is NULL or when num_frames, width or height is
 * below 2. The process is terminated with an error message if the temporal
 * scratch buffer cannot be allocated.
 *
 * @param gop_data        Array of frame pointers [num_frames][width*height]
 * @param width           Frame width
 * @param height          Frame height
 * @param num_frames      Number of frames in GOP
 * @param spatial_levels  Number of 2D spatial decomposition levels
 * @param temporal_levels Number of 1D temporal decomposition levels
 * @param spatial_filter  Wavelet type for spatial transform
 * @param temporal_filter Wavelet type for temporal transform:
 *                        0 (WAVELET_5_3_REVERSIBLE) selects CDF 5/3; every
 *                        other value, including 255 (WAVELET_HAAR), uses Haar
 */
 void tav_dwt_3d_forward(float **gop_data, int width, int height, int num_frames,
                        int spatial_levels, int temporal_levels,
                        int spatial_filter, int temporal_filter) {
    if (!gop_data || num_frames < 2 || width < 2 || height < 2) return;
    float *temporal_line = (float *)malloc((size_t)num_frames * sizeof(float));
    if (!temporal_line) {
        fprintf(stderr, "ERROR: Failed to allocate temporal DWT buffer of %d floats\n", num_frames);
        exit(1);
    }
    // Step 1: Apply temporal DWT across frames, one pixel position at a time
    const size_t num_pixels = (size_t)width * (size_t)height;
    for (size_t pixel_idx = 0; pixel_idx < num_pixels; pixel_idx++) {
        // Extract temporal signal
        for (int t = 0; t < num_frames; t++) {
            temporal_line[t] = gop_data[t][pixel_idx];
        }
        // Apply temporal DWT with multiple levels on a shrinking low-pass prefix
        int level_frames = num_frames;
        for (int level = 0; level < temporal_levels && level_frames >= 2; level++) {
            if (temporal_filter == 0) {
                // CDF 5/3 temporal (WAVELET_5_3_REVERSIBLE)
                dwt_53_forward_1d(temporal_line, level_frames);
            } else {
                // Haar temporal (default, also the fallback for unsupported wavelets)
                dwt_haar_forward_1d(temporal_line, level_frames);
            }
            level_frames = (level_frames + 1) / 2;
        }
        // Write back temporal coefficients
        for (int t = 0; t < num_frames; t++) {
            gop_data[t][pixel_idx] = temporal_line[t];
        }
    }
    free(temporal_line);
    // Step 2: Apply 2D spatial DWT to each temporal subband
    for (int t = 0; t < num_frames; t++) {
        tav_dwt_2d_forward(gop_data[t], width, height, spatial_levels, spatial_filter);
    }
 }
 // =============================================================================
 // Utility Functions
 // =============================================================================
 /**
 * Suggest a decomposition depth for a frame of the given size.
 *
 * The shorter side is halved (integer division) for as long as it is still
 * at least 32; the number of halvings is the result, limited to 6.
 *
 * @param width  Image width
 * @param height Image height
 * @return       Recommended number of levels (0-6; 0 when the shorter side
 *               is below 32)
 */
 int tav_dwt_calculate_levels(int width, int height) {
    int shortest = height;
    if (width < height) {
        shortest = width;
    }
    // Count halvings while the shorter side stays at or above 32
    int halvings;
    for (halvings = 0; shortest >= 32; shortest /= 2) {
        halvings++;
    }
    // Limit to the maximum depth
    if (halvings > 6) {
        return 6;
    }
    return halvings;
 }
--- a/video_encoder/lib/libtavenc/tav_encoder_dwt.h
+++ b/video_encoder/lib/libtavenc/tav_encoder_dwt.h
@@ -1,88 +0,0 @@
 /**
 * TAV Encoder - Discrete Wavelet Transform Library
 *
 * Public API for multi-resolution wavelet decomposition.
 * Supports multiple wavelet types: CDF 5/3, 9/7, 13/7, DD-4, Haar
 */
 #ifndef TAV_ENCODER_DWT_H
 #define TAV_ENCODER_DWT_H
 #ifdef __cplusplus
 extern "C" {
 #endif
 // =============================================================================
 // Wavelet Type Constants
 // =============================================================================
 #define WAVELET_5_3_REVERSIBLE 0      // CDF 5/3 reversible (lossless capable)
 #define WAVELET_9_7_IRREVERSIBLE 1    // CDF 9/7 JPEG2000 (default, best compression)
 #define WAVELET_BIORTHOGONAL_13_7 2   // CDF 13/7 experimental
 #define WAVELET_DD4 16                // Deslauriers-Dubuc 4-point interpolating
 #define WAVELET_HAAR 255              // Haar (demonstration only)
 // =============================================================================
 // 2D Discrete Wavelet Transform
 // =============================================================================
 /**
 * Apply 2D wavelet transform to spatial data.
 *
 * Uses separable 1D transforms: apply horizontal rows, then vertical columns.
 * Multi-level decomposition creates frequency subbands: LL, LH, HL, HH.
 *
 * @param data         Input/output data array (modified in-place)
 * @param width        Frame width
 * @param height       Frame height
 * @param levels       Number of decomposition levels (0 = no decomposition; use tav_dwt_calculate_levels() to pick a value)
 * @param filter_type  Wavelet type (WAVELET_* constants)
 */
 void tav_dwt_2d_forward(float *data, int width, int height,
                        int levels, int filter_type);
 // =============================================================================
 // 3D Discrete Wavelet Transform (GOP Temporal + Spatial)
 // =============================================================================
 /**
 * Apply 3D wavelet transform to group-of-pictures (GOP).
 *
 * Process:
 * 1. Apply temporal 1D DWT across frames at each spatial position
 * 2. Apply spatial 2D DWT to each temporal subband frame
 *
 * @param gop_data         Array of frame pointers [num_frames]
 * @param width            Frame width
 * @param height           Frame height
 * @param num_frames       Number of frames in GOP
 * @param spatial_levels   Spatial decomposition levels (passed to tav_dwt_2d_forward(); 0 = no spatial decomposition)
 * @param temporal_levels  Temporal decomposition levels
 * @param spatial_filter   Wavelet type for spatial transform
 * @param temporal_filter  Wavelet type for temporal transform (0 = CDF 5/3; any other value, including WAVELET_HAAR, uses Haar)
 */
 void tav_dwt_3d_forward(float **gop_data, int width, int height, int num_frames,
                        int spatial_levels, int temporal_levels,
                        int spatial_filter, int temporal_filter);
 // =============================================================================
 // Utility Functions
 // =============================================================================
 /**
 * Calculate optimal number of decomposition levels for given dimensions.
 *
 * Counts how many times min(width, height) can be halved while it is still
 * at least 32, and caps the result at 6 (so the result is in the range 0-6).
 *
 * @param width   Frame width
 * @param height  Frame height
 * @return        Recommended number of levels
 */
 int tav_dwt_calculate_levels(int width, int height);
 #ifdef __cplusplus
 }
 #endif
 #endif // TAV_ENCODER_DWT_H
--- a/video_encoder/lib/libtavenc/tav_encoder_ezbc.c
+++ b/video_encoder/lib/libtavenc/tav_encoder_ezbc.c
@@ -1,415 +0,0 @@
 /**
 * TAV Encoder - EZBC (Embedded Zero Block Coding) Library
 *
 * Implements binary tree embedded zero block coding for efficient storage
 * of sparse wavelet coefficients. Exploits coefficient sparsity through
 * hierarchical significance testing and progressive bitplane encoding.
 *
 * Extracted from encoder_tav.c as part of library refactoring.
 */
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
 #include <stdbool.h>
 #include <math.h>
 // =============================================================================
 // EZBC Structures
 // =============================================================================
 /**
 * Bitstream writer for bit-level encoding.
 *
 * Bits are packed into each byte starting at the least significant bit
 * (see bitstream_write_bit()); the buffer grows on demand.
 */
 typedef struct {
    uint8_t *data;     // Heap buffer holding the packed bits
    size_t capacity;   // Allocated size of data, in bytes
    size_t byte_pos;   // Index of the byte currently being filled
    uint8_t bit_pos;  // 0-7, current bit position in current byte
 } bitstream_t;
 /**
 * Block structure for EZBC quadtree decomposition.
 *
 * Describes a rectangle of coefficients; a block of width 1 and height 1
 * addresses a single coefficient.
 */
 typedef struct {
    int x, y;           // Top-left position in 2D coefficient array
    int width, height;  // Block dimensions
 } ezbc_block_t;
 /**
 * Queue for EZBC block processing.
 *
 * Growable array of blocks; queue_push() appends at index count and doubles
 * the capacity when the array is full.
 */
 typedef struct {
    ezbc_block_t *blocks;  // Heap array of queued blocks
    size_t count;          // Number of blocks currently stored
    size_t capacity;       // Number of blocks the array can hold
 } block_queue_t;
 /**
 * Track coefficient state for refinement.
 *
 * One entry per coefficient, indexed like the coefficient array
 * (y * width + x).
 */
 typedef struct {
    bool significant;     // Has been marked significant
    int first_bitplane;   // Bitplane where it became significant
 } coeff_state_t;
 /**
 * EZBC encoding context for recursive processing.
 *
 * Bundles everything process_significant_block_recursive() needs while
 * coding one bitplane.
 */
 typedef struct {
    bitstream_t *bs;                    // Output bitstream receiving significance and sign bits
    int16_t *coeffs;                    // Coefficients being coded (row-major, `width` per row)
    coeff_state_t *states;              // Per-coefficient significance state, same indexing as coeffs
    int width;                          // Row length of the coefficient array
    int height;                         // Number of rows in the coefficient array
    int bitplane;                       // Bitplane recorded for coefficients that become significant
    int threshold;                      // A block is significant if any |coeff| >= threshold
    block_queue_t *next_insignificant;  // Receives child blocks that tested insignificant
    block_queue_t *next_significant;    // Receives 1x1 blocks that became significant
    int *sign_count;                    // Counter incremented once per emitted sign bit
 } ezbc_context_t;
 // =============================================================================
 // Bitstream Operations
 // =============================================================================
 /**
 * Prepare a bitstream for writing.
 *
 * Allocates a zero-filled buffer of at least 64 bytes (smaller requests are
 * raised to 64) and resets the write position to the first bit of the first
 * byte. Terminates the process with an error message if allocation fails.
 */
 static void bitstream_init(bitstream_t *bs, size_t initial_capacity) {
    // Never allocate fewer than 64 bytes, so zero-size requests are safe
    const size_t wanted = (initial_capacity < 64) ? (size_t)64 : initial_capacity;
    uint8_t *buffer = (uint8_t *)calloc(1, wanted);
    if (buffer == NULL) {
        fprintf(stderr, "ERROR: Failed to allocate bitstream buffer of size %zu\n", wanted);
        exit(1);
    }
    bs->data = buffer;
    bs->capacity = wanted;
    bs->bit_pos = 0;
    bs->byte_pos = 0;
 }
 /**
 * Write a single bit to bitstream.
 *
 * Bits fill each byte from the least significant bit upwards. When the write
 * position reaches the end of the buffer, the buffer is doubled and the new
 * half is zeroed, so only 1-bits ever need to be stored explicitly.
 * Terminates the process with an error message if the buffer cannot grow.
 *
 * @param bs  Bitstream to append to
 * @param bit Any non-zero value writes a 1, zero writes a 0
 */
 static void bitstream_write_bit(bitstream_t *bs, int bit) {
    // Grow if needed
    if (bs->byte_pos >= bs->capacity) {
        const size_t old_capacity = bs->capacity;
        // Doubling zero would never grow; fall back to the bitstream_init() minimum
        const size_t new_capacity = (old_capacity > 0) ? old_capacity * 2 : 64;
        // Keep the old pointer until realloc is known to have succeeded
        uint8_t *grown = (uint8_t *)realloc(bs->data, new_capacity);
        if (!grown) {
            fprintf(stderr, "ERROR: Failed to grow bitstream buffer to %zu bytes\n", new_capacity);
            exit(1);
        }
        // Clear only the newly allocated memory region
        memset(grown + old_capacity, 0, new_capacity - old_capacity);
        bs->data = grown;
        bs->capacity = new_capacity;
    }
    if (bit) {
        bs->data[bs->byte_pos] |= (uint8_t)(1u << bs->bit_pos);
    }
    bs->bit_pos++;
    if (bs->bit_pos == 8) {
        bs->bit_pos = 0;
        bs->byte_pos++;
    }
 }
 /**
 * Append the low num_bits bits of value to the bitstream, least significant
 * bit first. Nothing is written when num_bits is zero or negative.
 */
 static void bitstream_write_bits(bitstream_t *bs, uint32_t value, int num_bits) {
    int remaining = num_bits;
    while (remaining > 0) {
        // Emit the current lowest bit, then shift the next one into place
        bitstream_write_bit(bs, (int)(value & 1u));
        value >>= 1;
        remaining--;
    }
 }
 /**
 * Number of bytes the bitstream currently occupies: all completed bytes plus
 * one more if the byte being filled already holds at least one bit.
 */
 static size_t bitstream_size(bitstream_t *bs) {
    size_t used_bytes = bs->byte_pos;
    if (bs->bit_pos != 0) {
        used_bytes += 1;  // partially filled trailing byte
    }
    return used_bytes;
 }
 /**
 * Free bitstream buffer.
 *
 * The data pointer is cleared afterwards so that a repeated call is a
 * harmless no-op instead of a double free. The write position is left
 * untouched, so bitstream_size() still reports the size that was written.
 * A NULL bitstream is ignored.
 */
 static void bitstream_free(bitstream_t *bs) {
    if (!bs) return;
    free(bs->data);
    bs->data = NULL;
 }
 // =============================================================================
 // Block Queue Operations
 // =============================================================================
 /**
 * Initialize block queue with initial capacity.
 *
 * Allocates room for 1024 blocks and marks the queue empty. Terminates the
 * process with an error message if the allocation fails.
 */
 static void queue_init(block_queue_t *q) {
    q->capacity = 1024;
    q->count = 0;
    q->blocks = (ezbc_block_t *)malloc(q->capacity * sizeof(ezbc_block_t));
    if (!q->blocks) {
        fprintf(stderr, "ERROR: Failed to allocate EZBC block queue of %zu entries\n", q->capacity);
        exit(1);
    }
 }
 /**
 * Push block onto queue, growing if needed.
 *
 * The block is appended after the last stored entry. When the array is full
 * its capacity is doubled first. Terminates the process with an error
 * message if the array cannot grow.
 */
 static void queue_push(block_queue_t *q, ezbc_block_t block) {
    if (q->count >= q->capacity) {
        // Doubling zero would never grow; fall back to the queue_init() size
        const size_t new_capacity = (q->capacity > 0) ? q->capacity * 2 : 1024;
        // Keep the old pointer until realloc is known to have succeeded
        ezbc_block_t *grown = (ezbc_block_t *)realloc(q->blocks, new_capacity * sizeof(ezbc_block_t));
        if (!grown) {
            fprintf(stderr, "ERROR: Failed to grow EZBC block queue to %zu entries\n", new_capacity);
            exit(1);
        }
        q->blocks = grown;
        q->capacity = new_capacity;
    }
    q->blocks[q->count++] = block;
 }
 /**
 * Free block queue.
 *
 * The blocks pointer is cleared afterwards so that a repeated call is a
 * harmless no-op instead of a double free. A NULL queue is ignored.
 */
 static void queue_free(block_queue_t *q) {
    if (!q) return;
    free(q->blocks);
    q->blocks = NULL;
 }
 // =============================================================================
 // EZBC Helper Functions
 // =============================================================================
 /**
 * Test whether a block is insignificant at the given threshold.
 *
 * Only the part of the block that lies inside the width x height
 * coefficient array is examined.
 *
 * @return true when every examined coefficient satisfies |coeff| < threshold
 *         (also when nothing is examined), false as soon as one does not
 */
 static bool is_zero_block_ezbc(int16_t *coeffs, int width, int height,
                                const ezbc_block_t *block, int threshold) {
    // Clip the block's far edges to the array bounds once, up front
    int row_end = block->y + block->height;
    if (row_end > height) row_end = height;
    int col_end = block->x + block->width;
    if (col_end > width) col_end = width;
    for (int row = block->y; row < row_end; row++) {
        for (int col = block->x; col < col_end; col++) {
            const int magnitude = abs(coeffs[row * width + col]);
            if (!(magnitude < threshold)) {
                return false;
            }
        }
    }
    return true;
 }
 /**
 * Return the largest magnitude among the first count coefficients
 * (0 for an empty array).
 */
 static int find_max_abs_ezbc(int16_t *coeffs, size_t count) {
    int peak = 0;
    const int16_t *const end = coeffs + count;
    for (const int16_t *p = coeffs; p != end; ++p) {
        // Widen to int before negating so -32768 is handled correctly
        const int sample = *p;
        const int magnitude = (sample < 0) ? -sample : sample;
        if (peak < magnitude) {
            peak = magnitude;
        }
    }
    return peak;
 }
 /**
 * Index of the highest set bit of value, i.e. floor(log2(value)) for
 * value >= 1. Returns 0 for value == 0 (and for any value below 2).
 */
 static int get_msb_bitplane(int value) {
    int position = 0;
    // Each shift that still leaves more than 1 moves the top bit one place down
    for (int rest = value; rest > 1; rest >>= 1) {
        position++;
    }
    return position;
 }
 static void process_significant_block_recursive(ezbc_context_t *ctx, ezbc_block_t block);
 /**
 * Code one child of a significant block.
 *
 * Emits a 0 bit and defers the child to the insignificant queue when all of
 * its coefficients are below the threshold; otherwise emits a 1 bit and
 * descends into it.
 */
 static void ezbc_code_child(ezbc_context_t *ctx, ezbc_block_t child) {
    const bool below_threshold =
        is_zero_block_ezbc(ctx->coeffs, ctx->width, ctx->height, &child, ctx->threshold);
    if (below_threshold) {
        bitstream_write_bit(ctx->bs, 0);  // Insignificant
        queue_push(ctx->next_insignificant, child);
        return;
    }
    bitstream_write_bit(ctx->bs, 1);  // Significant
    process_significant_block_recursive(ctx, child);
 }
 /**
 * Descend into a significant block until single coefficients are reached.
 *
 * A 1x1 block emits its sign bit (1 for negative), is marked significant at
 * the current bitplane and is queued for refinement. Larger blocks are split
 * at the midpoint of each dimension and their children are coded in the
 * order top-left, top-right, bottom-left, bottom-right; children that would
 * be empty are skipped.
 */
 static void process_significant_block_recursive(ezbc_context_t *ctx, ezbc_block_t block) {
    const bool single_coeff = (block.width == 1) && (block.height == 1);
    if (single_coeff) {
        const int pos = block.y * ctx->width + block.x;
        const int sign_bit = (ctx->coeffs[pos] < 0) ? 1 : 0;
        bitstream_write_bit(ctx->bs, sign_bit);
        *ctx->sign_count += 1;
        ctx->states[pos].significant = true;
        ctx->states[pos].first_bitplane = ctx->bitplane;
        queue_push(ctx->next_significant, block);
        return;
    }
    // Split point: half the size, but never zero so the first child is non-empty
    const int left_w = (block.width / 2 == 0) ? 1 : block.width / 2;
    const int top_h = (block.height / 2 == 0) ? 1 : block.height / 2;
    const int right_w = block.width - left_w;
    const int bottom_h = block.height - top_h;
    const bool has_right = block.width > left_w;
    const bool has_bottom = block.height > top_h;
    // Top-left child always exists
    const ezbc_block_t top_left = {block.x, block.y, left_w, top_h};
    ezbc_code_child(ctx, top_left);
    if (has_right) {
        const ezbc_block_t top_right = {block.x + left_w, block.y, right_w, top_h};
        ezbc_code_child(ctx, top_right);
    }
    if (has_bottom) {
        const ezbc_block_t bottom_left = {block.x, block.y + top_h, left_w, bottom_h};
        ezbc_code_child(ctx, bottom_left);
    }
    if (has_right && has_bottom) {
        const ezbc_block_t bottom_right = {block.x + left_w, block.y + top_h, right_w, bottom_h};
        ezbc_code_child(ctx, bottom_right);
    }
 }
 // =============================================================================
 // Main EZBC Encoding Function
 // =============================================================================
 /**
 * EZBC encoding for a single channel.
 *
 * Uses two separate queues for insignificant blocks and significant 1x1 blocks.
 * Encodes coefficients progressively from MSB to LSB bitplane.
 *
 * Algorithm:
 * 1. Find MSB bitplane from maximum absolute coefficient value
 * 2. Write header: MSB bitplane (8 bits), width (16 bits), height (16 bits)
 * 3. For each bitplane from MSB to 0:
 *    a. Process insignificant blocks: emit 0 if the block is still
 *       insignificant, 1 if it became significant
 *    b. Newly significant blocks are handed to
 *       process_significant_block_recursive() for subdivision
 *    c. Sign bits for newly significant 1x1 coefficients are not written by
 *       this function directly (presumably emitted by the recursive helper)
 *    d. Process already-significant coefficients: emit the refinement bit of
 *       the current bitplane
 * 4. Hand the bitstream buffer over to the caller
 *
 * @param coeffs  Input quantized coefficients (int16_t array)
 * @param count   Number of coefficients
 * @param width   Frame width
 * @param height  Frame height
 * @param output  Output buffer pointer (allocated by this function); set to
 *                NULL if the significance-state array cannot be allocated
 * @return        Encoded size in bytes, or 0 if the allocation failed
 */
 size_t tav_encode_channel_ezbc(int16_t *coeffs, size_t count, int width, int height,
                                uint8_t **output) {
    // Track coefficient significance. Allocated before the bitstream so that
    // an allocation failure can be reported with nothing left to release.
    coeff_state_t *states = calloc(count, sizeof(coeff_state_t));
    if (states == NULL && count > 0) {
        // calloc(0, ...) may legitimately return NULL, hence the count check
        *output = NULL;
        return 0;
    }
    bitstream_t bs;
    bitstream_init(&bs, count / 4);  // Initial guess
    // Find maximum value to determine MSB bitplane
    int max_abs = find_max_abs_ezbc(coeffs, count);
    int msb_bitplane = get_msb_bitplane(max_abs);
    // Write header: MSB bitplane and dimensions
    bitstream_write_bits(&bs, msb_bitplane, 8);
    bitstream_write_bits(&bs, width, 16);
    bitstream_write_bits(&bs, height, 16);
    // Initialise two queues: insignificant blocks and significant 1x1 blocks.
    // The next_* queues collect the entries for the following bitplane.
    block_queue_t insignificant_queue, next_insignificant;
    block_queue_t significant_queue, next_significant;
    queue_init(&insignificant_queue);
    queue_init(&next_insignificant);
    queue_init(&significant_queue);
    queue_init(&next_significant);
    // Start with root block as insignificant
    ezbc_block_t root = {0, 0, width, height};
    queue_push(&insignificant_queue, root);
    // Process bitplanes from MSB to LSB
    for (int bitplane = msb_bitplane; bitplane >= 0; bitplane--) {
        int threshold = 1 << bitplane;
        int sign_bits_this_bitplane = 0;
        // Process insignificant blocks - check if they become significant
        for (size_t i = 0; i < insignificant_queue.count; i++) {
            ezbc_block_t block = insignificant_queue.blocks[i];
            // Check if this block has any coefficient >= threshold
            if (is_zero_block_ezbc(coeffs, width, height, &block, threshold)) {
                // Still insignificant: emit 0
                bitstream_write_bit(&bs, 0);
                // Keep in insignificant queue for next bitplane
                queue_push(&next_insignificant, block);
            } else {
                // Became significant: emit 1
                bitstream_write_bit(&bs, 1);
                // Use recursive subdivision to process this block and all children
                ezbc_context_t ctx = {
                    .bs = &bs,
                    .coeffs = coeffs,
                    .states = states,
                    .width = width,
                    .height = height,
                    .bitplane = bitplane,
                    .threshold = threshold,
                    .next_insignificant = &next_insignificant,
                    .next_significant = &next_significant,
                    .sign_count = &sign_bits_this_bitplane
                };
                process_significant_block_recursive(&ctx, block);
            }
        }
        // Process significant 1x1 blocks - emit refinement bits.
        // These are appended to next_significant after any entries the
        // recursive helper pushed above, so the statement order matters.
        for (size_t i = 0; i < significant_queue.count; i++) {
            ezbc_block_t block = significant_queue.blocks[i];
            int idx = block.y * width + block.x;
            int abs_val = abs(coeffs[idx]);
            // Emit refinement bit at current bitplane
            int bit = (abs_val >> bitplane) & 1;
            bitstream_write_bit(&bs, bit);
            // Keep in significant queue for next bitplane
            queue_push(&next_significant, block);
        }
        // Swap queues for next bitplane: the current queues are released and
        // take over the storage of the next_* queues, which are then reset.
        queue_free(&insignificant_queue);
        queue_free(&significant_queue);
        insignificant_queue = next_insignificant;
        significant_queue = next_significant;
        queue_init(&next_insignificant);
        queue_init(&next_significant);
    }
    // Free all queues
    queue_free(&insignificant_queue);
    queue_free(&significant_queue);
    queue_free(&next_insignificant);
    queue_free(&next_significant);
    free(states);
    // Ownership of the bitstream buffer passes to the caller via *output
    size_t final_size = bitstream_size(&bs);
    *output = bs.data;
    return final_size;
 }
--- a/video_encoder/lib/libtavenc/tav_encoder_ezbc.h
+++ b/video_encoder/lib/libtavenc/tav_encoder_ezbc.h
@@ -1,61 +0,0 @@
 /**
 * TAV Encoder - EZBC (Embedded Zero Block Coding) Library
 *
 * Public API for EZBC entropy coding of wavelet coefficients.
 */
 #ifndef TAV_ENCODER_EZBC_H
 #define TAV_ENCODER_EZBC_H
 #include <stdint.h>
 #include <stddef.h>
 #ifdef __cplusplus
 extern "C" {
 #endif
 // =============================================================================
 // EZBC Encoding
 // =============================================================================
 /**
 * EZBC encoding for a single channel.
 *
 * Implements quadtree-based embedded zero block coding for efficient storage
 * of sparse wavelet coefficients. Exploits coefficient sparsity through
 * hierarchical significance testing and progressive bitplane encoding.
 *
 * Algorithm:
 * 1. Find MSB bitplane from maximum absolute coefficient value
 * 2. Write header: MSB bitplane (8 bits), width (16 bits), height (16 bits)
 * 3. For each bitplane from MSB to 0:
 *    a. Process insignificant blocks: check if they become significant
 *       - Emit 0 if still insignificant, 1 if became significant
 *    b. For newly significant blocks: recursively subdivide until 1x1
 *       - Emit tree structure: 1=child is significant, 0=child insignificant
 *    c. Emit sign bits for newly significant 1x1 coefficients (1=negative, 0=positive)
 *    d. Process already-significant coefficients: emit refinement bits
 *       - Emit bit at current bitplane for progressive reconstruction
 * 4. Return encoded bitstream
 *
 * Benefits:
 * - Exploits coefficient sparsity (typical: 86.9% zeros in luma, 97.8% in chroma)
 * - Progressive refinement from MSB to LSB
 * - Spatial clustering through quadtree decomposition
 * - No additional entropy coding needed (bitstream is already compressed)
 *
 * @param coeffs  Input quantized coefficients (int16_t array)
 * @param count   Number of coefficients (width × height)
 * @param width   Frame width (must match coefficient array layout)
 * @param height  Frame height (must match coefficient array layout)
 * @param output  Output buffer pointer (allocated by this function, caller must free)
 * @return        Encoded size in bytes (including header)
 */
 size_t tav_encode_channel_ezbc(int16_t *coeffs, size_t count, int width, int height,
                                uint8_t **output);
 #ifdef __cplusplus
 }
 #endif
 #endif // TAV_ENCODER_EZBC_H
--- a/video_encoder/lib/libtavenc/tav_encoder_lib.c
+++ b/video_encoder/lib/libtavenc/tav_encoder_lib.c
--- a/video_encoder/lib/libtavenc/tav_encoder_quantize.c
+++ b/video_encoder/lib/libtavenc/tav_encoder_quantize.c
@@ -1,635 +0,0 @@
 /**
 * TAV Encoder - Quantization Library
 *
 * Provides DWT coefficient quantization with perceptual weighting based on
 * the Human Visual System (HVS). Implements separable 3D quantization for
 * temporal GOP encoding.
 *
 * Extracted from encoder_tav.c as part of library refactoring.
 */
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
 #include <math.h>
 // Forward declaration of encoder context (defined in main encoder)
 typedef struct tav_encoder_s tav_encoder_t;
 // =============================================================================
 // Utility Functions
 // =============================================================================
 // Restrict an integer to the closed range [min, max]; the lower bound is
 // tested first.
 static inline int CLAMP(int x, int min, int max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
 }
 // Restrict a float to the closed range [min, max]; the lower bound is
 // tested first.
 static inline float FCLAMP(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
 }
 // =============================================================================
 // Constants for Perceptual Model
 // =============================================================================
 // Dead-zone quantization scaling factors (applied selectively to luma only).
 // Each factor multiplies the dead-zone threshold passed to the quantisers.
 #define DEAD_ZONE_FINEST_SCALE 1.0f      // Full dead-zone for finest level
 #define DEAD_ZONE_FINE_SCALE 0.5f        // Reduced dead-zone for second-finest level
 // Anisotropy parameters for horizontal vs vertical detail quantization.
 // Indexed by quality level; each table holds 7 entries (indices 0-6).
 static const float ANISOTROPY_MULT[] = {5.1f, 3.8f, 2.7f, 2.0f, 1.5f, 1.2f, 1.0f};
 static const float ANISOTROPY_BIAS[] = {0.4f, 0.3f, 0.2f, 0.1f, 0.0f, 0.0f, 0.0f};
 // Chroma-specific anisotropy (more aggressive quantization); same indexing
 // as the luma tables above.
 static const float ANISOTROPY_MULT_CHROMA[] = {7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f};
 static const float ANISOTROPY_BIAS_CHROMA[] = {1.0f, 0.8f, 0.6f, 0.4f, 0.2f, 0.0f, 0.0f};
 // Detail preservation factors for 2-pixel and 4-pixel structures; applied as
 // multipliers to the luma HL/HH weights within narrow level ranges.
 #define FOUR_PIXEL_DETAILER 0.88f
 #define TWO_PIXEL_DETAILER  0.92f
 // =============================================================================
 // Subband Analysis Helper Functions
 // =============================================================================
 /**
 * Find the decomposition level of the subband holding coefficient (x, y).
 *
 * The region under consideration starts as the full frame and is halved once
 * per level. A coefficient lying outside the top-left quadrant of the current
 * region belongs to a detail band (LH, HL or HH) of that level; otherwise the
 * top-left quadrant becomes the region examined at the next level.
 *
 * Returns: level (1=finest to decomp_levels=coarsest), or 0 when the
 * coefficient is still inside the top-left quadrant after decomp_levels
 * halvings (LL).
 */
 static int get_subband_level_2d(int x, int y, int width, int height, int decomp_levels) {
    int region_w = width;
    int region_h = height;
    int level = 1;
    while (level <= decomp_levels) {
        region_w >>= 1;
        region_h >>= 1;
        // Outside the top-left quadrant: a detail band of this level
        if (x >= region_w || y >= region_h) {
            return level;
        }
        // Inside the top-left quadrant: examine it at the next level
        level++;
    }
    // Never left the top-left quadrant: LL subband
    return 0;
 }
 /**
 * Classify the subband holding coefficient (x, y).
 *
 * Uses the same per-level halving walk as get_subband_level_2d(): the first
 * level at which the coefficient falls outside the top-left quadrant decides
 * the band from the quadrant it lies in.
 *
 * Returns: 0=LL, 1=LH (top-right), 2=HL (bottom-left), 3=HH (bottom-right)
 */
 static int get_subband_type_2d(int x, int y, int width, int height, int decomp_levels) {
    int region_w = width;
    int region_h = height;
    for (int remaining = decomp_levels; remaining >= 1; remaining--) {
        region_w >>= 1;
        region_h >>= 1;
        const int in_right_half = (x >= region_w) ? 1 : 0;
        const int in_bottom_half = (y >= region_h) ? 1 : 0;
        if (in_right_half || in_bottom_half) {
            // right only -> 1 (LH), bottom only -> 2 (HL), both -> 3 (HH)
            return (in_bottom_half << 1) | in_right_half;
        }
        // Top-left quadrant: keep descending
    }
    // Never left the top-left quadrant: LL subband
    return 0;
 }
 /**
 * Legacy wrapper: decomposition level for a row-major linear index.
 * The index is split into column (idx % width) and row (idx / width) and
 * forwarded to get_subband_level_2d().
 */
 static int get_subband_level(int linear_idx, int width, int height, int decomp_levels) {
    const int col = linear_idx % width;
    const int row = linear_idx / width;
    return get_subband_level_2d(col, row, width, height, decomp_levels);
 }
 /**
 * Legacy wrapper: subband type for a row-major linear index.
 * The index is split into column (idx % width) and row (idx / width) and
 * forwarded to get_subband_type_2d().
 */
 static int get_subband_type(int linear_idx, int width, int height, int decomp_levels) {
    const int col = linear_idx % width;
    const int row = linear_idx / width;
    return get_subband_type_2d(col, row, width, height, decomp_levels);
 }
 /**
 * Get temporal subband level for frame index in GOP.
 *
 * The frame index is compared against successively larger boundaries
 * num_frames >> temporal_levels, num_frames >> (temporal_levels - 1), ...,
 * num_frames >> 1. The first boundary the index falls below decides the level.
 *
 * Returns: 0 for coarsest (tLL), temporal_levels for finest (tHH)
 */
 static int get_temporal_subband_level(int frame_idx, int num_frames, int temporal_levels) {
    // Walk the boundaries from the smallest (coarsest) to the largest
    for (int shift = temporal_levels; shift > 0; shift--) {
        const int boundary = num_frames >> shift;
        if (frame_idx < boundary) {
            return temporal_levels - shift;
        }
    }
    // Beyond every boundary: finest level (first decomposition's high-pass)
    return temporal_levels;
 }
 // =============================================================================
 // Perceptual Model Functions (HVS-based weighting)
 // =============================================================================
 // Linear interpolation helper: yields x for a == 0 and y for a == 1,
 // blending the two proportionally for other values of a.
 static float lerp(float x, float y, float a) {
    const float keep = 1.f - a;
    return x * keep + y * a;
 }
 /**
 * Perceptual model for LH subband (horizontal details).
 * Human eyes are more sensitive to horizontal details than vertical.
 * Curve: https://www.desmos.com/calculator/mjlpwqm8ge
 *
 * Piecewise curve: a cubic in level below 4, a straight line from 4 upward.
 * Both pieces evaluate to H4 (1.2) at level == 4.
 *
 * @param quality  Quality level (0-5); not used by the current fixed curve
 * @param level    Normalized decomposition level (1.0-6.0)
 * @return         Perceptual weight multiplier
 */
 static float perceptual_model3_LH(int quality, float level) {
    const float H4 = 1.2f;
    const float K = 2.f; // using fixed value for fixed curve; quantiser will scale it up anyway
    const float K12 = K * 12.f;
    if (level >= 4) {
        // Linear segment for the coarser levels
        return H4 - ((K + 1.f) / 15.f) * (level - 4.f);
    }
    // Cubic segment for the finer levels; C3 makes it meet H4 at level 4
    const float C3 = -1.f / 45.f * (K12 + 92);
    return (-level / 180.f) * (K12 + 5*level*level - 60*level + 252) - C3 + H4;
 }
 /**
 * Perceptual model for HL subband (vertical details).
 * Derived from the LH weight as LH * ANISOTROPY_MULT[quality] +
 * ANISOTROPY_BIAS[quality], evaluated with a fused multiply-add.
 *
 * @param quality  Quality level; used directly as the table index
 *                 (no bounds check is performed here)
 * @param LH       LH subband weight
 * @return         Perceptual weight multiplier
 */
 static float perceptual_model3_HL(int quality, float LH) {
    const float gain = ANISOTROPY_MULT[quality];
    const float offset = ANISOTROPY_BIAS[quality];
    return fmaf(LH, gain, offset);
 }
 /**
 * Perceptual model for HH subband (diagonal details).
 * Blends the LH and HL weights; the blend factor is
 * (sqrt(level) - 1) * 0.5 + 0.5, so it grows with the level.
 *
 * @param LH     LH subband weight
 * @param HL     HL subband weight
 * @param level  Normalized decomposition level
 * @return       Perceptual weight multiplier
 */
 static float perceptual_model3_HH(float LH, float HL, float level) {
    const float root_offset = sqrtf(level) - 1.f;
    const float blend = fmaf(root_offset, 0.5f, 0.5f);
    return lerp(LH, HL, blend);
 }
 /**
 * Perceptual model for LL subband (low-frequency baseband).
 * Contains most image energy, preserve carefully.
 *
 * Takes the LH curve at this level and divides it by the ratio of the LH
 * curve one level below to the LH curve at this level.
 *
 * @param quality  Quality level (0-5)
 * @param level    Normalized decomposition level
 * @return         Perceptual weight multiplier
 */
 static float perceptual_model3_LL(int quality, float level) {
    const float here = perceptual_model3_LH(quality, level);
    const float below = perceptual_model3_LH(quality, level - 1);
    const float ratio = below / here;
    return here / ratio;
 }
 /**
 * Chroma-specific perceptual model base curve.
 * Less critical for human perception, more aggressive quantization.
 *
 * A straight line in level that equals 1.0 at level == 4; its slope is
 * -1 / (0.5 * quality^2 + 1), so higher quality flattens the line.
 *
 * @param quality  Quality level (0-5)
 * @param level    Normalized decomposition level
 * @return         Perceptual weight multiplier
 */
 static float perceptual_model3_chroma_basecurve(int quality, float level) {
    const float steepness = 1.0f / (0.5f * quality * quality + 1.0f);
    return 1.0f - steepness * (level - 4.0f);
 }
 /**
 * Get perceptual weight for a specific subband and level.
 * Implements HVS-optimized frequency weighting.
 *
 * NOTE: This function requires enc->quality_level field from encoder context.
 *
 * @param enc           Encoder context (for quality_level)
 * @param level0        Decomposition level (1-based: 1=finest, decomp_levels=coarsest)
 * @param subband_type  Subband type (0=LL, 1=LH, 2=HL, 3=HH)
 * @param is_chroma     1 for chroma channels, 0 for luma
 * @param max_levels    Maximum decomposition levels
 * @return              Perceptual weight multiplier (chroma results are clamped
 *                      to [1.0, 100.0]; luma results are not clamped)
 */
 static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels);
 /**
 * Get perceptual weight for coefficient at linear index position.
 * Maps linear coefficient index to DWT subband layout.
 *
 * NOTE: This function requires enc->widths[]/enc->heights[] arrays from encoder context.
 *
 * @param enc             Encoder context (for widths/heights arrays and quality_level)
 * @param linear_idx      Linear coefficient index
 * @param width           Frame width
 * @param height          Frame height
 * @param decomp_levels   Number of decomposition levels
 * @param is_chroma       1 for chroma channels, 0 for luma
 * @return                Perceptual weight multiplier (1.0 when perceptual tuning
 *                        is disabled or the index lies beyond the last subband)
 */
 static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma);
 // =============================================================================
 // Quantization Functions
 // =============================================================================
 /**
 * Quantize DWT coefficients with uniform quantization and optional dead-zone.
 *
 * This is the basic quantization function without perceptual weighting.
 * Dead-zone quantization is applied selectively to luma channel only:
 * - HH1 (finest diagonal): full dead-zone
 * - LH1/HL1/HH2: half dead-zone
 * - Coarser levels: no dead-zone (preserve structure)
 *
 * @param coeffs               Input DWT coefficients (float)
 * @param quantised            Output quantized coefficients (int16_t)
 * @param size                 Number of coefficients
 * @param quantiser            Base quantizer value (1-4096)
 * @param dead_zone_threshold  Dead-zone threshold (0.0 = disabled)
 * @param width                Frame width
 * @param height               Frame height
 * @param decomp_levels        Number of decomposition levels
 * @param is_chroma            1 for chroma channels, 0 for luma
 */
 void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
                          float dead_zone_threshold, int width, int height,
                          int decomp_levels, int is_chroma);
 /**
 * Quantize DWT coefficients with per-coefficient perceptual weighting.
 *
 * Applies HVS-optimized frequency weighting to each coefficient based on its
 * position in the DWT subband tree. Implements the full perceptual model with
 * dead-zone quantization for luma.
 *
 * NOTE: This function requires encoder context fields:
 * - enc->widths[]/enc->heights[] for subband layout
 * - enc->quality_level for perceptual model
 * - enc->dead_zone_threshold for dead-zone quantization
 *
 * @param enc             Encoder context
 * @param coeffs          Input DWT coefficients (float)
 * @param quantised       Output quantized coefficients (int16_t)
 * @param size            Number of coefficients
 * @param base_quantiser  Base quantizer value (before perceptual weighting)
 * @param dead_zone_threshold  Dead-zone threshold (0.0 = disabled)
 * @param width           Frame width
 * @param height          Frame height
 * @param decomp_levels   Number of decomposition levels
 * @param is_chroma       1 for chroma channels, 0 for luma
 * @param frame_count     Current frame number (for any frame-dependent logic)
 */
 void tav_quantise_perceptual(tav_encoder_t *enc,
                              float *coeffs, int16_t *quantised, int size,
                              int base_quantiser, float dead_zone_threshold, int width, int height,
                              int decomp_levels, int is_chroma, int frame_count);
 /**
 * Quantize 3D DWT coefficients with SEPARABLE temporal-spatial quantization.
 *
 * After 3D DWT (temporal + spatial), GOP coefficients have this structure:
 * - Temporal DWT applied first → temporal subbands at different levels
 * - Spatial 2D DWT applied to each temporal subband
 *
 * Quantization strategy:
 * 1. Compute temporal base quantizer: tH_base(level) = Qbase * 2^(beta*level^kappa)
 *    - tLL (level 0): coarsest temporal → smallest quantizer
 *    - tHH (highest level): finest temporal → largest quantizer
 * 2. Apply spatial perceptual weighting to tH_base
 * 3. Final quantizer: Q_effective = tH_base × spatial_weight
 *
 * NOTE: This function requires encoder context fields:
 * - enc->encoder_preset for sports mode detection
 * - enc->temporal_decomp_levels for temporal level calculation
 * - enc->verbose for debug output
 * - Plus all fields needed by tav_quantise_perceptual()
 *
 * @param enc             Encoder context
 * @param gop_coeffs      GOP coefficients [frame][pixel] (temporal subbands)
 * @param quantised       Output quantized coefficients [frame][pixel]
 * @param num_frames      Number of temporal subband frames
 * @param spatial_size    Number of spatial coefficients per frame
 * @param base_quantiser  Base quantizer value (before temporal/spatial scaling)
 * @param is_chroma       1 for chroma channels, 0 for luma
 */
 void tav_quantise_3d_dwt(tav_encoder_t *enc,
                         float **gop_coeffs, int16_t **quantised, int num_frames,
                         int spatial_size, int base_quantiser, int is_chroma);
 /**
 * Convert floating-point quantizer to integer with dithering (for bitrate mode).
 *
 * Implements Floyd-Steinberg style error diffusion to avoid quantization
 * artifacts when converting float quantizer values to integers for rate control.
 *
 * NOTE: This function requires encoder context fields:
 * - enc->adjusted_quantiser_y_float (current float quantizer)
 * - enc->dither_accumulator (accumulated error, modified by this function)
 *
 * @param enc  Encoder context
 * @return     Integer quantizer value (0-254)
 */
 int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc);
 // =============================================================================
 // Perceptual Weight Implementation (requires encoder context)
 // =============================================================================
 // NOTE: This implementation requires encoder context (enc->quality_level)
 // Struct definition will be in encoder header when integrated
 #ifndef TAV_ENCODER_QUANTIZE_INTERNAL
 // Forward declare structure access - will be properly defined when integrated
 // Minimal view of the encoder context: only the fields the quantisation
 // routines in this file read or write.
 struct tav_encoder_s {
    int quality_level;                  // passed to the perceptual curves; indexes the chroma anisotropy tables
    int *widths;                        // per-level widths, indexed by get_perceptual_weight_for_position()
    int *heights;                       // per-level heights, indexed alongside widths
    int decomp_levels;                  // spatial decomposition levels forwarded by tav_quantise_3d_dwt()
    float dead_zone_threshold;          // dead-zone threshold forwarded by tav_quantise_3d_dwt()
    int encoder_preset;                 // bit 0 selects the temporal BETA/KAPPA pair in tav_quantise_3d_dwt()
    int temporal_decomp_levels;         // temporal levels used to classify GOP frames
    int verbose;                        // only referenced by commented-out debug output in this file
    int frame_count;                    // base frame number; tav_quantise_3d_dwt() passes frame_count + t
    float adjusted_quantiser_y_float;   // input of tav_quantiser_float_to_int_dithered()
    float dither_accumulator;           // carried rounding error, rewritten by tav_quantiser_float_to_int_dithered()
    int width;                          // frame width forwarded by tav_quantise_3d_dwt()
    int height;                         // frame height forwarded by tav_quantise_3d_dwt()
    int perceptual_tuning;              // zero makes get_perceptual_weight_for_position() return 1.0
 };
 #endif
 /**
 * Compute the perceptual quantiser weight for one subband.
 *
 * level0 is rescaled linearly from [1, max_levels] onto [1.0, 6.0] before it
 * is fed to the perceptual curves. When max_levels is 1 the rescale has no
 * range to map, so the normalised level is fixed at 1.0.
 *
 * @param enc           Encoder context (enc->quality_level is read)
 * @param level0        Decomposition level (1-based: 1=finest, max_levels=coarsest)
 * @param subband_type  Subband type (0=LL, 1=LH, 2=HL, 3=HH)
 * @param is_chroma     Non-zero for chroma channels, 0 for luma
 * @param max_levels    Maximum decomposition levels
 * @return              Weight multiplier; chroma results are clamped to
 *                      [1.0, 100.0], luma results are not clamped
 */
 static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels) {
    // Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity
    // A single decomposition level would make the divisor below zero and
    // turn the level into NaN (0/0), which would then poison the quantiser.
    float level = 1.0f;
    if (max_levels != 1) {
        level = 1.0f + ((level0 - 1.0f) / (max_levels - 1.0f)) * 5.0f;
    }
    // strategy: more horizontal detail
    if (!is_chroma) {
        // LL subband - contains most image energy, preserve carefully
        if (subband_type == 0)
            return perceptual_model3_LL(enc->quality_level, level);
        // LH subband - horizontal details (human eyes more sensitive)
        float LH = perceptual_model3_LH(enc->quality_level, level);
        if (subband_type == 1)
            return LH;
        // HL subband - vertical details
        float HL = perceptual_model3_HL(enc->quality_level, LH);
        // Extra multiplier applied to HL/HH around normalised levels 2 and 3
        float detailer = 1.0f;
        if (2.2f >= level && level >= 1.8f) {
            detailer = TWO_PIXEL_DETAILER;
        } else if (3.2f >= level && level >= 2.8f) {
            detailer = FOUR_PIXEL_DETAILER;
        }
        if (subband_type == 2)
            return HL * detailer;
        // HH subband - diagonal details
        return perceptual_model3_HH(LH, HL, level) * detailer;
    }
    // CHROMA CHANNELS: Less critical for human perception, more aggressive quantisation
    if (subband_type == 0) { // LL chroma - still important but less than luma
        return 1.0f;
    }
    float base = perceptual_model3_chroma_basecurve(enc->quality_level, level - 1);
    if (subband_type == 1) { // LH chroma - horizontal chroma details
        return FCLAMP(base, 1.0f, 100.0f);
    }
    if (subband_type == 2) { // HL chroma - vertical chroma details (even less critical)
        return FCLAMP(base * ANISOTROPY_MULT_CHROMA[enc->quality_level], 1.0f, 100.0f);
    }
    // HH chroma - diagonal chroma details (most aggressive)
    return FCLAMP(base * ANISOTROPY_MULT_CHROMA[enc->quality_level] + ANISOTROPY_BIAS_CHROMA[enc->quality_level], 1.0f, 100.0f);
 }
 /**
 * Look up the perceptual weight for a coefficient given its linear index.
 *
 * The index is matched against consecutive ranges: first the LL range sized
 * by enc->widths/heights[decomp_levels], then, for each level from
 * decomp_levels down to 1, three equally sized ranges for LH, HL and HH.
 * The width and height parameters are not consulted.
 *
 * Returns 1.0 when enc->perceptual_tuning is zero or when the index lies
 * beyond the last range.
 */
 static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma) {
    // Perceptual tuning disabled: uniform quantization (weight = 1.0)
    if (!enc->perceptual_tuning) {
        return 1.0f;
    }
    // Running upper bound (exclusive) of the range being tested; starts at
    // the end of the LL range
    int range_end = enc->widths[decomp_levels] * enc->heights[decomp_levels];
    if (linear_idx < range_end) {
        return get_perceptual_weight(enc, decomp_levels, 0, is_chroma, decomp_levels);
    }
    for (int level = decomp_levels; level >= 1; level--) {
        const int dim_index = decomp_levels - level + 1;
        const int band_size = enc->widths[dim_index] * enc->heights[dim_index];
        // Subband types in stored order: 1=LH, 2=HL, 3=HH
        for (int band = 1; band <= 3; band++) {
            range_end += band_size;
            if (linear_idx < range_end) {
                return get_perceptual_weight(enc, level, band, is_chroma, decomp_levels);
            }
        }
    }
    // Fallback for out-of-bounds indices
    return 1.0f;
 }
 // =============================================================================
 // Quantization Function Implementations
 // =============================================================================
 /**
 * Uniform quantisation: every coefficient is divided by the same step
 * (quantiser clamped to [1, 4096]), optionally zeroed by a dead-zone, then
 * rounded half away from zero and saturated to the int16_t range.
 *
 * The dead-zone is applied to luma only and only when the threshold is
 * positive: full threshold for HH at level 1, half threshold for LH/HL at
 * level 1 and HH at level 2, none elsewhere.
 */
 void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
                          float dead_zone_threshold, int width, int height,
                          int decomp_levels, int is_chroma) {
    const float step = FCLAMP((float)quantiser, 1.0f, 4096.0f);
    const int dead_zone_active = (dead_zone_threshold > 0.0f) && !is_chroma;
    // Scalar implementation (AVX-512 version would go in separate optimized module)
    for (int idx = 0; idx < size; idx++) {
        float scaled = coeffs[idx] / step;
        if (dead_zone_active) {
            const int level = get_subband_level(idx, width, height, decomp_levels);
            const int band = get_subband_type(idx, width, height, decomp_levels);
            float cutoff = 0.0f;
            if (level == 1 && band == 3) {
                // HH1: full dead-zone
                cutoff = dead_zone_threshold * DEAD_ZONE_FINEST_SCALE;
            } else if ((level == 1 && (band == 1 || band == 2)) || (level == 2 && band == 3)) {
                // LH1, HL1, HH2: half dead-zone
                cutoff = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
            }
            if (fabsf(scaled) <= cutoff) {
                scaled = 0.0f;
            }
        }
        // Round half away from zero, then saturate to int16_t
        const float rounding = (scaled >= 0) ? 0.5f : -0.5f;
        quantised[idx] = (int16_t)CLAMP((int)(scaled + rounding), -32768, 32767);
    }
 }
 /**
 * Perceptual quantisation: each coefficient is divided by the base quantiser
 * (clamped to [1, 4096]) multiplied by its position-dependent weight from
 * get_perceptual_weight_for_position(), optionally zeroed by a dead-zone,
 * then rounded half away from zero and saturated to the int16_t range.
 *
 * The dead-zone is applied to luma only and only when the threshold is
 * positive: full threshold for HH at level 1, half threshold for LH/HL at
 * level 1 and HH at level 2, none elsewhere. frame_count is not used.
 */
 void tav_quantise_perceptual(tav_encoder_t *enc,
                              float *coeffs, int16_t *quantised, int size,
                              int base_quantiser, float dead_zone_threshold, int width, int height,
                              int decomp_levels, int is_chroma, int frame_count) {
    const float base_step = FCLAMP((float)base_quantiser, 1.0f, 4096.0f);
    const int dead_zone_active = (dead_zone_threshold > 0.0f) && !is_chroma;
    for (int idx = 0; idx < size; idx++) {
        // Weight depends on where the coefficient sits in the DWT layout
        const float weight = get_perceptual_weight_for_position(enc, idx, width, height, decomp_levels, is_chroma);
        const float step = base_step * weight;
        float scaled = coeffs[idx] / step;
        if (dead_zone_active) {
            const int level = get_subband_level(idx, width, height, decomp_levels);
            const int band = get_subband_type(idx, width, height, decomp_levels);
            float cutoff = 0.0f;
            if (level == 1 && band == 3) {
                // HH1: full dead-zone
                cutoff = dead_zone_threshold * DEAD_ZONE_FINEST_SCALE;
            } else if ((level == 1 && (band == 1 || band == 2)) || (level == 2 && band == 3)) {
                // LH1, HL1, HH2: half dead-zone
                cutoff = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
            }
            if (fabsf(scaled) <= cutoff) {
                scaled = 0.0f;
            }
        }
        // Round half away from zero, then saturate to int16_t
        const float rounding = (scaled >= 0) ? 0.5f : -0.5f;
        quantised[idx] = (int16_t)CLAMP((int)(scaled + rounding), -32768, 32767);
    }
 }
 /**
 * Separable temporal-spatial quantisation of a GOP.
 *
 * For every temporal subband frame t the base quantiser is scaled by
 * 2^(BETA * level^KAPPA), where level comes from
 * get_temporal_subband_level(), rounded and clamped to [1, 255]. The frame
 * is then quantised spatially with tav_quantise_perceptual().
 */
 void tav_quantise_3d_dwt(tav_encoder_t *enc,
                          float **gop_coeffs, int16_t **quantised, int num_frames,
                          int spatial_size, int base_quantiser, int is_chroma) {
    // Sports preset (encoder_preset bit 0): use finer temporal quantisation (less aggressive)
    const int sports_preset = enc->encoder_preset & 0x01;
    const float BETA = sports_preset ? 0.0f : 0.6f;
    const float KAPPA = sports_preset ? 1.0f : 1.14f;
    // Process each temporal subband independently (separable approach)
    for (int t = 0; t < num_frames; t++) {
        // Step 1: temporal subband level of this frame
        const int temporal_level = get_temporal_subband_level(t, num_frames, enc->temporal_decomp_levels);
        // Step 2: temporally scaled base quantiser (exponential scaling)
        const float temporal_scale = powf(2.0f, BETA * powf(temporal_level, KAPPA));
        const int temporal_base_quantiser = CLAMP((int)roundf(base_quantiser * temporal_scale), 1, 255);
        // Step 3: spatial quantisation within this temporal subband.
        // tav_quantise_perceptual() is always used here; the per-coefficient
        // weight lookup falls back to 1.0 when enc->perceptual_tuning is zero.
        tav_quantise_perceptual(
            enc,
            gop_coeffs[t],            // Input: spatial coefficients for this temporal subband
            quantised[t],             // Output: quantised spatial coefficients
            spatial_size,             // Number of spatial coefficients
            temporal_base_quantiser,  // Temporally-scaled base quantiser
            enc->dead_zone_threshold, // Dead zone threshold
            enc->width,               // Frame width
            enc->height,              // Frame height
            enc->decomp_levels,       // Spatial decomposition levels
            is_chroma,                // Is chroma channel
            enc->frame_count + t      // Frame number
        );
        /*if (enc->verbose && (t == 0 || t == num_frames - 1)) {
            printf("  Temporal subband %d: level=%d, tH_base=%d\n",
                   t, temporal_level, temporal_base_quantiser);
        }*/
    }
 }
 /**
 * Convert the float luma quantiser to an integer with error feedback.
 *
 * Adds enc->dither_accumulator to enc->adjusted_quantiser_y_float, rounds
 * the sum, stores half of the rounding error back into the accumulator for
 * the next call, and returns the rounded value clamped to [0, 254]. The
 * error is measured before the clamp is applied.
 */
 int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc) {
    // Target value including the error carried over from the previous call
    const float target = enc->adjusted_quantiser_y_float + enc->dither_accumulator;
    // Round to nearest integer
    const int rounded = (int)(target + 0.5f);
    // Diffuse 50% of this call's rounding error to the next frame
    enc->dither_accumulator = (target - (float)rounded) * 0.5f;
    // Clamp to valid range
    return CLAMP(rounded, 0, 254);
 }
--- a/video_encoder/lib/libtavenc/tav_encoder_quantize.h
+++ b/video_encoder/lib/libtavenc/tav_encoder_quantize.h
@@ -1,138 +0,0 @@
 /**
 * TAV Encoder - Quantization Library
 *
 * Public API for DWT coefficient quantization with perceptual weighting.
 */
 #ifndef TAV_ENCODER_QUANTIZE_H
 #define TAV_ENCODER_QUANTIZE_H
 #include <stdint.h>
 #ifdef __cplusplus
 extern "C" {
 #endif
 // Forward declaration of encoder context (defined in main encoder)
 typedef struct tav_encoder_s tav_encoder_t;
 // =============================================================================
 // Uniform Quantization
 // =============================================================================
 /**
 * Quantize DWT coefficients with uniform quantization and optional dead-zone.
 *
 * This is the basic quantization function without perceptual weighting.
 * Dead-zone quantization is applied selectively to luma channel only:
 * - HH1 (finest diagonal): full dead-zone
 * - LH1/HL1/HH2: half dead-zone
 * - Coarser levels: no dead-zone (preserve structure)
 *
 * @param coeffs               Input DWT coefficients (float)
 * @param quantised            Output quantized coefficients (int16_t)
 * @param size                 Number of coefficients
 * @param quantiser            Base quantizer value (1-4096)
 * @param dead_zone_threshold  Dead-zone threshold (0.0 = disabled)
 * @param width                Frame width
 * @param height               Frame height
 * @param decomp_levels        Number of decomposition levels
 * @param is_chroma            1 for chroma channels, 0 for luma
 */
 void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
                          float dead_zone_threshold, int width, int height,
                          int decomp_levels, int is_chroma);
 // =============================================================================
 // Perceptual Quantization
 // =============================================================================
 /**
 * Quantize DWT coefficients with per-coefficient perceptual weighting.
 *
 * Applies HVS-optimized frequency weighting to each coefficient based on its
 * position in the DWT subband tree. Implements the full perceptual model with
 * dead-zone quantization for luma.
 *
 * NOTE: This function requires encoder context fields:
 * - enc->widths[]/enc->heights[] for subband layout
 * - enc->quality_level for perceptual model
 * - enc->dead_zone_threshold for dead-zone quantization
 *
 * @param enc             Encoder context
 * @param coeffs          Input DWT coefficients (float)
 * @param quantised       Output quantized coefficients (int16_t)
 * @param size            Number of coefficients
 * @param base_quantiser  Base quantizer value (before perceptual weighting)
 * @param dead_zone_threshold  Dead-zone threshold (0.0 = disabled)
 * @param width           Frame width
 * @param height          Frame height
 * @param decomp_levels   Number of decomposition levels
 * @param is_chroma       1 for chroma channels, 0 for luma
 * @param frame_count     Current frame number (for any frame-dependent logic)
 */
 void tav_quantise_perceptual(tav_encoder_t *enc,
                              float *coeffs, int16_t *quantised, int size,
                              int base_quantiser, float dead_zone_threshold, int width, int height,
                              int decomp_levels, int is_chroma, int frame_count);
 // =============================================================================
 // 3D GOP Quantization
 // =============================================================================
 /**
 * Quantize 3D DWT coefficients with SEPARABLE temporal-spatial quantization.
 *
 * After 3D DWT (temporal + spatial), GOP coefficients have this structure:
 * - Temporal DWT applied first → temporal subbands at different levels
 * - Spatial 2D DWT applied to each temporal subband
 *
 * Quantization strategy:
 * 1. Compute temporal base quantizer: tH_base(level) = Qbase * 2^(beta*level^kappa)
 *    - tLL (level 0): coarsest temporal → smallest quantizer
 *    - tHH (highest level): finest temporal → largest quantizer
 * 2. Apply spatial perceptual weighting to tH_base
 * 3. Final quantizer: Q_effective = tH_base × spatial_weight
 *
 * NOTE: This function requires encoder context fields:
 * - enc->encoder_preset for sports mode detection
 * - enc->temporal_decomp_levels for temporal level calculation
 * - enc->verbose for debug output
 * - Plus all fields needed by tav_quantise_perceptual()
 *
 * @param enc             Encoder context
 * @param gop_coeffs      GOP coefficients [frame][pixel] (temporal subbands)
 * @param quantised       Output quantized coefficients [frame][pixel]
 * @param num_frames      Number of temporal subband frames
 * @param spatial_size    Number of spatial coefficients per frame
 * @param base_quantiser  Base quantizer value (before temporal/spatial scaling)
 * @param is_chroma       1 for chroma channels, 0 for luma
 */
 void tav_quantise_3d_dwt(tav_encoder_t *enc,
                         float **gop_coeffs, int16_t **quantised, int num_frames,
                         int spatial_size, int base_quantiser, int is_chroma);
 // =============================================================================
 // Rate Control
 // =============================================================================
 /**
 * Convert floating-point quantizer to integer with dithering (for bitrate mode).
 *
 * Implements Floyd-Steinberg style error diffusion to avoid quantization
 * artifacts when converting float quantizer values to integers for rate control.
 *
 * The float quantizer plus the accumulated error is rounded with
 * (int)(x + 0.5f). Half of the resulting rounding error is written back to
 * enc->dither_accumulator for the next call. The error is measured before the
 * integer is clamped to the 0-254 range.
 *
 * NOTE: This function requires encoder context fields:
 * - enc->adjusted_quantiser_y_float (current float quantizer)
 * - enc->dither_accumulator (accumulated error, modified by this function)
 *
 * @param enc  Encoder context
 * @return     Integer quantizer value (0-254)
 */
 int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc);
 #ifdef __cplusplus
 }
 #endif
 #endif // TAV_ENCODER_QUANTIZE_H
--- a/video_encoder/lib/libtavenc/tav_encoder_tile.c
+++ b/video_encoder/lib/libtavenc/tav_encoder_tile.c
@@ -1,159 +0,0 @@
 /**
 * TAV Encoder Library - Tile Processing Implementation
 */
 #include "tav_encoder_tile.h"
 #include "tav_encoder_dwt.h"
 #include <string.h>
 #include <stdlib.h>
 #define CLAMP(x, min, max) ((x) < (min) ? (min) : ((x) > (max) ? (max) : (x)))
 /**
 * Reflect a coordinate that lies outside [0, limit) back into the frame and
 * clamp the result to [0, limit - 1].
 *
 * Negative positions map to their absolute value; positions at or beyond
 * `limit` map to limit - 1 - (pos - limit).
 */
 static int tav_tile_reflect(int pos, int limit) {
    if (pos < 0) {
        pos = -pos;
    } else if (pos >= limit) {
        pos = limit - 1 - (pos - limit);
    }
    return CLAMP(pos, 0, limit - 1);
 }
 /**
 * Copy one padded tile (core plus TAV_TILE_MARGIN samples on each side) out of
 * the three full-frame planes.
 *
 * Samples that would fall outside the frame are taken from the reflected
 * position. When the core columns of the tile lie entirely inside the frame,
 * the core part of each row is copied with memcpy and only the margins are
 * resolved sample by sample; otherwise the whole row is resolved per sample.
 *
 * @param frame_y, frame_co, frame_cg     Source planes, frame_width samples per row
 * @param frame_width, frame_height       Source plane dimensions
 * @param tile_x, tile_y                  Tile indices (multiplied by TAV_TILE_SIZE_X/Y)
 * @param padded_y, padded_co, padded_cg  Destination planes, TAV_PADDED_TILE_SIZE_X
 *                                        samples per row, TAV_PADDED_TILE_SIZE_Y rows
 */
 void tav_extract_padded_tile(const float *frame_y, const float *frame_co, const float *frame_cg,
                             int frame_width, int frame_height,
                             int tile_x, int tile_y,
                             float *padded_y, float *padded_co, float *padded_cg) {
    const int tile_origin_x = tile_x * TAV_TILE_SIZE_X;
    const int tile_origin_y = tile_y * TAV_TILE_SIZE_Y;
    // The bulk copy is only valid when every core column exists in the source frame
    const int core_fits_in_row = (tile_origin_x >= 0) &&
                                 (tile_origin_x + TAV_TILE_SIZE_X <= frame_width);
    const size_t core_bytes = TAV_TILE_SIZE_X * sizeof(float);
    for (int row = 0; row < TAV_PADDED_TILE_SIZE_Y; row++) {
        // Source row for this padded row, reflected at the top/bottom frame edges
        const int src_row = tav_tile_reflect(tile_origin_y + row - TAV_TILE_MARGIN, frame_height);
        const int dst_base = row * TAV_PADDED_TILE_SIZE_X;
        const int src_base = src_row * frame_width;
        int col = 0;
        while (col < TAV_PADDED_TILE_SIZE_X) {
            if (core_fits_in_row && col == TAV_TILE_MARGIN) {
                // Core region: one contiguous copy per plane, then jump to the right margin
                const int src_core = src_base + tile_origin_x;
                memcpy(&padded_y[dst_base + col], &frame_y[src_core], core_bytes);
                memcpy(&padded_co[dst_base + col], &frame_co[src_core], core_bytes);
                memcpy(&padded_cg[dst_base + col], &frame_cg[src_core], core_bytes);
                col += TAV_TILE_SIZE_X;
                continue;
            }
            // Margin sample (or any sample of an edge tile): resolve the column individually
            const int src_col = tav_tile_reflect(tile_origin_x + col - TAV_TILE_MARGIN, frame_width);
            const int src_at = src_base + src_col;
            const int dst_at = dst_base + col;
            padded_y[dst_at] = frame_y[src_at];
            padded_co[dst_at] = frame_co[src_at];
            padded_cg[dst_at] = frame_cg[src_at];
            col++;
        }
    }
 }
 // Use existing 2D DWT from tav_encoder_dwt.c
 // For padded tiles, we simply call the existing function with tile dimensions
 //
 // tile_data   - padded tile buffer, forwarded unchanged to tav_dwt_2d_forward
 // levels      - forwarded unchanged
 // filter_type - forwarded unchanged
 // The width and height passed on are always TAV_PADDED_TILE_SIZE_X and
 // TAV_PADDED_TILE_SIZE_Y, regardless of how much of the tile holds real image data.
 void tav_dwt_2d_forward_padded_tile(float *tile_data, int levels, int filter_type) {
    // Use the existing 2D DWT with padded tile dimensions
    tav_dwt_2d_forward(tile_data, TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y,
                       levels, filter_type);
 }
 // Placeholder for the inverse padded-tile DWT.
 // WARNING: this function currently performs no work: tile_data is left
 // untouched and no error is reported to the caller.
 void tav_dwt_2d_inverse_padded_tile(float *tile_data, int levels, int filter_type) {
    // Note: Inverse transform not yet implemented in library for arbitrary dimensions
    // For now, this is a placeholder - decoder uses different code path
    // The casts below only mark the parameters as intentionally unused
    (void)tile_data;
    (void)levels;
    (void)filter_type;
 }
 /**
 * Copy the central TAV_TILE_SIZE_X x TAV_TILE_SIZE_Y region of a padded tile
 * into a tightly packed buffer, dropping TAV_TILE_MARGIN samples on every side.
 *
 * @param padded_data  Source tile, TAV_PADDED_TILE_SIZE_X samples per row
 * @param core_data    Destination, TAV_TILE_SIZE_X samples per row
 */
 void tav_crop_tile_margins(const float *padded_data, float *core_data) {
    const size_t row_bytes = TAV_TILE_SIZE_X * sizeof(float);
    // First core sample: skip TAV_TILE_MARGIN rows, then TAV_TILE_MARGIN columns
    const float *src = padded_data + TAV_TILE_MARGIN * TAV_PADDED_TILE_SIZE_X + TAV_TILE_MARGIN;
    float *dst = core_data;
    for (int row = 0; row < TAV_TILE_SIZE_Y; row++) {
        memcpy(dst, src, row_bytes);
        src += TAV_PADDED_TILE_SIZE_X;  // next padded row
        dst += TAV_TILE_SIZE_X;         // next packed row
    }
 }
 /**
 * Copy the top-left actual_width x actual_height part of a padded tile's core
 * region into a tightly packed buffer (actual_width samples per row).
 *
 * Non-positive dimensions are treated as "nothing to copy". Without this
 * guard a negative actual_width would be converted to a huge size_t length
 * in the memcpy call.
 *
 * @param padded_data   Source tile, TAV_PADDED_TILE_SIZE_X samples per row
 * @param core_data     Destination, actual_width * actual_height samples
 * @param actual_width  Number of samples to copy per row
 * @param actual_height Number of rows to copy
 */
 void tav_crop_tile_margins_edge(const float *padded_data, float *core_data,
                                int actual_width, int actual_height) {
    if (actual_width <= 0 || actual_height <= 0) {
        return;
    }
    const size_t row_bytes = (size_t)actual_width * sizeof(float);
    for (int y = 0; y < actual_height; y++) {
        // Skip the top margin rows and the left margin columns of the padded tile
        const int padded_row = (y + TAV_TILE_MARGIN) * TAV_PADDED_TILE_SIZE_X + TAV_TILE_MARGIN;
        const int core_row = y * actual_width;
        memcpy(&core_data[core_row], &padded_data[padded_row], row_bytes);
    }
 }
 /**
 * Report the size of the tile at (tile_x, tile_y): the standard tile size, or
 * whatever is left of the frame from the tile's origin if that is smaller.
 *
 * @param frame_width, frame_height  Frame dimensions
 * @param tile_x, tile_y             Tile indices
 * @param tile_width, tile_height    Outputs
 */
 void tav_get_tile_dimensions(int frame_width, int frame_height,
                             int tile_x, int tile_y,
                             int *tile_width, int *tile_height) {
    // Samples between the tile origin and the right / bottom frame edge
    const int left_over_x = frame_width - tile_x * TAV_TILE_SIZE_X;
    const int left_over_y = frame_height - tile_y * TAV_TILE_SIZE_Y;
    if (left_over_x < TAV_TILE_SIZE_X) {
        *tile_width = left_over_x;
    } else {
        *tile_width = TAV_TILE_SIZE_X;
    }
    if (left_over_y < TAV_TILE_SIZE_Y) {
        *tile_height = left_over_y;
    } else {
        *tile_height = TAV_TILE_SIZE_Y;
    }
 }
--- a/video_encoder/lib/libtavenc/tav_encoder_tile.h
+++ b/video_encoder/lib/libtavenc/tav_encoder_tile.h
@@ -1,103 +0,0 @@
 /**
 * TAV Encoder Library - Tile Processing
 *
 * Functions for padded tile extraction and DWT processing.
 * Used when video dimensions exceed monoblock threshold (720x576).
 */
 #ifndef TAV_ENCODER_TILE_H
 #define TAV_ENCODER_TILE_H
 #include <stdint.h>
 #include <stddef.h>
 #include "../../include/tav_encoder_lib.h"
 // Tile dimensions (from header)
 // TAV_TILE_SIZE_X = 640, TAV_TILE_SIZE_Y = 540
 // TAV_PADDED_TILE_SIZE_X = 704, TAV_PADDED_TILE_SIZE_Y = 604
 // TAV_TILE_MARGIN = 32
 /**
 * Extract a padded tile from full-frame YCoCg buffers.
 *
 * Extracts a tile at position (tile_x, tile_y) with TAV_TILE_MARGIN pixels
 * of padding on all sides for seamless DWT processing. Uses symmetric
 * extension (mirroring) at frame boundaries.
 *
 * @param frame_y       Full frame Y channel
 * @param frame_co      Full frame Co channel
 * @param frame_cg      Full frame Cg channel
 * @param frame_width   Full frame width
 * @param frame_height  Full frame height
 * @param tile_x        Tile X index (0-based)
 * @param tile_y        Tile Y index (0-based)
 * @param padded_y      Output: Padded tile Y (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y floats)
 * @param padded_co     Output: Padded tile Co
 * @param padded_cg     Output: Padded tile Cg
 */
 void tav_extract_padded_tile(const float *frame_y, const float *frame_co, const float *frame_cg,
                             int frame_width, int frame_height,
                             int tile_x, int tile_y,
                             float *padded_y, float *padded_co, float *padded_cg);
 /**
 * Apply 2D DWT forward transform to a padded tile.
 *
 * Uses fixed PADDED_TILE_SIZE dimensions (704x604) for optimal performance.
 *
 * @param tile_data     Tile data (modified in-place)
 * @param levels        Number of decomposition levels
 * @param filter_type   Wavelet filter type (0=CDF 5/3, 1=CDF 9/7, etc.)
 */
 void tav_dwt_2d_forward_padded_tile(float *tile_data, int levels, int filter_type);
 /**
 * Apply 2D DWT inverse transform to a padded tile.
 *
 * WARNING: the implementation in tav_encoder_tile.c is currently a placeholder.
 * It ignores all three arguments and leaves tile_data unmodified.
 *
 * @param tile_data     Tile data (modified in-place)
 * @param levels        Number of decomposition levels
 * @param filter_type   Wavelet filter type
 */
 void tav_dwt_2d_inverse_padded_tile(float *tile_data, int levels, int filter_type);
 /**
 * Crop a padded tile to its core region (removing margins).
 *
 * Extracts the central TAV_TILE_SIZE_X × TAV_TILE_SIZE_Y region from a padded tile.
 *
 * @param padded_data   Padded tile (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y)
 * @param core_data     Output: Core tile (TILE_SIZE_X * TILE_SIZE_Y)
 */
 void tav_crop_tile_margins(const float *padded_data, float *core_data);
 /**
 * Crop a padded tile to actual dimensions for edge tiles.
 *
 * For tiles at the right/bottom edges of a frame, the actual tile may be
 * smaller than TILE_SIZE_X × TILE_SIZE_Y. This function handles that case.
 *
 * @param padded_data   Padded tile (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y)
 * @param core_data     Output: Core tile data
 * @param actual_width  Actual tile width (may be < TILE_SIZE_X for edge tiles)
 * @param actual_height Actual tile height (may be < TILE_SIZE_Y for edge tiles)
 */
 void tav_crop_tile_margins_edge(const float *padded_data, float *core_data,
                                int actual_width, int actual_height);
 /**
 * Calculate actual tile dimensions for a given tile position.
 *
 * Edge tiles may be smaller than the standard tile size.
 *
 * @param frame_width   Full frame width
 * @param frame_height  Full frame height
 * @param tile_x        Tile X index
 * @param tile_y        Tile Y index
 * @param tile_width    Output: Actual tile width
 * @param tile_height   Output: Actual tile height
 */
 void tav_get_tile_dimensions(int frame_width, int frame_height,
                             int tile_x, int tile_y,
                             int *tile_width, int *tile_height);
 #endif // TAV_ENCODER_TILE_H
--- a/video_encoder/lib/libtavenc/tav_encoder_utils.c
+++ b/video_encoder/lib/libtavenc/tav_encoder_utils.c
@@ -1,441 +0,0 @@
 /**
 * TAV Encoder - Utilities Library
 *
 * Common utility functions and helpers used across the encoder.
 * Includes math utilities, clamping, filename generation, etc.
 *
 * Extracted from encoder_tav.c as part of library refactoring.
 */
 #define _POSIX_C_SOURCE 200112L
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
 #include <time.h>
 #include <math.h>
 // =============================================================================
 // Math Utilities
 // =============================================================================
 /**
 * Restrict an integer to the interval [min, max].
 * The lower bound is tested first.
 */
 int tav_clamp_int(int x, int min, int max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
 }
 /**
 * Restrict a float to the interval [min, max].
 * The lower bound is tested first; a value that compares neither below min
 * nor above max is returned unchanged.
 */
 float tav_clamp_float(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
 }
 /**
 * Restrict a double to the interval [min, max].
 * The lower bound is tested first; a value that compares neither below min
 * nor above max is returned unchanged.
 */
 double tav_clamp_double(double x, double min, double max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
 }
 /**
 * Round double to nearest integer.
 */
 int tav_iround(double v) {
    return (int)floor(v + 0.5);
 }
 /**
 * Blend two floats linearly.
 * @param a  Value returned for t = 0
 * @param b  Value returned for t = 1
 * @param t  Blend factor; it is not clamped by this function
 * @return   a * (1 - t) + b * t
 */
 float tav_lerp(float a, float b, float t) {
    const float weight_a = 1.0f - t;
    return a * weight_a + b * t;
 }
 /**
 * Blend two doubles linearly: a * (1 - t) + b * t.
 * The factor t is not clamped by this function.
 */
 double tav_lerp_double(double a, double b, double t) {
    const double weight_a = 1.0 - t;
    return a * weight_a + b * t;
 }
 /**
 * Return the smaller of two integers.
 */
 int tav_min_int(int a, int b) {
    if (a < b) {
        return a;
    }
    return b;
 }
 /**
 * Return the larger of two integers.
 */
 int tav_max_int(int a, int b) {
    if (a > b) {
        return a;
    }
    return b;
 }
 /**
 * Return the smaller of two floats.
 * When the comparison a < b is false, b is returned.
 */
 float tav_min_float(float a, float b) {
    if (a < b) {
        return a;
    }
    return b;
 }
 /**
 * Return the larger of two floats.
 * When the comparison a > b is false, b is returned.
 */
 float tav_max_float(float a, float b) {
    if (a > b) {
        return a;
    }
    return b;
 }
 /**
 * Return the magnitude of an integer (negates negative inputs).
 */
 int tav_abs_int(int x) {
    if (x < 0) {
        return -x;
    }
    return x;
 }
 /**
 * Return the magnitude of a float (negates inputs that compare below zero).
 */
 float tav_abs_float(float x) {
    if (x < 0.0f) {
        return -x;
    }
    return x;
 }
 /**
 * Sign of an integer: 1 for positive, -1 for negative, 0 for zero.
 */
 int tav_sign(int x) {
    if (x > 0) {
        return 1;
    }
    if (x < 0) {
        return -1;
    }
    return 0;
 }
 /**
 * Return 1 when x is a positive power of two, 0 otherwise.
 */
 int tav_is_power_of_2(int x) {
    if (x <= 0) {
        return 0;
    }
    // A power of two has a single set bit, so clearing the lowest set bit leaves zero
    return (x & (x - 1)) == 0;
 }
 /**
 * Return the smallest power of two that is >= x.
 * Inputs <= 0 yield 1.
 */
 int tav_next_power_of_2(int x) {
    if (x <= 0) return 1;
    x--;
    // Copy the highest set bit into every lower position (shifts of 1, 2, 4, 8, 16)
    for (int shift = 1; shift <= 16; shift <<= 1) {
        x |= x >> shift;
    }
    return x + 1;
 }
 /**
 * Integer base-2 logarithm, rounded down.
 * Returns -1 for x <= 0.
 */
 int tav_floor_log2(int x) {
    if (x <= 0) return -1;
    int halvings = 0;
    // Count how many times x can be halved before reaching 1
    for (; x > 1; x >>= 1) {
        halvings++;
    }
    return halvings;
 }
 /**
 * Integer base-2 logarithm, rounded up.
 * Returns -1 for x <= 0.
 */
 int tav_ceil_log2(int x) {
    if (x <= 0) return -1;
    if (x == 1) return 0;
    const int floor_bits = tav_floor_log2(x);
    // Exact powers of two need no rounding; everything else rounds up by one
    const int is_exact = ((1 << floor_bits) == x);
    return is_exact ? floor_bits : floor_bits + 1;
 }
 // =============================================================================
 // Random Filename Generation
 // =============================================================================
 /**
 * Write a pseudo-random temporary path of the form
 * /tmp/[32 alphanumeric chars].mp2 into `filename`.
 *
 * The C library generator is seeded with the current time on the first call
 * of this function.
 *
 * @param filename  Output buffer (must be at least 42 bytes)
 */
 void tav_generate_random_filename(char *filename) {
    static const char alphabet[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    static int rng_ready = 0;
    enum { PREFIX_LEN = 5, RANDOM_LEN = 32 };
    const int alphabet_len = sizeof(alphabet) - 1;
    if (rng_ready == 0) {
        srand(time(NULL));
        rng_ready = 1;
    }
    // Directory prefix
    memcpy(filename, "/tmp/", PREFIX_LEN);
    // 32 random characters drawn from the alphabet
    char *cursor = filename + PREFIX_LEN;
    for (int remaining = RANDOM_LEN; remaining > 0; remaining--) {
        *cursor++ = alphabet[rand() % alphabet_len];
    }
    // Extension plus terminating NUL (5 bytes, ending at index 41)
    memcpy(cursor, ".mp2", 5);
 }
 /**
 * Generate a random temporary filename with custom extension.
 * Format: /tmp/[32 random chars].[ext]
 *
 * The C library generator is seeded with the current time on the first call
 * of this function.
 *
 * A NULL `filename` makes the call a no-op. A NULL `ext` is treated like an
 * empty extension, so the result ends with the dot.
 *
 * @param filename  Output buffer; must hold at least 39 + strlen(ext) bytes
 *                  (5 prefix + 32 random + '.' + ext + terminating NUL)
 * @param ext       File extension (without leading dot, e.g., "tmp", "wav")
 */
 void tav_generate_random_filename_ext(char *filename, const char *ext) {
    static int seeded = 0;
    if (!seeded) {
        srand(time(NULL));
        seeded = 1;
    }
    // Nothing can be written without an output buffer
    if (!filename) {
        return;
    }
    const char charset[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    const int charset_size = sizeof(charset) - 1;
    // Start with the prefix
    strcpy(filename, "/tmp/");
    // Generate 32 random characters
    for (int i = 0; i < 32; i++) {
        filename[5 + i] = charset[rand() % charset_size];
    }
    // Add the extension; strcpy also writes the terminating NUL
    filename[37] = '.';
    strcpy(filename + 38, ext ? ext : "");
 }
 // =============================================================================
 // Memory Utilities
 // =============================================================================
 /**
 * malloc wrapper that never returns NULL for a non-zero request:
 * on failure it prints an error to stderr and terminates with exit(1).
 */
 void *tav_malloc(size_t size) {
    void *block = malloc(size);
    // A NULL result is only an error when memory was actually requested
    if (block == NULL && size > 0) {
        fprintf(stderr, "ERROR: Failed to allocate %zu bytes\n", size);
        exit(1);
    }
    return block;
 }
 /**
 * calloc wrapper that never returns NULL for a non-empty request:
 * on failure it prints an error to stderr and terminates with exit(1).
 */
 void *tav_calloc(size_t count, size_t size) {
    void *block = calloc(count, size);
    // A NULL result is only an error when both dimensions are non-zero
    const int wanted_memory = (count > 0) && (size > 0);
    if (block == NULL && wanted_memory) {
        fprintf(stderr, "ERROR: Failed to allocate %zu elements of %zu bytes\n", count, size);
        exit(1);
    }
    return block;
 }
 /**
 * realloc wrapper that never returns NULL for a non-zero request:
 * on failure it prints an error to stderr and terminates with exit(1).
 */
 void *tav_realloc(void *ptr, size_t size) {
    void *resized = realloc(ptr, size);
    // A NULL result is only an error when memory was actually requested
    if (resized == NULL && size > 0) {
        fprintf(stderr, "ERROR: Failed to reallocate to %zu bytes\n", size);
        exit(1);
    }
    return resized;
 }
 /**
 * Allocate aligned memory.
 * Returns NULL on failure.
 *
 * The power-of-two check is done on the full size_t value, so large
 * alignments are not truncated through an int. On the POSIX path, alignments
 * smaller than sizeof(void *) are raised to sizeof(void *) because
 * posix_memalign rejects anything smaller; the returned block still
 * satisfies the requested alignment.
 *
 * @param alignment  Required alignment in bytes; must be a power of 2
 * @param size       Number of bytes to allocate
 * @return           Block to be released with tav_aligned_free(), or NULL
 */
 void *tav_aligned_alloc(size_t alignment, size_t size) {
    // Ensure alignment is power of 2 (exactly one bit set)
    if (alignment == 0 || (alignment & (alignment - 1)) != 0) {
        fprintf(stderr, "ERROR: Alignment must be power of 2, got %zu\n", alignment);
        return NULL;
    }
 #ifdef _WIN32
    return _aligned_malloc(size, alignment);
 #else
    // posix_memalign needs a power-of-two multiple of sizeof(void *)
    if (alignment < sizeof(void *)) {
        alignment = sizeof(void *);
    }
    void *ptr = NULL;
    if (posix_memalign(&ptr, alignment, size) != 0) {
        return NULL;
    }
    return ptr;
 #endif
 }
 /**
 * Free aligned memory.
 *
 * Dispatches to _aligned_free() when _WIN32 is defined and to free()
 * otherwise, mirroring the allocator chosen in tav_aligned_alloc().
 *
 * @param ptr  Block to release
 */
 void tav_aligned_free(void *ptr) {
 #ifdef _WIN32
    _aligned_free(ptr);
 #else
    free(ptr);
 #endif
 }
 // =============================================================================
 // Array Utilities
 // =============================================================================
 /**
 * Set the first `count` elements of an integer array to `value`.
 */
 void tav_array_fill_int(int *array, size_t count, int value) {
    // Walk from the back; the final contents do not depend on the fill order
    while (count > 0) {
        array[--count] = value;
    }
 }
 /**
 * Set the first `count` elements of a float array to `value`.
 */
 void tav_array_fill_float(float *array, size_t count, float value) {
    // Walk from the back; the final contents do not depend on the fill order
    while (count > 0) {
        array[--count] = value;
    }
 }
 /**
 * Copy `count` integers from src to dst with memcpy.
 */
 void tav_array_copy_int(int *dst, const int *src, size_t count) {
    const size_t total_bytes = count * sizeof(int);
    memcpy(dst, src, total_bytes);
 }
 /**
 * Copy `count` floats from src to dst with memcpy.
 */
 void tav_array_copy_float(float *dst, const float *src, size_t count) {
    const size_t total_bytes = count * sizeof(float);
    memcpy(dst, src, total_bytes);
 }
 /**
 * Return the largest element of an integer array, or 0 for an empty array.
 */
 int tav_array_max_int(const int *array, size_t count) {
    if (count == 0) return 0;
    int largest = *array;
    // count >= 1 here; visit the remaining count - 1 elements
    while (--count > 0) {
        ++array;
        if (*array > largest) {
            largest = *array;
        }
    }
    return largest;
 }
 /**
 * Return the smallest element of an integer array, or 0 for an empty array.
 */
 int tav_array_min_int(const int *array, size_t count) {
    if (count == 0) return 0;
    int smallest = *array;
    // count >= 1 here; visit the remaining count - 1 elements
    while (--count > 0) {
        ++array;
        if (*array < smallest) {
            smallest = *array;
        }
    }
    return smallest;
 }
 /**
 * Return the largest magnitude found in a float array, or 0.0f for an empty array.
 */
 float tav_array_max_abs_float(const float *array, size_t count) {
    if (count == 0) return 0.0f;
    float peak = fabsf(*array);
    // count >= 1 here; visit the remaining count - 1 elements
    while (--count > 0) {
        ++array;
        const float magnitude = fabsf(*array);
        if (magnitude > peak) {
            peak = magnitude;
        }
    }
    return peak;
 }
 /**
 * Add up the first `count` integers of an array in a long long accumulator.
 */
 long long tav_array_sum_int(const int *array, size_t count) {
    long long total = 0;
    size_t idx = 0;
    while (idx < count) {
        total += array[idx++];
    }
    return total;
 }
 /**
 * Add up the first `count` floats of an array, front to back, in a double accumulator.
 */
 double tav_array_sum_float(const float *array, size_t count) {
    double total = 0.0;
    size_t idx = 0;
    while (idx < count) {
        total += array[idx++];
    }
    return total;
 }
 /**
 * Arithmetic mean of a float array; 0.0f for an empty array.
 * The sum comes from tav_array_sum_float() and the division is done in
 * double precision before narrowing to float.
 */
 float tav_array_mean_float(const float *array, size_t count) {
    if (count == 0) return 0.0f;
    const double total = tav_array_sum_float(array, count);
    return (float)(total / count);
 }
 /**
 * Exchange the integers stored at *a and *b.
 */
 void tav_swap_int(int *a, int *b) {
    const int from_a = *a;
    const int from_b = *b;
    *a = from_b;
    *b = from_a;
 }
 /**
 * Exchange the floats stored at *a and *b.
 */
 void tav_swap_float(float *a, float *b) {
    const float from_a = *a;
    const float from_b = *b;
    *a = from_b;
    *b = from_a;
 }
 /**
 * Exchange the pointers stored at *a and *b.
 */
 void tav_swap_ptr(void **a, void **b) {
    void *const from_a = *a;
    void *const from_b = *b;
    *a = from_b;
    *b = from_a;
 }
--- a/video_encoder/lib/libtavenc/tav_encoder_utils.h
+++ b/video_encoder/lib/libtavenc/tav_encoder_utils.h
@@ -1,165 +0,0 @@
 /**
 * TAV Encoder - Utilities Library
 *
 * Public API for common utility functions and helpers.
 */
 #ifndef TAV_ENCODER_UTILS_H
 #define TAV_ENCODER_UTILS_H
 #include <stddef.h>
 #ifdef __cplusplus
 extern "C" {
 #endif
 // =============================================================================
 // Math Utilities
 // =============================================================================
 /** Clamp integer value to range [min, max] */
 int tav_clamp_int(int x, int min, int max);
 /** Clamp float value to range [min, max] */
 float tav_clamp_float(float x, float min, float max);
 /** Clamp double value to range [min, max] */
 double tav_clamp_double(double x, double min, double max);
 /** Round double to nearest integer */
 int tav_iround(double v);
 /** Linear interpolation between two floats */
 float tav_lerp(float a, float b, float t);
 /** Linear interpolation between two doubles */
 double tav_lerp_double(double a, double b, double t);
 /** Get minimum of two integers */
 int tav_min_int(int a, int b);
 /** Get maximum of two integers */
 int tav_max_int(int a, int b);
 /** Get minimum of two floats */
 float tav_min_float(float a, float b);
 /** Get maximum of two floats */
 float tav_max_float(float a, float b);
 /** Compute absolute value of integer */
 int tav_abs_int(int x);
 /** Compute absolute value of float */
 float tav_abs_float(float x);
 /** Sign function: returns -1, 0, or 1 */
 int tav_sign(int x);
 /** Check if integer is power of 2 */
 int tav_is_power_of_2(int x);
 /** Round up to next power of 2 */
 int tav_next_power_of_2(int x);
 /** Compute floor of log2(x) */
 int tav_floor_log2(int x);
 /** Compute ceil of log2(x) */
 int tav_ceil_log2(int x);
 // =============================================================================
 // Random Filename Generation
 // =============================================================================
 /**
 * Generate a random temporary filename with .mp2 extension.
 * Format: /tmp/[32 random chars].mp2
 *
 * @param filename  Output buffer (must be at least 42 bytes)
 */
 void tav_generate_random_filename(char *filename);
 /**
 * Generate a random temporary filename with custom extension.
 * Format: /tmp/[32 random chars].[ext]
 *
 * @param filename  Output buffer (must be large enough: at least
 *                  39 + strlen(ext) bytes, i.e. 5 prefix + 32 random
 *                  + '.' + ext + terminating NUL)
 * @param ext       File extension (without leading dot)
 */
 void tav_generate_random_filename_ext(char *filename, const char *ext);
 // =============================================================================
 // Memory Utilities
 // =============================================================================
 /** Safe malloc with error checking (exits on failure) */
 void *tav_malloc(size_t size);
 /** Safe calloc with error checking (exits on failure) */
 void *tav_calloc(size_t count, size_t size);
 /** Safe realloc with error checking (exits on failure) */
 void *tav_realloc(void *ptr, size_t size);
 /** Allocate aligned memory (returns NULL on failure) */
 void *tav_aligned_alloc(size_t alignment, size_t size);
 /** Free aligned memory */
 void tav_aligned_free(void *ptr);
 // =============================================================================
 // Array Utilities
 // =============================================================================
 /** Fill integer array with constant value */
 void tav_array_fill_int(int *array, size_t count, int value);
 /** Fill float array with constant value */
 void tav_array_fill_float(float *array, size_t count, float value);
 /** Copy integer array */
 void tav_array_copy_int(int *dst, const int *src, size_t count);
 /** Copy float array */
 void tav_array_copy_float(float *dst, const float *src, size_t count);
 /** Find maximum value in integer array */
 int tav_array_max_int(const int *array, size_t count);
 /** Find minimum value in integer array */
 int tav_array_min_int(const int *array, size_t count);
 /** Find maximum absolute value in float array */
 float tav_array_max_abs_float(const float *array, size_t count);
 /** Compute sum of integer array */
 long long tav_array_sum_int(const int *array, size_t count);
 /** Compute sum of float array */
 double tav_array_sum_float(const float *array, size_t count);
 /** Compute mean of float array */
 float tav_array_mean_float(const float *array, size_t count);
 /** Swap two integer values */
 void tav_swap_int(int *a, int *b);
 /** Swap two float values */
 void tav_swap_float(float *a, float *b);
 /** Swap two pointer values */
 void tav_swap_ptr(void **a, void **b);
 // =============================================================================
 // Convenience Macros (for backward compatibility)
 // =============================================================================
 #define CLAMP(x, min, max)  tav_clamp_int(x, min, max)
 #define FCLAMP(x, min, max) tav_clamp_float(x, min, max)
 #ifdef __cplusplus
 }
 #endif
 #endif // TAV_ENCODER_UTILS_H
--- a/video_encoder/range_coder.c
+++ b/video_encoder/range_coder.c
@@ -1,152 +0,0 @@
 // Simple range coder for TAD audio codec
 // Based on range coding with Laplacian probability model
 #include "range_coder.h"
 #include <string.h>
 #include <math.h>
 // Initial width of the coding interval: both range_encoder_init() and
 // range_decoder_init() start with range = TOP_VALUE (the full 32-bit span).
 #define TOP_VALUE 0xFFFFFFFFU
 // Renormalisation threshold: while range <= BOTTOM_VALUE the coders shift
 // low/range left by 8 bits and move one byte out of (or into) the stream.
 #define BOTTOM_VALUE 0x00FFFFFF
 // Append one byte to the encoder's output buffer.
 // When the buffer is already full the byte is dropped silently, so
 // buffer_pos never grows past buffer_capacity.
 static inline void range_encoder_put_byte(RangeEncoder *enc, uint8_t byte) {
    if (enc->buffer_pos >= enc->buffer_capacity) {
        return;  // No room left: discard the byte
    }
    enc->buffer[enc->buffer_pos] = byte;
    enc->buffer_pos += 1;
 }
 // Fetch the next input byte and advance the read position.
 // Reading past the end of the buffer yields 0 and leaves the position unchanged.
 static inline uint8_t range_decoder_get_byte(RangeDecoder *dec) {
    if (dec->buffer_pos >= dec->buffer_size) {
        return 0;  // Input exhausted: feed zeros
    }
    const uint8_t next = dec->buffer[dec->buffer_pos];
    dec->buffer_pos += 1;
    return next;
 }
 // Emit the top byte of `low` and widen the interval by 8 bits for as long as
 // the range has shrunk to BOTTOM_VALUE or below.
 // NOTE(review): a carry out of bit 31 of enc->low is not propagated into bytes
 // that were already written - confirm whether the format relies on this.
 static void range_encoder_renormalise(RangeEncoder *enc) {
    for (; enc->range <= BOTTOM_VALUE; enc->range <<= 8) {
        const uint8_t top_byte = (uint8_t)((enc->low >> 24) & 0xFF);
        range_encoder_put_byte(enc, top_byte);
        enc->low <<= 8;
    }
 }
 // Pull one input byte into `code` and widen the interval by 8 bits for as long
 // as the range has shrunk to BOTTOM_VALUE or below (mirrors the encoder side).
 static void range_decoder_renormalise(RangeDecoder *dec) {
    for (; dec->range <= BOTTOM_VALUE; dec->range <<= 8) {
        const uint32_t incoming = range_decoder_get_byte(dec);
        dec->code = (dec->code << 8) | incoming;
        dec->low <<= 8;
    }
 }
 // Prepare an encoder that writes into `buffer` (at most `capacity` bytes).
 // The coding interval starts as [0, TOP_VALUE] and the write position at 0.
 void range_encoder_init(RangeEncoder *enc, uint8_t *buffer, size_t capacity) {
    // Output sink
    enc->buffer = buffer;
    enc->buffer_capacity = capacity;
    enc->buffer_pos = 0;
    // Coding interval
    enc->range = TOP_VALUE;
    enc->low = 0;
 }
 // Calculate Laplacian CDF for a given value
 // CDF(x) = 0.5 * exp(λx) for x < 0
 // CDF(x) = 1 - 0.5 * exp(-λx) for x ≥ 0
 static inline double laplacian_cdf(int16_t value, float lambda) {
    if (value < 0) {
        return 0.5 * exp(lambda * value);
    } else {
        return 1.0 - 0.5 * exp(-lambda * value);
    }
 }
 // Encode one signed 16-bit symbol under a static Laplacian model.
 //   enc           - encoder state; low/range are narrowed and renormalised
 //   value         - symbol to encode; clamped to [-max_abs_value, max_abs_value]
 //   max_abs_value - largest magnitude the alphabet contains
 //   lambda        - Laplacian decay rate; must match the value used when decoding
 // The symbol's slot is [CDF(value-1), CDF(value)) scaled to 16-bit cumulative
 // counts, with the lowest symbol starting at 0.
 void range_encode_int16_laplacian(RangeEncoder *enc, int16_t value, int16_t max_abs_value, float lambda) {
    const uint32_t SCALE = 0x10000;  // 65536 for precision
    // Clamp to valid range
    if (value < -max_abs_value) value = -max_abs_value;
    if (value > max_abs_value) value = max_abs_value;
    // CDF just below and at the symbol gives its probability mass
    double cdf_low = (value == -max_abs_value) ? 0.0 : laplacian_cdf(value - 1, lambda);
    double cdf_high = laplacian_cdf(value, lambda);
    uint32_t cum_low = (uint32_t)(cdf_low * SCALE);
    uint32_t cum_high = (uint32_t)(cdf_high * SCALE);
    // Ensure we have at least 1 unit of probability
    if (cum_high <= cum_low) cum_high = cum_low + 1;
    if (cum_high > SCALE) {
        // Deep in the positive tail the CDF saturates at 1.0, so cum_low is already
        // SCALE. Clamping only cum_high would leave a zero-width slot, make
        // enc->range zero and hang the renormalisation loop. Use the top slot instead.
        // NOTE(review): all saturated symbols share this slot, so the decoder
        // presumably cannot tell them apart - confirm this is acceptable.
        cum_high = SCALE;
        cum_low = SCALE - 1;
    }
    // Narrow the interval to the symbol's slot
    uint64_t range_64 = (uint64_t)enc->range;
    enc->low += (uint32_t)((range_64 * cum_low) / SCALE);
    enc->range = (uint32_t)((range_64 * (cum_high - cum_low)) / SCALE);
    range_encoder_renormalise(enc);
 }
 // Flush the four bytes still held in `low` (most significant first) and
 // return the total number of bytes present in the output buffer.
 size_t range_encoder_finish(RangeEncoder *enc) {
    int pending = 4;
    while (pending-- > 0) {
        range_encoder_put_byte(enc, (enc->low >> 24) & 0xFF);
        enc->low <<= 8;
    }
    return enc->buffer_pos;
 }
 // Prepare a decoder that reads `size` bytes from `buffer`.
 // The interval starts as [0, TOP_VALUE] and `code` is primed with the first
 // four input bytes (zeros are substituted if the input is shorter).
 void range_decoder_init(RangeDecoder *dec, const uint8_t *buffer, size_t size) {
    // Input source
    dec->buffer = buffer;
    dec->buffer_size = size;
    dec->buffer_pos = 0;
    // Coding interval
    dec->low = 0;
    dec->range = TOP_VALUE;
    dec->code = 0;
    // Prime the code register, most significant byte first
    int primed = 0;
    while (primed < 4) {
        dec->code = (dec->code << 8) | range_decoder_get_byte(dec);
        primed++;
    }
 }
 // Decode one signed 16-bit symbol under the static Laplacian model.
 //   dec           - decoder state; low/range/code are updated
 //   max_abs_value - alphabet bound; symbols lie in [-max_abs_value, max_abs_value]
 //   lambda        - Laplacian decay rate
 // Returns the symbol whose cumulative slot contains the current code position,
 // found by binary search. If no slot matches, 0 is returned after a
 // renormalisation pass.
 int16_t range_decode_int16_laplacian(RangeDecoder *dec, int16_t max_abs_value, float lambda) {
    const uint32_t SCALE = 0x10000;  // Must match encoder
    const uint64_t span = (uint64_t)dec->range;
    // Position of the code inside the current interval, scaled to [0, SCALE)
    const uint32_t target = (uint32_t)(((uint64_t)(dec->code - dec->low) * SCALE) / span);
    int16_t lo = -max_abs_value;
    int16_t hi = max_abs_value;
    while (lo <= hi) {
        const int16_t probe = (lo + hi) / 2;
        const double below = (probe == -max_abs_value) ? 0.0 : laplacian_cdf(probe - 1, lambda);
        const double upto = laplacian_cdf(probe, lambda);
        const uint32_t slot_start = (uint32_t)(below * SCALE);
        uint32_t slot_end = (uint32_t)(upto * SCALE);
        if (slot_end <= slot_start) slot_end = slot_start + 1;
        if (target < slot_start) {
            // Target lies in a lower slot
            hi = probe - 1;
            continue;
        }
        if (target >= slot_end) {
            // Target lies in a higher slot
            lo = probe + 1;
            continue;
        }
        // Slot found: narrow the interval exactly as the encoder did
        dec->low += (uint32_t)((span * slot_start) / SCALE);
        dec->range = (uint32_t)((span * (slot_end - slot_start)) / SCALE);
        range_decoder_renormalise(dec);
        return probe;
    }
    // Fallback: shouldn't happen with correct encoding
    range_decoder_renormalise(dec);
    return 0;
 }
--- a/video_encoder/range_coder.h
+++ b/video_encoder/range_coder.h
@@ -1,42 +0,0 @@
 #ifndef RANGE_CODER_H
 #define RANGE_CODER_H
 #include <stdint.h>
 #include <stddef.h>
 // Simple range coder for signed 16-bit integers.
 // Symbol probabilities come from a Laplacian model whose decay rate (lambda)
 // is supplied by the caller on every encode/decode call.
 // NOTE(review): an earlier comment called this an "adaptive frequency model";
 // nothing in this interface carries adaptive state - confirm and drop the claim.

 // Encoder state.
 typedef struct {
    uint32_t low;             // Lower bound of the current coding interval
    uint32_t range;           // Width of the current coding interval
    uint8_t *buffer;          // Caller-owned output buffer
    size_t buffer_pos;        // Number of bytes written so far
    size_t buffer_capacity;   // Size of `buffer` in bytes
 } RangeEncoder;
 // Decoder state.
 typedef struct {
    uint32_t low;             // Lower bound of the current coding interval
    uint32_t range;           // Width of the current coding interval
    uint32_t code;            // Sliding 32-bit window over the input bytes
    const uint8_t *buffer;    // Caller-owned input buffer
    size_t buffer_pos;        // Number of bytes consumed so far
    size_t buffer_size;       // Size of `buffer` in bytes
 } RangeDecoder;
 // Initialise encoder to write into `buffer`, which holds `capacity` bytes
 void range_encoder_init(RangeEncoder *enc, uint8_t *buffer, size_t capacity);
 // Encode a signed 16-bit value with a zero-mean Laplacian distribution.
 // `max_abs_value` bounds the alphabet and `lambda` is the decay rate; the
 // decoder must be given the same two parameters.
 void range_encode_int16_laplacian(RangeEncoder *enc, int16_t value, int16_t max_abs_value, float lambda);
 // Finalise encoding and return bytes written
 size_t range_encoder_finish(RangeEncoder *enc);
 // Initialise decoder to read `size` bytes from `buffer`
 void range_decoder_init(RangeDecoder *dec, const uint8_t *buffer, size_t size);
 // Decode a signed 16-bit value with a zero-mean Laplacian distribution,
 // using the same `max_abs_value` and `lambda` that were used for encoding.
 int16_t range_decode_int16_laplacian(RangeDecoder *dec, int16_t max_abs_value, float lambda);
 #endif // RANGE_CODER_H
--- a/video_encoder/src/decoder_tav.c
+++ b/video_encoder/src/decoder_tav.c
--- a/video_encoder/src/decoder_tav_dt.c
+++ b/video_encoder/src/decoder_tav_dt.c
--- a/video_encoder/src/encoder_tad_standalone.c
+++ b/video_encoder/src/encoder_tad_standalone.c
@@ -1,344 +0,0 @@
 // Created by CuriousTorvald and Claude on 2025-10-24.
 // TAD32 (Terrarum Advanced Audio - PCM32 version) Encoder - Standalone program
 // Alternative version: PCM32 throughout encoding, PCM8 conversion only at decoder
 // Uses encoder_tad32.c library for encoding functions
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
 #include <getopt.h>
 #include <math.h>
 #include <time.h>
 #include "encoder_tad.h"
 // Identification string printed by the usage text and in verbose mode
 #define ENCODER_VENDOR_STRING "Encoder-TAD32 (PCM32f version) 20251107"
 // TAD32 format constants
 // Number of sample frames handed to the encoder per chunk.
 // NOTE(review): an earlier remark said a prime number was used here to force the
 // worst condition, but 32768 is 2^15 (a power of two) - confirm the intended value.
 #define TAD32_DEFAULT_CHUNK_SIZE 32768  // 2^15 sample frames per chunk
 // Temporary file for FFmpeg PCM extraction.
 // 42 bytes = "/tmp/" (5) + 32 random characters + ".tad" (4) + terminating NUL;
 // filled in by generate_random_filename() at the start of main().
 char TEMP_PCM_FILE[42];
 // Build a temporary path of the form "/tmp/" + 32 alphanumeric characters + ".tad".
 // Exactly 42 bytes are written (including the terminating NUL), so `filename`
 // must be at least that large. rand() is re-seeded from the current time on
 // every call.
 static void generate_random_filename(char *filename) {
    const char alphabet[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    const int alphabet_len = (int)(sizeof(alphabet) - 1);
    char *cursor = filename;
    srand(time(NULL));
    // Directory prefix (5 characters, no terminator yet)
    memcpy(cursor, "/tmp/", 5);
    cursor += 5;
    // 32 random characters
    for (int n = 0; n < 32; n++) {
        *cursor++ = alphabet[rand() % alphabet_len];
    }
    // Extension plus the terminating NUL, which lands at index 41
    memcpy(cursor, ".tad", 5);
 }
 //=============================================================================
 // Main Encoder
 //=============================================================================
 // Print the command-line help text to stdout.
 //   prog_name - program name shown in the usage line (normally argv[0])
 // Every option accepted by main()'s getopt string is listed, including -s.
 static void print_usage(const char *prog_name) {
    printf("Usage: %s -i <input> [options]\n", prog_name);
    printf("Options:\n");
    printf("  -i <file>       Input audio file (any format supported by FFmpeg)\n");
    printf("  -o <file>       Output TAD32 file (optional, auto-generated as input.qN.tad)\n");
    printf("  -q <level>      Quality level (0-5, default: %d)\n", TAD32_QUALITY_DEFAULT);
    printf("                  0 = lowest quality/smallest (max_index=31)\n");
    printf("                  1 = low quality (max_index=35)\n");
    printf("                  2 = medium quality (max_index=39)\n");
    printf("                  3 = good quality (max_index=47) [DEFAULT]\n");
    printf("                  4 = high quality (max_index=56)\n");
    printf("                  5 = very high quality/largest (max_index=89)\n");
    // -s is parsed by main() but was previously missing from the help text
    printf("  -s <scale>      Quantiser scale (0.5-4.0, default: 1.0)\n");
    printf("  -v              Verbose output\n");
    printf("  -h, --help      Show this help\n");
    printf("\nVersion: %s\n", ENCODER_VENDOR_STRING);
    printf("Note: This is the PCM32 alternative version for comparison testing.\n");
    printf("      PCM32 is processed throughout encoding; PCM8 conversion happens at decoder.\n");
 }
 // Release every resource main() may be holding on an exit path.
 // NULL handles and pointers are skipped; removing a temp file that was never
 // created simply fails and is ignored.
 static void tad_release_resources(FILE *pcm_file, FILE *output, float *chunk_buffer,
                                   uint8_t *output_buffer, char *owned_output_path) {
    free(chunk_buffer);
    free(output_buffer);
    if (pcm_file) fclose(pcm_file);
    if (output) fclose(output);
    remove(TEMP_PCM_FILE);
    free(owned_output_path);
 }
 // Standalone TAD32 encoder entry point.
 // Steps: parse options, derive the output path if none was given, extract and
 // resample the input to float PCM with FFmpeg (via temporary files), encode it
 // chunk by chunk with tad32_encode_chunk(), then print size statistics.
 // Returns 0 on success and 1 on any error.
 int main(int argc, char *argv[]) {
    generate_random_filename(TEMP_PCM_FILE);
    char *input_file = NULL;
    char *output_file = NULL;
    char *owned_output_path = NULL;  // Non-NULL only when the output path is allocated below
    int quality = TAD32_QUALITY_DEFAULT;  // Default quality level (0-5)
    float quantiser_scale = 1.0f;  // Default quantiser scaling
    int verbose = 0;
    // Parse command line arguments
    static struct option long_options[] = {
        {"help", no_argument, 0, 'h'},
        {0, 0, 0, 0}
    };
    int opt;
    int option_index = 0;
    while ((opt = getopt_long(argc, argv, "i:o:q:s:vh", long_options, &option_index)) != -1) {
        switch (opt) {
            case 'i':
                input_file = optarg;
                break;
            case 'o':
                output_file = optarg;
                break;
            case 'q':
                quality = atoi(optarg);
                if (quality < TAD32_QUALITY_MIN || quality > TAD32_QUALITY_MAX) {
                    fprintf(stderr, "Error: Quality must be in range %d-%d\n", TAD32_QUALITY_MIN, TAD32_QUALITY_MAX);
                    return 1;
                }
                break;
            case 's':
                quantiser_scale = atof(optarg);
                if (quantiser_scale < 0.5f || quantiser_scale > 4.0f) {
                    fprintf(stderr, "Error: Quantiser scale must be in range 0.5-4.0\n");
                    return 1;
                }
                break;
            case 'v':
                verbose = 1;
                break;
            case 'h':
                print_usage(argv[0]);
                return 0;
            default:
                print_usage(argv[0]);
                return 1;
        }
    }
    if (!input_file) {
        fprintf(stderr, "Error: Input file is required\n");
        print_usage(argv[0]);
        return 1;
    }
    // Convert quality (0-5) to max_index for quantisation
    int max_index = tad32_quality_to_max_index(quality);
    // Generate output filename if not provided
    if (!output_file) {
        // Allocate space for output filename
        size_t input_len = strlen(input_file);
        owned_output_path = malloc(input_len + 32);  // Extra space for .qNN.tad
        if (!owned_output_path) {
            fprintf(stderr, "Error: Memory allocation failed\n");
            return 1;
        }
        output_file = owned_output_path;
        // Find the last directory separator
        const char *basename_start = strrchr(input_file, '/');
        if (!basename_start) basename_start = strrchr(input_file, '\\');
        basename_start = basename_start ? basename_start + 1 : input_file;
        // Copy directory part (not terminated yet; the basename copy below terminates it)
        size_t dir_len = basename_start - input_file;
        memcpy(output_file, input_file, dir_len);
        // Find the extension (last dot after basename)
        const char *ext = strrchr(basename_start, '.');
        if (ext && ext > basename_start) {
            // Copy basename without extension
            size_t name_len = ext - basename_start;
            memcpy(output_file + dir_len, basename_start, name_len);
            output_file[dir_len + name_len] = '\0';
        } else {
            // No extension, copy entire basename
            strcpy(output_file + dir_len, basename_start);
        }
        // Append .qNN.tad (use quality level for filename)
        sprintf(output_file + strlen(output_file), ".q%d.tad", quality);
        if (verbose) {
            printf("Auto-generated output path: %s\n", output_file);
        }
    }
    if (verbose) {
        printf("%s\n", ENCODER_VENDOR_STRING);
        printf("Input: %s\n", input_file);
        printf("Output: %s\n", output_file);
        printf("Quality level: %d (max_index=%d)\n", quality, max_index);
        printf("Quantiser scale: %.2f\n", quantiser_scale);
    }
    // NOTE(review): input_file is interpolated into shell command lines below with
    // only double quotes around it; a path containing '"', '$' or backticks is
    // interpreted by the shell. Consider fork/exec with an argument vector.
    // Detect original sample rate for high-quality resampling
    char sample_rate_str[32] = "48000";  // Default fallback
    char detect_cmd[2048];
    snprintf(detect_cmd, sizeof(detect_cmd),
        "ffprobe -v error -select_streams a:0 -show_entries stream=sample_rate "
        "-of default=noprint_wrappers=1:nokey=1 \"%s\" 2>/dev/null",
        input_file);
    FILE *probe = popen(detect_cmd, "r");
    if (probe) {
        if (fgets(sample_rate_str, sizeof(sample_rate_str), probe)) {
            // Remove newline
            sample_rate_str[strcspn(sample_rate_str, "\n")] = 0;
        }
        pclose(probe);
    }
    int original_rate = atoi(sample_rate_str);
    if (original_rate <= 0 || original_rate > 192000) {
        original_rate = 48000;  // Fallback
    }
    if (verbose) {
        printf("Detected original sample rate: %d Hz\n", original_rate);
        printf("Extracting and resampling audio to %d Hz...\n", TAD32_SAMPLE_RATE);
    }
    // Extract and resample in two passes for better quality
    // Pass 1: Extract at original sample rate
    char temp_original_pcm[256];
    snprintf(temp_original_pcm, sizeof(temp_original_pcm), "%s.orig", TEMP_PCM_FILE);
    char ffmpeg_cmd[2048];
    snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
        "ffmpeg -hide_banner -v error -i \"%s\" -f f32le -acodec pcm_f32le -ac %d -y \"%s\" 2>&1",
        input_file, TAD32_CHANNELS, temp_original_pcm);
    int result = system(ffmpeg_cmd);
    if (result != 0) {
        fprintf(stderr, "Error: FFmpeg extraction failed\n");
        remove(temp_original_pcm);  // Do not leave a partial intermediate file behind
        tad_release_resources(NULL, NULL, NULL, NULL, owned_output_path);
        return 1;
    }
    // Pass 2: Resample to 32kHz with high-quality SoXR resampler and highpass filter
    snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
        "ffmpeg -hide_banner -v error -f f32le -ar %d -ac %d -i \"%s\" "
        "-f f32le -acodec pcm_f32le -ar %d -ac %d "
        "-af \"aresample=resampler=soxr:precision=28:cutoff=0.99:dither_scale=0,highpass=f=16\" "
        "-y \"%s\" 2>&1",
        original_rate, TAD32_CHANNELS, temp_original_pcm, TAD32_SAMPLE_RATE, TAD32_CHANNELS, TEMP_PCM_FILE);
    result = system(ffmpeg_cmd);
    remove(temp_original_pcm);  // Clean up intermediate file
    if (result != 0) {
        fprintf(stderr, "Error: FFmpeg resampling failed\n");
        tad_release_resources(NULL, NULL, NULL, NULL, owned_output_path);
        return 1;
    }
    // Open PCM file
    FILE *pcm_file = fopen(TEMP_PCM_FILE, "rb");
    if (!pcm_file) {
        fprintf(stderr, "Error: Could not open temporary PCM file\n");
        tad_release_resources(NULL, NULL, NULL, NULL, owned_output_path);
        return 1;
    }
    // Get file size
    fseek(pcm_file, 0, SEEK_END);
    long pcm_bytes = ftell(pcm_file);
    fseek(pcm_file, 0, SEEK_SET);
    if (pcm_bytes < 0) {
        // ftell() reports failure as -1, which would become a huge size_t
        fprintf(stderr, "Error: Could not determine temporary PCM file size\n");
        tad_release_resources(pcm_file, NULL, NULL, NULL, owned_output_path);
        return 1;
    }
    size_t pcm_size = (size_t)pcm_bytes;
    size_t total_samples = pcm_size / (TAD32_CHANNELS * sizeof(float));
    // Pad to even sample count
    if (total_samples % 2 == 1) {
        total_samples++;
        if (verbose) {
            printf("Odd sample count detected, padding with one zero sample\n");
        }
    }
    size_t num_chunks = (total_samples + TAD32_DEFAULT_CHUNK_SIZE - 1) / TAD32_DEFAULT_CHUNK_SIZE;
    if (verbose) {
        printf("Total samples: %zu (%.2f seconds)\n", total_samples,
               (double)total_samples / TAD32_SAMPLE_RATE);
        printf("Chunks: %zu (chunk size: %d samples)\n", num_chunks, TAD32_DEFAULT_CHUNK_SIZE);
    }
    // Open output file
    FILE *output = fopen(output_file, "wb");
    if (!output) {
        fprintf(stderr, "Error: Could not open output file\n");
        tad_release_resources(pcm_file, NULL, NULL, NULL, owned_output_path);
        return 1;
    }
    // Process chunks using linked TAD32 encoder library
    size_t total_output_size = 0;
    float *chunk_buffer = malloc(TAD32_DEFAULT_CHUNK_SIZE * TAD32_CHANNELS * sizeof(float));
    uint8_t *output_buffer = malloc(TAD32_DEFAULT_CHUNK_SIZE * 4 * sizeof(float));  // Generous buffer
    if (!chunk_buffer || !output_buffer) {
        fprintf(stderr, "Error: Memory allocation failed\n");
        tad_release_resources(pcm_file, output, chunk_buffer, output_buffer, owned_output_path);
        return 1;
    }
    for (size_t chunk_idx = 0; chunk_idx < num_chunks; chunk_idx++) {
        size_t chunk_samples = TAD32_DEFAULT_CHUNK_SIZE;
        size_t remaining = total_samples - (chunk_idx * TAD32_DEFAULT_CHUNK_SIZE);
        if (remaining < TAD32_DEFAULT_CHUNK_SIZE) {
            chunk_samples = remaining;
        }
        // Read chunk
        size_t samples_read = fread(chunk_buffer, TAD32_CHANNELS * sizeof(float),
                                   chunk_samples, pcm_file);
        // Zero everything fread() did not fill. Counting from samples_read (rather
        // than chunk_samples) also clears the extra frame added by the odd-count
        // padding above and the tail after a short read, so no stale data from the
        // previous chunk is encoded.
        if (samples_read < TAD32_DEFAULT_CHUNK_SIZE) {
            memset(&chunk_buffer[samples_read * TAD32_CHANNELS], 0,
                   (TAD32_DEFAULT_CHUNK_SIZE - samples_read) * TAD32_CHANNELS * sizeof(float));
        }
        // Encode chunk using linked tad32_encode_chunk() from encoder_tad32.c
        size_t encoded_size = tad32_encode_chunk(chunk_buffer, TAD32_DEFAULT_CHUNK_SIZE,
                                                 max_index,
                                                 quantiser_scale, TAD32_ZSTD_LEVEL, output_buffer);
        if (encoded_size == 0) {
            fprintf(stderr, "Error: Chunk encoding failed at chunk %zu\n", chunk_idx);
            tad_release_resources(pcm_file, output, chunk_buffer, output_buffer, owned_output_path);
            return 1;
        }
        // Write chunk to output
        if (fwrite(output_buffer, 1, encoded_size, output) != encoded_size) {
            fprintf(stderr, "Error: Failed to write chunk %zu to output\n", chunk_idx);
            tad_release_resources(pcm_file, output, chunk_buffer, output_buffer, owned_output_path);
            return 1;
        }
        total_output_size += encoded_size;
        if (verbose && (chunk_idx % 10 == 0 || chunk_idx == num_chunks - 1)) {
            printf("Processed chunk %zu/%zu (%.1f%%)\r", chunk_idx + 1, num_chunks,
                   (chunk_idx + 1) * 100.0 / num_chunks);
            fflush(stdout);
        }
    }
    if (verbose) {
        printf("\n");
    }
    // Print coefficient statistics if enabled
    tad32_print_statistics();
    tad32_free_statistics();
    // Cleanup
    tad_release_resources(pcm_file, output, chunk_buffer, output_buffer, owned_output_path);
    // Print statistics (guarded so that empty input does not divide by zero)
    size_t pcmu8_size = total_samples * TAD32_CHANNELS;  // PCMu8 baseline
    float compression_ratio = total_output_size ? (float)pcmu8_size / total_output_size : 0.0f;
    double percent_of_pcmu8 = pcmu8_size ? (total_output_size * 100.0) / pcmu8_size : 0.0;
    printf("Encoding complete!\n");
    printf("PCMu8 size: %zu bytes\n", pcmu8_size);
    printf("TAD32 size: %zu bytes\n", total_output_size);
    printf("Compression ratio: %.2f:1 (%.1f%% of PCMu8)\n",
           compression_ratio, percent_of_pcmu8);
    if (compression_ratio < 1.8) {
        printf("Warning: Compression ratio below 2:1 target. Try lower quantisation bits or different settings.\n");
    }
    return 0;
 }
--- a/video_encoder/src/encoder_tav.c
+++ b/video_encoder/src/encoder_tav.c
--- a/video_encoder/src/encoder_tav_dt.c
+++ b/video_encoder/src/encoder_tav_dt.c
--- a/video_encoder/tav_inspector.c
+++ b/video_encoder/tav_inspector.c
--- a/video_encoder/tav_visualise_coefficients.c
+++ b/video_encoder/tav_visualise_coefficients.c
@@ -1,294 +0,0 @@
 // Visualise DWT Coefficients as Image
 // Converts .bin coefficient file to PPM image with logarithmic color mapping
 // Usage: ./visualise_coefficients <input.bin> <output.ppm> <width> <height>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <math.h>
 // Logarithmic color mapping for coefficient visualisation
 // Zero: Black (#000000)
 // +1: Light green (#55FF55); -1: Dark green (#005500)
 // Positive (> +1): Red to Yellow (#FF0000 to #FFFF00) - logarithmic
 // Negative (< -1): Blue to Cyan (#0000FF to #00FFFF) - logarithmic
 typedef struct {
    uint8_t r, g, b;
 } rgb_t;
 static rgb_t map_coefficient_to_color(int16_t coeff) {
    rgb_t color = {0, 0, 0};
    if (coeff == 0) {
        // Zero: pure black
        return color;
    }
    if (coeff == 1) {
        // +1: Light green #55FF55
        color.r = 0x55;
        color.g = 0xFF;
        color.b = 0x55;
        return color;
    }
    if (coeff == -1) {
        // -1: Dark green #005500
        color.r = 0x00;
        color.g = 0x55;
        color.b = 0x00;
        return color;
    }
    if (coeff > 0) {
        // Positive: Red (#FF0000) to Yellow (#FFFF00)
        // Logarithmic mapping: log2(1) = 0, log2(32767) ≈ 14.99
        double log_val = log2((double)coeff);
        double log_max = log2(32767.0);
        double normalised = log_val / log_max;  // 0.0 to 1.0
        color.r = 255;
        color.g = (uint8_t)(normalised * 255.0);
        color.b = 0;
    } else {
        // Negative: Blue (#0000FF) to Cyan (#00FFFF)
        // Logarithmic mapping: log2(1) = 0, log2(32768) = 15
        double log_val = log2((double)(-coeff));
        double log_max = log2(32768.0);
        double normalised = log_val / log_max;  // 0.0 to 1.0
        color.r = 0;
        color.g = (uint8_t)(normalised * 255.0);
        color.b = 255;
    }
    return color;
 }
 // Tally of the coefficients found in one rectangular region of the plane.
 typedef struct {
    size_t zeros;       // Coefficients equal to 0
    size_t ones;        // Coefficients equal to +1 or -1
    size_t positives;   // Coefficients greater than +1
    size_t negatives;   // Coefficients less than -1
    int16_t min_val;    // Smallest coefficient seen (INT16_MAX if the region is empty)
    int16_t max_val;    // Largest coefficient seen (INT16_MIN if the region is empty)
 } coeff_stats_t;
 // Classify every coefficient in [x0, x1) x [y0, y1) of a row-major plane that is
 // `width` samples wide. Indices are formed in size_t so large planes cannot
 // overflow int arithmetic.
 static coeff_stats_t gather_region_stats(const int16_t *coeffs, int width,
                                          int x0, int y0, int x1, int y1) {
    coeff_stats_t stats = {0, 0, 0, 0, INT16_MAX, INT16_MIN};
    for (int y = y0; y < y1; y++) {
        for (int x = x0; x < x1; x++) {
            int16_t val = coeffs[(size_t)y * (size_t)width + (size_t)x];
            if (val == 0) stats.zeros++;
            else if (val == 1 || val == -1) stats.ones++;
            else if (val > 0) stats.positives++;
            else stats.negatives++;
            if (val < stats.min_val) stats.min_val = val;
            if (val > stats.max_val) stats.max_val = val;
        }
    }
    return stats;
 }
 // Share of `count` in `total` as a percentage; an empty region reports 0.
 static double percent_of(size_t count, size_t total) {
    return total ? 100.0 * count / total : 0.0;
 }
 // Print a tally in the multi-line layout used for the overall and LL summaries.
 static void print_stats_block(size_t total, const coeff_stats_t *s) {
    printf("  Total: %zu\n", total);
    printf("  Zeros: %zu (%.1f%%)\n", s->zeros, percent_of(s->zeros, total));
    printf("  Ones: %zu (%.1f%%)\n", s->ones, percent_of(s->ones, total));
    printf("  Positives: %zu (%.1f%%)\n", s->positives, percent_of(s->positives, total));
    printf("  Negatives: %zu (%.1f%%)\n", s->negatives, percent_of(s->negatives, total));
    printf("  Range: [%d, %d]\n\n", s->min_val, s->max_val);
 }
 // Print a tally in the single-line layout used for the LH/HL/HH subbands.
 // `terminator` is the line ending ("\n", or "\n\n" after the last subband of a level).
 static void print_subband_line(const char *tag, int level, size_t total,
                                const coeff_stats_t *s, const char *terminator) {
    printf("  %s%d: Total=%zu, Zeros=%zu (%.1f%%), Ones=%zu (%.1f%%), Pos=%zu (%.1f%%), Neg=%zu (%.1f%%), Range=[%d,%d]%s",
           tag, level, total, s->zeros, percent_of(s->zeros, total), s->ones, percent_of(s->ones, total),
           s->positives, percent_of(s->positives, total), s->negatives, percent_of(s->negatives, total),
           s->min_val, s->max_val, terminator);
 }
 // Load a plane of int16 DWT coefficients, print overall and per-subband
 // statistics, and write the plane as a colour-mapped binary PPM (P6) image.
 // Arguments: <input.bin> <output.ppm> <width> <height>. Returns 0 on success, 1 on error.
 int main(int argc, char *argv[]) {
    if (argc != 5) {
        printf("Usage: %s <input.bin> <output.ppm> <width> <height>\n", argv[0]);
        printf("Example: %s frame_060.tavframe.y.bin output.ppm 560 448\n", argv[0]);
        return 1;
    }
    const char *input_file = argv[1];
    const char *output_file = argv[2];
    int width = atoi(argv[3]);
    int height = atoi(argv[4]);
    if (width <= 0 || height <= 0) {
        printf("Error: Invalid dimensions %dx%d\n", width, height);
        return 1;
    }
    // Multiply in size_t: int * int could overflow for large dimensions
    size_t expected_count = (size_t)width * (size_t)height;
    // Load coefficient file
    FILE *fp_in = fopen(input_file, "rb");
    if (!fp_in) {
        printf("Error: Cannot open %s\n", input_file);
        return 1;
    }
    // Get file size
    fseek(fp_in, 0, SEEK_END);
    long file_size = ftell(fp_in);
    fseek(fp_in, 0, SEEK_SET);
    if (file_size < 0) {
        printf("Error: Cannot determine size of %s\n", input_file);
        fclose(fp_in);
        return 1;
    }
    size_t coeff_count = (size_t)file_size / sizeof(int16_t);
    if (coeff_count != expected_count) {
        printf("Warning: File contains %zu coefficients, expected %zu (%dx%d)\n",
               coeff_count, expected_count, width, height);
    }
    // Allocate coefficient buffer
    int16_t *coeffs = malloc(expected_count * sizeof(int16_t));
    if (!coeffs) {
        printf("Error: Memory allocation failed\n");
        fclose(fp_in);
        return 1;
    }
    // Read coefficients
    size_t read_count = fread(coeffs, sizeof(int16_t), expected_count, fp_in);
    fclose(fp_in);
    if (read_count != expected_count) {
        printf("Error: Read %zu coefficients, expected %zu\n", read_count, expected_count);
        free(coeffs);
        return 1;
    }
    // Overall statistics: the whole plane treated as a single region
    coeff_stats_t overall = gather_region_stats(coeffs, width, 0, 0, width, height);
    printf("Overall coefficient statistics:\n");
    print_stats_block(expected_count, &overall);
    // Per-subband statistics using 2D spatial layout
    // The coefficients are stored in 2D spatial arrangement like the PPM image
    int num_levels = 6;
    // Calculate subband dimensions for each level
    int level_w[7], level_h[7];  // level_w[1] = width/2, level_w[6] = width/64
    for (int i = 1; i <= num_levels; i++) {
        level_w[i] = width / (1 << i);
        level_h[i] = height / (1 << i);
    }
    // LL6 subband (top-left corner)
    {
        int ll_w = level_w[num_levels], ll_h = level_h[num_levels];
        coeff_stats_t ll = gather_region_stats(coeffs, width, 0, 0, ll_w, ll_h);
        printf("LL%d subband (%dx%d):\n", num_levels, ll_w, ll_h);
        print_stats_block((size_t)ll_w * (size_t)ll_h, &ll);
    }
    // Process each level from deepest (6) to finest (1)
    for (int level = num_levels; level >= 1; level--) {
        int half_w = level_w[level];
        int half_h = level_h[level];
        // LH subband (horizontal high-pass) - right of LL region
        coeff_stats_t lh = gather_region_stats(coeffs, width, half_w, 0, half_w * 2, half_h);
        // HL subband (vertical high-pass) - below LL region
        coeff_stats_t hl = gather_region_stats(coeffs, width, 0, half_h, half_w, half_h * 2);
        // HH subband (diagonal high-pass) - bottom-right of LL region
        coeff_stats_t hh = gather_region_stats(coeffs, width, half_w, half_h, half_w * 2, half_h * 2);
        size_t sub_total = (size_t)half_w * (size_t)half_h;
        printf("Level %d subbands (%dx%d each):\n", level, half_w, half_h);
        print_subband_line("LH", level, sub_total, &lh, "\n");
        print_subband_line("HL", level, sub_total, &hl, "\n");
        print_subband_line("HH", level, sub_total, &hh, "\n\n");
    }
    // Write PPM image
    FILE *fp_out = fopen(output_file, "wb");
    if (!fp_out) {
        printf("Error: Cannot create %s\n", output_file);
        free(coeffs);
        return 1;
    }
    // PPM header
    fprintf(fp_out, "P6\n%d %d\n255\n", width, height);
    // Write pixel data
    int write_failed = 0;
    for (int y = 0; y < height && !write_failed; y++) {
        for (int x = 0; x < width; x++) {
            size_t idx = (size_t)y * (size_t)width + (size_t)x;
            rgb_t color = map_coefficient_to_color(coeffs[idx]);
            if (fwrite(&color, 3, 1, fp_out) != 1) {
                write_failed = 1;
                break;
            }
        }
    }
    fclose(fp_out);
    free(coeffs);
    if (write_failed) {
        printf("Error: Failed to write pixel data to %s\n", output_file);
        return 1;
    }
    printf("\nWrote %dx%d image to %s\n", width, height, output_file);
    printf("Color mapping:\n");
    printf("  Black:  Zero coefficients\n");
    printf("  Light Green (#55FF55): +1 coefficients\n");
    // The legend must match what map_coefficient_to_color() emits for -1 (#005500)
    printf("  Dark Green (#005500): -1 coefficients\n");
    printf("  Red→Yellow: Positive coefficients > +1 (logarithmic)\n");
    printf("  Blue→Cyan: Negative coefficients < -1 (logarithmic)\n");
    return 0;
 }
--- a/video_encoder/tavdt_noise_injector.c
+++ b/video_encoder/tavdt_noise_injector.c
@@ -1,402 +0,0 @@
 // TAV-DT Noise Injector - Simulates satellite transmission channel noise
 // Models QPSK over Ku-band satellite with AWGN and burst interference
 // to compile: gcc -O2 -o tavdt_noise_injector tavdt_noise_injector.c -lm
 // Created by CuriousTorvald and Claude on 2025-12-14
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
 #include <math.h>
 #include <getopt.h>
 #include <time.h>
 // Size in bytes of the buffer main() uses to stream the input file;
 // one fread()/fwrite() pair moves at most this many bytes.
 #define BUFFER_SIZE (1024 * 1024)  // 1 MB chunks
 // Default TAV-DT bitrate for timing calculations (~2 Mbps), in bits per second
 #define DEFAULT_BITRATE_BPS 2000000.0
 // Global bitrate in bits per second (can be overridden by --bitrate, which is
 // given in Mbps and multiplied by 1e6). Read by bytes_to_time().
 static double g_bitrate_bps = DEFAULT_BITRATE_BPS;
 // Burst noise parameters: burst length is drawn from a Gaussian with this
 // mean/stddev and floored at BURST_LENGTH_MIN. The drawn value is stored as a
 // byte count (burst_bytes_remaining), so all three are in bytes.
 #define BURST_LENGTH_MEAN   100.0
 #define BURST_LENGTH_STDDEV  30.0
 #define BURST_LENGTH_MIN     10
 //=============================================================================
 // PRNG Functions (xorshift64)
 //=============================================================================
 // One xorshift64 step using the 13 / 7 / 17 shift triple.
 // Updates *state in place and returns the new state as the random output.
 // A zero state maps to zero, so callers must seed with a nonzero value.
 static uint64_t xorshift64(uint64_t *state) {
    uint64_t s = *state;
    s = s ^ (s << 13);
    s = s ^ (s >> 7);
    s = s ^ (s << 17);
    *state = s;
    return s;
 }
 // Draws the next xorshift64 output and scales it by 1 / UINT64_MAX to get a
 // uniform double.
 // NOTE(review): the largest generator outputs round up to exactly 1.0 after
 // the conversion to double, so the range is closer to (0, 1] than [0, 1)
 // (assuming the generator state is nonzero) — confirm callers tolerate 1.0.
 static double rand_uniform(uint64_t *state) {
    const uint64_t raw = xorshift64(state);
    const double full_scale = (double)UINT64_MAX;
    return (double)raw / full_scale;
 }
 // Gaussian variate via the Box-Muller transform.
 // Consumes two uniform draws per call and returns only the cosine branch,
 // scaled to the requested mean and standard deviation.
 static double gaussian_rand(uint64_t *state, double mean, double stddev) {
    double radial_u = rand_uniform(state);
    const double angular_u = rand_uniform(state);
    // Keep the logarithm finite if the radial draw is (almost) zero
    if (radial_u < 1e-15) {
        radial_u = 1e-15;
    }
    const double radius = sqrt(-2.0 * log(radial_u));
    const double unit_normal = radius * cos(2.0 * M_PI * angular_u);
    return mean + stddev * unit_normal;
 }
 //=============================================================================
 // BER Calculation
 //=============================================================================
 // Calculate BER from SNR in dB for QPSK modulation
 // BER = 0.5 * erfc(sqrt(Eb/N0))
 // For QPSK, Eb/N0 = SNR (2 bits per symbol)
 static double snr_to_ber(double snr_db) {
    double snr_linear = pow(10.0, snr_db / 10.0);
    double eb_n0 = snr_linear;
    return 0.5 * erfc(sqrt(eb_n0));
 }
 //=============================================================================
 // Burst State Management
 //=============================================================================
 // Running state of the burst-interference model. Initialised by
 // burst_state_init(), advanced once per chunk by burst_state_advance_time(),
 // and consumed by apply_burst_noise().
 typedef struct {
    double current_time_sec;       // Elapsed playback time (seconds, derived from bytes and bitrate)
    double next_burst_time;        // When next burst occurs (seconds; 1e30 when bursts are disabled)
    int burst_bytes_remaining;     // Bytes left in current burst (0 = no active burst)
    double burst_interval;         // Mean interval between bursts (60.0 / bursts_per_minute); 0 = disabled
    double burst_ber;              // BER during burst (per-bit flip probability)
    int burst_count;               // Total bursts applied
    int total_burst_bytes;         // Total bytes affected by bursts
    int verbose;                   // Verbose output flag (nonzero logs each burst to stderr)
 } burst_state_t;
 // Resets all counters in *state and schedules the first burst.
 //   bursts_per_minute  mean burst rate; any value that is not > 0 disables bursts
 //   burst_ber          per-bit flip probability used while a burst is active
 //   verbose            nonzero enables per-burst logging
 //   seed               PRNG state; one uniform draw is consumed when bursts are enabled
 static void burst_state_init(burst_state_t *state, double bursts_per_minute,
                             double burst_ber, int verbose, uint64_t *seed) {
    state->verbose = verbose;
    state->burst_ber = burst_ber;
    state->current_time_sec = 0.0;
    state->burst_bytes_remaining = 0;
    state->burst_count = 0;
    state->total_burst_bytes = 0;
    if (!(bursts_per_minute > 0)) {
        // Bursts disabled: park the next burst far beyond any real stream
        state->burst_interval = 0;
        state->next_burst_time = 1e30;
        return;
    }
    state->burst_interval = 60.0 / bursts_per_minute;
    // Exponentially distributed wait until the first burst
    state->next_burst_time = -state->burst_interval * log(rand_uniform(seed));
 }
 // Moves the burst clock forward by delta_sec seconds.
 // Every scheduled burst time that falls before the new clock value is
 // consumed: if no burst is pending, a new one is started with a Gaussian
 // length (floored at BURST_LENGTH_MIN bytes); if one is already pending, only
 // the schedule advances. Each consumed slot draws a fresh exponential wait,
 // floored at 1 ms.
 static void burst_state_advance_time(burst_state_t *state, double delta_sec, uint64_t *seed) {
    const double span_end = state->current_time_sec + delta_sec;
    for (;;) {
        if (!(state->burst_interval > 0)) break;
        if (!(state->next_burst_time < span_end)) break;
        if (state->burst_bytes_remaining == 0) {
            const double drawn_len = gaussian_rand(seed, BURST_LENGTH_MEAN, BURST_LENGTH_STDDEV);
            state->burst_bytes_remaining = (int)fmax(BURST_LENGTH_MIN, drawn_len);
            state->burst_count += 1;
            if (state->verbose) {
                fprintf(stderr, "  [burst] time %.2fs, %d bytes\n",
                        state->next_burst_time, state->burst_bytes_remaining);
            }
        }
        // Exponential inter-arrival gap to the following burst
        double gap = -state->burst_interval * log(rand_uniform(seed));
        gap = (gap < 0.001) ? 0.001 : gap;  // Minimum 1ms between bursts
        state->next_burst_time += gap;
    }
    state->current_time_sec = span_end;
 }
 //=============================================================================
 // Noise Application Functions
 //=============================================================================
 // Flips each bit of data[0..len) independently with probability `ber`.
 // One uniform draw is consumed per bit, least-significant bit first.
 // When ber is below 1e-10 the buffer is left untouched and no random
 // numbers are consumed.
 // Returns the number of bits flipped.
 static int apply_background_noise(uint8_t *data, size_t len, double ber, uint64_t *seed) {
    if (ber < 1e-10) {
        return 0;  // Treated as error-free
    }
    int flips = 0;
    for (size_t pos = 0; pos < len; pos++) {
        // Collect this byte's flips in a mask, then apply them in one XOR
        uint8_t flip_mask = 0;
        for (int bit = 0; bit < 8; bit++) {
            if (rand_uniform(seed) < ber) {
                flip_mask |= (uint8_t)(1u << bit);
                flips++;
            }
        }
        data[pos] ^= flip_mask;
    }
    return flips;
 }
 // Applies the pending burst, if any, to the start of the buffer.
 // Up to state->burst_bytes_remaining bytes (capped at len) have each bit
 // flipped with probability state->burst_ber; the processed byte count is
 // added to total_burst_bytes and subtracted from burst_bytes_remaining, so a
 // burst longer than the buffer carries over to the next call.
 // Returns the number of bits flipped.
 static int apply_burst_noise(uint8_t *data, size_t len, burst_state_t *state, uint64_t *seed) {
    if (state->burst_bytes_remaining <= 0) {
        return 0;
    }
    const size_t pending = (size_t)state->burst_bytes_remaining;
    const size_t span = (pending > len) ? len : pending;
    int flips = 0;
    for (size_t pos = 0; pos < span; pos++) {
        // Collect this byte's flips in a mask, then apply them in one XOR
        uint8_t flip_mask = 0;
        for (int bit = 0; bit < 8; bit++) {
            if (rand_uniform(seed) < state->burst_ber) {
                flip_mask |= (uint8_t)(1u << bit);
                flips++;
            }
        }
        data[pos] ^= flip_mask;
    }
    state->total_burst_bytes += span;
    state->burst_bytes_remaining -= span;
    return flips;
 }
 //=============================================================================
 // Byte Position to Time Conversion
 //=============================================================================
 // Convert a byte position to approximate playback time in seconds, using the
 // global bitrate g_bitrate_bps (bits per second).
 // The byte count is converted to double before scaling to bits so that the
 // multiplication cannot wrap where size_t is 32 bits wide (inputs of
 // 512 MiB or more); for non-wrapping inputs the result is unchanged.
 static double bytes_to_time(size_t byte_pos) {
    return ((double)byte_pos * 8.0) / g_bitrate_bps;
 }
 //=============================================================================
 // Main Program
 //=============================================================================
 // Prints the command-line help (options plus an SNR -> BER reference table)
 // to stderr. `prog` is substituted into the usage line.
 // The reference figures follow the formula implemented in snr_to_ber():
 //   BER = 0.5 * erfc(sqrt(10^(snr_db / 10)))
 // which gives ~3.4e-5 at 9 dB, ~9.0e-9 at 12 dB, and underflows to 0 at 30 dB.
 static void print_usage(const char *prog) {
    fprintf(stderr, "TAV-DT Noise Injector v1.0\n");
    fprintf(stderr, "Simulates QPSK satellite transmission channel noise\n\n");
    fprintf(stderr, "Usage: %s -i input.tavdt -o output.tavdt --snr N [options]\n\n", prog);
    fprintf(stderr, "Required:\n");
    fprintf(stderr, "  -i, --input FILE     Input TAV-DT file\n");
    fprintf(stderr, "  -o, --output FILE    Output corrupted file\n");
    fprintf(stderr, "  --snr N              Signal-to-noise ratio in dB (0-30)\n");
    fprintf(stderr, "\nOptional:\n");
    fprintf(stderr, "  --burst N            Burst events per minute (default: 0)\n");
    fprintf(stderr, "  --burst-ber N        BER during burst events (default: 0.5)\n");
    fprintf(stderr, "  --bitrate N          Stream bitrate in Mbps for timing (default: 2.0)\n");
    fprintf(stderr, "  --seed N             RNG seed for reproducibility\n");
    fprintf(stderr, "  -v, --verbose        Show detailed progress\n");
    fprintf(stderr, "  -h, --help           Show this help\n");
    fprintf(stderr, "\nSNR Reference:\n");
    fprintf(stderr, "   0 dB: Worst case (BER ~7.9e-2, 1 in 13 bits)\n");
    fprintf(stderr, "   6 dB: Poor but working (BER ~2.4e-3)\n");
    fprintf(stderr, "   9 dB: Typical working (BER ~3.4e-5)\n");
    fprintf(stderr, "  12 dB: Good condition (BER ~9.0e-9)\n");
    fprintf(stderr, "  30 dB: Near-perfect (BER ~0)\n");
 }
 // Program entry point.
 // Parses the command line, derives the background BER from --snr, then
 // streams the input file to the output file in BUFFER_SIZE chunks, applying
 // background noise and (optionally) burst noise to every chunk. A summary is
 // printed to stderr at the end.
 // Returns 0 on success and 1 on bad arguments, allocation failure or I/O error.
 int main(int argc, char *argv[]) {
    const char *input_file = NULL;
    const char *output_file = NULL;
    double snr_db = -1;  // Negative doubles as the "not supplied" sentinel
    double bursts_per_minute = 0;
    double burst_ber = 0.5;
    uint64_t seed = 0;
    int seed_provided = 0;
    int verbose = 0;
    static struct option long_options[] = {
        {"input",     required_argument, 0, 'i'},
        {"output",    required_argument, 0, 'o'},
        {"snr",       required_argument, 0, 's'},
        {"burst",     required_argument, 0, 'b'},
        {"burst-ber", required_argument, 0, 'B'},
        {"bitrate",   required_argument, 0, 'r'},
        {"seed",      required_argument, 0, 'S'},
        {"verbose",   no_argument,       0, 'v'},
        {"help",      no_argument,       0, 'h'},
        {0, 0, 0, 0}
    };
    int opt;
    while ((opt = getopt_long(argc, argv, "i:o:vh", long_options, NULL)) != -1) {
        switch (opt) {
            case 'i':
                input_file = optarg;
                break;
            case 'o':
                output_file = optarg;
                break;
            case 's':
                snr_db = atof(optarg);
                break;
            case 'b':
                bursts_per_minute = atof(optarg);
                break;
            case 'B':
                burst_ber = atof(optarg);
                break;
            case 'r':
                g_bitrate_bps = atof(optarg) * 1000000.0;  // Convert Mbps to bps
                break;
            case 'S':
                seed = strtoull(optarg, NULL, 10);
                seed_provided = 1;
                break;
            case 'v':
                verbose = 1;
                break;
            case 'h':
            default:
                print_usage(argv[0]);
                return opt == 'h' ? 0 : 1;
        }
    }
    // Validate arguments
    if (!input_file || !output_file || snr_db < 0) {
        fprintf(stderr, "Error: Missing required arguments\n\n");
        print_usage(argv[0]);
        return 1;
    }
    if (burst_ber < 0 || burst_ber > 1) {
        fprintf(stderr, "Error: --burst-ber must be between 0 and 1\n");
        return 1;
    }
    // A zero, negative or NaN bitrate turns every chunk's duration into
    // inf/NaN/negative time; with bursts enabled an infinite duration makes
    // the burst scheduler loop forever, so reject it up front.
    if (!(g_bitrate_bps > 0)) {
        fprintf(stderr, "Error: --bitrate must be greater than 0\n");
        return 1;
    }
    // Initialize RNG
    if (!seed_provided) {
        seed = (uint64_t)time(NULL) ^ ((uint64_t)clock() << 32);
    }
    // Ensure seed is not zero (xorshift64 requirement)
    if (seed == 0) seed = 0x853c49e6748fea9bULL;
    // Remember the effective seed before warm-up mutates it, so the value
    // reported below is the one that reproduces this run via --seed.
    const uint64_t initial_seed = seed;
    // Warm up the generator (small seeds produce poor initial values)
    for (int i = 0; i < 10; i++) xorshift64(&seed);
    // Calculate BER from SNR
    double ber = snr_to_ber(snr_db);
    // Open files
    FILE *in_fp = fopen(input_file, "rb");
    if (!in_fp) {
        fprintf(stderr, "Error: Cannot open input file: %s\n", input_file);
        return 1;
    }
    FILE *out_fp = fopen(output_file, "wb");
    if (!out_fp) {
        fprintf(stderr, "Error: Cannot open output file: %s\n", output_file);
        fclose(in_fp);
        return 1;
    }
    // Print header info
    fprintf(stderr, "TAV-DT Noise Injector v1.0\n");
    fprintf(stderr, "Input:  %s\n", input_file);
    fprintf(stderr, "Output: %s\n", output_file);
    fprintf(stderr, "SNR:    %.1f dB (BER: %.2e)\n", snr_db, ber);
    if (bursts_per_minute > 0) {
        fprintf(stderr, "Burst:  %.1f events/minute (burst BER: %.2f)\n",
                bursts_per_minute, burst_ber);
    } else {
        fprintf(stderr, "Burst:  disabled\n");
    }
    if (seed_provided) {
        fprintf(stderr, "Seed:   %llu\n", (unsigned long long)initial_seed);
    }
    fprintf(stderr, "\n");
    // Initialize burst state
    burst_state_t burst;
    burst_state_init(&burst, bursts_per_minute, burst_ber, verbose, &seed);
    // Allocate buffer for streaming processing
    uint8_t *buffer = malloc(BUFFER_SIZE);
    if (!buffer) {
        fprintf(stderr, "Error: Cannot allocate buffer\n");
        fclose(in_fp);
        fclose(out_fp);
        return 1;
    }
    // Processing statistics
    long long total_bytes = 0;
    long long bits_flipped_bg = 0;
    long long bits_flipped_burst = 0;
    int chunk_count = 0;
    int io_error = 0;
    // Process file in chunks
    size_t bytes_read;
    while ((bytes_read = fread(buffer, 1, BUFFER_SIZE, in_fp)) > 0) {
        // Calculate time delta for this chunk (for burst scheduling)
        double delta_sec = bytes_to_time(bytes_read);
        burst_state_advance_time(&burst, delta_sec, &seed);
        // Apply noise to chunk
        bits_flipped_bg += apply_background_noise(buffer, bytes_read, ber, &seed);
        bits_flipped_burst += apply_burst_noise(buffer, bytes_read, &burst, &seed);
        // Write corrupted chunk; a short write means the output is incomplete
        if (fwrite(buffer, 1, bytes_read, out_fp) != bytes_read) {
            fprintf(stderr, "\nError: Failed to write to output file: %s\n", output_file);
            io_error = 1;
            break;
        }
        total_bytes += bytes_read;
        chunk_count++;
        if (verbose && chunk_count % 10 == 0) {
            double time_pos = bytes_to_time(total_bytes);
            fprintf(stderr, "\rProcessed %.1f MB (%.1f sec)...",
                    total_bytes / (1024.0 * 1024.0), time_pos);
        }
    }
    // fread() returns 0 both at end-of-file and on error; tell them apart
    if (!io_error && ferror(in_fp)) {
        fprintf(stderr, "\nError: Failed to read from input file: %s\n", input_file);
        io_error = 1;
    }
    if (verbose) {
        fprintf(stderr, "\r                                        \r");
    }
    // Clean up
    free(buffer);
    fclose(in_fp);
    // Buffered data is flushed on close, so a failure here also means data loss
    if (fclose(out_fp) != 0 && !io_error) {
        fprintf(stderr, "Error: Failed to finish writing output file: %s\n", output_file);
        io_error = 1;
    }
    if (io_error) {
        return 1;
    }
    // Print summary
    double duration_sec = bytes_to_time(total_bytes);
    long long total_bits = total_bytes * 8;
    // Avoid printing NaN (0/0) for an empty input file
    double bg_percent = (total_bits > 0) ? 100.0 * bits_flipped_bg / total_bits : 0.0;
    fprintf(stderr, "Complete.\n");
    fprintf(stderr, "  Total bytes: %lld (%.1f sec @ ~%.1f Mbps)\n",
            total_bytes, duration_sec, g_bitrate_bps / 1000000.0);
    fprintf(stderr, "  Background bits flipped: %lld (%.4f%%)\n",
            bits_flipped_bg, bg_percent);
    if (bursts_per_minute > 0) {
        fprintf(stderr, "  Burst events: %d (%d bytes total)\n",
                burst.burst_count, burst.total_burst_bytes);
        fprintf(stderr, "  Burst bits flipped: %lld\n", bits_flipped_burst);
    }
    return 0;
 }
--- a/video_encoder/test_mesh_roundtrip.cpp
+++ b/video_encoder/test_mesh_roundtrip.cpp
@@ -1,328 +0,0 @@
 // Test mesh warp round-trip consistency
 // Warps a frame forward, then backward, and checks if we get the original back
 // This is critical for MC-lifting invertibility
 #include <opencv2/opencv.hpp>
 #include <cstdlib>
 #include <cstring>
 #include <cmath>
 #include <cstdio>
 #include <ctime>
 // Include the mesh functions from encoder
 // These are declarations only; the definitions live outside this file and are
 // resolved at link time with C linkage.
 extern "C" {
    // Produces per-pixel flow fields between two RGB frames through the two
    // out-pointers. This test releases the returned buffers with free().
    void estimate_motion_optical_flow(
        const unsigned char *frame1_rgb, const unsigned char *frame2_rgb,
        int width, int height,
        float **out_flow_x, float **out_flow_y
    );
    // Fills caller-allocated mesh_dx/mesh_dy (mesh_w * mesh_h entries each)
    // from the flow fields. This test treats the values as 1/8-pixel units.
    void build_mesh_from_flow(
        const float *flow_x, const float *flow_y,
        int width, int height,
        int mesh_w, int mesh_h,
        int16_t *mesh_dx, int16_t *mesh_dy
    );
    // Smooths the mesh vectors in place.
    // NOTE(review): exact meaning of smoothness/iterations is defined by the
    // encoder implementation — confirm there.
    void smooth_mesh_laplacian(
        int16_t *mesh_dx, int16_t *mesh_dy,
        int mesh_width, int mesh_height,
        float smoothness, int iterations
    );
 }
 // Translation-only mesh warp of a 3-channel 8-bit image.
 // For every output pixel the displacement is bilinearly interpolated from the
 // four surrounding mesh control points (located at cell centres, values in
 // 1/8 pixel), then the source image is sampled bilinearly at the displaced
 // position with coordinates clamped to the image bounds.
 // dst is reallocated to the size of src.
 static void apply_mesh_warp_rgb(
    const cv::Mat &src,
    cv::Mat &dst,
    const int16_t *mesh_dx,
    const int16_t *mesh_dy,
    int mesh_w, int mesh_h
 ) {
    const int width = src.cols;
    const int height = src.rows;
    const int cell_w = width / mesh_w;
    const int cell_h = height / mesh_h;
    // Four-corner bilinear blend; the term order is fixed so float results
    // do not depend on the call site.
    auto blend4 = [](float v00, float v10, float v01, float v11, float wx, float wy) -> float {
        return (1 - wx) * (1 - wy) * v00 +
               wx * (1 - wy) * v10 +
               (1 - wx) * wy * v01 +
               wx * wy * v11;
    };
    auto clamp_index = [](int v, int hi) { return std::max(0, std::min(hi, v)); };
    auto clamp_unit = [](float v) { return std::max(0.0f, std::min(1.0f, v)); };
    dst = cv::Mat(height, width, CV_8UC3);
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            // Top-left control point of the interpolation quad (never the last row/column)
            const int cx = std::min(x / cell_w, mesh_w - 2);
            const int cy = std::min(y / cell_h, mesh_h - 2);
            const int i00 = cy * mesh_w + cx;
            const int i10 = i00 + 1;
            const int i01 = (cy + 1) * mesh_w + cx;
            const int i11 = i01 + 1;
            // Control points sit at cell centres
            const float left = cx * cell_w + cell_w / 2.0f;
            const float top = cy * cell_h + cell_h / 2.0f;
            const float right = (cx + 1) * cell_w + cell_w / 2.0f;
            const float bottom = (cy + 1) * cell_h + cell_h / 2.0f;
            const float alpha = clamp_unit((x - left) / (right - left));
            const float beta = clamp_unit((y - top) / (bottom - top));
            // Interpolated displacement in pixels (mesh stores 1/8 pixel)
            const float dx = blend4(mesh_dx[i00] / 8.0f, mesh_dx[i10] / 8.0f,
                                    mesh_dx[i01] / 8.0f, mesh_dx[i11] / 8.0f,
                                    alpha, beta);
            const float dy = blend4(mesh_dy[i00] / 8.0f, mesh_dy[i10] / 8.0f,
                                    mesh_dy[i01] / 8.0f, mesh_dy[i11] / 8.0f,
                                    alpha, beta);
            const float src_x = x + dx;
            const float src_y = y + dy;
            const int floor_x = (int)floorf(src_x);
            const int floor_y = (int)floorf(src_y);
            const int sx0 = clamp_index(floor_x, width - 1);
            const int sy0 = clamp_index(floor_y, height - 1);
            const int sx1 = clamp_index(floor_x + 1, width - 1);
            const int sy1 = clamp_index(floor_y + 1, height - 1);
            // Fractions are taken relative to the clamped corner
            const float fx = src_x - sx0;
            const float fy = src_y - sy0;
            const cv::Vec3b &p00 = src.at<cv::Vec3b>(sy0, sx0);
            const cv::Vec3b &p10 = src.at<cv::Vec3b>(sy0, sx1);
            const cv::Vec3b &p01 = src.at<cv::Vec3b>(sy1, sx0);
            const cv::Vec3b &p11 = src.at<cv::Vec3b>(sy1, sx1);
            cv::Vec3b &out = dst.at<cv::Vec3b>(y, x);
            for (int c = 0; c < 3; c++) {
                const float val = blend4(p00[c], p10[c], p01[c], p11[c], fx, fy);
                out[c] = (unsigned char)std::max(0.0f, std::min(255.0f, val));
            }
        }
    }
 }
 int main(int argc, char** argv) {
    const char* video_file = (argc > 1) ? argv[1] : "test_video.mp4";
    int num_tests = (argc > 2) ? atoi(argv[2]) : 5;
    printf("Opening video: %s\n", video_file);
    cv::VideoCapture cap(video_file);
    if (!cap.isOpened()) {
        fprintf(stderr, "Error: Cannot open video file\n");
        return 1;
    }
    int total_frames = (int)cap.get(cv::CAP_PROP_FRAME_COUNT);
    int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
    int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);
    printf("Video: %dx%d, %d frames\n", width, height, total_frames);
    // Mesh dimensions (32×32 cells)
    int mesh_cell_size = 32;
    int mesh_w = (width + mesh_cell_size - 1) / mesh_cell_size;
    int mesh_h = (height + mesh_cell_size - 1) / mesh_cell_size;
    if (mesh_w < 2) mesh_w = 2;
    if (mesh_h < 2) mesh_h = 2;
    printf("Mesh: %dx%d (approx %dx%d px cells)\n\n",
           mesh_w, mesh_h, width / mesh_w, height / mesh_h);
    float smoothness = 0.5f;
    int smooth_iterations = 8;
    srand(time(NULL));
    double total_forward_psnr = 0.0;
    double total_roundtrip_psnr = 0.0;
    double total_half_roundtrip_psnr = 0.0;
    for (int test = 0; test < num_tests; test++) {
        int frame_num = 5 + rand() % (total_frames - 10);
        printf("[Test %d/%d] Frame pair %d → %d\n", test + 1, num_tests, frame_num - 1, frame_num);
        cap.set(cv::CAP_PROP_POS_FRAMES, frame_num - 1);
        cv::Mat frame0, frame1;
        cap >> frame0;
        cap >> frame1;
        if (frame0.empty() || frame1.empty()) {
            fprintf(stderr, "Error reading frames\n");
            continue;
        }
        cv::Mat frame0_rgb, frame1_rgb;
        cv::cvtColor(frame0, frame0_rgb, cv::COLOR_BGR2RGB);
        cv::cvtColor(frame1, frame1_rgb, cv::COLOR_BGR2RGB);
        // Compute mesh (F0 → F1)
        float *flow_x = nullptr, *flow_y = nullptr;
        estimate_motion_optical_flow(frame0_rgb.data, frame1_rgb.data,
                                     width, height, &flow_x, &flow_y);
        int16_t *mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        int16_t *mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        build_mesh_from_flow(flow_x, flow_y, width, height, mesh_w, mesh_h, mesh_dx, mesh_dy);
        smooth_mesh_laplacian(mesh_dx, mesh_dy, mesh_w, mesh_h, smoothness, smooth_iterations);
        // Create inverted mesh
        int16_t *inv_mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        int16_t *inv_mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        for (int i = 0; i < mesh_w * mesh_h; i++) {
            inv_mesh_dx[i] = -mesh_dx[i];
            inv_mesh_dy[i] = -mesh_dy[i];
        }
        // Create half-mesh for symmetric lifting test
        int16_t *half_mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        int16_t *half_mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        int16_t *neg_half_mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        int16_t *neg_half_mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        for (int i = 0; i < mesh_w * mesh_h; i++) {
            half_mesh_dx[i] = mesh_dx[i] / 2;
            half_mesh_dy[i] = mesh_dy[i] / 2;
            neg_half_mesh_dx[i] = -half_mesh_dx[i];
            neg_half_mesh_dy[i] = -half_mesh_dy[i];
        }
        // TEST 1: Full forward warp quality (F0 → F1)
        cv::Mat warped_forward;
        apply_mesh_warp_rgb(frame0, warped_forward, mesh_dx, mesh_dy, mesh_w, mesh_h);
        double forward_mse = 0.0;
        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++) {
                for (int c = 0; c < 3; c++) {
                    double diff = (double)warped_forward.at<cv::Vec3b>(y, x)[c] -
                                 (double)frame1.at<cv::Vec3b>(y, x)[c];
                    forward_mse += diff * diff;
                }
            }
        }
        forward_mse /= (width * height * 3);
        double forward_psnr = (forward_mse > 0) ? 10.0 * log10(255.0 * 255.0 / forward_mse) : 999.0;
        total_forward_psnr += forward_psnr;
        // TEST 2: Full round-trip (F0 → forward → backward → F0')
        cv::Mat roundtrip;
        apply_mesh_warp_rgb(warped_forward, roundtrip, inv_mesh_dx, inv_mesh_dy, mesh_w, mesh_h);
        double roundtrip_mse = 0.0;
        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++) {
                for (int c = 0; c < 3; c++) {
                    double diff = (double)roundtrip.at<cv::Vec3b>(y, x)[c] -
                                 (double)frame0.at<cv::Vec3b>(y, x)[c];
                    roundtrip_mse += diff * diff;
                }
            }
        }
        roundtrip_mse /= (width * height * 3);
        double roundtrip_psnr = (roundtrip_mse > 0) ? 10.0 * log10(255.0 * 255.0 / roundtrip_mse) : 999.0;
        total_roundtrip_psnr += roundtrip_psnr;
        // TEST 3: Half-step symmetric round-trip (MC-lifting style)
        // F0 → +½mesh, then → -½mesh (should return to F0)
        cv::Mat half_forward, half_roundtrip;
        apply_mesh_warp_rgb(frame0, half_forward, half_mesh_dx, half_mesh_dy, mesh_w, mesh_h);
        apply_mesh_warp_rgb(half_forward, half_roundtrip, neg_half_mesh_dx, neg_half_mesh_dy, mesh_w, mesh_h);
        double half_roundtrip_mse = 0.0;
        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++) {
                for (int c = 0; c < 3; c++) {
                    double diff = (double)half_roundtrip.at<cv::Vec3b>(y, x)[c] -
                                 (double)frame0.at<cv::Vec3b>(y, x)[c];
                    half_roundtrip_mse += diff * diff;
                }
            }
        }
        half_roundtrip_mse /= (width * height * 3);
        double half_roundtrip_psnr = (half_roundtrip_mse > 0) ? 10.0 * log10(255.0 * 255.0 / half_roundtrip_mse) : 999.0;
        total_half_roundtrip_psnr += half_roundtrip_psnr;
        printf("  Forward warp (F0→F1):       PSNR = %.2f dB\n", forward_psnr);
        printf("  Full round-trip (F0→F0'):   PSNR = %.2f dB\n", roundtrip_psnr);
        printf("  Half round-trip (±½mesh):   PSNR = %.2f dB\n", half_roundtrip_psnr);
        // Compute motion stats
        float avg_motion = 0.0f, max_motion = 0.0f;
        for (int i = 0; i < mesh_w * mesh_h; i++) {
            float dx = mesh_dx[i] / 8.0f;
            float dy = mesh_dy[i] / 8.0f;
            float motion = sqrtf(dx * dx + dy * dy);
            avg_motion += motion;
            if (motion > max_motion) max_motion = motion;
        }
        avg_motion /= (mesh_w * mesh_h);
        printf("  Motion: avg=%.2f px, max=%.2f px\n\n", avg_motion, max_motion);
        // Save visualisation for worst case
        if (test == 0 || roundtrip_psnr < 30.0) {
            char filename[256];
            sprintf(filename, "roundtrip_%04d_original.png", frame_num);
            cv::imwrite(filename, frame0);
            sprintf(filename, "roundtrip_%04d_forward.png", frame_num);
            cv::imwrite(filename, warped_forward);
            sprintf(filename, "roundtrip_%04d_roundtrip.png", frame_num);
            cv::imwrite(filename, roundtrip);
            // Difference images
            cv::Mat diff_roundtrip = cv::Mat::zeros(height, width, CV_8UC3);
            for (int y = 0; y < height; y++) {
                for (int x = 0; x < width; x++) {
                    for (int c = 0; c < 3; c++) {
                        int diff = abs((int)roundtrip.at<cv::Vec3b>(y, x)[c] -
                                      (int)frame0.at<cv::Vec3b>(y, x)[c]);
                        diff_roundtrip.at<cv::Vec3b>(y, x)[c] = std::min(diff * 5, 255);
                    }
                }
            }
            sprintf(filename, "roundtrip_%04d_diff.png", frame_num);
            cv::imwrite(filename, diff_roundtrip);
            printf("  Saved visualisation: roundtrip_%04d_*.png\n\n", frame_num);
        }
        free(flow_x);
        free(flow_y);
        free(mesh_dx);
        free(mesh_dy);
        free(inv_mesh_dx);
        free(inv_mesh_dy);
        free(half_mesh_dx);
        free(half_mesh_dy);
        free(neg_half_mesh_dx);
        free(neg_half_mesh_dy);
    }
    printf("===========================================\n");
    printf("Average Results (%d tests):\n", num_tests);
    printf("  Forward warp quality:       %.2f dB\n", total_forward_psnr / num_tests);
    printf("  Full round-trip error:      %.2f dB\n", total_roundtrip_psnr / num_tests);
    printf("  Half round-trip error:      %.2f dB\n", total_half_roundtrip_psnr / num_tests);
    printf("===========================================\n\n");
    if (total_roundtrip_psnr / num_tests < 35.0) {
        printf("WARNING: Round-trip PSNR < 35 dB indicates poor invertibility!\n");
        printf("This will cause MC-lifting to accumulate errors and hurt compression.\n");
        printf("Bilinear interpolation artifacts are likely the culprit.\n");
    } else {
        printf("Round-trip consistency looks acceptable (>35 dB).\n");
    }
    cap.release();
    return 0;
 }
--- a/video_encoder/test_mesh_warp.cpp
+++ b/video_encoder/test_mesh_warp.cpp
@@ -1,422 +0,0 @@
 // Visual unit test for mesh warping with hierarchical block matching and affine estimation
 // Picks 5 random frames from test_video.mp4, warps prev frame to current frame using mesh,
 // and saves both warped and target frames for visual comparison
 // Now includes: hierarchical diamond search, Laplacian smoothing, and selective affine transforms
 #include <algorithm>
 #include <cmath>
 #include <cstdint>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include <ctime>
 #include <vector>
 #include <opencv2/opencv.hpp>
 #include <opencv2/video/tracking.hpp>
 // Include the mesh functions from encoder
 // Only C-linkage prototypes are given here; the definitions are not part of this
 // file and must be supplied at link time (presumably by the encoder sources - confirm
 // against the build script).
 extern "C" {
    // Estimates per-pixel motion between two frames.
    //   frame1_rgb, frame2_rgb : pixel data of the two frames; main() passes the data
    //                            pointers of RGB-converted cv::Mat images.
    //   width, height          : frame dimensions in pixels.
    //   out_flow_x, out_flow_y : receive the resulting flow buffers. main() releases
    //                            both with free(), so they are expected to be
    //                            heap-allocated by the callee.
    // NOTE(review): buffer layout is presumably one float per pixel, row-major - confirm.
    void estimate_motion_optical_flow(
        const unsigned char *frame1_rgb, const unsigned char *frame2_rgb,
        int width, int height,
        float **out_flow_x, float **out_flow_y
    );
    // Fills mesh_dx / mesh_dy (caller-allocated, mesh_w * mesh_h entries in main())
    // from the flow field. The warp in this file interprets the stored values as
    // 1/8-pixel units.
    void build_mesh_from_flow(
        const float *flow_x, const float *flow_y,
        int width, int height,
        int mesh_w, int mesh_h,
        int16_t *mesh_dx, int16_t *mesh_dy
    );
    // Smooths the mesh motion vectors in place.
    //   smoothness : smoothing weight (main() uses 0.5).
    //   iterations : number of smoothing passes (main() uses 8).
    void smooth_mesh_laplacian(
        int16_t *mesh_dx, int16_t *mesh_dy,
        int mesh_width, int mesh_height,
        float smoothness, int iterations
    );
    // Estimates motion parameters for one mesh cell.
    //   cell_x, cell_y : cell coordinates in the mesh grid.
    //   cell_w, cell_h : cell size in pixels.
    //   threshold      : main() passes 0.40 and describes it as the improvement
    //                    required before affine is selected.
    //   out_tx, out_ty : translation outputs; main() stores them as mesh_dx / mesh_dy.
    //   out_a11..a22   : affine matrix outputs; the warp in this file divides them
    //                    by 256 before use.
    // Returns non-zero when the caller should treat the cell as affine.
    // NOTE(review): it is not visible here whether the out_* values are written when
    // the function returns 0 - confirm against the implementation.
    int estimate_cell_affine(
        const float *flow_x, const float *flow_y,
        int width, int height,
        int cell_x, int cell_y,
        int cell_w, int cell_h,
        float threshold,
        int16_t *out_tx, int16_t *out_ty,
        int16_t *out_a11, int16_t *out_a12,
        int16_t *out_a21, int16_t *out_a22
    );
 }
 // Mesh warp with bilinear interpolation and optional affine support.
 //
 // Produces `dst` by inverse warping: for every destination pixel a displacement is
 // interpolated from the mesh control points (located at the centre of each mesh
 // cell), and `src` is sampled bilinearly at (x + dx, y + dy).
 //
 //   src            Input BGR image (CV_8UC3).
 //   dst            Output image; reallocated to the size of `src`.
 //   mesh_dx/dy     Control-point motion vectors in 1/8-pixel units,
 //                  mesh_w * mesh_h entries, row-major.
 //   affine_mask    Optional per-cell flag (1 = affine, 0 = translation only).
 //                  May be null, in which case the affine arrays are ignored.
 //   affine_a11..22 Per-cell affine matrix entries in 1/256 units.
 //   mesh_w/mesh_h  Mesh dimensions in cells.
 //
 // Differences from the previous implementation:
 //   * The interpolation quad is now the pair of control points that actually
 //     brackets the pixel. Previously the quad was anchored on the cell containing
 //     the pixel, so the weight was clamped to 0 in the left/top half of every cell
 //     and the motion field jumped at each cell boundary.
 //   * Affine parameters are looked up in the cell that contains the pixel, so the
 //     last mesh row/column is no longer ignored.
 //   * A mesh smaller than 2x2, or with more cells than pixels along an axis, would
 //     divide by zero or index out of range; `dst` now becomes a copy of `src`.
 static void apply_mesh_warp_rgb(
    const cv::Mat &src,          // Input BGR image
    cv::Mat &dst,                 // Output warped BGR image
    const int16_t *mesh_dx,       // Mesh motion vectors (1/8 pixel)
    const int16_t *mesh_dy,
    const uint8_t *affine_mask,   // 1=affine, 0=translation
    const int16_t *affine_a11,
    const int16_t *affine_a12,
    const int16_t *affine_a21,
    const int16_t *affine_a22,
    int mesh_w, int mesh_h
 ) {
    const int width = src.cols;
    const int height = src.rows;
    // Degenerate geometry: no valid interpolation quad or zero-sized cells.
    if (mesh_w < 2 || mesh_h < 2 || width < mesh_w || height < mesh_h) {
        dst = src.clone();
        return;
    }
    const int cell_w = width / mesh_w;
    const int cell_h = height / mesh_h;
    const float half_w = cell_w / 2.0f;
    const float half_h = cell_h / 2.0f;
    dst = cv::Mat(height, width, CV_8UC3);
    for (int y = 0; y < height; y++) {
        // Row of control points just above (or at) this pixel, clamped so that
        // node_y + 1 is still a valid row.
        int node_y = (int)floorf((y - half_h) / cell_h);
        node_y = std::max(0, std::min(node_y, mesh_h - 2));
        const float cp_y0 = node_y * cell_h + half_h;
        // Clamping the weight makes pixels outside the control-point hull reuse
        // the nearest control point's motion.
        const float beta = std::max(0.0f, std::min(1.0f, (y - cp_y0) / cell_h));
        // Cell that contains the pixel (owner of the affine parameters).
        const int own_y = std::min(y / cell_h, mesh_h - 1);
        for (int x = 0; x < width; x++) {
            int node_x = (int)floorf((x - half_w) / cell_w);
            node_x = std::max(0, std::min(node_x, mesh_w - 2));
            const float cp_x0 = node_x * cell_w + half_w;
            const float alpha = std::max(0.0f, std::min(1.0f, (x - cp_x0) / cell_w));
            // Four corner control points
            const int idx_00 = node_y * mesh_w + node_x;
            const int idx_10 = idx_00 + 1;
            const int idx_01 = idx_00 + mesh_w;
            const int idx_11 = idx_01 + 1;
            // Bilinear interpolation of motion vectors
            const float w00 = (1 - alpha) * (1 - beta);
            const float w10 = alpha * (1 - beta);
            const float w01 = (1 - alpha) * beta;
            const float w11 = alpha * beta;
            float dx = w00 * (mesh_dx[idx_00] / 8.0f) +
                       w10 * (mesh_dx[idx_10] / 8.0f) +
                       w01 * (mesh_dx[idx_01] / 8.0f) +
                       w11 * (mesh_dx[idx_11] / 8.0f);
            float dy = w00 * (mesh_dy[idx_00] / 8.0f) +
                       w10 * (mesh_dy[idx_10] / 8.0f) +
                       w01 * (mesh_dy[idx_01] / 8.0f) +
                       w11 * (mesh_dy[idx_11] / 8.0f);
            // Optional affine term, taken from the cell containing the pixel and
            // applied to the position relative to that cell's centre.
            const int own_x = std::min(x / cell_w, mesh_w - 1);
            const int cell_idx = own_y * mesh_w + own_x;
            if (affine_mask && affine_mask[cell_idx]) {
                const float rel_x = x - (own_x * cell_w + half_w);
                const float rel_y = y - (own_y * cell_h + half_h);
                const float a11 = affine_a11[cell_idx] / 256.0f;
                const float a12 = affine_a12[cell_idx] / 256.0f;
                const float a21 = affine_a21[cell_idx] / 256.0f;
                const float a22 = affine_a22[cell_idx] / 256.0f;
                // Affine warp: [x'] = [a11 a12][x] + [dx]
                //               [y']   [a21 a22][y]   [dy]
                dx = a11 * rel_x + a12 * rel_y + dx;
                dy = a21 * rel_x + a22 * rel_y + dy;
            }
            // Source coordinates (inverse warp)
            const float src_x = x + dx;
            const float src_y = y + dy;
            // Fractional weights come from the unclamped integer position; the
            // sample indices are clamped afterwards so that out-of-frame reads
            // replicate the border pixel.
            const int ix = (int)floorf(src_x);
            const int iy = (int)floorf(src_y);
            const float fx = src_x - ix;
            const float fy = src_y - iy;
            const int sx0 = std::max(0, std::min(width - 1, ix));
            const int sy0 = std::max(0, std::min(height - 1, iy));
            const int sx1 = std::max(0, std::min(width - 1, ix + 1));
            const int sy1 = std::max(0, std::min(height - 1, iy + 1));
            const cv::Vec3b &p00 = src.at<cv::Vec3b>(sy0, sx0);
            const cv::Vec3b &p10 = src.at<cv::Vec3b>(sy0, sx1);
            const cv::Vec3b &p01 = src.at<cv::Vec3b>(sy1, sx0);
            const cv::Vec3b &p11 = src.at<cv::Vec3b>(sy1, sx1);
            cv::Vec3b &out = dst.at<cv::Vec3b>(y, x);
            // Interpolate each channel
            for (int c = 0; c < 3; c++) {
                const float val = (1 - fx) * (1 - fy) * p00[c] +
                                  fx * (1 - fy) * p10[c] +
                                  (1 - fx) * fy * p01[c] +
                                  fx * fy * p11[c];
                out[c] = (unsigned char)std::max(0.0f, std::min(255.0f, val));
            }
        }
    }
 }
 // Outline every affine-flagged mesh cell on `img` with a 1-pixel green rectangle.
 //   img          Image to draw on (modified in place).
 //   affine_mask  Row-major per-cell flags, mesh_w * mesh_h entries; non-zero = affine.
 //   mesh_w/h     Mesh dimensions in cells; cell size is the image size divided by these.
 static void create_affine_overlay(
    cv::Mat &img,
    const uint8_t *affine_mask,
    int mesh_w, int mesh_h
 ) {
    const int cell_w = img.cols / mesh_w;
    const int cell_h = img.rows / mesh_h;
    const cv::Scalar green(0, 255, 0);
    for (int row = 0; row < mesh_h; ++row) {
        const int top = row * cell_h;
        const int bottom = top + cell_h;
        for (int col = 0; col < mesh_w; ++col) {
            // Translation-only cells get no outline.
            if (!affine_mask[row * mesh_w + col]) {
                continue;
            }
            const int left = col * cell_w;
            const int right = left + cell_w;
            cv::rectangle(img, cv::Point(left, top), cv::Point(right, bottom), green, 1);
        }
    }
 }
 int main(int argc, char** argv) {
    const char* video_file = (argc > 1) ? argv[1] : "test_video.mp4";
    int num_test_frames = (argc > 2) ? atoi(argv[2]) : 5;
    printf("Opening video: %s\n", video_file);
    cv::VideoCapture cap(video_file);
    if (!cap.isOpened()) {
        fprintf(stderr, "Error: Cannot open video file %s\n", video_file);
        return 1;
    }
    int total_frames = (int)cap.get(cv::CAP_PROP_FRAME_COUNT);
    int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
    int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);
    printf("Video: %dx%d, %d frames\n", width, height, total_frames);
    if (total_frames < 10) {
        fprintf(stderr, "Error: Video too short (need at least 10 frames)\n");
        return 1;
    }
    // Calculate mesh dimensions (32×32 pixel cells, matches current encoder)
    int mesh_cell_size = 32;
    int mesh_w = (width + mesh_cell_size - 1) / mesh_cell_size;
    int mesh_h = (height + mesh_cell_size - 1) / mesh_cell_size;
    if (mesh_w < 2) mesh_w = 2;
    if (mesh_h < 2) mesh_h = 2;
    printf("Mesh: %dx%d (approx %dx%d px cells)\n",
           mesh_w, mesh_h, width / mesh_w, height / mesh_h);
    // Encoder parameters (match current encoder_tav.c settings)
    float smoothness = 0.5f;      // Mesh smoothness weight
    int smooth_iterations = 8;     // Smoothing iterations
    float affine_threshold = 0.40f; // 40% improvement required for affine
    printf("Settings: smoothness=%.2f, iterations=%d, affine_threshold=%.0f%%\n",
           smoothness, smooth_iterations, affine_threshold * 100.0f);
    // Seed random number generator
    srand(time(NULL));
    // Pick random frames (avoid first and last 5 frames)
    printf("\nTesting %d random frame pairs:\n", num_test_frames);
    for (int test = 0; test < num_test_frames; test++) {
        // Pick random frame (ensure we have a previous frame)
        int frame_num = 5 + rand() % (total_frames - 10);
        printf("\n[Test %d/%d] Warping frame %d → frame %d (inverse warp)\n",
               test + 1, num_test_frames, frame_num - 1, frame_num);
        // Read previous frame (source for warping)
        cap.set(cv::CAP_PROP_POS_FRAMES, frame_num - 1);
        cv::Mat prev_frame;
        cap >> prev_frame;
        if (prev_frame.empty()) {
            fprintf(stderr, "Error reading frame %d\n", frame_num - 1);
            continue;
        }
        // Read current frame (target to match)
        cv::Mat curr_frame;
        cap >> curr_frame;
        if (curr_frame.empty()) {
            fprintf(stderr, "Error reading frame %d\n", frame_num);
            continue;
        }
        // Convert to RGB for block matching
        cv::Mat prev_rgb, curr_rgb;
        cv::cvtColor(prev_frame, prev_rgb, cv::COLOR_BGR2RGB);
        cv::cvtColor(curr_frame, curr_rgb, cv::COLOR_BGR2RGB);
        // Compute hierarchical block matching (replaces optical flow)
        printf("  Computing hierarchical block matching...\n");
        float *flow_x = nullptr, *flow_y = nullptr;
        estimate_motion_optical_flow(
            prev_rgb.data, curr_rgb.data,
            width, height,
            &flow_x, &flow_y
        );
        // Build mesh from flow
        printf("  Building mesh from block matches...\n");
        int16_t *mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        int16_t *mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        build_mesh_from_flow(flow_x, flow_y, width, height, mesh_w, mesh_h, mesh_dx, mesh_dy);
        // Apply Laplacian smoothing
        printf("  Applying Laplacian smoothing (%d iterations, %.2f weight)...\n",
               smooth_iterations, smoothness);
        smooth_mesh_laplacian(mesh_dx, mesh_dy, mesh_w, mesh_h, smoothness, smooth_iterations);
        // Estimate selective per-cell affine transforms
        printf("  Estimating selective affine transforms (threshold=%.0f%%)...\n",
               affine_threshold * 100.0f);
        uint8_t *affine_mask = (uint8_t*)calloc(mesh_w * mesh_h, sizeof(uint8_t));
        int16_t *affine_a11 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        int16_t *affine_a12 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        int16_t *affine_a21 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        int16_t *affine_a22 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        int cell_w = width / mesh_w;
        int cell_h = height / mesh_h;
        int affine_count = 0;
        for (int cy = 0; cy < mesh_h; cy++) {
            for (int cx = 0; cx < mesh_w; cx++) {
                int cell_idx = cy * mesh_w + cx;
                int16_t tx, ty, a11, a12, a21, a22;
                int use_affine = estimate_cell_affine(
                    flow_x, flow_y,
                    width, height,
                    cx, cy, cell_w, cell_h,
                    affine_threshold,
                    &tx, &ty, &a11, &a12, &a21, &a22
                );
                affine_mask[cell_idx] = use_affine ? 1 : 0;
                mesh_dx[cell_idx] = tx;
                mesh_dy[cell_idx] = ty;
                affine_a11[cell_idx] = a11;
                affine_a12[cell_idx] = a12;
                affine_a21[cell_idx] = a21;
                affine_a22[cell_idx] = a22;
                if (use_affine) affine_count++;
            }
        }
        printf("  Affine usage: %d/%d cells (%.1f%%)\n",
               affine_count, mesh_w * mesh_h,
               100.0f * affine_count / (mesh_w * mesh_h));
        // Warp previous frame to current frame
        printf("  Warping frame with mesh + affine...\n");
        cv::Mat warped;
        apply_mesh_warp_rgb(prev_frame, warped, mesh_dx, mesh_dy,
                           affine_mask, affine_a11, affine_a12, affine_a21, affine_a22,
                           mesh_w, mesh_h);
        // Create visualisation with affine overlay
        cv::Mat warped_viz = warped.clone();
        create_affine_overlay(warped_viz, affine_mask, mesh_w, mesh_h);
        // Compute MSE between warped and target
        double mse = 0.0;
        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++) {
                for (int c = 0; c < 3; c++) {
                    double diff = (double)warped.at<cv::Vec3b>(y, x)[c] -
                                 (double)curr_frame.at<cv::Vec3b>(y, x)[c];
                    mse += diff * diff;
                }
            }
        }
        mse /= (width * height * 3);
        double psnr = (mse > 0) ? 10.0 * log10(255.0 * 255.0 / mse) : 999.0;
        printf("  Warp quality: MSE=%.2f, PSNR=%.2f dB\n", mse, psnr);
        // Save images
        char filename[256];
        sprintf(filename, "test_mesh_frame_%04d_source.png", frame_num - 1);
        cv::imwrite(filename, prev_frame);
        printf("  Saved source: %s\n", filename);
        sprintf(filename, "test_mesh_frame_%04d_warped.png", frame_num);
        cv::imwrite(filename, warped);
        printf("  Saved warped: %s\n", filename);
        sprintf(filename, "test_mesh_frame_%04d_warped_viz.png", frame_num);
        cv::imwrite(filename, warped_viz);
        printf("  Saved warped+viz (green=affine): %s\n", filename);
        sprintf(filename, "test_mesh_frame_%04d_target.png", frame_num);
        cv::imwrite(filename, curr_frame);
        printf("  Saved target: %s\n", filename);
        // Compute difference image
        cv::Mat diff_img = cv::Mat::zeros(height, width, CV_8UC3);
        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++) {
                for (int c = 0; c < 3; c++) {
                    int diff = abs((int)warped.at<cv::Vec3b>(y, x)[c] -
                                  (int)curr_frame.at<cv::Vec3b>(y, x)[c]);
                    diff_img.at<cv::Vec3b>(y, x)[c] = std::min(diff * 3, 255); // Amplify for visibility
                }
            }
        }
        sprintf(filename, "test_mesh_frame_%04d_diff.png", frame_num);
        cv::imwrite(filename, diff_img);
        printf("  Saved difference (amplified 3x): %s\n", filename);
        // Compute motion statistics
        float max_motion = 0.0f, avg_motion = 0.0f;
        for (int i = 0; i < mesh_w * mesh_h; i++) {
            float dx = mesh_dx[i] / 8.0f;
            float dy = mesh_dy[i] / 8.0f;
            float motion = sqrtf(dx * dx + dy * dy);
            avg_motion += motion;
            if (motion > max_motion) max_motion = motion;
        }
        avg_motion /= (mesh_w * mesh_h);
        printf("  Motion: avg=%.2f px, max=%.2f px\n", avg_motion, max_motion);
        // Cleanup
        free(flow_x);
        free(flow_y);
        free(mesh_dx);
        free(mesh_dy);
        free(affine_mask);
        free(affine_a11);
        free(affine_a12);
        free(affine_a21);
        free(affine_a22);
    }
    printf("\nDone! Check output images:\n");
    printf("  *_source.png: Original frame before warping\n");
    printf("  *_warped.png: Warped frame (should match target)\n");
    printf("  *_warped_viz.png: Warped with green overlay showing affine cells\n");
    printf("  *_target.png: Target frame to match\n");
    printf("  *_diff.png: Difference image (should be mostly black if warp is good)\n");
    cap.release();
    return 0;
 }