video_decoder removed; fix video regression and updated to no-zstd

This commit is contained in:
minjaesong
2026-05-10 05:56:56 +09:00
parent b27ef0dbf9
commit 2cdd731c3b
63 changed files with 127 additions and 31850 deletions

11
.idea/libraries/badlogicgames_gdx.xml generated Normal file
View File

@@ -0,0 +1,11 @@
<component name="libraryTable">
<library name="badlogicgames.gdx" type="repository">
<properties maven-id="com.badlogicgames.gdx:gdx:1.12.1" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/gdx/gdx/1.12.1/gdx-1.12.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/gdx/gdx-jnigen-loader/2.3.1/gdx-jnigen-loader-2.3.1.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>

View File

@@ -0,0 +1,62 @@
<component name="libraryTable">
<library name="badlogicgames.gdx.backend.lwjgl3" type="repository">
<properties maven-id="com.badlogicgames.gdx:gdx-backend-lwjgl3:1.12.1" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/gdx/gdx-backend-lwjgl3/1.12.1/gdx-backend-lwjgl3-1.12.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/gdx/gdx/1.12.1/gdx-1.12.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/gdx/gdx-jnigen-loader/2.3.1/gdx-jnigen-loader-2.3.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-linux.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-linux-arm32.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-linux-arm64.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-macos.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-macos-arm64.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-windows.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-windows-x86.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-linux.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-linux-arm32.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-linux-arm64.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-macos.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-macos-arm64.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-windows.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-windows-x86.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-linux.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-linux-arm32.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-linux-arm64.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-macos.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-macos-arm64.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-windows.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-windows-x86.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-linux.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-linux-arm32.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-linux-arm64.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-macos.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-macos-arm64.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-windows.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-windows-x86.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-linux.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-linux-arm32.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-linux-arm64.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-macos.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-macos-arm64.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-windows.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-windows-x86.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-linux.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-linux-arm32.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-linux-arm64.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-macos.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-macos-arm64.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-windows.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-windows-x86.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/jlayer/jlayer/1.0.1-gdx/jlayer-1.0.1-gdx.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jcraft/jorbis/0.0.17/jorbis-0.0.17.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>

View File

@@ -1,8 +1,8 @@
#!/usr/bin/env fish #!/usr/bin/env fish
for f in *.mod; python3 mod2taud.py $f assets/disk0/(basename $f .mod).taud; end for f in *.mod; python3 mod2taud.py $f assets/disk0/home/music/(basename $f .mod).taud; end
for f in *.s3m; python3 s3m2taud.py $f assets/disk0/(basename $f .s3m).taud; end for f in *.s3m; python3 s3m2taud.py $f assets/disk0/home/music/(basename $f .s3m).taud; end
for f in *.it; python3 it2taud.py $f assets/disk0/(basename $f .it).taud; end for f in *.it; python3 it2taud.py $f assets/disk0/home/music/(basename $f .it).taud; end
for f in *.xm; python3 xm2taud.py $f assets/disk0/(basename $f .xm).taud; end for f in *.xm; python3 xm2taud.py $f assets/disk0/home/music/(basename $f .xm).taud; end
for f in *.mon; python3 mon2taud.py $f assets/disk0/(basename $f .mon).taud; end for f in *.mon; python3 mon2taud.py $f assets/disk0/home/music/(basename $f .mon).taud; end
for f in *.MON; python3 mon2taud.py $f assets/disk0/(basename $f .MON).taud; end for f in *.MON; python3 mon2taud.py $f assets/disk0/home/music/(basename $f .MON).taud; end

View File

@@ -10,5 +10,7 @@
<orderEntry type="module" module-name="tsvm_core" /> <orderEntry type="module" module-name="tsvm_core" />
<orderEntry type="library" name="TerranVirtualDisk" level="project" /> <orderEntry type="library" name="TerranVirtualDisk" level="project" />
<orderEntry type="library" name="lib" level="project" /> <orderEntry type="library" name="lib" level="project" />
<orderEntry type="library" name="badlogicgames.gdx" level="project" />
<orderEntry type="library" name="badlogicgames.gdx.backend.lwjgl3" level="project" />
</component> </component>
</module> </module>

View File

@@ -1,7 +1,9 @@
const SND_BASE_ADDR = audio.getBaseAddr() const SND_BASE_ADDR = audio.getBaseAddr()
const SND_MEM_ADDR = audio.getMemAddr() const SND_MEM_ADDR = audio.getMemAddr()
const TAD_INPUT_ADDR = SND_MEM_ADDR - 262144 // TAD input buffer (matches TAV packet 0x24) // tadInputBin lives at audio-local offset 917504 and tadDecodedBin at 983040
const TAD_DECODED_ADDR = SND_MEM_ADDR - 262144 + 65536 // TAD decoded buffer // (post-bef85f6 memory map; the old 262144 offset now hits the enlarged sampleBin).
const TAD_INPUT_ADDR = SND_MEM_ADDR - 917504 // TAD input buffer (matches TAV packet 0x24)
const TAD_DECODED_ADDR = SND_MEM_ADDR - 983040 // TAD decoded buffer
if (!SND_BASE_ADDR) return 10 if (!SND_BASE_ADDR) return 10

View File

@@ -1746,7 +1746,9 @@ try {
tadInitialised = true tadInitialised = true
} }
seqread.readBytes(payloadLen, SND_MEM_ADDR - 262144) // tadInputBin lives at audio-local offset 917504 (post-bef85f6 memory map);
// the previous 262144 offset now points into the enlarged sampleBin.
seqread.readBytes(payloadLen, SND_MEM_ADDR - 917504)
audio.tadDecode() audio.tadDecode()
audio.tadUploadDecoded(AUDIO_DEVICE, sampleLen) audio.tadUploadDecoded(AUDIO_DEVICE, sampleLen)
} }

View File

@@ -275,6 +275,7 @@ class AudioJSR223Delegate(private val vm: VM) {
// while the following code does work, it was decided that MP3 is "too new" for tsvm and thus removed.
/* /*
js-mp3 js-mp3
https://github.com/soundbus-technologies/js-mp3 https://github.com/soundbus-technologies/js-mp3

View File

@@ -5433,6 +5433,18 @@ class GraphicsJSR223Delegate(private val vm: VM) {
private val TAV_QLUT = intArrayOf(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096) private val TAV_QLUT = 
intArrayOf(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096)
// Zstd magic = 0x28 0xB5 0x2F 0xFD (little-endian frame magic).
// Newer TAV files default to no Zstd (Video Flags bit 4); detecting the magic
// lets the decoder accept both compressed and raw payloads transparently.
/**
 * Returns the payload of a TAV packet, decompressing it only when it looks like a Zstd frame.
 *
 * @param data packet payload exactly as read from the stream
 * @return the decompressed bytes when [data] begins with the four Zstd magic bytes;
 *         otherwise [data] itself (same array instance, no copy)
 */
private fun tavDecompressIfZstd(data: ByteArray): ByteArray {
// Content sniffing: only the first four bytes are inspected, no header flag is consulted here.
// NOTE(review): a raw payload that happens to start with 28 B5 2F FD would be routed through
// Zstd and fail (or decode to garbage) - confirm whether callers can pass the Video Flags bit instead.
if (data.size >= 4 &&
data[0] == 0x28.toByte() && data[1] == 0xB5.toByte() &&
data[2] == 0x2F.toByte() && data[3] == 0xFD.toByte()) {
// `use` closes the Zstd stream even if readBytes() throws; exceptions propagate to the caller.
return ZstdInputStream(ByteArrayInputStream(data)).use { it.readBytes() }
}
// Shorter than 4 bytes or no magic: treat as an uncompressed payload and pass it through.
return data
}
// New tavDecode function that accepts compressed data and decompresses internally // New tavDecode function that accepts compressed data and decompresses internally
fun tavDecodeCompressed(compressedDataPtr: Long, compressedSize: Int, currentRGBAddr: Long, prevRGBAddr: Long, fun tavDecodeCompressed(compressedDataPtr: Long, compressedSize: Int, currentRGBAddr: Long, prevRGBAddr: Long,
width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int, width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int,
@@ -5445,12 +5457,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
return try { return try {
// Decompress using Zstd // Decompress with Zstd if the payload starts with the Zstd frame magic;
val bais = ByteArrayInputStream(compressedData) // otherwise pass through (TAV files written without --zstd-level).
val zis = ZstdInputStream(bais) val decompressedData = tavDecompressIfZstd(compressedData)
val decompressedData = zis.readBytes()
zis.close()
bais.close()
// Allocate buffer for decompressed data // Allocate buffer for decompressed data
val decompressedBuffer = vm.malloc(decompressedData.size) val decompressedBuffer = vm.malloc(decompressedData.size)
@@ -6725,9 +6734,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
) )
val decompressedData = try { val decompressedData = try {
ZstdInputStream(java.io.ByteArrayInputStream(compressedData)).use { zstd -> // Decompress with Zstd if the payload starts with the Zstd frame magic;
zstd.readBytes() // otherwise pass through (TAV files written without --zstd-level).
} tavDecompressIfZstd(compressedData)
} catch (e: Exception) { } catch (e: Exception) {
println("ERROR: Zstd decompression failed: ${e.message}") println("ERROR: Zstd decompression failed: ${e.message}")
return arrayOf(0, dbgOut) return arrayOf(0, dbgOut)

View File

@@ -911,24 +911,32 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
((tadInputBin[offset++].toUint()) shl 8) ((tadInputBin[offset++].toUint()) shl 8)
) )
val maxIndex = tadInputBin[offset++].toUint() val maxIndex = tadInputBin[offset++].toUint()
val payloadSize = ( val payloadSizeField = (
(tadInputBin[offset++].toUint()) or (tadInputBin[offset++].toUint()) or
((tadInputBin[offset++].toUint()) shl 8) or ((tadInputBin[offset++].toUint()) shl 8) or
((tadInputBin[offset++].toUint()) shl 16) or ((tadInputBin[offset++].toUint()) shl 16) or
((tadInputBin[offset++].toUint()) shl 24) ((tadInputBin[offset++].toUint()) shl 24)
) )
// Decompress payload // MSB of payload size = 1 means the payload is stored uncompressed (no Zstd).
val payloadIsRaw = (payloadSizeField and 0x80000000.toInt()) != 0
val payloadSize = payloadSizeField and 0x7FFFFFFF
// Read payload bytes
val compressed = ByteArray(payloadSize) val compressed = ByteArray(payloadSize)
UnsafeHelper.memcpyRaw(null, tadInputBin.ptr + offset, compressed, UnsafeHelper.getArrayOffset(compressed), payloadSize.toLong()) UnsafeHelper.memcpyRaw(null, tadInputBin.ptr + offset, compressed, UnsafeHelper.getArrayOffset(compressed), payloadSize.toLong())
val payload: ByteArray = try { val payload: ByteArray = if (payloadIsRaw) {
ZstdInputStream(ByteArrayInputStream(compressed)).use { zstd -> compressed
zstd.readBytes() } else {
try {
ZstdInputStream(ByteArrayInputStream(compressed)).use { zstd ->
zstd.readBytes()
}
} catch (e: Exception) {
println("ERROR: Zstd decompression failed: ${e.message}")
return
} }
} catch (e: Exception) {
println("ERROR: Zstd decompression failed: ${e.message}")
return
} }
// Decode using binary tree EZBC - FIXED! // Decode using binary tree EZBC - FIXED!

View File

@@ -12,5 +12,7 @@
<orderEntry type="library" name="jetbrains.kotlin.reflect" level="project" /> <orderEntry type="library" name="jetbrains.kotlin.reflect" level="project" />
<orderEntry type="library" name="jetbrains.kotlin.test" level="project" /> <orderEntry type="library" name="jetbrains.kotlin.test" level="project" />
<orderEntry type="library" name="lib" level="project" /> <orderEntry type="library" name="lib" level="project" />
<orderEntry type="library" name="badlogicgames.gdx" level="project" />
<orderEntry type="library" name="badlogicgames.gdx.backend.lwjgl3" level="project" />
</component> </component>
</module> </module>

View File

@@ -10,5 +10,7 @@
<orderEntry type="library" name="TerranVirtualDisk" level="project" /> <orderEntry type="library" name="TerranVirtualDisk" level="project" />
<orderEntry type="module" module-name="tsvm_core" /> <orderEntry type="module" module-name="tsvm_core" />
<orderEntry type="library" name="lib" level="project" /> <orderEntry type="library" name="lib" level="project" />
<orderEntry type="library" name="badlogicgames.gdx" level="project" />
<orderEntry type="library" name="badlogicgames.gdx.backend.lwjgl3" level="project" />
</component> </component>
</module> </module>

View File

@@ -1,221 +0,0 @@
# Created by CuriousTorvald and Claude on 2025-08-17.
# Makefile for the TSVM Advanced Video (TAV) and Advanced Audio (TAD) encoders, decoders and libraries
CC = gcc
CXX = g++
CFLAGS = -std=c99 -Wall -Wextra -Ofast -D_GNU_SOURCE -march=native -mavx512f -mavx512dq -mavx512bw -mavx512vl -Iinclude
CXXFLAGS = -std=c++11 -Wall -Wextra -Ofast -D_GNU_SOURCE -march=native -mavx512f -mavx512dq -mavx512bw -mavx512vl -Iinclude
DBGFLAGS =
PREFIX = /usr/local
# Zstd flags (use pkg-config if available, fallback for cross-platform compatibility)
ZSTD_CFLAGS = $(shell pkg-config --cflags libzstd 2>/dev/null || echo "")
ZSTD_LIBS = $(shell pkg-config --libs libzstd 2>/dev/null || echo "-lzstd")
LIBS = -lm $(ZSTD_LIBS)
# =============================================================================
# Library Object Files
# =============================================================================
# libtavenc - TAV encoder library
LIBTAVENC_OBJ = lib/libtavenc/tav_encoder_lib.o \
lib/libtavenc/tav_encoder_color.o \
lib/libtavenc/tav_encoder_dwt.o \
lib/libtavenc/tav_encoder_quantize.o \
lib/libtavenc/tav_encoder_ezbc.o \
lib/libtavenc/tav_encoder_utils.o \
lib/libtavenc/tav_encoder_tile.o
# libtavdec - TAV decoder library
LIBTAVDEC_OBJ = lib/libtavdec/tav_video_decoder.o
# libtadenc - TAD encoder library
LIBTADENC_OBJ = lib/libtadenc/encoder_tad.o
# libtaddec - TAD decoder library
LIBTADDEC_OBJ = lib/libtaddec/decoder_tad.o
# libfec - Forward Error Correction library (LDPC + Reed-Solomon)
LIBFEC_OBJ = lib/libfec/ldpc.o lib/libfec/reed_solomon.o lib/libfec/ldpc_payload.o
# =============================================================================
# Targets
# =============================================================================
# Source files and targets
TARGETS = libs encoder_tav_ref decoder_tav_ref tav_inspector tad tav_dt
LIBRARIES = lib/libtavenc.a lib/libtavdec.a lib/libtadenc.a lib/libtaddec.a lib/libfec.a
TAV_TARGETS = encoder_tav_ref decoder_tav_ref tav_inspector
TAD_TARGETS = encoder_tad decoder_tad
DT_TARGETS = encoder_tav_dt decoder_tav_dt tavdt_noise_injector
# Build all encoders (default)
all: clean $(TARGETS)
# Build all libraries
libs: $(LIBRARIES)
# Reference encoder using libtavenc (replaces old monolithic encoder)
encoder_tav_ref: src/encoder_tav.c lib/libtavenc.a lib/libtadenc.a
rm -f encoder_tav_ref
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -o encoder_tav_ref src/encoder_tav.c lib/libtavenc.a lib/libtadenc.a $(LIBS)
@echo ""
@echo "Reference encoder built: encoder_tav_ref"
@echo "This is the official reference implementation with all features"
# Reference decoder using libtavdec (replaces old monolithic decoder)
decoder_tav_ref: src/decoder_tav.c lib/libtavdec.a lib/libtaddec.a
rm -f decoder_tav_ref
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -o decoder_tav_ref src/decoder_tav.c lib/libtavdec.a lib/libtaddec.a $(LIBS)
@echo ""
@echo "Reference decoder built: decoder_tav_ref"
@echo "This is the official reference implementation with all features"
tav_inspector: tav_inspector.c lib/libfec.a
rm -f tav_inspector
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Ilib/libfec -o tav_inspector $< lib/libfec.a $(LIBS)
tav: $(TAV_TARGETS)
# Build TAD (TSVM Advanced Audio) tools
encoder_tad: src/encoder_tad_standalone.c lib/libtadenc/encoder_tad.c include/encoder_tad.h
rm -f encoder_tad encoder_tad_standalone.o encoder_tad.o
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c lib/libtadenc/encoder_tad.c -o encoder_tad.o
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c src/encoder_tad_standalone.c -o encoder_tad_standalone.o
$(CC) $(DBGFLAGS) -o encoder_tad encoder_tad_standalone.o encoder_tad.o $(LIBS)
decoder_tad: lib/libtaddec/decoder_tad.c
rm -f decoder_tad
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -o decoder_tad $< $(LIBS)
# Build all TAD tools
tad: $(TAD_TARGETS)
# =============================================================================
# Library Build Rules
# =============================================================================
# Compile library object files
lib/libtavenc/%.o: lib/libtavenc/%.c
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c $< -o $@
lib/libtavdec/%.o: lib/libtavdec/%.c
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c $< -o $@
lib/libtadenc/%.o: lib/libtadenc/%.c
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c $< -o $@
lib/libtaddec/%.o: lib/libtaddec/%.c
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -DTAD_DECODER_LIB -c $< -o $@
lib/libfec/%.o: lib/libfec/%.c
$(CC) $(CFLAGS) -Ilib/libfec -c $< -o $@
# Build static libraries
lib/libtavenc.a: $(LIBTAVENC_OBJ)
ar rcs $@ $^
lib/libtavdec.a: $(LIBTAVDEC_OBJ)
ar rcs $@ $^
lib/libtadenc.a: $(LIBTADENC_OBJ)
ar rcs $@ $^
lib/libtaddec.a: $(LIBTADDEC_OBJ)
ar rcs $@ $^
lib/libfec.a: $(LIBFEC_OBJ)
ar rcs $@ $^
# =============================================================================
# TAV-DT (Digital Tape) Encoder/Decoder
# =============================================================================
# TAV-DT encoder with FEC (multithreaded)
encoder_tav_dt: src/encoder_tav_dt.c lib/libtavenc.a lib/libtadenc.a lib/libfec.a
rm -f encoder_tav_dt
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -Ilib/libfec -o encoder_tav_dt src/encoder_tav_dt.c lib/libtavenc.a lib/libtadenc.a lib/libfec.a $(LIBS) -lpthread
@echo ""
@echo "TAV-DT encoder built: encoder_tav_dt"
@echo "Digital Tape format with LDPC and Reed-Solomon FEC (multithreaded)"
# TAV-DT decoder with FEC (multithreaded)
decoder_tav_dt: src/decoder_tav_dt.c lib/libtavdec.a lib/libtaddec.a lib/libfec.a
rm -f decoder_tav_dt
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -Ilib/libfec -o decoder_tav_dt src/decoder_tav_dt.c lib/libtavdec.a lib/libtaddec.a lib/libfec.a $(LIBS) -lpthread
@echo ""
@echo "TAV-DT decoder built: decoder_tav_dt"
@echo "Digital Tape format with LDPC and Reed-Solomon FEC (multithreaded)"
# TAV-DT noise injector (channel simulator)
tavdt_noise_injector: tavdt_noise_injector.c
rm -f tavdt_noise_injector
$(CC) -std=c99 -Wall -Ofast -D_GNU_SOURCE -o tavdt_noise_injector tavdt_noise_injector.c -lm
@echo ""
@echo "TAV-DT noise injector built: tavdt_noise_injector"
@echo "Simulates QPSK satellite channel noise (AWGN + burst)"
# Build all TAV-DT tools
tav_dt: $(DT_TARGETS)
# Build with debug symbols
debug: CFLAGS += -g -DDEBUG -fsanitize=address -fno-omit-frame-pointer
debug: DBGFLAGS += -fsanitize=address -fno-omit-frame-pointer
debug: clean $(TARGETS)
# Clean build artifacts
clean:
rm -f $(TARGETS) $(TAD_TARGETS) $(DT_TARGETS) $(LIBRARIES) *.o lib/*/*.o
# Install (copy to PATH)
install: $(TARGETS)
cp encoder_tav_ref $(PREFIX)/bin/
cp decoder_tav_ref $(PREFIX)/bin/
cp encoder_tad $(PREFIX)/bin/
cp decoder_tad $(PREFIX)/bin/
cp encoder_tav_dt $(PREFIX)/bin/
cp decoder_tav_dt $(PREFIX)/bin/
cp tav_inspector $(PREFIX)/bin/
# Check for required dependencies
check-deps:
@echo "Checking dependencies..."
@pkg-config --exists libzstd || (echo "Error: libzstd-dev not found. Install libzstd-dev or equivalent" && exit 1)
@echo "All dependencies found."
# Help
help:
@echo "TSVM Advanced Video (TAV) and Audio (TAD) Encoders"
@echo ""
@echo "Targets:"
@echo " all - Build video encoders (default)"
@echo " libs - Build all codec libraries (.a files)"
@echo " tav - Build the TAV advanced video encoder"
@echo " tav_dt - Build all TAV-DT (Digital Tape) tools with FEC"
@echo " tavdt_noise_injector - Build TAV-DT channel noise simulator"
@echo " tad - Build all TAD audio tools (encoder, decoder)"
@echo " encoder_tad - Build TAD audio encoder"
@echo " decoder_tad - Build TAD audio decoder"
@echo " tests - Build test programs"
@echo " debug - Build with debug symbols"
@echo " clean - Remove build artifacts"
@echo " install - Install to /usr/local/bin"
@echo " check-deps - Check for required dependencies"
@echo " help - Show this help"
@echo ""
@echo "Libraries:"
@echo " lib/libtavenc.a - TAV encoder library"
@echo " lib/libtavdec.a - TAV decoder library"
@echo " lib/libtadenc.a - TAD encoder library"
@echo " lib/libtaddec.a - TAD decoder library"
@echo " lib/libfec.a - Forward Error Correction library (LDPC + RS)"
@echo ""
@echo "Usage:"
@echo " make # Build video encoders"
@echo " make libs # Build all libraries"
@echo " make tav # Build TAV encoder"
@echo " make tav_dt # Build TAV-DT encoder/decoder with FEC"
@echo " make tad # Build all TAD audio tools"
@echo " sudo make install # Install all encoders"
# Targets that name an action rather than a file. Declaring them phony makes
# make run them even if a file with the same name exists in the directory.
# `tav` is an alias target with no recipe and was previously missing here.
.PHONY: all libs clean install check-deps help debug tav tad tav_dt tests

View File

@@ -1,350 +0,0 @@
# TAD - TSVM Advanced Audio Codec
A perceptually-optimised wavelet-based audio codec designed for resource-constrained systems, featuring CDF 9/7 wavelets, EZBC sparse coding, and sophisticated perceptual quantisation.
## Overview
TAD (TSVM Advanced Audio) is a modern audio codec built on discrete wavelet transform (DWT) using Cohen-Daubechies-Feauveau (CDF) 9/7 biorthogonal wavelets. It combines perceptual quantisation, advanced entropy coding, and careful optimisation for resource-constrained systems.
### Key Advantages
- **Perceptual optimisation**: HVS-aware quantisation preserves audio quality where it matters
- **Efficient sparse coding**: EZBC encoding exploits coefficient sparsity (86.9% zeros in typical content)
- **Variable chunk sizes**: Supports any chunk size ≥1024 samples, including non-power-of-2
- **Stereo decorrelation**: Mid/Side encoding exploits stereo correlation for better compression
- **Hardware-friendly**: Designed for efficient decoding on resource-constrained platforms
## Features
### Compression Technology
- **CDF 9/7 Biorthogonal Wavelets**
- 9-level fixed decomposition for all chunk sizes
- Lifting scheme implementation for efficient computation
- Optimal frequency discrimination for audio signals
- **Pre-processing**
- First-order IIR pre-emphasis filter (α=0.5) shifts quantisation noise to lower frequencies, where it is less objectionable to listeners
- Gamma companding (γ=0.5) for dynamic range compression before quantisation
- Mid/Side stereo transformation exploits stereo correlation
- Lambda companding (λ=6.0) with Laplacian CDF mapping for full bit utilisation
- **Perceptual Quantisation**
- Channel-specific (Mid/Side) frequency-dependent weights
- Subband-aware quantisation preserves perceptually important frequencies
- **EZBC Encoding**
- Binary tree embedded zero block coding
- Exploits coefficient sparsity (86.9% Mid, 97.8% Side typical)
- Progressive refinement structure
- Spatial clustering of non-zero coefficients
- **Entropy Coding**
- Zstandard compression (level 7) on concatenated EZBC bitstreams
- Cross-channel compression optimisation
- Optional Zstd bypass for debugging
### Audio Format
- **Sample Rate**: 32 kHz (TSVM audio hardware native format)
- **Channels**: Stereo (L/R input, Mid/Side internal representation)
- **Chunk Sizes**: Variable, any size ≥1024 samples (including non-power-of-2)
- **Bit Depth**: 32-bit float internal, 8-bit unsigned PCM output with noise-shaped dithering
- **Bandwidth**: Full 0-16 kHz frequency range preserved
### Quality Levels
Six quality levels (0-5) provide a wide range of compression/quality trade-offs:
- **Level 0**: Lowest quality, smallest file size
- **Level 3**: Default, balanced quality/compression (2.51:1 vs PCMu8)
- **Level 5**: Highest quality, largest file size
Quality levels are designed to be synchronised with TAV video codec for unified encoding.
## Building
### Prerequisites
- C compiler (GCC/Clang)
- Zstandard library (libzstd)
- Math library (libm)
### Compilation
```bash
# Build TAD encoder/decoder
make tad
# Build all tools
make all
# Clean build artifacts
make clean
```
### Build Targets
- `encoder_tad` - Standalone audio encoder with FFmpeg calls
- `decoder_tad` - Standalone audio decoder
## Usage
### Basic Encoding
Encoding requires FFmpeg executable installed in your system.
```bash
# Default encoding (quality level 3)
./encoder_tad -i input.mp3 -o output.tad
# Specify quality level (0-5)
./encoder_tad -i input.m4a -o output.tad -q 0 # Lowest quality
./encoder_tad -i input.ogg -o output.tad -q 5 # Highest quality
# Disable Zstd compression (for debugging)
./encoder_tad -i input.opus -o output.tad --no-zstd
# Verbose output with statistics
./encoder_tad -i input.flac -o output.tad -v
```
### Decoding
```bash
# Decode to PCMu8
./decoder_tad -i input.tad -o output.pcm --raw-pcm
# Decode to WAV
./decoder_tad -i input.tad -o output.wav
```
### Input Formats
TAD encoder accepts any audio format supported by FFmpeg:
- Audio files: WAV, MP3, FLAC, OGG, AAC, etc.
- Video files with audio streams: MP4, MKV, AVI, etc.
- Raw PCM formats
Audio is automatically resampled to 32 kHz stereo if necessary.
## Technical Architecture
### Encoder Pipeline
1. **Input Processing**
- FFmpeg demuxing and audio stream extraction
- Resampling to 32 kHz stereo
- Conversion to PCM32f
2. **Pre-emphasis Filter**
- First-order IIR filter with α=0.5
- Shifts quantisation noise toward lower frequencies
- Improves perceptual quality
3. **Gamma Companding**
- Dynamic range compression with γ=0.5
- Applied independently to each sample
- Reduces quantisation error for low-amplitude signals
4. **Stereo Decorrelation**
- Left/Right to Mid/Side transformation
- Mid = (L + R) / 2
- Side = (L - R) / 2
- Exploits stereo correlation for better compression
5. **9-Level CDF 9/7 DWT**
- Fixed 9 decomposition levels for all chunk sizes
- Forward lifting scheme implementation
- Correct length tracking for non-power-of-2 sizes
6. **Perceptual Quantisation**
- Channel-specific (Mid/Side) subband weights
- Lambda companding with λ=6.0
- Laplacian CDF mapping: `sign(x) * floor(λ * log(1 + |x|/λ))`
- Quantised to int8 coefficients
7. **EZBC Encoding**
- Binary tree structure per channel
- Progressive refinement by bitplanes
- Zero block coding exploits sparsity
- Independent bitstreams for Mid and Side
8. **Zstd Compression**
- Level 7 compression on concatenated `[Mid_bitstream][Side_bitstream]`
- Cross-channel optimisation opportunities
- Adaptive compression based on content
### Decoder Pipeline
1. **Container Parsing**
- TAD packet identification (type 0x24)
- Chunk size extraction
- Compressed data boundaries
2. **Zstd Decompression**
- Decompress concatenated bitstreams
- Split into Mid and Side EZBC streams
3. **EZBC Decoding**
- Binary tree decoder per channel
- Reconstruct quantised int8 coefficients
- Progressive refinement reconstruction
4. **Lambda Decompanding**
- Inverse Laplacian CDF with channel-specific weights
- Reconstruct float32 DWT coefficients
- Apply subband-specific perceptual weights
5. **9-Level Inverse CDF 9/7 DWT**
- Inverse lifting scheme implementation
- Correct length tracking for non-power-of-2 chunk sizes
- Pre-calculated length sequence from forward transform
6. **Mid/Side to Left/Right**
- L = Mid + Side
- R = Mid - Side
- Reconstruct stereo channels
7. **Gamma Decompanding**
- Inverse gamma with γ⁻¹=2.0
- Restore original dynamic range
8. **De-emphasis Filter**
- Reverse pre-emphasis with α=0.5
- Remove frequency shaping
- Restore flat frequency response
9. **PCM32f to PCM8u Conversion**
- Noise-shaped dithering for 8-bit output
- Clamping to valid range
- Final output format
### Wavelet Implementation
CDF 9/7 wavelet follows a **two-stage lifting scheme**:
```c
// Forward Transform: Predict → Update
// Predict step (generate high-pass)
temp[half + i] = data[odd] - α * (data[even_left] + data[even_right]);
// Update step (generate low-pass)
temp[i] = data[even] + β * (temp[half + i - 1] + temp[half + i]);
// Normalization (K factor)
temp[i] *= K;
temp[half + i] /= K;
// Inverse Transform: Denormalize → Undo Update → Undo Predict (reversed order)
temp[i] /= K;
temp[half + i] *= K;
temp[i] -= β * (temp[half + i - 1] + temp[half + i]);
data[odd] = temp[half + i] + α * (temp[i] + temp[i + 1]);
data[even] = temp[i];
```
**CDF 9/7 Coefficients**:
- α = -1.586134342
- β = -0.052980118
- γ = +0.882911075
- δ = +0.443506852
- K = 1.230174105
### Non-Power-of-2 Chunk Size Handling
Critical implementation detail for variable chunk sizes:
```c
// Pre-calculate exact length sequence from forward transform
int lengths[MAX_LEVELS + 1];
lengths[0] = chunk_size;
for (int i = 1; i <= levels; i++) {
lengths[i] = (lengths[i - 1] + 1) / 2;
}
// Apply inverse DWT using lengths[level] for each level
// NEVER use simple doubling (length *= 2) - incorrect for non-power-of-2!
```
Incorrect length tracking causes mirrored subband artefacts in decoded audio.
### Perceptual Quantisation Weights
Channel-specific weights for Mid (channel 0) and Side (channel 1):
```c
// Base quantiser weights per subband (9 levels + approximation)
float BASE_QUANTISER_WEIGHTS[2][10] = {
// Mid channel (0)
{4.0f, 2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f, 1.0f, 1.3f, 2.0f},
// Side channel (1)
{6.0f, 5.0f, 2.6f, 2.4f, 1.8f, 1.3f, 1.0f, 1.0f, 1.6f, 3.2f}
};
// During dequantisation:
float weight = BASE_QUANTISER_WEIGHTS[channel][subband] * quantiser_scale;
coeffs[i] = normalised_val * TAD32_COEFF_SCALARS[subband] * weight;
```
Different weights for Mid and Side channels reflect perceptual importance of frequency bands in each channel. DC frequency has highest weight (4.0 Mid, 6.0 Side) due to energy concentration.
## Performance Characteristics
### Compression Efficiency
- **Target Compression**: 2:1 against PCMu8 baseline (4:1 against PCM16LE input)
- **Achieved Compression**: 2.51:1 against PCMu8 at quality level 3
- **Audio Quality**: Preserves full 0-16 kHz bandwidth
- **Coefficient Sparsity**: 86.9% zeros in Mid channel, 97.8% in Side channel (typical)
- **EZBC Benefits**: Exploits sparsity, progressive refinement, spatial clustering
### Computational Complexity
- **Encoding**: O(n log n) per chunk for DWT, O(n) for EZBC encoding
- **Decoding**: O(n log n) per chunk for inverse DWT, O(n) for EZBC decoding
- **Memory**: O(n) working memory for chunk processing
### Quality Characteristics
- **Frequency Response**: Flat 0-16 kHz within perceptual limits
- **Dynamic Range**: Preserved through gamma companding
- **Stereo Imaging**: Maintained through Mid/Side decorrelation
- **Perceptual Quality**: Optimised for human auditory system characteristics
## Integration with TAV
TAD is designed as an includable API for TAV video encoder integration:
- **Variable Chunk Sizes**: Audio chunks can match video GOP boundaries (e.g., 32016 samples for 1-second TAV GOP)
- **Unified Quality Levels**: TAD quality 0-5 synchronised with TAV quality 0-5
- **Embedded Packets**: TAV embeds TAD-compressed audio using packet type 0x24
- **Shared Container**: Single .tav file contains both video and audio streams
### TAV Integration Example
```c
// TAD handles non-power-of-2 chunk size correctly
tad_encode_chunk(audio_buffer, audio_samples_per_gop, output_buffer, &output_size);
// TAV embeds TAD packet
tav_write_packet(TAV_PACKET_AUDIO, output_buffer, output_size);
```
## Format Specification
For complete packet structure and bitstream format details, refer to `format documentation.txt`.
### Key Packet Types
- `0x24`: TAD audio packet (used in standalone .tad files and embedded in .tav files)
## Related Projects
- **TAV** (TSVM Advanced Video): Wavelet-based video codec with integrated TAD audio
- **TSVM**: Target virtual machine platform for TAD playback
## Licence
MIT.

View File

@@ -1,261 +0,0 @@
# TAV - TSVM Advanced Video Codec
A perceptually-optimised wavelet-based video codec designed for resource-constrained systems, featuring multiple wavelet types, temporal 3D DWT, and sophisticated compression techniques.
## Overview
TAV (TSVM Advanced Video) is a modern video codec built on discrete wavelet transformation (DWT). It combines cutting-edge compression techniques with careful optimisation for resource-constrained systems.
### Key Advantages
- **No blocking artefacts**: Large-tile DWT encoding with padding eliminates DCT block boundaries
- **No colour banding**: Wavelets spread gradients across scales, preventing banding in the first place
- **Perceptual optimisation**: HVS-aware quantisation preserves visual quality where it matters
- **Temporal coherence**: 3D DWT with GOP encoding exploits inter-frame similarity
- **Efficient sparse coding**: EZBC encoding exploits coefficient sparsity for 16-18% additional compression
- **Hardware-friendly**: Designed for efficient decoding on resource-constrained platforms
## Features
### Compression Technology
- **Wavelet Types**
- **5/3 Reversible** (JPEG 2000 standard): Lossless-capable, good for archival
- **9/7 Irreversible** (default): Best overall compression, CDF 9/7 variant
- **Spatial Encoding**
- Large-tile encoding with padding, with optional single-tile mode (no blocking artefacts)
- 6-level DWT decomposition for deep frequency analysis
- Perceptual quantisation with HVS-optimised coefficient scaling
- YCoCg-R colour space with anisotropic chroma quantisation
- **Temporal Encoding** (3D DWT Mode)
- Group-of-pictures (GOP) encoding with adaptive size (typically 20 frames)
- Unified EZBC encoding across temporal dimension
- Adaptive GOP boundaries with scene change detection
- **EZBC Encoding**
- Binary tree embedded zero block coding exploits coefficient sparsity
- Progressive refinement structure with bitplane encoding
- Concatenated channel layout for cross-channel compression optimisation
- Typical sparsity: 86.9% (Y), 97.8% (Co), 99.5% (Cg)
- 16-18% compression improvement over naive coefficient encoding
### Audio Integration
TAV seamlessly integrates with the TAD (TSVM Advanced Audio) codec for synchronised audio/video encoding:
- Variable chunk sizes match video GOP boundaries
- Embedded TAD packets (type 0x24) with Zstd compression
- Unified container format
## Building
### Prerequisites
- C compiler (GCC/Clang)
- Zstandard library
- OpenCV 4 library (only used by experimental motion estimation feature)
### Compilation
```bash
# Build TAV encoder/decoder
make tav
# Build all tools including TAD audio codec
make all
# Clean build artefacts
make clean
```
### Build Targets
- `encoder_tav` - Main video encoder
- `decoder_tav` - Standalone video decoder
- `tav_inspector` - Packet analysis and debugging tool
## Usage
### Basic Encoding
Encoding requires the FFmpeg executable to be installed on your system.
```bash
# Default encoding (CDF 9/7 wavelet, quality level 3)
./encoder_tav -i input.mp4 -o output.tav
# Quality levels (0-5)
./encoder_tav -i input.avi -q 0 -o output.tav # Lowest quality, smallest file
./encoder_tav -i input.mkv -q 5 -o output.tav # Highest quality, largest file
```
### Intra-only Encoding
```bash
# Enable Intra-only encoding
./encoder_tav -i input.mp4 --intra-only -o output.tav
```
### Decoding and Inspection
```bash
# Decode TAV to raw video
./decoder_tav -i input.tav -o output.mkv
# Inspect packet structure (debugging)
./tav_inspector input.tav -v
```
### Frame Limiting
```bash
# Encode only first N frames (useful for testing)
./encoder_tav -i input.mp4 -o output.tav --encode-limit 100
```
## Technical Architecture
### Encoder Pipeline
1. **Input Processing**
- FFmpeg demuxing and frame extraction
- RGB to YCoCg-R colour space conversion
- Resolution validation and padding
2. **DWT Transform**
- Spatial: 6-level decomposition per frame
- Temporal: 1D DWT across GOP frames (3D DWT mode)
- Lifting scheme implementation for all wavelets
3. **Perceptual Quantisation**
- HVS-based subband weights
- Anisotropic chroma quantisation (YCoCg-R specific)
- Quality-dependent quantisation matrices
4. **EZBC Encoding**
- Binary tree embedded zero block coding per channel
- Progressive refinement by bitplanes
- Concatenated bitstream layout: `[Y_bitstream][Co_bitstream][Cg_bitstream]`
- Cross-channel compression optimisation
5. **Entropy Coding**
- Zstandard compression (level 7) on concatenated EZBC bitstreams
- Cross-channel compression opportunities
- Adaptive compression based on GOP structure
### Decoder Pipeline
1. **Container Parsing**
- Packet type identification (0x00-0xFF)
- Timecode synchronisation
- GOP boundary detection
2. **Entropy Decoding**
- Zstd decompression of concatenated bitstreams
- EZBC binary tree decoding per channel
- Progressive coefficient reconstruction
3. **Inverse Quantisation**
- Perceptual weight application
- Subband-specific scaling
- Coefficient reconstruction from sparse representation
4. **Inverse DWT**
- Temporal: 1D inverse DWT across frames (3D DWT mode)
- Spatial: 6-level inverse wavelet reconstruction
5. **Output Conversion**
- YCoCg-R to RGB colour space
- Clamping and dithering
- Frame buffering for display
### Wavelet Implementation
All wavelets follow a **lifting scheme** pattern with symmetric boundary extension:
```c
// Forward Transform: Predict → Update
temp[half + i] = data[odd] - predict(data[even]); // High-pass
temp[i] = data[even] + update(temp[half]); // Low-pass
// Inverse Transform: Undo Update → Undo Predict (reversed order)
data[even] = temp[i] - update(temp[half]); // Undo low-pass
data[odd] = temp[half + i] + predict(data[even]); // Undo high-pass
```
**Critical**: Forward and inverse transforms must use identical coefficient indexing and exactly reverse operations to avoid grid artefacts.
### Coefficient Layout
TAV uses **2D Spatial Layout** in memory for each decomposition level:
```
[LL] [LH] [HL] [HH] [LH] [HL] [HH] ...
└── Level 0 ──┘ └─── Level 1 ───┘
```
- `LL`: Low-pass (approximation) - progressively smaller with each level
- `LH`, `HL`, `HH`: High-pass subbands (horizontal, vertical, diagonal detail)
## Performance Characteristics
### Compression Efficiency
- **Sparsity Exploitation**: Typical quantised coefficient sparsity
- Y channel: 86.9% zeros
- Co channel: 97.8% zeros
- Cg channel: 99.5% zeros
- **EZBC Benefits**: 16-18% compression improvement over naive coefficient encoding through sparsity exploitation
- **Temporal Coherence**: Additional 15-25% improvement with 3D DWT (content-dependent)
### Computational Complexity
- **Encoding**: O(n log n) per frame for spatial DWT
- **Decoding**: O(n log n) per frame, optimised lifting scheme implementation
- **Memory**: Single-tile encoding requires O(w × h) working memory
### Quality Characteristics
- **No blocking artefacts**: Wavelet-based encoding is inherently smooth
- **Perceptual optimisation**: Better subjective quality than bitrate-equivalent DCT codecs
- **Scalability**: 6 quality levels (0-5) provide a wide range of bitrate/quality trade-offs
- **Temporal stability**: 3D DWT mode reduces flickering and temporal artefacts
## Format Specification
For complete packet structure and bitstream format details, refer to `format documentation.txt`.
### Key Packet Types
- `0x00`: Metadata and initialisation
- `0x01`: I-frame (intra-coded frame)
- `0x12`: GOP unified packet (3D DWT mode)
- `0x24`: Embedded TAD audio
- `0xFC`: GOP synchronisation
- `0xFD`: Timecode
## Debugging Tools
### TAV Inspector
Analyse TAV packet structure and decode individual frames:
```bash
# Verbose packet analysis
./tav_inspector input.tav -v
# Extract specific frame ranges
./tav_inspector input.tav --frame-range 100-200
```
## Related Projects
- **TAD** (TSVM Advanced Audio): Perceptual audio codec using CDF 9/7 wavelets
- **TSVM**: Target virtual machine platform for TAV playback
## Licence
MIT.

View File

@@ -1,424 +0,0 @@
/**
* TAV+UCF Payload Writer for TAV Files
* Creates a TAV header-only (32 bytes) + UCF cue file (4KB) for concatenated TAV files
* Total output size: 4096 bytes (32 + 4064)
* Usage: ./create_ucf_payload input.tav output.ucf [track_names.txt]
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
// Size in bytes of the fixed TAV file header written by write_tav_header_only().
#define TAV_HEADER_SIZE 32
// Bytes reserved for the UCF cue data that follows the header-only TAV header.
#define UCF_SIZE 4064
// Total size of the prepended payload (32 + 4064 = 4096 bytes). Every cue offset
// is shifted by this amount because the payload is placed in front of the
// concatenated TAV data.
#define TAV_OFFSET_BIAS (TAV_HEADER_SIZE + UCF_SIZE)
// First 7 bytes of the 8-byte magic; the 8th byte ('V' or 'P') is compared
// separately by the header scanner.
#define TAV_MAGIC "\x1FTSVMTA" // Matches both TAV and TAP
// Leading fields of the packed on-disk TAV header. The field order mirrors
// bytes 0-17 that write_tav_header_only() emits; the remaining header fields
// are not declared here.
// NOTE(review): this type is not referenced by any function visible in this
// part of the file - confirm whether it is still needed.
typedef struct {
uint8_t magic[8];      // 0x1F followed by "TSVMTAV" (or "TSVMTAP")
uint8_t version;       // format version byte
uint16_t width;        // frame width in pixels
uint16_t height;       // frame height in pixels
uint8_t fps;           // frames per second
uint32_t total_frames; // frame count (0xFFFFFFFF is written for header-only files)
// ... rest of header fields
} __attribute__((packed)) TAVHeader;
// Write the fixed TAV "header-only" header (File Role = 1) to `out`.
// Every byte of this header is constant, so it is kept as a single table
// rather than being assembled field by field.
static void write_tav_header_only(FILE *out) {
    static const uint8_t header_bytes[TAV_HEADER_SIZE] = {
        /*  0- 7 */ 0x1F, 'T', 'S', 'V', 'M', 'T', 'A', 'V', // Magic: "\x1FTSVMTAV"
        /*  8    */ 5,                       // Version: 5 (YCoCg-R perceptual)
        /*  9-10 */ 0x30, 0x02,              // Width: 560 (little-endian)
        /* 11-12 */ 0xC0, 0x01,              // Height: 448 (little-endian)
        /* 13    */ 30,                      // FPS: 30
        /* 14-17 */ 0xFF, 0xFF, 0xFF, 0xFF,  // Total Frames: 0xFFFFFFFF (still image marker / not applicable)
        /* 18    */ 1,                       // Wavelet Filter Type: 1 (9/7 irreversible, default)
        /* 19    */ 6,                       // Decomposition Levels: 6
        /* 20-22 */ 0xFF, 0xFF, 0xFF,        // Quantiser Indices (Y, Co, Cg): 255 (not applicable for header-only)
        /* 23    */ 0x80,                    // Extra Feature Flags: bit 7 = has no actual packets
        /* 24    */ 0,                       // Video Flags: 0
        /* 25    */ 0,                       // Encoder quality level: 0
        /* 26    */ 0,                       // Channel layout: 0 (Y-Co-Cg)
        /* 27-30 */ 0, 0, 0, 0,              // Reserved[4]
        /* 31    */ 1                        // File Role: 1 (header-only, UCF payload follows)
    };

    fwrite(header_bytes, 1, TAV_HEADER_SIZE, out);
}
// Write the 16-byte UCF header to `out`:
//   8 bytes magic ("\x1FTSVMUCF"), 1 byte version, 2 bytes cue count,
//   4 bytes cue file size, 1 reserved byte.
// `num_cues` is the number of cue elements that will follow.
// Multi-byte fields are serialised byte by byte, least-significant byte first,
// so the output no longer depends on the host's byte order (it is identical to
// what the previous raw fwrite() produced on little-endian machines).
static void write_ucf_header(FILE *out, uint16_t num_cues) {
    const uint32_t cue_file_size = TAV_OFFSET_BIAS;
    const uint8_t header[16] = {
        0x1F, 'T', 'S', 'V', 'M', 'U', 'C', 'F',   // magic
        1,                                          // version
        (uint8_t)(num_cues & 0xFF),                 // cue count, low byte
        (uint8_t)(num_cues >> 8),                   // cue count, high byte
        (uint8_t)(cue_file_size & 0xFF),            // cue file size, byte 0 (LSB)
        (uint8_t)((cue_file_size >> 8) & 0xFF),
        (uint8_t)((cue_file_size >> 16) & 0xFF),
        (uint8_t)((cue_file_size >> 24) & 0xFF),    // cue file size, byte 3 (MSB)
        0                                           // reserved
    };

    fwrite(header, 1, sizeof(header), out);
}
// Write one UCF cue element to `out`:
//   1 byte addressing mode, 2 bytes name length, the name bytes (no
//   terminator), then a 48-bit offset.
// `offset` is the position of the track inside the concatenated TAV data;
// TAV_OFFSET_BIAS is added because the payload is prepended to that data.
// All multi-byte fields are emitted least-significant byte first, independent
// of host byte order (writing the first 6 bytes of a uint64_t in memory only
// yields the low 48 bits on little-endian hosts).
static void write_cue_element(FILE *out, uint64_t offset, const char *name) {
    // NOTE(review): the original comment described this as
    // "0x20 (human) | 0x01 (machine) | 0x02 (internal)", which would be 0x23,
    // but the value written has always been 0x22. The value is kept as-is;
    // confirm against the UCF specification.
    const uint8_t addressing_mode = 0x22;

    // The length field is 16 bits wide: clamp instead of letting it wrap.
    size_t name_bytes = strlen(name);
    if (name_bytes > UINT16_MAX) {
        name_bytes = UINT16_MAX;
    }

    const uint64_t biased_offset = offset + TAV_OFFSET_BIAS;

    const uint8_t prefix[3] = {
        addressing_mode,
        (uint8_t)(name_bytes & 0xFF),
        (uint8_t)((name_bytes >> 8) & 0xFF)
    };
    uint8_t offset_bytes[6];
    for (int i = 0; i < 6; i++) {
        offset_bytes[i] = (uint8_t)((biased_offset >> (8 * i)) & 0xFF);
    }

    fwrite(prefix, 1, sizeof(prefix), out);
    fwrite(name, 1, name_bytes, out);
    fwrite(offset_bytes, 1, sizeof(offset_bytes), out);
}
// Error-path helper for read_track_names(): free the first `count` names and
// the array that holds them.
static void discard_partial_track_names(char **names, int count) {
    for (int i = 0; i < count; i++) {
        free(names[i]);
    }
    free(names);
}

// Read track names from a text file, one name per line.
//   filename  - path of the file to read
//   count_out - receives the number of names returned (0 on failure)
// Returns a malloc'ed array of malloc'ed strings that the caller must free,
// or NULL if the file cannot be opened or memory runs out.
// Trailing "\n" / "\r\n" are stripped and empty lines are skipped. A line
// longer than the 255-character buffer is truncated to 255 characters and the
// rest of that line is discarded, so one long line can no longer turn into
// several names and shift every following track.
static char **read_track_names(const char *filename, int *count_out) {
    *count_out = 0;

    FILE *f = fopen(filename, "r");
    if (!f) {
        return NULL;
    }

    int count = 0;
    int capacity = 16;
    char line[256];
    char **names = (char **)malloc(capacity * sizeof(char *));
    if (!names) {
        fclose(f);
        return NULL;
    }

    while (fgets(line, sizeof(line), f)) {
        size_t len = strlen(line);

        if (len > 0 && line[len - 1] == '\n') {
            line[--len] = '\0';
        } else {
            // No newline in the buffer: either the line was too long or this is
            // the last line of the file. Swallow whatever is left of it.
            int ch;
            while ((ch = fgetc(f)) != EOF && ch != '\n') {
            }
        }
        if (len > 0 && line[len - 1] == '\r') {
            line[--len] = '\0';
        }

        // Skip empty lines
        if (len == 0) {
            continue;
        }

        // Grow the array when it is full
        if (count >= capacity) {
            capacity *= 2;
            char **grown = (char **)realloc(names, capacity * sizeof(char *));
            if (!grown) {
                discard_partial_track_names(names, count);
                fclose(f);
                return NULL;
            }
            names = grown;
        }

        // Copy the name (plain malloc + memcpy keeps this ISO C; no strdup needed)
        char *copy = (char *)malloc(len + 1);
        if (!copy) {
            discard_partial_track_names(names, count);
            fclose(f);
            return NULL;
        }
        memcpy(copy, line, len + 1);
        names[count++] = copy;
    }

    fclose(f);
    *count_out = count;
    return names;
}
// Locate every TAV/TAP header in `in`.
//   in          - seekable stream opened in binary mode
//   offsets_out - receives a malloc'ed array of header offsets (caller frees)
// Returns the number of headers found, or -1 on allocation/stream failure (in
// which case *offsets_out is not written).
//
// After a header is found, the scanner walks the packets that follow it
// (type byte + 32-bit little-endian payload size + payload) instead of
// searching byte by byte; a 0x1F type byte marks the start of the next file.
// When no header is at the current position the search advances one byte.
static int find_tav_headers(FILE *in, uint64_t **offsets_out) {
    int count = 0;
    int capacity = 16;
    uint64_t *offsets = (uint64_t *)malloc(capacity * sizeof(uint64_t));
    if (!offsets) {
        fprintf(stderr, "Error: Memory allocation failed\n");
        return -1;
    }

    // Seek to beginning
    fseek(in, 0, SEEK_SET);

    uint8_t magic[8];
    while (1) {
        // Remember current position before reading
        long here = ftell(in);
        if (here < 0) {
            fprintf(stderr, "Error: Cannot determine position in input file\n");
            free(offsets);
            return -1;
        }
        uint64_t pos = (uint64_t)here;

        // Try to read magic; a short read means end of file
        if (fread(magic, 1, 8, in) != 8) {
            break;
        }

        // Not a TAV/TAP magic: move forward by 1 byte for the next attempt
        if (memcmp(magic, TAV_MAGIC, 7) != 0 || (magic[7] != 'V' && magic[7] != 'P')) {
            fseek(in, (long)(pos + 1), SEEK_SET);
            continue;
        }

        // Found TAV header
        if (count >= capacity) {
            capacity *= 2;
            uint64_t *new_offsets = (uint64_t *)realloc(offsets, capacity * sizeof(uint64_t));
            if (!new_offsets) {
                fprintf(stderr, "Error: Memory reallocation failed\n");
                free(offsets);
                return -1;
            }
            offsets = new_offsets;
        }
        offsets[count++] = pos;
        // uint64_t is not "unsigned long" on every platform, so cast for printf
        printf("Found TAV header at offset: 0x%llX (%llu)\n",
               (unsigned long long)pos, (unsigned long long)pos);

        // Skip past this header (32 bytes total)
        uint64_t packet_pos = pos + 32;
        fseek(in, (long)packet_pos, SEEK_SET);

        // Smart packet-wise skipping
        while (1) {
            uint8_t packet_type;
            if (fread(&packet_type, 1, 1, in) != 1) {
                break; // End of file
            }

            // 0x1F is prohibited as a packet type: it is the first byte of the
            // next file's magic. Rewind so the outer loop re-reads it as magic.
            if (packet_type == 0x1F) {
                fseek(in, (long)packet_pos, SEEK_SET);
                break;
            }
            // printf("TAV Packet 0x%02X at 0x%llX\n", packet_type, (unsigned long long)packet_pos);

            // Sync packets (0xFE, 0xFF) have no payload size - they're single-byte packets
            if (packet_type == 0xFE || packet_type == 0xFF) {
                packet_pos += 1;
                fseek(in, (long)packet_pos, SEEK_SET);
                continue;
            }

            // Payload size: uint32, little-endian, decoded byte by byte so the
            // result does not depend on host byte order
            uint8_t size_bytes[4];
            if (fread(size_bytes, 1, 4, in) != 4) {
                break; // End of file
            }
            const uint32_t payload_size = (uint32_t)size_bytes[0]
                                        | ((uint32_t)size_bytes[1] << 8)
                                        | ((uint32_t)size_bytes[2] << 16)
                                        | ((uint32_t)size_bytes[3] << 24);

            // Skip packet: 1 byte (type) + 4 bytes (size) + payload, summed in
            // 64 bits so a payload size near UINT32_MAX cannot wrap
            packet_pos += (uint64_t)payload_size + 5;
            fseek(in, (long)packet_pos, SEEK_SET);
        }
    }

    *offsets_out = offsets;
    return count;
}
// Free a track-name list obtained from read_track_names(); accepts NULL.
static void release_track_names(char **track_names, int num_names) {
    if (!track_names) {
        return;
    }
    for (int i = 0; i < num_names; i++) {
        free(track_names[i]);
    }
    free(track_names);
}

// Entry point: scan the concatenated TAV file for headers and write a
// (TAV header-only + UCF cue table) payload of exactly
// TAV_HEADER_SIZE + UCF_SIZE bytes.
//   argv[1] input TAV file, argv[2] output payload, argv[3] optional names file.
// Returns 0 on success, 1 on any error. If the payload cannot be written
// correctly (I/O error, or the cue table does not fit in the fixed-size area)
// the incomplete output file is removed, because every cue offset assumes the
// payload is exactly TAV_OFFSET_BIAS bytes long.
int main(int argc, char *argv[]) {
    if (argc < 3 || argc > 4) {
        fprintf(stderr, "Usage: %s <input.tav> <output.ucf> [track_names.txt]\n", argv[0]);
        fprintf(stderr, "Creates a 4KB UCF payload for concatenated TAV file\n");
        fprintf(stderr, " track_names.txt: Optional file with track names (one per line)\n");
        return 1;
    }

    const char *input_path = argv[1];
    const char *output_path = argv[2];
    const char *names_path = (argc == 4) ? argv[3] : NULL;

    // Read track names if provided
    char **track_names = NULL;
    int num_names = 0;
    if (names_path) {
        track_names = read_track_names(names_path, &num_names);
        if (track_names) {
            printf("Loaded %d track name(s) from '%s'\n", num_names, names_path);
        } else {
            fprintf(stderr, "Warning: Could not read track names from '%s', using defaults\n", names_path);
            num_names = 0;
        }
    }

    // Open input file
    FILE *in = fopen(input_path, "rb");
    if (!in) {
        fprintf(stderr, "Error: Cannot open input file '%s'\n", input_path);
        release_track_names(track_names, num_names);
        return 1;
    }

    // Find all TAV headers
    uint64_t *offsets = NULL;
    int num_tracks = find_tav_headers(in, &offsets);
    fclose(in);

    if (num_tracks < 0) {
        fprintf(stderr, "Error: Failed to scan input file\n");
        release_track_names(track_names, num_names);
        return 1;
    }
    if (num_tracks == 0) {
        fprintf(stderr, "Error: No TAV headers found in input file\n");
        free(offsets);
        release_track_names(track_names, num_names);
        return 1;
    }
    // The UCF header stores the cue count in 16 bits
    if (num_tracks > UINT16_MAX) {
        fprintf(stderr, "Error: Too many TAV headers (%d) for a UCF cue table\n", num_tracks);
        free(offsets);
        release_track_names(track_names, num_names);
        return 1;
    }
    printf("\nFound %d TAV header(s)\n", num_tracks);

    // Create output UCF file
    FILE *out = fopen(output_path, "wb");
    if (!out) {
        fprintf(stderr, "Error: Cannot create output file '%s'\n", output_path);
        free(offsets);
        release_track_names(track_names, num_names);
        return 1;
    }

    // Write TAV header-only payload (File Role = 1)
    write_tav_header_only(out);
    printf("Written TAV header-only payload (%d bytes)\n", TAV_HEADER_SIZE);

    // Write UCF header
    write_ucf_header(out, (uint16_t)num_tracks);

    // Write cue elements
    for (int i = 0; i < num_tracks; i++) {
        char default_name[32];
        const char *name;

        // Use custom name if available, otherwise generate default
        if (track_names && i < num_names) {
            name = track_names[i];
        } else {
            snprintf(default_name, sizeof(default_name), "Track %d", i + 1);
            name = default_name;
        }

        write_cue_element(out, offsets[i], name);
        printf("Written cue element: '%s' at offset 0x%llX (biased: 0x%llX)\n",
               name, (unsigned long long)offsets[i],
               (unsigned long long)(offsets[i] + TAV_OFFSET_BIAS));
    }

    // The payload must be exactly this long; pad with zeros, or fail if the
    // cue table already ran past the end of the reserved area.
    const long target_size = TAV_HEADER_SIZE + UCF_SIZE;
    const long current_pos = ftell(out);
    int ok = 1;

    if (current_pos < 0) {
        fprintf(stderr, "Error: Cannot determine size of output file '%s'\n", output_path);
        ok = 0;
    } else if (current_pos > target_size) {
        fprintf(stderr, "Error: Cue table needs %ld bytes but only %ld are available; "
                        "use fewer tracks or shorter names\n",
                current_pos - TAV_HEADER_SIZE, (long)UCF_SIZE);
        ok = 0;
    } else {
        static const uint8_t zeros[256] = {0};
        long remaining = target_size - current_pos;
        while (remaining > 0) {
            const size_t chunk = (remaining < (long)sizeof(zeros)) ? (size_t)remaining : sizeof(zeros);
            if (fwrite(zeros, 1, chunk, out) != chunk) {
                break; // reported through ferror() below
            }
            remaining -= (long)chunk;
        }
    }

    if (ok && ferror(out)) {
        fprintf(stderr, "Error: Failed to write output file '%s'\n", output_path);
        ok = 0;
    }
    if (fclose(out) != 0 && ok) {
        fprintf(stderr, "Error: Failed to write output file '%s'\n", output_path);
        ok = 0;
    }

    free(offsets);
    release_track_names(track_names, num_names);

    if (!ok) {
        remove(output_path); // do not leave a payload with wrong offsets behind
        return 1;
    }

    printf("\nTAV+UCF payload created successfully: %s\n", output_path);
    printf("File size: %zu bytes (TAV header: %d + UCF: %d)\n",
           (size_t)(TAV_HEADER_SIZE + UCF_SIZE), TAV_HEADER_SIZE, UCF_SIZE);
    printf("\nTo create seekable TAV file, prepend this payload to your concatenated TAV file:\n");
    printf(" cat %s input.tav > output_seekable.tav\n", output_path);
    return 0;
}

View File

@@ -1,935 +0,0 @@
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
#include <zlib.h>
#include <unistd.h>
#include <sys/wait.h>
#include <getopt.h>
#include <sys/time.h>
// TVDOS Movie format constants
// 8-byte file magic: 0x1F followed by the ASCII characters "TSVMMOV"
#define TVDOS_MAGIC "\x1F\x54\x53\x56\x4D\x4D\x4F\x56" // "\x1FTSVMMOV"
// NOTE(review): presumably the number of bytes one encoded 4x4 iPF1 block
// occupies - confirm against the block encoder.
#define IPF_BLOCK_SIZE 12
// iPF1-delta opcodes
#define SKIP_OP 0x00
#define PATCH_OP 0x01
#define REPEAT_OP 0x02
#define END_OP 0xFF
// Video packet types
// Each macro expands to TWO comma-separated byte values, so it can only be
// used where a pair of values is accepted (e.g. inside an array initialiser).
#define IPF1_PACKET_TYPE 0x04, 0x00 // iPF Type 1 (4 + 0)
#define IPF1_DELTA_PACKET_TYPE 0x04, 0x02 // iPF Type 1 delta
#define SYNC_PACKET_TYPE 0xFF, 0xFF // Sync packet
// Audio constants
#define MP2_SAMPLE_RATE 32000 // Hz; passed to ffmpeg as -ar and used for frame-size maths
#define MP2_DEFAULT_PACKET_SIZE 0x240 // 576 bytes; fallback when the bitrate index is unusable
#define MP2_PACKET_TYPE_BASE 0x11
// Default values
#define DEFAULT_WIDTH 560
#define DEFAULT_HEIGHT 448
// Fixed path ffmpeg writes the transcoded MP2 audio to
#define TEMP_AUDIO_FILE "/tmp/tvdos_temp_audio.mp2"
// All state of one encoding run: user options, probed video metadata,
// working buffers, audio bookkeeping, ffmpeg handles and progress counters.
typedef struct {
char *input_file;   // source media path; interpolated into ffprobe/ffmpeg command lines
char *output_file;
int width;          // output frame width in pixels (ffmpeg scales and crops to this)
int height;         // output frame height in pixels
int fps;            // integer frame rate filled in by get_video_metadata()
int total_frames;   // frame count from ffprobe, or duration * fps as a fallback
double duration;    // seconds, from ffprobe
int has_audio;      // non-zero when an audio stream was found and converted
int output_to_stdout;
// Internal buffers
uint8_t *previous_ipf_frame;
uint8_t *current_ipf_frame;
uint8_t *delta_buffer;
uint8_t *rgb_buffer;
uint8_t *compressed_buffer;
uint8_t *mp2_buffer;
size_t frame_buffer_size;
// Audio handling
FILE *mp2_file;          // transcoded MP2 file (TEMP_AUDIO_FILE), opened for reading
int mp2_packet_size;
int mp2_rate_index;
size_t audio_remaining;  // initialised to the byte size of the MP2 file
int audio_frames_in_buffer;
int target_audio_buffer_size;
// FFmpeg processes
FILE *ffmpeg_video_pipe; // popen() stream delivering raw rgb24 frames
FILE *ffmpeg_audio_pipe;
// Progress tracking
struct timeval start_time;
struct timeval last_progress_time;
size_t total_output_bytes;
// Dithering mode
int dither_mode;
} encoder_config_t;
// CORRECTED YCoCg conversion matching Kotlin implementation
typedef struct {
float y, co, cg;
} ycocg_t;
static ycocg_t rgb_to_ycocg_correct(uint8_t r, uint8_t g, uint8_t b, float ditherThreshold) {
ycocg_t result;
float rf = floor((ditherThreshold / 15.0 + r / 255.0) * 15.0) / 15.0;
float gf = floor((ditherThreshold / 15.0 + g / 255.0) * 15.0) / 15.0;
float bf = floor((ditherThreshold / 15.0 + b / 255.0) * 15.0) / 15.0;
// CORRECTED: Match Kotlin implementation exactly
float co = rf - bf; // co = r - b [-1..1]
float tmp = bf + co / 2.0f; // tmp = b + co/2
float cg = gf - tmp; // cg = g - tmp [-1..1]
float y = tmp + cg / 2.0f; // y = tmp + cg/2 [0..1]
result.y = y;
result.co = co;
result.cg = cg;
return result;
}
static int quantise_4bit_y(float value) {
// Y quantisation: round(y * 15)
return (int)round(fmaxf(0.0f, fminf(15.0f, value * 15.0f)));
}
static int chroma_to_four_bits(float f) {
// CORRECTED: Match Kotlin chromaToFourBits function exactly
// return (round(f * 8) + 7).coerceIn(0..15)
int result = (int)round(f * 8.0f) + 7;
return fmaxf(0, fminf(15, result));
}
// Parse a "<width>x<height>" string such as "1024x768".
// Returns 1 when both numbers were read, 0 otherwise (including a NULL string).
// On a partial match *width may already have been overwritten.
static int parse_resolution(const char *res_str, int *width, int *height) {
    if (res_str == NULL) {
        return 0;
    }
    const int fields_read = sscanf(res_str, "%dx%d", width, height);
    return (fields_read == 2) ? 1 : 0;
}
// Execute command and capture output
static char *execute_command(const char *command) {
FILE *pipe = popen(command, "r");
if (!pipe) return NULL;
char *result = malloc(4096);
size_t len = fread(result, 1, 4095, pipe);
result[len] = '\0';
pclose(pipe);
return result;
}
// Probe the input file with ffprobe and fill in config->total_frames,
// config->fps, config->duration and config->has_audio, then print a summary
// to stderr.
// Returns 1 when a positive frame count and frame rate were obtained, else 0.
//
// NOTE(review): input_file is placed inside double quotes on a shell command
// line. A file name containing '"', '$' or '`' will break (or be interpreted
// by) the shell - not changed here, but it should be escaped if file names can
// come from an untrusted source.
static int get_video_metadata(encoder_config_t *config) {
    char command[1024];
    char *output;

    // Get frame count
    snprintf(command, sizeof(command),
             "ffprobe -v quiet -select_streams v:0 -count_frames -show_entries stream=nb_read_frames -of csv=p=0 \"%s\"",
             config->input_file);
    output = execute_command(command);
    if (!output) {
        fprintf(stderr, "Failed to get frame count\n");
        return 0;
    }
    config->total_frames = atoi(output);
    free(output);

    // Get frame rate
    snprintf(command, sizeof(command),
             "ffprobe -v quiet -select_streams v:0 -show_entries stream=r_frame_rate -of csv=p=0 \"%s\"",
             config->input_file);
    output = execute_command(command);
    if (!output) {
        fprintf(stderr, "Failed to get frame rate\n");
        return 0;
    }

    // Parse framerate (could be "30/1" or "29.97").
    // Both forms are rounded to the nearest integer: integer division would
    // turn "30000/1001" into 29 while "29.97" became 30.
    int num, den;
    if (sscanf(output, "%d/%d", &num, &den) == 2) {
        config->fps = (den > 0) ? (int)round((double)num / (double)den) : 30;
    } else {
        config->fps = (int)round(atof(output));
    }
    free(output);

    // Get duration (left untouched when ffprobe could not be run)
    snprintf(command, sizeof(command),
             "ffprobe -v quiet -show_entries format=duration -of csv=p=0 \"%s\"",
             config->input_file);
    output = execute_command(command);
    if (output) {
        config->duration = atof(output);
        free(output);
    }

    // Check if has audio: any output for stream a:0 means an audio stream exists
    snprintf(command, sizeof(command),
             "ffprobe -v quiet -select_streams a:0 -show_entries stream=index -of csv=p=0 \"%s\"",
             config->input_file);
    output = execute_command(command);
    config->has_audio = (output && strlen(output) > 0 && atoi(output) >= 0);
    if (output) free(output);

    // Fall back to duration * fps when ffprobe reported no frame count
    if (config->total_frames <= 0 && config->duration > 0) {
        config->total_frames = (int)(config->duration * config->fps);
    }

    fprintf(stderr, "Video metadata:\n");
    fprintf(stderr, " Frames: %d\n", config->total_frames);
    fprintf(stderr, " FPS: %d\n", config->fps);
    fprintf(stderr, " Duration: %.2fs\n", config->duration);
    fprintf(stderr, " Audio: %s\n", config->has_audio ? "Yes" : "No");
    fprintf(stderr, " Resolution: %dx%d\n", config->width, config->height);

    return (config->total_frames > 0 && config->fps > 0);
}
// Launch ffmpeg so that it streams the input as raw rgb24 frames, scaled and
// centre-cropped to config->width x config->height, on a pipe.
// The pipe is stored in config->ffmpeg_video_pipe. Returns 1 if the process
// was started, 0 otherwise.
static int start_video_conversion(encoder_config_t *config) {
    char cmdline[2048];
    const int out_w = config->width;
    const int out_h = config->height;

    snprintf(cmdline, sizeof(cmdline),
             "ffmpeg -i \"%s\" -f rawvideo -pix_fmt rgb24 -vf scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d -y - 2>/dev/null",
             config->input_file, out_w, out_h, out_w, out_h);

    FILE *video_pipe = popen(cmdline, "r");
    config->ffmpeg_video_pipe = video_pipe;
    if (video_pipe == NULL) {
        return 0;
    }
    return 1;
}
// Transcode the input's audio to an MP2 file (TEMP_AUDIO_FILE) with ffmpeg and
// open it for reading.
// On success config->mp2_file is the opened file and config->audio_remaining
// is its size in bytes. Any failure is non-fatal: a warning is printed,
// config->has_audio is cleared and encoding continues without audio.
// Always returns 1.
static int start_audio_conversion(encoder_config_t *config) {
    if (!config->has_audio) return 1;

    char command[2048];
    snprintf(command, sizeof(command),
             "ffmpeg -i \"%s\" -acodec libtwolame -psymodel 4 -b:a 192k -ar %d -ac 2 -y \"%s\" 2>/dev/null",
             config->input_file, MP2_SAMPLE_RATE, TEMP_AUDIO_FILE);

    if (system(command) == 0) {
        config->mp2_file = fopen(TEMP_AUDIO_FILE, "rb");
        if (config->mp2_file) {
            // Measure the file; a failed seek/tell must not be stored as a
            // (huge, unsigned) remaining byte count
            long size = -1;
            if (fseek(config->mp2_file, 0, SEEK_END) == 0) {
                size = ftell(config->mp2_file);
            }
            if (size >= 0 && fseek(config->mp2_file, 0, SEEK_SET) == 0) {
                config->audio_remaining = (size_t)size;
                return 1;
            }
            fclose(config->mp2_file);
            config->mp2_file = NULL;
        }
    }

    fprintf(stderr, "Warning: Failed to convert audio, proceeding without audio\n");
    config->has_audio = 0;
    return 1;
}
// Append `value` at *ptr as a variable-length integer: 7 data bits per byte,
// least-significant group first, with bit 7 set on every byte except the last.
// *ptr is advanced past the bytes written (1 to 5 bytes for a 32-bit value).
static void write_varint(uint8_t **ptr, uint32_t value) {
    uint8_t *cursor = *ptr;
    do {
        uint8_t group = (uint8_t)(value & 0x7F);
        value >>= 7;
        if (value != 0) {
            group |= 0x80; // more groups follow
        }
        *cursor++ = group;
    } while (value != 0);
    *ptr = cursor;
}
// Compute the size in bytes of an MP2 frame from its header.
// Reads the bitrate index (upper nibble of header[2]) and the padding bit
// (bit 1 of header[2]); the sample rate is taken to be MP2_SAMPLE_RATE.
// Returns MP2_DEFAULT_PACKET_SIZE when the bitrate index maps to no usable
// bitrate (index 0 or 15).
static int get_mp2_packet_size(uint8_t *header) {
    static const int kbps_by_index[16] = {
        0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, -1
    };

    const uint8_t flags = header[2];
    const int kbps = kbps_by_index[(flags >> 4) & 0xF];
    if (kbps <= 0) {
        return MP2_DEFAULT_PACKET_SIZE;
    }

    const int padding = (flags >> 1) & 0x1;
    return (144 * kbps * 1000) / MP2_SAMPLE_RATE + padding;
}
// Translate an MP2 packet size in bytes into the rate index used by the
// container. Known sizes map to even indices 0, 2, ..., 26 in ascending order;
// an unknown size falls back to 14 (the index of the 576-byte packet).
// One is added when `is_mono` is non-zero.
static int mp2_packet_size_to_rate_index(int packet_size, int is_mono) {
    static const int known_sizes[] = {
        144, 216, 252, 288, 360, 432, 504, 576, 720, 864, 1008, 1152, 1440, 1728
    };
    const int known_count = (int)(sizeof(known_sizes) / sizeof(known_sizes[0]));

    int rate_index = 14; // fallback for sizes not in the table
    for (int slot = 0; slot < known_count; slot++) {
        if (known_sizes[slot] == packet_size) {
            rate_index = slot * 2;
            break;
        }
    }

    if (is_mono) {
        rate_index += 1;
    }
    return rate_index;
}
// Compress `src` into `dst` in one shot, producing a gzip stream (rather than
// a zlib stream).
//   src, src_len - input data
//   dst, dst_max - output buffer and its capacity
// Returns the number of compressed bytes written, or 0 if zlib could not be
// initialised or the output did not fit in `dst`.
static size_t gzip_compress(uint8_t *src, size_t src_len, uint8_t *dst, size_t dst_max) {
    // windowBits 15 (32 KiB window) plus 16 selects the gzip wrapper in zlib
    const int gzip_window_bits = 15 + 16;

    z_stream zs = {0};
    zs.next_in = src;
    zs.avail_in = src_len;
    zs.next_out = dst;
    zs.avail_out = dst_max;

    if (deflateInit2(&zs, Z_DEFAULT_COMPRESSION, Z_DEFLATED, gzip_window_bits, 8, Z_DEFAULT_STRATEGY) != Z_OK) {
        return 0;
    }

    // Z_FINISH must consume everything in a single call; anything other than
    // Z_STREAM_END is treated as failure
    const int status = deflate(&zs, Z_FINISH);
    const size_t produced = (status == Z_STREAM_END) ? zs.total_out : 0;

    deflateEnd(&zs);
    return produced;
}
// Bayer dithering kernels (4 patterns, each 4x4)
// Each kernel is stored row-major (index = 4 * row + column) and holds the
// thresholds (k + 0.5) / 16 for k = 0..15, i.e. values strictly between 0 and 1.
// Patterns 1-3 are shifted copies of pattern 0:
//   pattern 1 = pattern 0 with every row rotated left by one column
//   pattern 2 = pattern 1 with the rows rotated down by one
//   pattern 3 = pattern 0 with the rows rotated down by one
static const float bayerKernels[4][16] = {
{ // Pattern 0
(0.0f + 0.5f) / 16.0f, (8.0f + 0.5f) / 16.0f, (2.0f + 0.5f) / 16.0f, (10.0f + 0.5f) / 16.0f,
(12.0f + 0.5f) / 16.0f, (4.0f + 0.5f) / 16.0f, (14.0f + 0.5f) / 16.0f, (6.0f + 0.5f) / 16.0f,
(3.0f + 0.5f) / 16.0f, (11.0f + 0.5f) / 16.0f, (1.0f + 0.5f) / 16.0f, (9.0f + 0.5f) / 16.0f,
(15.0f + 0.5f) / 16.0f, (7.0f + 0.5f) / 16.0f, (13.0f + 0.5f) / 16.0f, (5.0f + 0.5f) / 16.0f
},
{ // Pattern 1
(8.0f + 0.5f) / 16.0f, (2.0f + 0.5f) / 16.0f, (10.0f + 0.5f) / 16.0f, (0.0f + 0.5f) / 16.0f,
(4.0f + 0.5f) / 16.0f, (14.0f + 0.5f) / 16.0f, (6.0f + 0.5f) / 16.0f, (12.0f + 0.5f) / 16.0f,
(11.0f + 0.5f) / 16.0f, (1.0f + 0.5f) / 16.0f, (9.0f + 0.5f) / 16.0f, (3.0f + 0.5f) / 16.0f,
(7.0f + 0.5f) / 16.0f, (13.0f + 0.5f) / 16.0f, (5.0f + 0.5f) / 16.0f, (15.0f + 0.5f) / 16.0f
},
{ // Pattern 2
(7.0f + 0.5f) / 16.0f, (13.0f + 0.5f) / 16.0f, (5.0f + 0.5f) / 16.0f, (15.0f + 0.5f) / 16.0f,
(8.0f + 0.5f) / 16.0f, (2.0f + 0.5f) / 16.0f, (10.0f + 0.5f) / 16.0f, (0.0f + 0.5f) / 16.0f,
(4.0f + 0.5f) / 16.0f, (14.0f + 0.5f) / 16.0f, (6.0f + 0.5f) / 16.0f, (12.0f + 0.5f) / 16.0f,
(11.0f + 0.5f) / 16.0f, (1.0f + 0.5f) / 16.0f, (9.0f + 0.5f) / 16.0f, (3.0f + 0.5f) / 16.0f
},
{ // Pattern 3
(15.0f + 0.5f) / 16.0f, (7.0f + 0.5f) / 16.0f, (13.0f + 0.5f) / 16.0f, (5.0f + 0.5f) / 16.0f,
(0.0f + 0.5f) / 16.0f, (8.0f + 0.5f) / 16.0f, (2.0f + 0.5f) / 16.0f, (10.0f + 0.5f) / 16.0f,
(12.0f + 0.5f) / 16.0f, (4.0f + 0.5f) / 16.0f, (14.0f + 0.5f) / 16.0f, (6.0f + 0.5f) / 16.0f,
(3.0f + 0.5f) / 16.0f, (11.0f + 0.5f) / 16.0f, (1.0f + 0.5f) / 16.0f, (9.0f + 0.5f) / 16.0f
}
};
// Encode one 4x4 pixel block into the 12-byte iPF1 layout:
//   bytes 0-1  : four 4-bit Co samples (one per 2x2 quadrant)
//   bytes 2-3  : four 4-bit Cg samples (one per 2x2 quadrant)
//   bytes 4-11 : sixteen 4-bit Y samples, two per byte
// rgb_data holds `channels` bytes per pixel, of which the first three are read
// as R, G, B. block_x/block_y are block coordinates (pixel origin = block * 4).
// pattern < 0 disables dithering; otherwise pattern % 4 selects a Bayer kernel.
// Pixels outside the width x height frame are encoded as all-zero YCoCg.
static void encode_ipf1_block_correct(uint8_t *rgb_data, int width, int height, int block_x, int block_y,
                                      int channels, int pattern, uint8_t *output) {
    // Top-left sample index of each 2x2 chroma quadrant, in output order.
    static const int quad_origin[4] = {0, 2, 8, 10};
    // Sample index stored in the low nibble of output bytes 4..11; the high
    // nibble always holds the sample immediately to its right (index + 1).
    static const int luma_low[8] = {0, 4, 2, 6, 8, 12, 10, 14};

    int luma[16];
    float co[16];  // unquantised, so subsampling averages at full precision
    float cg[16];

    const int origin_x = block_x * 4;
    const int origin_y = block_y * 4;

    // Convert every sample of the block to YCoCg; luma is quantised right away.
    for (int i = 0; i < 16; i++) {
        const int px = i % 4;
        const int py = i / 4;
        const int src_x = origin_x + px;
        const int src_y = origin_y + py;

        ycocg_t sample = {0.0f, 0.0f, 0.0f};
        if (src_x < width && src_y < height) {
            const float threshold = (pattern < 0) ? 0.0f : bayerKernels[pattern % 4][4 * py + px];
            const uint8_t *rgb = rgb_data + (src_y * width + src_x) * channels;
            sample = rgb_to_ycocg_correct(rgb[0], rgb[1], rgb[2], threshold);
        }

        luma[i] = quantise_4bit_y(sample.y);
        co[i] = sample.co;
        cg[i] = sample.cg;
    }

    // 4:2:0 chroma: average each 2x2 quadrant, then quantise to 4 bits.
    int co_q[4];
    int cg_q[4];
    for (int q = 0; q < 4; q++) {
        const int a = quad_origin[q];
        co_q[q] = chroma_to_four_bits((co[a] + co[a + 1] + co[a + 4] + co[a + 5]) / 4.0f);
    }
    for (int q = 0; q < 4; q++) {
        const int a = quad_origin[q];
        cg_q[q] = chroma_to_four_bits((cg[a] + cg[a + 1] + cg[a + 4] + cg[a + 5]) / 4.0f);
    }

    // Chroma bytes: the second quadrant of each pair goes in the high nibble.
    output[0] = ((co_q[1] << 4) | co_q[0]);
    output[1] = ((co_q[3] << 4) | co_q[2]);
    output[2] = ((cg_q[1] << 4) | cg_q[0]);
    output[3] = ((cg_q[3] << 4) | cg_q[2]);

    // Luma bytes: Y1|Y0, Y5|Y4, Y3|Y2, Y7|Y6, Y9|Y8, Y13|Y12, Y11|Y10, Y15|Y14.
    for (int b = 0; b < 8; b++) {
        const int lo = luma_low[b];
        output[4 + b] = ((luma[lo + 1] << 4) | luma[lo]);
    }
}
// Weighted difference score for one pair of 4-bit samples.
// v1, v2 : the two sample values being compared
// delta  : their absolute difference (computed by the caller)
// weight : per-channel importance factor
// Returns delta * weight, boosted by 1.5x when the mean of the two samples is
// below 4 or above 11 (near either end of the 4-bit range).
static double contrast_weight(int v1, int v2, int delta, int weight) {
    const double mean = (v1 + v2) / 2.0;
    const int base = delta * weight;
    if (mean >= 4 && mean <= 11) {
        return base * 1.0;
    }
    return base * 1.5;
}
// Decide whether two 12-byte iPF1 blocks differ enough to be re-sent.
// Every 4-bit sample pair contributes contrast_weight(a, b, |a - b|, w) to a
// running score: w = 3 for the chroma samples in bytes 0-3 and w = 2 for the
// luma samples in bytes 4-11. Returns 1 when the score exceeds 4.0, else 0.
static int is_significantly_different(uint8_t *block_a, uint8_t *block_b) {
    double total = 0.0;

    // Chroma (Co in bytes 0-1, Cg in bytes 2-3): low nibble, then high nibble.
    for (int k = 0; k < 4; k++) {
        const int a_lo = block_a[k] & 0xF;
        const int b_lo = block_b[k] & 0xF;
        const int a_hi = (block_a[k] >> 4) & 0xF;
        const int b_hi = (block_b[k] >> 4) & 0xF;
        total += contrast_weight(a_lo, b_lo, abs(a_lo - b_lo), 3);
        total += contrast_weight(a_hi, b_hi, abs(a_hi - b_hi), 3);
    }

    // Luma (bytes 4-11): high nibble, then low nibble.
    for (int k = 4; k < 12; k++) {
        const int a_hi = (block_a[k] >> 4) & 0xF;
        const int b_hi = (block_b[k] >> 4) & 0xF;
        const int a_lo = block_a[k] & 0xF;
        const int b_lo = block_b[k] & 0xF;
        total += contrast_weight(a_hi, b_hi, abs(a_hi - b_hi), 2);
        total += contrast_weight(a_lo, b_lo, abs(a_lo - b_lo), 2);
    }

    return total > 4.0;
}
// Encode a whole RGB frame as consecutive iPF1 blocks in raster order.
// The frame is covered by ceil(width / 4) x ceil(height / 4) blocks, each
// occupying IPF_BLOCK_SIZE bytes of ipf_buffer; `channels` and `pattern` are
// passed through to the per-block encoder unchanged.
static void encode_ipf1_frame(uint8_t *rgb_data, int width, int height, int channels, int pattern,
                              uint8_t *ipf_buffer) {
    const int cols = (width + 3) / 4;
    const int rows = (height + 3) / 4;
    uint8_t *dst = ipf_buffer;

    for (int by = 0; by < rows; by++) {
        for (int bx = 0; bx < cols; bx++) {
            encode_ipf1_block_correct(rgb_data, width, height, bx, by, channels, pattern, dst);
            dst += IPF_BLOCK_SIZE;
        }
    }
}
// Build an iPF1-delta stream describing how current_frame differs from
// previous_frame. Both frames are arrays of IPF_BLOCK_SIZE-byte blocks covering
// ceil(width / 4) x ceil(height / 4) blocks.
//
// Stream layout written to delta_buffer:
//   SKIP_OP  varint(n)                        - n unchanged blocks
//   PATCH_OP varint(n) n * IPF_BLOCK_SIZE     - n replacement blocks
//   END_OP                                    - terminator
// A trailing run of unchanged blocks is not emitted; END_OP follows directly.
//
// Returns the number of bytes written. delta_buffer must be large enough for
// the worst case (every block patched plus the opcode/varint overhead).
//
// A run of patched blocks is contiguous in current_frame, so it is copied
// straight from there; no staging buffer (and no allocation that could fail)
// is needed.
static size_t encode_ipf1_delta(uint8_t *previous_frame, uint8_t *current_frame,
                                int width, int height, uint8_t *delta_buffer) {
    const int blocks_per_row = (width + 3) / 4;
    const int blocks_per_col = (height + 3) / 4;
    const int total_blocks = blocks_per_row * blocks_per_col;

    uint8_t *output_ptr = delta_buffer;
    int skip_count = 0;
    int patch_start = 0;  // index of the first block of the pending patch run
    int patch_count = 0;  // length of the pending patch run

    for (int block_index = 0; block_index < total_blocks; block_index++) {
        uint8_t *prev_block = previous_frame + (size_t)block_index * IPF_BLOCK_SIZE;
        uint8_t *curr_block = current_frame + (size_t)block_index * IPF_BLOCK_SIZE;

        if (is_significantly_different(prev_block, curr_block)) {
            // A changed block ends any pending skip run.
            if (skip_count > 0) {
                *output_ptr++ = SKIP_OP;
                write_varint(&output_ptr, skip_count);
                skip_count = 0;
            }
            if (patch_count == 0) {
                patch_start = block_index;
            }
            patch_count++;
        } else {
            // An unchanged block ends any pending patch run.
            if (patch_count > 0) {
                const size_t run_bytes = (size_t)patch_count * IPF_BLOCK_SIZE;
                *output_ptr++ = PATCH_OP;
                write_varint(&output_ptr, patch_count);
                memcpy(output_ptr, current_frame + (size_t)patch_start * IPF_BLOCK_SIZE, run_bytes);
                output_ptr += run_bytes;
                patch_count = 0;
            }
            skip_count++;
        }
    }

    // Flush a patch run that reaches the end of the frame.
    if (patch_count > 0) {
        const size_t run_bytes = (size_t)patch_count * IPF_BLOCK_SIZE;
        *output_ptr++ = PATCH_OP;
        write_varint(&output_ptr, patch_count);
        memcpy(output_ptr, current_frame + (size_t)patch_start * IPF_BLOCK_SIZE, run_bytes);
        output_ptr += run_bytes;
    }

    *output_ptr++ = END_OP;
    return (size_t)(output_ptr - delta_buffer);
}
// Sample the wall clock into *tv and return the same instant as fractional
// seconds (tv_sec plus tv_usec scaled to seconds).
static double get_current_time_sec(struct timeval *tv) {
    gettimeofday(tv, NULL);
    const double whole_seconds = tv->tv_sec;
    const double fraction = tv->tv_usec / 1000000.0;
    return whole_seconds + fraction;
}
// Print an FFmpeg-style one-line progress report to stderr, at most once per
// second. frame_num is the number of the frame just encoded. The report shows
// the frame count, encoding rate, bytes written so far, position in the video
// (HH:MM:SS.xx), average bitrate and encoding speed relative to real time.
// Updates config->last_progress_time whenever a line is printed.
static void display_progress(encoder_config_t *config, int frame_num) {
    struct timeval now;
    const double now_sec = get_current_time_sec(&now);

    // Throttle: do nothing if the previous report is less than a second old.
    const double previous_sec =
        config->last_progress_time.tv_sec + config->last_progress_time.tv_usec / 1000000.0;
    if (now_sec - previous_sec < 1.0) {
        return;
    }
    config->last_progress_time = now;

    // Timing figures
    const double begin_sec = config->start_time.tv_sec + config->start_time.tv_usec / 1000000.0;
    const double elapsed_sec = now_sec - begin_sec;
    const double current_video_time = (double)frame_num / config->fps;
    const double encode_fps = frame_num / elapsed_sec;
    double speed = 0.0;
    double bitrate = 0.0;
    if (elapsed_sec > 0) {
        speed = current_video_time / elapsed_sec;
        bitrate = (config->total_output_bytes * 8.0 / 1024.0) / elapsed_sec;
    }

    // Human-readable output size
    char size_str[32];
    if (config->total_output_bytes < 1024) {
        snprintf(size_str, sizeof(size_str), "%zuB", config->total_output_bytes);
    } else if (config->total_output_bytes < 1024 * 1024) {
        snprintf(size_str, sizeof(size_str), "%.1fkB", config->total_output_bytes / 1024.0);
    } else {
        snprintf(size_str, sizeof(size_str), "%.1fMB", config->total_output_bytes / (1024.0 * 1024.0));
    }

    // Split the video position into HH:MM:SS.xx
    const int hours = (int)(current_video_time / 3600);
    const int minutes = (int)((current_video_time - hours * 3600) / 60);
    const double seconds = current_video_time - hours * 3600 - minutes * 60;

    // The leading '\r' overwrites the previous progress line in place.
    fprintf(stderr, "\rframe=%d fps=%.1f size=%s time=%02d:%02d:%05.2f bitrate=%.1fkbits/s speed=%4.2fx",
            frame_num, encode_fps, size_str, hours, minutes, seconds, bitrate, speed);
    fflush(stderr);
}
// Process audio for current frame
// Interleaves MP2 audio packets into `output` ahead of the video packet for
// frame `frame_num` (1-based). A simple counter, config->audio_frames_in_buffer,
// models how many packets the player still has queued; packets are written
// only when that model says the queue is running low.
// Returns 1 in every case visible here (including "no audio" and a failed
// header read), so callers cannot detect an audio error from the return value.
static int process_audio(encoder_config_t *config, int frame_num, FILE *output) {
// Nothing to do without an audio stream, an open MP2 file, or remaining bytes.
if (!config->has_audio || !config->mp2_file || config->audio_remaining <= 0) {
return 1;
}
// Initialise packet size on first frame
if (config->mp2_packet_size == 0) {
uint8_t header[4];
// NOTE(review): if this read fails the file position is not rewound.
if (fread(header, 1, 4, config->mp2_file) != 4) return 1;
// Rewind so the header bytes are emitted as part of the first packet.
fseek(config->mp2_file, 0, SEEK_SET);
config->mp2_packet_size = get_mp2_packet_size(header);
// Top two bits of header byte 3 equal to 3 are treated as mono.
int is_mono = (header[3] >> 6) == 3;
config->mp2_rate_index = mp2_packet_size_to_rate_index(config->mp2_packet_size, is_mono);
}
// Calculate how much audio time each frame represents (in seconds)
double frame_audio_time = 1.0 / config->fps;
// Calculate how much audio time each MP2 packet represents
// MP2 frame contains 1152 samples at 32kHz = 0.036 seconds
double packet_audio_time = 1152.0 / MP2_SAMPLE_RATE;
// Estimate how many packets we consume per video frame
double packets_per_frame = frame_audio_time / packet_audio_time;
// Only insert audio when buffer would go below 2 frames
// Initialise with 2 packets on first frame to prime the buffer
int packets_to_insert = 0;
if (frame_num == 1) {
packets_to_insert = 2;
// The insert loop below increments this counter again for each packet written.
config->audio_frames_in_buffer = 2;
} else {
// Simulate buffer consumption (packets consumed per frame)
// The estimate is rounded up, so consumption is never under-counted.
config->audio_frames_in_buffer -= (int)ceil(packets_per_frame);
// Only insert packets when buffer gets low (≤ 2 frames)
if (config->audio_frames_in_buffer <= 2) {
// Refill up to the target queue size, but always write at least one packet.
packets_to_insert = config->target_audio_buffer_size - config->audio_frames_in_buffer;
packets_to_insert = (packets_to_insert > 0) ? packets_to_insert : 1;
}
}
// Insert the calculated number of audio packets
for (int q = 0; q < packets_to_insert; q++) {
// The final packet may be shorter than mp2_packet_size.
size_t bytes_to_read = config->mp2_packet_size;
if (bytes_to_read > config->audio_remaining) {
bytes_to_read = config->audio_remaining;
}
// NOTE(review): bytes_to_read is not checked against the capacity of mp2_buffer.
size_t bytes_read = fread(config->mp2_buffer, 1, bytes_to_read, config->mp2_file);
if (bytes_read == 0) break;
// Packet header: rate index byte followed by the MP2 packet type byte.
uint8_t audio_packet_type[2] = {config->mp2_rate_index, MP2_PACKET_TYPE_BASE};
fwrite(audio_packet_type, 1, 2, output);
fwrite(config->mp2_buffer, 1, bytes_read, output);
// Track audio bytes written
config->total_output_bytes += 2 + bytes_read;
config->audio_remaining -= bytes_read;
config->audio_frames_in_buffer++;
}
return 1;
}
// Write the fixed-size TVDOS movie header to `output`:
//   8-byte magic, width (2), height (2), fps (2), total frame count (4),
//   a 0x00FF filler word (2), audio queue info (2), 10 reserved zero bytes.
// Multi-byte fields are copied from memory as stored by the host.
// Side effect: stores the computed audio queue size in
// config->target_audio_buffer_size for later audio pacing.
static void write_tvdos_header(encoder_config_t *config, FILE *output) {
    fwrite(TVDOS_MAGIC, 1, 8, output);
    fwrite(&config->width, 2, 1, output);
    fwrite(&config->height, 2, 1, output);
    fwrite(&config->fps, 2, 1, output);
    fwrite(&config->total_frames, 4, 1, output);

    const uint16_t filler = 0x00FF;
    fwrite(&filler, 2, 1, output);

    // Audio queue word: packet size / 4 in the low bits, queue depth in the
    // top four bits. Both stay zero when the movie has no audio.
    const int audio_sample_size = 2 * (((MP2_SAMPLE_RATE / config->fps) + 1));
    int audio_queue_size = 0;
    uint16_t audio_queue_info = 0x0000;
    if (config->has_audio) {
        audio_queue_size = (int)ceil(audio_sample_size / 2304.0) + 1;
        audio_queue_info = (MP2_DEFAULT_PACKET_SIZE >> 2) | (audio_queue_size << 12);
    }
    fwrite(&audio_queue_info, 2, 1, output);

    // Remember the queue depth for audio timing
    config->target_audio_buffer_size = audio_queue_size;

    const uint8_t reserved[10] = {0};
    fwrite(reserved, 1, 10, output);
}
// Allocate a zero-filled encoder configuration and apply the default frame
// size. Returns NULL when the allocation fails.
static encoder_config_t *init_encoder_config() {
    encoder_config_t *cfg = calloc(1, sizeof(encoder_config_t));
    if (cfg != NULL) {
        cfg->width = DEFAULT_WIDTH;
        cfg->height = DEFAULT_HEIGHT;
    }
    return cfg;
}
// Allocate every working buffer the encoder needs, sized from config->width
// and config->height. Returns 1 when all allocations succeeded and 0 when any
// failed; partially allocated buffers are left in `config` for the caller to
// release.
static int allocate_buffers(encoder_config_t *config) {
    // One IPF_BLOCK_SIZE-byte block per 4x4 tile, rounding partial tiles up.
    config->frame_buffer_size = ((config->width + 3) / 4) * ((config->height + 3) / 4) * IPF_BLOCK_SIZE;

    config->rgb_buffer = malloc(config->width * config->height * 3);
    config->previous_ipf_frame = malloc(config->frame_buffer_size);
    config->current_ipf_frame = malloc(config->frame_buffer_size);
    // Delta and compressed output get twice the frame size as headroom.
    config->delta_buffer = malloc(config->frame_buffer_size * 2);
    config->compressed_buffer = malloc(config->frame_buffer_size * 2);
    config->mp2_buffer = malloc(2048);

    if (!config->rgb_buffer || !config->previous_ipf_frame || !config->current_ipf_frame) {
        return 0;
    }
    if (!config->delta_buffer || !config->compressed_buffer || !config->mp2_buffer) {
        return 0;
    }
    return 1;
}
// Process one frame - CORRECTED ORDER: Audio -> Video -> Sync
// Reads one raw RGB frame from the FFmpeg pipe, writes any due audio packets,
// encodes the frame as iPF1 (full or delta), gzip-compresses it and writes the
// video packet followed by a sync packet.
// frame_num is 1-based; is_keyframe forces a full (non-delta) frame.
// Returns 1 on success, 0 when the video pipe reached end-of-file, and -1 on a
// short read that is not EOF or on a compression failure.
static int process_frame(encoder_config_t *config, int frame_num, int is_keyframe, FILE *output) {
// Read RGB data from FFmpeg pipe first
// Frames are read as 3 bytes per pixel.
size_t rgb_size = config->width * config->height * 3;
if (fread(config->rgb_buffer, 1, rgb_size, config->ffmpeg_video_pipe) != rgb_size) {
if (feof(config->ffmpeg_video_pipe)) return 0;
return -1;
}
// Step 1: Process audio FIRST (matches working file pattern)
if (!process_audio(config, frame_num, output)) {
return -1;
}
// Step 2: Encode and write video
// Map the dither mode to a Bayer pattern index; -1 disables dithering.
int pattern;
switch (config->dither_mode) {
case 0: pattern = -1; break; // No dithering
case 1: pattern = 0; break; // Static pattern
case 2: pattern = frame_num % 4; break; // Dynamic pattern
default: pattern = 0; break; // Fallback to static
}
encode_ipf1_frame(config->rgb_buffer, config->width, config->height, 3, pattern,
config->current_ipf_frame);
// Determine if we should use delta encoding
// Default to the full frame; switch to the delta stream only if it is worthwhile.
int use_delta = 0;
size_t data_size = config->frame_buffer_size;
uint8_t *frame_data = config->current_ipf_frame;
if (frame_num > 1 && !is_keyframe) {
size_t delta_size = encode_ipf1_delta(config->previous_ipf_frame,
config->current_ipf_frame,
config->width, config->height,
config->delta_buffer);
// Use the delta only when it is smaller than 57.6% of a full frame.
if (delta_size < config->frame_buffer_size * 0.576) {
use_delta = 1;
data_size = delta_size;
frame_data = config->delta_buffer;
}
}
// Compress the frame data using gzip
// The last argument is the capacity of compressed_buffer; a result of 0 is treated as failure.
size_t compressed_size = gzip_compress(frame_data, data_size,
config->compressed_buffer,
config->frame_buffer_size * 2);
if (compressed_size == 0) {
fprintf(stderr, "Gzip compression failed\n");
return -1;
}
// Write video packet
// NOTE(review): the *_PACKET_TYPE macros presumably expand to both header bytes - confirm.
if (use_delta) {
uint8_t packet_type[2] = {IPF1_DELTA_PACKET_TYPE};
fwrite(packet_type, 1, 2, output);
} else {
uint8_t packet_type[2] = {IPF1_PACKET_TYPE};
fwrite(packet_type, 1, 2, output);
}
// NOTE(review): the size is written in host byte order; the name suggests little-endian is expected.
uint32_t size_le = compressed_size;
fwrite(&size_le, 4, 1, output);
fwrite(config->compressed_buffer, 1, compressed_size, output);
// Step 3: Write sync packet AFTER video (matches working file pattern)
uint8_t sync[2] = {SYNC_PACKET_TYPE};
fwrite(sync, 1, 2, output);
// Track video bytes written (packet type + size + compressed data + sync)
config->total_output_bytes += 2 + 4 + compressed_size + 2;
// Swap frame buffers
// The frame just encoded becomes the reference for the next delta.
// NOTE(review): the reference is the full encoding even when a delta with skipped blocks was
// written, so it may differ from what a decoder reconstructs until the next keyframe - confirm intended.
uint8_t *temp = config->previous_ipf_frame;
config->previous_ipf_frame = config->current_ipf_frame;
config->current_ipf_frame = temp;
// Display progress
display_progress(config, frame_num);
return 1;
}
// Release everything owned by the encoder configuration: close the FFmpeg
// pipe and the MP2 file, free all strings and buffers, delete the temporary
// audio file, then free the configuration itself. Safe to call with NULL.
static void cleanup_config(encoder_config_t *config) {
    if (config == NULL) {
        return;
    }

    if (config->ffmpeg_video_pipe) {
        pclose(config->ffmpeg_video_pipe);
    }
    if (config->mp2_file) {
        fclose(config->mp2_file);
    }

    // free(NULL) is a no-op, so fields that were never allocated are fine.
    void *owned[] = {
        config->input_file,
        config->output_file,
        config->rgb_buffer,
        config->previous_ipf_frame,
        config->current_ipf_frame,
        config->delta_buffer,
        config->compressed_buffer,
        config->mp2_buffer,
    };
    for (size_t i = 0; i < sizeof(owned) / sizeof(owned[0]); i++) {
        free(owned[i]);
    }

    // Remove temporary audio file
    unlink(TEMP_AUDIO_FILE);

    free(config);
}
// Write the command-line help text to stdout. program_name is substituted
// into the usage line and into each example command.
static void print_usage(const char *program_name) {
    printf("TVDOS Movie Encoder\n\n"
           "Usage: %s [options] input_video\n\n",
           program_name);

    // Fixed text containing no format specifiers.
    fputs("Options:\n"
          " -o, --output FILE Output TVDOS movie file (default: stdout)\n"
          " -s, --size WxH Video resolution (default: 560x448)\n"
          " -d, --dither MODE Dithering mode (default: 1)\n"
          " 0: No dithering\n"
          " 1: Static pattern\n"
          " 2: Dynamic pattern (better quality, larger files)\n"
          " -h, --help Show this help message\n\n"
          "Examples:\n",
          stdout);

    printf(" %s input.mp4 -o output.mov\n"
           " %s input.avi -s 1024x768 -o output.mov\n"
           " yt-dlp -o - \"https://youtube.com/watch?v=VIDEO_ID\" | ffmpeg -i pipe:0 -c copy temp.mp4 && %s temp.mp4 -o youtube_video.mov && rm temp.mp4\n",
           program_name, program_name, program_name);
}
// Entry point: parse options, probe the input, start the FFmpeg video and
// audio conversions, then write a TVDOS movie (header followed by
// audio/video/sync packets for each frame) to the chosen output.
// Exit status is 0 on success (including an early end of the video stream)
// and 1 on a usage error, a setup failure, a frame-processing error, or a
// failure to flush/close the output.
int main(int argc, char *argv[]) {
    encoder_config_t *config = init_encoder_config();
    if (!config) {
        fprintf(stderr, "Failed to initialise encoder\n");
        return 1;
    }
    config->output_to_stdout = 1; // Default to stdout
    config->dither_mode = 1;      // Default to static dithering

    // Parse command line arguments
    static struct option long_options[] = {
        {"output", required_argument, 0, 'o'},
        {"size", required_argument, 0, 's'},
        {"dither", required_argument, 0, 'd'},
        {"help", no_argument, 0, 'h'},
        {0, 0, 0, 0}
    };
    int c;
    while ((c = getopt_long(argc, argv, "o:s:d:h", long_options, NULL)) != -1) {
        switch (c) {
            case 'o':
                // A repeated -o replaces the earlier value instead of leaking it.
                free(config->output_file);
                config->output_file = strdup(optarg);
                if (!config->output_file) {
                    fprintf(stderr, "Out of memory\n");
                    cleanup_config(config);
                    return 1;
                }
                config->output_to_stdout = 0;
                break;
            case 's':
                if (!parse_resolution(optarg, &config->width, &config->height)) {
                    fprintf(stderr, "Invalid resolution format: %s\n", optarg);
                    cleanup_config(config);
                    return 1;
                }
                break;
            case 'd':
                config->dither_mode = atoi(optarg);
                if (config->dither_mode < 0 || config->dither_mode > 2) {
                    fprintf(stderr, "Invalid dither mode: %s (must be 0, 1, or 2)\n", optarg);
                    cleanup_config(config);
                    return 1;
                }
                break;
            case 'h':
                print_usage(argv[0]);
                cleanup_config(config);
                return 0;
            default:
                print_usage(argv[0]);
                cleanup_config(config);
                return 1;
        }
    }

    if (optind >= argc) {
        fprintf(stderr, "Error: Input video file required\n\n");
        print_usage(argv[0]);
        cleanup_config(config);
        return 1;
    }
    config->input_file = strdup(argv[optind]);
    if (!config->input_file) {
        fprintf(stderr, "Out of memory\n");
        cleanup_config(config);
        return 1;
    }

    // Get video metadata
    if (!get_video_metadata(config)) {
        fprintf(stderr, "Failed to analyze video metadata\n");
        cleanup_config(config);
        return 1;
    }

    // Allocate buffers
    if (!allocate_buffers(config)) {
        fprintf(stderr, "Failed to allocate memory buffers\n");
        cleanup_config(config);
        return 1;
    }

    // Start video conversion
    if (!start_video_conversion(config)) {
        fprintf(stderr, "Failed to start video conversion\n");
        cleanup_config(config);
        return 1;
    }

    // Start audio conversion
    if (!start_audio_conversion(config)) {
        fprintf(stderr, "Failed to start audio conversion\n");
        cleanup_config(config);
        return 1;
    }

    // Open output
    FILE *output = config->output_to_stdout ? stdout : fopen(config->output_file, "wb");
    if (!output) {
        fprintf(stderr, "Failed to open output file\n");
        cleanup_config(config);
        return 1;
    }

    // Write TVDOS header
    write_tvdos_header(config, output);

    // Initialise progress tracking
    gettimeofday(&config->start_time, NULL);
    config->last_progress_time = config->start_time;
    config->total_output_bytes = 8 + 2 + 2 + 2 + 4 + 2 + 2 + 10; // TVDOS header size

    // Process frames with correct order: Audio -> Video -> Sync
    int exit_code = 0;
    for (int frame = 1; frame <= config->total_frames; frame++) {
        // Frame 1 and every 30th frame are forced to be full (non-delta) frames.
        int is_keyframe = (frame == 1) || (frame % 30 == 0);
        int result = process_frame(config, frame, is_keyframe, output);
        if (result <= 0) {
            if (result == 0) {
                // The stream ended before the expected frame count: not an error.
                fprintf(stderr, "End of video at frame %d\n", frame);
            } else {
                // A real failure must be visible in the exit status.
                fprintf(stderr, "\nEncoding failed at frame %d\n", frame);
                exit_code = 1;
            }
            break;
        }
    }

    // Final progress update and newline
    fprintf(stderr, "\n");

    if (!config->output_to_stdout) {
        // fclose flushes buffered data; a failure here means a truncated file.
        if (fclose(output) != 0) {
            fprintf(stderr, "Failed to finalise output file: %s\n", config->output_file);
            exit_code = 1;
        } else if (exit_code == 0) {
            fprintf(stderr, "Encoding complete: %s\n", config->output_file);
        }
    } else if (fflush(stdout) != 0) {
        fprintf(stderr, "Failed to flush output to stdout\n");
        exit_code = 1;
    }

    cleanup_config(config);
    return exit_code;
}

View File

@@ -1,183 +0,0 @@
// Created by CuriousTorvald and Claude on 2025-10-17
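// Block-based bidirectional motion estimation and compensation helpers for the TAV encoder
// Motion vectors come from dense Farneback optical flow sampled at block centres (1/4-pixel units); compensation warps fixed-size blocks with bilinear sub-pixel interpolation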
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <opencv2/opencv.hpp>
extern "C" {
// Dense optical flow estimation using Farneback algorithm
// Computes flow at every pixel, then samples at block centers for motion vectors
// Much more spatially coherent than independent block matching
void estimate_optical_flow_motion(
const float *current_y, // Current frame Y channel (width×height)
const float *reference_y, // Reference frame Y channel
int width, int height,
int block_size, // Block size (e.g., 16)
int16_t *mvs_x, // Output: motion vectors X (in 1/4-pixel units)
int16_t *mvs_y // Output: motion vectors Y (in 1/4-pixel units)
) {
// Convert float Y channels to 8-bit grayscale for OpenCV
cv::Mat cur_gray(height, width, CV_8UC1);
cv::Mat ref_gray(height, width, CV_8UC1);
// Detect if Y is in [0,1] range and scale to [0,255] if needed
float y_min = current_y[0], y_max = current_y[0];
for (int i = 1; i < width * height; i++) {
if (current_y[i] < y_min) y_min = current_y[i];
if (current_y[i] > y_max) y_max = current_y[i];
}
float scale = (y_max <= 1.1f) ? 255.0f : 1.0f;
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
int idx = y * width + x;
cur_gray.at<uint8_t>(y, x) = (uint8_t)std::round(std::max(0.0f, std::min(255.0f, current_y[idx] * scale)));
ref_gray.at<uint8_t>(y, x) = (uint8_t)std::round(std::max(0.0f, std::min(255.0f, reference_y[idx] * scale)));
}
}
// Compute dense optical flow using Farneback algorithm
// IMPORTANT: We need BACKWARD flow (current → reference) for motion compensation
// This tells us where to PULL pixels FROM in the reference frame
cv::Mat flow;
cv::calcOpticalFlowFarneback(
cur_gray, // Current frame (source)
ref_gray, // Reference frame (destination)
flow, // Output flow (2-channel float: dx, dy per pixel)
0.5, // pyr_scale: pyramid scale (0.5 = each layer is half size)
3, // levels: number of pyramid levels
20, // winsize: averaging window size
3, // iterations: number of iterations at each pyramid level
5, // poly_n: size of pixel neighborhood (5 or 7)
1.2, // poly_sigma: standard deviation of Gaussian for polynomial expansion
0 // flags: 0 = normal, OPTFLOW_USE_INITIAL_FLOW = use input flow as initial estimate
);
// Sample flow at block centers to get motion vectors
int num_blocks_x = (width + block_size - 1) / block_size;
int num_blocks_y = (height + block_size - 1) / block_size;
for (int by = 0; by < num_blocks_y; by++) {
for (int bx = 0; bx < num_blocks_x; bx++) {
int block_idx = by * num_blocks_x + bx;
// Block center position
int center_x = bx * block_size + block_size / 2;
int center_y = by * block_size + block_size / 2;
// Clamp to frame boundaries
if (center_x >= width) center_x = width - 1;
if (center_y >= height) center_y = height - 1;
// Get flow at block center
cv::Point2f flow_vec = flow.at<cv::Point2f>(center_y, center_x);
// Convert to 1/4-pixel units and store
// Flow is in pixels, positive = motion to the right/down
mvs_x[block_idx] = (int16_t)std::round(flow_vec.x * 4.0f);
mvs_y[block_idx] = (int16_t)std::round(flow_vec.y * 4.0f);
}
}
}
// Block-based motion compensation with bilinear (sub-pixel) interpolation.
// Implements the warp() step of the MC-EZBC pseudocode.
//
// src           : source frame, width x height floats, row-major
// mvs_x / mvs_y : one motion vector per block in raster order, 1/4-pixel units
// block_size    : block edge length in pixels (e.g. 16)
// dst           : output frame, same layout as src
//
// Every destination pixel (x, y) is fetched from (x - mv_x, y - mv_y) in the
// source (backward warping); the source position is clamped to the frame.
void warp_block_motion(
    const float *src, // Source frame
    int width, int height,
    const int16_t *mvs_x, // Motion vectors X (1/4-pixel units)
    const int16_t *mvs_y, // Motion vectors Y (1/4-pixel units)
    int block_size,       // Block size (e.g., 16)
    float *dst            // Output warped frame
) {
    const int blocks_across = (width + block_size - 1) / block_size;
    const int blocks_down = (height + block_size - 1) / block_size;
    const float max_x = (float)(width - 1);
    const float max_y = (float)(height - 1);

    for (int by = 0; by < blocks_down; by++) {
        const int y_begin = by * block_size;
        const int y_end = std::min(y_begin + block_size, height);

        for (int bx = 0; bx < blocks_across; bx++) {
            const int x_begin = bx * block_size;
            const int x_end = std::min(x_begin + block_size, width);

            // This block's displacement, converted from 1/4-pixel units to pixels
            const int block = by * blocks_across + bx;
            const float shift_x = mvs_x[block] / 4.0f;
            const float shift_y = mvs_y[block] / 4.0f;

            for (int y = y_begin; y < y_end; y++) {
                // The vertical source position is shared by the whole row.
                float sample_y = y - shift_y;
                sample_y = std::max(0.0f, std::min(max_y, sample_y));
                const int y0 = (int)sample_y;
                const int y1 = std::min(y0 + 1, height - 1);
                const float fy = sample_y - y0;
                const float *row0 = src + y0 * width;
                const float *row1 = src + y1 * width;
                float *out_row = dst + y * width;

                for (int x = x_begin; x < x_end; x++) {
                    float sample_x = x - shift_x;
                    sample_x = std::max(0.0f, std::min(max_x, sample_x));
                    const int x0 = (int)sample_x;
                    const int x1 = std::min(x0 + 1, width - 1);
                    const float fx = sample_x - x0;

                    // Bilinear blend: horizontally on both rows, then vertically
                    const float top = (1.0f - fx) * row0[x0] + fx * row0[x1];
                    const float bottom = (1.0f - fx) * row1[x0] + fx * row1[x1];
                    out_row[x] = (1.0f - fy) * top + fy * bottom;
                }
            }
        }
    }
}
// Bidirectional motion compensation for the MC-EZBC predict step.
// Implements: prediction = 0.5 * (warp(f0, MV_fwd) + warp(f1, MV_bwd))
//
// f0 / f1    : the two reference frames, width x height floats each
// mvs_fwd_*  : per-block vectors applied to f0 (F0 → F1), 1/4-pixel units
// mvs_bwd_*  : per-block vectors applied to f1 (F1 → F0), 1/4-pixel units
// block_size : block edge length in pixels
// prediction : output, width x height floats
//
// Does nothing (prediction untouched) when a dimension or block_size is not
// positive. Both warped frames live in one owned scratch allocation, so
// nothing leaks if an allocation fails part-way and the buffer is released on
// every exit path.
void warp_bidirectional(
    const float *f0, const float *f1,
    int width, int height,
    const int16_t *mvs_fwd_x, const int16_t *mvs_fwd_y, // F0 → F1
    const int16_t *mvs_bwd_x, const int16_t *mvs_bwd_y, // F1 → F0
    int block_size,
    float *prediction // Output: 0.5 * (warped_f0 + warped_f1)
) {
    if (width <= 0 || height <= 0 || block_size <= 0) {
        return;
    }

    const std::size_t num_pixels = static_cast<std::size_t>(width) * static_cast<std::size_t>(height);

    // One scratch buffer holding both warped frames back to back.
    std::unique_ptr<float[]> scratch(new float[2 * num_pixels]);
    float *warped_f0 = scratch.get();
    float *warped_f1 = warped_f0 + num_pixels;

    // Warp f0 forward using forward MVs
    warp_block_motion(f0, width, height, mvs_fwd_x, mvs_fwd_y, block_size, warped_f0);
    // Warp f1 backward using backward MVs
    warp_block_motion(f1, width, height, mvs_bwd_x, mvs_bwd_y, block_size, warped_f1);

    // Average the two warped frames
    for (std::size_t i = 0; i < num_pixels; i++) {
        prediction[i] = 0.5f * (warped_f0[i] + warped_f1[i]);
    }
}
} // extern "C"

View File

@@ -1,795 +0,0 @@
/*
encoder_tav_text.c
Text-based video encoder for TSVM using custom font ROMs
Outputs Videotex files with custom header and packet type 0x3F (text mode)
File structure:
- Videotex header (32 bytes): magic "\x1FTSVMTAV", version, grid dims, fps, total_frames
- Extended header packet (0xEF): BGNT, ENDT, CDAT, VNDR, FMPG
- Font ROM packets (0x30): lowrom and highrom (1920 bytes each)
- Per-frame sequence: [audio 0x20], [timecode 0xFD], [videotex 0x3F], [sync 0xFF]
Videotex packet structure (0x3F): Zstd([rows][cols][fg-array][bg-array][char-array])
- rows: uint8 (32)
- cols: uint8 (80)
- fg-array: rows*cols bytes (foreground colors, 0xF0=black, 0xFE=white)
- bg-array: rows*cols bytes (background colors, 0xF0=black, 0xFE=white)
- char-array: rows*cols bytes (glyph indices 0-255)
Total uncompressed size: 2 + (80*32*3) = 7682 bytes
Separated arrays compress much better (fg/bg are just 0xF0/0xFE runs)
Video size: 80×32 characters (560×448 pixels with 7×14 font)
Audio: MP2 encoding at 96 kbps, 32 KHz stereo (packet 0x20)
Each text frame is treated as an I-frame with sync packet
Usage:
gcc -Ofast -std=c11 -Wall encoder_tav_text.c -o encoder_tav_text -lm -lzstd
./encoder_tav_text -i video.mp4 -f font.chr -o output.mv3
*/
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
#include <zstd.h>
#include <unistd.h>
#include <time.h>
#include <sys/time.h>
#define ENCODER_VENDOR_STRING "Encoder-TAV-Text 20251121 (videotex)"
#define CHAR_W 7
#define CHAR_H 14
#define GRID_W 80
#define GRID_H 32
#define PIXEL_W (GRID_W * CHAR_W) // 560
#define PIXEL_H (GRID_H * CHAR_H) // 448
#define PATCH_SZ (CHAR_W * CHAR_H)
#define SAMPLE_RATE 32000
#define MP2_DEFAULT_PACKET_SIZE 1152
// TAV packet types
#define PACKET_TIMECODE 0xFD
#define PACKET_SYNC 0xFF
#define PACKET_AUDIO_MP2 0x20
#define PACKET_SSF 0x30
#define PACKET_TEXT 0x3F
#define PACKET_EXTENDED_HDR 0xEF
// SSF opcodes for font ROM
#define SSF_OPCODE_LOWROM 0x80
#define SSF_OPCODE_HIGHROM 0x81
// Font ROM size constants
#define FONTROM_PADDED_SIZE 1920
#define GLYPHS_PER_ROM 128
// Color mapping (4-bit RGB to TSVM palette)
#define COLOR_BLACK 0xF0
#define COLOR_WHITE 0xFE
// Fill `filename` with a temporary audio path of the form
// "/tmp/<32 random alphanumeric characters>.mp2". The buffer must hold at
// least 42 bytes (41 characters plus the terminating NUL).
// Reseeds rand() from the current time on every call.
static void generate_random_filename(char *filename) {
    static const char alphabet[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    const int alphabet_len = sizeof(alphabet) - 1;

    srand(time(NULL));

    char *cursor = filename;

    // Directory prefix
    memcpy(cursor, "/tmp/", 5);
    cursor += 5;

    // 32 random characters
    for (int remaining = 32; remaining > 0; remaining--) {
        *cursor++ = alphabet[rand() % alphabet_len];
    }

    // Extension; copying 5 bytes includes the terminating NUL
    memcpy(cursor, ".mp2", 5);
}
// Path buffer for the temporary audio file. 42 bytes is exactly the size of
// the name generate_random_filename() produces: "/tmp/" (5) + 32 random
// characters + ".mp2" (4) + NUL. Presumably filled by that function - confirm
// at the call site.
char TEMP_AUDIO_FILE[42];
// Global flag to disable inverted character matching
// When non-zero, find_best_glyph() skips the inverted (swapped fg/bg) comparison.
int g_no_invert_char = 0;
// Unpacked font ROM as produced by load_font_rom().
typedef struct {
uint8_t *data; // Binary glyph data (PATCH_SZ bytes per glyph)
int count; // Number of glyphs
} FontROM;
// Run `ffmpeg -version` and return its first output line (stderr included)
// without the trailing newline. The result is heap-allocated and must be
// freed by the caller. Returns NULL if the command cannot be started, memory
// cannot be allocated, or no output is produced.
char *get_ffmpeg_version(void) {
    FILE *pipe = popen("ffmpeg -version 2>&1 | head -1", "r");
    if (!pipe) return NULL;

    char *line = malloc(256);
    if (line && !fgets(line, 256, pipe)) {
        // No output at all: report failure.
        free(line);
        line = NULL;
    }
    pclose(pipe);

    if (line) {
        // Remove trailing newline
        size_t len = strlen(line);
        if (len > 0 && line[len - 1] == '\n') {
            line[len - 1] = '\0';
        }
    }
    return line;
}
// Detect video FPS using ffprobe
float detect_fps(const char *video_path) {
char cmd[1024];
snprintf(cmd, sizeof(cmd),
"ffprobe -v error -select_streams v:0 -show_entries stream=r_frame_rate "
"-of default=noprint_wrappers=1:nokey=1 \"%s\" 2>/dev/null",
video_path);
FILE *pipe = popen(cmd, "r");
if (!pipe) return 30.0f; // fallback
char fps_str[64] = {0};
if (fgets(fps_str, sizeof(fps_str), pipe)) {
// Parse fraction like "30/1" or "24000/1001"
int num = 0, den = 1;
if (sscanf(fps_str, "%d/%d", &num, &den) == 2 && den > 0) {
pclose(pipe);
return (float)num / (float)den;
}
}
pclose(pipe);
return 30.0f; // fallback
}
// Load a font ROM file: CHAR_H bytes per glyph (one byte per pixel row), no
// header. Each glyph is unpacked to PATCH_SZ bytes holding one 0/1 value per
// pixel, row-major, with bit 6 of a row byte as the leftmost pixel.
//
// Returns a heap-allocated FontROM (the caller frees rom->data and rom), or
// NULL when the file cannot be opened, sized or fully read, or when memory
// cannot be allocated. Trailing bytes that do not form a whole glyph are
// ignored after a warning on stderr.
FontROM *load_font_rom(const char *path) {
    FILE *f = fopen(path, "rb");
    if (!f) return NULL;

    // Determine the file size; any seek/tell failure aborts the load.
    if (fseek(f, 0, SEEK_END) != 0) {
        fclose(f);
        return NULL;
    }
    long size = ftell(f);
    if (size < 0 || fseek(f, 0, SEEK_SET) != 0) {
        fclose(f);
        return NULL;
    }

    if (size % CHAR_H != 0) {
        fprintf(stderr, "Warning: ROM size not divisible by 14 (got %ld bytes)\n", size);
    }
    int glyph_count = (int)(size / CHAR_H);

    FontROM *rom = malloc(sizeof(FontROM));
    if (!rom) {
        fclose(f);
        return NULL;
    }
    rom->count = glyph_count;
    // Request at least one byte so an empty ROM is not mistaken for an
    // allocation failure on platforms where malloc(0) returns NULL.
    size_t data_bytes = (size_t)glyph_count * PATCH_SZ;
    rom->data = malloc(data_bytes > 0 ? data_bytes : 1);
    if (!rom->data) {
        free(rom);
        fclose(f);
        return NULL;
    }

    // Read and unpack glyphs
    for (int g = 0; g < glyph_count; g++) {
        uint8_t row_bytes[CHAR_H];
        if (fread(row_bytes, sizeof(row_bytes), 1, f) != 1) {
            free(rom->data);
            free(rom);
            fclose(f);
            return NULL;
        }
        // Unpack bits to binary pixels
        uint8_t *glyph = rom->data + (size_t)g * PATCH_SZ;
        for (int row = 0; row < CHAR_H; row++) {
            for (int col = 0; col < CHAR_W; col++) {
                // Bit 6 = leftmost, bit 0 = rightmost
                glyph[row * CHAR_W + col] = (row_bytes[row] >> (6 - col)) & 1;
            }
        }
    }

    fclose(f);
    fprintf(stderr, "Loaded font ROM: %d glyphs\n", glyph_count);
    return rom;
}
// Find the glyph that best reproduces a grayscale patch of PATCH_SZ pixels.
// Each glyph is compared as-is (set pixels = 255, clear pixels = 0) and,
// unless g_no_invert_char is set, inverted (set = 0, clear = 255), using the
// sum of squared differences. The first candidate with the lowest error wins.
// Returns the glyph index and stores the colours through out_bg / out_fg:
// black background with white foreground for a normal match, swapped for an
// inverted one. An empty ROM yields glyph 0 with the normal colours.
int find_best_glyph(const uint8_t *patch, const FontROM *rom, uint8_t *out_bg, uint8_t *out_fg) {
    int winner = 0;
    int winner_inverted = 0;
    float lowest_error = INFINITY;

    const uint8_t *glyph = rom->data;
    for (int g = 0; g < rom->count; g++, glyph += PATCH_SZ) {
        // Normal polarity: glyph 1 = fg, glyph 0 = bg
        float error = 0;
        for (int i = 0; i < PATCH_SZ; i++) {
            const int diff = patch[i] - (glyph[i] ? 255 : 0);
            error += diff * diff;
        }
        if (error < lowest_error) {
            lowest_error = error;
            winner = g;
            winner_inverted = 0;
        }

        if (g_no_invert_char) {
            continue;
        }

        // Inverted polarity: glyph 0 = fg, glyph 1 = bg
        float inverted_error = 0;
        for (int i = 0; i < PATCH_SZ; i++) {
            const int diff = patch[i] - (glyph[i] ? 0 : 255);
            inverted_error += diff * diff;
        }
        if (inverted_error < lowest_error) {
            lowest_error = inverted_error;
            winner = g;
            winner_inverted = 1;
        }
    }

    *out_bg = winner_inverted ? COLOR_WHITE : COLOR_BLACK;
    *out_fg = winner_inverted ? COLOR_BLACK : COLOR_WHITE;
    return winner;
}
// Convert a grayscale frame (PIXEL_W bytes per row) to text mode.
// The frame is cut into a GRID_W x GRID_H grid of CHAR_W x CHAR_H cells; for
// each cell the best matching glyph and its colours are stored at the cell's
// raster index in chars, bg_col and fg_col.
void frame_to_text(const uint8_t *pixels, const FontROM *rom,
                   uint8_t *bg_col, uint8_t *fg_col, uint8_t *chars) {
    uint8_t patch[PATCH_SZ];
    int cell = 0;

    for (int row = 0; row < GRID_H; row++) {
        for (int col = 0; col < GRID_W; col++, cell++) {
            // Copy the cell out one scanline at a time
            const uint8_t *cell_origin = pixels + (row * CHAR_H) * PIXEL_W + col * CHAR_W;
            for (int y = 0; y < CHAR_H; y++) {
                memcpy(patch + y * CHAR_W, cell_origin + y * PIXEL_W, CHAR_W);
            }

            // Find best match
            chars[cell] = find_best_glyph(patch, rom, &bg_col[cell], &fg_col[cell]);
        }
    }
}
// Current wall-clock time as nanoseconds since the UNIX epoch. The clock is
// read with microsecond resolution, so the result is a multiple of 1000.
uint64_t get_current_time_ns(void) {
    struct timeval now;
    gettimeofday(&now, NULL);

    const uint64_t seconds_ns = (uint64_t)now.tv_sec * 1000000000ULL;
    const uint64_t micros_ns = (uint64_t)now.tv_usec * 1000ULL;
    return seconds_ns + micros_ns;
}
// Derive the length in bytes of an MP2 frame from its header.
//
// header: at least 3 readable bytes of the frame header; only header[2]
//         (bitrate index, sampling-frequency index, padding bit) is inspected.
// Returns 144 * bitrate / sampling_freq + padding, or MP2_DEFAULT_PACKET_SIZE
// when the bitrate index is 0 or 15 or the sampling-frequency index is 3.
int get_mp2_packet_size(uint8_t *header) {
    // Bitrates in kbit/s, indexed by the 4-bit bitrate field
    static const int kbps_by_index[15] = {
        0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384
    };
    // Sampling frequencies in Hz, indexed by the 2-bit frequency field
    static const int hz_by_index[4] = {44100, 48000, 32000, 0};

    const int control = header[2];

    const int rate_index = (control >> 4) & 0x0F;
    if (rate_index >= 15 || kbps_by_index[rate_index] == 0) {
        return MP2_DEFAULT_PACKET_SIZE;
    }

    const int hz = hz_by_index[(control >> 2) & 0x03];
    if (hz == 0) {
        return MP2_DEFAULT_PACKET_SIZE;
    }

    const int pad_byte = (control >> 1) & 0x01;
    return (144 * kbps_by_index[rate_index] * 1000) / hz + pad_byte;
}
// Write the 32-byte Videotex file header (same layout as a TAV header, with
// the video-coding fields zeroed).
//
// Layout (byte offsets):
//    0  magic "\x1FTSVMTAV" (8 bytes)
//    8  version = 1 (uint8)
//    9  grid width  = GRID_W (uint16)
//   11  grid height = GRID_H (uint16)
//   13  fps (uint8)
//   14  total frames (uint32) - patched by the caller once encoding finishes
//   18  14 single-byte fields, see `tail` below
//
// Multi-byte fields are written in host byte order.
// NOTE(review): the format appears to be little-endian, so this presumably
// assumes a little-endian host - confirm.
void write_videotex_header(FILE *f, uint8_t fps, uint32_t total_frames) {
    // Fixed trailing fields, in file order
    static const uint8_t tail[14] = {
        0,     // wavelet filter type
        0,     // decomposition levels
        0,     // quantiser Y
        0,     // quantiser Co
        0,     // quantiser Cg
        0x03,  // feature flags: bit 0 = has audio; bit 1 = has subtitle (Videotex is classified as subtitles)
        0x80,  // video flags: bit 7 = has no video (Videotex is classified as subtitles)
        0,     // encoder quality level
        0x02,  // channel layout: Y only
        0,     // entropy coder
        0,     // reserved
        0,     // reserved
        0,     // device orientation: no rotation
        0      // file role: generic
    };
    const uint16_t grid_dims[2] = {GRID_W, GRID_H};  // columns, then rows

    fwrite("\x1FTSVMTAV", 8, 1, f);
    fputc(1, f);                                      // version
    fwrite(grid_dims, sizeof(uint16_t), 2, f);
    fputc(fps, f);
    fwrite(&total_frames, sizeof(uint32_t), 1, f);
    fwrite(tail, 1, sizeof(tail), f);
}
// Write the extended header packet: a packet type byte, a uint16 pair count,
// then key/value pairs, each starting with a 4-character key and a type byte.
//
//   BGNT (Uint64) video begin time, always 0
//   ENDT (Uint64) video end time, written as 0 and patched by the caller later
//   CDAT (Uint64) creation time in nanoseconds since the UNIX epoch
//   VNDR (Bytes)  encoder name and version (ENCODER_VENDOR_STRING)
//   FMPG (Bytes)  FFmpeg version, only when ffmpeg_version is non-NULL
//
// Bytes values carry a uint16 length prefix; values longer than 65535 bytes
// are truncated so the prefix always matches the payload actually written.
// Multi-byte integers are written in host byte order.
//
// Returns the file offset of the 8-byte ENDT value, for later update.
long write_extended_header(FILE *f, uint64_t creation_time_ns, const char *ffmpeg_version) {
    fputc(PACKET_EXTENDED_HDR, f);

    // Helper macros for key-value pairs
#define WRITE_KV_UINT64(key_str, value) do { \
        fwrite(key_str, 1, 4, f); \
        uint8_t value_type = 0x04; /* Uint64 */ \
        fwrite(&value_type, 1, 1, f); \
        uint64_t val = (value); \
        fwrite(&val, sizeof(uint64_t), 1, f); \
    } while(0)

#define WRITE_KV_BYTES(key_str, data, len) do { \
        size_t kv_len = (len); /* evaluate the length expression once */ \
        if (kv_len > 0xFFFFu) kv_len = 0xFFFFu; /* length field is 16 bits */ \
        fwrite(key_str, 1, 4, f); \
        uint8_t value_type = 0x10; /* Bytes */ \
        fwrite(&value_type, 1, 1, f); \
        uint16_t length = (uint16_t)kv_len; \
        fwrite(&length, sizeof(uint16_t), 1, f); \
        fwrite((data), 1, kv_len, f); \
    } while(0)

    // Count key-value pairs (BGNT, ENDT, CDAT, VNDR, FMPG)
    uint16_t num_pairs = ffmpeg_version ? 5 : 4;  // FMPG is optional
    fwrite(&num_pairs, sizeof(uint16_t), 1, f);

    // BGNT: Video begin time (0 for frame 0)
    WRITE_KV_UINT64("BGNT", 0ULL);

    // ENDT: Video end time (placeholder, will be updated at end)
    long endt_offset = ftell(f);
    WRITE_KV_UINT64("ENDT", 0ULL);

    // CDAT: Creation time in nanoseconds since UNIX epoch
    WRITE_KV_UINT64("CDAT", creation_time_ns);

    // VNDR: Encoder name and version
    const char *vendor_str = ENCODER_VENDOR_STRING;
    WRITE_KV_BYTES("VNDR", vendor_str, strlen(vendor_str));

    // FMPG: FFmpeg version (if available)
    if (ffmpeg_version) {
        WRITE_KV_BYTES("FMPG", ffmpeg_version, strlen(ffmpeg_version));
    }

#undef WRITE_KV_UINT64
#undef WRITE_KV_BYTES

    // Offset of the ENDT value itself: skip the 4-byte key and the type byte
    return endt_offset + 4 + 1;
}
// Write one font ROM upload as an SSF packet (packet type PACKET_SSF).
//
// f:         output stream
// rom_data:  raw glyph bytes for this ROM half
// data_size: number of valid bytes in rom_data; anything beyond
//            FONTROM_PADDED_SIZE is dropped with a warning
// opcode:    SSF opcode (0x80=lowrom, 0x81=highrom)
//
// Packet structure:
//   [type][size:uint32][index:uint24][opcode:uint8][length:uint16][data][terminator:0x00]
// The data field is always FONTROM_PADDED_SIZE bytes: the ROM bytes followed
// by zero padding. The padding is streamed directly, so no scratch buffer is
// allocated and an oversized data_size can no longer overrun one.
// Multi-byte integers are written in host byte order.
void write_fontrom_packet(FILE *f, const uint8_t *rom_data, size_t data_size, uint8_t opcode) {
    const size_t padded_size = (size_t)FONTROM_PADDED_SIZE;

    size_t copy_size = data_size;
    if (copy_size > padded_size) {
        fprintf(stderr, "Warning: font ROM data (%zu bytes) exceeds %d bytes, truncating\n",
                data_size, FONTROM_PADDED_SIZE);
        copy_size = padded_size;
    }

    // Payload size: index (3) + opcode (1) + length (2) + data + terminator (1)
    uint32_t packet_size = 3 + 1 + 2 + FONTROM_PADDED_SIZE + 1;

    // Write packet type and size
    fputc(PACKET_SSF, f);
    fwrite(&packet_size, sizeof(uint32_t), 1, f);

    // Index (3 bytes, always 0 for font ROM)
    fputc(0, f);
    fputc(0, f);
    fputc(0, f);

    // Opcode (0x80=lowrom, 0x81=highrom)
    fputc(opcode, f);

    // Payload length (uint16)
    uint16_t payload_len = FONTROM_PADDED_SIZE;
    fwrite(&payload_len, sizeof(uint16_t), 1, f);

    // Font data followed by zero padding up to FONTROM_PADDED_SIZE
    if (copy_size > 0) {
        fwrite(rom_data, 1, copy_size, f);
    }
    for (size_t pad = copy_size; pad < padded_size; pad++) {
        fputc(0, f);
    }

    // Terminator
    fputc(0x00, f);

    fprintf(stderr, "Font ROM uploaded: %zu bytes (padded to %d), opcode 0x%02X\n",
            copy_size, FONTROM_PADDED_SIZE, opcode);
}
// Emit a timecode packet: the PACKET_TIMECODE type byte followed by the
// 64-bit timestamp in nanoseconds (host byte order).
void write_timecode(FILE *f, uint64_t timecode_ns) {
    const uint64_t stamp = timecode_ns;

    fputc(PACKET_TIMECODE, f);
    fwrite(&stamp, 1, sizeof(stamp), f);
}
// Emit a sync packet, which consists of the PACKET_SYNC type byte only.
void write_sync(FILE *f) {
    const int sync_marker = PACKET_SYNC;
    fputc(sync_marker, f);
}
// Emit an MP2 audio packet: type byte, uint32 payload size (host byte order),
// then `size` bytes of MP2 data copied verbatim from `data`.
void write_audio_mp2(FILE *f, const uint8_t *data, uint32_t size) {
    const uint32_t payload_bytes = size;

    fputc(PACKET_AUDIO_MP2, f);
    fwrite(&payload_bytes, 1, sizeof(payload_bytes), f);
    fwrite(data, sizeof(uint8_t), payload_bytes, f);
}
// Write one text-mode frame as a Zstd-compressed PACKET_TEXT packet.
//
// f:                      output stream
// bg_col, fg_col, chars:  per-cell arrays of rows * cols bytes each
// rows, cols:             grid size; each is stored in a single byte, so
//                         values outside 0..255 are rejected
//
// Uncompressed payload: [rows][cols][fg-array][bg-array][char-array].
// The three arrays are kept separate rather than interleaved because that
// compresses better. On-disk packet: [type][compressed size:uint32][data],
// with the size in host byte order.
// Terminates the process on invalid dimensions, allocation failure or a
// compression error.
void write_text_packet(FILE *f, const uint8_t *bg_col, const uint8_t *fg_col,
                       const uint8_t *chars, int rows, int cols) {
    if (rows < 0 || rows > 255 || cols < 0 || cols > 255) {
        fprintf(stderr, "Invalid text grid size: %d rows x %d cols (each must be 0..255)\n",
                rows, cols);
        exit(1);
    }

    size_t grid_size = (size_t)rows * (size_t)cols;
    size_t uncompressed_size = 2 + grid_size * 3;

    uint8_t *uncompressed = (uint8_t *)malloc(uncompressed_size);
    if (!uncompressed) {
        fprintf(stderr, "Out of memory preparing text packet\n");
        exit(1);
    }

    uncompressed[0] = (uint8_t)rows;
    uncompressed[1] = (uint8_t)cols;

    // Copy arrays in order: foreground, background, characters
    memcpy(&uncompressed[2], fg_col, grid_size);
    memcpy(&uncompressed[2 + grid_size], bg_col, grid_size);
    memcpy(&uncompressed[2 + grid_size * 2], chars, grid_size);

    // Compress with Zstd (level 3)
    size_t max_compressed = ZSTD_compressBound(uncompressed_size);
    uint8_t *compressed = (uint8_t *)malloc(max_compressed);
    if (!compressed) {
        fprintf(stderr, "Out of memory compressing text packet\n");
        free(uncompressed);
        exit(1);
    }

    size_t compressed_size = ZSTD_compress(compressed, max_compressed,
                                           uncompressed, uncompressed_size, 3);
    if (ZSTD_isError(compressed_size)) {
        fprintf(stderr, "Zstd compression error\n");
        free(compressed);
        free(uncompressed);
        exit(1);
    }

    // Write packet: [type][size][data]
    fputc(PACKET_TEXT, f);
    uint32_t size32 = (uint32_t)compressed_size;
    fwrite(&size32, 4, 1, f);
    fwrite(compressed, compressed_size, 1, f);

    free(compressed);
    free(uncompressed);
}
// Read the next MP2 frame from `mp2_file` and append it to `out` as one audio
// packet. The 4-byte frame header is peeked first to learn the frame length,
// then the stream is rewound so the whole frame (header included) is copied.
//
// buffer/buffer_size: scratch space for one frame
// audio_remaining:    bytes of audio not yet consumed; the read is clamped to it
// Returns the number of bytes consumed, or 0 when nothing could be written
// (end of data, I/O error, or a frame larger than the scratch buffer).
static size_t emit_next_mp2_packet(FILE *mp2_file, FILE *out, uint8_t *buffer,
                                   size_t buffer_size, long audio_remaining) {
    long pos = ftell(mp2_file);
    uint8_t header[4];
    if (pos < 0 || fread(header, 1, 4, mp2_file) != 4) return 0;
    if (fseek(mp2_file, pos, SEEK_SET) != 0) return 0;

    size_t bytes_to_read = (size_t)get_mp2_packet_size(header);
    if (bytes_to_read > (size_t)audio_remaining) {
        bytes_to_read = (size_t)audio_remaining;  // last, possibly short, frame
    }
    if (bytes_to_read > buffer_size) {
        fprintf(stderr, "ERROR: MP2 packet size %zu exceeds buffer\n", bytes_to_read);
        return 0;
    }

    size_t bytes_read = fread(buffer, 1, bytes_to_read, mp2_file);
    if (bytes_read == 0) return 0;

    write_audio_mp2(out, buffer, (uint32_t)bytes_read);
    return bytes_read;
}

// Re-read the raw font file and upload it to the stream as a lowrom packet
// (first GLYPHS_PER_ROM glyphs) and a highrom packet (the rest, possibly
// partial). Failure here is not fatal: a warning is printed and the stream
// simply carries no font.
static void upload_font_rom(FILE *out, const char *font_path) {
    FILE *rom_file = fopen(font_path, "rb");
    if (!rom_file) {
        fprintf(stderr, "Warning: cannot reopen font ROM %s, font not embedded\n", font_path);
        return;
    }

    long rom_size = -1;
    if (fseek(rom_file, 0, SEEK_END) == 0) {
        rom_size = ftell(rom_file);
    }
    if (rom_size <= 0 || fseek(rom_file, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Warning: cannot determine size of font ROM %s, font not embedded\n", font_path);
        fclose(rom_file);
        return;
    }

    size_t rom_bytes = (size_t)rom_size;
    uint8_t *raw_rom = (uint8_t *)malloc(rom_bytes);
    if (raw_rom && fread(raw_rom, 1, rom_bytes, rom_file) == rom_bytes) {
        // Split into lowrom and highrom: 14 bytes per glyph
        size_t bytes_per_half = (size_t)GLYPHS_PER_ROM * 14;

        // Write lowrom (first half)
        if (rom_bytes >= bytes_per_half) {
            write_fontrom_packet(out, raw_rom, bytes_per_half, SSF_OPCODE_LOWROM);
        }
        // Write highrom (second half, full or partial)
        if (rom_bytes >= bytes_per_half * 2) {
            write_fontrom_packet(out, raw_rom + bytes_per_half, bytes_per_half, SSF_OPCODE_HIGHROM);
        } else if (rom_bytes > bytes_per_half) {
            write_fontrom_packet(out, raw_rom + bytes_per_half, rom_bytes - bytes_per_half, SSF_OPCODE_HIGHROM);
        }
    } else {
        fprintf(stderr, "Warning: failed to read font ROM %s, font not embedded\n", font_path);
    }

    free(raw_rom);  // also released when the read fails
    fclose(rom_file);
}

// Encoder entry point.
//
// Usage: <prog> -i <video> -f <font.chr> -o <output.tav> [--no-invert-char]
//
// Decodes the input through an FFmpeg pipe as PIXEL_W x PIXEL_H grayscale
// frames, extracts MP2 audio to a temporary file, and writes a stream of
// header, extended header, font ROM packets, then per frame: audio packets,
// timecode, text packet, sync. The total-frame count and ENDT fields are
// patched once the frame count is known.
// Returns 0 on success, 1 on any error.
//
// NOTE(review): input_video is interpolated into shell command lines inside
// double quotes. A path containing `"`, `$` or backticks is interpreted by the
// shell - do not pass untrusted file names.
int main(int argc, char **argv) {
    if (argc < 7) {
        fprintf(stderr, "Usage: %s -i <video> -f <font.chr> -o <output.tav> [--no-invert-char]\n", argv[0]);
        return 1;
    }

    const char *input_video = NULL;
    const char *font_path = NULL;
    const char *output_path = NULL;
    for (int i = 1; i < argc; i++) {
        if (strcmp(argv[i], "-i") == 0 && i+1 < argc) input_video = argv[++i];
        else if (strcmp(argv[i], "-f") == 0 && i+1 < argc) font_path = argv[++i];
        else if (strcmp(argv[i], "-o") == 0 && i+1 < argc) output_path = argv[++i];
        else if (strcmp(argv[i], "--no-invert-char") == 0) g_no_invert_char = 1;
    }
    if (!input_video || !font_path || !output_path) {
        fprintf(stderr, "Missing required arguments\n");
        return 1;
    }
    if (g_no_invert_char) {
        fprintf(stderr, "Inverted character matching disabled\n");
    }

    // Everything released in the cleanup section is declared here, before the
    // first `goto cleanup`, so every exit path can release exactly what exists.
    int exit_code = 1;
    int temp_audio_created = 0;
    char *ffmpeg_version = NULL;
    FontROM *rom = NULL;
    FILE *video_pipe = NULL;
    FILE *mp2_file = NULL;
    FILE *out = NULL;
    uint8_t *gray_pixels = NULL;
    uint8_t *bg_col = NULL;
    uint8_t *fg_col = NULL;
    uint8_t *chars = NULL;
    uint8_t *audio_buffer = NULL;

    {
        // Generate random temp filename for audio
        generate_random_filename(TEMP_AUDIO_FILE);

        // Capture creation time and FFmpeg version for extended header
        uint64_t creation_time_ns = get_current_time_ns();
        ffmpeg_version = get_ffmpeg_version();

        // Detect video FPS; every timing value below divides by it
        float fps_float = detect_fps(input_video);
        if (!(fps_float > 0.0f)) {  // also rejects NaN
            fprintf(stderr, "Invalid frame rate detected: %.2f\n", fps_float);
            goto cleanup;
        }
        uint8_t fps;
        if (fps_float + 0.5f >= 256.0f) {
            fprintf(stderr, "Warning: frame rate %.2f does not fit the header field, storing 255\n", fps_float);
            fps = 255;
        } else {
            fps = (uint8_t)(fps_float + 0.5f);  // Round to nearest integer
        }
        fprintf(stderr, "Detected FPS: %.2f (using %d in TAV header)\n", fps_float, fps);

        // Load font ROM
        rom = load_font_rom(font_path);
        if (!rom) {
            fprintf(stderr, "Failed to load font ROM: %s\n", font_path);
            goto cleanup;
        }

        // Allocate buffers
        size_t frame_size = (size_t)PIXEL_W * PIXEL_H;
        gray_pixels = (uint8_t *)malloc(frame_size);
        bg_col = (uint8_t *)malloc(GRID_W * GRID_H);
        fg_col = (uint8_t *)malloc(GRID_W * GRID_H);
        chars = (uint8_t *)malloc(GRID_W * GRID_H);

        // Audio buffer for MP2 packets
        #define MP2_BUFFER_SIZE 2048
        audio_buffer = (uint8_t *)malloc(MP2_BUFFER_SIZE);

        if (!gray_pixels || !bg_col || !fg_col || !chars || !audio_buffer) {
            fprintf(stderr, "Out of memory allocating frame buffers\n");
            goto cleanup;
        }

        // Open FFmpeg pipe for grayscale frames at PIXEL_W x PIXEL_H
        char ffmpeg_cmd[1024];
        int cmd_len = snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
            "ffmpeg -i \"%s\" -vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" "
            "-f rawvideo -pix_fmt gray - 2>/dev/null",
            input_video, PIXEL_W, PIXEL_H, PIXEL_W, PIXEL_H);
        if (cmd_len < 0 || (size_t)cmd_len >= sizeof(ffmpeg_cmd)) {
            fprintf(stderr, "Input path is too long\n");
            goto cleanup;
        }
        fprintf(stderr, "Opening video stream...\n");
        video_pipe = popen(ffmpeg_cmd, "r");
        if (!video_pipe) {
            fprintf(stderr, "Failed to open FFmpeg pipe\n");
            goto cleanup;
        }

        // Extract MP2 audio to temporary file using libtwolame
        fprintf(stderr, "Extracting MP2 audio...\n");
        char audio_cmd[1024];
        int audio_cmd_len = snprintf(audio_cmd, sizeof(audio_cmd),
            "ffmpeg -v quiet -i \"%s\" -acodec libtwolame -psymodel 4 -b:a 224k -ar %d -ac 2 -y \"%s\" 2>/dev/null",
            input_video, SAMPLE_RATE, TEMP_AUDIO_FILE);
        if (audio_cmd_len < 0 || (size_t)audio_cmd_len >= sizeof(audio_cmd)) {
            fprintf(stderr, "Input path is too long\n");
            goto cleanup;
        }
        temp_audio_created = 1;  // FFmpeg may leave a file behind even when it fails
        int audio_result = system(audio_cmd);
        if (audio_result != 0) {
            fprintf(stderr, "Warning: Audio extraction failed, continuing without audio\n");
        }

        // Open MP2 file for reading
        long audio_remaining = 0;
        if (audio_result == 0) {
            mp2_file = fopen(TEMP_AUDIO_FILE, "rb");
            if (mp2_file) {
                fseek(mp2_file, 0, SEEK_END);
                audio_remaining = ftell(mp2_file);
                fseek(mp2_file, 0, SEEK_SET);
                fprintf(stderr, "Audio ready: %ld bytes\n", audio_remaining);
            }
        }

        // Open output file
        out = fopen(output_path, "wb");
        if (!out) {
            fprintf(stderr, "Failed to open output file\n");
            goto cleanup;
        }

        // Write Videotex header with placeholder total_frames (will update at end)
        long header_offset = ftell(out);
        write_videotex_header(out, fps, 0);

        // Write extended header packet (before first timecode)
        long endt_offset = write_extended_header(out, creation_time_ns, ffmpeg_version);

        // Upload font ROM to TSVM (split into lowrom and highrom)
        fprintf(stderr, "Uploading font ROM to TSVM...\n");
        upload_font_rom(out, font_path);

        uint32_t frame_num = 0;
        uint64_t total_audio_bytes = 0;

        // Audio timing calculation
        double frame_audio_time = 1.0 / fps_float;  // Time per video frame
        double packet_audio_time = (double)MP2_DEFAULT_PACKET_SIZE / SAMPLE_RATE;  // Time per audio packet
        double packets_per_frame = frame_audio_time / packet_audio_time;
        double audio_frames_in_buffer = 0.0;  // Simulated audio buffer level

        fprintf(stderr, "Encoding text-mode video (%dx%d chars, %dx%d pixels)...\n",
                GRID_W, GRID_H, PIXEL_W, PIXEL_H);

        // Track encoding start time
        struct timeval start_time, now;
        gettimeofday(&start_time, NULL);

        // Read and process frames
        while (fread(gray_pixels, 1, frame_size, video_pipe) == frame_size) {
            // Calculate timecode in nanoseconds
            uint64_t timecode_ns = (uint64_t)(frame_num * 1000000000.0 / fps_float);

            // Write audio packets for this frame (based on timing)
            if (mp2_file && audio_remaining > 0) {
                // Simulate buffer consumption
                audio_frames_in_buffer -= packets_per_frame;

                // Calculate how many packets we need to maintain buffer
                double target_level = fmax(packets_per_frame, 2.0);
                int packets_to_insert = 0;
                if (audio_frames_in_buffer < target_level) {
                    double deficit = target_level - audio_frames_in_buffer;
                    packets_to_insert = (int)ceil(deficit);
                }

                // Insert the calculated number of audio packets
                for (int q = 0; q < packets_to_insert; q++) {
                    size_t bytes_read = emit_next_mp2_packet(mp2_file, out, audio_buffer,
                                                             MP2_BUFFER_SIZE, audio_remaining);
                    if (bytes_read == 0) break;

                    audio_remaining -= (long)bytes_read;
                    audio_frames_in_buffer++;
                    total_audio_bytes += bytes_read;
                }
            }

            // Write timecode
            write_timecode(out, timecode_ns);

            // Convert to text mode
            frame_to_text(gray_pixels, rom, bg_col, fg_col, chars);

            // Write text packet (treated as I-frame)
            write_text_packet(out, bg_col, fg_col, chars, GRID_H, GRID_W);

            // Write sync packet after each frame
            write_sync(out);

            frame_num++;
            if (frame_num % 30 == 0) {
                // Calculate encoding speed
                gettimeofday(&now, NULL);
                double elapsed = (now.tv_sec - start_time.tv_sec) +
                                 (now.tv_usec - start_time.tv_usec) / 1000000.0;
                double encoding_fps = elapsed > 0.0 ? frame_num / elapsed : 0.0;
                fprintf(stderr, "\rEncoded %u frames (%.1f fps)", frame_num, encoding_fps);
                fflush(stderr);
            }
        }

        // Write any remaining audio
        while (mp2_file && audio_remaining > 0) {
            size_t bytes_read = emit_next_mp2_packet(mp2_file, out, audio_buffer,
                                                     MP2_BUFFER_SIZE, audio_remaining);
            if (bytes_read == 0) break;

            audio_remaining -= (long)bytes_read;
            total_audio_bytes += bytes_read;
        }

        // Final timing
        gettimeofday(&now, NULL);
        double total_time = (now.tv_sec - start_time.tv_sec) +
                            (now.tv_usec - start_time.tv_usec) / 1000000.0;
        double final_fps = total_time > 0.0 ? frame_num / total_time : 0.0;
        fprintf(stderr, "\nDone! Encoded %u frames in %.2fs (%.1f fps)\n",
                frame_num, total_time, final_fps);
        fprintf(stderr, "Audio: %llu bytes (%.2f MB)\n",
                (unsigned long long)total_audio_bytes,
                total_audio_bytes / 1024.0 / 1024.0);

        // Update total_frames in header (field sits 14 bytes into the header)
        if (frame_num > 0 && header_offset >= 0) {
            fseek(out, header_offset + 14, SEEK_SET);
            fwrite(&frame_num, sizeof(uint32_t), 1, out);
            fprintf(stderr, "Updated total_frames in header: %u\n", frame_num);
        }

        // Update ENDT in extended header (timestamp of the last frame)
        if (frame_num > 0) {
            // (frame_num - 1) frames * (1/fps) seconds, in nanoseconds
            uint64_t endt_ns = (uint64_t)((frame_num - 1) * 1000000000.0 / fps_float);
            fseek(out, endt_offset, SEEK_SET);
            fwrite(&endt_ns, sizeof(uint64_t), 1, out);
            fprintf(stderr, "Updated ENDT in extended header: %llu ns (%.3f seconds)\n",
                    (unsigned long long)endt_ns, endt_ns / 1000000000.0);
        }

        exit_code = 0;
    }

cleanup:
    if (video_pipe) pclose(video_pipe);
    if (mp2_file) fclose(mp2_file);
    if (temp_audio_created) unlink(TEMP_AUDIO_FILE);  // Remove temporary audio file
    if (out && fclose(out) != 0) {
        // Buffered write errors surface here
        fprintf(stderr, "Error: failed to finish writing %s\n", output_path);
        exit_code = 1;
    }
    free(gray_pixels);
    free(bg_col);
    free(fg_col);
    free(chars);
    free(audio_buffer);
    if (rom) {
        free(rom->data);
        free(rom);
    }
    free(ffmpeg_version);
    return exit_code;
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,169 +0,0 @@
// Affine estimation for TAV mesh warping
// This file contains logic to estimate per-cell affine transforms from block motion
#include <cmath>
#include <cstdlib>
#include <cstring>
extern "C" {
// Estimate an affine motion model for one mesh cell from a dense flow field.
//
// The flow is sampled on a 4x4 grid spanning the cell, and two models are
// fitted to the 16 motion vectors:
//   translation: v = (avg_dx, avg_dy)
//   affine:      [vx] = [a11 a12][px] + [tx]
//                [vy]   [a21 a22][py]   [ty]
// where (px, py) is the sample position relative to the cell centre. The
// affine fit is an ordinary least-squares solve of the 3x3 normal equations
// by Cramer's rule.
//
// flow_x, flow_y:   per-pixel motion, width * height floats each, row-major
// width, height:    flow field size in pixels
// cell_x, cell_y:   cell position in mesh coordinates
// cell_w, cell_h:   cell size in pixels
// threshold:        minimum relative residual improvement (e.g. 0.10 = 10%)
//                   required to prefer the affine model
// out_tx, out_ty:   translation, 1/8 pixel units
// out_a11..out_a22: matrix, 1/256 fixed point
//
// Returns 1 when the affine model is selected, 0 when translation-only is
// used (including singular fits and invalid input); in the latter case the
// matrix outputs are set to identity (256, 0, 0, 256).
//
// Fixed-point conversion truncates toward zero and saturates to the range of
// a 16-bit short.
//
// NOTE(review): in the affine case the matrix outputs are the fitted flow
// gradients scaled by 256, whereas the translation case reports identity as
// 256. Whether the consumer expects A or I + A is not visible here - confirm
// against the decoder.
int estimate_cell_affine(
    const float *flow_x, const float *flow_y,
    int width, int height,
    int cell_x, int cell_y,          // Cell position in mesh coordinates
    int cell_w, int cell_h,          // Cell size in pixels
    float threshold,                 // Residual improvement threshold (e.g. 0.10 = 10%)
    short *out_tx, short *out_ty,    // Translation (1/8 pixel)
    short *out_a11, short *out_a12,  // Affine matrix (1/256 fixed-point)
    short *out_a21, short *out_a22
) {
    // Truncate toward zero like a plain cast, but saturate instead of invoking
    // undefined behaviour when the value does not fit in a short.
    const auto to_fixed = [](double scaled) -> short {
        if (std::isnan(scaled)) return 0;
        if (scaled <= -32768.0) return -32768;
        if (scaled >= 32767.0) return 32767;
        return static_cast<short>(scaled);
    };

    // Report a pure translation with an identity matrix.
    const auto emit_translation = [&](float dx, float dy) {
        *out_tx = to_fixed(dx * 8.0f);
        *out_ty = to_fixed(dy * 8.0f);
        *out_a11 = 256;  // Identity
        *out_a12 = 0;
        *out_a21 = 0;
        *out_a22 = 256;
    };

    if (!flow_x || !flow_y || width <= 0 || height <= 0) {
        emit_translation(0.0f, 0.0f);
        return 0;
    }

    // Compute cell bounding box
    int x_start = cell_x * cell_w;
    int y_start = cell_y * cell_h;
    int x_end = (cell_x + 1) * cell_w;
    int y_end = (cell_y + 1) * cell_h;
    if (x_end > width) x_end = width;
    if (y_end > height) y_end = height;

    const int center_x = (x_start + x_end) / 2;
    const int center_y = (y_start + y_end) / 2;

    // Sample motion vectors from a 4x4 grid within the cell
    const int samples_x = 4;
    const int samples_y = 4;
    float sample_motion_x[16];
    float sample_motion_y[16];
    int sample_px[16];
    int sample_py[16];
    int n_samples = 0;

    for (int sy = 0; sy < samples_y; sy++) {
        for (int sx = 0; sx < samples_x; sx++) {
            int px = x_start + (x_end - x_start) * sx / (samples_x - 1);
            int py = y_start + (y_end - y_start) * sy / (samples_y - 1);

            // Keep the sample inside the flow field
            if (px >= width) px = width - 1;
            if (py >= height) py = height - 1;
            if (px < 0) px = 0;
            if (py < 0) py = 0;

            int idx = py * width + px;
            sample_motion_x[n_samples] = flow_x[idx];
            sample_motion_y[n_samples] = flow_y[idx];
            sample_px[n_samples] = px - center_x;  // Relative to cell centre
            sample_py[n_samples] = py - center_y;
            n_samples++;
        }
    }

    // 1. Translation-only model (average motion)
    float avg_dx = 0, avg_dy = 0;
    for (int i = 0; i < n_samples; i++) {
        avg_dx += sample_motion_x[i];
        avg_dy += sample_motion_y[i];
    }
    avg_dx /= n_samples;
    avg_dy /= n_samples;

    // Translation residual
    float trans_residual = 0;
    for (int i = 0; i < n_samples; i++) {
        float dx_err = sample_motion_x[i] - avg_dx;
        float dy_err = sample_motion_y[i] - avg_dy;
        trans_residual += dx_err * dx_err + dy_err * dy_err;
    }

    // 2. Affine model: accumulate the moments for the normal equations
    double sum_x = 0, sum_y = 0, sum_xx = 0, sum_yy = 0, sum_xy = 0;
    double sum_vx = 0, sum_vy = 0, sum_vx_x = 0, sum_vx_y = 0;
    double sum_vy_x = 0, sum_vy_y = 0;

    for (int i = 0; i < n_samples; i++) {
        double px = sample_px[i];
        double py = sample_py[i];
        double vx = sample_motion_x[i];
        double vy = sample_motion_y[i];
        sum_x += px;
        sum_y += py;
        sum_xx += px * px;
        sum_yy += py * py;
        sum_xy += px * py;
        sum_vx += vx;
        sum_vy += vy;
        sum_vx_x += vx * px;
        sum_vx_y += vx * py;
        sum_vy_x += vy * px;
        sum_vy_y += vy * py;
    }

    // Normal equations for v = a*px + b*py + t:
    //   | sum_xx sum_xy sum_x | |a|   | sum_v_x |
    //   | sum_xy sum_yy sum_y | |b| = | sum_v_y |
    //   | sum_x  sum_y  n     | |t|   | sum_v   |
    double n = n_samples;
    double det = n * sum_xx * sum_yy + 2 * sum_x * sum_y * sum_xy -
                 sum_xx * sum_y * sum_y - sum_yy * sum_x * sum_x - n * sum_xy * sum_xy;

    if (std::fabs(det) < 1e-6) {
        // Singular matrix (e.g. degenerate cell), fall back to translation
        emit_translation(avg_dx, avg_dy);
        return 0;
    }

    // Cramer's rule: replace the first (a) or second (b) column with the
    // right-hand side; t follows from the third equation.
    const auto solve = [&](double s_v_x, double s_v_y, double s_v,
                           double &a, double &b, double &t) {
        a = (s_v_x * (sum_yy * n - sum_y * sum_y)
             - sum_xy * (s_v_y * n - sum_y * s_v)
             + sum_x * (s_v_y * sum_y - sum_yy * s_v)) / det;
        b = (sum_xx * (s_v_y * n - sum_y * s_v)
             - s_v_x * (sum_xy * n - sum_y * sum_x)
             + sum_x * (sum_xy * s_v - s_v_y * sum_x)) / det;
        t = (s_v - a * sum_x - b * sum_y) / n;
    };

    double a11, a12, tx;
    double a21, a22, ty;
    solve(sum_vx_x, sum_vx_y, sum_vx, a11, a12, tx);
    solve(sum_vy_x, sum_vy_y, sum_vy, a21, a22, ty);

    // Affine residual
    float affine_residual = 0;
    for (int i = 0; i < n_samples; i++) {
        double px = sample_px[i];
        double py = sample_py[i];
        double pred_vx = a11 * px + a12 * py + tx;
        double pred_vy = a21 * px + a22 * py + ty;
        double dx_err = sample_motion_x[i] - pred_vx;
        double dy_err = sample_motion_y[i] - pred_vy;
        affine_residual += dx_err * dx_err + dy_err * dy_err;
    }

    // Decision: use affine only if the residual improves by more than threshold
    // (the epsilon keeps the ratio finite when the translation fit is exact)
    float improvement = (trans_residual - affine_residual) / (trans_residual + 1e-6f);

    if (improvement > threshold) {
        *out_tx = to_fixed(tx * 8.0f);
        *out_ty = to_fixed(ty * 8.0f);
        *out_a11 = to_fixed(a11 * 256.0);
        *out_a12 = to_fixed(a12 * 256.0);
        *out_a21 = to_fixed(a21 * 256.0);
        *out_a22 = to_fixed(a22 * 256.0);
        return 1;  // Affine
    }

    emit_translation(avg_dx, avg_dy);
    return 0;  // Translation only
}
} // extern "C"

View File

@@ -1,65 +0,0 @@
// Simple coefficient preprocessing for better compression
// Insert right before Zstd compression
#ifndef COEFFICIENT_COMPRESS_H
#define COEFFICIENT_COMPRESS_H
#include <stdint.h>
#include <string.h>
// Pack coefficients as a significance map followed by the non-zero values.
//
// coeffs:        input coefficients (read only by this function)
// coeff_count:   number of coefficients; values <= 0 produce no output
// output_buffer: receives ceil(coeff_count / 8) bytes of significance map
//                (bit i % 8 of byte i / 8 is set when coeffs[i] != 0),
//                immediately followed by the non-zero coefficients in input
//                order as native-endian int16. It must hold at least
//                ceil(coeff_count / 8) + 2 * coeff_count bytes and must not
//                overlap coeffs (the map is cleared before coeffs is read).
//
// Returns the number of bytes written to output_buffer.
//
// Values are stored with memcpy because the value area starts at an odd
// offset whenever the map has an odd number of bytes.
static size_t preprocess_coefficients(int16_t *coeffs, int coeff_count, uint8_t *output_buffer) {
    if (coeff_count <= 0) {
        return 0;
    }

    const size_t map_bytes = ((size_t)coeff_count + 7) / 8;  // Round up to whole bytes
    uint8_t *sig_map = output_buffer;
    uint8_t *values = output_buffer + map_bytes;

    // Clear significance map
    memset(sig_map, 0, map_bytes);

    // Fill significance map and extract non-zero values in a single pass
    size_t value_count = 0;
    for (int i = 0; i < coeff_count; i++) {
        if (coeffs[i] == 0) {
            continue;
        }
        sig_map[i / 8] |= (uint8_t)(1u << (i % 8));
        memcpy(values + value_count * sizeof(int16_t), &coeffs[i], sizeof(int16_t));
        value_count++;
    }

    return map_bytes + value_count * sizeof(int16_t);
}
// Decoder: rebuild a coefficient array from a significance map and value list.
//
// compressed_data: ceil(coeff_count / 8) bytes of significance map (bit i % 8
//                  of byte i / 8 set when coefficient i is non-zero), followed
//                  by the non-zero coefficients in order as native-endian int16
// coeff_count:     number of coefficients to reconstruct; values <= 0 do nothing
// output_coeffs:   receives coeff_count coefficients; positions whose map bit
//                  is clear are set to 0
//
// Values are loaded with memcpy because the value area starts at an odd
// offset whenever the map has an odd number of bytes.
static void postprocess_coefficients(uint8_t *compressed_data, int coeff_count, int16_t *output_coeffs) {
    if (coeff_count <= 0) {
        return;
    }

    const size_t map_bytes = ((size_t)coeff_count + 7) / 8;
    const uint8_t *sig_map = compressed_data;
    const uint8_t *values = compressed_data + map_bytes;

    // Clear output
    memset(output_coeffs, 0, (size_t)coeff_count * sizeof(int16_t));

    // Reconstruct coefficients
    size_t value_idx = 0;
    for (int i = 0; i < coeff_count; i++) {
        if (sig_map[i / 8] & (1u << (i % 8))) {
            memcpy(&output_coeffs[i], values + value_idx * sizeof(int16_t), sizeof(int16_t));
            value_idx++;
        }
    }
}
#endif // COEFFICIENT_COMPRESS_H

View File

@@ -1,39 +0,0 @@
#ifndef TAD32_DECODER_H
#define TAD32_DECODER_H
#include <stdint.h>
#include <stddef.h>
// TAD32 (Terrarum Advanced Audio - PCM32f version) Decoder
// DWT-based perceptual audio codec for TSVM
// Shared decoder library used by both decoder_tad (standalone) and decoder_tav (video decoder)
// Constants (must match encoder)
// NOTE(review): TAD32_SAMPLE_RATE is presumably in Hz - the unit is not stated here.
#define TAD32_SAMPLE_RATE 32000
#define TAD32_CHANNELS 2 // Stereo
#define TAD_DEFAULT_CHUNK_SIZE 32768 // Default chunk size for standalone TAD files
/**
 * Decode audio chunk with TAD32 codec
 *
 * @param input Input TAD32 chunk data
 * @param input_size Size of input buffer
 * @param pcmu8_stereo Output PCMu8 stereo samples (interleaved L,R)
 * @param bytes_consumed [out] Number of bytes consumed from input
 * @param samples_decoded [out] Number of samples decoded per channel
 * @return 0 on success, -1 on error
 *
 * NOTE(review): the required capacity of pcmu8_stereo is not stated here.
 * With interleaved stereo output it is presumably 2 bytes per decoded
 * sample - confirm against the implementation.
 *
 * Input format:
 * uint16 sample_count (samples per channel)
 * uint8 max_index (maximum quantisation index)
 * uint32 payload_size (bytes in payload)
 * * payload (encoded M/S data, Zstd-compressed with EZBC)
 *
 * Output format:
 * PCMu8 stereo interleaved (8-bit unsigned PCM, L,R pairs)
 * Range: [0, 255] where 128 = silence
 */
int tad32_decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_stereo,
size_t *bytes_consumed, size_t *samples_decoded);
#endif // TAD32_DECODER_H

View File

@@ -1,63 +0,0 @@
#ifndef TAD32_ENCODER_H
#define TAD32_ENCODER_H
#include <stdint.h>
#include <stddef.h>
// TAD32 (Terrarum Advanced Audio - PCM32f version) Encoder
// DWT-based perceptual audio codec for TSVM
// Alternative version: PCM32f throughout encoding, PCM8 conversion only at decoder
// Constants
// Per-band coefficient scalars: 10 entries, one per subband.
#define TAD32_COEFF_SCALARS {64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f} // value only valid for CDF 9/7 with decomposition level 9. Index 0 = LL band
#define TAD32_MIN_CHUNK_SIZE 1024 // Minimum: 1024 samples
// NOTE(review): presumably in Hz - the unit is not stated here.
#define TAD32_SAMPLE_RATE 32000
#define TAD32_CHANNELS 2 // Stereo
// Quality range accepted by the encoder front end.
// NOTE(review): TAD32_QUALITY_MAX is 6, but tad32_quality_to_max_index() in this
// header has only six table entries and clamps its argument to 0..5, so
// quality 6 yields the same max index as quality 5 - confirm this is intended.
#define TAD32_QUALITY_MIN 0
#define TAD32_QUALITY_MAX 6
#define TAD32_QUALITY_DEFAULT 3
// Zstd compression level constant (tad32_encode_chunk documents 1-22 as the valid range).
#define TAD32_ZSTD_LEVEL 15
// Map an encoder quality setting to the maximum quantisation index.
// Quality is clamped to the table range 0..5 first, so anything below 0
// behaves like 0 and anything above 5 behaves like 5.
static inline int tad32_quality_to_max_index(int quality) {
    static const int quality_map[6] = {21, 31, 44, 63, 89, 127};
    const int top = 5;

    const int level = (quality < 0) ? 0 : ((quality > top) ? top : quality);
    return quality_map[level];
}
/**
 * Encode audio chunk with TAD32 codec (PCM32f version)
 *
 * @param pcm32_stereo Input PCM32fLE stereo samples (interleaved L,R)
 * @param num_samples Number of samples per channel (min 1024)
 * @param max_index Maximum quantisation index (7=3bit, 15=4bit, 31=5bit, 63=6bit, 127=7bit)
 * @param quantiser_scale Quantiser scaling factor (1.0=baseline, 2.0=2x coarser quantisation)
 * Higher values = more aggressive quantisation = smaller files
 * @param zstd_level Zstd compression level (1-22). Use negative value to disable compression.
 * When disabled, MSB of payload_size is set to indicate uncompressed data.
 * @param output Output buffer (must be large enough)
 * @return Number of bytes written to output, or 0 on error
 *
 * NOTE(review): "large enough" is not quantified here. The required capacity of
 * `output` must be taken from the implementation - confirm before sizing buffers.
 *
 * Output format:
 * uint16 sample_count (samples per channel)
 * uint8 max_index (maximum quantisation index)
 * uint32 payload_size (bytes in payload; MSB=1 indicates uncompressed)
 * * payload (encoded M/S data, optionally Zstd-compressed)
 */
size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
int max_index,
float quantiser_scale, int zstd_level, uint8_t *output);
/**
 * Print accumulated coefficient statistics
 * Only effective if TAD_COEFF_STATS environment variable is set
 *
 * NOTE(review): the output destination and the point at which statistics are
 * accumulated are not visible in this header - see the implementation.
 */
void tad32_print_statistics(void);
/**
 * Free accumulated statistics memory
 * Should be called after tad32_print_statistics()
 */
void tad32_free_statistics(void);
#endif // TAD32_ENCODER_H

View File

@@ -1,74 +0,0 @@
// TEV Entropy Coder - Specialised for DCT coefficients
// Replaces gzip with video-optimized compression
#ifndef ENTROPY_CODER_H
#define ENTROPY_CODER_H
#include <stdint.h>
#include <stdio.h>
// Bit writer for variable-length codes
// Holds a write cursor (byte_pos, bit_pos) into a byte buffer.
// NOTE(review): bit order within a byte and buffer ownership are defined by
// write_bits() and its callers, not by this header - confirm there.
typedef struct {
uint8_t *buffer; // Destination bytes
size_t buffer_size; // presumably the capacity of `buffer` in bytes - confirm
size_t byte_pos; // presumably the index of the byte currently being filled - confirm
int bit_pos; // 0-7, next bit to write
} bit_writer_t;
// Bit reader for decoding
// Holds a read cursor (byte_pos, bit_pos) into a read-only byte buffer.
// NOTE(review): bit order within a byte is defined by read_bits(), not by
// this header - confirm there.
typedef struct {
const uint8_t *buffer; // Source bytes (never written through this struct)
size_t buffer_size; // presumably the number of valid bytes in `buffer` - confirm
size_t byte_pos; // presumably the index of the byte currently being read - confirm
int bit_pos; // 0-7, next bit to read
} bit_reader_t;
// Huffman table entry
// One symbol's code word. `code` is 16 bits wide, so a code word can be at
// most 16 bits long.
typedef struct {
uint16_t code; // Huffman code
uint8_t bits; // Code length in bits
} huffman_entry_t;
// Video entropy coder optimized for TEV coefficients
// Bundles the per-symbol-class Huffman tables with one bit writer and one bit
// reader, so a single instance carries both an encode cursor and a decode cursor.
// NOTE(review): how a signed symbol value maps to a table index (e.g. value + 255
// for the 512-entry tables, value + 32 for mv_table) is not visible here - confirm.
typedef struct {
// Huffman tables for different coefficient types
huffman_entry_t y_dc_table[512]; // Y DC coefficients (-255 to +255)
huffman_entry_t y_ac_table[512]; // Y AC coefficients
huffman_entry_t c_dc_table[512]; // Chroma DC coefficients
huffman_entry_t c_ac_table[512]; // Chroma AC coefficients
huffman_entry_t run_table[256]; // Zero run lengths (0-255)
// Motion vector Huffman tables
huffman_entry_t mv_table[65]; // Motion vectors (-32 to +32)
// Bit writer/reader
bit_writer_t writer;
bit_reader_t reader;
} entropy_coder_t;
// Huffman codes for the 16 block modes.
// NOTE(review): this is a `static const` array declared in a header without an
// initializer. In C it is a tentative definition, so every translation unit that
// includes this header gets its own private copy, zero-filled unless that same
// unit later supplies an initializer; in C++ it does not compile. Confirm where
// the populated table is defined.
static const huffman_entry_t BLOCK_MODE_HUFFMAN[16];
// Low-level bit I/O: write or read the given number of bits through a cursor.
void write_bits(bit_writer_t *writer, uint32_t value, int bits);
uint32_t read_bits(bit_reader_t *reader, int bits);
// Initialise entropy coder
// NOTE(review): presumably `buffer` is the output buffer used by the coder's
// writer and remains owned by the caller - confirm in the implementation.
entropy_coder_t* entropy_coder_create(uint8_t *buffer, size_t buffer_size);
void entropy_coder_destroy(entropy_coder_t *coder);
// Encoding functions
// NOTE(review): the meaning of the int return values (status vs. bit count) and
// the expected length of the coefficient arrays are not stated here - confirm.
int encode_y_block(entropy_coder_t *coder, int16_t *y_coeffs);
int encode_chroma_block(entropy_coder_t *coder, int16_t *chroma_coeffs, int is_cg);
int encode_motion_vector(entropy_coder_t *coder, int16_t mv_x, int16_t mv_y);
int encode_block_mode(entropy_coder_t *coder, uint8_t mode);
// Decoding functions
// entropy_coder_init_reader() points the coder at an input buffer; the decode_*
// functions mirror the encode_* functions and return results through out-pointers.
void entropy_coder_init_reader(entropy_coder_t *coder, const uint8_t *buffer, size_t buffer_size);
int decode_y_block(entropy_coder_t *coder, int16_t *y_coeffs);
int decode_chroma_block(entropy_coder_t *coder, int16_t *chroma_coeffs, int is_cg);
int decode_motion_vector(entropy_coder_t *coder, int16_t *mv_x, int16_t *mv_y);
int decode_block_mode(entropy_coder_t *coder, uint8_t *mode);
// Get compressed size
size_t entropy_coder_get_size(entropy_coder_t *coder);
void entropy_coder_reset(entropy_coder_t *coder);
#endif // ENTROPY_CODER_H

View File

@@ -1,837 +0,0 @@
/*
* TAV AVX-512 Optimisations
*
* This file contains AVX-512 optimised versions of performance-critical functions
* in the TAV encoder. Runtime CPU detection ensures fallback to scalar versions
* on non-AVX-512 systems.
*
* Optimised functions:
* - 1D DWT transforms (5/3, 9/7, Haar, Bior13/7, DD4)
* - Quantisation functions
* - RGB to YCoCg colour conversion
* - 2D DWT gather/scatter operations
*
* Compile with: -mavx512f -mavx512dq -mavx512bw -mavx512vl
*/
#ifndef TAV_AVX512_H
#define TAV_AVX512_H
#include <immintrin.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <stdio.h>
// =============================================================================
// SIMD Capability Detection
// =============================================================================
// Instruction-set level selected at runtime.
typedef enum {
SIMD_NONE = 0, // Scalar code paths only
SIMD_AVX512F = 1 // AVX-512 code paths may be used
} simd_level_t;
// Global SIMD level (set by tav_simd_init)
// NOTE(review): this is a `static` variable defined in a header, so every
// translation unit that includes the header has its own copy. Calling
// tav_simd_init() in one unit does not update the copy seen by another -
// confirm the header is only included from a single source file.
static simd_level_t g_simd_level = SIMD_NONE;
// CPU feature detection
// Returns 1 when the running CPU reports both AVX-512F and AVX-512DQ, else 0.
// Always returns 0 when the file is compiled without AVX-512F enabled
// (__AVX512F__ undefined), regardless of the CPU.
static inline int cpu_has_avx512f(void) {
#ifdef __AVX512F__
    if (!__builtin_cpu_supports("avx512f")) {
        return 0;
    }
    return __builtin_cpu_supports("avx512dq") ? 1 : 0;
#else
    return 0;
#endif
}
// Initialise SIMD detection (call once at startup)
// Sets g_simd_level from the compile-time and runtime AVX-512 checks and
// reports the outcome on stderr.
static inline void tav_simd_init(void) {
    const char *report;

#ifdef __AVX512F__
    const int usable = cpu_has_avx512f();
    g_simd_level = usable ? SIMD_AVX512F : SIMD_NONE;
    report = usable ? "[TAV] AVX-512 optimisations enabled\n"
                    : "[TAV] AVX-512 not available, using scalar fallback\n";
#else
    g_simd_level = SIMD_NONE;
    report = "[TAV] Compiled without AVX-512 support\n";
#endif

    fputs(report, stderr);
}
#ifdef __AVX512F__
// =============================================================================
// Helper Functions
// =============================================================================
// Horizontal sum of 16 floats
// Folds the vector in halves (512 -> 256 -> 128 bits), then finishes with two
// horizontal adds. The result is in lane 0 of the final 128-bit vector.
static inline float _mm512_reduce_add_ps_compat(__m512 v) {
    const __m256 lower8 = _mm512_castps512_ps256(v);
    const __m256 upper8 = _mm512_extractf32x8_ps(v, 1);
    const __m256 folded8 = _mm256_add_ps(lower8, upper8);

    const __m128 lower4 = _mm256_castps256_ps128(folded8);
    const __m128 upper4 = _mm256_extractf128_ps(folded8, 1);

    __m128 acc = _mm_add_ps(lower4, upper4);
    acc = _mm_hadd_ps(acc, acc);
    acc = _mm_hadd_ps(acc, acc);
    return _mm_cvtss_f32(acc);
}
// Clamp helper for vectorised operations
// Per lane: raise v to at least min_val, then lower the result to at most max_val.
static inline __m512 _mm512_clamp_ps(__m512 v, __m512 min_val, __m512 max_val) {
    const __m512 floored = _mm512_max_ps(v, min_val);
    return _mm512_min_ps(floored, max_val);
}
// =============================================================================
// AVX-512 Optimised 1D DWT Forward Transforms
// =============================================================================
// 5/3 Reversible Forward DWT with AVX-512
static inline void dwt_53_forward_1d_avx512(float *data, int length) {
if (length < 2) return;
float *temp = (float*)calloc(length, sizeof(float));
int half = (length + 1) / 2;
// Predict step (high-pass) - vectorised
// temp[half + i] = data[2*i+1] - 0.5 * (data[2*i] + data[2*i+2])
int i;
for (i = 0; i + 16 <= half; i += 16) {
__mmask16 valid_mask = 0xFFFF;
// Check boundary for last iteration
for (int j = 0; j < 16; j++) {
int idx = 2 * (i + j) + 1;
if (idx >= length) {
valid_mask &= ~(1 << j);
}
}
if (valid_mask == 0) break;
// Load data[2*i] - stride 2 load
float even_curr_vals[16], even_next_vals[16], odd_vals[16];
for (int j = 0; j < 16; j++) {
if (valid_mask & (1 << j)) {
even_curr_vals[j] = data[2 * (i + j)];
even_next_vals[j] = (2 * (i + j) + 2 < length) ? data[2 * (i + j) + 2] : data[2 * (i + j)];
odd_vals[j] = data[2 * (i + j) + 1];
} else {
even_curr_vals[j] = 0.0f;
even_next_vals[j] = 0.0f;
odd_vals[j] = 0.0f;
}
}
__m512 even_curr = _mm512_loadu_ps(even_curr_vals);
__m512 even_next = _mm512_loadu_ps(even_next_vals);
__m512 odd = _mm512_loadu_ps(odd_vals);
__m512 pred = _mm512_mul_ps(_mm512_add_ps(even_curr, even_next), _mm512_set1_ps(0.5f));
__m512 high = _mm512_sub_ps(odd, pred);
_mm512_mask_storeu_ps(&temp[half + i], valid_mask, high);
}
// Handle remaining elements
for (; i < half; i++) {
int idx = 2 * i + 1;
if (idx < length) {
float pred = 0.5f * (data[2 * i] + (2 * i + 2 < length ? data[2 * i + 2] : data[2 * i]));
temp[half + i] = data[idx] - pred;
}
}
// Update step (low-pass) - vectorised
// temp[i] = data[2*i] + 0.25 * (temp[half+i-1] + temp[half+i])
for (i = 0; i + 16 <= half; i += 16) {
__m512 even = _mm512_loadu_ps(&data[2 * i]); // Load with stride 2 (simplified)
// Manual gather for strided load
float even_vals[16];
for (int j = 0; j < 16 && (i + j) < half; j++) {
even_vals[j] = data[2 * (i + j)];
}
even = _mm512_loadu_ps(even_vals);
// Load high-pass neighbours
float high_prev[16], high_curr[16];
for (int j = 0; j < 16 && (i + j) < half; j++) {
high_prev[j] = ((i + j) > 0) ? temp[half + (i + j) - 1] : 0.0f;
high_curr[j] = ((i + j) < half - 1) ? temp[half + (i + j)] : 0.0f;
}
__m512 hp = _mm512_loadu_ps(high_prev);
__m512 hc = _mm512_loadu_ps(high_curr);
__m512 update = _mm512_mul_ps(_mm512_add_ps(hp, hc), _mm512_set1_ps(0.25f));
__m512 low = _mm512_add_ps(even, update);
__mmask16 store_mask = (i + 16 <= half) ? 0xFFFF : (1 << (half - i)) - 1;
_mm512_mask_storeu_ps(&temp[i], store_mask, low);
}
// Handle remaining elements
for (; i < half; i++) {
float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) +
(i < half - 1 ? temp[half + i] : 0));
temp[i] = data[2 * i] + update;
}
memcpy(data, temp, length * sizeof(float));
free(temp);
}
// 9/7 Irreversible Forward DWT with AVX-512
static inline void dwt_97_forward_1d_avx512(float *data, int length) {
if (length < 2) return;
int half = (length + 1) / 2;
// Allocate aligned temp buffer once (64-byte align for cache lines)
float *temp = NULL;
#if defined(_POSIX_C_SOURCE) || defined(_XOPEN_SOURCE)
if (posix_memalign((void**)&temp, 64, (size_t)length * sizeof(float)) != 0) {
temp = (float*)malloc((size_t)length * sizeof(float));
}
#else
temp = (float*)aligned_alloc(64, ((size_t)length * sizeof(float) + 63) & ~63);
if (!temp) temp = (float*)malloc((size_t)length * sizeof(float));
#endif
if (!temp) return; // allocation failure: bail out (preserve original behavior could be different)
// FAST SPLIT: interleave into temp: first half = evens, second half = odds
// This is simple, streaming-friendly, and much faster than per-iteration small-array gathers.
{
float *even = temp;
float *odd = temp + half;
int i = 0;
// process pairs to minimize branches and memory ops
for (; i + 1 < length; i += 2) {
even[0] = data[i];
odd[0] = data[i + 1];
++even; ++odd;
}
if (i < length) { // odd leftover
even[0] = data[i];
}
}
// Lifting coefficients as vectors
const __m512 alpha_vec = _mm512_set1_ps(-1.586134342f);
const __m512 beta_vec = _mm512_set1_ps(-0.052980118f);
const __m512 gamma_vec = _mm512_set1_ps(0.882911076f);
const __m512 delta_vec = _mm512_set1_ps(0.443506852f);
const __m512 K_vec = _mm512_set1_ps(1.230174105f);
const __m512 invK_vec = _mm512_set1_ps(1.0f / 1.230174105f);
// Helper variables
int i;
// -----------------------
// Step 1: Predict α
// d[i] += alpha * (s[i] + s[i+1])
// -----------------------
if (half > 0) {
// handle small or trivial cases
if (half == 1) {
if (half < length) {
temp[half + 0] += -1.586134342f * (temp[0] + temp[0]);
}
} else {
// main vectorised body: ensure s_next loads (i+1) valid -> i <= half-2
int limit = (half - 1);
int n_full = (limit / 16) * 16; // process up to n_full (multiple of 16)
i = 0;
for (; i + 32 <= n_full; i += 32) {
// unroll 2x (i and i+16)
__m512 s0 = _mm512_loadu_ps(&temp[i]);
__m512 s0n = _mm512_loadu_ps(&temp[i + 1]);
__m512 d0 = _mm512_loadu_ps(&temp[half + i]);
__m512 sum0 = _mm512_add_ps(s0, s0n);
d0 = _mm512_fmadd_ps(alpha_vec, sum0, d0);
_mm512_storeu_ps(&temp[half + i], d0);
__m512 s1 = _mm512_loadu_ps(&temp[i + 16]);
__m512 s1n = _mm512_loadu_ps(&temp[i + 17]);
__m512 d1 = _mm512_loadu_ps(&temp[half + i + 16]);
__m512 sum1 = _mm512_add_ps(s1, s1n);
d1 = _mm512_fmadd_ps(alpha_vec, sum1, d1);
_mm512_storeu_ps(&temp[half + i + 16], d1);
}
for (; i + 16 <= n_full; i += 16) {
__m512 s = _mm512_loadu_ps(&temp[i]);
__m512 sn = _mm512_loadu_ps(&temp[i + 1]);
__m512 d = _mm512_loadu_ps(&temp[half + i]);
__m512 sum = _mm512_add_ps(s, sn);
d = _mm512_fmadd_ps(alpha_vec, sum, d);
_mm512_storeu_ps(&temp[half + i], d);
}
// scalar remainder up to limit (half-2 -> last vector handled below)
for (; i < limit; ++i) {
temp[half + i] += -1.586134342f * (temp[i] + temp[i + 1]);
}
// handle last index i = half-1 (mirror)
int last = half - 1;
if (half + last < length) {
float s_curr = temp[last];
float s_next = s_curr;
temp[half + last] += -1.586134342f * (s_curr + s_next);
}
}
}
// -----------------------
// Step 2: Update β
// s[i] += beta * (d[i-1] + d[i])
// -----------------------
if (half > 0) {
// handle i == 0 separately (d_prev = d_curr for boundary semantics)
if (half >= 1) {
// i == 0
if (half + 0 < length) {
float d_curr0 = temp[half + 0];
temp[0] += -0.052980118f * (d_curr0 + d_curr0);
}
}
if (half > 1) {
// main vector loop starting from i = 1 to half-1 (we will write s[i] for i>=1)
int start = 1;
int limit = half; // exclusive
int n_elems = limit - start;
int n_full = (n_elems / 16) * 16;
i = start;
for (; i + 32 <= start + n_full; i += 32) {
// unroll 2x
__m512 s0 = _mm512_loadu_ps(&temp[i]);
__m512 dcurr0 = _mm512_loadu_ps(&temp[half + i]);
__m512 dprev0 = _mm512_loadu_ps(&temp[half + i - 1]);
__m512 sum0 = _mm512_add_ps(dprev0, dcurr0);
s0 = _mm512_fmadd_ps(beta_vec, sum0, s0);
_mm512_storeu_ps(&temp[i], s0);
__m512 s1 = _mm512_loadu_ps(&temp[i + 16]);
__m512 dcurr1 = _mm512_loadu_ps(&temp[half + i + 16]);
__m512 dprev1 = _mm512_loadu_ps(&temp[half + i + 15]);
__m512 sum1 = _mm512_add_ps(dprev1, dcurr1);
s1 = _mm512_fmadd_ps(beta_vec, sum1, s1);
_mm512_storeu_ps(&temp[i + 16], s1);
}
for (; i + 16 <= start + n_full; i += 16) {
__m512 s = _mm512_loadu_ps(&temp[i]);
__m512 dcurr = _mm512_loadu_ps(&temp[half + i]);
__m512 dprev = _mm512_loadu_ps(&temp[half + i - 1]);
__m512 sum = _mm512_add_ps(dprev, dcurr);
s = _mm512_fmadd_ps(beta_vec, sum, s);
_mm512_storeu_ps(&temp[i], s);
}
// scalar remainder
for (; i < limit; ++i) {
float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
float d_prev = (half + i - 1 < length && i > 0) ? temp[half + i - 1] : d_curr;
temp[i] += -0.052980118f * (d_prev + d_curr);
}
}
}
// -----------------------
// Step 3: Predict γ
// d[i] += gamma * (s[i] + s[i+1])
// -----------------------
if (half > 0) {
if (half == 1) {
if (half < length) {
temp[half + 0] += 0.882911076f * (temp[0] + temp[0]);
}
} else {
int limit = (half - 1);
int n_full = (limit / 16) * 16;
i = 0;
for (; i + 32 <= n_full; i += 32) {
__m512 s0 = _mm512_loadu_ps(&temp[i]);
__m512 s0n = _mm512_loadu_ps(&temp[i + 1]);
__m512 d0 = _mm512_loadu_ps(&temp[half + i]);
__m512 sum0 = _mm512_add_ps(s0, s0n);
d0 = _mm512_fmadd_ps(gamma_vec, sum0, d0);
_mm512_storeu_ps(&temp[half + i], d0);
__m512 s1 = _mm512_loadu_ps(&temp[i + 16]);
__m512 s1n = _mm512_loadu_ps(&temp[i + 17]);
__m512 d1 = _mm512_loadu_ps(&temp[half + i + 16]);
__m512 sum1 = _mm512_add_ps(s1, s1n);
d1 = _mm512_fmadd_ps(gamma_vec, sum1, d1);
_mm512_storeu_ps(&temp[half + i + 16], d1);
}
for (; i + 16 <= n_full; i += 16) {
__m512 s = _mm512_loadu_ps(&temp[i]);
__m512 sn = _mm512_loadu_ps(&temp[i + 1]);
__m512 d = _mm512_loadu_ps(&temp[half + i]);
__m512 sum = _mm512_add_ps(s, sn);
d = _mm512_fmadd_ps(gamma_vec, sum, d);
_mm512_storeu_ps(&temp[half + i], d);
}
for (; i < limit; ++i) {
temp[half + i] += 0.882911076f * (temp[i] + temp[i + 1]);
}
// last index mirror
int last = half - 1;
if (half + last < length) {
float s_curr = temp[last];
float s_next = s_curr;
temp[half + last] += 0.882911076f * (s_curr + s_next);
}
}
}
// -----------------------
// Step 4: Update δ
// s[i] += delta * (d[i-1] + d[i])
// -----------------------
if (half > 0) {
// i == 0
if (half >= 1) {
if (half + 0 < length) {
float d_curr0 = temp[half + 0];
temp[0] += 0.443506852f * (d_curr0 + d_curr0);
}
}
if (half > 1) {
int start = 1;
int limit = half; // exclusive
int n_elems = limit - start;
int n_full = (n_elems / 16) * 16;
i = start;
for (; i + 32 <= start + n_full; i += 32) {
__m512 s0 = _mm512_loadu_ps(&temp[i]);
__m512 dcurr0 = _mm512_loadu_ps(&temp[half + i]);
__m512 dprev0 = _mm512_loadu_ps(&temp[half + i - 1]);
__m512 sum0 = _mm512_add_ps(dprev0, dcurr0);
s0 = _mm512_fmadd_ps(delta_vec, sum0, s0);
_mm512_storeu_ps(&temp[i], s0);
__m512 s1 = _mm512_loadu_ps(&temp[i + 16]);
__m512 dcurr1 = _mm512_loadu_ps(&temp[half + i + 16]);
__m512 dprev1 = _mm512_loadu_ps(&temp[half + i + 15]);
__m512 sum1 = _mm512_add_ps(dprev1, dcurr1);
s1 = _mm512_fmadd_ps(delta_vec, sum1, s1);
_mm512_storeu_ps(&temp[i + 16], s1);
}
for (; i + 16 <= start + n_full; i += 16) {
__m512 s = _mm512_loadu_ps(&temp[i]);
__m512 dcurr = _mm512_loadu_ps(&temp[half + i]);
__m512 dprev = _mm512_loadu_ps(&temp[half + i - 1]);
__m512 sum = _mm512_add_ps(dprev, dcurr);
s = _mm512_fmadd_ps(delta_vec, sum, s);
_mm512_storeu_ps(&temp[i], s);
}
for (; i < limit; ++i) {
float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
float d_prev = (half + i - 1 < length && i > 0) ? temp[half + i - 1] : d_curr;
temp[i] += 0.443506852f * (d_prev + d_curr);
}
}
}
// -----------------------
// Step 5: Scaling
// s *= K, d *= invK
// -----------------------
// s (first half)
{
int n_full = (half / 16) * 16;
i = 0;
for (; i + 32 <= n_full; i += 32) {
__m512 s0 = _mm512_loadu_ps(&temp[i]);
s0 = _mm512_mul_ps(s0, K_vec);
_mm512_storeu_ps(&temp[i], s0);
__m512 s1 = _mm512_loadu_ps(&temp[i + 16]);
s1 = _mm512_mul_ps(s1, K_vec);
_mm512_storeu_ps(&temp[i + 16], s1);
}
for (; i + 16 <= n_full; i += 16) {
__m512 s = _mm512_loadu_ps(&temp[i]);
s = _mm512_mul_ps(s, K_vec);
_mm512_storeu_ps(&temp[i], s);
}
for (; i < half; ++i) temp[i] *= 1.230174105f;
}
// d (second half)
{
int dlen = length - half;
int n_full = (dlen / 16) * 16;
i = 0;
for (; i + 32 <= n_full; i += 32) {
__m512 d0 = _mm512_loadu_ps(&temp[half + i]);
d0 = _mm512_mul_ps(d0, invK_vec);
_mm512_storeu_ps(&temp[half + i], d0);
__m512 d1 = _mm512_loadu_ps(&temp[half + i + 16]);
d1 = _mm512_mul_ps(d1, invK_vec);
_mm512_storeu_ps(&temp[half + i + 16], d1);
}
for (; i + 16 <= n_full; i += 16) {
__m512 d = _mm512_loadu_ps(&temp[half + i]);
d = _mm512_mul_ps(d, invK_vec);
_mm512_storeu_ps(&temp[half + i], d);
}
for (; i < dlen; ++i) {
if (half + i < length) temp[half + i] /= 1.230174105f;
}
}
// Copy back and free
memcpy(data, temp, (size_t)length * sizeof(float));
free(temp);
}
// Haar Forward DWT with AVX-512
static inline void dwt_haar_forward_1d_avx512(float *data, int length) {
if (length < 2) return;
float *temp = (float*)malloc(length * sizeof(float));
int half = (length + 1) / 2;
const __m512 half_vec = _mm512_set1_ps(0.5f);
// Process 16 pairs at a time
int i;
for (i = 0; i + 16 <= half; i += 16) {
__mmask16 valid_mask = 0xFFFF;
float even_vals[16], odd_vals[16];
for (int j = 0; j < 16; j++) {
even_vals[j] = data[2 * (i + j)];
if (2 * (i + j) + 1 < length) {
odd_vals[j] = data[2 * (i + j) + 1];
} else {
odd_vals[j] = even_vals[j];
valid_mask &= ~(1 << j);
}
}
__m512 even = _mm512_loadu_ps(even_vals);
__m512 odd = _mm512_loadu_ps(odd_vals);
// Low-pass: (even + odd) / 2
__m512 low = _mm512_mul_ps(_mm512_add_ps(even, odd), half_vec);
// High-pass: (even - odd) / 2
__m512 high = _mm512_mul_ps(_mm512_sub_ps(even, odd), half_vec);
_mm512_storeu_ps(&temp[i], low);
_mm512_mask_storeu_ps(&temp[half + i], valid_mask, high);
}
// Remaining scalar
for (; i < half; i++) {
if (2 * i + 1 < length) {
temp[i] = (data[2 * i] + data[2 * i + 1]) / 2.0f;
temp[half + i] = (data[2 * i] - data[2 * i + 1]) / 2.0f;
} else {
temp[i] = data[2 * i];
if (half + i < length) {
temp[half + i] = 0.0f;
}
}
}
memcpy(data, temp, length * sizeof(float));
free(temp);
}
// =============================================================================
// AVX-512 Optimised Quantisation Functions
// =============================================================================
static inline void quantise_dwt_coefficients_avx512(
float *coeffs, int16_t *quantised, int size,
float effective_q, float dead_zone_threshold,
int width, int height, int decomp_levels, int is_chroma,
int (*get_subband_level)(int, int, int, int),
int (*get_subband_type)(int, int, int, int)
) {
const __m512 q_vec = _mm512_set1_ps(effective_q);
const __m512 inv_q_vec = _mm512_set1_ps(1.0f / effective_q);
const __m512 half_vec = _mm512_set1_ps(0.5f);
const __m512 nhalf_vec = _mm512_set1_ps(-0.5f);
const __m512 zero_vec = _mm512_setzero_ps();
const __m512i min_i32 = _mm512_set1_epi32(-32768);
const __m512i max_i32 = _mm512_set1_epi32(32767);
int i;
for (i = 0; i + 16 <= size; i += 16) {
__m512 coeff = _mm512_loadu_ps(&coeffs[i]);
__m512 quant = _mm512_mul_ps(coeff, inv_q_vec);
// Dead-zone handling (simplified - full version needs per-coeff logic)
if (dead_zone_threshold > 0.0f && !is_chroma) {
__m512 threshold_vec = _mm512_set1_ps(dead_zone_threshold);
__m512 abs_quant = _mm512_abs_ps(quant);
__mmask16 dead_mask = _mm512_cmp_ps_mask(abs_quant, threshold_vec, _CMP_LE_OQ);
quant = _mm512_mask_blend_ps(dead_mask, quant, zero_vec);
}
// Manual rounding to match scalar behaviour (round away from zero)
// First add 0.5 or -0.5 based on sign
__mmask16 pos_mask = _mm512_cmp_ps_mask(quant, zero_vec, _CMP_GE_OQ);
__m512 round_val = _mm512_mask_blend_ps(pos_mask, nhalf_vec, half_vec);
quant = _mm512_add_ps(quant, round_val);
// Now truncate to int32 (this matches scalar (int32_t) cast after adding 0.5)
__m512i quant_i32 = _mm512_cvttps_epi32(quant); // cvtt = truncate (round toward zero)
quant_i32 = _mm512_max_epi32(quant_i32, min_i32);
quant_i32 = _mm512_min_epi32(quant_i32, max_i32);
// Pack to int16 (AVX-512 has cvtsepi32_epi16)
__m256i quant_i16 = _mm512_cvtsepi32_epi16(quant_i32);
_mm256_storeu_si256((__m256i*)&quantised[i], quant_i16);
}
// Remaining scalar
for (; i < size; i++) {
float quantised_val = coeffs[i] / effective_q;
// Dead-zone (simplified)
if (dead_zone_threshold > 0.0f && !is_chroma) {
if (fabsf(quantised_val) <= dead_zone_threshold) {
quantised_val = 0.0f;
}
}
int32_t val = (int32_t)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f));
quantised[i] = (int16_t)((val < -32768) ? -32768 : (val > 32767 ? 32767 : val));
}
}
// Perceptual quantisation with per-coefficient weighting
static inline void quantise_dwt_coefficients_perceptual_avx512(
float *coeffs, int16_t *quantised, int size,
float *weights, // Pre-computed per-coefficient weights
float base_quantiser
) {
const __m512 base_q_vec = _mm512_set1_ps(base_quantiser);
const __m512 half_vec = _mm512_set1_ps(0.5f);
const __m512 nhalf_vec = _mm512_set1_ps(-0.5f);
const __m512 zero_vec = _mm512_setzero_ps();
const __m512i min_i32 = _mm512_set1_epi32(-32768);
const __m512i max_i32 = _mm512_set1_epi32(32767);
int i;
for (i = 0; i + 16 <= size; i += 16) {
__m512 coeff = _mm512_loadu_ps(&coeffs[i]);
__m512 weight = _mm512_loadu_ps(&weights[i]);
// effective_q = base_q * weight
__m512 effective_q = _mm512_mul_ps(base_q_vec, weight);
__m512 quant = _mm512_div_ps(coeff, effective_q);
// Manual rounding to match scalar behaviour
__mmask16 pos_mask = _mm512_cmp_ps_mask(quant, zero_vec, _CMP_GE_OQ);
__m512 round_val = _mm512_mask_blend_ps(pos_mask, nhalf_vec, half_vec);
quant = _mm512_add_ps(quant, round_val);
// Truncate to int32 (matches scalar cast after rounding)
__m512i quant_i32 = _mm512_cvttps_epi32(quant);
quant_i32 = _mm512_max_epi32(quant_i32, min_i32);
quant_i32 = _mm512_min_epi32(quant_i32, max_i32);
__m256i quant_i16 = _mm512_cvtsepi32_epi16(quant_i32);
_mm256_storeu_si256((__m256i*)&quantised[i], quant_i16);
}
// Remaining scalar
for (; i < size; i++) {
float effective_q = base_quantiser * weights[i];
float quantised_val = coeffs[i] / effective_q;
int32_t val = (int32_t)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f));
quantised[i] = (int16_t)((val < -32768) ? -32768 : (val > 32767 ? 32767 : val));
}
}
// =============================================================================
// AVX-512 Optimised Dequantisation Functions
// =============================================================================
// Basic dequantisation: quantised[i] * effective_q
static inline void dequantise_dwt_coefficients_avx512(
const int16_t *quantised, float *coeffs, int size,
float effective_q
) {
const __m512 q_vec = _mm512_set1_ps(effective_q);
int i;
for (i = 0; i + 16 <= size; i += 16) {
// Load 16 int16 values
__m256i quant_i16 = _mm256_loadu_si256((__m256i*)&quantised[i]);
// Convert int16 to int32
__m512i quant_i32 = _mm512_cvtepi16_epi32(quant_i16);
// Convert int32 to float
__m512 quant_f32 = _mm512_cvtepi32_ps(quant_i32);
// Multiply by quantiser
__m512 dequant = _mm512_mul_ps(quant_f32, q_vec);
_mm512_storeu_ps(&coeffs[i], dequant);
}
// Remaining scalar
for (; i < size; i++) {
coeffs[i] = (float)quantised[i] * effective_q;
}
}
// Perceptual dequantisation with per-coefficient weights
static inline void dequantise_dwt_coefficients_perceptual_avx512(
const int16_t *quantised, float *coeffs, int size,
const float *weights, float base_quantiser
) {
const __m512 base_q_vec = _mm512_set1_ps(base_quantiser);
int i;
for (i = 0; i + 16 <= size; i += 16) {
// Load 16 int16 values
__m256i quant_i16 = _mm256_loadu_si256((__m256i*)&quantised[i]);
// Convert int16 → int32 → float
__m512i quant_i32 = _mm512_cvtepi16_epi32(quant_i16);
__m512 quant_f32 = _mm512_cvtepi32_ps(quant_i32);
// Load weights
__m512 weight = _mm512_loadu_ps(&weights[i]);
// effective_q = base_q * weight
__m512 effective_q = _mm512_mul_ps(base_q_vec, weight);
// dequant = quantised * effective_q
__m512 dequant = _mm512_mul_ps(quant_f32, effective_q);
_mm512_storeu_ps(&coeffs[i], dequant);
}
// Remaining scalar
for (; i < size; i++) {
float effective_q = base_quantiser * weights[i];
coeffs[i] = (float)quantised[i] * effective_q;
}
}
// =============================================================================
// AVX-512 Optimised RGB to YCoCg Conversion
// =============================================================================
static inline void rgb_to_ycocg_avx512(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height) {
const int total_pixels = width * height;
const __m512 half_vec = _mm512_set1_ps(0.5f);
int i;
// Process 16 pixels at a time (48 bytes of RGB data)
for (i = 0; i + 16 <= total_pixels; i += 16) {
// Load 16 RGB triplets (48 bytes)
// We need to deinterleave R, G, B channels
// Manual load and deinterleave (AVX-512 doesn't have direct RGB deinterleave)
float r_vals[16], g_vals[16], b_vals[16];
for (int j = 0; j < 16; j++) {
r_vals[j] = (float)rgb[(i + j) * 3 + 0];
g_vals[j] = (float)rgb[(i + j) * 3 + 1];
b_vals[j] = (float)rgb[(i + j) * 3 + 2];
}
__m512 r = _mm512_loadu_ps(r_vals);
__m512 g = _mm512_loadu_ps(g_vals);
__m512 b = _mm512_loadu_ps(b_vals);
// YCoCg-R transform:
// co = r - b
// tmp = b + co * 0.5
// cg = g - tmp
// y = tmp + cg * 0.5
__m512 co_vec = _mm512_sub_ps(r, b);
__m512 tmp = _mm512_fmadd_ps(co_vec, half_vec, b); // tmp = b + co * 0.5
__m512 cg_vec = _mm512_sub_ps(g, tmp);
__m512 y_vec = _mm512_fmadd_ps(cg_vec, half_vec, tmp); // y = tmp + cg * 0.5
_mm512_storeu_ps(&y[i], y_vec);
_mm512_storeu_ps(&co[i], co_vec);
_mm512_storeu_ps(&cg[i], cg_vec);
}
// Remaining pixels (scalar)
for (; i < total_pixels; i++) {
const float r = rgb[i * 3 + 0];
const float g = rgb[i * 3 + 1];
const float b = rgb[i * 3 + 2];
co[i] = r - b;
const float tmp = b + co[i] * 0.5f;
cg[i] = g - tmp;
y[i] = tmp + cg[i] * 0.5f;
}
}
// =============================================================================
// AVX-512 Optimised 2D DWT with Gather/Scatter
// =============================================================================
// Optimised column extraction using gather
static inline void dwt_2d_extract_column_avx512(
const float *tile_data, float *column,
int x, int width, int height
) {
// Create gather indices for column extraction
// indices[i] = (i * width + x)
int y;
for (y = 0; y + 16 <= height; y += 16) {
// Build gather indices
int indices[16];
for (int j = 0; j < 16; j++) {
indices[j] = (y + j) * width + x;
}
__m512i vindex = _mm512_loadu_si512((__m512i*)indices);
__m512 col_data = _mm512_i32gather_ps(vindex, tile_data, 4);
_mm512_storeu_ps(&column[y], col_data);
}
// Remaining scalar
for (; y < height; y++) {
column[y] = tile_data[y * width + x];
}
}
// Optimised column insertion using scatter
static inline void dwt_2d_insert_column_avx512(
float *tile_data, const float *column,
int x, int width, int height
) {
int y;
for (y = 0; y + 16 <= height; y += 16) {
// Build scatter indices
int indices[16];
for (int j = 0; j < 16; j++) {
indices[j] = (y + j) * width + x;
}
__m512i vindex = _mm512_loadu_si512((__m512i*)indices);
__m512 col_data = _mm512_loadu_ps(&column[y]);
_mm512_i32scatter_ps(tile_data, vindex, col_data, 4);
}
// Remaining scalar
for (; y < height; y++) {
tile_data[y * width + x] = column[y];
}
}
#endif // __AVX512F__
#endif // TAV_AVX512_H

View File

@@ -1,295 +0,0 @@
/**
* TAV Encoder Library - Public API
*
* High-level interface for encoding video using the TSVM Advanced Video (TAV) codec.
* Supports GOP-based encoding with internal multi-threading for optimal performance.
*
* Created by CuriousTorvald and Claude on 2025-12-03.
*/
#ifndef TAV_ENCODER_LIB_H
#define TAV_ENCODER_LIB_H
#include <stdint.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
// =============================================================================
// Opaque Encoder Context
// =============================================================================
/**
* TAV encoder context - opaque to users.
* Created with tav_encoder_create(), freed with tav_encoder_free().
*/
typedef struct tav_encoder_context tav_encoder_context_t;
// =============================================================================
// Configuration Structures
// =============================================================================
/**
* Video encoding parameters.
*/
typedef struct {
// === Video Dimensions ===
int width; // Frame width (must be even)
int height; // Frame height (must be even)
int fps_num; // Framerate numerator (e.g., 60 for 60fps)
int fps_den; // Framerate denominator (e.g., 1 for 60/1)
// === Wavelet Configuration ===
int wavelet_type; // Spatial wavelet: 0=CDF 5/3, 1=CDF 9/7 (default), 2=CDF 13/7, 16=DD-4, 255=Haar
int temporal_wavelet; // Temporal wavelet: 0=Haar, 1=CDF 5/3 (default for smooth motion)
int decomp_levels; // Spatial DWT levels (0=auto, typically 6)
int temporal_levels; // Temporal DWT levels (0=auto, typically 2 for 8-frame GOPs)
// === Color Space ===
int channel_layout; // 0=YCoCg-R (default), 1=ICtCp (for HDR/BT.2100 sources)
int perceptual_tuning; // 1=enable HVS perceptual quantization (default), 0=uniform
// === GOP Configuration ===
int enable_temporal_dwt; // 1=enable 3D DWT GOP encoding (default), 0=intra-only I-frames
int gop_size; // Frames per GOP (8, 16, or 24; 0=auto based on framerate)
int enable_two_pass; // 1=enable two-pass with scene change detection (default), 0=single-pass
// === Quality Control ===
int quality_level; // NOTE(review): undocumented in this header; presumably a quality preset index related to the quantisers below — confirm range and meaning against the encoder implementation
int quantiser_y; // Luma quantiser (0-255, indexed against QLUT)
int quantiser_co; // Orange chrominance quantiser (0-255, indexed against QLUT)
int quantiser_cg; // Green chrominance quantiser (0-255, indexed against QLUT)
float dead_zone_threshold; // Dead-zone quantization threshold (0.0=disabled, 0.6-1.5 typical)
// === Entropy Coding ===
int entropy_coder; // 0=Twobitmap (default), 1=EZBC (better for high-quality)
int zstd_level; // Zstd compression level (3-22, default: 7)
// === Multi-threading ===
int num_threads; // Worker threads (0=single-threaded, -1=auto, 1-16=explicit)
// === Encoder Presets ===
int encoder_preset; // Preset flags: 0x01=sports (finer temporal quant), 0x02=anime (disable grain)
// === Advanced Options ===
int verbose; // 1=enable debug output, 0=quiet (default)
int monoblock; // -1=auto (based on dimensions), 0=force tiled, 1=force monoblock
} tav_encoder_params_t;
/**
* Initialize encoder parameters with default values.
*
* @param params Parameter structure to initialize
* @param width Frame width
* @param height Frame height
*/
void tav_encoder_params_init(tav_encoder_params_t *params, int width, int height);
/**
* Encoder output packet.
* Contains encoded video or audio data.
*
* NOTE(review): the `data` comment below says the buffer is owned by the
* encoder, while this header also declares tav_encoder_free_packet(); confirm
* which of the two releases `data` before relying on either lifetime rule.
*/
typedef struct {
uint8_t *data; // Packet data (owned by encoder, valid until next encode/flush)
size_t size; // Packet size in bytes
uint8_t packet_type; // TAV packet type (0x10=I-frame, 0x12=GOP, 0x24=audio, etc.)
int frame_number; // Frame number (for video packets)
int is_video; // 1=video packet, 0=audio packet
} tav_encoder_packet_t;
// =============================================================================
// Encoder Lifecycle
// =============================================================================
/**
* Create TAV encoder context.
*
* Allocates internal buffers, initializes thread pool (if multi-threading enabled),
* and prepares encoder for frame submission.
*
* @param params Encoder parameters (copied internally)
* @return Encoder context, or NULL on failure
*/
tav_encoder_context_t *tav_encoder_create(const tav_encoder_params_t *params);
/**
* Free TAV encoder context.
*
* Shuts down thread pool, frees all buffers and resources.
* Any unflushed frames in the GOP buffer will be lost.
*
* @param ctx Encoder context
*/
void tav_encoder_free(tav_encoder_context_t *ctx);
/**
* Get last error message.
*
* @param ctx Encoder context
* @return Error message string (valid until next encode operation)
*/
const char *tav_encoder_get_error(tav_encoder_context_t *ctx);
/**
* Get encoder parameters (with calculated values).
* After context creation, params will contain actual values used
* (e.g., auto-calculated decomp_levels, gop_size).
*
* @param ctx Encoder context
* @param params Output parameters structure
*/
void tav_encoder_get_params(tav_encoder_context_t *ctx, tav_encoder_params_t *params);
/**
* DEBUG: Validate encoder context integrity.
*
* @param ctx Encoder context to check
* @return 1 if context appears valid, 0 otherwise
*/
int tav_encoder_validate_context(tav_encoder_context_t *ctx);
// =============================================================================
// Video Encoding
// =============================================================================
/*
* DEPRECATED: tav_encoder_encode_frame() and tav_encoder_flush() have been
* removed. Use tav_encoder_encode_gop() instead, which works for both
* single-threaded and multi-threaded modes. The CLI should buffer frames
* and call encode_gop() when a full GOP is ready.
*/
/**
* Encode a complete GOP (Group of Pictures) directly.
*
* This function is STATELESS and THREAD-SAFE with separate contexts.
* Perfect for multithreaded encoding from CLI:
* - Each thread creates its own encoder context
* - Each thread calls encode_gop() with a batch of frames
* - No shared state, no locking needed
*
* Example multithreaded usage:
* ```c
* // Worker thread function
* void* worker(void* arg) {
* work_item_t* item = (work_item_t*)arg;
*
* // Create thread-local encoder context
* tav_encoder_context_t* ctx = tav_encoder_create(&shared_params);
*
* // Encode this GOP
* tav_encoder_packet_t* packet;
* tav_encoder_encode_gop(ctx, item->frames, item->num_frames,
* item->frame_numbers, &packet);
*
* // Store packet in output queue
* queue_push(output_queue, packet);
*
* tav_encoder_free(ctx);
* return NULL;
* }
* ```
*
* @param ctx Encoder context (one per thread)
* @param rgb_frames Array of RGB24 frames [frame][width*height*3]
* @param num_frames Number of frames in GOP (1-24)
* @param frame_numbers Frame indices for timecodes (can be NULL)
* @param packet Output packet pointer
* @return 1 if packet ready, -1 on error
*/
int tav_encoder_encode_gop(tav_encoder_context_t *ctx,
const uint8_t **rgb_frames,
int num_frames,
const int *frame_numbers,
tav_encoder_packet_t **packet);
/**
* Free a packet returned by tav_encoder_encode_gop().
*
* The older tav_encoder_encode_frame() and tav_encoder_flush() entry points
* that this comment used to mention have been removed from the API.
* NOTE(review): packets from tav_encoder_encode_audio() presumably go through
* this function as well — confirm against the implementation.
*
* @param packet Packet to free (can be NULL)
*/
void tav_encoder_free_packet(tav_encoder_packet_t *packet);
// =============================================================================
// Audio Encoding (Optional)
// =============================================================================
/**
* Encode audio samples (TAD codec).
*
* Audio is encoded synchronously and returned immediately.
* For TAV muxing: interleave audio packets with video packets by frame PTS.
*
* @param ctx Encoder context
* @param pcm_samples PCM32f stereo samples (interleaved: L,R,L,R,...), num_samples×2 floats
* @param num_samples Number of samples per channel
* @param packet Output packet pointer
* @return 1 if packet ready, -1 on error
*/
int tav_encoder_encode_audio(tav_encoder_context_t *ctx,
const float *pcm_samples,
size_t num_samples,
tav_encoder_packet_t **packet);
// =============================================================================
// Statistics and Info
// =============================================================================
/**
* Encoding statistics, filled in by tav_encoder_get_stats().
*/
typedef struct {
int64_t frames_encoded; // Total frames encoded
int64_t gops_encoded; // Total GOPs encoded
size_t total_bytes; // Total bytes output (video + audio)
size_t video_bytes; // Video bytes
size_t audio_bytes; // Audio bytes
double avg_bitrate_kbps; // Average bitrate (kbps)
double encoding_fps; // Encoding speed (frames/sec)
} tav_encoder_stats_t;
/**
* Get encoding statistics.
*
* @param ctx Encoder context
* @param stats Output statistics structure
*/
void tav_encoder_get_stats(tav_encoder_context_t *ctx, tav_encoder_stats_t *stats);
// =============================================================================
// TAV Packet Types (for reference)
// =============================================================================
// Values carried in tav_encoder_packet_t.packet_type.
#define TAV_PACKET_IFRAME 0x10 // I-frame (intra-only, single frame)
#define TAV_PACKET_PFRAME 0x11 // P-frame (delta from previous)
#define TAV_PACKET_GOP_UNIFIED 0x12 // GOP unified (3D DWT, multiple frames)
#define TAV_PACKET_AUDIO_TAD 0x24 // TAD audio (DWT-based perceptual codec)
#define TAV_PACKET_AUDIO_PCM8 0x20 // PCM8 audio (legacy)
#define TAV_PACKET_LOOP_START 0xF0 // Loop point start (no payload)
#define TAV_PACKET_GOP_SYNC 0xFC // GOP sync (frame count marker)
#define TAV_PACKET_TIMECODE 0xFD // Timecode metadata
#define TAV_PACKET_SYNC 0xFF // Sync packet (no payload)
// =============================================================================
// Tile Settings (for multi-tile mode)
// =============================================================================
#define TAV_TILE_SIZE_X 640 // Base tile width
#define TAV_TILE_SIZE_Y 540 // Base tile height
#define TAV_DWT_FILTER_HALF_SUPPORT 4 // For 9/7 filter (filter lengths 9,7 → L=4)
#define TAV_TILE_MARGIN_LEVELS 3 // Use margin for 3 levels: 4 * (2^3) = 32px
#define TAV_TILE_MARGIN (TAV_DWT_FILTER_HALF_SUPPORT * (1 << TAV_TILE_MARGIN_LEVELS)) // 32px
// Padded tile = base tile plus TAV_TILE_MARGIN on each of the two opposite sides.
#define TAV_PADDED_TILE_SIZE_X (TAV_TILE_SIZE_X + 2 * TAV_TILE_MARGIN) // 704
#define TAV_PADDED_TILE_SIZE_Y (TAV_TILE_SIZE_Y + 2 * TAV_TILE_MARGIN) // 604
// Monoblock threshold: D1 PAL resolution (720x576)
// If width > 720 OR height > 576, automatically switch to tiled mode
#define TAV_MONOBLOCK_MAX_WIDTH 720
#define TAV_MONOBLOCK_MAX_HEIGHT 576
#ifdef __cplusplus
}
#endif
#endif // TAV_ENCODER_LIB_H

View File

@@ -1,275 +0,0 @@
/*
* TAV SIMD Function Dispatcher
*
* This file provides runtime CPU detection and function pointer dispatch
* for SIMD-optimized versions of performance-critical TAV encoder functions.
*
* Usage:
* 1. Include this header after defining all scalar functions
* 2. Call tav_simd_init() once at encoder initialization
* 3. Use function pointers (e.g., dwt_53_forward_1d_ptr) throughout code
*
* The dispatcher will automatically select AVX-512, AVX2, or scalar versions
* based on runtime CPU capabilities.
*/
#ifndef TAV_SIMD_DISPATCH_H
#define TAV_SIMD_DISPATCH_H
#include <stdint.h>
// =============================================================================
// Function Pointer Types
// =============================================================================
// 1D DWT function pointer types
typedef void (*dwt_1d_func_t)(float *data, int length);
// Quantization function pointer types
typedef void (*quantise_basic_func_t)(
float *coeffs, int16_t *quantised, int size,
float effective_q, float dead_zone_threshold,
int width, int height, int decomp_levels, int is_chroma,
int (*get_subband_level)(int, int, int, int),
int (*get_subband_type)(int, int, int, int)
);
typedef void (*quantise_perceptual_func_t)(
float *coeffs, int16_t *quantised, int size,
float *weights, float base_quantiser
);
// Color conversion function pointer type
typedef void (*rgb_to_ycocg_func_t)(
const uint8_t *rgb, float *y, float *co, float *cg,
int width, int height
);
// 2D DWT column operations
typedef void (*dwt_2d_column_extract_func_t)(
const float *tile_data, float *column,
int x, int width, int height
);
typedef void (*dwt_2d_column_insert_func_t)(
float *tile_data, const float *column,
int x, int width, int height
);
// =============================================================================
// Global Function Pointers (initialized by tav_simd_init)
// =============================================================================
// DWT 1D transforms
// NOTE: every pointer in this section is `static`, so each translation unit
// that includes this header gets its own private copy. They all stay NULL
// until tav_simd_init() has been called in that translation unit.
static dwt_1d_func_t dwt_53_forward_1d_ptr = NULL;
static dwt_1d_func_t dwt_97_forward_1d_ptr = NULL;
static dwt_1d_func_t dwt_haar_forward_1d_ptr = NULL;
// Inverse transforms (no pointer is declared for an inverse 9/7 transform)
static dwt_1d_func_t dwt_53_inverse_1d_ptr = NULL;
static dwt_1d_func_t dwt_haar_inverse_1d_ptr = NULL;
// Quantization
static quantise_basic_func_t quantise_dwt_coefficients_ptr = NULL;
static quantise_perceptual_func_t quantise_dwt_coefficients_perceptual_ptr = NULL;
// Color conversion
static rgb_to_ycocg_func_t rgb_to_ycocg_ptr = NULL;
// 2D DWT column operations
static dwt_2d_column_extract_func_t dwt_2d_extract_column_ptr = NULL;
static dwt_2d_column_insert_func_t dwt_2d_insert_column_ptr = NULL;
// =============================================================================
// SIMD Capability Detection
// =============================================================================
/**
 * SIMD capability levels reported by detect_simd_capabilities().
 *
 * The numeric values are used as indices into the name table printed by
 * tav_simd_init(), so they must stay stable. They are NOT ordered by
 * capability (AVX-512 = 1, SSE4.2 = 3).
 */
typedef enum {
    SIMD_NONE = 0,
    SIMD_AVX512F = 1,
    SIMD_AVX2 = 2,
    SIMD_SSE42 = 3
} simd_level_t;

// Level chosen by the last tav_simd_init() call (SIMD_NONE until then).
static simd_level_t detected_simd_level = SIMD_NONE;

/**
 * Query the running CPU and return the best usable SIMD level.
 *
 * AVX-512 and AVX2 are only reported when the translation unit itself was
 * compiled with the matching -m flags (__AVX512F__ / __AVX2__ defined).
 *
 * The probe relies on __builtin_cpu_supports() with x86 feature names, which
 * only compiles for x86 targets. On any other architecture or compiler the
 * function returns SIMD_NONE instead of breaking the build.
 *
 * @return Detected level; SIMD_NONE when nothing usable is available.
 */
static inline simd_level_t detect_simd_capabilities(void) {
#if (defined(__GNUC__) || defined(__clang__)) && \
    (defined(__x86_64__) || defined(__i386__))
    // SSE4.2 is the baseline: without it no SIMD path is considered at all.
    if (!__builtin_cpu_supports("sse4.2")) {
        return SIMD_NONE;
    }
#ifdef __AVX512F__
    if (__builtin_cpu_supports("avx512f") &&
        __builtin_cpu_supports("avx512dq") &&
        __builtin_cpu_supports("avx512bw") &&
        __builtin_cpu_supports("avx512vl")) {
        return SIMD_AVX512F;
    }
#endif
#ifdef __AVX2__
    if (__builtin_cpu_supports("avx2")) {
        return SIMD_AVX2;
    }
#endif
    // SSE4.2 support was already established by the baseline check above.
    return SIMD_SSE42;
#else
    return SIMD_NONE;
#endif
}
// =============================================================================
// Scalar Fallback Wrappers
// =============================================================================
// These wrappers adapt the scalar functions to match function pointer signatures
static void quantise_dwt_coefficients_scalar_wrapper(
float *coeffs, int16_t *quantised, int size,
float effective_q, float dead_zone_threshold,
int width, int height, int decomp_levels, int is_chroma,
int (*get_subband_level)(int, int, int, int),
int (*get_subband_type)(int, int, int, int)
);
// Implementation provided by including encoder - just declare prototype
static void quantise_dwt_coefficients_perceptual_scalar_wrapper(
float *coeffs, int16_t *quantised, int size,
float *weights, float base_quantiser
);
// Implementation provided by including encoder
// Scalar fallback: gather column `x` of a row-major tile (row stride `width`)
// into the contiguous buffer `column`, which must hold `height` floats.
static void dwt_2d_extract_column_scalar(
    const float *tile_data, float *column,
    int x, int width, int height
) {
    int row = 0;
    while (row < height) {
        column[row] = tile_data[x + row * width];
        row++;
    }
}
// Scalar fallback: scatter the contiguous buffer `column` (`height` floats)
// back into column `x` of a row-major tile whose row stride is `width`.
static void dwt_2d_insert_column_scalar(
    float *tile_data, const float *column,
    int x, int width, int height
) {
    int row = 0;
    while (row < height) {
        tile_data[x + row * width] = column[row];
        row++;
    }
}
// =============================================================================
// SIMD Initialization
// =============================================================================
/**
 * Bind every dispatch pointer to an implementation.
 *
 * Detects the CPU level, logs it to stderr, then wires the pointers:
 *   - AVX-512 build + AVX-512 CPU: forward DWTs, colour conversion and the
 *     column helpers use the *_avx512 routines; inverse DWTs and both
 *     quantisers stay on the scalar code.
 *   - everything else: all pointers use the scalar code.
 *
 * The scalar routines are referenced through block-scope `extern`
 * declarations and must be provided by the code that includes this header.
 */
static void tav_simd_init(void) {
    // Scalar reference implementations supplied by the including encoder.
    extern void dwt_53_forward_1d(float *data, int length);
    extern void dwt_97_forward_1d(float *data, int length);
    extern void dwt_haar_forward_1d(float *data, int length);
    extern void dwt_53_inverse_1d(float *data, int length);
    extern void dwt_haar_inverse_1d(float *data, int length);
    extern void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height);

    // Indexed by simd_level_t, so the order must follow the enum values.
    static const char *const level_names[] = {"None", "AVX-512", "AVX2", "SSE4.2"};

    detected_simd_level = detect_simd_capabilities();
    fprintf(stderr, "[TAV] SIMD level detected: %s\n",
            level_names[detected_simd_level]);

    // These pointers have no SIMD variant yet: identical on both paths.
    dwt_53_inverse_1d_ptr = dwt_53_inverse_1d;
    dwt_haar_inverse_1d_ptr = dwt_haar_inverse_1d;
    quantise_dwt_coefficients_ptr = quantise_dwt_coefficients_scalar_wrapper;
    quantise_dwt_coefficients_perceptual_ptr = quantise_dwt_coefficients_perceptual_scalar_wrapper;

#ifdef __AVX512F__
    if (detected_simd_level == SIMD_AVX512F) {
        extern void dwt_53_forward_1d_avx512(float *data, int length);
        extern void dwt_97_forward_1d_avx512(float *data, int length);
        extern void dwt_haar_forward_1d_avx512(float *data, int length);
        extern void rgb_to_ycocg_avx512(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height);
        extern void dwt_2d_extract_column_avx512(const float *tile_data, float *column, int x, int width, int height);
        extern void dwt_2d_insert_column_avx512(float *tile_data, const float *column, int x, int width, int height);

        fprintf(stderr, "[TAV] Using AVX-512 optimizations\n");

        dwt_53_forward_1d_ptr = dwt_53_forward_1d_avx512;
        dwt_97_forward_1d_ptr = dwt_97_forward_1d_avx512;
        dwt_haar_forward_1d_ptr = dwt_haar_forward_1d_avx512;
        rgb_to_ycocg_ptr = rgb_to_ycocg_avx512;
        dwt_2d_extract_column_ptr = dwt_2d_extract_column_avx512;
        dwt_2d_insert_column_ptr = dwt_2d_insert_column_avx512;
        return;
    }
#endif

    // Every other level (including AVX2 / SSE4.2) runs the scalar code.
    fprintf(stderr, "[TAV] Using scalar (non-SIMD) implementations\n");

    dwt_53_forward_1d_ptr = dwt_53_forward_1d;
    dwt_97_forward_1d_ptr = dwt_97_forward_1d;
    dwt_haar_forward_1d_ptr = dwt_haar_forward_1d;
    rgb_to_ycocg_ptr = rgb_to_ycocg;
    dwt_2d_extract_column_ptr = dwt_2d_extract_column_scalar;
    dwt_2d_insert_column_ptr = dwt_2d_insert_column_scalar;
}
// =============================================================================
// Convenience Macros for Code Readability
// =============================================================================
// Use these macros in encoder code for cleaner dispatch
#define DWT_53_FORWARD_1D(data, length) \
dwt_53_forward_1d_ptr((data), (length))
#define DWT_97_FORWARD_1D(data, length) \
dwt_97_forward_1d_ptr((data), (length))
#define DWT_HAAR_FORWARD_1D(data, length) \
dwt_haar_forward_1d_ptr((data), (length))
#define RGB_TO_YCOCG(rgb, y, co, cg, width, height) \
rgb_to_ycocg_ptr((rgb), (y), (co), (cg), (width), (height))
#define DWT_2D_EXTRACT_COLUMN(tile_data, column, x, width, height) \
dwt_2d_extract_column_ptr((tile_data), (column), (x), (width), (height))
#define DWT_2D_INSERT_COLUMN(tile_data, column, x, width, height) \
dwt_2d_insert_column_ptr((tile_data), (column), (x), (width), (height))
#endif // TAV_SIMD_DISPATCH_H

View File

@@ -1,78 +0,0 @@
// Created by CuriousTorvald and Claude on 2025-12-02.
// TAV Video Decoder Library - Shared decoding functions for TAV format
// Can be used by both regular TAV decoder and TAV-DT decoder
#ifndef TAV_VIDEO_DECODER_H
#define TAV_VIDEO_DECODER_H
#include <stdint.h>
#include <stddef.h>
// Video decoder context - opaque to users
typedef struct tav_video_context tav_video_context_t;
// Video parameters structure
// Passed (read-only) to tav_video_create() when a decoder context is created.
typedef struct {
int width;
int height;
int decomp_levels; // Spatial DWT levels (typically 4)
int temporal_levels; // Temporal DWT levels (typically 2)
int wavelet_filter; // 0=CDF 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar
int temporal_wavelet; // Temporal wavelet (0=CDF 5/3, 1=CDF 9/7)
int entropy_coder; // 0=Twobitmap, 1=EZBC, 2=RAW
int channel_layout; // 0=YCoCg-R, 1=ICtCp
int perceptual_tuning; // 1=perceptual quantisation, 0=uniform
uint8_t quantiser_y; // Base quantiser index for Y/I
uint8_t quantiser_co; // Base quantiser index for Co/Ct
uint8_t quantiser_cg; // Base quantiser index for Cg/Cp
uint8_t encoder_preset; // Encoder preset flags (sports, anime, etc.)
int monoblock; // 1=single tile (monoblock), 0=multi-tile
int no_zstd; // 1=packets are uncompressed (Video Flags bit 4), 0=Zstd compressed
} tav_video_params_t;
// Create video decoder context
// Returns NULL on failure
tav_video_context_t *tav_video_create(const tav_video_params_t *params);
// Free video decoder context
void tav_video_free(tav_video_context_t *ctx);
// Decode GOP_UNIFIED packet (0x12) to RGB24 frames
// Input: compressed_data - GOP packet data (after packet type byte)
// compressed_size - size of compressed data
// gop_size - number of frames in GOP (read from packet)
// Output: rgb_frames - array of pointers to RGB24 frame buffers (width*height*3 each)
// Must be pre-allocated by caller (gop_size pointers, each pointing to width*height*3 bytes)
// Returns: 0 on success, -1 on error
int tav_video_decode_gop(tav_video_context_t *ctx,
const uint8_t *compressed_data, uint32_t compressed_size,
uint8_t gop_size, uint8_t **rgb_frames);
// Decode IFRAME packet (0x10) to RGB24 frame
// Input: compressed_data - I-frame packet data (after packet type byte)
// packet_size - size of packet data
// Output: rgb_frame - pointer to RGB24 frame buffer (width*height*3 bytes)
// Must be pre-allocated by caller
// Returns: 0 on success, -1 on error
int tav_video_decode_iframe(tav_video_context_t *ctx,
const uint8_t *compressed_data, uint32_t packet_size,
uint8_t *rgb_frame);
// Decode PFRAME packet (0x11) to RGB24 frame (delta from reference)
// Input: compressed_data - P-frame packet data (after packet type byte)
// packet_size - size of packet data
// Output: rgb_frame - pointer to RGB24 frame buffer (width*height*3 bytes)
// Must be pre-allocated by caller
// Returns: 0 on success, -1 on error
// Note: Requires previous frame to be decoded first (stored internally as reference)
int tav_video_decode_pframe(tav_video_context_t *ctx,
const uint8_t *compressed_data, uint32_t packet_size,
uint8_t *rgb_frame);
// Get last error message
const char *tav_video_get_error(tav_video_context_t *ctx);
// Enable verbose debug output
void tav_video_set_verbose(tav_video_context_t *ctx, int verbose);
#endif // TAV_VIDEO_DECODER_H

View File

@@ -1,397 +0,0 @@
/**
* LDPC Rate 1/2 Codec Implementation
*
* LDPC for TAV-DT header protection.
* Uses a systematic rate 1/2 code with an iterative belief-propagation decoder
* (scaled min-sum approximation, hard-decision input).
*
* The parity-check matrix is designed for good error correction on small blocks.
* Each parity bit is computed as XOR of multiple data bits using a pseudo-random
* but deterministic pattern.
*
* Created by CuriousTorvald and Claude on 2025-12-09.
* Updated 2025-12-17: Replaced bit-flipping with belief propagation decoder.
*/
#include "ldpc.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
// Channel LLR magnitude for hard-decision input
// Higher value = more confidence in received bits
// For BER ~0.01, optimal is about 4.6; we use slightly lower for robustness
#define CHANNEL_LLR_MAG 4.0f
// Clipping value to prevent numerical overflow in tanh operations
#define LLR_CLIP 20.0f
// =============================================================================
// Parity-Check Matrix Generation
// =============================================================================
// For rate 1/2 LDPC: n = 2k bits, parity-check matrix H is (n-k) x n = k x 2k
// We use H = [P | I_k] where P is the parity pattern matrix
// This gives systematic encoding: c = [data | parity] where parity = P * data
// Parity pattern: each parity bit j depends on data bits where pattern[j][i] = 1
// We use a regular pattern with column weight 3 (each data bit affects 3 parity bits)
// and row weight varies to cover the data bits well
// Deterministic 32-bit mixer used to derive the parity connections.
// Combines the two inputs with XOR, then applies a multiply / xor-shift
// avalanche. Equal inputs therefore always hash to 0.
static inline uint32_t hash_mix(uint32_t a, uint32_t b) {
    uint32_t x = a ^ b;
    x ^= x >> 16;
    x *= 0x85ebca6b;
    x ^= x >> 13;
    x *= 0xc2b2ae35;
    x ^= x >> 16;
    return x;
}
// Read one bit from a byte array; bit 0 is the MSB of data[0].
static inline int get_bit(const uint8_t *data, int bit_idx) {
    const int shift = 7 - (bit_idx & 7);
    const uint8_t byte = data[bit_idx >> 3];
    return (byte >> shift) & 1;
}
// Write one bit in a byte array (bit 0 is the MSB of data[0]).
// Any non-zero `value` sets the bit, zero clears it.
static inline void set_bit(uint8_t *data, int bit_idx, int value) {
    uint8_t *target = &data[bit_idx >> 3];
    const int mask = 1 << (7 - (bit_idx & 7));
    if (!value) {
        *target &= ~mask;
        return;
    }
    *target |= mask;
}
// Toggle one bit in a byte array (bit 0 is the MSB of data[0]).
static inline void flip_bit(uint8_t *data, int bit_idx) {
    const int mask = 1 << (7 - (bit_idx & 7));
    data[bit_idx >> 3] ^= mask;
}
// List the data bits that feed parity bit `parity_idx`.
//
// parity_idx   parity (check) index
// k_bits       number of data bits in the block; must be > 0
// connections  out: data-bit indices in ascending order, followed by any
//              fallback entries; callers in this file pass k_bits-sized buffers
// returns      number of entries written
//
// Data bit i is selected when hash_mix(seed, i) is divisible by
// (k_bits / 3 + 1), with a seed derived from parity_idx. Selection is
// deterministic; the average row weight is presumably around 3.
//
// If fewer than two bits were selected, two fixed fallback bits are appended:
// parity_idx % k_bits and (parity_idx + k_bits / 2) % k_bits.
// NOTE(review): the fallback is not de-duplicated against a bit that was
// already selected, so that bit can appear twice and cancel out under XOR.
// Encoder and decoder both use this function, so altering the pattern would
// change the code itself.
static int get_parity_connections(int parity_idx, int k_bits, int *connections) {
    const uint32_t row_seed = hash_mix(0xDEADBEEF, (uint32_t)parity_idx);
    int found = 0;

    for (int bit = 0; bit < k_bits; bit++) {
        const uint32_t draw = hash_mix(row_seed, (uint32_t)bit);
        if ((draw % (k_bits / 3 + 1)) != 0) {
            continue;
        }
        connections[found] = bit;
        found++;
    }

    if (found >= 2) {
        return found;
    }

    // Guarantee a minimum row weight with two fixed positions.
    connections[found] = parity_idx % k_bits;
    connections[found + 1] = (parity_idx + k_bits / 2) % k_bits;
    return found + 2;
}
// List the parity bits whose equation contains data bit `data_idx`.
// This is the transpose of get_parity_connections(): every parity row is
// regenerated and scanned, and a row is reported at most once.
// Returns the number of parity indices written to connections[].
static int get_data_connections(int data_idx, int k_bits, int *connections) {
    int row[LDPC_MAX_DATA_BYTES * 8];
    int found = 0;

    for (int check = 0; check < k_bits; check++) {
        const int row_len = get_parity_connections(check, k_bits, row);
        int hit = 0;
        for (int pos = 0; pos < row_len && !hit; pos++) {
            hit = (row[pos] == data_idx);
        }
        if (hit) {
            connections[found] = check;
            found++;
        }
    }
    return found;
}
// =============================================================================
// Initialization
// =============================================================================
// Set once ldpc_init() has run; guarded by nothing but this plain flag.
static int ldpc_initialized = 0;

// One-time codec setup. There is nothing to precompute (connection patterns
// are generated on demand), so this only records that init happened.
void ldpc_init(void) {
    if (!ldpc_initialized) {
        ldpc_initialized = 1;
    }
}
// =============================================================================
// Encoding
// =============================================================================
// Systematic rate-1/2 encode: output = [data bytes][parity bytes].
//
// data      input bytes
// data_len  number of input bytes; silently clamped to LDPC_MAX_DATA_BYTES
// output    receives 2 * (clamped) data_len bytes
// returns   number of bytes written
//
// Parity bit j is the XOR of the data bits listed by
// get_parity_connections(j, ...).
size_t ldpc_encode(const uint8_t *data, size_t data_len, uint8_t *output) {
    if (!ldpc_initialized) ldpc_init();

    const size_t payload_len =
        (data_len > LDPC_MAX_DATA_BYTES) ? LDPC_MAX_DATA_BYTES : data_len;
    const int k_bits = (int)(payload_len * 8);
    uint8_t *parity_bytes = output + payload_len;

    // Systematic part first, then a zeroed parity area to fill in.
    memcpy(output, data, payload_len);
    memset(parity_bytes, 0, payload_len);

    int row[LDPC_MAX_DATA_BYTES * 8];
    for (int check = 0; check < k_bits; check++) {
        const int row_len = get_parity_connections(check, k_bits, row);
        int acc = 0;
        for (int pos = 0; pos < row_len; pos++) {
            acc ^= get_bit(data, row[pos]);
        }
        set_bit(parity_bytes, check, acc);
    }

    return payload_len * 2;
}
// =============================================================================
// Decoding
// =============================================================================
// Verify every parity equation of a codeword laid out as [data | parity].
//
// codeword  encoded block
// len       codeword length in bytes; the data half is taken as len / 2
// returns   1 if all equations hold (zero syndrome), 0 at the first failure
//
// NOTE(review): unlike ldpc_decode(), `len` is not validated here (odd or
// oversized lengths are not rejected).
int ldpc_check_syndrome(const uint8_t *codeword, size_t len) {
    if (!ldpc_initialized) ldpc_init();

    const size_t data_len = len / 2;
    const int k_bits = (int)(data_len * 8);
    const uint8_t *parity_bytes = codeword + data_len;

    int row[LDPC_MAX_DATA_BYTES * 8];
    for (int check = 0; check < k_bits; check++) {
        const int row_len = get_parity_connections(check, k_bits, row);

        // Equation: stored parity bit XOR all connected data bits == 0.
        int acc = get_bit(parity_bytes, check);
        for (int pos = 0; pos < row_len; pos++) {
            acc ^= get_bit(codeword, row[pos]);
        }
        if (acc) {
            return 0;
        }
    }
    return 1;
}
// Saturate an LLR to the range [-LLR_CLIP, +LLR_CLIP].
static inline float clip_llr(float llr) {
    return (llr > LLR_CLIP) ? LLR_CLIP
         : (llr < -LLR_CLIP) ? -LLR_CLIP
         : llr;
}
// Sign of a float as +1.0f / -1.0f; zero (of either sign) maps to +1.0f.
static inline float sign_f(float x) {
    if (x >= 0.0f) {
        return 1.0f;
    }
    return -1.0f;
}
int ldpc_decode(const uint8_t *encoded, size_t encoded_len, uint8_t *output) {
if (!ldpc_initialized) ldpc_init();
if (encoded_len < 2 || (encoded_len & 1) != 0) {
return -1; // Invalid length
}
size_t data_len = encoded_len / 2;
if (data_len > LDPC_MAX_DATA_BYTES) {
return -1;
}
int k_bits = (int)(data_len * 8);
int n_bits = k_bits * 2; // Total codeword bits (data + parity)
// Pre-compute the parity check matrix structure for efficiency
// For each check node j: which variable nodes it connects to
int check_to_var[LDPC_MAX_DATA_BYTES * 8][LDPC_MAX_DATA_BYTES * 8 + 1];
int check_degree[LDPC_MAX_DATA_BYTES * 8];
for (int j = 0; j < k_bits; j++) {
int connections[LDPC_MAX_DATA_BYTES * 8];
int n_conns = get_parity_connections(j, k_bits, connections);
// Check j connects to: data bits in connections[] + parity bit j
check_degree[j] = n_conns + 1;
for (int c = 0; c < n_conns; c++) {
check_to_var[j][c] = connections[c]; // Data bit index
}
check_to_var[j][n_conns] = k_bits + j; // Parity bit index
}
// Initialize channel LLRs from received hard bits
// LLR > 0 means bit is probably 0, LLR < 0 means bit is probably 1
float channel_llr[LDPC_MAX_DATA_BYTES * 16];
for (int i = 0; i < n_bits; i++) {
int bit = get_bit(encoded, i);
channel_llr[i] = bit ? -CHANNEL_LLR_MAG : CHANNEL_LLR_MAG;
}
// Message arrays for BP
// check_to_var_msg[j][idx] = message from check j to variable check_to_var[j][idx]
float check_to_var_msg[LDPC_MAX_DATA_BYTES * 8][LDPC_MAX_DATA_BYTES * 8 + 1];
// Initialize check-to-variable messages to zero
memset(check_to_var_msg, 0, sizeof(check_to_var_msg));
// Belief Propagation iterations
for (int iter = 0; iter < LDPC_MAX_ITERATIONS; iter++) {
// Step 1: Variable-to-check messages (implicit, computed on the fly)
// var_to_check[v→j] = channel_llr[v] + sum of all check_to_var_msg[k][idx_v] for k != j
// Step 2: Check-to-variable messages using min-sum approximation
// For each check node j, for each connected variable v:
// check_to_var_msg[j→v] = sign * min(|incoming messages from other vars|)
for (int j = 0; j < k_bits; j++) {
int degree = check_degree[j];
// First, compute variable-to-check messages for all variables in this check
float var_to_check[LDPC_MAX_DATA_BYTES * 8 + 1];
for (int idx = 0; idx < degree; idx++) {
int v = check_to_var[j][idx];
// Sum all incoming check messages to variable v, except from check j
float sum = channel_llr[v];
for (int jj = 0; jj < k_bits; jj++) {
if (jj == j) continue;
// Find if check jj connects to variable v
for (int idx2 = 0; idx2 < check_degree[jj]; idx2++) {
if (check_to_var[jj][idx2] == v) {
sum += check_to_var_msg[jj][idx2];
break;
}
}
}
var_to_check[idx] = clip_llr(sum);
}
// Now compute check-to-variable messages using min-sum
for (int idx = 0; idx < degree; idx++) {
float sign_prod = 1.0f;
float min_abs = 1e30f;
for (int idx2 = 0; idx2 < degree; idx2++) {
if (idx2 == idx) continue;
float msg = var_to_check[idx2];
sign_prod *= sign_f(msg);
float abs_msg = fabsf(msg);
if (abs_msg < min_abs) min_abs = abs_msg;
}
// Min-sum with scaling factor 0.75 for better performance
check_to_var_msg[j][idx] = clip_llr(sign_prod * min_abs * 0.75f);
}
}
// Step 3: Compute posterior LLRs and make hard decisions
float posterior[LDPC_MAX_DATA_BYTES * 16];
for (int v = 0; v < n_bits; v++) {
float sum = channel_llr[v];
// Add all incoming check-to-variable messages
for (int j = 0; j < k_bits; j++) {
for (int idx = 0; idx < check_degree[j]; idx++) {
if (check_to_var[j][idx] == v) {
sum += check_to_var_msg[j][idx];
break;
}
}
}
posterior[v] = sum;
}
// Make hard decisions
uint8_t decoded[LDPC_MAX_DATA_BYTES * 2];
memset(decoded, 0, encoded_len);
for (int v = 0; v < n_bits; v++) {
if (posterior[v] < 0) {
set_bit(decoded, v, 1);
}
}
// Check syndrome
int syndrome_count = 0;
for (int j = 0; j < k_bits; j++) {
int syn = 0;
for (int idx = 0; idx < check_degree[j]; idx++) {
syn ^= get_bit(decoded, check_to_var[j][idx]);
}
if (syn) syndrome_count++;
}
// If all syndromes are zero, we're done
if (syndrome_count == 0) {
memcpy(output, decoded, data_len);
return 0;
}
// Early termination if syndrome count is very small (nearly converged)
if (iter > 5 && syndrome_count <= 2) {
// Try one more iteration, if still stuck, accept
}
}
// Decoding did not converge - compute final estimate
float posterior[LDPC_MAX_DATA_BYTES * 16];
for (int v = 0; v < n_bits; v++) {
float sum = channel_llr[v];
for (int j = 0; j < k_bits; j++) {
for (int idx = 0; idx < check_degree[j]; idx++) {
if (check_to_var[j][idx] == v) {
sum += check_to_var_msg[j][idx];
break;
}
}
}
posterior[v] = sum;
}
uint8_t decoded[LDPC_MAX_DATA_BYTES * 2];
memset(decoded, 0, encoded_len);
for (int v = 0; v < n_bits; v++) {
if (posterior[v] < 0) {
set_bit(decoded, v, 1);
}
}
// Check final syndrome count
int final_syndromes = 0;
for (int j = 0; j < k_bits; j++) {
int syn = 0;
for (int idx = 0; idx < check_degree[j]; idx++) {
syn ^= get_bit(decoded, check_to_var[j][idx]);
}
if (syn) final_syndromes++;
}
// Accept if syndrome count is low enough
if (final_syndromes <= k_bits / 4) {
memcpy(output, decoded, data_len);
return 0; // Soft success
}
// Total failure - return original data as best effort
memcpy(output, encoded, data_len);
return -1;
}

View File

@@ -1,68 +0,0 @@
/**
* LDPC Rate 1/2 Codec for TAV-DT
*
* Simple LDPC implementation for header protection in TAV-DT format.
* Rate 1/2: k data bytes → 2k encoded bytes (doubles the size)
*
* Uses systematic encoding where first k bytes are data, last k bytes are parity.
* Decoding uses an iterative belief-propagation (scaled min-sum) decoder.
*
* Designed for small blocks (headers up to 64 bytes).
*
* Created by CuriousTorvald and Claude on 2025-12-09.
*/
#ifndef LDPC_H
#define LDPC_H
#include <stdint.h>
#include <stddef.h>
// Maximum block size (data bytes before encoding)
#define LDPC_MAX_DATA_BYTES 64
// LDPC decoder parameters
#define LDPC_MAX_ITERATIONS 50
/**
* Initialize LDPC codec.
* Calling it explicitly is optional: the encode, decode and syndrome-check
* functions call it themselves on first use. Repeated calls are no-ops.
* The guard is a plain static flag with no locking.
*/
void ldpc_init(void);
/**
* Encode data block with LDPC rate 1/2.
*
* @param data Input data bytes
* @param data_len Length of input data (1 to LDPC_MAX_DATA_BYTES)
* @param output Output buffer (must hold 2 * data_len bytes)
* @return Output length (2 * data_len)
*
* Output format: [data bytes][parity bytes]
* The output is systematic: first data_len bytes are the original data.
*/
size_t ldpc_encode(const uint8_t *data, size_t data_len, uint8_t *output);
/**
* Decode LDPC rate 1/2 encoded block.
*
* @param encoded Input encoded data (2 * data_len bytes)
* @param encoded_len Length of encoded data (must be even, max 2*LDPC_MAX_DATA_BYTES)
* @param output Output buffer for decoded data (encoded_len / 2 bytes)
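* @return 0 on success, -1 if the length is invalid or decoding failed
* (too many errors). On a decoding failure the received data half is
* copied to output unchanged.
*
* Uses an iterative belief-propagation (scaled min-sum) decoder. A return
* of 0 may also be a "soft success": the decoder accepts its estimate when
* only a small number of parity checks are still unsatisfied.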
*/
int ldpc_decode(const uint8_t *encoded, size_t encoded_len, uint8_t *output);
/**
* Calculate syndrome for validation.
*
* @param codeword Encoded codeword (2 * data_len bytes)
* @param len Length of codeword
* @return 1 if valid (zero syndrome), 0 if errors detected
*/
int ldpc_check_syndrome(const uint8_t *codeword, size_t len);
#endif // LDPC_H

View File

@@ -1,478 +0,0 @@
/**
* LDPC(255,223) Codec Implementation - Enhanced Version
*
* This implements a high-rate LDPC code designed to compete with RS(255,223).
*
* Key improvements in this version:
* - Sum-Product (Belief Propagation) decoder for optimal performance
* - Quasi-cyclic H matrix with optimized degree distribution
* - Layered scheduling for faster convergence
* - Adaptive LLR initialization
*
* Created by CuriousTorvald and Claude on 2025-12-15.
*/
#include "ldpc_payload.h"
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <stdio.h>
// =============================================================================
// Constants
// =============================================================================
#define N_BITS (LDPC_P_BLOCK_SIZE * 8) // 2040 total bits
#define K_BITS (LDPC_P_DATA_SIZE * 8) // 1784 data bits
#define M_BITS (LDPC_P_PARITY_SIZE * 8) // 256 parity bits
// LLR bounds - tighter bounds help prevent numerical issues
#define LLR_MAX 20.0f
#define LLR_MIN -20.0f
// Decoding parameters
#define LDPC_MAX_ITER 100
// =============================================================================
// Sparse Matrix Storage
// =============================================================================
#define MAX_CHECK_DEGREE 50
#define MAX_VAR_DEGREE 12
static int ldpc_p_initialized = 0;
static int check_degree[M_BITS];
static int check_to_var[M_BITS][MAX_CHECK_DEGREE];
static int check_to_var_idx[M_BITS][MAX_CHECK_DEGREE];
static int var_degree[N_BITS];
static int var_to_check[N_BITS][MAX_VAR_DEGREE];
static int var_to_check_idx[N_BITS][MAX_VAR_DEGREE];
// =============================================================================
// Bit manipulation
// =============================================================================
// Read one bit from a byte array; bit 0 is the MSB of data[0].
static inline int get_bit(const uint8_t *data, int bit_idx) {
    const int shift = 7 - (bit_idx & 7);
    const uint8_t byte = data[bit_idx >> 3];
    return (byte >> shift) & 1;
}
// Write one bit in a byte array (bit 0 is the MSB of data[0]).
// Any non-zero `value` sets the bit, zero clears it.
static inline void set_bit(uint8_t *data, int bit_idx, int value) {
    uint8_t *target = &data[bit_idx >> 3];
    const int mask = 1 << (7 - (bit_idx & 7));
    if (!value) {
        *target &= ~mask;
        return;
    }
    *target |= mask;
}
// =============================================================================
// H Matrix Construction - Quasi-Cyclic with Optimized Distribution
// =============================================================================
// Deterministic 32-bit hash of a pair, used to pick pseudo-random H-matrix
// connections. `b` is pre-multiplied by a constant before being XORed into
// `a`, then the result goes through a multiply / xor-shift avalanche.
static inline uint32_t hash32(uint32_t a, uint32_t b) {
    uint32_t mixed = a ^ (b * 0x9E3779B9);
    mixed = (mixed ^ (mixed >> 16)) * 0x85EBCA6B;
    mixed = (mixed ^ (mixed >> 13)) * 0xC2B2AE35;
    return mixed ^ (mixed >> 16);
}
// Add the edge (check, var) to both adjacency tables.
// Silently does nothing when the edge already exists or when either node has
// reached its degree cap (MAX_CHECK_DEGREE / MAX_VAR_DEGREE). Callers detect
// success by watching the degree counters.
// The *_idx tables record, for each edge, its slot in the opposite node's
// list.
static void add_edge(int check, int var) {
    const int cidx = check_degree[check];
    const int vidx = var_degree[var];

    // Reject duplicates.
    for (int i = 0; i < cidx; i++) {
        if (check_to_var[check][i] == var) return;
    }
    // Reject when either side is full.
    if (cidx >= MAX_CHECK_DEGREE || vidx >= MAX_VAR_DEGREE) {
        return;
    }

    check_to_var[check][cidx] = var;
    check_to_var_idx[check][cidx] = vidx;
    check_degree[check] = cidx + 1;

    var_to_check[var][vidx] = check;
    var_to_check_idx[var][vidx] = cidx;
    var_degree[var] = vidx + 1;
}
// Bounded test for whether adding edge (v, c) is undesirable.
// Returns 1 when v is already attached to c, or when a length-4 cycle
// v - c' - v' - c would be closed; returns 0 otherwise.
// The 4-cycle search is skipped (returns 0) for nodes with var degree > 4 or
// check degree > 20, and it inspects at most the first 15 variables of each
// neighbouring check and the first 8 checks of each of those variables.
// A 0 result is therefore not a guarantee that no 4-cycle exists.
static int would_create_short_cycle(int v, int c) {
    const int v_deg = var_degree[v];

    // Already connected.
    for (int n = 0; n < v_deg; n++) {
        if (var_to_check[v][n] == c) return 1;
    }

    // Only low-degree nodes get the (bounded) 4-cycle search.
    if (!(v_deg <= 4 && check_degree[c] <= 20)) return 0;

    for (int n = 0; n < v_deg; n++) {
        const int via = var_to_check[v][n];
        const int scan = (check_degree[via] < 15) ? check_degree[via] : 15;
        for (int m = 0; m < scan; m++) {
            const int other = check_to_var[via][m];
            if (other != v) {
                const int depth = (var_degree[other] < 8) ? var_degree[other] : 8;
                for (int k = 0; k < depth; k++) {
                    if (var_to_check[other][k] == c) return 1;
                }
            }
        }
    }
    return 0;
}
// Cyclic index shift: (base_idx + shift) reduced modulo `size` with C's `%`
// (so a negative sum yields a non-positive result). `size` must be non-zero.
static int qc_shift(int base_idx, int shift, int size) {
    const int rotated = base_idx + shift;
    return rotated % size;
}
// Build the sparse parity-check matrix H into the file-level adjacency tables
// (check_degree / check_to_var / var_degree / var_to_check and the *_idx maps).
// The construction is fully deterministic: every choice comes from hash32()
// and from the degrees accumulated so far, so edge insertion order matters.
// NOTE(review): presumably encoder and decoder must agree on this matrix, so
// any change to the steps or constants below would alter the code itself.
// This function does not call would_create_short_cycle() or qc_shift().
static void build_h_matrix(void) {
// Start from an empty graph.
memset(check_degree, 0, sizeof(check_degree));
memset(var_degree, 0, sizeof(var_degree));
// ==========================================================================
// H matrix with staircase parity and hash-driven greedy data connections
// ==========================================================================
// --- Part 1: Staircase parity structure ---
// Check c contains parity bit c and, for c > 0, parity bit c - 1.
for (int c = 0; c < M_BITS; c++) {
int parity_bit = K_BITS + c;
add_edge(c, parity_bit);
if (c > 0) {
add_edge(c, K_BITS + c - 1);
}
}
// --- Part 2: Connect data bits (greedy lowest-degree probing) ---
for (int v = 0; v < K_BITS; v++) {
// Target 6 connections per variable
int target = 6;
for (int d = 0; d < target; d++) {
uint32_t h = hash32((uint32_t)v * 2654435769U, (uint32_t)d * 1597334677U);
// Find best check (lowest degree) among up to 16 hashed candidates
int best_c = -1;
int best_deg = MAX_CHECK_DEGREE;
for (int attempt = 0; attempt < 16; attempt++) {
int c = (int)((h + attempt * 127) % M_BITS);
// Candidate must beat the best so far and leave 2 slots of headroom
if (check_degree[c] < best_deg && check_degree[c] < MAX_CHECK_DEGREE - 2) {
// Check not already connected
int connected = 0;
for (int i = 0; i < var_degree[v]; i++) {
if (var_to_check[v][i] == c) { connected = 1; break; }
}
if (!connected) {
best_deg = check_degree[c];
best_c = c;
if (best_deg < 30) break; // Good enough
}
}
}
// A failed probe simply leaves this connection out (Part 3 tops up)
if (best_c >= 0 && var_degree[v] < MAX_VAR_DEGREE - 1) {
add_edge(best_c, v);
}
}
}
// --- Part 3: Fill in low-degree variables ---
// Raise every data bit to degree 5 where possible, probing up to 64 checks.
for (int v = 0; v < K_BITS; v++) {
while (var_degree[v] < 5) {
uint32_t h = hash32((uint32_t)v * 12345, (uint32_t)var_degree[v] * 67890);
int added = 0;
for (int attempt = 0; attempt < 64 && !added; attempt++) {
int c = (int)((h + attempt * 31) % M_BITS);
if (check_degree[c] < MAX_CHECK_DEGREE - 2) {
// add_edge() may refuse (duplicate / full); detect success via the degree
int prev = var_degree[v];
add_edge(c, v);
if (var_degree[v] > prev) added = 1;
}
}
// Give up on this variable if no probe succeeded (avoids an endless loop)
if (!added) break;
}
}
// --- Part 4: Balance check degrees ---
// Try to raise each check to degree 35 using up to 150 hashed data bits.
for (int c = 0; c < M_BITS; c++) {
int target = 35;
int attempts = 0;
while (check_degree[c] < target && attempts < 150) {
uint32_t h = hash32((uint32_t)c * 48271, (uint32_t)attempts * 16807);
int v = (int)(h % K_BITS);
if (var_degree[v] < MAX_VAR_DEGREE - 1) {
add_edge(c, v);
}
attempts++;
}
}
}
// One-time codec setup: builds the parity-check matrix on the first call and
// does nothing on later calls. The guard is a plain flag with no locking.
void ldpc_p_init(void) {
    if (!ldpc_p_initialized) {
        build_h_matrix();
        ldpc_p_initialized = 1;
    }
}
// =============================================================================
// Syndrome Check
// =============================================================================
// Tests whether `codeword` satisfies every parity check.
//   codeword  packed bits, read through get_bit for each variable index
// Returns 1 when the XOR of the connected bits is zero for all M_BITS checks,
// 0 as soon as one check fails. Initialises the codec on first use.
int ldpc_p_check_syndrome(const uint8_t *codeword) {
    if (!ldpc_p_initialized) ldpc_p_init();

    for (int row = 0; row < M_BITS; row++) {
        int parity = 0;
        for (int k = 0; k < check_degree[row]; k++) {
            parity ^= get_bit(codeword, check_to_var[row][k]);
        }
        if (parity != 0) return 0;
    }
    return 1;
}
// =============================================================================
// Encoding
// =============================================================================
// Systematic encoder for one block.
//   data      input bytes
//   data_len  number of input bytes; values above LDPC_P_DATA_SIZE are clamped
//   output    receives LDPC_P_DATA_SIZE data bytes (zero-padded when the input
//             is shorter) followed by LDPC_P_PARITY_SIZE parity bytes
// Returns LDPC_P_BLOCK_SIZE regardless of data_len.
size_t ldpc_p_encode(const uint8_t *data, size_t data_len, uint8_t *output) {
    if (!ldpc_p_initialized) ldpc_p_init();

    const size_t payload = (data_len > LDPC_P_DATA_SIZE) ? LDPC_P_DATA_SIZE : data_len;
    uint8_t *parity = output + LDPC_P_DATA_SIZE;

    // Lay out the systematic part, zero-fill the unused tail, clear the parity area.
    memcpy(output, data, payload);
    if (payload < LDPC_P_DATA_SIZE) {
        memset(output + payload, 0, LDPC_P_DATA_SIZE - payload);
    }
    memset(parity, 0, LDPC_P_PARITY_SIZE);

    // Per-check XOR of the connected data bits (parity variables are skipped).
    int partial[M_BITS];
    for (int row = 0; row < M_BITS; row++) {
        int acc = 0;
        for (int k = 0; k < check_degree[row]; k++) {
            const int var = check_to_var[row][k];
            if (var >= K_BITS) continue;
            acc ^= get_bit(output, var);
        }
        partial[row] = acc;
    }

    // Staircase back-substitution: each parity bit is the check's data
    // contribution XORed with the previous parity bit.
    int carry = 0;
    for (int row = 0; row < M_BITS; row++) {
        carry ^= partial[row];
        set_bit(parity, row, carry);
    }
    return LDPC_P_BLOCK_SIZE;
}
// =============================================================================
// Min-Sum Decoder with Optimized Parameters
// =============================================================================
// Limits a log-likelihood ratio to the closed range [LLR_MIN, LLR_MAX].
// Values already inside the range are returned unchanged.
static inline float clamp_llr(float x) {
    return (x > LLR_MAX) ? LLR_MAX : ((x < LLR_MIN) ? LLR_MIN : x);
}
// Hard-input decoder for one block using normalized min-sum with layered
// (check-by-check) updates.
//   data      buffer holding data_len data bytes immediately followed by
//             LDPC_P_PARITY_SIZE parity bytes; the first data_len bytes are
//             overwritten with the decoder's result
//   data_len  length of the data portion
// Returns:
//   -1        if data_len + LDPC_P_PARITY_SIZE exceeds LDPC_P_BLOCK_SIZE
//    0        if the input already passes the syndrome check (buffer untouched)
//    iter + 1 when the syndrome check passes after that many iterations
//   -1        if LDPC_MAX_ITER iterations pass without a valid codeword; the
//             last hard decision is still copied into `data`
// NOTE(review): parity is read from offset data_len, whereas ldpc_p_encode
// always writes parity at offset LDPC_P_DATA_SIZE. The two agree only when
// data_len == LDPC_P_DATA_SIZE (which is what ldpc_p_decode_blocks passes) -
// confirm no caller uses a shorter data_len on encoder output.
int ldpc_p_decode(uint8_t *data, size_t data_len) {
if (!ldpc_p_initialized) ldpc_p_init();
size_t total_len = data_len + LDPC_P_PARITY_SIZE;
if (total_len > LDPC_P_BLOCK_SIZE) {
return -1;
}
// Working codeword buffer
// (input copied in, anything beyond total_len zero-filled)
uint8_t codeword[LDPC_P_BLOCK_SIZE];
memcpy(codeword, data, total_len);
if (total_len < LDPC_P_BLOCK_SIZE) {
memset(codeword + total_len, 0, LDPC_P_BLOCK_SIZE - total_len);
}
// Quick check - if already valid, no decoding needed
if (ldpc_p_check_syndrome(codeword)) {
return 0;
}
// ==========================================================================
// Initialize channel LLRs
// ==========================================================================
// Every received bit gets the same confidence: +6 for a 0 bit, -6 for a 1 bit.
float var_llr[N_BITS];
float llr_magnitude = 6.0f;
for (int v = 0; v < N_BITS; v++) {
int bit = get_bit(codeword, v);
var_llr[v] = bit ? -llr_magnitude : llr_magnitude;
}
// Message storage
// Declared static: a single buffer is shared by every call, so concurrent
// calls would overwrite each other's messages. Only the entries in use
// (i < check_degree[c]) are cleared here.
static float c2v[M_BITS][MAX_CHECK_DEGREE];
for (int c = 0; c < M_BITS; c++) {
for (int i = 0; i < check_degree[c]; i++) {
c2v[c][i] = 0.0f;
}
}
// ==========================================================================
// Normalized Min-Sum Decoding with Layered Scheduling
// ==========================================================================
float v2c[MAX_CHECK_DEGREE];
const float alpha = 0.75f; // Normalization factor
for (int iter = 0; iter < LDPC_MAX_ITER; iter++) {
// Process each check node (layer)
for (int c = 0; c < M_BITS; c++) {
int deg = check_degree[c];
// Step 1: Compute variable-to-check messages
// (current total LLR minus what this check contributed last time)
for (int i = 0; i < deg; i++) {
int v = check_to_var[c][i];
v2c[i] = var_llr[v] - c2v[c][i];
}
// Step 2: Compute check-to-variable messages using min-sum
for (int i = 0; i < deg; i++) {
float sign_prod = 1.0f;
float min1 = LLR_MAX, min2 = LLR_MAX;
// Scan every other edge of this check: product of signs and the
// smallest magnitude. min2 is tracked but never read afterwards,
// because the j == i skip already excludes the target edge.
for (int j = 0; j < deg; j++) {
if (j == i) continue;
float val = v2c[j];
if (val < 0) sign_prod = -sign_prod;
float absval = fabsf(val);
if (absval < min1) {
min2 = min1;
min1 = absval;
} else if (absval < min2) {
min2 = absval;
}
}
// Normalized min-sum message
float msg_mag = alpha * min1;
float new_c2v = sign_prod * msg_mag;
// Update variable LLR immediately (layered approach)
// Swap the old message for the new one, then clamp.
int v = check_to_var[c][i];
var_llr[v] = clamp_llr(var_llr[v] - c2v[c][i] + new_c2v);
c2v[c][i] = new_c2v;
}
}
// Make hard decisions
// (negative LLR -> bit 1, otherwise bit 0)
for (int v = 0; v < N_BITS; v++) {
set_bit(codeword, v, var_llr[v] < 0 ? 1 : 0);
}
// Check if valid codeword
if (ldpc_p_check_syndrome(codeword)) {
memcpy(data, codeword, data_len);
return iter + 1;
}
// Adaptive restart at iteration milestones
// LLRs are re-seeded from the current hard decision (not the original
// input) with magnitude 3.5, 3.0 and 2.5 at iter 25, 50 and 75
// (iter / 25 is integer division), and all messages are cleared.
if (iter == 25 || iter == 50 || iter == 75) {
float new_mag = 4.0f - (iter / 25) * 0.5f;
for (int v = 0; v < N_BITS; v++) {
int bit = get_bit(codeword, v);
var_llr[v] = bit ? -new_mag : new_mag;
}
for (int c = 0; c < M_BITS; c++) {
for (int i = 0; i < check_degree[c]; i++) {
c2v[c][i] = 0.0f;
}
}
}
}
// Failed to converge
// The best-effort hard decision is still written back before reporting failure.
memcpy(data, codeword, data_len);
return -1;
}
// =============================================================================
// Block-level operations
// =============================================================================
// Splits `data` into chunks of at most LDPC_P_DATA_SIZE bytes and encodes each
// chunk into its own LDPC_P_BLOCK_SIZE-byte block, written back to back.
//   data      input bytes
//   data_len  number of input bytes (0 produces no output)
//   output    receives one full block per chunk
// Returns the number of bytes written to output.
size_t ldpc_p_encode_blocks(const uint8_t *data, size_t data_len, uint8_t *output) {
    if (!ldpc_p_initialized) ldpc_p_init();

    size_t written = 0;
    for (size_t consumed = 0; consumed < data_len; ) {
        size_t chunk = data_len - consumed;
        if (chunk > LDPC_P_DATA_SIZE) chunk = LDPC_P_DATA_SIZE;

        ldpc_p_encode(data + consumed, chunk, output + written);

        consumed += chunk;
        written += LDPC_P_BLOCK_SIZE;
    }
    return written;
}
// Decodes consecutive LDPC_P_BLOCK_SIZE-byte blocks in place and copies their
// data bytes to `output`.
//   data        encoded blocks (modified in place by the decoder)
//   total_len   bytes available in data; a trailing partial block is ignored
//   output      receives the decoded data bytes
//   output_len  number of data bytes wanted
// Returns the sum of the per-block decoder results, or -1 as soon as a block
// fails. The loop ends when either the input blocks or the requested output
// run out, so fewer than output_len bytes may have been written on return.
int ldpc_p_decode_blocks(uint8_t *data, size_t total_len, uint8_t *output, size_t output_len) {
    if (!ldpc_p_initialized) ldpc_p_init();

    int iterations = 0;
    size_t wanted = output_len;
    uint8_t *block = data;
    uint8_t *out = output;

    for (; total_len >= LDPC_P_BLOCK_SIZE && wanted > 0;
         total_len -= LDPC_P_BLOCK_SIZE, block += LDPC_P_BLOCK_SIZE) {
        const size_t take = (wanted > LDPC_P_DATA_SIZE) ? LDPC_P_DATA_SIZE : wanted;

        // Every block is decoded at full data size; only `take` bytes are kept.
        const int used = ldpc_p_decode(block, LDPC_P_DATA_SIZE);
        if (used < 0) return -1;
        iterations += used;

        memcpy(out, block, take);
        out += take;
        wanted -= take;
    }
    return iterations;
}

View File

@@ -1,97 +0,0 @@
/**
* LDPC(255,223) Codec for TAV-DT Payloads
*
* Alternative to RS(255,223) with same rate (~0.875):
* - Block size: 255 bytes (223 data + 32 parity)
* - Uses quasi-cyclic LDPC structure for efficiency
* - Soft-decision belief propagation decoder
*
* Designed as drop-in replacement for RS(255,223):
* - Same input/output sizes
* - Same API style
* - Different error correction characteristics:
* - LDPC: Better at high BER (>1e-3), gradual degradation
* - RS: Better at low BER, hard threshold at 16 byte errors
*
* Created by CuriousTorvald and Claude on 2025-12-15.
*/
#ifndef LDPC_PAYLOAD_H
#define LDPC_PAYLOAD_H
#include <stdint.h>
#include <stddef.h>
// LDPC(255,223) parameters - matches RS(255,223) for drop-in replacement
#define LDPC_P_BLOCK_SIZE 255 // Total codeword size (bytes)
#define LDPC_P_DATA_SIZE 223 // Data bytes per block
#define LDPC_P_PARITY_SIZE 32 // Parity bytes per block
// Decoder parameters
#define LDPC_P_MAX_ITERATIONS 30 // Maximum BP iterations
#define LDPC_P_EARLY_TERM 1 // Enable early termination on valid codeword
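/**
* Initialize LDPC(255,223) codec.
* Builds the parity-check matrix on the first call; later calls return immediately.
* The encode/decode/syndrome functions also call this lazily on first use.
* Not thread-safe: the "initialized" guard is a plain flag with no locking, so
* make the first call before any other thread uses the codec.
*/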
void ldpc_p_init(void);
/**
* Encode data block with LDPC(255,223).
*
* @param data Input data (up to LDPC_P_DATA_SIZE bytes)
* @param data_len Length of input data (1 to LDPC_P_DATA_SIZE; longer input is truncated)
* @param output Output buffer (must hold LDPC_P_BLOCK_SIZE bytes)
* Format: [data, zero-padded to LDPC_P_DATA_SIZE][parity]
* @return Total output length (always LDPC_P_BLOCK_SIZE)
*
* Note: For data shorter than LDPC_P_DATA_SIZE, the encoder writes the zero
* padding into the output buffer, so a full block is always produced and the
* parity always starts at offset LDPC_P_DATA_SIZE.
*/
size_t ldpc_p_encode(const uint8_t *data, size_t data_len, uint8_t *output);
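/**
* Decode and correct LDPC(255,223) encoded block.
*
* @param data Buffer containing [data][parity] (modified in-place)
* @param data_len Length of data portion (1 to LDPC_P_DATA_SIZE). Parity is read
* from immediately after data_len bytes; blocks produced by ldpc_p_encode keep
* parity at offset LDPC_P_DATA_SIZE, so pass LDPC_P_DATA_SIZE for those.
* @return 0 if the block already passes the syndrome check, otherwise the number
* of decoder iterations used (1 or more), or -1 if uncorrectable or if
* data_len is too large
*
* On success, data buffer contains corrected data.
* On failure, data buffer contents are undefined.
*/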
int ldpc_p_decode(uint8_t *data, size_t data_len);
/**
* Encode data with automatic block splitting.
* For data larger than LDPC_P_DATA_SIZE, splits into multiple blocks.
*
* @param data Input data
* @param data_len Length of input data
* @param output Output buffer (must hold ceil(data_len/223) * 255 bytes)
* @return Total output length
*/
size_t ldpc_p_encode_blocks(const uint8_t *data, size_t data_len, uint8_t *output);
/**
* Decode data with automatic block splitting.
*
* @param data Buffer containing LDPC-encoded blocks (modified in-place)
* @param total_len Total length of encoded data (multiple of LDPC_P_BLOCK_SIZE)
* @param output Output buffer for decoded data
* @param output_len Expected length of decoded data
* @return Total iterations across all blocks, or -1 if any block failed
*/
int ldpc_p_decode_blocks(uint8_t *data, size_t total_len, uint8_t *output, size_t output_len);
/**
* Check if codeword is valid (syndrome check).
*
* @param codeword Full codeword (LDPC_P_BLOCK_SIZE bytes)
* @return 1 if valid (zero syndrome), 0 if errors detected
*/
int ldpc_p_check_syndrome(const uint8_t *codeword);
#endif // LDPC_PAYLOAD_H

View File

@@ -1,417 +0,0 @@
/**
* Reed-Solomon (255,223) Codec Implementation
*
* Standard RS code over GF(2^8) for TAV-DT forward error correction.
*
* Created by CuriousTorvald and Claude on 2025-12-09.
*/
#include "reed_solomon.h"
#include <string.h>
#include <stdio.h>
// =============================================================================
// Galois Field GF(2^8) Arithmetic
// =============================================================================
// Primitive polynomial: x^8 + x^4 + x^3 + x^2 + 1 = 0x11D
#define GF_PRIMITIVE 0x11D
#define GF_SIZE 256
#define GF_MAX 255
// Lookup tables for GF(2^8) arithmetic
// gf_exp[i] holds alpha^i. It is twice as long as the multiplicative group so
// that gf_mul/gf_div can index it with a sum of two logs without reducing
// modulo 255 first.
static uint8_t gf_exp[512]; // Anti-log table (doubled for easy modular reduction)
static uint8_t gf_log[256]; // Log table
// gf_generator[k] is the coefficient of x^k (init_generator builds it lowest degree first).
static uint8_t gf_generator[RS_PARITY_SIZE + 1]; // Generator polynomial coefficients
// Set to 1 by rs_init once the tables above are filled.
static int rs_initialized = 0;
// Fills the GF(2^8) exponent and logarithm tables by repeatedly multiplying
// by the primitive element 2 and reducing with GF_PRIMITIVE.
static void init_gf_tables(void) {
    uint16_t value = 1;
    int power = 0;

    while (power < GF_MAX) {
        gf_exp[power] = (uint8_t)value;
        gf_log[value] = (uint8_t)power;

        // Multiply by alpha; fold the overflow bit back in with the polynomial.
        value <<= 1;
        if (value & 0x100) value ^= GF_PRIMITIVE;
        power++;
    }

    // Repeat the cycle through the rest of the table so callers can index
    // with an unreduced sum of two logarithms.
    for (int idx = GF_MAX; idx < 512; idx++) {
        gf_exp[idx] = gf_exp[idx - GF_MAX];
    }

    // log(0) does not exist; store 0 so the entry is at least initialised.
    gf_log[0] = 0;
}
// GF(2^8) product of a and b via log/antilog tables; zero if either is zero.
static inline uint8_t gf_mul(uint8_t a, uint8_t b) {
    if (!a || !b) return 0;
    const int log_sum = gf_log[a] + gf_log[b];
    return gf_exp[log_sum];
}
// GF(2^8) quotient a / b via log/antilog tables.
// Returns 0 when a is 0, and also when b is 0 (division by zero is not reported).
static inline uint8_t gf_div(uint8_t a, uint8_t b) {
    if (a == 0 || b == 0) return 0;
    const int log_diff = gf_log[a] + GF_MAX - gf_log[b];
    return gf_exp[log_diff];
}
// GF(2^8) power a^n. Anything to the power 0 is 1 (including 0^0); 0 to any
// other power is 0. The table index is computed with C's %, so n is expected
// to be non-negative.
static inline uint8_t gf_pow(uint8_t a, int n) {
    if (n == 0) return 1;
    if (a == 0) return 0;
    const int exponent = (gf_log[a] * n) % GF_MAX;
    return gf_exp[exponent];
}
// GF(2^8) multiplicative inverse of a; returns 0 for a == 0, which has none.
static inline uint8_t gf_inv(uint8_t a) {
    return a ? gf_exp[GF_MAX - gf_log[a]] : 0;
}
// =============================================================================
// Generator Polynomial
// =============================================================================
// Builds g(x) = (x - alpha^0)(x - alpha^1)...(x - alpha^(RS_PARITY_SIZE-1))
// into gf_generator, where gf_generator[k] is the coefficient of x^k.
static void init_generator(void) {
    // g(x) = 1
    for (int k = 0; k <= RS_PARITY_SIZE; k++) {
        gf_generator[k] = (k == 0) ? 1 : 0;
    }

    for (int root = 0; root < RS_PARITY_SIZE; root++) {
        const uint8_t alpha_pow = gf_exp[root];

        // Multiply in place by (x - alpha^root), highest degree first so each
        // coefficient is read before it is overwritten.
        for (int deg = RS_PARITY_SIZE; deg >= 1; deg--) {
            gf_generator[deg] = gf_generator[deg - 1] ^ gf_mul(gf_generator[deg], alpha_pow);
        }
        gf_generator[0] = gf_mul(gf_generator[0], alpha_pow);
    }
}
// =============================================================================
// Public API
// =============================================================================
// One-time codec setup: fills the field tables and the generator polynomial
// on the first call, does nothing afterwards. The guard is a plain flag with
// no locking.
void rs_init(void) {
    if (!rs_initialized) {
        init_gf_tables();
        init_generator();
        rs_initialized = 1;
    }
}
// Systematic Reed-Solomon encoder for one (possibly shortened) block.
//   data      message bytes
//   data_len  message length; values above RS_DATA_SIZE are clamped
//   output    receives the message followed by RS_PARITY_SIZE parity bytes
// Returns the number of bytes written (clamped length + RS_PARITY_SIZE).
size_t rs_encode(const uint8_t *data, size_t data_len, uint8_t *output) {
    if (!rs_initialized) rs_init();

    const size_t msg_len = (data_len > RS_DATA_SIZE) ? RS_DATA_SIZE : data_len;
    uint8_t *parity_out = output + msg_len;

    // Systematic part first, parity area cleared until the remainder is known.
    memcpy(output, data, msg_len);
    memset(parity_out, 0, RS_PARITY_SIZE);

    // Shift-register division of msg(x) * x^RS_PARITY_SIZE by g(x);
    // lfsr[0] holds the highest-degree remainder coefficient.
    uint8_t lfsr[RS_PARITY_SIZE];
    memset(lfsr, 0, RS_PARITY_SIZE);

    for (size_t pos = 0; pos < msg_len; pos++) {
        const uint8_t feedback = data[pos] ^ lfsr[0];

        memmove(lfsr, lfsr + 1, RS_PARITY_SIZE - 1);
        lfsr[RS_PARITY_SIZE - 1] = 0;

        if (feedback == 0) continue;
        for (int tap = 0; tap < RS_PARITY_SIZE; tap++) {
            lfsr[tap] ^= gf_mul(gf_generator[RS_PARITY_SIZE - 1 - tap], feedback);
        }
    }

    memcpy(parity_out, lfsr, RS_PARITY_SIZE);
    return msg_len + RS_PARITY_SIZE;
}
// =============================================================================
// Berlekamp-Massey Decoder
// =============================================================================
// Compute syndromes S_i = r(alpha^i) for i = 0..RS_PARITY_SIZE-1.
//   r          received bytes, r[0] being the highest-degree coefficient
//   len        number of received bytes
//   syndromes  output array of RS_PARITY_SIZE values
// The polynomial is evaluated with Horner's rule (one field multiply per
// byte) rather than raising alpha^i to a separate power for every term; both
// forms yield the same field element. len == 0 gives all-zero syndromes.
static void compute_syndromes(const uint8_t *r, size_t len, uint8_t *syndromes) {
    for (int i = 0; i < RS_PARITY_SIZE; i++) {
        const uint8_t x = gf_exp[i]; // evaluation point alpha^i
        uint8_t acc = 0;
        for (size_t j = 0; j < len; j++) {
            acc = gf_mul(acc, x) ^ r[j];
        }
        syndromes[i] = acc;
    }
}
// Berlekamp-Massey algorithm to find error locator polynomial
//   syndromes  RS_PARITY_SIZE syndrome values
//   sigma      output, RS_PARITY_SIZE + 1 coefficients, sigma[0] == 1
//   sigma_deg  output, set to the final register length L
// Returns L as well; the caller compares it with RS_MAX_ERRORS.
static int berlekamp_massey(const uint8_t *syndromes, uint8_t *sigma, int *sigma_deg) {
uint8_t C[RS_PARITY_SIZE + 1]; // Connection polynomial
uint8_t B[RS_PARITY_SIZE + 1]; // Previous connection polynomial
int L = 0; // Current length of LFSR
int m = 1; // Number of steps since last update
uint8_t b = 1; // Previous discrepancy
// Initialize: C(x) = 1, B(x) = 1
memset(C, 0, sizeof(C));
memset(B, 0, sizeof(B));
C[0] = 1;
B[0] = 1;
for (int n = 0; n < RS_PARITY_SIZE; n++) {
// Compute discrepancy
// (syndrome n versus what the current register C predicts for it)
uint8_t d = syndromes[n];
for (int i = 1; i <= L; i++) {
d ^= gf_mul(C[i], syndromes[n - i]);
}
if (d == 0) {
// No update needed
m++;
} else if (2 * L <= n) {
// Update both C and L
// T keeps the pre-update C, because it becomes the new B afterwards.
uint8_t T[RS_PARITY_SIZE + 1];
memcpy(T, C, sizeof(T));
uint8_t factor = gf_div(d, b);
// C(x) ^= (d / b) * x^m * B(x); the bound keeps i + m inside C
for (int i = 0; i <= RS_PARITY_SIZE - m; i++) {
C[i + m] ^= gf_mul(factor, B[i]);
}
L = n + 1 - L;
memcpy(B, T, sizeof(B));
b = d;
m = 1;
} else {
// Only update C
// (same correction as above, but L, B and b stay as they are)
uint8_t factor = gf_div(d, b);
for (int i = 0; i <= RS_PARITY_SIZE - m; i++) {
C[i + m] ^= gf_mul(factor, B[i]);
}
m++;
}
}
// Copy result
memcpy(sigma, C, RS_PARITY_SIZE + 1);
*sigma_deg = L;
return L;
}
// Chien search: evaluates sigma at alpha^(-i) for i = 0..n-1 and records a
// byte position for every root found.
//   sigma       locator coefficients, sigma[j] multiplying x^j
//   sigma_deg   highest coefficient index used
//   n           codeword length in bytes
//   positions   output: buffer index n-1-i for each root
//   num_errors  output: number of roots found
// Returns 0 when the root count equals sigma_deg, -1 otherwise.
static int chien_search(const uint8_t *sigma, int sigma_deg, size_t n, uint8_t *positions, int *num_errors) {
    *num_errors = 0;

    for (size_t i = 0; i < n; i++) {
        uint8_t acc = 0;
        for (int j = 0; j <= sigma_deg; j++) {
            // alpha^(-i*j), with the exponent folded into 0..GF_MAX-1
            const int reduced = (int)((i * j) % GF_MAX);
            const int neg_exp = (GF_MAX - reduced) % GF_MAX;
            acc ^= gf_mul(sigma[j], gf_exp[neg_exp]);
        }
        if (acc != 0) continue;

        // Root at alpha^(-i) marks an error at buffer index n-1-i.
        positions[*num_errors] = (uint8_t)(n - 1 - i);
        (*num_errors)++;
    }

    if (*num_errors == sigma_deg) return 0;
    return -1;
}
// Formal derivative of a polynomial over GF(2^m).
//   poly   coefficients, poly[k] multiplying x^k, indices 0..deg
//   deg    degree of poly
//   deriv  output, deg coefficients (indices 0..deg-1)
// The term k * poly[k] * x^(k-1) survives only for odd k, because even
// multiples vanish in characteristic 2.
static void poly_derivative(const uint8_t *poly, int deg, uint8_t *deriv) {
    for (int k = 1; k <= deg; k++) {
        deriv[k - 1] = (k % 2 != 0) ? poly[k] : 0;
    }
}
// Forney algorithm: compute error values
//   syndromes   RS_PARITY_SIZE syndrome values
//   sigma       error locator coefficients (sigma[j] multiplies x^j)
//   sigma_deg   degree of sigma
//   positions   buffer indices of the errors, as produced by chien_search
//   num_errors  number of entries in positions
//   n           codeword length in bytes
//   errors      output: value to XOR into the byte at positions[i]
static void forney(const uint8_t *syndromes, const uint8_t *sigma, int sigma_deg,
const uint8_t *positions, int num_errors, size_t n, uint8_t *errors) {
// Compute error evaluator polynomial omega(x) = S(x) * sigma(x) mod x^2t
// (only product terms of degree below RS_PARITY_SIZE are accumulated)
uint8_t omega[RS_PARITY_SIZE + 1];
memset(omega, 0, sizeof(omega));
for (int i = 0; i < RS_PARITY_SIZE; i++) {
for (int j = 0; j <= sigma_deg && i - j >= 0; j++) {
omega[i] ^= gf_mul(syndromes[i - j], sigma[j]);
}
}
// Compute formal derivative of sigma
// (poly_derivative fills only the first sigma_deg entries)
uint8_t sigma_prime[RS_PARITY_SIZE];
poly_derivative(sigma, sigma_deg, sigma_prime);
// Compute error values using Forney formula
for (int i = 0; i < num_errors; i++) {
uint8_t pos = positions[i];
// Locator of this error: buffer index pos corresponds to x^(n-1-pos)
uint8_t Xi = gf_exp[n - 1 - pos]; // alpha^(n-1-pos)
uint8_t Xi_inv = gf_inv(Xi);
// Evaluate omega at Xi_inv
uint8_t omega_val = 0;
for (int j = 0; j < RS_PARITY_SIZE; j++) {
omega_val ^= gf_mul(omega[j], gf_pow(Xi_inv, j));
}
// Evaluate sigma' at Xi_inv
uint8_t sigma_prime_val = 0;
for (int j = 0; j < sigma_deg; j++) {
sigma_prime_val ^= gf_mul(sigma_prime[j], gf_pow(Xi_inv, j));
}
// Error value: e_i = Xi * omega(Xi_inv) / sigma'(Xi_inv)
// gf_div returns 0 for a zero divisor, so a vanishing derivative yields
// an error value of 0 rather than a fault.
errors[i] = gf_mul(Xi, gf_div(omega_val, sigma_prime_val));
}
}
// Decodes one Reed-Solomon block in place.
//   data      buffer holding data_len message bytes followed by
//             RS_PARITY_SIZE parity bytes
//   data_len  length of the message portion
// Returns 0 when all syndromes are already zero, the number of corrected
// bytes after a successful correction, or -1 when the block is too long, the
// locator needs more than RS_MAX_ERRORS errors, or the root count does not
// match the locator degree. The corrected block is not re-checked afterwards.
int rs_decode(uint8_t *data, size_t data_len) {
    if (!rs_initialized) rs_init();

    const size_t codeword_len = data_len + RS_PARITY_SIZE;
    if (codeword_len > RS_BLOCK_SIZE) return -1;

    uint8_t syndromes[RS_PARITY_SIZE];
    compute_syndromes(data, codeword_len, syndromes);

    // All-zero syndromes mean the block is already a codeword.
    int clean = 1;
    for (int i = 0; i < RS_PARITY_SIZE; i++) {
        if (syndromes[i] != 0) {
            clean = 0;
            break;
        }
    }
    if (clean) return 0;

    // Error locator polynomial.
    uint8_t sigma[RS_PARITY_SIZE + 1];
    int sigma_deg;
    if (berlekamp_massey(syndromes, sigma, &sigma_deg) > RS_MAX_ERRORS) {
        return -1;
    }

    // Error positions.
    uint8_t positions[RS_MAX_ERRORS];
    int num_errors;
    if (chien_search(sigma, sigma_deg, codeword_len, positions, &num_errors) != 0) {
        return -1;
    }

    // Error magnitudes, then apply them.
    uint8_t error_values[RS_MAX_ERRORS];
    forney(syndromes, sigma, sigma_deg, positions, num_errors, codeword_len, error_values);

    for (int i = 0; i < num_errors; i++) {
        const uint8_t where = positions[i];
        if (where < codeword_len) {
            data[where] ^= error_values[i];
        }
    }
    return num_errors;
}
// =============================================================================
// Block-level operations
// =============================================================================
// Splits `data` into chunks of at most RS_DATA_SIZE bytes, encodes each chunk
// and stores it in its own RS_BLOCK_SIZE-byte slot of `output`; a short final
// chunk is followed by zero padding up to the slot size.
//   data      input bytes
//   data_len  number of input bytes (0 produces no output)
//   output    receives one RS_BLOCK_SIZE-byte slot per chunk
// Returns the number of bytes written to output.
size_t rs_encode_blocks(const uint8_t *data, size_t data_len, uint8_t *output) {
    if (!rs_initialized) rs_init();

    size_t produced = 0;
    for (size_t offset = 0; offset < data_len; ) {
        size_t chunk = data_len - offset;
        if (chunk > RS_DATA_SIZE) chunk = RS_DATA_SIZE;

        uint8_t *slot = output + produced;
        const size_t used = rs_encode(data + offset, chunk, slot);

        // Keep block boundaries fixed by zero-filling the rest of the slot.
        if (used < RS_BLOCK_SIZE) {
            memset(slot + used, 0, RS_BLOCK_SIZE - used);
        }

        offset += chunk;
        produced += RS_BLOCK_SIZE;
    }
    return produced;
}
// Decodes consecutive RS_BLOCK_SIZE-byte slots in place and copies their
// message bytes to `output`.
//   data        encoded slots (modified in place by the decoder)
//   total_len   bytes available in data; a trailing partial slot is ignored
//   output      receives the decoded message bytes
//   output_len  number of message bytes wanted
// Returns the total number of corrected bytes, or -1 as soon as a slot is
// uncorrectable. The loop ends when either the input slots or the requested
// output run out, so fewer than output_len bytes may have been written.
int rs_decode_blocks(uint8_t *data, size_t total_len, uint8_t *output, size_t output_len) {
    if (!rs_initialized) rs_init();

    int corrected = 0;
    size_t wanted = output_len;
    uint8_t *slot = data;
    uint8_t *out = output;

    for (; total_len >= RS_BLOCK_SIZE && wanted > 0;
         total_len -= RS_BLOCK_SIZE, slot += RS_BLOCK_SIZE) {
        const size_t take = (wanted > RS_DATA_SIZE) ? RS_DATA_SIZE : wanted;

        // Slots are always decoded at full message size because the encoder
        // pads every slot to RS_BLOCK_SIZE; only `take` bytes are kept.
        const int errors = rs_decode(slot, RS_DATA_SIZE);
        if (errors < 0) return -1;
        corrected += errors;

        memcpy(out, slot, take);
        out += take;
        wanted -= take;
    }
    return corrected;
}

View File

@@ -1,82 +0,0 @@
/**
* Reed-Solomon (255,223) Codec for TAV-DT
*
* Standard RS code over GF(2^8):
* - Block size: 255 bytes (223 data + 32 parity)
* - Error correction: up to 16 byte errors
* - Error detection: up to 32 byte errors
*
* Uses primitive polynomial: x^8 + x^4 + x^3 + x^2 + 1 (0x11D)
* Generator polynomial: g(x) = product of (x - alpha^i) for i = 0..31
*
* Created by CuriousTorvald and Claude on 2025-12-09.
*/
#ifndef REED_SOLOMON_H
#define REED_SOLOMON_H
#include <stdint.h>
#include <stddef.h>
// RS(255,223) parameters
#define RS_BLOCK_SIZE 255 // Total codeword size
#define RS_DATA_SIZE 223 // Data bytes per block
#define RS_PARITY_SIZE 32 // Parity bytes per block (2t = 32, t = 16)
#define RS_MAX_ERRORS 16 // Maximum correctable errors (t)
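/**
* Initialize Reed-Solomon codec.
* Builds the GF(2^8) tables and the generator polynomial on the first call;
* later calls return immediately. The encode/decode functions also call this
* lazily on first use.
* Not thread-safe: the "initialized" guard is a plain flag with no locking, so
* make the first call before any other thread uses the codec.
*/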
void rs_init(void);
/**
* Encode data block with Reed-Solomon parity.
*
* @param data Input data (up to RS_DATA_SIZE bytes)
* @param data_len Length of input data (1 to RS_DATA_SIZE)
* @param output Output buffer (must hold data_len + RS_PARITY_SIZE bytes)
* Format: [data][parity]
* @return Total output length (data_len + RS_PARITY_SIZE)
*
* Note: For data shorter than RS_DATA_SIZE, the encoder pads with zeros
* internally but only outputs actual data + parity.
*/
size_t rs_encode(const uint8_t *data, size_t data_len, uint8_t *output);
/**
* Decode and correct Reed-Solomon encoded block.
*
* @param data Buffer containing [data][parity] (modified in-place)
* @param data_len Length of data portion (1 to RS_DATA_SIZE)
* @return Number of errors corrected (0-16), or -1 if uncorrectable
*
* On success, data buffer contains corrected data (parity may also be corrected).
* On failure, data buffer contents are undefined.
*/
int rs_decode(uint8_t *data, size_t data_len);
/**
* Encode data with automatic block splitting.
* For data larger than RS_DATA_SIZE, splits into multiple RS blocks.
*
* @param data Input data
* @param data_len Length of input data
* @param output Output buffer (must hold ceil(data_len/223) * 255 bytes)
* @return Total output length
*/
size_t rs_encode_blocks(const uint8_t *data, size_t data_len, uint8_t *output);
/**
* Decode data with automatic block splitting.
*
* @param data Buffer containing RS-encoded blocks (modified in-place)
* @param total_len Total length of encoded data (multiple of RS_BLOCK_SIZE)
* @param output Output buffer for decoded data
* @param output_len Expected length of decoded data
* @return Total errors corrected across all blocks, or -1 if any block failed
*/
int rs_decode_blocks(uint8_t *data, size_t total_len, uint8_t *output, size_t output_len);
#endif // REED_SOLOMON_H

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,255 +0,0 @@
/**
* TAV Encoder - Color Space Conversion Library
*
* Provides RGB <-> YCoCg-R and RGB <-> ICtCp color space conversions
* for the TSVM Advanced Video (TAV) encoder.
*
* Extracted from encoder_tav.c as part of library refactoring.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
// =============================================================================
// Utility Functions
// =============================================================================
// Restricts x to [min, max]; the lower bound is tested first.
static inline int CLAMP(int x, int min, int max) {
    if (x < min) return min;
    if (x > max) return max;
    return x;
}
// Single-precision clamp of x to [min, max]; the lower bound is tested first.
// Arguments are float, so double inputs are rounded to float on the way in.
static inline float FCLAMP(float x, float min, float max) {
    if (x < min) return min;
    if (x > max) return max;
    return x;
}
static inline int iround(double v) {
return (int)floor(v + 0.5);
}
// =============================================================================
// sRGB Gamma Helpers
// =============================================================================
static inline double srgb_linearise(double val) {
if (val <= 0.04045) return val / 12.92;
return pow((val + 0.055) / 1.055, 2.4);
}
static inline double srgb_unlinearise(double val) {
if (val <= 0.0031308) return 12.92 * val;
return 1.055 * pow(val, 1.0/2.4) - 0.055;
}
// =============================================================================
// HLG (Hybrid Log-Gamma) Transfer Functions
// =============================================================================
static inline double HLG_OETF(double E) {
const double a = 0.17883277;
const double b = 0.28466892; // 1 - 4*a
const double c = 0.55991073; // 0.5 - a*ln(4*a)
if (E <= 1.0/12.0) return sqrt(3.0 * E);
return a * log(12.0 * E - b) + c;
}
static inline double HLG_EOTF(double Ep) {
const double a = 0.17883277;
const double b = 0.28466892;
const double c = 0.55991073;
if (Ep <= 0.5) {
double val = Ep * Ep / 3.0;
return val;
}
double val = (exp((Ep - c) / a) + b) / 12.0;
return val;
}
// =============================================================================
// Color Space Transformation Matrices
// =============================================================================
// All four matrices below are applied as "row times input vector": output
// component k is row k dotted with the three input components, in the order
// given in each matrix's name.

// BT.2100 RGB -> LMS matrix
// (rows produce L, M, S; columns multiply linear R, G, B; each row sums to 1)
static const double M_RGB_TO_LMS[3][3] = {
{1688.0/4096, 2146.0/4096, 262.0/4096},
{ 683.0/4096, 2951.0/4096, 462.0/4096},
{ 99.0/4096, 309.0/4096, 3688.0/4096}
};
// LMS -> RGB inverse matrix
// (rows produce linear R, G, B; columns multiply L, M, S)
static const double M_LMS_TO_RGB[3][3] = {
{ 6.1723815689243215, -5.319534979827695, 0.14699442094633924},
{-1.3243428148026244, 2.560286104841917, -0.2359203727576164},
{-0.011819739235953752, -0.26473549971186555, 1.2767952602537955}
};
// ICtCp matrix (L' M' S' -> I Ct Cp) - BT.2100 constants
// (rows produce I, Ct, Cp; the Ct and Cp rows sum to 0)
static const double M_LMSPRIME_TO_ICTCP[3][3] = {
{ 2048.0/4096.0, 2048.0/4096.0, 0.0 },
{ 3625.0/4096.0, -7465.0/4096.0, 3840.0/4096.0 },
{ 9500.0/4096.0, -9212.0/4096.0, -288.0/4096.0 }
};
// ICtCp -> L' M' S' inverse matrix
// (rows produce L', M', S'; columns multiply I, Ct, Cp)
static const double M_ICTCP_TO_LMSPRIME[3][3] = {
{ 1.0, 0.015718580108730416, 0.2095810681164055 },
{ 1.0, -0.015718580108730416, -0.20958106811640548},
{ 1.0, 1.0212710798422344, -0.6052744909924316 }
};
// =============================================================================
// YCoCg-R Color Space Conversion
// =============================================================================
/**
 * Convert an RGB24 frame to YCoCg-R.
 *
 * For every pixel, in single-precision float with no rounding or clamping:
 *   Co  = R - B
 *   tmp = B + Co/2      (equals (R + B) / 2)
 *   Cg  = G - tmp
 *   Y   = tmp + Cg/2    (equals G/2 + (R + B)/4)
 *
 * Nothing is written when width or height is not positive. The pixel count is
 * computed in size_t so large frames do not overflow int.
 *
 * @param rgb Input RGB24 data (interleaved: RGBRGBRGB..., 3 bytes per pixel)
 * @param y Output luma channel (width * height floats)
 * @param co Output orange chrominance (width * height floats)
 * @param cg Output green chrominance (width * height floats)
 * @param width Frame width
 * @param height Frame height
 */
void tav_rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg,
                      int width, int height)
{
    if (width <= 0 || height <= 0) return;

    const size_t total_pixels = (size_t)width * (size_t)height;
    const uint8_t *px = rgb;

    for (size_t i = 0; i < total_pixels; i++, px += 3) {
        const float r = px[0];
        const float g = px[1];
        const float b = px[2];

        // YCoCg-R lifting steps
        const float orange = r - b;
        const float tmp = b + orange * 0.5f;
        const float green = g - tmp;

        co[i] = orange;
        cg[i] = green;
        y[i] = tmp + green * 0.5f;
    }
}
// =============================================================================
// ICtCp Color Space Conversion (HDR-capable)
// =============================================================================
/**
 * Convert one sRGB8 pixel to ICtCp using the HLG transfer function.
 *
 * Pipeline: sRGB decode -> RGB-to-LMS matrix -> HLG_OETF per channel ->
 * L'M'S'-to-ICtCp matrix -> scale to the 0-255 range. The final clamp goes
 * through FCLAMP, whose parameters are float, so the stored results carry
 * single precision.
 *
 * @param r8 Input red component (0-255)
 * @param g8 Input green component (0-255)
 * @param b8 Input blue component (0-255)
 * @param out_I Output intensity (0-255)
 * @param out_Ct Output tritanope, blue-yellow (0-255, centered at 127.5)
 * @param out_Cp Output protanope, red-green (0-255, centered at 127.5)
 */
static inline double ictcp_fwd_dot3(const double row[3], double a, double b, double c)
{
    return row[0]*a + row[1]*b + row[2]*c;
}

void tav_srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
                            double *out_I, double *out_Ct, double *out_Cp)
{
    // sRGB code values -> linear light in 0..1
    const double lin_r = srgb_linearise((double)r8 / 255.0);
    const double lin_g = srgb_linearise((double)g8 / 255.0);
    const double lin_b = srgb_linearise((double)b8 / 255.0);

    // Linear RGB -> LMS
    const double lms_l = ictcp_fwd_dot3(M_RGB_TO_LMS[0], lin_r, lin_g, lin_b);
    const double lms_m = ictcp_fwd_dot3(M_RGB_TO_LMS[1], lin_r, lin_g, lin_b);
    const double lms_s = ictcp_fwd_dot3(M_RGB_TO_LMS[2], lin_r, lin_g, lin_b);

    // HLG transfer function per channel -> L'M'S'
    const double lp = HLG_OETF(lms_l);
    const double mp = HLG_OETF(lms_m);
    const double sp = HLG_OETF(lms_s);

    // L'M'S' -> ICtCp
    const double intensity = ictcp_fwd_dot3(M_LMSPRIME_TO_ICTCP[0], lp, mp, sp);
    const double tritan = ictcp_fwd_dot3(M_LMSPRIME_TO_ICTCP[1], lp, mp, sp);
    const double protan = ictcp_fwd_dot3(M_LMSPRIME_TO_ICTCP[2], lp, mp, sp);

    // Scale to 0-255; the chroma channels are re-centred on 127.5
    *out_I = FCLAMP(intensity * 255.0, 0.0, 255.0);
    *out_Ct = FCLAMP(tritan * 255.0 + 127.5, 0.0, 255.0);
    *out_Cp = FCLAMP(protan * 255.0 + 127.5, 0.0, 255.0);
}
/**
 * Convert one ICtCp pixel back to sRGB8.
 *
 * Pipeline: undo the 0-255 scaling -> ICtCp-to-L'M'S' matrix -> HLG_EOTF per
 * channel -> LMS-to-RGB matrix -> sRGB encode -> clamp and round to 8 bits.
 *
 * @param I8 Input intensity (0-255)
 * @param Ct8 Input tritanope (0-255, centered at 127.5)
 * @param Cp8 Input protanope (0-255, centered at 127.5)
 * @param r8 Output red component (0-255)
 * @param g8 Output green component (0-255)
 * @param b8 Output blue component (0-255)
 */
static inline double ictcp_inv_dot3(const double row[3], double a, double b, double c)
{
    return row[0]*a + row[1]*b + row[2]*c;
}

void tav_ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
                            uint8_t *r8, uint8_t *g8, uint8_t *b8)
{
    // Undo the 0-255 scaling and the 127.5 chroma offset
    const double intensity = I8 / 255.0;
    const double tritan = (Ct8 - 127.5) / 255.0;
    const double protan = (Cp8 - 127.5) / 255.0;

    // ICtCp -> L'M'S'
    const double lp = ictcp_inv_dot3(M_ICTCP_TO_LMSPRIME[0], intensity, tritan, protan);
    const double mp = ictcp_inv_dot3(M_ICTCP_TO_LMSPRIME[1], intensity, tritan, protan);
    const double sp = ictcp_inv_dot3(M_ICTCP_TO_LMSPRIME[2], intensity, tritan, protan);

    // L'M'S' -> linear LMS through HLG_EOTF
    const double lms_l = HLG_EOTF(lp);
    const double lms_m = HLG_EOTF(mp);
    const double lms_s = HLG_EOTF(sp);

    // LMS -> linear RGB
    const double lin_r = ictcp_inv_dot3(M_LMS_TO_RGB[0], lms_l, lms_m, lms_s);
    const double lin_g = ictcp_inv_dot3(M_LMS_TO_RGB[1], lms_l, lms_m, lms_s);
    const double lin_b = ictcp_inv_dot3(M_LMS_TO_RGB[2], lms_l, lms_m, lms_s);

    // sRGB encode, then clamp to 0-255 and round to the nearest code value
    const double enc_r = srgb_unlinearise(lin_r);
    const double enc_g = srgb_unlinearise(lin_g);
    const double enc_b = srgb_unlinearise(lin_b);

    *r8 = (uint8_t)iround(FCLAMP(enc_r * 255.0, 0.0, 255.0));
    *g8 = (uint8_t)iround(FCLAMP(enc_g * 255.0, 0.0, 255.0));
    *b8 = (uint8_t)iround(FCLAMP(enc_b * 255.0, 0.0, 255.0));
}

View File

@@ -1,67 +0,0 @@
/**
* TAV Encoder - Color Space Conversion Library
*
* Public API for RGB -> YCoCg-R and sRGB <-> ICtCp (HLG) color space conversions.
*/
#ifndef TAV_ENCODER_COLOR_H
#define TAV_ENCODER_COLOR_H
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// =============================================================================
// YCoCg-R Color Space Conversion
// =============================================================================
/**
* Convert RGB24 to YCoCg-R color space for a full frame.
*
* @param rgb Input RGB24 data (interleaved: RGBRGBRGB...)
* @param y Output luma channel
* @param co Output orange chrominance
* @param cg Output green chrominance
* @param width Frame width
* @param height Frame height
*/
void tav_rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg,
int width, int height);
// =============================================================================
// ICtCp Color Space Conversion (HDR-capable)
// =============================================================================
/**
* Convert sRGB8 to ICtCp color space using HLG transfer function.
*
* @param r8 Input red component (0-255)
* @param g8 Input green component (0-255)
* @param b8 Input blue component (0-255)
* @param out_I Output intensity (0-255)
* @param out_Ct Output tritanope (0-255, centered at 127.5)
* @param out_Cp Output protanope (0-255, centered at 127.5)
*/
void tav_srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
double *out_I, double *out_Ct, double *out_Cp);
/**
* Convert ICtCp back to sRGB8 using HLG inverse transfer function.
*
* @param I8 Input intensity (0-255)
* @param Ct8 Input tritanope (0-255, centered at 127.5)
* @param Cp8 Input protanope (0-255, centered at 127.5)
* @param r8 Output red component (0-255)
* @param g8 Output green component (0-255)
* @param b8 Output blue component (0-255)
*/
void tav_ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
uint8_t *r8, uint8_t *g8, uint8_t *b8);
#ifdef __cplusplus
}
#endif
#endif // TAV_ENCODER_COLOR_H

View File

@@ -1,619 +0,0 @@
/**
* TAV Encoder - Discrete Wavelet Transform (DWT) Library
*
* Provides multi-resolution wavelet decomposition for video compression.
* Supports multiple wavelet types: CDF 5/3, 9/7, 13/7, DD-4, and Haar.
*
* Extracted from encoder_tav.c as part of library refactoring.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
// =============================================================================
// Wavelet Type Constants
// =============================================================================
#define WAVELET_5_3_REVERSIBLE 0 // CDF 5/3 - Lossless capable
#define WAVELET_9_7_IRREVERSIBLE 1 // CDF 9/7 - Higher compression (default)
#define WAVELET_BIORTHOGONAL_13_7 2 // Biorthogonal 13/7
#define WAVELET_DD4 16 // Deslauriers-Dubuc 4-point interpolating
#define WAVELET_HAAR 255 // Haar - Simplest wavelet
// =============================================================================
// 1D Forward DWT Transforms
// =============================================================================
/**
 * CDF 5/3 wavelet forward 1D transform (lifting scheme: predict, then update).
 *
 * Output layout: [low-pass ... , high-pass ...], with (length + 1) / 2 low-pass
 * samples followed by length / 2 high-pass samples.
 *
 * The predict step mirrors the last even sample when there is no right-hand
 * neighbour. The update step treats the detail to the left of the first
 * low-pass sample and the detail at the position of the last low-pass sample
 * as zero; dwt_53_inverse_1d() applies the same rule, so the pair stays
 * consistent.
 *
 * Terminates the process with an error message if the scratch buffer cannot
 * be allocated.
 *
 * @param data    In/out signal data (modified in-place)
 * @param length  Signal length (any length; lengths below 2 are left untouched)
 */
static void dwt_53_forward_1d(float *data, int length) {
    if (length < 2) return;

    float *temp = (float *)calloc((size_t)length, sizeof(float));
    if (!temp) {
        fprintf(stderr, "ERROR: Failed to allocate DWT scratch buffer of %d samples\n", length);
        exit(1);
    }

    const int half = (length + 1) / 2;  // number of low-pass outputs
    const int num_high = length / 2;    // number of high-pass outputs

    // Predict step (high-pass): odd sample minus the mean of its even neighbours
    for (int i = 0; i < num_high; i++) {
        const float left = data[2 * i];
        const float right = (2 * i + 2 < length) ? data[2 * i + 2] : left;
        float pred = 0.5f * (left + right);
        temp[half + i] = data[2 * i + 1] - pred;
    }

    // Update step (low-pass): even sample plus a quarter of the adjacent details
    for (int i = 0; i < half; i++) {
        float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) +
                                (i < half - 1 ? temp[half + i] : 0));
        temp[i] = data[2 * i] + update;
    }

    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
}
/**
 * CDF 9/7 irreversible wavelet forward 1D transform (JPEG 2000 lifting coefficients).
 *
 * The signal is split into even (low) and odd (high) samples, four lifting
 * steps (predict alpha, update beta, predict gamma, update delta) are applied,
 * and finally the low band is multiplied by K and the high band divided by K.
 * Output layout: [low-pass ... , high-pass ...].
 *
 * Boundary handling: a missing right-hand even neighbour repeats the current
 * even sample; a missing left-hand detail repeats the current detail; for odd
 * lengths the detail paired with the last even sample is taken as zero.
 *
 * Terminates the process with an error message if the scratch buffer cannot
 * be allocated.
 *
 * @param data    In/out signal data (modified in-place)
 * @param length  Signal length (lengths below 2 are left untouched)
 */
static void dwt_97_forward_1d(float *data, int length) {
    if (length < 2) return;

    float *temp = (float *)malloc((size_t)length * sizeof(float));
    if (!temp) {
        fprintf(stderr, "ERROR: Failed to allocate DWT scratch buffer of %d samples\n", length);
        exit(1);
    }

    const int half = (length + 1) / 2;  // number of even (low) samples
    const int num_high = length / 2;    // number of odd (high) samples

    // Split into even/odd samples
    for (int i = 0; i < half; i++) {
        temp[i] = data[2 * i];
    }
    for (int i = 0; i < num_high; i++) {
        temp[half + i] = data[2 * i + 1];
    }

    // JPEG2000 9/7 lifting coefficients
    const float alpha = -1.586134342f;
    const float beta = -0.052980118f;
    const float gamma = 0.882911076f;
    const float delta = 0.443506852f;
    const float K = 1.230174105f;

    // Step 1: Predict (alpha)
    for (int i = 0; i < num_high; i++) {
        float s_curr = temp[i];
        float s_next = (i + 1 < half) ? temp[i + 1] : s_curr;
        temp[half + i] += alpha * (s_curr + s_next);
    }

    // Step 2: Update (beta)
    for (int i = 0; i < half; i++) {
        float d_curr = (i < num_high) ? temp[half + i] : 0.0f;
        float d_prev = (i > 0) ? temp[half + i - 1] : d_curr;
        temp[i] += beta * (d_prev + d_curr);
    }

    // Step 3: Predict (gamma)
    for (int i = 0; i < num_high; i++) {
        float s_curr = temp[i];
        float s_next = (i + 1 < half) ? temp[i + 1] : s_curr;
        temp[half + i] += gamma * (s_curr + s_next);
    }

    // Step 4: Update (delta)
    for (int i = 0; i < half; i++) {
        float d_curr = (i < num_high) ? temp[half + i] : 0.0f;
        float d_prev = (i > 0) ? temp[half + i - 1] : d_curr;
        temp[i] += delta * (d_prev + d_curr);
    }

    // Step 5: Scaling
    for (int i = 0; i < half; i++) {
        temp[i] *= K;
    }
    for (int i = 0; i < num_high; i++) {
        temp[half + i] /= K;
    }

    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
}
/**
 * CDF 9/7 wavelet forward 1D transform using 16.16 fixed-point lifting.
 *
 * Same step structure as dwt_97_forward_1d(), but every lifting term is
 * computed in integer arithmetic: samples are truncated to int, multiplied by
 * a coefficient scaled by 2^16, and rounded back with RN() (round half away
 * from zero). The final scaling step uses a plain add-and-shift.
 *
 * Terminates the process with an error message if the scratch buffer cannot
 * be allocated.
 *
 * @param data    In/out signal data (modified in-place)
 * @param length  Signal length (lengths below 2 are left untouched)
 */
static void dwt_97_iint_forward_1d(float *data, int length) {
    if (length < 2) return;

    float *temp = (float *)malloc((size_t)length * sizeof(float));
    if (!temp) {
        fprintf(stderr, "ERROR: Failed to allocate DWT scratch buffer of %d samples\n", length);
        exit(1);
    }

    const int half = (length + 1) / 2;  // number of even (low) samples
    const int num_high = length / 2;    // number of odd (high) samples

    // Split into even/odd samples
    for (int i = 0; i < half; ++i) temp[i] = data[2 * i];
    for (int i = 0; i < num_high; ++i) temp[half + i] = data[2 * i + 1];

    const int SHIFT = 16;
    const int64_t ROUND = 1LL << (SHIFT - 1);
    const int64_t A = -103949;    // alpha
    const int64_t B = -3472;      // beta
    const int64_t G = 57862;      // gamma
    const int64_t D = 29066;      // delta
    const int64_t K_FP = 80542;   // ~ 1.230174105 * 2^16
    const int64_t Ki_FP = 53283;  // ~ (1/1.230174105) * 2^16

    // Fixed-point -> integer, rounding half away from zero
    #define RN(x) (((x)>=0)?(((x)+ROUND)>>SHIFT):(-((-(x)+ROUND)>>SHIFT)))

    // Predict (alpha)
    for (int i = 0; i < num_high; ++i) {
        int s = (int)temp[i];
        int sn = (i + 1 < half) ? (int)temp[i + 1] : s;
        temp[half + i] += RN(A * (int64_t)(s + sn));
    }

    // Update (beta)
    for (int i = 0; i < half; ++i) {
        int d = (i < num_high) ? (int)temp[half + i] : 0;
        int dp = (i > 0) ? (int)temp[half + i - 1] : d;
        temp[i] += RN(B * (int64_t)(dp + d));
    }

    // Predict (gamma)
    for (int i = 0; i < num_high; ++i) {
        int s = (int)temp[i];
        int sn = (i + 1 < half) ? (int)temp[i + 1] : s;
        temp[half + i] += RN(G * (int64_t)(s + sn));
    }

    // Update (delta)
    for (int i = 0; i < half; ++i) {
        int d = (i < num_high) ? (int)temp[half + i] : 0;
        int dp = (i > 0) ? (int)temp[half + i - 1] : d;
        temp[i] += RN(D * (int64_t)(dp + d));
    }

    // Scaling
    for (int i = 0; i < half; ++i) {
        temp[i] = (((int64_t)temp[i] * K_FP + ROUND) >> SHIFT);
    }
    for (int i = 0; i < num_high; ++i) {
        temp[half + i] = (((int64_t)temp[half + i] * Ki_FP + ROUND) >> SHIFT);
    }

    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);

    #undef RN
}
/**
 * Deslauriers-Dubuc 4-point interpolating wavelet forward 1D (DD-4).
 *
 * Each odd sample is predicted from four even samples with the kernel
 * w[-1]=-1/16, w[0]=9/16, w[1]=9/16, w[2]=-1/16; the even samples are then
 * updated with a quarter of the two adjacent details.
 * Output layout: [low-pass ... , high-pass ...].
 *
 * Boundary handling: even samples outside the signal are replaced by the
 * nearest edge samples, and details outside the high band count as zero.
 *
 * Terminates the process with an error message if the scratch buffer cannot
 * be allocated.
 *
 * @param data    In/out signal data (modified in-place)
 * @param length  Signal length (lengths below 2 are left untouched)
 */
static void dwt_dd4_forward_1d(float *data, int length) {
    if (length < 2) return;

    float *temp = (float *)malloc((size_t)length * sizeof(float));
    if (!temp) {
        fprintf(stderr, "ERROR: Failed to allocate DWT scratch buffer of %d samples\n", length);
        exit(1);
    }

    const int half = (length + 1) / 2;  // number of even (low) samples
    const int num_high = length / 2;    // number of odd (high) samples

    // Split into even/odd samples
    for (int i = 0; i < half; i++) {
        temp[i] = data[2 * i];
    }
    for (int i = 0; i < num_high; i++) {
        temp[half + i] = data[2 * i + 1];
    }

    // DD-4 prediction step with four-point kernel
    for (int i = 0; i < num_high; i++) {
        // Four neighbouring even samples, clamped/mirrored at the edges
        float s_m1 = (i > 0) ? temp[i - 1] : temp[0];
        float s_0 = temp[i];
        float s_1 = (i + 1 < half) ? temp[i + 1] : temp[half - 1];
        float s_2 = (i + 2 < half) ? temp[i + 2] : ((half > 1) ? temp[half - 2] : temp[half - 1]);

        float prediction = (-1.0f/16.0f) * s_m1 + (9.0f/16.0f) * s_0 +
                           (9.0f/16.0f) * s_1 + (-1.0f/16.0f) * s_2;
        temp[half + i] -= prediction;
    }

    // DD-4 update step
    for (int i = 0; i < half; i++) {
        float d_curr = (i < num_high) ? temp[half + i] : 0.0f;
        float d_prev = (i > 0) ? temp[half + i - 1] : 0.0f;
        temp[i] += 0.25f * (d_prev + d_curr);
    }

    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
}
/**
 * "Biorthogonal 13/7" wavelet forward 1D.
 *
 * Simplified implementation: performs the same predict/update lifting as
 * dwt_53_forward_1d() and then multiplies the low band by K and divides the
 * high band by K (K = 1.230174105).
 * Output layout: [low-pass ... , high-pass ...].
 *
 * Terminates the process with an error message if the scratch buffer cannot
 * be allocated.
 *
 * @param data    In/out signal data (modified in-place)
 * @param length  Signal length (lengths below 2 are left untouched)
 */
static void dwt_bior137_forward_1d(float *data, int length) {
    if (length < 2) return;

    const float K = 1.230174105f;

    float *temp = (float *)malloc((size_t)length * sizeof(float));
    if (!temp) {
        fprintf(stderr, "ERROR: Failed to allocate DWT scratch buffer of %d samples\n", length);
        exit(1);
    }

    const int half = (length + 1) / 2;  // number of low-pass outputs
    const int num_high = length / 2;    // number of high-pass outputs

    // Predict step (high-pass)
    for (int i = 0; i < num_high; i++) {
        float left = data[2 * i];
        float right = (2 * i + 2 < length) ? data[2 * i + 2] : data[2 * i];
        float prediction = 0.5f * (left + right);
        temp[half + i] = data[2 * i + 1] - prediction;
    }

    // Update step (low-pass); the detail at the last low-pass position counts as zero
    for (int i = 0; i < half; i++) {
        float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) +
                                (i < half - 1 ? temp[half + i] : 0));
        temp[i] = data[2 * i] + update;
    }

    // Scaling
    for (int i = 0; i < half; i++) {
        temp[i] *= K;
    }
    for (int i = 0; i < num_high; i++) {
        temp[half + i] /= K;
    }

    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
}
/**
 * Haar wavelet forward 1D transform.
 *
 * Each sample pair (a, b) becomes an average (a + b) / 2 in the low band and
 * a half-difference (a - b) / 2 in the high band. For odd lengths the final
 * unpaired sample is copied to the low band unchanged.
 * Output layout: [low-pass ... , high-pass ...].
 *
 * Terminates the process with an error message if the scratch buffer cannot
 * be allocated.
 *
 * @param data    In/out signal data (modified in-place)
 * @param length  Signal length (lengths below 2 are left untouched)
 */
static void dwt_haar_forward_1d(float *data, int length) {
    if (length < 2) return;

    float *temp = (float *)malloc((size_t)length * sizeof(float));
    if (!temp) {
        fprintf(stderr, "ERROR: Failed to allocate DWT scratch buffer of %d samples\n", length);
        exit(1);
    }

    const int half = (length + 1) / 2;  // number of low-pass outputs
    const int num_pairs = length / 2;   // number of complete sample pairs

    for (int i = 0; i < num_pairs; i++) {
        temp[i] = (data[2 * i] + data[2 * i + 1]) / 2.0f;
        temp[half + i] = (data[2 * i] - data[2 * i + 1]) / 2.0f;
    }

    // Odd length: the trailing sample has no partner and passes through
    if (num_pairs < half) {
        temp[half - 1] = data[2 * (half - 1)];
    }

    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
}
// =============================================================================
// 1D Inverse DWT Transforms
// =============================================================================
/**
 * CDF 5/3 wavelet inverse 1D transform.
 *
 * Undoes dwt_53_forward_1d(): first the update step is subtracted from the
 * low band, then each odd sample is rebuilt as detail + prediction and the
 * even/odd samples are re-interleaved into data.
 *
 * Terminates the process with an error message if the scratch buffer cannot
 * be allocated.
 *
 * @param data    In/out coefficient data ([low-pass, high-pass] on entry,
 *                reconstructed signal on return)
 * @param length  Signal length (lengths below 2 are left untouched)
 */
static void dwt_53_inverse_1d(float *data, int length) {
    if (length < 2) return;

    float *temp = (float *)malloc((size_t)length * sizeof(float));
    if (!temp) {
        fprintf(stderr, "ERROR: Failed to allocate DWT scratch buffer of %d samples\n", length);
        exit(1);
    }

    const int half = (length + 1) / 2;  // number of low-pass coefficients

    // Work on a copy so data can be overwritten in interleaved order
    memcpy(temp, data, (size_t)length * sizeof(float));

    // Undo update step (same neighbour rule as the forward transform)
    for (int i = 0; i < half; i++) {
        float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) +
                                (i < half - 1 ? temp[half + i] : 0));
        temp[i] -= update;
    }

    // Undo predict step and interleave even/odd samples
    for (int i = 0; i < half; i++) {
        const int idx = 2 * i + 1;
        data[2 * i] = temp[i];
        if (idx < length) {
            float pred = 0.5f * (temp[i] + ((i + 1 < half) ? temp[i + 1] : temp[i]));
            data[idx] = temp[half + i] + pred;
        }
    }

    free(temp);
}
/**
 * Haar wavelet inverse 1D transform.
 *
 * Undoes dwt_haar_forward_1d(): every (average, half-difference) pair is
 * expanded back to (average + diff, average - diff). For odd lengths the last
 * low-pass coefficient is copied through unchanged.
 *
 * Terminates the process with an error message if the scratch buffer cannot
 * be allocated.
 *
 * @param data    In/out coefficient data ([low-pass, high-pass] on entry,
 *                reconstructed signal on return)
 * @param length  Signal length (lengths below 2 are left untouched)
 */
static void dwt_haar_inverse_1d(float *data, int length) {
    if (length < 2) return;

    float *temp = (float *)malloc((size_t)length * sizeof(float));
    if (!temp) {
        fprintf(stderr, "ERROR: Failed to allocate DWT scratch buffer of %d samples\n", length);
        exit(1);
    }

    const int half = (length + 1) / 2;  // number of low-pass coefficients
    const int num_pairs = length / 2;   // number of complete sample pairs

    // Reconstruct from averages and differences
    for (int i = 0; i < num_pairs; i++) {
        temp[2 * i] = data[i] + data[half + i];
        temp[2 * i + 1] = data[i] - data[half + i];
    }

    // Odd length: the trailing low-pass value is the original sample
    if (num_pairs < half) {
        temp[2 * (half - 1)] = data[half - 1];
    }

    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
}
// =============================================================================
// 2D DWT Transform
// =============================================================================
/**
 * Run the 1D forward transform selected by filter_type on one row or column.
 *
 * An unrecognised filter_type leaves the line untouched.
 */
static void dwt_forward_1d_dispatch(float *line, int length, int filter_type) {
    switch (filter_type) {
        case WAVELET_5_3_REVERSIBLE:
            dwt_53_forward_1d(line, length);
            break;
        case WAVELET_9_7_IRREVERSIBLE:
            dwt_97_forward_1d(line, length);
            break;
        case WAVELET_BIORTHOGONAL_13_7:
            dwt_bior137_forward_1d(line, length);
            break;
        case WAVELET_DD4:
            dwt_dd4_forward_1d(line, length);
            break;
        case WAVELET_HAAR:
            dwt_haar_forward_1d(line, length);
            break;
        default:
            break;
    }
}

/**
 * Apply 2D forward DWT to a frame (in-place).
 *
 * For every level the rows of the current top-left region are transformed,
 * then its columns; the region then shrinks to ((w + 1) / 2, (h + 1) / 2)
 * and the next level repeats the process on it.
 *
 * Nothing is done when data is NULL, when width or height is below 1, or when
 * levels is below 1. Terminates the process with an error message if the
 * scratch line cannot be allocated.
 *
 * @param data         In/out 2D image data (row-major, width stride)
 * @param width        Image width
 * @param height       Image height
 * @param levels       Number of decomposition levels
 * @param filter_type  Wavelet type (WAVELET_* constant)
 */
void tav_dwt_2d_forward(float *data, int width, int height, int levels, int filter_type) {
    if (!data || width < 1 || height < 1 || levels < 1) return;

    // One scratch line is enough: rows and columns are processed one at a time
    const size_t max_size = (size_t)((width > height) ? width : height);
    float *line = (float *)malloc(max_size * sizeof(float));
    if (!line) {
        fprintf(stderr, "ERROR: Failed to allocate DWT scratch line of %zu samples\n", max_size);
        exit(1);
    }

    const size_t stride = (size_t)width;
    int current_width = width;
    int current_height = height;

    for (int level = 0; level < levels; level++) {
        // Lines shorter than 2 samples are never modified, so once both
        // dimensions reach 1 the remaining levels have no effect
        if (current_width < 2 && current_height < 2) break;

        // Row transform (horizontal)
        for (int y = 0; y < current_height; y++) {
            float *row = data + (size_t)y * stride;
            memcpy(line, row, (size_t)current_width * sizeof(float));
            dwt_forward_1d_dispatch(line, current_width, filter_type);
            memcpy(row, line, (size_t)current_width * sizeof(float));
        }

        // Column transform (vertical)
        for (int x = 0; x < current_width; x++) {
            for (int y = 0; y < current_height; y++) {
                line[y] = data[(size_t)y * stride + (size_t)x];
            }
            dwt_forward_1d_dispatch(line, current_height, filter_type);
            for (int y = 0; y < current_height; y++) {
                data[(size_t)y * stride + (size_t)x] = line[y];
            }
        }

        // The next level operates on the low-pass quadrant only
        current_width = (current_width + 1) / 2;
        current_height = (current_height + 1) / 2;
    }

    free(line);
}
// =============================================================================
// 3D DWT Transform (Temporal + Spatial)
// =============================================================================
/**
* Apply 3D forward DWT to a GOP (group of pictures).
*
* First applies temporal DWT across frames at each spatial location,
* then applies 2D spatial DWT to each resulting temporal subband.
*
* @param gop_data Array of frame pointers [num_frames][width*height]
* @param width Frame width
* @param height Frame height
* @param num_frames Number of frames in GOP
* @param spatial_levels Number of 2D spatial decomposition levels
* @param temporal_levels Number of 1D temporal decomposition levels
* @param spatial_filter Wavelet type for spatial transform
* @param temporal_filter Wavelet type for temporal transform (0=Haar, 1=5/3)
*/
void tav_dwt_3d_forward(float **gop_data, int width, int height, int num_frames,
int spatial_levels, int temporal_levels,
int spatial_filter, int temporal_filter) {
if (num_frames < 2 || width < 2 || height < 2) return;
float *temporal_line = malloc(num_frames * sizeof(float));
// Pre-calculate temporal lengths for non-power-of-2 GOPs
int *temporal_lengths = malloc((temporal_levels + 1) * sizeof(int));
temporal_lengths[0] = num_frames;
for (int i = 1; i <= temporal_levels; i++) {
temporal_lengths[i] = (temporal_lengths[i - 1] + 1) / 2;
}
// Step 1: Apply temporal DWT across frames
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
int pixel_idx = y * width + x;
// Extract temporal signal
for (int t = 0; t < num_frames; t++) {
temporal_line[t] = gop_data[t][pixel_idx];
}
// Apply temporal DWT with multiple levels
for (int level = 0; level < temporal_levels; level++) {
int level_frames = temporal_lengths[level];
if (level_frames >= 2) {
if (temporal_filter == 255) {
// Haar temporal (default)
dwt_haar_forward_1d(temporal_line, level_frames);
} else if (temporal_filter == 0) {
// CDF 5/3 temporal
dwt_53_forward_1d(temporal_line, level_frames);
} else {
// Fallback to Haar for unsupported wavelets
dwt_haar_forward_1d(temporal_line, level_frames);
}
}
}
// Write back temporal coefficients
for (int t = 0; t < num_frames; t++) {
gop_data[t][pixel_idx] = temporal_line[t];
}
}
}
free(temporal_lengths);
free(temporal_line);
// Step 2: Apply 2D spatial DWT to each temporal subband
for (int t = 0; t < num_frames; t++) {
tav_dwt_2d_forward(gop_data[t], width, height, spatial_levels, spatial_filter);
}
}
// =============================================================================
// Utility Functions
// =============================================================================
/**
 * Suggest a decomposition depth for a frame of the given size.
 *
 * Counts how many times the shorter side can be halved while it is still at
 * least 32, and caps the answer at 6.
 *
 * @param width   Image width
 * @param height  Image height
 * @return Recommended number of levels (0-6; 0 when the shorter side is below 32)
 */
int tav_dwt_calculate_levels(int width, int height) {
    const int level_cap = 6;
    int shortest = (height <= width) ? height : width;
    int count = 0;

    // One level per halving that starts from a side of 32 or more
    for (; shortest >= 32; shortest /= 2) {
        count++;
    }

    if (count > level_cap) {
        return level_cap;
    }
    return count;
}

View File

@@ -1,88 +0,0 @@
/**
* TAV Encoder - Discrete Wavelet Transform Library
*
* Public API for multi-resolution wavelet decomposition.
* Supports multiple wavelet types: CDF 5/3, 9/7, 13/7, DD-4, Haar
*/
#ifndef TAV_ENCODER_DWT_H
#define TAV_ENCODER_DWT_H
#ifdef __cplusplus
extern "C" {
#endif
// =============================================================================
// Wavelet Type Constants
// =============================================================================
#define WAVELET_5_3_REVERSIBLE 0 // CDF 5/3 reversible (lossless capable)
#define WAVELET_9_7_IRREVERSIBLE 1 // CDF 9/7 JPEG2000 (default, best compression)
#define WAVELET_BIORTHOGONAL_13_7 2 // CDF 13/7 experimental
#define WAVELET_DD4 16 // Deslauriers-Dubuc 4-point interpolating
#define WAVELET_HAAR 255 // Haar (demonstration only)
// =============================================================================
// 2D Discrete Wavelet Transform
// =============================================================================
/**
* Apply 2D wavelet transform to spatial data.
*
* Uses separable 1D transforms: apply horizontal rows, then vertical columns.
* Multi-level decomposition creates frequency subbands: LL, LH, HL, HH.
*
* @param data Input/output data array (modified in-place)
* @param width Frame width
* @param height Frame height
* @param levels Number of decomposition levels (0 performs no decomposition; use tav_dwt_calculate_levels() to pick a value)
* @param filter_type Wavelet type (WAVELET_* constants)
*/
void tav_dwt_2d_forward(float *data, int width, int height,
int levels, int filter_type);
// =============================================================================
// 3D Discrete Wavelet Transform (GOP Temporal + Spatial)
// =============================================================================
/**
* Apply 3D wavelet transform to group-of-pictures (GOP).
*
* Process:
* 1. Apply temporal 1D DWT across frames at each spatial position
* 2. Apply spatial 2D DWT to each temporal subband frame
*
* @param gop_data Array of frame pointers [num_frames]
* @param width Frame width
* @param height Frame height
* @param num_frames Number of frames in GOP
* @param spatial_levels Spatial decomposition levels (0 performs no spatial decomposition)
* @param temporal_levels Temporal decomposition levels
* @param spatial_filter Wavelet type for spatial transform
* @param temporal_filter Wavelet type for temporal transform
*/
void tav_dwt_3d_forward(float **gop_data, int width, int height, int num_frames,
int spatial_levels, int temporal_levels,
int spatial_filter, int temporal_filter);
// =============================================================================
// Utility Functions
// =============================================================================
/**
* Calculate optimal number of decomposition levels for given dimensions.
*
* Counts how many times min(width, height) can be halved while it is still
* at least 32, and caps the result at 6 levels (0 when the shorter side is below 32).
*
* @param width Frame width
* @param height Frame height
* @return Recommended number of levels
*/
int tav_dwt_calculate_levels(int width, int height);
#ifdef __cplusplus
}
#endif
#endif // TAV_ENCODER_DWT_H

View File

@@ -1,415 +0,0 @@
/**
* TAV Encoder - EZBC (Embedded Zero Block Coding) Library
*
* Implements binary tree embedded zero block coding for efficient storage
* of sparse wavelet coefficients. Exploits coefficient sparsity through
* hierarchical significance testing and progressive bitplane encoding.
*
* Extracted from encoder_tav.c as part of library refactoring.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <math.h>
// =============================================================================
// EZBC Structures
// =============================================================================
/**
 * Bitstream writer for bit-level encoding.
 *
 * Bits are packed into each byte starting from the least significant bit
 * (see bitstream_write_bit()).
 */
typedef struct {
uint8_t *data; // Output buffer; bitstream_write_bit() enlarges it with realloc() when full
size_t capacity; // Size of data in bytes
size_t byte_pos; // Index of the byte currently being filled
uint8_t bit_pos; // 0-7, current bit position in current byte
} bitstream_t;
/**
 * Block structure for EZBC quadtree decomposition.
 *
 * Describes a rectangle of the coefficient array. Significant blocks are
 * split at width / 2 and height / 2 into up to four children until they
 * reach 1x1 (see process_significant_block_recursive()).
 */
typedef struct {
int x, y; // Top-left position in 2D coefficient array
int width, height; // Block dimensions
} ezbc_block_t;
/**
 * Queue for EZBC block processing.
 *
 * A growable array: blocks are appended with queue_push() and read back by
 * index in insertion order.
 */
typedef struct {
ezbc_block_t *blocks; // Heap array holding the queued blocks
size_t count; // Number of blocks currently stored
size_t capacity; // Number of blocks the array can hold before queue_push() grows it
} block_queue_t;
/**
 * Track coefficient state for refinement.
 *
 * One entry per coefficient; both fields are filled in by
 * process_significant_block_recursive() when the coefficient's sign bit is
 * emitted.
 */
typedef struct {
bool significant; // Has been marked significant
int first_bitplane; // Bitplane where it became significant
} coeff_state_t;
/**
 * EZBC encoding context for recursive processing.
 *
 * Bundles everything process_significant_block_recursive() needs while it
 * subdivides one block at one bitplane.
 */
typedef struct {
bitstream_t *bs; // Destination for significance and sign bits
int16_t *coeffs; // Coefficient array, indexed as y * width + x
coeff_state_t *states; // Per-coefficient state, indexed like coeffs
int width; // Row stride of coeffs; also the horizontal bound for block scans
int height; // Vertical bound for block scans
int bitplane; // Bitplane being coded; recorded as first_bitplane for new coefficients
int threshold; // Significance threshold passed to is_zero_block_ezbc()
block_queue_t *next_insignificant; // Receives child blocks found insignificant
block_queue_t *next_significant; // Receives 1x1 blocks once their sign bit is written
int *sign_count; // Counter incremented for every sign bit written
} ezbc_context_t;
// =============================================================================
// Bitstream Operations
// =============================================================================
/**
 * Prepare a bitstream for writing.
 *
 * Allocates a zero-filled buffer of at least 64 bytes and resets the write
 * position to the first bit of the first byte. Prints an error and exits the
 * process if the buffer cannot be allocated.
 */
static void bitstream_init(bitstream_t *bs, size_t initial_capacity) {
    // A floor of 64 bytes sidesteps zero-size allocations
    initial_capacity = (initial_capacity < 64) ? 64 : initial_capacity;

    bs->capacity = initial_capacity;
    bs->data = (uint8_t *)calloc(1, initial_capacity);
    if (bs->data == NULL) {
        fprintf(stderr, "ERROR: Failed to allocate bitstream buffer of size %zu\n", initial_capacity);
        exit(1);
    }

    bs->bit_pos = 0;
    bs->byte_pos = 0;
}
/**
 * Write a single bit to bitstream.
 *
 * Bits fill each byte from the least significant position upwards. When the
 * buffer is full its size is doubled (or set to 64 bytes if the capacity was
 * zero) and the new region is zeroed, so only set bits ever need to be
 * written. Prints an error and exits the process if the buffer cannot be
 * grown, matching bitstream_init().
 *
 * @param bs   Bitstream to append to
 * @param bit  Any non-zero value writes a 1 bit, zero writes a 0 bit
 */
static void bitstream_write_bit(bitstream_t *bs, int bit) {
    // Grow if needed
    if (bs->byte_pos >= bs->capacity) {
        const size_t old_capacity = bs->capacity;
        const size_t new_capacity = (old_capacity > 0) ? old_capacity * 2 : 64;

        // Keep the old pointer until realloc is known to have succeeded
        uint8_t *grown = (uint8_t *)realloc(bs->data, new_capacity);
        if (!grown) {
            fprintf(stderr, "ERROR: Failed to grow bitstream buffer to size %zu\n", new_capacity);
            exit(1);
        }

        // Clear only the newly allocated memory region
        memset(grown + old_capacity, 0, new_capacity - old_capacity);
        bs->data = grown;
        bs->capacity = new_capacity;
    }

    if (bit) {
        bs->data[bs->byte_pos] |= (uint8_t)(1u << bs->bit_pos);
    }

    bs->bit_pos++;
    if (bs->bit_pos == 8) {
        bs->bit_pos = 0;
        bs->byte_pos++;
    }
}
/**
 * Append the low num_bits bits of value to the bitstream, least significant
 * bit first.
 */
static void bitstream_write_bits(bitstream_t *bs, uint32_t value, int num_bits) {
    int shift = 0;
    while (shift < num_bits) {
        const uint32_t current = (value >> shift) & 1;
        bitstream_write_bit(bs, current);
        shift++;
    }
}
/**
 * Number of bytes the bitstream occupies so far; a partially filled final
 * byte counts as a whole byte.
 */
static size_t bitstream_size(bitstream_t *bs) {
    size_t bytes = bs->byte_pos;
    if (bs->bit_pos != 0) {
        bytes += 1;
    }
    return bytes;
}
/**
 * Free bitstream buffer.
 *
 * The structure is reset afterwards (NULL buffer, zero capacity and position)
 * so that a stale pointer is never left behind and a second call is harmless.
 */
static void bitstream_free(bitstream_t *bs) {
    free(bs->data);
    bs->data = NULL;
    bs->capacity = 0;
    bs->byte_pos = 0;
    bs->bit_pos = 0;
}
// =============================================================================
// Block Queue Operations
// =============================================================================
/**
 * Initialize block queue with an initial capacity of 1024 blocks.
 *
 * Prints an error and exits the process if the block array cannot be
 * allocated, matching bitstream_init().
 */
static void queue_init(block_queue_t *q) {
    q->capacity = 1024;
    q->count = 0;
    q->blocks = (ezbc_block_t *)malloc(q->capacity * sizeof(ezbc_block_t));
    if (!q->blocks) {
        fprintf(stderr, "ERROR: Failed to allocate block queue of %zu entries\n", q->capacity);
        exit(1);
    }
}
/**
 * Push block onto queue, growing if needed.
 *
 * A full queue doubles its capacity (or starts at 1024 entries if the
 * capacity was zero). Prints an error and exits the process if the array
 * cannot be grown, matching bitstream_init().
 */
static void queue_push(block_queue_t *q, ezbc_block_t block) {
    if (q->count >= q->capacity) {
        const size_t new_capacity = (q->capacity > 0) ? q->capacity * 2 : 1024;

        // Keep the old pointer until realloc is known to have succeeded
        ezbc_block_t *grown = (ezbc_block_t *)realloc(q->blocks, new_capacity * sizeof(ezbc_block_t));
        if (!grown) {
            fprintf(stderr, "ERROR: Failed to grow block queue to %zu entries\n", new_capacity);
            exit(1);
        }
        q->blocks = grown;
        q->capacity = new_capacity;
    }
    q->blocks[q->count++] = block;
}
/**
 * Free block queue.
 *
 * The structure is reset afterwards (NULL array, zero count and capacity) so
 * that a stale pointer is never left behind and a second call is harmless.
 */
static void queue_free(block_queue_t *q) {
    free(q->blocks);
    q->blocks = NULL;
    q->count = 0;
    q->capacity = 0;
}
// =============================================================================
// EZBC Helper Functions
// =============================================================================
/**
 * Report whether a block is insignificant at the given threshold.
 *
 * Scans the part of the block that lies inside the width x height
 * coefficient array and returns false as soon as any |coeff| >= threshold is
 * found; returns true otherwise (including when the clipped block is empty).
 */
static bool is_zero_block_ezbc(int16_t *coeffs, int width, int height,
                               const ezbc_block_t *block, int threshold) {
    // Clip the block's far edges to the array bounds
    int row_end = block->y + block->height;
    if (row_end > height) row_end = height;
    int col_end = block->x + block->width;
    if (col_end > width) col_end = width;

    for (int row = block->y; row < row_end; row++) {
        for (int col = block->x; col < col_end; col++) {
            const int magnitude = abs(coeffs[row * width + col]);
            if (magnitude >= threshold) {
                return false;
            }
        }
    }
    return true;
}
/**
 * Return the largest absolute value among the first count coefficients
 * (0 for an empty array).
 */
static int find_max_abs_ezbc(int16_t *coeffs, size_t count) {
    int largest = 0;
    const int16_t *cursor = coeffs;
    size_t remaining = count;

    while (remaining > 0) {
        const int magnitude = abs(*cursor);
        largest = (magnitude > largest) ? magnitude : largest;
        cursor++;
        remaining--;
    }
    return largest;
}
/**
 * Bitplane index of the highest set bit, i.e. floor(log2(value)) for
 * value >= 1. Zero and negative inputs yield 0.
 */
static int get_msb_bitplane(int value) {
    int plane = 0;
    // Each halving that still leaves more than 1 moves the MSB up one plane
    for (int rest = value; rest > 1; rest >>= 1) {
        ++plane;
    }
    return plane;
}
static void process_significant_block_recursive(ezbc_context_t *ctx, ezbc_block_t block);

/**
 * Code one child of a significant block: emit its significance flag, then
 * either descend into it (flag 1) or park it on the insignificant queue for
 * the next bitplane (flag 0).
 */
static void ezbc_code_child_block(ezbc_context_t *ctx, ezbc_block_t child) {
    const bool below_threshold =
        is_zero_block_ezbc(ctx->coeffs, ctx->width, ctx->height, &child, ctx->threshold);

    if (below_threshold) {
        bitstream_write_bit(ctx->bs, 0); // Insignificant
        queue_push(ctx->next_insignificant, child);
        return;
    }

    bitstream_write_bit(ctx->bs, 1); // Significant
    process_significant_block_recursive(ctx, child);
}

/**
 * Recursively code a block already known to be significant.
 *
 * A 1x1 block emits the coefficient's sign bit (1 = negative), records the
 * coefficient as significant at the current bitplane and joins the
 * significant queue. Larger blocks are split at width / 2 and height / 2
 * (at least 1) and their children are coded in the order top-left,
 * top-right, bottom-left, bottom-right; children that would have zero width
 * or height are skipped.
 */
static void process_significant_block_recursive(ezbc_context_t *ctx, ezbc_block_t block) {
    const bool single_coeff = (block.width == 1) && (block.height == 1);
    if (single_coeff) {
        const int idx = block.y * ctx->width + block.x;
        const int sign_bit = (ctx->coeffs[idx] < 0) ? 1 : 0;

        bitstream_write_bit(ctx->bs, sign_bit);
        *ctx->sign_count += 1;

        ctx->states[idx].significant = true;
        ctx->states[idx].first_bitplane = ctx->bitplane;
        queue_push(ctx->next_significant, block);
        return;
    }

    // Split point; a dimension of 1 keeps its single column/row on the near side
    const int left_w = (block.width / 2 != 0) ? block.width / 2 : 1;
    const int top_h = (block.height / 2 != 0) ? block.height / 2 : 1;
    const bool has_right = block.width > left_w;
    const bool has_bottom = block.height > top_h;

    // Top-left child always exists
    ezbc_block_t top_left = {block.x, block.y, left_w, top_h};
    ezbc_code_child_block(ctx, top_left);

    if (has_right) {
        ezbc_block_t top_right = {block.x + left_w, block.y, block.width - left_w, top_h};
        ezbc_code_child_block(ctx, top_right);
    }

    if (has_bottom) {
        ezbc_block_t bottom_left = {block.x, block.y + top_h, left_w, block.height - top_h};
        ezbc_code_child_block(ctx, bottom_left);
    }

    if (has_right && has_bottom) {
        ezbc_block_t bottom_right = {block.x + left_w, block.y + top_h,
                                     block.width - left_w, block.height - top_h};
        ezbc_code_child_block(ctx, bottom_right);
    }
}
// =============================================================================
// Main EZBC Encoding Function
// =============================================================================
/**
 * EZBC encoding for a single channel.
 *
 * Uses two pairs of queues (current / next) for insignificant blocks and for
 * significant 1x1 blocks, and encodes coefficients progressively from the MSB
 * bitplane down to bitplane 0.
 *
 * Algorithm:
 * 1. Find MSB bitplane from maximum absolute coefficient value
 * 2. Write header: MSB bitplane (8 bits), width (16 bits), height (16 bits)
 * 3. For each bitplane from MSB to 0:
 *    a. Process insignificant blocks: emit 0 if still insignificant, 1 otherwise
 *    b. Newly significant blocks are handed to
 *       process_significant_block_recursive()
 *    c. Process already-significant coefficients: emit one refinement bit each
 * 4. Return encoded bitstream
 *
 * @param coeffs Input quantized coefficients (int16_t array)
 * @param count  Number of coefficients
 * @param width  Frame width
 * @param height Frame height
 * @param output Receives the bitstream's data buffer (allocated by this function);
 *               set to NULL when the function fails
 * @return Encoded size in bytes, or 0 if the significance table could not be
 *         allocated
 */
size_t tav_encode_channel_ezbc(int16_t *coeffs, size_t count, int width, int height,
                               uint8_t **output) {
    // Track coefficient significance. Allocated before the bitstream so that an
    // allocation failure can be reported without leaving anything to clean up.
    // calloc(0, ...) may legitimately return NULL, hence the count check.
    coeff_state_t *states = calloc(count, sizeof(coeff_state_t));
    if (states == NULL && count > 0) {
        *output = NULL;
        return 0;
    }

    bitstream_t bs;
    bitstream_init(&bs, count / 4);  // Initial guess

    // Find maximum value to determine MSB bitplane
    int max_abs = find_max_abs_ezbc(coeffs, count);
    int msb_bitplane = get_msb_bitplane(max_abs);

    // Write header: MSB bitplane and dimensions
    bitstream_write_bits(&bs, msb_bitplane, 8);
    bitstream_write_bits(&bs, width, 16);
    bitstream_write_bits(&bs, height, 16);

    // Initialise two queue pairs: insignificant blocks and significant 1x1 blocks
    block_queue_t insignificant_queue, next_insignificant;
    block_queue_t significant_queue, next_significant;
    queue_init(&insignificant_queue);
    queue_init(&next_insignificant);
    queue_init(&significant_queue);
    queue_init(&next_significant);

    // Start with root block as insignificant
    ezbc_block_t root = {0, 0, width, height};
    queue_push(&insignificant_queue, root);

    // Process bitplanes from MSB to LSB
    for (int bitplane = msb_bitplane; bitplane >= 0; bitplane--) {
        int threshold = 1 << bitplane;
        int sign_bits_this_bitplane = 0;

        // Process insignificant blocks - check if they become significant
        for (size_t i = 0; i < insignificant_queue.count; i++) {
            ezbc_block_t block = insignificant_queue.blocks[i];

            // Check if this block has any coefficient >= threshold
            if (is_zero_block_ezbc(coeffs, width, height, &block, threshold)) {
                // Still insignificant: emit 0
                bitstream_write_bit(&bs, 0);
                // Keep in insignificant queue for next bitplane
                queue_push(&next_insignificant, block);
            } else {
                // Became significant: emit 1
                bitstream_write_bit(&bs, 1);
                // Use recursive subdivision to process this block and all children
                ezbc_context_t ctx = {
                    .bs = &bs,
                    .coeffs = coeffs,
                    .states = states,
                    .width = width,
                    .height = height,
                    .bitplane = bitplane,
                    .threshold = threshold,
                    .next_insignificant = &next_insignificant,
                    .next_significant = &next_significant,
                    .sign_count = &sign_bits_this_bitplane
                };
                process_significant_block_recursive(&ctx, block);
            }
        }

        // Process significant 1x1 blocks - emit refinement bits
        for (size_t i = 0; i < significant_queue.count; i++) {
            ezbc_block_t block = significant_queue.blocks[i];
            int idx = block.y * width + block.x;
            int abs_val = abs(coeffs[idx]);

            // Emit refinement bit at current bitplane
            int bit = (abs_val >> bitplane) & 1;
            bitstream_write_bit(&bs, bit);

            // Keep in significant queue for next bitplane
            queue_push(&next_significant, block);
        }

        // Swap queues for next bitplane: release the consumed queues, take over
        // the "next" queues by struct copy, then give "next" fresh storage.
        queue_free(&insignificant_queue);
        queue_free(&significant_queue);
        insignificant_queue = next_insignificant;
        significant_queue = next_significant;
        queue_init(&next_insignificant);
        queue_init(&next_significant);
    }

    // Free all queues
    queue_free(&insignificant_queue);
    queue_free(&significant_queue);
    queue_free(&next_insignificant);
    queue_free(&next_significant);
    free(states);

    // Hand the bitstream buffer over to the caller
    size_t final_size = bitstream_size(&bs);
    *output = bs.data;
    return final_size;
}

View File

@@ -1,61 +0,0 @@
/**
* TAV Encoder - EZBC (Embedded Zero Block Coding) Library
*
* Public API for EZBC entropy coding of wavelet coefficients.
*/
#ifndef TAV_ENCODER_EZBC_H
#define TAV_ENCODER_EZBC_H
#include <stdint.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
// =============================================================================
// EZBC Encoding
// =============================================================================
/**
* EZBC encoding for a single channel.
*
* Implements binary tree embedded zero block coding for efficient storage
* of sparse wavelet coefficients. Exploits coefficient sparsity through
* hierarchical significance testing and progressive bitplane encoding.
*
* Algorithm:
* 1. Find MSB bitplane from maximum absolute coefficient value
* 2. Write header: MSB bitplane (8 bits), width (16 bits), height (16 bits)
* 3. For each bitplane from MSB to 0:
* a. Process insignificant blocks: check if they become significant
* - Emit 0 if still insignificant, 1 if became significant
* b. For newly significant blocks: recursively subdivide until 1x1
* - Emit tree structure: 1=child is significant, 0=child insignificant
* c. Emit sign bits for newly significant 1x1 coefficients (1=negative, 0=positive)
* d. Process already-significant coefficients: emit refinement bits
* - Emit bit at current bitplane for progressive reconstruction
* 4. Return encoded bitstream
*
* Benefits:
* - Exploits coefficient sparsity (typical: 86.9% zeros in luma, 97.8% in chroma)
* - Progressive refinement from MSB to LSB
* - Spatial clustering through quadtree decomposition
* - No additional entropy coding needed (bitstream is already compressed)
*
* @param coeffs Input quantized coefficients (int16_t array)
* @param count Number of coefficients (width × height)
* @param width Frame width (must match coefficient array layout)
* @param height Frame height (must match coefficient array layout)
* @param output Output buffer pointer (allocated by this function, caller must free)
* @return Encoded size in bytes (including header)
*/
size_t tav_encode_channel_ezbc(int16_t *coeffs, size_t count, int width, int height,
uint8_t **output);
#ifdef __cplusplus
}
#endif
#endif // TAV_ENCODER_EZBC_H

File diff suppressed because it is too large Load Diff

View File

@@ -1,635 +0,0 @@
/**
* TAV Encoder - Quantization Library
*
* Provides DWT coefficient quantization with perceptual weighting based on
* the Human Visual System (HVS). Implements separable 3D quantization for
* temporal GOP encoding.
*
* Extracted from encoder_tav.c as part of library refactoring.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
// Forward declaration of encoder context (defined in main encoder)
typedef struct tav_encoder_s tav_encoder_t;
// =============================================================================
// Utility Functions
// =============================================================================
// Restrict x to [min, max]; the lower bound is tested first.
static inline int CLAMP(int x, int min, int max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
// Floating-point counterpart of CLAMP: restrict x to [min, max], lower bound first.
static inline float FCLAMP(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
// =============================================================================
// Constants for Perceptual Model
// =============================================================================
// Dead-zone quantization scaling factors (applied selectively to luma only)
#define DEAD_ZONE_FINEST_SCALE 1.0f // Full dead-zone for finest level
#define DEAD_ZONE_FINE_SCALE 0.5f // Reduced dead-zone for second-finest level
// Anisotropy parameters for horizontal vs vertical detail quantization
// Indexed by quality level; each table below has 7 entries (indices 0-6)
static const float ANISOTROPY_MULT[] = {5.1f, 3.8f, 2.7f, 2.0f, 1.5f, 1.2f, 1.0f};
static const float ANISOTROPY_BIAS[] = {0.4f, 0.3f, 0.2f, 0.1f, 0.0f, 0.0f, 0.0f};
// Chroma-specific anisotropy (more aggressive quantization)
static const float ANISOTROPY_MULT_CHROMA[] = {7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f};
static const float ANISOTROPY_BIAS_CHROMA[] = {1.0f, 0.8f, 0.6f, 0.4f, 0.2f, 0.0f, 0.0f};
// Detail preservation factors for 2-pixel and 4-pixel structures
#define FOUR_PIXEL_DETAILER 0.88f
#define TWO_PIXEL_DETAILER 0.92f
// =============================================================================
// Subband Analysis Helper Functions
// =============================================================================
/**
 * Get decomposition level for the coefficient at 2D position (x, y).
 *
 * The region is halved once per level, starting from the full frame. The first
 * level at which (x, y) falls outside the top-left quadrant is the answer, so
 * level 1 corresponds to the detail bands of the full-size frame.
 *
 * Returns: level (1=finest to decomp_levels=coarsest), or 0 when the position
 * stays inside the top-left (LL) quadrant through every level.
 */
static int get_subband_level_2d(int x, int y, int width, int height, int decomp_levels) {
    int quad_w = width;
    int quad_h = height;
    int level = 1;

    while (level <= decomp_levels) {
        // Shrink to the top-left quadrant of the current region
        quad_w >>= 1;
        quad_h >>= 1;

        // Outside that quadrant: the coefficient is a detail band of this level
        if (x >= quad_w || y >= quad_h) {
            return level;
        }

        // Still inside LL: descend into the next (coarser) level
        level++;
    }

    // Never left the LL quadrant
    return 0;
}
/**
 * Get subband type for the coefficient at 2D position (x, y).
 *
 * Walks the same quadrant hierarchy as get_subband_level_2d() and reports which
 * quadrant the position occupies at the first level where it leaves the
 * top-left quadrant.
 *
 * Returns: 0=LL, 1=LH (top-right), 2=HL (bottom-left), 3=HH (bottom-right)
 */
static int get_subband_type_2d(int x, int y, int width, int height, int decomp_levels) {
    int quad_w = width;
    int quad_h = height;

    for (int remaining = decomp_levels; remaining >= 1; remaining--) {
        quad_w >>= 1;
        quad_h >>= 1;

        // Bit 0 set: right half. Bit 1 set: bottom half.
        // This yields exactly the 1/2/3 codes documented above.
        const int in_right = (x >= quad_w) ? 1 : 0;
        const int in_bottom = (y >= quad_h) ? 1 : 0;
        const int band = in_right | (in_bottom << 1);

        if (band != 0) {
            return band;
        }
        // band == 0: still in the top-left quadrant, keep descending
    }

    // Stayed in the LL quadrant at every level
    return 0;
}
/**
 * Legacy wrapper: decomposition level for a row-major linear index.
 * The index is split into column (idx % width) and row (idx / width) and
 * forwarded to get_subband_level_2d().
 */
static int get_subband_level(int linear_idx, int width, int height, int decomp_levels) {
    const int col = linear_idx % width;
    const int row = linear_idx / width;
    return get_subband_level_2d(col, row, width, height, decomp_levels);
}
// Legacy wrapper: subband type for a row-major linear index
// (column = idx % width, row = idx / width), via get_subband_type_2d().
static int get_subband_type(int linear_idx, int width, int height, int decomp_levels) {
    const int col = linear_idx % width;
    const int row = linear_idx / width;
    return get_subband_type_2d(col, row, width, height, decomp_levels);
}
/**
 * Get temporal subband level for a frame index within a GOP.
 *
 * Level k (0 <= k < temporal_levels) covers frame indices below
 * num_frames >> (temporal_levels - k); the first matching level wins.
 * Frames beyond every boundary belong to the finest level.
 *
 * Returns: 0 for coarsest (tLL), temporal_levels for finest (tHH)
 */
static int get_temporal_subband_level(int frame_idx, int num_frames, int temporal_levels) {
    int level = 0;

    // Walk the boundaries from coarsest (largest shift) to finest (shift of 1)
    for (int shift = temporal_levels; shift > 0; shift--) {
        const int boundary = num_frames >> shift;
        if (frame_idx < boundary) {
            return level;
        }
        level++;
    }

    // Finest level (first decomposition's high-pass)
    return temporal_levels;
}
// =============================================================================
// Perceptual Model Functions (HVS-based weighting)
// =============================================================================
// Linear interpolation: returns x when a == 0 and y when a == 1.
static float lerp(float x, float y, float a) {
    const float x_share = 1.f - a;
    return x * x_share + y * a;
}
/**
 * Perceptual model for LH subband (horizontal details).
 * Curve: https://www.desmos.com/calculator/mjlpwqm8ge
 *
 * Piecewise curve in the normalized level: a straight line for level >= 4 and
 * a cubic below it. Both pieces evaluate to H4 (1.2) at level 4.
 *
 * @param quality Quality level (accepted for signature symmetry; not used by the fixed curve)
 * @param level   Normalized decomposition level
 * @return Perceptual weight multiplier
 */
static float perceptual_model3_LH(int quality, float level) {
    const float H4 = 1.2f;
    const float K = 2.f; // fixed value for a fixed curve; the quantiser scales it up anyway
    const float K12 = K * 12.f;

    // Linear segment for the coarse levels
    if (level >= 4) {
        return H4 - ((K + 1.f) / 15.f) * (level - 4.f);
    }

    // Cubic segment for the fine levels; C3 offsets it to meet the line at level 4
    const float C3 = -1.f / 45.f * (K12 + 92);
    return (-level / 180.f) * (K12 + 5*level*level - 60*level + 252) - C3 + H4;
}
/**
 * Perceptual model for HL subband (vertical details).
 * Computed from the LH weight as LH * ANISOTROPY_MULT[quality] + ANISOTROPY_BIAS[quality]
 * using a fused multiply-add.
 *
 * @param quality Quality level; used directly as an index into the anisotropy tables
 * @param LH      LH subband weight
 * @return Perceptual weight multiplier
 */
static float perceptual_model3_HL(int quality, float LH) {
    const float gain = ANISOTROPY_MULT[quality];
    const float bias = ANISOTROPY_BIAS[quality];
    return fmaf(LH, gain, bias);
}
/**
 * Perceptual model for HH subband (diagonal details).
 * Blends the LH and HL weights with a factor of (sqrt(level) - 1) * 0.5 + 0.5,
 * so the blend is 0.5 at level 1 and grows with the level.
 *
 * @param LH    LH subband weight
 * @param HL    HL subband weight
 * @param level Normalized decomposition level
 * @return Perceptual weight multiplier
 */
static float perceptual_model3_HH(float LH, float HL, float level) {
    const float root_minus_one = sqrtf(level) - 1.f;
    const float blend = fmaf(root_minus_one, 0.5f, 0.5f);
    return lerp(LH, HL, blend);
}
/**
 * Perceptual model for LL subband (low-frequency baseband).
 * Takes the LH weight at this level and divides it by the ratio
 * LH(level - 1) / LH(level).
 *
 * @param quality Quality level (forwarded to the LH model)
 * @param level   Normalized decomposition level
 * @return Perceptual weight multiplier
 */
static float perceptual_model3_LL(int quality, float level) {
    const float lh_here = perceptual_model3_LH(quality, level);
    const float lh_finer = perceptual_model3_LH(quality, level - 1);
    const float ratio = lh_finer / lh_here;
    return lh_here / ratio;
}
/**
 * Chroma-specific perceptual model base curve.
 * A straight line in `level` that passes through 1.0 at level 4; its slope is
 * -1 / (0.5 * quality^2 + 1), so higher quality flattens the line.
 *
 * @param quality Quality level
 * @param level   Normalized decomposition level
 * @return Perceptual weight multiplier (not clamped here)
 */
static float perceptual_model3_chroma_basecurve(int quality, float level) {
    const float steepness = 1.0f / (0.5f * quality * quality + 1.0f);
    const float distance = level - 4.0f;
    return 1.0f - steepness * distance;
}
/**
* Get perceptual weight for a specific subband and level.
* Implements HVS-optimized frequency weighting.
*
* NOTE: This function requires enc->quality_level field from encoder context.
*
* @param enc Encoder context (for quality_level)
* @param level0 Decomposition level (1-based: 1=finest, decomp_levels=coarsest)
* @param subband_type Subband type (0=LL, 1=LH, 2=HL, 3=HH)
* @param is_chroma 1 for chroma channels, 0 for luma
* @param max_levels Maximum decomposition levels
* @return Perceptual weight multiplier (chroma weights are clamped to [1.0, 100.0]; luma weights are not clamped)
*/
static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels);
/**
* Get perceptual weight for coefficient at linear index position.
* Maps linear coefficient index to DWT subband layout.
*
* NOTE: This function requires enc->widths[]/enc->heights[] arrays from encoder context.
*
* @param enc Encoder context (for widths/heights arrays and quality_level)
* @param linear_idx Linear coefficient index
* @param width Frame width
* @param height Frame height
* @param decomp_levels Number of decomposition levels
* @param is_chroma 1 for chroma channels, 0 for luma
* @return Perceptual weight multiplier; exactly 1.0 when enc->perceptual_tuning is zero or linear_idx lies past the last subband
*/
static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma);
// =============================================================================
// Quantization Functions
// =============================================================================
/**
* Quantize DWT coefficients with uniform quantization and optional dead-zone.
*
* This is the basic quantization function without perceptual weighting.
* Dead-zone quantization is applied selectively to luma channel only:
* - HH1 (finest diagonal): full dead-zone
* - LH1/HL1/HH2: half dead-zone
* - Coarser levels: no dead-zone (preserve structure)
*
* @param coeffs Input DWT coefficients (float)
* @param quantised Output quantized coefficients (int16_t)
* @param size Number of coefficients
* @param quantiser Base quantizer value (1-4096)
* @param dead_zone_threshold Dead-zone threshold (0.0 = disabled)
* @param width Frame width
* @param height Frame height
* @param decomp_levels Number of decomposition levels
* @param is_chroma 1 for chroma channels, 0 for luma
*/
void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
float dead_zone_threshold, int width, int height,
int decomp_levels, int is_chroma);
/**
* Quantize DWT coefficients with per-coefficient perceptual weighting.
*
* Applies HVS-optimized frequency weighting to each coefficient based on its
* position in the DWT subband tree. Implements the full perceptual model with
* dead-zone quantization for luma.
*
* NOTE: This function requires encoder context fields:
* - enc->widths[]/enc->heights[] for subband layout
* - enc->quality_level for perceptual model
* - enc->dead_zone_threshold for dead-zone quantization
*
* @param enc Encoder context
* @param coeffs Input DWT coefficients (float)
* @param quantised Output quantized coefficients (int16_t)
* @param size Number of coefficients
* @param base_quantiser Base quantizer value (before perceptual weighting)
* @param dead_zone_threshold Dead-zone threshold (0.0 = disabled)
* @param width Frame width
* @param height Frame height
* @param decomp_levels Number of decomposition levels
* @param is_chroma 1 for chroma channels, 0 for luma
* @param frame_count Current frame number (for any frame-dependent logic)
*/
void tav_quantise_perceptual(tav_encoder_t *enc,
float *coeffs, int16_t *quantised, int size,
int base_quantiser, float dead_zone_threshold, int width, int height,
int decomp_levels, int is_chroma, int frame_count);
/**
* Quantize 3D DWT coefficients with SEPARABLE temporal-spatial quantization.
*
* After 3D DWT (temporal + spatial), GOP coefficients have this structure:
* - Temporal DWT applied first → temporal subbands at different levels
* - Spatial 2D DWT applied to each temporal subband
*
* Quantization strategy:
* 1. Compute temporal base quantizer: tH_base(level) = Qbase * 2^(beta*level^kappa)
* - tLL (level 0): coarsest temporal → smallest quantizer
* - tHH (highest level): finest temporal → largest quantizer
* 2. Apply spatial perceptual weighting to tH_base
* 3. Final quantizer: Q_effective = tH_base × spatial_weight
*
* NOTE: This function requires encoder context fields:
* - enc->encoder_preset for sports mode detection
* - enc->temporal_decomp_levels for temporal level calculation
* - enc->verbose for debug output
* - Plus all fields needed by tav_quantise_perceptual()
*
* @param enc Encoder context
* @param gop_coeffs GOP coefficients [frame][pixel] (temporal subbands)
* @param quantised Output quantized coefficients [frame][pixel]
* @param num_frames Number of temporal subband frames
* @param spatial_size Number of spatial coefficients per frame
* @param base_quantiser Base quantizer value (before temporal/spatial scaling)
* @param is_chroma 1 for chroma channels, 0 for luma
*/
void tav_quantise_3d_dwt(tav_encoder_t *enc,
float **gop_coeffs, int16_t **quantised, int num_frames,
int spatial_size, int base_quantiser, int is_chroma);
/**
* Convert floating-point quantizer to integer with dithering (for bitrate mode).
*
* Implements Floyd-Steinberg style error diffusion to avoid quantization
* artifacts when converting float quantizer values to integers for rate control.
*
* NOTE: This function requires encoder context fields:
* - enc->adjusted_quantiser_y_float (current float quantizer)
* - enc->dither_accumulator (accumulated error, modified by this function)
*
* @param enc Encoder context
* @return Integer quantizer value (0-254)
*/
int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc);
// =============================================================================
// Perceptual Weight Implementation (requires encoder context)
// =============================================================================
// NOTE: This implementation requires encoder context (enc->quality_level)
// Struct definition will be in encoder header when integrated
#ifndef TAV_ENCODER_QUANTIZE_INTERNAL
// Forward declare structure access - will be properly defined when integrated
// Stand-in encoder context for this translation unit. Only the fields the
// quantisation code in this file touches are listed; the per-field notes below
// describe how this file uses them.
struct tav_encoder_s {
// Read by get_perceptual_weight(); passed to the perceptual models and used as
// an index into the ANISOTROPY_*_CHROMA tables.
int quality_level;
// Per-level subband dimensions, indexed by get_perceptual_weight_for_position()
// with [decomp_levels] for LL and [decomp_levels - level + 1] for detail bands.
int *widths;
int *heights;
// Spatial decomposition level count forwarded by tav_quantise_3d_dwt().
int decomp_levels;
// Dead-zone threshold forwarded by tav_quantise_3d_dwt().
float dead_zone_threshold;
// Bit 0 selects the BETA/KAPPA constants in tav_quantise_3d_dwt().
int encoder_preset;
// Temporal level count passed to get_temporal_subband_level().
int temporal_decomp_levels;
// Only referenced by a commented-out debug printf in tav_quantise_3d_dwt().
int verbose;
// tav_quantise_3d_dwt() passes frame_count + t as the frame number.
int frame_count;
// Input to tav_quantiser_float_to_int_dithered().
float adjusted_quantiser_y_float;
// Carried rounding error; read and rewritten by tav_quantiser_float_to_int_dithered().
float dither_accumulator;
// Frame dimensions forwarded by tav_quantise_3d_dwt().
int width;
int height;
// When zero, get_perceptual_weight_for_position() returns 1.0 for every index.
int perceptual_tuning;
};
#endif
// Perceptual weight for one subband, selected by channel kind and subband type.
//
// level0 (1..max_levels) is mapped linearly onto the normalized range 1.0..6.0
// that the perceptual_model3_* curves expect.
// NOTE(review): max_levels == 1 makes the denominator zero (0/0 -> NaN when
// level0 == 1); confirm callers never use a single decomposition level here.
//
// Luma:   0=LL, 1=LH, 2=HL, anything else=HH. HL and HH are additionally scaled
//         by a "detailer" factor when the normalized level is within 0.2 of 2 or 3.
// Chroma: LL is fixed at 1.0; the other bands are derived from the chroma base
//         curve (evaluated at level - 1) and clamped to [1.0, 100.0].
static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels) {
    const float level = 1.0f + ((level0 - 1.0f) / (max_levels - 1.0f)) * 5.0f;
    const int quality = enc->quality_level;

    if (is_chroma) {
        const float base = perceptual_model3_chroma_basecurve(quality, level - 1);

        switch (subband_type) {
            case 0:  // LL chroma
                return 1.0f;
            case 1:  // LH chroma - horizontal chroma details
                return FCLAMP(base, 1.0f, 100.0f);
            case 2:  // HL chroma - vertical chroma details
                return FCLAMP(base * ANISOTROPY_MULT_CHROMA[quality], 1.0f, 100.0f);
            default: // HH chroma - diagonal chroma details
                return FCLAMP(base * ANISOTROPY_MULT_CHROMA[quality] + ANISOTROPY_BIAS_CHROMA[quality], 1.0f, 100.0f);
        }
    }

    // Luma: LL subband
    if (subband_type == 0) {
        return perceptual_model3_LL(quality, level);
    }

    // Luma: LH subband - horizontal details
    const float LH = perceptual_model3_LH(quality, level);
    if (subband_type == 1) {
        return LH;
    }

    // Detail-preservation factor shared by HL and HH
    float detailer = 1.0f;
    if (2.2f >= level && level >= 1.8f) {
        detailer = TWO_PIXEL_DETAILER;
    } else if (3.2f >= level && level >= 2.8f) {
        detailer = FOUR_PIXEL_DETAILER;
    }

    // Luma: HL subband - vertical details
    const float HL = perceptual_model3_HL(quality, LH);
    if (subband_type == 2) {
        return HL * detailer;
    }

    // Luma: HH subband - diagonal details
    return perceptual_model3_HH(LH, HL, level) * detailer;
}
// Perceptual weight for the coefficient at linear_idx, assuming the coefficients
// are stored subband by subband: LL first, then LH, HL, HH for each level from
// decomp_levels down to 1. Subband sizes come from enc->widths[] / enc->heights[].
// Returns 1.0 when perceptual tuning is off or the index is past every subband.
// The width and height parameters are not used by this lookup.
static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma) {
    // Perceptual tuning disabled: uniform quantisation
    if (!enc->perceptual_tuning) {
        return 1.0f;
    }

    // LL subband at the maximum decomposition level comes first
    const int ll_size = enc->widths[decomp_levels] * enc->heights[decomp_levels];
    if (linear_idx < ll_size) {
        return get_perceptual_weight(enc, decomp_levels, 0, is_chroma, decomp_levels);
    }

    // band_end is the exclusive end index of the subband currently being tested
    int band_end = ll_size;

    for (int level = decomp_levels; level >= 1; level--) {
        const int dims_idx = decomp_levels - level + 1;
        const int subband_size = enc->widths[dims_idx] * enc->heights[dims_idx];

        // Three equally sized detail bands per level: 1=LH, 2=HL, 3=HH
        for (int band = 1; band <= 3; band++) {
            band_end += subband_size;
            if (linear_idx < band_end) {
                return get_perceptual_weight(enc, level, band, is_chroma, decomp_levels);
            }
        }
    }

    // Fallback for out-of-bounds indices
    return 1.0f;
}
// =============================================================================
// Quantization Function Implementations
// =============================================================================
// Uniform quantisation: divide every coefficient by one step size (the quantiser
// clamped to [1, 4096]), optionally zero small luma values in the finest
// subbands (dead zone), then round half away from zero and saturate to int16.
void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
                          float dead_zone_threshold, int width, int height,
                          int decomp_levels, int is_chroma) {
    const float step = FCLAMP((float)quantiser, 1.0f, 4096.0f);

    // Dead zone is a luma-only feature and needs a positive threshold
    const int dead_zone_active = (dead_zone_threshold > 0.0f) && !is_chroma;

    for (int idx = 0; idx < size; idx++) {
        float scaled = coeffs[idx] / step;

        if (dead_zone_active) {
            const int level = get_subband_level(idx, width, height, decomp_levels);
            const int band = get_subband_type(idx, width, height, decomp_levels);

            // Coarser levels keep a zero cutoff (no dead zone)
            float cutoff = 0.0f;
            if (level == 1 && band == 3) {
                // HH1: full dead-zone
                cutoff = dead_zone_threshold * DEAD_ZONE_FINEST_SCALE;
            } else if (level == 1 && (band == 1 || band == 2)) {
                // LH1, HL1: half dead-zone
                cutoff = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
            } else if (level == 2 && band == 3) {
                // HH2: half dead-zone
                cutoff = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
            }

            if (fabsf(scaled) <= cutoff) {
                scaled = 0.0f;
            }
        }

        // Round half away from zero, then saturate to the int16 range
        const float nudge = (scaled >= 0) ? 0.5f : -0.5f;
        quantised[idx] = (int16_t)CLAMP((int)(scaled + nudge), -32768, 32767);
    }
}
// Perceptual quantisation: like the uniform variant, but every coefficient's
// step size is the clamped base quantiser multiplied by the weight returned by
// get_perceptual_weight_for_position(). The frame_count parameter is accepted
// but not used in this function.
void tav_quantise_perceptual(tav_encoder_t *enc,
                             float *coeffs, int16_t *quantised, int size,
                             int base_quantiser, float dead_zone_threshold, int width, int height,
                             int decomp_levels, int is_chroma, int frame_count) {
    const float base_step = FCLAMP((float)base_quantiser, 1.0f, 4096.0f);

    // Dead zone is a luma-only feature and needs a positive threshold
    const int dead_zone_active = (dead_zone_threshold > 0.0f) && !is_chroma;

    for (int idx = 0; idx < size; idx++) {
        // Position-dependent step size
        const float weight = get_perceptual_weight_for_position(enc, idx, width, height, decomp_levels, is_chroma);
        const float step = base_step * weight;
        float scaled = coeffs[idx] / step;

        if (dead_zone_active) {
            const int level = get_subband_level(idx, width, height, decomp_levels);
            const int band = get_subband_type(idx, width, height, decomp_levels);

            // Full dead zone for HH1, half for LH1/HL1/HH2, none elsewhere
            float cutoff = 0.0f;
            if (level == 1 && band == 3) {
                cutoff = dead_zone_threshold * DEAD_ZONE_FINEST_SCALE;
            } else if (level == 1 && (band == 1 || band == 2)) {
                cutoff = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
            } else if (level == 2 && band == 3) {
                cutoff = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
            }

            if (fabsf(scaled) <= cutoff) {
                scaled = 0.0f;
            }
        }

        // Round half away from zero, then saturate to the int16 range
        const float nudge = (scaled >= 0) ? 0.5f : -0.5f;
        quantised[idx] = (int16_t)CLAMP((int)(scaled + nudge), -32768, 32767);
    }
}
// Separable temporal/spatial quantisation of a GOP.
// For each temporal subband frame t: derive a temporally scaled base quantiser
// (base * 2^(BETA * level^KAPPA), rounded and clamped to 1..255) and hand the
// frame to tav_quantise_perceptual() together with the encoder's dimensions,
// decomposition depth and dead-zone threshold.
void tav_quantise_3d_dwt(tav_encoder_t *enc,
                         float **gop_coeffs, int16_t **quantised, int num_frames,
                         int spatial_size, int base_quantiser, int is_chroma) {
    // Preset bit 0 (sports): BETA = 0 makes the temporal scale 1.0 at every level
    const int sports_preset = (enc->encoder_preset & 0x01) != 0;
    const float BETA = sports_preset ? 0.0f : 0.6f;
    const float KAPPA = sports_preset ? 1.0f : 1.14f;

    for (int t = 0; t < num_frames; t++) {
        // Which temporal subband level does this frame belong to?
        const int temporal_level = get_temporal_subband_level(t, num_frames, enc->temporal_decomp_levels);

        // Exponential temporal scaling of the base quantiser
        const float temporal_scale = powf(2.0f, BETA * powf((float)temporal_level, KAPPA));
        const float temporal_quantiser = base_quantiser * temporal_scale;
        const int temporal_base_quantiser = CLAMP((int)roundf(temporal_quantiser), 1, 255);

        // Spatial quantisation within this temporal subband. The perceptual path
        // is always taken here; tav_quantise_perceptual() falls back to a weight
        // of 1.0 per coefficient when enc->perceptual_tuning is zero.
        tav_quantise_perceptual(enc,
                                gop_coeffs[t],             // input coefficients of this temporal subband
                                quantised[t],              // output quantised coefficients
                                spatial_size,              // coefficients per frame
                                temporal_base_quantiser,   // temporally scaled base quantiser
                                enc->dead_zone_threshold,
                                enc->width,
                                enc->height,
                                enc->decomp_levels,
                                is_chroma,
                                enc->frame_count + t);     // frame number

        /*if (enc->verbose && (t == 0 || t == num_frames - 1)) {
            printf(" Temporal subband %d: level=%d, tH_base=%d\n",
                   t, temporal_level, temporal_base_quantiser);
        }*/
    }
}
// Convert the encoder's float luma quantiser to an integer with error feedback.
// The carried error is added to the float value before rounding; half of the new
// rounding error (measured before clamping) is stored back for the next call.
// The returned value is clamped to 0..254.
int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc) {
    // Float quantiser plus the error carried over from the previous call
    const float target = enc->adjusted_quantiser_y_float + enc->dither_accumulator;

    // Add 0.5 and truncate
    const int rounded = (int)(target + 0.5f);

    // Diffuse 50% of this call's rounding error into the next one
    const float residual = target - (float)rounded;
    enc->dither_accumulator = residual * 0.5f;

    return CLAMP(rounded, 0, 254);
}

View File

@@ -1,138 +0,0 @@
/**
* TAV Encoder - Quantization Library
*
* Public API for DWT coefficient quantization with perceptual weighting.
*/
#ifndef TAV_ENCODER_QUANTIZE_H
#define TAV_ENCODER_QUANTIZE_H
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// Forward declaration of encoder context (defined in main encoder)
typedef struct tav_encoder_s tav_encoder_t;
// =============================================================================
// Uniform Quantization
// =============================================================================
/**
* Quantize DWT coefficients with uniform quantization and optional dead-zone.
*
* This is the basic quantization function without perceptual weighting.
* Dead-zone quantization is applied selectively to luma channel only:
* - HH1 (finest diagonal): full dead-zone
* - LH1/HL1/HH2: half dead-zone
* - Coarser levels: no dead-zone (preserve structure)
*
* @param coeffs Input DWT coefficients (float)
* @param quantised Output quantized coefficients (int16_t)
* @param size Number of coefficients
* @param quantiser Base quantizer value (1-4096)
* @param dead_zone_threshold Dead-zone threshold (0.0 = disabled)
* @param width Frame width
* @param height Frame height
* @param decomp_levels Number of decomposition levels
* @param is_chroma 1 for chroma channels, 0 for luma
*/
void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
float dead_zone_threshold, int width, int height,
int decomp_levels, int is_chroma);
// =============================================================================
// Perceptual Quantization
// =============================================================================
/**
* Quantize DWT coefficients with per-coefficient perceptual weighting.
*
* Applies HVS-optimized frequency weighting to each coefficient based on its
* position in the DWT subband tree. Implements the full perceptual model with
* dead-zone quantization for luma.
*
* NOTE: This function requires encoder context fields:
* - enc->widths[]/enc->heights[] for subband layout
* - enc->quality_level for perceptual model
* - enc->dead_zone_threshold for dead-zone quantization
*
* @param enc Encoder context
* @param coeffs Input DWT coefficients (float)
* @param quantised Output quantized coefficients (int16_t)
* @param size Number of coefficients
* @param base_quantiser Base quantizer value (before perceptual weighting)
* @param dead_zone_threshold Dead-zone threshold (0.0 = disabled)
* @param width Frame width
* @param height Frame height
* @param decomp_levels Number of decomposition levels
* @param is_chroma 1 for chroma channels, 0 for luma
* @param frame_count Current frame number (for any frame-dependent logic)
*/
void tav_quantise_perceptual(tav_encoder_t *enc,
float *coeffs, int16_t *quantised, int size,
int base_quantiser, float dead_zone_threshold, int width, int height,
int decomp_levels, int is_chroma, int frame_count);
// =============================================================================
// 3D GOP Quantization
// =============================================================================
/**
* Quantize 3D DWT coefficients with SEPARABLE temporal-spatial quantization.
*
* After 3D DWT (temporal + spatial), GOP coefficients have this structure:
* - Temporal DWT applied first → temporal subbands at different levels
* - Spatial 2D DWT applied to each temporal subband
*
* Quantization strategy:
* 1. Compute temporal base quantizer: tH_base(level) = Qbase * 2^(beta*level^kappa)
* - tLL (level 0): coarsest temporal → smallest quantizer
* - tHH (highest level): finest temporal → largest quantizer
* 2. Apply spatial perceptual weighting to tH_base
* 3. Final quantizer: Q_effective = tH_base × spatial_weight
*
* NOTE: This function requires encoder context fields:
* - enc->encoder_preset for sports mode detection
* - enc->temporal_decomp_levels for temporal level calculation
* - enc->verbose for debug output
* - Plus all fields needed by tav_quantise_perceptual()
*
* @param enc Encoder context
* @param gop_coeffs GOP coefficients [frame][pixel] (temporal subbands)
* @param quantised Output quantized coefficients [frame][pixel]
* @param num_frames Number of temporal subband frames
* @param spatial_size Number of spatial coefficients per frame
* @param base_quantiser Base quantizer value (before temporal/spatial scaling)
* @param is_chroma 1 for chroma channels, 0 for luma
*/
void tav_quantise_3d_dwt(tav_encoder_t *enc,
float **gop_coeffs, int16_t **quantised, int num_frames,
int spatial_size, int base_quantiser, int is_chroma);
// =============================================================================
// Rate Control
// =============================================================================
/**
* Convert floating-point quantizer to integer with dithering (for bitrate mode).
*
* Implements Floyd-Steinberg style error diffusion to avoid quantization
* artifacts when converting float quantizer values to integers for rate control.
*
* NOTE: This function requires encoder context fields:
* - enc->adjusted_quantiser_y_float (current float quantizer)
* - enc->dither_accumulator (accumulated error, modified by this function)
*
* Because the accumulator in `enc` is updated on every call, the function is
* stateful: repeated calls with the same float quantizer may return different
* integers (NOTE(review): inferred from the description above; the
* implementation is not visible in this header).
*
* @param enc Encoder context
* @return Integer quantizer value (0-254)
*/
int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc);
#ifdef __cplusplus
}
#endif
#endif // TAV_ENCODER_QUANTIZE_H

View File

@@ -1,159 +0,0 @@
/**
* TAV Encoder Library - Tile Processing Implementation
*/
#include "tav_encoder_tile.h"
#include "tav_encoder_dwt.h"
#include <string.h>
#include <stdlib.h>
#define CLAMP(x, min, max) ((x) < (min) ? (min) : ((x) > (max) ? (max) : (x)))
/**
 * Copy one tile plus its surrounding margin out of the full-frame Y/Co/Cg
 * planes into three padded tile buffers.
 *
 * Pixels that fall outside the frame are fetched by reflecting the coordinate
 * back into the frame (negative coordinates are negated; coordinates past the
 * far edge are reflected about that edge) and then clamping to the valid range.
 *
 * @param frame_y,frame_co,frame_cg  Source planes, frame_width * frame_height floats each
 * @param frame_width,frame_height   Source plane dimensions
 * @param tile_x,tile_y              Tile indices (0-based)
 * @param padded_y,padded_co,padded_cg  Destination buffers,
 *        TAV_PADDED_TILE_SIZE_X * TAV_PADDED_TILE_SIZE_Y floats each
 */
void tav_extract_padded_tile(const float *frame_y, const float *frame_co, const float *frame_cg,
                             int frame_width, int frame_height,
                             int tile_x, int tile_y,
                             float *padded_y, float *padded_co, float *padded_cg) {
    // Frame coordinates of the first core (non-margin) pixel of this tile.
    const int tile_origin_x = tile_x * TAV_TILE_SIZE_X;
    const int tile_origin_y = tile_y * TAV_TILE_SIZE_Y;

    // Columns [core_begin, core_end) of a padded row hold the core pixels.
    const int core_begin = TAV_TILE_MARGIN;
    const int core_end = TAV_TILE_MARGIN + TAV_TILE_SIZE_X;

    // Row-invariant: when the core columns lie fully inside the frame they
    // can be block-copied instead of mirrored pixel by pixel.
    const int core_right_edge = tile_origin_x + TAV_TILE_SIZE_X;
    const int core_inside_frame = (tile_origin_x >= 0 && core_right_edge <= frame_width);

    for (int row = 0; row < TAV_PADDED_TILE_SIZE_Y; row++) {
        // Source row for this padded row, reflected at the top/bottom edges.
        int frame_row = tile_origin_y + row - TAV_TILE_MARGIN;
        if (frame_row < 0) {
            frame_row = -frame_row;
        } else if (frame_row >= frame_height) {
            frame_row = frame_height - 1 - (frame_row - frame_height);
        }
        frame_row = CLAMP(frame_row, 0, frame_height - 1);

        const int dst_base = row * TAV_PADDED_TILE_SIZE_X;
        const int src_base = frame_row * frame_width;

        const float *in_y = frame_y + src_base;
        const float *in_co = frame_co + src_base;
        const float *in_cg = frame_cg + src_base;
        float *out_y = padded_y + dst_base;
        float *out_co = padded_co + dst_base;
        float *out_cg = padded_cg + dst_base;

        if (!core_inside_frame) {
            // Edge tile: every column goes through the mirroring logic.
            for (int px = 0; px < TAV_PADDED_TILE_SIZE_X; px++) {
                int col = tile_origin_x + px - TAV_TILE_MARGIN;
                if (col < 0) {
                    col = -col;
                } else if (col >= frame_width) {
                    col = frame_width - 1 - (col - frame_width);
                }
                col = CLAMP(col, 0, frame_width - 1);
                out_y[px] = in_y[col];
                out_co[px] = in_co[col];
                out_cg[px] = in_cg[col];
            }
            continue;
        }

        // Core columns: one contiguous copy per channel.
        memcpy(out_y + core_begin, in_y + tile_origin_x, TAV_TILE_SIZE_X * sizeof(float));
        memcpy(out_co + core_begin, in_co + tile_origin_x, TAV_TILE_SIZE_X * sizeof(float));
        memcpy(out_cg + core_begin, in_cg + tile_origin_x, TAV_TILE_SIZE_X * sizeof(float));

        // Left margin: only the "before the frame" reflection can apply.
        for (int px = 0; px < core_begin; px++) {
            int col = tile_origin_x + px - TAV_TILE_MARGIN;
            if (col < 0) col = -col;
            col = CLAMP(col, 0, frame_width - 1);
            out_y[px] = in_y[col];
            out_co[px] = in_co[col];
            out_cg[px] = in_cg[col];
        }

        // Right margin: only the "past the frame" reflection can apply.
        for (int px = core_end; px < TAV_PADDED_TILE_SIZE_X; px++) {
            int col = tile_origin_x + px - TAV_TILE_MARGIN;
            if (col >= frame_width) {
                col = frame_width - 1 - (col - frame_width);
            }
            col = CLAMP(col, 0, frame_width - 1);
            out_y[px] = in_y[col];
            out_co[px] = in_co[col];
            out_cg[px] = in_cg[col];
        }
    }
}
/**
 * Forward 2D DWT of one padded tile, in place.
 *
 * Thin wrapper: delegates to tav_dwt_2d_forward() (tav_encoder_dwt) with the
 * fixed padded-tile width and height.
 */
void tav_dwt_2d_forward_padded_tile(float *tile_data, int levels, int filter_type) {
    const int padded_width = TAV_PADDED_TILE_SIZE_X;
    const int padded_height = TAV_PADDED_TILE_SIZE_Y;
    tav_dwt_2d_forward(tile_data, padded_width, padded_height, levels, filter_type);
}
/**
 * Placeholder for the inverse 2D DWT of a padded tile.
 *
 * The library has no inverse transform for arbitrary dimensions yet, so this
 * function deliberately does nothing: tile_data is left untouched and both
 * other arguments are ignored. The decoder uses a different code path.
 */
void tav_dwt_2d_inverse_padded_tile(float *tile_data, int levels, int filter_type) {
    // Reference every parameter once to silence unused-parameter warnings.
    (void)filter_type;
    (void)levels;
    (void)tile_data;
}
/**
 * Strip the margin from a padded tile, leaving the
 * TAV_TILE_SIZE_X x TAV_TILE_SIZE_Y core.
 *
 * Each core row is copied with memcpy, so the two buffers must not overlap.
 */
void tav_crop_tile_margins(const float *padded_data, float *core_data) {
    int y = 0;
    while (y < TAV_TILE_SIZE_Y) {
        // Skip TAV_TILE_MARGIN rows at the top and TAV_TILE_MARGIN columns on the left.
        const int src_index = (y + TAV_TILE_MARGIN) * TAV_PADDED_TILE_SIZE_X + TAV_TILE_MARGIN;
        const int dst_index = y * TAV_TILE_SIZE_X;
        memcpy(core_data + dst_index, padded_data + src_index, TAV_TILE_SIZE_X * sizeof(float));
        y++;
    }
}
/**
 * Crop a padded tile to an arbitrary (edge-tile) core size.
 *
 * Copies the top-left actual_width x actual_height region of the core area
 * (i.e. starting TAV_TILE_MARGIN rows/columns into the padded tile) into a
 * tightly packed output buffer with a row stride of actual_width.
 *
 * Non-positive dimensions are treated as "nothing to copy". Without this
 * check a negative actual_width would be converted to a huge size_t in the
 * memcpy length. Dimensions of this kind can arise when
 * tav_get_tile_dimensions() is asked about a tile that starts beyond the frame.
 *
 * @param padded_data   Padded tile (TAV_PADDED_TILE_SIZE_X floats per row)
 * @param core_data     Output buffer, actual_width * actual_height floats
 * @param actual_width  Columns to keep per row
 * @param actual_height Rows to keep
 */
void tav_crop_tile_margins_edge(const float *padded_data, float *core_data,
                                int actual_width, int actual_height) {
    if (actual_width <= 0 || actual_height <= 0) {
        return;
    }

    const size_t row_bytes = (size_t)actual_width * sizeof(float);
    for (int y = 0; y < actual_height; y++) {
        const int padded_row = (y + TAV_TILE_MARGIN) * TAV_PADDED_TILE_SIZE_X + TAV_TILE_MARGIN;
        const int core_row = y * actual_width;
        memcpy(&core_data[core_row], &padded_data[padded_row], row_bytes);
    }
}
/**
 * Report the real size of the tile at (tile_x, tile_y).
 *
 * Interior tiles are TAV_TILE_SIZE_X x TAV_TILE_SIZE_Y. A tile touching the
 * right or bottom frame edge is cut down to whatever part of the frame is
 * left. No lower bound is applied: a tile that starts past the frame edge
 * yields a zero or negative size.
 */
void tav_get_tile_dimensions(int frame_width, int frame_height,
                             int tile_x, int tile_y,
                             int *tile_width, int *tile_height) {
    // Frame pixels remaining to the right of / below the tile origin.
    const int width_left = frame_width - tile_x * TAV_TILE_SIZE_X;
    const int height_left = frame_height - tile_y * TAV_TILE_SIZE_Y;

    int w = TAV_TILE_SIZE_X;
    if (width_left < TAV_TILE_SIZE_X) {
        w = width_left;
    }

    int h = TAV_TILE_SIZE_Y;
    if (height_left < TAV_TILE_SIZE_Y) {
        h = height_left;
    }

    *tile_width = w;
    *tile_height = h;
}

View File

@@ -1,103 +0,0 @@
/**
* TAV Encoder Library - Tile Processing
*
* Functions for padded tile extraction and DWT processing.
* Used when video dimensions exceed monoblock threshold (720x576).
*/
#ifndef TAV_ENCODER_TILE_H
#define TAV_ENCODER_TILE_H
#include <stdint.h>
#include <stddef.h>
#include "../../include/tav_encoder_lib.h"
// Tile dimensions (from header)
// TAV_TILE_SIZE_X = 640, TAV_TILE_SIZE_Y = 540
// TAV_PADDED_TILE_SIZE_X = 704, TAV_PADDED_TILE_SIZE_Y = 604
// TAV_TILE_MARGIN = 32
/**
* Extract a padded tile from full-frame YCoCg buffers.
*
* Extracts a tile at position (tile_x, tile_y) with TAV_TILE_MARGIN pixels
* of padding on all sides for seamless DWT processing. Uses symmetric
* extension (mirroring) at frame boundaries.
*
* Mirroring detail (as implemented): a coordinate c < 0 maps to -c, a
* coordinate c >= size maps to size - 1 - (c - size), and the result is then
* clamped to [0, size - 1]. The two edges therefore mirror slightly
* differently (the far edge repeats the border pixel, the near edge does not).
*
* @param frame_y Full frame Y channel
* @param frame_co Full frame Co channel
* @param frame_cg Full frame Cg channel
* @param frame_width Full frame width
* @param frame_height Full frame height
* @param tile_x Tile X index (0-based)
* @param tile_y Tile Y index (0-based)
* @param padded_y Output: Padded tile Y (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y floats)
* @param padded_co Output: Padded tile Co (same size as padded_y)
* @param padded_cg Output: Padded tile Cg (same size as padded_y)
*/
void tav_extract_padded_tile(const float *frame_y, const float *frame_co, const float *frame_cg,
int frame_width, int frame_height,
int tile_x, int tile_y,
float *padded_y, float *padded_co, float *padded_cg);
/**
* Apply 2D DWT forward transform to a padded tile.
*
* Uses fixed PADDED_TILE_SIZE dimensions (704x604) for optimal performance.
* Implemented as a direct call to tav_dwt_2d_forward() with
* TAV_PADDED_TILE_SIZE_X / TAV_PADDED_TILE_SIZE_Y as width and height.
*
* @param tile_data Tile data (modified in-place)
* @param levels Number of decomposition levels
* @param filter_type Wavelet filter type (0=CDF 5/3, 1=CDF 9/7, etc.)
*/
void tav_dwt_2d_forward_padded_tile(float *tile_data, int levels, int filter_type);
/**
* Apply 2D DWT inverse transform to a padded tile.
*
* WARNING: not implemented. The current definition is a placeholder that
* ignores all three arguments and leaves tile_data unchanged; the decoder
* uses a different code path.
*
* @param tile_data Tile data (currently left untouched)
* @param levels Number of decomposition levels (currently ignored)
* @param filter_type Wavelet filter type (currently ignored)
*/
void tav_dwt_2d_inverse_padded_tile(float *tile_data, int levels, int filter_type);
/**
* Crop a padded tile to its core region (removing margins).
*
* Extracts the central TAV_TILE_SIZE_X × TAV_TILE_SIZE_Y region from a padded tile.
* Rows are copied with memcpy, so padded_data and core_data must not overlap.
*
* @param padded_data Padded tile (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y)
* @param core_data Output: Core tile (TILE_SIZE_X * TILE_SIZE_Y)
*/
void tav_crop_tile_margins(const float *padded_data, float *core_data);
/**
* Crop a padded tile to actual dimensions for edge tiles.
*
* For tiles at the right/bottom edges of a frame, the actual tile may be
* smaller than TILE_SIZE_X × TILE_SIZE_Y. This function handles that case.
* The output is tightly packed: row y starts at core_data[y * actual_width].
*
* @param padded_data Padded tile (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y)
* @param core_data Output: Core tile data (actual_width * actual_height floats)
* @param actual_width Actual tile width (may be < TILE_SIZE_X for edge tiles)
* @param actual_height Actual tile height (may be < TILE_SIZE_Y for edge tiles)
*/
void tav_crop_tile_margins_edge(const float *padded_data, float *core_data,
int actual_width, int actual_height);
/**
* Calculate actual tile dimensions for a given tile position.
*
* Edge tiles may be smaller than the standard tile size.
* Each dimension is min(standard tile size, frame size - tile start); no
* lower bound is applied, so a tile index beyond the frame produces a zero
* or negative value.
*
* @param frame_width Full frame width
* @param frame_height Full frame height
* @param tile_x Tile X index
* @param tile_y Tile Y index
* @param tile_width Output: Actual tile width
* @param tile_height Output: Actual tile height
*/
void tav_get_tile_dimensions(int frame_width, int frame_height,
int tile_x, int tile_y,
int *tile_width, int *tile_height);
#endif // TAV_ENCODER_TILE_H

View File

@@ -1,441 +0,0 @@
/**
* TAV Encoder - Utilities Library
*
* Common utility functions and helpers used across the encoder.
* Includes math utilities, clamping, filename generation, etc.
*
* Extracted from encoder_tav.c as part of library refactoring.
*/
#define _POSIX_C_SOURCE 200112L
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <math.h>
// =============================================================================
// Math Utilities
// =============================================================================
/**
 * Limit an integer to the closed interval [min, max].
 * The lower bound is tested first, so it wins if min > max.
 */
int tav_clamp_int(int x, int min, int max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
/**
 * Limit a float to the closed interval [min, max].
 * A NaN input fails both comparisons and is returned unchanged.
 */
float tav_clamp_float(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
/**
 * Limit a double to the closed interval [min, max].
 * A NaN input fails both comparisons and is returned unchanged.
 */
double tav_clamp_double(double x, double min, double max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
/**
* Round double to nearest integer.
*/
int tav_iround(double v) {
return (int)floor(v + 0.5);
}
/**
 * Single-precision linear interpolation.
 * @param a Value returned for t = 0
 * @param b Value returned for t = 1
 * @param t Blend factor; not clamped, so values outside [0, 1] extrapolate
 * @return a * (1 - t) + b * t
 */
float tav_lerp(float a, float b, float t) {
    const float weight_a = 1.0f - t;
    return a * weight_a + b * t;
}
/**
 * Double-precision linear interpolation: a * (1 - t) + b * t.
 * t is not clamped.
 */
double tav_lerp_double(double a, double b, double t) {
    const double weight_a = 1.0 - t;
    return a * weight_a + b * t;
}
/**
 * Smaller of two integers.
 */
int tav_min_int(int a, int b) {
    if (a < b) {
        return a;
    }
    return b;
}
/**
 * Larger of two integers.
 */
int tav_max_int(int a, int b) {
    if (a > b) {
        return a;
    }
    return b;
}
/**
 * Smaller of two floats. If the comparison is false (including when either
 * operand is NaN) the second argument is returned.
 */
float tav_min_float(float a, float b) {
    if (a < b) {
        return a;
    }
    return b;
}
/**
 * Larger of two floats. If the comparison is false (including when either
 * operand is NaN) the second argument is returned.
 */
float tav_max_float(float a, float b) {
    if (a > b) {
        return a;
    }
    return b;
}
/**
 * Absolute value of an integer.
 * The most negative int has no positive counterpart; negating it overflows.
 */
int tav_abs_int(int x) {
    if (x < 0) {
        return -x;
    }
    return x;
}
/**
 * Absolute value of a float, implemented with a comparison against 0.0f.
 * Inputs that do not compare below zero (including -0.0f and NaN) are
 * returned as-is.
 */
float tav_abs_float(float x) {
    if (x < 0.0f) {
        return -x;
    }
    return x;
}
/**
 * Signum: 1 for positive x, -1 for negative x, 0 for zero.
 */
int tav_sign(int x) {
    if (x > 0) {
        return 1;
    }
    if (x < 0) {
        return -1;
    }
    return 0;
}
/**
 * Return 1 when x is a positive power of two, otherwise 0.
 */
int tav_is_power_of_2(int x) {
    if (x <= 0) {
        return 0;
    }
    // A power of two has exactly one bit set, so clearing the lowest set bit leaves zero.
    return (x & (x - 1)) == 0;
}
/**
 * Smallest power of two that is >= x.
 * Returns 1 for x <= 0 and x itself when x is already a power of two.
 * Inputs above 2^30 have no representable result in an int.
 */
int tav_next_power_of_2(int x) {
    if (x <= 0) return 1;

    // Smear the highest set bit of (x - 1) into every lower position, then add one.
    int bits = x - 1;
    for (int shift = 1; shift <= 16; shift *= 2) {
        bits |= bits >> shift;
    }
    return bits + 1;
}
/**
 * floor(log2(x)) for positive x, i.e. the index of the highest set bit.
 * Returns -1 for x <= 0.
 */
int tav_floor_log2(int x) {
    if (x <= 0) return -1;

    int exponent = 0;
    for (int rest = x; rest > 1; rest >>= 1) {
        exponent++;
    }
    return exponent;
}
/**
 * ceil(log2(x)) for positive x.
 * Returns -1 for x <= 0.
 */
int tav_ceil_log2(int x) {
    if (x <= 1) {
        return (x == 1) ? 0 : -1;
    }

    const int floor_exp = tav_floor_log2(x);
    // Exact powers of two need no rounding up.
    const int is_exact = ((1 << floor_exp) == x);
    return is_exact ? floor_exp : floor_exp + 1;
}
// =============================================================================
// Random Filename Generation
// =============================================================================
/**
 * Write a pseudo-random temporary path of the form
 * "/tmp/<32 alphanumeric chars>.mp2" into filename.
 *
 * The C library generator is seeded from the clock on the first call only.
 *
 * @param filename Output buffer; exactly 42 bytes are written (41 characters
 *                 plus the terminating NUL)
 */
void tav_generate_random_filename(char *filename) {
    static const char alphabet[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    static int rng_ready = 0;

    if (!rng_ready) {
        srand(time(NULL));
        rng_ready = 1;
    }

    const int alphabet_len = sizeof(alphabet) - 1;

    // Directory prefix occupies bytes 0..4.
    memcpy(filename, "/tmp/", 5);

    // Random stem occupies bytes 5..36.
    char *cursor = filename + 5;
    for (int n = 0; n < 32; n++) {
        *cursor++ = alphabet[rand() % alphabet_len];
    }

    // Extension plus terminating NUL occupy bytes 37..41.
    memcpy(cursor, ".mp2", 5);
}
/**
 * Write a pseudo-random temporary path of the form
 * "/tmp/<32 alphanumeric chars>.<ext>" into filename.
 *
 * The C library generator is seeded from the clock on the first call only.
 *
 * @param filename Output buffer; needs 38 + strlen(ext) + 1 bytes
 * @param ext      Extension without the leading dot (e.g. "tmp", "wav");
 *                 must be a valid NUL-terminated string
 */
void tav_generate_random_filename_ext(char *filename, const char *ext) {
    static const char alphabet[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    static int rng_ready = 0;

    if (!rng_ready) {
        srand(time(NULL));
        rng_ready = 1;
    }

    const int alphabet_len = sizeof(alphabet) - 1;

    // Directory prefix occupies bytes 0..4.
    memcpy(filename, "/tmp/", 5);

    // Random stem occupies bytes 5..36.
    char *cursor = filename + 5;
    for (int n = 0; n < 32; n++) {
        *cursor++ = alphabet[rand() % alphabet_len];
    }

    // Dot at byte 37, then the caller's extension including its NUL.
    *cursor++ = '.';
    strcpy(cursor, ext);
}
// =============================================================================
// Memory Utilities
// =============================================================================
/**
* Safe malloc with error checking.
* Exits program on allocation failure.
*/
void *tav_malloc(size_t size) {
void *ptr = malloc(size);
if (!ptr && size > 0) {
fprintf(stderr, "ERROR: Failed to allocate %zu bytes\n", size);
exit(1);
}
return ptr;
}
/**
* Safe calloc with error checking.
* Exits program on allocation failure.
*/
void *tav_calloc(size_t count, size_t size) {
void *ptr = calloc(count, size);
if (!ptr && count > 0 && size > 0) {
fprintf(stderr, "ERROR: Failed to allocate %zu elements of %zu bytes\n", count, size);
exit(1);
}
return ptr;
}
/**
* Safe realloc with error checking.
* Exits program on allocation failure.
*/
void *tav_realloc(void *ptr, size_t size) {
void *new_ptr = realloc(ptr, size);
if (!new_ptr && size > 0) {
fprintf(stderr, "ERROR: Failed to reallocate to %zu bytes\n", size);
exit(1);
}
return new_ptr;
}
/**
* Allocate aligned memory.
* Returns NULL on failure.
*/
void *tav_aligned_alloc(size_t alignment, size_t size) {
// Ensure alignment is power of 2
if (!tav_is_power_of_2(alignment)) {
fprintf(stderr, "ERROR: Alignment must be power of 2, got %zu\n", alignment);
return NULL;
}
#ifdef _WIN32
return _aligned_malloc(size, alignment);
#else
void *ptr = NULL;
if (posix_memalign(&ptr, alignment, size) != 0) {
return NULL;
}
return ptr;
#endif
}
/**
* Free aligned memory.
*/
void tav_aligned_free(void *ptr) {
#ifdef _WIN32
_aligned_free(ptr);
#else
free(ptr);
#endif
}
// =============================================================================
// Array Utilities
// =============================================================================
/**
 * Set the first `count` elements of an int array to `value`.
 */
void tav_array_fill_int(int *array, size_t count, int value) {
    int *const end = array + count;
    for (int *slot = array; slot != end; ++slot) {
        *slot = value;
    }
}
/**
 * Set the first `count` elements of a float array to `value`.
 */
void tav_array_fill_float(float *array, size_t count, float value) {
    float *const end = array + count;
    for (float *slot = array; slot != end; ++slot) {
        *slot = value;
    }
}
/**
 * Copy `count` ints from src to dst with memcpy; the ranges must not overlap.
 */
void tav_array_copy_int(int *dst, const int *src, size_t count) {
    const size_t byte_count = count * sizeof(int);
    memcpy(dst, src, byte_count);
}
/**
 * Copy `count` floats from src to dst with memcpy; the ranges must not overlap.
 */
void tav_array_copy_float(float *dst, const float *src, size_t count) {
    const size_t byte_count = count * sizeof(float);
    memcpy(dst, src, byte_count);
}
/**
 * Largest element of an int array.
 * Returns 0 for an empty array (count == 0).
 */
int tav_array_max_int(const int *array, size_t count) {
    if (count == 0) return 0;

    int best = array[0];
    const int *const end = array + count;
    for (const int *it = array + 1; it != end; ++it) {
        if (*it > best) best = *it;
    }
    return best;
}
/**
 * Smallest element of an int array.
 * Returns 0 for an empty array (count == 0).
 */
int tav_array_min_int(const int *array, size_t count) {
    if (count == 0) return 0;

    int best = array[0];
    const int *const end = array + count;
    for (const int *it = array + 1; it != end; ++it) {
        if (*it < best) best = *it;
    }
    return best;
}
/**
 * Largest magnitude (fabsf) found in a float array.
 * Returns 0.0f for an empty array (count == 0).
 */
float tav_array_max_abs_float(const float *array, size_t count) {
    if (count == 0) return 0.0f;

    float peak = fabsf(array[0]);
    const float *const end = array + count;
    for (const float *it = array + 1; it != end; ++it) {
        const float magnitude = fabsf(*it);
        if (magnitude > peak) peak = magnitude;
    }
    return peak;
}
/**
 * Sum of an int array, accumulated in a long long.
 */
long long tav_array_sum_int(const int *array, size_t count) {
    long long total = 0;
    const int *const end = array + count;
    for (const int *it = array; it != end; ++it) {
        total += *it;
    }
    return total;
}
/**
 * Sum of a float array, accumulated front to back in a double.
 */
double tav_array_sum_float(const float *array, size_t count) {
    double total = 0.0;
    const float *const end = array + count;
    for (const float *it = array; it != end; ++it) {
        total += *it;
    }
    return total;
}
/**
 * Arithmetic mean of a float array.
 * The sum is taken in double precision via tav_array_sum_float() and the
 * quotient is narrowed to float. Returns 0.0f for an empty array.
 */
float tav_array_mean_float(const float *array, size_t count) {
    if (count == 0) {
        return 0.0f;
    }
    const double total = tav_array_sum_float(array, count);
    return (float)(total / count);
}
/**
 * Exchange the ints that a and b point to.
 */
void tav_swap_int(int *a, int *b) {
    const int first = *a;
    const int second = *b;
    *a = second;
    *b = first;
}
/**
 * Exchange the floats that a and b point to.
 */
void tav_swap_float(float *a, float *b) {
    const float first = *a;
    const float second = *b;
    *a = second;
    *b = first;
}
/**
 * Exchange the two void pointers that a and b point to.
 */
void tav_swap_ptr(void **a, void **b) {
    void *const first = *a;
    void *const second = *b;
    *a = second;
    *b = first;
}

View File

@@ -1,165 +0,0 @@
/**
* TAV Encoder - Utilities Library
*
* Public API for common utility functions and helpers.
*/
#ifndef TAV_ENCODER_UTILS_H
#define TAV_ENCODER_UTILS_H
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
// =============================================================================
// Math Utilities
// =============================================================================
/** Clamp integer value to range [min, max] (lower bound is checked first) */
int tav_clamp_int(int x, int min, int max);
/** Clamp float value to range [min, max] (NaN is returned unchanged) */
float tav_clamp_float(float x, float min, float max);
/** Clamp double value to range [min, max] (NaN is returned unchanged) */
double tav_clamp_double(double x, double min, double max);
/** Round double to nearest integer, computed as floor(v + 0.5) (halves round up) */
int tav_iround(double v);
/** Linear interpolation between two floats: a at t=0, b at t=1; t is not clamped */
float tav_lerp(float a, float b, float t);
/** Linear interpolation between two doubles: a at t=0, b at t=1; t is not clamped */
double tav_lerp_double(double a, double b, double t);
/** Get minimum of two integers */
int tav_min_int(int a, int b);
/** Get maximum of two integers */
int tav_max_int(int a, int b);
/** Get minimum of two floats */
float tav_min_float(float a, float b);
/** Get maximum of two floats */
float tav_max_float(float a, float b);
/** Compute absolute value of integer */
int tav_abs_int(int x);
/** Compute absolute value of float (x < 0 ? -x : x) */
float tav_abs_float(float x);
/** Sign function: returns -1, 0, or 1 */
int tav_sign(int x);
/** Check if integer is power of 2 (returns 1 or 0; 0 for x <= 0) */
int tav_is_power_of_2(int x);
/** Round up to next power of 2 (returns 1 for x <= 0, x itself if already a power of 2) */
int tav_next_power_of_2(int x);
/** Compute floor of log2(x); returns -1 for x <= 0 */
int tav_floor_log2(int x);
/** Compute ceil of log2(x); returns -1 for x <= 0 */
int tav_ceil_log2(int x);
// =============================================================================
// Random Filename Generation
// =============================================================================
/**
* Generate a random temporary filename with .mp2 extension.
* Format: /tmp/[32 random chars].mp2
*
* Characters are drawn from [0-9a-zA-Z] using rand(); srand(time(NULL)) is
* called on first use. The name is only generated, not created or reserved
* on disk.
*
* @param filename Output buffer (must be at least 42 bytes)
*/
void tav_generate_random_filename(char *filename);
/**
* Generate a random temporary filename with custom extension.
* Format: /tmp/[32 random chars].[ext]
*
* The fixed part is 38 characters ("/tmp/" + 32 random chars + "."), so the
* buffer needs 38 + strlen(ext) + 1 bytes. No length check is performed.
*
* @param filename Output buffer (must be large enough)
* @param ext File extension (without leading dot); must not be NULL
*/
void tav_generate_random_filename_ext(char *filename, const char *ext);
// =============================================================================
// Memory Utilities
// =============================================================================
/** Safe malloc with error checking (prints to stderr and calls exit(1) on failure; size 0 may return NULL) */
void *tav_malloc(size_t size);
/** Safe calloc with error checking (prints to stderr and calls exit(1) on failure; zero count/size may return NULL) */
void *tav_calloc(size_t count, size_t size);
/** Safe realloc with error checking (prints to stderr and calls exit(1) on failure; size 0 may return NULL) */
void *tav_realloc(void *ptr, size_t size);
/** Allocate aligned memory; alignment must be a power of 2 (returns NULL on failure). Release with tav_aligned_free() */
void *tav_aligned_alloc(size_t alignment, size_t size);
/** Free aligned memory obtained from tav_aligned_alloc() */
void tav_aligned_free(void *ptr);
// =============================================================================
// Array Utilities
// =============================================================================
/** Fill integer array with constant value */
void tav_array_fill_int(int *array, size_t count, int value);
/** Fill float array with constant value */
void tav_array_fill_float(float *array, size_t count, float value);
/** Copy integer array (memcpy; ranges must not overlap) */
void tav_array_copy_int(int *dst, const int *src, size_t count);
/** Copy float array (memcpy; ranges must not overlap) */
void tav_array_copy_float(float *dst, const float *src, size_t count);
/** Find maximum value in integer array (returns 0 when count == 0) */
int tav_array_max_int(const int *array, size_t count);
/** Find minimum value in integer array (returns 0 when count == 0) */
int tav_array_min_int(const int *array, size_t count);
/** Find maximum absolute value in float array (returns 0.0f when count == 0) */
float tav_array_max_abs_float(const float *array, size_t count);
/** Compute sum of integer array (accumulated in long long) */
long long tav_array_sum_int(const int *array, size_t count);
/** Compute sum of float array (accumulated in double) */
double tav_array_sum_float(const float *array, size_t count);
/** Compute mean of float array (returns 0.0f when count == 0) */
float tav_array_mean_float(const float *array, size_t count);
/** Swap two integer values */
void tav_swap_int(int *a, int *b);
/** Swap two float values */
void tav_swap_float(float *a, float *b);
/** Swap two pointer values */
void tav_swap_ptr(void **a, void **b);
// =============================================================================
// Convenience Macros (for backward compatibility)
// =============================================================================
// These expand to function calls, so each argument is evaluated exactly once.
// NOTE(review): tav_encoder_tile.c defines its own expression-style CLAMP
// macro; including this header in a file that also defines CLAMP would
// redefine the macro — confirm that the two are never combined.
#define CLAMP(x, min, max) tav_clamp_int(x, min, max)
#define FCLAMP(x, min, max) tav_clamp_float(x, min, max)
#ifdef __cplusplus
}
#endif
#endif // TAV_ENCODER_UTILS_H

View File

@@ -1,152 +0,0 @@
// Simple range coder for TAD audio codec
// Based on range coding with Laplacian probability model
#include "range_coder.h"
#include <string.h>
#include <math.h>
#define TOP_VALUE 0xFFFFFFFFU
#define BOTTOM_VALUE 0x00FFFFFF
/**
 * Append one byte to the encoder's output buffer.
 * When the buffer is already full the byte is discarded and no error is
 * reported; buffer_pos never exceeds buffer_capacity.
 */
static inline void range_encoder_put_byte(RangeEncoder *enc, uint8_t byte) {
    if (enc->buffer_pos >= enc->buffer_capacity) {
        return;
    }
    enc->buffer[enc->buffer_pos] = byte;
    enc->buffer_pos++;
}
/**
 * Fetch the next input byte for the decoder.
 * Once the input is exhausted, zero bytes are returned indefinitely.
 */
static inline uint8_t range_decoder_get_byte(RangeDecoder *dec) {
    if (dec->buffer_pos >= dec->buffer_size) {
        return 0;
    }
    const uint8_t next = dec->buffer[dec->buffer_pos];
    dec->buffer_pos++;
    return next;
}
/**
 * Keep the encoder's range above BOTTOM_VALUE: while it is too small, emit
 * the top byte of `low` and shift both `low` and `range` left by 8 bits.
 */
static void range_encoder_renormalise(RangeEncoder *enc) {
    for (; enc->range <= BOTTOM_VALUE; enc->range <<= 8) {
        const uint8_t top_byte = (enc->low >> 24) & 0xFF;
        range_encoder_put_byte(enc, top_byte);
        enc->low <<= 8;
    }
}
/**
 * Mirror of the encoder's renormalisation: while the range is at or below
 * BOTTOM_VALUE, shift one more input byte into `code` and shift `low` and
 * `range` left by 8 bits.
 */
static void range_decoder_renormalise(RangeDecoder *dec) {
    for (; dec->range <= BOTTOM_VALUE; dec->range <<= 8) {
        const uint8_t incoming = range_decoder_get_byte(dec);
        dec->code = (dec->code << 8) | incoming;
        dec->low <<= 8;
    }
}
/**
 * Reset an encoder so that it writes to `buffer`.
 *
 * @param enc      Encoder state to initialise
 * @param buffer   Destination for the compressed bytes (not owned)
 * @param capacity Size of `buffer` in bytes
 */
void range_encoder_init(RangeEncoder *enc, uint8_t *buffer, size_t capacity) {
    // Output side
    enc->buffer = buffer;
    enc->buffer_capacity = capacity;
    enc->buffer_pos = 0;

    // Coder interval starts as the full 32-bit range
    enc->low = 0;
    enc->range = TOP_VALUE;
}
// Calculate Laplacian CDF for a given value
// CDF(x) = 0.5 * exp(λx) for x < 0
// CDF(x) = 1 - 0.5 * exp(-λx) for x ≥ 0
static inline double laplacian_cdf(int16_t value, float lambda) {
if (value < 0) {
return 0.5 * exp(lambda * value);
} else {
return 1.0 - 0.5 * exp(-lambda * value);
}
}
/**
 * Encode one signed 16-bit value using a fixed Laplacian probability model.
 *
 * The value is clamped to [-max_abs_value, max_abs_value]. Its probability
 * interval is [CDF(value - 1), CDF(value)) scaled to 16-bit cumulative counts.
 *
 * Saturated tail handling: far out in the positive tail CDF(value - 1)
 * rounds to exactly 1.0, which makes the scaled lower bound equal to SCALE.
 * Clamping the upper bound to SCALE then produced an empty interval, set the
 * coder range to 0, and left renormalisation shifting a zero range forever.
 * Such values cannot be represented by the model, so the nearest smaller
 * value that still owns a non-empty interval is encoded instead (lossy, but
 * terminating). Inputs that never hit this case produce the same output as
 * before.
 *
 * NOTE(review): `low` is a 32-bit accumulator and no carry propagation into
 * already-emitted bytes is visible here — confirm that the stream format
 * tolerates this before relying on exact round-trips.
 *
 * @param enc           Encoder state
 * @param value         Value to encode
 * @param max_abs_value Largest magnitude the alphabet contains
 * @param lambda        Laplacian decay parameter; must match the decoder's
 */
void range_encode_int16_laplacian(RangeEncoder *enc, int16_t value, int16_t max_abs_value, float lambda) {
    // Cumulative counts live in [0, SCALE]
    const uint32_t SCALE = 0x10000; // 65536 for precision

    // Clamp to valid range
    if (value < -max_abs_value) value = -max_abs_value;
    if (value > max_abs_value) value = max_abs_value;

    uint32_t cum_low;
    uint32_t cum_high;
    for (;;) {
        // CDF just below and at `value` bound the probability mass of this symbol
        const double cdf_low = (value == -max_abs_value) ? 0.0 : laplacian_cdf(value - 1, lambda);
        const double cdf_high = laplacian_cdf(value, lambda);
        cum_low = (uint32_t)(cdf_low * SCALE);
        cum_high = (uint32_t)(cdf_high * SCALE);

        // Usable as soon as the lower bound lies inside the scale; the bound
        // on `value` guarantees termination even for a degenerate model.
        if (cum_low < SCALE || value <= -max_abs_value) break;

        // Saturated positive tail: step towards zero to the nearest
        // representable symbol.
        value--;
    }

    // Ensure we have at least 1 unit of probability
    if (cum_high <= cum_low) cum_high = cum_low + 1;
    if (cum_high > SCALE) cum_high = SCALE;
    // Last-resort guard: never hand a zero-width interval to the coder
    if (cum_low >= cum_high) cum_low = cum_high - 1;

    // Encode using cumulative probabilities
    uint64_t range_64 = (uint64_t)enc->range;
    enc->low += (uint32_t)((range_64 * cum_low) / SCALE);
    enc->range = (uint32_t)((range_64 * (cum_high - cum_low)) / SCALE);
    range_encoder_renormalise(enc);
}
/**
 * Flush the encoder: write the four bytes of `low`, most significant first,
 * and report how many bytes the output buffer now holds.
 *
 * @return Total number of bytes written to the buffer
 */
size_t range_encoder_finish(RangeEncoder *enc) {
    int remaining = 4;
    while (remaining-- > 0) {
        const uint8_t top_byte = (enc->low >> 24) & 0xFF;
        range_encoder_put_byte(enc, top_byte);
        enc->low <<= 8;
    }
    return enc->buffer_pos;
}
/**
 * Prepare a decoder to read from `buffer` and prime `code` with the first
 * four input bytes (missing bytes read as zero).
 *
 * @param dec    Decoder state to initialise
 * @param buffer Compressed input (not owned)
 * @param size   Number of bytes in `buffer`
 */
void range_decoder_init(RangeDecoder *dec, const uint8_t *buffer, size_t size) {
    // Input side
    dec->buffer = buffer;
    dec->buffer_size = size;
    dec->buffer_pos = 0;

    // Coder interval starts as the full 32-bit range
    dec->low = 0;
    dec->range = TOP_VALUE;

    // Load the initial 32-bit code word, most significant byte first
    dec->code = 0;
    int remaining = 4;
    while (remaining-- > 0) {
        dec->code = (dec->code << 8) | range_decoder_get_byte(dec);
    }
}
// Decode one signed 16-bit value that was written by
// range_encode_int16_laplacian() with the same max_abs_value and lambda.
// The symbol is located by binary search over [-max_abs_value, max_abs_value],
// recomputing each candidate's cumulative interval from the Laplacian CDF.
// Returns 0 if no candidate interval contains the decoded position.
int16_t range_decode_int16_laplacian(RangeDecoder *dec, int16_t max_abs_value, float lambda) {
const uint32_t SCALE = 0x10000; // Must match encoder
// Calculate current position in probability space
// (offset of code inside the current interval, scaled to [0, SCALE))
uint64_t range_64 = (uint64_t)dec->range;
uint32_t cum_freq = (uint32_t)(((uint64_t)(dec->code - dec->low) * SCALE) / range_64);
// Binary search to find symbol whose CDF range contains cum_freq
int16_t low = -max_abs_value;
int16_t high = max_abs_value;
int16_t value = 0;
while (low <= high) {
int16_t mid = (low + high) / 2;
// Same interval construction as the encoder: [CDF(mid - 1), CDF(mid))
double cdf_low = (mid == -max_abs_value) ? 0.0 : laplacian_cdf(mid - 1, lambda);
double cdf_high = laplacian_cdf(mid, lambda);
uint32_t cum_low = (uint32_t)(cdf_low * SCALE);
uint32_t cum_high = (uint32_t)(cdf_high * SCALE);
// Give every symbol at least one unit of probability
// NOTE(review): unlike the encoder, cum_high is not clamped to SCALE here
if (cum_high <= cum_low) cum_high = cum_low + 1;
if (cum_freq >= cum_low && cum_freq < cum_high) {
// Found the symbol
value = mid;
// Update decoder state
dec->low += (uint32_t)((range_64 * cum_low) / SCALE);
dec->range = (uint32_t)((range_64 * (cum_high - cum_low)) / SCALE);
range_decoder_renormalise(dec);
return value;
} else if (cum_freq < cum_low) {
// Target lies below this symbol's interval
high = mid - 1;
} else {
// Target lies at or above this symbol's upper bound
low = mid + 1;
}
}
// Fallback: shouldn't happen with correct encoding
// (low/range are left unchanged; value is still its initial 0)
range_decoder_renormalise(dec);
return value;
}

View File

@@ -1,42 +0,0 @@
#ifndef RANGE_CODER_H
#define RANGE_CODER_H
#include <stdint.h>
#include <stddef.h>
// Simple range coder for signed 16-bit integers
// Symbol probabilities come from a fixed Laplacian model whose decay
// parameter (lambda) is passed in by the caller on every encode/decode call;
// the model does not adapt to the data.
// Encoder state: current interval plus the caller-owned output buffer.
typedef struct {
uint32_t low; // Lower bound of the current coding interval
uint32_t range; // Width of the current coding interval
uint8_t *buffer; // Output buffer (not owned)
size_t buffer_pos; // Number of bytes written so far
size_t buffer_capacity; // Size of buffer; bytes beyond it are dropped
} RangeEncoder;
// Decoder state: current interval, the 32-bit code window and the input buffer.
typedef struct {
uint32_t low; // Lower bound of the current coding interval
uint32_t range; // Width of the current coding interval
uint32_t code; // Most recent 32 bits read from the input
const uint8_t *buffer; // Compressed input (not owned)
size_t buffer_pos; // Number of input bytes consumed so far
size_t buffer_size; // Size of buffer; reads beyond it yield 0
} RangeDecoder;
// Initialise encoder to write into buffer (capacity bytes); output that does
// not fit is silently dropped
void range_encoder_init(RangeEncoder *enc, uint8_t *buffer, size_t capacity);
// Encode a signed 16-bit value with a zero-mean Laplacian distribution.
// value is clamped to [-max_abs_value, max_abs_value]; lambda is the decay
// parameter of the model and must match the one used when decoding.
void range_encode_int16_laplacian(RangeEncoder *enc, int16_t value, int16_t max_abs_value, float lambda);
// Finalise encoding (flushes 4 more bytes) and return bytes written
size_t range_encoder_finish(RangeEncoder *enc);
// Initialise decoder to read size bytes from buffer
void range_decoder_init(RangeDecoder *dec, const uint8_t *buffer, size_t size);
// Decode a signed 16-bit value with a zero-mean Laplacian distribution.
// max_abs_value and lambda must be the values that were used when encoding.
int16_t range_decode_int16_laplacian(RangeDecoder *dec, int16_t max_abs_value, float lambda);
#endif // RANGE_CODER_H

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,344 +0,0 @@
// Created by CuriousTorvald and Claude on 2025-10-24.
// TAD32 (Terrarum Advanced Audio - PCM32 version) Encoder - Standalone program
// Alternative version: PCM32 throughout encoding, PCM8 conversion only at decoder
// Uses encoder_tad32.c library for encoding functions
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <getopt.h>
#include <math.h>
#include <time.h>
#include "encoder_tad.h"
#define ENCODER_VENDOR_STRING "Encoder-TAD32 (PCM32f version) 20251107"
// TAD32 format constants
#define TAD32_DEFAULT_CHUNK_SIZE 32768 // 2^15; NOTE(review): an earlier remark called this "a prime number to force the worst condition", which does not match the current value
// Temporary file for FFmpeg PCM extraction
char TEMP_PCM_FILE[42];
// Fill `filename` with "/tmp/<32 random alphanumeric chars>.tad".
// Writes exactly 42 bytes (41 characters plus the terminating NUL).
// The C library generator is re-seeded from the clock on every call.
static void generate_random_filename(char *filename) {
    static const char alphabet[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    const int alphabet_len = sizeof(alphabet) - 1;

    srand(time(NULL));

    // Directory prefix occupies bytes 0..4.
    memcpy(filename, "/tmp/", 5);

    // Random stem occupies bytes 5..36.
    char *cursor = filename + 5;
    for (int n = 0; n < 32; n++) {
        *cursor++ = alphabet[rand() % alphabet_len];
    }

    // Extension plus terminating NUL occupy bytes 37..41.
    memcpy(cursor, ".tad", 5);
}
//=============================================================================
// Main Encoder
//=============================================================================
// Print command-line help to stdout.
//
// The option list mirrors the getopt string used by main() ("i:o:q:s:vh").
// The -s option (quantiser scale, accepted range 0.5-4.0, initial value 1.0)
// was parsed by main() but missing from this help text; it is now listed.
//
// prog_name: program name to show in the usage line (normally argv[0]).
static void print_usage(const char *prog_name) {
    printf("Usage: %s -i <input> [options]\n", prog_name);
    printf("Options:\n");
    printf(" -i <file> Input audio file (any format supported by FFmpeg)\n");
    printf(" -o <file> Output TAD32 file (optional, auto-generated as input.qN.tad)\n");
    printf(" -q <level> Quality level (0-5, default: %d)\n", TAD32_QUALITY_DEFAULT);
    printf(" 0 = lowest quality/smallest (max_index=31)\n");
    printf(" 1 = low quality (max_index=35)\n");
    printf(" 2 = medium quality (max_index=39)\n");
    printf(" 3 = good quality (max_index=47) [DEFAULT]\n");
    printf(" 4 = high quality (max_index=56)\n");
    printf(" 5 = very high quality/largest (max_index=89)\n");
    printf(" -s <scale> Quantiser scale (0.5-4.0, default: 1.0)\n");
    printf(" -v Verbose output\n");
    printf(" -h, --help Show this help\n");
    printf("\nVersion: %s\n", ENCODER_VENDOR_STRING);
    printf("Note: This is the PCM32 alternative version for comparison testing.\n");
    printf(" PCM32 is processed throughout encoding; PCM8 conversion happens at decoder.\n");
}
// TAD32 encoder command-line entry point.
//
// Steps: parse options, derive the output path from the input path when -o is
// absent, probe the source sample rate with ffprobe, extract float PCM with
// FFmpeg, resample it to TAD32_SAMPLE_RATE, encode fixed-size chunks with
// tad32_encode_chunk(), then print size statistics.
//
// Returns 0 on success and 1 on any error. Temporary PCM files and heap
// buffers are released on every exit path after option parsing.
//
// NOTE(review): the input path is embedded in double quotes inside a shell
// command line; paths containing '"', '$' or backticks are not escaped.
int main(int argc, char *argv[]) {
    generate_random_filename(TEMP_PCM_FILE);

    char *input_file = NULL;
    char *output_file = NULL;
    char *generated_output = NULL;        // heap buffer when the output path is auto-generated
    int quality = TAD32_QUALITY_DEFAULT;  // Default quality level (0-5)
    float quantiser_scale = 1.0f;         // Default quantiser scaling
    int verbose = 0;

    // Resources released at the `cleanup` label
    FILE *pcm_file = NULL;
    FILE *output = NULL;
    float *chunk_buffer = NULL;
    uint8_t *output_buffer = NULL;
    int exit_code = 1;

    // Parse command line arguments
    static struct option long_options[] = {
        {"help", no_argument, 0, 'h'},
        {0, 0, 0, 0}
    };
    int opt;
    int option_index = 0;
    while ((opt = getopt_long(argc, argv, "i:o:q:s:vh", long_options, &option_index)) != -1) {
        switch (opt) {
            case 'i':
                input_file = optarg;
                break;
            case 'o':
                output_file = optarg;
                break;
            case 'q':
                quality = atoi(optarg);
                if (quality < TAD32_QUALITY_MIN || quality > TAD32_QUALITY_MAX) {
                    fprintf(stderr, "Error: Quality must be in range %d-%d\n", TAD32_QUALITY_MIN, TAD32_QUALITY_MAX);
                    return 1;
                }
                break;
            case 's':
                quantiser_scale = atof(optarg);
                if (quantiser_scale < 0.5f || quantiser_scale > 4.0f) {
                    fprintf(stderr, "Error: Quantiser scale must be in range 0.5-4.0\n");
                    return 1;
                }
                break;
            case 'v':
                verbose = 1;
                break;
            case 'h':
                print_usage(argv[0]);
                return 0;
            default:
                print_usage(argv[0]);
                return 1;
        }
    }

    if (!input_file) {
        fprintf(stderr, "Error: Input file is required\n");
        print_usage(argv[0]);
        return 1;
    }

    // Convert quality (0-5) to max_index for quantisation
    int max_index = tad32_quality_to_max_index(quality);

    // Generate output filename if not provided
    if (!output_file) {
        size_t input_len = strlen(input_file);
        generated_output = malloc(input_len + 32);  // Extra space for .qNN.tad
        if (!generated_output) {
            fprintf(stderr, "Error: Memory allocation failed\n");
            return 1;
        }
        output_file = generated_output;

        // Find the last directory separator
        const char *basename_start = strrchr(input_file, '/');
        if (!basename_start) basename_start = strrchr(input_file, '\\');
        basename_start = basename_start ? basename_start + 1 : input_file;

        // Copy directory part (terminated by the basename copy below)
        size_t dir_len = basename_start - input_file;
        strncpy(output_file, input_file, dir_len);

        // Find the extension (last dot after basename)
        const char *ext = strrchr(basename_start, '.');
        if (ext && ext > basename_start) {
            // Copy basename without extension
            size_t name_len = ext - basename_start;
            strncpy(output_file + dir_len, basename_start, name_len);
            output_file[dir_len + name_len] = '\0';
        } else {
            // No extension, copy entire basename
            strcpy(output_file + dir_len, basename_start);
        }

        // Append .qNN.tad (use quality level for filename)
        sprintf(output_file + strlen(output_file), ".q%d.tad", quality);
        if (verbose) {
            printf("Auto-generated output path: %s\n", output_file);
        }
    }

    if (verbose) {
        printf("%s\n", ENCODER_VENDOR_STRING);
        printf("Input: %s\n", input_file);
        printf("Output: %s\n", output_file);
        printf("Quality level: %d (max_index=%d)\n", quality, max_index);
        printf("Quantiser scale: %.2f\n", quantiser_scale);
    }

    // Detect original sample rate for high-quality resampling
    char sample_rate_str[32] = "48000";  // Default fallback
    char detect_cmd[2048];
    int written = snprintf(detect_cmd, sizeof(detect_cmd),
        "ffprobe -v error -select_streams a:0 -show_entries stream=sample_rate "
        "-of default=noprint_wrappers=1:nokey=1 \"%s\" 2>/dev/null",
        input_file);
    if (written < 0 || (size_t)written >= sizeof(detect_cmd)) {
        // A truncated command line would run something other than intended
        fprintf(stderr, "Error: Input path is too long\n");
        goto cleanup;
    }
    FILE *probe = popen(detect_cmd, "r");
    if (probe) {
        if (fgets(sample_rate_str, sizeof(sample_rate_str), probe)) {
            // Remove newline
            sample_rate_str[strcspn(sample_rate_str, "\n")] = 0;
        }
        pclose(probe);
    }
    int original_rate = atoi(sample_rate_str);
    if (original_rate <= 0 || original_rate > 192000) {
        original_rate = 48000;  // Fallback
    }
    if (verbose) {
        printf("Detected original sample rate: %d Hz\n", original_rate);
        printf("Extracting and resampling audio to %d Hz...\n", TAD32_SAMPLE_RATE);
    }

    // Extract and resample in two passes for better quality
    // Pass 1: Extract at original sample rate
    char temp_original_pcm[256];
    snprintf(temp_original_pcm, sizeof(temp_original_pcm), "%s.orig", TEMP_PCM_FILE);
    char ffmpeg_cmd[2048];
    written = snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
        "ffmpeg -hide_banner -v error -i \"%s\" -f f32le -acodec pcm_f32le -ac %d -y \"%s\" 2>&1",
        input_file, TAD32_CHANNELS, temp_original_pcm);
    if (written < 0 || (size_t)written >= sizeof(ffmpeg_cmd)) {
        fprintf(stderr, "Error: Input path is too long\n");
        goto cleanup;
    }
    int result = system(ffmpeg_cmd);
    if (result != 0) {
        fprintf(stderr, "Error: FFmpeg extraction failed\n");
        remove(temp_original_pcm);  // FFmpeg may have left a partial file behind
        goto cleanup;
    }

    // Pass 2: Resample with the SoXR resampler and a highpass filter
    snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
        "ffmpeg -hide_banner -v error -f f32le -ar %d -ac %d -i \"%s\" "
        "-f f32le -acodec pcm_f32le -ar %d -ac %d "
        "-af \"aresample=resampler=soxr:precision=28:cutoff=0.99:dither_scale=0,highpass=f=16\" "
        "-y \"%s\" 2>&1",
        original_rate, TAD32_CHANNELS, temp_original_pcm, TAD32_SAMPLE_RATE, TAD32_CHANNELS, TEMP_PCM_FILE);
    result = system(ffmpeg_cmd);
    remove(temp_original_pcm);  // Clean up intermediate file
    if (result != 0) {
        fprintf(stderr, "Error: FFmpeg resampling failed\n");
        goto cleanup;
    }

    // Open PCM file
    pcm_file = fopen(TEMP_PCM_FILE, "rb");
    if (!pcm_file) {
        fprintf(stderr, "Error: Could not open temporary PCM file\n");
        goto cleanup;
    }

    // Get file size (ftell reports failure as -1, which must not reach size_t)
    long pcm_bytes = -1;
    if (fseek(pcm_file, 0, SEEK_END) == 0) {
        pcm_bytes = ftell(pcm_file);
    }
    if (pcm_bytes < 0 || fseek(pcm_file, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Error: Could not determine temporary PCM file size\n");
        goto cleanup;
    }
    size_t pcm_size = (size_t)pcm_bytes;
    size_t total_samples = pcm_size / (TAD32_CHANNELS * sizeof(float));

    // Pad to even sample count
    if (total_samples % 2 == 1) {
        total_samples++;
        if (verbose) {
            printf("Odd sample count detected, padding with one zero sample\n");
        }
    }
    size_t num_chunks = (total_samples + TAD32_DEFAULT_CHUNK_SIZE - 1) / TAD32_DEFAULT_CHUNK_SIZE;
    if (verbose) {
        printf("Total samples: %zu (%.2f seconds)\n", total_samples,
               (double)total_samples / TAD32_SAMPLE_RATE);
        printf("Chunks: %zu (chunk size: %d samples)\n", num_chunks, TAD32_DEFAULT_CHUNK_SIZE);
    }

    // Open output file
    output = fopen(output_file, "wb");
    if (!output) {
        fprintf(stderr, "Error: Could not open output file\n");
        goto cleanup;
    }

    // Process chunks using linked TAD32 encoder library
    size_t total_output_size = 0;
    chunk_buffer = malloc(TAD32_DEFAULT_CHUNK_SIZE * TAD32_CHANNELS * sizeof(float));
    output_buffer = malloc(TAD32_DEFAULT_CHUNK_SIZE * 4 * sizeof(float));  // Generous buffer
    if (!chunk_buffer || !output_buffer) {
        fprintf(stderr, "Error: Memory allocation failed\n");
        goto cleanup;
    }

    for (size_t chunk_idx = 0; chunk_idx < num_chunks; chunk_idx++) {
        size_t chunk_samples = TAD32_DEFAULT_CHUNK_SIZE;
        size_t remaining = total_samples - (chunk_idx * TAD32_DEFAULT_CHUNK_SIZE);
        if (remaining < TAD32_DEFAULT_CHUNK_SIZE) {
            chunk_samples = remaining;
        }

        // Read chunk
        size_t samples_read = fread(chunk_buffer, TAD32_CHANNELS * sizeof(float),
                                    chunk_samples, pcm_file);

        // Zero everything that was not actually read. Counting from samples_read
        // (not chunk_samples) also clears the even-count padding sample and any
        // short read, so no stale data from a previous chunk is encoded.
        if (samples_read < TAD32_DEFAULT_CHUNK_SIZE) {
            memset(&chunk_buffer[samples_read * TAD32_CHANNELS], 0,
                   (TAD32_DEFAULT_CHUNK_SIZE - samples_read) * TAD32_CHANNELS * sizeof(float));
        }

        // Encode chunk using linked tad32_encode_chunk() from encoder_tad32.c
        size_t encoded_size = tad32_encode_chunk(chunk_buffer, TAD32_DEFAULT_CHUNK_SIZE,
                                                 max_index,
                                                 quantiser_scale, TAD32_ZSTD_LEVEL, output_buffer);
        if (encoded_size == 0) {
            fprintf(stderr, "Error: Chunk encoding failed at chunk %zu\n", chunk_idx);
            goto cleanup;
        }

        // Write chunk to output
        if (fwrite(output_buffer, 1, encoded_size, output) != encoded_size) {
            fprintf(stderr, "Error: Could not write chunk %zu to output file\n", chunk_idx);
            goto cleanup;
        }
        total_output_size += encoded_size;

        if (verbose && (chunk_idx % 10 == 0 || chunk_idx == num_chunks - 1)) {
            printf("Processed chunk %zu/%zu (%.1f%%)\r", chunk_idx + 1, num_chunks,
                   (chunk_idx + 1) * 100.0 / num_chunks);
            fflush(stdout);
        }
    }
    if (verbose) {
        printf("\n");
    }

    // Print coefficient statistics if enabled
    tad32_print_statistics();
    tad32_free_statistics();

    // Flush the output now so a failed final write is reported
    int close_result = fclose(output);
    output = NULL;
    if (close_result != 0) {
        fprintf(stderr, "Error: Could not finish writing output file\n");
        goto cleanup;
    }

    // Print statistics
    size_t pcmu8_size = total_samples * TAD32_CHANNELS;  // PCMu8 baseline
    printf("Encoding complete!\n");
    printf("PCMu8 size: %zu bytes\n", pcmu8_size);
    printf("TAD32 size: %zu bytes\n", total_output_size);
    if (pcmu8_size > 0 && total_output_size > 0) {
        // Ratios are undefined for empty input, so they are skipped in that case
        float compression_ratio = (float)pcmu8_size / total_output_size;
        printf("Compression ratio: %.2f:1 (%.1f%% of PCMu8)\n",
               compression_ratio, (total_output_size * 100.0) / pcmu8_size);
        if (compression_ratio < 1.8) {
            printf("Warning: Compression ratio below 2:1 target. Try lower quantisation bits or different settings.\n");
        }
    }
    exit_code = 0;

cleanup:
    free(chunk_buffer);
    free(output_buffer);
    if (pcm_file) fclose(pcm_file);
    if (output) fclose(output);
    remove(TEMP_PCM_FILE);  // harmless if the file was never created
    free(generated_output);
    return exit_code;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,294 +0,0 @@
// Visualise DWT Coefficients as Image
// Converts .bin coefficient file to PPM image with logarithmic color mapping
// Usage: ./visualise_coefficients <input.bin> <output.ppm> <width> <height>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>
// One output pixel, one byte per channel in R, G, B order.
// Colour scheme produced by map_coefficient_to_color():
//   zero      -> black (#000000)
//   +1 / -1   -> light green (#55FF55) / dark green (#005500)
//   positive  -> red to yellow (#FF0000 to #FFFF00), logarithmic
//   negative  -> blue to cyan (#0000FF to #00FFFF), logarithmic
typedef struct {
    uint8_t r;
    uint8_t g;
    uint8_t b;
} rgb_t;
// Map one quantised coefficient to its display colour.
//   0  -> black, +1 -> #55FF55, -1 -> #005500
//   >1 -> red channel saturated, green rising with log2(coeff) / log2(32767)
//   <-1 -> blue channel saturated, green rising with log2(-coeff) / log2(32768)
static rgb_t map_coefficient_to_color(int16_t coeff) {
    rgb_t pixel = {0, 0, 0};

    // Special-cased values get fixed colours
    switch (coeff) {
        case 0:
            return pixel;
        case 1:
            pixel.r = 0x55;
            pixel.g = 0xFF;
            pixel.b = 0x55;
            return pixel;
        case -1:
            pixel.r = 0x00;
            pixel.g = 0x55;
            pixel.b = 0x00;
            return pixel;
        default:
            break;
    }

    // Everything else: logarithmic ramp on the green channel, sign picks the base hue
    const int is_positive = coeff > 0;
    const double magnitude = is_positive ? (double)coeff : (double)(-coeff);
    const double full_scale = is_positive ? 32767.0 : 32768.0;
    const double normalised = log2(magnitude) / log2(full_scale);  // 0.0 to 1.0

    pixel.g = (uint8_t)(normalised * 255.0);
    if (is_positive) {
        pixel.r = 255;
    } else {
        pixel.b = 255;
    }
    return pixel;
}
int main(int argc, char *argv[]) {
if (argc != 5) {
printf("Usage: %s <input.bin> <output.ppm> <width> <height>\n", argv[0]);
printf("Example: %s frame_060.tavframe.y.bin output.ppm 560 448\n", argv[0]);
return 1;
}
const char *input_file = argv[1];
const char *output_file = argv[2];
int width = atoi(argv[3]);
int height = atoi(argv[4]);
if (width <= 0 || height <= 0) {
printf("Error: Invalid dimensions %dx%d\n", width, height);
return 1;
}
size_t expected_count = width * height;
// Load coefficient file
FILE *fp_in = fopen(input_file, "rb");
if (!fp_in) {
printf("Error: Cannot open %s\n", input_file);
return 1;
}
// Get file size
fseek(fp_in, 0, SEEK_END);
long file_size = ftell(fp_in);
fseek(fp_in, 0, SEEK_SET);
size_t coeff_count = file_size / sizeof(int16_t);
if (coeff_count != expected_count) {
printf("Warning: File contains %zu coefficients, expected %zu (%dx%d)\n",
coeff_count, expected_count, width, height);
}
// Allocate coefficient buffer
int16_t *coeffs = malloc(expected_count * sizeof(int16_t));
if (!coeffs) {
printf("Error: Memory allocation failed\n");
fclose(fp_in);
return 1;
}
// Read coefficients
size_t read_count = fread(coeffs, sizeof(int16_t), expected_count, fp_in);
fclose(fp_in);
if (read_count != expected_count) {
printf("Error: Read %zu coefficients, expected %zu\n", read_count, expected_count);
free(coeffs);
return 1;
}
// Analyse coefficient distribution - Overall and per-subband
size_t zeros = 0, ones = 0, positives = 0, negatives = 0;
int16_t min_val = INT16_MAX, max_val = INT16_MIN;
// Calculate overall statistics
for (size_t i = 0; i < expected_count; i++) {
if (coeffs[i] == 0) zeros++;
else if (coeffs[i] == 1 || coeffs[i] == -1) ones++;
else if (coeffs[i] > 0) positives++;
else negatives++;
if (coeffs[i] < min_val) min_val = coeffs[i];
if (coeffs[i] > max_val) max_val = coeffs[i];
}
printf("Overall coefficient statistics:\n");
printf(" Total: %zu\n", expected_count);
printf(" Zeros: %zu (%.1f%%)\n", zeros, 100.0 * zeros / expected_count);
printf(" Ones: %zu (%.1f%%)\n", ones, 100.0 * ones / expected_count);
printf(" Positives: %zu (%.1f%%)\n", positives, 100.0 * positives / expected_count);
printf(" Negatives: %zu (%.1f%%)\n", negatives, 100.0 * negatives / expected_count);
printf(" Range: [%d, %d]\n\n", min_val, max_val);
// Per-subband statistics using 2D spatial layout
// The coefficients are stored in 2D spatial arrangement like the PPM image
int num_levels = 6;
// Helper macro to get coefficient from 2D position
#define GET_COEFF(x, y) coeffs[(y) * width + (x)]
// Calculate subband dimensions for each level
int level_w[7], level_h[7]; // level_w[1] = width/2, level_w[6] = width/64
for (int i = 1; i <= num_levels; i++) {
level_w[i] = width / (1 << i);
level_h[i] = height / (1 << i);
}
// LL6 subband (top-left corner)
{
int ll_w = level_w[6], ll_h = level_h[6];
size_t ll_zeros = 0, ll_ones = 0, ll_pos = 0, ll_neg = 0;
int16_t ll_min = INT16_MAX, ll_max = INT16_MIN;
for (int y = 0; y < ll_h; y++) {
for (int x = 0; x < ll_w; x++) {
int16_t val = GET_COEFF(x, y);
if (val == 0) ll_zeros++;
else if (val == 1 || val == -1) ll_ones++;
else if (val > 0) ll_pos++;
else ll_neg++;
if (val < ll_min) ll_min = val;
if (val > ll_max) ll_max = val;
}
}
size_t ll_total = ll_w * ll_h;
printf("LL%d subband (%dx%d):\n", num_levels, ll_w, ll_h);
printf(" Total: %zu\n", ll_total);
printf(" Zeros: %zu (%.1f%%)\n", ll_zeros, 100.0 * ll_zeros / ll_total);
printf(" Ones: %zu (%.1f%%)\n", ll_ones, 100.0 * ll_ones / ll_total);
printf(" Positives: %zu (%.1f%%)\n", ll_pos, 100.0 * ll_pos / ll_total);
printf(" Negatives: %zu (%.1f%%)\n", ll_neg, 100.0 * ll_neg / ll_total);
printf(" Range: [%d, %d]\n\n", ll_min, ll_max);
}
// Process each level from deepest (6) to finest (1)
for (int level = num_levels; level >= 1; level--) {
int half_w = level_w[level];
int half_h = level_h[level];
// LH subband (horizontal high-pass) - right of LL region
size_t lh_zeros = 0, lh_ones = 0, lh_pos = 0, lh_neg = 0;
int16_t lh_min = INT16_MAX, lh_max = INT16_MIN;
int lh_x0 = half_w, lh_y0 = 0;
int lh_x1 = half_w * 2, lh_y1 = half_h;
for (int y = lh_y0; y < lh_y1; y++) {
for (int x = lh_x0; x < lh_x1; x++) {
int16_t val = GET_COEFF(x, y);
if (val == 0) lh_zeros++;
else if (val == 1 || val == -1) lh_ones++;
else if (val > 0) lh_pos++;
else lh_neg++;
if (val < lh_min) lh_min = val;
if (val > lh_max) lh_max = val;
}
}
// HL subband (vertical high-pass) - below LL region
size_t hl_zeros = 0, hl_ones = 0, hl_pos = 0, hl_neg = 0;
int16_t hl_min = INT16_MAX, hl_max = INT16_MIN;
int hl_x0 = 0, hl_y0 = half_h;
int hl_x1 = half_w, hl_y1 = half_h * 2;
for (int y = hl_y0; y < hl_y1; y++) {
for (int x = hl_x0; x < hl_x1; x++) {
int16_t val = GET_COEFF(x, y);
if (val == 0) hl_zeros++;
else if (val == 1 || val == -1) hl_ones++;
else if (val > 0) hl_pos++;
else hl_neg++;
if (val < hl_min) hl_min = val;
if (val > hl_max) hl_max = val;
}
}
// HH subband (diagonal high-pass) - bottom-right of LL region
size_t hh_zeros = 0, hh_ones = 0, hh_pos = 0, hh_neg = 0;
int16_t hh_min = INT16_MAX, hh_max = INT16_MIN;
int hh_x0 = half_w, hh_y0 = half_h;
int hh_x1 = half_w * 2, hh_y1 = half_h * 2;
for (int y = hh_y0; y < hh_y1; y++) {
for (int x = hh_x0; x < hh_x1; x++) {
int16_t val = GET_COEFF(x, y);
if (val == 0) hh_zeros++;
else if (val == 1 || val == -1) hh_ones++;
else if (val > 0) hh_pos++;
else hh_neg++;
if (val < hh_min) hh_min = val;
if (val > hh_max) hh_max = val;
}
}
size_t sub_total = half_w * half_h;
printf("Level %d subbands (%dx%d each):\n", level, half_w, half_h);
printf(" LH%d: Total=%zu, Zeros=%zu (%.1f%%), Ones=%zu (%.1f%%), Pos=%zu (%.1f%%), Neg=%zu (%.1f%%), Range=[%d,%d]\n",
level, sub_total, lh_zeros, 100.0*lh_zeros/sub_total, lh_ones, 100.0*lh_ones/sub_total,
lh_pos, 100.0*lh_pos/sub_total, lh_neg, 100.0*lh_neg/sub_total, lh_min, lh_max);
printf(" HL%d: Total=%zu, Zeros=%zu (%.1f%%), Ones=%zu (%.1f%%), Pos=%zu (%.1f%%), Neg=%zu (%.1f%%), Range=[%d,%d]\n",
level, sub_total, hl_zeros, 100.0*hl_zeros/sub_total, hl_ones, 100.0*hl_ones/sub_total,
hl_pos, 100.0*hl_pos/sub_total, hl_neg, 100.0*hl_neg/sub_total, hl_min, hl_max);
printf(" HH%d: Total=%zu, Zeros=%zu (%.1f%%), Ones=%zu (%.1f%%), Pos=%zu (%.1f%%), Neg=%zu (%.1f%%), Range=[%d,%d]\n\n",
level, sub_total, hh_zeros, 100.0*hh_zeros/sub_total, hh_ones, 100.0*hh_ones/sub_total,
hh_pos, 100.0*hh_pos/sub_total, hh_neg, 100.0*hh_neg/sub_total, hh_min, hh_max);
}
#undef GET_COEFF
// Write PPM image
FILE *fp_out = fopen(output_file, "wb");
if (!fp_out) {
printf("Error: Cannot create %s\n", output_file);
free(coeffs);
return 1;
}
// PPM header
fprintf(fp_out, "P6\n%d %d\n255\n", width, height);
// Write pixel data
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
size_t idx = y * width + x;
rgb_t color = map_coefficient_to_color(coeffs[idx]);
fwrite(&color, 3, 1, fp_out);
}
}
fclose(fp_out);
free(coeffs);
printf("\nWrote %dx%d image to %s\n", width, height, output_file);
printf("Color mapping:\n");
printf(" Black: Zero coefficients\n");
printf(" Light Green (#55FF55): +1 coefficients\n");
printf(" Dark Green (#00AA00): -1 coefficients\n");
printf(" Red→Yellow: Positive coefficients > +1 (logarithmic)\n");
printf(" Blue→Cyan: Negative coefficients < -1 (logarithmic)\n");
return 0;
}

View File

@@ -1,402 +0,0 @@
// TAV-DT Noise Injector - Simulates satellite transmission channel noise
// Models QPSK over Ku-band satellite with AWGN and burst interference
// to compile: gcc -O2 -o tavdt_noise_injector tavdt_noise_injector.c -lm
// Created by CuriousTorvald and Claude on 2025-12-14
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
#include <getopt.h>
#include <time.h>
// Size in bytes of the buffer main() uses to read, corrupt and write the file in chunks
#define BUFFER_SIZE (1024 * 1024) // 1 MiB per chunk
// Default TAV-DT bitrate in bits per second, used to convert byte counts to playback time (~2 Mbps)
#define DEFAULT_BITRATE_BPS 2000000.0
// Bitrate in bits per second used by bytes_to_time(); --bitrate overrides it (given in Mbps)
static double g_bitrate_bps = DEFAULT_BITRATE_BPS;
// Burst noise parameters: burst length in bytes is drawn from a Gaussian with
// this mean and standard deviation, and is never shorter than BURST_LENGTH_MIN
#define BURST_LENGTH_MEAN 100.0
#define BURST_LENGTH_STDDEV 30.0
#define BURST_LENGTH_MIN 10
//=============================================================================
// PRNG Functions (xorshift64)
//=============================================================================
// Advance the 64-bit xorshift generator (shift triple 13, 7, 17) by one step.
// Updates *state in place and returns the new state. A zero state stays zero,
// so callers must seed with a non-zero value.
static uint64_t xorshift64(uint64_t *state) {
    uint64_t s = *state;
    s = s ^ (s << 13);
    s = s ^ (s >> 7);
    s = s ^ (s << 17);
    *state = s;
    return s;
}
// Draw the next xorshift64 value and scale it by 1 / UINT64_MAX.
// The result is nominally uniform on the unit interval. Because UINT64_MAX is
// rounded when converted to double, raw values very close to UINT64_MAX can
// map to exactly 1.0, so callers should not rely on a strict "< 1" bound.
static double rand_uniform(uint64_t *state) {
    const double raw = (double)xorshift64(state);
    const double scale = (double)UINT64_MAX;
    return raw / scale;
}
// Gaussian random number via the Box-Muller transform.
// Consumes two uniform draws from `state` (radius first, then angle) and
// returns mean + stddev * z, where z is the cosine branch of the transform.
static double gaussian_rand(uint64_t *state, double mean, double stddev) {
    double radial_u = rand_uniform(state);
    const double angular_u = rand_uniform(state);

    // Keep the logarithm finite
    if (radial_u < 1e-15) radial_u = 1e-15;

    const double radius = sqrt(-2.0 * log(radial_u));
    const double standard_normal = radius * cos(2.0 * M_PI * angular_u);
    return mean + stddev * standard_normal;
}
//=============================================================================
// BER Calculation
//=============================================================================
// Calculate BER from SNR in dB for QPSK modulation
// BER = 0.5 * erfc(sqrt(Eb/N0))
// For QPSK, Eb/N0 = SNR (2 bits per symbol)
static double snr_to_ber(double snr_db) {
double snr_linear = pow(10.0, snr_db / 10.0);
double eb_n0 = snr_linear;
return 0.5 * erfc(sqrt(eb_n0));
}
//=============================================================================
// Burst State Management
//=============================================================================
// Scheduler state for burst interference, carried across chunks.
typedef struct {
    // Elapsed playback time in seconds
    double current_time_sec;
    // Playback time at which the next burst is scheduled
    double next_burst_time;
    // Bytes left in the active burst (0 = no active burst)
    int burst_bytes_remaining;
    // Mean interval between bursts in seconds (60.0 / bursts_per_minute)
    double burst_interval;
    // Bit error rate applied while a burst is active
    double burst_ber;
    // Number of bursts started so far
    int burst_count;
    // Total bytes that have been subjected to burst noise
    int total_burst_bytes;
    // Non-zero to log each burst to stderr
    int verbose;
} burst_state_t;
// Reset a burst scheduler.
// With bursts_per_minute > 0 the first burst time is drawn from an exponential
// distribution with mean 60 / bursts_per_minute seconds (one draw from `seed`);
// otherwise bursts are disabled and no random number is consumed.
static void burst_state_init(burst_state_t *state, double bursts_per_minute,
                             double burst_ber, int verbose, uint64_t *seed) {
    state->verbose = verbose;
    state->burst_ber = burst_ber;
    state->current_time_sec = 0.0;
    state->burst_bytes_remaining = 0;
    state->burst_count = 0;
    state->total_burst_bytes = 0;

    if (!(bursts_per_minute > 0)) {
        // Disabled: park the next burst far beyond any realistic stream length
        state->burst_interval = 0;
        state->next_burst_time = 1e30;
        return;
    }

    state->burst_interval = 60.0 / bursts_per_minute;
    state->next_burst_time = -state->burst_interval * log(rand_uniform(seed));
}
// Advance the scheduler clock by delta_sec seconds.
// Every burst scheduled before the new time is consumed: if no burst is active
// a new one is started with a Gaussian length (at least BURST_LENGTH_MIN bytes);
// if one is already active the scheduled burst is dropped. After each consumed
// slot the next burst is rescheduled with an exponential wait of at least 1 ms.
static void burst_state_advance_time(burst_state_t *state, double delta_sec, uint64_t *seed) {
    const double end_time = state->current_time_sec + delta_sec;

    // burst_interval is not modified below, so the "enabled" test is done once
    if (state->burst_interval > 0) {
        while (state->next_burst_time < end_time) {
            const int burst_is_idle = (state->burst_bytes_remaining == 0);
            if (burst_is_idle) {
                const double drawn_length = gaussian_rand(seed, BURST_LENGTH_MEAN, BURST_LENGTH_STDDEV);
                state->burst_bytes_remaining = (int)fmax(BURST_LENGTH_MIN, drawn_length);
                state->burst_count++;
                if (state->verbose) {
                    fprintf(stderr, " [burst] time %.2fs, %d bytes\n",
                            state->next_burst_time, state->burst_bytes_remaining);
                }
            }

            // Exponential inter-arrival time, floored at 1 ms
            double gap = -state->burst_interval * log(rand_uniform(seed));
            if (gap < 0.001) gap = 0.001;
            state->next_burst_time += gap;
        }
    }

    state->current_time_sec = end_time;
}
//=============================================================================
// Noise Application Functions
//=============================================================================
// Flip each bit of data[0..len) independently with probability `ber`.
// One uniform draw is consumed per bit, least-significant bit first.
// A BER below 1e-10 is treated as zero: nothing is flipped and no random
// numbers are consumed.
// Returns the number of bits flipped.
static int apply_background_noise(uint8_t *data, size_t len, double ber, uint64_t *seed) {
    if (ber < 1e-10) {
        return 0;  // Effectively no errors at this BER
    }

    int flipped = 0;
    uint8_t *const end = data + len;
    for (uint8_t *byte = data; byte != end; ++byte) {
        for (unsigned mask = 1u; mask <= 0x80u; mask <<= 1) {
            if (rand_uniform(seed) < ber) {
                *byte ^= (uint8_t)mask;
                ++flipped;
            }
        }
    }
    return flipped;
}
// Apply the active burst, if any, to the start of the buffer.
// Up to burst_bytes_remaining bytes (capped at len) have each bit flipped with
// probability state->burst_ber; the remainder of the burst carries over to the
// next call. Updates the burst byte counters in `state`.
// Returns the number of bits flipped.
static int apply_burst_noise(uint8_t *data, size_t len, burst_state_t *state, uint64_t *seed) {
    if (state->burst_bytes_remaining <= 0) {
        return 0;
    }

    const size_t pending = (size_t)state->burst_bytes_remaining;
    const size_t affected = (pending > len) ? len : pending;

    int flipped = 0;
    for (size_t offset = 0; offset < affected; ++offset) {
        for (int bit = 0; bit < 8; ++bit) {
            if (rand_uniform(seed) < state->burst_ber) {
                data[offset] ^= (1 << bit);
                ++flipped;
            }
        }
    }

    state->total_burst_bytes += affected;
    state->burst_bytes_remaining -= affected;
    return flipped;
}
//=============================================================================
// Byte Position to Time Conversion
//=============================================================================
// Approximate playback time in seconds for a byte offset, using the
// configured stream bitrate (g_bitrate_bps, bits per second).
static double bytes_to_time(size_t byte_pos) {
    const size_t bit_count = byte_pos * 8;
    return (double)bit_count / g_bitrate_bps;
}
//=============================================================================
// Main Program
//=============================================================================
// Print the command-line help to stderr.
// prog: program name shown in the usage line (callers pass argv[0]).
// The SNR reference table lists the values produced by snr_to_ber(), i.e.
// 0.5 * erfc(sqrt(10^(snr/10))); background noise below a BER of 1e-10 is
// not injected at all (see apply_background_noise()).
static void print_usage(const char *prog) {
    fprintf(stderr, "TAV-DT Noise Injector v1.0\n");
    fprintf(stderr, "Simulates QPSK satellite transmission channel noise\n\n");
    fprintf(stderr, "Usage: %s -i input.tavdt -o output.tavdt --snr N [options]\n\n", prog);
    fprintf(stderr, "Required:\n");
    fprintf(stderr, " -i, --input FILE Input TAV-DT file\n");
    fprintf(stderr, " -o, --output FILE Output corrupted file\n");
    fprintf(stderr, " --snr N Signal-to-noise ratio in dB (0-30)\n");
    fprintf(stderr, "\nOptional:\n");
    fprintf(stderr, " --burst N Burst events per minute (default: 0)\n");
    fprintf(stderr, " --burst-ber N BER during burst events (default: 0.5)\n");
    fprintf(stderr, " --bitrate N Stream bitrate in Mbps for timing (default: 2.0)\n");
    fprintf(stderr, " --seed N RNG seed for reproducibility\n");
    fprintf(stderr, " -v, --verbose Show detailed progress\n");
    fprintf(stderr, " -h, --help Show this help\n");
    fprintf(stderr, "\nSNR Reference:\n");
    fprintf(stderr, " 0 dB: Worst case (BER ~7.9e-2, 1 in 13 bits)\n");
    fprintf(stderr, " 6 dB: Poor but working (BER ~2.4e-3)\n");
    fprintf(stderr, " 9 dB: Typical working (BER ~3.4e-5)\n");
    fprintf(stderr, " 12 dB: Good condition (BER ~9.0e-9)\n");
    fprintf(stderr, " 30 dB: Near-perfect (BER < 1e-10, no background errors injected)\n");
}
// Noise injector entry point.
//
// Parses options, derives the background BER from --snr, then streams the
// input file through a BUFFER_SIZE buffer, applying background and burst bit
// errors to each chunk before writing it to the output file. A summary is
// printed to stderr at the end.
//
// Returns 0 on success, 1 on invalid arguments, allocation failure or I/O error.
//
// NOTE(review): burst noise is applied from the first byte of a chunk and a
// new burst only starts when none is active, so at most one burst begins per
// BUFFER_SIZE chunk regardless of how many were scheduled in its time span.
int main(int argc, char *argv[]) {
    const char *input_file = NULL;
    const char *output_file = NULL;
    double snr_db = -1;  // negative means "not supplied"
    double bursts_per_minute = 0;
    double burst_ber = 0.5;
    uint64_t seed = 0;
    int seed_provided = 0;
    int verbose = 0;

    static struct option long_options[] = {
        {"input", required_argument, 0, 'i'},
        {"output", required_argument, 0, 'o'},
        {"snr", required_argument, 0, 's'},
        {"burst", required_argument, 0, 'b'},
        {"burst-ber", required_argument, 0, 'B'},
        {"bitrate", required_argument, 0, 'r'},
        {"seed", required_argument, 0, 'S'},
        {"verbose", no_argument, 0, 'v'},
        {"help", no_argument, 0, 'h'},
        {0, 0, 0, 0}
    };
    int opt;
    while ((opt = getopt_long(argc, argv, "i:o:vh", long_options, NULL)) != -1) {
        switch (opt) {
            case 'i':
                input_file = optarg;
                break;
            case 'o':
                output_file = optarg;
                break;
            case 's':
                snr_db = atof(optarg);
                break;
            case 'b':
                bursts_per_minute = atof(optarg);
                break;
            case 'B':
                burst_ber = atof(optarg);
                break;
            case 'r':
                g_bitrate_bps = atof(optarg) * 1000000.0;  // Convert Mbps to bps
                break;
            case 'S':
                seed = strtoull(optarg, NULL, 10);
                seed_provided = 1;
                break;
            case 'v':
                verbose = 1;
                break;
            case 'h':
            default:
                print_usage(argv[0]);
                return opt == 'h' ? 0 : 1;
        }
    }

    // Validate arguments
    if (!input_file || !output_file || snr_db < 0) {
        fprintf(stderr, "Error: Missing required arguments\n\n");
        print_usage(argv[0]);
        return 1;
    }
    if (burst_ber < 0 || burst_ber > 1) {
        fprintf(stderr, "Error: --burst-ber must be between 0 and 1\n");
        return 1;
    }
    // The bitrate is a divisor in bytes_to_time(); reject zero, negative and NaN
    if (!(g_bitrate_bps > 0)) {
        fprintf(stderr, "Error: --bitrate must be greater than 0\n");
        return 1;
    }

    // Initialize RNG
    if (!seed_provided) {
        seed = (uint64_t)time(NULL) ^ ((uint64_t)clock() << 32);
    }
    // Remember the seed before it is mutated so the run can be reproduced
    // by passing the reported value back through --seed
    const uint64_t reported_seed = seed;
    // Ensure seed is not zero (xorshift64 requirement)
    if (seed == 0) seed = 0x853c49e6748fea9bULL;
    // Warm up the generator (small seeds produce poor initial values)
    for (int i = 0; i < 10; i++) xorshift64(&seed);

    // Calculate BER from SNR
    double ber = snr_to_ber(snr_db);

    // Open files
    FILE *in_fp = fopen(input_file, "rb");
    if (!in_fp) {
        fprintf(stderr, "Error: Cannot open input file: %s\n", input_file);
        return 1;
    }
    FILE *out_fp = fopen(output_file, "wb");
    if (!out_fp) {
        fprintf(stderr, "Error: Cannot open output file: %s\n", output_file);
        fclose(in_fp);
        return 1;
    }

    // Print header info
    fprintf(stderr, "TAV-DT Noise Injector v1.0\n");
    fprintf(stderr, "Input: %s\n", input_file);
    fprintf(stderr, "Output: %s\n", output_file);
    fprintf(stderr, "SNR: %.1f dB (BER: %.2e)\n", snr_db, ber);
    if (bursts_per_minute > 0) {
        fprintf(stderr, "Burst: %.1f events/minute (burst BER: %.2f)\n",
                bursts_per_minute, burst_ber);
    } else {
        fprintf(stderr, "Burst: disabled\n");
    }
    fprintf(stderr, "Seed: %llu%s\n", (unsigned long long)reported_seed,
            seed_provided ? "" : " (auto-generated)");
    fprintf(stderr, "\n");

    // Initialize burst state
    burst_state_t burst;
    burst_state_init(&burst, bursts_per_minute, burst_ber, verbose, &seed);

    // Allocate buffer for streaming processing
    uint8_t *buffer = malloc(BUFFER_SIZE);
    if (!buffer) {
        fprintf(stderr, "Error: Cannot allocate buffer\n");
        fclose(in_fp);
        fclose(out_fp);
        return 1;
    }

    // Processing statistics
    long long total_bytes = 0;
    long long bits_flipped_bg = 0;
    long long bits_flipped_burst = 0;
    int chunk_count = 0;
    int io_failed = 0;

    // Process file in chunks
    size_t bytes_read;
    while ((bytes_read = fread(buffer, 1, BUFFER_SIZE, in_fp)) > 0) {
        // Calculate time delta for this chunk (for burst scheduling)
        double delta_sec = bytes_to_time(bytes_read);
        burst_state_advance_time(&burst, delta_sec, &seed);

        // Apply noise to chunk
        bits_flipped_bg += apply_background_noise(buffer, bytes_read, ber, &seed);
        bits_flipped_burst += apply_burst_noise(buffer, bytes_read, &burst, &seed);

        // Write corrupted chunk
        if (fwrite(buffer, 1, bytes_read, out_fp) != bytes_read) {
            fprintf(stderr, "\nError: Failed to write output file: %s\n", output_file);
            io_failed = 1;
            break;
        }
        total_bytes += bytes_read;
        chunk_count++;

        if (verbose && chunk_count % 10 == 0) {
            double time_pos = bytes_to_time(total_bytes);
            fprintf(stderr, "\rProcessed %.1f MB (%.1f sec)...",
                    total_bytes / (1024.0 * 1024.0), time_pos);
        }
    }
    // fread returns 0 for both end-of-file and errors; tell them apart
    if (!io_failed && ferror(in_fp)) {
        fprintf(stderr, "\nError: Failed to read input file: %s\n", input_file);
        io_failed = 1;
    }
    if (verbose) {
        fprintf(stderr, "\r \r");
    }

    // Clean up
    free(buffer);
    fclose(in_fp);
    if (fclose(out_fp) != 0 && !io_failed) {
        fprintf(stderr, "Error: Failed to write output file: %s\n", output_file);
        io_failed = 1;
    }
    if (io_failed) {
        return 1;
    }

    // Print summary
    double duration_sec = bytes_to_time(total_bytes);
    long long total_bits = total_bytes * 8;
    fprintf(stderr, "Complete.\n");
    fprintf(stderr, " Total bytes: %lld (%.1f sec @ ~%.1f Mbps)\n",
            total_bytes, duration_sec, g_bitrate_bps / 1000000.0);
    fprintf(stderr, " Background bits flipped: %lld (%.4f%%)\n",
            bits_flipped_bg,
            total_bits > 0 ? 100.0 * bits_flipped_bg / total_bits : 0.0);  // empty input: avoid 0/0
    if (bursts_per_minute > 0) {
        fprintf(stderr, " Burst events: %d (%d bytes total)\n",
                burst.burst_count, burst.total_burst_bytes);
        fprintf(stderr, " Burst bits flipped: %lld\n", bits_flipped_burst);
    }
    return 0;
}

View File

@@ -1,328 +0,0 @@
// Test mesh warp round-trip consistency
// Warps a frame forward, then backward, and checks if we get the original back
// This is critical for MC-lifting invertibility
#include <opencv2/opencv.hpp>
#include <cstdlib>
#include <cstring>
#include <cmath>
#include <cstdio>
#include <ctime>
// Mesh functions from the encoder, declared here with C linkage.
// Only the declarations are visible in this file; the notes below are inferred
// from how main() calls these functions and should be confirmed against the
// encoder implementation.
extern "C" {
// Presumably estimates per-pixel motion between two RGB frames of the given size.
// The caller passes the addresses of two float pointers, so the flow fields are
// returned through out_flow_x / out_flow_y — TODO confirm who owns the buffers.
void estimate_motion_optical_flow(
const unsigned char *frame1_rgb, const unsigned char *frame2_rgb,
int width, int height,
float **out_flow_x, float **out_flow_y
);
// Presumably reduces the flow fields to a mesh_w x mesh_h grid of displacements.
// The caller supplies mesh_dx / mesh_dy with mesh_w * mesh_h int16_t entries each;
// apply_mesh_warp_rgb() divides these values by 8, which suggests 1/8-pixel units.
void build_mesh_from_flow(
const float *flow_x, const float *flow_y,
int width, int height,
int mesh_w, int mesh_h,
int16_t *mesh_dx, int16_t *mesh_dy
);
// Presumably smooths the mesh displacements in place; the exact meaning of
// `smoothness` and `iterations` is not visible here — verify in the encoder.
void smooth_mesh_laplacian(
int16_t *mesh_dx, int16_t *mesh_dy,
int mesh_width, int mesh_height,
float smoothness, int iterations
);
}
// Warp a 3-channel 8-bit image with a translation-only control mesh.
// For every destination pixel the displacement is bilinearly interpolated from
// the four surrounding mesh control points (mesh values are divided by 8 to get
// pixels), then the source image is sampled bilinearly at the displaced
// position with coordinates clamped to the image borders.
// `dst` is reallocated as a height x width CV_8UC3 image.
static void apply_mesh_warp_rgb(
    const cv::Mat &src,
    cv::Mat &dst,
    const int16_t *mesh_dx,
    const int16_t *mesh_dy,
    int mesh_w, int mesh_h
) {
    const int width = src.cols;
    const int height = src.rows;
    const int cell_w = width / mesh_w;
    const int cell_h = height / mesh_h;

    const auto clamp_unit = [](float v) { return std::max(0.0f, std::min(1.0f, v)); };
    const auto clamp_index = [](int v, int extent) { return std::max(0, std::min(extent - 1, v)); };

    dst = cv::Mat(height, width, CV_8UC3);

    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            // Top-left control point of the cell; the last row/column of
            // control points has no neighbour, hence the "- 2" cap
            const int cell_x = std::min(x / cell_w, mesh_w - 2);
            const int cell_y = std::min(y / cell_h, mesh_h - 2);

            const int idx_00 = cell_y * mesh_w + cell_x;
            const int idx_10 = idx_00 + 1;
            const int idx_01 = (cell_y + 1) * mesh_w + cell_x;
            const int idx_11 = idx_01 + 1;

            // Control points sit at the cell centres
            const float cp_x0 = cell_x * cell_w + cell_w / 2.0f;
            const float cp_y0 = cell_y * cell_h + cell_h / 2.0f;
            const float cp_x1 = (cell_x + 1) * cell_w + cell_w / 2.0f;
            const float cp_y1 = (cell_y + 1) * cell_h + cell_h / 2.0f;

            const float alpha = clamp_unit((x - cp_x0) / (cp_x1 - cp_x0));
            const float beta = clamp_unit((y - cp_y0) / (cp_y1 - cp_y0));

            // Bilinear weights of the four control points
            const float w00 = (1 - alpha) * (1 - beta);
            const float w10 = alpha * (1 - beta);
            const float w01 = (1 - alpha) * beta;
            const float w11 = alpha * beta;

            const float dx = w00 * (mesh_dx[idx_00] / 8.0f) +
                             w10 * (mesh_dx[idx_10] / 8.0f) +
                             w01 * (mesh_dx[idx_01] / 8.0f) +
                             w11 * (mesh_dx[idx_11] / 8.0f);
            const float dy = w00 * (mesh_dy[idx_00] / 8.0f) +
                             w10 * (mesh_dy[idx_10] / 8.0f) +
                             w01 * (mesh_dy[idx_01] / 8.0f) +
                             w11 * (mesh_dy[idx_11] / 8.0f);

            const float src_x = x + dx;
            const float src_y = y + dy;

            // Integer sample corners, clamped to the image
            const int raw_x0 = (int)floorf(src_x);
            const int raw_y0 = (int)floorf(src_y);
            const int sx0 = clamp_index(raw_x0, width);
            const int sy0 = clamp_index(raw_y0, height);
            const int sx1 = clamp_index(raw_x0 + 1, width);
            const int sy1 = clamp_index(raw_y0 + 1, height);

            // Fractions are taken relative to the clamped corner
            const float fx = src_x - sx0;
            const float fy = src_y - sy0;
            const float s00 = (1 - fx) * (1 - fy);
            const float s10 = fx * (1 - fy);
            const float s01 = (1 - fx) * fy;
            const float s11 = fx * fy;

            const cv::Vec3b &p00 = src.at<cv::Vec3b>(sy0, sx0);
            const cv::Vec3b &p10 = src.at<cv::Vec3b>(sy0, sx1);
            const cv::Vec3b &p01 = src.at<cv::Vec3b>(sy1, sx0);
            const cv::Vec3b &p11 = src.at<cv::Vec3b>(sy1, sx1);
            cv::Vec3b &out = dst.at<cv::Vec3b>(y, x);

            for (int c = 0; c < 3; c++) {
                const float val_00 = p00[c];
                const float val_10 = p10[c];
                const float val_01 = p01[c];
                const float val_11 = p11[c];
                const float val = s00 * val_00 +
                                  s10 * val_10 +
                                  s01 * val_01 +
                                  s11 * val_11;
                out[c] = (unsigned char)std::max(0.0f, std::min(255.0f, val));
            }
        }
    }
}
int main(int argc, char** argv) {
const char* video_file = (argc > 1) ? argv[1] : "test_video.mp4";
int num_tests = (argc > 2) ? atoi(argv[2]) : 5;
printf("Opening video: %s\n", video_file);
cv::VideoCapture cap(video_file);
if (!cap.isOpened()) {
fprintf(stderr, "Error: Cannot open video file\n");
return 1;
}
int total_frames = (int)cap.get(cv::CAP_PROP_FRAME_COUNT);
int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);
printf("Video: %dx%d, %d frames\n", width, height, total_frames);
// Mesh dimensions (32×32 cells)
int mesh_cell_size = 32;
int mesh_w = (width + mesh_cell_size - 1) / mesh_cell_size;
int mesh_h = (height + mesh_cell_size - 1) / mesh_cell_size;
if (mesh_w < 2) mesh_w = 2;
if (mesh_h < 2) mesh_h = 2;
printf("Mesh: %dx%d (approx %dx%d px cells)\n\n",
mesh_w, mesh_h, width / mesh_w, height / mesh_h);
float smoothness = 0.5f;
int smooth_iterations = 8;
srand(time(NULL));
double total_forward_psnr = 0.0;
double total_roundtrip_psnr = 0.0;
double total_half_roundtrip_psnr = 0.0;
for (int test = 0; test < num_tests; test++) {
int frame_num = 5 + rand() % (total_frames - 10);
printf("[Test %d/%d] Frame pair %d → %d\n", test + 1, num_tests, frame_num - 1, frame_num);
cap.set(cv::CAP_PROP_POS_FRAMES, frame_num - 1);
cv::Mat frame0, frame1;
cap >> frame0;
cap >> frame1;
if (frame0.empty() || frame1.empty()) {
fprintf(stderr, "Error reading frames\n");
continue;
}
cv::Mat frame0_rgb, frame1_rgb;
cv::cvtColor(frame0, frame0_rgb, cv::COLOR_BGR2RGB);
cv::cvtColor(frame1, frame1_rgb, cv::COLOR_BGR2RGB);
// Compute mesh (F0 → F1)
float *flow_x = nullptr, *flow_y = nullptr;
estimate_motion_optical_flow(frame0_rgb.data, frame1_rgb.data,
width, height, &flow_x, &flow_y);
int16_t *mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
int16_t *mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
build_mesh_from_flow(flow_x, flow_y, width, height, mesh_w, mesh_h, mesh_dx, mesh_dy);
smooth_mesh_laplacian(mesh_dx, mesh_dy, mesh_w, mesh_h, smoothness, smooth_iterations);
// Create inverted mesh
int16_t *inv_mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
int16_t *inv_mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
for (int i = 0; i < mesh_w * mesh_h; i++) {
inv_mesh_dx[i] = -mesh_dx[i];
inv_mesh_dy[i] = -mesh_dy[i];
}
// Create half-mesh for symmetric lifting test
int16_t *half_mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
int16_t *half_mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
int16_t *neg_half_mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
int16_t *neg_half_mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
for (int i = 0; i < mesh_w * mesh_h; i++) {
half_mesh_dx[i] = mesh_dx[i] / 2;
half_mesh_dy[i] = mesh_dy[i] / 2;
neg_half_mesh_dx[i] = -half_mesh_dx[i];
neg_half_mesh_dy[i] = -half_mesh_dy[i];
}
// TEST 1: Full forward warp quality (F0 → F1)
cv::Mat warped_forward;
apply_mesh_warp_rgb(frame0, warped_forward, mesh_dx, mesh_dy, mesh_w, mesh_h);
double forward_mse = 0.0;
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
for (int c = 0; c < 3; c++) {
double diff = (double)warped_forward.at<cv::Vec3b>(y, x)[c] -
(double)frame1.at<cv::Vec3b>(y, x)[c];
forward_mse += diff * diff;
}
}
}
forward_mse /= (width * height * 3);
double forward_psnr = (forward_mse > 0) ? 10.0 * log10(255.0 * 255.0 / forward_mse) : 999.0;
total_forward_psnr += forward_psnr;
// TEST 2: Full round-trip (F0 → forward → backward → F0')
cv::Mat roundtrip;
apply_mesh_warp_rgb(warped_forward, roundtrip, inv_mesh_dx, inv_mesh_dy, mesh_w, mesh_h);
double roundtrip_mse = 0.0;
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
for (int c = 0; c < 3; c++) {
double diff = (double)roundtrip.at<cv::Vec3b>(y, x)[c] -
(double)frame0.at<cv::Vec3b>(y, x)[c];
roundtrip_mse += diff * diff;
}
}
}
roundtrip_mse /= (width * height * 3);
double roundtrip_psnr = (roundtrip_mse > 0) ? 10.0 * log10(255.0 * 255.0 / roundtrip_mse) : 999.0;
total_roundtrip_psnr += roundtrip_psnr;
// TEST 3: Half-step symmetric round-trip (MC-lifting style)
// F0 → +½mesh, then → -½mesh (should return to F0)
cv::Mat half_forward, half_roundtrip;
apply_mesh_warp_rgb(frame0, half_forward, half_mesh_dx, half_mesh_dy, mesh_w, mesh_h);
apply_mesh_warp_rgb(half_forward, half_roundtrip, neg_half_mesh_dx, neg_half_mesh_dy, mesh_w, mesh_h);
double half_roundtrip_mse = 0.0;
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
for (int c = 0; c < 3; c++) {
double diff = (double)half_roundtrip.at<cv::Vec3b>(y, x)[c] -
(double)frame0.at<cv::Vec3b>(y, x)[c];
half_roundtrip_mse += diff * diff;
}
}
}
half_roundtrip_mse /= (width * height * 3);
double half_roundtrip_psnr = (half_roundtrip_mse > 0) ? 10.0 * log10(255.0 * 255.0 / half_roundtrip_mse) : 999.0;
total_half_roundtrip_psnr += half_roundtrip_psnr;
printf(" Forward warp (F0→F1): PSNR = %.2f dB\n", forward_psnr);
printf(" Full round-trip (F0→F0'): PSNR = %.2f dB\n", roundtrip_psnr);
printf(" Half round-trip (±½mesh): PSNR = %.2f dB\n", half_roundtrip_psnr);
// Compute motion stats
float avg_motion = 0.0f, max_motion = 0.0f;
for (int i = 0; i < mesh_w * mesh_h; i++) {
float dx = mesh_dx[i] / 8.0f;
float dy = mesh_dy[i] / 8.0f;
float motion = sqrtf(dx * dx + dy * dy);
avg_motion += motion;
if (motion > max_motion) max_motion = motion;
}
avg_motion /= (mesh_w * mesh_h);
printf(" Motion: avg=%.2f px, max=%.2f px\n\n", avg_motion, max_motion);
// Save visualisation for worst case
if (test == 0 || roundtrip_psnr < 30.0) {
char filename[256];
sprintf(filename, "roundtrip_%04d_original.png", frame_num);
cv::imwrite(filename, frame0);
sprintf(filename, "roundtrip_%04d_forward.png", frame_num);
cv::imwrite(filename, warped_forward);
sprintf(filename, "roundtrip_%04d_roundtrip.png", frame_num);
cv::imwrite(filename, roundtrip);
// Difference images
cv::Mat diff_roundtrip = cv::Mat::zeros(height, width, CV_8UC3);
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
for (int c = 0; c < 3; c++) {
int diff = abs((int)roundtrip.at<cv::Vec3b>(y, x)[c] -
(int)frame0.at<cv::Vec3b>(y, x)[c]);
diff_roundtrip.at<cv::Vec3b>(y, x)[c] = std::min(diff * 5, 255);
}
}
}
sprintf(filename, "roundtrip_%04d_diff.png", frame_num);
cv::imwrite(filename, diff_roundtrip);
printf(" Saved visualisation: roundtrip_%04d_*.png\n\n", frame_num);
}
free(flow_x);
free(flow_y);
free(mesh_dx);
free(mesh_dy);
free(inv_mesh_dx);
free(inv_mesh_dy);
free(half_mesh_dx);
free(half_mesh_dy);
free(neg_half_mesh_dx);
free(neg_half_mesh_dy);
}
printf("===========================================\n");
printf("Average Results (%d tests):\n", num_tests);
printf(" Forward warp quality: %.2f dB\n", total_forward_psnr / num_tests);
printf(" Full round-trip error: %.2f dB\n", total_roundtrip_psnr / num_tests);
printf(" Half round-trip error: %.2f dB\n", total_half_roundtrip_psnr / num_tests);
printf("===========================================\n\n");
if (total_roundtrip_psnr / num_tests < 35.0) {
printf("WARNING: Round-trip PSNR < 35 dB indicates poor invertibility!\n");
printf("This will cause MC-lifting to accumulate errors and hurt compression.\n");
printf("Bilinear interpolation artifacts are likely the culprit.\n");
} else {
printf("Round-trip consistency looks acceptable (>35 dB).\n");
}
cap.release();
return 0;
}

View File

@@ -1,422 +0,0 @@
// Visual unit test for mesh warping with hierarchical block matching and affine estimation
// Picks 5 random frames from test_video.mp4, warps prev frame to current frame using mesh,
// and saves both warped and target frames for visual comparison
// Now includes: hierarchical diamond search, Laplacian smoothing, and selective affine transforms
#include <opencv2/opencv.hpp>
#include <opencv2/video/tracking.hpp>
#include <cstdlib>
#include <cstring>
#include <cmath>
#include <cstdio>
#include <ctime>
// Include the mesh functions from encoder
// Prototypes of the encoder's C motion/mesh routines that this test links against.
// The notes below describe how this file uses them; the implementations are not
// part of this translation unit, so anything beyond the call sites is an assumption.
extern "C" {
// Estimates per-pixel motion between two RGB frames of size width x height.
// The two output buffers are returned through out_flow_x / out_flow_y and are
// released with free() by the caller in this file.
// NOTE(review): presumably each buffer holds width*height floats — confirm in the encoder.
void estimate_motion_optical_flow(
const unsigned char *frame1_rgb, const unsigned char *frame2_rgb,
int width, int height,
float **out_flow_x, float **out_flow_y
);
// Converts the flow field into a mesh_w x mesh_h grid of motion vectors written to
// mesh_dx / mesh_dy; callers here allocate mesh_w * mesh_h int16_t entries for each.
// The warp code in this file divides the values by 8 to obtain pixels.
void build_mesh_from_flow(
const float *flow_x, const float *flow_y,
int width, int height,
int mesh_w, int mesh_h,
int16_t *mesh_dx, int16_t *mesh_dy
);
// Smooths the mesh vectors through the non-const mesh_dx / mesh_dy pointers, using
// the given smoothness weight and iteration count.
void smooth_mesh_laplacian(
int16_t *mesh_dx, int16_t *mesh_dy,
int mesh_width, int mesh_height,
float smoothness, int iterations
);
// Estimates motion for the cell at grid position (cell_x, cell_y) of size cell_w x cell_h.
// The return value is treated as a boolean by the caller: non-zero marks the cell as affine.
// out_tx / out_ty are stored as the cell's mesh vector; out_a11..out_a22 are divided by
// 256 when applied by the warp in this file.
// NOTE(review): whether all outputs are written when 0 is returned is not visible here — confirm.
int estimate_cell_affine(
const float *flow_x, const float *flow_y,
int width, int height,
int cell_x, int cell_y,
int cell_w, int cell_h,
float threshold,
int16_t *out_tx, int16_t *out_ty,
int16_t *out_a11, int16_t *out_a12,
int16_t *out_a21, int16_t *out_a22
);
}
// Inverse mesh warp with bilinear interpolation and optional per-cell affine support.
//
// For every destination pixel the motion vector is bilinearly interpolated from the
// four surrounding mesh control points (located at cell centres), optionally extended
// by the cell's affine matrix, and the source image is sampled bilinearly at
// (x + dx, y + dy). Samples outside the image are clamped to the border.
//
//   src          input 8-bit 3-channel image
//   dst          output image; reallocated to the size of src
//   mesh_dx/dy   mesh_w * mesh_h motion vectors in 1/8 pixel units
//   affine_mask  optional (may be null); non-zero entries enable affine for that cell
//   affine_a11..a22  per-cell matrix entries, scaled by 256; only read where the mask is set
//   mesh_w/h     mesh dimensions in control points
//
// If the mesh is unusable (no control points or null vector arrays) dst becomes an
// unmodified copy of src.
static void apply_mesh_warp_rgb(
    const cv::Mat &src,             // Input BGR image
    cv::Mat &dst,                   // Output warped BGR image
    const int16_t *mesh_dx,         // Mesh motion vectors (1/8 pixel)
    const int16_t *mesh_dy,
    const uint8_t *affine_mask,     // 1=affine, 0=translation
    const int16_t *affine_a11,
    const int16_t *affine_a12,
    const int16_t *affine_a21,
    const int16_t *affine_a22,
    int mesh_w, int mesh_h
) {
    // Without at least one control point per axis there is no motion field to apply
    if (mesh_w < 1 || mesh_h < 1 || !mesh_dx || !mesh_dy) {
        dst = src.clone();
        return;
    }

    const int width = src.cols;
    const int height = src.rows;
    // Integer division yields 0 when the mesh has more cells than the image has
    // pixels along an axis; clamp so the cell lookup below never divides by zero
    const int cell_w = std::max(1, width / mesh_w);
    const int cell_h = std::max(1, height / mesh_h);
    // Affine is applied only when the mask and all four parameter arrays are present
    const bool has_affine = affine_mask && affine_a11 && affine_a12 &&
                            affine_a21 && affine_a22;

    dst = cv::Mat(height, width, CV_8UC3);

    for (int y = 0; y < height; y++) {
        // Vertical mesh lookup depends only on the row.
        // Clamp so that (cy0, cy1) is always a valid pair of control-point rows;
        // with a single-row mesh both refer to row 0.
        const int cy0 = std::max(0, std::min(y / cell_h, mesh_h - 2));
        const int cy1 = std::min(cy0 + 1, mesh_h - 1);
        const float cp_y0 = cy0 * cell_h + cell_h / 2.0f;
        const float cp_y1 = (cy0 + 1) * cell_h + cell_h / 2.0f;
        float beta = (y - cp_y0) / (cp_y1 - cp_y0);
        beta = std::max(0.0f, std::min(1.0f, beta));

        cv::Vec3b *out_row = dst.ptr<cv::Vec3b>(y);

        for (int x = 0; x < width; x++) {
            const int cx0 = std::max(0, std::min(x / cell_w, mesh_w - 2));
            const int cx1 = std::min(cx0 + 1, mesh_w - 1);

            // Four corner control points
            const int idx_00 = cy0 * mesh_w + cx0;
            const int idx_10 = cy0 * mesh_w + cx1;
            const int idx_01 = cy1 * mesh_w + cx0;
            const int idx_11 = cy1 * mesh_w + cx1;

            // Control point positions (cell centres) and local coordinate
            const float cp_x0 = cx0 * cell_w + cell_w / 2.0f;
            const float cp_x1 = (cx0 + 1) * cell_w + cell_w / 2.0f;
            float alpha = (x - cp_x0) / (cp_x1 - cp_x0);
            alpha = std::max(0.0f, std::min(1.0f, alpha));

            // Bilinear interpolation of motion vectors
            float dx = (1 - alpha) * (1 - beta) * (mesh_dx[idx_00] / 8.0f) +
                       alpha * (1 - beta) * (mesh_dx[idx_10] / 8.0f) +
                       (1 - alpha) * beta * (mesh_dx[idx_01] / 8.0f) +
                       alpha * beta * (mesh_dx[idx_11] / 8.0f);
            float dy = (1 - alpha) * (1 - beta) * (mesh_dy[idx_00] / 8.0f) +
                       alpha * (1 - beta) * (mesh_dy[idx_10] / 8.0f) +
                       (1 - alpha) * beta * (mesh_dy[idx_01] / 8.0f) +
                       alpha * beta * (mesh_dy[idx_11] / 8.0f);

            // For simplicity the top-left control point's affine parameters are used
            if (has_affine && affine_mask[idx_00]) {
                // Position relative to that control point (the cell centre)
                const float rel_x = x - cp_x0;
                const float rel_y = y - cp_y0;
                const float a11 = affine_a11[idx_00] / 256.0f;
                const float a12 = affine_a12[idx_00] / 256.0f;
                const float a21 = affine_a21[idx_00] / 256.0f;
                const float a22 = affine_a22[idx_00] / 256.0f;
                // Affine warp: [x'] = [a11 a12][x] + [dx]
                //              [y']   [a21 a22][y]   [dy]
                dx = a11 * rel_x + a12 * rel_y + dx;
                dy = a21 * rel_x + a22 * rel_y + dy;
            }

            // Source coordinates (inverse warp)
            const float src_x = x + dx;
            const float src_y = y + dy;

            // Bilinear sampling with border clamping. When clamping moves a
            // coordinate, both neighbours collapse onto the same border pixel, so
            // the out-of-range weights still sum to that pixel's value.
            int sx0 = (int)floorf(src_x);
            int sy0 = (int)floorf(src_y);
            int sx1 = sx0 + 1;
            int sy1 = sy0 + 1;
            sx0 = std::max(0, std::min(width - 1, sx0));
            sy0 = std::max(0, std::min(height - 1, sy0));
            sx1 = std::max(0, std::min(width - 1, sx1));
            sy1 = std::max(0, std::min(height - 1, sy1));
            const float fx = src_x - sx0;
            const float fy = src_y - sy0;

            const cv::Vec3b *row0 = src.ptr<cv::Vec3b>(sy0);
            const cv::Vec3b *row1 = src.ptr<cv::Vec3b>(sy1);

            // Interpolate each channel; the result is truncated (not rounded) to 8 bits
            for (int c = 0; c < 3; c++) {
                const float val_00 = row0[sx0][c];
                const float val_10 = row0[sx1][c];
                const float val_01 = row1[sx0][c];
                const float val_11 = row1[sx1][c];
                const float val = (1 - fx) * (1 - fy) * val_00 +
                                  fx * (1 - fy) * val_10 +
                                  (1 - fx) * fy * val_01 +
                                  fx * fy * val_11;
                out_row[x][c] = (unsigned char)std::max(0.0f, std::min(255.0f, val));
            }
        }
    }
}
// Outlines every affine-enabled mesh cell on `img` with a 1-pixel green rectangle.
// `affine_mask` holds mesh_w * mesh_h entries in row-major order; a non-zero entry
// marks the cell as affine. Cell size is the image size divided by the mesh size.
static void create_affine_overlay(
    cv::Mat &img,
    const uint8_t *affine_mask,
    int mesh_w, int mesh_h
) {
    const int cell_w = img.cols / mesh_w;
    const int cell_h = img.rows / mesh_h;
    const cv::Scalar green(0, 255, 0);

    for (int row = 0; row < mesh_h; ++row) {
        const uint8_t *mask_row = affine_mask + row * mesh_w;
        const int top = row * cell_h;
        const int bottom = (row + 1) * cell_h;

        for (int col = 0; col < mesh_w; ++col) {
            // Translation-only cells are left untouched
            if (!mask_row[col]) {
                continue;
            }
            const cv::Point top_left(col * cell_w, top);
            const cv::Point bottom_right((col + 1) * cell_w, bottom);
            cv::rectangle(img, top_left, bottom_right, green, 1);
        }
    }
}
// Visual unit test for mesh warping.
//
// Usage: <program> [video_file] [num_test_frames]
//   video_file       defaults to "test_video.mp4"
//   num_test_frames  number of random frame pairs to process, defaults to 5
//
// For each random pair of consecutive frames the previous frame is warped towards the
// current one using the motion mesh plus selective per-cell affine transforms, the
// MSE/PSNR against the current frame is printed, and source / warped / overlay /
// target / difference images are written as PNG files to the working directory.
// Returns 0 on success and 1 when the video cannot be used or a buffer cannot be allocated.
int main(int argc, char** argv) {
    const char* video_file = (argc > 1) ? argv[1] : "test_video.mp4";
    const int num_test_frames = (argc > 2) ? atoi(argv[2]) : 5;

    printf("Opening video: %s\n", video_file);
    cv::VideoCapture cap(video_file);
    if (!cap.isOpened()) {
        fprintf(stderr, "Error: Cannot open video file %s\n", video_file);
        return 1;
    }

    const int total_frames = (int)cap.get(cv::CAP_PROP_FRAME_COUNT);
    const int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
    const int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);
    printf("Video: %dx%d, %d frames\n", width, height, total_frames);

    // The MSE below divides by width * height * 3
    if (width <= 0 || height <= 0) {
        fprintf(stderr, "Error: Invalid video dimensions\n");
        cap.release();
        return 1;
    }
    // Frame selection computes rand() % (total_frames - 10), so exactly 10 frames
    // would still be a modulo by zero
    if (total_frames <= 10) {
        fprintf(stderr, "Error: Video too short (need at least 11 frames)\n");
        cap.release();
        return 1;
    }

    // Calculate mesh dimensions (32×32 pixel cells, matches current encoder)
    const int mesh_cell_size = 32;
    int mesh_w = (width + mesh_cell_size - 1) / mesh_cell_size;
    int mesh_h = (height + mesh_cell_size - 1) / mesh_cell_size;
    if (mesh_w < 2) mesh_w = 2;
    if (mesh_h < 2) mesh_h = 2;
    printf("Mesh: %dx%d (approx %dx%d px cells)\n",
           mesh_w, mesh_h, width / mesh_w, height / mesh_h);

    // Encoder parameters (match current encoder_tav.c settings)
    const float smoothness = 0.5f;          // Mesh smoothness weight
    const int smooth_iterations = 8;        // Smoothing iterations
    const float affine_threshold = 0.40f;   // 40% improvement required for affine
    printf("Settings: smoothness=%.2f, iterations=%d, affine_threshold=%.0f%%\n",
           smoothness, smooth_iterations, affine_threshold * 100.0f);

    const int mesh_cells = mesh_w * mesh_h;
    const size_t mesh_bytes = (size_t)mesh_cells * sizeof(int16_t);
    const int cell_w = width / mesh_w;
    const int cell_h = height / mesh_h;

    // Seed random number generator
    srand((unsigned)time(NULL));

    // Pick random frames (avoid first and last 5 frames)
    printf("\nTesting %d random frame pairs:\n", num_test_frames);
    for (int test = 0; test < num_test_frames; test++) {
        // Pick random frame (ensure we have a previous frame)
        const int frame_num = 5 + rand() % (total_frames - 10);
        printf("\n[Test %d/%d] Warping frame %d → frame %d (inverse warp)\n",
               test + 1, num_test_frames, frame_num - 1, frame_num);

        // Read previous frame (source for warping)
        cap.set(cv::CAP_PROP_POS_FRAMES, frame_num - 1);
        cv::Mat prev_frame;
        cap >> prev_frame;
        if (prev_frame.empty()) {
            fprintf(stderr, "Error reading frame %d\n", frame_num - 1);
            continue;
        }

        // Read current frame (target to match)
        cv::Mat curr_frame;
        cap >> curr_frame;
        if (curr_frame.empty()) {
            fprintf(stderr, "Error reading frame %d\n", frame_num);
            continue;
        }

        // The pixel loops index with the container-reported size, so the decoded
        // frames must really have that size
        if (prev_frame.cols != width || prev_frame.rows != height ||
            curr_frame.cols != width || curr_frame.rows != height) {
            fprintf(stderr, "Error: Unexpected frame size\n");
            continue;
        }

        // Convert to RGB for block matching
        cv::Mat prev_rgb, curr_rgb;
        cv::cvtColor(prev_frame, prev_rgb, cv::COLOR_BGR2RGB);
        cv::cvtColor(curr_frame, curr_rgb, cv::COLOR_BGR2RGB);

        // Compute hierarchical block matching (replaces optical flow)
        printf(" Computing hierarchical block matching...\n");
        float *flow_x = nullptr, *flow_y = nullptr;
        estimate_motion_optical_flow(
            prev_rgb.data, curr_rgb.data,
            width, height,
            &flow_x, &flow_y
        );

        int16_t *mesh_dx = (int16_t*)malloc(mesh_bytes);
        int16_t *mesh_dy = (int16_t*)malloc(mesh_bytes);
        uint8_t *affine_mask = (uint8_t*)calloc(mesh_cells, sizeof(uint8_t));
        int16_t *affine_a11 = (int16_t*)malloc(mesh_bytes);
        int16_t *affine_a12 = (int16_t*)malloc(mesh_bytes);
        int16_t *affine_a21 = (int16_t*)malloc(mesh_bytes);
        int16_t *affine_a22 = (int16_t*)malloc(mesh_bytes);

        // free(NULL) is a no-op, so this is safe on partially failed allocation
        auto release_buffers = [&]() {
            free(flow_x);
            free(flow_y);
            free(mesh_dx);
            free(mesh_dy);
            free(affine_mask);
            free(affine_a11);
            free(affine_a12);
            free(affine_a21);
            free(affine_a22);
        };

        if (!flow_x || !flow_y || !mesh_dx || !mesh_dy || !affine_mask ||
            !affine_a11 || !affine_a12 || !affine_a21 || !affine_a22) {
            fprintf(stderr, "Error: Motion estimation or buffer allocation failed\n");
            release_buffers();
            cap.release();
            return 1;
        }

        // Build mesh from flow
        printf(" Building mesh from block matches...\n");
        build_mesh_from_flow(flow_x, flow_y, width, height, mesh_w, mesh_h, mesh_dx, mesh_dy);

        // Apply Laplacian smoothing
        printf(" Applying Laplacian smoothing (%d iterations, %.2f weight)...\n",
               smooth_iterations, smoothness);
        smooth_mesh_laplacian(mesh_dx, mesh_dy, mesh_w, mesh_h, smoothness, smooth_iterations);

        // Estimate selective per-cell affine transforms
        printf(" Estimating selective affine transforms (threshold=%.0f%%)...\n",
               affine_threshold * 100.0f);
        int affine_count = 0;
        for (int cy = 0; cy < mesh_h; cy++) {
            for (int cx = 0; cx < mesh_w; cx++) {
                const int cell_idx = cy * mesh_w + cx;
                // Start from the smoothed translation and a zero matrix so the values
                // stored below are well defined even if the estimator leaves an
                // output untouched.
                // NOTE(review): the estimator's translation replaces the smoothed mesh
                // vector for every cell, affine or not — confirm this is intended.
                int16_t tx = mesh_dx[cell_idx], ty = mesh_dy[cell_idx];
                int16_t a11 = 0, a12 = 0, a21 = 0, a22 = 0;
                const int use_affine = estimate_cell_affine(
                    flow_x, flow_y,
                    width, height,
                    cx, cy, cell_w, cell_h,
                    affine_threshold,
                    &tx, &ty, &a11, &a12, &a21, &a22
                );
                affine_mask[cell_idx] = use_affine ? 1 : 0;
                mesh_dx[cell_idx] = tx;
                mesh_dy[cell_idx] = ty;
                affine_a11[cell_idx] = a11;
                affine_a12[cell_idx] = a12;
                affine_a21[cell_idx] = a21;
                affine_a22[cell_idx] = a22;
                if (use_affine) affine_count++;
            }
        }
        printf(" Affine usage: %d/%d cells (%.1f%%)\n",
               affine_count, mesh_cells,
               100.0f * affine_count / mesh_cells);

        // Warp previous frame to current frame
        printf(" Warping frame with mesh + affine...\n");
        cv::Mat warped;
        apply_mesh_warp_rgb(prev_frame, warped, mesh_dx, mesh_dy,
                            affine_mask, affine_a11, affine_a12, affine_a21, affine_a22,
                            mesh_w, mesh_h);

        // Create visualisation with affine overlay
        cv::Mat warped_viz = warped.clone();
        create_affine_overlay(warped_viz, affine_mask, mesh_w, mesh_h);

        // Compute MSE between warped and target
        double mse = 0.0;
        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++) {
                for (int c = 0; c < 3; c++) {
                    double diff = (double)warped.at<cv::Vec3b>(y, x)[c] -
                                  (double)curr_frame.at<cv::Vec3b>(y, x)[c];
                    mse += diff * diff;
                }
            }
        }
        mse /= (width * height * 3);
        // 999.0 stands in for "identical images" (MSE of zero)
        const double psnr = (mse > 0) ? 10.0 * log10(255.0 * 255.0 / mse) : 999.0;
        printf(" Warp quality: MSE=%.2f, PSNR=%.2f dB\n", mse, psnr);

        // Save images
        char filename[256];
        snprintf(filename, sizeof(filename), "test_mesh_frame_%04d_source.png", frame_num - 1);
        cv::imwrite(filename, prev_frame);
        printf(" Saved source: %s\n", filename);
        snprintf(filename, sizeof(filename), "test_mesh_frame_%04d_warped.png", frame_num);
        cv::imwrite(filename, warped);
        printf(" Saved warped: %s\n", filename);
        snprintf(filename, sizeof(filename), "test_mesh_frame_%04d_warped_viz.png", frame_num);
        cv::imwrite(filename, warped_viz);
        printf(" Saved warped+viz (green=affine): %s\n", filename);
        snprintf(filename, sizeof(filename), "test_mesh_frame_%04d_target.png", frame_num);
        cv::imwrite(filename, curr_frame);
        printf(" Saved target: %s\n", filename);

        // Compute difference image
        cv::Mat diff_img = cv::Mat::zeros(height, width, CV_8UC3);
        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++) {
                for (int c = 0; c < 3; c++) {
                    int diff = abs((int)warped.at<cv::Vec3b>(y, x)[c] -
                                   (int)curr_frame.at<cv::Vec3b>(y, x)[c]);
                    diff_img.at<cv::Vec3b>(y, x)[c] = std::min(diff * 3, 255);  // Amplify for visibility
                }
            }
        }
        snprintf(filename, sizeof(filename), "test_mesh_frame_%04d_diff.png", frame_num);
        cv::imwrite(filename, diff_img);
        printf(" Saved difference (amplified 3x): %s\n", filename);

        // Compute motion statistics; mesh vectors are divided by 8 to express them in pixels
        float max_motion = 0.0f, avg_motion = 0.0f;
        for (int i = 0; i < mesh_cells; i++) {
            float dx = mesh_dx[i] / 8.0f;
            float dy = mesh_dy[i] / 8.0f;
            float motion = sqrtf(dx * dx + dy * dy);
            avg_motion += motion;
            if (motion > max_motion) max_motion = motion;
        }
        avg_motion /= mesh_cells;
        printf(" Motion: avg=%.2f px, max=%.2f px\n", avg_motion, max_motion);

        // Cleanup
        release_buffers();
    }

    printf("\nDone! Check output images:\n");
    printf(" *_source.png: Original frame before warping\n");
    printf(" *_warped.png: Warped frame (should match target)\n");
    printf(" *_warped_viz.png: Warped with green overlay showing affine cells\n");
    printf(" *_target.png: Target frame to match\n");
    printf(" *_diff.png: Difference image (should be mostly black if warp is good)\n");

    cap.release();
    return 0;
}