mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-06-06 05:28:31 +09:00
video_decoder removed; fix video regression and updated to no-zstd
This commit is contained in:
11
.idea/libraries/badlogicgames_gdx.xml
generated
Normal file
11
.idea/libraries/badlogicgames_gdx.xml
generated
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
<component name="libraryTable">
|
||||||
|
<library name="badlogicgames.gdx" type="repository">
|
||||||
|
<properties maven-id="com.badlogicgames.gdx:gdx:1.12.1" />
|
||||||
|
<CLASSES>
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/gdx/gdx/1.12.1/gdx-1.12.1.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/gdx/gdx-jnigen-loader/2.3.1/gdx-jnigen-loader-2.3.1.jar!/" />
|
||||||
|
</CLASSES>
|
||||||
|
<JAVADOC />
|
||||||
|
<SOURCES />
|
||||||
|
</library>
|
||||||
|
</component>
|
||||||
62
.idea/libraries/badlogicgames_gdx_backend_lwjgl3.xml
generated
Normal file
62
.idea/libraries/badlogicgames_gdx_backend_lwjgl3.xml
generated
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
<component name="libraryTable">
|
||||||
|
<library name="badlogicgames.gdx.backend.lwjgl3" type="repository">
|
||||||
|
<properties maven-id="com.badlogicgames.gdx:gdx-backend-lwjgl3:1.12.1" />
|
||||||
|
<CLASSES>
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/gdx/gdx-backend-lwjgl3/1.12.1/gdx-backend-lwjgl3-1.12.1.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/gdx/gdx/1.12.1/gdx-1.12.1.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/gdx/gdx-jnigen-loader/2.3.1/gdx-jnigen-loader-2.3.1.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-linux.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-linux-arm32.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-linux-arm64.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-macos.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-macos-arm64.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-windows.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl/3.3.3/lwjgl-3.3.3-natives-windows-x86.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-linux.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-linux-arm32.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-linux-arm64.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-macos.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-macos-arm64.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-windows.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-glfw/3.3.3/lwjgl-glfw-3.3.3-natives-windows-x86.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-linux.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-linux-arm32.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-linux-arm64.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-macos.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-macos-arm64.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-windows.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-jemalloc/3.3.3/lwjgl-jemalloc-3.3.3-natives-windows-x86.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-linux.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-linux-arm32.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-linux-arm64.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-macos.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-macos-arm64.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-windows.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-openal/3.3.3/lwjgl-openal-3.3.3-natives-windows-x86.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-linux.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-linux-arm32.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-linux-arm64.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-macos.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-macos-arm64.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-windows.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-opengl/3.3.3/lwjgl-opengl-3.3.3-natives-windows-x86.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-linux.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-linux-arm32.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-linux-arm64.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-macos.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-macos-arm64.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-windows.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/lwjgl/lwjgl-stb/3.3.3/lwjgl-stb-3.3.3-natives-windows-x86.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/com/badlogicgames/jlayer/jlayer/1.0.1-gdx/jlayer-1.0.1-gdx.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/jcraft/jorbis/0.0.17/jorbis-0.0.17.jar!/" />
|
||||||
|
</CLASSES>
|
||||||
|
<JAVADOC />
|
||||||
|
<SOURCES />
|
||||||
|
</library>
|
||||||
|
</component>
|
||||||
12
2taud.sh
12
2taud.sh
@@ -1,8 +1,8 @@
|
|||||||
#!/usr/bin/env fish
|
#!/usr/bin/env fish
|
||||||
|
|
||||||
for f in *.mod; python3 mod2taud.py $f assets/disk0/(basename $f .mod).taud; end
|
for f in *.mod; python3 mod2taud.py $f assets/disk0/home/music/(basename $f .mod).taud; end
|
||||||
for f in *.s3m; python3 s3m2taud.py $f assets/disk0/(basename $f .s3m).taud; end
|
for f in *.s3m; python3 s3m2taud.py $f assets/disk0/home/music/(basename $f .s3m).taud; end
|
||||||
for f in *.it; python3 it2taud.py $f assets/disk0/(basename $f .it).taud; end
|
for f in *.it; python3 it2taud.py $f assets/disk0/home/music/(basename $f .it).taud; end
|
||||||
for f in *.xm; python3 xm2taud.py $f assets/disk0/(basename $f .xm).taud; end
|
for f in *.xm; python3 xm2taud.py $f assets/disk0/home/music/(basename $f .xm).taud; end
|
||||||
for f in *.mon; python3 mon2taud.py $f assets/disk0/(basename $f .mon).taud; end
|
for f in *.mon; python3 mon2taud.py $f assets/disk0/home/music/(basename $f .mon).taud; end
|
||||||
for f in *.MON; python3 mon2taud.py $f assets/disk0/(basename $f .MON).taud; end
|
for f in *.MON; python3 mon2taud.py $f assets/disk0/home/music/(basename $f .MON).taud; end
|
||||||
|
|||||||
@@ -10,5 +10,7 @@
|
|||||||
<orderEntry type="module" module-name="tsvm_core" />
|
<orderEntry type="module" module-name="tsvm_core" />
|
||||||
<orderEntry type="library" name="TerranVirtualDisk" level="project" />
|
<orderEntry type="library" name="TerranVirtualDisk" level="project" />
|
||||||
<orderEntry type="library" name="lib" level="project" />
|
<orderEntry type="library" name="lib" level="project" />
|
||||||
|
<orderEntry type="library" name="badlogicgames.gdx" level="project" />
|
||||||
|
<orderEntry type="library" name="badlogicgames.gdx.backend.lwjgl3" level="project" />
|
||||||
</component>
|
</component>
|
||||||
</module>
|
</module>
|
||||||
@@ -1,7 +1,9 @@
|
|||||||
const SND_BASE_ADDR = audio.getBaseAddr()
|
const SND_BASE_ADDR = audio.getBaseAddr()
|
||||||
const SND_MEM_ADDR = audio.getMemAddr()
|
const SND_MEM_ADDR = audio.getMemAddr()
|
||||||
const TAD_INPUT_ADDR = SND_MEM_ADDR - 262144 // TAD input buffer (matches TAV packet 0x24)
|
// tadInputBin lives at audio-local offset 917504 and tadDecodedBin at 983040
|
||||||
const TAD_DECODED_ADDR = SND_MEM_ADDR - 262144 + 65536 // TAD decoded buffer
|
// (post-bef85f6 memory map; the old 262144 offset now hits the enlarged sampleBin).
|
||||||
|
const TAD_INPUT_ADDR = SND_MEM_ADDR - 917504 // TAD input buffer (matches TAV packet 0x24)
|
||||||
|
const TAD_DECODED_ADDR = SND_MEM_ADDR - 983040 // TAD decoded buffer
|
||||||
|
|
||||||
if (!SND_BASE_ADDR) return 10
|
if (!SND_BASE_ADDR) return 10
|
||||||
|
|
||||||
|
|||||||
@@ -1746,7 +1746,9 @@ try {
|
|||||||
tadInitialised = true
|
tadInitialised = true
|
||||||
}
|
}
|
||||||
|
|
||||||
seqread.readBytes(payloadLen, SND_MEM_ADDR - 262144)
|
// tadInputBin lives at audio-local offset 917504 (post-bef85f6 memory map);
|
||||||
|
// the previous 262144 offset now points into the enlarged sampleBin.
|
||||||
|
seqread.readBytes(payloadLen, SND_MEM_ADDR - 917504)
|
||||||
audio.tadDecode()
|
audio.tadDecode()
|
||||||
audio.tadUploadDecoded(AUDIO_DEVICE, sampleLen)
|
audio.tadUploadDecoded(AUDIO_DEVICE, sampleLen)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -275,6 +275,7 @@ class AudioJSR223Delegate(private val vm: VM) {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// while the following code does work, it was decided that MP3 is "too new" for tsvm and thus removed.
|
||||||
/*
|
/*
|
||||||
js-mp3
|
js-mp3
|
||||||
https://github.com/soundbus-technologies/js-mp3
|
https://github.com/soundbus-technologies/js-mp3
|
||||||
|
|||||||
@@ -5433,6 +5433,18 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
|
|
||||||
private val TAV_QLUT = intArrayOf(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096)
|
private val TAV_QLUT = intArrayOf(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096)
|
||||||
|
|
||||||
|
// Returns the Zstd-decompressed bytes when `data` begins with the Zstd frame magic 0x28 0xB5 0x2F 0xFD (the magic number in little-endian byte order); otherwise returns `data` itself, unchanged.
|
||||||
|
// Newer TAV files default to no Zstd (Video Flags bit 4); detecting the magic lets the decoder accept both compressed and raw payloads transparently.
|
||||||
|
// NOTE(review): detection relies only on the first four bytes, so a raw payload that happens to start with them would be handed to Zstd — confirm the format rules this out.
|
||||||
|
private fun tavDecompressIfZstd(data: ByteArray): ByteArray {
|
||||||
|
if (data.size >= 4 && // shorter inputs cannot hold the 4-byte magic
|
||||||
|
data[0] == 0x28.toByte() && data[1] == 0xB5.toByte() &&
|
||||||
|
data[2] == 0x2F.toByte() && data[3] == 0xFD.toByte()) {
|
||||||
|
return ZstdInputStream(ByteArrayInputStream(data)).use { it.readBytes() } // `use` closes the stream even if readBytes() throws
|
||||||
|
}
|
||||||
|
return data // no magic: pass the caller's array through as-is (no copy is made)
|
||||||
|
}
|
||||||
|
|
||||||
// New tavDecode function that accepts compressed data and decompresses internally
|
// New tavDecode function that accepts compressed data and decompresses internally
|
||||||
fun tavDecodeCompressed(compressedDataPtr: Long, compressedSize: Int, currentRGBAddr: Long, prevRGBAddr: Long,
|
fun tavDecodeCompressed(compressedDataPtr: Long, compressedSize: Int, currentRGBAddr: Long, prevRGBAddr: Long,
|
||||||
width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int,
|
width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int,
|
||||||
@@ -5445,12 +5457,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return try {
|
return try {
|
||||||
// Decompress using Zstd
|
// Decompress with Zstd if the payload starts with the Zstd frame magic;
|
||||||
val bais = ByteArrayInputStream(compressedData)
|
// otherwise pass through (TAV files written without --zstd-level).
|
||||||
val zis = ZstdInputStream(bais)
|
val decompressedData = tavDecompressIfZstd(compressedData)
|
||||||
val decompressedData = zis.readBytes()
|
|
||||||
zis.close()
|
|
||||||
bais.close()
|
|
||||||
|
|
||||||
// Allocate buffer for decompressed data
|
// Allocate buffer for decompressed data
|
||||||
val decompressedBuffer = vm.malloc(decompressedData.size)
|
val decompressedBuffer = vm.malloc(decompressedData.size)
|
||||||
@@ -6725,9 +6734,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
)
|
)
|
||||||
|
|
||||||
val decompressedData = try {
|
val decompressedData = try {
|
||||||
ZstdInputStream(java.io.ByteArrayInputStream(compressedData)).use { zstd ->
|
// Decompress with Zstd if the payload starts with the Zstd frame magic;
|
||||||
zstd.readBytes()
|
// otherwise pass through (TAV files written without --zstd-level).
|
||||||
}
|
tavDecompressIfZstd(compressedData)
|
||||||
} catch (e: Exception) {
|
} catch (e: Exception) {
|
||||||
println("ERROR: Zstd decompression failed: ${e.message}")
|
println("ERROR: Zstd decompression failed: ${e.message}")
|
||||||
return arrayOf(0, dbgOut)
|
return arrayOf(0, dbgOut)
|
||||||
|
|||||||
@@ -911,24 +911,32 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
|
|||||||
((tadInputBin[offset++].toUint()) shl 8)
|
((tadInputBin[offset++].toUint()) shl 8)
|
||||||
)
|
)
|
||||||
val maxIndex = tadInputBin[offset++].toUint()
|
val maxIndex = tadInputBin[offset++].toUint()
|
||||||
val payloadSize = (
|
val payloadSizeField = (
|
||||||
(tadInputBin[offset++].toUint()) or
|
(tadInputBin[offset++].toUint()) or
|
||||||
((tadInputBin[offset++].toUint()) shl 8) or
|
((tadInputBin[offset++].toUint()) shl 8) or
|
||||||
((tadInputBin[offset++].toUint()) shl 16) or
|
((tadInputBin[offset++].toUint()) shl 16) or
|
||||||
((tadInputBin[offset++].toUint()) shl 24)
|
((tadInputBin[offset++].toUint()) shl 24)
|
||||||
)
|
)
|
||||||
|
|
||||||
// Decompress payload
|
// MSB of payload size = 1 means the payload is stored uncompressed (no Zstd).
|
||||||
|
val payloadIsRaw = (payloadSizeField and 0x80000000.toInt()) != 0
|
||||||
|
val payloadSize = payloadSizeField and 0x7FFFFFFF
|
||||||
|
|
||||||
|
// Read payload bytes
|
||||||
val compressed = ByteArray(payloadSize)
|
val compressed = ByteArray(payloadSize)
|
||||||
UnsafeHelper.memcpyRaw(null, tadInputBin.ptr + offset, compressed, UnsafeHelper.getArrayOffset(compressed), payloadSize.toLong())
|
UnsafeHelper.memcpyRaw(null, tadInputBin.ptr + offset, compressed, UnsafeHelper.getArrayOffset(compressed), payloadSize.toLong())
|
||||||
|
|
||||||
val payload: ByteArray = try {
|
val payload: ByteArray = if (payloadIsRaw) {
|
||||||
ZstdInputStream(ByteArrayInputStream(compressed)).use { zstd ->
|
compressed
|
||||||
zstd.readBytes()
|
} else {
|
||||||
|
try {
|
||||||
|
ZstdInputStream(ByteArrayInputStream(compressed)).use { zstd ->
|
||||||
|
zstd.readBytes()
|
||||||
|
}
|
||||||
|
} catch (e: Exception) {
|
||||||
|
println("ERROR: Zstd decompression failed: ${e.message}")
|
||||||
|
return
|
||||||
}
|
}
|
||||||
} catch (e: Exception) {
|
|
||||||
println("ERROR: Zstd decompression failed: ${e.message}")
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decode using binary tree EZBC - FIXED!
|
// Decode using binary tree EZBC - FIXED!
|
||||||
|
|||||||
@@ -12,5 +12,7 @@
|
|||||||
<orderEntry type="library" name="jetbrains.kotlin.reflect" level="project" />
|
<orderEntry type="library" name="jetbrains.kotlin.reflect" level="project" />
|
||||||
<orderEntry type="library" name="jetbrains.kotlin.test" level="project" />
|
<orderEntry type="library" name="jetbrains.kotlin.test" level="project" />
|
||||||
<orderEntry type="library" name="lib" level="project" />
|
<orderEntry type="library" name="lib" level="project" />
|
||||||
|
<orderEntry type="library" name="badlogicgames.gdx" level="project" />
|
||||||
|
<orderEntry type="library" name="badlogicgames.gdx.backend.lwjgl3" level="project" />
|
||||||
</component>
|
</component>
|
||||||
</module>
|
</module>
|
||||||
@@ -10,5 +10,7 @@
|
|||||||
<orderEntry type="library" name="TerranVirtualDisk" level="project" />
|
<orderEntry type="library" name="TerranVirtualDisk" level="project" />
|
||||||
<orderEntry type="module" module-name="tsvm_core" />
|
<orderEntry type="module" module-name="tsvm_core" />
|
||||||
<orderEntry type="library" name="lib" level="project" />
|
<orderEntry type="library" name="lib" level="project" />
|
||||||
|
<orderEntry type="library" name="badlogicgames.gdx" level="project" />
|
||||||
|
<orderEntry type="library" name="badlogicgames.gdx.backend.lwjgl3" level="project" />
|
||||||
</component>
|
</component>
|
||||||
</module>
|
</module>
|
||||||
@@ -1,221 +0,0 @@
|
|||||||
# Created by CuriousTorvald and Claude on 2025-08-17.
|
|
||||||
# Makefile for the TSVM Advanced Video (TAV) / Advanced Audio (TAD) codec tools and libraries
|
|
||||||
|
|
||||||
CC = gcc
|
|
||||||
CXX = g++
|
|
||||||
CFLAGS = -std=c99 -Wall -Wextra -Ofast -D_GNU_SOURCE -march=native -mavx512f -mavx512dq -mavx512bw -mavx512vl -Iinclude
|
|
||||||
CXXFLAGS = -std=c++11 -Wall -Wextra -Ofast -D_GNU_SOURCE -march=native -mavx512f -mavx512dq -mavx512bw -mavx512vl -Iinclude
|
|
||||||
DBGFLAGS =
|
|
||||||
PREFIX = /usr/local
|
|
||||||
|
|
||||||
# Zstd flags (use pkg-config if available, fallback for cross-platform compatibility)
|
|
||||||
ZSTD_CFLAGS = $(shell pkg-config --cflags libzstd 2>/dev/null || echo "")
|
|
||||||
ZSTD_LIBS = $(shell pkg-config --libs libzstd 2>/dev/null || echo "-lzstd")
|
|
||||||
LIBS = -lm $(ZSTD_LIBS)
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# Library Object Files
|
|
||||||
# =============================================================================
|
|
||||||
|
|
||||||
# libtavenc - TAV encoder library
|
|
||||||
LIBTAVENC_OBJ = lib/libtavenc/tav_encoder_lib.o \
|
|
||||||
lib/libtavenc/tav_encoder_color.o \
|
|
||||||
lib/libtavenc/tav_encoder_dwt.o \
|
|
||||||
lib/libtavenc/tav_encoder_quantize.o \
|
|
||||||
lib/libtavenc/tav_encoder_ezbc.o \
|
|
||||||
lib/libtavenc/tav_encoder_utils.o \
|
|
||||||
lib/libtavenc/tav_encoder_tile.o
|
|
||||||
|
|
||||||
# libtavdec - TAV decoder library
|
|
||||||
LIBTAVDEC_OBJ = lib/libtavdec/tav_video_decoder.o
|
|
||||||
|
|
||||||
# libtadenc - TAD encoder library
|
|
||||||
LIBTADENC_OBJ = lib/libtadenc/encoder_tad.o
|
|
||||||
|
|
||||||
# libtaddec - TAD decoder library
|
|
||||||
LIBTADDEC_OBJ = lib/libtaddec/decoder_tad.o
|
|
||||||
|
|
||||||
# libfec - Forward Error Correction library (LDPC + Reed-Solomon)
|
|
||||||
LIBFEC_OBJ = lib/libfec/ldpc.o lib/libfec/reed_solomon.o lib/libfec/ldpc_payload.o
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# Targets
|
|
||||||
# =============================================================================
|
|
||||||
|
|
||||||
# Source files and targets
|
|
||||||
TARGETS = libs encoder_tav_ref decoder_tav_ref tav_inspector tad tav_dt
|
|
||||||
LIBRARIES = lib/libtavenc.a lib/libtavdec.a lib/libtadenc.a lib/libtaddec.a lib/libfec.a
|
|
||||||
TAV_TARGETS = encoder_tav_ref decoder_tav_ref tav_inspector
|
|
||||||
TAD_TARGETS = encoder_tad decoder_tad
|
|
||||||
DT_TARGETS = encoder_tav_dt decoder_tav_dt tavdt_noise_injector
|
|
||||||
|
|
||||||
# Build everything in $(TARGETS) (default); always runs `clean` first, forcing a full rebuild
|
|
||||||
all: clean $(TARGETS)
|
|
||||||
|
|
||||||
# Build all libraries
|
|
||||||
libs: $(LIBRARIES)
|
|
||||||
|
|
||||||
# Reference encoder using libtavenc (replaces old monolithic encoder)
|
|
||||||
encoder_tav_ref: src/encoder_tav.c lib/libtavenc.a lib/libtadenc.a
|
|
||||||
rm -f encoder_tav_ref
|
|
||||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -o encoder_tav_ref src/encoder_tav.c lib/libtavenc.a lib/libtadenc.a $(LIBS)
|
|
||||||
@echo ""
|
|
||||||
@echo "Reference encoder built: encoder_tav_ref"
|
|
||||||
@echo "This is the official reference implementation with all features"
|
|
||||||
|
|
||||||
# Reference decoder using libtavdec (replaces old monolithic decoder)
|
|
||||||
decoder_tav_ref: src/decoder_tav.c lib/libtavdec.a lib/libtaddec.a
|
|
||||||
rm -f decoder_tav_ref
|
|
||||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -o decoder_tav_ref src/decoder_tav.c lib/libtavdec.a lib/libtaddec.a $(LIBS)
|
|
||||||
@echo ""
|
|
||||||
@echo "Reference decoder built: decoder_tav_ref"
|
|
||||||
@echo "This is the official reference implementation with all features"
|
|
||||||
|
|
||||||
tav_inspector: tav_inspector.c lib/libfec.a
|
|
||||||
rm -f tav_inspector
|
|
||||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Ilib/libfec -o tav_inspector $< lib/libfec.a $(LIBS)
|
|
||||||
|
|
||||||
tav: $(TAV_TARGETS)
|
|
||||||
|
|
||||||
# Build TAD (TSVM Advanced Audio) tools
|
|
||||||
encoder_tad: src/encoder_tad_standalone.c lib/libtadenc/encoder_tad.c include/encoder_tad.h
|
|
||||||
rm -f encoder_tad encoder_tad_standalone.o encoder_tad.o
|
|
||||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c lib/libtadenc/encoder_tad.c -o encoder_tad.o
|
|
||||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c src/encoder_tad_standalone.c -o encoder_tad_standalone.o
|
|
||||||
$(CC) $(DBGFLAGS) -o encoder_tad encoder_tad_standalone.o encoder_tad.o $(LIBS)
|
|
||||||
|
|
||||||
decoder_tad: lib/libtaddec/decoder_tad.c
|
|
||||||
rm -f decoder_tad
|
|
||||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -o decoder_tad $< $(LIBS)
|
|
||||||
|
|
||||||
# Build all TAD tools
|
|
||||||
tad: $(TAD_TARGETS)
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# Library Build Rules
|
|
||||||
# =============================================================================
|
|
||||||
|
|
||||||
# Compile library object files
|
|
||||||
lib/libtavenc/%.o: lib/libtavenc/%.c
|
|
||||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c $< -o $@
|
|
||||||
|
|
||||||
lib/libtavdec/%.o: lib/libtavdec/%.c
|
|
||||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c $< -o $@
|
|
||||||
|
|
||||||
lib/libtadenc/%.o: lib/libtadenc/%.c
|
|
||||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c $< -o $@
|
|
||||||
|
|
||||||
lib/libtaddec/%.o: lib/libtaddec/%.c
|
|
||||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -DTAD_DECODER_LIB -c $< -o $@
|
|
||||||
|
|
||||||
lib/libfec/%.o: lib/libfec/%.c
|
|
||||||
$(CC) $(CFLAGS) -Ilib/libfec -c $< -o $@
|
|
||||||
|
|
||||||
# Build static libraries
|
|
||||||
lib/libtavenc.a: $(LIBTAVENC_OBJ)
|
|
||||||
ar rcs $@ $^
|
|
||||||
|
|
||||||
lib/libtavdec.a: $(LIBTAVDEC_OBJ)
|
|
||||||
ar rcs $@ $^
|
|
||||||
|
|
||||||
lib/libtadenc.a: $(LIBTADENC_OBJ)
|
|
||||||
ar rcs $@ $^
|
|
||||||
|
|
||||||
lib/libtaddec.a: $(LIBTADDEC_OBJ)
|
|
||||||
ar rcs $@ $^
|
|
||||||
|
|
||||||
lib/libfec.a: $(LIBFEC_OBJ)
|
|
||||||
ar rcs $@ $^
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# TAV-DT (Digital Tape) Encoder/Decoder
|
|
||||||
# =============================================================================
|
|
||||||
|
|
||||||
# TAV-DT encoder with FEC (multithreaded)
|
|
||||||
encoder_tav_dt: src/encoder_tav_dt.c lib/libtavenc.a lib/libtadenc.a lib/libfec.a
|
|
||||||
rm -f encoder_tav_dt
|
|
||||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -Ilib/libfec -o encoder_tav_dt src/encoder_tav_dt.c lib/libtavenc.a lib/libtadenc.a lib/libfec.a $(LIBS) -lpthread
|
|
||||||
@echo ""
|
|
||||||
@echo "TAV-DT encoder built: encoder_tav_dt"
|
|
||||||
@echo "Digital Tape format with LDPC and Reed-Solomon FEC (multithreaded)"
|
|
||||||
|
|
||||||
# TAV-DT decoder with FEC (multithreaded)
|
|
||||||
decoder_tav_dt: src/decoder_tav_dt.c lib/libtavdec.a lib/libtaddec.a lib/libfec.a
|
|
||||||
rm -f decoder_tav_dt
|
|
||||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -Ilib/libfec -o decoder_tav_dt src/decoder_tav_dt.c lib/libtavdec.a lib/libtaddec.a lib/libfec.a $(LIBS) -lpthread
|
|
||||||
@echo ""
|
|
||||||
@echo "TAV-DT decoder built: decoder_tav_dt"
|
|
||||||
@echo "Digital Tape format with LDPC and Reed-Solomon FEC (multithreaded)"
|
|
||||||
|
|
||||||
# TAV-DT noise injector (channel simulator)
|
|
||||||
tavdt_noise_injector: tavdt_noise_injector.c
|
|
||||||
rm -f tavdt_noise_injector
|
|
||||||
$(CC) -std=c99 -Wall -Ofast -D_GNU_SOURCE -o tavdt_noise_injector tavdt_noise_injector.c -lm
|
|
||||||
@echo ""
|
|
||||||
@echo "TAV-DT noise injector built: tavdt_noise_injector"
|
|
||||||
@echo "Simulates QPSK satellite channel noise (AWGN + burst)"
|
|
||||||
|
|
||||||
# Build all TAV-DT tools
|
|
||||||
tav_dt: $(DT_TARGETS)
|
|
||||||
|
|
||||||
# Build with debug symbols
|
|
||||||
debug: CFLAGS += -g -DDEBUG -fsanitize=address -fno-omit-frame-pointer
|
|
||||||
debug: DBGFLAGS += -fsanitize=address -fno-omit-frame-pointer
|
|
||||||
debug: clean $(TARGETS)
|
|
||||||
|
|
||||||
# Clean build artifacts
|
|
||||||
clean:
|
|
||||||
rm -f $(TARGETS) $(TAD_TARGETS) $(DT_TARGETS) $(LIBRARIES) *.o lib/*/*.o
|
|
||||||
|
|
||||||
# Install (copy to PATH)
|
|
||||||
install: $(TARGETS)
|
|
||||||
cp encoder_tav_ref $(PREFIX)/bin/
|
|
||||||
cp decoder_tav_ref $(PREFIX)/bin/
|
|
||||||
cp encoder_tad $(PREFIX)/bin/
|
|
||||||
cp decoder_tad $(PREFIX)/bin/
|
|
||||||
cp encoder_tav_dt $(PREFIX)/bin/
|
|
||||||
cp decoder_tav_dt $(PREFIX)/bin/
|
|
||||||
cp tav_inspector $(PREFIX)/bin/
|
|
||||||
|
|
||||||
# Check for required dependencies
|
|
||||||
check-deps:
|
|
||||||
@echo "Checking dependencies..."
|
|
||||||
@pkg-config --exists libzstd || (echo "Error: libzstd-dev not found. Install libzstd-dev or equivalent" && exit 1)
|
|
||||||
@echo "All dependencies found."
|
|
||||||
|
|
||||||
# Help
|
|
||||||
help:
|
|
||||||
@echo "TSVM Advanced Video (TAV) and Audio (TAD) Encoders"
|
|
||||||
@echo ""
|
|
||||||
@echo "Targets:"
|
|
||||||
@echo " all - Build video encoders (default)"
|
|
||||||
@echo " libs - Build all codec libraries (.a files)"
|
|
||||||
@echo " tav - Build the TAV advanced video encoder"
|
|
||||||
@echo " tav_dt - Build all TAV-DT (Digital Tape) tools with FEC"
|
|
||||||
@echo " tavdt_noise_injector - Build TAV-DT channel noise simulator"
|
|
||||||
@echo " tad - Build all TAD audio tools (encoder, decoder)"
|
|
||||||
@echo " encoder_tad - Build TAD audio encoder"
|
|
||||||
@echo " decoder_tad - Build TAD audio decoder"
|
|
||||||
@echo " tests - Build test programs"
|
|
||||||
@echo " debug - Build with debug symbols"
|
|
||||||
@echo " clean - Remove build artifacts"
|
|
||||||
@echo " install - Install to /usr/local/bin"
|
|
||||||
@echo " check-deps - Check for required dependencies"
|
|
||||||
@echo " help - Show this help"
|
|
||||||
@echo ""
|
|
||||||
@echo "Libraries:"
|
|
||||||
@echo " lib/libtavenc.a - TAV encoder library"
|
|
||||||
@echo " lib/libtavdec.a - TAV decoder library"
|
|
||||||
@echo " lib/libtadenc.a - TAD encoder library"
|
|
||||||
@echo " lib/libtaddec.a - TAD decoder library"
|
|
||||||
@echo " lib/libfec.a - Forward Error Correction library (LDPC + RS)"
|
|
||||||
@echo ""
|
|
||||||
@echo "Usage:"
|
|
||||||
@echo " make # Build video encoders"
|
|
||||||
@echo " make libs # Build all libraries"
|
|
||||||
@echo " make tav # Build TAV encoder"
|
|
||||||
@echo " make tav_dt # Build TAV-DT encoder/decoder with FEC"
|
|
||||||
@echo " make tad # Build all TAD audio tools"
|
|
||||||
@echo " sudo make install # Install all encoders"
|
|
||||||
|
|
||||||
.PHONY: all libs clean install check-deps help debug tad tav_dt tests
|
|
||||||
@@ -1,350 +0,0 @@
|
|||||||
# TAD - TSVM Advanced Audio Codec
|
|
||||||
|
|
||||||
A perceptually-optimised wavelet-based audio codec designed for resource-constrained systems, featuring CDF 9/7 wavelets, EZBC sparse coding, and sophisticated perceptual quantisation.
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
|
|
||||||
TAD (TSVM Advanced Audio) is a modern audio codec built on discrete wavelet transform (DWT) using Cohen-Daubechies-Feauveau (CDF) 9/7 biorthogonal wavelets. It combines perceptual quantisation, advanced entropy coding, and careful optimisation for resource-constrained systems.
|
|
||||||
|
|
||||||
### Key Advantages
|
|
||||||
|
|
||||||
- **Perceptual optimisation**: HVS-aware quantisation preserves audio quality where it matters
|
|
||||||
- **Efficient sparse coding**: EZBC encoding exploits coefficient sparsity (86.9% zeros in typical content)
|
|
||||||
- **Variable chunk sizes**: Supports any chunk size ≥1024 samples, including non-power-of-2
|
|
||||||
- **Stereo decorrelation**: Mid/Side encoding exploits stereo correlation for better compression
|
|
||||||
- **Hardware-friendly**: Designed for efficient decoding on resource-constrained platforms
|
|
||||||
|
|
||||||
## Features
|
|
||||||
|
|
||||||
### Compression Technology
|
|
||||||
|
|
||||||
- **CDF 9/7 Biorthogonal Wavelets**
|
|
||||||
- 9-level fixed decomposition for all chunk sizes
|
|
||||||
- Lifting scheme implementation for efficient computation
|
|
||||||
- Optimal frequency discrimination for audio signals
|
|
||||||
|
|
||||||
- **Pre-processing**
|
|
||||||
- First-order IIR pre-emphasis filter (α=0.5) shifts quantisation noise to lower frequencies, where it is less objectionable to listeners
|
|
||||||
- Gamma companding (γ=0.5) for dynamic range compression before quantisation
|
|
||||||
- Mid/Side stereo transformation exploits stereo correlation
|
|
||||||
- Lambda companding (λ=6.0) with Laplacian CDF mapping for full bit utilisation
|
|
||||||
|
|
||||||
- **Perceptual Quantisation**
|
|
||||||
- Channel-specific (Mid/Side) frequency-dependent weights
|
|
||||||
- Subband-aware quantisation preserves perceptually important frequencies
|
|
||||||
|
|
||||||
- **EZBC Encoding**
|
|
||||||
- Binary tree embedded zero block coding
|
|
||||||
- Exploits coefficient sparsity (86.9% Mid, 97.8% Side typical)
|
|
||||||
- Progressive refinement structure
|
|
||||||
- Spatial clustering of non-zero coefficients
|
|
||||||
|
|
||||||
- **Entropy Coding**
|
|
||||||
- Zstandard compression (level 7) on concatenated EZBC bitstreams
|
|
||||||
- Cross-channel compression optimisation
|
|
||||||
- Optional Zstd bypass for debugging
|
|
||||||
|
|
||||||
### Audio Format
|
|
||||||
|
|
||||||
- **Sample Rate**: 32 kHz (TSVM audio hardware native format)
|
|
||||||
- **Channels**: Stereo (L/R input, Mid/Side internal representation)
|
|
||||||
- **Chunk Sizes**: Variable, any size ≥1024 samples (including non-power-of-2)
|
|
||||||
- **Bit Depth**: 32-bit float internal, 8-bit unsigned PCM output with noise-shaped dithering
|
|
||||||
- **Bandwidth**: Full 0-16 kHz frequency range preserved
|
|
||||||
|
|
||||||
### Quality Levels
|
|
||||||
|
|
||||||
Six quality levels (0-5) provide a wide range of compression/quality trade-offs:
|
|
||||||
- **Level 0**: Lowest quality, smallest file size
|
|
||||||
- **Level 3**: Default, balanced quality/compression (2.51:1 vs PCMu8)
|
|
||||||
- **Level 5**: Highest quality, largest file size
|
|
||||||
|
|
||||||
Quality levels are designed to be synchronised with the TAV video codec for unified encoding.
|
|
||||||
|
|
||||||
## Building
|
|
||||||
|
|
||||||
### Prerequisites
|
|
||||||
|
|
||||||
- C compiler (GCC/Clang)
|
|
||||||
- Zstandard library (libzstd)
|
|
||||||
- Math library (libm)
|
|
||||||
|
|
||||||
### Compilation
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Build TAD encoder/decoder
|
|
||||||
make tad
|
|
||||||
|
|
||||||
# Build all tools
|
|
||||||
make all
|
|
||||||
|
|
||||||
# Clean build artefacts
|
|
||||||
make clean
|
|
||||||
```
|
|
||||||
|
|
||||||
### Build Targets
|
|
||||||
|
|
||||||
- `encoder_tad` - Standalone audio encoder with FFmpeg calls
|
|
||||||
- `decoder_tad` - Standalone audio decoder
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
|
|
||||||
### Basic Encoding
|
|
||||||
|
|
||||||
Encoding requires the FFmpeg executable to be installed on your system.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Default encoding (quality level 3)
|
|
||||||
./encoder_tad -i input.mp3 -o output.tad
|
|
||||||
|
|
||||||
# Specify quality level (0-5)
|
|
||||||
./encoder_tad -i input.m4a -o output.tad -q 0 # Lowest quality
|
|
||||||
./encoder_tad -i input.ogg -o output.tad -q 5 # Highest quality
|
|
||||||
|
|
||||||
# Disable Zstd compression (for debugging)
|
|
||||||
./encoder_tad -i input.opus -o output.tad --no-zstd
|
|
||||||
|
|
||||||
# Verbose output with statistics
|
|
||||||
./encoder_tad -i input.flac -o output.tad -v
|
|
||||||
```
|
|
||||||
|
|
||||||
### Decoding
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Decode to PCMu8
|
|
||||||
./decoder_tad -i input.tad -o output.pcm --raw-pcm
|
|
||||||
|
|
||||||
# Decode to WAV
|
|
||||||
./decoder_tad -i input.tad -o output.wav
|
|
||||||
```
|
|
||||||
|
|
||||||
### Input Formats
|
|
||||||
|
|
||||||
TAD encoder accepts any audio format supported by FFmpeg:
|
|
||||||
- Audio files: WAV, MP3, FLAC, OGG, AAC, etc.
|
|
||||||
- Video files with audio streams: MP4, MKV, AVI, etc.
|
|
||||||
- Raw PCM formats
|
|
||||||
|
|
||||||
Audio is automatically resampled to 32 kHz stereo if necessary.
|
|
||||||
|
|
||||||
## Technical Architecture
|
|
||||||
|
|
||||||
### Encoder Pipeline
|
|
||||||
|
|
||||||
1. **Input Processing**
|
|
||||||
- FFmpeg demuxing and audio stream extraction
|
|
||||||
- Resampling to 32 kHz stereo
|
|
||||||
- Conversion to PCM32f
|
|
||||||
|
|
||||||
2. **Pre-emphasis Filter**
|
|
||||||
- First-order IIR filter with α=0.5
|
|
||||||
- Shifts quantisation noise toward lower frequencies
|
|
||||||
- Improves perceptual quality
|
|
||||||
|
|
||||||
3. **Gamma Companding**
|
|
||||||
- Dynamic range compression with γ=0.5
|
|
||||||
- Applied independently to each sample
|
|
||||||
- Reduces quantisation error for low-amplitude signals
|
|
||||||
|
|
||||||
4. **Stereo Decorrelation**
|
|
||||||
- Left/Right to Mid/Side transformation
|
|
||||||
- Mid = (L + R) / 2
|
|
||||||
- Side = (L - R) / 2
|
|
||||||
- Exploits stereo correlation for better compression
|
|
||||||
|
|
||||||
5. **9-Level CDF 9/7 DWT**
|
|
||||||
- Fixed 9 decomposition levels for all chunk sizes
|
|
||||||
- Forward lifting scheme implementation
|
|
||||||
- Correct length tracking for non-power-of-2 sizes
|
|
||||||
|
|
||||||
6. **Perceptual Quantisation**
|
|
||||||
- Channel-specific (Mid/Side) subband weights
|
|
||||||
- Lambda companding with λ=6.0
|
|
||||||
- Laplacian CDF mapping: `sign(x) * floor(λ * log(1 + |x|/λ))`
|
|
||||||
- Quantised to int8 coefficients
|
|
||||||
|
|
||||||
7. **EZBC Encoding**
|
|
||||||
- Binary tree structure per channel
|
|
||||||
- Progressive refinement by bitplanes
|
|
||||||
- Zero block coding exploits sparsity
|
|
||||||
- Independent bitstreams for Mid and Side
|
|
||||||
|
|
||||||
8. **Zstd Compression**
|
|
||||||
- Level 7 compression on concatenated `[Mid_bitstream][Side_bitstream]`
|
|
||||||
- Cross-channel optimisation opportunities
|
|
||||||
- Adaptive compression based on content
|
|
||||||
|
|
||||||
### Decoder Pipeline
|
|
||||||
|
|
||||||
1. **Container Parsing**
|
|
||||||
- TAD packet identification (type 0x24)
|
|
||||||
- Chunk size extraction
|
|
||||||
- Compressed data boundaries
|
|
||||||
|
|
||||||
2. **Zstd Decompression**
|
|
||||||
- Decompress concatenated bitstreams
|
|
||||||
- Split into Mid and Side EZBC streams
|
|
||||||
|
|
||||||
3. **EZBC Decoding**
|
|
||||||
- Binary tree decoder per channel
|
|
||||||
- Reconstruct quantised int8 coefficients
|
|
||||||
- Progressive refinement reconstruction
|
|
||||||
|
|
||||||
4. **Lambda Decompanding**
|
|
||||||
- Inverse Laplacian CDF with channel-specific weights
|
|
||||||
- Reconstruct float32 DWT coefficients
|
|
||||||
- Apply subband-specific perceptual weights
|
|
||||||
|
|
||||||
5. **9-Level Inverse CDF 9/7 DWT**
|
|
||||||
- Inverse lifting scheme implementation
|
|
||||||
- Correct length tracking for non-power-of-2 chunk sizes
|
|
||||||
- Pre-calculated length sequence from forward transform
|
|
||||||
|
|
||||||
6. **Mid/Side to Left/Right**
|
|
||||||
- L = Mid + Side
|
|
||||||
- R = Mid - Side
|
|
||||||
- Reconstruct stereo channels
|
|
||||||
|
|
||||||
7. **Gamma Decompanding**
|
|
||||||
- Inverse gamma with γ⁻¹=2.0
|
|
||||||
- Restore original dynamic range
|
|
||||||
|
|
||||||
8. **De-emphasis Filter**
|
|
||||||
- Reverse pre-emphasis with α=0.5
|
|
||||||
- Remove frequency shaping
|
|
||||||
- Restore flat frequency response
|
|
||||||
|
|
||||||
9. **PCM32f to PCM8u Conversion**
|
|
||||||
- Noise-shaped dithering for 8-bit output
|
|
||||||
- Clamping to valid range
|
|
||||||
- Final output format
|
|
||||||
|
|
||||||
### Wavelet Implementation
|
|
||||||
|
|
||||||
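The CDF 9/7 wavelet follows a **two-stage lifting scheme**: each stage is a predict/update pair (α/β first, then γ/δ). The simplified snippet below shows the first pair only: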
|
|
||||||
|
|
||||||
```c
|
|
||||||
// Forward Transform: Predict → Update
|
|
||||||
// Predict step (generate high-pass)
|
|
||||||
temp[half + i] = data[odd] - α * (data[even_left] + data[even_right]);
|
|
||||||
|
|
||||||
// Update step (generate low-pass)
|
|
||||||
temp[i] = data[even] + β * (temp[half + i - 1] + temp[half + i]);
|
|
||||||
|
|
||||||
// Normalization (K factor)
|
|
||||||
temp[i] *= K;
|
|
||||||
temp[half + i] /= K;
|
|
||||||
|
|
||||||
// Inverse Transform: Denormalize → Undo Update → Undo Predict (reversed order)
|
|
||||||
temp[i] /= K;
|
|
||||||
temp[half + i] *= K;
|
|
||||||
|
|
||||||
temp[i] -= β * (temp[half + i - 1] + temp[half + i]);
|
|
||||||
data[odd] = temp[half + i] + α * (temp[i] + temp[i + 1]);
|
|
||||||
data[even] = temp[i];
|
|
||||||
```
|
|
||||||
|
|
||||||
**CDF 9/7 Coefficients**:
|
|
||||||
- α = -1.586134342
|
|
||||||
- β = -0.052980118
|
|
||||||
- γ = +0.882911075
|
|
||||||
- δ = +0.443506852
|
|
||||||
- K = 1.230174105
|
|
||||||
|
|
||||||
### Non-Power-of-2 Chunk Size Handling
|
|
||||||
|
|
||||||
Critical implementation detail for variable chunk sizes:
|
|
||||||
|
|
||||||
```c
|
|
||||||
// Pre-calculate exact length sequence from forward transform
|
|
||||||
int lengths[MAX_LEVELS + 1];
|
|
||||||
lengths[0] = chunk_size;
|
|
||||||
for (int i = 1; i <= levels; i++) {
|
|
||||||
lengths[i] = (lengths[i - 1] + 1) / 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apply inverse DWT using lengths[level] for each level
|
|
||||||
// NEVER use simple doubling (length *= 2) - incorrect for non-power-of-2!
|
|
||||||
```
|
|
||||||
|
|
||||||
Incorrect length tracking causes mirrored subband artefacts in decoded audio.
|
|
||||||
|
|
||||||
### Perceptual Quantisation Weights
|
|
||||||
|
|
||||||
Channel-specific weights for Mid (channel 0) and Side (channel 1):
|
|
||||||
|
|
||||||
```c
|
|
||||||
// Base quantiser weights per subband (9 levels + approximation)
|
|
||||||
float BASE_QUANTISER_WEIGHTS[2][10] = {
|
|
||||||
// Mid channel (0)
|
|
||||||
{4.0f, 2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f, 1.0f, 1.3f, 2.0f},
|
|
||||||
|
|
||||||
// Side channel (1)
|
|
||||||
{6.0f, 5.0f, 2.6f, 2.4f, 1.8f, 1.3f, 1.0f, 1.0f, 1.6f, 3.2f}
|
|
||||||
};
|
|
||||||
|
|
||||||
// During dequantisation:
|
|
||||||
float weight = BASE_QUANTISER_WEIGHTS[channel][subband] * quantiser_scale;
|
|
||||||
coeffs[i] = normalised_val * TAD32_COEFF_SCALARS[subband] * weight;
|
|
||||||
```
|
|
||||||
|
|
||||||
Different weights for the Mid and Side channels reflect the perceptual importance of the frequency bands in each channel. The DC frequency has the highest weight (4.0 Mid, 6.0 Side) due to energy concentration.
|
|
||||||
|
|
||||||
## Performance Characteristics
|
|
||||||
|
|
||||||
### Compression Efficiency
|
|
||||||
|
|
||||||
- **Target Compression**: 2:1 against PCMu8 baseline (4:1 against PCM16LE input)
|
|
||||||
- **Achieved Compression**: 2.51:1 against PCMu8 at quality level 3
|
|
||||||
- **Audio Quality**: Preserves full 0-16 kHz bandwidth
|
|
||||||
- **Coefficient Sparsity**: 86.9% zeros in Mid channel, 97.8% in Side channel (typical)
|
|
||||||
- **EZBC Benefits**: Exploits sparsity, progressive refinement, spatial clustering
|
|
||||||
|
|
||||||
### Computational Complexity
|
|
||||||
|
|
||||||
- **Encoding**: O(n log n) per chunk for DWT, O(n) for EZBC encoding
|
|
||||||
- **Decoding**: O(n log n) per chunk for inverse DWT, O(n) for EZBC decoding
|
|
||||||
- **Memory**: O(n) working memory for chunk processing
|
|
||||||
|
|
||||||
### Quality Characteristics
|
|
||||||
|
|
||||||
- **Frequency Response**: Flat 0-16 kHz within perceptual limits
|
|
||||||
- **Dynamic Range**: Preserved through gamma companding
|
|
||||||
- **Stereo Imaging**: Maintained through Mid/Side decorrelation
|
|
||||||
- **Perceptual Quality**: Optimised for human auditory system characteristics
|
|
||||||
|
|
||||||
## Integration with TAV
|
|
||||||
|
|
||||||
TAD is designed as an includable API for TAV video encoder integration:
|
|
||||||
|
|
||||||
- **Variable Chunk Sizes**: Audio chunks can match video GOP boundaries (e.g., 32016 samples for 1-second TAV GOP)
|
|
||||||
- **Unified Quality Levels**: TAD quality 0-5 synchronised with TAV quality 0-5
|
|
||||||
- **Embedded Packets**: TAV embeds TAD-compressed audio using packet type 0x24
|
|
||||||
- **Shared Container**: Single .tav file contains both video and audio streams
|
|
||||||
|
|
||||||
### TAV Integration Example
|
|
||||||
|
|
||||||
```c
|
|
||||||
// TAD handles non-power-of-2 chunk size correctly
|
|
||||||
tad_encode_chunk(audio_buffer, audio_samples_per_gop, output_buffer, &output_size);
|
|
||||||
|
|
||||||
// TAV embeds TAD packet
|
|
||||||
tav_write_packet(TAV_PACKET_AUDIO, output_buffer, output_size);
|
|
||||||
```
|
|
||||||
|
|
||||||
## Format Specification
|
|
||||||
|
|
||||||
For complete packet structure and bitstream format details, refer to `format documentation.txt`.
|
|
||||||
|
|
||||||
### Key Packet Types
|
|
||||||
|
|
||||||
- `0x24`: TAD audio packet (used in standalone .tad files and embedded in .tav files)
|
|
||||||
|
|
||||||
## Related Projects
|
|
||||||
|
|
||||||
- **TAV** (TSVM Advanced Video): Wavelet-based video codec with integrated TAD audio
|
|
||||||
- **TSVM**: Target virtual machine platform for TAD playback
|
|
||||||
|
|
||||||
## Licence
|
|
||||||
|
|
||||||
MIT.
|
|
||||||
@@ -1,261 +0,0 @@
|
|||||||
# TAV - TSVM Advanced Video Codec
|
|
||||||
|
|
||||||
A perceptually-optimised wavelet-based video codec designed for resource-constrained systems, featuring multiple wavelet types, temporal 3D DWT, and sophisticated compression techniques.
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
|
|
||||||
TAV (TSVM Advanced Video) is a modern video codec built on the discrete wavelet transform (DWT). It combines cutting-edge compression techniques with careful optimisation for resource-constrained systems.
|
|
||||||
|
|
||||||
### Key Advantages
|
|
||||||
|
|
||||||
- **No blocking artefacts**: Large-tile DWT encoding with padding eliminates DCT block boundaries
|
|
||||||
- **No colour banding**: Wavelets spread gradients across scales, preventing banding in the first place
|
|
||||||
- **Perceptual optimisation**: HVS-aware quantisation preserves visual quality where it matters
|
|
||||||
- **Temporal coherence**: 3D DWT with GOP encoding exploits inter-frame similarity
|
|
||||||
- **Efficient sparse coding**: EZBC encoding exploits coefficient sparsity for 16-18% additional compression
|
|
||||||
- **Hardware-friendly**: Designed for efficient decoding on resource-constrained platforms
|
|
||||||
|
|
||||||
## Features
|
|
||||||
|
|
||||||
### Compression Technology
|
|
||||||
|
|
||||||
- **Wavelet Types**
|
|
||||||
- **5/3 Reversible** (JPEG 2000 standard): Lossless-capable, good for archival
|
|
||||||
- **9/7 Irreversible** (default): Best overall compression, CDF 9/7 variant
|
|
||||||
|
|
||||||
- **Spatial Encoding**
|
|
||||||
- Large-tile encoding with padding, with optional single-tile mode (no blocking artefacts)
|
|
||||||
- 6-level DWT decomposition for deep frequency analysis
|
|
||||||
- Perceptual quantisation with HVS-optimised coefficient scaling
|
|
||||||
- YCoCg-R colour space with anisotropic chroma quantisation
|
|
||||||
|
|
||||||
- **Temporal Encoding** (3D DWT Mode)
|
|
||||||
- Group-of-pictures (GOP) encoding with adaptive size (typically 20 frames)
|
|
||||||
- Unified EZBC encoding across temporal dimension
|
|
||||||
- Adaptive GOP boundaries with scene change detection
|
|
||||||
|
|
||||||
- **EZBC Encoding**
|
|
||||||
- Binary tree embedded zero block coding exploits coefficient sparsity
|
|
||||||
- Progressive refinement structure with bitplane encoding
|
|
||||||
- Concatenated channel layout for cross-channel compression optimisation
|
|
||||||
- Typical sparsity: 86.9% (Y), 97.8% (Co), 99.5% (Cg)
|
|
||||||
- 16-18% compression improvement over naive coefficient encoding
|
|
||||||
|
|
||||||
### Audio Integration
|
|
||||||
|
|
||||||
TAV seamlessly integrates with the TAD (TSVM Advanced Audio) codec for synchronised audio/video encoding:
|
|
||||||
- Variable chunk sizes match video GOP boundaries
|
|
||||||
- Embedded TAD packets (type 0x24) with Zstd compression
|
|
||||||
- Unified container format
|
|
||||||
|
|
||||||
## Building
|
|
||||||
|
|
||||||
### Prerequisites
|
|
||||||
|
|
||||||
- C compiler (GCC/Clang)
|
|
||||||
- Zstandard library
|
|
||||||
- OpenCV 4 library (only used by experimental motion estimation feature)
|
|
||||||
|
|
||||||
### Compilation
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Build TAV encoder/decoder
|
|
||||||
make tav
|
|
||||||
|
|
||||||
# Build all tools including TAD audio codec
|
|
||||||
make all
|
|
||||||
|
|
||||||
# Clean build artefacts
|
|
||||||
make clean
|
|
||||||
```
|
|
||||||
|
|
||||||
### Build Targets
|
|
||||||
|
|
||||||
- `encoder_tav` - Main video encoder
|
|
||||||
- `decoder_tav` - Standalone video decoder
|
|
||||||
- `tav_inspector` - Packet analysis and debugging tool
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
|
|
||||||
### Basic Encoding
|
|
||||||
|
|
||||||
Encoding requires the FFmpeg executable to be installed on your system.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Default encoding (CDF 9/7 wavelet, quality level 3)
|
|
||||||
./encoder_tav -i input.mp4 -o output.tav
|
|
||||||
|
|
||||||
# Quality levels (0-5)
|
|
||||||
./encoder_tav -i input.avi -q 0 -o output.tav # Lowest quality, smallest file
|
|
||||||
./encoder_tav -i input.mkv -q 5 -o output.tav # Highest quality, largest file
|
|
||||||
```
|
|
||||||
|
|
||||||
### Intra-only Encoding
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Enable Intra-only encoding
|
|
||||||
./encoder_tav -i input.mp4 --intra-only -o output.tav
|
|
||||||
```
|
|
||||||
|
|
||||||
### Decoding and Inspection
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Decode TAV to raw video
|
|
||||||
./decoder_tav -i input.tav -o output.mkv
|
|
||||||
|
|
||||||
# Inspect packet structure (debugging)
|
|
||||||
./tav_inspector input.tav -v
|
|
||||||
```
|
|
||||||
|
|
||||||
### Frame Limiting
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Encode only first N frames (useful for testing)
|
|
||||||
./encoder_tav -i input.mp4 -o output.tav --encode-limit 100
|
|
||||||
```
|
|
||||||
|
|
||||||
## Technical Architecture
|
|
||||||
|
|
||||||
### Encoder Pipeline
|
|
||||||
|
|
||||||
1. **Input Processing**
|
|
||||||
- FFmpeg demuxing and frame extraction
|
|
||||||
- RGB to YCoCg-R colour space conversion
|
|
||||||
- Resolution validation and padding
|
|
||||||
|
|
||||||
2. **DWT Transform**
|
|
||||||
- Spatial: 6-level decomposition per frame
|
|
||||||
- Temporal: 1D DWT across GOP frames (3D DWT mode)
|
|
||||||
- Lifting scheme implementation for all wavelets
|
|
||||||
|
|
||||||
3. **Perceptual Quantisation**
|
|
||||||
- HVS-based subband weights
|
|
||||||
- Anisotropic chroma quantisation (YCoCg-R specific)
|
|
||||||
- Quality-dependent quantisation matrices
|
|
||||||
|
|
||||||
4. **EZBC Encoding**
|
|
||||||
- Binary tree embedded zero block coding per channel
|
|
||||||
- Progressive refinement by bitplanes
|
|
||||||
- Concatenated bitstream layout: `[Y_bitstream][Co_bitstream][Cg_bitstream]`
|
|
||||||
- Cross-channel compression optimisation
|
|
||||||
|
|
||||||
5. **Entropy Coding**
|
|
||||||
- Zstandard compression (level 7) on concatenated EZBC bitstreams
|
|
||||||
- Cross-channel compression opportunities
|
|
||||||
- Adaptive compression based on GOP structure
|
|
||||||
|
|
||||||
### Decoder Pipeline
|
|
||||||
|
|
||||||
1. **Container Parsing**
|
|
||||||
- Packet type identification (0x00-0xFF)
|
|
||||||
- Timecode synchronisation
|
|
||||||
- GOP boundary detection
|
|
||||||
|
|
||||||
2. **Entropy Decoding**
|
|
||||||
- Zstd decompression of concatenated bitstreams
|
|
||||||
- EZBC binary tree decoding per channel
|
|
||||||
- Progressive coefficient reconstruction
|
|
||||||
|
|
||||||
3. **Inverse Quantisation**
|
|
||||||
- Perceptual weight application
|
|
||||||
- Subband-specific scaling
|
|
||||||
- Coefficient reconstruction from sparse representation
|
|
||||||
|
|
||||||
4. **Inverse DWT**
|
|
||||||
- Temporal: 1D inverse DWT across frames (3D DWT mode)
|
|
||||||
- Spatial: 6-level inverse wavelet reconstruction
|
|
||||||
|
|
||||||
5. **Output Conversion**
|
|
||||||
- YCoCg-R to RGB colour space
|
|
||||||
- Clamping and dithering
|
|
||||||
- Frame buffering for display
|
|
||||||
|
|
||||||
### Wavelet Implementation
|
|
||||||
|
|
||||||
All wavelets follow a **lifting scheme** pattern with symmetric boundary extension:
|
|
||||||
|
|
||||||
```c
|
|
||||||
// Forward Transform: Predict → Update
|
|
||||||
temp[half + i] = data[odd] - predict(data[even]); // High-pass
|
|
||||||
temp[i] = data[even] + update(temp[half]); // Low-pass
|
|
||||||
|
|
||||||
// Inverse Transform: Undo Update → Undo Predict (reversed order)
|
|
||||||
data[even] = temp[i] - update(temp[half]); // Undo low-pass
|
|
||||||
data[odd] = temp[half + i] + predict(data[even]); // Undo high-pass
|
|
||||||
```
|
|
||||||
|
|
||||||
**Critical**: Forward and inverse transforms must use identical coefficient indexing and exactly reverse operations to avoid grid artefacts.
|
|
||||||
|
|
||||||
### Coefficient Layout
|
|
||||||
|
|
||||||
TAV uses **2D Spatial Layout** in memory for each decomposition level:
|
|
||||||
|
|
||||||
```
|
|
||||||
[LL] [LH] [HL] [HH] [LH] [HL] [HH] ...
|
|
||||||
└── Level 0 ──┘ └─── Level 1 ───┘
|
|
||||||
```
|
|
||||||
|
|
||||||
- `LL`: Low-pass (approximation) - progressively smaller with each level
|
|
||||||
- `LH`, `HL`, `HH`: High-pass subbands (horizontal, vertical, diagonal detail)
|
|
||||||
|
|
||||||
## Performance Characteristics
|
|
||||||
|
|
||||||
### Compression Efficiency
|
|
||||||
|
|
||||||
- **Sparsity Exploitation**: Typical quantised coefficient sparsity
|
|
||||||
- Y channel: 86.9% zeros
|
|
||||||
- Co channel: 97.8% zeros
|
|
||||||
- Cg channel: 99.5% zeros
|
|
||||||
|
|
||||||
- **EZBC Benefits**: 16-18% compression improvement over naive coefficient encoding through sparsity exploitation
|
|
||||||
|
|
||||||
- **Temporal Coherence**: Additional 15-25% improvement with 3D DWT (content-dependent)
|
|
||||||
|
|
||||||
### Computational Complexity
|
|
||||||
|
|
||||||
- **Encoding**: O(n log n) per frame for spatial DWT
|
|
||||||
- **Decoding**: O(n log n) per frame, optimised lifting scheme implementation
|
|
||||||
- **Memory**: Single-tile encoding requires O(w × h) working memory
|
|
||||||
|
|
||||||
### Quality Characteristics
|
|
||||||
|
|
||||||
- **No blocking artefacts**: Wavelet-based encoding is inherently smooth
|
|
||||||
- **Perceptual optimisation**: Better subjective quality than bitrate-equivalent DCT codecs
|
|
||||||
- **Scalability**: 6 quality levels (0-5) provide a wide range of bitrate/quality trade-offs
|
|
||||||
- **Temporal stability**: 3D DWT mode reduces flickering and temporal artefacts
|
|
||||||
|
|
||||||
## Format Specification
|
|
||||||
|
|
||||||
For complete packet structure and bitstream format details, refer to `format documentation.txt`.
|
|
||||||
|
|
||||||
### Key Packet Types
|
|
||||||
|
|
||||||
- `0x00`: Metadata and initialisation
|
|
||||||
- `0x01`: I-frame (intra-coded frame)
|
|
||||||
- `0x12`: GOP unified packet (3D DWT mode)
|
|
||||||
- `0x24`: Embedded TAD audio
|
|
||||||
- `0xFC`: GOP synchronisation
|
|
||||||
- `0xFD`: Timecode
|
|
||||||
|
|
||||||
## Debugging Tools
|
|
||||||
|
|
||||||
### TAV Inspector
|
|
||||||
|
|
||||||
Analyse TAV packet structure and decode individual frames:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Verbose packet analysis
|
|
||||||
./tav_inspector input.tav -v
|
|
||||||
|
|
||||||
# Extract specific frame ranges
|
|
||||||
./tav_inspector input.tav --frame-range 100-200
|
|
||||||
```
|
|
||||||
|
|
||||||
## Related Projects
|
|
||||||
|
|
||||||
- **TAD** (TSVM Advanced Audio): Perceptual audio codec using CDF 9/7 wavelets
|
|
||||||
- **TSVM**: Target virtual machine platform for TAV playback
|
|
||||||
|
|
||||||
## Licence
|
|
||||||
|
|
||||||
MIT.
|
|
||||||
@@ -1,424 +0,0 @@
|
|||||||
/**
|
|
||||||
* TAV+UCF Payload Writer for TAV Files
|
|
||||||
* Creates a TAV header-only block (32 bytes) + UCF cue file (4064 bytes) for concatenated TAV files
|
|
||||||
* Total output size: 4096 bytes (32 + 4064)
|
|
||||||
* Usage: ./create_ucf_payload input.tav output.ucf [track_names.txt]
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
#define TAV_HEADER_SIZE 32
|
|
||||||
#define UCF_SIZE 4064
|
|
||||||
#define TAV_OFFSET_BIAS (TAV_HEADER_SIZE + UCF_SIZE)
|
|
||||||
#define TAV_MAGIC "\x1FTSVMTA" // Matches both TAV and TAP
|
|
||||||
|
|
||||||
/* Leading fields of the 32-byte TAV file header (TAV_HEADER_SIZE).
 * The struct is declared packed, so members are laid out back to back with no
 * padding: the six fields below occupy 18 bytes. Their order and sizes line up
 * with header bytes 0-17 that write_tav_header_only() fills in by hand.
 * The remaining header fields are intentionally not declared here. */
typedef struct {
|
|
||||||
uint8_t magic[8]; // 8-byte file signature (header bytes 0-7)
|
|
||||||
uint8_t version; // format version (header byte 8)
|
|
||||||
uint16_t width; // frame width (header bytes 9-10)
|
|
||||||
uint16_t height; // frame height (header bytes 11-12)
|
|
||||||
uint8_t fps; // frames per second (header byte 13)
|
|
||||||
uint32_t total_frames; // frame count (header bytes 14-17)
|
|
||||||
// ... rest of header fields
|
|
||||||
} __attribute__((packed)) TAVHeader; // packed: no padding between members
|
|
||||||
|
|
||||||
// Write TAV header-only payload (File Role = 1)
|
|
||||||
static void write_tav_header_only(FILE *out) {
|
|
||||||
uint8_t header[TAV_HEADER_SIZE] = {0};
|
|
||||||
|
|
||||||
// Magic: "\x1FTSVMTAV"
|
|
||||||
header[0] = 0x1F;
|
|
||||||
header[1] = 'T';
|
|
||||||
header[2] = 'S';
|
|
||||||
header[3] = 'V';
|
|
||||||
header[4] = 'M';
|
|
||||||
header[5] = 'T';
|
|
||||||
header[6] = 'A';
|
|
||||||
header[7] = 'V';
|
|
||||||
|
|
||||||
// Version: 5 (YCoCg-R perceptual)
|
|
||||||
header[8] = 5;
|
|
||||||
|
|
||||||
// Width: 560 (little-endian)
|
|
||||||
header[9] = 0x30;
|
|
||||||
header[10] = 0x02;
|
|
||||||
|
|
||||||
// Height: 448 (little-endian)
|
|
||||||
header[11] = 0xC0;
|
|
||||||
header[12] = 0x01;
|
|
||||||
|
|
||||||
// FPS: 30
|
|
||||||
header[13] = 30;
|
|
||||||
|
|
||||||
// Total Frames: 0xFFFFFFFF (still image marker / not applicable)
|
|
||||||
header[14] = 0xFF;
|
|
||||||
header[15] = 0xFF;
|
|
||||||
header[16] = 0xFF;
|
|
||||||
header[17] = 0xFF;
|
|
||||||
|
|
||||||
// Wavelet Filter Type: 1 (9/7 irreversible, default)
|
|
||||||
header[18] = 1;
|
|
||||||
|
|
||||||
// Decomposition Levels: 6
|
|
||||||
header[19] = 6;
|
|
||||||
|
|
||||||
// Quantiser Indices (Y, Co, Cg): 255 (not applicable for header-only)
|
|
||||||
header[20] = 0xFF;
|
|
||||||
header[21] = 0xFF;
|
|
||||||
header[22] = 0xFF;
|
|
||||||
|
|
||||||
// Extra Feature Flags: 0x80 (bit 7 = has no actual packets)
|
|
||||||
header[23] = 0x80;
|
|
||||||
|
|
||||||
// Video Flags: 0
|
|
||||||
header[24] = 0;
|
|
||||||
|
|
||||||
// Encoder quality level: 0
|
|
||||||
header[25] = 0;
|
|
||||||
|
|
||||||
// Channel layout: 0 (Y-Co-Cg)
|
|
||||||
header[26] = 0;
|
|
||||||
|
|
||||||
// Reserved[4]: zeros (27-30 already initialised to 0)
|
|
||||||
|
|
||||||
// File Role: 1 (header-only, UCF payload follows)
|
|
||||||
header[31] = 1;
|
|
||||||
|
|
||||||
fwrite(header, 1, TAV_HEADER_SIZE, out);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write UCF header
|
|
||||||
static void write_ucf_header(FILE *out, uint16_t num_cues) {
|
|
||||||
uint8_t magic[8] = {0x1F, 'T', 'S', 'V', 'M', 'U', 'C', 'F'};
|
|
||||||
uint8_t version = 1;
|
|
||||||
uint32_t cue_file_size = TAV_OFFSET_BIAS;
|
|
||||||
uint8_t reserved = 0;
|
|
||||||
|
|
||||||
fwrite(magic, 1, 8, out);
|
|
||||||
fwrite(&version, 1, 1, out);
|
|
||||||
fwrite(&num_cues, 2, 1, out);
|
|
||||||
fwrite(&cue_file_size, 4, 1, out);
|
|
||||||
fwrite(&reserved, 1, 1, out);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write UCF cue element (internal addressing, human+machine interactable)
|
|
||||||
static void write_cue_element(FILE *out, uint64_t offset, const char *name) {
|
|
||||||
uint8_t addressing_mode = 0x22; // 0x20 (human) | 0x01 (machine) | 0x02 (internal)
|
|
||||||
uint16_t name_len = strlen(name);
|
|
||||||
|
|
||||||
// Offset with 4KB bias
|
|
||||||
uint64_t biased_offset = offset + TAV_OFFSET_BIAS;
|
|
||||||
|
|
||||||
fwrite(&addressing_mode, 1, 1, out);
|
|
||||||
fwrite(&name_len, 2, 1, out);
|
|
||||||
fwrite(name, 1, name_len, out);
|
|
||||||
|
|
||||||
// Write 48-bit (6-byte) offset
|
|
||||||
fwrite(&biased_offset, 6, 1, out);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read track names from file (newline-delimited)
|
|
||||||
static char **read_track_names(const char *filename, int *count_out) {
|
|
||||||
FILE *f = fopen(filename, "r");
|
|
||||||
if (!f) {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
char **names = NULL;
|
|
||||||
int count = 0;
|
|
||||||
int capacity = 16;
|
|
||||||
char line[256];
|
|
||||||
|
|
||||||
names = malloc(capacity * sizeof(char *));
|
|
||||||
if (!names) {
|
|
||||||
fclose(f);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
while (fgets(line, sizeof(line), f)) {
|
|
||||||
// Remove trailing newline
|
|
||||||
size_t len = strlen(line);
|
|
||||||
if (len > 0 && line[len - 1] == '\n') {
|
|
||||||
line[len - 1] = '\0';
|
|
||||||
len--;
|
|
||||||
}
|
|
||||||
if (len > 0 && line[len - 1] == '\r') {
|
|
||||||
line[len - 1] = '\0';
|
|
||||||
len--;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip empty lines
|
|
||||||
if (len == 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Expand capacity if needed
|
|
||||||
if (count >= capacity) {
|
|
||||||
capacity *= 2;
|
|
||||||
char **new_names = realloc(names, capacity * sizeof(char *));
|
|
||||||
if (!new_names) {
|
|
||||||
// Cleanup on failure
|
|
||||||
for (int i = 0; i < count; i++) {
|
|
||||||
free(names[i]);
|
|
||||||
}
|
|
||||||
free(names);
|
|
||||||
fclose(f);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
names = new_names;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Allocate and copy name
|
|
||||||
names[count] = strdup(line);
|
|
||||||
if (!names[count]) {
|
|
||||||
// Cleanup on failure
|
|
||||||
for (int i = 0; i < count; i++) {
|
|
||||||
free(names[i]);
|
|
||||||
}
|
|
||||||
free(names);
|
|
||||||
fclose(f);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
count++;
|
|
||||||
}
|
|
||||||
|
|
||||||
fclose(f);
|
|
||||||
*count_out = count;
|
|
||||||
return names;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find all TAV headers in the file (with smart packet-wise skipping)
|
|
||||||
static int find_tav_headers(FILE *in, uint64_t **offsets_out) {
|
|
||||||
uint64_t *offsets = NULL;
|
|
||||||
int count = 0;
|
|
||||||
int capacity = 16;
|
|
||||||
|
|
||||||
offsets = malloc(capacity * sizeof(uint64_t));
|
|
||||||
if (!offsets) {
|
|
||||||
fprintf(stderr, "Error: Memory allocation failed\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Seek to beginning
|
|
||||||
fseek(in, 0, SEEK_SET);
|
|
||||||
|
|
||||||
uint8_t magic[8];
|
|
||||||
|
|
||||||
while (1) {
|
|
||||||
// Remember current position before reading
|
|
||||||
uint64_t pos = ftell(in);
|
|
||||||
|
|
||||||
// Try to read magic
|
|
||||||
if (fread(magic, 1, 8, in) != 8) {
|
|
||||||
// End of file
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for TAV magic signature
|
|
||||||
if (memcmp(magic, TAV_MAGIC, 7) == 0 && (magic[7] == 'V' || magic[7] == 'P')) {
|
|
||||||
// Found TAV header
|
|
||||||
if (count >= capacity) {
|
|
||||||
capacity *= 2;
|
|
||||||
uint64_t *new_offsets = realloc(offsets, capacity * sizeof(uint64_t));
|
|
||||||
if (!new_offsets) {
|
|
||||||
fprintf(stderr, "Error: Memory reallocation failed\n");
|
|
||||||
free(offsets);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
offsets = new_offsets;
|
|
||||||
}
|
|
||||||
|
|
||||||
offsets[count++] = pos;
|
|
||||||
printf("Found TAV header at offset: 0x%lX (%lu)\n", pos, pos);
|
|
||||||
|
|
||||||
// Skip past this header (32 bytes total)
|
|
||||||
uint64_t packet_pos = pos + 32;
|
|
||||||
fseek(in, packet_pos, SEEK_SET);
|
|
||||||
|
|
||||||
// Smart packet-wise skipping
|
|
||||||
while (1) {
|
|
||||||
uint8_t packet_type;
|
|
||||||
if (fread(&packet_type, 1, 1, in) != 1) {
|
|
||||||
// End of file
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if this is the start of next TAV file (0x1F is prohibited as packet type)
|
|
||||||
if (packet_type == 0x1F) {
|
|
||||||
// Rewind 1 byte to re-read as magic at the top of outer loop
|
|
||||||
fseek(in, packet_pos, SEEK_SET);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// printf("TAV Packet 0x%02X at 0x%lX\n", packet_type, packet_pos);
|
|
||||||
|
|
||||||
// Sync packets (0xFE, 0xFF) have no payload size - they're single-byte packets
|
|
||||||
if (packet_type == 0xFE || packet_type == 0xFF) {
|
|
||||||
packet_pos += 1;
|
|
||||||
fseek(in, packet_pos, SEEK_SET);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read payload size (uint32, little-endian)
|
|
||||||
uint32_t payload_size = 0;
|
|
||||||
if (fread(&payload_size, 4, 1, in) != 1) {
|
|
||||||
// End of file
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip packet: 1 byte (type) + 4 bytes (size) + payload_size
|
|
||||||
packet_pos += 1 + 4 + payload_size;
|
|
||||||
fseek(in, packet_pos, SEEK_SET);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Move forward by 1 byte for next search
|
|
||||||
fseek(in, pos + 1, SEEK_SET);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
*offsets_out = offsets;
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
|
||||||
if (argc < 3 || argc > 4) {
|
|
||||||
fprintf(stderr, "Usage: %s <input.tav> <output.ucf> [track_names.txt]\n", argv[0]);
|
|
||||||
fprintf(stderr, "Creates a 4KB UCF payload for concatenated TAV file\n");
|
|
||||||
fprintf(stderr, " track_names.txt: Optional file with track names (one per line)\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char *input_path = argv[1];
|
|
||||||
const char *output_path = argv[2];
|
|
||||||
const char *names_path = (argc == 4) ? argv[3] : NULL;
|
|
||||||
|
|
||||||
// Read track names if provided
|
|
||||||
char **track_names = NULL;
|
|
||||||
int num_names = 0;
|
|
||||||
if (names_path) {
|
|
||||||
track_names = read_track_names(names_path, &num_names);
|
|
||||||
if (track_names) {
|
|
||||||
printf("Loaded %d track name(s) from '%s'\n", num_names, names_path);
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "Warning: Could not read track names from '%s', using defaults\n", names_path);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Open input file
|
|
||||||
FILE *in = fopen(input_path, "rb");
|
|
||||||
if (!in) {
|
|
||||||
fprintf(stderr, "Error: Cannot open input file '%s'\n", input_path);
|
|
||||||
if (track_names) {
|
|
||||||
for (int i = 0; i < num_names; i++) {
|
|
||||||
free(track_names[i]);
|
|
||||||
}
|
|
||||||
free(track_names);
|
|
||||||
}
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find all TAV headers
|
|
||||||
uint64_t *offsets = NULL;
|
|
||||||
int num_tracks = find_tav_headers(in, &offsets);
|
|
||||||
fclose(in);
|
|
||||||
|
|
||||||
if (num_tracks < 0) {
|
|
||||||
fprintf(stderr, "Error: Failed to scan input file\n");
|
|
||||||
if (track_names) {
|
|
||||||
for (int i = 0; i < num_names; i++) {
|
|
||||||
free(track_names[i]);
|
|
||||||
}
|
|
||||||
free(track_names);
|
|
||||||
}
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (num_tracks == 0) {
|
|
||||||
fprintf(stderr, "Error: No TAV headers found in input file\n");
|
|
||||||
free(offsets);
|
|
||||||
if (track_names) {
|
|
||||||
for (int i = 0; i < num_names; i++) {
|
|
||||||
free(track_names[i]);
|
|
||||||
}
|
|
||||||
free(track_names);
|
|
||||||
}
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("\nFound %d TAV header(s)\n", num_tracks);
|
|
||||||
|
|
||||||
// Create output UCF file
|
|
||||||
FILE *out = fopen(output_path, "wb");
|
|
||||||
if (!out) {
|
|
||||||
fprintf(stderr, "Error: Cannot create output file '%s'\n", output_path);
|
|
||||||
free(offsets);
|
|
||||||
if (track_names) {
|
|
||||||
for (int i = 0; i < num_names; i++) {
|
|
||||||
free(track_names[i]);
|
|
||||||
}
|
|
||||||
free(track_names);
|
|
||||||
}
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write TAV header-only payload (File Role = 1)
|
|
||||||
write_tav_header_only(out);
|
|
||||||
printf("Written TAV header-only payload (%d bytes)\n", TAV_HEADER_SIZE);
|
|
||||||
|
|
||||||
// Write UCF header
|
|
||||||
write_ucf_header(out, num_tracks);
|
|
||||||
|
|
||||||
// Write cue elements
|
|
||||||
for (int i = 0; i < num_tracks; i++) {
|
|
||||||
char default_name[32];
|
|
||||||
const char *name;
|
|
||||||
|
|
||||||
// Use custom name if available, otherwise generate default
|
|
||||||
if (track_names && i < num_names) {
|
|
||||||
name = track_names[i];
|
|
||||||
} else {
|
|
||||||
snprintf(default_name, sizeof(default_name), "Track %d", i + 1);
|
|
||||||
name = default_name;
|
|
||||||
}
|
|
||||||
|
|
||||||
write_cue_element(out, offsets[i], name);
|
|
||||||
printf("Written cue element: '%s' at offset 0x%lX (biased: 0x%lX)\n",
|
|
||||||
name, offsets[i], offsets[i] + TAV_OFFSET_BIAS);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get current file position
|
|
||||||
long current_pos = ftell(out);
|
|
||||||
|
|
||||||
// Fill remaining space with zeros to reach TAV header + 4KB UCF
|
|
||||||
size_t target_size = TAV_HEADER_SIZE + UCF_SIZE;
|
|
||||||
if (current_pos < target_size) {
|
|
||||||
size_t remaining = target_size - current_pos;
|
|
||||||
uint8_t *zeros = calloc(remaining, 1);
|
|
||||||
if (zeros) {
|
|
||||||
fwrite(zeros, 1, remaining, out);
|
|
||||||
free(zeros);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fclose(out);
|
|
||||||
free(offsets);
|
|
||||||
|
|
||||||
// Clean up track names
|
|
||||||
if (track_names) {
|
|
||||||
for (int i = 0; i < num_names; i++) {
|
|
||||||
free(track_names[i]);
|
|
||||||
}
|
|
||||||
free(track_names);
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("\nTAV+UCF payload created successfully: %s\n", output_path);
|
|
||||||
printf("File size: %zu bytes (TAV header: %d + UCF: %d)\n",
|
|
||||||
(size_t)(TAV_HEADER_SIZE + UCF_SIZE), TAV_HEADER_SIZE, UCF_SIZE);
|
|
||||||
printf("\nTo create seekable TAV file, prepend this payload to your concatenated TAV file:\n");
|
|
||||||
printf(" cat %s input.tav > output_seekable.tav\n", output_path);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
@@ -1,935 +0,0 @@
|
|||||||
#define _GNU_SOURCE
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <zlib.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <sys/wait.h>
|
|
||||||
#include <getopt.h>
|
|
||||||
#include <sys/time.h>
|
|
||||||
|
|
||||||
// TVDOS Movie format constants
|
|
||||||
#define TVDOS_MAGIC "\x1F\x54\x53\x56\x4D\x4D\x4F\x56" // 0x1F followed by ASCII "TSVMMOV" (8 bytes, no space)
|
|
||||||
#define IPF_BLOCK_SIZE 12
|
|
||||||
|
|
||||||
// iPF1-delta opcodes
|
|
||||||
#define SKIP_OP 0x00
|
|
||||||
#define PATCH_OP 0x01
|
|
||||||
#define REPEAT_OP 0x02
|
|
||||||
#define END_OP 0xFF
|
|
||||||
|
|
||||||
// Video packet types
|
|
||||||
#define IPF1_PACKET_TYPE 0x04, 0x00 // iPF Type 1 (4 + 0)
|
|
||||||
#define IPF1_DELTA_PACKET_TYPE 0x04, 0x02 // iPF Type 1 delta
|
|
||||||
#define SYNC_PACKET_TYPE 0xFF, 0xFF // Sync packet
|
|
||||||
|
|
||||||
// Audio constants
|
|
||||||
#define MP2_SAMPLE_RATE 32000
|
|
||||||
#define MP2_DEFAULT_PACKET_SIZE 0x240
|
|
||||||
#define MP2_PACKET_TYPE_BASE 0x11
|
|
||||||
|
|
||||||
// Default values
|
|
||||||
#define DEFAULT_WIDTH 560
|
|
||||||
#define DEFAULT_HEIGHT 448
|
|
||||||
#define TEMP_AUDIO_FILE "/tmp/tvdos_temp_audio.mp2"
|
|
||||||
|
|
||||||
// All state for one encoding run: user options, probed metadata, working
// buffers, audio bookkeeping, ffmpeg pipes and progress counters.
typedef struct encoder_config {
    /* Options and probed source properties */
    char *input_file;
    char *output_file;
    int width;
    int height;
    int fps;
    int total_frames;
    double duration;
    int has_audio;
    int output_to_stdout;

    /* Working buffers */
    uint8_t *previous_ipf_frame;
    uint8_t *current_ipf_frame;
    uint8_t *delta_buffer;
    uint8_t *rgb_buffer;
    uint8_t *compressed_buffer;
    uint8_t *mp2_buffer;
    size_t frame_buffer_size;

    /* MP2 audio stream state */
    FILE *mp2_file;
    int mp2_packet_size;
    int mp2_rate_index;
    size_t audio_remaining;
    int audio_frames_in_buffer;
    int target_audio_buffer_size;

    /* Pipes to the ffmpeg child processes */
    FILE *ffmpeg_video_pipe;
    FILE *ffmpeg_audio_pipe;

    /* Progress reporting */
    struct timeval start_time;
    struct timeval last_progress_time;
    size_t total_output_bytes;

    /* Selected dithering mode */
    int dither_mode;
} encoder_config_t;
|
|
||||||
|
|
||||||
// One colour sample in YCoCg form (conversion corrected to match the Kotlin
// implementation).
typedef struct {
    float y;
    float co;
    float cg;
} ycocg_t;
|
|
||||||
|
|
||||||
static ycocg_t rgb_to_ycocg_correct(uint8_t r, uint8_t g, uint8_t b, float ditherThreshold) {
|
|
||||||
ycocg_t result;
|
|
||||||
float rf = floor((ditherThreshold / 15.0 + r / 255.0) * 15.0) / 15.0;
|
|
||||||
float gf = floor((ditherThreshold / 15.0 + g / 255.0) * 15.0) / 15.0;
|
|
||||||
float bf = floor((ditherThreshold / 15.0 + b / 255.0) * 15.0) / 15.0;
|
|
||||||
|
|
||||||
// CORRECTED: Match Kotlin implementation exactly
|
|
||||||
float co = rf - bf; // co = r - b [-1..1]
|
|
||||||
float tmp = bf + co / 2.0f; // tmp = b + co/2
|
|
||||||
float cg = gf - tmp; // cg = g - tmp [-1..1]
|
|
||||||
float y = tmp + cg / 2.0f; // y = tmp + cg/2 [0..1]
|
|
||||||
|
|
||||||
result.y = y;
|
|
||||||
result.co = co;
|
|
||||||
result.cg = cg;
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int quantise_4bit_y(float value) {
|
|
||||||
// Y quantisation: round(y * 15)
|
|
||||||
return (int)round(fmaxf(0.0f, fminf(15.0f, value * 15.0f)));
|
|
||||||
}
|
|
||||||
|
|
||||||
static int chroma_to_four_bits(float f) {
|
|
||||||
// CORRECTED: Match Kotlin chromaToFourBits function exactly
|
|
||||||
// return (round(f * 8) + 7).coerceIn(0..15)
|
|
||||||
int result = (int)round(f * 8.0f) + 7;
|
|
||||||
return fmaxf(0, fminf(15, result));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse resolution string like "1024x768".
// Returns 1 and stores the values in *width / *height when both numbers are
// present and strictly positive. Returns 0 otherwise (including a NULL
// string) and leaves *width / *height untouched, so callers keep their
// defaults.
static int parse_resolution(const char *res_str, int *width, int *height) {
    int w = 0;
    int h = 0;

    if (!res_str) {
        return 0;
    }
    // Parse into temporaries so a half-matched string cannot clobber *width
    if (sscanf(res_str, "%dx%d", &w, &h) != 2) {
        return 0;
    }
    // Zero or negative dimensions cannot describe a frame
    if (w <= 0 || h <= 0) {
        return 0;
    }

    *width = w;
    *height = h;
    return 1;
}
|
|
||||||
|
|
||||||
// Execute command and capture output
|
|
||||||
static char *execute_command(const char *command) {
|
|
||||||
FILE *pipe = popen(command, "r");
|
|
||||||
if (!pipe) return NULL;
|
|
||||||
|
|
||||||
char *result = malloc(4096);
|
|
||||||
size_t len = fread(result, 1, 4095, pipe);
|
|
||||||
result[len] = '\0';
|
|
||||||
|
|
||||||
pclose(pipe);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get video metadata using ffprobe
|
|
||||||
static int get_video_metadata(encoder_config_t *config) {
|
|
||||||
char command[1024];
|
|
||||||
char *output;
|
|
||||||
|
|
||||||
// Get frame count
|
|
||||||
snprintf(command, sizeof(command),
|
|
||||||
"ffprobe -v quiet -select_streams v:0 -count_frames -show_entries stream=nb_read_frames -of csv=p=0 \"%s\"",
|
|
||||||
config->input_file);
|
|
||||||
output = execute_command(command);
|
|
||||||
if (!output) {
|
|
||||||
fprintf(stderr, "Failed to get frame count\n");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
config->total_frames = atoi(output);
|
|
||||||
free(output);
|
|
||||||
|
|
||||||
// Get frame rate
|
|
||||||
snprintf(command, sizeof(command),
|
|
||||||
"ffprobe -v quiet -select_streams v:0 -show_entries stream=r_frame_rate -of csv=p=0 \"%s\"",
|
|
||||||
config->input_file);
|
|
||||||
output = execute_command(command);
|
|
||||||
if (!output) {
|
|
||||||
fprintf(stderr, "Failed to get frame rate\n");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse framerate (could be "30/1" or "29.97")
|
|
||||||
int num, den;
|
|
||||||
if (sscanf(output, "%d/%d", &num, &den) == 2) {
|
|
||||||
config->fps = (den > 0) ? (num / den) : 30;
|
|
||||||
} else {
|
|
||||||
config->fps = (int)round(atof(output));
|
|
||||||
}
|
|
||||||
free(output);
|
|
||||||
|
|
||||||
// Get duration
|
|
||||||
snprintf(command, sizeof(command),
|
|
||||||
"ffprobe -v quiet -show_entries format=duration -of csv=p=0 \"%s\"",
|
|
||||||
config->input_file);
|
|
||||||
output = execute_command(command);
|
|
||||||
if (output) {
|
|
||||||
config->duration = atof(output);
|
|
||||||
free(output);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if has audio
|
|
||||||
snprintf(command, sizeof(command),
|
|
||||||
"ffprobe -v quiet -select_streams a:0 -show_entries stream=index -of csv=p=0 \"%s\"",
|
|
||||||
config->input_file);
|
|
||||||
output = execute_command(command);
|
|
||||||
config->has_audio = (output && strlen(output) > 0 && atoi(output) >= 0);
|
|
||||||
if (output) free(output);
|
|
||||||
|
|
||||||
// Validate frame count using duration if needed
|
|
||||||
if (config->total_frames <= 0 && config->duration > 0) {
|
|
||||||
config->total_frames = (int)(config->duration * config->fps);
|
|
||||||
}
|
|
||||||
|
|
||||||
fprintf(stderr, "Video metadata:\n");
|
|
||||||
fprintf(stderr, " Frames: %d\n", config->total_frames);
|
|
||||||
fprintf(stderr, " FPS: %d\n", config->fps);
|
|
||||||
fprintf(stderr, " Duration: %.2fs\n", config->duration);
|
|
||||||
fprintf(stderr, " Audio: %s\n", config->has_audio ? "Yes" : "No");
|
|
||||||
fprintf(stderr, " Resolution: %dx%d\n", config->width, config->height);
|
|
||||||
|
|
||||||
return (config->total_frames > 0 && config->fps > 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start FFmpeg process for video conversion
|
|
||||||
static int start_video_conversion(encoder_config_t *config) {
|
|
||||||
char command[2048];
|
|
||||||
snprintf(command, sizeof(command),
|
|
||||||
"ffmpeg -i \"%s\" -f rawvideo -pix_fmt rgb24 -vf scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d -y - 2>/dev/null",
|
|
||||||
config->input_file, config->width, config->height, config->width, config->height);
|
|
||||||
|
|
||||||
config->ffmpeg_video_pipe = popen(command, "r");
|
|
||||||
return (config->ffmpeg_video_pipe != NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start FFmpeg process for audio conversion
|
|
||||||
static int start_audio_conversion(encoder_config_t *config) {
|
|
||||||
if (!config->has_audio) return 1;
|
|
||||||
|
|
||||||
char command[2048];
|
|
||||||
snprintf(command, sizeof(command),
|
|
||||||
"ffmpeg -i \"%s\" -acodec libtwolame -psymodel 4 -b:a 192k -ar %d -ac 2 -y \"%s\" 2>/dev/null",
|
|
||||||
config->input_file, MP2_SAMPLE_RATE, TEMP_AUDIO_FILE);
|
|
||||||
|
|
||||||
int result = system(command);
|
|
||||||
if (result == 0) {
|
|
||||||
config->mp2_file = fopen(TEMP_AUDIO_FILE, "rb");
|
|
||||||
if (config->mp2_file) {
|
|
||||||
fseek(config->mp2_file, 0, SEEK_END);
|
|
||||||
config->audio_remaining = ftell(config->mp2_file);
|
|
||||||
fseek(config->mp2_file, 0, SEEK_SET);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fprintf(stderr, "Warning: Failed to convert audio, proceeding without audio\n");
|
|
||||||
config->has_audio = 0;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write variable-length integer.
// Emits `value` 7 bits at a time, least-significant group first; every byte
// except the last has its top bit set. *ptr is advanced past the bytes
// written.
static void write_varint(uint8_t **ptr, uint32_t value) {
    uint8_t *cursor = *ptr;

    for (; value > 0x7F; value >>= 7) {
        *cursor++ = (uint8_t)(0x80 | (value & 0x7F));
    }
    *cursor++ = (uint8_t)(value & 0x7F);

    *ptr = cursor;
}
|
|
||||||
|
|
||||||
// Get MP2 packet size and rate index
|
|
||||||
static int get_mp2_packet_size(uint8_t *header) {
|
|
||||||
int bitrate_index = (header[2] >> 4) & 0xF;
|
|
||||||
int padding_bit = (header[2] >> 1) & 0x1;
|
|
||||||
|
|
||||||
int bitrates[] = {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, -1};
|
|
||||||
int bitrate = bitrates[bitrate_index];
|
|
||||||
|
|
||||||
if (bitrate <= 0) return MP2_DEFAULT_PACKET_SIZE;
|
|
||||||
|
|
||||||
int frame_size = (144 * bitrate * 1000) / MP2_SAMPLE_RATE + padding_bit;
|
|
||||||
return frame_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Translate an MP2 packet size into its rate index.
// Known sizes map to even indices 0, 2, ... 26 in ascending size order;
// unknown sizes fall back to 14. Mono streams use the next odd index.
static int mp2_packet_size_to_rate_index(int packet_size, int is_mono) {
    static const int known_sizes[] = {
        144, 216, 252, 288, 360, 432, 504, 576, 720, 864, 1008, 1152, 1440, 1728
    };
    const int known_count = (int)(sizeof(known_sizes) / sizeof(known_sizes[0]));

    int rate_index = 14;  // default for unrecognised sizes
    for (int slot = 0; slot < known_count; slot++) {
        if (known_sizes[slot] == packet_size) {
            rate_index = slot * 2;
            break;
        }
    }

    return is_mono ? rate_index + 1 : rate_index;
}
|
|
||||||
|
|
||||||
// Gzip compress function (instead of zlib)
|
|
||||||
static size_t gzip_compress(uint8_t *src, size_t src_len, uint8_t *dst, size_t dst_max) {
|
|
||||||
z_stream stream = {0};
|
|
||||||
stream.next_in = src;
|
|
||||||
stream.avail_in = src_len;
|
|
||||||
stream.next_out = dst;
|
|
||||||
stream.avail_out = dst_max;
|
|
||||||
|
|
||||||
// Use deflateInit2 with gzip format
|
|
||||||
if (deflateInit2(&stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY) != Z_OK) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (deflate(&stream, Z_FINISH) != Z_STREAM_END) {
|
|
||||||
deflateEnd(&stream);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t compressed_size = stream.total_out;
|
|
||||||
deflateEnd(&stream);
|
|
||||||
return compressed_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Bayer dithering kernels (4 patterns, each 4x4, stored row by row).
// Every entry is (rank + 0.5) / 16 for the rank listed below.
#define BAYER_T(rank) (((rank) + 0.5f) / 16.0f)
static const float bayerKernels[4][16] = {
    { // Pattern 0
        BAYER_T(0),  BAYER_T(8),  BAYER_T(2),  BAYER_T(10),
        BAYER_T(12), BAYER_T(4),  BAYER_T(14), BAYER_T(6),
        BAYER_T(3),  BAYER_T(11), BAYER_T(1),  BAYER_T(9),
        BAYER_T(15), BAYER_T(7),  BAYER_T(13), BAYER_T(5)
    },
    { // Pattern 1
        BAYER_T(8),  BAYER_T(2),  BAYER_T(10), BAYER_T(0),
        BAYER_T(4),  BAYER_T(14), BAYER_T(6),  BAYER_T(12),
        BAYER_T(11), BAYER_T(1),  BAYER_T(9),  BAYER_T(3),
        BAYER_T(7),  BAYER_T(13), BAYER_T(5),  BAYER_T(15)
    },
    { // Pattern 2
        BAYER_T(7),  BAYER_T(13), BAYER_T(5),  BAYER_T(15),
        BAYER_T(8),  BAYER_T(2),  BAYER_T(10), BAYER_T(0),
        BAYER_T(4),  BAYER_T(14), BAYER_T(6),  BAYER_T(12),
        BAYER_T(11), BAYER_T(1),  BAYER_T(9),  BAYER_T(3)
    },
    { // Pattern 3
        BAYER_T(15), BAYER_T(7),  BAYER_T(13), BAYER_T(5),
        BAYER_T(0),  BAYER_T(8),  BAYER_T(2),  BAYER_T(10),
        BAYER_T(12), BAYER_T(4),  BAYER_T(14), BAYER_T(6),
        BAYER_T(3),  BAYER_T(11), BAYER_T(1),  BAYER_T(9)
    }
};
#undef BAYER_T
|
|
||||||
|
|
||||||
// CORRECTED: Encode a 4x4 block to iPF1 format matching Kotlin implementation
|
|
||||||
static void encode_ipf1_block_correct(uint8_t *rgb_data, int width, int height, int block_x, int block_y,
|
|
||||||
int channels, int pattern, uint8_t *output) {
|
|
||||||
ycocg_t pixels[16];
|
|
||||||
int y_values[16];
|
|
||||||
float co_values[16]; // Keep full precision for subsampling
|
|
||||||
float cg_values[16]; // Keep full precision for subsampling
|
|
||||||
|
|
||||||
// Convert 4x4 block to YCoCg using corrected transform
|
|
||||||
for (int py = 0; py < 4; py++) {
|
|
||||||
for (int px = 0; px < 4; px++) {
|
|
||||||
int src_x = block_x * 4 + px;
|
|
||||||
int src_y = block_y * 4 + py;
|
|
||||||
float t = (pattern < 0) ? 0.0f : bayerKernels[pattern % 4][4 * (py % 4) + (px % 4)];
|
|
||||||
int idx = py * 4 + px;
|
|
||||||
|
|
||||||
if (src_x < width && src_y < height) {
|
|
||||||
int pixel_offset = (src_y * width + src_x) * channels;
|
|
||||||
uint8_t r = rgb_data[pixel_offset];
|
|
||||||
uint8_t g = rgb_data[pixel_offset + 1];
|
|
||||||
uint8_t b = rgb_data[pixel_offset + 2];
|
|
||||||
pixels[idx] = rgb_to_ycocg_correct(r, g, b, t);
|
|
||||||
} else {
|
|
||||||
pixels[idx] = (ycocg_t){0.0f, 0.0f, 0.0f};
|
|
||||||
}
|
|
||||||
|
|
||||||
y_values[idx] = quantise_4bit_y(pixels[idx].y);
|
|
||||||
co_values[idx] = pixels[idx].co;
|
|
||||||
cg_values[idx] = pixels[idx].cg;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// CORRECTED: Chroma subsampling (4:2:0 for iPF1) with correct averaging
|
|
||||||
int cos1 = chroma_to_four_bits((co_values[0] + co_values[1] + co_values[4] + co_values[5]) / 4.0f);
|
|
||||||
int cos2 = chroma_to_four_bits((co_values[2] + co_values[3] + co_values[6] + co_values[7]) / 4.0f);
|
|
||||||
int cos3 = chroma_to_four_bits((co_values[8] + co_values[9] + co_values[12] + co_values[13]) / 4.0f);
|
|
||||||
int cos4 = chroma_to_four_bits((co_values[10] + co_values[11] + co_values[14] + co_values[15]) / 4.0f);
|
|
||||||
|
|
||||||
int cgs1 = chroma_to_four_bits((cg_values[0] + cg_values[1] + cg_values[4] + cg_values[5]) / 4.0f);
|
|
||||||
int cgs2 = chroma_to_four_bits((cg_values[2] + cg_values[3] + cg_values[6] + cg_values[7]) / 4.0f);
|
|
||||||
int cgs3 = chroma_to_four_bits((cg_values[8] + cg_values[9] + cg_values[12] + cg_values[13]) / 4.0f);
|
|
||||||
int cgs4 = chroma_to_four_bits((cg_values[10] + cg_values[11] + cg_values[14] + cg_values[15]) / 4.0f);
|
|
||||||
|
|
||||||
// CORRECTED: Pack into iPF1 format matching Kotlin exactly
|
|
||||||
// Co values (2 bytes): cos2|cos1, cos4|cos3
|
|
||||||
output[0] = ((cos2 << 4) | cos1);
|
|
||||||
output[1] = ((cos4 << 4) | cos3);
|
|
||||||
|
|
||||||
// Cg values (2 bytes): cgs2|cgs1, cgs4|cgs3
|
|
||||||
output[2] = ((cgs2 << 4) | cgs1);
|
|
||||||
output[3] = ((cgs4 << 4) | cgs3);
|
|
||||||
|
|
||||||
// CORRECTED: Y values (8 bytes) with correct ordering from Kotlin
|
|
||||||
output[4] = ((y_values[1] << 4) | y_values[0]); // Y1|Y0
|
|
||||||
output[5] = ((y_values[5] << 4) | y_values[4]); // Y5|Y4
|
|
||||||
output[6] = ((y_values[3] << 4) | y_values[2]); // Y3|Y2
|
|
||||||
output[7] = ((y_values[7] << 4) | y_values[6]); // Y7|Y6
|
|
||||||
output[8] = ((y_values[9] << 4) | y_values[8]); // Y9|Y8
|
|
||||||
output[9] = ((y_values[13] << 4) | y_values[12]); // Y13|Y12
|
|
||||||
output[10] = ((y_values[11] << 4) | y_values[10]); // Y11|Y10
|
|
||||||
output[11] = ((y_values[15] << 4) | y_values[14]); // Y15|Y14
|
|
||||||
}
|
|
||||||
|
|
||||||
// Weight a nibble difference: delta * weight, boosted by 1.5 when the mean
// of the two values lies in the dark (< 4) or bright (> 11) range.
static double contrast_weight(int v1, int v2, int delta, int weight) {
    const double mean = (v1 + v2) / 2.0;
    double weighted = delta * weight;

    if (mean < 4 || mean > 11) {
        weighted *= 1.5;
    }
    return weighted;
}
|
|
||||||
|
|
||||||
// Decide whether two 12-byte iPF1 blocks differ enough to be re-sent.
// Every 4-bit sample is compared with contrast_weight(): chroma nibbles
// (bytes 0-3) carry weight 3, luma nibbles (bytes 4-11) weight 2.
// Returns 1 when the accumulated score exceeds 4.0, otherwise 0.
static int is_significantly_different(uint8_t *block_a, uint8_t *block_b) {
    double score = 0.0;

    // Co (bytes 0-1) and Cg (bytes 2-3): low nibble first, then high nibble
    for (int i = 0; i < 4; i++) {
        const int low_a = block_a[i] & 0xF;
        const int low_b = block_b[i] & 0xF;
        const int high_a = (block_a[i] >> 4) & 0xF;
        const int high_b = (block_b[i] >> 4) & 0xF;

        score += contrast_weight(low_a, low_b, abs(low_a - low_b), 3);
        score += contrast_weight(high_a, high_b, abs(high_a - high_b), 3);
    }

    // Y (bytes 4-11): high nibble first, then low nibble
    for (int i = 4; i < 12; i++) {
        const int high_a = (block_a[i] >> 4) & 0xF;
        const int high_b = (block_b[i] >> 4) & 0xF;
        const int low_a = block_a[i] & 0xF;
        const int low_b = block_b[i] & 0xF;

        score += contrast_weight(high_a, high_b, abs(high_a - high_b), 2);
        score += contrast_weight(low_a, low_b, abs(low_a - low_b), 2);
    }

    return score > 4.0;
}
|
|
||||||
|
|
||||||
// Encode iPF1 frame to buffer
|
|
||||||
static void encode_ipf1_frame(uint8_t *rgb_data, int width, int height, int channels, int pattern,
|
|
||||||
uint8_t *ipf_buffer) {
|
|
||||||
int blocks_per_row = (width + 3) / 4;
|
|
||||||
int blocks_per_col = (height + 3) / 4;
|
|
||||||
|
|
||||||
for (int block_y = 0; block_y < blocks_per_col; block_y++) {
|
|
||||||
for (int block_x = 0; block_x < blocks_per_row; block_x++) {
|
|
||||||
int block_index = block_y * blocks_per_row + block_x;
|
|
||||||
uint8_t *output_block = ipf_buffer + block_index * IPF_BLOCK_SIZE;
|
|
||||||
encode_ipf1_block_correct(rgb_data, width, height, block_x, block_y, channels, pattern, output_block);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create iPF1-delta encoded frame
|
|
||||||
static size_t encode_ipf1_delta(uint8_t *previous_frame, uint8_t *current_frame,
|
|
||||||
int width, int height, uint8_t *delta_buffer) {
|
|
||||||
int blocks_per_row = (width + 3) / 4;
|
|
||||||
int blocks_per_col = (height + 3) / 4;
|
|
||||||
int total_blocks = blocks_per_row * blocks_per_col;
|
|
||||||
|
|
||||||
uint8_t *output_ptr = delta_buffer;
|
|
||||||
int skip_count = 0;
|
|
||||||
uint8_t *patch_blocks = malloc(total_blocks * IPF_BLOCK_SIZE);
|
|
||||||
int patch_count = 0;
|
|
||||||
|
|
||||||
for (int block_index = 0; block_index < total_blocks; block_index++) {
|
|
||||||
uint8_t *prev_block = previous_frame + block_index * IPF_BLOCK_SIZE;
|
|
||||||
uint8_t *curr_block = current_frame + block_index * IPF_BLOCK_SIZE;
|
|
||||||
|
|
||||||
if (is_significantly_different(prev_block, curr_block)) {
|
|
||||||
if (skip_count > 0) {
|
|
||||||
*output_ptr++ = SKIP_OP;
|
|
||||||
write_varint(&output_ptr, skip_count);
|
|
||||||
skip_count = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
memcpy(patch_blocks + patch_count * IPF_BLOCK_SIZE, curr_block, IPF_BLOCK_SIZE);
|
|
||||||
patch_count++;
|
|
||||||
} else {
|
|
||||||
if (patch_count > 0) {
|
|
||||||
*output_ptr++ = PATCH_OP;
|
|
||||||
write_varint(&output_ptr, patch_count);
|
|
||||||
memcpy(output_ptr, patch_blocks, patch_count * IPF_BLOCK_SIZE);
|
|
||||||
output_ptr += patch_count * IPF_BLOCK_SIZE;
|
|
||||||
patch_count = 0;
|
|
||||||
}
|
|
||||||
skip_count++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (patch_count > 0) {
|
|
||||||
*output_ptr++ = PATCH_OP;
|
|
||||||
write_varint(&output_ptr, patch_count);
|
|
||||||
memcpy(output_ptr, patch_blocks, patch_count * IPF_BLOCK_SIZE);
|
|
||||||
output_ptr += patch_count * IPF_BLOCK_SIZE;
|
|
||||||
}
|
|
||||||
|
|
||||||
*output_ptr++ = END_OP;
|
|
||||||
|
|
||||||
free(patch_blocks);
|
|
||||||
return output_ptr - delta_buffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Store the current time in *tv (via gettimeofday) and return it as
// fractional seconds.
static double get_current_time_sec(struct timeval *tv) {
    gettimeofday(tv, NULL);

    const double whole_seconds = tv->tv_sec;
    const double fraction = tv->tv_usec / 1000000.0;
    return whole_seconds + fraction;
}
|
|
||||||
|
|
||||||
// Display progress information similar to FFmpeg
|
|
||||||
static void display_progress(encoder_config_t *config, int frame_num) {
|
|
||||||
struct timeval current_time;
|
|
||||||
double current_sec = get_current_time_sec(¤t_time);
|
|
||||||
|
|
||||||
// Only update progress once per second
|
|
||||||
double last_progress_sec = config->last_progress_time.tv_sec + config->last_progress_time.tv_usec / 1000000.0;
|
|
||||||
if (current_sec - last_progress_sec < 1.0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
config->last_progress_time = current_time;
|
|
||||||
|
|
||||||
// Calculate timing
|
|
||||||
double start_sec = config->start_time.tv_sec + config->start_time.tv_usec / 1000000.0;
|
|
||||||
double elapsed_sec = current_sec - start_sec;
|
|
||||||
double current_video_time = (double)frame_num / config->fps;
|
|
||||||
double fps = frame_num / elapsed_sec;
|
|
||||||
double speed = (elapsed_sec > 0) ? current_video_time / elapsed_sec : 0.0;
|
|
||||||
double bitrate = (elapsed_sec > 0) ? (config->total_output_bytes * 8.0 / 1024.0) / elapsed_sec : 0.0;
|
|
||||||
|
|
||||||
// Format output size in human readable format
|
|
||||||
char size_str[32];
|
|
||||||
if (config->total_output_bytes >= 1024 * 1024) {
|
|
||||||
snprintf(size_str, sizeof(size_str), "%.1fMB", config->total_output_bytes / (1024.0 * 1024.0));
|
|
||||||
} else if (config->total_output_bytes >= 1024) {
|
|
||||||
snprintf(size_str, sizeof(size_str), "%.1fkB", config->total_output_bytes / 1024.0);
|
|
||||||
} else {
|
|
||||||
snprintf(size_str, sizeof(size_str), "%zuB", config->total_output_bytes);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Format current time as HH:MM:SS.xx
|
|
||||||
int hours = (int)(current_video_time / 3600);
|
|
||||||
int minutes = (int)((current_video_time - hours * 3600) / 60);
|
|
||||||
double seconds = current_video_time - hours * 3600 - minutes * 60;
|
|
||||||
|
|
||||||
// Print progress line (overwrite previous line)
|
|
||||||
fprintf(stderr, "\rframe=%d fps=%.1f size=%s time=%02d:%02d:%05.2f bitrate=%.1fkbits/s speed=%4.2fx",
|
|
||||||
frame_num, fps, size_str, hours, minutes, seconds, bitrate, speed);
|
|
||||||
fflush(stderr);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process audio for current frame
|
|
||||||
static int process_audio(encoder_config_t *config, int frame_num, FILE *output) {
|
|
||||||
if (!config->has_audio || !config->mp2_file || config->audio_remaining <= 0) {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialise packet size on first frame
|
|
||||||
if (config->mp2_packet_size == 0) {
|
|
||||||
uint8_t header[4];
|
|
||||||
if (fread(header, 1, 4, config->mp2_file) != 4) return 1;
|
|
||||||
fseek(config->mp2_file, 0, SEEK_SET);
|
|
||||||
|
|
||||||
config->mp2_packet_size = get_mp2_packet_size(header);
|
|
||||||
int is_mono = (header[3] >> 6) == 3;
|
|
||||||
config->mp2_rate_index = mp2_packet_size_to_rate_index(config->mp2_packet_size, is_mono);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Calculate how much audio time each frame represents (in seconds)
|
|
||||||
double frame_audio_time = 1.0 / config->fps;
|
|
||||||
|
|
||||||
// Calculate how much audio time each MP2 packet represents
|
|
||||||
// MP2 frame contains 1152 samples at 32kHz = 0.036 seconds
|
|
||||||
double packet_audio_time = 1152.0 / MP2_SAMPLE_RATE;
|
|
||||||
|
|
||||||
// Estimate how many packets we consume per video frame
|
|
||||||
double packets_per_frame = frame_audio_time / packet_audio_time;
|
|
||||||
|
|
||||||
// Only insert audio when buffer would go below 2 frames
|
|
||||||
// Initialise with 2 packets on first frame to prime the buffer
|
|
||||||
int packets_to_insert = 0;
|
|
||||||
if (frame_num == 1) {
|
|
||||||
packets_to_insert = 2;
|
|
||||||
config->audio_frames_in_buffer = 2;
|
|
||||||
} else {
|
|
||||||
// Simulate buffer consumption (packets consumed per frame)
|
|
||||||
config->audio_frames_in_buffer -= (int)ceil(packets_per_frame);
|
|
||||||
|
|
||||||
// Only insert packets when buffer gets low (≤ 2 frames)
|
|
||||||
if (config->audio_frames_in_buffer <= 2) {
|
|
||||||
packets_to_insert = config->target_audio_buffer_size - config->audio_frames_in_buffer;
|
|
||||||
packets_to_insert = (packets_to_insert > 0) ? packets_to_insert : 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Insert the calculated number of audio packets
|
|
||||||
for (int q = 0; q < packets_to_insert; q++) {
|
|
||||||
size_t bytes_to_read = config->mp2_packet_size;
|
|
||||||
if (bytes_to_read > config->audio_remaining) {
|
|
||||||
bytes_to_read = config->audio_remaining;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t bytes_read = fread(config->mp2_buffer, 1, bytes_to_read, config->mp2_file);
|
|
||||||
if (bytes_read == 0) break;
|
|
||||||
|
|
||||||
uint8_t audio_packet_type[2] = {config->mp2_rate_index, MP2_PACKET_TYPE_BASE};
|
|
||||||
fwrite(audio_packet_type, 1, 2, output);
|
|
||||||
fwrite(config->mp2_buffer, 1, bytes_read, output);
|
|
||||||
|
|
||||||
// Track audio bytes written
|
|
||||||
config->total_output_bytes += 2 + bytes_read;
|
|
||||||
config->audio_remaining -= bytes_read;
|
|
||||||
config->audio_frames_in_buffer++;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write TVDOS header
|
|
||||||
static void write_tvdos_header(encoder_config_t *config, FILE *output) {
|
|
||||||
fwrite(TVDOS_MAGIC, 1, 8, output);
|
|
||||||
fwrite(&config->width, 2, 1, output);
|
|
||||||
fwrite(&config->height, 2, 1, output);
|
|
||||||
fwrite(&config->fps, 2, 1, output);
|
|
||||||
fwrite(&config->total_frames, 4, 1, output);
|
|
||||||
|
|
||||||
uint16_t unused = 0x00FF;
|
|
||||||
fwrite(&unused, 2, 1, output);
|
|
||||||
|
|
||||||
int audio_sample_size = 2 * (((MP2_SAMPLE_RATE / config->fps) + 1));
|
|
||||||
int audio_queue_size = config->has_audio ?
|
|
||||||
(int)ceil(audio_sample_size / 2304.0) + 1 : 0;
|
|
||||||
|
|
||||||
uint16_t audio_queue_info = config->has_audio ?
|
|
||||||
(MP2_DEFAULT_PACKET_SIZE >> 2) | (audio_queue_size << 12) : 0x0000;
|
|
||||||
fwrite(&audio_queue_info, 2, 1, output);
|
|
||||||
|
|
||||||
// Store target buffer size for audio timing
|
|
||||||
config->target_audio_buffer_size = audio_queue_size;
|
|
||||||
|
|
||||||
uint8_t reserved[10] = {0};
|
|
||||||
fwrite(reserved, 1, 10, output);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialise encoder configuration
|
|
||||||
static encoder_config_t *init_encoder_config() {
|
|
||||||
encoder_config_t *config = calloc(1, sizeof(encoder_config_t));
|
|
||||||
if (!config) return NULL;
|
|
||||||
|
|
||||||
config->width = DEFAULT_WIDTH;
|
|
||||||
config->height = DEFAULT_HEIGHT;
|
|
||||||
|
|
||||||
return config;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Allocate encoder buffers
|
|
||||||
static int allocate_buffers(encoder_config_t *config) {
|
|
||||||
config->frame_buffer_size = ((config->width + 3) / 4) * ((config->height + 3) / 4) * IPF_BLOCK_SIZE;
|
|
||||||
|
|
||||||
config->rgb_buffer = malloc(config->width * config->height * 3);
|
|
||||||
config->previous_ipf_frame = malloc(config->frame_buffer_size);
|
|
||||||
config->current_ipf_frame = malloc(config->frame_buffer_size);
|
|
||||||
config->delta_buffer = malloc(config->frame_buffer_size * 2);
|
|
||||||
config->compressed_buffer = malloc(config->frame_buffer_size * 2);
|
|
||||||
config->mp2_buffer = malloc(2048);
|
|
||||||
|
|
||||||
return (config->rgb_buffer && config->previous_ipf_frame &&
|
|
||||||
config->current_ipf_frame && config->delta_buffer &&
|
|
||||||
config->compressed_buffer && config->mp2_buffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process one frame - CORRECTED ORDER: Audio -> Video -> Sync
|
|
||||||
static int process_frame(encoder_config_t *config, int frame_num, int is_keyframe, FILE *output) {
|
|
||||||
// Read RGB data from FFmpeg pipe first
|
|
||||||
size_t rgb_size = config->width * config->height * 3;
|
|
||||||
if (fread(config->rgb_buffer, 1, rgb_size, config->ffmpeg_video_pipe) != rgb_size) {
|
|
||||||
if (feof(config->ffmpeg_video_pipe)) return 0;
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 1: Process audio FIRST (matches working file pattern)
|
|
||||||
if (!process_audio(config, frame_num, output)) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 2: Encode and write video
|
|
||||||
int pattern;
|
|
||||||
switch (config->dither_mode) {
|
|
||||||
case 0: pattern = -1; break; // No dithering
|
|
||||||
case 1: pattern = 0; break; // Static pattern
|
|
||||||
case 2: pattern = frame_num % 4; break; // Dynamic pattern
|
|
||||||
default: pattern = 0; break; // Fallback to static
|
|
||||||
}
|
|
||||||
encode_ipf1_frame(config->rgb_buffer, config->width, config->height, 3, pattern,
|
|
||||||
config->current_ipf_frame);
|
|
||||||
|
|
||||||
// Determine if we should use delta encoding
|
|
||||||
int use_delta = 0;
|
|
||||||
size_t data_size = config->frame_buffer_size;
|
|
||||||
uint8_t *frame_data = config->current_ipf_frame;
|
|
||||||
|
|
||||||
if (frame_num > 1 && !is_keyframe) {
|
|
||||||
size_t delta_size = encode_ipf1_delta(config->previous_ipf_frame,
|
|
||||||
config->current_ipf_frame,
|
|
||||||
config->width, config->height,
|
|
||||||
config->delta_buffer);
|
|
||||||
|
|
||||||
if (delta_size < config->frame_buffer_size * 0.576) {
|
|
||||||
use_delta = 1;
|
|
||||||
data_size = delta_size;
|
|
||||||
frame_data = config->delta_buffer;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compress the frame data using gzip
|
|
||||||
size_t compressed_size = gzip_compress(frame_data, data_size,
|
|
||||||
config->compressed_buffer,
|
|
||||||
config->frame_buffer_size * 2);
|
|
||||||
if (compressed_size == 0) {
|
|
||||||
fprintf(stderr, "Gzip compression failed\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write video packet
|
|
||||||
if (use_delta) {
|
|
||||||
uint8_t packet_type[2] = {IPF1_DELTA_PACKET_TYPE};
|
|
||||||
fwrite(packet_type, 1, 2, output);
|
|
||||||
} else {
|
|
||||||
uint8_t packet_type[2] = {IPF1_PACKET_TYPE};
|
|
||||||
fwrite(packet_type, 1, 2, output);
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t size_le = compressed_size;
|
|
||||||
fwrite(&size_le, 4, 1, output);
|
|
||||||
fwrite(config->compressed_buffer, 1, compressed_size, output);
|
|
||||||
|
|
||||||
// Step 3: Write sync packet AFTER video (matches working file pattern)
|
|
||||||
uint8_t sync[2] = {SYNC_PACKET_TYPE};
|
|
||||||
fwrite(sync, 1, 2, output);
|
|
||||||
|
|
||||||
// Track video bytes written (packet type + size + compressed data + sync)
|
|
||||||
config->total_output_bytes += 2 + 4 + compressed_size + 2;
|
|
||||||
|
|
||||||
// Swap frame buffers
|
|
||||||
uint8_t *temp = config->previous_ipf_frame;
|
|
||||||
config->previous_ipf_frame = config->current_ipf_frame;
|
|
||||||
config->current_ipf_frame = temp;
|
|
||||||
|
|
||||||
// Display progress
|
|
||||||
display_progress(config, frame_num);
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Cleanup function
|
|
||||||
static void cleanup_config(encoder_config_t *config) {
|
|
||||||
if (!config) return;
|
|
||||||
|
|
||||||
if (config->ffmpeg_video_pipe) pclose(config->ffmpeg_video_pipe);
|
|
||||||
if (config->mp2_file) fclose(config->mp2_file);
|
|
||||||
|
|
||||||
free(config->input_file);
|
|
||||||
free(config->output_file);
|
|
||||||
free(config->rgb_buffer);
|
|
||||||
free(config->previous_ipf_frame);
|
|
||||||
free(config->current_ipf_frame);
|
|
||||||
free(config->delta_buffer);
|
|
||||||
free(config->compressed_buffer);
|
|
||||||
free(config->mp2_buffer);
|
|
||||||
|
|
||||||
// Remove temporary audio file
|
|
||||||
unlink(TEMP_AUDIO_FILE);
|
|
||||||
|
|
||||||
free(config);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Print usage information.
//
// Writes the option summary and example invocations to stdout; program_name
// is substituted wherever the executable name appears.
static void print_usage(const char *program_name) {
    fputs("TVDOS Movie Encoder\n\n", stdout);
    printf("Usage: %s [options] input_video\n\n", program_name);

    fputs("Options:\n"
          " -o, --output FILE Output TVDOS movie file (default: stdout)\n"
          " -s, --size WxH Video resolution (default: 560x448)\n"
          " -d, --dither MODE Dithering mode (default: 1)\n"
          " 0: No dithering\n"
          " 1: Static pattern\n"
          " 2: Dynamic pattern (better quality, larger files)\n"
          " -h, --help Show this help message\n\n",
          stdout);

    fputs("Examples:\n", stdout);
    printf(" %s input.mp4 -o output.mov\n"
           " %s input.avi -s 1024x768 -o output.mov\n"
           " yt-dlp -o - \"https://youtube.com/watch?v=VIDEO_ID\" | ffmpeg -i pipe:0 -c copy temp.mp4 && %s temp.mp4 -o youtube_video.mov && rm temp.mp4\n",
           program_name, program_name, program_name);
}
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
|
||||||
encoder_config_t *config = init_encoder_config();
|
|
||||||
if (!config) {
|
|
||||||
fprintf(stderr, "Failed to initialise encoder\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
config->output_to_stdout = 1; // Default to stdout
|
|
||||||
config->dither_mode = 1; // Default to static dithering
|
|
||||||
|
|
||||||
// Parse command line arguments
|
|
||||||
static struct option long_options[] = {
|
|
||||||
{"output", required_argument, 0, 'o'},
|
|
||||||
{"size", required_argument, 0, 's'},
|
|
||||||
{"dither", required_argument, 0, 'd'},
|
|
||||||
{"help", no_argument, 0, 'h'},
|
|
||||||
{0, 0, 0, 0}
|
|
||||||
};
|
|
||||||
|
|
||||||
int c;
|
|
||||||
while ((c = getopt_long(argc, argv, "o:s:d:h", long_options, NULL)) != -1) {
|
|
||||||
switch (c) {
|
|
||||||
case 'o':
|
|
||||||
config->output_file = strdup(optarg);
|
|
||||||
config->output_to_stdout = 0;
|
|
||||||
break;
|
|
||||||
case 's':
|
|
||||||
if (!parse_resolution(optarg, &config->width, &config->height)) {
|
|
||||||
fprintf(stderr, "Invalid resolution format: %s\n", optarg);
|
|
||||||
cleanup_config(config);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 'd':
|
|
||||||
config->dither_mode = atoi(optarg);
|
|
||||||
if (config->dither_mode < 0 || config->dither_mode > 2) {
|
|
||||||
fprintf(stderr, "Invalid dither mode: %s (must be 0, 1, or 2)\n", optarg);
|
|
||||||
cleanup_config(config);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 'h':
|
|
||||||
print_usage(argv[0]);
|
|
||||||
cleanup_config(config);
|
|
||||||
return 0;
|
|
||||||
default:
|
|
||||||
print_usage(argv[0]);
|
|
||||||
cleanup_config(config);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (optind >= argc) {
|
|
||||||
fprintf(stderr, "Error: Input video file required\n\n");
|
|
||||||
print_usage(argv[0]);
|
|
||||||
cleanup_config(config);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
config->input_file = strdup(argv[optind]);
|
|
||||||
|
|
||||||
// Get video metadata
|
|
||||||
if (!get_video_metadata(config)) {
|
|
||||||
fprintf(stderr, "Failed to analyze video metadata\n");
|
|
||||||
cleanup_config(config);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Allocate buffers
|
|
||||||
if (!allocate_buffers(config)) {
|
|
||||||
fprintf(stderr, "Failed to allocate memory buffers\n");
|
|
||||||
cleanup_config(config);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start video conversion
|
|
||||||
if (!start_video_conversion(config)) {
|
|
||||||
fprintf(stderr, "Failed to start video conversion\n");
|
|
||||||
cleanup_config(config);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start audio conversion
|
|
||||||
if (!start_audio_conversion(config)) {
|
|
||||||
fprintf(stderr, "Failed to start audio conversion\n");
|
|
||||||
cleanup_config(config);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Open output
|
|
||||||
FILE *output = config->output_to_stdout ? stdout : fopen(config->output_file, "wb");
|
|
||||||
if (!output) {
|
|
||||||
fprintf(stderr, "Failed to open output file\n");
|
|
||||||
cleanup_config(config);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write TVDOS header
|
|
||||||
write_tvdos_header(config, output);
|
|
||||||
|
|
||||||
// Initialise progress tracking
|
|
||||||
gettimeofday(&config->start_time, NULL);
|
|
||||||
config->last_progress_time = config->start_time;
|
|
||||||
config->total_output_bytes = 8 + 2 + 2 + 2 + 4 + 2 + 2 + 10; // TVDOS header size
|
|
||||||
|
|
||||||
// Process frames with correct order: Audio -> Video -> Sync
|
|
||||||
for (int frame = 1; frame <= config->total_frames; frame++) {
|
|
||||||
int is_keyframe = (frame == 1) || (frame % 30 == 0);
|
|
||||||
|
|
||||||
int result = process_frame(config, frame, is_keyframe, output);
|
|
||||||
if (result <= 0) {
|
|
||||||
if (result == 0) {
|
|
||||||
fprintf(stderr, "End of video at frame %d\n", frame);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Final progress update and newline
|
|
||||||
fprintf(stderr, "\n");
|
|
||||||
|
|
||||||
if (!config->output_to_stdout) {
|
|
||||||
fclose(output);
|
|
||||||
fprintf(stderr, "Encoding complete: %s\n", config->output_file);
|
|
||||||
}
|
|
||||||
|
|
||||||
cleanup_config(config);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
@@ -1,183 +0,0 @@
|
|||||||
// Created by CuriousTorvald and Claude on 2025-10-17
|
|
||||||
// MPEG-style bidirectional block motion compensation for TAV encoder
|
|
||||||
// Simplified: Single-level diamond search, variable blocks, overlaps, sub-pixel refinement
|
|
||||||
|
|
||||||
#include <opencv2/opencv.hpp>
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstring>
|
|
||||||
#include <cmath>
|
|
||||||
|
|
||||||
extern "C" {
|
|
||||||
|
|
||||||
// Dense optical flow estimation using Farneback algorithm
|
|
||||||
// Computes flow at every pixel, then samples at block centers for motion vectors
|
|
||||||
// Much more spatially coherent than independent block matching
|
|
||||||
void estimate_optical_flow_motion(
|
|
||||||
const float *current_y, // Current frame Y channel (width×height)
|
|
||||||
const float *reference_y, // Reference frame Y channel
|
|
||||||
int width, int height,
|
|
||||||
int block_size, // Block size (e.g., 16)
|
|
||||||
int16_t *mvs_x, // Output: motion vectors X (in 1/4-pixel units)
|
|
||||||
int16_t *mvs_y // Output: motion vectors Y (in 1/4-pixel units)
|
|
||||||
) {
|
|
||||||
// Convert float Y channels to 8-bit grayscale for OpenCV
|
|
||||||
cv::Mat cur_gray(height, width, CV_8UC1);
|
|
||||||
cv::Mat ref_gray(height, width, CV_8UC1);
|
|
||||||
|
|
||||||
// Detect if Y is in [0,1] range and scale to [0,255] if needed
|
|
||||||
float y_min = current_y[0], y_max = current_y[0];
|
|
||||||
for (int i = 1; i < width * height; i++) {
|
|
||||||
if (current_y[i] < y_min) y_min = current_y[i];
|
|
||||||
if (current_y[i] > y_max) y_max = current_y[i];
|
|
||||||
}
|
|
||||||
float scale = (y_max <= 1.1f) ? 255.0f : 1.0f;
|
|
||||||
|
|
||||||
for (int y = 0; y < height; y++) {
|
|
||||||
for (int x = 0; x < width; x++) {
|
|
||||||
int idx = y * width + x;
|
|
||||||
cur_gray.at<uint8_t>(y, x) = (uint8_t)std::round(std::max(0.0f, std::min(255.0f, current_y[idx] * scale)));
|
|
||||||
ref_gray.at<uint8_t>(y, x) = (uint8_t)std::round(std::max(0.0f, std::min(255.0f, reference_y[idx] * scale)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compute dense optical flow using Farneback algorithm
|
|
||||||
// IMPORTANT: We need BACKWARD flow (current → reference) for motion compensation
|
|
||||||
// This tells us where to PULL pixels FROM in the reference frame
|
|
||||||
cv::Mat flow;
|
|
||||||
cv::calcOpticalFlowFarneback(
|
|
||||||
cur_gray, // Current frame (source)
|
|
||||||
ref_gray, // Reference frame (destination)
|
|
||||||
flow, // Output flow (2-channel float: dx, dy per pixel)
|
|
||||||
0.5, // pyr_scale: pyramid scale (0.5 = each layer is half size)
|
|
||||||
3, // levels: number of pyramid levels
|
|
||||||
20, // winsize: averaging window size
|
|
||||||
3, // iterations: number of iterations at each pyramid level
|
|
||||||
5, // poly_n: size of pixel neighborhood (5 or 7)
|
|
||||||
1.2, // poly_sigma: standard deviation of Gaussian for polynomial expansion
|
|
||||||
0 // flags: 0 = normal, OPTFLOW_USE_INITIAL_FLOW = use input flow as initial estimate
|
|
||||||
);
|
|
||||||
|
|
||||||
// Sample flow at block centers to get motion vectors
|
|
||||||
int num_blocks_x = (width + block_size - 1) / block_size;
|
|
||||||
int num_blocks_y = (height + block_size - 1) / block_size;
|
|
||||||
|
|
||||||
for (int by = 0; by < num_blocks_y; by++) {
|
|
||||||
for (int bx = 0; bx < num_blocks_x; bx++) {
|
|
||||||
int block_idx = by * num_blocks_x + bx;
|
|
||||||
|
|
||||||
// Block center position
|
|
||||||
int center_x = bx * block_size + block_size / 2;
|
|
||||||
int center_y = by * block_size + block_size / 2;
|
|
||||||
|
|
||||||
// Clamp to frame boundaries
|
|
||||||
if (center_x >= width) center_x = width - 1;
|
|
||||||
if (center_y >= height) center_y = height - 1;
|
|
||||||
|
|
||||||
// Get flow at block center
|
|
||||||
cv::Point2f flow_vec = flow.at<cv::Point2f>(center_y, center_x);
|
|
||||||
|
|
||||||
// Convert to 1/4-pixel units and store
|
|
||||||
// Flow is in pixels, positive = motion to the right/down
|
|
||||||
mvs_x[block_idx] = (int16_t)std::round(flow_vec.x * 4.0f);
|
|
||||||
mvs_y[block_idx] = (int16_t)std::round(flow_vec.y * 4.0f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Block-based motion compensation with bilinear interpolation (sub-pixel
// precision). Motion vectors are given per block in 1/4-pixel units.
//
// Every destination pixel (x, y) is fetched from the source at
// (x - mv_x, y - mv_y), where mv is the vector of the block containing the
// pixel. The source position is clamped to the frame before sampling, so
// vectors pointing outside repeat the border pixels.
void warp_block_motion(
    const float *src,     // Source frame
    int width, int height,
    const int16_t *mvs_x, // Motion vectors X (1/4-pixel units)
    const int16_t *mvs_y, // Motion vectors Y (1/4-pixel units)
    int block_size,       // Block size (e.g., 16)
    float *dst            // Output warped frame
) {
    const int blocks_across = (width + block_size - 1) / block_size;
    const int blocks_down = (height + block_size - 1) / block_size;

    for (int by = 0; by < blocks_down; by++) {
        // Rows covered by this block row (the last one may be cut short)
        const int row_begin = by * block_size;
        const int row_end = std::min(row_begin + block_size, height);

        for (int bx = 0; bx < blocks_across; bx++) {
            const int block = by * blocks_across + bx;

            // Displacement of this block, converted from 1/4-pixel units to pixels
            const float dx = mvs_x[block] / 4.0f;
            const float dy = mvs_y[block] / 4.0f;

            const int col_begin = bx * block_size;
            const int col_end = std::min(col_begin + block_size, width);

            for (int y = row_begin; y < row_end; y++) {
                // Vertical source coordinate is constant along a row
                const float sy = std::max(0.0f, std::min((float)(height - 1), y - dy));
                const int top = (int)sy;
                const int bottom = std::min(top + 1, height - 1);
                const float wy = sy - top;

                const float *top_row = src + top * width;
                const float *bottom_row = src + bottom * width;
                float *out_row = dst + y * width;

                for (int x = col_begin; x < col_end; x++) {
                    const float sx = std::max(0.0f, std::min((float)(width - 1), x - dx));
                    const int left = (int)sx;
                    const int right = std::min(left + 1, width - 1);
                    const float wx = sx - left;

                    // Blend horizontally on both rows, then vertically
                    const float upper = (1.0f - wx) * top_row[left] + wx * top_row[right];
                    const float lower = (1.0f - wx) * bottom_row[left] + wx * bottom_row[right];
                    out_row[x] = (1.0f - wy) * upper + wy * lower;
                }
            }
        }
    }
}
|
|
||||||
|
|
||||||
// Bidirectional motion compensation for MC-EZBC predict step
|
|
||||||
// Implements: prediction = 0.5 * (warp(f0, MV_fwd) + warp(f1, MV_bwd))
|
|
||||||
void warp_bidirectional(
|
|
||||||
const float *f0, const float *f1,
|
|
||||||
int width, int height,
|
|
||||||
const int16_t *mvs_fwd_x, const int16_t *mvs_fwd_y, // F0 → F1
|
|
||||||
const int16_t *mvs_bwd_x, const int16_t *mvs_bwd_y, // F1 → F0
|
|
||||||
int block_size,
|
|
||||||
float *prediction // Output: 0.5 * (warped_f0 + warped_f1)
|
|
||||||
) {
|
|
||||||
int num_pixels = width * height;
|
|
||||||
|
|
||||||
// Allocate temporary buffers
|
|
||||||
float *warped_f0 = new float[num_pixels];
|
|
||||||
float *warped_f1 = new float[num_pixels];
|
|
||||||
|
|
||||||
// Warp f0 forward using forward MVs
|
|
||||||
warp_block_motion(f0, width, height, mvs_fwd_x, mvs_fwd_y, block_size, warped_f0);
|
|
||||||
|
|
||||||
// Warp f1 backward using backward MVs
|
|
||||||
warp_block_motion(f1, width, height, mvs_bwd_x, mvs_bwd_y, block_size, warped_f1);
|
|
||||||
|
|
||||||
// Average the two warped frames
|
|
||||||
for (int i = 0; i < num_pixels; i++) {
|
|
||||||
prediction[i] = 0.5f * (warped_f0[i] + warped_f1[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
delete[] warped_f0;
|
|
||||||
delete[] warped_f1;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // extern "C"
|
|
||||||
@@ -1,795 +0,0 @@
|
|||||||
/*
|
|
||||||
encoder_tav_text.c
|
|
||||||
Text-based video encoder for TSVM using custom font ROMs
|
|
||||||
|
|
||||||
Outputs Videotex files with custom header and packet type 0x3F (text mode)
|
|
||||||
|
|
||||||
File structure:
|
|
||||||
- Videotex header (32 bytes): magic "\x1FTSVM-VT", version, grid dims, fps, total_frames
|
|
||||||
- Extended header packet (0xEF): BGNT, ENDT, CDAT, VNDR, FMPG
|
|
||||||
- Font ROM packets (0x30): lowrom and highrom (1920 bytes each)
|
|
||||||
- Per-frame sequence: [audio 0x20], [timecode 0xFD], [videotex 0x3F], [sync 0xFF]
|
|
||||||
|
|
||||||
Videotex packet structure (0x3F): Zstd([rows][cols][fg-array][bg-array][char-array])
|
|
||||||
- rows: uint8 (32)
|
|
||||||
- cols: uint8 (80)
|
|
||||||
- fg-array: rows*cols bytes (foreground colors, 0xF0=black, 0xFE=white)
|
|
||||||
- bg-array: rows*cols bytes (background colors, 0xF0=black, 0xFE=white)
|
|
||||||
- char-array: rows*cols bytes (glyph indices 0-255)
|
|
||||||
|
|
||||||
Total uncompressed size: 2 + (80*32*3) = 7682 bytes
|
|
||||||
Separated arrays compress much better (fg/bg are just 0xF0/0xFE runs)
|
|
||||||
Video size: 80×32 characters (560×448 pixels with 7×14 font)
|
|
||||||
Audio: MP2 encoding at 96 kbps, 32 kHz stereo (packet 0x20)
|
|
||||||
Each text frame is treated as an I-frame with sync packet
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
gcc -Ofast -std=c11 -Wall encoder_tav_text.c -o encoder_tav_text -lm -lzstd
|
|
||||||
./encoder_tav_text -i video.mp4 -f font.chr -o output.mv3
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define _POSIX_C_SOURCE 200809L
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <zstd.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include <sys/time.h>
|
|
||||||
|
|
||||||
#define ENCODER_VENDOR_STRING "Encoder-TAV-Text 20251121 (videotex)"
|
|
||||||
|
|
||||||
#define CHAR_W 7
|
|
||||||
#define CHAR_H 14
|
|
||||||
#define GRID_W 80
|
|
||||||
#define GRID_H 32
|
|
||||||
#define PIXEL_W (GRID_W * CHAR_W) // 560
|
|
||||||
#define PIXEL_H (GRID_H * CHAR_H) // 448
|
|
||||||
#define PATCH_SZ (CHAR_W * CHAR_H)
|
|
||||||
#define SAMPLE_RATE 32000
|
|
||||||
#define MP2_DEFAULT_PACKET_SIZE 1152
|
|
||||||
|
|
||||||
// TAV packet types
|
|
||||||
#define PACKET_TIMECODE 0xFD
|
|
||||||
#define PACKET_SYNC 0xFF
|
|
||||||
#define PACKET_AUDIO_MP2 0x20
|
|
||||||
#define PACKET_SSF 0x30
|
|
||||||
#define PACKET_TEXT 0x3F
|
|
||||||
#define PACKET_EXTENDED_HDR 0xEF
|
|
||||||
|
|
||||||
// SSF opcodes for font ROM
|
|
||||||
#define SSF_OPCODE_LOWROM 0x80
|
|
||||||
#define SSF_OPCODE_HIGHROM 0x81
|
|
||||||
|
|
||||||
// Font ROM size constants
|
|
||||||
#define FONTROM_PADDED_SIZE 1920
|
|
||||||
#define GLYPHS_PER_ROM 128
|
|
||||||
|
|
||||||
// Color mapping (4-bit RGB to TSVM palette)
|
|
||||||
#define COLOR_BLACK 0xF0
|
|
||||||
#define COLOR_WHITE 0xFE
|
|
||||||
|
|
||||||
// Generate random filename for temporary audio file.
//
// Writes "/tmp/" + 32 random alphanumeric characters + ".mp2" plus the
// terminating NUL into filename, which must have room for 42 bytes.
//
// The random generator is seeded once per process from the clock mixed with
// the process id, so two encoders started within the same second, or two
// calls within the same second, do not end up with the same name.
static void generate_random_filename(char *filename) {
    static const char charset[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    static int seeded = 0;
    const int charset_size = (int)(sizeof(charset) - 1);
    const int prefix_len = 5;  // strlen("/tmp/")
    const int random_len = 32;

    if (!seeded) {
        srand((unsigned int)time(NULL) ^ ((unsigned int)getpid() << 16));
        seeded = 1;
    }

    // Start with the prefix
    memcpy(filename, "/tmp/", (size_t)prefix_len);

    // Generate 32 random characters
    for (int i = 0; i < random_len; i++) {
        filename[prefix_len + i] = charset[rand() % charset_size];
    }

    // Add the .mp2 extension; copying 5 bytes includes the NUL terminator
    memcpy(filename + prefix_len + random_len, ".mp2", 5);
}
|
|
||||||
|
|
||||||
char TEMP_AUDIO_FILE[42];
|
|
||||||
|
|
||||||
// Global flag to disable inverted character matching
|
|
||||||
int g_no_invert_char = 0;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
uint8_t *data; // Binary glyph data (PATCH_SZ bytes per glyph)
|
|
||||||
int count; // Number of glyphs
|
|
||||||
} FontROM;
|
|
||||||
|
|
||||||
// Get FFmpeg version string
|
|
||||||
char *get_ffmpeg_version(void) {
|
|
||||||
FILE *pipe = popen("ffmpeg -version 2>&1 | head -1", "r");
|
|
||||||
if (!pipe) return NULL;
|
|
||||||
|
|
||||||
char *version = malloc(256);
|
|
||||||
if (!version) {
|
|
||||||
pclose(pipe);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fgets(version, 256, pipe)) {
|
|
||||||
// Remove trailing newline
|
|
||||||
size_t len = strlen(version);
|
|
||||||
if (len > 0 && version[len - 1] == '\n') {
|
|
||||||
version[len - 1] = '\0';
|
|
||||||
}
|
|
||||||
pclose(pipe);
|
|
||||||
return version;
|
|
||||||
}
|
|
||||||
|
|
||||||
free(version);
|
|
||||||
pclose(pipe);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Detect video FPS using ffprobe
|
|
||||||
float detect_fps(const char *video_path) {
|
|
||||||
char cmd[1024];
|
|
||||||
snprintf(cmd, sizeof(cmd),
|
|
||||||
"ffprobe -v error -select_streams v:0 -show_entries stream=r_frame_rate "
|
|
||||||
"-of default=noprint_wrappers=1:nokey=1 \"%s\" 2>/dev/null",
|
|
||||||
video_path);
|
|
||||||
|
|
||||||
FILE *pipe = popen(cmd, "r");
|
|
||||||
if (!pipe) return 30.0f; // fallback
|
|
||||||
|
|
||||||
char fps_str[64] = {0};
|
|
||||||
if (fgets(fps_str, sizeof(fps_str), pipe)) {
|
|
||||||
// Parse fraction like "30/1" or "24000/1001"
|
|
||||||
int num = 0, den = 1;
|
|
||||||
if (sscanf(fps_str, "%d/%d", &num, &den) == 2 && den > 0) {
|
|
||||||
pclose(pipe);
|
|
||||||
return (float)num / (float)den;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pclose(pipe);
|
|
||||||
return 30.0f; // fallback
|
|
||||||
}
|
|
||||||
|
|
||||||
// Load font ROM (14 bytes per glyph, no header)
|
|
||||||
FontROM *load_font_rom(const char *path) {
|
|
||||||
FILE *f = fopen(path, "rb");
|
|
||||||
if (!f) return NULL;
|
|
||||||
|
|
||||||
fseek(f, 0, SEEK_END);
|
|
||||||
long size = ftell(f);
|
|
||||||
fseek(f, 0, SEEK_SET);
|
|
||||||
|
|
||||||
if (size % 14 != 0) {
|
|
||||||
fprintf(stderr, "Warning: ROM size not divisible by 14 (got %ld bytes)\n", size);
|
|
||||||
}
|
|
||||||
|
|
||||||
int glyph_count = size / 14;
|
|
||||||
FontROM *rom = malloc(sizeof(FontROM));
|
|
||||||
rom->count = glyph_count;
|
|
||||||
rom->data = malloc(glyph_count * PATCH_SZ);
|
|
||||||
|
|
||||||
// Read and unpack glyphs
|
|
||||||
for (int g = 0; g < glyph_count; g++) {
|
|
||||||
uint8_t row_bytes[14];
|
|
||||||
if (fread(row_bytes, 14, 1, f) != 1) {
|
|
||||||
free(rom->data);
|
|
||||||
free(rom);
|
|
||||||
fclose(f);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Unpack bits to binary pixels
|
|
||||||
for (int row = 0; row < CHAR_H; row++) {
|
|
||||||
for (int col = 0; col < CHAR_W; col++) {
|
|
||||||
// Bit 6 = leftmost, bit 0 = rightmost
|
|
||||||
int bit = (row_bytes[row] >> (6 - col)) & 1;
|
|
||||||
rom->data[g * PATCH_SZ + row * CHAR_W + col] = bit;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fclose(f);
|
|
||||||
fprintf(stderr, "Loaded font ROM: %d glyphs\n", glyph_count);
|
|
||||||
return rom;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find best matching glyph for a grayscale patch
|
|
||||||
int find_best_glyph(const uint8_t *patch, const FontROM *rom, uint8_t *out_bg, uint8_t *out_fg) {
|
|
||||||
// Try both normal and inverted matching (unless --no-invert-char is set)
|
|
||||||
int best_glyph = 0;
|
|
||||||
float best_error = INFINITY;
|
|
||||||
uint8_t best_bg = COLOR_BLACK, best_fg = COLOR_WHITE;
|
|
||||||
|
|
||||||
for (int g = 0; g < rom->count; g++) {
|
|
||||||
const uint8_t *glyph = &rom->data[g * PATCH_SZ];
|
|
||||||
|
|
||||||
// Try normal: glyph 1 = fg, glyph 0 = bg
|
|
||||||
float err_normal = 0;
|
|
||||||
for (int i = 0; i < PATCH_SZ; i++) {
|
|
||||||
int expected = glyph[i] ? 255 : 0;
|
|
||||||
int diff = patch[i] - expected;
|
|
||||||
err_normal += diff * diff;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (err_normal < best_error) {
|
|
||||||
best_error = err_normal;
|
|
||||||
best_glyph = g;
|
|
||||||
best_bg = COLOR_BLACK;
|
|
||||||
best_fg = COLOR_WHITE;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try inverted: glyph 0 = fg, glyph 1 = bg (skip if --no-invert-char)
|
|
||||||
if (!g_no_invert_char) {
|
|
||||||
float err_inverted = 0;
|
|
||||||
for (int i = 0; i < PATCH_SZ; i++) {
|
|
||||||
int expected = glyph[i] ? 0 : 255;
|
|
||||||
int diff = patch[i] - expected;
|
|
||||||
err_inverted += diff * diff;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (err_inverted < best_error) {
|
|
||||||
best_error = err_inverted;
|
|
||||||
best_glyph = g;
|
|
||||||
best_bg = COLOR_WHITE;
|
|
||||||
best_fg = COLOR_BLACK;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
*out_bg = best_bg;
|
|
||||||
*out_fg = best_fg;
|
|
||||||
return best_glyph;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Convert frame to text mode
|
|
||||||
void frame_to_text(const uint8_t *pixels, const FontROM *rom,
|
|
||||||
uint8_t *bg_col, uint8_t *fg_col, uint8_t *chars) {
|
|
||||||
uint8_t patch[PATCH_SZ];
|
|
||||||
|
|
||||||
for (int gr = 0; gr < GRID_H; gr++) {
|
|
||||||
for (int gc = 0; gc < GRID_W; gc++) {
|
|
||||||
int idx = gr * GRID_W + gc;
|
|
||||||
|
|
||||||
// Extract patch
|
|
||||||
for (int y = 0; y < CHAR_H; y++) {
|
|
||||||
for (int x = 0; x < CHAR_W; x++) {
|
|
||||||
int px = gc * CHAR_W + x;
|
|
||||||
int py = gr * CHAR_H + y;
|
|
||||||
patch[y * CHAR_W + x] = pixels[py * PIXEL_W + px];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find best match
|
|
||||||
chars[idx] = find_best_glyph(patch, rom, &bg_col[idx], &fg_col[idx]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return the current wall-clock time as nanoseconds since the UNIX epoch.
// The value comes from gettimeofday(), so its real resolution is microseconds.
uint64_t get_current_time_ns(void) {
    struct timeval now;
    gettimeofday(&now, NULL);

    const uint64_t whole_seconds_ns = (uint64_t)now.tv_sec * 1000000000ULL;
    const uint64_t fraction_ns = (uint64_t)now.tv_usec * 1000ULL;
    return whole_seconds_ns + fraction_ns;
}
|
|
||||||
|
|
||||||
// Parse MP2 packet header to get accurate packet size
|
|
||||||
int get_mp2_packet_size(uint8_t *header) {
|
|
||||||
int bitrate_index = (header[2] >> 4) & 0x0F;
|
|
||||||
int bitrates[] = {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384};
|
|
||||||
if (bitrate_index >= 15) return MP2_DEFAULT_PACKET_SIZE;
|
|
||||||
|
|
||||||
int bitrate = bitrates[bitrate_index];
|
|
||||||
if (bitrate == 0) return MP2_DEFAULT_PACKET_SIZE;
|
|
||||||
|
|
||||||
int sampling_freq_index = (header[2] >> 2) & 0x03;
|
|
||||||
int sampling_freqs[] = {44100, 48000, 32000, 0};
|
|
||||||
int sampling_freq = sampling_freqs[sampling_freq_index];
|
|
||||||
if (sampling_freq == 0) return MP2_DEFAULT_PACKET_SIZE;
|
|
||||||
|
|
||||||
int padding = (header[2] >> 1) & 0x01;
|
|
||||||
return (144 * bitrate * 1000) / sampling_freq + padding;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write Videotex header (32 bytes, similar to TAV but simpler)
|
|
||||||
void write_videotex_header(FILE *f, uint8_t fps, uint32_t total_frames) {
|
|
||||||
fwrite("\x1FTSVMTAV", 8, 1, f);
|
|
||||||
|
|
||||||
// Version: 1 (uint8)
|
|
||||||
fputc(1, f);
|
|
||||||
|
|
||||||
// Grid dimensions (uint8 each)
|
|
||||||
uint16_t width = GRID_W;
|
|
||||||
uint16_t height = GRID_H;
|
|
||||||
fwrite(&width, sizeof(uint16_t), 1, f); // cols = 80
|
|
||||||
fwrite(&height, sizeof(uint16_t), 1, f); // rows = 32
|
|
||||||
|
|
||||||
// FPS (uint8)
|
|
||||||
fputc(fps, f);
|
|
||||||
|
|
||||||
// Total frames (uint32, little-endian)
|
|
||||||
fwrite(&total_frames, sizeof(uint32_t), 1, f);
|
|
||||||
|
|
||||||
fputc(0, f); // wavelet filter type
|
|
||||||
fputc(0, f); // decomposition levels
|
|
||||||
fputc(0, f); // quantiser Y
|
|
||||||
fputc(0, f); // quantiser Co
|
|
||||||
fputc(0, f); // quantiser Cg
|
|
||||||
|
|
||||||
// Feature Flags
|
|
||||||
fputc(0x03, f); // bit 0 = has audio; bit 1 = has subtitle (Videotex is classified as subtitles)
|
|
||||||
|
|
||||||
// Video Flags
|
|
||||||
fputc(0x80, f); // bit 7 = has no video (Videotex is classified as subtitles)
|
|
||||||
|
|
||||||
|
|
||||||
fputc(0, f); // encoder quality level
|
|
||||||
fputc(0x02, f); // channel layout: Y only
|
|
||||||
fputc(0, f); // entropy coder
|
|
||||||
|
|
||||||
fputc(0, f); // reserved
|
|
||||||
fputc(0, f); // reserved
|
|
||||||
|
|
||||||
fputc(0, f); // device orientation: no rotation
|
|
||||||
fputc(0, f); // file role: generic
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write extended header packet with metadata
|
|
||||||
// Returns the file offset where ENDT value is written (for later update)
|
|
||||||
long write_extended_header(FILE *f, uint64_t creation_time_ns, const char *ffmpeg_version) {
|
|
||||||
fputc(PACKET_EXTENDED_HDR, f);
|
|
||||||
|
|
||||||
// Helper macros for key-value pairs
|
|
||||||
#define WRITE_KV_UINT64(key_str, value) do { \
|
|
||||||
fwrite(key_str, 1, 4, f); \
|
|
||||||
uint8_t value_type = 0x04; /* Uint64 */ \
|
|
||||||
fwrite(&value_type, 1, 1, f); \
|
|
||||||
uint64_t val = (value); \
|
|
||||||
fwrite(&val, sizeof(uint64_t), 1, f); \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
#define WRITE_KV_BYTES(key_str, data, len) do { \
|
|
||||||
fwrite(key_str, 1, 4, f); \
|
|
||||||
uint8_t value_type = 0x10; /* Bytes */ \
|
|
||||||
fwrite(&value_type, 1, 1, f); \
|
|
||||||
uint16_t length = (len); \
|
|
||||||
fwrite(&length, sizeof(uint16_t), 1, f); \
|
|
||||||
fwrite((data), 1, (len), f); \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
// Count key-value pairs (BGNT, ENDT, CDAT, VNDR, FMPG)
|
|
||||||
uint16_t num_pairs = ffmpeg_version ? 5 : 4; // FMPG is optional
|
|
||||||
fwrite(&num_pairs, sizeof(uint16_t), 1, f);
|
|
||||||
|
|
||||||
// BGNT: Video begin time (0 for frame 0)
|
|
||||||
WRITE_KV_UINT64("BGNT", 0ULL);
|
|
||||||
|
|
||||||
// ENDT: Video end time (placeholder, will be updated at end)
|
|
||||||
long endt_offset = ftell(f);
|
|
||||||
WRITE_KV_UINT64("ENDT", 0ULL);
|
|
||||||
|
|
||||||
// CDAT: Creation time in nanoseconds since UNIX epoch
|
|
||||||
WRITE_KV_UINT64("CDAT", creation_time_ns);
|
|
||||||
|
|
||||||
// VNDR: Encoder name and version
|
|
||||||
const char *vendor_str = ENCODER_VENDOR_STRING;
|
|
||||||
WRITE_KV_BYTES("VNDR", vendor_str, strlen(vendor_str));
|
|
||||||
|
|
||||||
// FMPG: FFmpeg version (if available)
|
|
||||||
if (ffmpeg_version) {
|
|
||||||
WRITE_KV_BYTES("FMPG", ffmpeg_version, strlen(ffmpeg_version));
|
|
||||||
}
|
|
||||||
|
|
||||||
#undef WRITE_KV_UINT64
|
|
||||||
#undef WRITE_KV_BYTES
|
|
||||||
|
|
||||||
// Return offset of ENDT value (skip key, type byte)
|
|
||||||
return endt_offset + 4 + 1; // 4 bytes for "ENDT", 1 byte for type
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write font ROM packet (SSF packet type 0x30)
|
|
||||||
void write_fontrom_packet(FILE *f, const uint8_t *rom_data, size_t data_size, uint8_t opcode) {
|
|
||||||
// Prepare padded ROM data (pad to FONTROM_PADDED_SIZE with zeros)
|
|
||||||
uint8_t *padded_data = calloc(1, FONTROM_PADDED_SIZE);
|
|
||||||
memcpy(padded_data, rom_data, data_size);
|
|
||||||
|
|
||||||
// Packet structure:
|
|
||||||
// [type:0x30][size:uint32][index:uint24][opcode:uint8][length:uint16][data][terminator:0x00]
|
|
||||||
uint32_t packet_size = 3 + 1 + 2 + FONTROM_PADDED_SIZE + 1;
|
|
||||||
|
|
||||||
// Write packet type and size
|
|
||||||
fputc(PACKET_SSF, f);
|
|
||||||
fwrite(&packet_size, sizeof(uint32_t), 1, f);
|
|
||||||
|
|
||||||
// Write SSF payload
|
|
||||||
// Index (3 bytes, always 0 for font ROM)
|
|
||||||
fputc(0, f);
|
|
||||||
fputc(0, f);
|
|
||||||
fputc(0, f);
|
|
||||||
|
|
||||||
// Opcode (0x80=lowrom, 0x81=highrom)
|
|
||||||
fputc(opcode, f);
|
|
||||||
|
|
||||||
// Payload length (uint16, little-endian)
|
|
||||||
uint16_t payload_len = FONTROM_PADDED_SIZE;
|
|
||||||
fwrite(&payload_len, sizeof(uint16_t), 1, f);
|
|
||||||
|
|
||||||
// Font data (padded to 1920 bytes)
|
|
||||||
fwrite(padded_data, 1, FONTROM_PADDED_SIZE, f);
|
|
||||||
|
|
||||||
// Terminator
|
|
||||||
fputc(0x00, f);
|
|
||||||
|
|
||||||
free(padded_data);
|
|
||||||
|
|
||||||
fprintf(stderr, "Font ROM uploaded: %zu bytes (padded to %d), opcode 0x%02X\n",
|
|
||||||
data_size, FONTROM_PADDED_SIZE, opcode);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write timecode packet (nanoseconds)
|
|
||||||
void write_timecode(FILE *f, uint64_t timecode_ns) {
|
|
||||||
fputc(PACKET_TIMECODE, f);
|
|
||||||
fwrite(&timecode_ns, sizeof(uint64_t), 1, f);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write sync packet
|
|
||||||
void write_sync(FILE *f) {
|
|
||||||
fputc(PACKET_SYNC, f);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write MP2 audio packet
|
|
||||||
void write_audio_mp2(FILE *f, const uint8_t *data, uint32_t size) {
|
|
||||||
fputc(PACKET_AUDIO_MP2, f);
|
|
||||||
fwrite(&size, sizeof(uint32_t), 1, f);
|
|
||||||
fwrite(data, 1, size, f);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write text packet with separated arrays (better compression)
|
|
||||||
void write_text_packet(FILE *f, const uint8_t *bg_col, const uint8_t *fg_col,
|
|
||||||
const uint8_t *chars, int rows, int cols) {
|
|
||||||
int grid_size = rows * cols;
|
|
||||||
|
|
||||||
// Prepare uncompressed data: [rows][cols][fg-array][bg-array][char-array]
|
|
||||||
// Separated arrays compress much better (fg/bg are just 0xF0/0xFE runs)
|
|
||||||
size_t uncompressed_size = 2 + grid_size * 3;
|
|
||||||
uint8_t *uncompressed = malloc(uncompressed_size);
|
|
||||||
|
|
||||||
uncompressed[0] = rows;
|
|
||||||
uncompressed[1] = cols;
|
|
||||||
|
|
||||||
// Copy arrays in order: foreground, background, characters
|
|
||||||
memcpy(&uncompressed[2], fg_col, grid_size); // Foreground first
|
|
||||||
memcpy(&uncompressed[2 + grid_size], bg_col, grid_size); // Background second
|
|
||||||
memcpy(&uncompressed[2 + grid_size * 2], chars, grid_size); // Characters third
|
|
||||||
|
|
||||||
// Compress with Zstd
|
|
||||||
size_t max_compressed = ZSTD_compressBound(uncompressed_size);
|
|
||||||
uint8_t *compressed = malloc(max_compressed);
|
|
||||||
size_t compressed_size = ZSTD_compress(compressed, max_compressed,
|
|
||||||
uncompressed, uncompressed_size, 3);
|
|
||||||
|
|
||||||
if (ZSTD_isError(compressed_size)) {
|
|
||||||
fprintf(stderr, "Zstd compression error\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write packet: [type][size][data]
|
|
||||||
fputc(PACKET_TEXT, f);
|
|
||||||
uint32_t size32 = compressed_size;
|
|
||||||
fwrite(&size32, 4, 1, f);
|
|
||||||
fwrite(compressed, compressed_size, 1, f);
|
|
||||||
|
|
||||||
free(compressed);
|
|
||||||
free(uncompressed);
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
|
||||||
if (argc < 7) {
|
|
||||||
fprintf(stderr, "Usage: %s -i <video> -f <font.chr> -o <output.tav> [--no-invert-char]\n", argv[0]);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char *input_video = NULL;
|
|
||||||
const char *font_path = NULL;
|
|
||||||
const char *output_path = NULL;
|
|
||||||
|
|
||||||
for (int i = 1; i < argc; i++) {
|
|
||||||
if (strcmp(argv[i], "-i") == 0 && i+1 < argc) input_video = argv[++i];
|
|
||||||
else if (strcmp(argv[i], "-f") == 0 && i+1 < argc) font_path = argv[++i];
|
|
||||||
else if (strcmp(argv[i], "-o") == 0 && i+1 < argc) output_path = argv[++i];
|
|
||||||
else if (strcmp(argv[i], "--no-invert-char") == 0) g_no_invert_char = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!input_video || !font_path || !output_path) {
|
|
||||||
fprintf(stderr, "Missing required arguments\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (g_no_invert_char) {
|
|
||||||
fprintf(stderr, "Inverted character matching disabled\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Generate random temp filename for audio
|
|
||||||
generate_random_filename(TEMP_AUDIO_FILE);
|
|
||||||
|
|
||||||
// Capture creation time and FFmpeg version for extended header
|
|
||||||
uint64_t creation_time_ns = get_current_time_ns();
|
|
||||||
char *ffmpeg_version = get_ffmpeg_version();
|
|
||||||
|
|
||||||
// Detect video FPS
|
|
||||||
float fps_float = detect_fps(input_video);
|
|
||||||
uint8_t fps = (uint8_t)(fps_float + 0.5f); // Round to nearest integer
|
|
||||||
fprintf(stderr, "Detected FPS: %.2f (using %d in TAV header)\n", fps_float, fps);
|
|
||||||
|
|
||||||
// Load font ROM
|
|
||||||
FontROM *rom = load_font_rom(font_path);
|
|
||||||
if (!rom) {
|
|
||||||
fprintf(stderr, "Failed to load font ROM: %s\n", font_path);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Open FFmpeg pipe for grayscale frames at 560×448
|
|
||||||
char ffmpeg_cmd[1024];
|
|
||||||
snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
|
|
||||||
"ffmpeg -i \"%s\" -vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" "
|
|
||||||
"-f rawvideo -pix_fmt gray - 2>/dev/null",
|
|
||||||
input_video, PIXEL_W, PIXEL_H, PIXEL_W, PIXEL_H);
|
|
||||||
|
|
||||||
fprintf(stderr, "Opening video stream...\n");
|
|
||||||
FILE *video_pipe = popen(ffmpeg_cmd, "r");
|
|
||||||
if (!video_pipe) {
|
|
||||||
fprintf(stderr, "Failed to open FFmpeg pipe\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract MP2 audio to temporary file using libtwolame
|
|
||||||
fprintf(stderr, "Extracting MP2 audio...\n");
|
|
||||||
char audio_cmd[1024];
|
|
||||||
snprintf(audio_cmd, sizeof(audio_cmd),
|
|
||||||
"ffmpeg -v quiet -i \"%s\" -acodec libtwolame -psymodel 4 -b:a 224k -ar %d -ac 2 -y \"%s\" 2>/dev/null",
|
|
||||||
input_video, SAMPLE_RATE, TEMP_AUDIO_FILE);
|
|
||||||
|
|
||||||
int audio_result = system(audio_cmd);
|
|
||||||
if (audio_result != 0) {
|
|
||||||
fprintf(stderr, "Warning: Audio extraction failed, continuing without audio\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Open MP2 file for reading
|
|
||||||
FILE *mp2_file = NULL;
|
|
||||||
long audio_remaining = 0;
|
|
||||||
if (audio_result == 0) {
|
|
||||||
mp2_file = fopen(TEMP_AUDIO_FILE, "rb");
|
|
||||||
if (mp2_file) {
|
|
||||||
fseek(mp2_file, 0, SEEK_END);
|
|
||||||
audio_remaining = ftell(mp2_file);
|
|
||||||
fseek(mp2_file, 0, SEEK_SET);
|
|
||||||
fprintf(stderr, "Audio ready: %ld bytes\n", audio_remaining);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Open output file
|
|
||||||
FILE *out = fopen(output_path, "wb");
|
|
||||||
if (!out) {
|
|
||||||
fprintf(stderr, "Failed to open output file\n");
|
|
||||||
pclose(video_pipe);
|
|
||||||
if (mp2_file) fclose(mp2_file);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write Videotex header with placeholder total_frames (will update at end)
|
|
||||||
long header_offset = ftell(out);
|
|
||||||
write_videotex_header(out, fps, 0);
|
|
||||||
|
|
||||||
// Write extended header packet (before first timecode)
|
|
||||||
long endt_offset = write_extended_header(out, creation_time_ns, ffmpeg_version);
|
|
||||||
|
|
||||||
// Upload font ROM to TSVM (split into lowrom and highrom)
|
|
||||||
fprintf(stderr, "Uploading font ROM to TSVM...\n");
|
|
||||||
FILE *rom_file = fopen(font_path, "rb");
|
|
||||||
if (rom_file) {
|
|
||||||
fseek(rom_file, 0, SEEK_END);
|
|
||||||
long rom_size = ftell(rom_file);
|
|
||||||
fseek(rom_file, 0, SEEK_SET);
|
|
||||||
|
|
||||||
uint8_t *raw_rom = malloc(rom_size);
|
|
||||||
if (raw_rom && fread(raw_rom, 1, rom_size, rom_file) == rom_size) {
|
|
||||||
// Split into lowrom and highrom
|
|
||||||
size_t bytes_per_half = (GLYPHS_PER_ROM * 14); // 128 glyphs × 14 bytes = 1792
|
|
||||||
|
|
||||||
// Write lowrom (first 128 glyphs)
|
|
||||||
if (rom_size >= bytes_per_half) {
|
|
||||||
write_fontrom_packet(out, raw_rom, bytes_per_half, SSF_OPCODE_LOWROM);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write highrom (second 128 glyphs)
|
|
||||||
if (rom_size >= bytes_per_half * 2) {
|
|
||||||
write_fontrom_packet(out, raw_rom + bytes_per_half, bytes_per_half, SSF_OPCODE_HIGHROM);
|
|
||||||
} else if (rom_size > bytes_per_half) {
|
|
||||||
// Partial highrom
|
|
||||||
write_fontrom_packet(out, raw_rom + bytes_per_half, rom_size - bytes_per_half, SSF_OPCODE_HIGHROM);
|
|
||||||
}
|
|
||||||
|
|
||||||
free(raw_rom);
|
|
||||||
}
|
|
||||||
fclose(rom_file);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Allocate buffers
|
|
||||||
size_t frame_size = PIXEL_W * PIXEL_H;
|
|
||||||
uint8_t *gray_pixels = malloc(frame_size);
|
|
||||||
uint8_t *bg_col = malloc(GRID_W * GRID_H);
|
|
||||||
uint8_t *fg_col = malloc(GRID_W * GRID_H);
|
|
||||||
uint8_t *chars = malloc(GRID_W * GRID_H);
|
|
||||||
|
|
||||||
// Audio buffer for MP2 packets
|
|
||||||
#define MP2_BUFFER_SIZE 2048
|
|
||||||
uint8_t *audio_buffer = malloc(MP2_BUFFER_SIZE);
|
|
||||||
|
|
||||||
uint32_t frame_num = 0;
|
|
||||||
uint64_t total_audio_bytes = 0;
|
|
||||||
|
|
||||||
// Audio timing calculation
|
|
||||||
double frame_audio_time = 1.0 / fps_float; // Time per video frame
|
|
||||||
double packet_audio_time = (double)MP2_DEFAULT_PACKET_SIZE / SAMPLE_RATE; // Time per audio packet
|
|
||||||
double packets_per_frame = frame_audio_time / packet_audio_time;
|
|
||||||
double audio_frames_in_buffer = 0.0; // Simulated audio buffer level
|
|
||||||
|
|
||||||
fprintf(stderr, "Encoding text-mode video (%dx%d chars, %dx%d pixels)...\n",
|
|
||||||
GRID_W, GRID_H, PIXEL_W, PIXEL_H);
|
|
||||||
|
|
||||||
// Track encoding start time
|
|
||||||
struct timeval start_time, now;
|
|
||||||
gettimeofday(&start_time, NULL);
|
|
||||||
|
|
||||||
// Read and process frames
|
|
||||||
while (fread(gray_pixels, 1, frame_size, video_pipe) == frame_size) {
|
|
||||||
// Calculate timecode in nanoseconds
|
|
||||||
uint64_t timecode_ns = (uint64_t)(frame_num * 1000000000.0 / fps_float);
|
|
||||||
|
|
||||||
// Write audio packets for this frame (based on timing)
|
|
||||||
if (mp2_file && audio_remaining > 0) {
|
|
||||||
// Simulate buffer consumption
|
|
||||||
audio_frames_in_buffer -= packets_per_frame;
|
|
||||||
|
|
||||||
// Calculate how many packets we need to maintain buffer
|
|
||||||
double target_level = fmax(packets_per_frame, 2.0);
|
|
||||||
int packets_to_insert = 0;
|
|
||||||
|
|
||||||
if (audio_frames_in_buffer < target_level) {
|
|
||||||
double deficit = target_level - audio_frames_in_buffer;
|
|
||||||
packets_to_insert = (int)ceil(deficit);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Insert the calculated number of audio packets
|
|
||||||
for (int q = 0; q < packets_to_insert; q++) {
|
|
||||||
// Peek at header to get actual packet size
|
|
||||||
long pos = ftell(mp2_file);
|
|
||||||
uint8_t header[4];
|
|
||||||
if (fread(header, 1, 4, mp2_file) != 4) break;
|
|
||||||
fseek(mp2_file, pos, SEEK_SET); // Rewind to re-read with full packet
|
|
||||||
|
|
||||||
int actual_packet_size = get_mp2_packet_size(header);
|
|
||||||
size_t bytes_to_read = actual_packet_size;
|
|
||||||
|
|
||||||
// Clamp to remaining audio
|
|
||||||
if (bytes_to_read > audio_remaining) {
|
|
||||||
bytes_to_read = audio_remaining;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sanity check
|
|
||||||
if (bytes_to_read > MP2_BUFFER_SIZE) {
|
|
||||||
fprintf(stderr, "ERROR: MP2 packet size %zu exceeds buffer\n", bytes_to_read);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read full packet
|
|
||||||
size_t bytes_read = fread(audio_buffer, 1, bytes_to_read, mp2_file);
|
|
||||||
if (bytes_read == 0) break;
|
|
||||||
|
|
||||||
// Write MP2 audio packet
|
|
||||||
write_audio_mp2(out, audio_buffer, bytes_read);
|
|
||||||
|
|
||||||
// Track audio
|
|
||||||
audio_remaining -= bytes_read;
|
|
||||||
audio_frames_in_buffer++;
|
|
||||||
total_audio_bytes += bytes_read;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write timecode
|
|
||||||
write_timecode(out, timecode_ns);
|
|
||||||
|
|
||||||
// Convert to text mode
|
|
||||||
frame_to_text(gray_pixels, rom, bg_col, fg_col, chars);
|
|
||||||
|
|
||||||
// Write text packet (treated as I-frame)
|
|
||||||
write_text_packet(out, bg_col, fg_col, chars, GRID_H, GRID_W);
|
|
||||||
|
|
||||||
// Write sync packet after each frame
|
|
||||||
write_sync(out);
|
|
||||||
|
|
||||||
frame_num++;
|
|
||||||
if (frame_num % 30 == 0) {
|
|
||||||
// Calculate encoding speed
|
|
||||||
gettimeofday(&now, NULL);
|
|
||||||
double elapsed = (now.tv_sec - start_time.tv_sec) +
|
|
||||||
(now.tv_usec - start_time.tv_usec) / 1000000.0;
|
|
||||||
double encoding_fps = frame_num / elapsed;
|
|
||||||
|
|
||||||
fprintf(stderr, "\rEncoded %u frames (%.1f fps)", frame_num, encoding_fps);
|
|
||||||
fflush(stderr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write any remaining audio
|
|
||||||
if (mp2_file && audio_remaining > 0) {
|
|
||||||
while (audio_remaining > 0) {
|
|
||||||
// Peek at header to get actual packet size
|
|
||||||
long pos = ftell(mp2_file);
|
|
||||||
uint8_t header[4];
|
|
||||||
if (fread(header, 1, 4, mp2_file) != 4) break;
|
|
||||||
fseek(mp2_file, pos, SEEK_SET);
|
|
||||||
|
|
||||||
int actual_packet_size = get_mp2_packet_size(header);
|
|
||||||
size_t bytes_to_read = (actual_packet_size < audio_remaining) ? actual_packet_size : audio_remaining;
|
|
||||||
|
|
||||||
if (bytes_to_read > MP2_BUFFER_SIZE) break;
|
|
||||||
|
|
||||||
size_t bytes_read = fread(audio_buffer, 1, bytes_to_read, mp2_file);
|
|
||||||
if (bytes_read == 0) break;
|
|
||||||
|
|
||||||
write_audio_mp2(out, audio_buffer, bytes_read);
|
|
||||||
audio_remaining -= bytes_read;
|
|
||||||
total_audio_bytes += bytes_read;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Final timing
|
|
||||||
gettimeofday(&now, NULL);
|
|
||||||
double total_time = (now.tv_sec - start_time.tv_sec) +
|
|
||||||
(now.tv_usec - start_time.tv_usec) / 1000000.0;
|
|
||||||
double final_fps = frame_num / total_time;
|
|
||||||
|
|
||||||
fprintf(stderr, "\nDone! Encoded %u frames in %.2fs (%.1f fps)\n",
|
|
||||||
frame_num, total_time, final_fps);
|
|
||||||
fprintf(stderr, "Audio: %llu bytes (%.2f MB)\n",
|
|
||||||
(unsigned long long)total_audio_bytes,
|
|
||||||
total_audio_bytes / 1024.0 / 1024.0);
|
|
||||||
|
|
||||||
// Update total_frames in header
|
|
||||||
if (frame_num > 0) {
|
|
||||||
fseek(out, header_offset + 14, SEEK_SET); // Offset to total_frames field
|
|
||||||
fwrite(&frame_num, sizeof(uint32_t), 1, out);
|
|
||||||
fprintf(stderr, "Updated total_frames in header: %u\n", frame_num);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update ENDT in extended header (calculate end time for last frame)
|
|
||||||
if (frame_num > 0) {
|
|
||||||
// Calculate duration: (frame_num - 1) frames * (1/fps) seconds in nanoseconds
|
|
||||||
uint64_t duration_ns = (uint64_t)((frame_num - 1) * 1000000000.0 / fps_float);
|
|
||||||
uint64_t endt_ns = duration_ns;
|
|
||||||
|
|
||||||
fseek(out, endt_offset, SEEK_SET);
|
|
||||||
fwrite(&endt_ns, sizeof(uint64_t), 1, out);
|
|
||||||
fprintf(stderr, "Updated ENDT in extended header: %llu ns (%.3f seconds)\n",
|
|
||||||
(unsigned long long)endt_ns, endt_ns / 1000000000.0);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Cleanup
|
|
||||||
pclose(video_pipe);
|
|
||||||
if (mp2_file) {
|
|
||||||
fclose(mp2_file);
|
|
||||||
unlink(TEMP_AUDIO_FILE); // Remove temporary audio file
|
|
||||||
}
|
|
||||||
fclose(out);
|
|
||||||
free(gray_pixels);
|
|
||||||
free(bg_col);
|
|
||||||
free(fg_col);
|
|
||||||
free(chars);
|
|
||||||
free(audio_buffer);
|
|
||||||
free(rom->data);
|
|
||||||
free(rom);
|
|
||||||
if (ffmpeg_version) free(ffmpeg_version);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,169 +0,0 @@
|
|||||||
// Affine estimation for TAV mesh warping
|
|
||||||
// This file contains logic to estimate per-cell affine transforms from block motion
|
|
||||||
|
|
||||||
#include <cmath>
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstring>
|
|
||||||
|
|
||||||
extern "C" {
|
|
||||||
|
|
||||||
// Estimate the motion model of one mesh cell from a dense flow field.
//
// The flow is sampled on a 4x4 grid spanning the cell and two models are
// fitted to the 16 samples:
//   * translation only: the average motion vector;
//   * affine: v = A * p + t, with p the sample position relative to the cell
//     centre, obtained by least squares (normal equations, Cramer's rule).
// The affine model is chosen when it lowers the squared residual of the
// translation model by more than `threshold` (relative improvement).
//
//   flow_x, flow_y    per-pixel motion, row-major arrays of width * height
//   width, height     dimensions of the flow field in pixels
//   cell_x, cell_y    cell position in mesh coordinates
//   cell_w, cell_h    cell size in pixels
//   threshold         required relative residual improvement (e.g. 0.10 = 10%)
//   out_tx, out_ty    translation, in 1/8 pixel units
//   out_a11..out_a22  matrix A in 1/256 fixed point; set to 256/0/0/256 when
//                     the translation model is chosen
//
// Returns 1 when the affine model is used, 0 for translation only (including
// the case where the normal equations are singular).
int estimate_cell_affine(
    const float *flow_x, const float *flow_y,
    int width, int height,
    int cell_x, int cell_y,
    int cell_w, int cell_h,
    float threshold,
    short *out_tx, short *out_ty,
    short *out_a11, short *out_a12,
    short *out_a21, short *out_a22
) {
    // Compute cell bounding box
    int x_start = cell_x * cell_w;
    int y_start = cell_y * cell_h;
    int x_end = (cell_x + 1) * cell_w;
    int y_end = (cell_y + 1) * cell_h;
    if (x_end > width) x_end = width;
    if (y_end > height) y_end = height;

    // Sample motion vectors from a 4x4 grid within the cell
    const int samples_x = 4;
    const int samples_y = 4;
    float sample_motion_x[16];
    float sample_motion_y[16];
    int sample_px[16];
    int sample_py[16];
    int n_samples = 0;

    const int centre_x = (x_start + x_end) / 2;
    const int centre_y = (y_start + y_end) / 2;

    for (int sy = 0; sy < samples_y; sy++) {
        for (int sx = 0; sx < samples_x; sx++) {
            int px = x_start + (x_end - x_start) * sx / (samples_x - 1);
            int py = y_start + (y_end - y_start) * sy / (samples_y - 1);

            // Keep every sample inside the flow field on both sides.
            if (px >= width) px = width - 1;
            if (py >= height) py = height - 1;
            if (px < 0) px = 0;
            if (py < 0) py = 0;

            int idx = py * width + px;
            sample_motion_x[n_samples] = flow_x[idx];
            sample_motion_y[n_samples] = flow_y[idx];
            sample_px[n_samples] = px - centre_x;  // Relative to cell centre
            sample_py[n_samples] = py - centre_y;
            n_samples++;
        }
    }

    // 1. Translation-only model (average motion)
    float avg_dx = 0, avg_dy = 0;
    for (int i = 0; i < n_samples; i++) {
        avg_dx += sample_motion_x[i];
        avg_dy += sample_motion_y[i];
    }
    avg_dx /= n_samples;
    avg_dy /= n_samples;

    // Translation residual
    float trans_residual = 0;
    for (int i = 0; i < n_samples; i++) {
        float dx_err = sample_motion_x[i] - avg_dx;
        float dy_err = sample_motion_y[i] - avg_dy;
        trans_residual += dx_err * dx_err + dy_err * dy_err;
    }

    // 2. Affine model by least squares. For each velocity component v the
    //    normal equations are
    //      [sum_xx sum_xy sum_x] [a]   [sum_v_x]
    //      [sum_xy sum_yy sum_y] [b] = [sum_v_y]
    //      [sum_x  sum_y  n    ] [t]   [sum_v  ]
    double sum_x = 0, sum_y = 0, sum_xx = 0, sum_yy = 0, sum_xy = 0;
    double sum_vx = 0, sum_vy = 0, sum_vx_x = 0, sum_vx_y = 0;
    double sum_vy_x = 0, sum_vy_y = 0;

    for (int i = 0; i < n_samples; i++) {
        double px = sample_px[i];
        double py = sample_py[i];
        double vx = sample_motion_x[i];
        double vy = sample_motion_y[i];

        sum_x += px;
        sum_y += py;
        sum_xx += px * px;
        sum_yy += py * py;
        sum_xy += px * py;
        sum_vx += vx;
        sum_vy += vy;
        sum_vx_x += vx * px;
        sum_vx_y += vx * py;
        sum_vy_x += vy * px;
        sum_vy_y += vy * py;
    }

    // Determinant of the 3x3 normal matrix
    double n = n_samples;
    double det = n * sum_xx * sum_yy + 2 * sum_x * sum_y * sum_xy -
                 sum_xx * sum_y * sum_y - sum_yy * sum_x * sum_x - n * sum_xy * sum_xy;

    if (fabs(det) < 1e-6) {
        // Singular matrix, fall back to translation
        *out_tx = (short)(avg_dx * 8.0f);
        *out_ty = (short)(avg_dy * 8.0f);
        *out_a11 = 256;  // Identity
        *out_a12 = 0;
        *out_a21 = 0;
        *out_a22 = 256;
        return 0;  // Translation only
    }

    // Cramer's rule: replace the first (resp. second) column of the normal
    // matrix with the right-hand side and expand the determinant. The terms
    // carrying sum_x / sum_y matter whenever the sample offsets do not sum to
    // exactly zero (integer rounding, clamping at the frame border).
    double a11 = (sum_vx_x * sum_yy * n - sum_vx_y * sum_xy * n - sum_vx * sum_x * sum_yy +
                  sum_vx * sum_xy * sum_y + sum_vx_y * sum_x * sum_y - sum_vx_x * sum_y * sum_y) / det;
    double a12 = (sum_vx_y * sum_xx * n - sum_vx_x * sum_xy * n + sum_vx * sum_x * sum_xy -
                  sum_vx * sum_xx * sum_y + sum_vx_x * sum_x * sum_y - sum_vx_y * sum_x * sum_x) / det;
    double tx = (sum_vx - a11 * sum_x - a12 * sum_y) / n;

    double a21 = (sum_vy_x * sum_yy * n - sum_vy_y * sum_xy * n - sum_vy * sum_x * sum_yy +
                  sum_vy * sum_xy * sum_y + sum_vy_y * sum_x * sum_y - sum_vy_x * sum_y * sum_y) / det;
    double a22 = (sum_vy_y * sum_xx * n - sum_vy_x * sum_xy * n + sum_vy * sum_x * sum_xy -
                  sum_vy * sum_xx * sum_y + sum_vy_x * sum_x * sum_y - sum_vy_y * sum_x * sum_x) / det;
    double ty = (sum_vy - a21 * sum_x - a22 * sum_y) / n;

    // Affine residual
    float affine_residual = 0;
    for (int i = 0; i < n_samples; i++) {
        double px = sample_px[i];
        double py = sample_py[i];
        double pred_vx = a11 * px + a12 * py + tx;
        double pred_vy = a21 * px + a22 * py + ty;
        double dx_err = sample_motion_x[i] - pred_vx;
        double dy_err = sample_motion_y[i] - pred_vy;
        affine_residual += dx_err * dx_err + dy_err * dy_err;
    }

    // Decision: use affine if the residual improves by more than threshold.
    // The small epsilon keeps the ratio defined when the translation model
    // already fits perfectly.
    float improvement = (trans_residual - affine_residual) / (trans_residual + 1e-6f);

    if (improvement > threshold) {
        // Use affine
        *out_tx = (short)(tx * 8.0f);
        *out_ty = (short)(ty * 8.0f);
        *out_a11 = (short)(a11 * 256.0);
        *out_a12 = (short)(a12 * 256.0);
        *out_a21 = (short)(a21 * 256.0);
        *out_a22 = (short)(a22 * 256.0);
        return 1;  // Affine
    } else {
        // Use translation
        *out_tx = (short)(avg_dx * 8.0f);
        *out_ty = (short)(avg_dy * 8.0f);
        *out_a11 = 256;  // Identity
        *out_a12 = 0;
        *out_a21 = 0;
        *out_a22 = 256;
        return 0;  // Translation only
    }
}
|
|
||||||
|
|
||||||
} // extern "C"
|
|
||||||
Binary file not shown.
@@ -1,65 +0,0 @@
|
|||||||
// Simple coefficient preprocessing for better compression
|
|
||||||
// Insert right before Zstd compression
|
|
||||||
|
|
||||||
#ifndef COEFFICIENT_COMPRESS_H
|
|
||||||
#define COEFFICIENT_COMPRESS_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
// Pack coefficients as a significance map followed by the non-zero values.
//
// Output layout in output_buffer:
//   [significance map: (coeff_count + 7) / 8 bytes, bit (i % 8) of byte
//    (i / 8) set when coeffs[i] != 0]
//   [non-zero coefficients in input order, sizeof(int16_t) bytes each, in the
//    host's native byte order]
//
//   coeffs         input coefficients (not modified)
//   coeff_count    number of input coefficients; values <= 0 produce no output
//   output_buffer  destination; must hold the map plus up to coeff_count values
//
// Returns the number of bytes written to output_buffer.
//
// The value area starts right after the map and is therefore not guaranteed
// to be 2-byte aligned, so values are stored with memcpy instead of through
// an int16_t pointer.
static size_t preprocess_coefficients(int16_t *coeffs, int coeff_count, uint8_t *output_buffer) {
    if (coeff_count <= 0) {
        return 0;
    }

    const size_t map_bytes = ((size_t)coeff_count + 7) / 8;  // Round up to whole bytes
    uint8_t *sig_map = output_buffer;
    uint8_t *values = output_buffer + map_bytes;

    // Clear significance map
    memset(sig_map, 0, map_bytes);

    // Fill significance map and append non-zero values
    size_t nonzero_count = 0;
    for (int i = 0; i < coeff_count; i++) {
        if (coeffs[i] == 0) {
            continue;
        }

        sig_map[i / 8] |= (uint8_t)(1u << (i % 8));
        memcpy(values + nonzero_count * sizeof(int16_t), &coeffs[i], sizeof(int16_t));
        nonzero_count++;
    }

    return map_bytes + nonzero_count * sizeof(int16_t);
}
|
|
||||||
|
|
||||||
// Decoder: reconstruct coefficients from a significance map.
//
// compressed_data  buffer laid out as (coeff_count + 7) / 8 significance-map
//                  bytes followed by one host-order int16_t per set bit
// coeff_count      number of coefficients to reconstruct; values <= 0 are a no-op
// output_coeffs    receives coeff_count values; positions whose map bit is
//                  clear are set to 0
static void postprocess_coefficients(uint8_t *compressed_data, int coeff_count, int16_t *output_coeffs) {
    if (coeff_count <= 0) return;

    const size_t map_bytes = ((size_t)coeff_count + 7u) / 8u;
    const uint8_t *sig_map = compressed_data;
    const uint8_t *value_in = compressed_data + map_bytes;

    // Everything not flagged in the map is zero.
    memset(output_coeffs, 0, (size_t)coeff_count * sizeof(int16_t));

    for (int i = 0; i < coeff_count; i++) {
        if ((sig_map[i / 8] & (1u << (i % 8))) == 0) continue;

        // The value area may start at an odd address, so read bytewise rather
        // than through an int16_t pointer.
        memcpy(&output_coeffs[i], value_in, sizeof(int16_t));
        value_in += sizeof(int16_t);
    }
}
|
|
||||||
|
|
||||||
#endif // COEFFICIENT_COMPRESS_H
|
|
||||||
@@ -1,39 +0,0 @@
|
|||||||
#ifndef TAD32_DECODER_H
|
|
||||||
#define TAD32_DECODER_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stddef.h>
|
|
||||||
|
|
||||||
// TAD32 (Terrarum Advanced Audio - PCM32f version) Decoder
|
|
||||||
// DWT-based perceptual audio codec for TSVM
|
|
||||||
// Shared decoder library used by both decoder_tad (standalone) and decoder_tav (video decoder)
|
|
||||||
|
|
||||||
// Constants (must match encoder)
|
|
||||||
#define TAD32_SAMPLE_RATE 32000
|
|
||||||
#define TAD32_CHANNELS 2 // Stereo
|
|
||||||
#define TAD_DEFAULT_CHUNK_SIZE 32768 // Default chunk size for standalone TAD files
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Decode audio chunk with TAD32 codec
|
|
||||||
*
|
|
||||||
* @param input Input TAD32 chunk data
|
|
||||||
* @param input_size Size of input buffer
|
|
||||||
* @param pcmu8_stereo Output PCMu8 stereo samples (interleaved L,R)
|
|
||||||
* @param bytes_consumed [out] Number of bytes consumed from input
|
|
||||||
* @param samples_decoded [out] Number of samples decoded per channel
|
|
||||||
* @return 0 on success, -1 on error
|
|
||||||
*
|
|
||||||
* Input format:
|
|
||||||
* uint16 sample_count (samples per channel)
|
|
||||||
* uint8 max_index (maximum quantisation index)
|
|
||||||
* uint32 payload_size (bytes in payload)
|
|
||||||
* * payload (encoded M/S data, Zstd-compressed with EZBC)
|
|
||||||
*
|
|
||||||
* Output format:
|
|
||||||
* PCMu8 stereo interleaved (8-bit unsigned PCM, L,R pairs)
|
|
||||||
* Range: [0, 255] where 128 = silence
|
|
||||||
*/
|
|
||||||
int tad32_decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_stereo,
|
|
||||||
size_t *bytes_consumed, size_t *samples_decoded);
|
|
||||||
|
|
||||||
#endif // TAD32_DECODER_H
|
|
||||||
@@ -1,63 +0,0 @@
|
|||||||
#ifndef TAD32_ENCODER_H
|
|
||||||
#define TAD32_ENCODER_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stddef.h>
|
|
||||||
|
|
||||||
// TAD32 (Terrarum Advanced Audio - PCM32f version) Encoder
|
|
||||||
// DWT-based perceptual audio codec for TSVM
|
|
||||||
// Alternative version: PCM32f throughout encoding, PCM8 conversion only at decoder
|
|
||||||
|
|
||||||
// Constants
|
|
||||||
#define TAD32_COEFF_SCALARS {64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f} // value only valid for CDF 9/7 with decomposition level 9. Index 0 = LL band
|
|
||||||
#define TAD32_MIN_CHUNK_SIZE 1024 // Minimum: 1024 samples
|
|
||||||
#define TAD32_SAMPLE_RATE 32000
|
|
||||||
#define TAD32_CHANNELS 2 // Stereo
|
|
||||||
#define TAD32_QUALITY_MIN 0
|
|
||||||
#define TAD32_QUALITY_MAX 6
|
|
||||||
#define TAD32_QUALITY_DEFAULT 3
|
|
||||||
#define TAD32_ZSTD_LEVEL 15
|
|
||||||
|
|
||||||
// Map a quality level to the maximum quantisation index.
// The table has six entries (levels 0..5); any level below 0 is treated as 0
// and any level above 5 is treated as 5.
static inline int tad32_quality_to_max_index(int quality) {
    static const int quality_map[6] = {21, 31, 44, 63, 89, 127};
    const int top_level = (int)(sizeof(quality_map) / sizeof(quality_map[0])) - 1;

    int level = quality;
    if (level > top_level) {
        level = top_level;
    } else if (level < 0) {
        level = 0;
    }
    return quality_map[level];
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Encode audio chunk with TAD32 codec (PCM32f version)
|
|
||||||
*
|
|
||||||
* @param pcm32_stereo Input PCM32fLE stereo samples (interleaved L,R)
|
|
||||||
* @param num_samples Number of samples per channel (min 1024)
|
|
||||||
* @param max_index Maximum quantisation index (7=3bit, 15=4bit, 31=5bit, 63=6bit, 127=7bit)
|
|
||||||
* @param quantiser_scale Quantiser scaling factor (1.0=baseline, 2.0=2x coarser quantisation)
|
|
||||||
* Higher values = more aggressive quantisation = smaller files
|
|
||||||
* @param zstd_level Zstd compression level (1-22). Use negative value to disable compression.
|
|
||||||
* When disabled, MSB of payload_size is set to indicate uncompressed data.
|
|
||||||
* @param output Output buffer (must be large enough)
|
|
||||||
* @return Number of bytes written to output, or 0 on error
|
|
||||||
*
|
|
||||||
* Output format:
|
|
||||||
* uint16 sample_count (samples per channel)
|
|
||||||
* uint8 max_index (maximum quantisation index)
|
|
||||||
* uint32 payload_size (bytes in payload; MSB=1 indicates uncompressed)
|
|
||||||
* * payload (encoded M/S data, optionally Zstd-compressed)
|
|
||||||
*/
|
|
||||||
size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
|
|
||||||
int max_index,
|
|
||||||
float quantiser_scale, int zstd_level, uint8_t *output);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Print accumulated coefficient statistics
|
|
||||||
* Only effective if TAD_COEFF_STATS environment variable is set
|
|
||||||
*/
|
|
||||||
void tad32_print_statistics(void);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Free accumulated statistics memory
|
|
||||||
* Should be called after tad32_print_statistics()
|
|
||||||
*/
|
|
||||||
void tad32_free_statistics(void);
|
|
||||||
|
|
||||||
#endif // TAD32_ENCODER_H
|
|
||||||
@@ -1,74 +0,0 @@
|
|||||||
// TEV Entropy Coder - Specialised for DCT coefficients
|
|
||||||
// Replaces gzip with video-optimized compression
|
|
||||||
#ifndef ENTROPY_CODER_H
|
|
||||||
#define ENTROPY_CODER_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
// Bit writer for variable-length codes
|
|
||||||
typedef struct {
|
|
||||||
uint8_t *buffer;
|
|
||||||
size_t buffer_size;
|
|
||||||
size_t byte_pos;
|
|
||||||
int bit_pos; // 0-7, next bit to write
|
|
||||||
} bit_writer_t;
|
|
||||||
|
|
||||||
// Bit reader for decoding
|
|
||||||
typedef struct {
|
|
||||||
const uint8_t *buffer;
|
|
||||||
size_t buffer_size;
|
|
||||||
size_t byte_pos;
|
|
||||||
int bit_pos; // 0-7, next bit to read
|
|
||||||
} bit_reader_t;
|
|
||||||
|
|
||||||
// Huffman table entry
|
|
||||||
typedef struct {
|
|
||||||
uint16_t code; // Huffman code
|
|
||||||
uint8_t bits; // Code length in bits
|
|
||||||
} huffman_entry_t;
|
|
||||||
|
|
||||||
// Video entropy coder optimized for TEV coefficients
|
|
||||||
typedef struct {
|
|
||||||
// Huffman tables for different coefficient types
|
|
||||||
huffman_entry_t y_dc_table[512]; // Y DC coefficients (-255 to +255)
|
|
||||||
huffman_entry_t y_ac_table[512]; // Y AC coefficients
|
|
||||||
huffman_entry_t c_dc_table[512]; // Chroma DC coefficients
|
|
||||||
huffman_entry_t c_ac_table[512]; // Chroma AC coefficients
|
|
||||||
huffman_entry_t run_table[256]; // Zero run lengths (0-255)
|
|
||||||
|
|
||||||
// Motion vector Huffman tables
|
|
||||||
huffman_entry_t mv_table[65]; // Motion vectors (-32 to +32)
|
|
||||||
|
|
||||||
// Bit writer/reader
|
|
||||||
bit_writer_t writer;
|
|
||||||
bit_reader_t reader;
|
|
||||||
} entropy_coder_t;
|
|
||||||
|
|
||||||
static const huffman_entry_t BLOCK_MODE_HUFFMAN[16];
|
|
||||||
|
|
||||||
void write_bits(bit_writer_t *writer, uint32_t value, int bits);
|
|
||||||
uint32_t read_bits(bit_reader_t *reader, int bits);
|
|
||||||
|
|
||||||
// Initialise entropy coder
|
|
||||||
entropy_coder_t* entropy_coder_create(uint8_t *buffer, size_t buffer_size);
|
|
||||||
void entropy_coder_destroy(entropy_coder_t *coder);
|
|
||||||
|
|
||||||
// Encoding functions
|
|
||||||
int encode_y_block(entropy_coder_t *coder, int16_t *y_coeffs);
|
|
||||||
int encode_chroma_block(entropy_coder_t *coder, int16_t *chroma_coeffs, int is_cg);
|
|
||||||
int encode_motion_vector(entropy_coder_t *coder, int16_t mv_x, int16_t mv_y);
|
|
||||||
int encode_block_mode(entropy_coder_t *coder, uint8_t mode);
|
|
||||||
|
|
||||||
// Decoding functions
|
|
||||||
void entropy_coder_init_reader(entropy_coder_t *coder, const uint8_t *buffer, size_t buffer_size);
|
|
||||||
int decode_y_block(entropy_coder_t *coder, int16_t *y_coeffs);
|
|
||||||
int decode_chroma_block(entropy_coder_t *coder, int16_t *chroma_coeffs, int is_cg);
|
|
||||||
int decode_motion_vector(entropy_coder_t *coder, int16_t *mv_x, int16_t *mv_y);
|
|
||||||
int decode_block_mode(entropy_coder_t *coder, uint8_t *mode);
|
|
||||||
|
|
||||||
// Get compressed size
|
|
||||||
size_t entropy_coder_get_size(entropy_coder_t *coder);
|
|
||||||
void entropy_coder_reset(entropy_coder_t *coder);
|
|
||||||
|
|
||||||
#endif // ENTROPY_CODER_H
|
|
||||||
@@ -1,837 +0,0 @@
|
|||||||
/*
|
|
||||||
* TAV AVX-512 Optimisations
|
|
||||||
*
|
|
||||||
* This file contains AVX-512 optimised versions of performance-critical functions
|
|
||||||
* in the TAV encoder. Runtime CPU detection ensures fallback to scalar versions
|
|
||||||
* on non-AVX-512 systems.
|
|
||||||
*
|
|
||||||
* Optimised functions:
|
|
||||||
* - 1D DWT transforms (5/3, 9/7, Haar, Bior13/7, DD4)
|
|
||||||
* - Quantisation functions
|
|
||||||
* - RGB to YCoCg colour conversion
|
|
||||||
* - 2D DWT gather/scatter operations
|
|
||||||
*
|
|
||||||
* Compile with: -mavx512f -mavx512dq -mavx512bw -mavx512vl
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef TAV_AVX512_H
|
|
||||||
#define TAV_AVX512_H
|
|
||||||
|
|
||||||
#include <immintrin.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// SIMD Capability Detection
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
SIMD_NONE = 0,
|
|
||||||
SIMD_AVX512F = 1
|
|
||||||
} simd_level_t;
|
|
||||||
|
|
||||||
// Global SIMD level (set by tav_simd_init)
|
|
||||||
static simd_level_t g_simd_level = SIMD_NONE;
|
|
||||||
|
|
||||||
// CPU feature detection.
// Returns 1 only when this translation unit was built with __AVX512F__ defined
// AND the running CPU reports both "avx512f" and "avx512dq"; otherwise 0.
static inline int cpu_has_avx512f(void) {
#ifdef __AVX512F__
    if (!__builtin_cpu_supports("avx512f")) {
        return 0;
    }
    return __builtin_cpu_supports("avx512dq") ? 1 : 0;
#else
    return 0;
#endif
}
|
|
||||||
|
|
||||||
// Initialize SIMD detection (call once at startup)
|
|
||||||
static inline void tav_simd_init(void) {
|
|
||||||
#ifdef __AVX512F__
|
|
||||||
if (cpu_has_avx512f()) {
|
|
||||||
g_simd_level = SIMD_AVX512F;
|
|
||||||
fprintf(stderr, "[TAV] AVX-512 optimisations enabled\n");
|
|
||||||
} else {
|
|
||||||
g_simd_level = SIMD_NONE;
|
|
||||||
fprintf(stderr, "[TAV] AVX-512 not available, using scalar fallback\n");
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
g_simd_level = SIMD_NONE;
|
|
||||||
fprintf(stderr, "[TAV] Compiled without AVX-512 support\n");
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef __AVX512F__
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Helper Functions
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
// Horizontal sum of 16 floats: returns the sum of all lanes of v.
// Reduction order: (low 256 + high 256) -> (low 128 + high 128) -> two hadd passes.
static inline float _mm512_reduce_add_ps_compat(__m512 v) {
    const __m256 low = _mm512_castps512_ps256(v);
    // Take the upper 256 bits through the double-precision view:
    // _mm512_extractf64x4_pd only needs AVX-512F, whereas
    // _mm512_extractf32x8_ps needs AVX-512DQ and this code is guarded by
    // __AVX512F__ alone. The extracted bits are identical.
    const __m256 high = _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(v), 1));
    const __m256 sum256 = _mm256_add_ps(low, high);

    __m128 sum128 = _mm_add_ps(_mm256_castps256_ps128(sum256),
                               _mm256_extractf128_ps(sum256, 1));
    sum128 = _mm_hadd_ps(sum128, sum128);
    sum128 = _mm_hadd_ps(sum128, sum128);
    return _mm_cvtss_f32(sum128);
}
|
|
||||||
|
|
||||||
// Clamp helper for vectorised operations: per lane, max(v, min_val) is taken
// first and the result is then limited by min(..., max_val).
static inline __m512 _mm512_clamp_ps(__m512 v, __m512 min_val, __m512 max_val) {
    const __m512 raised = _mm512_max_ps(v, min_val);
    return _mm512_min_ps(raised, max_val);
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// AVX-512 Optimised 1D DWT Forward Transforms
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
// 5/3 Reversible Forward DWT with AVX-512
//
// Performs one lifting level in place on data[0..length):
//   high[i] = data[2i+1] - 0.5  * (data[2i] + data[2i+2])      (predict)
//   low[i]  = data[2i]   + 0.25 * (high[i-1] + high[i])        (update)
// On return data holds the low-pass coefficients in [0, half) followed by the
// high-pass coefficients in [half, length), with half = (length + 1) / 2.
//
// Boundary handling:
//   - predict: a missing right neighbour data[2i+2] is replaced by data[2i]
//   - update:  high[-1] is taken as 0, and the high term for the last low
//              index (i == half - 1) is taken as 0
//
// Inputs shorter than 2 samples are left unchanged. If the scratch buffer
// cannot be allocated the function returns with data unchanged.
static inline void dwt_53_forward_1d_avx512(float *data, int length) {
    if (length < 2) return;

    float *temp = (float*)calloc((size_t)length, sizeof(float));
    if (!temp) return;  // allocation failed: do not dereference NULL

    const int half = (length + 1) / 2;
    const __m512 half_vec = _mm512_set1_ps(0.5f);
    const __m512 quarter_vec = _mm512_set1_ps(0.25f);
    int i;

    // ---- Predict step (high-pass), 16 outputs per iteration ----
    for (i = 0; i + 16 <= half; i += 16) {
        float even_curr_vals[16], even_next_vals[16], odd_vals[16];
        __mmask16 valid_mask = 0;

        // Stride-2 gather into contiguous staging arrays. A lane is valid only
        // if its odd sample exists (the last lane is invalid for odd lengths).
        for (int j = 0; j < 16; j++) {
            const int even_idx = 2 * (i + j);
            if (even_idx + 1 < length) {
                valid_mask |= (__mmask16)(1u << j);
                even_curr_vals[j] = data[even_idx];
                even_next_vals[j] = (even_idx + 2 < length) ? data[even_idx + 2] : data[even_idx];
                odd_vals[j] = data[even_idx + 1];
            } else {
                even_curr_vals[j] = 0.0f;
                even_next_vals[j] = 0.0f;
                odd_vals[j] = 0.0f;
            }
        }

        const __m512 even_curr = _mm512_loadu_ps(even_curr_vals);
        const __m512 even_next = _mm512_loadu_ps(even_next_vals);
        const __m512 odd = _mm512_loadu_ps(odd_vals);

        const __m512 pred = _mm512_mul_ps(_mm512_add_ps(even_curr, even_next), half_vec);
        const __m512 high = _mm512_sub_ps(odd, pred);

        // Masked store so an invalid lane never writes past temp[length - 1].
        _mm512_mask_storeu_ps(&temp[half + i], valid_mask, high);
    }

    // Scalar tail of the predict step
    for (; i < half; i++) {
        const int idx = 2 * i + 1;
        if (idx < length) {
            const float pred = 0.5f * (data[2 * i] + (2 * i + 2 < length ? data[2 * i + 2] : data[2 * i]));
            temp[half + i] = data[idx] - pred;
        }
    }

    // ---- Update step (low-pass), 16 outputs per iteration ----
    for (i = 0; i + 16 <= half; i += 16) {
        float even_vals[16], high_prev[16], high_curr[16];

        for (int j = 0; j < 16; j++) {
            const int k = i + j;  // low-pass index, always < half here
            even_vals[j] = data[2 * k];
            high_prev[j] = (k > 0) ? temp[half + k - 1] : 0.0f;
            high_curr[j] = (k < half - 1) ? temp[half + k] : 0.0f;
        }

        const __m512 even = _mm512_loadu_ps(even_vals);
        const __m512 hp = _mm512_loadu_ps(high_prev);
        const __m512 hc = _mm512_loadu_ps(high_curr);
        const __m512 update = _mm512_mul_ps(_mm512_add_ps(hp, hc), quarter_vec);

        // All 16 lanes are in range because i + 16 <= half.
        _mm512_storeu_ps(&temp[i], _mm512_add_ps(even, update));
    }

    // Scalar tail of the update step
    for (; i < half; i++) {
        const float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) +
                                      (i < half - 1 ? temp[half + i] : 0));
        temp[i] = data[2 * i] + update;
    }

    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
}
|
|
||||||
|
|
||||||
// 9/7 lifting "predict" step: d[i] += coeff * (s[i] + s[i+1]).
// s holds `half` low-band samples, d holds `dlen` high-band samples.
// The last index (half - 1) has no right neighbour and mirrors s[half - 1];
// it only exists in d when dlen == half (even input length).
static inline void tav_dwt97_predict_avx512(const float *s, float *d, int half, int dlen, float coeff) {
    const __m512 coeff_vec = _mm512_set1_ps(coeff);
    const int interior = half - 1;              // indices that have a real s[i+1]
    const int n_full = (interior / 16) * 16;    // part of the interior covered by full vectors
    int i = 0;

    for (; i + 16 <= n_full; i += 16) {
        const __m512 s_curr = _mm512_loadu_ps(&s[i]);
        const __m512 s_next = _mm512_loadu_ps(&s[i + 1]);
        __m512 d_vec = _mm512_loadu_ps(&d[i]);
        d_vec = _mm512_fmadd_ps(coeff_vec, _mm512_add_ps(s_curr, s_next), d_vec);
        _mm512_storeu_ps(&d[i], d_vec);
    }
    for (; i < interior; ++i) {
        d[i] += coeff * (s[i] + s[i + 1]);
    }
    if (interior < dlen) {
        // right edge: mirror the last low-band sample
        d[interior] += coeff * (s[interior] + s[interior]);
    }
}

// 9/7 lifting "update" step: s[i] += coeff * (d[i-1] + d[i]).
// Index 0 mirrors d[0] for the missing d[-1]; a missing d[i] (i >= dlen, which
// happens for the last index when the input length is odd) counts as 0.
// PRECONDITION: d[dlen] must be readable and equal to 0.0f, because the
// vector loop may load one element past the last high-band sample.
static inline void tav_dwt97_update_avx512(float *s, const float *d, int half, int dlen, float coeff) {
    const __m512 coeff_vec = _mm512_set1_ps(coeff);

    if (dlen > 0) {
        s[0] += coeff * (d[0] + d[0]);
    }

    const int n_full = ((half - 1) / 16) * 16;  // full vectors over indices 1..half-1
    int i = 1;
    for (; i + 16 <= 1 + n_full; i += 16) {
        const __m512 d_prev = _mm512_loadu_ps(&d[i - 1]);
        const __m512 d_curr = _mm512_loadu_ps(&d[i]);   // last lane may be the zero padding
        __m512 s_vec = _mm512_loadu_ps(&s[i]);
        s_vec = _mm512_fmadd_ps(coeff_vec, _mm512_add_ps(d_prev, d_curr), s_vec);
        _mm512_storeu_ps(&s[i], s_vec);
    }
    for (; i < half; ++i) {
        const float d_curr = (i < dlen) ? d[i] : 0.0f;
        s[i] += coeff * (d[i - 1] + d_curr);
    }
}

// 9/7 Irreversible Forward DWT with AVX-512
//
// Performs one lifting level in place on data[0..length). On return data
// holds the low-pass coefficients in [0, half) followed by the high-pass
// coefficients in [half, length), with half = (length + 1) / 2.
//
// Steps: split into even/odd samples, predict(alpha), update(beta),
// predict(gamma), update(delta), then scale low by K and high by 1/K.
//
// Inputs shorter than 2 samples are left unchanged. If the scratch buffer
// cannot be allocated the function returns with data unchanged.
static inline void dwt_97_forward_1d_avx512(float *data, int length) {
    if (length < 2) return;

    const int half = (length + 1) / 2;
    const int dlen = length - half;

    // Scratch buffer, 64-byte aligned when possible. One extra float is
    // allocated and zeroed: for odd lengths the vectorised update step loads
    // temp[length], which must behave like the "missing neighbour = 0" rule of
    // the scalar path instead of reading past the allocation.
    const size_t padded_bytes = ((size_t)length + 1) * sizeof(float);
    float *temp = NULL;
#if defined(_POSIX_C_SOURCE) || defined(_XOPEN_SOURCE)
    if (posix_memalign((void**)&temp, 64, padded_bytes) != 0) {
        temp = (float*)malloc(padded_bytes);
    }
#else
    temp = (float*)aligned_alloc(64, (padded_bytes + 63) & ~(size_t)63);
    if (!temp) temp = (float*)malloc(padded_bytes);
#endif
    if (!temp) return;
    temp[length] = 0.0f;

    float *s = temp;         // low band  (even samples)
    float *d = temp + half;  // high band (odd samples)

    // Split: evens to the first half, odds to the second half
    for (int k = 0; k < half; ++k) s[k] = data[2 * k];
    for (int k = 0; k < dlen; ++k) d[k] = data[2 * k + 1];

    // Lifting steps
    tav_dwt97_predict_avx512(s, d, half, dlen, -1.586134342f);  // alpha
    tav_dwt97_update_avx512(s, d, half, dlen, -0.052980118f);   // beta
    tav_dwt97_predict_avx512(s, d, half, dlen, 0.882911076f);   // gamma
    tav_dwt97_update_avx512(s, d, half, dlen, 0.443506852f);    // delta

    // Scaling: s *= K, d *= 1/K
    const __m512 K_vec = _mm512_set1_ps(1.230174105f);
    const __m512 invK_vec = _mm512_set1_ps(1.0f / 1.230174105f);
    int i = 0;
    for (; i + 16 <= half; i += 16) {
        _mm512_storeu_ps(&s[i], _mm512_mul_ps(_mm512_loadu_ps(&s[i]), K_vec));
    }
    for (; i < half; ++i) s[i] *= 1.230174105f;

    i = 0;
    for (; i + 16 <= dlen; i += 16) {
        _mm512_storeu_ps(&d[i], _mm512_mul_ps(_mm512_loadu_ps(&d[i]), invK_vec));
    }
    for (; i < dlen; ++i) d[i] /= 1.230174105f;

    // Copy back (padding element excluded) and free
    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
}
|
|
||||||
|
|
||||||
// Haar Forward DWT with AVX-512
//
// Performs one level in place on data[0..length):
//   low[i]  = (data[2i] + data[2i+1]) / 2
//   high[i] = (data[2i] - data[2i+1]) / 2
// On return data holds the low-pass values in [0, half) followed by the
// high-pass values in [half, length), with half = (length + 1) / 2.
// For odd lengths the unpaired last sample is copied to low[half - 1] and has
// no high-pass counterpart.
//
// Inputs shorter than 2 samples are left unchanged. If the scratch buffer
// cannot be allocated the function returns with data unchanged.
static inline void dwt_haar_forward_1d_avx512(float *data, int length) {
    if (length < 2) return;

    float *temp = (float*)malloc((size_t)length * sizeof(float));
    if (!temp) return;  // allocation failed: do not dereference NULL

    const int half = (length + 1) / 2;
    const __m512 half_vec = _mm512_set1_ps(0.5f);

    // Process 16 pairs at a time
    int i;
    for (i = 0; i + 16 <= half; i += 16) {
        float even_vals[16], odd_vals[16];
        __mmask16 valid_mask = 0xFFFF;

        for (int j = 0; j < 16; j++) {
            const int even_idx = 2 * (i + j);
            even_vals[j] = data[even_idx];
            if (even_idx + 1 < length) {
                odd_vals[j] = data[even_idx + 1];
            } else {
                // Unpaired last sample: pairing it with itself makes
                // low == sample; its high lane is masked out of the store.
                odd_vals[j] = even_vals[j];
                valid_mask &= (__mmask16)~(1u << j);
            }
        }

        const __m512 even = _mm512_loadu_ps(even_vals);
        const __m512 odd = _mm512_loadu_ps(odd_vals);

        // Low-pass: (even + odd) / 2
        const __m512 low = _mm512_mul_ps(_mm512_add_ps(even, odd), half_vec);
        // High-pass: (even - odd) / 2
        const __m512 high = _mm512_mul_ps(_mm512_sub_ps(even, odd), half_vec);

        _mm512_storeu_ps(&temp[i], low);
        // Masked so the lane of an unpaired sample never writes temp[length].
        _mm512_mask_storeu_ps(&temp[half + i], valid_mask, high);
    }

    // Remaining scalar
    for (; i < half; i++) {
        const float a = data[2 * i];
        if (2 * i + 1 < length) {
            const float b = data[2 * i + 1];
            temp[i] = (a + b) / 2.0f;
            temp[half + i] = (a - b) / 2.0f;
        } else {
            // Unpaired last sample (odd length): half + i == length here, so
            // there is no high-pass slot to fill.
            temp[i] = a;
        }
    }

    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// AVX-512 Optimised Quantisation Functions
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
 * Quantise DWT coefficients with one uniform step size (AVX-512 path).
 *
 * For every coefficient: q = coeffs[i] / effective_q; if the dead zone is
 * active and |q| <= dead_zone_threshold, q becomes 0; q is then rounded half
 * away from zero and saturated to the int16_t range.
 *
 * The vector loop and the scalar tail use the same operations (true division,
 * clamp in the float domain, truncating conversion), so a coefficient yields
 * the same result regardless of which path handles it.
 *
 * @param coeffs              Input coefficients (size floats).
 * @param quantised           Output buffer (size int16_t values).
 * @param size                Number of coefficients.
 * @param effective_q         Quantiser step; expected to be non-zero.
 * @param dead_zone_threshold Dead-zone width in quantised units; applied only
 *                            when > 0 and is_chroma is 0.
 * @param width               Unused by this simplified version.
 * @param height              Unused by this simplified version.
 * @param decomp_levels       Unused by this simplified version.
 * @param is_chroma           Non-zero disables the dead zone.
 * @param get_subband_level   Unused by this simplified version.
 * @param get_subband_type    Unused by this simplified version.
 */
static inline void quantise_dwt_coefficients_avx512(
    float *coeffs, int16_t *quantised, int size,
    float effective_q, float dead_zone_threshold,
    int width, int height, int decomp_levels, int is_chroma,
    int (*get_subband_level)(int, int, int, int),
    int (*get_subband_type)(int, int, int, int)
) {
    // Kept for signature compatibility with the dispatcher's function pointer type.
    (void)width;
    (void)height;
    (void)decomp_levels;
    (void)get_subband_level;
    (void)get_subband_type;

    const __m512 q_vec = _mm512_set1_ps(effective_q);
    const __m512 half_vec = _mm512_set1_ps(0.5f);
    const __m512 nhalf_vec = _mm512_set1_ps(-0.5f);
    const __m512 zero_vec = _mm512_setzero_ps();
    const __m512 min_f32 = _mm512_set1_ps(-32768.0f);
    const __m512 max_f32 = _mm512_set1_ps(32767.0f);

    // Loop-invariant dead-zone state
    const int use_dead_zone = (dead_zone_threshold > 0.0f && !is_chroma);
    const __m512 threshold_vec = _mm512_set1_ps(dead_zone_threshold);

    int i;
    for (i = 0; i + 16 <= size; i += 16) {
        __m512 coeff = _mm512_loadu_ps(&coeffs[i]);

        // True division (not multiply-by-reciprocal) so results match the scalar tail
        __m512 quant = _mm512_div_ps(coeff, q_vec);

        // Dead-zone handling (simplified - full version needs per-coeff logic)
        if (use_dead_zone) {
            __m512 abs_quant = _mm512_abs_ps(quant);
            __mmask16 dead_mask = _mm512_cmp_ps_mask(abs_quant, threshold_vec, _CMP_LE_OQ);
            quant = _mm512_mask_blend_ps(dead_mask, quant, zero_vec);
        }

        // Round half away from zero: add +0.5 or -0.5 depending on sign, then truncate
        __mmask16 pos_mask = _mm512_cmp_ps_mask(quant, zero_vec, _CMP_GE_OQ);
        __m512 round_val = _mm512_mask_blend_ps(pos_mask, nhalf_vec, half_vec);
        quant = _mm512_add_ps(quant, round_val);

        // Clamp BEFORE converting: cvttps yields INT_MIN for out-of-range input,
        // which would turn large positive values into -32768.
        // max/min return their second operand for NaN, so NaN maps to -32768.
        quant = _mm512_max_ps(quant, min_f32);
        quant = _mm512_min_ps(quant, max_f32);

        __m512i quant_i32 = _mm512_cvttps_epi32(quant); // cvtt = truncate (round toward zero)

        // Narrow to int16 (values are already within range)
        __m256i quant_i16 = _mm512_cvtsepi32_epi16(quant_i32);
        _mm256_storeu_si256((__m256i*)&quantised[i], quant_i16);
    }

    // Remaining scalar
    for (; i < size; i++) {
        float quantised_val = coeffs[i] / effective_q;

        // Dead-zone (simplified)
        if (use_dead_zone && fabsf(quantised_val) <= dead_zone_threshold) {
            quantised_val = 0.0f;
        }

        float rounded = quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f);

        // Clamp in the float domain; casting an out-of-range float to int is undefined.
        // The negated comparison also routes NaN to -32768, matching the vector path.
        if (!(rounded > -32768.0f)) {
            rounded = -32768.0f;
        } else if (rounded > 32767.0f) {
            rounded = 32767.0f;
        }

        quantised[i] = (int16_t)(int32_t)rounded;
    }
}
|
|
||||||
|
|
||||||
/**
 * Perceptual quantisation with per-coefficient weighting (AVX-512 path).
 *
 * quantised[i] = coeffs[i] / (base_quantiser * weights[i]), rounded half away
 * from zero and saturated to the int16_t range.
 *
 * Saturation is done in the float domain before the integer conversion, so an
 * infinite or very large quotient (e.g. from a zero weight) saturates with the
 * correct sign instead of collapsing to -32768. NaN (0/0) maps to -32768.
 *
 * @param coeffs         Input coefficients (size floats).
 * @param quantised      Output buffer (size int16_t values).
 * @param size           Number of coefficients.
 * @param weights        Pre-computed per-coefficient weights (size floats).
 * @param base_quantiser Base step multiplied by each weight.
 */
static inline void quantise_dwt_coefficients_perceptual_avx512(
    float *coeffs, int16_t *quantised, int size,
    float *weights,  // Pre-computed per-coefficient weights
    float base_quantiser
) {
    const __m512 base_q_vec = _mm512_set1_ps(base_quantiser);
    const __m512 half_vec = _mm512_set1_ps(0.5f);
    const __m512 nhalf_vec = _mm512_set1_ps(-0.5f);
    const __m512 zero_vec = _mm512_setzero_ps();
    const __m512 min_f32 = _mm512_set1_ps(-32768.0f);
    const __m512 max_f32 = _mm512_set1_ps(32767.0f);

    int i;
    for (i = 0; i + 16 <= size; i += 16) {
        __m512 coeff = _mm512_loadu_ps(&coeffs[i]);
        __m512 weight = _mm512_loadu_ps(&weights[i]);

        // effective_q = base_q * weight
        __m512 effective_q = _mm512_mul_ps(base_q_vec, weight);
        __m512 quant = _mm512_div_ps(coeff, effective_q);

        // Round half away from zero: add +0.5 or -0.5 depending on sign, then truncate
        __mmask16 pos_mask = _mm512_cmp_ps_mask(quant, zero_vec, _CMP_GE_OQ);
        __m512 round_val = _mm512_mask_blend_ps(pos_mask, nhalf_vec, half_vec);
        quant = _mm512_add_ps(quant, round_val);

        // Clamp BEFORE converting: cvttps yields INT_MIN for out-of-range input.
        // max/min return their second operand for NaN, so NaN maps to -32768.
        quant = _mm512_max_ps(quant, min_f32);
        quant = _mm512_min_ps(quant, max_f32);

        __m512i quant_i32 = _mm512_cvttps_epi32(quant);

        __m256i quant_i16 = _mm512_cvtsepi32_epi16(quant_i32);
        _mm256_storeu_si256((__m256i*)&quantised[i], quant_i16);
    }

    // Remaining scalar
    for (; i < size; i++) {
        const float effective_q = base_quantiser * weights[i];
        const float quantised_val = coeffs[i] / effective_q;
        float rounded = quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f);

        // Clamp in the float domain; casting an out-of-range float to int is undefined.
        // The negated comparison also routes NaN to -32768, matching the vector path.
        if (!(rounded > -32768.0f)) {
            rounded = -32768.0f;
        } else if (rounded > 32767.0f) {
            rounded = 32767.0f;
        }

        quantised[i] = (int16_t)(int32_t)rounded;
    }
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// AVX-512 Optimised Dequantisation Functions
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
 * Uniform dequantisation (AVX-512 path): coeffs[i] = quantised[i] * effective_q.
 *
 * @param quantised   Input quantised values (size int16_t values).
 * @param coeffs      Output coefficients (size floats).
 * @param size        Number of values.
 * @param effective_q Quantiser step applied to every value.
 */
static inline void dequantise_dwt_coefficients_avx512(
    const int16_t *quantised, float *coeffs, int size,
    float effective_q
) {
    const __m512 step = _mm512_set1_ps(effective_q);
    int idx = 0;

    // 16 values per iteration: int16 -> int32 -> float, then scale
    while (idx + 16 <= size) {
        const __m256i packed = _mm256_loadu_si256((const __m256i *)(quantised + idx));
        const __m512 as_float = _mm512_cvtepi32_ps(_mm512_cvtepi16_epi32(packed));
        _mm512_storeu_ps(coeffs + idx, _mm512_mul_ps(as_float, step));
        idx += 16;
    }

    // Leftover values that do not fill a whole vector
    while (idx < size) {
        coeffs[idx] = (float)quantised[idx] * effective_q;
        idx++;
    }
}
|
|
||||||
|
|
||||||
/**
 * Perceptual dequantisation (AVX-512 path):
 * coeffs[i] = quantised[i] * (base_quantiser * weights[i]).
 *
 * @param quantised      Input quantised values (size int16_t values).
 * @param coeffs         Output coefficients (size floats).
 * @param size           Number of values.
 * @param weights        Per-coefficient weights (size floats).
 * @param base_quantiser Base step multiplied by each weight.
 */
static inline void dequantise_dwt_coefficients_perceptual_avx512(
    const int16_t *quantised, float *coeffs, int size,
    const float *weights, float base_quantiser
) {
    const __m512 base_step = _mm512_set1_ps(base_quantiser);
    int idx = 0;

    while (idx + 16 <= size) {
        // int16 -> int32 -> float
        const __m256i packed = _mm256_loadu_si256((const __m256i *)(quantised + idx));
        const __m512 as_float = _mm512_cvtepi32_ps(_mm512_cvtepi16_epi32(packed));

        // Per-lane step = base step * weight
        const __m512 lane_step = _mm512_mul_ps(base_step, _mm512_loadu_ps(weights + idx));

        _mm512_storeu_ps(coeffs + idx, _mm512_mul_ps(as_float, lane_step));
        idx += 16;
    }

    // Leftover values that do not fill a whole vector
    while (idx < size) {
        const float lane_step = base_quantiser * weights[idx];
        coeffs[idx] = (float)quantised[idx] * lane_step;
        idx++;
    }
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// AVX-512 Optimised RGB to YCoCg Conversion
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
 * Convert packed RGB bytes to planar YCoCg-R floats (AVX-512 path).
 *
 * Per pixel:
 *   co  = r - b
 *   tmp = b + co * 0.5
 *   cg  = g - tmp
 *   y   = tmp + cg * 0.5
 *
 * @param rgb    Input pixels, 3 bytes per pixel in R,G,B order (width*height pixels).
 * @param y      Output Y plane (width*height floats).
 * @param co     Output Co plane (width*height floats).
 * @param cg     Output Cg plane (width*height floats).
 * @param width  Image width in pixels.
 * @param height Image height in pixels.
 */
static inline void rgb_to_ycocg_avx512(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height) {
    const int pixel_count = width * height;
    const __m512 half = _mm512_set1_ps(0.5f);
    int px = 0;

    // 16 pixels (48 input bytes) per iteration
    while (px + 16 <= pixel_count) {
        // There is no direct 3-channel deinterleave, so split the channels
        // through small float staging arrays.
        float red[16], green[16], blue[16];
        const uint8_t *src = &rgb[px * 3];
        for (int lane = 0; lane < 16; lane++, src += 3) {
            red[lane] = (float)src[0];
            green[lane] = (float)src[1];
            blue[lane] = (float)src[2];
        }

        const __m512 r_v = _mm512_loadu_ps(red);
        const __m512 g_v = _mm512_loadu_ps(green);
        const __m512 b_v = _mm512_loadu_ps(blue);

        const __m512 co_v = _mm512_sub_ps(r_v, b_v);
        const __m512 tmp_v = _mm512_fmadd_ps(co_v, half, b_v);   // b + co * 0.5
        const __m512 cg_v = _mm512_sub_ps(g_v, tmp_v);
        const __m512 y_v = _mm512_fmadd_ps(cg_v, half, tmp_v);   // tmp + cg * 0.5

        _mm512_storeu_ps(y + px, y_v);
        _mm512_storeu_ps(co + px, co_v);
        _mm512_storeu_ps(cg + px, cg_v);

        px += 16;
    }

    // Leftover pixels, one at a time
    while (px < pixel_count) {
        const uint8_t *src = &rgb[px * 3];
        const float r = src[0];
        const float g = src[1];
        const float b = src[2];

        const float co_val = r - b;
        const float tmp = b + co_val * 0.5f;
        const float cg_val = g - tmp;

        co[px] = co_val;
        cg[px] = cg_val;
        y[px] = tmp + cg_val * 0.5f;
        px++;
    }
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// AVX-512 Optimised 2D DWT with Gather/Scatter
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
 * Copy column x of a row-major float tile into a contiguous buffer, using an
 * AVX-512 gather for 16 rows at a time.
 *
 * column[row] = tile_data[row * width + x] for row in [0, height).
 *
 * @param tile_data Source tile, row-major with `width` floats per row.
 * @param column    Destination buffer (height floats).
 * @param x         Column index to extract.
 * @param width     Row stride of the tile, in floats.
 * @param height    Number of rows to copy.
 */
static inline void dwt_2d_extract_column_avx512(
    const float *tile_data, float *column,
    int x, int width, int height
) {
    int row = 0;

    while (row + 16 <= height) {
        // Element offsets of column x in the next 16 rows
        int lane_offsets[16];
        const int first = row * width + x;
        for (int lane = 0; lane < 16; lane++) {
            lane_offsets[lane] = first + lane * width;
        }

        const __m512i gather_idx = _mm512_loadu_si512((const __m512i *)lane_offsets);
        // Scale 4 = sizeof(float): indices are element offsets, not byte offsets
        _mm512_storeu_ps(column + row, _mm512_i32gather_ps(gather_idx, tile_data, 4));
        row += 16;
    }

    // Leftover rows
    while (row < height) {
        column[row] = tile_data[row * width + x];
        row++;
    }
}
|
|
||||||
|
|
||||||
/**
 * Write a contiguous buffer into column x of a row-major float tile, using an
 * AVX-512 scatter for 16 rows at a time.
 *
 * tile_data[row * width + x] = column[row] for row in [0, height).
 *
 * @param tile_data Destination tile, row-major with `width` floats per row.
 * @param column    Source buffer (height floats).
 * @param x         Column index to overwrite.
 * @param width     Row stride of the tile, in floats.
 * @param height    Number of rows to write.
 */
static inline void dwt_2d_insert_column_avx512(
    float *tile_data, const float *column,
    int x, int width, int height
) {
    int row = 0;

    while (row + 16 <= height) {
        // Element offsets of column x in the next 16 rows
        int lane_offsets[16];
        const int first = row * width + x;
        for (int lane = 0; lane < 16; lane++) {
            lane_offsets[lane] = first + lane * width;
        }

        const __m512i scatter_idx = _mm512_loadu_si512((const __m512i *)lane_offsets);
        // Scale 4 = sizeof(float): indices are element offsets, not byte offsets
        _mm512_i32scatter_ps(tile_data, scatter_idx, _mm512_loadu_ps(column + row), 4);
        row += 16;
    }

    // Leftover rows
    while (row < height) {
        tile_data[row * width + x] = column[row];
        row++;
    }
}
|
|
||||||
|
|
||||||
#endif // __AVX512F__
|
|
||||||
|
|
||||||
#endif // TAV_AVX512_H
|
|
||||||
@@ -1,295 +0,0 @@
|
|||||||
/**
|
|
||||||
* TAV Encoder Library - Public API
|
|
||||||
*
|
|
||||||
* High-level interface for encoding video using the TSVM Advanced Video (TAV) codec.
|
|
||||||
* Supports GOP-based encoding with internal multi-threading for optimal performance.
|
|
||||||
*
|
|
||||||
* Created by CuriousTorvald and Claude on 2025-12-03.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef TAV_ENCODER_LIB_H
|
|
||||||
#define TAV_ENCODER_LIB_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stddef.h>
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Opaque Encoder Context
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* TAV encoder context - opaque to users.
|
|
||||||
* Created with tav_encoder_create(), freed with tav_encoder_free().
|
|
||||||
*/
|
|
||||||
typedef struct tav_encoder_context tav_encoder_context_t;
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Configuration Structures
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
 * Video encoding parameters.
 *
 * tav_encoder_params_init() fills this structure with defaults and
 * tav_encoder_create() takes it as input.
 */
typedef struct {
    /* ---- Video dimensions ---- */
    int width;                  // Frame width in pixels; must be even
    int height;                 // Frame height in pixels; must be even
    int fps_num;                // Frame-rate numerator (60 for 60 fps)
    int fps_den;                // Frame-rate denominator (1 for 60/1)

    /* ---- Wavelet configuration ---- */
    int wavelet_type;           // Spatial wavelet: 0=CDF 5/3, 1=CDF 9/7 (default), 2=CDF 13/7, 16=DD-4, 255=Haar
    int temporal_wavelet;       // Temporal wavelet: 0=Haar, 1=CDF 5/3 (default for smooth motion)
    int decomp_levels;          // Spatial DWT levels; 0=auto (typically 6)
    int temporal_levels;        // Temporal DWT levels; 0=auto (typically 2 for 8-frame GOPs)

    /* ---- Colour space ---- */
    int channel_layout;         // 0=YCoCg-R (default), 1=ICtCp (HDR/BT.2100 sources)
    int perceptual_tuning;      // 1=HVS perceptual quantisation (default), 0=uniform

    /* ---- GOP configuration ---- */
    int enable_temporal_dwt;    // 1=3D DWT GOP encoding (default), 0=intra-only I-frames
    int gop_size;               // Frames per GOP: 8, 16 or 24; 0=auto from frame rate
    int enable_two_pass;        // 1=two-pass with scene change detection (default), 0=single-pass

    /* ---- Quality control ---- */
    int quality_level;          // Quality level; NOTE(review): range and meaning are not documented here - confirm
    int quantiser_y;            // Luma quantiser index into QLUT (0-255)
    int quantiser_co;           // Orange-chrominance quantiser index into QLUT (0-255)
    int quantiser_cg;           // Green-chrominance quantiser index into QLUT (0-255)
    float dead_zone_threshold;  // Dead-zone threshold: 0.0=disabled, 0.6-1.5 typical

    /* ---- Entropy coding ---- */
    int entropy_coder;          // 0=Twobitmap (default), 1=EZBC (better at high quality)
    int zstd_level;             // Zstd compression level, 3-22 (default 7)

    /* ---- Multi-threading ---- */
    int num_threads;            // Worker threads: 0=single-threaded, -1=auto, 1-16=explicit count

    /* ---- Encoder presets ---- */
    int encoder_preset;         // Flag bits: 0x01=sports (finer temporal quant), 0x02=anime (disable grain)

    /* ---- Advanced options ---- */
    int verbose;                // 1=debug output, 0=quiet (default)
    int monoblock;              // -1=auto (from dimensions), 0=force tiled, 1=force monoblock
} tav_encoder_params_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize encoder parameters with default values.
|
|
||||||
*
|
|
||||||
* @param params Parameter structure to initialize
|
|
||||||
* @param width Frame width
|
|
||||||
* @param height Frame height
|
|
||||||
*/
|
|
||||||
void tav_encoder_params_init(tav_encoder_params_t *params, int width, int height);
|
|
||||||
|
|
||||||
/**
 * Encoder output packet carrying encoded video or audio data.
 */
typedef struct {
    uint8_t *data;        // Payload bytes (owned by encoder, valid until next encode/flush)
    size_t size;          // Payload length in bytes
    uint8_t packet_type;  // TAV packet type code (0x10=I-frame, 0x12=GOP, 0x24=audio, etc.)
    int frame_number;     // Frame number; meaningful for video packets
    int is_video;         // 1 for a video packet, 0 for an audio packet
} tav_encoder_packet_t;
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Encoder Lifecycle
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create TAV encoder context.
|
|
||||||
*
|
|
||||||
* Allocates internal buffers, initializes thread pool (if multi-threading enabled),
|
|
||||||
* and prepares encoder for frame submission.
|
|
||||||
*
|
|
||||||
* @param params Encoder parameters (copied internally)
|
|
||||||
* @return Encoder context, or NULL on failure
|
|
||||||
*/
|
|
||||||
tav_encoder_context_t *tav_encoder_create(const tav_encoder_params_t *params);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Free TAV encoder context.
|
|
||||||
*
|
|
||||||
* Shuts down thread pool, frees all buffers and resources.
|
|
||||||
* Any unflushed frames in the GOP buffer will be lost.
|
|
||||||
*
|
|
||||||
* @param ctx Encoder context
|
|
||||||
*/
|
|
||||||
void tav_encoder_free(tav_encoder_context_t *ctx);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get last error message.
|
|
||||||
*
|
|
||||||
* @param ctx Encoder context
|
|
||||||
* @return Error message string (valid until next encode operation)
|
|
||||||
*/
|
|
||||||
const char *tav_encoder_get_error(tav_encoder_context_t *ctx);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get encoder parameters (with calculated values).
|
|
||||||
* After context creation, params will contain actual values used
|
|
||||||
* (e.g., auto-calculated decomp_levels, gop_size).
|
|
||||||
*
|
|
||||||
* @param ctx Encoder context
|
|
||||||
* @param params Output parameters structure
|
|
||||||
*/
|
|
||||||
void tav_encoder_get_params(tav_encoder_context_t *ctx, tav_encoder_params_t *params);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* DEBUG: Validate encoder context integrity
|
|
||||||
* Returns 1 if context appears valid, 0 otherwise
|
|
||||||
*/
|
|
||||||
int tav_encoder_validate_context(tav_encoder_context_t *ctx);
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Video Encoding
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/*
|
|
||||||
 * REMOVED: tav_encoder_encode_frame() and tav_encoder_flush() have been
|
|
||||||
* removed. Use tav_encoder_encode_gop() instead, which works for both
|
|
||||||
* single-threaded and multi-threaded modes. The CLI should buffer frames
|
|
||||||
* and call encode_gop() when a full GOP is ready.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Encode a complete GOP (Group of Pictures) directly.
|
|
||||||
*
|
|
||||||
* This function is STATELESS and THREAD-SAFE with separate contexts.
|
|
||||||
* Perfect for multithreaded encoding from CLI:
|
|
||||||
* - Each thread creates its own encoder context
|
|
||||||
* - Each thread calls encode_gop() with a batch of frames
|
|
||||||
* - No shared state, no locking needed
|
|
||||||
*
|
|
||||||
* Example multithreaded usage:
|
|
||||||
* ```c
|
|
||||||
* // Worker thread function
|
|
||||||
* void* worker(void* arg) {
|
|
||||||
* work_item_t* item = (work_item_t*)arg;
|
|
||||||
*
|
|
||||||
* // Create thread-local encoder context
|
|
||||||
* tav_encoder_context_t* ctx = tav_encoder_create(&shared_params);
|
|
||||||
*
|
|
||||||
* // Encode this GOP
|
|
||||||
* tav_encoder_packet_t* packet;
|
|
||||||
* tav_encoder_encode_gop(ctx, item->frames, item->num_frames,
|
|
||||||
* item->frame_numbers, &packet);
|
|
||||||
*
|
|
||||||
* // Store packet in output queue
|
|
||||||
* queue_push(output_queue, packet);
|
|
||||||
*
|
|
||||||
* tav_encoder_free(ctx);
|
|
||||||
* return NULL;
|
|
||||||
* }
|
|
||||||
* ```
|
|
||||||
*
|
|
||||||
* @param ctx Encoder context (one per thread)
|
|
||||||
* @param rgb_frames Array of RGB24 frames [frame][width*height*3]
|
|
||||||
* @param num_frames Number of frames in GOP (1-24)
|
|
||||||
* @param frame_numbers Frame indices for timecodes (can be NULL)
|
|
||||||
* @param packet Output packet pointer
|
|
||||||
* @return 1 if packet ready, -1 on error
|
|
||||||
*/
|
|
||||||
int tav_encoder_encode_gop(tav_encoder_context_t *ctx,
|
|
||||||
const uint8_t **rgb_frames,
|
|
||||||
int num_frames,
|
|
||||||
const int *frame_numbers,
|
|
||||||
tav_encoder_packet_t **packet);
|
|
||||||
|
|
||||||
/**
|
|
||||||
 * Free a packet returned by tav_encoder_encode_gop() or tav_encoder_encode_audio().
|
|
||||||
*
|
|
||||||
* @param packet Packet to free (can be NULL)
|
|
||||||
*/
|
|
||||||
void tav_encoder_free_packet(tav_encoder_packet_t *packet);
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Audio Encoding (Optional)
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Encode audio samples (TAD codec).
|
|
||||||
*
|
|
||||||
* Audio is encoded synchronously and returned immediately.
|
|
||||||
* For TAV muxing: interleave audio packets with video packets by frame PTS.
|
|
||||||
*
|
|
||||||
* @param ctx Encoder context
|
|
||||||
* @param pcm_samples PCM32f stereo samples (interleaved: L,R,L,R,...), num_samples×2 floats
|
|
||||||
* @param num_samples Number of samples per channel
|
|
||||||
* @param packet Output packet pointer
|
|
||||||
* @return 1 if packet ready, -1 on error
|
|
||||||
*/
|
|
||||||
int tav_encoder_encode_audio(tav_encoder_context_t *ctx,
|
|
||||||
const float *pcm_samples,
|
|
||||||
size_t num_samples,
|
|
||||||
tav_encoder_packet_t **packet);
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Statistics and Info
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
 * Encoding statistics, filled in by tav_encoder_get_stats().
 */
typedef struct {
    int64_t frames_encoded;   // Number of frames encoded so far
    int64_t gops_encoded;     // Number of GOPs encoded so far
    size_t total_bytes;       // Bytes output in total (video + audio)
    size_t video_bytes;       // Bytes output for video
    size_t audio_bytes;       // Bytes output for audio
    double avg_bitrate_kbps;  // Average bitrate in kbps
    double encoding_fps;      // Encoding speed in frames per second
} tav_encoder_stats_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get encoding statistics.
|
|
||||||
*
|
|
||||||
* @param ctx Encoder context
|
|
||||||
* @param stats Output statistics structure
|
|
||||||
*/
|
|
||||||
void tav_encoder_get_stats(tav_encoder_context_t *ctx, tav_encoder_stats_t *stats);
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// TAV Packet Types (for reference)
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/* Video packets */
#define TAV_PACKET_IFRAME       0x10  // Intra-only single frame (I-frame)
#define TAV_PACKET_PFRAME       0x11  // Delta from the previous frame (P-frame)
#define TAV_PACKET_GOP_UNIFIED  0x12  // Unified GOP: 3D DWT over multiple frames

/* Audio packets */
#define TAV_PACKET_AUDIO_PCM8   0x20  // PCM8 audio (legacy)
#define TAV_PACKET_AUDIO_TAD    0x24  // TAD audio (DWT-based perceptual codec)

/* Markers and metadata */
#define TAV_PACKET_LOOP_START   0xF0  // Loop point start; carries no payload
#define TAV_PACKET_GOP_SYNC     0xFC  // GOP sync (frame count marker)
#define TAV_PACKET_TIMECODE     0xFD  // Timecode metadata
#define TAV_PACKET_SYNC         0xFF  // Sync packet; carries no payload
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Tile Settings (for multi-tile mode)
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/* Base (unpadded) tile dimensions */
#define TAV_TILE_SIZE_X 640
#define TAV_TILE_SIZE_Y 540

/* Margin derivation: half support of the 9/7 filter (filter lengths 9,7 -> L=4),
 * doubled once per margin level: 4 * 2^3 = 32 px */
#define TAV_DWT_FILTER_HALF_SUPPORT 4
#define TAV_TILE_MARGIN_LEVELS 3
#define TAV_TILE_MARGIN ((TAV_DWT_FILTER_HALF_SUPPORT) << (TAV_TILE_MARGIN_LEVELS))  // 32 px

/* Tile dimensions including the margin on both sides */
#define TAV_PADDED_TILE_SIZE_X (TAV_TILE_SIZE_X + 2 * TAV_TILE_MARGIN)  // 704
#define TAV_PADDED_TILE_SIZE_Y (TAV_TILE_SIZE_Y + 2 * TAV_TILE_MARGIN)  // 604

/* Monoblock limit: D1 PAL resolution (720x576).
 * A frame wider than 720 OR taller than 576 switches automatically to tiled mode. */
#define TAV_MONOBLOCK_MAX_WIDTH 720
#define TAV_MONOBLOCK_MAX_HEIGHT 576
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif // TAV_ENCODER_LIB_H
|
|
||||||
@@ -1,275 +0,0 @@
|
|||||||
/*
|
|
||||||
* TAV SIMD Function Dispatcher
|
|
||||||
*
|
|
||||||
* This file provides runtime CPU detection and function pointer dispatch
|
|
||||||
* for SIMD-optimized versions of performance-critical TAV encoder functions.
|
|
||||||
*
|
|
||||||
* Usage:
|
|
||||||
* 1. Include this header after defining all scalar functions
|
|
||||||
* 2. Call tav_simd_init() once at encoder initialization
|
|
||||||
* 3. Use function pointers (e.g., dwt_53_forward_1d_ptr) throughout code
|
|
||||||
*
|
|
||||||
* The dispatcher will automatically select AVX-512, AVX2, or scalar versions
|
|
||||||
* based on runtime CPU capabilities.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef TAV_SIMD_DISPATCH_H
|
|
||||||
#define TAV_SIMD_DISPATCH_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Function Pointer Types
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
// 1D DWT function pointer types
|
|
||||||
typedef void (*dwt_1d_func_t)(float *data, int length);
|
|
||||||
|
|
||||||
// Quantization function pointer types
|
|
||||||
typedef void (*quantise_basic_func_t)(
|
|
||||||
float *coeffs, int16_t *quantised, int size,
|
|
||||||
float effective_q, float dead_zone_threshold,
|
|
||||||
int width, int height, int decomp_levels, int is_chroma,
|
|
||||||
int (*get_subband_level)(int, int, int, int),
|
|
||||||
int (*get_subband_type)(int, int, int, int)
|
|
||||||
);
|
|
||||||
|
|
||||||
typedef void (*quantise_perceptual_func_t)(
|
|
||||||
float *coeffs, int16_t *quantised, int size,
|
|
||||||
float *weights, float base_quantiser
|
|
||||||
);
|
|
||||||
|
|
||||||
// Color conversion function pointer type
|
|
||||||
typedef void (*rgb_to_ycocg_func_t)(
|
|
||||||
const uint8_t *rgb, float *y, float *co, float *cg,
|
|
||||||
int width, int height
|
|
||||||
);
|
|
||||||
|
|
||||||
// 2D DWT column operations
|
|
||||||
typedef void (*dwt_2d_column_extract_func_t)(
|
|
||||||
const float *tile_data, float *column,
|
|
||||||
int x, int width, int height
|
|
||||||
);
|
|
||||||
|
|
||||||
typedef void (*dwt_2d_column_insert_func_t)(
|
|
||||||
float *tile_data, const float *column,
|
|
||||||
int x, int width, int height
|
|
||||||
);
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Global Function Pointers (initialized by tav_simd_init)
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
// DWT 1D transforms
|
|
||||||
static dwt_1d_func_t dwt_53_forward_1d_ptr = NULL;
|
|
||||||
static dwt_1d_func_t dwt_97_forward_1d_ptr = NULL;
|
|
||||||
static dwt_1d_func_t dwt_haar_forward_1d_ptr = NULL;
|
|
||||||
static dwt_1d_func_t dwt_53_inverse_1d_ptr = NULL;
|
|
||||||
static dwt_1d_func_t dwt_haar_inverse_1d_ptr = NULL;
|
|
||||||
|
|
||||||
// Quantization
|
|
||||||
static quantise_basic_func_t quantise_dwt_coefficients_ptr = NULL;
|
|
||||||
static quantise_perceptual_func_t quantise_dwt_coefficients_perceptual_ptr = NULL;
|
|
||||||
|
|
||||||
// Color conversion
|
|
||||||
static rgb_to_ycocg_func_t rgb_to_ycocg_ptr = NULL;
|
|
||||||
|
|
||||||
// 2D DWT column operations
|
|
||||||
static dwt_2d_column_extract_func_t dwt_2d_extract_column_ptr = NULL;
|
|
||||||
static dwt_2d_column_insert_func_t dwt_2d_insert_column_ptr = NULL;
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// SIMD Capability Detection
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
 * SIMD tiers the dispatcher can select.
 *
 * The numeric values are NOT ordered by capability; they are used as indices
 * into the simd_names[] table in tav_simd_init(), so do not renumber them.
 */
typedef enum {
    SIMD_NONE = 0,
    SIMD_AVX512F = 1,
    SIMD_AVX2 = 2,
    SIMD_SSE42 = 3
} simd_level_t;

// Tier chosen at start-up; written by tav_simd_init()
static simd_level_t detected_simd_level = SIMD_NONE;

/**
 * Query the running CPU and return the best SIMD tier that is both supported
 * by the CPU and enabled at compile time.
 *
 * AVX-512 is reported only when F, DQ, BW and VL are all present. On compilers
 * other than GCC/Clang, or on non-x86 targets, the result is SIMD_NONE.
 *
 * @return Detected tier; SIMD_NONE when nothing usable is found.
 */
static inline simd_level_t detect_simd_capabilities(void) {
    // __builtin_cpu_supports() with x86 feature names only compiles on x86
    // targets, so guard on the architecture as well as on the compiler.
#if (defined(__GNUC__) || defined(__clang__)) && \
    (defined(__x86_64__) || defined(__i386__))
    // SSE4.2 is the baseline: without it no SIMD tier is reported
    if (!__builtin_cpu_supports("sse4.2")) {
        return SIMD_NONE;
    }

#ifdef __AVX512F__
    if (__builtin_cpu_supports("avx512f") &&
        __builtin_cpu_supports("avx512dq") &&
        __builtin_cpu_supports("avx512bw") &&
        __builtin_cpu_supports("avx512vl")) {
        return SIMD_AVX512F;
    }
#endif

#ifdef __AVX2__
    if (__builtin_cpu_supports("avx2")) {
        return SIMD_AVX2;
    }
#endif

    // SSE4.2 support was already established above
    return SIMD_SSE42;
#else
    return SIMD_NONE;
#endif
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Scalar Fallback Wrappers
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
// These wrappers adapt the scalar functions to match function pointer signatures
|
|
||||||
|
|
||||||
static void quantise_dwt_coefficients_scalar_wrapper(
|
|
||||||
float *coeffs, int16_t *quantised, int size,
|
|
||||||
float effective_q, float dead_zone_threshold,
|
|
||||||
int width, int height, int decomp_levels, int is_chroma,
|
|
||||||
int (*get_subband_level)(int, int, int, int),
|
|
||||||
int (*get_subband_type)(int, int, int, int)
|
|
||||||
);
|
|
||||||
// Implementation provided by including encoder - just declare prototype
|
|
||||||
|
|
||||||
static void quantise_dwt_coefficients_perceptual_scalar_wrapper(
|
|
||||||
float *coeffs, int16_t *quantised, int size,
|
|
||||||
float *weights, float base_quantiser
|
|
||||||
);
|
|
||||||
// Implementation provided by including encoder
|
|
||||||
|
|
||||||
/**
 * Scalar fallback: copy column x of a row-major float tile into a contiguous
 * buffer, i.e. column[row] = tile_data[row * width + x] for row in [0, height).
 *
 * @param tile_data Source tile, row-major with `width` floats per row.
 * @param column    Destination buffer (height floats).
 * @param x         Column index to extract.
 * @param width     Row stride of the tile, in floats.
 * @param height    Number of rows to copy.
 */
static void dwt_2d_extract_column_scalar(
    const float *tile_data, float *column,
    int x, int width, int height
) {
    int row = 0;
    while (row < height) {
        const int src_index = row * width + x;
        column[row] = tile_data[src_index];
        row++;
    }
}
|
|
||||||
|
|
||||||
/**
 * Scalar fallback: write a contiguous buffer into column x of a row-major
 * float tile, i.e. tile_data[row * width + x] = column[row] for row in [0, height).
 *
 * @param tile_data Destination tile, row-major with `width` floats per row.
 * @param column    Source buffer (height floats).
 * @param x         Column index to overwrite.
 * @param width     Row stride of the tile, in floats.
 * @param height    Number of rows to write.
 */
static void dwt_2d_insert_column_scalar(
    float *tile_data, const float *column,
    int x, int width, int height
) {
    int row = 0;
    while (row < height) {
        const int dst_index = row * width + x;
        tile_data[dst_index] = column[row];
        row++;
    }
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// SIMD Initialization
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
static void tav_simd_init(void) {
|
|
||||||
// Detect CPU capabilities
|
|
||||||
detected_simd_level = detect_simd_capabilities();
|
|
||||||
|
|
||||||
const char *simd_names[] = {"None", "AVX-512", "AVX2", "SSE4.2"};
|
|
||||||
fprintf(stderr, "[TAV] SIMD level detected: %s\n",
|
|
||||||
simd_names[detected_simd_level]);
|
|
||||||
|
|
||||||
#ifdef __AVX512F__
|
|
||||||
if (detected_simd_level == SIMD_AVX512F) {
|
|
||||||
fprintf(stderr, "[TAV] Using AVX-512 optimizations\n");
|
|
||||||
|
|
||||||
// DWT functions
|
|
||||||
extern void dwt_53_forward_1d_avx512(float *data, int length);
|
|
||||||
extern void dwt_97_forward_1d_avx512(float *data, int length);
|
|
||||||
extern void dwt_haar_forward_1d_avx512(float *data, int length);
|
|
||||||
|
|
||||||
dwt_53_forward_1d_ptr = dwt_53_forward_1d_avx512;
|
|
||||||
dwt_97_forward_1d_ptr = dwt_97_forward_1d_avx512;
|
|
||||||
dwt_haar_forward_1d_ptr = dwt_haar_forward_1d_avx512;
|
|
||||||
|
|
||||||
// Quantization
|
|
||||||
// Note: Need wrapper functions that match the complex signature
|
|
||||||
// For now, using scalar versions
|
|
||||||
extern void dwt_53_forward_1d(float *data, int length);
|
|
||||||
extern void dwt_97_forward_1d(float *data, int length);
|
|
||||||
extern void dwt_haar_forward_1d(float *data, int length);
|
|
||||||
extern void dwt_53_inverse_1d(float *data, int length);
|
|
||||||
extern void dwt_haar_inverse_1d(float *data, int length);
|
|
||||||
|
|
||||||
// Fallback to scalar for inverse (can optimize later)
|
|
||||||
dwt_53_inverse_1d_ptr = dwt_53_inverse_1d;
|
|
||||||
dwt_haar_inverse_1d_ptr = dwt_haar_inverse_1d;
|
|
||||||
|
|
||||||
// Color conversion
|
|
||||||
extern void rgb_to_ycocg_avx512(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height);
|
|
||||||
rgb_to_ycocg_ptr = rgb_to_ycocg_avx512;
|
|
||||||
|
|
||||||
// 2D column operations
|
|
||||||
extern void dwt_2d_extract_column_avx512(const float *tile_data, float *column, int x, int width, int height);
|
|
||||||
extern void dwt_2d_insert_column_avx512(float *tile_data, const float *column, int x, int width, int height);
|
|
||||||
|
|
||||||
dwt_2d_extract_column_ptr = dwt_2d_extract_column_avx512;
|
|
||||||
dwt_2d_insert_column_ptr = dwt_2d_insert_column_avx512;
|
|
||||||
|
|
||||||
// Quantization uses scalar for now (needs integration work)
|
|
||||||
extern void dwt_53_forward_1d(float *data, int length);
|
|
||||||
extern void dwt_97_forward_1d(float *data, int length);
|
|
||||||
extern void dwt_haar_forward_1d(float *data, int length);
|
|
||||||
extern void dwt_53_inverse_1d(float *data, int length);
|
|
||||||
extern void dwt_haar_inverse_1d(float *data, int length);
|
|
||||||
extern void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height);
|
|
||||||
|
|
||||||
quantise_dwt_coefficients_ptr = quantise_dwt_coefficients_scalar_wrapper;
|
|
||||||
quantise_dwt_coefficients_perceptual_ptr = quantise_dwt_coefficients_perceptual_scalar_wrapper;
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Fallback to scalar implementations
|
|
||||||
fprintf(stderr, "[TAV] Using scalar (non-SIMD) implementations\n");
|
|
||||||
|
|
||||||
extern void dwt_53_forward_1d(float *data, int length);
|
|
||||||
extern void dwt_97_forward_1d(float *data, int length);
|
|
||||||
extern void dwt_haar_forward_1d(float *data, int length);
|
|
||||||
extern void dwt_53_inverse_1d(float *data, int length);
|
|
||||||
extern void dwt_haar_inverse_1d(float *data, int length);
|
|
||||||
extern void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height);
|
|
||||||
|
|
||||||
dwt_53_forward_1d_ptr = dwt_53_forward_1d;
|
|
||||||
dwt_97_forward_1d_ptr = dwt_97_forward_1d;
|
|
||||||
dwt_haar_forward_1d_ptr = dwt_haar_forward_1d;
|
|
||||||
dwt_53_inverse_1d_ptr = dwt_53_inverse_1d;
|
|
||||||
dwt_haar_inverse_1d_ptr = dwt_haar_inverse_1d;
|
|
||||||
|
|
||||||
rgb_to_ycocg_ptr = rgb_to_ycocg;
|
|
||||||
|
|
||||||
dwt_2d_extract_column_ptr = dwt_2d_extract_column_scalar;
|
|
||||||
dwt_2d_insert_column_ptr = dwt_2d_insert_column_scalar;
|
|
||||||
|
|
||||||
quantise_dwt_coefficients_ptr = quantise_dwt_coefficients_scalar_wrapper;
|
|
||||||
quantise_dwt_coefficients_perceptual_ptr = quantise_dwt_coefficients_perceptual_scalar_wrapper;
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Convenience Macros for Code Readability
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
// Use these macros in encoder code for cleaner dispatch
|
|
||||||
#define DWT_53_FORWARD_1D(data, length) \
|
|
||||||
dwt_53_forward_1d_ptr((data), (length))
|
|
||||||
|
|
||||||
#define DWT_97_FORWARD_1D(data, length) \
|
|
||||||
dwt_97_forward_1d_ptr((data), (length))
|
|
||||||
|
|
||||||
#define DWT_HAAR_FORWARD_1D(data, length) \
|
|
||||||
dwt_haar_forward_1d_ptr((data), (length))
|
|
||||||
|
|
||||||
#define RGB_TO_YCOCG(rgb, y, co, cg, width, height) \
|
|
||||||
rgb_to_ycocg_ptr((rgb), (y), (co), (cg), (width), (height))
|
|
||||||
|
|
||||||
#define DWT_2D_EXTRACT_COLUMN(tile_data, column, x, width, height) \
|
|
||||||
dwt_2d_extract_column_ptr((tile_data), (column), (x), (width), (height))
|
|
||||||
|
|
||||||
#define DWT_2D_INSERT_COLUMN(tile_data, column, x, width, height) \
|
|
||||||
dwt_2d_insert_column_ptr((tile_data), (column), (x), (width), (height))
|
|
||||||
|
|
||||||
#endif // TAV_SIMD_DISPATCH_H
|
|
||||||
@@ -1,78 +0,0 @@
|
|||||||
// Created by CuriousTorvald and Claude on 2025-12-02.
|
|
||||||
// TAV Video Decoder Library - Shared decoding functions for TAV format
|
|
||||||
// Can be used by both regular TAV decoder and TAV-DT decoder
|
|
||||||
|
|
||||||
#ifndef TAV_VIDEO_DECODER_H
|
|
||||||
#define TAV_VIDEO_DECODER_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stddef.h>
|
|
||||||
|
|
||||||
// Video decoder context - opaque to users
|
|
||||||
typedef struct tav_video_context tav_video_context_t;
|
|
||||||
|
|
||||||
// Video parameters structure
|
|
||||||
// Parameters describing a TAV video stream, passed to tav_video_create().
typedef struct {
    // --- Frame geometry ---
    int width;
    int height;

    // --- Wavelet configuration ---
    // Number of spatial DWT levels (typically 4).
    int decomp_levels;
    // Number of temporal DWT levels (typically 2).
    int temporal_levels;
    // Spatial wavelet: 0=CDF 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar.
    int wavelet_filter;
    // Temporal wavelet: 0=CDF 5/3, 1=CDF 9/7.
    int temporal_wavelet;

    // --- Coding options ---
    // Entropy coder: 0=Twobitmap, 1=EZBC, 2=RAW.
    int entropy_coder;
    // Channel layout: 0=YCoCg-R, 1=ICtCp.
    int channel_layout;
    // 1=perceptual quantisation, 0=uniform.
    int perceptual_tuning;

    // --- Base quantiser indices, one per channel ---
    uint8_t quantiser_y;    // Y/I
    uint8_t quantiser_co;   // Co/Ct
    uint8_t quantiser_cg;   // Cg/Cp

    // Encoder preset flags (sports, anime, etc.).
    uint8_t encoder_preset;

    // 1=single tile (monoblock), 0=multi-tile.
    int monoblock;
    // 1=packets are uncompressed (Video Flags bit 4), 0=Zstd compressed.
    int no_zstd;
} tav_video_params_t;
|
|
||||||
|
|
||||||
// Create video decoder context
|
|
||||||
// Returns NULL on failure
|
|
||||||
tav_video_context_t *tav_video_create(const tav_video_params_t *params);
|
|
||||||
|
|
||||||
// Free video decoder context
|
|
||||||
void tav_video_free(tav_video_context_t *ctx);
|
|
||||||
|
|
||||||
// Decode GOP_UNIFIED packet (0x12) to RGB24 frames
|
|
||||||
// Input: compressed_data - GOP packet data (after packet type byte)
|
|
||||||
// compressed_size - size of compressed data
|
|
||||||
// gop_size - number of frames in GOP (read from packet)
|
|
||||||
// Output: rgb_frames - array of pointers to RGB24 frame buffers (width*height*3 each)
|
|
||||||
// Must be pre-allocated by caller (gop_size pointers, each pointing to width*height*3 bytes)
|
|
||||||
// Returns: 0 on success, -1 on error
|
|
||||||
int tav_video_decode_gop(tav_video_context_t *ctx,
|
|
||||||
const uint8_t *compressed_data, uint32_t compressed_size,
|
|
||||||
uint8_t gop_size, uint8_t **rgb_frames);
|
|
||||||
|
|
||||||
// Decode IFRAME packet (0x10) to RGB24 frame
|
|
||||||
// Input: compressed_data - I-frame packet data (after packet type byte)
|
|
||||||
// packet_size - size of packet data
|
|
||||||
// Output: rgb_frame - pointer to RGB24 frame buffer (width*height*3 bytes)
|
|
||||||
// Must be pre-allocated by caller
|
|
||||||
// Returns: 0 on success, -1 on error
|
|
||||||
int tav_video_decode_iframe(tav_video_context_t *ctx,
|
|
||||||
const uint8_t *compressed_data, uint32_t packet_size,
|
|
||||||
uint8_t *rgb_frame);
|
|
||||||
|
|
||||||
// Decode PFRAME packet (0x11) to RGB24 frame (delta from reference)
|
|
||||||
// Input: compressed_data - P-frame packet data (after packet type byte)
|
|
||||||
// packet_size - size of packet data
|
|
||||||
// Output: rgb_frame - pointer to RGB24 frame buffer (width*height*3 bytes)
|
|
||||||
// Must be pre-allocated by caller
|
|
||||||
// Returns: 0 on success, -1 on error
|
|
||||||
// Note: Requires previous frame to be decoded first (stored internally as reference)
|
|
||||||
int tav_video_decode_pframe(tav_video_context_t *ctx,
|
|
||||||
const uint8_t *compressed_data, uint32_t packet_size,
|
|
||||||
uint8_t *rgb_frame);
|
|
||||||
|
|
||||||
// Get last error message
|
|
||||||
const char *tav_video_get_error(tav_video_context_t *ctx);
|
|
||||||
|
|
||||||
// Enable verbose debug output
|
|
||||||
void tav_video_set_verbose(tav_video_context_t *ctx, int verbose);
|
|
||||||
|
|
||||||
#endif // TAV_VIDEO_DECODER_H
|
|
||||||
@@ -1,397 +0,0 @@
|
|||||||
/**
|
|
||||||
* LDPC Rate 1/2 Codec Implementation
|
|
||||||
*
|
|
||||||
* LDPC for TAV-DT header protection.
|
|
||||||
* Uses a systematic rate 1/2 code with a scaled min-sum belief propagation decoder.
|
|
||||||
*
|
|
||||||
* The parity-check matrix is designed for good error correction on small blocks.
|
|
||||||
* Each parity bit is computed as XOR of multiple data bits using a pseudo-random
|
|
||||||
* but deterministic pattern.
|
|
||||||
*
|
|
||||||
* Created by CuriousTorvald and Claude on 2025-12-09.
|
|
||||||
* Updated 2025-12-17: Replaced bit-flipping with belief propagation decoder.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "ldpc.h"
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <math.h>
|
|
||||||
|
|
||||||
// Channel LLR magnitude for hard-decision input
|
|
||||||
// Higher value = more confidence in received bits
|
|
||||||
// For BER ~0.01, optimal is about 4.6; we use slightly lower for robustness
|
|
||||||
#define CHANNEL_LLR_MAG 4.0f
|
|
||||||
|
|
||||||
// Clipping value to prevent numerical overflow in tanh operations
|
|
||||||
#define LLR_CLIP 20.0f
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Parity-Check Matrix Generation
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
// For rate 1/2 LDPC: n = 2k bits, parity-check matrix H is (n-k) x n = k x 2k
|
|
||||||
// We use H = [P | I_k] where P is the parity pattern matrix
|
|
||||||
// This gives systematic encoding: c = [data | parity] where parity = P * data
|
|
||||||
|
|
||||||
// Parity pattern: each parity bit j depends on data bits where pattern[j][i] = 1
|
|
||||||
// We use a regular pattern with column weight 3 (each data bit affects 3 parity bits)
|
|
||||||
// and row weight varies to cover the data bits well
|
|
||||||
|
|
||||||
// Simple hash function for generating parity connections
|
|
||||||
// Deterministic 32-bit mixer used to derive parity connections.
// The two inputs are XOR-combined first, so the result is symmetric in (a, b);
// the combined word then goes through two xorshift-multiply rounds and a final
// xorshift. All arithmetic wraps modulo 2^32.
static inline uint32_t hash_mix(uint32_t a, uint32_t b) {
    uint32_t mixed = a ^ b;

    mixed ^= mixed >> 16;
    mixed *= 0x85ebca6b;

    mixed ^= mixed >> 13;
    mixed *= 0xc2b2ae35;

    mixed ^= mixed >> 16;
    return mixed;
}
|
|
||||||
|
|
||||||
// Get bit from byte array
|
|
||||||
// Read bit `bit_idx` from a byte array; bit 0 is the most significant bit of data[0].
// Returns 0 or 1.
static inline int get_bit(const uint8_t *data, int bit_idx) {
    const int byte_idx = bit_idx >> 3;
    const int shift = 7 - (bit_idx & 7);
    return (data[byte_idx] >> shift) & 1;
}
|
|
||||||
|
|
||||||
// Set bit in byte array
|
|
||||||
// Write bit `bit_idx` of a byte array; bit 0 is the most significant bit of data[0].
// Any non-zero `value` sets the bit, zero clears it. Other bits are untouched.
static inline void set_bit(uint8_t *data, int bit_idx, int value) {
    uint8_t *target = &data[bit_idx >> 3];
    const uint8_t mask = (uint8_t)(1u << (7 - (bit_idx & 7)));

    if (value) {
        *target = (uint8_t)(*target | mask);
    } else {
        *target = (uint8_t)(*target & (uint8_t)~mask);
    }
}
|
|
||||||
|
|
||||||
// Flip bit in byte array
|
|
||||||
// Invert bit `bit_idx` of a byte array; bit 0 is the most significant bit of data[0].
static inline void flip_bit(uint8_t *data, int bit_idx) {
    const uint8_t mask = (uint8_t)(1u << (7 - (bit_idx & 7)));
    data[bit_idx >> 3] = (uint8_t)(data[bit_idx >> 3] ^ mask);
}
|
|
||||||
|
|
||||||
// Get list of data bits that affect parity bit j
|
|
||||||
// Returns number of connected data bits, stores indices in connections[]
|
|
||||||
// For rate 1/2: data bits are 0 to k*8-1, parity bits are k*8 to 2*k*8-1
|
|
||||||
// List the data bits that take part in parity equation `parity_idx`.
//
// Data bit i is selected when hash_mix(seed, i) is a multiple of (k_bits / 3 + 1),
// where the seed is derived from parity_idx; the pattern is therefore fully
// deterministic. If fewer than two bits are selected, two fallback bits
// (parity_idx % k_bits and (parity_idx + k_bits / 2) % k_bits) are appended.
//
//   parity_idx  - index of the parity equation
//   k_bits      - number of data bits; must be non-zero (used as a modulus)
//   connections - output; must have room for k_bits entries
// Returns the number of indices written to connections[].
//
// NOTE(review): the fallback bits are appended without checking whether one of
// them was already selected, so the list may contain a repeated index - confirm
// whether that is intended before changing it, as it defines the code itself.
static int get_parity_connections(int parity_idx, int k_bits, int *connections) {
    const uint32_t seed = hash_mix(0xDEADBEEF, (uint32_t)parity_idx);
    const uint32_t modulus = (uint32_t)(k_bits / 3 + 1);
    int n_selected = 0;

    for (int bit = 0; bit < k_bits; ++bit) {
        if (hash_mix(seed, (uint32_t)bit) % modulus != 0) {
            continue;
        }
        connections[n_selected] = bit;
        ++n_selected;
    }

    // Guarantee a minimum of two entries per parity equation.
    if (n_selected < 2) {
        connections[n_selected] = parity_idx % k_bits;
        connections[n_selected + 1] = (parity_idx + k_bits / 2) % k_bits;
        n_selected += 2;
    }

    return n_selected;
}
|
|
||||||
|
|
||||||
// Get list of parity bits affected by data bit i
|
|
||||||
static int get_data_connections(int data_idx, int k_bits, int *connections) {
|
|
||||||
int count = 0;
|
|
||||||
|
|
||||||
for (int j = 0; j < k_bits; j++) {
|
|
||||||
int parity_conns[LDPC_MAX_DATA_BYTES * 8];
|
|
||||||
int n_conns = get_parity_connections(j, k_bits, parity_conns);
|
|
||||||
|
|
||||||
for (int c = 0; c < n_conns; c++) {
|
|
||||||
if (parity_conns[c] == data_idx) {
|
|
||||||
connections[count++] = j;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Initialization
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
static int ldpc_initialized = 0;
|
|
||||||
|
|
||||||
void ldpc_init(void) {
|
|
||||||
if (ldpc_initialized) return;
|
|
||||||
// No pre-computation needed - patterns generated on the fly
|
|
||||||
ldpc_initialized = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Encoding
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
size_t ldpc_encode(const uint8_t *data, size_t data_len, uint8_t *output) {
|
|
||||||
if (!ldpc_initialized) ldpc_init();
|
|
||||||
|
|
||||||
if (data_len > LDPC_MAX_DATA_BYTES) {
|
|
||||||
data_len = LDPC_MAX_DATA_BYTES;
|
|
||||||
}
|
|
||||||
|
|
||||||
int k_bits = (int)(data_len * 8); // Number of data bits
|
|
||||||
|
|
||||||
// Copy data to output (systematic encoding)
|
|
||||||
memcpy(output, data, data_len);
|
|
||||||
|
|
||||||
// Initialize parity bytes to zero
|
|
||||||
memset(output + data_len, 0, data_len);
|
|
||||||
|
|
||||||
// Compute parity bits
|
|
||||||
for (int j = 0; j < k_bits; j++) {
|
|
||||||
// Get data bits connected to parity bit j
|
|
||||||
int connections[LDPC_MAX_DATA_BYTES * 8];
|
|
||||||
int n_conns = get_parity_connections(j, k_bits, connections);
|
|
||||||
|
|
||||||
// Parity bit = XOR of connected data bits
|
|
||||||
int parity = 0;
|
|
||||||
for (int c = 0; c < n_conns; c++) {
|
|
||||||
parity ^= get_bit(data, connections[c]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set parity bit
|
|
||||||
set_bit(output + data_len, j, parity);
|
|
||||||
}
|
|
||||||
|
|
||||||
return data_len * 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Decoding
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
int ldpc_check_syndrome(const uint8_t *codeword, size_t len) {
|
|
||||||
if (!ldpc_initialized) ldpc_init();
|
|
||||||
|
|
||||||
size_t data_len = len / 2;
|
|
||||||
int k_bits = (int)(data_len * 8);
|
|
||||||
|
|
||||||
// Check all parity equations
|
|
||||||
for (int j = 0; j < k_bits; j++) {
|
|
||||||
int connections[LDPC_MAX_DATA_BYTES * 8];
|
|
||||||
int n_conns = get_parity_connections(j, k_bits, connections);
|
|
||||||
|
|
||||||
// Compute syndrome bit: XOR of connected data bits XOR parity bit
|
|
||||||
int syndrome = get_bit(codeword + data_len, j);
|
|
||||||
for (int c = 0; c < n_conns; c++) {
|
|
||||||
syndrome ^= get_bit(codeword, connections[c]);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (syndrome != 0) {
|
|
||||||
return 0; // Syndrome non-zero: errors detected
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return 1; // Zero syndrome: valid codeword
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clip LLR to prevent overflow
|
|
||||||
static inline float clip_llr(float llr) {
|
|
||||||
if (llr > LLR_CLIP) return LLR_CLIP;
|
|
||||||
if (llr < -LLR_CLIP) return -LLR_CLIP;
|
|
||||||
return llr;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sign of a float (returns +1 or -1)
|
|
||||||
// Sign of a float as +1.0f or -1.0f; any x for which (x >= 0) holds maps to +1.0f.
static inline float sign_f(float x) {
    if (x >= 0.0f) {
        return 1.0f;
    }
    return -1.0f;
}
|
|
||||||
|
|
||||||
int ldpc_decode(const uint8_t *encoded, size_t encoded_len, uint8_t *output) {
|
|
||||||
if (!ldpc_initialized) ldpc_init();
|
|
||||||
|
|
||||||
if (encoded_len < 2 || (encoded_len & 1) != 0) {
|
|
||||||
return -1; // Invalid length
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t data_len = encoded_len / 2;
|
|
||||||
if (data_len > LDPC_MAX_DATA_BYTES) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int k_bits = (int)(data_len * 8);
|
|
||||||
int n_bits = k_bits * 2; // Total codeword bits (data + parity)
|
|
||||||
|
|
||||||
// Pre-compute the parity check matrix structure for efficiency
|
|
||||||
// For each check node j: which variable nodes it connects to
|
|
||||||
int check_to_var[LDPC_MAX_DATA_BYTES * 8][LDPC_MAX_DATA_BYTES * 8 + 1];
|
|
||||||
int check_degree[LDPC_MAX_DATA_BYTES * 8];
|
|
||||||
|
|
||||||
for (int j = 0; j < k_bits; j++) {
|
|
||||||
int connections[LDPC_MAX_DATA_BYTES * 8];
|
|
||||||
int n_conns = get_parity_connections(j, k_bits, connections);
|
|
||||||
|
|
||||||
// Check j connects to: data bits in connections[] + parity bit j
|
|
||||||
check_degree[j] = n_conns + 1;
|
|
||||||
for (int c = 0; c < n_conns; c++) {
|
|
||||||
check_to_var[j][c] = connections[c]; // Data bit index
|
|
||||||
}
|
|
||||||
check_to_var[j][n_conns] = k_bits + j; // Parity bit index
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialize channel LLRs from received hard bits
|
|
||||||
// LLR > 0 means bit is probably 0, LLR < 0 means bit is probably 1
|
|
||||||
float channel_llr[LDPC_MAX_DATA_BYTES * 16];
|
|
||||||
for (int i = 0; i < n_bits; i++) {
|
|
||||||
int bit = get_bit(encoded, i);
|
|
||||||
channel_llr[i] = bit ? -CHANNEL_LLR_MAG : CHANNEL_LLR_MAG;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Message arrays for BP
|
|
||||||
// check_to_var_msg[j][idx] = message from check j to variable check_to_var[j][idx]
|
|
||||||
float check_to_var_msg[LDPC_MAX_DATA_BYTES * 8][LDPC_MAX_DATA_BYTES * 8 + 1];
|
|
||||||
|
|
||||||
// Initialize check-to-variable messages to zero
|
|
||||||
memset(check_to_var_msg, 0, sizeof(check_to_var_msg));
|
|
||||||
|
|
||||||
// Belief Propagation iterations
|
|
||||||
for (int iter = 0; iter < LDPC_MAX_ITERATIONS; iter++) {
|
|
||||||
// Step 1: Variable-to-check messages (implicit, computed on the fly)
|
|
||||||
// var_to_check[v→j] = channel_llr[v] + sum of all check_to_var_msg[k][idx_v] for k != j
|
|
||||||
|
|
||||||
// Step 2: Check-to-variable messages using min-sum approximation
|
|
||||||
// For each check node j, for each connected variable v:
|
|
||||||
// check_to_var_msg[j→v] = sign * min(|incoming messages from other vars|)
|
|
||||||
|
|
||||||
for (int j = 0; j < k_bits; j++) {
|
|
||||||
int degree = check_degree[j];
|
|
||||||
|
|
||||||
// First, compute variable-to-check messages for all variables in this check
|
|
||||||
float var_to_check[LDPC_MAX_DATA_BYTES * 8 + 1];
|
|
||||||
for (int idx = 0; idx < degree; idx++) {
|
|
||||||
int v = check_to_var[j][idx];
|
|
||||||
|
|
||||||
// Sum all incoming check messages to variable v, except from check j
|
|
||||||
float sum = channel_llr[v];
|
|
||||||
for (int jj = 0; jj < k_bits; jj++) {
|
|
||||||
if (jj == j) continue;
|
|
||||||
// Find if check jj connects to variable v
|
|
||||||
for (int idx2 = 0; idx2 < check_degree[jj]; idx2++) {
|
|
||||||
if (check_to_var[jj][idx2] == v) {
|
|
||||||
sum += check_to_var_msg[jj][idx2];
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
var_to_check[idx] = clip_llr(sum);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now compute check-to-variable messages using min-sum
|
|
||||||
for (int idx = 0; idx < degree; idx++) {
|
|
||||||
float sign_prod = 1.0f;
|
|
||||||
float min_abs = 1e30f;
|
|
||||||
|
|
||||||
for (int idx2 = 0; idx2 < degree; idx2++) {
|
|
||||||
if (idx2 == idx) continue;
|
|
||||||
float msg = var_to_check[idx2];
|
|
||||||
sign_prod *= sign_f(msg);
|
|
||||||
float abs_msg = fabsf(msg);
|
|
||||||
if (abs_msg < min_abs) min_abs = abs_msg;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Min-sum with scaling factor 0.75 for better performance
|
|
||||||
check_to_var_msg[j][idx] = clip_llr(sign_prod * min_abs * 0.75f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 3: Compute posterior LLRs and make hard decisions
|
|
||||||
float posterior[LDPC_MAX_DATA_BYTES * 16];
|
|
||||||
for (int v = 0; v < n_bits; v++) {
|
|
||||||
float sum = channel_llr[v];
|
|
||||||
// Add all incoming check-to-variable messages
|
|
||||||
for (int j = 0; j < k_bits; j++) {
|
|
||||||
for (int idx = 0; idx < check_degree[j]; idx++) {
|
|
||||||
if (check_to_var[j][idx] == v) {
|
|
||||||
sum += check_to_var_msg[j][idx];
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
posterior[v] = sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Make hard decisions
|
|
||||||
uint8_t decoded[LDPC_MAX_DATA_BYTES * 2];
|
|
||||||
memset(decoded, 0, encoded_len);
|
|
||||||
for (int v = 0; v < n_bits; v++) {
|
|
||||||
if (posterior[v] < 0) {
|
|
||||||
set_bit(decoded, v, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check syndrome
|
|
||||||
int syndrome_count = 0;
|
|
||||||
for (int j = 0; j < k_bits; j++) {
|
|
||||||
int syn = 0;
|
|
||||||
for (int idx = 0; idx < check_degree[j]; idx++) {
|
|
||||||
syn ^= get_bit(decoded, check_to_var[j][idx]);
|
|
||||||
}
|
|
||||||
if (syn) syndrome_count++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If all syndromes are zero, we're done
|
|
||||||
if (syndrome_count == 0) {
|
|
||||||
memcpy(output, decoded, data_len);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Early termination if syndrome count is very small (nearly converged)
|
|
||||||
if (iter > 5 && syndrome_count <= 2) {
|
|
||||||
// Try one more iteration, if still stuck, accept
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Decoding did not converge - compute final estimate
|
|
||||||
float posterior[LDPC_MAX_DATA_BYTES * 16];
|
|
||||||
for (int v = 0; v < n_bits; v++) {
|
|
||||||
float sum = channel_llr[v];
|
|
||||||
for (int j = 0; j < k_bits; j++) {
|
|
||||||
for (int idx = 0; idx < check_degree[j]; idx++) {
|
|
||||||
if (check_to_var[j][idx] == v) {
|
|
||||||
sum += check_to_var_msg[j][idx];
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
posterior[v] = sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint8_t decoded[LDPC_MAX_DATA_BYTES * 2];
|
|
||||||
memset(decoded, 0, encoded_len);
|
|
||||||
for (int v = 0; v < n_bits; v++) {
|
|
||||||
if (posterior[v] < 0) {
|
|
||||||
set_bit(decoded, v, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check final syndrome count
|
|
||||||
int final_syndromes = 0;
|
|
||||||
for (int j = 0; j < k_bits; j++) {
|
|
||||||
int syn = 0;
|
|
||||||
for (int idx = 0; idx < check_degree[j]; idx++) {
|
|
||||||
syn ^= get_bit(decoded, check_to_var[j][idx]);
|
|
||||||
}
|
|
||||||
if (syn) final_syndromes++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Accept if syndrome count is low enough
|
|
||||||
if (final_syndromes <= k_bits / 4) {
|
|
||||||
memcpy(output, decoded, data_len);
|
|
||||||
return 0; // Soft success
|
|
||||||
}
|
|
||||||
|
|
||||||
// Total failure - return original data as best effort
|
|
||||||
memcpy(output, encoded, data_len);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
@@ -1,68 +0,0 @@
|
|||||||
/**
|
|
||||||
* LDPC Rate 1/2 Codec for TAV-DT
|
|
||||||
*
|
|
||||||
* Simple LDPC implementation for header protection in TAV-DT format.
|
|
||||||
* Rate 1/2: k data bytes → 2k encoded bytes (doubles the size)
|
|
||||||
*
|
|
||||||
* Uses systematic encoding where first k bytes are data, last k bytes are parity.
|
|
||||||
* Decoding uses an iterative belief-propagation (scaled min-sum) algorithm.
|
|
||||||
*
|
|
||||||
* Designed for small blocks (headers up to 64 bytes).
|
|
||||||
*
|
|
||||||
* Created by CuriousTorvald and Claude on 2025-12-09.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef LDPC_H
|
|
||||||
#define LDPC_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stddef.h>
|
|
||||||
|
|
||||||
// Maximum block size (data bytes before encoding)
|
|
||||||
#define LDPC_MAX_DATA_BYTES 64
|
|
||||||
|
|
||||||
// LDPC decoder parameters
|
|
||||||
#define LDPC_MAX_ITERATIONS 50
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize LDPC codec.
|
|
||||||
* Must be called once before using encode/decode functions.
|
|
||||||
* Idempotent: repeated calls are no-ops (guarded by a plain static flag, no locking).
|
|
||||||
*/
|
|
||||||
void ldpc_init(void);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Encode data block with LDPC rate 1/2.
|
|
||||||
*
|
|
||||||
* @param data Input data bytes
|
|
||||||
* @param data_len Length of input data (1 to LDPC_MAX_DATA_BYTES)
|
|
||||||
* @param output Output buffer (must hold 2 * data_len bytes)
|
|
||||||
* @return Output length (2 * data_len)
|
|
||||||
*
|
|
||||||
* Output format: [data bytes][parity bytes]
|
|
||||||
* The output is systematic: first data_len bytes are the original data.
|
|
||||||
*/
|
|
||||||
size_t ldpc_encode(const uint8_t *data, size_t data_len, uint8_t *output);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Decode LDPC rate 1/2 encoded block.
|
|
||||||
*
|
|
||||||
* @param encoded Input encoded data (2 * data_len bytes)
|
|
||||||
* @param encoded_len Length of encoded data (must be even, max 2*LDPC_MAX_DATA_BYTES)
|
|
||||||
* @param output Output buffer for decoded data (encoded_len / 2 bytes)
|
|
||||||
* @return 0 on success, -1 if decoding failed (too many errors)
|
|
||||||
*
|
|
||||||
* Uses an iterative belief-propagation (scaled min-sum) decoder.
|
|
||||||
*/
|
|
||||||
int ldpc_decode(const uint8_t *encoded, size_t encoded_len, uint8_t *output);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Calculate syndrome for validation.
|
|
||||||
*
|
|
||||||
* @param codeword Encoded codeword (2 * data_len bytes)
|
|
||||||
* @param len Length of codeword
|
|
||||||
* @return 1 if valid (zero syndrome), 0 if errors detected
|
|
||||||
*/
|
|
||||||
int ldpc_check_syndrome(const uint8_t *codeword, size_t len);
|
|
||||||
|
|
||||||
#endif // LDPC_H
|
|
||||||
@@ -1,478 +0,0 @@
|
|||||||
/**
|
|
||||||
* LDPC(255,223) Codec Implementation - Enhanced Version
|
|
||||||
*
|
|
||||||
* This implements a high-rate LDPC code designed to compete with RS(255,223).
|
|
||||||
*
|
|
||||||
* Key improvements in this version:
|
|
||||||
* - Sum-Product (Belief Propagation) decoder for optimal performance
|
|
||||||
* - Quasi-cyclic H matrix with optimized degree distribution
|
|
||||||
* - Layered scheduling for faster convergence
|
|
||||||
* - Adaptive LLR initialization
|
|
||||||
*
|
|
||||||
* Created by CuriousTorvald and Claude on 2025-12-15.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "ldpc_payload.h"
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Constants
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
#define N_BITS (LDPC_P_BLOCK_SIZE * 8) // 2040 total bits
|
|
||||||
#define K_BITS (LDPC_P_DATA_SIZE * 8) // 1784 data bits
|
|
||||||
#define M_BITS (LDPC_P_PARITY_SIZE * 8) // 256 parity bits
|
|
||||||
|
|
||||||
// LLR bounds - tighter bounds help prevent numerical issues
|
|
||||||
#define LLR_MAX 20.0f
|
|
||||||
#define LLR_MIN -20.0f
|
|
||||||
|
|
||||||
// Decoding parameters
|
|
||||||
#define LDPC_MAX_ITER 100
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Sparse Matrix Storage
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
#define MAX_CHECK_DEGREE 50
|
|
||||||
#define MAX_VAR_DEGREE 12
|
|
||||||
|
|
||||||
static int ldpc_p_initialized = 0;
|
|
||||||
|
|
||||||
static int check_degree[M_BITS];
|
|
||||||
static int check_to_var[M_BITS][MAX_CHECK_DEGREE];
|
|
||||||
static int check_to_var_idx[M_BITS][MAX_CHECK_DEGREE];
|
|
||||||
|
|
||||||
static int var_degree[N_BITS];
|
|
||||||
static int var_to_check[N_BITS][MAX_VAR_DEGREE];
|
|
||||||
static int var_to_check_idx[N_BITS][MAX_VAR_DEGREE];
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Bit manipulation
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
// Read bit `bit_idx` from a byte array; bit 0 is the most significant bit of data[0].
// Returns 0 or 1.
static inline int get_bit(const uint8_t *data, int bit_idx) {
    const int byte_idx = bit_idx >> 3;
    const int shift = 7 - (bit_idx & 7);
    return (data[byte_idx] >> shift) & 1;
}
|
|
||||||
|
|
||||||
// Write bit `bit_idx` of a byte array; bit 0 is the most significant bit of data[0].
// Any non-zero `value` sets the bit, zero clears it. Other bits are untouched.
static inline void set_bit(uint8_t *data, int bit_idx, int value) {
    uint8_t *target = &data[bit_idx >> 3];
    const uint8_t mask = (uint8_t)(1u << (7 - (bit_idx & 7)));

    if (value) {
        *target = (uint8_t)(*target | mask);
    } else {
        *target = (uint8_t)(*target & (uint8_t)~mask);
    }
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// H Matrix Construction - Quasi-Cyclic with Optimized Distribution
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
// Hash function for deterministic pseudo-random connections
|
|
||||||
// Deterministic 32-bit hash of a pair, used for pseudo-random connections.
// `b` is multiplied by 0x9E3779B9 and XORed into `a`; the result then goes
// through two xorshift-multiply rounds and a final xorshift. All arithmetic
// wraps modulo 2^32.
static inline uint32_t hash32(uint32_t a, uint32_t b) {
    uint32_t state = (b * 0x9E3779B9) ^ a;

    state = (state ^ (state >> 16)) * 0x85EBCA6B;
    state = (state ^ (state >> 13)) * 0xC2B2AE35;

    return state ^ (state >> 16);
}
|
|
||||||
|
|
||||||
static void add_edge(int check, int var) {
|
|
||||||
// Check if already connected
|
|
||||||
for (int i = 0; i < check_degree[check]; i++) {
|
|
||||||
if (check_to_var[check][i] == var) return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (check_degree[check] >= MAX_CHECK_DEGREE || var_degree[var] >= MAX_VAR_DEGREE) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
int cidx = check_degree[check];
|
|
||||||
int vidx = var_degree[var];
|
|
||||||
|
|
||||||
check_to_var[check][cidx] = var;
|
|
||||||
check_to_var_idx[check][cidx] = vidx;
|
|
||||||
check_degree[check]++;
|
|
||||||
|
|
||||||
var_to_check[var][vidx] = check;
|
|
||||||
var_to_check_idx[var][vidx] = cidx;
|
|
||||||
var_degree[var]++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Simplified cycle check - only check direct neighbors (faster)
|
|
||||||
static int would_create_short_cycle(int v, int c) {
|
|
||||||
// Quick check: if v is already connected to c, skip
|
|
||||||
for (int i = 0; i < var_degree[v]; i++) {
|
|
||||||
if (var_to_check[v][i] == c) return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// For speed, only do basic 4-cycle check for low-degree nodes
|
|
||||||
if (var_degree[v] > 4 || check_degree[c] > 20) return 0;
|
|
||||||
|
|
||||||
// Check for 4-cycles
|
|
||||||
for (int i = 0; i < var_degree[v]; i++) {
|
|
||||||
int c_prime = var_to_check[v][i];
|
|
||||||
for (int j = 0; j < check_degree[c_prime] && j < 15; j++) {
|
|
||||||
int v_prime = check_to_var[c_prime][j];
|
|
||||||
if (v_prime == v) continue;
|
|
||||||
for (int k = 0; k < var_degree[v_prime] && k < 8; k++) {
|
|
||||||
if (var_to_check[v_prime][k] == c) {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Quasi-cyclic expansion: offset base_idx by shift and wrap into [0, size)
// (for non-negative inputs).
static int qc_shift(int base_idx, int shift, int size) {
    const int moved = base_idx + shift;
    return moved % size;
}
|
|
||||||
|
|
||||||
static void build_h_matrix(void) {
|
|
||||||
memset(check_degree, 0, sizeof(check_degree));
|
|
||||||
memset(var_degree, 0, sizeof(var_degree));
|
|
||||||
|
|
||||||
// ==========================================================================
|
|
||||||
// H matrix with staircase parity and PEG-based data connections
|
|
||||||
// ==========================================================================
|
|
||||||
|
|
||||||
// --- Part 1: Staircase parity structure ---
|
|
||||||
for (int c = 0; c < M_BITS; c++) {
|
|
||||||
int parity_bit = K_BITS + c;
|
|
||||||
add_edge(c, parity_bit);
|
|
||||||
if (c > 0) {
|
|
||||||
add_edge(c, K_BITS + c - 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// --- Part 2: Connect data bits using PEG approach ---
|
|
||||||
for (int v = 0; v < K_BITS; v++) {
|
|
||||||
// Target 6 connections per variable
|
|
||||||
int target = 6;
|
|
||||||
|
|
||||||
for (int d = 0; d < target; d++) {
|
|
||||||
uint32_t h = hash32((uint32_t)v * 2654435769U, (uint32_t)d * 1597334677U);
|
|
||||||
|
|
||||||
// Find best check (lowest degree)
|
|
||||||
int best_c = -1;
|
|
||||||
int best_deg = MAX_CHECK_DEGREE;
|
|
||||||
|
|
||||||
for (int attempt = 0; attempt < 16; attempt++) {
|
|
||||||
int c = (int)((h + attempt * 127) % M_BITS);
|
|
||||||
|
|
||||||
if (check_degree[c] < best_deg && check_degree[c] < MAX_CHECK_DEGREE - 2) {
|
|
||||||
// Check not already connected
|
|
||||||
int connected = 0;
|
|
||||||
for (int i = 0; i < var_degree[v]; i++) {
|
|
||||||
if (var_to_check[v][i] == c) { connected = 1; break; }
|
|
||||||
}
|
|
||||||
if (!connected) {
|
|
||||||
best_deg = check_degree[c];
|
|
||||||
best_c = c;
|
|
||||||
if (best_deg < 30) break; // Good enough
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (best_c >= 0 && var_degree[v] < MAX_VAR_DEGREE - 1) {
|
|
||||||
add_edge(best_c, v);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// --- Part 3: Fill in low-degree variables ---
|
|
||||||
for (int v = 0; v < K_BITS; v++) {
|
|
||||||
while (var_degree[v] < 5) {
|
|
||||||
uint32_t h = hash32((uint32_t)v * 12345, (uint32_t)var_degree[v] * 67890);
|
|
||||||
|
|
||||||
int added = 0;
|
|
||||||
for (int attempt = 0; attempt < 64 && !added; attempt++) {
|
|
||||||
int c = (int)((h + attempt * 31) % M_BITS);
|
|
||||||
if (check_degree[c] < MAX_CHECK_DEGREE - 2) {
|
|
||||||
int prev = var_degree[v];
|
|
||||||
add_edge(c, v);
|
|
||||||
if (var_degree[v] > prev) added = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!added) break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// --- Part 4: Balance check degrees ---
|
|
||||||
for (int c = 0; c < M_BITS; c++) {
|
|
||||||
int target = 35;
|
|
||||||
int attempts = 0;
|
|
||||||
while (check_degree[c] < target && attempts < 150) {
|
|
||||||
uint32_t h = hash32((uint32_t)c * 48271, (uint32_t)attempts * 16807);
|
|
||||||
int v = (int)(h % K_BITS);
|
|
||||||
|
|
||||||
if (var_degree[v] < MAX_VAR_DEGREE - 1) {
|
|
||||||
add_edge(c, v);
|
|
||||||
}
|
|
||||||
attempts++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void ldpc_p_init(void) {
|
|
||||||
if (ldpc_p_initialized) return;
|
|
||||||
build_h_matrix();
|
|
||||||
ldpc_p_initialized = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Syndrome Check
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
int ldpc_p_check_syndrome(const uint8_t *codeword) {
|
|
||||||
if (!ldpc_p_initialized) ldpc_p_init();
|
|
||||||
|
|
||||||
for (int c = 0; c < M_BITS; c++) {
|
|
||||||
int syndrome = 0;
|
|
||||||
for (int i = 0; i < check_degree[c]; i++) {
|
|
||||||
int v = check_to_var[c][i];
|
|
||||||
syndrome ^= get_bit(codeword, v);
|
|
||||||
}
|
|
||||||
if (syndrome != 0) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Encoding
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
size_t ldpc_p_encode(const uint8_t *data, size_t data_len, uint8_t *output) {
|
|
||||||
if (!ldpc_p_initialized) ldpc_p_init();
|
|
||||||
|
|
||||||
if (data_len > LDPC_P_DATA_SIZE) {
|
|
||||||
data_len = LDPC_P_DATA_SIZE;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy data to output and pad if necessary
|
|
||||||
memcpy(output, data, data_len);
|
|
||||||
if (data_len < LDPC_P_DATA_SIZE) {
|
|
||||||
memset(output + data_len, 0, LDPC_P_DATA_SIZE - data_len);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialize parity bytes to zero
|
|
||||||
memset(output + LDPC_P_DATA_SIZE, 0, LDPC_P_PARITY_SIZE);
|
|
||||||
|
|
||||||
// Compute syndrome contribution from data bits
|
|
||||||
int syndrome[M_BITS];
|
|
||||||
for (int c = 0; c < M_BITS; c++) {
|
|
||||||
syndrome[c] = 0;
|
|
||||||
for (int i = 0; i < check_degree[c]; i++) {
|
|
||||||
int v = check_to_var[c][i];
|
|
||||||
if (v < K_BITS) {
|
|
||||||
syndrome[c] ^= get_bit(output, v);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Back-substitution for parity bits (staircase structure)
|
|
||||||
int prev_parity = 0;
|
|
||||||
for (int c = 0; c < M_BITS; c++) {
|
|
||||||
int parity_bit = syndrome[c] ^ prev_parity;
|
|
||||||
set_bit(output + LDPC_P_DATA_SIZE, c, parity_bit);
|
|
||||||
prev_parity = parity_bit;
|
|
||||||
}
|
|
||||||
|
|
||||||
return LDPC_P_BLOCK_SIZE;
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Min-Sum Decoder with Optimized Parameters
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
// Clamp LLR to valid range
|
|
||||||
static inline float clamp_llr(float x) {
|
|
||||||
if (x > LLR_MAX) return LLR_MAX;
|
|
||||||
if (x < LLR_MIN) return LLR_MIN;
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
int ldpc_p_decode(uint8_t *data, size_t data_len) {
|
|
||||||
if (!ldpc_p_initialized) ldpc_p_init();
|
|
||||||
|
|
||||||
size_t total_len = data_len + LDPC_P_PARITY_SIZE;
|
|
||||||
if (total_len > LDPC_P_BLOCK_SIZE) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Working codeword buffer
|
|
||||||
uint8_t codeword[LDPC_P_BLOCK_SIZE];
|
|
||||||
memcpy(codeword, data, total_len);
|
|
||||||
if (total_len < LDPC_P_BLOCK_SIZE) {
|
|
||||||
memset(codeword + total_len, 0, LDPC_P_BLOCK_SIZE - total_len);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Quick check - if already valid, no decoding needed
|
|
||||||
if (ldpc_p_check_syndrome(codeword)) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ==========================================================================
|
|
||||||
// Initialize channel LLRs
|
|
||||||
// ==========================================================================
|
|
||||||
|
|
||||||
float var_llr[N_BITS];
|
|
||||||
float llr_magnitude = 6.0f;
|
|
||||||
|
|
||||||
for (int v = 0; v < N_BITS; v++) {
|
|
||||||
int bit = get_bit(codeword, v);
|
|
||||||
var_llr[v] = bit ? -llr_magnitude : llr_magnitude;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Message storage
|
|
||||||
static float c2v[M_BITS][MAX_CHECK_DEGREE];
|
|
||||||
|
|
||||||
for (int c = 0; c < M_BITS; c++) {
|
|
||||||
for (int i = 0; i < check_degree[c]; i++) {
|
|
||||||
c2v[c][i] = 0.0f;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ==========================================================================
|
|
||||||
// Normalized Min-Sum Decoding with Layered Scheduling
|
|
||||||
// ==========================================================================
|
|
||||||
|
|
||||||
float v2c[MAX_CHECK_DEGREE];
|
|
||||||
const float alpha = 0.75f; // Normalization factor
|
|
||||||
|
|
||||||
for (int iter = 0; iter < LDPC_MAX_ITER; iter++) {
|
|
||||||
|
|
||||||
// Process each check node (layer)
|
|
||||||
for (int c = 0; c < M_BITS; c++) {
|
|
||||||
int deg = check_degree[c];
|
|
||||||
|
|
||||||
// Step 1: Compute variable-to-check messages
|
|
||||||
for (int i = 0; i < deg; i++) {
|
|
||||||
int v = check_to_var[c][i];
|
|
||||||
v2c[i] = var_llr[v] - c2v[c][i];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 2: Compute check-to-variable messages using min-sum
|
|
||||||
for (int i = 0; i < deg; i++) {
|
|
||||||
float sign_prod = 1.0f;
|
|
||||||
float min1 = LLR_MAX, min2 = LLR_MAX;
|
|
||||||
|
|
||||||
for (int j = 0; j < deg; j++) {
|
|
||||||
if (j == i) continue;
|
|
||||||
|
|
||||||
float val = v2c[j];
|
|
||||||
if (val < 0) sign_prod = -sign_prod;
|
|
||||||
|
|
||||||
float absval = fabsf(val);
|
|
||||||
if (absval < min1) {
|
|
||||||
min2 = min1;
|
|
||||||
min1 = absval;
|
|
||||||
} else if (absval < min2) {
|
|
||||||
min2 = absval;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Normalized min-sum message
|
|
||||||
float msg_mag = alpha * min1;
|
|
||||||
float new_c2v = sign_prod * msg_mag;
|
|
||||||
|
|
||||||
// Update variable LLR immediately (layered approach)
|
|
||||||
int v = check_to_var[c][i];
|
|
||||||
var_llr[v] = clamp_llr(var_llr[v] - c2v[c][i] + new_c2v);
|
|
||||||
c2v[c][i] = new_c2v;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Make hard decisions
|
|
||||||
for (int v = 0; v < N_BITS; v++) {
|
|
||||||
set_bit(codeword, v, var_llr[v] < 0 ? 1 : 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if valid codeword
|
|
||||||
if (ldpc_p_check_syndrome(codeword)) {
|
|
||||||
memcpy(data, codeword, data_len);
|
|
||||||
return iter + 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Adaptive restart at iteration milestones
|
|
||||||
if (iter == 25 || iter == 50 || iter == 75) {
|
|
||||||
float new_mag = 4.0f - (iter / 25) * 0.5f;
|
|
||||||
for (int v = 0; v < N_BITS; v++) {
|
|
||||||
int bit = get_bit(codeword, v);
|
|
||||||
var_llr[v] = bit ? -new_mag : new_mag;
|
|
||||||
}
|
|
||||||
for (int c = 0; c < M_BITS; c++) {
|
|
||||||
for (int i = 0; i < check_degree[c]; i++) {
|
|
||||||
c2v[c][i] = 0.0f;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Failed to converge
|
|
||||||
memcpy(data, codeword, data_len);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Block-level operations
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
size_t ldpc_p_encode_blocks(const uint8_t *data, size_t data_len, uint8_t *output) {
|
|
||||||
if (!ldpc_p_initialized) ldpc_p_init();
|
|
||||||
|
|
||||||
size_t output_len = 0;
|
|
||||||
size_t remaining = data_len;
|
|
||||||
const uint8_t *src = data;
|
|
||||||
uint8_t *dst = output;
|
|
||||||
|
|
||||||
while (remaining > 0) {
|
|
||||||
size_t block_data = (remaining > LDPC_P_DATA_SIZE) ? LDPC_P_DATA_SIZE : remaining;
|
|
||||||
ldpc_p_encode(src, block_data, dst);
|
|
||||||
|
|
||||||
src += block_data;
|
|
||||||
dst += LDPC_P_BLOCK_SIZE;
|
|
||||||
output_len += LDPC_P_BLOCK_SIZE;
|
|
||||||
remaining -= block_data;
|
|
||||||
}
|
|
||||||
|
|
||||||
return output_len;
|
|
||||||
}
|
|
||||||
|
|
||||||
int ldpc_p_decode_blocks(uint8_t *data, size_t total_len, uint8_t *output, size_t output_len) {
|
|
||||||
if (!ldpc_p_initialized) ldpc_p_init();
|
|
||||||
|
|
||||||
int total_iterations = 0;
|
|
||||||
size_t remaining_output = output_len;
|
|
||||||
uint8_t *src = data;
|
|
||||||
uint8_t *dst = output;
|
|
||||||
|
|
||||||
while (total_len >= LDPC_P_BLOCK_SIZE && remaining_output > 0) {
|
|
||||||
size_t bytes_to_copy = (remaining_output > LDPC_P_DATA_SIZE) ? LDPC_P_DATA_SIZE : remaining_output;
|
|
||||||
|
|
||||||
int result = ldpc_p_decode(src, LDPC_P_DATA_SIZE);
|
|
||||||
if (result < 0) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
total_iterations += result;
|
|
||||||
|
|
||||||
memcpy(dst, src, bytes_to_copy);
|
|
||||||
|
|
||||||
src += LDPC_P_BLOCK_SIZE;
|
|
||||||
dst += bytes_to_copy;
|
|
||||||
total_len -= LDPC_P_BLOCK_SIZE;
|
|
||||||
remaining_output -= bytes_to_copy;
|
|
||||||
}
|
|
||||||
|
|
||||||
return total_iterations;
|
|
||||||
}
|
|
||||||
@@ -1,97 +0,0 @@
|
|||||||
/**
|
|
||||||
* LDPC(255,223) Codec for TAV-DT Payloads
|
|
||||||
*
|
|
||||||
* Alternative to RS(255,223) with same rate (~0.875):
|
|
||||||
* - Block size: 255 bytes (223 data + 32 parity)
|
|
||||||
* - Uses quasi-cyclic LDPC structure for efficiency
|
|
||||||
* - Soft-decision belief propagation decoder
|
|
||||||
*
|
|
||||||
* Designed as drop-in replacement for RS(255,223):
|
|
||||||
* - Same input/output sizes
|
|
||||||
* - Same API style
|
|
||||||
* - Different error correction characteristics:
|
|
||||||
* - LDPC: Better at high BER (>1e-3), gradual degradation
|
|
||||||
* - RS: Better at low BER, hard threshold at 16 byte errors
|
|
||||||
*
|
|
||||||
* Created by CuriousTorvald and Claude on 2025-12-15.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef LDPC_PAYLOAD_H
|
|
||||||
#define LDPC_PAYLOAD_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stddef.h>
|
|
||||||
|
|
||||||
// LDPC(255,223) parameters - matches RS(255,223) for drop-in replacement
|
|
||||||
#define LDPC_P_BLOCK_SIZE 255 // Total codeword size (bytes)
|
|
||||||
#define LDPC_P_DATA_SIZE 223 // Data bytes per block
|
|
||||||
#define LDPC_P_PARITY_SIZE 32 // Parity bytes per block
|
|
||||||
|
|
||||||
// Decoder parameters
|
|
||||||
#define LDPC_P_MAX_ITERATIONS 30 // Maximum BP iterations
|
|
||||||
#define LDPC_P_EARLY_TERM 1 // Enable early termination on valid codeword
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize LDPC(255,223) codec.
|
|
||||||
* Must be called once before using encode/decode functions.
|
|
||||||
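* Idempotent: repeated calls return immediately. The init flag is checked and set without locking, so make the first call before any concurrent use.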
|
|
||||||
*/
|
|
||||||
void ldpc_p_init(void);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Encode data block with LDPC(255,223).
|
|
||||||
*
|
|
||||||
* @param data Input data (up to LDPC_P_DATA_SIZE bytes)
|
|
||||||
* @param data_len Length of input data (1 to LDPC_P_DATA_SIZE)
|
|
||||||
* @param output Output buffer (must hold LDPC_P_BLOCK_SIZE bytes; a full block is always written)
|
|
||||||
* Format: [data][parity]
|
|
||||||
* @return Total output length (always LDPC_P_BLOCK_SIZE)
|
|
||||||
*
|
|
||||||
* Note: For data shorter than LDPC_P_DATA_SIZE, the encoder pads with zeros
|
|
||||||
* and writes the full padded block: [data][zero padding][parity].
|
|
||||||
*/
|
|
||||||
size_t ldpc_p_encode(const uint8_t *data, size_t data_len, uint8_t *output);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Decode and correct LDPC(255,223) encoded block.
|
|
||||||
*
|
|
||||||
* @param data Buffer containing [data][parity] (modified in-place)
|
|
||||||
* @param data_len Length of data portion (1 to LDPC_P_DATA_SIZE)
|
|
||||||
* @return 0 if the block already passes the syndrome check, otherwise the number of iterations used (>= 1), or -1 if uncorrectable
|
|
||||||
*
|
|
||||||
* On success, data buffer contains corrected data.
|
|
||||||
* On failure, data buffer contents are undefined.
|
|
||||||
*/
|
|
||||||
int ldpc_p_decode(uint8_t *data, size_t data_len);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Encode data with automatic block splitting.
|
|
||||||
* For data larger than LDPC_P_DATA_SIZE, splits into multiple blocks.
|
|
||||||
*
|
|
||||||
* @param data Input data
|
|
||||||
* @param data_len Length of input data
|
|
||||||
* @param output Output buffer (must hold ceil(data_len/223) * 255 bytes)
|
|
||||||
* @return Total output length
|
|
||||||
*/
|
|
||||||
size_t ldpc_p_encode_blocks(const uint8_t *data, size_t data_len, uint8_t *output);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Decode data with automatic block splitting.
|
|
||||||
*
|
|
||||||
* @param data Buffer containing LDPC-encoded blocks (modified in-place)
|
|
||||||
* @param total_len Total length of encoded data (multiple of LDPC_P_BLOCK_SIZE)
|
|
||||||
* @param output Output buffer for decoded data
|
|
||||||
* @param output_len Expected length of decoded data
|
|
||||||
* @return Total iterations across all blocks, or -1 if any block failed
|
|
||||||
*/
|
|
||||||
int ldpc_p_decode_blocks(uint8_t *data, size_t total_len, uint8_t *output, size_t output_len);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if codeword is valid (syndrome check).
|
|
||||||
*
|
|
||||||
* @param codeword Full codeword (LDPC_P_BLOCK_SIZE bytes)
|
|
||||||
* @return 1 if valid (zero syndrome), 0 if errors detected
|
|
||||||
*/
|
|
||||||
int ldpc_p_check_syndrome(const uint8_t *codeword);
|
|
||||||
|
|
||||||
#endif // LDPC_PAYLOAD_H
|
|
||||||
@@ -1,417 +0,0 @@
|
|||||||
/**
|
|
||||||
* Reed-Solomon (255,223) Codec Implementation
|
|
||||||
*
|
|
||||||
* Standard RS code over GF(2^8) for TAV-DT forward error correction.
|
|
||||||
*
|
|
||||||
* Created by CuriousTorvald and Claude on 2025-12-09.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "reed_solomon.h"
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Galois Field GF(2^8) Arithmetic
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
// Primitive polynomial: x^8 + x^4 + x^3 + x^2 + 1 = 0x11D
|
|
||||||
#define GF_PRIMITIVE 0x11D
|
|
||||||
#define GF_SIZE 256
|
|
||||||
#define GF_MAX 255
|
|
||||||
|
|
||||||
// Lookup tables for GF(2^8) arithmetic
|
|
||||||
static uint8_t gf_exp[512]; // Anti-log table (doubled for easy modular reduction)
|
|
||||||
static uint8_t gf_log[256]; // Log table
|
|
||||||
static uint8_t gf_generator[RS_PARITY_SIZE + 1]; // Generator polynomial coefficients
|
|
||||||
|
|
||||||
static int rs_initialized = 0;
|
|
||||||
|
|
||||||
// Initialize GF(2^8) exp/log tables
|
|
||||||
static void init_gf_tables(void) {
|
|
||||||
uint16_t x = 1;
|
|
||||||
|
|
||||||
for (int i = 0; i < GF_MAX; i++) {
|
|
||||||
gf_exp[i] = (uint8_t)x;
|
|
||||||
gf_log[x] = (uint8_t)i;
|
|
||||||
|
|
||||||
// Multiply by alpha (primitive element = 2)
|
|
||||||
x <<= 1;
|
|
||||||
if (x & 0x100) {
|
|
||||||
x ^= GF_PRIMITIVE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Double the exp table for easy modular reduction
|
|
||||||
for (int i = GF_MAX; i < 512; i++) {
|
|
||||||
gf_exp[i] = gf_exp[i - GF_MAX];
|
|
||||||
}
|
|
||||||
|
|
||||||
// gf_log[0] is undefined, set to 0 for safety
|
|
||||||
gf_log[0] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// GF multiplication
|
|
||||||
static inline uint8_t gf_mul(uint8_t a, uint8_t b) {
|
|
||||||
if (a == 0 || b == 0) return 0;
|
|
||||||
return gf_exp[gf_log[a] + gf_log[b]];
|
|
||||||
}
|
|
||||||
|
|
||||||
// GF division
|
|
||||||
static inline uint8_t gf_div(uint8_t a, uint8_t b) {
|
|
||||||
if (a == 0) return 0;
|
|
||||||
if (b == 0) return 0; // Division by zero - shouldn't happen
|
|
||||||
return gf_exp[gf_log[a] + GF_MAX - gf_log[b]];
|
|
||||||
}
|
|
||||||
|
|
||||||
// GF power
|
|
||||||
static inline uint8_t gf_pow(uint8_t a, int n) {
|
|
||||||
if (n == 0) return 1;
|
|
||||||
if (a == 0) return 0;
|
|
||||||
return gf_exp[(gf_log[a] * n) % GF_MAX];
|
|
||||||
}
|
|
||||||
|
|
||||||
// GF inverse
|
|
||||||
static inline uint8_t gf_inv(uint8_t a) {
|
|
||||||
if (a == 0) return 0;
|
|
||||||
return gf_exp[GF_MAX - gf_log[a]];
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Generator Polynomial
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
// Build generator polynomial: g(x) = (x - alpha^0)(x - alpha^1)...(x - alpha^31)
|
|
||||||
static void init_generator(void) {
|
|
||||||
// Start with g(x) = 1
|
|
||||||
gf_generator[0] = 1;
|
|
||||||
for (int i = 1; i <= RS_PARITY_SIZE; i++) {
|
|
||||||
gf_generator[i] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Multiply by (x - alpha^i) for i = 0 to 31
|
|
||||||
for (int i = 0; i < RS_PARITY_SIZE; i++) {
|
|
||||||
uint8_t alpha_i = gf_exp[i]; // alpha^i
|
|
||||||
|
|
||||||
// Multiply current polynomial by (x - alpha^i)
|
|
||||||
for (int j = RS_PARITY_SIZE; j > 0; j--) {
|
|
||||||
gf_generator[j] = gf_generator[j - 1] ^ gf_mul(gf_generator[j], alpha_i);
|
|
||||||
}
|
|
||||||
gf_generator[0] = gf_mul(gf_generator[0], alpha_i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Public API
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
void rs_init(void) {
|
|
||||||
if (rs_initialized) return;
|
|
||||||
|
|
||||||
init_gf_tables();
|
|
||||||
init_generator();
|
|
||||||
rs_initialized = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t rs_encode(const uint8_t *data, size_t data_len, uint8_t *output) {
|
|
||||||
if (!rs_initialized) rs_init();
|
|
||||||
|
|
||||||
// Validate input
|
|
||||||
if (data_len > RS_DATA_SIZE) {
|
|
||||||
data_len = RS_DATA_SIZE;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy data to output
|
|
||||||
memcpy(output, data, data_len);
|
|
||||||
|
|
||||||
// Initialize parity bytes to zero
|
|
||||||
memset(output + data_len, 0, RS_PARITY_SIZE);
|
|
||||||
|
|
||||||
// Create padded message polynomial (RS_DATA_SIZE + RS_PARITY_SIZE coefficients)
|
|
||||||
// Message is shifted to leave room for parity (systematic encoding)
|
|
||||||
uint8_t msg[RS_BLOCK_SIZE];
|
|
||||||
memset(msg, 0, sizeof(msg));
|
|
||||||
memcpy(msg, data, data_len);
|
|
||||||
|
|
||||||
// Polynomial division: compute remainder of msg(x) * x^32 / g(x)
|
|
||||||
uint8_t remainder[RS_PARITY_SIZE];
|
|
||||||
memset(remainder, 0, RS_PARITY_SIZE);
|
|
||||||
|
|
||||||
for (size_t i = 0; i < data_len; i++) {
|
|
||||||
uint8_t coef = msg[i] ^ remainder[0];
|
|
||||||
|
|
||||||
// Shift remainder
|
|
||||||
memmove(remainder, remainder + 1, RS_PARITY_SIZE - 1);
|
|
||||||
remainder[RS_PARITY_SIZE - 1] = 0;
|
|
||||||
|
|
||||||
// Subtract coef * g(x) from remainder
|
|
||||||
if (coef != 0) {
|
|
||||||
for (int j = 0; j < RS_PARITY_SIZE; j++) {
|
|
||||||
remainder[j] ^= gf_mul(gf_generator[RS_PARITY_SIZE - 1 - j], coef);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Append parity to output
|
|
||||||
memcpy(output + data_len, remainder, RS_PARITY_SIZE);
|
|
||||||
|
|
||||||
return data_len + RS_PARITY_SIZE;
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Berlekamp-Massey Decoder
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
// Compute syndromes S_i = r(alpha^i) for i = 0..31
|
|
||||||
static void compute_syndromes(const uint8_t *r, size_t len, uint8_t *syndromes) {
|
|
||||||
for (int i = 0; i < RS_PARITY_SIZE; i++) {
|
|
||||||
syndromes[i] = 0;
|
|
||||||
for (size_t j = 0; j < len; j++) {
|
|
||||||
syndromes[i] ^= gf_mul(r[j], gf_pow(gf_exp[i], (int)(len - 1 - j)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Berlekamp-Massey algorithm to find error locator polynomial
|
|
||||||
static int berlekamp_massey(const uint8_t *syndromes, uint8_t *sigma, int *sigma_deg) {
|
|
||||||
uint8_t C[RS_PARITY_SIZE + 1]; // Connection polynomial
|
|
||||||
uint8_t B[RS_PARITY_SIZE + 1]; // Previous connection polynomial
|
|
||||||
int L = 0; // Current length of LFSR
|
|
||||||
int m = 1; // Number of steps since last update
|
|
||||||
uint8_t b = 1; // Previous discrepancy
|
|
||||||
|
|
||||||
// Initialize: C(x) = 1, B(x) = 1
|
|
||||||
memset(C, 0, sizeof(C));
|
|
||||||
memset(B, 0, sizeof(B));
|
|
||||||
C[0] = 1;
|
|
||||||
B[0] = 1;
|
|
||||||
|
|
||||||
for (int n = 0; n < RS_PARITY_SIZE; n++) {
|
|
||||||
// Compute discrepancy
|
|
||||||
uint8_t d = syndromes[n];
|
|
||||||
for (int i = 1; i <= L; i++) {
|
|
||||||
d ^= gf_mul(C[i], syndromes[n - i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (d == 0) {
|
|
||||||
// No update needed
|
|
||||||
m++;
|
|
||||||
} else if (2 * L <= n) {
|
|
||||||
// Update both C and L
|
|
||||||
uint8_t T[RS_PARITY_SIZE + 1];
|
|
||||||
memcpy(T, C, sizeof(T));
|
|
||||||
|
|
||||||
uint8_t factor = gf_div(d, b);
|
|
||||||
for (int i = 0; i <= RS_PARITY_SIZE - m; i++) {
|
|
||||||
C[i + m] ^= gf_mul(factor, B[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
L = n + 1 - L;
|
|
||||||
memcpy(B, T, sizeof(B));
|
|
||||||
b = d;
|
|
||||||
m = 1;
|
|
||||||
} else {
|
|
||||||
// Only update C
|
|
||||||
uint8_t factor = gf_div(d, b);
|
|
||||||
for (int i = 0; i <= RS_PARITY_SIZE - m; i++) {
|
|
||||||
C[i + m] ^= gf_mul(factor, B[i]);
|
|
||||||
}
|
|
||||||
m++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy result
|
|
||||||
memcpy(sigma, C, RS_PARITY_SIZE + 1);
|
|
||||||
*sigma_deg = L;
|
|
||||||
|
|
||||||
return L;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Chien search: find error positions (roots of sigma)
|
|
||||||
static int chien_search(const uint8_t *sigma, int sigma_deg, size_t n, uint8_t *positions, int *num_errors) {
|
|
||||||
*num_errors = 0;
|
|
||||||
|
|
||||||
// Evaluate sigma(alpha^(-i)) for i = 0 to n-1
|
|
||||||
for (size_t i = 0; i < n; i++) {
|
|
||||||
uint8_t eval = 0;
|
|
||||||
for (int j = 0; j <= sigma_deg; j++) {
|
|
||||||
// sigma(alpha^(-i)) = sum of sigma[j] * alpha^(-i*j)
|
|
||||||
int exp = (GF_MAX - (int)((i * j) % GF_MAX)) % GF_MAX;
|
|
||||||
eval ^= gf_mul(sigma[j], gf_exp[exp]);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (eval == 0) {
|
|
||||||
// Found a root - error at position n-1-i
|
|
||||||
positions[*num_errors] = (uint8_t)(n - 1 - i);
|
|
||||||
(*num_errors)++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if we found the expected number of errors
|
|
||||||
return (*num_errors == sigma_deg) ? 0 : -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Formal derivative of a polynomial over GF(2^m), coefficients stored lowest
// degree first. The term poly[k] * x^k differentiates to k * poly[k] * x^(k-1);
// in characteristic 2 the factor k is 1 for odd k and 0 for even k.
// Writes `deg` coefficients to deriv.
static void poly_derivative(const uint8_t *poly, int deg, uint8_t *deriv) {
    for (int k = 1; k <= deg; k++) {
        if (k & 1) {
            deriv[k - 1] = poly[k];
        } else {
            deriv[k - 1] = 0;
        }
    }
}
|
|
||||||
|
|
||||||
// Forney algorithm: compute error values
|
|
||||||
static void forney(const uint8_t *syndromes, const uint8_t *sigma, int sigma_deg,
|
|
||||||
const uint8_t *positions, int num_errors, size_t n, uint8_t *errors) {
|
|
||||||
// Compute error evaluator polynomial omega(x) = S(x) * sigma(x) mod x^2t
|
|
||||||
uint8_t omega[RS_PARITY_SIZE + 1];
|
|
||||||
memset(omega, 0, sizeof(omega));
|
|
||||||
|
|
||||||
for (int i = 0; i < RS_PARITY_SIZE; i++) {
|
|
||||||
for (int j = 0; j <= sigma_deg && i - j >= 0; j++) {
|
|
||||||
omega[i] ^= gf_mul(syndromes[i - j], sigma[j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compute formal derivative of sigma
|
|
||||||
uint8_t sigma_prime[RS_PARITY_SIZE];
|
|
||||||
poly_derivative(sigma, sigma_deg, sigma_prime);
|
|
||||||
|
|
||||||
// Compute error values using Forney formula
|
|
||||||
for (int i = 0; i < num_errors; i++) {
|
|
||||||
uint8_t pos = positions[i];
|
|
||||||
uint8_t Xi = gf_exp[n - 1 - pos]; // alpha^(n-1-pos)
|
|
||||||
uint8_t Xi_inv = gf_inv(Xi);
|
|
||||||
|
|
||||||
// Evaluate omega at Xi_inv
|
|
||||||
uint8_t omega_val = 0;
|
|
||||||
for (int j = 0; j < RS_PARITY_SIZE; j++) {
|
|
||||||
omega_val ^= gf_mul(omega[j], gf_pow(Xi_inv, j));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Evaluate sigma' at Xi_inv
|
|
||||||
uint8_t sigma_prime_val = 0;
|
|
||||||
for (int j = 0; j < sigma_deg; j++) {
|
|
||||||
sigma_prime_val ^= gf_mul(sigma_prime[j], gf_pow(Xi_inv, j));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Error value: e_i = Xi * omega(Xi_inv) / sigma'(Xi_inv)
|
|
||||||
errors[i] = gf_mul(Xi, gf_div(omega_val, sigma_prime_val));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int rs_decode(uint8_t *data, size_t data_len) {
|
|
||||||
if (!rs_initialized) rs_init();
|
|
||||||
|
|
||||||
size_t total_len = data_len + RS_PARITY_SIZE;
|
|
||||||
if (total_len > RS_BLOCK_SIZE) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compute syndromes
|
|
||||||
uint8_t syndromes[RS_PARITY_SIZE];
|
|
||||||
compute_syndromes(data, total_len, syndromes);
|
|
||||||
|
|
||||||
// Check if all syndromes are zero (no errors)
|
|
||||||
int has_errors = 0;
|
|
||||||
for (int i = 0; i < RS_PARITY_SIZE; i++) {
|
|
||||||
if (syndromes[i] != 0) {
|
|
||||||
has_errors = 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!has_errors) {
|
|
||||||
return 0; // No errors
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find error locator polynomial using Berlekamp-Massey
|
|
||||||
uint8_t sigma[RS_PARITY_SIZE + 1];
|
|
||||||
int sigma_deg;
|
|
||||||
int num_errors_expected = berlekamp_massey(syndromes, sigma, &sigma_deg);
|
|
||||||
|
|
||||||
if (num_errors_expected > RS_MAX_ERRORS) {
|
|
||||||
return -1; // Too many errors
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find error positions using Chien search
|
|
||||||
uint8_t positions[RS_MAX_ERRORS];
|
|
||||||
int num_errors;
|
|
||||||
if (chien_search(sigma, sigma_deg, total_len, positions, &num_errors) != 0) {
|
|
||||||
return -1; // Inconsistent error count
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compute error values using Forney algorithm
|
|
||||||
uint8_t error_values[RS_MAX_ERRORS];
|
|
||||||
forney(syndromes, sigma, sigma_deg, positions, num_errors, total_len, error_values);
|
|
||||||
|
|
||||||
// Apply corrections
|
|
||||||
for (int i = 0; i < num_errors; i++) {
|
|
||||||
if (positions[i] < total_len) {
|
|
||||||
data[positions[i]] ^= error_values[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return num_errors;
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Block-level operations
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
size_t rs_encode_blocks(const uint8_t *data, size_t data_len, uint8_t *output) {
|
|
||||||
if (!rs_initialized) rs_init();
|
|
||||||
|
|
||||||
size_t output_len = 0;
|
|
||||||
size_t remaining = data_len;
|
|
||||||
const uint8_t *src = data;
|
|
||||||
uint8_t *dst = output;
|
|
||||||
|
|
||||||
while (remaining > 0) {
|
|
||||||
size_t block_data = (remaining > RS_DATA_SIZE) ? RS_DATA_SIZE : remaining;
|
|
||||||
size_t encoded_len = rs_encode(src, block_data, dst);
|
|
||||||
|
|
||||||
// Pad to full block size for consistent block boundaries
|
|
||||||
if (encoded_len < RS_BLOCK_SIZE) {
|
|
||||||
memset(dst + encoded_len, 0, RS_BLOCK_SIZE - encoded_len);
|
|
||||||
}
|
|
||||||
|
|
||||||
src += block_data;
|
|
||||||
dst += RS_BLOCK_SIZE;
|
|
||||||
output_len += RS_BLOCK_SIZE;
|
|
||||||
remaining -= block_data;
|
|
||||||
}
|
|
||||||
|
|
||||||
return output_len;
|
|
||||||
}
|
|
||||||
|
|
||||||
int rs_decode_blocks(uint8_t *data, size_t total_len, uint8_t *output, size_t output_len) {
|
|
||||||
if (!rs_initialized) rs_init();
|
|
||||||
|
|
||||||
int total_errors = 0;
|
|
||||||
size_t remaining_output = output_len;
|
|
||||||
uint8_t *src = data;
|
|
||||||
uint8_t *dst = output;
|
|
||||||
|
|
||||||
while (total_len >= RS_BLOCK_SIZE && remaining_output > 0) {
|
|
||||||
// Always decode with full RS_DATA_SIZE since encoder pads to full blocks
|
|
||||||
// But only copy the bytes we actually need
|
|
||||||
size_t bytes_to_copy = (remaining_output > RS_DATA_SIZE) ? RS_DATA_SIZE : remaining_output;
|
|
||||||
|
|
||||||
// Decode block with full data size (modifies src in place)
|
|
||||||
int errors = rs_decode(src, RS_DATA_SIZE);
|
|
||||||
if (errors < 0) {
|
|
||||||
return -1; // Uncorrectable block
|
|
||||||
}
|
|
||||||
total_errors += errors;
|
|
||||||
|
|
||||||
// Copy only the bytes we need to output
|
|
||||||
memcpy(dst, src, bytes_to_copy);
|
|
||||||
|
|
||||||
src += RS_BLOCK_SIZE;
|
|
||||||
dst += bytes_to_copy;
|
|
||||||
total_len -= RS_BLOCK_SIZE;
|
|
||||||
remaining_output -= bytes_to_copy;
|
|
||||||
}
|
|
||||||
|
|
||||||
return total_errors;
|
|
||||||
}
|
|
||||||
@@ -1,82 +0,0 @@
|
|||||||
/**
 * Reed-Solomon (255,223) Codec for TAV-DT
 *
 * Standard RS code over GF(2^8):
 * - Block size: 255 bytes (223 data + 32 parity)
 * - Error correction: up to 16 byte errors
 * - Error detection: up to 32 byte errors
 *
 * Uses primitive polynomial: x^8 + x^4 + x^3 + x^2 + 1 (0x11D)
 * Generator polynomial: g(x) = product of (x - alpha^i) for i = 0..31
 *
 * Created by CuriousTorvald and Claude on 2025-12-09.
 */

#ifndef REED_SOLOMON_H
#define REED_SOLOMON_H

#include <stdint.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

// RS(255,223) parameters
#define RS_BLOCK_SIZE   255   // Total codeword size
#define RS_DATA_SIZE    223   // Data bytes per block
#define RS_PARITY_SIZE   32   // Parity bytes per block (2t = 32, t = 16)
#define RS_MAX_ERRORS    16   // Maximum correctable errors (t)

/**
 * Initialize Reed-Solomon codec.
 * Must be called once before using encode/decode functions.
 *
 * NOTE(review): this header previously described the function as thread-safe
 * via static initialization; confirm that against the implementation before
 * relying on it from multiple threads.
 */
void rs_init(void);

/**
 * Encode data block with Reed-Solomon parity.
 *
 * @param data      Input data (up to RS_DATA_SIZE bytes)
 * @param data_len  Length of input data (1 to RS_DATA_SIZE)
 * @param output    Output buffer (must hold data_len + RS_PARITY_SIZE bytes)
 *                  Format: [data][parity]
 * @return Total output length (data_len + RS_PARITY_SIZE)
 *
 * Note: For data shorter than RS_DATA_SIZE, the encoder pads with zeros
 *       internally but only outputs actual data + parity.
 */
size_t rs_encode(const uint8_t *data, size_t data_len, uint8_t *output);

/**
 * Decode and correct Reed-Solomon encoded block.
 *
 * @param data      Buffer containing [data][parity] (modified in-place)
 * @param data_len  Length of data portion (1 to RS_DATA_SIZE)
 * @return Number of errors corrected (0-16), or -1 if uncorrectable
 *
 * On success, data buffer contains corrected data (parity may also be corrected).
 * On failure, data buffer contents are undefined.
 */
int rs_decode(uint8_t *data, size_t data_len);

/**
 * Encode data with automatic block splitting.
 * For data larger than RS_DATA_SIZE, splits into multiple RS blocks.
 * Every block occupies RS_BLOCK_SIZE bytes in the output.
 *
 * @param data      Input data
 * @param data_len  Length of input data
 * @param output    Output buffer (must hold ceil(data_len/223) * 255 bytes)
 * @return Total output length
 */
size_t rs_encode_blocks(const uint8_t *data, size_t data_len, uint8_t *output);

/**
 * Decode data with automatic block splitting.
 *
 * @param data        Buffer containing RS-encoded blocks (modified in-place)
 * @param total_len   Total length of encoded data (multiple of RS_BLOCK_SIZE)
 * @param output      Output buffer for decoded data
 * @param output_len  Expected length of decoded data
 * @return Total errors corrected across all blocks, or -1 if any block failed
 */
int rs_decode_blocks(uint8_t *data, size_t total_len, uint8_t *output, size_t output_len);

#ifdef __cplusplus
}
#endif

#endif // REED_SOLOMON_H
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,255 +0,0 @@
|
|||||||
/**
|
|
||||||
* TAV Encoder - Color Space Conversion Library
|
|
||||||
*
|
|
||||||
* Provides RGB <-> YCoCg-R and RGB <-> ICtCp color space conversions
|
|
||||||
* for the TSVM Advanced Video (TAV) encoder.
|
|
||||||
*
|
|
||||||
* Extracted from encoder_tav.c as part of library refactoring.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <math.h>
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Utility Functions
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
// Limit an integer to [min, max]; the lower bound is tested first.
static inline int CLAMP(int x, int min, int max) {
    if (x < min) return min;
    if (x > max) return max;
    return x;
}
|
|
||||||
|
|
||||||
// Limit a single-precision value to [min, max]; the lower bound is tested first.
static inline float FCLAMP(float x, float min, float max) {
    if (x < min) return min;
    if (x > max) return max;
    return x;
}
|
|
||||||
|
|
||||||
static inline int iround(double v) {
|
|
||||||
return (int)floor(v + 0.5);
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// sRGB Gamma Helpers
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
static inline double srgb_linearise(double val) {
|
|
||||||
if (val <= 0.04045) return val / 12.92;
|
|
||||||
return pow((val + 0.055) / 1.055, 2.4);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline double srgb_unlinearise(double val) {
|
|
||||||
if (val <= 0.0031308) return 12.92 * val;
|
|
||||||
return 1.055 * pow(val, 1.0/2.4) - 0.055;
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// HLG (Hybrid Log-Gamma) Transfer Functions
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
static inline double HLG_OETF(double E) {
|
|
||||||
const double a = 0.17883277;
|
|
||||||
const double b = 0.28466892; // 1 - 4*a
|
|
||||||
const double c = 0.55991073; // 0.5 - a*ln(4*a)
|
|
||||||
|
|
||||||
if (E <= 1.0/12.0) return sqrt(3.0 * E);
|
|
||||||
return a * log(12.0 * E - b) + c;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline double HLG_EOTF(double Ep) {
|
|
||||||
const double a = 0.17883277;
|
|
||||||
const double b = 0.28466892;
|
|
||||||
const double c = 0.55991073;
|
|
||||||
|
|
||||||
if (Ep <= 0.5) {
|
|
||||||
double val = Ep * Ep / 3.0;
|
|
||||||
return val;
|
|
||||||
}
|
|
||||||
double val = (exp((Ep - c) / a) + b) / 12.0;
|
|
||||||
return val;
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Color Space Transformation Matrices
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
// BT.2100 RGB -> LMS, coefficients in units of 1/4096 (each row sums to 1)
static const double M_RGB_TO_LMS[3][3] = {
    { 1688.0 / 4096.0, 2146.0 / 4096.0,  262.0 / 4096.0 },
    {  683.0 / 4096.0, 2951.0 / 4096.0,  462.0 / 4096.0 },
    {   99.0 / 4096.0,  309.0 / 4096.0, 3688.0 / 4096.0 }
};

// LMS -> RGB.
// NOTE(review): multiplying this by M_RGB_TO_LMS above does not give the
// identity (the first diagonal term comes out near 1.66), so the two are not
// an inverse pair. The values look like the inverse of an RGB -> LMS matrix
// for BT.709/sRGB primaries - confirm which pairing the decoder expects before
// changing either table.
static const double M_LMS_TO_RGB[3][3] = {
    {  6.1723815689243215,   -5.319534979827695,    0.14699442094633924 },
    { -1.3243428148026244,    2.560286104841917,   -0.2359203727576164  },
    { -0.011819739235953752, -0.26473549971186555,  1.2767952602537955  }
};

// L'M'S' -> ICtCp (BT.2100 constants), coefficients in units of 1/4096
static const double M_LMSPRIME_TO_ICTCP[3][3] = {
    { 2048.0 / 4096.0,  2048.0 / 4096.0,     0.0          },
    { 3625.0 / 4096.0, -7465.0 / 4096.0,  3840.0 / 4096.0 },
    { 9500.0 / 4096.0, -9212.0 / 4096.0,  -288.0 / 4096.0 }
};

// ICtCp -> L'M'S' (inverse of M_LMSPRIME_TO_ICTCP)
static const double M_ICTCP_TO_LMSPRIME[3][3] = {
    { 1.0,  0.015718580108730416,  0.2095810681164055  },
    { 1.0, -0.015718580108730416, -0.20958106811640548 },
    { 1.0,  1.0212710798422344,   -0.6052744909924316  }
};
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// YCoCg-R Color Space Conversion
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
 * Convert an RGB24 frame to YCoCg-R planes.
 *
 * Per pixel, using the lifting form of the transform:
 * - Co = R - B
 * - Cg = G - (R + B) / 2
 * - Y  = G / 2 + (R + B) / 4
 *
 * @param rgb     Input RGB24 data (interleaved: RGBRGBRGB..., 3 bytes per pixel)
 * @param y       Output luma plane (width * height floats)
 * @param co      Output orange chrominance plane (width * height floats)
 * @param cg      Output green chrominance plane (width * height floats)
 * @param width   Frame width
 * @param height  Frame height
 *
 * Nothing is written when width or height is not positive.
 */
void tav_rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg,
                      int width, int height)
{
    if (width <= 0 || height <= 0) return;

    // Pixel count and byte offsets are kept in size_t so large frames cannot
    // overflow int arithmetic.
    const size_t total_pixels = (size_t)width * (size_t)height;
    const uint8_t *px = rgb;

    for (size_t i = 0; i < total_pixels; i++, px += 3) {
        const float r = px[0];
        const float g = px[1];
        const float b = px[2];

        // YCoCg-R lifting steps
        const float orange = r - b;
        const float tmp = b + orange * 0.5f;
        const float green = g - tmp;

        co[i] = orange;
        cg[i] = green;
        y[i] = tmp + green * 0.5f;
    }
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// ICtCp Color Space Conversion (HDR-capable)
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert sRGB8 to ICtCp color space using HLG transfer function.
|
|
||||||
*
|
|
||||||
* ICtCp is a perceptually uniform color space designed for HDR content:
|
|
||||||
* - I = intensity (luma)
|
|
||||||
* - Ct = tritanope (blue-yellow)
|
|
||||||
* - Cp = protanope (red-green)
|
|
||||||
*
|
|
||||||
* Uses BT.2100 ICtCp with HLG OETF for better perceptual uniformity.
|
|
||||||
*
|
|
||||||
* @param r8 Input red component (0-255)
|
|
||||||
* @param g8 Input green component (0-255)
|
|
||||||
* @param b8 Input blue component (0-255)
|
|
||||||
* @param out_I Output intensity (0-255)
|
|
||||||
* @param out_Ct Output tritanope (0-255, centered at 127.5)
|
|
||||||
* @param out_Cp Output protanope (0-255, centered at 127.5)
|
|
||||||
*/
|
|
||||||
void tav_srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
|
|
||||||
double *out_I, double *out_Ct, double *out_Cp)
|
|
||||||
{
|
|
||||||
// 1) Linearize sRGB to 0..1
|
|
||||||
double r = srgb_linearise((double)r8 / 255.0);
|
|
||||||
double g = srgb_linearise((double)g8 / 255.0);
|
|
||||||
double b = srgb_linearise((double)b8 / 255.0);
|
|
||||||
|
|
||||||
// 2) Linear RGB -> LMS (3x3 multiply)
|
|
||||||
double L = M_RGB_TO_LMS[0][0]*r + M_RGB_TO_LMS[0][1]*g + M_RGB_TO_LMS[0][2]*b;
|
|
||||||
double M = M_RGB_TO_LMS[1][0]*r + M_RGB_TO_LMS[1][1]*g + M_RGB_TO_LMS[1][2]*b;
|
|
||||||
double S = M_RGB_TO_LMS[2][0]*r + M_RGB_TO_LMS[2][1]*g + M_RGB_TO_LMS[2][2]*b;
|
|
||||||
|
|
||||||
// 3) Apply HLG OETF (Hybrid Log-Gamma)
|
|
||||||
double Lp = HLG_OETF(L);
|
|
||||||
double Mp = HLG_OETF(M);
|
|
||||||
double Sp = HLG_OETF(S);
|
|
||||||
|
|
||||||
// 4) L'M'S' -> ICtCp
|
|
||||||
double I = M_LMSPRIME_TO_ICTCP[0][0]*Lp + M_LMSPRIME_TO_ICTCP[0][1]*Mp + M_LMSPRIME_TO_ICTCP[0][2]*Sp;
|
|
||||||
double Ct = M_LMSPRIME_TO_ICTCP[1][0]*Lp + M_LMSPRIME_TO_ICTCP[1][1]*Mp + M_LMSPRIME_TO_ICTCP[1][2]*Sp;
|
|
||||||
double Cp = M_LMSPRIME_TO_ICTCP[2][0]*Lp + M_LMSPRIME_TO_ICTCP[2][1]*Mp + M_LMSPRIME_TO_ICTCP[2][2]*Sp;
|
|
||||||
|
|
||||||
// 5) Scale and offset to 0-255 range
|
|
||||||
*out_I = FCLAMP(I * 255.0, 0.0, 255.0);
|
|
||||||
*out_Ct = FCLAMP(Ct * 255.0 + 127.5, 0.0, 255.0);
|
|
||||||
*out_Cp = FCLAMP(Cp * 255.0 + 127.5, 0.0, 255.0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert ICtCp back to sRGB8 using HLG inverse transfer function.
|
|
||||||
*
|
|
||||||
* @param I8 Input intensity (0-255)
|
|
||||||
* @param Ct8 Input tritanope (0-255, centered at 127.5)
|
|
||||||
* @param Cp8 Input protanope (0-255, centered at 127.5)
|
|
||||||
* @param r8 Output red component (0-255)
|
|
||||||
* @param g8 Output green component (0-255)
|
|
||||||
* @param b8 Output blue component (0-255)
|
|
||||||
*/
|
|
||||||
void tav_ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
|
|
||||||
uint8_t *r8, uint8_t *g8, uint8_t *b8)
|
|
||||||
{
|
|
||||||
// 1) Denormalize from 0-255 range
|
|
||||||
double I = I8 / 255.0;
|
|
||||||
double Ct = (Ct8 - 127.5) / 255.0;
|
|
||||||
double Cp = (Cp8 - 127.5) / 255.0;
|
|
||||||
|
|
||||||
// 2) ICtCp -> L' M' S' (3x3 inverse multiply)
|
|
||||||
double Lp = M_ICTCP_TO_LMSPRIME[0][0]*I + M_ICTCP_TO_LMSPRIME[0][1]*Ct + M_ICTCP_TO_LMSPRIME[0][2]*Cp;
|
|
||||||
double Mp = M_ICTCP_TO_LMSPRIME[1][0]*I + M_ICTCP_TO_LMSPRIME[1][1]*Ct + M_ICTCP_TO_LMSPRIME[1][2]*Cp;
|
|
||||||
double Sp = M_ICTCP_TO_LMSPRIME[2][0]*I + M_ICTCP_TO_LMSPRIME[2][1]*Ct + M_ICTCP_TO_LMSPRIME[2][2]*Cp;
|
|
||||||
|
|
||||||
// 3) Apply HLG inverse EOTF
|
|
||||||
double L = HLG_EOTF(Lp);
|
|
||||||
double M = HLG_EOTF(Mp);
|
|
||||||
double S = HLG_EOTF(Sp);
|
|
||||||
|
|
||||||
// 4) LMS -> linear sRGB (3x3 inverse multiply)
|
|
||||||
double r_lin = M_LMS_TO_RGB[0][0]*L + M_LMS_TO_RGB[0][1]*M + M_LMS_TO_RGB[0][2]*S;
|
|
||||||
double g_lin = M_LMS_TO_RGB[1][0]*L + M_LMS_TO_RGB[1][1]*M + M_LMS_TO_RGB[1][2]*S;
|
|
||||||
double b_lin = M_LMS_TO_RGB[2][0]*L + M_LMS_TO_RGB[2][1]*M + M_LMS_TO_RGB[2][2]*S;
|
|
||||||
|
|
||||||
// 5) Apply sRGB gamma and convert to 0-255 with rounding
|
|
||||||
double r = srgb_unlinearise(r_lin);
|
|
||||||
double g = srgb_unlinearise(g_lin);
|
|
||||||
double b = srgb_unlinearise(b_lin);
|
|
||||||
|
|
||||||
*r8 = (uint8_t)iround(FCLAMP(r * 255.0, 0.0, 255.0));
|
|
||||||
*g8 = (uint8_t)iround(FCLAMP(g * 255.0, 0.0, 255.0));
|
|
||||||
*b8 = (uint8_t)iround(FCLAMP(b * 255.0, 0.0, 255.0));
|
|
||||||
}
|
|
||||||
@@ -1,67 +0,0 @@
|
|||||||
/**
 * TAV Encoder - Color Space Conversion Library
 *
 * Declarations for the RGB -> YCoCg-R frame conversion and the per-pixel
 * sRGB <-> ICtCp (HLG) conversions.
 */

#ifndef TAV_ENCODER_COLOR_H
#define TAV_ENCODER_COLOR_H

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// =============================================================================
// YCoCg-R Color Space Conversion
// =============================================================================

/**
 * Convert a whole RGB24 frame into separate Y, Co and Cg planes.
 *
 * @param rgb     Source pixels, interleaved as RGBRGBRGB...
 * @param y       Destination luma plane
 * @param co      Destination orange chrominance plane
 * @param cg      Destination green chrominance plane
 * @param width   Frame width in pixels
 * @param height  Frame height in pixels
 */
void tav_rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg,
                      int width, int height);

// =============================================================================
// ICtCp Color Space Conversion (HDR-capable)
// =============================================================================

/**
 * Convert a single sRGB8 pixel to ICtCp using the HLG transfer function.
 *
 * @param r8      Red input (0-255)
 * @param g8      Green input (0-255)
 * @param b8      Blue input (0-255)
 * @param out_I   Receives intensity (0-255)
 * @param out_Ct  Receives Ct (0-255, centered at 127.5)
 * @param out_Cp  Receives Cp (0-255, centered at 127.5)
 */
void tav_srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
                            double *out_I, double *out_Ct, double *out_Cp);

/**
 * Convert a single ICtCp sample back to sRGB8 using the inverse HLG transfer.
 *
 * @param I8   Intensity input (0-255)
 * @param Ct8  Ct input (0-255, centered at 127.5)
 * @param Cp8  Cp input (0-255, centered at 127.5)
 * @param r8   Receives red (0-255)
 * @param g8   Receives green (0-255)
 * @param b8   Receives blue (0-255)
 */
void tav_ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
                            uint8_t *r8, uint8_t *g8, uint8_t *b8);

#ifdef __cplusplus
}
#endif

#endif // TAV_ENCODER_COLOR_H
|
|
||||||
@@ -1,619 +0,0 @@
|
|||||||
/**
|
|
||||||
* TAV Encoder - Discrete Wavelet Transform (DWT) Library
|
|
||||||
*
|
|
||||||
* Provides multi-resolution wavelet decomposition for video compression.
|
|
||||||
* Supports multiple wavelet types: CDF 5/3, 9/7, 13/7, DD-4, and Haar.
|
|
||||||
*
|
|
||||||
* Extracted from encoder_tav.c as part of library refactoring.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <math.h>
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Wavelet Type Constants
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
// CDF 5/3 - Lossless capable
#define WAVELET_5_3_REVERSIBLE     0
// CDF 9/7 - Higher compression (default)
#define WAVELET_9_7_IRREVERSIBLE   1
// Biorthogonal 13/7
#define WAVELET_BIORTHOGONAL_13_7  2
// Deslauriers-Dubuc 4-point interpolating
#define WAVELET_DD4                16
// Haar - Simplest wavelet
#define WAVELET_HAAR               255
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// 1D Forward DWT Transforms
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
 * CDF 5/3 wavelet forward 1D transform (lifting: predict, then update).
 *
 * On return the first (length + 1) / 2 entries hold the low-pass samples and
 * the remaining length / 2 entries hold the high-pass samples.
 *
 * Boundary handling: the right neighbour of the last odd sample falls back to
 * its left neighbour; in the update step the last low-pass sample only uses
 * the high-pass sample to its left.
 *
 * @param data    In/out signal (overwritten with the coefficients)
 * @param length  Number of samples; lengths below 2 are left untouched
 */
static void dwt_53_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int n_low = (length + 1) / 2;
    float *scratch = (float *)calloc(length, sizeof(float));
    float *low = scratch;
    float *high = scratch + n_low;

    // Predict: each odd sample minus the mean of its even neighbours
    for (int k = 0; k < n_low; k++) {
        const int odd = 2 * k + 1;
        if (odd >= length) continue;

        const float left = data[2 * k];
        const float right = (odd + 1 < length) ? data[odd + 1] : data[2 * k];
        const float pred = 0.5f * (left + right);
        high[k] = data[odd] - pred;
    }

    // Update: each even sample plus a quarter of the adjacent details
    for (int k = 0; k < n_low; k++) {
        const float update = 0.25f * ((k > 0 ? high[k - 1] : 0) +
                                      (k < n_low - 1 ? high[k] : 0));
        low[k] = data[2 * k] + update;
    }

    memcpy(data, scratch, length * sizeof(float));
    free(scratch);
}
|
|
||||||
|
|
||||||
/**
 * CDF 9/7 wavelet forward 1D transform (floating-point lifting).
 *
 * The signal is split into even (low) and odd (high) samples, two
 * predict/update rounds are applied, and finally the low band is multiplied by
 * K and the high band divided by K. On return the first (length + 1) / 2
 * entries are the low band and the remaining length / 2 entries the high band.
 *
 * @param data    In/out signal (overwritten with the coefficients)
 * @param length  Number of samples; lengths below 2 are left untouched
 */
static void dwt_97_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int n_low = (length + 1) / 2;
    const int n_high = length / 2;

    float *buf = (float *)malloc(length * sizeof(float));
    float *s = buf;          // even samples / low band
    float *d = buf + n_low;  // odd samples / high band

    for (int k = 0; k < n_low; k++) s[k] = data[2 * k];
    for (int k = 0; k < n_high; k++) d[k] = data[2 * k + 1];

    // JPEG2000 9/7 lifting coefficients: round 0 is (alpha, beta),
    // round 1 is (gamma, delta)
    const float predict_coef[2] = { -1.586134342f, 0.882911076f };
    const float update_coef[2]  = { -0.052980118f, 0.443506852f };
    const float K = 1.230174105f;

    for (int round = 0; round < 2; round++) {
        // Predict: adjust each detail by its two even neighbours; the last
        // one reuses its own even sample when there is no right neighbour
        for (int k = 0; k < n_high; k++) {
            const float s_curr = s[k];
            const float s_next = (k + 1 < n_low) ? s[k + 1] : s_curr;
            d[k] += predict_coef[round] * (s_curr + s_next);
        }

        // Update: adjust each even sample by the details on either side; a
        // missing detail on the right counts as 0, a missing one on the left
        // mirrors the current detail
        for (int k = 0; k < n_low; k++) {
            const float d_curr = (k < n_high) ? d[k] : 0.0f;
            const float d_prev = (k > 0) ? d[k - 1] : d_curr;
            s[k] += update_coef[round] * (d_prev + d_curr);
        }
    }

    // Scaling
    for (int k = 0; k < n_low; k++) s[k] *= K;
    for (int k = 0; k < n_high; k++) d[k] /= K;

    memcpy(data, buf, length * sizeof(float));
    free(buf);
}
|
|
||||||
|
|
||||||
/**
 * CDF 9/7 wavelet forward 1D transform using 16.16 fixed-point lifting.
 *
 * Same split / predict / update / scale structure as the floating-point 9/7
 * transform, but every lifting term is computed from samples truncated to int,
 * multiplied by a 2^16-scaled coefficient and rounded back with a shift.
 *
 * NOTE(review): 1.230174105 * 2^16 is about 80620.7 and 2^16 / 1.230174105 is
 * about 53273.7, so K_FP (80542) and Ki_FP (53283) differ from what their
 * labels describe. The values are kept as-is; confirm against the matching
 * inverse transform before changing them.
 *
 * @param data    In/out signal (overwritten with the coefficients)
 * @param length  Number of samples; lengths below 2 are left untouched
 */
static void dwt_97_iint_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int n_low = (length + 1) / 2;
    const int n_high = length / 2;

    float *buf = (float *)malloc(length * sizeof(float));
    float *low = buf;
    float *high = buf + n_low;

    for (int k = 0; k < n_low; ++k) low[k] = data[2 * k];
    for (int k = 0; k < n_high; ++k) high[k] = data[2 * k + 1];

    const int SHIFT = 16;
    const int64_t ROUND = 1LL << (SHIFT - 1);
    const int64_t predict_fp[2] = { -103949, 57862 };  // alpha, gamma
    const int64_t update_fp[2]  = { -3472, 29066 };    // beta, delta
    const int64_t K_FP = 80542;   // see NOTE(review) above
    const int64_t Ki_FP = 53283;  // see NOTE(review) above

    // Round-half-away-from-zero shift back from 16.16 fixed point
    #define LIFT_RN(x) (((x)>=0)?(((x)+ROUND)>>SHIFT):(-((-(x)+ROUND)>>SHIFT)))

    for (int round = 0; round < 2; ++round) {
        // Predict
        for (int k = 0; k < n_high; ++k) {
            int s = low[k];
            int sn = (k + 1 < n_low) ? low[k + 1] : s;
            high[k] += LIFT_RN(predict_fp[round] * (int64_t)(s + sn));
        }

        // Update
        for (int k = 0; k < n_low; ++k) {
            int d = (k < n_high) ? high[k] : 0;
            int dp = (k > 0) ? high[k - 1] : d;
            low[k] += LIFT_RN(update_fp[round] * (int64_t)(dp + d));
        }
    }

    // Scaling
    for (int k = 0; k < n_low; ++k) {
        low[k] = (((int64_t)low[k] * K_FP + ROUND) >> SHIFT);
    }
    for (int k = 0; k < n_high; ++k) {
        high[k] = (((int64_t)high[k] * Ki_FP + ROUND) >> SHIFT);
    }

    memcpy(data, buf, length * sizeof(float));
    free(buf);
    #undef LIFT_RN
}
|
|
||||||
|
|
||||||
/**
 * Deslauriers-Dubuc 4-point (DD-4) wavelet forward 1D transform.
 *
 * Each odd sample is predicted from four even samples with the weights
 * -1/16, 9/16, 9/16, -1/16 and replaced by the prediction error; each even
 * sample then receives a quarter of the two adjacent prediction errors.
 * On return the first (length + 1) / 2 entries are the low band and the
 * remaining length / 2 entries the high band.
 *
 * @param data    In/out signal (overwritten with the coefficients)
 * @param length  Number of samples; lengths below 2 are left untouched
 */
static void dwt_dd4_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int n_low = (length + 1) / 2;
    const int n_high = length / 2;

    float *buf = (float *)malloc(length * sizeof(float));
    float *even = buf;
    float *odd = buf + n_low;

    for (int k = 0; k < n_low; k++) even[k] = data[2 * k];
    for (int k = 0; k < n_high; k++) odd[k] = data[2 * k + 1];

    // Prediction with the four-point kernel
    for (int k = 0; k < n_high; k++) {
        // Neighbours outside the signal are replaced by samples near the edge
        const float s_m1 = (k > 0) ? even[k - 1] : even[0];
        const float s_0 = even[k];
        const float s_1 = (k + 1 < n_low) ? even[k + 1] : even[n_low - 1];
        float s_2;
        if (k + 2 < n_low) {
            s_2 = even[k + 2];
        } else if (n_low > 1) {
            s_2 = even[n_low - 2];
        } else {
            s_2 = even[n_low - 1];
        }

        const float prediction = (-1.0f/16.0f) * s_m1 + (9.0f/16.0f) * s_0 +
                                 (9.0f/16.0f) * s_1 + (-1.0f/16.0f) * s_2;
        odd[k] -= prediction;
    }

    // Update: missing details on either side count as zero
    for (int k = 0; k < n_low; k++) {
        const float d_curr = (k < n_high) ? odd[k] : 0.0f;
        const float d_prev = (k > 0) ? odd[k - 1] : 0.0f;
        even[k] += 0.25f * (d_prev + d_curr);
    }

    memcpy(data, buf, length * sizeof(float));
    free(buf);
}
|
|
||||||
|
|
||||||
/**
 * Biorthogonal 13/7 wavelet forward 1D transform (simplified).
 *
 * This implementation runs the same predict/update pair as the 5/3 transform
 * and then multiplies the low band by K and divides the high band by K.
 * On return the first (length + 1) / 2 entries are the low band and the
 * remaining length / 2 entries the high band.
 *
 * @param data    In/out signal (overwritten with the coefficients)
 * @param length  Number of samples; lengths below 2 are left untouched
 */
static void dwt_bior137_forward_1d(float *data, int length) {
    if (length < 2) return;

    const float K = 1.230174105f;
    const int n_low = (length + 1) / 2;
    const int n_high = length / 2;

    float *buf = (float *)malloc(length * sizeof(float));
    float *low = buf;
    float *high = buf + n_low;

    // Predict: each odd sample minus the mean of its even neighbours
    for (int k = 0; k < n_low; k++) {
        const int odd = 2 * k + 1;
        if (odd >= length) continue;

        const float left = data[2 * k];
        const float right = (odd + 1 < length) ? data[odd + 1] : data[2 * k];
        const float prediction = 0.5f * (left + right);
        high[k] = data[odd] - prediction;
    }

    // Update: each even sample plus a quarter of the adjacent details
    for (int k = 0; k < n_low; k++) {
        const float update = 0.25f * ((k > 0 ? high[k - 1] : 0) +
                                      (k < n_low - 1 ? high[k] : 0));
        low[k] = data[2 * k] + update;
    }

    // Scaling
    for (int k = 0; k < n_low; k++) low[k] *= K;
    for (int k = 0; k < n_high; k++) high[k] /= K;

    memcpy(data, buf, length * sizeof(float));
    free(buf);
}
|
|
||||||
|
|
||||||
/**
 * Haar wavelet forward 1D transform.
 *
 * Every pair (a, b) becomes the average (a + b) / 2 in the low band and the
 * half-difference (a - b) / 2 in the high band. With an odd length the last
 * sample has no partner and is copied unchanged to the end of the low band.
 * On return the first (length + 1) / 2 entries are the low band and the
 * remaining length / 2 entries the high band.
 *
 * @param data    In/out signal (overwritten with the coefficients)
 * @param length  Number of samples; lengths below 2 are left untouched
 */
static void dwt_haar_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int n_low = (length + 1) / 2;
    const int n_pairs = length / 2;

    float *buf = (float *)malloc(length * sizeof(float));
    float *avg = buf;
    float *diff = buf + n_low;

    for (int k = 0; k < n_pairs; k++) {
        const float a = data[2 * k];
        const float b = data[2 * k + 1];
        avg[k] = (a + b) / 2.0f;
        diff[k] = (a - b) / 2.0f;
    }

    // Unpaired trailing sample (odd length only)
    if (n_low > n_pairs) {
        avg[n_low - 1] = data[2 * (n_low - 1)];
    }

    memcpy(data, buf, length * sizeof(float));
    free(buf);
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// 1D Inverse DWT Transforms
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
 * CDF 5/3 wavelet inverse 1D transform (in place).
 *
 * Input layout is [low-pass | high-pass] with (length + 1) / 2 low-pass
 * coefficients. The lifting steps are undone in reverse order: first the
 * update (low-pass) step, then the predict (high-pass) step, while the
 * even/odd samples are re-interleaved into data.
 * Intended as the inverse of dwt_53_forward_1d().
 *
 * Aborts the process if the scratch buffer cannot be allocated.
 *
 * @param data   In/out coefficient data
 * @param length Signal length; lengths below 2 are left untouched
 */
static void dwt_53_inverse_1d(float *data, int length) {
    if (length < 2) return;

    const int half = (length + 1) / 2;  // number of low-pass coefficients
    const int num_high = length / 2;    // number of high-pass coefficients

    float *temp = (float *)malloc((size_t)length * sizeof(float));
    if (!temp) abort();  // no error channel in this void API; fail loudly instead of dereferencing NULL

    // Work on a copy so the interleaved result can be written straight into data
    memcpy(temp, data, (size_t)length * sizeof(float));

    // Undo update step: remove a quarter of the neighbouring details from each low-pass value
    for (int i = 0; i < half; i++) {
        const float prev_detail = (i > 0) ? temp[half + i - 1] : 0.0f;
        const float next_detail = (i < half - 1) ? temp[half + i] : 0.0f;
        temp[i] -= 0.25f * (prev_detail + next_detail);
    }

    // Undo predict step: even samples are the restored low-pass values,
    // odd samples are detail + mean of the two surrounding even samples
    // (the right neighbour is mirrored at the edge).
    for (int i = 0; i < half; i++) {
        data[2 * i] = temp[i];
        if (i < num_high) {
            const float right = (i + 1 < half) ? temp[i + 1] : temp[i];
            const float pred = 0.5f * (temp[i] + right);
            data[2 * i + 1] = temp[half + i] + pred;
        }
    }

    free(temp);
}
|
|
||||||
|
|
||||||
/**
 * Haar wavelet inverse 1D transform (in place).
 *
 * Reverses dwt_haar_forward_1d(): from an average s and half-difference d
 * the original pair is rebuilt as (s + d, s - d). Input layout is
 * [low-pass | high-pass] with (length + 1) / 2 low-pass values; for odd
 * lengths the last low-pass value is the unpaired sample and is copied back
 * unchanged.
 *
 * Aborts the process if the scratch buffer cannot be allocated.
 *
 * @param data   In/out coefficient data
 * @param length Signal length; lengths below 2 are left untouched
 */
static void dwt_haar_inverse_1d(float *data, int length) {
    if (length < 2) return;

    const int half = (length + 1) / 2;  // size of the low-pass band
    const int pairs = length / 2;       // number of (average, difference) pairs

    float *temp = (float *)malloc((size_t)length * sizeof(float));
    if (!temp) abort();  // no error channel in this void API; fail loudly instead of dereferencing NULL

    // Reconstruct from averages and differences
    for (int i = 0; i < pairs; i++) {
        const float avg = data[i];
        const float diff = data[half + i];
        temp[2 * i] = avg + diff;
        temp[2 * i + 1] = avg - diff;
    }

    // Odd length: restore the unpaired trailing sample
    if (length & 1) {
        temp[length - 1] = data[half - 1];
    }

    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// 2D DWT Transform
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Apply 2D forward DWT to a frame (in-place).
|
|
||||||
*
|
|
||||||
* Applies separable 1D transforms: horizontal (rows), then vertical (columns).
|
|
||||||
* Supports multi-level decomposition.
|
|
||||||
*
|
|
||||||
* @param data In/out 2D image data (row-major, width stride)
|
|
||||||
* @param width Image width
|
|
||||||
* @param height Image height
|
|
||||||
* @param levels Number of decomposition levels
|
|
||||||
* @param filter_type Wavelet type (WAVELET_* constant)
|
|
||||||
*/
|
|
||||||
void tav_dwt_2d_forward(float *data, int width, int height, int levels, int filter_type) {
|
|
||||||
const int max_size = (width > height) ? width : height;
|
|
||||||
float *temp_row = malloc(max_size * sizeof(float));
|
|
||||||
float *temp_col = malloc(max_size * sizeof(float));
|
|
||||||
|
|
||||||
// Pre-calculate dimensions for each level
|
|
||||||
int *widths = malloc((levels + 1) * sizeof(int));
|
|
||||||
int *heights = malloc((levels + 1) * sizeof(int));
|
|
||||||
widths[0] = width;
|
|
||||||
heights[0] = height;
|
|
||||||
for (int i = 1; i <= levels; i++) {
|
|
||||||
widths[i] = (widths[i - 1] + 1) / 2;
|
|
||||||
heights[i] = (heights[i - 1] + 1) / 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apply multi-level decomposition
|
|
||||||
for (int level = 0; level < levels; level++) {
|
|
||||||
int current_width = widths[level];
|
|
||||||
int current_height = heights[level];
|
|
||||||
if (current_width < 1 || current_height < 1) break;
|
|
||||||
|
|
||||||
// Row transform (horizontal)
|
|
||||||
for (int y = 0; y < current_height; y++) {
|
|
||||||
// Extract row
|
|
||||||
for (int x = 0; x < current_width; x++) {
|
|
||||||
temp_row[x] = data[y * width + x];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apply 1D DWT
|
|
||||||
switch (filter_type) {
|
|
||||||
case WAVELET_5_3_REVERSIBLE:
|
|
||||||
dwt_53_forward_1d(temp_row, current_width);
|
|
||||||
break;
|
|
||||||
case WAVELET_9_7_IRREVERSIBLE:
|
|
||||||
dwt_97_forward_1d(temp_row, current_width);
|
|
||||||
break;
|
|
||||||
case WAVELET_BIORTHOGONAL_13_7:
|
|
||||||
dwt_bior137_forward_1d(temp_row, current_width);
|
|
||||||
break;
|
|
||||||
case WAVELET_DD4:
|
|
||||||
dwt_dd4_forward_1d(temp_row, current_width);
|
|
||||||
break;
|
|
||||||
case WAVELET_HAAR:
|
|
||||||
dwt_haar_forward_1d(temp_row, current_width);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write back
|
|
||||||
for (int x = 0; x < current_width; x++) {
|
|
||||||
data[y * width + x] = temp_row[x];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Column transform (vertical)
|
|
||||||
for (int x = 0; x < current_width; x++) {
|
|
||||||
// Extract column
|
|
||||||
for (int y = 0; y < current_height; y++) {
|
|
||||||
temp_col[y] = data[y * width + x];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apply 1D DWT
|
|
||||||
switch (filter_type) {
|
|
||||||
case WAVELET_5_3_REVERSIBLE:
|
|
||||||
dwt_53_forward_1d(temp_col, current_height);
|
|
||||||
break;
|
|
||||||
case WAVELET_9_7_IRREVERSIBLE:
|
|
||||||
dwt_97_forward_1d(temp_col, current_height);
|
|
||||||
break;
|
|
||||||
case WAVELET_BIORTHOGONAL_13_7:
|
|
||||||
dwt_bior137_forward_1d(temp_col, current_height);
|
|
||||||
break;
|
|
||||||
case WAVELET_DD4:
|
|
||||||
dwt_dd4_forward_1d(temp_col, current_height);
|
|
||||||
break;
|
|
||||||
case WAVELET_HAAR:
|
|
||||||
dwt_haar_forward_1d(temp_col, current_height);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write back
|
|
||||||
for (int y = 0; y < current_height; y++) {
|
|
||||||
data[y * width + x] = temp_col[y];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
free(widths);
|
|
||||||
free(heights);
|
|
||||||
free(temp_row);
|
|
||||||
free(temp_col);
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// 3D DWT Transform (Temporal + Spatial)
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Apply 3D forward DWT to a GOP (group of pictures).
|
|
||||||
*
|
|
||||||
* First applies temporal DWT across frames at each spatial location,
|
|
||||||
* then applies 2D spatial DWT to each resulting temporal subband.
|
|
||||||
*
|
|
||||||
* @param gop_data Array of frame pointers [num_frames][width*height]
|
|
||||||
* @param width Frame width
|
|
||||||
* @param height Frame height
|
|
||||||
* @param num_frames Number of frames in GOP
|
|
||||||
* @param spatial_levels Number of 2D spatial decomposition levels
|
|
||||||
* @param temporal_levels Number of 1D temporal decomposition levels
|
|
||||||
* @param spatial_filter Wavelet type for spatial transform
|
|
||||||
* @param temporal_filter Wavelet type for temporal transform (0=Haar, 1=5/3)
|
|
||||||
*/
|
|
||||||
void tav_dwt_3d_forward(float **gop_data, int width, int height, int num_frames,
|
|
||||||
int spatial_levels, int temporal_levels,
|
|
||||||
int spatial_filter, int temporal_filter) {
|
|
||||||
if (num_frames < 2 || width < 2 || height < 2) return;
|
|
||||||
|
|
||||||
float *temporal_line = malloc(num_frames * sizeof(float));
|
|
||||||
|
|
||||||
// Pre-calculate temporal lengths for non-power-of-2 GOPs
|
|
||||||
int *temporal_lengths = malloc((temporal_levels + 1) * sizeof(int));
|
|
||||||
temporal_lengths[0] = num_frames;
|
|
||||||
for (int i = 1; i <= temporal_levels; i++) {
|
|
||||||
temporal_lengths[i] = (temporal_lengths[i - 1] + 1) / 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 1: Apply temporal DWT across frames
|
|
||||||
for (int y = 0; y < height; y++) {
|
|
||||||
for (int x = 0; x < width; x++) {
|
|
||||||
int pixel_idx = y * width + x;
|
|
||||||
|
|
||||||
// Extract temporal signal
|
|
||||||
for (int t = 0; t < num_frames; t++) {
|
|
||||||
temporal_line[t] = gop_data[t][pixel_idx];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apply temporal DWT with multiple levels
|
|
||||||
for (int level = 0; level < temporal_levels; level++) {
|
|
||||||
int level_frames = temporal_lengths[level];
|
|
||||||
if (level_frames >= 2) {
|
|
||||||
if (temporal_filter == 255) {
|
|
||||||
// Haar temporal (default)
|
|
||||||
dwt_haar_forward_1d(temporal_line, level_frames);
|
|
||||||
} else if (temporal_filter == 0) {
|
|
||||||
// CDF 5/3 temporal
|
|
||||||
dwt_53_forward_1d(temporal_line, level_frames);
|
|
||||||
} else {
|
|
||||||
// Fallback to Haar for unsupported wavelets
|
|
||||||
dwt_haar_forward_1d(temporal_line, level_frames);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write back temporal coefficients
|
|
||||||
for (int t = 0; t < num_frames; t++) {
|
|
||||||
gop_data[t][pixel_idx] = temporal_line[t];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
free(temporal_lengths);
|
|
||||||
free(temporal_line);
|
|
||||||
|
|
||||||
// Step 2: Apply 2D spatial DWT to each temporal subband
|
|
||||||
for (int t = 0; t < num_frames; t++) {
|
|
||||||
tav_dwt_2d_forward(gop_data[t], width, height, spatial_levels, spatial_filter);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Utility Functions
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
 * Suggest a decomposition depth for a frame of the given size.
 *
 * The smaller of the two dimensions is halved (integer division) for as long
 * as it is at least 32; the number of halvings, capped at 6, is the result.
 * Frames whose smaller dimension is below 32 therefore yield 0.
 *
 * @param width  Image width
 * @param height Image height
 * @return Recommended number of levels (0-6)
 */
int tav_dwt_calculate_levels(int width, int height) {
    const int max_levels = 6;
    int smaller = (height <= width) ? height : width;
    int count = 0;

    // One level per halving that still starts from a size of 32 or more
    for (; smaller >= 32; smaller /= 2) {
        count++;
    }

    if (count > max_levels) {
        count = max_levels;
    }
    return count;
}
|
|
||||||
@@ -1,88 +0,0 @@
|
|||||||
/**
 * TAV Encoder - Discrete Wavelet Transform Library
 *
 * Public API for multi-resolution wavelet decomposition.
 * Supports multiple wavelet types: CDF 5/3, 9/7, 13/7, DD-4, Haar
 */

#ifndef TAV_ENCODER_DWT_H
#define TAV_ENCODER_DWT_H

#ifdef __cplusplus
extern "C" {
#endif

// =============================================================================
// Wavelet Type Constants
// =============================================================================

#define WAVELET_5_3_REVERSIBLE 0 // CDF 5/3 reversible (lossless capable)
#define WAVELET_9_7_IRREVERSIBLE 1 // CDF 9/7 JPEG2000 (default, best compression)
#define WAVELET_BIORTHOGONAL_13_7 2 // CDF 13/7 experimental
#define WAVELET_DD4 16 // Deslauriers-Dubuc 4-point interpolating
#define WAVELET_HAAR 255 // Haar (demonstration only)

// =============================================================================
// 2D Discrete Wavelet Transform
// =============================================================================

/**
 * Apply 2D wavelet transform to spatial data.
 *
 * Uses separable 1D transforms: apply horizontal rows, then vertical columns.
 * Multi-level decomposition creates frequency subbands: LL, LH, HL, HH.
 * Each level after the first re-transforms only the top-left region, whose
 * dimensions are the previous ones halved and rounded up.
 *
 * @param data Input/output data array (modified in-place, row-major, stride = width)
 * @param width Frame width
 * @param height Frame height
 * @param levels Number of decomposition levels. NOTE: the implementation does
 *               not auto-calculate; passing 0 performs no decomposition. Use
 *               tav_dwt_calculate_levels() to obtain a recommended value.
 * @param filter_type Wavelet type (WAVELET_* constants); values other than
 *                    the WAVELET_* constants leave the data unchanged
 */
void tav_dwt_2d_forward(float *data, int width, int height,
                        int levels, int filter_type);

// =============================================================================
// 3D Discrete Wavelet Transform (GOP Temporal + Spatial)
// =============================================================================

/**
 * Apply 3D wavelet transform to group-of-pictures (GOP).
 *
 * Process:
 * 1. Apply temporal 1D DWT across frames at each spatial position
 * 2. Apply spatial 2D DWT to each temporal subband frame
 *
 * The call does nothing when num_frames, width or height is below 2.
 *
 * @param gop_data Array of frame pointers [num_frames], each width*height floats
 * @param width Frame width
 * @param height Frame height
 * @param num_frames Number of frames in GOP
 * @param spatial_levels Spatial decomposition levels, passed unchanged to
 *                       tav_dwt_2d_forward() (0 performs no spatial decomposition)
 * @param temporal_levels Temporal decomposition levels
 * @param spatial_filter Wavelet type for spatial transform
 * @param temporal_filter Wavelet type for temporal transform:
 *                        WAVELET_5_3_REVERSIBLE selects CDF 5/3; every other
 *                        value, including WAVELET_HAAR, uses Haar
 */
void tav_dwt_3d_forward(float **gop_data, int width, int height, int num_frames,
                        int spatial_levels, int temporal_levels,
                        int spatial_filter, int temporal_filter);

// =============================================================================
// Utility Functions
// =============================================================================

/**
 * Calculate a recommended number of decomposition levels for given dimensions.
 *
 * Halves min(width, height) (integer division) for as long as it is at
 * least 32 and counts the halvings; the count is capped at 6.
 * Returns 0 when the smaller dimension is below 32.
 *
 * @param width Frame width
 * @param height Frame height
 * @return Recommended number of levels (0-6)
 */
int tav_dwt_calculate_levels(int width, int height);

#ifdef __cplusplus
}
#endif

#endif // TAV_ENCODER_DWT_H
|
|
||||||
@@ -1,415 +0,0 @@
|
|||||||
/**
|
|
||||||
* TAV Encoder - EZBC (Embedded Zero Block Coding) Library
|
|
||||||
*
|
|
||||||
* Implements binary tree embedded zero block coding for efficient storage
|
|
||||||
* of sparse wavelet coefficients. Exploits coefficient sparsity through
|
|
||||||
* hierarchical significance testing and progressive bitplane encoding.
|
|
||||||
*
|
|
||||||
* Extracted from encoder_tav.c as part of library refactoring.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include <math.h>
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// EZBC Structures
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
 * Bitstream writer for bit-level encoding.
 *
 * Bits are packed into each byte starting at the least significant bit.
 * The buffer is zero-filled so that only 1-bits ever need to be written.
 */
typedef struct {
    uint8_t *data;    // Heap buffer holding the encoded bytes
    size_t capacity;  // Allocated size of data, in bytes
    size_t byte_pos;  // Index of the byte currently being filled
    uint8_t bit_pos; // 0-7, current bit position in current byte
} bitstream_t;
|
|
||||||
|
|
||||||
/**
 * Block structure for EZBC quadtree decomposition.
 *
 * Describes a rectangular region of the coefficient plane; a 1x1 block
 * addresses a single coefficient at index y * plane_width + x.
 */
typedef struct {
    int x, y; // Top-left position in 2D coefficient array
    int width, height; // Block dimensions
} ezbc_block_t;
|
|
||||||
|
|
||||||
/**
 * Queue for EZBC block processing.
 *
 * A growable array: blocks are appended at the end and consumers walk it
 * by index from 0 to count - 1; nothing is ever popped.
 */
typedef struct {
    ezbc_block_t *blocks;  // Heap array of queued blocks
    size_t count;          // Number of blocks currently stored
    size_t capacity;       // Number of blocks the array can hold before growing
} block_queue_t;
|
|
||||||
|
|
||||||
/**
 * Track coefficient state for refinement.
 *
 * One entry per coefficient, indexed like the coefficient array.
 * NOTE(review): within this file the fields are only written (when a
 * coefficient first becomes significant) and never read back.
 */
typedef struct {
    bool significant; // Has been marked significant
    int first_bitplane; // Bitplane where it became significant
} coeff_state_t;
|
|
||||||
|
|
||||||
/**
 * EZBC encoding context for recursive processing.
 *
 * Bundles everything the recursive block subdivision needs so that it can be
 * passed around as a single pointer. All pointers are borrowed from the
 * top-level encoder; the context owns nothing.
 */
typedef struct {
    bitstream_t *bs;                    // Output bitstream receiving significance and sign bits
    int16_t *coeffs;                    // Coefficient plane being encoded (row-major)
    coeff_state_t *states;              // Per-coefficient significance records
    int width;                          // Coefficient plane width (also the row stride)
    int height;                         // Coefficient plane height
    int bitplane;                       // Bitplane currently being encoded
    int threshold;                      // Significance threshold for this bitplane (1 << bitplane)
    block_queue_t *next_insignificant;  // Receives blocks still insignificant, to re-test on the next bitplane
    block_queue_t *next_significant;    // Receives 1x1 blocks that just became significant
    int *sign_count;                    // Counter incremented once per sign bit emitted
} ezbc_context_t;
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Bitstream Operations
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize bitstream with initial capacity.
|
|
||||||
*/
|
|
||||||
static void bitstream_init(bitstream_t *bs, size_t initial_capacity) {
|
|
||||||
// Ensure minimum capacity to avoid issues with zero-size allocations
|
|
||||||
if (initial_capacity < 64) initial_capacity = 64;
|
|
||||||
bs->capacity = initial_capacity;
|
|
||||||
bs->data = calloc(1, initial_capacity);
|
|
||||||
if (!bs->data) {
|
|
||||||
fprintf(stderr, "ERROR: Failed to allocate bitstream buffer of size %zu\n", initial_capacity);
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
bs->byte_pos = 0;
|
|
||||||
bs->bit_pos = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Write a single bit to bitstream.
|
|
||||||
*/
|
|
||||||
static void bitstream_write_bit(bitstream_t *bs, int bit) {
|
|
||||||
// Grow if needed
|
|
||||||
if (bs->byte_pos >= bs->capacity) {
|
|
||||||
size_t old_capacity = bs->capacity;
|
|
||||||
bs->capacity *= 2;
|
|
||||||
bs->data = realloc(bs->data, bs->capacity);
|
|
||||||
// Clear only the newly allocated memory region
|
|
||||||
memset(bs->data + old_capacity, 0, bs->capacity - old_capacity);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bit) {
|
|
||||||
bs->data[bs->byte_pos] |= (1 << bs->bit_pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
bs->bit_pos++;
|
|
||||||
if (bs->bit_pos == 8) {
|
|
||||||
bs->bit_pos = 0;
|
|
||||||
bs->byte_pos++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Write multiple bits to bitstream (LSB first).
|
|
||||||
*/
|
|
||||||
static void bitstream_write_bits(bitstream_t *bs, uint32_t value, int num_bits) {
|
|
||||||
for (int i = 0; i < num_bits; i++) {
|
|
||||||
bitstream_write_bit(bs, (value >> i) & 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get current bitstream size in bytes.
|
|
||||||
*/
|
|
||||||
static size_t bitstream_size(bitstream_t *bs) {
|
|
||||||
return bs->byte_pos + (bs->bit_pos > 0 ? 1 : 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Free bitstream buffer.
|
|
||||||
*/
|
|
||||||
static void bitstream_free(bitstream_t *bs) {
|
|
||||||
free(bs->data);
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Block Queue Operations
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize block queue with initial capacity.
|
|
||||||
*/
|
|
||||||
static void queue_init(block_queue_t *q) {
|
|
||||||
q->capacity = 1024;
|
|
||||||
q->blocks = malloc(q->capacity * sizeof(ezbc_block_t));
|
|
||||||
q->count = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Push block onto queue, growing if needed.
|
|
||||||
*/
|
|
||||||
static void queue_push(block_queue_t *q, ezbc_block_t block) {
|
|
||||||
if (q->count >= q->capacity) {
|
|
||||||
q->capacity *= 2;
|
|
||||||
q->blocks = realloc(q->blocks, q->capacity * sizeof(ezbc_block_t));
|
|
||||||
}
|
|
||||||
q->blocks[q->count++] = block;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Free block queue.
|
|
||||||
*/
|
|
||||||
static void queue_free(block_queue_t *q) {
|
|
||||||
free(q->blocks);
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// EZBC Helper Functions
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if all coefficients in block have |coeff| < threshold.
|
|
||||||
*/
|
|
||||||
static bool is_zero_block_ezbc(int16_t *coeffs, int width, int height,
|
|
||||||
const ezbc_block_t *block, int threshold) {
|
|
||||||
for (int y = block->y; y < block->y + block->height && y < height; y++) {
|
|
||||||
for (int x = block->x; x < block->x + block->width && x < width; x++) {
|
|
||||||
int idx = y * width + x;
|
|
||||||
if (abs(coeffs[idx]) >= threshold) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Return the largest magnitude found in a coefficient array.
 *
 * Magnitudes are computed in int, so -32768 yields 32768.
 *
 * @param coeffs Coefficients to scan
 * @param count  Number of coefficients
 * @return Maximum of |coeffs[i]|, or 0 for an empty array
 */
static int find_max_abs_ezbc(int16_t *coeffs, size_t count) {
    int largest = 0;
    // Scan order does not matter for a maximum, so walk backwards from the end
    for (size_t i = count; i-- > 0;) {
        const int sample = coeffs[i];
        const int magnitude = (sample < 0) ? -sample : sample;
        if (largest < magnitude) {
            largest = magnitude;
        }
    }
    return largest;
}
|
|
||||||
|
|
||||||
/**
 * Locate the highest set bit of a value.
 *
 * For value >= 1 this is floor(log2(value)). Zero and negative inputs
 * yield 0.
 *
 * @param value Value to inspect
 * @return Bit index of the most significant set bit
 */
static int get_msb_bitplane(int value) {
    int position = 0;
    // Each right shift that still leaves more than 1 adds one bit position
    for (int rest = value; rest > 1; rest >>= 1) {
        position++;
    }
    return position;
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Recursively process a significant block - subdivide until 1x1.
|
|
||||||
*/
|
|
||||||
static void process_significant_block_recursive(ezbc_context_t *ctx, ezbc_block_t block) {
|
|
||||||
// If 1x1 block: emit sign bit and add to significant queue
|
|
||||||
if (block.width == 1 && block.height == 1) {
|
|
||||||
int idx = block.y * ctx->width + block.x;
|
|
||||||
bitstream_write_bit(ctx->bs, ctx->coeffs[idx] < 0 ? 1 : 0);
|
|
||||||
(*ctx->sign_count)++;
|
|
||||||
ctx->states[idx].significant = true;
|
|
||||||
ctx->states[idx].first_bitplane = ctx->bitplane;
|
|
||||||
queue_push(ctx->next_significant, block);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Block is > 1x1: subdivide into children and recursively process each
|
|
||||||
int mid_x = block.width / 2;
|
|
||||||
int mid_y = block.height / 2;
|
|
||||||
if (mid_x == 0) mid_x = 1;
|
|
||||||
if (mid_y == 0) mid_y = 1;
|
|
||||||
|
|
||||||
// Process top-left child
|
|
||||||
ezbc_block_t tl = {block.x, block.y, mid_x, mid_y};
|
|
||||||
if (!is_zero_block_ezbc(ctx->coeffs, ctx->width, ctx->height, &tl, ctx->threshold)) {
|
|
||||||
bitstream_write_bit(ctx->bs, 1); // Significant
|
|
||||||
process_significant_block_recursive(ctx, tl);
|
|
||||||
} else {
|
|
||||||
bitstream_write_bit(ctx->bs, 0); // Insignificant
|
|
||||||
queue_push(ctx->next_insignificant, tl);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process top-right child (if exists)
|
|
||||||
if (block.width > mid_x) {
|
|
||||||
ezbc_block_t tr = {block.x + mid_x, block.y, block.width - mid_x, mid_y};
|
|
||||||
if (!is_zero_block_ezbc(ctx->coeffs, ctx->width, ctx->height, &tr, ctx->threshold)) {
|
|
||||||
bitstream_write_bit(ctx->bs, 1);
|
|
||||||
process_significant_block_recursive(ctx, tr);
|
|
||||||
} else {
|
|
||||||
bitstream_write_bit(ctx->bs, 0);
|
|
||||||
queue_push(ctx->next_insignificant, tr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process bottom-left child (if exists)
|
|
||||||
if (block.height > mid_y) {
|
|
||||||
ezbc_block_t bl = {block.x, block.y + mid_y, mid_x, block.height - mid_y};
|
|
||||||
if (!is_zero_block_ezbc(ctx->coeffs, ctx->width, ctx->height, &bl, ctx->threshold)) {
|
|
||||||
bitstream_write_bit(ctx->bs, 1);
|
|
||||||
process_significant_block_recursive(ctx, bl);
|
|
||||||
} else {
|
|
||||||
bitstream_write_bit(ctx->bs, 0);
|
|
||||||
queue_push(ctx->next_insignificant, bl);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process bottom-right child (if exists)
|
|
||||||
if (block.width > mid_x && block.height > mid_y) {
|
|
||||||
ezbc_block_t br = {block.x + mid_x, block.y + mid_y, block.width - mid_x, block.height - mid_y};
|
|
||||||
if (!is_zero_block_ezbc(ctx->coeffs, ctx->width, ctx->height, &br, ctx->threshold)) {
|
|
||||||
bitstream_write_bit(ctx->bs, 1);
|
|
||||||
process_significant_block_recursive(ctx, br);
|
|
||||||
} else {
|
|
||||||
bitstream_write_bit(ctx->bs, 0);
|
|
||||||
queue_push(ctx->next_insignificant, br);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Main EZBC Encoding Function
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* EZBC encoding for a single channel.
|
|
||||||
*
|
|
||||||
* Uses two separate queues for insignificant blocks and significant 1x1 blocks.
|
|
||||||
* Encodes coefficients progressively from MSB to LSB bitplane.
|
|
||||||
*
|
|
||||||
* Algorithm:
|
|
||||||
* 1. Find MSB bitplane from maximum absolute coefficient value
|
|
||||||
* 2. Write header: MSB bitplane, width, height
|
|
||||||
* 3. For each bitplane from MSB to 0:
|
|
||||||
* a. Process insignificant blocks: check if they become significant
|
|
||||||
* b. For newly significant blocks: recursively subdivide until 1x1
|
|
||||||
* c. Emit sign bits for newly significant 1x1 coefficients
|
|
||||||
* d. Process already-significant coefficients: emit refinement bits
|
|
||||||
* 4. Return encoded bitstream
|
|
||||||
*
|
|
||||||
* @param coeffs Input quantized coefficients (int16_t array)
|
|
||||||
* @param count Number of coefficients
|
|
||||||
* @param width Frame width
|
|
||||||
* @param height Frame height
|
|
||||||
* @param output Output buffer pointer (allocated by this function)
|
|
||||||
* @return Encoded size in bytes
|
|
||||||
*/
|
|
||||||
size_t tav_encode_channel_ezbc(int16_t *coeffs, size_t count, int width, int height,
|
|
||||||
uint8_t **output) {
|
|
||||||
bitstream_t bs;
|
|
||||||
bitstream_init(&bs, count / 4); // Initial guess
|
|
||||||
|
|
||||||
// Track coefficient significance
|
|
||||||
coeff_state_t *states = calloc(count, sizeof(coeff_state_t));
|
|
||||||
|
|
||||||
// Find maximum value to determine MSB bitplane
|
|
||||||
int max_abs = find_max_abs_ezbc(coeffs, count);
|
|
||||||
int msb_bitplane = get_msb_bitplane(max_abs);
|
|
||||||
|
|
||||||
// Write header: MSB bitplane and dimensions
|
|
||||||
bitstream_write_bits(&bs, msb_bitplane, 8);
|
|
||||||
bitstream_write_bits(&bs, width, 16);
|
|
||||||
bitstream_write_bits(&bs, height, 16);
|
|
||||||
|
|
||||||
// Initialise two queues: insignificant blocks and significant 1x1 blocks
|
|
||||||
block_queue_t insignificant_queue, next_insignificant;
|
|
||||||
block_queue_t significant_queue, next_significant;
|
|
||||||
|
|
||||||
queue_init(&insignificant_queue);
|
|
||||||
queue_init(&next_insignificant);
|
|
||||||
queue_init(&significant_queue);
|
|
||||||
queue_init(&next_significant);
|
|
||||||
|
|
||||||
// Start with root block as insignificant
|
|
||||||
ezbc_block_t root = {0, 0, width, height};
|
|
||||||
queue_push(&insignificant_queue, root);
|
|
||||||
|
|
||||||
// Process bitplanes from MSB to LSB
|
|
||||||
for (int bitplane = msb_bitplane; bitplane >= 0; bitplane--) {
|
|
||||||
int threshold = 1 << bitplane;
|
|
||||||
|
|
||||||
int sign_bits_this_bitplane = 0;
|
|
||||||
|
|
||||||
// Process insignificant blocks - check if they become significant
|
|
||||||
for (size_t i = 0; i < insignificant_queue.count; i++) {
|
|
||||||
ezbc_block_t block = insignificant_queue.blocks[i];
|
|
||||||
|
|
||||||
// Check if this block has any coefficient >= threshold
|
|
||||||
if (is_zero_block_ezbc(coeffs, width, height, &block, threshold)) {
|
|
||||||
// Still insignificant: emit 0
|
|
||||||
bitstream_write_bit(&bs, 0);
|
|
||||||
// Keep in insignificant queue for next bitplane
|
|
||||||
queue_push(&next_insignificant, block);
|
|
||||||
} else {
|
|
||||||
// Became significant: emit 1
|
|
||||||
bitstream_write_bit(&bs, 1);
|
|
||||||
|
|
||||||
// Use recursive subdivision to process this block and all children
|
|
||||||
ezbc_context_t ctx = {
|
|
||||||
.bs = &bs,
|
|
||||||
.coeffs = coeffs,
|
|
||||||
.states = states,
|
|
||||||
.width = width,
|
|
||||||
.height = height,
|
|
||||||
.bitplane = bitplane,
|
|
||||||
.threshold = threshold,
|
|
||||||
.next_insignificant = &next_insignificant,
|
|
||||||
.next_significant = &next_significant,
|
|
||||||
.sign_count = &sign_bits_this_bitplane
|
|
||||||
};
|
|
||||||
process_significant_block_recursive(&ctx, block);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process significant 1x1 blocks - emit refinement bits
|
|
||||||
for (size_t i = 0; i < significant_queue.count; i++) {
|
|
||||||
ezbc_block_t block = significant_queue.blocks[i];
|
|
||||||
int idx = block.y * width + block.x;
|
|
||||||
int abs_val = abs(coeffs[idx]);
|
|
||||||
|
|
||||||
// Emit refinement bit at current bitplane
|
|
||||||
int bit = (abs_val >> bitplane) & 1;
|
|
||||||
bitstream_write_bit(&bs, bit);
|
|
||||||
|
|
||||||
// Keep in significant queue for next bitplane
|
|
||||||
queue_push(&next_significant, block);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Swap queues for next bitplane
|
|
||||||
queue_free(&insignificant_queue);
|
|
||||||
queue_free(&significant_queue);
|
|
||||||
insignificant_queue = next_insignificant;
|
|
||||||
significant_queue = next_significant;
|
|
||||||
queue_init(&next_insignificant);
|
|
||||||
queue_init(&next_significant);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Free all queues
|
|
||||||
queue_free(&insignificant_queue);
|
|
||||||
queue_free(&significant_queue);
|
|
||||||
queue_free(&next_insignificant);
|
|
||||||
queue_free(&next_significant);
|
|
||||||
free(states);
|
|
||||||
|
|
||||||
size_t final_size = bitstream_size(&bs);
|
|
||||||
*output = bs.data;
|
|
||||||
|
|
||||||
return final_size;
|
|
||||||
}
|
|
||||||
@@ -1,61 +0,0 @@
|
|||||||
/**
|
|
||||||
* TAV Encoder - EZBC (Embedded Zero Block Coding) Library
|
|
||||||
*
|
|
||||||
* Public API for EZBC entropy coding of wavelet coefficients.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef TAV_ENCODER_EZBC_H
|
|
||||||
#define TAV_ENCODER_EZBC_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stddef.h>
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// EZBC Encoding
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* EZBC encoding for a single channel.
|
|
||||||
*
|
|
||||||
* Implements binary tree embedded zero block coding for efficient storage
|
|
||||||
* of sparse wavelet coefficients. Exploits coefficient sparsity through
|
|
||||||
* hierarchical significance testing and progressive bitplane encoding.
|
|
||||||
*
|
|
||||||
* Algorithm:
|
|
||||||
* 1. Find MSB bitplane from maximum absolute coefficient value
|
|
||||||
* 2. Write header: MSB bitplane (8 bits), width (16 bits), height (16 bits)
|
|
||||||
* 3. For each bitplane from MSB to 0:
|
|
||||||
* a. Process insignificant blocks: check if they become significant
|
|
||||||
* - Emit 0 if still insignificant, 1 if became significant
|
|
||||||
* b. For newly significant blocks: recursively subdivide until 1x1
|
|
||||||
* - Emit tree structure: 1=child is significant, 0=child insignificant
|
|
||||||
* c. Emit sign bits for newly significant 1x1 coefficients (1=negative, 0=positive)
|
|
||||||
* d. Process already-significant coefficients: emit refinement bits
|
|
||||||
* - Emit bit at current bitplane for progressive reconstruction
|
|
||||||
* 4. Return encoded bitstream
|
|
||||||
*
|
|
||||||
* Benefits:
|
|
||||||
* - Exploits coefficient sparsity (typical: 86.9% zeros in luma, 97.8% in chroma)
|
|
||||||
* - Progressive refinement from MSB to LSB
|
|
||||||
* - Spatial clustering through quadtree decomposition
|
|
||||||
* - No additional entropy coding needed (bitstream is already compressed)
|
|
||||||
*
|
|
||||||
* @param coeffs Input quantized coefficients (int16_t array)
|
|
||||||
* @param count Number of coefficients (width × height)
|
|
||||||
* @param width Frame width (must match coefficient array layout)
|
|
||||||
* @param height Frame height (must match coefficient array layout)
|
|
||||||
* @param output Output buffer pointer (allocated by this function, caller must free)
|
|
||||||
* @return Encoded size in bytes (including header)
|
|
||||||
*/
|
|
||||||
size_t tav_encode_channel_ezbc(int16_t *coeffs, size_t count, int width, int height,
|
|
||||||
uint8_t **output);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif // TAV_ENCODER_EZBC_H
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,635 +0,0 @@
|
|||||||
/**
|
|
||||||
* TAV Encoder - Quantization Library
|
|
||||||
*
|
|
||||||
* Provides DWT coefficient quantization with perceptual weighting based on
|
|
||||||
* the Human Visual System (HVS). Implements separable 3D quantization for
|
|
||||||
* temporal GOP encoding.
|
|
||||||
*
|
|
||||||
* Extracted from encoder_tav.c as part of library refactoring.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <math.h>
|
|
||||||
|
|
||||||
// Forward declaration of encoder context (defined in main encoder)
|
|
||||||
typedef struct tav_encoder_s tav_encoder_t;
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Utility Functions
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
 * Restrict an integer to the closed interval [min, max].
 * The lower bound is tested first, so it wins if min > max and x < min.
 */
static inline int CLAMP(int x, int min, int max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
|
|
||||||
|
|
||||||
/**
 * Restrict a float to the closed interval [min, max].
 * Both comparisons are false for NaN, so a NaN input is returned unchanged.
 */
static inline float FCLAMP(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Constants for Perceptual Model
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
// Dead-zone quantization scaling factors (applied selectively to luma only)
|
|
||||||
#define DEAD_ZONE_FINEST_SCALE 1.0f // Full dead-zone for finest level
|
|
||||||
#define DEAD_ZONE_FINE_SCALE 0.5f // Reduced dead-zone for second-finest level
|
|
||||||
|
|
||||||
// Anisotropy parameters for horizontal vs vertical detail quantization
|
|
||||||
// Indexed by quality level; each table below holds 7 entries (indices 0-6)
|
|
||||||
static const float ANISOTROPY_MULT[] = {5.1f, 3.8f, 2.7f, 2.0f, 1.5f, 1.2f, 1.0f};
|
|
||||||
static const float ANISOTROPY_BIAS[] = {0.4f, 0.3f, 0.2f, 0.1f, 0.0f, 0.0f, 0.0f};
|
|
||||||
|
|
||||||
// Chroma-specific anisotropy (more aggressive quantization)
|
|
||||||
static const float ANISOTROPY_MULT_CHROMA[] = {7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f};
|
|
||||||
static const float ANISOTROPY_BIAS_CHROMA[] = {1.0f, 0.8f, 0.6f, 0.4f, 0.2f, 0.0f, 0.0f};
|
|
||||||
|
|
||||||
// Detail preservation factors for 2-pixel and 4-pixel structures
|
|
||||||
#define FOUR_PIXEL_DETAILER 0.88f
|
|
||||||
#define TWO_PIXEL_DETAILER 0.92f
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Subband Analysis Helper Functions
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
 * Find the decomposition level of the coefficient at (x, y).
 *
 * The region under inspection starts as the full frame and is halved once per
 * level. A coefficient outside the top-left quadrant of the current region
 * lies in a detail band (LH/HL/HH) of that level. A coefficient inside it
 * belongs to a coarser level, so the search continues in that quadrant.
 *
 * @return 1 (finest) .. decomp_levels (coarsest) for detail bands,
 *         0 when the coefficient is in the final LL band
 */
static int get_subband_level_2d(int x, int y, int width, int height, int decomp_levels) {
    int region_w = width;
    int region_h = height;
    int level = 1;

    while (level <= decomp_levels) {
        // Shrink to the top-left (LL) quadrant of the current region
        region_w >>= 1;
        region_h >>= 1;

        // Outside that quadrant: this is a detail band of the current level
        if (x >= region_w || y >= region_h) {
            return level;
        }

        // Inside it: keep descending towards the coarser levels
        level++;
    }

    // Still inside the LL quadrant after the last level
    return 0;
}
|
|
||||||
|
|
||||||
/**
 * Classify the subband that holds the coefficient at (x, y).
 *
 * Uses the same quadrant descent as get_subband_level_2d(): the region is
 * halved per level until the coefficient falls outside the top-left quadrant.
 *
 * @return 0 = LL, 1 = LH (top-right), 2 = HL (bottom-left), 3 = HH (bottom-right)
 */
static int get_subband_type_2d(int x, int y, int width, int height, int decomp_levels) {
    int region_w = width;
    int region_h = height;
    int level = 1;

    while (level <= decomp_levels) {
        region_w >>= 1;
        region_h >>= 1;

        const int in_right_half = (x >= region_w);
        const int in_bottom_half = (y >= region_h);

        if (in_right_half || in_bottom_half) {
            if (!in_right_half) {
                return 2; // HL (bottom-left)
            }
            return in_bottom_half ? 3  // HH (bottom-right)
                                  : 1; // LH (top-right)
        }

        // Top-left quadrant: belongs to a coarser level, keep descending
        level++;
    }

    // Never left the top-left quadrant: final LL band
    return 0;
}
|
|
||||||
|
|
||||||
/**
 * Legacy wrapper: decomposition level for a row-major linear index.
 * Splits the index into (column, row) using the frame width and defers to
 * get_subband_level_2d(). width must be non-zero.
 */
static int get_subband_level(int linear_idx, int width, int height, int decomp_levels) {
    const int column = linear_idx % width;
    const int row = linear_idx / width;

    return get_subband_level_2d(column, row, width, height, decomp_levels);
}
|
|
||||||
|
|
||||||
/**
 * Legacy wrapper: subband type (0=LL, 1=LH, 2=HL, 3=HH) for a row-major
 * linear index. Splits the index into (column, row) using the frame width and
 * defers to get_subband_type_2d(). width must be non-zero.
 */
static int get_subband_type(int linear_idx, int width, int height, int decomp_levels) {
    const int column = linear_idx % width;
    const int row = linear_idx / width;

    return get_subband_type_2d(column, row, width, height, decomp_levels);
}
|
|
||||||
|
|
||||||
/**
 * Map a frame index inside a GOP to its temporal subband level.
 *
 * The boundaries are num_frames >> temporal_levels, num_frames >> (temporal_levels - 1),
 * ..., num_frames >> 1. The first boundary the index falls below decides the level:
 *   - below num_frames >> temporal_levels       -> level 0 (coarsest, tLL)
 *   - below num_frames >> (temporal_levels - k) -> level k
 *   - anything else                             -> temporal_levels (finest high-pass)
 *
 * @return 0 for the coarsest subband up to temporal_levels for the finest
 */
static int get_temporal_subband_level(int frame_idx, int num_frames, int temporal_levels) {
    // Walk the boundaries from the smallest (largest shift) to the largest
    for (int shift = temporal_levels; shift > 0; shift--) {
        if (frame_idx < (num_frames >> shift)) {
            return temporal_levels - shift;
        }
    }

    // Past every boundary: high-pass band of the first temporal decomposition
    return temporal_levels;
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Perceptual Model Functions (HVS-based weighting)
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
// Linear blend between x (a = 0) and y (a = 1); a is not clamped
static float lerp(float x, float y, float a) {
    const float weight_x = 1.f - a;
    return x * weight_x + y * a;
}
|
|
||||||
|
|
||||||
/**
 * Perceptual weight curve for the LH subband (horizontal details).
 * Curve: https://www.desmos.com/calculator/mjlpwqm8ge
 *
 * The curve is piecewise: a straight line for level >= 4 and a cubic below
 * it. Both pieces evaluate to 1.2 at level 4.
 *
 * @param quality Quality level (accepted for signature parity; not used by the curve)
 * @param level   Normalised decomposition level
 * @return Perceptual weight multiplier
 */
static float perceptual_model3_LH(int quality, float level) {
    (void)quality;

    const float H4 = 1.2f; // curve value at level 4
    const float K = 2.f;   // using fixed value for fixed curve; quantiser will scale it up anyway

    // Linear segment
    if (level >= 4) {
        return H4 - ((K + 1.f) / 15.f) * (level - 4.f);
    }

    // Cubic segment
    const float K12 = K * 12.f;
    const float C3 = -1.f / 45.f * (K12 + 92);
    return (-level / 180.f) * (K12 + 5*level*level - 60*level + 252) - C3 + H4;
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Perceptual model for HL subband (vertical details).
|
|
||||||
* Derived from LH with anisotropy compensation.
|
|
||||||
*
|
|
||||||
* @param quality Quality level (0-5)
|
|
||||||
* @param LH LH subband weight
|
|
||||||
* @return Perceptual weight multiplier
|
|
||||||
*/
|
|
||||||
static float perceptual_model3_HL(int quality, float LH) {
|
|
||||||
return fmaf(LH, ANISOTROPY_MULT[quality], ANISOTROPY_BIAS[quality]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Perceptual weight for the HH subband (diagonal details).
 * Blends the LH and HL weights; the blend factor is
 * (sqrt(level) - 1) * 0.5 + 0.5, so it is 0.5 at level 1 and grows with level.
 *
 * @param LH    LH subband weight
 * @param HL    HL subband weight
 * @param level Normalised decomposition level
 * @return Perceptual weight multiplier
 */
static float perceptual_model3_HH(float LH, float HL, float level) {
    const float root_offset = sqrtf(level) - 1.f;
    const float blend = fmaf(root_offset, 0.5f, 0.5f);

    return lerp(LH, HL, blend);
}
|
|
||||||
|
|
||||||
/**
 * Perceptual weight for the LL subband (low-frequency baseband).
 * Built from the LH curve: with n = LH(level) and m = LH(level - 1) / n,
 * the result is n / m.
 *
 * @param quality Quality level (forwarded to the LH curve)
 * @param level   Normalised decomposition level
 * @return Perceptual weight multiplier
 */
static float perceptual_model3_LL(int quality, float level) {
    const float at_level = perceptual_model3_LH(quality, level);
    const float at_previous = perceptual_model3_LH(quality, level - 1);
    const float ratio = at_previous / at_level;

    return at_level / ratio;
}
|
|
||||||
|
|
||||||
/**
 * Base weight curve for chroma channels.
 * A straight line through (level 4, weight 1.0) whose slope is
 * -1 / (0.5 * quality^2 + 1), so higher quality flattens the curve.
 *
 * @param quality Quality level
 * @param level   Normalised decomposition level
 * @return Perceptual weight multiplier (not clamped here)
 */
static float perceptual_model3_chroma_basecurve(int quality, float level) {
    const float steepness = 1.0f / (0.5f * quality * quality + 1.0f);

    return 1.0f - steepness * (level - 4.0f);
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get perceptual weight for a specific subband and level.
|
|
||||||
* Implements HVS-optimized frequency weighting.
|
|
||||||
*
|
|
||||||
* NOTE: This function requires enc->quality_level field from encoder context.
|
|
||||||
*
|
|
||||||
* @param enc Encoder context (for quality_level)
|
|
||||||
* @param level0 Decomposition level (1-based: 1=finest, decomp_levels=coarsest)
|
|
||||||
* @param subband_type Subband type (0=LL, 1=LH, 2=HL, 3=HH)
|
|
||||||
* @param is_chroma 1 for chroma channels, 0 for luma
|
|
||||||
* @param max_levels Maximum decomposition levels
|
|
||||||
* @return Perceptual weight multiplier (chroma weights are clamped to 1.0-100.0; luma weights are not clamped and can fall below 1.0)
|
|
||||||
*/
|
|
||||||
static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get perceptual weight for coefficient at linear index position.
|
|
||||||
* Maps linear coefficient index to DWT subband layout.
|
|
||||||
*
|
|
||||||
* NOTE: This function requires enc->widths[]/enc->heights[] arrays from encoder context.
|
|
||||||
*
|
|
||||||
* @param enc Encoder context (for widths/heights arrays and quality_level)
|
|
||||||
* @param linear_idx Linear coefficient index
|
|
||||||
* @param width Frame width
|
|
||||||
* @param height Frame height
|
|
||||||
* @param decomp_levels Number of decomposition levels
|
|
||||||
* @param is_chroma 1 for chroma channels, 0 for luma
|
|
||||||
* @return Perceptual weight multiplier (1.0 when perceptual tuning is disabled or the index is out of range; luma weights can fall below 1.0)
|
|
||||||
*/
|
|
||||||
static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma);
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Quantization Functions
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Quantize DWT coefficients with uniform quantization and optional dead-zone.
|
|
||||||
*
|
|
||||||
* This is the basic quantization function without perceptual weighting.
|
|
||||||
* Dead-zone quantization is applied selectively to luma channel only:
|
|
||||||
* - HH1 (finest diagonal): full dead-zone
|
|
||||||
* - LH1/HL1/HH2: half dead-zone
|
|
||||||
* - Coarser levels: no dead-zone (preserve structure)
|
|
||||||
*
|
|
||||||
* @param coeffs Input DWT coefficients (float)
|
|
||||||
* @param quantised Output quantized coefficients (int16_t)
|
|
||||||
* @param size Number of coefficients
|
|
||||||
* @param quantiser Base quantizer value (1-4096)
|
|
||||||
* @param dead_zone_threshold Dead-zone threshold (0.0 = disabled)
|
|
||||||
* @param width Frame width
|
|
||||||
* @param height Frame height
|
|
||||||
* @param decomp_levels Number of decomposition levels
|
|
||||||
* @param is_chroma 1 for chroma channels, 0 for luma
|
|
||||||
*/
|
|
||||||
void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
|
|
||||||
float dead_zone_threshold, int width, int height,
|
|
||||||
int decomp_levels, int is_chroma);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Quantize DWT coefficients with per-coefficient perceptual weighting.
|
|
||||||
*
|
|
||||||
* Applies HVS-optimized frequency weighting to each coefficient based on its
|
|
||||||
* position in the DWT subband tree. Implements the full perceptual model with
|
|
||||||
* dead-zone quantization for luma.
|
|
||||||
*
|
|
||||||
* NOTE: This function requires encoder context fields:
|
|
||||||
* - enc->widths[]/enc->heights[] for subband layout
|
|
||||||
* - enc->quality_level for perceptual model
|
|
||||||
* - enc->dead_zone_threshold for dead-zone quantization
|
|
||||||
*
|
|
||||||
* @param enc Encoder context
|
|
||||||
* @param coeffs Input DWT coefficients (float)
|
|
||||||
* @param quantised Output quantized coefficients (int16_t)
|
|
||||||
* @param size Number of coefficients
|
|
||||||
* @param base_quantiser Base quantizer value (before perceptual weighting)
|
|
||||||
* @param dead_zone_threshold Dead-zone threshold (0.0 = disabled)
|
|
||||||
* @param width Frame width
|
|
||||||
* @param height Frame height
|
|
||||||
* @param decomp_levels Number of decomposition levels
|
|
||||||
* @param is_chroma 1 for chroma channels, 0 for luma
|
|
||||||
* @param frame_count Current frame number (reserved for frame-dependent logic; currently unused by the implementation)
|
|
||||||
*/
|
|
||||||
void tav_quantise_perceptual(tav_encoder_t *enc,
|
|
||||||
float *coeffs, int16_t *quantised, int size,
|
|
||||||
int base_quantiser, float dead_zone_threshold, int width, int height,
|
|
||||||
int decomp_levels, int is_chroma, int frame_count);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Quantize 3D DWT coefficients with SEPARABLE temporal-spatial quantization.
|
|
||||||
*
|
|
||||||
* After 3D DWT (temporal + spatial), GOP coefficients have this structure:
|
|
||||||
* - Temporal DWT applied first → temporal subbands at different levels
|
|
||||||
* - Spatial 2D DWT applied to each temporal subband
|
|
||||||
*
|
|
||||||
* Quantization strategy:
|
|
||||||
* 1. Compute temporal base quantizer: tH_base(level) = Qbase * 2^(beta*level^kappa)
|
|
||||||
* - tLL (level 0): coarsest temporal → smallest quantizer
|
|
||||||
* - tHH (highest level): finest temporal → largest quantizer
|
|
||||||
* 2. Apply spatial perceptual weighting to tH_base
|
|
||||||
* 3. Final quantizer: Q_effective = tH_base × spatial_weight
|
|
||||||
*
|
|
||||||
* NOTE: This function requires encoder context fields:
|
|
||||||
* - enc->encoder_preset for sports mode detection
|
|
||||||
* - enc->temporal_decomp_levels for temporal level calculation
|
|
||||||
* - enc->verbose for debug output
|
|
||||||
* - Plus all fields needed by tav_quantise_perceptual()
|
|
||||||
*
|
|
||||||
* @param enc Encoder context
|
|
||||||
* @param gop_coeffs GOP coefficients [frame][pixel] (temporal subbands)
|
|
||||||
* @param quantised Output quantized coefficients [frame][pixel]
|
|
||||||
* @param num_frames Number of temporal subband frames
|
|
||||||
* @param spatial_size Number of spatial coefficients per frame
|
|
||||||
* @param base_quantiser Base quantizer value (before temporal/spatial scaling)
|
|
||||||
* @param is_chroma 1 for chroma channels, 0 for luma
|
|
||||||
*/
|
|
||||||
void tav_quantise_3d_dwt(tav_encoder_t *enc,
|
|
||||||
float **gop_coeffs, int16_t **quantised, int num_frames,
|
|
||||||
int spatial_size, int base_quantiser, int is_chroma);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert floating-point quantizer to integer with dithering (for bitrate mode).
|
|
||||||
*
|
|
||||||
* Implements Floyd-Steinberg style error diffusion to avoid quantization
|
|
||||||
* artifacts when converting float quantizer values to integers for rate control.
|
|
||||||
*
|
|
||||||
* NOTE: This function requires encoder context fields:
|
|
||||||
* - enc->adjusted_quantiser_y_float (current float quantizer)
|
|
||||||
* - enc->dither_accumulator (accumulated error, modified by this function)
|
|
||||||
*
|
|
||||||
* @param enc Encoder context
|
|
||||||
* @return Integer quantizer value (0-254)
|
|
||||||
*/
|
|
||||||
int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc);
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Perceptual Weight Implementation (requires encoder context)
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
// NOTE: This implementation requires encoder context (enc->quality_level)
|
|
||||||
// Struct definition will be in encoder header when integrated
|
|
||||||
|
|
||||||
#ifndef TAV_ENCODER_QUANTIZE_INTERNAL
|
|
||||||
// Forward declare structure access - will be properly defined when integrated
|
|
||||||
struct tav_encoder_s {
|
|
||||||
int quality_level;
|
|
||||||
int *widths;
|
|
||||||
int *heights;
|
|
||||||
int decomp_levels;
|
|
||||||
float dead_zone_threshold;
|
|
||||||
int encoder_preset;
|
|
||||||
int temporal_decomp_levels;
|
|
||||||
int verbose;
|
|
||||||
int frame_count;
|
|
||||||
float adjusted_quantiser_y_float;
|
|
||||||
float dither_accumulator;
|
|
||||||
int width;
|
|
||||||
int height;
|
|
||||||
int perceptual_tuning;
|
|
||||||
};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
 * Perceptual weight for one subband at one decomposition level.
 *
 * level0 is first normalised onto the 1.0-6.0 range the model curves are
 * defined on (level0 == 1 maps to 1.0, level0 == max_levels maps to 6.0).
 * With a single decomposition level the normalisation would divide by zero,
 * so that case is pinned to 1.0.
 *
 * Luma:   LL, LH, HL and HH use the perceptual_model3_* curves; HL and HH are
 *         additionally scaled by the 2-pixel / 4-pixel detail factors when the
 *         normalised level is within 0.2 of 2 or 3.
 * Chroma: LL is always 1.0; the other bands use the chroma base curve
 *         (evaluated at level - 1) and are clamped to [1.0, 100.0].
 *
 * @param enc          Encoder context (quality_level is read)
 * @param level0       Decomposition level (1 = finest .. max_levels = coarsest)
 * @param subband_type 0 = LL, 1 = LH, 2 = HL, anything else = HH
 * @param is_chroma    Non-zero for chroma channels
 * @param max_levels   Total number of decomposition levels
 * @return Perceptual weight multiplier
 */
static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels) {
    // Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity

    // Guard the normalisation: (max_levels - 1) is zero for a single-level transform
    const float level = (max_levels > 1)
        ? 1.0f + ((level0 - 1.0f) / (max_levels - 1.0f)) * 5.0f
        : 1.0f;

    // strategy: more horizontal detail
    if (!is_chroma) {
        // LL subband - contains most image energy, preserve carefully
        if (subband_type == 0)
            return perceptual_model3_LL(enc->quality_level, level);

        // LH subband - horizontal details (human eyes more sensitive)
        const float LH = perceptual_model3_LH(enc->quality_level, level);
        if (subband_type == 1)
            return LH;

        // The HL model indexes the anisotropy tables by quality; keep the
        // index inside the table so an unexpected quality_level cannot read
        // out of bounds.
        const int luma_q = CLAMP(enc->quality_level, 0,
                                 (int)(sizeof(ANISOTROPY_MULT) / sizeof(ANISOTROPY_MULT[0])) - 1);

        // Extra detail preservation around normalised levels 2 and 3
        const float detailer =
            (2.2f >= level && level >= 1.8f) ? TWO_PIXEL_DETAILER :
            (3.2f >= level && level >= 2.8f) ? FOUR_PIXEL_DETAILER : 1.0f;

        // HL subband - vertical details
        const float HL = perceptual_model3_HL(luma_q, LH);
        if (subband_type == 2)
            return HL * detailer;

        // HH subband - diagonal details
        return perceptual_model3_HH(LH, HL, level) * detailer;
    }

    // CHROMA CHANNELS: Less critical for human perception, more aggressive quantisation
    const float base = perceptual_model3_chroma_basecurve(enc->quality_level, level - 1);

    // Same bounds protection for the chroma anisotropy tables
    const int chroma_q = CLAMP(enc->quality_level, 0,
                               (int)(sizeof(ANISOTROPY_MULT_CHROMA) / sizeof(ANISOTROPY_MULT_CHROMA[0])) - 1);

    if (subband_type == 0) { // LL chroma - still important but less than luma
        return 1.0f;
    } else if (subband_type == 1) { // LH chroma - horizontal chroma details
        return FCLAMP(base, 1.0f, 100.0f);
    } else if (subband_type == 2) { // HL chroma - vertical chroma details (even less critical)
        return FCLAMP(base * ANISOTROPY_MULT_CHROMA[chroma_q], 1.0f, 100.0f);
    } else { // HH chroma - diagonal chroma details (most aggressive)
        return FCLAMP(base * ANISOTROPY_MULT_CHROMA[chroma_q] + ANISOTROPY_BIAS_CHROMA[chroma_q], 1.0f, 100.0f);
    }
}
|
|
||||||
|
|
||||||
/**
 * Perceptual weight for the coefficient at a linear index.
 *
 * The index is matched against consecutive ranges: first one LL range of
 * widths[decomp_levels] * heights[decomp_levels] coefficients, then, for
 * level = decomp_levels down to 1, three equally sized ranges (LH, HL, HH)
 * of widths[s] * heights[s] coefficients with s = decomp_levels - level + 1.
 * The first range containing the index decides the (level, subband) pair
 * passed to get_perceptual_weight().
 *
 * @param enc           Encoder context (perceptual_tuning, widths, heights are read)
 * @param linear_idx    Linear coefficient index
 * @param width         Frame width (not used by the lookup)
 * @param height        Frame height (not used by the lookup)
 * @param decomp_levels Number of decomposition levels
 * @param is_chroma     Non-zero for chroma channels
 * @return Perceptual weight; 1.0 when tuning is disabled or no range matches
 */
static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma) {
    (void)width;
    (void)height;

    // Perceptual tuning off: every coefficient gets the neutral weight
    if (!enc->perceptual_tuning) {
        return 1.0f;
    }

    // range_end is the exclusive end of the range currently being tested
    int range_end = enc->widths[decomp_levels] * enc->heights[decomp_levels];

    // LL range comes first
    if (linear_idx < range_end) {
        return get_perceptual_weight(enc, decomp_levels, 0, is_chroma, decomp_levels);
    }

    // Then LH (1), HL (2), HH (3) for each level from decomp_levels down to 1
    for (int level = decomp_levels; level >= 1; level--) {
        const int slot = decomp_levels - level + 1;
        const int band_len = enc->widths[slot] * enc->heights[slot];

        for (int band = 1; band <= 3; band++) {
            range_end += band_len;
            if (linear_idx < range_end) {
                return get_perceptual_weight(enc, level, band, is_chroma, decomp_levels);
            }
        }
    }

    // Index lies beyond every range
    return 1.0f;
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Quantization Function Implementations
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
|
|
||||||
float dead_zone_threshold, int width, int height,
|
|
||||||
int decomp_levels, int is_chroma) {
|
|
||||||
float effective_q = quantiser;
|
|
||||||
effective_q = FCLAMP(effective_q, 1.0f, 4096.0f);
|
|
||||||
|
|
||||||
// Scalar implementation (AVX-512 version would go in separate optimized module)
|
|
||||||
for (int i = 0; i < size; i++) {
|
|
||||||
float quantised_val = coeffs[i] / effective_q;
|
|
||||||
|
|
||||||
// Apply dead-zone quantisation ONLY to luma channel and specific subbands
|
|
||||||
if (dead_zone_threshold > 0.0f && !is_chroma) {
|
|
||||||
int level = get_subband_level(i, width, height, decomp_levels);
|
|
||||||
int subband_type = get_subband_type(i, width, height, decomp_levels);
|
|
||||||
float level_threshold = 0.0f;
|
|
||||||
|
|
||||||
if (level == 1) {
|
|
||||||
// Finest level
|
|
||||||
if (subband_type == 3) {
|
|
||||||
// HH1: full dead-zone
|
|
||||||
level_threshold = dead_zone_threshold * DEAD_ZONE_FINEST_SCALE;
|
|
||||||
} else if (subband_type == 1 || subband_type == 2) {
|
|
||||||
// LH1, HL1: half dead-zone
|
|
||||||
level_threshold = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
|
|
||||||
}
|
|
||||||
} else if (level == 2) {
|
|
||||||
// Second-finest level
|
|
||||||
if (subband_type == 3) {
|
|
||||||
// HH2: half dead-zone
|
|
||||||
level_threshold = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fabsf(quantised_val) <= level_threshold) {
|
|
||||||
quantised_val = 0.0f;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Quantisation of DWT coefficients with per-coefficient perceptual weighting.
 *
 * For every coefficient the base quantiser (clamped to 1..4096) is multiplied
 * by the weight from get_perceptual_weight_for_position(); the coefficient is
 * divided by that product, optionally zeroed by the luma dead-zone, rounded
 * half away from zero and saturated to the int16_t range.
 *
 * Dead-zone (luma only, and only when dead_zone_threshold > 0):
 *   - level 1 HH:        dead_zone_threshold * DEAD_ZONE_FINEST_SCALE
 *   - level 1 LH/HL:     dead_zone_threshold * DEAD_ZONE_FINE_SCALE
 *   - level 2 HH:        dead_zone_threshold * DEAD_ZONE_FINE_SCALE
 *   - everything else:   threshold 0 (no effect)
 *
 * Saturation is done in the float domain before converting to an integer:
 * converting a float outside the int range (or NaN) is undefined behaviour
 * in C. NaN is quantised to 0.
 *
 * @param enc                 Encoder context (read by the weight lookup)
 * @param coeffs              Input DWT coefficients
 * @param quantised           Output quantised coefficients (size entries)
 * @param size                Number of coefficients
 * @param base_quantiser      Base quantiser before weighting, clamped to 1..4096
 * @param dead_zone_threshold Dead-zone threshold (<= 0 disables it)
 * @param width               Frame width
 * @param height              Frame height
 * @param decomp_levels       Number of decomposition levels
 * @param is_chroma           Non-zero for chroma channels
 * @param frame_count         Frame number; accepted but not used here
 */
void tav_quantise_perceptual(tav_encoder_t *enc,
                             float *coeffs, int16_t *quantised, int size,
                             int base_quantiser, float dead_zone_threshold, int width, int height,
                             int decomp_levels, int is_chroma, int frame_count) {
    (void)frame_count;

    const float effective_base_q = FCLAMP((float)base_quantiser, 1.0f, 4096.0f);

    // Loop-invariant: dead-zone applies ONLY to the luma channel
    const int use_dead_zone = (dead_zone_threshold > 0.0f) && !is_chroma;

    for (int i = 0; i < size; i++) {
        // Apply perceptual weight based on coefficient's position in DWT layout
        const float weight = get_perceptual_weight_for_position(enc, i, width, height, decomp_levels, is_chroma);
        const float effective_q = effective_base_q * weight;
        float quantised_val = coeffs[i] / effective_q;

        if (use_dead_zone) {
            const int level = get_subband_level(i, width, height, decomp_levels);
            const int subband_type = get_subband_type(i, width, height, decomp_levels);
            float level_threshold = 0.0f;

            if (level == 1 && subband_type == 3) {
                // HH1: full dead-zone
                level_threshold = dead_zone_threshold * DEAD_ZONE_FINEST_SCALE;
            } else if ((level == 1 && (subband_type == 1 || subband_type == 2)) ||
                       (level == 2 && subband_type == 3)) {
                // LH1, HL1, HH2: half dead-zone
                level_threshold = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
            }

            if (fabsf(quantised_val) <= level_threshold) {
                quantised_val = 0.0f;
            }
        }

        // Round half away from zero, then saturate while still a float
        float rounded = quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f);
        if (isnan(rounded)) {
            rounded = 0.0f;
        }
        rounded = FCLAMP(rounded, -32768.0f, 32767.0f);

        quantised[i] = (int16_t)(int)rounded;
    }
}
|
|
||||||
|
|
||||||
/**
 * Separable temporal-spatial quantisation of a GOP of 3D-DWT coefficients.
 *
 * Each temporal subband frame is handled independently:
 *   1. its temporal level is looked up from its index in the GOP;
 *   2. the base quantiser is scaled by 2^(beta * level^kappa), rounded and
 *      clamped to 1..255;
 *   3. the frame is quantised spatially by tav_quantise_perceptual() with
 *      that scaled quantiser.
 *
 * beta/kappa are 0.6/1.14 normally and 0.0/1.0 when bit 0 of
 * enc->encoder_preset is set (sports preset), which makes the temporal
 * scale 1 for every level.
 *
 * @param enc            Encoder context
 * @param gop_coeffs     Input coefficients, one array per temporal subband frame
 * @param quantised      Output coefficients, one array per temporal subband frame
 * @param num_frames     Number of temporal subband frames
 * @param spatial_size   Number of spatial coefficients per frame
 * @param base_quantiser Base quantiser before temporal/spatial scaling
 * @param is_chroma      Non-zero for chroma channels
 */
void tav_quantise_3d_dwt(tav_encoder_t *enc,
                         float **gop_coeffs, int16_t **quantised, int num_frames,
                         int spatial_size, int base_quantiser, int is_chroma) {
    // Sports preset: use finer temporal quantisation (less aggressive)
    const int sports_preset = enc->encoder_preset & 0x01;
    const float BETA = sports_preset ? 0.0f : 0.6f;
    const float KAPPA = sports_preset ? 1.0f : 1.14f;

    for (int t = 0; t < num_frames; t++) {
        // Temporal subband level of this frame
        const int temporal_level = get_temporal_subband_level(t, num_frames, enc->temporal_decomp_levels);

        // Exponentially scaled base quantiser for that level
        const float temporal_scale = powf(2.0f, BETA * powf(temporal_level, KAPPA));
        const float temporal_quantiser = base_quantiser * temporal_scale;
        const int temporal_base_quantiser = CLAMP((int)roundf(temporal_quantiser), 1, 255);

        // Spatial quantisation inside this temporal subband. Perceptual
        // weighting is switched on or off inside the weight lookup, which
        // reads enc->perceptual_tuning.
        tav_quantise_perceptual(enc,
                                gop_coeffs[t],             // input: spatial coefficients of this subband
                                quantised[t],              // output: quantised spatial coefficients
                                spatial_size,
                                temporal_base_quantiser,
                                enc->dead_zone_threshold,
                                enc->width,
                                enc->height,
                                enc->decomp_levels,
                                is_chroma,
                                enc->frame_count + t);     // frame number

        /*if (enc->verbose && (t == 0 || t == num_frames - 1)) {
            printf("  Temporal subband %d: level=%d, tH_base=%d\n",
                   t, temporal_level, temporal_base_quantiser);
        }*/
    }
}
|
|
||||||
|
|
||||||
/**
 * Convert the float luma quantiser to an integer with error feedback.
 *
 * The error carried over from the previous call is added to
 * enc->adjusted_quantiser_y_float, the sum is rounded by adding 0.5 and
 * truncating, and half of the resulting rounding error is stored in
 * enc->dither_accumulator for the next call. The error is measured against
 * the rounded value before it is clamped.
 *
 * @param enc Encoder context (dither_accumulator is updated)
 * @return Integer quantiser clamped to 0..254
 */
int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc) {
    // Target value including the error carried over from the previous call
    const float target = enc->adjusted_quantiser_y_float + enc->dither_accumulator;

    // Round to nearest integer
    const int rounded = (int)(target + 0.5f);

    // Diffuse 50% of the rounding error to the next frame
    enc->dither_accumulator = (target - (float)rounded) * 0.5f;

    // Clamp to valid range
    return CLAMP(rounded, 0, 254);
}
|
|
||||||
@@ -1,138 +0,0 @@
|
|||||||
/**
|
|
||||||
* TAV Encoder - Quantization Library
|
|
||||||
*
|
|
||||||
* Public API for DWT coefficient quantization with perceptual weighting.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef TAV_ENCODER_QUANTIZE_H
|
|
||||||
#define TAV_ENCODER_QUANTIZE_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Forward declaration of encoder context (defined in main encoder)
|
|
||||||
typedef struct tav_encoder_s tav_encoder_t;
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Uniform Quantization
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Quantize DWT coefficients with uniform quantization and optional dead-zone.
|
|
||||||
*
|
|
||||||
* This is the basic quantization function without perceptual weighting.
|
|
||||||
* Dead-zone quantization is applied selectively to luma channel only:
|
|
||||||
* - HH1 (finest diagonal): full dead-zone
|
|
||||||
* - LH1/HL1/HH2: half dead-zone
|
|
||||||
* - Coarser levels: no dead-zone (preserve structure)
|
|
||||||
*
|
|
||||||
* @param coeffs Input DWT coefficients (float)
|
|
||||||
* @param quantised Output quantized coefficients (int16_t)
|
|
||||||
* @param size Number of coefficients
|
|
||||||
* @param quantiser Base quantizer value (1-4096)
|
|
||||||
* @param dead_zone_threshold Dead-zone threshold (0.0 = disabled)
|
|
||||||
* @param width Frame width
|
|
||||||
* @param height Frame height
|
|
||||||
* @param decomp_levels Number of decomposition levels
|
|
||||||
* @param is_chroma 1 for chroma channels, 0 for luma
|
|
||||||
*/
|
|
||||||
void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
|
|
||||||
float dead_zone_threshold, int width, int height,
|
|
||||||
int decomp_levels, int is_chroma);
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Perceptual Quantization
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Quantize DWT coefficients with per-coefficient perceptual weighting.
|
|
||||||
*
|
|
||||||
* Applies HVS-optimized frequency weighting to each coefficient based on its
|
|
||||||
* position in the DWT subband tree. Implements the full perceptual model with
|
|
||||||
* dead-zone quantization for luma.
|
|
||||||
*
|
|
||||||
* NOTE: This function requires encoder context fields:
|
|
||||||
* - enc->widths[]/enc->heights[] for subband layout
|
|
||||||
* - enc->quality_level for perceptual model
|
|
||||||
* - enc->dead_zone_threshold for dead-zone quantization
|
|
||||||
*
|
|
||||||
* @param enc Encoder context
|
|
||||||
* @param coeffs Input DWT coefficients (float)
|
|
||||||
* @param quantised Output quantized coefficients (int16_t)
|
|
||||||
* @param size Number of coefficients
|
|
||||||
* @param base_quantiser Base quantizer value (before perceptual weighting)
|
|
||||||
* @param dead_zone_threshold Dead-zone threshold (0.0 = disabled)
|
|
||||||
* @param width Frame width
|
|
||||||
* @param height Frame height
|
|
||||||
* @param decomp_levels Number of decomposition levels
|
|
||||||
* @param is_chroma 1 for chroma channels, 0 for luma
|
|
||||||
* @param frame_count Current frame number (for any frame-dependent logic)
|
|
||||||
*/
|
|
||||||
void tav_quantise_perceptual(tav_encoder_t *enc,
|
|
||||||
float *coeffs, int16_t *quantised, int size,
|
|
||||||
int base_quantiser, float dead_zone_threshold, int width, int height,
|
|
||||||
int decomp_levels, int is_chroma, int frame_count);
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// 3D GOP Quantization
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Quantize 3D DWT coefficients with SEPARABLE temporal-spatial quantization.
|
|
||||||
*
|
|
||||||
* After 3D DWT (temporal + spatial), GOP coefficients have this structure:
|
|
||||||
* - Temporal DWT applied first → temporal subbands at different levels
|
|
||||||
* - Spatial 2D DWT applied to each temporal subband
|
|
||||||
*
|
|
||||||
* Quantization strategy:
|
|
||||||
* 1. Compute temporal base quantizer: tH_base(level) = Qbase * 2^(beta*level^kappa)
|
|
||||||
* - tLL (level 0): coarsest temporal → smallest quantizer
|
|
||||||
* - tHH (highest level): finest temporal → largest quantizer
|
|
||||||
* 2. Apply spatial perceptual weighting to tH_base
|
|
||||||
* 3. Final quantizer: Q_effective = tH_base × spatial_weight
|
|
||||||
*
|
|
||||||
* NOTE: This function requires encoder context fields:
|
|
||||||
* - enc->encoder_preset for sports mode detection
|
|
||||||
* - enc->temporal_decomp_levels for temporal level calculation
|
|
||||||
* - enc->verbose for debug output
|
|
||||||
* - Plus all fields needed by tav_quantise_perceptual()
|
|
||||||
*
|
|
||||||
* @param enc Encoder context
|
|
||||||
* @param gop_coeffs GOP coefficients [frame][pixel] (temporal subbands)
|
|
||||||
* @param quantised Output quantized coefficients [frame][pixel]
|
|
||||||
* @param num_frames Number of temporal subband frames
|
|
||||||
* @param spatial_size Number of spatial coefficients per frame
|
|
||||||
* @param base_quantiser Base quantizer value (before temporal/spatial scaling)
|
|
||||||
* @param is_chroma 1 for chroma channels, 0 for luma
|
|
||||||
*/
|
|
||||||
void tav_quantise_3d_dwt(tav_encoder_t *enc,
|
|
||||||
float **gop_coeffs, int16_t **quantised, int num_frames,
|
|
||||||
int spatial_size, int base_quantiser, int is_chroma);
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Rate Control
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert floating-point quantizer to integer with dithering (for bitrate mode).
|
|
||||||
*
|
|
||||||
* Implements Floyd-Steinberg style error diffusion to avoid quantization
|
|
||||||
* artifacts when converting float quantizer values to integers for rate control.
|
|
||||||
*
|
|
||||||
* NOTE: This function requires encoder context fields:
|
|
||||||
* - enc->adjusted_quantiser_y_float (current float quantizer)
|
|
||||||
* - enc->dither_accumulator (accumulated error, modified by this function)
|
|
||||||
*
|
|
||||||
* @param enc Encoder context
|
|
||||||
* @return Integer quantizer value (0-254)
|
|
||||||
*/
|
|
||||||
int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif // TAV_ENCODER_QUANTIZE_H
|
|
||||||
@@ -1,159 +0,0 @@
|
|||||||
/**
|
|
||||||
* TAV Encoder Library - Tile Processing Implementation
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "tav_encoder_tile.h"
|
|
||||||
#include "tav_encoder_dwt.h"
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
#define CLAMP(x, min, max) ((x) < (min) ? (min) : ((x) > (max) ? (max) : (x)))
|
|
||||||
|
|
||||||
void tav_extract_padded_tile(const float *frame_y, const float *frame_co, const float *frame_cg,
|
|
||||||
int frame_width, int frame_height,
|
|
||||||
int tile_x, int tile_y,
|
|
||||||
float *padded_y, float *padded_co, float *padded_cg) {
|
|
||||||
const int core_start_x = tile_x * TAV_TILE_SIZE_X;
|
|
||||||
const int core_start_y = tile_y * TAV_TILE_SIZE_Y;
|
|
||||||
|
|
||||||
// Process row by row with bulk copying for core region where possible
|
|
||||||
for (int py = 0; py < TAV_PADDED_TILE_SIZE_Y; py++) {
|
|
||||||
// Map padded row to source image row
|
|
||||||
int src_y = core_start_y + py - TAV_TILE_MARGIN;
|
|
||||||
|
|
||||||
// Handle vertical boundary conditions with mirroring
|
|
||||||
if (src_y < 0) {
|
|
||||||
src_y = -src_y;
|
|
||||||
} else if (src_y >= frame_height) {
|
|
||||||
src_y = frame_height - 1 - (src_y - frame_height);
|
|
||||||
}
|
|
||||||
src_y = CLAMP(src_y, 0, frame_height - 1);
|
|
||||||
|
|
||||||
// Calculate source and destination row offsets
|
|
||||||
const int padded_row_offset = py * TAV_PADDED_TILE_SIZE_X;
|
|
||||||
const int src_row_offset = src_y * frame_width;
|
|
||||||
|
|
||||||
// Margin boundaries in padded tile
|
|
||||||
const int core_start_px = TAV_TILE_MARGIN;
|
|
||||||
const int core_end_px = TAV_TILE_MARGIN + TAV_TILE_SIZE_X;
|
|
||||||
|
|
||||||
// Check if core region is entirely within frame bounds
|
|
||||||
const int core_src_start_x = core_start_x;
|
|
||||||
const int core_src_end_x = core_start_x + TAV_TILE_SIZE_X;
|
|
||||||
|
|
||||||
if (core_src_start_x >= 0 && core_src_end_x <= frame_width) {
|
|
||||||
// Bulk copy core region in one operation
|
|
||||||
const int src_core_offset = src_row_offset + core_src_start_x;
|
|
||||||
|
|
||||||
memcpy(&padded_y[padded_row_offset + core_start_px],
|
|
||||||
&frame_y[src_core_offset],
|
|
||||||
TAV_TILE_SIZE_X * sizeof(float));
|
|
||||||
memcpy(&padded_co[padded_row_offset + core_start_px],
|
|
||||||
&frame_co[src_core_offset],
|
|
||||||
TAV_TILE_SIZE_X * sizeof(float));
|
|
||||||
memcpy(&padded_cg[padded_row_offset + core_start_px],
|
|
||||||
&frame_cg[src_core_offset],
|
|
||||||
TAV_TILE_SIZE_X * sizeof(float));
|
|
||||||
|
|
||||||
// Handle left margin pixels individually
|
|
||||||
for (int px = 0; px < core_start_px; px++) {
|
|
||||||
int src_x = core_start_x + px - TAV_TILE_MARGIN;
|
|
||||||
if (src_x < 0) src_x = -src_x;
|
|
||||||
src_x = CLAMP(src_x, 0, frame_width - 1);
|
|
||||||
|
|
||||||
int src_idx = src_row_offset + src_x;
|
|
||||||
int padded_idx = padded_row_offset + px;
|
|
||||||
|
|
||||||
padded_y[padded_idx] = frame_y[src_idx];
|
|
||||||
padded_co[padded_idx] = frame_co[src_idx];
|
|
||||||
padded_cg[padded_idx] = frame_cg[src_idx];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle right margin pixels individually
|
|
||||||
for (int px = core_end_px; px < TAV_PADDED_TILE_SIZE_X; px++) {
|
|
||||||
int src_x = core_start_x + px - TAV_TILE_MARGIN;
|
|
||||||
if (src_x >= frame_width) {
|
|
||||||
src_x = frame_width - 1 - (src_x - frame_width);
|
|
||||||
}
|
|
||||||
src_x = CLAMP(src_x, 0, frame_width - 1);
|
|
||||||
|
|
||||||
int src_idx = src_row_offset + src_x;
|
|
||||||
int padded_idx = padded_row_offset + px;
|
|
||||||
|
|
||||||
padded_y[padded_idx] = frame_y[src_idx];
|
|
||||||
padded_co[padded_idx] = frame_co[src_idx];
|
|
||||||
padded_cg[padded_idx] = frame_cg[src_idx];
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Fallback: process entire row pixel by pixel (for edge tiles)
|
|
||||||
for (int px = 0; px < TAV_PADDED_TILE_SIZE_X; px++) {
|
|
||||||
int src_x = core_start_x + px - TAV_TILE_MARGIN;
|
|
||||||
|
|
||||||
// Handle horizontal boundary conditions with mirroring
|
|
||||||
if (src_x < 0) {
|
|
||||||
src_x = -src_x;
|
|
||||||
} else if (src_x >= frame_width) {
|
|
||||||
src_x = frame_width - 1 - (src_x - frame_width);
|
|
||||||
}
|
|
||||||
src_x = CLAMP(src_x, 0, frame_width - 1);
|
|
||||||
|
|
||||||
int src_idx = src_row_offset + src_x;
|
|
||||||
int padded_idx = padded_row_offset + px;
|
|
||||||
|
|
||||||
padded_y[padded_idx] = frame_y[src_idx];
|
|
||||||
padded_co[padded_idx] = frame_co[src_idx];
|
|
||||||
padded_cg[padded_idx] = frame_cg[src_idx];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use existing 2D DWT from tav_encoder_dwt.c
|
|
||||||
// For padded tiles, we simply call the existing function with tile dimensions
|
|
||||||
|
|
||||||
void tav_dwt_2d_forward_padded_tile(float *tile_data, int levels, int filter_type) {
|
|
||||||
// Use the existing 2D DWT with padded tile dimensions
|
|
||||||
tav_dwt_2d_forward(tile_data, TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y,
|
|
||||||
levels, filter_type);
|
|
||||||
}
|
|
||||||
|
|
||||||
void tav_dwt_2d_inverse_padded_tile(float *tile_data, int levels, int filter_type) {
    // Placeholder: the inverse transform for arbitrary dimensions is not yet
    // implemented in the library (the decoder uses a different code path).
    // The arguments are deliberately ignored and tile_data is left untouched.
    (void)filter_type;
    (void)levels;
    (void)tile_data;
}
|
|
||||||
|
|
||||||
void tav_crop_tile_margins(const float *padded_data, float *core_data) {
|
|
||||||
for (int y = 0; y < TAV_TILE_SIZE_Y; y++) {
|
|
||||||
const int padded_row = (y + TAV_TILE_MARGIN) * TAV_PADDED_TILE_SIZE_X + TAV_TILE_MARGIN;
|
|
||||||
const int core_row = y * TAV_TILE_SIZE_X;
|
|
||||||
memcpy(&core_data[core_row], &padded_data[padded_row], TAV_TILE_SIZE_X * sizeof(float));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void tav_crop_tile_margins_edge(const float *padded_data, float *core_data,
|
|
||||||
int actual_width, int actual_height) {
|
|
||||||
for (int y = 0; y < actual_height; y++) {
|
|
||||||
const int padded_row = (y + TAV_TILE_MARGIN) * TAV_PADDED_TILE_SIZE_X + TAV_TILE_MARGIN;
|
|
||||||
const int core_row = y * actual_width;
|
|
||||||
memcpy(&core_data[core_row], &padded_data[padded_row], actual_width * sizeof(float));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void tav_get_tile_dimensions(int frame_width, int frame_height,
|
|
||||||
int tile_x, int tile_y,
|
|
||||||
int *tile_width, int *tile_height) {
|
|
||||||
// Calculate the starting position of this tile
|
|
||||||
int start_x = tile_x * TAV_TILE_SIZE_X;
|
|
||||||
int start_y = tile_y * TAV_TILE_SIZE_Y;
|
|
||||||
|
|
||||||
// Calculate how much of the frame is left from this starting position
|
|
||||||
int remaining_width = frame_width - start_x;
|
|
||||||
int remaining_height = frame_height - start_y;
|
|
||||||
|
|
||||||
// Tile width is the minimum of standard tile size and remaining width
|
|
||||||
*tile_width = (remaining_width < TAV_TILE_SIZE_X) ? remaining_width : TAV_TILE_SIZE_X;
|
|
||||||
*tile_height = (remaining_height < TAV_TILE_SIZE_Y) ? remaining_height : TAV_TILE_SIZE_Y;
|
|
||||||
}
|
|
||||||
@@ -1,103 +0,0 @@
|
|||||||
/**
|
|
||||||
* TAV Encoder Library - Tile Processing
|
|
||||||
*
|
|
||||||
* Functions for padded tile extraction and DWT processing.
|
|
||||||
* Used when video dimensions exceed monoblock threshold (720x576).
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef TAV_ENCODER_TILE_H
#define TAV_ENCODER_TILE_H

#include <stdint.h>
#include <stddef.h>
#include "../../include/tav_encoder_lib.h"

// Give the declarations C linkage when this header is included from C++,
// matching the other encoder library headers.
#ifdef __cplusplus
extern "C" {
#endif

// Tile dimensions (from header)
// TAV_TILE_SIZE_X = 640, TAV_TILE_SIZE_Y = 540
// TAV_PADDED_TILE_SIZE_X = 704, TAV_PADDED_TILE_SIZE_Y = 604
// TAV_TILE_MARGIN = 32

/**
 * Extract a padded tile from full-frame YCoCg buffers.
 *
 * Extracts a tile at position (tile_x, tile_y) with TAV_TILE_MARGIN pixels
 * of padding on all sides for seamless DWT processing. Uses symmetric
 * extension (mirroring) at frame boundaries.
 *
 * @param frame_y       Full frame Y channel
 * @param frame_co      Full frame Co channel
 * @param frame_cg      Full frame Cg channel
 * @param frame_width   Full frame width
 * @param frame_height  Full frame height
 * @param tile_x        Tile X index (0-based)
 * @param tile_y        Tile Y index (0-based)
 * @param padded_y      Output: Padded tile Y (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y floats)
 * @param padded_co     Output: Padded tile Co
 * @param padded_cg     Output: Padded tile Cg
 */
void tav_extract_padded_tile(const float *frame_y, const float *frame_co, const float *frame_cg,
                             int frame_width, int frame_height,
                             int tile_x, int tile_y,
                             float *padded_y, float *padded_co, float *padded_cg);

/**
 * Apply 2D DWT forward transform to a padded tile.
 *
 * Uses fixed PADDED_TILE_SIZE dimensions (704x604) for optimal performance.
 *
 * @param tile_data    Tile data (modified in-place)
 * @param levels       Number of decomposition levels
 * @param filter_type  Wavelet filter type (0=CDF 5/3, 1=CDF 9/7, etc.)
 */
void tav_dwt_2d_forward_padded_tile(float *tile_data, int levels, int filter_type);

/**
 * Apply 2D DWT inverse transform to a padded tile.
 *
 * @param tile_data    Tile data (modified in-place)
 * @param levels       Number of decomposition levels
 * @param filter_type  Wavelet filter type
 */
void tav_dwt_2d_inverse_padded_tile(float *tile_data, int levels, int filter_type);

/**
 * Crop a padded tile to its core region (removing margins).
 *
 * Extracts the central TAV_TILE_SIZE_X × TAV_TILE_SIZE_Y region from a padded tile.
 *
 * @param padded_data  Padded tile (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y)
 * @param core_data    Output: Core tile (TILE_SIZE_X * TILE_SIZE_Y)
 */
void tav_crop_tile_margins(const float *padded_data, float *core_data);

/**
 * Crop a padded tile to actual dimensions for edge tiles.
 *
 * For tiles at the right/bottom edges of a frame, the actual tile may be
 * smaller than TILE_SIZE_X × TILE_SIZE_Y. This function handles that case.
 *
 * @param padded_data    Padded tile (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y)
 * @param core_data      Output: Core tile data
 * @param actual_width   Actual tile width (may be < TILE_SIZE_X for edge tiles)
 * @param actual_height  Actual tile height (may be < TILE_SIZE_Y for edge tiles)
 */
void tav_crop_tile_margins_edge(const float *padded_data, float *core_data,
                                int actual_width, int actual_height);

/**
 * Calculate actual tile dimensions for a given tile position.
 *
 * Edge tiles may be smaller than the standard tile size.
 *
 * @param frame_width   Full frame width
 * @param frame_height  Full frame height
 * @param tile_x        Tile X index
 * @param tile_y        Tile Y index
 * @param tile_width    Output: Actual tile width
 * @param tile_height   Output: Actual tile height
 */
void tav_get_tile_dimensions(int frame_width, int frame_height,
                             int tile_x, int tile_y,
                             int *tile_width, int *tile_height);

#ifdef __cplusplus
}
#endif

#endif // TAV_ENCODER_TILE_H
|
|
||||||
@@ -1,441 +0,0 @@
|
|||||||
/**
|
|
||||||
* TAV Encoder - Utilities Library
|
|
||||||
*
|
|
||||||
* Common utility functions and helpers used across the encoder.
|
|
||||||
* Includes math utilities, clamping, filename generation, etc.
|
|
||||||
*
|
|
||||||
* Extracted from encoder_tav.c as part of library refactoring.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define _POSIX_C_SOURCE 200112L
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include <math.h>
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Math Utilities
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
 * Restrict an integer to the closed interval [min, max].
 * The lower bound is tested first.
 */
int tav_clamp_int(int x, int min, int max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
|
|
||||||
|
|
||||||
/**
 * Restrict a float to the closed interval [min, max].
 * The lower bound is tested first.
 */
float tav_clamp_float(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
|
|
||||||
|
|
||||||
/**
 * Restrict a double to the closed interval [min, max].
 * The lower bound is tested first.
 */
double tav_clamp_double(double x, double min, double max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Round double to nearest integer.
|
|
||||||
*/
|
|
||||||
int tav_iround(double v) {
|
|
||||||
return (int)floor(v + 0.5);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Blend two floats linearly.
 * @param a Value returned for t = 0
 * @param b Value returned for t = 1
 * @param t Blend factor (0.0 to 1.0)
 * @return a weighted by (1 - t) plus b weighted by t
 */
float tav_lerp(float a, float b, float t) {
    const float weight_a = 1.0f - t;
    return a * weight_a + b * t;
}
|
|
||||||
|
|
||||||
/**
 * Blend two doubles linearly: a weighted by (1 - t) plus b weighted by t.
 */
double tav_lerp_double(double a, double b, double t) {
    const double weight_a = 1.0 - t;
    return a * weight_a + b * t;
}
|
|
||||||
|
|
||||||
/**
 * Return the smaller of two integers.
 */
int tav_min_int(int a, int b) {
    if (a < b) {
        return a;
    }
    return b;
}
|
|
||||||
|
|
||||||
/**
 * Return the larger of two integers.
 */
int tav_max_int(int a, int b) {
    if (a > b) {
        return a;
    }
    return b;
}
|
|
||||||
|
|
||||||
/**
 * Return the smaller of two floats.
 * b is returned whenever the comparison a < b is false.
 */
float tav_min_float(float a, float b) {
    if (a < b) {
        return a;
    }
    return b;
}
|
|
||||||
|
|
||||||
/**
 * Return the larger of two floats.
 * b is returned whenever the comparison a > b is false.
 */
float tav_max_float(float a, float b) {
    if (a > b) {
        return a;
    }
    return b;
}
|
|
||||||
|
|
||||||
/**
 * Return the magnitude of an integer (negative inputs are negated).
 */
int tav_abs_int(int x) {
    if (x < 0) {
        return -x;
    }
    return x;
}
|
|
||||||
|
|
||||||
/**
 * Return the magnitude of a float (inputs below zero are negated).
 */
float tav_abs_float(float x) {
    if (x < 0.0f) {
        return -x;
    }
    return x;
}
|
|
||||||
|
|
||||||
/**
 * Signum of an integer: 1 for positive, -1 for negative, 0 for zero.
 */
int tav_sign(int x) {
    if (x > 0) {
        return 1;
    }
    if (x < 0) {
        return -1;
    }
    return 0;
}
|
|
||||||
|
|
||||||
/**
 * Test whether an integer is a power of two.
 * Returns 1 for 1, 2, 4, 8, ... and 0 for everything else, including
 * zero and negative values.
 */
int tav_is_power_of_2(int x) {
    if (x <= 0) {
        return 0;
    }
    // A power of two has a single set bit, which x - 1 clears.
    return (x & (x - 1)) == 0;
}
|
|
||||||
|
|
||||||
/**
 * Smallest power of two that is greater than or equal to x.
 * Returns 1 for x <= 0; a value that already is a power of two is
 * returned unchanged.
 */
int tav_next_power_of_2(int x) {
    if (x <= 0) {
        return 1;
    }

    // Smear the highest set bit of (x - 1) into every lower position,
    // then step up to the next power of two.
    x -= 1;
    for (int shift = 1; shift <= 16; shift <<= 1) {
        x |= x >> shift;
    }
    return x + 1;
}
|
|
||||||
|
|
||||||
/**
 * Integer base-2 logarithm, rounded down.
 * Returns -1 for x <= 0.
 */
int tav_floor_log2(int x) {
    // Starts at -1 so that the first halving step of a positive x yields 0.
    int exponent = -1;
    for (; x > 0; x >>= 1) {
        exponent++;
    }
    return exponent;
}
|
|
||||||
|
|
||||||
/**
 * Integer base-2 logarithm, rounded up.
 * Returns -1 for x <= 0.
 */
int tav_ceil_log2(int x) {
    if (x <= 0) {
        return -1;
    }

    // For x >= 1 the answer is the number of bits needed to write x - 1:
    // 0 bits for x == 1, k bits for 2^(k-1) < x <= 2^k.
    int bits = 0;
    for (int rest = x - 1; rest > 0; rest >>= 1) {
        bits++;
    }
    return bits;
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Random Filename Generation
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
 * Write "/tmp/" followed by 32 random alphanumeric characters into
 * filename[0..36]. No terminator is written; the callers append the
 * extension.
 *
 * rand() is seeded exactly once for both public generators. Seeding it in
 * each generator separately would restart the sequence with the same
 * time(NULL) value, so the first name from each generator would share the
 * same random part when requested within the same second.
 *
 * NOTE(review): names derived from rand() seeded with the clock are
 * predictable; if the file is created in a shared /tmp, the caller should
 * open it exclusively (or use mkstemp) - confirm against the call sites.
 */
static void tav_fill_random_tmp_stem(char *filename) {
    static const char charset[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    static int seeded = 0;
    const int charset_size = (int)(sizeof(charset) - 1);

    if (!seeded) {
        srand((unsigned int)time(NULL));
        seeded = 1;
    }

    // Prefix occupies indices 0..4, the random part indices 5..36.
    memcpy(filename, "/tmp/", 5);
    for (int i = 0; i < 32; i++) {
        filename[5 + i] = charset[rand() % charset_size];
    }
}

/**
 * Generate a random temporary filename with .mp2 extension.
 * Format: /tmp/[32 random chars].mp2
 *
 * @param filename Output buffer (must be at least 42 bytes)
 */
void tav_generate_random_filename(char *filename) {
    tav_fill_random_tmp_stem(filename);

    // strcpy also writes the terminating NUL at index 41.
    strcpy(filename + 37, ".mp2");
}

/**
 * Generate a random temporary filename with custom extension.
 * Format: /tmp/[32 random chars].[ext]
 *
 * @param filename Output buffer (must hold 38 bytes plus the extension and
 *                 its terminating NUL)
 * @param ext File extension (without leading dot, e.g., "tmp", "wav");
 *            NULL is treated as an empty extension
 */
void tav_generate_random_filename_ext(char *filename, const char *ext) {
    tav_fill_random_tmp_stem(filename);

    filename[37] = '.';
    strcpy(filename + 38, ext != NULL ? ext : "");
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Memory Utilities
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
 * malloc wrapper that never returns NULL for a non-zero request.
 * On failure an error is printed to stderr and the process exits with status 1.
 */
void *tav_malloc(size_t size) {
    void *block = malloc(size);
    if (block != NULL || size == 0) {
        return block;
    }

    fprintf(stderr, "ERROR: Failed to allocate %zu bytes\n", size);
    exit(1);
}
|
|
||||||
|
|
||||||
/**
 * calloc wrapper that never returns NULL for a non-empty request.
 * On failure an error is printed to stderr and the process exits with status 1.
 */
void *tav_calloc(size_t count, size_t size) {
    void *block = calloc(count, size);
    if (block != NULL || count == 0 || size == 0) {
        return block;
    }

    fprintf(stderr, "ERROR: Failed to allocate %zu elements of %zu bytes\n", count, size);
    exit(1);
}
|
|
||||||
|
|
||||||
/**
 * realloc wrapper that never returns NULL for a non-zero request.
 * On failure an error is printed to stderr and the process exits with status 1.
 */
void *tav_realloc(void *ptr, size_t size) {
    void *resized = realloc(ptr, size);
    if (resized != NULL || size == 0) {
        return resized;
    }

    fprintf(stderr, "ERROR: Failed to reallocate to %zu bytes\n", size);
    exit(1);
}
|
|
||||||
|
|
||||||
/**
 * Allocate aligned memory.
 *
 * @param alignment Required alignment in bytes; must be a power of two
 * @param size      Number of bytes to allocate
 * @return Pointer to the allocation, or NULL on failure (an error is printed
 *         to stderr when alignment is not a power of two). Release the memory
 *         with tav_aligned_free().
 */
void *tav_aligned_alloc(size_t alignment, size_t size) {
    // Test the alignment in its own type. Passing it through an int
    // parameter would drop the upper bits on platforms where size_t is wider
    // than int, so the check could be made on a different value.
    if (alignment == 0 || (alignment & (alignment - 1)) != 0) {
        fprintf(stderr, "ERROR: Alignment must be power of 2, got %zu\n", alignment);
        return NULL;
    }

#ifdef _WIN32
    return _aligned_malloc(size, alignment);
#else
    // posix_memalign rejects alignments that are not a multiple of
    // sizeof(void *). Raising a smaller power of two to sizeof(void *) still
    // satisfies the requested alignment.
    if (alignment < sizeof(void *)) {
        alignment = sizeof(void *);
    }

    void *ptr = NULL;
    if (posix_memalign(&ptr, alignment, size) != 0) {
        return NULL;
    }
    return ptr;
#endif
}
|
|
||||||
|
|
||||||
/**
 * Release memory through the platform's matching deallocator:
 * _aligned_free() on Windows, free() everywhere else.
 */
void tav_aligned_free(void *ptr) {
#ifndef _WIN32
    free(ptr);
#else
    _aligned_free(ptr);
#endif
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Array Utilities
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
 * Set the first `count` elements of an integer array to `value`.
 */
void tav_array_fill_int(int *array, size_t count, int value) {
    for (size_t remaining = count; remaining > 0; remaining--) {
        *array++ = value;
    }
}
|
|
||||||
|
|
||||||
/**
 * Set the first `count` elements of a float array to `value`.
 */
void tav_array_fill_float(float *array, size_t count, float value) {
    for (size_t remaining = count; remaining > 0; remaining--) {
        *array++ = value;
    }
}
|
|
||||||
|
|
||||||
/**
 * Copy `count` integers from src to dst with memcpy.
 */
void tav_array_copy_int(int *dst, const int *src, size_t count) {
    const size_t byte_count = count * sizeof(int);
    memcpy(dst, src, byte_count);
}
|
|
||||||
|
|
||||||
/**
 * Copy `count` floats from src to dst with memcpy.
 */
void tav_array_copy_float(float *dst, const float *src, size_t count) {
    const size_t byte_count = count * sizeof(float);
    memcpy(dst, src, byte_count);
}
|
|
||||||
|
|
||||||
/**
 * Largest element of an integer array; 0 when the array is empty.
 */
int tav_array_max_int(const int *array, size_t count) {
    if (count == 0) {
        return 0;
    }

    int best = array[0];
    size_t idx = 1;
    while (idx < count) {
        const int candidate = array[idx++];
        best = (candidate > best) ? candidate : best;
    }
    return best;
}
|
|
||||||
|
|
||||||
/**
 * Smallest element of an integer array; 0 when the array is empty.
 */
int tav_array_min_int(const int *array, size_t count) {
    if (count == 0) {
        return 0;
    }

    int best = array[0];
    size_t idx = 1;
    while (idx < count) {
        const int candidate = array[idx++];
        best = (candidate < best) ? candidate : best;
    }
    return best;
}
|
|
||||||
|
|
||||||
/**
 * Largest magnitude (fabsf) found in a float array; 0.0f when the array is empty.
 */
float tav_array_max_abs_float(const float *array, size_t count) {
    if (count == 0) {
        return 0.0f;
    }

    float peak = fabsf(array[0]);
    size_t idx = 1;
    while (idx < count) {
        const float magnitude = fabsf(array[idx]);
        if (magnitude > peak) {
            peak = magnitude;
        }
        idx++;
    }
    return peak;
}
|
|
||||||
|
|
||||||
/**
 * Sum of an integer array, accumulated in a long long; 0 for an empty array.
 */
long long tav_array_sum_int(const int *array, size_t count) {
    long long total = 0;
    size_t idx = 0;
    while (idx < count) {
        total += array[idx];
        idx++;
    }
    return total;
}
|
|
||||||
|
|
||||||
/**
 * Sum of a float array, accumulated front to back in a double;
 * 0.0 for an empty array.
 */
double tav_array_sum_float(const float *array, size_t count) {
    double total = 0.0;
    size_t idx = 0;
    while (idx < count) {
        total += array[idx];
        idx++;
    }
    return total;
}
|
|
||||||
|
|
||||||
/**
 * Arithmetic mean of a float array; 0.0f when the array is empty.
 * The elements are summed front to back in a double and the quotient is
 * narrowed to float.
 */
float tav_array_mean_float(const float *array, size_t count) {
    if (count == 0) {
        return 0.0f;
    }

    double total = 0.0;
    for (size_t idx = 0; idx < count; idx++) {
        total += array[idx];
    }
    return (float)(total / count);
}
|
|
||||||
|
|
||||||
/**
 * Exchange the two integers that a and b point to.
 *
 * @param a  First value (must not be NULL)
 * @param b  Second value (must not be NULL)
 */
void tav_swap_int(int *a, int *b) {
    const int held = *a;

    *a = *b;
    *b = held;
}
|
|
||||||
|
|
||||||
/**
 * Exchange the two floats that a and b point to.
 *
 * @param a  First value (must not be NULL)
 * @param b  Second value (must not be NULL)
 */
void tav_swap_float(float *a, float *b) {
    const float held = *a;

    *a = *b;
    *b = held;
}
|
|
||||||
|
|
||||||
/**
 * Exchange the two generic pointers that a and b point to.
 *
 * Only the pointer values are swapped; the pointed-to objects are untouched.
 *
 * @param a  First pointer slot (must not be NULL)
 * @param b  Second pointer slot (must not be NULL)
 */
void tav_swap_ptr(void **a, void **b) {
    void *const held = *a;

    *a = *b;
    *b = held;
}
|
|
||||||
@@ -1,165 +0,0 @@
|
|||||||
/**
|
|
||||||
* TAV Encoder - Utilities Library
|
|
||||||
*
|
|
||||||
* Public API for common utility functions and helpers.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef TAV_ENCODER_UTILS_H
|
|
||||||
#define TAV_ENCODER_UTILS_H
|
|
||||||
|
|
||||||
#include <stddef.h>
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Math Utilities
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/** Clamp integer value to range [min, max] */
|
|
||||||
int tav_clamp_int(int x, int min, int max);
|
|
||||||
|
|
||||||
/** Clamp float value to range [min, max] */
|
|
||||||
float tav_clamp_float(float x, float min, float max);
|
|
||||||
|
|
||||||
/** Clamp double value to range [min, max] */
|
|
||||||
double tav_clamp_double(double x, double min, double max);
|
|
||||||
|
|
||||||
/** Round double to nearest integer */
|
|
||||||
int tav_iround(double v);
|
|
||||||
|
|
||||||
/** Linear interpolation between two floats */
|
|
||||||
float tav_lerp(float a, float b, float t);
|
|
||||||
|
|
||||||
/** Linear interpolation between two doubles */
|
|
||||||
double tav_lerp_double(double a, double b, double t);
|
|
||||||
|
|
||||||
/** Get minimum of two integers */
|
|
||||||
int tav_min_int(int a, int b);
|
|
||||||
|
|
||||||
/** Get maximum of two integers */
|
|
||||||
int tav_max_int(int a, int b);
|
|
||||||
|
|
||||||
/** Get minimum of two floats */
|
|
||||||
float tav_min_float(float a, float b);
|
|
||||||
|
|
||||||
/** Get maximum of two floats */
|
|
||||||
float tav_max_float(float a, float b);
|
|
||||||
|
|
||||||
/** Compute absolute value of integer */
|
|
||||||
int tav_abs_int(int x);
|
|
||||||
|
|
||||||
/** Compute absolute value of float */
|
|
||||||
float tav_abs_float(float x);
|
|
||||||
|
|
||||||
/** Sign function: returns -1, 0, or 1 */
|
|
||||||
int tav_sign(int x);
|
|
||||||
|
|
||||||
/** Check if integer is power of 2 */
|
|
||||||
int tav_is_power_of_2(int x);
|
|
||||||
|
|
||||||
/** Round up to next power of 2 */
|
|
||||||
int tav_next_power_of_2(int x);
|
|
||||||
|
|
||||||
/** Compute floor of log2(x) */
|
|
||||||
int tav_floor_log2(int x);
|
|
||||||
|
|
||||||
/** Compute ceil of log2(x) */
|
|
||||||
int tav_ceil_log2(int x);
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Random Filename Generation
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Generate a random temporary filename with .mp2 extension.
|
|
||||||
* Format: /tmp/[32 random chars].mp2
|
|
||||||
*
|
|
||||||
* @param filename Output buffer (must be at least 42 bytes)
|
|
||||||
*/
|
|
||||||
void tav_generate_random_filename(char *filename);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Generate a random temporary filename with custom extension.
|
|
||||||
* Format: /tmp/[32 random chars].[ext]
|
|
||||||
*
|
|
||||||
* @param filename Output buffer (must be large enough)
|
|
||||||
* @param ext File extension (without leading dot)
|
|
||||||
*/
|
|
||||||
void tav_generate_random_filename_ext(char *filename, const char *ext);
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Memory Utilities
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/** Safe malloc with error checking (exits on failure) */
|
|
||||||
void *tav_malloc(size_t size);
|
|
||||||
|
|
||||||
/** Safe calloc with error checking (exits on failure) */
|
|
||||||
void *tav_calloc(size_t count, size_t size);
|
|
||||||
|
|
||||||
/** Safe realloc with error checking (exits on failure) */
|
|
||||||
void *tav_realloc(void *ptr, size_t size);
|
|
||||||
|
|
||||||
/** Allocate aligned memory (returns NULL on failure) */
|
|
||||||
void *tav_aligned_alloc(size_t alignment, size_t size);
|
|
||||||
|
|
||||||
/** Free aligned memory */
|
|
||||||
void tav_aligned_free(void *ptr);
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Array Utilities
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/** Fill integer array with constant value */
|
|
||||||
void tav_array_fill_int(int *array, size_t count, int value);
|
|
||||||
|
|
||||||
/** Fill float array with constant value */
|
|
||||||
void tav_array_fill_float(float *array, size_t count, float value);
|
|
||||||
|
|
||||||
/** Copy integer array */
|
|
||||||
void tav_array_copy_int(int *dst, const int *src, size_t count);
|
|
||||||
|
|
||||||
/** Copy float array */
|
|
||||||
void tav_array_copy_float(float *dst, const float *src, size_t count);
|
|
||||||
|
|
||||||
/** Find maximum value in integer array (returns 0 when count is 0) */
|
|
||||||
int tav_array_max_int(const int *array, size_t count);
|
|
||||||
|
|
||||||
/** Find minimum value in integer array (returns 0 when count is 0) */
|
|
||||||
int tav_array_min_int(const int *array, size_t count);
|
|
||||||
|
|
||||||
/** Find maximum absolute value in float array (returns 0.0f when count is 0) */
|
|
||||||
float tav_array_max_abs_float(const float *array, size_t count);
|
|
||||||
|
|
||||||
/** Compute sum of integer array */
|
|
||||||
long long tav_array_sum_int(const int *array, size_t count);
|
|
||||||
|
|
||||||
/** Compute sum of float array */
|
|
||||||
double tav_array_sum_float(const float *array, size_t count);
|
|
||||||
|
|
||||||
/** Compute mean of float array (returns 0.0f when count is 0) */
|
|
||||||
float tav_array_mean_float(const float *array, size_t count);
|
|
||||||
|
|
||||||
/** Swap two integer values */
|
|
||||||
void tav_swap_int(int *a, int *b);
|
|
||||||
|
|
||||||
/** Swap two float values */
|
|
||||||
void tav_swap_float(float *a, float *b);
|
|
||||||
|
|
||||||
/** Swap two pointer values */
|
|
||||||
void tav_swap_ptr(void **a, void **b);
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Convenience Macros (for backward compatibility)
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
#define CLAMP(x, min, max) tav_clamp_int(x, min, max)
|
|
||||||
#define FCLAMP(x, min, max) tav_clamp_float(x, min, max)
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif // TAV_ENCODER_UTILS_H
|
|
||||||
@@ -1,152 +0,0 @@
|
|||||||
// Simple range coder for TAD audio codec
|
|
||||||
// Based on range coding with Laplacian probability model
|
|
||||||
|
|
||||||
#include "range_coder.h"
|
|
||||||
#include <string.h>
|
|
||||||
#include <math.h>
|
|
||||||
|
|
||||||
#define TOP_VALUE 0xFFFFFFFFU
|
|
||||||
#define BOTTOM_VALUE 0x00FFFFFF
|
|
||||||
|
|
||||||
// Append one byte to the encoder's output buffer.
// Once buffer_pos has reached buffer_capacity the byte is silently dropped.
static inline void range_encoder_put_byte(RangeEncoder *enc, uint8_t byte) {
    if (enc->buffer_pos >= enc->buffer_capacity) {
        return;
    }
    enc->buffer[enc->buffer_pos] = byte;
    enc->buffer_pos += 1;
}
|
|
||||||
|
|
||||||
// Fetch the next input byte and advance the read position.
// Reads past the end of the buffer yield 0 without moving the position.
static inline uint8_t range_decoder_get_byte(RangeDecoder *dec) {
    if (dec->buffer_pos >= dec->buffer_size) {
        return 0;
    }
    const uint8_t next = dec->buffer[dec->buffer_pos];
    dec->buffer_pos += 1;
    return next;
}
|
|
||||||
|
|
||||||
// While the range has shrunk to BOTTOM_VALUE or below, emit the top byte of
// `low` and shift both `low` and `range` up by one byte.
static void range_encoder_renormalise(RangeEncoder *enc) {
    for (; enc->range <= BOTTOM_VALUE; enc->range <<= 8) {
        range_encoder_put_byte(enc, (enc->low >> 24) & 0xFF);
        enc->low <<= 8;
    }
}
|
|
||||||
|
|
||||||
// Mirror of the encoder's renormalisation: while the range is at or below
// BOTTOM_VALUE, shift a fresh input byte into `code` and scale `low` and
// `range` up by one byte.
static void range_decoder_renormalise(RangeDecoder *dec) {
    for (; dec->range <= BOTTOM_VALUE; dec->range <<= 8) {
        dec->code = (dec->code << 8) | range_decoder_get_byte(dec);
        dec->low <<= 8;
    }
}
|
|
||||||
|
|
||||||
// Prepare an encoder that writes into `buffer` (at most `capacity` bytes).
// The coding interval starts as the full 32-bit range with low = 0.
void range_encoder_init(RangeEncoder *enc, uint8_t *buffer, size_t capacity) {
    // Output sink
    enc->buffer = buffer;
    enc->buffer_capacity = capacity;
    enc->buffer_pos = 0;

    // Coding interval
    enc->range = TOP_VALUE;
    enc->low = 0;
}
|
|
||||||
|
|
||||||
// Calculate Laplacian CDF for a given value
|
|
||||||
// CDF(x) = 0.5 * exp(λx) for x < 0
|
|
||||||
// CDF(x) = 1 - 0.5 * exp(-λx) for x ≥ 0
|
|
||||||
static inline double laplacian_cdf(int16_t value, float lambda) {
|
|
||||||
if (value < 0) {
|
|
||||||
return 0.5 * exp(lambda * value);
|
|
||||||
} else {
|
|
||||||
return 1.0 - 0.5 * exp(-lambda * value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Encode one signed 16-bit symbol using a zero-mean Laplacian model.
//
// enc            Encoder state (see range_encoder_init)
// value          Symbol to encode; clamped to [-max_abs_value, max_abs_value]
// max_abs_value  Largest magnitude the alphabet contains
// lambda         Laplacian rate parameter; must match the value used when decoding
//
// NOTE(review): in the far tail several neighbouring symbols quantise to the
// same 1/65536-wide slot, so they are not distinguishable in the bitstream.
// That is a property of the model as written and is left unchanged here.
void range_encode_int16_laplacian(RangeEncoder *enc, int16_t value, int16_t max_abs_value, float lambda) {
    // Clamp to valid range
    if (value < -max_abs_value) value = -max_abs_value;
    if (value > max_abs_value) value = max_abs_value;

    // The probability mass of `value` spans CDF(value - 1) .. CDF(value);
    // the lowest symbol of the alphabet starts at 0.
    double cdf_low = (value == -max_abs_value) ? 0.0 : laplacian_cdf(value - 1, lambda);
    double cdf_high = laplacian_cdf(value, lambda);

    // Normalise to cumulative counts in [0, SCALE]
    const uint32_t SCALE = 0x10000; // 65536 for precision
    uint32_t cum_low = (uint32_t)(cdf_low * SCALE);
    uint32_t cum_high = (uint32_t)(cdf_high * SCALE);

    // When CDF(value - 1) rounds to exactly 1.0 in double precision, cum_low
    // equals SCALE. Without this guard the clamp below would produce an empty
    // slot (cum_high == cum_low), set enc->range to 0 and make the
    // renormalisation loop spin forever. Pin such symbols to the last slot.
    if (cum_low >= SCALE) cum_low = SCALE - 1;

    // Ensure we have at least 1 unit of probability, without exceeding SCALE
    if (cum_high <= cum_low) cum_high = cum_low + 1;
    if (cum_high > SCALE) cum_high = SCALE;

    // Narrow the coding interval to the symbol's slot
    uint64_t range_64 = (uint64_t)enc->range;
    enc->low += (uint32_t)((range_64 * cum_low) / SCALE);
    enc->range = (uint32_t)((range_64 * (cum_high - cum_low)) / SCALE);

    range_encoder_renormalise(enc);
}
|
|
||||||
|
|
||||||
// Flush the four bytes still held in `low` to the output buffer and report
// how many bytes the buffer now contains.
size_t range_encoder_finish(RangeEncoder *enc) {
    int pending = 4;

    while (pending-- > 0) {
        range_encoder_put_byte(enc, (enc->low >> 24) & 0xFF);
        enc->low <<= 8;
    }
    return enc->buffer_pos;
}
|
|
||||||
|
|
||||||
// Prepare a decoder that reads `size` bytes from `buffer`, then prime the
// 32-bit `code` register with the first four input bytes (missing bytes
// read as 0).
void range_decoder_init(RangeDecoder *dec, const uint8_t *buffer, size_t size) {
    // Input source
    dec->buffer = buffer;
    dec->buffer_size = size;
    dec->buffer_pos = 0;

    // Coding interval
    dec->range = TOP_VALUE;
    dec->low = 0;
    dec->code = 0;

    int pending = 4;
    while (pending-- > 0) {
        dec->code = (dec->code << 8) | range_decoder_get_byte(dec);
    }
}
|
|
||||||
|
|
||||||
// Decode one signed 16-bit symbol that was written by
// range_encode_int16_laplacian() with the same max_abs_value and lambda.
//
// The current position inside the coding interval is scaled to [0, SCALE),
// then a binary search over [-max_abs_value, max_abs_value] looks for the
// symbol whose cumulative slot contains that position. If no slot matches,
// the decoder renormalises and returns 0.
int16_t range_decode_int16_laplacian(RangeDecoder *dec, int16_t max_abs_value, float lambda) {
    const uint32_t SCALE = 0x10000; // Must match encoder

    // Where the code value sits inside the current interval, in 1/SCALE units
    const uint64_t span = (uint64_t)dec->range;
    const uint32_t target = (uint32_t)(((uint64_t)(dec->code - dec->low) * SCALE) / span);

    int16_t lo = -max_abs_value;
    int16_t hi = max_abs_value;

    while (lo <= hi) {
        const int16_t candidate = (lo + hi) / 2;

        // Slot of `candidate`: CDF(candidate - 1) .. CDF(candidate)
        const double slot_cdf_begin = (candidate == -max_abs_value) ? 0.0 : laplacian_cdf(candidate - 1, lambda);
        const double slot_cdf_end = laplacian_cdf(candidate, lambda);

        const uint32_t slot_begin = (uint32_t)(slot_cdf_begin * SCALE);
        uint32_t slot_end = (uint32_t)(slot_cdf_end * SCALE);
        if (slot_end <= slot_begin) {
            slot_end = slot_begin + 1; // every symbol owns at least one unit
        }

        if (target < slot_begin) {
            hi = candidate - 1;
            continue;
        }
        if (target >= slot_end) {
            lo = candidate + 1;
            continue;
        }

        // Found the symbol: narrow the interval exactly as the encoder did
        dec->low += (uint32_t)((span * slot_begin) / SCALE);
        dec->range = (uint32_t)((span * (slot_end - slot_begin)) / SCALE);
        range_decoder_renormalise(dec);
        return candidate;
    }

    // Fallback: shouldn't happen with correct encoding
    range_decoder_renormalise(dec);
    return 0;
}
|
|
||||||
@@ -1,42 +0,0 @@
|
|||||||
#ifndef RANGE_CODER_H
#define RANGE_CODER_H

#include <stdint.h>
#include <stddef.h>

// Simple range coder for signed 16-bit integers.
// Symbol probabilities come from a fixed zero-mean Laplacian model whose rate
// (lambda) is supplied by the caller on every encode/decode call; encoder and
// decoder must use the same lambda and max_abs_value for each symbol.

// Encoder state: current coding interval plus the output byte buffer.
typedef struct {
    uint32_t low;            // Lower bound of the current coding interval
    uint32_t range;          // Width of the current coding interval
    uint8_t *buffer;         // Caller-provided output buffer
    size_t buffer_pos;       // Number of bytes written so far
    size_t buffer_capacity;  // Size of `buffer`; bytes beyond it are dropped
} RangeEncoder;

// Decoder state: current coding interval, code register and the input buffer.
typedef struct {
    uint32_t low;            // Lower bound of the current coding interval
    uint32_t range;          // Width of the current coding interval
    uint32_t code;           // Most recent 32 bits taken from the input
    const uint8_t *buffer;   // Caller-provided input buffer
    size_t buffer_pos;       // Number of bytes consumed so far
    size_t buffer_size;      // Size of `buffer`; reads beyond it yield 0
} RangeDecoder;

// Initialise encoder to write into `buffer` (at most `capacity` bytes)
void range_encoder_init(RangeEncoder *enc, uint8_t *buffer, size_t capacity);

// Encode a signed 16-bit value with a zero-mean Laplacian distribution of rate
// `lambda`. `value` is clamped to [-max_abs_value, max_abs_value].
void range_encode_int16_laplacian(RangeEncoder *enc, int16_t value, int16_t max_abs_value, float lambda);

// Finalise encoding (flushes four more bytes) and return bytes written
size_t range_encoder_finish(RangeEncoder *enc);

// Initialise decoder to read `size` bytes from `buffer`
void range_decoder_init(RangeDecoder *dec, const uint8_t *buffer, size_t size);

// Decode a signed 16-bit value with a zero-mean Laplacian distribution of rate
// `lambda`; `max_abs_value` and `lambda` must match the encoder's.
int16_t range_decode_int16_laplacian(RangeDecoder *dec, int16_t max_abs_value, float lambda);

#endif // RANGE_CODER_H
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,344 +0,0 @@
|
|||||||
// Created by CuriousTorvald and Claude on 2025-10-24.
|
|
||||||
// TAD32 (Terrarum Advanced Audio - PCM32 version) Encoder - Standalone program
|
|
||||||
// Alternative version: PCM32 throughout encoding, PCM8 conversion only at decoder
|
|
||||||
// Uses encoder_tad32.c library for encoding functions
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <getopt.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include "encoder_tad.h"
|
|
||||||
|
|
||||||
#define ENCODER_VENDOR_STRING "Encoder-TAD32 (PCM32f version) 20251107"
|
|
||||||
|
|
||||||
// TAD32 format constants
|
|
||||||
#define TAD32_DEFAULT_CHUNK_SIZE 32768 // Using a prime number to force the worst condition
|
|
||||||
|
|
||||||
// Temporary file for FFmpeg PCM extraction
|
|
||||||
char TEMP_PCM_FILE[42];
|
|
||||||
|
|
||||||
// Fill `filename` with "/tmp/" + 32 random alphanumeric characters + ".tad".
// The buffer must hold at least 42 bytes (41 characters plus the terminator).
// The generator is reseeded from the current time on every call.
static void generate_random_filename(char *filename) {
    const char charset[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    const int charset_size = sizeof(charset) - 1;

    srand(time(NULL));

    // Prefix
    strcpy(filename, "/tmp/");

    // 32 random characters directly after the 5-character prefix
    char *out = filename + 5;
    char *const stop = out + 32;
    while (out < stop) {
        *out++ = charset[rand() % charset_size];
    }

    // Extension and terminator
    strcpy(out, ".tad");
    filename[41] = '\0';
}
|
|
||||||
|
|
||||||
//=============================================================================
|
|
||||||
// Main Encoder
|
|
||||||
//=============================================================================
|
|
||||||
|
|
||||||
static void print_usage(const char *prog_name) {
|
|
||||||
printf("Usage: %s -i <input> [options]\n", prog_name);
|
|
||||||
printf("Options:\n");
|
|
||||||
printf(" -i <file> Input audio file (any format supported by FFmpeg)\n");
|
|
||||||
printf(" -o <file> Output TAD32 file (optional, auto-generated as input.qN.tad)\n");
|
|
||||||
printf(" -q <level> Quality level (0-5, default: %d)\n", TAD32_QUALITY_DEFAULT);
|
|
||||||
printf(" 0 = lowest quality/smallest (max_index=31)\n");
|
|
||||||
printf(" 1 = low quality (max_index=35)\n");
|
|
||||||
printf(" 2 = medium quality (max_index=39)\n");
|
|
||||||
printf(" 3 = good quality (max_index=47) [DEFAULT]\n");
|
|
||||||
printf(" 4 = high quality (max_index=56)\n");
|
|
||||||
printf(" 5 = very high quality/largest (max_index=89)\n");
|
|
||||||
printf(" -v Verbose output\n");
|
|
||||||
printf(" -h, --help Show this help\n");
|
|
||||||
printf("\nVersion: %s\n", ENCODER_VENDOR_STRING);
|
|
||||||
printf("Note: This is the PCM32 alternative version for comparison testing.\n");
|
|
||||||
printf(" PCM32 is processed throughout encoding; PCM8 conversion happens at decoder.\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
|
||||||
generate_random_filename(TEMP_PCM_FILE);
|
|
||||||
|
|
||||||
char *input_file = NULL;
|
|
||||||
char *output_file = NULL;
|
|
||||||
int quality = TAD32_QUALITY_DEFAULT; // Default quality level (0-5)
|
|
||||||
float quantiser_scale = 1.0f; // Default quantiser scaling
|
|
||||||
int verbose = 0;
|
|
||||||
|
|
||||||
// Parse command line arguments
|
|
||||||
static struct option long_options[] = {
|
|
||||||
{"help", no_argument, 0, 'h'},
|
|
||||||
{0, 0, 0, 0}
|
|
||||||
};
|
|
||||||
|
|
||||||
int opt;
|
|
||||||
int option_index = 0;
|
|
||||||
while ((opt = getopt_long(argc, argv, "i:o:q:s:vh", long_options, &option_index)) != -1) {
|
|
||||||
switch (opt) {
|
|
||||||
case 'i':
|
|
||||||
input_file = optarg;
|
|
||||||
break;
|
|
||||||
case 'o':
|
|
||||||
output_file = optarg;
|
|
||||||
break;
|
|
||||||
case 'q':
|
|
||||||
quality = atoi(optarg);
|
|
||||||
if (quality < TAD32_QUALITY_MIN || quality > TAD32_QUALITY_MAX) {
|
|
||||||
fprintf(stderr, "Error: Quality must be in range %d-%d\n", TAD32_QUALITY_MIN, TAD32_QUALITY_MAX);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 's':
|
|
||||||
quantiser_scale = atof(optarg);
|
|
||||||
if (quantiser_scale < 0.5f || quantiser_scale > 4.0f) {
|
|
||||||
fprintf(stderr, "Error: Quantiser scale must be in range 0.5-4.0\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 'v':
|
|
||||||
verbose = 1;
|
|
||||||
break;
|
|
||||||
case 'h':
|
|
||||||
print_usage(argv[0]);
|
|
||||||
return 0;
|
|
||||||
default:
|
|
||||||
print_usage(argv[0]);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!input_file) {
|
|
||||||
fprintf(stderr, "Error: Input file is required\n");
|
|
||||||
print_usage(argv[0]);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Convert quality (0-5) to max_index for quantisation
|
|
||||||
int max_index = tad32_quality_to_max_index(quality);
|
|
||||||
|
|
||||||
// Generate output filename if not provided
|
|
||||||
if (!output_file) {
|
|
||||||
// Allocate space for output filename
|
|
||||||
size_t input_len = strlen(input_file);
|
|
||||||
output_file = malloc(input_len + 32); // Extra space for .qNN.tad
|
|
||||||
|
|
||||||
// Find the last directory separator
|
|
||||||
const char *basename_start = strrchr(input_file, '/');
|
|
||||||
if (!basename_start) basename_start = strrchr(input_file, '\\');
|
|
||||||
basename_start = basename_start ? basename_start + 1 : input_file;
|
|
||||||
|
|
||||||
// Copy directory part
|
|
||||||
size_t dir_len = basename_start - input_file;
|
|
||||||
strncpy(output_file, input_file, dir_len);
|
|
||||||
|
|
||||||
// Find the extension (last dot after basename)
|
|
||||||
const char *ext = strrchr(basename_start, '.');
|
|
||||||
if (ext && ext > basename_start) {
|
|
||||||
// Copy basename without extension
|
|
||||||
size_t name_len = ext - basename_start;
|
|
||||||
strncpy(output_file + dir_len, basename_start, name_len);
|
|
||||||
output_file[dir_len + name_len] = '\0';
|
|
||||||
} else {
|
|
||||||
// No extension, copy entire basename
|
|
||||||
strcpy(output_file + dir_len, basename_start);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Append .qNN.tad (use quality level for filename)
|
|
||||||
sprintf(output_file + strlen(output_file), ".q%d.tad", quality);
|
|
||||||
|
|
||||||
if (verbose) {
|
|
||||||
printf("Auto-generated output path: %s\n", output_file);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (verbose) {
|
|
||||||
printf("%s\n", ENCODER_VENDOR_STRING);
|
|
||||||
printf("Input: %s\n", input_file);
|
|
||||||
printf("Output: %s\n", output_file);
|
|
||||||
printf("Quality level: %d (max_index=%d)\n", quality, max_index);
|
|
||||||
printf("Quantiser scale: %.2f\n", quantiser_scale);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Detect original sample rate for high-quality resampling
|
|
||||||
char sample_rate_str[32] = "48000"; // Default fallback
|
|
||||||
char detect_cmd[2048];
|
|
||||||
snprintf(detect_cmd, sizeof(detect_cmd),
|
|
||||||
"ffprobe -v error -select_streams a:0 -show_entries stream=sample_rate "
|
|
||||||
"-of default=noprint_wrappers=1:nokey=1 \"%s\" 2>/dev/null",
|
|
||||||
input_file);
|
|
||||||
|
|
||||||
FILE *probe = popen(detect_cmd, "r");
|
|
||||||
if (probe) {
|
|
||||||
if (fgets(sample_rate_str, sizeof(sample_rate_str), probe)) {
|
|
||||||
// Remove newline
|
|
||||||
sample_rate_str[strcspn(sample_rate_str, "\n")] = 0;
|
|
||||||
}
|
|
||||||
pclose(probe);
|
|
||||||
}
|
|
||||||
|
|
||||||
int original_rate = atoi(sample_rate_str);
|
|
||||||
if (original_rate <= 0 || original_rate > 192000) {
|
|
||||||
original_rate = 48000; // Fallback
|
|
||||||
}
|
|
||||||
|
|
||||||
if (verbose) {
|
|
||||||
printf("Detected original sample rate: %d Hz\n", original_rate);
|
|
||||||
printf("Extracting and resampling audio to %d Hz...\n", TAD32_SAMPLE_RATE);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract and resample in two passes for better quality
|
|
||||||
// Pass 1: Extract at original sample rate
|
|
||||||
char temp_original_pcm[256];
|
|
||||||
snprintf(temp_original_pcm, sizeof(temp_original_pcm), "%s.orig", TEMP_PCM_FILE);
|
|
||||||
|
|
||||||
char ffmpeg_cmd[2048];
|
|
||||||
snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
|
|
||||||
"ffmpeg -hide_banner -v error -i \"%s\" -f f32le -acodec pcm_f32le -ac %d -y \"%s\" 2>&1",
|
|
||||||
input_file, TAD32_CHANNELS, temp_original_pcm);
|
|
||||||
|
|
||||||
int result = system(ffmpeg_cmd);
|
|
||||||
if (result != 0) {
|
|
||||||
fprintf(stderr, "Error: FFmpeg extraction failed\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Pass 2: Resample to 32kHz with high-quality SoXR resampler and highpass filter
|
|
||||||
snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
|
|
||||||
"ffmpeg -hide_banner -v error -f f32le -ar %d -ac %d -i \"%s\" "
|
|
||||||
"-f f32le -acodec pcm_f32le -ar %d -ac %d "
|
|
||||||
"-af \"aresample=resampler=soxr:precision=28:cutoff=0.99:dither_scale=0,highpass=f=16\" "
|
|
||||||
"-y \"%s\" 2>&1",
|
|
||||||
original_rate, TAD32_CHANNELS, temp_original_pcm, TAD32_SAMPLE_RATE, TAD32_CHANNELS, TEMP_PCM_FILE);
|
|
||||||
|
|
||||||
result = system(ffmpeg_cmd);
|
|
||||||
remove(temp_original_pcm); // Clean up intermediate file
|
|
||||||
|
|
||||||
if (result != 0) {
|
|
||||||
fprintf(stderr, "Error: FFmpeg resampling failed\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Open PCM file
|
|
||||||
FILE *pcm_file = fopen(TEMP_PCM_FILE, "rb");
|
|
||||||
if (!pcm_file) {
|
|
||||||
fprintf(stderr, "Error: Could not open temporary PCM file\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get file size
|
|
||||||
fseek(pcm_file, 0, SEEK_END);
|
|
||||||
size_t pcm_size = ftell(pcm_file);
|
|
||||||
fseek(pcm_file, 0, SEEK_SET);
|
|
||||||
|
|
||||||
size_t total_samples = pcm_size / (TAD32_CHANNELS * sizeof(float));
|
|
||||||
|
|
||||||
// Pad to even sample count
|
|
||||||
if (total_samples % 2 == 1) {
|
|
||||||
total_samples++;
|
|
||||||
if (verbose) {
|
|
||||||
printf("Odd sample count detected, padding with one zero sample\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t num_chunks = (total_samples + TAD32_DEFAULT_CHUNK_SIZE - 1) / TAD32_DEFAULT_CHUNK_SIZE;
|
|
||||||
|
|
||||||
if (verbose) {
|
|
||||||
printf("Total samples: %zu (%.2f seconds)\n", total_samples,
|
|
||||||
(double)total_samples / TAD32_SAMPLE_RATE);
|
|
||||||
printf("Chunks: %zu (chunk size: %d samples)\n", num_chunks, TAD32_DEFAULT_CHUNK_SIZE);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Open output file
|
|
||||||
FILE *output = fopen(output_file, "wb");
|
|
||||||
if (!output) {
|
|
||||||
fprintf(stderr, "Error: Could not open output file\n");
|
|
||||||
fclose(pcm_file);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process chunks using linked TAD32 encoder library
|
|
||||||
size_t total_output_size = 0;
|
|
||||||
float *chunk_buffer = malloc(TAD32_DEFAULT_CHUNK_SIZE * TAD32_CHANNELS * sizeof(float));
|
|
||||||
uint8_t *output_buffer = malloc(TAD32_DEFAULT_CHUNK_SIZE * 4 * sizeof(float)); // Generous buffer
|
|
||||||
|
|
||||||
for (size_t chunk_idx = 0; chunk_idx < num_chunks; chunk_idx++) {
|
|
||||||
size_t chunk_samples = TAD32_DEFAULT_CHUNK_SIZE;
|
|
||||||
size_t remaining = total_samples - (chunk_idx * TAD32_DEFAULT_CHUNK_SIZE);
|
|
||||||
|
|
||||||
if (remaining < TAD32_DEFAULT_CHUNK_SIZE) {
|
|
||||||
chunk_samples = remaining;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read chunk
|
|
||||||
size_t samples_read = fread(chunk_buffer, TAD32_CHANNELS * sizeof(float),
|
|
||||||
chunk_samples, pcm_file);
|
|
||||||
(void)samples_read; // Unused, but kept for compatibility
|
|
||||||
|
|
||||||
// Pad with zeros if necessary
|
|
||||||
if (chunk_samples < TAD32_DEFAULT_CHUNK_SIZE) {
|
|
||||||
memset(&chunk_buffer[chunk_samples * TAD32_CHANNELS], 0,
|
|
||||||
(TAD32_DEFAULT_CHUNK_SIZE - chunk_samples) * TAD32_CHANNELS * sizeof(float));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Encode chunk using linked tad32_encode_chunk() from encoder_tad32.c
|
|
||||||
size_t encoded_size = tad32_encode_chunk(chunk_buffer, TAD32_DEFAULT_CHUNK_SIZE,
|
|
||||||
max_index,
|
|
||||||
quantiser_scale, TAD32_ZSTD_LEVEL, output_buffer);
|
|
||||||
|
|
||||||
if (encoded_size == 0) {
|
|
||||||
fprintf(stderr, "Error: Chunk encoding failed at chunk %zu\n", chunk_idx);
|
|
||||||
free(chunk_buffer);
|
|
||||||
free(output_buffer);
|
|
||||||
fclose(pcm_file);
|
|
||||||
fclose(output);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write chunk to output
|
|
||||||
fwrite(output_buffer, 1, encoded_size, output);
|
|
||||||
total_output_size += encoded_size;
|
|
||||||
|
|
||||||
if (verbose && (chunk_idx % 10 == 0 || chunk_idx == num_chunks - 1)) {
|
|
||||||
printf("Processed chunk %zu/%zu (%.1f%%)\r", chunk_idx + 1, num_chunks,
|
|
||||||
(chunk_idx + 1) * 100.0 / num_chunks);
|
|
||||||
fflush(stdout);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (verbose) {
|
|
||||||
printf("\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Print coefficient statistics if enabled
|
|
||||||
tad32_print_statistics();
|
|
||||||
tad32_free_statistics();
|
|
||||||
|
|
||||||
// Cleanup
|
|
||||||
free(chunk_buffer);
|
|
||||||
free(output_buffer);
|
|
||||||
fclose(pcm_file);
|
|
||||||
fclose(output);
|
|
||||||
remove(TEMP_PCM_FILE);
|
|
||||||
|
|
||||||
// Print statistics
|
|
||||||
size_t pcmu8_size = total_samples * TAD32_CHANNELS; // PCMu8 baseline
|
|
||||||
float compression_ratio = (float)pcmu8_size / total_output_size;
|
|
||||||
|
|
||||||
printf("Encoding complete!\n");
|
|
||||||
printf("PCMu8 size: %zu bytes\n", pcmu8_size);
|
|
||||||
printf("TAD32 size: %zu bytes\n", total_output_size);
|
|
||||||
printf("Compression ratio: %.2f:1 (%.1f%% of PCMu8)\n",
|
|
||||||
compression_ratio, (total_output_size * 100.0) / pcmu8_size);
|
|
||||||
|
|
||||||
if (compression_ratio < 1.8) {
|
|
||||||
printf("Warning: Compression ratio below 2:1 target. Try lower quantisation bits or different settings.\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,294 +0,0 @@
|
|||||||
// Visualise DWT Coefficients as Image
|
|
||||||
// Converts .bin coefficient file to PPM image with logarithmic color mapping
|
|
||||||
// Usage: ./visualise_coefficients <input.bin> <output.ppm> <width> <height>
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <math.h>
|
|
||||||
|
|
||||||
// Logarithmic color mapping for coefficient visualisation
|
|
||||||
// Zero: Black (#000000)
|
|
||||||
// Positive: Red to Yellow (#FF0000 to #FFFF00) - logarithmic
|
|
||||||
// Negative: Blue to Cyan (#0000FF to #00FFFF) - logarithmic
|
|
||||||
typedef struct {
|
|
||||||
uint8_t r, g, b;
|
|
||||||
} rgb_t;
|
|
||||||
|
|
||||||
static rgb_t map_coefficient_to_color(int16_t coeff) {
|
|
||||||
rgb_t color = {0, 0, 0};
|
|
||||||
|
|
||||||
if (coeff == 0) {
|
|
||||||
// Zero: pure black
|
|
||||||
return color;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (coeff == 1) {
|
|
||||||
// +1: Light green #55FF55
|
|
||||||
color.r = 0x55;
|
|
||||||
color.g = 0xFF;
|
|
||||||
color.b = 0x55;
|
|
||||||
return color;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (coeff == -1) {
|
|
||||||
// -1: Dark green #005500
|
|
||||||
color.r = 0x00;
|
|
||||||
color.g = 0x55;
|
|
||||||
color.b = 0x00;
|
|
||||||
return color;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (coeff > 0) {
|
|
||||||
// Positive: Red (#FF0000) to Yellow (#FFFF00)
|
|
||||||
// Logarithmic mapping: log2(1) = 0, log2(32767) ≈ 14.99
|
|
||||||
double log_val = log2((double)coeff);
|
|
||||||
double log_max = log2(32767.0);
|
|
||||||
double normalised = log_val / log_max; // 0.0 to 1.0
|
|
||||||
|
|
||||||
color.r = 255;
|
|
||||||
color.g = (uint8_t)(normalised * 255.0);
|
|
||||||
color.b = 0;
|
|
||||||
} else {
|
|
||||||
// Negative: Blue (#0000FF) to Cyan (#00FFFF)
|
|
||||||
// Logarithmic mapping: log2(1) = 0, log2(32768) = 15
|
|
||||||
double log_val = log2((double)(-coeff));
|
|
||||||
double log_max = log2(32768.0);
|
|
||||||
double normalised = log_val / log_max; // 0.0 to 1.0
|
|
||||||
|
|
||||||
color.r = 0;
|
|
||||||
color.g = (uint8_t)(normalised * 255.0);
|
|
||||||
color.b = 255;
|
|
||||||
}
|
|
||||||
|
|
||||||
return color;
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
|
||||||
if (argc != 5) {
|
|
||||||
printf("Usage: %s <input.bin> <output.ppm> <width> <height>\n", argv[0]);
|
|
||||||
printf("Example: %s frame_060.tavframe.y.bin output.ppm 560 448\n", argv[0]);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char *input_file = argv[1];
|
|
||||||
const char *output_file = argv[2];
|
|
||||||
int width = atoi(argv[3]);
|
|
||||||
int height = atoi(argv[4]);
|
|
||||||
|
|
||||||
if (width <= 0 || height <= 0) {
|
|
||||||
printf("Error: Invalid dimensions %dx%d\n", width, height);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t expected_count = width * height;
|
|
||||||
|
|
||||||
// Load coefficient file
|
|
||||||
FILE *fp_in = fopen(input_file, "rb");
|
|
||||||
if (!fp_in) {
|
|
||||||
printf("Error: Cannot open %s\n", input_file);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get file size
|
|
||||||
fseek(fp_in, 0, SEEK_END);
|
|
||||||
long file_size = ftell(fp_in);
|
|
||||||
fseek(fp_in, 0, SEEK_SET);
|
|
||||||
|
|
||||||
size_t coeff_count = file_size / sizeof(int16_t);
|
|
||||||
|
|
||||||
if (coeff_count != expected_count) {
|
|
||||||
printf("Warning: File contains %zu coefficients, expected %zu (%dx%d)\n",
|
|
||||||
coeff_count, expected_count, width, height);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Allocate coefficient buffer
|
|
||||||
int16_t *coeffs = malloc(expected_count * sizeof(int16_t));
|
|
||||||
if (!coeffs) {
|
|
||||||
printf("Error: Memory allocation failed\n");
|
|
||||||
fclose(fp_in);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read coefficients
|
|
||||||
size_t read_count = fread(coeffs, sizeof(int16_t), expected_count, fp_in);
|
|
||||||
fclose(fp_in);
|
|
||||||
|
|
||||||
if (read_count != expected_count) {
|
|
||||||
printf("Error: Read %zu coefficients, expected %zu\n", read_count, expected_count);
|
|
||||||
free(coeffs);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Analyse coefficient distribution - Overall and per-subband
|
|
||||||
size_t zeros = 0, ones = 0, positives = 0, negatives = 0;
|
|
||||||
int16_t min_val = INT16_MAX, max_val = INT16_MIN;
|
|
||||||
|
|
||||||
// Calculate overall statistics
|
|
||||||
for (size_t i = 0; i < expected_count; i++) {
|
|
||||||
if (coeffs[i] == 0) zeros++;
|
|
||||||
else if (coeffs[i] == 1 || coeffs[i] == -1) ones++;
|
|
||||||
else if (coeffs[i] > 0) positives++;
|
|
||||||
else negatives++;
|
|
||||||
|
|
||||||
if (coeffs[i] < min_val) min_val = coeffs[i];
|
|
||||||
if (coeffs[i] > max_val) max_val = coeffs[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("Overall coefficient statistics:\n");
|
|
||||||
printf(" Total: %zu\n", expected_count);
|
|
||||||
printf(" Zeros: %zu (%.1f%%)\n", zeros, 100.0 * zeros / expected_count);
|
|
||||||
printf(" Ones: %zu (%.1f%%)\n", ones, 100.0 * ones / expected_count);
|
|
||||||
printf(" Positives: %zu (%.1f%%)\n", positives, 100.0 * positives / expected_count);
|
|
||||||
printf(" Negatives: %zu (%.1f%%)\n", negatives, 100.0 * negatives / expected_count);
|
|
||||||
printf(" Range: [%d, %d]\n\n", min_val, max_val);
|
|
||||||
|
|
||||||
// Per-subband statistics using 2D spatial layout
|
|
||||||
// The coefficients are stored in 2D spatial arrangement like the PPM image
|
|
||||||
int num_levels = 6;
|
|
||||||
|
|
||||||
// Helper macro to get coefficient from 2D position
|
|
||||||
#define GET_COEFF(x, y) coeffs[(y) * width + (x)]
|
|
||||||
|
|
||||||
// Calculate subband dimensions for each level
|
|
||||||
int level_w[7], level_h[7]; // level_w[1] = width/2, level_w[6] = width/64
|
|
||||||
for (int i = 1; i <= num_levels; i++) {
|
|
||||||
level_w[i] = width / (1 << i);
|
|
||||||
level_h[i] = height / (1 << i);
|
|
||||||
}
|
|
||||||
|
|
||||||
// LL6 subband (top-left corner)
|
|
||||||
{
|
|
||||||
int ll_w = level_w[6], ll_h = level_h[6];
|
|
||||||
size_t ll_zeros = 0, ll_ones = 0, ll_pos = 0, ll_neg = 0;
|
|
||||||
int16_t ll_min = INT16_MAX, ll_max = INT16_MIN;
|
|
||||||
|
|
||||||
for (int y = 0; y < ll_h; y++) {
|
|
||||||
for (int x = 0; x < ll_w; x++) {
|
|
||||||
int16_t val = GET_COEFF(x, y);
|
|
||||||
if (val == 0) ll_zeros++;
|
|
||||||
else if (val == 1 || val == -1) ll_ones++;
|
|
||||||
else if (val > 0) ll_pos++;
|
|
||||||
else ll_neg++;
|
|
||||||
if (val < ll_min) ll_min = val;
|
|
||||||
if (val > ll_max) ll_max = val;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t ll_total = ll_w * ll_h;
|
|
||||||
printf("LL%d subband (%dx%d):\n", num_levels, ll_w, ll_h);
|
|
||||||
printf(" Total: %zu\n", ll_total);
|
|
||||||
printf(" Zeros: %zu (%.1f%%)\n", ll_zeros, 100.0 * ll_zeros / ll_total);
|
|
||||||
printf(" Ones: %zu (%.1f%%)\n", ll_ones, 100.0 * ll_ones / ll_total);
|
|
||||||
printf(" Positives: %zu (%.1f%%)\n", ll_pos, 100.0 * ll_pos / ll_total);
|
|
||||||
printf(" Negatives: %zu (%.1f%%)\n", ll_neg, 100.0 * ll_neg / ll_total);
|
|
||||||
printf(" Range: [%d, %d]\n\n", ll_min, ll_max);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process each level from deepest (6) to finest (1)
|
|
||||||
for (int level = num_levels; level >= 1; level--) {
|
|
||||||
int half_w = level_w[level];
|
|
||||||
int half_h = level_h[level];
|
|
||||||
|
|
||||||
// LH subband (horizontal high-pass) - right of LL region
|
|
||||||
size_t lh_zeros = 0, lh_ones = 0, lh_pos = 0, lh_neg = 0;
|
|
||||||
int16_t lh_min = INT16_MAX, lh_max = INT16_MIN;
|
|
||||||
int lh_x0 = half_w, lh_y0 = 0;
|
|
||||||
int lh_x1 = half_w * 2, lh_y1 = half_h;
|
|
||||||
|
|
||||||
for (int y = lh_y0; y < lh_y1; y++) {
|
|
||||||
for (int x = lh_x0; x < lh_x1; x++) {
|
|
||||||
int16_t val = GET_COEFF(x, y);
|
|
||||||
if (val == 0) lh_zeros++;
|
|
||||||
else if (val == 1 || val == -1) lh_ones++;
|
|
||||||
else if (val > 0) lh_pos++;
|
|
||||||
else lh_neg++;
|
|
||||||
if (val < lh_min) lh_min = val;
|
|
||||||
if (val > lh_max) lh_max = val;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// HL subband (vertical high-pass) - below LL region
|
|
||||||
size_t hl_zeros = 0, hl_ones = 0, hl_pos = 0, hl_neg = 0;
|
|
||||||
int16_t hl_min = INT16_MAX, hl_max = INT16_MIN;
|
|
||||||
int hl_x0 = 0, hl_y0 = half_h;
|
|
||||||
int hl_x1 = half_w, hl_y1 = half_h * 2;
|
|
||||||
|
|
||||||
for (int y = hl_y0; y < hl_y1; y++) {
|
|
||||||
for (int x = hl_x0; x < hl_x1; x++) {
|
|
||||||
int16_t val = GET_COEFF(x, y);
|
|
||||||
if (val == 0) hl_zeros++;
|
|
||||||
else if (val == 1 || val == -1) hl_ones++;
|
|
||||||
else if (val > 0) hl_pos++;
|
|
||||||
else hl_neg++;
|
|
||||||
if (val < hl_min) hl_min = val;
|
|
||||||
if (val > hl_max) hl_max = val;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// HH subband (diagonal high-pass) - bottom-right of LL region
|
|
||||||
size_t hh_zeros = 0, hh_ones = 0, hh_pos = 0, hh_neg = 0;
|
|
||||||
int16_t hh_min = INT16_MAX, hh_max = INT16_MIN;
|
|
||||||
int hh_x0 = half_w, hh_y0 = half_h;
|
|
||||||
int hh_x1 = half_w * 2, hh_y1 = half_h * 2;
|
|
||||||
|
|
||||||
for (int y = hh_y0; y < hh_y1; y++) {
|
|
||||||
for (int x = hh_x0; x < hh_x1; x++) {
|
|
||||||
int16_t val = GET_COEFF(x, y);
|
|
||||||
if (val == 0) hh_zeros++;
|
|
||||||
else if (val == 1 || val == -1) hh_ones++;
|
|
||||||
else if (val > 0) hh_pos++;
|
|
||||||
else hh_neg++;
|
|
||||||
if (val < hh_min) hh_min = val;
|
|
||||||
if (val > hh_max) hh_max = val;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t sub_total = half_w * half_h;
|
|
||||||
printf("Level %d subbands (%dx%d each):\n", level, half_w, half_h);
|
|
||||||
printf(" LH%d: Total=%zu, Zeros=%zu (%.1f%%), Ones=%zu (%.1f%%), Pos=%zu (%.1f%%), Neg=%zu (%.1f%%), Range=[%d,%d]\n",
|
|
||||||
level, sub_total, lh_zeros, 100.0*lh_zeros/sub_total, lh_ones, 100.0*lh_ones/sub_total,
|
|
||||||
lh_pos, 100.0*lh_pos/sub_total, lh_neg, 100.0*lh_neg/sub_total, lh_min, lh_max);
|
|
||||||
printf(" HL%d: Total=%zu, Zeros=%zu (%.1f%%), Ones=%zu (%.1f%%), Pos=%zu (%.1f%%), Neg=%zu (%.1f%%), Range=[%d,%d]\n",
|
|
||||||
level, sub_total, hl_zeros, 100.0*hl_zeros/sub_total, hl_ones, 100.0*hl_ones/sub_total,
|
|
||||||
hl_pos, 100.0*hl_pos/sub_total, hl_neg, 100.0*hl_neg/sub_total, hl_min, hl_max);
|
|
||||||
printf(" HH%d: Total=%zu, Zeros=%zu (%.1f%%), Ones=%zu (%.1f%%), Pos=%zu (%.1f%%), Neg=%zu (%.1f%%), Range=[%d,%d]\n\n",
|
|
||||||
level, sub_total, hh_zeros, 100.0*hh_zeros/sub_total, hh_ones, 100.0*hh_ones/sub_total,
|
|
||||||
hh_pos, 100.0*hh_pos/sub_total, hh_neg, 100.0*hh_neg/sub_total, hh_min, hh_max);
|
|
||||||
}
|
|
||||||
|
|
||||||
#undef GET_COEFF
|
|
||||||
|
|
||||||
// Write PPM image
|
|
||||||
FILE *fp_out = fopen(output_file, "wb");
|
|
||||||
if (!fp_out) {
|
|
||||||
printf("Error: Cannot create %s\n", output_file);
|
|
||||||
free(coeffs);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// PPM header
|
|
||||||
fprintf(fp_out, "P6\n%d %d\n255\n", width, height);
|
|
||||||
|
|
||||||
// Write pixel data
|
|
||||||
for (int y = 0; y < height; y++) {
|
|
||||||
for (int x = 0; x < width; x++) {
|
|
||||||
size_t idx = y * width + x;
|
|
||||||
rgb_t color = map_coefficient_to_color(coeffs[idx]);
|
|
||||||
fwrite(&color, 3, 1, fp_out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fclose(fp_out);
|
|
||||||
free(coeffs);
|
|
||||||
|
|
||||||
printf("\nWrote %dx%d image to %s\n", width, height, output_file);
|
|
||||||
printf("Color mapping:\n");
|
|
||||||
printf(" Black: Zero coefficients\n");
|
|
||||||
printf(" Light Green (#55FF55): +1 coefficients\n");
|
|
||||||
printf(" Dark Green (#00AA00): -1 coefficients\n");
|
|
||||||
printf(" Red→Yellow: Positive coefficients > +1 (logarithmic)\n");
|
|
||||||
printf(" Blue→Cyan: Negative coefficients < -1 (logarithmic)\n");
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
@@ -1,402 +0,0 @@
|
|||||||
// TAV-DT Noise Injector - Simulates satellite transmission channel noise
|
|
||||||
// Models QPSK over Ku-band satellite with AWGN and burst interference
|
|
||||||
// to compile: gcc -O2 -o tavdt_noise_injector tavdt_noise_injector.c -lm
|
|
||||||
// Created by CuriousTorvald and Claude on 2025-12-14
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <getopt.h>
|
|
||||||
#include <time.h>
|
|
||||||
|
|
||||||
// Size in bytes of the buffer used to stream the input (1 MB chunks)
#define BUFFER_SIZE (1 << 20)

// Default TAV-DT bitrate in bits per second (~2 Mbps); used only to convert
// byte offsets into approximate playback time
#define DEFAULT_BITRATE_BPS 2000000.0

// Bitrate used for timing calculations (can be overridden by --bitrate)
static double g_bitrate_bps = DEFAULT_BITRATE_BPS;

// Burst length model, in bytes: Gaussian(mean, stddev) with a lower bound
#define BURST_LENGTH_MEAN 100.0
#define BURST_LENGTH_STDDEV 30.0
#define BURST_LENGTH_MIN 10
|
|
||||||
|
|
||||||
//=============================================================================
|
|
||||||
// PRNG Functions (xorshift64)
|
|
||||||
//=============================================================================
|
|
||||||
|
|
||||||
// One step of the xorshift64 generator (shifts 13 left, 7 right, 17 left).
// Updates *state in place and returns the new state value.
// A zero state maps to zero, so callers must seed with a non-zero value.
static uint64_t xorshift64(uint64_t *state) {
    uint64_t s = *state;
    s = s ^ (s << 13);
    s = s ^ (s >> 7);
    s = s ^ (s << 17);
    *state = s;
    return s;
}
|
|
||||||
|
|
||||||
// Returns the next xorshift64 output scaled by 1/UINT64_MAX.
// NOTE: xorshift64 does not produce 0 from a non-zero state, and the
// conversion to double rounds, so the result effectively lies in (0, 1]
// (1.0 itself is possible, though extremely unlikely).
static double rand_uniform(uint64_t *state) {
    const uint64_t raw = xorshift64(state);
    return (double)raw / (double)UINT64_MAX;
}
|
|
||||||
|
|
||||||
// Box-Muller transform for Gaussian random numbers
|
|
||||||
static double gaussian_rand(uint64_t *state, double mean, double stddev) {
|
|
||||||
double u1 = rand_uniform(state);
|
|
||||||
double u2 = rand_uniform(state);
|
|
||||||
|
|
||||||
// Avoid log(0)
|
|
||||||
if (u1 < 1e-15) u1 = 1e-15;
|
|
||||||
|
|
||||||
double z = sqrt(-2.0 * log(u1)) * cos(2.0 * M_PI * u2);
|
|
||||||
return mean + stddev * z;
|
|
||||||
}
|
|
||||||
|
|
||||||
//=============================================================================
|
|
||||||
// BER Calculation
|
|
||||||
//=============================================================================
|
|
||||||
|
|
||||||
// Calculate BER from SNR in dB for QPSK modulation
|
|
||||||
// BER = 0.5 * erfc(sqrt(Eb/N0))
|
|
||||||
// For QPSK, Eb/N0 = SNR (2 bits per symbol)
|
|
||||||
static double snr_to_ber(double snr_db) {
|
|
||||||
double snr_linear = pow(10.0, snr_db / 10.0);
|
|
||||||
double eb_n0 = snr_linear;
|
|
||||||
return 0.5 * erfc(sqrt(eb_n0));
|
|
||||||
}
|
|
||||||
|
|
||||||
//=============================================================================
|
|
||||||
// Burst State Management
|
|
||||||
//=============================================================================
|
|
||||||
|
|
||||||
// Bookkeeping for the burst-interference model.
typedef struct {
    // Elapsed playback time, in seconds
    double current_time_sec;
    // Time at which the next burst is scheduled, in seconds
    double next_burst_time;
    // Bytes left in the current burst (0 = no active burst)
    int burst_bytes_remaining;
    // Mean interval between bursts, in seconds (60.0 / bursts_per_minute)
    double burst_interval;
    // Bit error rate applied while a burst is active
    double burst_ber;
    // Number of bursts started so far
    int burst_count;
    // Total number of bytes that had burst noise applied
    int total_burst_bytes;
    // Non-zero enables per-burst logging
    int verbose;
} burst_state_t;
|
|
||||||
|
|
||||||
// Resets all counters in `state` and schedules the first burst.
// With bursts_per_minute <= 0 bursts are disabled: the interval is set to 0
// and the next burst time is pushed out to 1e30 seconds.
// Otherwise the first burst time is drawn from an exponential distribution
// whose mean is 60 / bursts_per_minute seconds (consumes one sample from `seed`).
static void burst_state_init(burst_state_t *state, double bursts_per_minute,
                             double burst_ber, int verbose, uint64_t *seed) {
    state->verbose = verbose;
    state->burst_ber = burst_ber;
    state->current_time_sec = 0.0;
    state->burst_bytes_remaining = 0;
    state->burst_count = 0;
    state->total_burst_bytes = 0;

    if (!(bursts_per_minute > 0)) {
        state->burst_interval = 0;
        state->next_burst_time = 1e30;  // Never burst
        return;
    }

    state->burst_interval = 60.0 / bursts_per_minute;
    state->next_burst_time = -state->burst_interval * log(rand_uniform(seed));
}
|
|
||||||
|
|
||||||
// Advances the burst clock by delta_sec and processes every burst scheduled
// before the new time.
// A scheduled burst only starts if none is currently pending; its length is
// Gaussian(BURST_LENGTH_MEAN, BURST_LENGTH_STDDEV) with a floor of
// BURST_LENGTH_MIN bytes. Scheduled bursts that fall in the window while one
// is still pending are consumed without starting a new burst.
// Each processed schedule entry draws the next exponential waiting time
// (at least 1 ms).
static void burst_state_advance_time(burst_state_t *state, double delta_sec, uint64_t *seed) {
    const double window_end = state->current_time_sec + delta_sec;

    if (state->burst_interval > 0) {
        while (state->next_burst_time < window_end) {
            if (state->burst_bytes_remaining == 0) {
                const double drawn_len = gaussian_rand(seed, BURST_LENGTH_MEAN, BURST_LENGTH_STDDEV);
                state->burst_bytes_remaining = (int)fmax(BURST_LENGTH_MIN, drawn_len);
                state->burst_count++;

                if (state->verbose) {
                    fprintf(stderr, " [burst] time %.2fs, %d bytes\n",
                            state->next_burst_time, state->burst_bytes_remaining);
                }
            }

            // Schedule the following burst
            double gap = -state->burst_interval * log(rand_uniform(seed));
            if (gap < 0.001) {
                gap = 0.001;  // Minimum 1ms between bursts
            }
            state->next_burst_time += gap;
        }
    }

    state->current_time_sec = window_end;
}
|
|
||||||
|
|
||||||
//=============================================================================
|
|
||||||
// Noise Application Functions
|
|
||||||
//=============================================================================
|
|
||||||
|
|
||||||
// Flips each bit of data[0..len) independently with probability `ber`.
// Returns the number of bits flipped.
// For ber below 1e-10 the buffer is left untouched and 0 is returned
// without consuming any random numbers.
static int apply_background_noise(uint8_t *data, size_t len, double ber, uint64_t *seed) {
    if (ber < 1e-10) {
        return 0;  // Effectively no errors at this BER
    }

    int flips = 0;
    uint8_t *const end = data + len;

    for (uint8_t *p = data; p != end; ++p) {
        // Walk the bits from LSB to MSB, one random draw per bit.
        for (int mask = 1; mask <= 0x80; mask <<= 1) {
            if (rand_uniform(seed) < ber) {
                *p ^= mask;
                flips++;
            }
        }
    }

    return flips;
}
|
|
||||||
|
|
||||||
// Apply burst noise to buffer (checks/updates burst state)
|
|
||||||
// Returns number of bits flipped
|
|
||||||
static int apply_burst_noise(uint8_t *data, size_t len, burst_state_t *state, uint64_t *seed) {
|
|
||||||
int bits_flipped = 0;
|
|
||||||
|
|
||||||
if (state->burst_bytes_remaining <= 0) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apply burst BER to bytes while burst is active
|
|
||||||
size_t burst_bytes = (size_t)state->burst_bytes_remaining;
|
|
||||||
if (burst_bytes > len) {
|
|
||||||
burst_bytes = len;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t i = 0; i < burst_bytes; i++) {
|
|
||||||
for (int bit = 0; bit < 8; bit++) {
|
|
||||||
if (rand_uniform(seed) < state->burst_ber) {
|
|
||||||
data[i] ^= (1 << bit);
|
|
||||||
bits_flipped++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
state->total_burst_bytes += burst_bytes;
|
|
||||||
state->burst_bytes_remaining -= burst_bytes;
|
|
||||||
|
|
||||||
return bits_flipped;
|
|
||||||
}
|
|
||||||
|
|
||||||
//=============================================================================
|
|
||||||
// Byte Position to Time Conversion
|
|
||||||
//=============================================================================
|
|
||||||
|
|
||||||
// Convert byte position to approximate playback time based on bitrate
|
|
||||||
static double bytes_to_time(size_t byte_pos) {
|
|
||||||
return (double)(byte_pos * 8) / g_bitrate_bps;
|
|
||||||
}
|
|
||||||
|
|
||||||
//=============================================================================
|
|
||||||
// Main Program
|
|
||||||
//=============================================================================
|
|
||||||
|
|
||||||
// Prints the command-line help to stderr. `prog` is the program name shown
// in the usage line.
// The SNR reference values follow the formula used by snr_to_ber():
// BER = 0.5 * erfc(sqrt(10^(dB/10))).
static void print_usage(const char *prog) {
    fprintf(stderr, "TAV-DT Noise Injector v1.0\n");
    fprintf(stderr, "Simulates QPSK satellite transmission channel noise\n\n");
    fprintf(stderr, "Usage: %s -i input.tavdt -o output.tavdt --snr N [options]\n\n", prog);
    fprintf(stderr, "Required:\n");
    fprintf(stderr, " -i, --input FILE Input TAV-DT file\n");
    fprintf(stderr, " -o, --output FILE Output corrupted file\n");
    fprintf(stderr, " --snr N Signal-to-noise ratio in dB (0-30)\n");
    fprintf(stderr, "\nOptional:\n");
    fprintf(stderr, " --burst N Burst events per minute (default: 0)\n");
    fprintf(stderr, " --burst-ber N BER during burst events (default: 0.5)\n");
    fprintf(stderr, " --bitrate N Stream bitrate in Mbps for timing (default: 2.0)\n");
    fprintf(stderr, " --seed N RNG seed for reproducibility\n");
    fprintf(stderr, " -v, --verbose Show detailed progress\n");
    fprintf(stderr, " -h, --help Show this help\n");
    fprintf(stderr, "\nSNR Reference:\n");
    fprintf(stderr, " 0 dB: Worst case (BER ~7.9e-2, 1 in 13 bits)\n");
    fprintf(stderr, " 6 dB: Poor but working (BER ~2.4e-3)\n");
    fprintf(stderr, " 9 dB: Typical working (BER ~3.4e-5)\n");
    fprintf(stderr, " 12 dB: Good condition (BER ~9.0e-9)\n");
    fprintf(stderr, " 30 dB: Near-perfect (BER effectively 0)\n");
}
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
|
||||||
const char *input_file = NULL;
|
|
||||||
const char *output_file = NULL;
|
|
||||||
double snr_db = -1;
|
|
||||||
double bursts_per_minute = 0;
|
|
||||||
double burst_ber = 0.5;
|
|
||||||
uint64_t seed = 0;
|
|
||||||
int seed_provided = 0;
|
|
||||||
int verbose = 0;
|
|
||||||
|
|
||||||
static struct option long_options[] = {
|
|
||||||
{"input", required_argument, 0, 'i'},
|
|
||||||
{"output", required_argument, 0, 'o'},
|
|
||||||
{"snr", required_argument, 0, 's'},
|
|
||||||
{"burst", required_argument, 0, 'b'},
|
|
||||||
{"burst-ber", required_argument, 0, 'B'},
|
|
||||||
{"bitrate", required_argument, 0, 'r'},
|
|
||||||
{"seed", required_argument, 0, 'S'},
|
|
||||||
{"verbose", no_argument, 0, 'v'},
|
|
||||||
{"help", no_argument, 0, 'h'},
|
|
||||||
{0, 0, 0, 0}
|
|
||||||
};
|
|
||||||
|
|
||||||
int opt;
|
|
||||||
while ((opt = getopt_long(argc, argv, "i:o:vh", long_options, NULL)) != -1) {
|
|
||||||
switch (opt) {
|
|
||||||
case 'i':
|
|
||||||
input_file = optarg;
|
|
||||||
break;
|
|
||||||
case 'o':
|
|
||||||
output_file = optarg;
|
|
||||||
break;
|
|
||||||
case 's':
|
|
||||||
snr_db = atof(optarg);
|
|
||||||
break;
|
|
||||||
case 'b':
|
|
||||||
bursts_per_minute = atof(optarg);
|
|
||||||
break;
|
|
||||||
case 'B':
|
|
||||||
burst_ber = atof(optarg);
|
|
||||||
break;
|
|
||||||
case 'r':
|
|
||||||
g_bitrate_bps = atof(optarg) * 1000000.0; // Convert Mbps to bps
|
|
||||||
break;
|
|
||||||
case 'S':
|
|
||||||
seed = strtoull(optarg, NULL, 10);
|
|
||||||
seed_provided = 1;
|
|
||||||
break;
|
|
||||||
case 'v':
|
|
||||||
verbose = 1;
|
|
||||||
break;
|
|
||||||
case 'h':
|
|
||||||
default:
|
|
||||||
print_usage(argv[0]);
|
|
||||||
return opt == 'h' ? 0 : 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Validate arguments
|
|
||||||
if (!input_file || !output_file || snr_db < 0) {
|
|
||||||
fprintf(stderr, "Error: Missing required arguments\n\n");
|
|
||||||
print_usage(argv[0]);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (burst_ber < 0 || burst_ber > 1) {
|
|
||||||
fprintf(stderr, "Error: --burst-ber must be between 0 and 1\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialize RNG
|
|
||||||
if (!seed_provided) {
|
|
||||||
seed = (uint64_t)time(NULL) ^ ((uint64_t)clock() << 32);
|
|
||||||
}
|
|
||||||
// Ensure seed is not zero (xorshift64 requirement)
|
|
||||||
if (seed == 0) seed = 0x853c49e6748fea9bULL;
|
|
||||||
// Warm up the generator (small seeds produce poor initial values)
|
|
||||||
for (int i = 0; i < 10; i++) xorshift64(&seed);
|
|
||||||
|
|
||||||
// Calculate BER from SNR
|
|
||||||
double ber = snr_to_ber(snr_db);
|
|
||||||
|
|
||||||
// Open files
|
|
||||||
FILE *in_fp = fopen(input_file, "rb");
|
|
||||||
if (!in_fp) {
|
|
||||||
fprintf(stderr, "Error: Cannot open input file: %s\n", input_file);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
FILE *out_fp = fopen(output_file, "wb");
|
|
||||||
if (!out_fp) {
|
|
||||||
fprintf(stderr, "Error: Cannot open output file: %s\n", output_file);
|
|
||||||
fclose(in_fp);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Print header info
|
|
||||||
fprintf(stderr, "TAV-DT Noise Injector v1.0\n");
|
|
||||||
fprintf(stderr, "Input: %s\n", input_file);
|
|
||||||
fprintf(stderr, "Output: %s\n", output_file);
|
|
||||||
fprintf(stderr, "SNR: %.1f dB (BER: %.2e)\n", snr_db, ber);
|
|
||||||
if (bursts_per_minute > 0) {
|
|
||||||
fprintf(stderr, "Burst: %.1f events/minute (burst BER: %.2f)\n",
|
|
||||||
bursts_per_minute, burst_ber);
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "Burst: disabled\n");
|
|
||||||
}
|
|
||||||
if (seed_provided) {
|
|
||||||
fprintf(stderr, "Seed: %llu\n", (unsigned long long)seed);
|
|
||||||
}
|
|
||||||
fprintf(stderr, "\n");
|
|
||||||
|
|
||||||
// Initialize burst state
|
|
||||||
burst_state_t burst;
|
|
||||||
burst_state_init(&burst, bursts_per_minute, burst_ber, verbose, &seed);
|
|
||||||
|
|
||||||
// Allocate buffer for streaming processing
|
|
||||||
uint8_t *buffer = malloc(BUFFER_SIZE);
|
|
||||||
if (!buffer) {
|
|
||||||
fprintf(stderr, "Error: Cannot allocate buffer\n");
|
|
||||||
fclose(in_fp);
|
|
||||||
fclose(out_fp);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Processing statistics
|
|
||||||
long long total_bytes = 0;
|
|
||||||
long long bits_flipped_bg = 0;
|
|
||||||
long long bits_flipped_burst = 0;
|
|
||||||
int chunk_count = 0;
|
|
||||||
|
|
||||||
// Process file in chunks
|
|
||||||
size_t bytes_read;
|
|
||||||
while ((bytes_read = fread(buffer, 1, BUFFER_SIZE, in_fp)) > 0) {
|
|
||||||
// Calculate time delta for this chunk (for burst scheduling)
|
|
||||||
double delta_sec = bytes_to_time(bytes_read);
|
|
||||||
burst_state_advance_time(&burst, delta_sec, &seed);
|
|
||||||
|
|
||||||
// Apply noise to chunk
|
|
||||||
bits_flipped_bg += apply_background_noise(buffer, bytes_read, ber, &seed);
|
|
||||||
bits_flipped_burst += apply_burst_noise(buffer, bytes_read, &burst, &seed);
|
|
||||||
|
|
||||||
// Write corrupted chunk
|
|
||||||
fwrite(buffer, 1, bytes_read, out_fp);
|
|
||||||
|
|
||||||
total_bytes += bytes_read;
|
|
||||||
chunk_count++;
|
|
||||||
|
|
||||||
if (verbose && chunk_count % 10 == 0) {
|
|
||||||
double time_pos = bytes_to_time(total_bytes);
|
|
||||||
fprintf(stderr, "\rProcessed %.1f MB (%.1f sec)...",
|
|
||||||
total_bytes / (1024.0 * 1024.0), time_pos);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (verbose) {
|
|
||||||
fprintf(stderr, "\r \r");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clean up
|
|
||||||
free(buffer);
|
|
||||||
fclose(in_fp);
|
|
||||||
fclose(out_fp);
|
|
||||||
|
|
||||||
// Print summary
|
|
||||||
double duration_sec = bytes_to_time(total_bytes);
|
|
||||||
long long total_bits = total_bytes * 8;
|
|
||||||
|
|
||||||
fprintf(stderr, "Complete.\n");
|
|
||||||
fprintf(stderr, " Total bytes: %lld (%.1f sec @ ~%.1f Mbps)\n",
|
|
||||||
total_bytes, duration_sec, g_bitrate_bps / 1000000.0);
|
|
||||||
fprintf(stderr, " Background bits flipped: %lld (%.4f%%)\n",
|
|
||||||
bits_flipped_bg, 100.0 * bits_flipped_bg / total_bits);
|
|
||||||
if (bursts_per_minute > 0) {
|
|
||||||
fprintf(stderr, " Burst events: %d (%d bytes total)\n",
|
|
||||||
burst.burst_count, burst.total_burst_bytes);
|
|
||||||
fprintf(stderr, " Burst bits flipped: %lld\n", bits_flipped_burst);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
@@ -1,328 +0,0 @@
|
|||||||
// Test mesh warp round-trip consistency
|
|
||||||
// Warps a frame forward, then backward, and checks if we get the original back
|
|
||||||
// This is critical for MC-lifting invertibility
|
|
||||||
|
|
||||||
#include <opencv2/opencv.hpp>
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstring>
|
|
||||||
#include <cmath>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <ctime>
|
|
||||||
|
|
||||||
// Mesh/motion functions provided by the encoder, declared with C linkage.
// NOTE(review): the semantics described below are inferred from the names
// and from how this test calls them - confirm against the encoder sources.
extern "C" {

// Takes two RGB frames of width x height and returns two flow arrays through
// out_flow_x / out_flow_y (presumably allocated by the callee - TODO confirm
// ownership).
void estimate_motion_optical_flow(
    const unsigned char *frame1_rgb,
    const unsigned char *frame2_rgb,
    int width,
    int height,
    float **out_flow_x,
    float **out_flow_y
);

// Fills mesh_dx / mesh_dy (mesh_w * mesh_h entries each, supplied by the
// caller) from a flow field of width x height.
void build_mesh_from_flow(
    const float *flow_x,
    const float *flow_y,
    int width,
    int height,
    int mesh_w,
    int mesh_h,
    int16_t *mesh_dx,
    int16_t *mesh_dy
);

// Smooths the mesh displacement arrays in place.
void smooth_mesh_laplacian(
    int16_t *mesh_dx,
    int16_t *mesh_dy,
    int mesh_width,
    int mesh_height,
    float smoothness,
    int iterations
);

}
|
|
||||||
|
|
||||||
// Mesh warp with bilinear interpolation (translation only)
|
|
||||||
static void apply_mesh_warp_rgb(
|
|
||||||
const cv::Mat &src,
|
|
||||||
cv::Mat &dst,
|
|
||||||
const int16_t *mesh_dx,
|
|
||||||
const int16_t *mesh_dy,
|
|
||||||
int mesh_w, int mesh_h
|
|
||||||
) {
|
|
||||||
int width = src.cols;
|
|
||||||
int height = src.rows;
|
|
||||||
int cell_w = width / mesh_w;
|
|
||||||
int cell_h = height / mesh_h;
|
|
||||||
|
|
||||||
dst = cv::Mat(height, width, CV_8UC3);
|
|
||||||
|
|
||||||
for (int y = 0; y < height; y++) {
|
|
||||||
for (int x = 0; x < width; x++) {
|
|
||||||
int cell_x = x / cell_w;
|
|
||||||
int cell_y = y / cell_h;
|
|
||||||
|
|
||||||
cell_x = std::min(cell_x, mesh_w - 2);
|
|
||||||
cell_y = std::min(cell_y, mesh_h - 2);
|
|
||||||
|
|
||||||
int idx_00 = cell_y * mesh_w + cell_x;
|
|
||||||
int idx_10 = idx_00 + 1;
|
|
||||||
int idx_01 = (cell_y + 1) * mesh_w + cell_x;
|
|
||||||
int idx_11 = idx_01 + 1;
|
|
||||||
|
|
||||||
float cp_x0 = cell_x * cell_w + cell_w / 2.0f;
|
|
||||||
float cp_y0 = cell_y * cell_h + cell_h / 2.0f;
|
|
||||||
float cp_x1 = (cell_x + 1) * cell_w + cell_w / 2.0f;
|
|
||||||
float cp_y1 = (cell_y + 1) * cell_h + cell_h / 2.0f;
|
|
||||||
|
|
||||||
float alpha = (x - cp_x0) / (cp_x1 - cp_x0);
|
|
||||||
float beta = (y - cp_y0) / (cp_y1 - cp_y0);
|
|
||||||
alpha = std::max(0.0f, std::min(1.0f, alpha));
|
|
||||||
beta = std::max(0.0f, std::min(1.0f, beta));
|
|
||||||
|
|
||||||
float dx = (1 - alpha) * (1 - beta) * (mesh_dx[idx_00] / 8.0f) +
|
|
||||||
alpha * (1 - beta) * (mesh_dx[idx_10] / 8.0f) +
|
|
||||||
(1 - alpha) * beta * (mesh_dx[idx_01] / 8.0f) +
|
|
||||||
alpha * beta * (mesh_dx[idx_11] / 8.0f);
|
|
||||||
|
|
||||||
float dy = (1 - alpha) * (1 - beta) * (mesh_dy[idx_00] / 8.0f) +
|
|
||||||
alpha * (1 - beta) * (mesh_dy[idx_10] / 8.0f) +
|
|
||||||
(1 - alpha) * beta * (mesh_dy[idx_01] / 8.0f) +
|
|
||||||
alpha * beta * (mesh_dy[idx_11] / 8.0f);
|
|
||||||
|
|
||||||
float src_x = x + dx;
|
|
||||||
float src_y = y + dy;
|
|
||||||
|
|
||||||
int sx0 = (int)floorf(src_x);
|
|
||||||
int sy0 = (int)floorf(src_y);
|
|
||||||
int sx1 = sx0 + 1;
|
|
||||||
int sy1 = sy0 + 1;
|
|
||||||
|
|
||||||
sx0 = std::max(0, std::min(width - 1, sx0));
|
|
||||||
sy0 = std::max(0, std::min(height - 1, sy0));
|
|
||||||
sx1 = std::max(0, std::min(width - 1, sx1));
|
|
||||||
sy1 = std::max(0, std::min(height - 1, sy1));
|
|
||||||
|
|
||||||
float fx = src_x - sx0;
|
|
||||||
float fy = src_y - sy0;
|
|
||||||
|
|
||||||
for (int c = 0; c < 3; c++) {
|
|
||||||
float val_00 = src.at<cv::Vec3b>(sy0, sx0)[c];
|
|
||||||
float val_10 = src.at<cv::Vec3b>(sy0, sx1)[c];
|
|
||||||
float val_01 = src.at<cv::Vec3b>(sy1, sx0)[c];
|
|
||||||
float val_11 = src.at<cv::Vec3b>(sy1, sx1)[c];
|
|
||||||
|
|
||||||
float val = (1 - fx) * (1 - fy) * val_00 +
|
|
||||||
fx * (1 - fy) * val_10 +
|
|
||||||
(1 - fx) * fy * val_01 +
|
|
||||||
fx * fy * val_11;
|
|
||||||
|
|
||||||
dst.at<cv::Vec3b>(y, x)[c] = (unsigned char)std::max(0.0f, std::min(255.0f, val));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
|
||||||
const char* video_file = (argc > 1) ? argv[1] : "test_video.mp4";
|
|
||||||
int num_tests = (argc > 2) ? atoi(argv[2]) : 5;
|
|
||||||
|
|
||||||
printf("Opening video: %s\n", video_file);
|
|
||||||
cv::VideoCapture cap(video_file);
|
|
||||||
|
|
||||||
if (!cap.isOpened()) {
|
|
||||||
fprintf(stderr, "Error: Cannot open video file\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int total_frames = (int)cap.get(cv::CAP_PROP_FRAME_COUNT);
|
|
||||||
int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
|
|
||||||
int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);
|
|
||||||
|
|
||||||
printf("Video: %dx%d, %d frames\n", width, height, total_frames);
|
|
||||||
|
|
||||||
// Mesh dimensions (32×32 cells)
|
|
||||||
int mesh_cell_size = 32;
|
|
||||||
int mesh_w = (width + mesh_cell_size - 1) / mesh_cell_size;
|
|
||||||
int mesh_h = (height + mesh_cell_size - 1) / mesh_cell_size;
|
|
||||||
if (mesh_w < 2) mesh_w = 2;
|
|
||||||
if (mesh_h < 2) mesh_h = 2;
|
|
||||||
|
|
||||||
printf("Mesh: %dx%d (approx %dx%d px cells)\n\n",
|
|
||||||
mesh_w, mesh_h, width / mesh_w, height / mesh_h);
|
|
||||||
|
|
||||||
float smoothness = 0.5f;
|
|
||||||
int smooth_iterations = 8;
|
|
||||||
|
|
||||||
srand(time(NULL));
|
|
||||||
|
|
||||||
double total_forward_psnr = 0.0;
|
|
||||||
double total_roundtrip_psnr = 0.0;
|
|
||||||
double total_half_roundtrip_psnr = 0.0;
|
|
||||||
|
|
||||||
for (int test = 0; test < num_tests; test++) {
|
|
||||||
int frame_num = 5 + rand() % (total_frames - 10);
|
|
||||||
|
|
||||||
printf("[Test %d/%d] Frame pair %d → %d\n", test + 1, num_tests, frame_num - 1, frame_num);
|
|
||||||
|
|
||||||
cap.set(cv::CAP_PROP_POS_FRAMES, frame_num - 1);
|
|
||||||
cv::Mat frame0, frame1;
|
|
||||||
cap >> frame0;
|
|
||||||
cap >> frame1;
|
|
||||||
|
|
||||||
if (frame0.empty() || frame1.empty()) {
|
|
||||||
fprintf(stderr, "Error reading frames\n");
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
cv::Mat frame0_rgb, frame1_rgb;
|
|
||||||
cv::cvtColor(frame0, frame0_rgb, cv::COLOR_BGR2RGB);
|
|
||||||
cv::cvtColor(frame1, frame1_rgb, cv::COLOR_BGR2RGB);
|
|
||||||
|
|
||||||
// Compute mesh (F0 → F1)
|
|
||||||
float *flow_x = nullptr, *flow_y = nullptr;
|
|
||||||
estimate_motion_optical_flow(frame0_rgb.data, frame1_rgb.data,
|
|
||||||
width, height, &flow_x, &flow_y);
|
|
||||||
|
|
||||||
int16_t *mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
|
|
||||||
int16_t *mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
|
|
||||||
build_mesh_from_flow(flow_x, flow_y, width, height, mesh_w, mesh_h, mesh_dx, mesh_dy);
|
|
||||||
smooth_mesh_laplacian(mesh_dx, mesh_dy, mesh_w, mesh_h, smoothness, smooth_iterations);
|
|
||||||
|
|
||||||
// Create inverted mesh
|
|
||||||
int16_t *inv_mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
|
|
||||||
int16_t *inv_mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
|
|
||||||
for (int i = 0; i < mesh_w * mesh_h; i++) {
|
|
||||||
inv_mesh_dx[i] = -mesh_dx[i];
|
|
||||||
inv_mesh_dy[i] = -mesh_dy[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create half-mesh for symmetric lifting test
|
|
||||||
int16_t *half_mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
|
|
||||||
int16_t *half_mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
|
|
||||||
int16_t *neg_half_mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
|
|
||||||
int16_t *neg_half_mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
|
|
||||||
for (int i = 0; i < mesh_w * mesh_h; i++) {
|
|
||||||
half_mesh_dx[i] = mesh_dx[i] / 2;
|
|
||||||
half_mesh_dy[i] = mesh_dy[i] / 2;
|
|
||||||
neg_half_mesh_dx[i] = -half_mesh_dx[i];
|
|
||||||
neg_half_mesh_dy[i] = -half_mesh_dy[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
// TEST 1: Full forward warp quality (F0 → F1)
|
|
||||||
cv::Mat warped_forward;
|
|
||||||
apply_mesh_warp_rgb(frame0, warped_forward, mesh_dx, mesh_dy, mesh_w, mesh_h);
|
|
||||||
|
|
||||||
double forward_mse = 0.0;
|
|
||||||
for (int y = 0; y < height; y++) {
|
|
||||||
for (int x = 0; x < width; x++) {
|
|
||||||
for (int c = 0; c < 3; c++) {
|
|
||||||
double diff = (double)warped_forward.at<cv::Vec3b>(y, x)[c] -
|
|
||||||
(double)frame1.at<cv::Vec3b>(y, x)[c];
|
|
||||||
forward_mse += diff * diff;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
forward_mse /= (width * height * 3);
|
|
||||||
double forward_psnr = (forward_mse > 0) ? 10.0 * log10(255.0 * 255.0 / forward_mse) : 999.0;
|
|
||||||
total_forward_psnr += forward_psnr;
|
|
||||||
|
|
||||||
// TEST 2: Full round-trip (F0 → forward → backward → F0')
|
|
||||||
cv::Mat roundtrip;
|
|
||||||
apply_mesh_warp_rgb(warped_forward, roundtrip, inv_mesh_dx, inv_mesh_dy, mesh_w, mesh_h);
|
|
||||||
|
|
||||||
double roundtrip_mse = 0.0;
|
|
||||||
for (int y = 0; y < height; y++) {
|
|
||||||
for (int x = 0; x < width; x++) {
|
|
||||||
for (int c = 0; c < 3; c++) {
|
|
||||||
double diff = (double)roundtrip.at<cv::Vec3b>(y, x)[c] -
|
|
||||||
(double)frame0.at<cv::Vec3b>(y, x)[c];
|
|
||||||
roundtrip_mse += diff * diff;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
roundtrip_mse /= (width * height * 3);
|
|
||||||
double roundtrip_psnr = (roundtrip_mse > 0) ? 10.0 * log10(255.0 * 255.0 / roundtrip_mse) : 999.0;
|
|
||||||
total_roundtrip_psnr += roundtrip_psnr;
|
|
||||||
|
|
||||||
// TEST 3: Half-step symmetric round-trip (MC-lifting style)
|
|
||||||
// F0 → +½mesh, then → -½mesh (should return to F0)
|
|
||||||
cv::Mat half_forward, half_roundtrip;
|
|
||||||
apply_mesh_warp_rgb(frame0, half_forward, half_mesh_dx, half_mesh_dy, mesh_w, mesh_h);
|
|
||||||
apply_mesh_warp_rgb(half_forward, half_roundtrip, neg_half_mesh_dx, neg_half_mesh_dy, mesh_w, mesh_h);
|
|
||||||
|
|
||||||
double half_roundtrip_mse = 0.0;
|
|
||||||
for (int y = 0; y < height; y++) {
|
|
||||||
for (int x = 0; x < width; x++) {
|
|
||||||
for (int c = 0; c < 3; c++) {
|
|
||||||
double diff = (double)half_roundtrip.at<cv::Vec3b>(y, x)[c] -
|
|
||||||
(double)frame0.at<cv::Vec3b>(y, x)[c];
|
|
||||||
half_roundtrip_mse += diff * diff;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
half_roundtrip_mse /= (width * height * 3);
|
|
||||||
double half_roundtrip_psnr = (half_roundtrip_mse > 0) ? 10.0 * log10(255.0 * 255.0 / half_roundtrip_mse) : 999.0;
|
|
||||||
total_half_roundtrip_psnr += half_roundtrip_psnr;
|
|
||||||
|
|
||||||
printf(" Forward warp (F0→F1): PSNR = %.2f dB\n", forward_psnr);
|
|
||||||
printf(" Full round-trip (F0→F0'): PSNR = %.2f dB\n", roundtrip_psnr);
|
|
||||||
printf(" Half round-trip (±½mesh): PSNR = %.2f dB\n", half_roundtrip_psnr);
|
|
||||||
|
|
||||||
// Compute motion stats
|
|
||||||
float avg_motion = 0.0f, max_motion = 0.0f;
|
|
||||||
for (int i = 0; i < mesh_w * mesh_h; i++) {
|
|
||||||
float dx = mesh_dx[i] / 8.0f;
|
|
||||||
float dy = mesh_dy[i] / 8.0f;
|
|
||||||
float motion = sqrtf(dx * dx + dy * dy);
|
|
||||||
avg_motion += motion;
|
|
||||||
if (motion > max_motion) max_motion = motion;
|
|
||||||
}
|
|
||||||
avg_motion /= (mesh_w * mesh_h);
|
|
||||||
printf(" Motion: avg=%.2f px, max=%.2f px\n\n", avg_motion, max_motion);
|
|
||||||
|
|
||||||
// Save visualisation for worst case
|
|
||||||
if (test == 0 || roundtrip_psnr < 30.0) {
|
|
||||||
char filename[256];
|
|
||||||
sprintf(filename, "roundtrip_%04d_original.png", frame_num);
|
|
||||||
cv::imwrite(filename, frame0);
|
|
||||||
sprintf(filename, "roundtrip_%04d_forward.png", frame_num);
|
|
||||||
cv::imwrite(filename, warped_forward);
|
|
||||||
sprintf(filename, "roundtrip_%04d_roundtrip.png", frame_num);
|
|
||||||
cv::imwrite(filename, roundtrip);
|
|
||||||
|
|
||||||
// Difference images
|
|
||||||
cv::Mat diff_roundtrip = cv::Mat::zeros(height, width, CV_8UC3);
|
|
||||||
for (int y = 0; y < height; y++) {
|
|
||||||
for (int x = 0; x < width; x++) {
|
|
||||||
for (int c = 0; c < 3; c++) {
|
|
||||||
int diff = abs((int)roundtrip.at<cv::Vec3b>(y, x)[c] -
|
|
||||||
(int)frame0.at<cv::Vec3b>(y, x)[c]);
|
|
||||||
diff_roundtrip.at<cv::Vec3b>(y, x)[c] = std::min(diff * 5, 255);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
sprintf(filename, "roundtrip_%04d_diff.png", frame_num);
|
|
||||||
cv::imwrite(filename, diff_roundtrip);
|
|
||||||
printf(" Saved visualisation: roundtrip_%04d_*.png\n\n", frame_num);
|
|
||||||
}
|
|
||||||
|
|
||||||
free(flow_x);
|
|
||||||
free(flow_y);
|
|
||||||
free(mesh_dx);
|
|
||||||
free(mesh_dy);
|
|
||||||
free(inv_mesh_dx);
|
|
||||||
free(inv_mesh_dy);
|
|
||||||
free(half_mesh_dx);
|
|
||||||
free(half_mesh_dy);
|
|
||||||
free(neg_half_mesh_dx);
|
|
||||||
free(neg_half_mesh_dy);
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("===========================================\n");
|
|
||||||
printf("Average Results (%d tests):\n", num_tests);
|
|
||||||
printf(" Forward warp quality: %.2f dB\n", total_forward_psnr / num_tests);
|
|
||||||
printf(" Full round-trip error: %.2f dB\n", total_roundtrip_psnr / num_tests);
|
|
||||||
printf(" Half round-trip error: %.2f dB\n", total_half_roundtrip_psnr / num_tests);
|
|
||||||
printf("===========================================\n\n");
|
|
||||||
|
|
||||||
if (total_roundtrip_psnr / num_tests < 35.0) {
|
|
||||||
printf("WARNING: Round-trip PSNR < 35 dB indicates poor invertibility!\n");
|
|
||||||
printf("This will cause MC-lifting to accumulate errors and hurt compression.\n");
|
|
||||||
printf("Bilinear interpolation artifacts are likely the culprit.\n");
|
|
||||||
} else {
|
|
||||||
printf("Round-trip consistency looks acceptable (>35 dB).\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
cap.release();
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
@@ -1,422 +0,0 @@
|
|||||||
// Visual unit test for mesh warping with hierarchical block matching and affine estimation
|
|
||||||
// Picks 5 random frames from test_video.mp4, warps prev frame to current frame using mesh,
|
|
||||||
// and saves both warped and target frames for visual comparison
|
|
||||||
// Now includes: hierarchical diamond search, Laplacian smoothing, and selective affine transforms
|
|
||||||
|
|
||||||
#include <opencv2/opencv.hpp>
|
|
||||||
#include <opencv2/video/tracking.hpp>
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstring>
|
|
||||||
#include <cmath>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <ctime>
|
|
||||||
|
|
||||||
// Include the mesh functions from encoder
|
|
||||||
extern "C" {

// Motion estimation between two frames of width x height pixels.
// Results are returned through out_flow_x / out_flow_y; callers in this file
// release both buffers with free().
// NOTE(review): the frame buffers are presumably packed 8-bit RGB (callers
// pass cv::Mat::data after a BGR->RGB conversion) - confirm against the encoder.
void estimate_motion_optical_flow(const unsigned char *frame1_rgb,
                                  const unsigned char *frame2_rgb,
                                  int width, int height,
                                  float **out_flow_x, float **out_flow_y);

// Fills mesh_dx / mesh_dy (mesh_w * mesh_h entries each, allocated by the
// caller) from the flow field.
void build_mesh_from_flow(const float *flow_x, const float *flow_y,
                          int width, int height,
                          int mesh_w, int mesh_h,
                          int16_t *mesh_dx, int16_t *mesh_dy);

// Smooths the mesh vectors in place using the given weight and number of
// iterations.
void smooth_mesh_laplacian(int16_t *mesh_dx, int16_t *mesh_dy,
                           int mesh_width, int mesh_height,
                           float smoothness, int iterations);

// Per-cell affine estimation. Callers in this file treat a non-zero return
// value as "use the affine model for this cell" and read the translation
// (out_tx, out_ty) and the 2x2 matrix (out_a11 .. out_a22) afterwards.
// NOTE(review): whether the outputs are written when 0 is returned is not
// visible from here - confirm against the encoder.
int estimate_cell_affine(const float *flow_x, const float *flow_y,
                         int width, int height,
                         int cell_x, int cell_y,
                         int cell_w, int cell_h,
                         float threshold,
                         int16_t *out_tx, int16_t *out_ty,
                         int16_t *out_a11, int16_t *out_a12,
                         int16_t *out_a21, int16_t *out_a22);

}  // extern "C"
|
|
||||||
|
|
||||||
// Mesh warp with bilinear interpolation and optional affine support
|
|
||||||
static void apply_mesh_warp_rgb(
|
|
||||||
const cv::Mat &src, // Input BGR image
|
|
||||||
cv::Mat &dst, // Output warped BGR image
|
|
||||||
const int16_t *mesh_dx, // Mesh motion vectors (1/8 pixel)
|
|
||||||
const int16_t *mesh_dy,
|
|
||||||
const uint8_t *affine_mask, // 1=affine, 0=translation
|
|
||||||
const int16_t *affine_a11,
|
|
||||||
const int16_t *affine_a12,
|
|
||||||
const int16_t *affine_a21,
|
|
||||||
const int16_t *affine_a22,
|
|
||||||
int mesh_w, int mesh_h
|
|
||||||
) {
|
|
||||||
int width = src.cols;
|
|
||||||
int height = src.rows;
|
|
||||||
int cell_w = width / mesh_w;
|
|
||||||
int cell_h = height / mesh_h;
|
|
||||||
|
|
||||||
dst = cv::Mat(height, width, CV_8UC3);
|
|
||||||
|
|
||||||
for (int y = 0; y < height; y++) {
|
|
||||||
for (int x = 0; x < width; x++) {
|
|
||||||
int cell_x = x / cell_w;
|
|
||||||
int cell_y = y / cell_h;
|
|
||||||
|
|
||||||
// Clamp to valid mesh range
|
|
||||||
cell_x = std::min(cell_x, mesh_w - 2);
|
|
||||||
cell_y = std::min(cell_y, mesh_h - 2);
|
|
||||||
|
|
||||||
// Four corner control points
|
|
||||||
int idx_00 = cell_y * mesh_w + cell_x;
|
|
||||||
int idx_10 = idx_00 + 1;
|
|
||||||
int idx_01 = (cell_y + 1) * mesh_w + cell_x;
|
|
||||||
int idx_11 = idx_01 + 1;
|
|
||||||
|
|
||||||
// Control point positions
|
|
||||||
float cp_x0 = cell_x * cell_w + cell_w / 2.0f;
|
|
||||||
float cp_y0 = cell_y * cell_h + cell_h / 2.0f;
|
|
||||||
float cp_x1 = (cell_x + 1) * cell_w + cell_w / 2.0f;
|
|
||||||
float cp_y1 = (cell_y + 1) * cell_h + cell_h / 2.0f;
|
|
||||||
|
|
||||||
// Local coordinates
|
|
||||||
float alpha = (x - cp_x0) / (cp_x1 - cp_x0);
|
|
||||||
float beta = (y - cp_y0) / (cp_y1 - cp_y0);
|
|
||||||
alpha = std::max(0.0f, std::min(1.0f, alpha));
|
|
||||||
beta = std::max(0.0f, std::min(1.0f, beta));
|
|
||||||
|
|
||||||
// Bilinear interpolation of motion vectors
|
|
||||||
float dx = (1 - alpha) * (1 - beta) * (mesh_dx[idx_00] / 8.0f) +
|
|
||||||
alpha * (1 - beta) * (mesh_dx[idx_10] / 8.0f) +
|
|
||||||
(1 - alpha) * beta * (mesh_dx[idx_01] / 8.0f) +
|
|
||||||
alpha * beta * (mesh_dx[idx_11] / 8.0f);
|
|
||||||
|
|
||||||
float dy = (1 - alpha) * (1 - beta) * (mesh_dy[idx_00] / 8.0f) +
|
|
||||||
alpha * (1 - beta) * (mesh_dy[idx_10] / 8.0f) +
|
|
||||||
(1 - alpha) * beta * (mesh_dy[idx_01] / 8.0f) +
|
|
||||||
alpha * beta * (mesh_dy[idx_11] / 8.0f);
|
|
||||||
|
|
||||||
// Check if we're using affine in this cell
|
|
||||||
// For simplicity, just use the top-left corner's affine parameters
|
|
||||||
int cell_idx = cell_y * mesh_w + cell_x;
|
|
||||||
if (affine_mask && affine_mask[cell_idx]) {
|
|
||||||
// Apply affine transform
|
|
||||||
// Compute position relative to cell center
|
|
||||||
float rel_x = x - (cell_x * cell_w + cell_w / 2.0f);
|
|
||||||
float rel_y = y - (cell_y * cell_h + cell_h / 2.0f);
|
|
||||||
|
|
||||||
float a11 = affine_a11[cell_idx] / 256.0f;
|
|
||||||
float a12 = affine_a12[cell_idx] / 256.0f;
|
|
||||||
float a21 = affine_a21[cell_idx] / 256.0f;
|
|
||||||
float a22 = affine_a22[cell_idx] / 256.0f;
|
|
||||||
|
|
||||||
// Affine warp: [x'] = [a11 a12][x] + [dx]
|
|
||||||
// [y'] [a21 a22][y] [dy]
|
|
||||||
dx = a11 * rel_x + a12 * rel_y + dx;
|
|
||||||
dy = a21 * rel_x + a22 * rel_y + dy;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Source coordinates (inverse warp)
|
|
||||||
float src_x = x + dx;
|
|
||||||
float src_y = y + dy;
|
|
||||||
|
|
||||||
// Bilinear interpolation
|
|
||||||
int sx0 = (int)floorf(src_x);
|
|
||||||
int sy0 = (int)floorf(src_y);
|
|
||||||
int sx1 = sx0 + 1;
|
|
||||||
int sy1 = sy0 + 1;
|
|
||||||
|
|
||||||
sx0 = std::max(0, std::min(width - 1, sx0));
|
|
||||||
sy0 = std::max(0, std::min(height - 1, sy0));
|
|
||||||
sx1 = std::max(0, std::min(width - 1, sx1));
|
|
||||||
sy1 = std::max(0, std::min(height - 1, sy1));
|
|
||||||
|
|
||||||
float fx = src_x - sx0;
|
|
||||||
float fy = src_y - sy0;
|
|
||||||
|
|
||||||
// Interpolate each channel
|
|
||||||
for (int c = 0; c < 3; c++) {
|
|
||||||
float val_00 = src.at<cv::Vec3b>(sy0, sx0)[c];
|
|
||||||
float val_10 = src.at<cv::Vec3b>(sy0, sx1)[c];
|
|
||||||
float val_01 = src.at<cv::Vec3b>(sy1, sx0)[c];
|
|
||||||
float val_11 = src.at<cv::Vec3b>(sy1, sx1)[c];
|
|
||||||
|
|
||||||
float val = (1 - fx) * (1 - fy) * val_00 +
|
|
||||||
fx * (1 - fy) * val_10 +
|
|
||||||
(1 - fx) * fy * val_01 +
|
|
||||||
fx * fy * val_11;
|
|
||||||
|
|
||||||
dst.at<cv::Vec3b>(y, x)[c] = (unsigned char)std::max(0.0f, std::min(255.0f, val));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create visualisation overlay showing affine cells
|
|
||||||
static void create_affine_overlay(
|
|
||||||
cv::Mat &img,
|
|
||||||
const uint8_t *affine_mask,
|
|
||||||
int mesh_w, int mesh_h
|
|
||||||
) {
|
|
||||||
int width = img.cols;
|
|
||||||
int height = img.rows;
|
|
||||||
int cell_w = width / mesh_w;
|
|
||||||
int cell_h = height / mesh_h;
|
|
||||||
|
|
||||||
for (int my = 0; my < mesh_h; my++) {
|
|
||||||
for (int mx = 0; mx < mesh_w; mx++) {
|
|
||||||
int idx = my * mesh_w + mx;
|
|
||||||
|
|
||||||
if (affine_mask[idx]) {
|
|
||||||
// Draw green rectangle for affine cells
|
|
||||||
int x0 = mx * cell_w;
|
|
||||||
int y0 = my * cell_h;
|
|
||||||
int x1 = (mx + 1) * cell_w;
|
|
||||||
int y1 = (my + 1) * cell_h;
|
|
||||||
|
|
||||||
cv::rectangle(img,
|
|
||||||
cv::Point(x0, y0),
|
|
||||||
cv::Point(x1, y1),
|
|
||||||
cv::Scalar(0, 255, 0), 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
|
||||||
const char* video_file = (argc > 1) ? argv[1] : "test_video.mp4";
|
|
||||||
int num_test_frames = (argc > 2) ? atoi(argv[2]) : 5;
|
|
||||||
|
|
||||||
printf("Opening video: %s\n", video_file);
|
|
||||||
cv::VideoCapture cap(video_file);
|
|
||||||
|
|
||||||
if (!cap.isOpened()) {
|
|
||||||
fprintf(stderr, "Error: Cannot open video file %s\n", video_file);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int total_frames = (int)cap.get(cv::CAP_PROP_FRAME_COUNT);
|
|
||||||
int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
|
|
||||||
int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);
|
|
||||||
|
|
||||||
printf("Video: %dx%d, %d frames\n", width, height, total_frames);
|
|
||||||
|
|
||||||
if (total_frames < 10) {
|
|
||||||
fprintf(stderr, "Error: Video too short (need at least 10 frames)\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Calculate mesh dimensions (32×32 pixel cells, matches current encoder)
|
|
||||||
int mesh_cell_size = 32;
|
|
||||||
int mesh_w = (width + mesh_cell_size - 1) / mesh_cell_size;
|
|
||||||
int mesh_h = (height + mesh_cell_size - 1) / mesh_cell_size;
|
|
||||||
if (mesh_w < 2) mesh_w = 2;
|
|
||||||
if (mesh_h < 2) mesh_h = 2;
|
|
||||||
|
|
||||||
printf("Mesh: %dx%d (approx %dx%d px cells)\n",
|
|
||||||
mesh_w, mesh_h, width / mesh_w, height / mesh_h);
|
|
||||||
|
|
||||||
// Encoder parameters (match current encoder_tav.c settings)
|
|
||||||
float smoothness = 0.5f; // Mesh smoothness weight
|
|
||||||
int smooth_iterations = 8; // Smoothing iterations
|
|
||||||
float affine_threshold = 0.40f; // 40% improvement required for affine
|
|
||||||
|
|
||||||
printf("Settings: smoothness=%.2f, iterations=%d, affine_threshold=%.0f%%\n",
|
|
||||||
smoothness, smooth_iterations, affine_threshold * 100.0f);
|
|
||||||
|
|
||||||
// Seed random number generator
|
|
||||||
srand(time(NULL));
|
|
||||||
|
|
||||||
// Pick random frames (avoid first and last 5 frames)
|
|
||||||
printf("\nTesting %d random frame pairs:\n", num_test_frames);
|
|
||||||
for (int test = 0; test < num_test_frames; test++) {
|
|
||||||
// Pick random frame (ensure we have a previous frame)
|
|
||||||
int frame_num = 5 + rand() % (total_frames - 10);
|
|
||||||
|
|
||||||
printf("\n[Test %d/%d] Warping frame %d → frame %d (inverse warp)\n",
|
|
||||||
test + 1, num_test_frames, frame_num - 1, frame_num);
|
|
||||||
|
|
||||||
// Read previous frame (source for warping)
|
|
||||||
cap.set(cv::CAP_PROP_POS_FRAMES, frame_num - 1);
|
|
||||||
|
|
||||||
cv::Mat prev_frame;
|
|
||||||
cap >> prev_frame;
|
|
||||||
if (prev_frame.empty()) {
|
|
||||||
fprintf(stderr, "Error reading frame %d\n", frame_num - 1);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read current frame (target to match)
|
|
||||||
cv::Mat curr_frame;
|
|
||||||
cap >> curr_frame;
|
|
||||||
if (curr_frame.empty()) {
|
|
||||||
fprintf(stderr, "Error reading frame %d\n", frame_num);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Convert to RGB for block matching
|
|
||||||
cv::Mat prev_rgb, curr_rgb;
|
|
||||||
cv::cvtColor(prev_frame, prev_rgb, cv::COLOR_BGR2RGB);
|
|
||||||
cv::cvtColor(curr_frame, curr_rgb, cv::COLOR_BGR2RGB);
|
|
||||||
|
|
||||||
// Compute hierarchical block matching (replaces optical flow)
|
|
||||||
printf(" Computing hierarchical block matching...\n");
|
|
||||||
float *flow_x = nullptr, *flow_y = nullptr;
|
|
||||||
estimate_motion_optical_flow(
|
|
||||||
prev_rgb.data, curr_rgb.data,
|
|
||||||
width, height,
|
|
||||||
&flow_x, &flow_y
|
|
||||||
);
|
|
||||||
|
|
||||||
// Build mesh from flow
|
|
||||||
printf(" Building mesh from block matches...\n");
|
|
||||||
int16_t *mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
|
|
||||||
int16_t *mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
|
|
||||||
build_mesh_from_flow(flow_x, flow_y, width, height, mesh_w, mesh_h, mesh_dx, mesh_dy);
|
|
||||||
|
|
||||||
// Apply Laplacian smoothing
|
|
||||||
printf(" Applying Laplacian smoothing (%d iterations, %.2f weight)...\n",
|
|
||||||
smooth_iterations, smoothness);
|
|
||||||
smooth_mesh_laplacian(mesh_dx, mesh_dy, mesh_w, mesh_h, smoothness, smooth_iterations);
|
|
||||||
|
|
||||||
// Estimate selective per-cell affine transforms
|
|
||||||
printf(" Estimating selective affine transforms (threshold=%.0f%%)...\n",
|
|
||||||
affine_threshold * 100.0f);
|
|
||||||
uint8_t *affine_mask = (uint8_t*)calloc(mesh_w * mesh_h, sizeof(uint8_t));
|
|
||||||
int16_t *affine_a11 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
|
|
||||||
int16_t *affine_a12 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
|
|
||||||
int16_t *affine_a21 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
|
|
||||||
int16_t *affine_a22 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
|
|
||||||
|
|
||||||
int cell_w = width / mesh_w;
|
|
||||||
int cell_h = height / mesh_h;
|
|
||||||
int affine_count = 0;
|
|
||||||
|
|
||||||
for (int cy = 0; cy < mesh_h; cy++) {
|
|
||||||
for (int cx = 0; cx < mesh_w; cx++) {
|
|
||||||
int cell_idx = cy * mesh_w + cx;
|
|
||||||
|
|
||||||
int16_t tx, ty, a11, a12, a21, a22;
|
|
||||||
int use_affine = estimate_cell_affine(
|
|
||||||
flow_x, flow_y,
|
|
||||||
width, height,
|
|
||||||
cx, cy, cell_w, cell_h,
|
|
||||||
affine_threshold,
|
|
||||||
&tx, &ty, &a11, &a12, &a21, &a22
|
|
||||||
);
|
|
||||||
|
|
||||||
affine_mask[cell_idx] = use_affine ? 1 : 0;
|
|
||||||
mesh_dx[cell_idx] = tx;
|
|
||||||
mesh_dy[cell_idx] = ty;
|
|
||||||
affine_a11[cell_idx] = a11;
|
|
||||||
affine_a12[cell_idx] = a12;
|
|
||||||
affine_a21[cell_idx] = a21;
|
|
||||||
affine_a22[cell_idx] = a22;
|
|
||||||
|
|
||||||
if (use_affine) affine_count++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
printf(" Affine usage: %d/%d cells (%.1f%%)\n",
|
|
||||||
affine_count, mesh_w * mesh_h,
|
|
||||||
100.0f * affine_count / (mesh_w * mesh_h));
|
|
||||||
|
|
||||||
// Warp previous frame to current frame
|
|
||||||
printf(" Warping frame with mesh + affine...\n");
|
|
||||||
cv::Mat warped;
|
|
||||||
apply_mesh_warp_rgb(prev_frame, warped, mesh_dx, mesh_dy,
|
|
||||||
affine_mask, affine_a11, affine_a12, affine_a21, affine_a22,
|
|
||||||
mesh_w, mesh_h);
|
|
||||||
|
|
||||||
// Create visualisation with affine overlay
|
|
||||||
cv::Mat warped_viz = warped.clone();
|
|
||||||
create_affine_overlay(warped_viz, affine_mask, mesh_w, mesh_h);
|
|
||||||
|
|
||||||
// Compute MSE between warped and target
|
|
||||||
double mse = 0.0;
|
|
||||||
for (int y = 0; y < height; y++) {
|
|
||||||
for (int x = 0; x < width; x++) {
|
|
||||||
for (int c = 0; c < 3; c++) {
|
|
||||||
double diff = (double)warped.at<cv::Vec3b>(y, x)[c] -
|
|
||||||
(double)curr_frame.at<cv::Vec3b>(y, x)[c];
|
|
||||||
mse += diff * diff;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
mse /= (width * height * 3);
|
|
||||||
double psnr = (mse > 0) ? 10.0 * log10(255.0 * 255.0 / mse) : 999.0;
|
|
||||||
printf(" Warp quality: MSE=%.2f, PSNR=%.2f dB\n", mse, psnr);
|
|
||||||
|
|
||||||
// Save images
|
|
||||||
char filename[256];
|
|
||||||
sprintf(filename, "test_mesh_frame_%04d_source.png", frame_num - 1);
|
|
||||||
cv::imwrite(filename, prev_frame);
|
|
||||||
printf(" Saved source: %s\n", filename);
|
|
||||||
|
|
||||||
sprintf(filename, "test_mesh_frame_%04d_warped.png", frame_num);
|
|
||||||
cv::imwrite(filename, warped);
|
|
||||||
printf(" Saved warped: %s\n", filename);
|
|
||||||
|
|
||||||
sprintf(filename, "test_mesh_frame_%04d_warped_viz.png", frame_num);
|
|
||||||
cv::imwrite(filename, warped_viz);
|
|
||||||
printf(" Saved warped+viz (green=affine): %s\n", filename);
|
|
||||||
|
|
||||||
sprintf(filename, "test_mesh_frame_%04d_target.png", frame_num);
|
|
||||||
cv::imwrite(filename, curr_frame);
|
|
||||||
printf(" Saved target: %s\n", filename);
|
|
||||||
|
|
||||||
// Compute difference image
|
|
||||||
cv::Mat diff_img = cv::Mat::zeros(height, width, CV_8UC3);
|
|
||||||
for (int y = 0; y < height; y++) {
|
|
||||||
for (int x = 0; x < width; x++) {
|
|
||||||
for (int c = 0; c < 3; c++) {
|
|
||||||
int diff = abs((int)warped.at<cv::Vec3b>(y, x)[c] -
|
|
||||||
(int)curr_frame.at<cv::Vec3b>(y, x)[c]);
|
|
||||||
diff_img.at<cv::Vec3b>(y, x)[c] = std::min(diff * 3, 255); // Amplify for visibility
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
sprintf(filename, "test_mesh_frame_%04d_diff.png", frame_num);
|
|
||||||
cv::imwrite(filename, diff_img);
|
|
||||||
printf(" Saved difference (amplified 3x): %s\n", filename);
|
|
||||||
|
|
||||||
// Compute motion statistics
|
|
||||||
float max_motion = 0.0f, avg_motion = 0.0f;
|
|
||||||
for (int i = 0; i < mesh_w * mesh_h; i++) {
|
|
||||||
float dx = mesh_dx[i] / 8.0f;
|
|
||||||
float dy = mesh_dy[i] / 8.0f;
|
|
||||||
float motion = sqrtf(dx * dx + dy * dy);
|
|
||||||
avg_motion += motion;
|
|
||||||
if (motion > max_motion) max_motion = motion;
|
|
||||||
}
|
|
||||||
avg_motion /= (mesh_w * mesh_h);
|
|
||||||
printf(" Motion: avg=%.2f px, max=%.2f px\n", avg_motion, max_motion);
|
|
||||||
|
|
||||||
// Cleanup
|
|
||||||
free(flow_x);
|
|
||||||
free(flow_y);
|
|
||||||
free(mesh_dx);
|
|
||||||
free(mesh_dy);
|
|
||||||
free(affine_mask);
|
|
||||||
free(affine_a11);
|
|
||||||
free(affine_a12);
|
|
||||||
free(affine_a21);
|
|
||||||
free(affine_a22);
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("\nDone! Check output images:\n");
|
|
||||||
printf(" *_source.png: Original frame before warping\n");
|
|
||||||
printf(" *_warped.png: Warped frame (should match target)\n");
|
|
||||||
printf(" *_warped_viz.png: Warped with green overlay showing affine cells\n");
|
|
||||||
printf(" *_target.png: Target frame to match\n");
|
|
||||||
printf(" *_diff.png: Difference image (should be mostly black if warp is good)\n");
|
|
||||||
|
|
||||||
cap.release();
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
Reference in New Issue
Block a user