diff --git a/CLAUDE.md b/CLAUDE.md index 709aba5..6840625 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -314,3 +314,68 @@ Implemented on 2025-10-15 for improved temporal compression through group-of-pic - **Unified Compression**: Zstd compresses entire GOP as single block, finding patterns across time - **Motion Compensation**: FFT-based phase correlation provides accurate global motion estimation - **Adaptive GOPs**: Scene change detection ensures optimal GOP boundaries + +#### TAD Format (TSVM Advanced Audio) +- **Perceptual audio codec** for TSVM using DWT with 4-tap interpolating Deslauriers-Dubuc wavelets +- **C Encoder**: `video_encoder/encoder_tad.c` - Core Encoder library; `video_encoder/encoder_tad_standalone.c` - Standalone encoder with FFmpeg integration + - How to build: `make tad` + - **Quality Levels**: 0-5 (0=lowest quality/smallest, 5=highest quality/largest; designed to be in sync with TAV encoder) +- **C Decoder**: `video_encoder/decoder_tad.c` - Standalone decoder for TAD format +- **Features**: + - **32 KHz stereo**: TSVM audio hardware native format + - **Variable chunk sizes**: 1024-32768+ samples, enables flexible TAV integration + - **M/S stereo decorrelation**: Exploits stereo correlation for better compression + - **PCM16→PCM8 conversion**: Error-diffusion dithering to minimize quantization noise + - **Variable-level DD-4 DWT**: Dynamic levels (log2(chunk_size) - 2) for frequency domain analysis + - **Perceptual quantization**: Frequency-dependent weights preserving critical 2-4 KHz range + - **2-bit twobitmap significance map**: Efficient encoding of sparse coefficients + - **Optional Zstd compression**: Level 7 for additional compression +- **Usage Examples**: + ```bash + # Encode with default quality (Q3) + encoder_tad -i input.mp4 -o output.tad + + # Encode with highest quality + encoder_tad -i input.mp4 -o output.tad -q 5 + + # Encode without Zstd compression + encoder_tad -i input.mp4 -o output.tad --no-zstd + + # Verbose output with 
statistics + encoder_tad -i input.mp4 -o output.tad -v + + # Decode back to PCM16 + decoder_tad -i input.tad -o output.pcm + ``` +- **Format documentation**: `terranmon.txt` (search for "TSVM Advanced Audio (TAD) Format") +- **Version**: 1 (2-bit twobitmap significance map) + +**TAD Compression Performance**: +- **Target Compression**: 2:1 against PCMu8 baseline (4:1 against PCM16LE input) +- **Achieved Compression**: 2.51:1 against PCMu8 at quality level 3 +- **Audio Quality**: Preserves full 0-16 KHz bandwidth +- **Coefficient Sparsity**: 86.9% zeros in Mid channel, 97.8% in Side channel (typical) + +**TAD Encoding Pipeline**: +1. **FFmpeg Two-Pass Extraction**: High-quality SoXR resampling to 32 KHz with 16 Hz highpass filter +2. **PCM16→PCM8 with Dithering**: Error-diffusion dithering minimizes quantization noise +3. **M/S Stereo Decorrelation**: Transforms Left/Right to Mid/Side for better compression +4. **Variable-Level DD-4 DWT**: Deslauriers-Dubuc 4-tap interpolating wavelets with dynamic levels + - Default 32768 samples → 13 DWT levels + - Minimum 1024 samples → 8 DWT levels +5. **Frequency-Dependent Quantization**: Perceptual weights favor 2-4 KHz (speech intelligibility) +6. **Dead Zone Quantization**: Zeros high-frequency noise (highest band) +7. **2-bit Twobitmap Encoding**: Maps coefficients to 00=0, 01=+1, 10=-1, 11=other +8. **Optional Zstd Compression**: Level 7 compression on concatenated Mid+Side data + +**TAD Integration with TAV**: +TAD is designed as an includable API for TAV video encoder integration. The variable chunk size +support enables synchronized audio/video encoding where audio chunks can match video GOP boundaries. +TAV embeds TAD-compressed audio using packet type 0x24 with Zstd compression. 
+ +**TAD Hardware Acceleration**: +TSVM accelerates TAD decoding with AudioAdapter.kt (backend) and AudioJSR223Delegate.kt (API): +- Backend decoder in AudioAdapter.kt with variable chunk size support +- API functions in AudioJSR223Delegate.kt for JavaScript access +- Supports chunk sizes from 1024 to 32768+ samples +- Dynamic DWT level calculation for optimal performance diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js index 265fd39..9569913 100644 --- a/assets/disk0/tvdos/bin/playtav.js +++ b/assets/disk0/tvdos/bin/playtav.js @@ -15,6 +15,7 @@ const UCF_VERSION = 1 const ADDRESSING_EXTERNAL = 0x01 const ADDRESSING_INTERNAL = 0x02 const SND_BASE_ADDR = audio.getBaseAddr() +const SND_MEM_ADDR = audio.getMemAddr() const pcm = require("pcm") const MP2_FRAME_SIZE = [144,216,252,288,360,432,504,576,720,864,1008,1152,1440,1728] @@ -32,6 +33,7 @@ const TAV_PACKET_AUDIO_MP2 = 0x20 const TAV_PACKET_AUDIO_NATIVE = 0x21 const TAV_PACKET_AUDIO_PCM_16LE = 0x22 const TAV_PACKET_AUDIO_ADPCM = 0x23 +const TAV_PACKET_AUDIO_TAD = 0x24 const TAV_PACKET_SUBTITLE = 0x30 const TAV_PACKET_AUDIO_BUNDLED = 0x40 // Entire MP2 audio file in single packet const TAV_PACKET_EXTENDED_HDR = 0xEF @@ -396,6 +398,7 @@ let audioBufferBytesLastFrame = 0 let frame_cnt = 0 let frametime = 1000000000.0 / header.fps let mp2Initialised = false +let tadInitialised = false let audioFired = false @@ -1337,6 +1340,20 @@ try { audio.mp2Decode() audio.mp2UploadDecoded(0) + } + else if (packetType === TAV_PACKET_AUDIO_TAD) { + // TAD audio packet: payload is copied to the audio adapter's TAD input buffer and decoded there + let payloadLen = seqread.readInt() + + if (!tadInitialised) { + tadInitialised = true + audio.tadSetQuality(header.qualityLevel) + } + + seqread.readBytes(payloadLen, SND_MEM_ADDR - 262144) + audio.tadDecode() + audio.tadUploadDecoded(0) + } else if (packetType === TAV_PACKET_AUDIO_NATIVE) { // PCM length must not exceed 65536 bytes! 
diff --git a/terranmon.txt b/terranmon.txt index 7b2a181..dbad9b5 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -965,6 +965,7 @@ transmission capability, and region-of-interest coding. 0x21: Zstd-compressed 8-bit PCM (32 KHz, audio hardware's native format) 0x22: Zstd-compressed 16-bit PCM (32 KHz, little endian) 0x23: Zstd-compressed ADPCM + 0x24: Zstd-compressed TAD 0x30: Subtitle in "Simple" format 0x31: Subtitle in "Karaoke" format @@ -1065,6 +1066,13 @@ transmission capability, and region-of-interest coding. uint32 Compressed Size * Zstd-compressed Block Data +## TAD Packet Structure + uint8 Packet type (0x24) + uint32 Compressed Size + 2 + uint16 Sample Count + uint32 Compressed Size + * Zstd-compressed TAD + ## GOP Unified Packet Structure (0x12) Implemented on 2025-10-15 for temporal 3D DWT with unified preprocessing. @@ -1507,6 +1515,241 @@ Number|Index 4096|255 +-------------------------------------------------------------------------------- + +TSVM Advanced Audio (TAD) Format +Created by CuriousTorvald and Claude on 2025-10-23 + +TAD is a perceptual audio codec for TSVM utilizing Discrete Wavelet Transform (DWT) +with 4-tap interpolating Deslauriers-Dubuc wavelets, providing efficient compression +through M/S stereo decorrelation, frequency-dependent quantization, and significance +map encoding. Designed as an includable API for integration with TAV video encoder. + +When used inside of a video codec, only zstd-compressed payload is stored, chunk length +is stored separately and quality index is shared with that of the video. + +# Suggested File Structure +\x1F T S V M T A D +[HEADER] +[CHUNK 0] +[CHUNK 1] +[CHUNK 2] +... 
+ +## Header (16 bytes) + uint8 Magic[8]: "\x1F TSVM TAD" + uint8 Version: 1 + uint8 Quality Level: 0-5 (0=lowest quality/smallest, 5=highest quality/largest) + uint8 Flags: + - bit 0: Zstd compression enabled (1=compressed, 0=uncompressed) + - bits 1-7: Reserved (must be 0) + uint32 Sample Rate: audio sample rate in Hz (always 32000 for TSVM) + uint8 Channels: number of audio channels (always 2 for stereo) + uint8 Reserved[2]: fill with zeros + +## Audio Properties +- **Sample Rate**: 32000 Hz (TSVM audio hardware native format) +- **Channels**: 2 (stereo) +- **Input Format**: PCM16LE (16-bit signed little-endian PCM) +- **Preprocessing**: 16 Hz highpass filter applied during extraction +- **Internal Representation**: Signed PCM8 with error-diffusion dithering +- **Chunk Size**: Variable (1024-32768+ samples per channel, must be power of 2) + - Default: 32768 samples (1.024 seconds at 32 kHz) + - Minimum: 1024 samples (32 ms at 32 kHz) + - DWT levels calculated dynamically: log2(chunk_size) - 2 +- **Target Compression**: 2:1 against PCMu8 baseline + +## Chunk Structure +Each chunk encodes a variable number of stereo samples (power of 2, minimum 1024). +Default is 32768 samples (65536 total samples, 1.024 seconds). +If the audio duration doesn't align to chunk boundaries, the final chunk can use +a smaller power-of-2 size or be zero-padded. 
+ + uint8 Significance Map Method: always 1 (2-bit twobitmap) + uint8 Compression Flag: 1=Zstd compressed, 0=uncompressed + uint16 Sample Count: number of samples per channel (must be power of 2, min 1024) + uint32 Chunk Payload Size: size of following payload in bytes + * Chunk Payload: encoded M/S stereo data (Zstd compressed if flag set) + +### Chunk Payload Structure (before optional Zstd compression) + * Mid Channel Encoded Data + * Side Channel Encoded Data + +### Encoded Channel Data (2-bit Twobitmap Significance Map) + uint8 Significance Map[(num_samples * 2 + 7) / 8] // 2 bits per coefficient + int16 Other Values[variable length] // Non-{-1,0,+1} values + +#### 2-bit Twobitmap Encoding +Each DWT coefficient is encoded using 2 bits in the significance map: + - 00: coefficient is 0 + - 01: coefficient is +1 + - 10: coefficient is -1 + - 11: coefficient is "other" (value stored in Other Values array) + +This encoding exploits the sparsity of quantized DWT coefficients where most +values are 0, ±1 after quantization. "Other" values are stored sequentially +as int16 in the order they appear. + +## Encoding Pipeline + +### Step 1: PCM16 to PCM8 Conversion with Error-Diffusion Dithering +Input stereo PCM16LE is converted to signed PCM8 using error-diffusion dithering +to minimize quantization noise: + + dithered_value = pcm16_value / 256 + error + pcm8_value = clamp(round(dithered_value), -128, 127) + error = dithered_value - pcm8_value + +Error is propagated to the next sample (alternating between left/right channels). + +### Step 2: M/S Stereo Decorrelation +Mid-Side transformation exploits stereo correlation: + + Mid = (Left + Right) / 2 + Side = (Left - Right) / 2 + +This typically concentrates energy in the Mid channel while the Side channel +contains mostly small values, improving compression efficiency. + +### Step 3: Variable-Level DD-4 DWT +Each channel (Mid and Side) undergoes Deslauriers-Dubuc 4-tap interpolating wavelet +decomposition. 
The number of DWT levels is calculated dynamically based on chunk size: + + DWT Levels = log2(chunk_size) - 2 + +For the default 32768-sample chunks, this produces 13 levels with frequency subbands: + + Level 0-12: High to low frequency coefficients + DC band: Low-frequency approximation coefficients + +Sideband boundaries are calculated dynamically: + first_band_size = chunk_size >> dwt_levels + sideband[0] = 0 + sideband[1] = first_band_size + sideband[i+1] = sideband[i] + (first_band_size << (i-1)) + +For 32768 samples with 13 levels: boundaries at 0, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768 +For 1024 samples with 8 levels: boundaries at 0, 4, 8, 16, 32, 64, 128, 256, 512, 1024 + +### Step 4: Frequency-Dependent Quantization +DWT coefficients are quantized using perceptually-tuned frequency-dependent weights: + + Base Weights by Level: + Level 0 (16-8 KHz): 3.0 + Level 1 (8-4 KHz): 2.0 + Level 2 (4-2 KHz): 1.5 + Level 3 (2-1 KHz): 1.0 + Level 4 (1-0.5 KHz): 0.75 + Level 5 (0.5-0.25 KHz): 0.5 + Level 6-7 (DC-0.25 KHz): 0.25 + +Quality scaling factor: 1.0 + (5 - quality) * 0.3 + +Final quantization step: base_weight * quality_scale + +#### Dead Zone Quantization +High-frequency coefficients (Level 0: 8-16 KHz) use dead zone quantization +where coefficients smaller than half the quantization step are zeroed: + + if (abs(coefficient) < quantization_step / 2) + coefficient = 0 + +This aggressively removes high-frequency noise while preserving important +mid-frequency content (2-4 KHz critical for speech intelligibility). + +### Step 5: 2-bit Significance Map Encoding +Quantized coefficients are encoded using the 2-bit twobitmap method (see above). + +### Step 6: Optional Zstd Compression +If enabled (default), the concatenated Mid+Side encoded data is compressed +using Zstd level 3 for additional compression without significant CPU overhead. 
+ +## Decoding Pipeline + +### Step 1: Chunk Extraction +Read chunk header to determine significance map method and compression status. +If compressed, decompress payload using Zstd. + +### Step 2: Decode Significance Maps +Decode Mid and Side channel data using 2-bit twobitmap decoder: + - Read 2-bit codes from significance map + - Reconstruct coefficients: 0, +1, -1, or read from Other Values array + +### Step 3: Dequantization +Multiply quantized coefficients by frequency-dependent quantization steps +(same weights as encoder). + +### Step 4: Variable-Level Inverse DD-4 DWT +Reconstruct PCM8 audio from DWT coefficients using inverse DD-4 transform, +progressively doubling length from the deepest level to chunk_size samples. +The number of inverse DWT levels matches the forward transform (log2(chunk_size) - 2). + +### Step 5: M/S to L/R Conversion +Convert Mid/Side back to Left/Right stereo: + + Left = Mid + Side + Right = Mid - Side + +### Step 6: PCM8 to PCM16 Upsampling +Convert signed PCM8 back to PCM16LE by multiplying by 256: + + pcm16_value = pcm8_value * 256 + +## Compression Performance +- **Target Ratio**: 2:1 against PCMu8 (4:1 against PCM16LE input) +- **Achieved Ratio**: 2.51:1 against PCMu8 at quality level 3 +- **Quality**: Perceptually transparent at Q3+, preserves full 0-16 KHz bandwidth +- **Sparsity**: 86.9% zeros in Mid channel, 97.8% in Side channel (typical) + +## Integration with TAV Encoder +TAD is designed as an includable API for TAV video encoder integration. +The encoder can be invoked programmatically to compress audio tracks: + + #include "encoder_tad.h" + + size_t encoded_size = tad_encode_from_file( + input_audio_path, + output_tad_path, + quality_level, + use_zstd, + verbose + ); + +This allows TAV video files to embed TAD-compressed audio using packet type 0x24. 
+ +## Audio Extraction Command +TAD encoder uses two-pass FFmpeg extraction for optimal quality: + + # Pass 1: Extract at original sample rate + ffmpeg -i input.mp4 -f s16le -ac 2 temp.pcm + + # Pass 2: High-quality resample with SoXR and highpass filter + ffmpeg -f s16le -ar {original_rate} -ac 2 -i temp.pcm \ + -ar 32000 -af "aresample=resampler=soxr:precision=28:cutoff=0.99,highpass=f=16" \ + output.pcm + +This ensures resampling happens after extraction with optimal quality parameters. + +## Hardware Acceleration API +TAD decoder may be accelerated using hardware functions in AudioJSR223Delegate: +- tadSetQuality(): Set the quality level (0-5) the chunk was encoded with +- tadDecode(): Main decoding function (chunk-based) +- tadUploadDecoded(): Queue the decoded PCMu8 chunk on a playhead + +## Usage Examples + # Encode with default quality (Q3) + encoder_tad -i input.mp4 -o output.tad + + # Encode with highest quality + encoder_tad -i input.mp4 -o output.tad -q 5 + + # Encode without Zstd compression + encoder_tad -i input.mp4 -o output.tad --no-zstd + + # Verbose output with statistics + encoder_tad -i input.mp4 -o output.tad -v + + -------------------------------------------------------------------------------- TSVM Universal Cue format diff --git a/tsvm_core/src/net/torvald/tsvm/AudioJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/AudioJSR223Delegate.kt index 67d555f..39dc002 100644 --- a/tsvm_core/src/net/torvald/tsvm/AudioJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/AudioJSR223Delegate.kt @@ -82,6 +82,7 @@ class AudioJSR223Delegate(private val vm: VM) { // fun mp2DecodeFrame(mp2: MP2Env.MP2, framePtr: Long?, pcm: Boolean, outL: Long, outR: Long) = getFirstSnd()?.mp2Env?.decodeFrame(mp2, framePtr, pcm, outL, outR) fun getBaseAddr(): Int? = getFirstSnd()?.let { return it.vm.findPeriSlotNum(it)?.times(-131072)?.minus(1) } + fun getMemAddr(): Int? 
= getFirstSnd()?.let { return it.vm.findPeriSlotNum(it)?.times(-1048576)?.minus(1) } fun mp2Init() = getFirstSnd()?.mmio_write(40L, 16) fun mp2Decode() = getFirstSnd()?.mmio_write(40L, 1) fun mp2InitThenDecode() = getFirstSnd()?.mmio_write(40L, 17) @@ -93,6 +94,39 @@ class AudioJSR223Delegate(private val vm: VM) { } } + // TAD (Terrarum Advanced Audio) decoder functions + fun tadSetQuality(quality: Int) { + getFirstSnd()?.mmio_write(43L, quality.toByte()) + } + + fun tadGetQuality() = getFirstSnd()?.mmio_read(43L)?.toInt() + + fun tadDecode() { + getFirstSnd()?.mmio_write(42L, 1) + } + + fun tadIsBusy() = getFirstSnd()?.mmio_read(44L)?.toInt() == 1 + + fun tadUploadDecoded(playhead: Int) { + getFirstSnd()?.let { snd -> + val ba = ByteArray(65536) // 32768 samples * 2 channels + UnsafeHelper.memcpyRaw(null, snd.tadDecodedBin.ptr, ba, UnsafeHelper.getArrayOffset(ba), 65536) + snd.playheads[playhead].pcmQueue.addLast(ba) + } + } + + fun putTadDataByPtr(ptr: Int, length: Int, destOffset: Int) { + getFirstSnd()?.let { snd -> + val vkMult = if (ptr >= 0) 1 else -1 + for (k in 0L until length) { + val vk = k * vkMult + snd.tadInputBin[k + destOffset] = vm.peek(ptr + vk)!! 
+ } + } + } + + fun getTadData(index: Int) = getFirstSnd()?.tadDecodedBin?.get(index.toLong()) + /* diff --git a/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt b/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt index 891ec6a..a2c8b0f 100644 --- a/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt +++ b/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt @@ -4,6 +4,7 @@ import com.badlogic.gdx.Gdx import com.badlogic.gdx.backends.lwjgl3.audio.OpenALLwjgl3Audio import com.badlogic.gdx.utils.GdxRuntimeException import com.badlogic.gdx.utils.Queue +import io.airlift.compress.zstd.ZstdInputStream import net.torvald.UnsafeHelper import net.torvald.UnsafePtr import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUint @@ -11,6 +12,7 @@ import net.torvald.tsvm.ThreeFiveMiniUfloat import net.torvald.tsvm.VM import net.torvald.tsvm.getHashStr import net.torvald.tsvm.toInt +import java.io.ByteArrayInputStream private class RenderRunnable(val playhead: AudioAdapter.Playhead) : Runnable { private fun printdbg(msg: Any) { @@ -125,6 +127,12 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { @Volatile private var mp2Busy = false + // TAD (Terrarum Advanced Audio) decoder buffers + internal val tadInputBin = UnsafeHelper.allocate(65536L, this) // Input: compressed TAD chunk (max 64KB) + internal val tadDecodedBin = UnsafeHelper.allocate(65536L, this) // Output: PCMu8 stereo (32768 samples * 2 channels) + internal var tadQuality = 2 // Quality level used during encoding (0-5) + @Volatile private var tadBusy = false + private val renderRunnables: Array private val renderThreads: Array private val writeQueueingRunnables: Array @@ -216,7 +224,9 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { in 0..114687 -> sampleBin[addr] in 114688..131071 -> (adi - 114688).let { instruments[it / 64].getByte(it % 64) } in 131072..262143 -> (adi - 131072).let { playdata[it / (8*64)][(it / 8) % 64].getByte(it % 8) } - else -> 
peek(addr % 262144) + in 262144..327679 -> tadInputBin[addr - 262144] // TAD input buffer (65536 bytes) + in 327680..393215 -> tadDecodedBin[addr - 327680] // TAD decoded output (65536 bytes) + else -> peek(addr % 393216) } } @@ -227,6 +237,8 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { in 0..114687 -> { sampleBin[addr] = byte } in 114688..131071 -> (adi - 114688).let { instruments[it / 64].setByte(it % 64, bi) } in 131072..262143 -> (adi - 131072).let { playdata[it / (8*64)][(it / 8) % 64].setByte(it % 8, bi) } + in 262144..327679 -> tadInputBin[addr - 262144] = byte // TAD input buffer + in 327680..393215 -> tadDecodedBin[addr - 327680] = byte // TAD decoded output } } @@ -239,6 +251,9 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { in 30..39 -> playheads[3].read(adi - 30) 40 -> -1 41 -> mp2Busy.toInt().toByte() + 42 -> -1 // TAD control (write-only) + 43 -> tadQuality.toByte() + 44 -> tadBusy.toInt().toByte() in 64..2367 -> mediaDecodedBin[addr - 64] in 2368..4095 -> mediaFrameBin[addr - 2368] in 4096..4097 -> 0 @@ -265,6 +280,14 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { if (bi and 16 != 0) { mp2Context = mp2Env.initialise() } if (bi and 1 != 0) decodeMp2() } + 42 -> { + // TAD control: bit 0 = decode + if (bi and 1 != 0) decodeTad() + } + 43 -> { + // TAD quality (0-5) + tadQuality = bi.coerceIn(0, 5) + } in 64..2367 -> { mediaDecodedBin[addr - 64] = byte } in 2368..4095 -> { mediaFrameBin[addr - 2368] = byte } in 32768..65535 -> { (adi - 32768).let { @@ -287,6 +310,8 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { pcmBin.destroy() mediaFrameBin.destroy() mediaDecodedBin.destroy() + tadInputBin.destroy() + tadDecodedBin.destroy() } else { System.err.println("AudioAdapter already disposed") @@ -304,6 +329,250 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { mp2Env.decodeFrameU8(mp2Context, periMmioBase - 2368, true, periMmioBase - 64) } + 
//============================================================================= + // TAD (Terrarum Advanced Audio) Decoder + //============================================================================= + + private fun decodeTad() { + tadBusy = true + try { + // Read chunk header from tadInputBin + var offset = 0L + + val sampleCount = ( + (tadInputBin[offset++].toInt() and 0xFF) or + ((tadInputBin[offset++].toInt() and 0xFF) shl 8) + ) + val payloadSize = ( + (tadInputBin[offset++].toInt() and 0xFF) or + ((tadInputBin[offset++].toInt() and 0xFF) shl 8) or + ((tadInputBin[offset++].toInt() and 0xFF) shl 16) or + ((tadInputBin[offset++].toInt() and 0xFF) shl 24) + ) + + // Decompress payload if needed + val compressed = ByteArray(payloadSize) + UnsafeHelper.memcpyRaw(null, tadInputBin.ptr + offset, compressed, UnsafeHelper.getArrayOffset(compressed), payloadSize.toLong()) + + val payload: ByteArray = try { + ZstdInputStream(ByteArrayInputStream(compressed)).use { zstd -> + zstd.readBytes() + } + } catch (e: Exception) { + println("ERROR: Zstd decompression failed: ${e.message}") + } as ByteArray + + // Decode significance maps + val quantMid = ShortArray(sampleCount) + val quantSide = ShortArray(sampleCount) + + var payloadOffset = 0 + val midBytes = decodeSigmap2bit(payload, payloadOffset, quantMid, sampleCount) + payloadOffset += midBytes + + val sideBytes = decodeSigmap2bit(payload, payloadOffset, quantSide, sampleCount) + + // Calculate DWT levels from sample count + val dwtLevels = calculateDwtLevels(sampleCount) + + // Dequantize + val dwtMid = FloatArray(sampleCount) + val dwtSide = FloatArray(sampleCount) + dequantizeDwtCoefficients(quantMid, dwtMid, sampleCount, tadQuality, dwtLevels) + dequantizeDwtCoefficients(quantSide, dwtSide, sampleCount, tadQuality, dwtLevels) + + // Inverse DWT + dwtDD4InverseMultilevel(dwtMid, sampleCount, dwtLevels) + dwtDD4InverseMultilevel(dwtSide, sampleCount, dwtLevels) + + // Convert to signed PCM8 + val pcm8Mid = 
ByteArray(sampleCount) + val pcm8Side = ByteArray(sampleCount) + for (i in 0 until sampleCount) { + pcm8Mid[i] = dwtMid[i].coerceIn(-128f, 127f).toInt().toByte() + pcm8Side[i] = dwtSide[i].coerceIn(-128f, 127f).toInt().toByte() + } + + // M/S to L/R correlation and write to tadDecodedBin + for (i in 0 until sampleCount) { + val m = pcm8Mid[i].toInt() + val s = pcm8Side[i].toInt() + var l = m + s + var r = m - s + + if (l < -128) l = -128 + if (l > 127) l = 127 + if (r < -128) r = -128 + if (r > 127) r = 127 + + tadDecodedBin[i * 2L] = (l + 128).toByte() // Left (PCMu8) + tadDecodedBin[i * 2L + 1] = (r + 128).toByte() // Right (PCMu8) + } + + } catch (e: Exception) { + e.printStackTrace() + } finally { + tadBusy = false + } + } + + private fun decodeSigmap2bit(input: ByteArray, offset: Int, values: ShortArray, count: Int): Int { + val mapBytes = (count * 2 + 7) / 8 + var readPtr = offset + mapBytes + var otherIdx = 0 + + for (i in 0 until count) { + val bitPos = i * 2 + val byteIdx = offset + bitPos / 8 + val bitOffset = bitPos % 8 + + var code = ((input[byteIdx].toInt() and 0xFF) shr bitOffset) and 0x03 + + // Handle bit spillover + if (bitOffset == 7) { + code = ((input[byteIdx].toInt() and 0xFF) shr 7) or + (((input[byteIdx + 1].toInt() and 0xFF) and 0x01) shl 1) + } + + values[i] = when (code) { + 0 -> 0 + 1 -> 1 + 2 -> (-1).toShort() + 3 -> { + val v = ((input[readPtr].toInt() and 0xFF) or + ((input[readPtr + 1].toInt() and 0xFF) shl 8)).toShort() + readPtr += 2 + otherIdx++ + v + } + else -> 0 + } + } + + return mapBytes + otherIdx * 2 + } + + private fun calculateDwtLevels(chunkSize: Int): Int { + if (chunkSize < 1024) { + throw IllegalArgumentException("Chunk size $chunkSize is below minimum 1024") + } + + var levels = 0 + var size = chunkSize + while (size > 1) { + size = size shr 1 + levels++ + } + return levels - 2 // Maximum decomposition leaves 4-sample approximation + } + + private fun getQuantizationWeights(quality: Int, dwtLevels: Int): FloatArray { 
+ // Extended base weights to support up to 16 DWT levels + val baseWeights = arrayOf( + /* 0*/floatArrayOf(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f), + /* 1*/floatArrayOf(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f), + /* 2*/floatArrayOf(1.0f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), + /* 3*/floatArrayOf(0.2f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), + /* 4*/floatArrayOf(0.2f, 0.8f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), + /* 5*/floatArrayOf(0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), + /* 6*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), + /* 7*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), + /* 8*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), + /* 9*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), + /*10*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), + /*11*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), + /*12*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), + /*13*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f), + /*14*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f), + /*15*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f), + 
/*16*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f) + ) + val qualityScale = 1.0f + ((3 - quality) * 0.5f).coerceAtLeast(0.0f) + return FloatArray(dwtLevels) { i -> (baseWeights[dwtLevels][i.coerceIn(0, 15)] * qualityScale).coerceAtLeast(1.0f) } + } + + private fun dequantizeDwtCoefficients(quantized: ShortArray, coeffs: FloatArray, count: Int, quality: Int, dwtLevels: Int) { + val weights = getQuantizationWeights(quality, dwtLevels) + + // Calculate sideband boundaries dynamically based on chunk size and DWT levels + val firstBandSize = count shr dwtLevels + val sidebandStarts = IntArray(dwtLevels + 2) + sidebandStarts[0] = 0 + sidebandStarts[1] = firstBandSize + for (i in 2..dwtLevels + 1) { + sidebandStarts[i] = sidebandStarts[i - 1] + (firstBandSize shl (i - 2)) + } + + for (i in 0 until count) { + var sideband = dwtLevels + for (s in 0 until dwtLevels + 1) { + if (i < sidebandStarts[s + 1]) { + sideband = s + break + } + } + + val weightIdx = if (sideband == 0) 0 else sideband - 1 + val weight = weights[weightIdx.coerceIn(0, dwtLevels - 1)] + coeffs[i] = quantized[i].toFloat() * weight + } + } + + private fun dwtDD4Inverse1d(data: FloatArray, length: Int) { + if (length < 2) return + + val temp = FloatArray(length) + val half = (length + 1) / 2 + + // Split into low and high parts + for (i in 0 until half) { + temp[i] = data[i] // Even (low-pass) + } + for (i in 0 until length / 2) { + temp[half + i] = data[half + i] // Odd (high-pass) + } + + // Undo update step: s[i] -= 0.25 * (d[i-1] + d[i]) + for (i in 0 until half) { + val dCurr = if (i < length / 2) temp[half + i] else 0.0f + val dPrev = if (i > 0 && i - 1 < length / 2) temp[half + i - 1] else 0.0f + temp[i] -= 0.25f * (dPrev + dCurr) + } + + // Undo prediction step: d[i] += P(s[i-1], s[i], s[i+1], s[i+2]) + for (i in 0 until length / 2) { + val sM1 = if (i > 0) temp[i - 1] else temp[0] // mirror boundary + val s0 = temp[i] + val s1 = if (i 
+ 1 < half) temp[i + 1] else temp[half - 1] + val s2 = if (i + 2 < half) temp[i + 2] else if (half > 1) temp[half - 2] else temp[half - 1] + + val prediction = (-1.0f/16.0f)*sM1 + (9.0f/16.0f)*s0 + (9.0f/16.0f)*s1 + (-1.0f/16.0f)*s2 + temp[half + i] += prediction + } + + // Merge evens and odds back + for (i in 0 until half) { + data[2 * i] = temp[i] + if (2 * i + 1 < length) + data[2 * i + 1] = temp[half + i] + } + } + + private fun dwtDD4InverseMultilevel(data: FloatArray, length: Int, levels: Int) { + // Calculate the length at the deepest level + var currentLength = length + for (level in 0 until levels) { + currentLength = (currentLength + 1) / 2 + } + + // Inverse transform: double size FIRST, then apply inverse DWT + for (level in levels - 1 downTo 0) { + currentLength *= 2 // MULTIPLY FIRST + if (currentLength > length) currentLength = length + dwtDD4Inverse1d(data, currentLength) // THEN apply inverse + } + } + diff --git a/video_encoder/Makefile b/video_encoder/Makefile index 57d2bc9..0547557 100644 --- a/video_encoder/Makefile +++ b/video_encoder/Makefile @@ -13,6 +13,7 @@ OPENCV_LIBS = $(shell pkg-config --libs opencv4) # Source files and targets TARGETS = tev tav tav_decoder +TAD_TARGETS = encoder_tad decoder_tad TEST_TARGETS = test_mesh_warp test_mesh_roundtrip # Build all encoders @@ -23,17 +24,31 @@ tev: encoder_tev.c rm -f encoder_tev $(CC) $(CFLAGS) -o encoder_tev $< $(LIBS) -tav: encoder_tav.c encoder_tav_opencv.cpp estimate_affine_from_blocks.cpp - rm -f encoder_tav encoder_tav.o encoder_tav_opencv.o estimate_affine_from_blocks.o +tav: encoder_tav.c encoder_tad.c encoder_tav_opencv.cpp estimate_affine_from_blocks.cpp + rm -f encoder_tav encoder_tav.o encoder_tad.o encoder_tav_opencv.o $(CC) $(CFLAGS) -c encoder_tav.c -o encoder_tav.o + $(CC) $(CFLAGS) -c encoder_tad.c -o encoder_tad.o $(CXX) $(CXXFLAGS) $(OPENCV_CFLAGS) -c encoder_tav_opencv.cpp -o encoder_tav_opencv.o - $(CXX) $(CXXFLAGS) -c estimate_affine_from_blocks.cpp -o 
estimate_affine_from_blocks.o - $(CXX) -o encoder_tav encoder_tav.o encoder_tav_opencv.o estimate_affine_from_blocks.o $(LIBS) -lfftw3f $(OPENCV_LIBS) + $(CXX) -o encoder_tav encoder_tav.o encoder_tad.o encoder_tav_opencv.o $(LIBS) $(OPENCV_LIBS) tav_decoder: decoder_tav.c rm -f decoder_tav $(CC) $(CFLAGS) -o decoder_tav $< $(LIBS) +# Build TAD (Terrarum Advanced Audio) tools +encoder_tad: encoder_tad_standalone.c encoder_tad.c encoder_tad.h + rm -f encoder_tad encoder_tad_standalone.o encoder_tad.o + $(CC) $(CFLAGS) -c encoder_tad.c -o encoder_tad.o + $(CC) $(CFLAGS) -c encoder_tad_standalone.c -o encoder_tad_standalone.o + $(CC) -o encoder_tad encoder_tad_standalone.o encoder_tad.o $(LIBS) + +decoder_tad: decoder_tad.c + rm -f decoder_tad + $(CC) $(CFLAGS) -o decoder_tad $< $(LIBS) + +# Build all TAD tools +tad: $(TAD_TARGETS) + # Build test programs test_mesh_warp: test_mesh_warp.cpp encoder_tav_opencv.cpp estimate_affine_from_blocks.cpp rm -f test_mesh_warp test_mesh_warp.o @@ -63,31 +78,34 @@ debug: $(TARGETS) # Clean build artifacts clean: - rm -f $(TARGETS) *.o + rm -f $(TARGETS) $(TAD_TARGETS) *.o # Install (copy to PATH) -install: $(TARGETS) +install: $(TARGETS) $(TAD_TARGETS) cp encoder_tev /usr/local/bin/ cp encoder_tav /usr/local/bin/ cp decoder_tav /usr/local/bin/ + cp encoder_tad /usr/local/bin/ + cp decoder_tad /usr/local/bin/ # Check for required dependencies check-deps: @echo "Checking dependencies..." - @echo "Using Zstd compression for better efficiency" @pkg-config --exists libzstd || (echo "Error: libzstd-dev not found. Install with: sudo apt install libzstd-dev" && exit 1) - @pkg-config --exists fftw3f || (echo "Error: libfftw3-dev not found. Install with: sudo apt install libfftw3-dev" && exit 1) @pkg-config --exists opencv4 || (echo "Error: OpenCV 4 not found. Install with: sudo apt install libopencv-dev" && exit 1) @echo "All dependencies found." 
# Help help: - @echo "TSVM Enhanced Video (TEV) Encoder" + @echo "TSVM Enhanced Video (TEV) and Audio (TAD) Encoders" @echo "" @echo "Targets:" - @echo " all - Build both encoders (default)" - @echo " tev - Build the main TEV encoder" - @echo " tav - Build the advanced TAV encoder" + @echo " all - Build video encoders (default)" + @echo " tev - Build the TEV video encoder" + @echo " tav - Build the TAV advanced video encoder" + @echo " tad - Build all TAD audio tools (encoder, decoder)" + @echo " encoder_tad - Build TAD audio encoder" + @echo " decoder_tad - Build TAD audio decoder" @echo " debug - Build with debug symbols" @echo " clean - Remove build artifacts" @echo " install - Install to /usr/local/bin" @@ -95,9 +113,10 @@ help: @echo " help - Show this help" @echo "" @echo "Usage:" - @echo " make # Build both encoders" + @echo " make # Build video encoders" @echo " make tev # Build TEV encoder" @echo " make tav # Build TAV encoder" - @echo " sudo make install # Install both encoders" + @echo " make tad # Build all TAD audio tools" + @echo " sudo make install # Install all encoders" -.PHONY: all clean install check-deps help debug +.PHONY: all clean install check-deps help debug tad diff --git a/video_encoder/decoder_tad.c b/video_encoder/decoder_tad.c new file mode 100644 index 0000000..7da260f --- /dev/null +++ b/video_encoder/decoder_tad.c @@ -0,0 +1,576 @@ +// Created by CuriousTorvald and Claude on 2025-10-23. 
#define DECODER_VENDOR_STRING "Decoder-TAD 20251023"

// TAD format constants (must match encoder)
#define TAD_DEFAULT_CHUNK_SIZE 32768
#define TAD_MIN_CHUNK_SIZE 1024
#define TAD_SAMPLE_RATE 32000
#define TAD_CHANNELS 2

// Significance map methods
#define TAD_SIGMAP_1BIT 0
#define TAD_SIGMAP_2BIT 1
#define TAD_SIGMAP_RLE 2

// Quality levels
#define TAD_QUALITY_MIN 0
#define TAD_QUALITY_MAX 5

// Clamp x into [min, max]
static inline float FCLAMP(float x, float min, float max) {
    return x < min ? min : (x > max ? max : x);
}

// Calculate DWT levels from chunk size: ceil(log2(chunk_size)) - 2.
// Uses the same rounding as the encoder so that non-power-of-2 chunks decode
// with the level count they were encoded with. Returns -1 (and prints a
// diagnostic) when chunk_size is below TAD_MIN_CHUNK_SIZE.
static int calculate_dwt_levels(int chunk_size) {
    if (chunk_size < TAD_MIN_CHUNK_SIZE) {
        fprintf(stderr, "Error: Chunk size %d is below minimum %d\n", chunk_size, TAD_MIN_CHUNK_SIZE);
        return -1;
    }

    // floor(log2(chunk_size))
    int levels = 0;
    int size = chunk_size;
    while (size > 1) {
        size >>= 1;
        levels++;
    }

    // Round up for sizes that are not an exact power of two
    if ((1 << levels) < chunk_size) {
        levels++;
    }

    return levels - 2;
}

// Allocate a scratch buffer of `length` floats. A failed allocation would
// otherwise surface as a NULL dereference, so abort with a diagnostic instead.
static float *dwt_alloc_scratch(int length) {
    float *buf = malloc((size_t)length * sizeof *buf);
    if (!buf) {
        fprintf(stderr, "Error: Out of memory allocating DWT scratch buffer (%d samples)\n", length);
        abort();
    }
    return buf;
}

//=============================================================================
// Inverse DWT Implementation (inverse only needed for decoder)
//=============================================================================

// Inverse 1D Haar step: data holds [low-pass | high-pass]; output is interleaved.
static void dwt_haar_inverse_1d(float *data, int length) {
    if (length < 2) return;

    float *temp = dwt_alloc_scratch(length);
    int half = (length + 1) / 2;

    for (int i = 0; i < half; i++) {
        if (2 * i + 1 < length) {
            temp[2 * i] = data[i] + data[half + i];
            temp[2 * i + 1] = data[i] - data[half + i];
        } else {
            // Odd length: the last low-pass sample has no high-pass partner
            temp[2 * i] = data[i];
        }
    }

    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
}

// Inverse 1D transform of Four-point interpolating Deslauriers-Dubuc (DD-4).
// data holds [low-pass (ceil(length/2)) | high-pass (floor(length/2))] on entry
// and the interleaved reconstructed samples on return.
static void dwt_dd4_inverse_1d(float *data, int length) {
    if (length < 2) return;

    const int half = (length + 1) / 2;   // number of even (low-pass) samples
    const int n_odd = length / 2;        // number of odd (high-pass) samples

    // Working copy: temp[0..half) = low-pass, temp[half..length) = high-pass
    float *temp = dwt_alloc_scratch(length);
    memcpy(temp, data, (size_t)length * sizeof(float));

    // Undo update step: s[i] -= 0.25 * (d[i-1] + d[i])
    for (int i = 0; i < half; i++) {
        float d_curr = (i < n_odd) ? temp[half + i] : 0.0f;
        float d_prev = (i > 0 && i - 1 < n_odd) ? temp[half + i - 1] : 0.0f;
        temp[i] -= 0.25f * (d_prev + d_curr);
    }

    // Undo prediction step: d[i] += P(s[i-1], s[i], s[i+1], s[i+2])
    for (int i = 0; i < n_odd; i++) {
        float s_m1 = (i > 0) ? temp[i - 1] : temp[0];   // mirror boundary
        float s_0 = temp[i];
        float s_1 = (i + 1 < half) ? temp[i + 1] : temp[half - 1];
        float s_2;

        if (i + 2 < half) s_2 = temp[i + 2];
        else if (half > 1) s_2 = temp[half - 2];
        else s_2 = temp[half - 1];

        float prediction = (-1.0f/16.0f)*s_m1 + (9.0f/16.0f)*s_0 +
                           (9.0f/16.0f)*s_1 + (-1.0f/16.0f)*s_2;

        temp[half + i] += prediction;
    }

    // Merge evens and odds back into the original order
    for (int i = 0; i < half; i++) {
        data[2 * i] = temp[i];
        if (2 * i + 1 < length)
            data[2 * i + 1] = temp[half + i];
    }

    free(temp);
}

// Multi-level inverse DWT (DD-4 wavelet; the "haar" in the name is historical).
// The forward transform runs on sub-bands of length n, (n+1)/2, ... so the
// inverse replays exactly those lengths in reverse. Doubling from the deepest
// level only reproduces them when every intermediate length is even.
static void dwt_haar_inverse_multilevel(float *data, int length, int levels) {
    enum { MAX_LEVELS = 32 };   // an int length reaches 1 in fewer halvings than this
    int level_lengths[MAX_LEVELS];

    if (levels <= 0) return;
    if (levels > MAX_LEVELS) levels = MAX_LEVELS;   // deeper levels would be length-1 no-ops

    int current_length = length;
    for (int level = 0; level < levels; level++) {
        level_lengths[level] = current_length;
        current_length = (current_length + 1) / 2;
    }

    for (int level = levels - 1; level >= 0; level--) {
        dwt_dd4_inverse_1d(data, level_lengths[level]);
    }
}

//=============================================================================
// M/S Stereo Correlation (inverse of decorrelation)
//=============================================================================

// Rebuild unsigned 8-bit left/right samples from signed mid/side samples.
static void ms_correlate(const int8_t *mid, const int8_t *side, uint8_t *left, uint8_t *right, size_t count) {
    for (size_t i = 0; i < count; i++) {
        // L = M + S, R = M - S
        int32_t m = mid[i];
        int32_t s = side[i];
        int32_t l = m + s;
        int32_t r = m - s;

        // Clamp to [-128, 127] then convert to unsigned [0, 255]
        if (l < -128) l = -128;
        if (l > 127) l = 127;
        if (r < -128) r = -128;
        if (r > 127) r = 127;

        left[i] = (uint8_t)(l + 128);
        right[i] = (uint8_t)(r + 128);
    }
}

//=============================================================================
// Dequantization (inverse of quantization)
//=============================================================================

// Fill weights[0..dwt_levels) with the per-band dequantization weights for the
// given quality. The table is indexed by [dwt_levels][band] and has exactly 16
// rows, the same as the encoder's table; a 17th row would exceed the declared
// array size.
static void get_quantization_weights(int quality, int dwt_levels, float *weights) {
    static const float base_weights[16][16] = {
        /* 0*/{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
        /* 1*/{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
        /* 2*/{1.0f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 3*/{0.2f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 4*/{0.2f, 0.8f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 5*/{0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 6*/{0.2f, 0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 7*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 8*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 9*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*10*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*11*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*12*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*13*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*14*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f},
        /*15*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f}
    };

    if (dwt_levels <= 0) return;

    // Keep both the row lookup and the number of weights written inside the table
    const int row = (dwt_levels > 15) ? 15 : dwt_levels;
    const int band_count = (dwt_levels > 16) ? 16 : dwt_levels;

    // Quality 3 and above use the base weights; lower qualities scale them up
    float quality_scale = 1.0f + FCLAMP((3 - quality) * 0.5f, 0.0f, 1000.0f);

    for (int i = 0; i < band_count; i++) {
        weights[i] = FCLAMP(base_weights[row][i] * quality_scale, 1.0f, 1000.0f);
    }
}
//=============================================================================
// Significance Map Decoding
//
// All multi-byte fields are copied out with memcpy: they sit at arbitrary byte
// offsets inside the payload, so dereferencing a cast pointer would be a
// misaligned (and aliasing-violating) access. Values are read in host byte
// order, exactly as the cast-based reads did.
//
// None of these functions receive the input size, so they cannot detect a
// truncated payload; callers must guarantee the buffer is large enough.
//=============================================================================

// 1-bit map: [bitmap (1 bit per value)] [uint32 nonzero_count] [int16 values...]
// Writes `count` values and returns the number of input bytes the section occupies.
static size_t decode_sigmap_1bit(const uint8_t *input, int16_t *values, size_t count) {
    const size_t map_bytes = (count + 7) / 8;
    const uint8_t *map = input;
    const uint8_t *cursor = input + map_bytes;

    uint32_t nonzero_count;
    memcpy(&nonzero_count, cursor, sizeof nonzero_count);
    cursor += sizeof nonzero_count;

    // cursor now points at the packed non-zero values
    size_t value_idx = 0;
    for (size_t i = 0; i < count; i++) {
        if (map[i / 8] & (1u << (i % 8))) {
            memcpy(&values[i], cursor + value_idx * sizeof(int16_t), sizeof(int16_t));
            value_idx++;
        } else {
            values[i] = 0;
        }
    }

    return map_bytes + sizeof(uint32_t) + (size_t)nonzero_count * sizeof(int16_t);
}

// 2-bit map: [codes (2 bits per value)] [int16 "other" values...]
// Codes: 00 = 0, 01 = +1, 10 = -1, 11 = next value from the "other" list.
// Writes `count` values and returns the number of input bytes consumed.
static size_t decode_sigmap_2bit(const uint8_t *input, int16_t *values, size_t count) {
    const size_t map_bytes = (count * 2 + 7) / 8;
    const uint8_t *map = input;
    const uint8_t *others = input + map_bytes;
    size_t other_idx = 0;

    for (size_t i = 0; i < count; i++) {
        // Each code starts at an even bit offset (0, 2, 4 or 6), so it never
        // straddles two bytes and no spillover handling is needed.
        const size_t bit_pos = i * 2;
        const uint8_t code = (uint8_t)((map[bit_pos / 8] >> (bit_pos % 8)) & 0x03);

        switch (code) {
            case 0: values[i] = 0; break;
            case 1: values[i] = 1; break;
            case 2: values[i] = -1; break;
            default:
                memcpy(&values[i], others + other_idx * sizeof(int16_t), sizeof(int16_t));
                other_idx++;
                break;
        }
    }

    return map_bytes + other_idx * sizeof(int16_t);
}

// RLE map: [uint32 run_count] then per run [varint zero_run] [int16 value].
// Writes `count` values (zero-padding the tail) and returns the bytes consumed.
static size_t decode_sigmap_rle(const uint8_t *input, int16_t *values, size_t count) {
    const uint8_t *cursor = input;

    uint32_t run_count;
    memcpy(&run_count, cursor, sizeof run_count);
    cursor += sizeof run_count;

    size_t value_idx = 0;

    for (uint32_t run = 0; run < run_count; run++) {
        // Decode zero run length (LEB128-style varint, 7 bits per byte)
        uint32_t zero_run = 0;
        int shift = 0;
        uint8_t byte;

        do {
            byte = *cursor++;
            // Bits beyond 32 cannot be represented; shifting by >= 32 is undefined
            if (shift < 32) {
                zero_run |= ((uint32_t)(byte & 0x7F) << shift);
                shift += 7;
            }
        } while (byte & 0x80);

        // Fill zeros
        for (uint32_t i = 0; i < zero_run && value_idx < count; i++) {
            values[value_idx++] = 0;
        }

        // Read the value that terminates the run
        int16_t val;
        memcpy(&val, cursor, sizeof val);
        cursor += sizeof val;

        // A zero terminator carries no sample and is skipped
        if (value_idx < count && val != 0) {
            values[value_idx++] = val;
        }
    }

    // Fill remaining with zeros
    while (value_idx < count) {
        values[value_idx++] = 0;
    }

    return (size_t)(cursor - input);
}
*decompressed = NULL; + + // Estimate decompressed size (generous upper bound) + size_t decompressed_size = sample_count * 4 * sizeof(int16_t); + decompressed = malloc(decompressed_size); + + size_t actual_size = ZSTD_decompress(decompressed, decompressed_size, read_ptr, payload_size); + + if (ZSTD_isError(actual_size)) { + fprintf(stderr, "Error: Zstd decompression failed: %s\n", ZSTD_getErrorName(actual_size)); + free(decompressed); + return -1; + } + + payload = decompressed; + + read_ptr += payload_size; + *bytes_consumed = read_ptr - input; + *samples_decoded = sample_count; + + // Allocate working buffers + int16_t *quant_mid = malloc(sample_count * sizeof(int16_t)); + int16_t *quant_side = malloc(sample_count * sizeof(int16_t)); + float *dwt_mid = malloc(sample_count * sizeof(float)); + float *dwt_side = malloc(sample_count * sizeof(float)); + int8_t *pcm8_mid = malloc(sample_count * sizeof(int8_t)); + int8_t *pcm8_side = malloc(sample_count * sizeof(int8_t)); + uint8_t *pcm8_left = malloc(sample_count * sizeof(uint8_t)); + uint8_t *pcm8_right = malloc(sample_count * sizeof(uint8_t)); + + // Decode significance maps + const uint8_t *payload_ptr = payload; + size_t mid_bytes, side_bytes; + + mid_bytes = decode_sigmap_2bit(payload_ptr, quant_mid, sample_count); + side_bytes = decode_sigmap_2bit(payload_ptr + mid_bytes, quant_side, sample_count); + + // Dequantize + dequantize_dwt_coefficients(quant_mid, dwt_mid, sample_count, quality, sample_count, dwt_levels); + dequantize_dwt_coefficients(quant_side, dwt_side, sample_count, quality, sample_count, dwt_levels); + + // Inverse DWT + dwt_haar_inverse_multilevel(dwt_mid, sample_count, dwt_levels); + dwt_haar_inverse_multilevel(dwt_side, sample_count, dwt_levels); + + // Convert to signed PCM8 + for (size_t i = 0; i < sample_count; i++) { + float m = dwt_mid[i]; + float s = dwt_side[i]; + + // Clamp and round + if (m < -128.0f) m = -128.0f; + if (m > 127.0f) m = 127.0f; + if (s < -128.0f) s = -128.0f; + if (s > 
127.0f) s = 127.0f; + + pcm8_mid[i] = (int8_t)roundf(m); + pcm8_side[i] = (int8_t)roundf(s); + } + + // M/S to L/R correlation + ms_correlate(pcm8_mid, pcm8_side, pcm8_left, pcm8_right, sample_count); + + // Interleave stereo output (PCMu8) + for (size_t i = 0; i < sample_count; i++) { + pcmu8_stereo[i * 2] = pcm8_left[i]; + pcmu8_stereo[i * 2 + 1] = pcm8_right[i]; + } + + // Cleanup + free(quant_mid); free(quant_side); free(dwt_mid); free(dwt_side); + free(pcm8_mid); free(pcm8_side); free(pcm8_left); free(pcm8_right); + if (decompressed) free(decompressed); + + return 0; +} + +//============================================================================= +// Main Decoder +//============================================================================= + +static void print_usage(const char *prog_name) { + printf("Usage: %s -i -o [options]\n", prog_name); + printf("Options:\n"); + printf(" -i Input TAD file\n"); + printf(" -o Output PCMu8 file (raw 8-bit unsigned stereo @ 32kHz)\n"); + printf(" -q <0-5> Quality level used during encoding (default: 2)\n"); + printf(" -v Verbose output\n"); + printf(" -h, --help Show this help\n"); + printf("\nVersion: %s\n", DECODER_VENDOR_STRING); + printf("Output format: PCMu8 (unsigned 8-bit) stereo @ 32000 Hz\n"); + printf("To convert to WAV: ffmpeg -f u8 -ar 32000 -ac 2 -i output.raw output.wav\n"); +} + +int main(int argc, char *argv[]) { + char *input_file = NULL; + char *output_file = NULL; + int quality = 2; // Must match encoder quality + int verbose = 0; + + int opt; + while ((opt = getopt(argc, argv, "i:o:q:vh")) != -1) { + switch (opt) { + case 'i': + input_file = optarg; + break; + case 'o': + output_file = optarg; + break; + case 'q': + quality = atoi(optarg); + if (quality < TAD_QUALITY_MIN || quality > TAD_QUALITY_MAX) { + fprintf(stderr, "Error: Quality must be between %d and %d\n", + TAD_QUALITY_MIN, TAD_QUALITY_MAX); + return 1; + } + break; + case 'v': + verbose = 1; + break; + case 'h': + print_usage(argv[0]); + 
return 0; + default: + print_usage(argv[0]); + return 1; + } + } + + if (!input_file || !output_file) { + fprintf(stderr, "Error: Input and output files are required\n"); + print_usage(argv[0]); + return 1; + } + + if (verbose) { + printf("%s\n", DECODER_VENDOR_STRING); + printf("Input: %s\n", input_file); + printf("Output: %s\n", output_file); + printf("Quality: %d\n", quality); + } + + // Open input file + FILE *input = fopen(input_file, "rb"); + if (!input) { + fprintf(stderr, "Error: Could not open input file: %s\n", input_file); + return 1; + } + + // Get file size + fseek(input, 0, SEEK_END); + size_t input_size = ftell(input); + fseek(input, 0, SEEK_SET); + + // Read entire file into memory + uint8_t *input_data = malloc(input_size); + fread(input_data, 1, input_size, input); + fclose(input); + + // Open output file + FILE *output = fopen(output_file, "wb"); + if (!output) { + fprintf(stderr, "Error: Could not open output file: %s\n", output_file); + free(input_data); + return 1; + } + + // Decode chunks + size_t offset = 0; + size_t chunk_count = 0; + size_t total_samples = 0; + // Allocate buffer for maximum chunk size (can handle variable sizes up to default) + uint8_t *chunk_output = malloc(TAD_DEFAULT_CHUNK_SIZE * TAD_CHANNELS); + + while (offset < input_size) { + size_t bytes_consumed, samples_decoded; + int result = decode_chunk(input_data + offset, input_size - offset, + chunk_output, quality, &bytes_consumed, &samples_decoded); + + if (result != 0) { + fprintf(stderr, "Error: Chunk decoding failed at offset %zu\n", offset); + free(input_data); + free(chunk_output); + fclose(output); + return 1; + } + + // Write decoded chunk (only the actual samples) + fwrite(chunk_output, TAD_CHANNELS, samples_decoded, output); + + offset += bytes_consumed; + total_samples += samples_decoded; + chunk_count++; + + if (verbose && (chunk_count % 10 == 0)) { + printf("Decoded chunk %zu (offset %zu/%zu, %zu samples)\r", chunk_count, offset, input_size, samples_decoded); 
+ fflush(stdout); + } + } + + if (verbose) { + printf("\nDecoding complete!\n"); + printf("Decoded %zu chunks\n", chunk_count); + printf("Total samples: %zu (%.2f seconds)\n", + total_samples, + total_samples / (double)TAD_SAMPLE_RATE); + } + + // Cleanup + free(input_data); + free(chunk_output); + fclose(output); + + printf("Output written to: %s\n", output_file); + printf("Format: PCMu8 stereo @ %d Hz\n", TAD_SAMPLE_RATE); + + return 0; +} diff --git a/video_encoder/encoder_tad.c b/video_encoder/encoder_tad.c new file mode 100644 index 0000000..552de69 --- /dev/null +++ b/video_encoder/encoder_tad.c @@ -0,0 +1,459 @@ +// Created by CuriousTorvald and Claude on 2025-10-23. +// TAD (Terrarum Advanced Audio) Encoder Library - DWT-based audio compression +// This file contains only the encoding functions for use by encoder_tad.c and encoder_tav.c + +#include +#include +#include +#include +#include +#include +#include "encoder_tad.h" + +// Forward declarations for internal functions +static void dwt_haar_forward_1d(float *data, int length); +static void dwt_dd4_forward_1d(float *data, int length); +static void dwt_97_forward_1d(float *data, int length); +static void dwt_haar_forward_multilevel(float *data, int length, int levels); +static void ms_decorrelate(const int8_t *left, const int8_t *right, int8_t *mid, int8_t *side, size_t count); +static void convert_pcm16_to_pcm8_dithered(const int16_t *pcm16, int8_t *pcm8, int num_samples, int16_t *dither_error); +static void get_quantization_weights(int quality, int dwt_levels, float *weights); +static int get_deadzone_threshold(int quality); +static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, size_t count, int quality, int apply_deadzone, int chunk_size, int dwt_levels); +static size_t encode_sigmap_2bit(const int16_t *values, size_t count, uint8_t *output); + +static inline float FCLAMP(float x, float min, float max) { + return x < min ? min : (x > max ? 
// Allocate a scratch buffer of `length` floats. The transforms return void and
// cannot report failure, and an unchecked NULL would be dereferenced right
// away, so abort with a diagnostic instead.
static float *dwt_alloc_scratch(int length) {
    float *buf = malloc((size_t)length * sizeof *buf);
    if (!buf) {
        fprintf(stderr, "Error: Out of memory allocating DWT scratch buffer (%d samples)\n", length);
        abort();
    }
    return buf;
}

//=============================================================================
// Forward DWT Implementations
// Each transform rewrites data[0..length) in place as
// [low-pass (ceil(length/2)) | high-pass (floor(length/2))].
//=============================================================================

// Haar forward 1D transform: pairwise averages (low) and half-differences (high)
static void dwt_haar_forward_1d(float *data, int length) {
    if (length < 2) return;

    float *temp = dwt_alloc_scratch(length);
    int half = (length + 1) / 2;

    for (int i = 0; i < half; i++) {
        if (2 * i + 1 < length) {
            // Average of adjacent pairs (low-pass)
            temp[i] = (data[2 * i] + data[2 * i + 1]) / 2.0f;
            // Difference of adjacent pairs (high-pass)
            temp[half + i] = (data[2 * i] - data[2 * i + 1]) / 2.0f;
        } else {
            // Handle odd length: last sample goes to low-pass
            temp[i] = data[2 * i];
            if (half + i < length) {
                temp[half + i] = 0.0f;
            }
        }
    }

    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
}

// Four-point interpolating Deslauriers-Dubuc (DD-4) wavelet forward 1D transform
static void dwt_dd4_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int half = (length + 1) / 2;   // number of even (low-pass) samples
    const int n_odd = length / 2;        // number of odd (high-pass) samples

    float *temp = dwt_alloc_scratch(length);
    float *low = temp;
    float *high = temp + half;

    // Split into even/odd samples
    for (int i = 0; i < half; i++) {
        low[i] = data[2 * i];
    }
    for (int i = 0; i < n_odd; i++) {
        high[i] = data[2 * i + 1];
    }

    // Prediction step: subtract the four-point interpolation of the neighbouring
    // even samples from every odd sample (indices are mirrored at the edges)
    for (int i = 0; i < n_odd; i++) {
        float s_m1 = (i > 0) ? low[i - 1] : low[0];
        float s_0 = low[i];
        float s_1 = (i + 1 < half) ? low[i + 1] : low[half - 1];
        float s_2;

        if (i + 2 < half) s_2 = low[i + 2];
        else if (half > 1) s_2 = low[half - 2];
        else s_2 = low[half - 1];

        float prediction = (-1.0f/16.0f) * s_m1 + (9.0f/16.0f) * s_0 +
                           (9.0f/16.0f) * s_1 + (-1.0f/16.0f) * s_2;

        high[i] -= prediction;
    }

    // Update step: s[i] += 0.25 * (d[i-1] + d[i])
    for (int i = 0; i < half; i++) {
        float d_curr = (i < n_odd) ? high[i] : 0.0f;
        float d_prev = (i > 0 && i - 1 < n_odd) ? high[i - 1] : 0.0f;
        low[i] += 0.25f * (d_prev + d_curr);
    }

    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
}

// 1D DWT using lifting scheme for 9/7 irreversible filter
static void dwt_97_forward_1d(float *data, int length) {
    if (length < 2) return;

    float *temp = dwt_alloc_scratch(length);
    int half = (length + 1) / 2;

    // Split into even/odd samples
    for (int i = 0; i < half; i++) {
        temp[i] = data[2 * i];              // Even (low)
    }
    for (int i = 0; i < length / 2; i++) {
        temp[half + i] = data[2 * i + 1];   // Odd (high)
    }

    // 9/7 forward lifting coefficients
    const float alpha = -1.586134342f;
    const float beta = -0.052980118f;
    const float gamma = 0.882911076f;
    const float delta = 0.443506852f;
    const float K = 1.230174105f;

    // Step 1: Predict (alpha)
    for (int i = 0; i < length / 2; i++) {
        if (half + i < length) {
            float s_curr = temp[i];
            float s_next = (i + 1 < half) ? temp[i + 1] : s_curr;
            temp[half + i] += alpha * (s_curr + s_next);
        }
    }

    // Step 2: Update (beta)
    for (int i = 0; i < half; i++) {
        float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
        float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr;
        temp[i] += beta * (d_prev + d_curr);
    }

    // Step 3: Predict (gamma)
    for (int i = 0; i < length / 2; i++) {
        if (half + i < length) {
            float s_curr = temp[i];
            float s_next = (i + 1 < half) ? temp[i + 1] : s_curr;
            temp[half + i] += gamma * (s_curr + s_next);
        }
    }

    // Step 4: Update (delta)
    for (int i = 0; i < half; i++) {
        float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
        float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr;
        temp[i] += delta * (d_prev + d_curr);
    }

    // Step 5: Scaling
    for (int i = 0; i < half; i++) {
        temp[i] *= K;
    }
    for (int i = 0; i < length / 2; i++) {
        if (half + i < length) {
            temp[half + i] /= K;
        }
    }

    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
}

// Apply multi-level DWT (using the DD-4 wavelet; the "haar" in the name is
// historical). Each level re-transforms the low-pass half of the previous one.
static void dwt_haar_forward_multilevel(float *data, int length, int levels) {
    int current_length = length;
    for (int level = 0; level < levels; level++) {
        dwt_dd4_forward_1d(data, current_length);
        current_length = (current_length + 1) / 2;
    }
}

//=============================================================================
// M/S Stereo Decorrelation
//=============================================================================

// Convert left/right samples to mid/side: Mid = (L + R) / 2, Side = (L - R) / 2.
// Integer division truncates toward zero, so the transform is lossy.
static void ms_decorrelate(const int8_t *left, const int8_t *right, int8_t *mid, int8_t *side, size_t count) {
    for (size_t i = 0; i < count; i++) {
        int32_t l = left[i];
        int32_t r = right[i];
        mid[i] = (int8_t)((l + r) / 2);
        side[i] = (int8_t)((l - r) / 2);
    }
}
//=============================================================================
// Quantization with Frequency-Dependent Weighting
//=============================================================================

// Fill weights[0..dwt_levels) with the per-band quantization weights for the
// given quality. The table is indexed by [dwt_levels][band]; indices are kept
// inside the 16x16 table so an unexpected level count cannot read or write out
// of bounds (callers pass a 16-element weights array).
static void get_quantization_weights(int quality, int dwt_levels, float *weights) {
    static const float base_weights[16][16] = {
        /* 0*/{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
        /* 1*/{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
        /* 2*/{1.0f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 3*/{0.2f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 4*/{0.2f, 0.8f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 5*/{0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 6*/{0.2f, 0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 7*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 8*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 9*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*10*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*11*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*12*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*13*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*14*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f},
        /*15*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f}
    };

    if (dwt_levels <= 0) return;

    const int row = (dwt_levels > 15) ? 15 : dwt_levels;
    const int band_count = (dwt_levels > 16) ? 16 : dwt_levels;

    // Quality 3 and above use the base weights; each step below 3 adds 0.5 to
    // the scale (the boost is clamped to [0, 1000])
    float boost = (3 - quality) * 0.5f;
    boost = boost < 0.0f ? 0.0f : (boost > 1000.0f ? 1000.0f : boost);
    const float quality_scale = 1.0f + boost;

    for (int i = 0; i < band_count; i++) {
        // Weights are clamped to [1, 1000]
        const float scaled = base_weights[row][i] * quality_scale;
        weights[i] = scaled < 1.0f ? 1.0f : (scaled > 1000.0f ? 1000.0f : scaled);
    }
}

// Dead-zone threshold for a quality level (Q0..Q5). Out-of-range qualities are
// clamped to the nearest valid level instead of indexing past the table.
static int get_deadzone_threshold(int quality) {
    static const int thresholds[] = {1,1,0,0,0,0};  // Q0 to Q5
    const int last = (int)(sizeof thresholds / sizeof thresholds[0]) - 1;

    if (quality < 0) quality = 0;
    if (quality > last) quality = last;

    return thresholds[quality];
}
//=============================================================================
// Significance Map Encoding
//=============================================================================

// Encode `count` quantized values as a 2-bit significance map followed by the
// raw int16 values that do not fit a 2-bit code.
//   Codes: 00 = 0, 01 = +1, 10 = -1, 11 = "other" (value appended after the map)
// Layout written to output: [map: ceil(count * 2 / 8) bytes] [other values...]
// Returns the number of bytes written. output must have room for the worst
// case of ceil(count / 4) + count * sizeof(int16_t) bytes.
static size_t encode_sigmap_2bit(const int16_t *values, size_t count, uint8_t *output) {
    const size_t map_bytes = (count * 2 + 7) / 8;
    uint8_t *map = output;
    uint8_t *others = output + map_bytes;
    size_t other_count = 0;

    memset(map, 0, map_bytes);

    for (size_t i = 0; i < count; i++) {
        const int16_t val = values[i];
        unsigned code;

        switch (val) {
            case 0:  code = 0; break;   // 00
            case 1:  code = 1; break;   // 01
            case -1: code = 2; break;   // 10
            default:
                code = 3;               // 11
                // The value area starts at an arbitrary byte offset, so store
                // with memcpy rather than through a (possibly misaligned)
                // int16_t pointer. Host byte order, same as a direct store.
                memcpy(others + other_count * sizeof val, &val, sizeof val);
                other_count++;
                break;
        }

        // Codes start at even bit offsets (0, 2, 4, 6) and never straddle bytes
        const size_t bit_pos = i * 2;
        map[bit_pos / 8] |= (uint8_t)(code << (bit_pos % 8));
    }

    return map_bytes + other_count * sizeof(int16_t);
}
buffers + int8_t *pcm8_stereo = malloc(num_samples * 2 * sizeof(int8_t)); + int8_t *pcm8_left = malloc(num_samples * sizeof(int8_t)); + int8_t *pcm8_right = malloc(num_samples * sizeof(int8_t)); + int8_t *pcm8_mid = malloc(num_samples * sizeof(int8_t)); + int8_t *pcm8_side = malloc(num_samples * sizeof(int8_t)); + + float *dwt_mid = malloc(num_samples * sizeof(float)); + float *dwt_side = malloc(num_samples * sizeof(float)); + + int16_t *quant_mid = malloc(num_samples * sizeof(int16_t)); + int16_t *quant_side = malloc(num_samples * sizeof(int16_t)); + + // Step 1: Convert PCM16 to signed PCM8 with dithering + int16_t dither_error[2] = {0, 0}; + convert_pcm16_to_pcm8_dithered(pcm16_stereo, pcm8_stereo, num_samples, dither_error); + + // Deinterleave stereo + for (size_t i = 0; i < num_samples; i++) { + pcm8_left[i] = pcm8_stereo[i * 2]; + pcm8_right[i] = pcm8_stereo[i * 2 + 1]; + } + + // Step 2: M/S decorrelation + ms_decorrelate(pcm8_left, pcm8_right, pcm8_mid, pcm8_side, num_samples); + + // Step 3: Convert to float and apply DWT + for (size_t i = 0; i < num_samples; i++) { + dwt_mid[i] = (float)pcm8_mid[i]; + dwt_side[i] = (float)pcm8_side[i]; + } + + dwt_haar_forward_multilevel(dwt_mid, num_samples, dwt_levels); + dwt_haar_forward_multilevel(dwt_side, num_samples, dwt_levels); + + // Step 4: Quantize with frequency-dependent weights and dead zone + quantize_dwt_coefficients(dwt_mid, quant_mid, num_samples, quality, 1, num_samples, dwt_levels); + quantize_dwt_coefficients(dwt_side, quant_side, num_samples, quality, 1, num_samples, dwt_levels); + + // Step 5: Encode with 2-bit significance map + uint8_t *temp_buffer = malloc(num_samples * 4 * sizeof(int16_t)); + size_t mid_size = encode_sigmap_2bit(quant_mid, num_samples, temp_buffer); + size_t side_size = encode_sigmap_2bit(quant_side, num_samples, temp_buffer + mid_size); + + size_t uncompressed_size = mid_size + side_size; + + // Step 6: Optional Zstd compression + uint8_t *write_ptr = output; + + 
*((uint16_t*)write_ptr) = (uint16_t)num_samples; + write_ptr += sizeof(uint16_t); + + uint32_t *payload_size_ptr = (uint32_t*)write_ptr; + write_ptr += sizeof(uint32_t); + + size_t payload_size; + + if (use_zstd) { + size_t zstd_bound = ZSTD_compressBound(uncompressed_size); + uint8_t *zstd_buffer = malloc(zstd_bound); + + payload_size = ZSTD_compress(zstd_buffer, zstd_bound, temp_buffer, uncompressed_size, TAD_ZSTD_LEVEL); + + if (ZSTD_isError(payload_size)) { + fprintf(stderr, "Error: Zstd compression failed: %s\n", ZSTD_getErrorName(payload_size)); + free(zstd_buffer); + free(pcm8_stereo); free(pcm8_left); free(pcm8_right); + free(pcm8_mid); free(pcm8_side); free(dwt_mid); free(dwt_side); + free(quant_mid); free(quant_side); free(temp_buffer); + return 0; + } + + memcpy(write_ptr, zstd_buffer, payload_size); + free(zstd_buffer); + } else { + payload_size = uncompressed_size; + memcpy(write_ptr, temp_buffer, payload_size); + } + + *payload_size_ptr = (uint32_t)payload_size; + write_ptr += payload_size; + + // Cleanup + free(pcm8_stereo); free(pcm8_left); free(pcm8_right); + free(pcm8_mid); free(pcm8_side); free(dwt_mid); free(dwt_side); + free(quant_mid); free(quant_side); free(temp_buffer); + + return write_ptr - output; +} diff --git a/video_encoder/encoder_tad.h b/video_encoder/encoder_tad.h new file mode 100644 index 0000000..f9b5443 --- /dev/null +++ b/video_encoder/encoder_tad.h @@ -0,0 +1,40 @@ +#ifndef TAD_ENCODER_H +#define TAD_ENCODER_H + +#include +#include + +// TAD (Terrarum Advanced Audio) Encoder +// DWT-based perceptual audio codec for TSVM + +// Constants +#define TAD_MIN_CHUNK_SIZE 1024 // Minimum: 1024 samples (supports non-power-of-2) +#define TAD_SAMPLE_RATE 32000 +#define TAD_CHANNELS 2 // Stereo +#define TAD_SIGMAP_2BIT 1 // 2-bit: 00=0, 01=+1, 10=-1, 11=other +#define TAD_QUALITY_MIN 0 +#define TAD_QUALITY_MAX 5 +#define TAD_QUALITY_DEFAULT 3 +#define TAD_ZSTD_LEVEL 7 + +/** + * Encode audio chunk with TAD codec + * + * @param pcm16_stereo 
Input PCM16LE stereo samples (interleaved L,R) + * @param num_samples Number of samples per channel (supports non-power-of-2, min 1024) + * @param quality Quality level 0-5 (0=lowest, 5=highest) + * @param use_zstd 1=enable Zstd compression, 0=disable + * @param output Output buffer (must be large enough) + * @return Number of bytes written to output, or 0 on error + * + * Output format: + * uint8 sigmap_method (always 1 = 2-bit twobitmap) + * uint8 compressed_flag (1=Zstd, 0=raw) + * uint16 sample_count (samples per channel) + * uint32 payload_size (bytes in payload) + * * payload (encoded M/S data, optionally Zstd-compressed) + */ +size_t tad_encode_chunk(const int16_t *pcm16_stereo, size_t num_samples, int quality, + int use_zstd, uint8_t *output); + +#endif // TAD_ENCODER_H diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index 45f9256..11ff9ec 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -11,14 +11,14 @@ #include #include #include +#include "encoder_tad.h" // TAD audio encoder #include #include #include #include #include -#include -#define ENCODER_VENDOR_STRING "Encoder-TAV 20251023 (3d-dwt)" +#define ENCODER_VENDOR_STRING "Encoder-TAV 20251024 (3d-dwt,tad)" // TSVM Advanced Video (TAV) format constants #define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV" @@ -55,6 +55,7 @@ #define TAV_PACKET_BFRAME_ADAPTIVE 0x17 // B-frame with adaptive quad-tree block partitioning (bidirectional prediction) #define TAV_PACKET_AUDIO_MP2 0x20 // MP2 audio #define TAV_PACKET_AUDIO_PCM8 0x21 // 8-bit PCM audio (zstd compressed) +#define TAV_PACKET_AUDIO_TAD 0x24 // TAD audio (DWT-based perceptual codec) #define TAV_PACKET_SUBTITLE 0x30 // Subtitle packet #define TAV_PACKET_AUDIO_TRACK 0x40 // Separate audio track (full MP2 file) #define TAV_PACKET_EXTENDED_HDR 0xEF // Extended header packet @@ -63,6 +64,15 @@ #define TAV_PACKET_SYNC_NTSC 0xFE // NTSC Sync packet #define TAV_PACKET_SYNC 0xFF // Sync packet 
+// TAD (Terrarum Advanced Audio) settings +#define TAD_MIN_CHUNK_SIZE 1024 // Minimum: 1024 samples (supports non-power-of-2) +#define TAD_SAMPLE_RATE 32000 +#define TAD_CHANNELS 2 // Stereo +#define TAD_SIGMAP_2BIT 1 // 2-bit: 00=0, 01=+1, 10=-1, 11=other +#define TAD_QUALITY_MIN 0 +#define TAD_QUALITY_MAX 5 +#define TAD_ZSTD_LEVEL 7 + // DWT settings #define TILE_SIZE_X 640 #define TILE_SIZE_Y 540 @@ -1753,6 +1763,7 @@ typedef struct tav_encoder_s { int delta_haar_levels; // Number of Haar DWT levels to apply to delta coefficients (0 = disabled) int separate_audio_track; // 1 = write entire MP2 file as packet 0x40 after header, 0 = interleave audio (default) int pcm8_audio; // 1 = use 8-bit PCM audio (packet 0x21), 0 = use MP2 (default) + int tad_audio; // 1 = use TAD audio (packet 0x24), 0 = use MP2/PCM8 (default, quality follows quality_level) // Frame buffers - ping-pong implementation uint8_t *frame_rgb[2]; // [0] and [1] alternate between current and previous @@ -2272,6 +2283,7 @@ static void show_usage(const char *program_name) { printf(" Valid values: 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384\n"); // printf(" --separate-audio-track Write entire audio track as single packet instead of interleaved\n"); printf(" --pcm8-audio Use 8-bit PCM audio instead of MP2 (TSVM native audio format)\n"); + printf(" --tad-audio Use TAD (DWT-based perceptual) audio codec (packet 0x24, quality follows -q)\n"); printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n"); printf(" --fontrom-lo FILE Low font ROM file for internationalised subtitles\n"); printf(" --fontrom-hi FILE High font ROM file for internationalised subtitles\n"); @@ -2361,6 +2373,7 @@ static tav_encoder_t* create_encoder(void) { enc->delta_haar_levels = TEMPORAL_DECOMP_LEVEL; enc->separate_audio_track = 0; // Default: interleave audio packets enc->pcm8_audio = 0; // Default: use MP2 audio + enc->tad_audio = 0; // Default: use MP2 audio (TAD quality follows 
quality_level) // GOP / temporal DWT settings enc->enable_temporal_dwt = 1; // Mutually exclusive with use_delta_encoding @@ -8050,11 +8063,15 @@ static int start_audio_conversion(tav_encoder_t *enc) { char command[2048]; - if (enc->pcm8_audio) { - // Extract PCM16LE for PCM8 mode - printf(" Audio format: PCM16LE 32kHz stereo (will be converted to 8-bit)\n"); + if (enc->pcm8_audio || enc->tad_audio) { + // Extract PCM16LE for PCM8/TAD mode + if (enc->pcm8_audio) { + printf(" Audio format: PCM16LE 32kHz stereo (will be converted to 8-bit PCM)\n"); + } else { + printf(" Audio format: PCM16LE 32kHz stereo (will be encoded with TAD codec)\n"); + } snprintf(command, sizeof(command), - "ffmpeg -v quiet -i \"%s\" -f s16le -acodec pcm_s16le -ar %d -ac 2 -y \"%s\" 2>/dev/null", + "ffmpeg -v quiet -i \"%s\" -f s16le -acodec pcm_s16le -ar %d -ac 2 -af \"aresample=resampler=soxr:precision=28:cutoff=0.99:dither_scale=0,highpass=f=16\" -y \"%s\" 2>/dev/null", enc->input_file, TSVM_AUDIO_SAMPLE_RATE, TEMP_PCM_FILE); int result = system(command); @@ -8806,6 +8823,95 @@ static int write_separate_audio_track(tav_encoder_t *enc, FILE *output) { return 1; } +// Write TAD audio packet (0x24) with specified sample count +// Uses linked TAD encoder (encoder_tad.c) +static int write_tad_packet_samples(tav_encoder_t *enc, FILE *output, int samples_to_read) { + if (!enc->pcm_file || enc->audio_remaining <= 0 || samples_to_read <= 0) { + return 0; + } + size_t bytes_to_read = samples_to_read * 2 * sizeof(int16_t); // Stereo PCM16LE + + // Don't read more than what's available + if (bytes_to_read > enc->audio_remaining) { + bytes_to_read = enc->audio_remaining; + samples_to_read = bytes_to_read / (2 * sizeof(int16_t)); + } + + if (samples_to_read < TAD_MIN_CHUNK_SIZE) { + // Pad to minimum size + samples_to_read = TAD_MIN_CHUNK_SIZE; + } + + // Allocate PCM16 input buffer + int16_t *pcm16_buffer = malloc(samples_to_read * 2 * sizeof(int16_t)); + + // Read PCM16LE data + size_t bytes_read = 
fread(pcm16_buffer, 1, bytes_to_read, enc->pcm_file); + if (bytes_read == 0) { + free(pcm16_buffer); + return 0; + } + + int samples_read = bytes_read / (2 * sizeof(int16_t)); + + // Zero-pad if needed + if (samples_read < samples_to_read) { + memset(&pcm16_buffer[samples_read * 2], 0, + (samples_to_read - samples_read) * 2 * sizeof(int16_t)); + } + + // Encode with TAD encoder (linked from encoder_tad.o) + int tad_quality = enc->quality_level; // Use video quality level for audio + if (tad_quality > TAD_QUALITY_MAX) tad_quality = TAD_QUALITY_MAX; + if (tad_quality < TAD_QUALITY_MIN) tad_quality = TAD_QUALITY_MIN; + + // Allocate output buffer (generous size for TAD chunk) + size_t max_output_size = samples_to_read * 4 * sizeof(int16_t) + 1024; + uint8_t *tad_output = malloc(max_output_size); + + size_t tad_encoded_size = tad_encode_chunk(pcm16_buffer, samples_to_read, tad_quality, 1, tad_output); + + if (tad_encoded_size == 0) { + fprintf(stderr, "Error: TAD encoding failed\n"); + free(pcm16_buffer); + free(tad_output); + return 0; + } + + // Parse TAD chunk format: [sample_count][payload_size][payload] + uint8_t *read_ptr = tad_output; + uint16_t sample_count = *((uint16_t*)read_ptr); + read_ptr += sizeof(uint16_t); + uint32_t tad_payload_size = *((uint32_t*)read_ptr); + read_ptr += sizeof(uint32_t); + uint8_t *tad_payload = read_ptr; + + // Write TAV packet 0x24: [0x24][payload_size+2][sample_count][compressed_size][compressed_data] + uint8_t packet_type = TAV_PACKET_AUDIO_TAD; + fwrite(&packet_type, 1, 1, output); + + uint32_t tav_payload_size = (uint32_t)tad_payload_size; + uint32_t tav_payload_size_plus_two = (uint32_t)tad_payload_size + 2; + fwrite(&tav_payload_size_plus_two, sizeof(uint32_t), 1, output); + fwrite(&sample_count, sizeof(uint16_t), 1, output); + fwrite(&tav_payload_size, sizeof(uint32_t), 1, output); + fwrite(tad_payload, 1, tad_payload_size, output); + + // Update audio remaining + enc->audio_remaining -= bytes_read; + + if (enc->verbose) { + 
printf("TAD packet: %d samples, %u bytes compressed (Q%d)\n", + sample_count, tad_payload_size, tad_quality); + } + + // Cleanup + free(pcm16_buffer); + free(tad_output); + + return 1; +} + // Write PCM8 audio packet (0x21) with specified sample count static int write_pcm8_packet_samples(tav_encoder_t *enc, FILE *output, int samples_to_read) { if (!enc->pcm_file || enc->audio_remaining <= 0 || samples_to_read <= 0) { @@ -8904,6 +9010,15 @@ static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output) { return 1; } + // Handle TAD mode + if (enc->tad_audio) { + if (!enc->has_audio || !enc->pcm_file) { + return 1; + } + // Write one TAD packet per frame + return write_tad_packet_samples(enc, output, enc->samples_per_frame); + } + // Handle PCM8 mode if (enc->pcm8_audio) { if (!enc->has_audio || !enc->pcm_file) { @@ -9020,6 +9135,29 @@ static int process_audio_for_gop(tav_encoder_t *enc, int *frame_numbers, int num return 1; } + // Handle TAD mode: variable chunk size support + if (enc->tad_audio) { + if (!enc->has_audio || !enc->pcm_file || num_frames == 0) { + return 1; + } + + // Calculate total samples for this GOP + int total_samples = num_frames * enc->samples_per_frame; + + // TAD supports variable chunk sizes (non-power-of-2) + // We can write the entire GOP in one packet (up to 32768+ samples) + if (enc->verbose) { + printf("TAD GOP: %d frames, %d total samples\n", num_frames, total_samples); + } + + // Write one TAD packet for the entire GOP + if (!write_tad_packet_samples(enc, output, total_samples)) { + // No more audio data + } + + return 1; + } + // Handle PCM8 mode: emit mega packet(s) evenly divided if exceeding 32768 samples if (enc->pcm8_audio) { if (!enc->has_audio || !enc->pcm_file || num_frames == 0) { @@ -9448,6 +9586,7 @@ int main(int argc, char *argv[]) { {"pcm-audio", no_argument, 0, 1027}, {"native-audio", no_argument, 0, 1027}, {"native-audio-format", no_argument, 0, 1027}, + {"tad-audio", no_argument, 0, 1028}, {"help", 
no_argument, 0, '?'}, {0, 0, 0, 0} }; @@ -9668,6 +9807,10 @@ int main(int argc, char *argv[]) { enc->pcm8_audio = 1; printf("8-bit PCM audio mode enabled (packet 0x21)\n"); break; + case 1028: // --tad-audio + enc->tad_audio = 1; + printf("TAD audio mode enabled (packet 0x24, quality follows -q)\n"); + break; case 'a': int bitrate = atoi(optarg); int valid_bitrate = validate_mp2_bitrate(bitrate);