mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 11:51:49 +09:00
TAD: Terrarum Advanced Audio to use with video compression
This commit is contained in:
65
CLAUDE.md
65
CLAUDE.md
@@ -314,3 +314,68 @@ Implemented on 2025-10-15 for improved temporal compression through group-of-pic
|
||||
- **Unified Compression**: Zstd compresses entire GOP as single block, finding patterns across time
|
||||
- **Motion Compensation**: FFT-based phase correlation provides accurate global motion estimation
|
||||
- **Adaptive GOPs**: Scene change detection ensures optimal GOP boundaries
|
||||
|
||||
#### TAD Format (TSVM Advanced Audio)
|
||||
- **Perceptual audio codec** for TSVM using DWT with 4-tap interpolating Deslauriers-Dubuc wavelets
|
||||
- **C Encoder**: `video_encoder/encoder_tad.c` - Core Encoder library; `video_encoder/encoder_tad_standalone.c` - Standalone encoder with FFmpeg integration
|
||||
- How to build: `make tad`
|
||||
- **Quality Levels**: 0-5 (0=lowest quality/smallest, 5=highest quality/largest; designed to be in sync with TAV encoder)
|
||||
- **C Decoder**: `video_encoder/decoder_tad.c` - Standalone decoder for TAD format
|
||||
- **Features**:
|
||||
- **32 KHz stereo**: TSVM audio hardware native format
|
||||
- **Variable chunk sizes**: 1024-32768+ samples, enables flexible TAV integration
|
||||
- **M/S stereo decorrelation**: Exploits stereo correlation for better compression
|
||||
- **PCM16→PCM8 conversion**: Error-diffusion dithering to minimize quantization noise
|
||||
- **Variable-level DD-4 DWT**: Dynamic levels (log2(chunk_size) - 2) for frequency domain analysis
|
||||
- **Perceptual quantization**: Frequency-dependent weights preserving critical 2-4 KHz range
|
||||
- **2-bit twobitmap significance map**: Efficient encoding of sparse coefficients
|
||||
- **Optional Zstd compression**: Level 7 for additional compression
|
||||
- **Usage Examples**:
|
||||
```bash
|
||||
# Encode with default quality (Q3)
|
||||
encoder_tad -i input.mp4 -o output.tad
|
||||
|
||||
# Encode with highest quality
|
||||
encoder_tad -i input.mp4 -o output.tad -q 5
|
||||
|
||||
# Encode without Zstd compression
|
||||
encoder_tad -i input.mp4 -o output.tad --no-zstd
|
||||
|
||||
# Verbose output with statistics
|
||||
encoder_tad -i input.mp4 -o output.tad -v
|
||||
|
||||
# Decode back to PCM16
|
||||
decoder_tad -i input.tad -o output.pcm
|
||||
```
|
||||
- **Format documentation**: `terranmon.txt` (search for "TSVM Advanced Audio (TAD) Format")
|
||||
- **Version**: 1 (2-bit twobitmap significance map)
|
||||
|
||||
**TAD Compression Performance**:
|
||||
- **Target Compression**: 2:1 against PCMu8 baseline (4:1 against PCM16LE input)
|
||||
- **Achieved Compression**: 2.51:1 against PCMu8 at quality level 3
|
||||
- **Audio Quality**: Preserves full 0-16 KHz bandwidth
|
||||
- **Coefficient Sparsity**: 86.9% zeros in Mid channel, 97.8% in Side channel (typical)
|
||||
|
||||
**TAD Encoding Pipeline**:
|
||||
1. **FFmpeg Two-Pass Extraction**: High-quality SoXR resampling to 32 KHz with 16 Hz highpass filter
|
||||
2. **PCM16→PCM8 with Dithering**: Error-diffusion dithering minimizes quantization noise
|
||||
3. **M/S Stereo Decorrelation**: Transforms Left/Right to Mid/Side for better compression
|
||||
4. **Variable-Level DD-4 DWT**: Deslauriers-Dubuc 4-tap interpolating wavelets with dynamic levels
|
||||
- Default 32768 samples → 13 DWT levels
|
||||
- Minimum 1024 samples → 8 DWT levels
|
||||
5. **Frequency-Dependent Quantization**: Perceptual weights favor 2-4 KHz (speech intelligibility)
|
||||
6. **Dead Zone Quantization**: Zeros high-frequency noise (highest band)
|
||||
7. **2-bit Twobitmap Encoding**: Maps coefficients to 00=0, 01=+1, 10=-1, 11=other
|
||||
8. **Optional Zstd Compression**: Level 7 compression on concatenated Mid+Side data
|
||||
|
||||
**TAD Integration with TAV**:
|
||||
TAD is designed as an includable API for TAV video encoder integration. The variable chunk size
|
||||
support enables synchronized audio/video encoding where audio chunks can match video GOP boundaries.
|
||||
TAV embeds TAD-compressed audio using packet type 0x24 with Zstd compression.
|
||||
|
||||
**TAD Hardware Acceleration**:
|
||||
TSVM accelerates TAD decoding with AudioAdapter.kt (backend) and AudioJSR223Delegate.kt (API):
|
||||
- Backend decoder in AudioAdapter.kt with variable chunk size support
|
||||
- API functions in AudioJSR223Delegate.kt for JavaScript access
|
||||
- Supports chunk sizes from 1024 to 32768+ samples
|
||||
- Dynamic DWT level calculation for optimal performance
|
||||
|
||||
@@ -15,6 +15,7 @@ const UCF_VERSION = 1
|
||||
const ADDRESSING_EXTERNAL = 0x01
|
||||
const ADDRESSING_INTERNAL = 0x02
|
||||
const SND_BASE_ADDR = audio.getBaseAddr()
|
||||
const SND_MEM_ADDR = audio.getMemAddr()
|
||||
const pcm = require("pcm")
|
||||
const MP2_FRAME_SIZE = [144,216,252,288,360,432,504,576,720,864,1008,1152,1440,1728]
|
||||
|
||||
@@ -32,6 +33,7 @@ const TAV_PACKET_AUDIO_MP2 = 0x20
|
||||
const TAV_PACKET_AUDIO_NATIVE = 0x21
|
||||
const TAV_PACKET_AUDIO_PCM_16LE = 0x22
|
||||
const TAV_PACKET_AUDIO_ADPCM = 0x23
|
||||
const TAV_PACKET_AUDIO_TAD = 0x24
|
||||
const TAV_PACKET_SUBTITLE = 0x30
|
||||
const TAV_PACKET_AUDIO_BUNDLED = 0x40 // Entire MP2 audio file in single packet
|
||||
const TAV_PACKET_EXTENDED_HDR = 0xEF
|
||||
@@ -396,6 +398,7 @@ let audioBufferBytesLastFrame = 0
|
||||
let frame_cnt = 0
|
||||
let frametime = 1000000000.0 / header.fps
|
||||
let mp2Initialised = false
|
||||
let tadInitialised = false
|
||||
let audioFired = false
|
||||
|
||||
|
||||
@@ -1337,6 +1340,20 @@ try {
|
||||
audio.mp2Decode()
|
||||
audio.mp2UploadDecoded(0)
|
||||
|
||||
}
|
||||
else if (packetType === TAV_PACKET_AUDIO_TAD) {
|
||||
// TAD audio packet: payload is copied into the audio adapter's TAD input buffer and decoded there
|
||||
let payloadLen = seqread.readInt()
|
||||
|
||||
if (!tadInitialised) {
|
||||
tadInitialised = true
|
||||
audio.tadSetQuality(header.qualityLevel)
|
||||
}
|
||||
|
||||
seqread.readBytes(payloadLen, SND_MEM_ADDR - 262144)
|
||||
audio.tadDecode()
|
||||
audio.tadUploadDecoded(0)
|
||||
|
||||
}
|
||||
else if (packetType === TAV_PACKET_AUDIO_NATIVE) {
|
||||
// PCM length must not exceed 65536 bytes!
|
||||
|
||||
243
terranmon.txt
243
terranmon.txt
@@ -965,6 +965,7 @@ transmission capability, and region-of-interest coding.
|
||||
0x21: Zstd-compressed 8-bit PCM (32 KHz, audio hardware's native format)
|
||||
0x22: Zstd-compressed 16-bit PCM (32 KHz, little endian)
|
||||
0x23: Zstd-compressed ADPCM
|
||||
0x24: Zstd-compressed TAD
|
||||
<subtitles>
|
||||
0x30: Subtitle in "Simple" format
|
||||
0x31: Subtitle in "Karaoke" format
|
||||
@@ -1065,6 +1066,13 @@ transmission capability, and region-of-interest coding.
|
||||
uint32 Compressed Size
|
||||
* Zstd-compressed Block Data
|
||||
|
||||
## TAD Packet Structure
|
||||
uint8 Packet type (0x24)
|
||||
uint32 Packet Payload Size (Compressed Size + 6: covers the Sample Count and Compressed Size fields below plus the data)
|
||||
uint16 Sample Count
|
||||
uint32 Compressed Size
|
||||
* Zstd-compressed TAD
|
||||
|
||||
## GOP Unified Packet Structure (0x12)
|
||||
Implemented on 2025-10-15 for temporal 3D DWT with unified preprocessing.
|
||||
|
||||
@@ -1507,6 +1515,241 @@ Number|Index
|
||||
4096|255
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
TSVM Advanced Audio (TAD) Format
|
||||
Created by CuriousTorvald and Claude on 2025-10-23
|
||||
|
||||
TAD is a perceptual audio codec for TSVM utilizing Discrete Wavelet Transform (DWT)
|
||||
with 4-tap interpolating Deslauriers-Dubuc wavelets, providing efficient compression
|
||||
through M/S stereo decorrelation, frequency-dependent quantization, and significance
|
||||
map encoding. Designed as an includable API for integration with TAV video encoder.
|
||||
|
||||
When used inside of a video codec, only zstd-compressed payload is stored, chunk length
|
||||
is stored separately and quality index is shared with that of the video.
|
||||
|
||||
# Suggested File Structure
|
||||
\x1F T S V M T A D
|
||||
[HEADER]
|
||||
[CHUNK 0]
|
||||
[CHUNK 1]
|
||||
[CHUNK 2]
|
||||
...
|
||||
|
||||
## Header (16 bytes)
|
||||
uint8 Magic[8]: "\x1F TSVM TAD"
|
||||
uint8 Version: 1
|
||||
uint8 Quality Level: 0-5 (0=lowest quality/smallest, 5=highest quality/largest)
|
||||
uint8 Flags:
|
||||
- bit 0: Zstd compression enabled (1=compressed, 0=uncompressed)
|
||||
- bits 1-7: Reserved (must be 0)
|
||||
uint32 Sample Rate: audio sample rate in Hz (always 32000 for TSVM)
|
||||
uint8 Channels: number of audio channels (always 2 for stereo)
|
||||
uint8 Reserved[2]: fill with zeros
|
||||
|
||||
## Audio Properties
|
||||
- **Sample Rate**: 32000 Hz (TSVM audio hardware native format)
|
||||
- **Channels**: 2 (stereo)
|
||||
- **Input Format**: PCM16LE (16-bit signed little-endian PCM)
|
||||
- **Preprocessing**: 16 Hz highpass filter applied during extraction
|
||||
- **Internal Representation**: Signed PCM8 with error-diffusion dithering
|
||||
- **Chunk Size**: Variable (1024-32768+ samples per channel, must be power of 2)
|
||||
- Default: 32768 samples (1.024 seconds at 32 kHz)
|
||||
- Minimum: 1024 samples (32 ms at 32 kHz)
|
||||
- DWT levels calculated dynamically: log2(chunk_size) - 2
|
||||
- **Target Compression**: 2:1 against PCMu8 baseline
|
||||
|
||||
## Chunk Structure
|
||||
Each chunk encodes a variable number of stereo samples (power of 2, minimum 1024).
|
||||
Default is 32768 samples (65536 total samples, 1.024 seconds).
|
||||
If the audio duration doesn't align to chunk boundaries, the final chunk can use
|
||||
a smaller power-of-2 size or be zero-padded.
|
||||
|
||||
uint8 Significance Map Method: always 1 (2-bit twobitmap)
|
||||
uint8 Compression Flag: 1=Zstd compressed, 0=uncompressed
|
||||
uint16 Sample Count: number of samples per channel (must be power of 2, min 1024)
|
||||
uint32 Chunk Payload Size: size of following payload in bytes
|
||||
* Chunk Payload: encoded M/S stereo data (Zstd compressed if flag set)
|
||||
|
||||
### Chunk Payload Structure (before optional Zstd compression)
|
||||
* Mid Channel Encoded Data
|
||||
* Side Channel Encoded Data
|
||||
|
||||
### Encoded Channel Data (2-bit Twobitmap Significance Map)
|
||||
uint8 Significance Map[(num_samples * 2 + 7) / 8] // 2 bits per coefficient
|
||||
int16 Other Values[variable length] // Non-{-1,0,+1} values
|
||||
|
||||
#### 2-bit Twobitmap Encoding
|
||||
Each DWT coefficient is encoded using 2 bits in the significance map:
|
||||
- 00: coefficient is 0
|
||||
- 01: coefficient is +1
|
||||
- 10: coefficient is -1
|
||||
- 11: coefficient is "other" (value stored in Other Values array)
|
||||
|
||||
This encoding exploits the sparsity of quantized DWT coefficients where most
|
||||
values are 0, ±1 after quantization. "Other" values are stored sequentially
|
||||
as int16 in the order they appear.
|
||||
|
||||
## Encoding Pipeline
|
||||
|
||||
### Step 1: PCM16 to PCM8 Conversion with Error-Diffusion Dithering
|
||||
Input stereo PCM16LE is converted to signed PCM8 using error-diffusion dithering
|
||||
to minimize quantization noise:
|
||||
|
||||
dithered_value = pcm16_value / 256 + error
|
||||
pcm8_value = clamp(round(dithered_value), -128, 127)
|
||||
error = dithered_value - pcm8_value
|
||||
|
||||
Error is propagated to the next sample (alternating between left/right channels).
|
||||
|
||||
### Step 2: M/S Stereo Decorrelation
|
||||
Mid-Side transformation exploits stereo correlation:
|
||||
|
||||
Mid = (Left + Right) / 2
|
||||
Side = (Left - Right) / 2
|
||||
|
||||
This typically concentrates energy in the Mid channel while the Side channel
|
||||
contains mostly small values, improving compression efficiency.
|
||||
|
||||
### Step 3: Variable-Level DD-4 DWT
|
||||
Each channel (Mid and Side) undergoes Deslauriers-Dubuc 4-tap interpolating wavelet
|
||||
decomposition. The number of DWT levels is calculated dynamically based on chunk size:
|
||||
|
||||
DWT Levels = log2(chunk_size) - 2
|
||||
|
||||
For the default 32768-sample chunks, this produces 13 levels with frequency subbands:
|
||||
|
||||
Level 0-12: High to low frequency coefficients
|
||||
DC band: Low-frequency approximation coefficients
|
||||
|
||||
Sideband boundaries are calculated dynamically:
|
||||
first_band_size = chunk_size >> dwt_levels
|
||||
sideband[0] = 0
|
||||
sideband[1] = first_band_size
|
||||
sideband[i+1] = sideband[i] + (first_band_size << (i-1))
|
||||
|
||||
For 32768 samples with 13 levels: boundaries at 0, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768
|
||||
For 1024 samples with 8 levels: boundaries at 0, 4, 8, 16, 32, 64, 128, 256, 512, 1024
|
||||
|
||||
### Step 4: Frequency-Dependent Quantization
|
||||
DWT coefficients are quantized using perceptually-tuned frequency-dependent weights:
|
||||
|
||||
Base Weights by Level:
|
||||
Level 0 (16-8 KHz): 3.0
|
||||
Level 1 (8-4 KHz): 2.0
|
||||
Level 2 (4-2 KHz): 1.5
|
||||
Level 3 (2-1 KHz): 1.0
|
||||
Level 4 (1-0.5 KHz): 0.75
|
||||
Level 5 (0.5-0.25 KHz): 0.5
|
||||
Level 6-7 (DC-0.25 KHz): 0.25
|
||||
|
||||
Quality scaling factor: 1.0 + (5 - quality) * 0.3
|
||||
|
||||
Final quantization step: base_weight * quality_scale
|
||||
|
||||
#### Dead Zone Quantization
|
||||
High-frequency coefficients (Level 0: 8-16 KHz) use dead zone quantization
|
||||
where coefficients smaller than half the quantization step are zeroed:
|
||||
|
||||
if (abs(coefficient) < quantization_step / 2)
|
||||
coefficient = 0
|
||||
|
||||
This aggressively removes high-frequency noise while preserving important
|
||||
mid-frequency content (2-4 KHz critical for speech intelligibility).
|
||||
|
||||
### Step 5: 2-bit Significance Map Encoding
|
||||
Quantized coefficients are encoded using the 2-bit twobitmap method (see above).
|
||||
|
||||
### Step 6: Optional Zstd Compression
|
||||
If enabled (default), the concatenated Mid+Side encoded data is compressed
|
||||
using Zstd level 3 for additional compression without significant CPU overhead.
|
||||
|
||||
## Decoding Pipeline
|
||||
|
||||
### Step 1: Chunk Extraction
|
||||
Read chunk header to determine significance map method and compression status.
|
||||
If compressed, decompress payload using Zstd.
|
||||
|
||||
### Step 2: Decode Significance Maps
|
||||
Decode Mid and Side channel data using 2-bit twobitmap decoder:
|
||||
- Read 2-bit codes from significance map
|
||||
- Reconstruct coefficients: 0, +1, -1, or read from Other Values array
|
||||
|
||||
### Step 3: Dequantization
|
||||
Multiply quantized coefficients by frequency-dependent quantization steps
|
||||
(same weights as encoder).
|
||||
|
||||
### Step 4: Variable-Level Inverse DD-4 DWT
|
||||
Reconstruct PCM8 audio from DWT coefficients using inverse DD-4 transform,
|
||||
progressively doubling length from the deepest level to chunk_size samples.
|
||||
The number of inverse DWT levels matches the forward transform (log2(chunk_size) - 2).
|
||||
|
||||
### Step 5: M/S to L/R Conversion
|
||||
Convert Mid/Side back to Left/Right stereo:
|
||||
|
||||
Left = Mid + Side
|
||||
Right = Mid - Side
|
||||
|
||||
### Step 6: PCM8 to PCM16 Upsampling
|
||||
Convert signed PCM8 back to PCM16LE by multiplying by 256:
|
||||
|
||||
pcm16_value = pcm8_value * 256
|
||||
|
||||
## Compression Performance
|
||||
- **Target Ratio**: 2:1 against PCMu8 (4:1 against PCM16LE input)
|
||||
- **Achieved Ratio**: 2.51:1 against PCMu8 at quality level 3
|
||||
- **Quality**: Perceptually transparent at Q3+, preserves full 0-16 KHz bandwidth
|
||||
- **Sparsity**: 86.9% zeros in Mid channel, 97.8% in Side channel (typical)
|
||||
|
||||
## Integration with TAV Encoder
|
||||
TAD is designed as an includable API for TAV video encoder integration.
|
||||
The encoder can be invoked programmatically to compress audio tracks:
|
||||
|
||||
#include "tad_encoder.h"
|
||||
|
||||
size_t encoded_size = tad_encode_from_file(
|
||||
input_audio_path,
|
||||
output_tad_path,
|
||||
quality_level,
|
||||
use_zstd,
|
||||
verbose
|
||||
);
|
||||
|
||||
This allows TAV video files to embed TAD-compressed audio using packet type 0x24.
|
||||
|
||||
## Audio Extraction Command
|
||||
TAD encoder uses two-pass FFmpeg extraction for optimal quality:
|
||||
|
||||
# Pass 1: Extract at original sample rate
|
||||
ffmpeg -i input.mp4 -f s16le -ac 2 temp.pcm
|
||||
|
||||
# Pass 2: High-quality resample with SoXR and highpass filter
|
||||
ffmpeg -f s16le -ar {original_rate} -ac 2 -i temp.pcm \
|
||||
-ar 32000 -af "aresample=resampler=soxr:precision=28:cutoff=0.99,highpass=f=16" \
|
||||
output.pcm
|
||||
|
||||
This ensures resampling happens after extraction with optimal quality parameters.
|
||||
|
||||
## Hardware Acceleration API
|
||||
TAD decoder may be accelerated using hardware functions in AudioJSR223Delegate:
|
||||
- tadDecode(): Main decoding function (chunk-based)
|
||||
- tadHaarIDWT(): Fast inverse Haar DWT
|
||||
- tadDequantize(): Frequency-dependent dequantization
|
||||
|
||||
## Usage Examples
|
||||
# Encode with default quality (Q3)
|
||||
tad_encoder -i input.mp4 -o output.tad
|
||||
|
||||
# Encode with highest quality
|
||||
tad_encoder -i input.mp4 -o output.tad -q 5
|
||||
|
||||
# Encode without Zstd compression
|
||||
tad_encoder -i input.mp4 -o output.tad --no-zstd
|
||||
|
||||
# Verbose output with statistics
|
||||
tad_encoder -i input.mp4 -o output.tad -v
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
TSVM Universal Cue format
|
||||
|
||||
@@ -82,6 +82,7 @@ class AudioJSR223Delegate(private val vm: VM) {
|
||||
// fun mp2DecodeFrame(mp2: MP2Env.MP2, framePtr: Long?, pcm: Boolean, outL: Long, outR: Long) = getFirstSnd()?.mp2Env?.decodeFrame(mp2, framePtr, pcm, outL, outR)
|
||||
|
||||
fun getBaseAddr(): Int? = getFirstSnd()?.let { return it.vm.findPeriSlotNum(it)?.times(-131072)?.minus(1) }
|
||||
fun getMemAddr(): Int? = getFirstSnd()?.let { return it.vm.findPeriSlotNum(it)?.times(-1048576)?.minus(1) }
|
||||
fun mp2Init() = getFirstSnd()?.mmio_write(40L, 16)
|
||||
fun mp2Decode() = getFirstSnd()?.mmio_write(40L, 1)
|
||||
fun mp2InitThenDecode() = getFirstSnd()?.mmio_write(40L, 17)
|
||||
@@ -93,6 +94,39 @@ class AudioJSR223Delegate(private val vm: VM) {
|
||||
}
|
||||
}
|
||||
|
||||
// TAD (Terrarum Advanced Audio) decoder functions
|
||||
/**
 * Writes [quality] to MMIO register 43 (TAD quality) of the adapter returned
 * by getFirstSnd(). Does nothing when no adapter is available.
 */
fun tadSetQuality(quality: Int) {
    val snd = getFirstSnd() ?: return
    snd.mmio_write(43L, quality.toByte())
}
|
||||
|
||||
/** Reads MMIO register 43 (TAD quality) as an Int, or null when no sound adapter is available. */
fun tadGetQuality() = getFirstSnd()?.let { snd -> snd.mmio_read(43L)?.toInt() }
|
||||
|
||||
/**
 * Triggers a TAD decode by writing 1 (bit 0 = decode) to MMIO register 42.
 * Does nothing when no sound adapter is available.
 */
fun tadDecode() {
    val snd = getFirstSnd() ?: return
    snd.mmio_write(42L, 1)
}
|
||||
|
||||
/** True when MMIO register 44 (TAD busy flag) reads 1; false otherwise, including when no adapter exists. */
fun tadIsBusy(): Boolean {
    val busyFlag = getFirstSnd()?.mmio_read(44L)?.toInt()
    return busyFlag == 1
}
|
||||
|
||||
/**
 * Queues the decoded TAD output on the given playhead's PCM queue.
 *
 * Only the bytes belonging to the current chunk are queued. The per-channel
 * sample count is read from the little-endian uint16 at the start of the TAD
 * input buffer (the same field the decoder parses), so a chunk shorter than
 * 32768 samples does not drag stale bytes of an earlier, longer chunk into
 * the queue. When that field is 0 or larger than 32768 the whole 65536-byte
 * buffer is queued, which is the previous behaviour.
 *
 * @param playhead index into the adapter's playheads array
 */
fun tadUploadDecoded(playhead: Int) {
    getFirstSnd()?.let { snd ->
        val headerSamples = (snd.tadInputBin[0L].toInt() and 0xFF) or
                ((snd.tadInputBin[1L].toInt() and 0xFF) shl 8)
        val frames = if (headerSamples in 1..32768) headerSamples else 32768
        val byteCount = frames * 2 // one unsigned byte per channel, two channels

        val ba = ByteArray(byteCount)
        UnsafeHelper.memcpyRaw(null, snd.tadDecodedBin.ptr, ba, UnsafeHelper.getArrayOffset(ba), byteCount.toLong())
        snd.playheads[playhead].pcmQueue.addLast(ba)
    }
}
|
||||
|
||||
/**
 * Copies [length] bytes from VM memory into the TAD input buffer.
 *
 * Source addresses advance upwards from [ptr] when it is non-negative and
 * downwards when it is negative; destination indices always advance upwards
 * from [destOffset].
 */
fun putTadDataByPtr(ptr: Int, length: Int, destOffset: Int) {
    val snd = getFirstSnd() ?: return
    val step = if (ptr >= 0) 1L else -1L
    var src = ptr.toLong()
    var dst = destOffset.toLong()
    repeat(length) {
        snd.tadInputBin[dst] = vm.peek(src)!!
        src += step
        dst += 1L
    }
}
|
||||
|
||||
/** Returns the byte at [index] of the decoded TAD output buffer, or null when no sound adapter is available. */
fun getTadData(index: Int) = getFirstSnd()?.let { snd -> snd.tadDecodedBin[index.toLong()] }
|
||||
|
||||
|
||||
|
||||
/*
|
||||
|
||||
@@ -4,6 +4,7 @@ import com.badlogic.gdx.Gdx
|
||||
import com.badlogic.gdx.backends.lwjgl3.audio.OpenALLwjgl3Audio
|
||||
import com.badlogic.gdx.utils.GdxRuntimeException
|
||||
import com.badlogic.gdx.utils.Queue
|
||||
import io.airlift.compress.zstd.ZstdInputStream
|
||||
import net.torvald.UnsafeHelper
|
||||
import net.torvald.UnsafePtr
|
||||
import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUint
|
||||
@@ -11,6 +12,7 @@ import net.torvald.tsvm.ThreeFiveMiniUfloat
|
||||
import net.torvald.tsvm.VM
|
||||
import net.torvald.tsvm.getHashStr
|
||||
import net.torvald.tsvm.toInt
|
||||
import java.io.ByteArrayInputStream
|
||||
|
||||
private class RenderRunnable(val playhead: AudioAdapter.Playhead) : Runnable {
|
||||
private fun printdbg(msg: Any) {
|
||||
@@ -125,6 +127,12 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
|
||||
|
||||
@Volatile private var mp2Busy = false
|
||||
|
||||
// TAD (Terrarum Advanced Audio) decoder buffers
|
||||
internal val tadInputBin = UnsafeHelper.allocate(65536L, this) // Input: compressed TAD chunk (max 64KB)
|
||||
internal val tadDecodedBin = UnsafeHelper.allocate(65536L, this) // Output: PCMu8 stereo (32768 samples * 2 channels)
|
||||
internal var tadQuality = 2 // Quality level used during encoding (0-5)
|
||||
@Volatile private var tadBusy = false
|
||||
|
||||
private val renderRunnables: Array<RenderRunnable>
|
||||
private val renderThreads: Array<Thread>
|
||||
private val writeQueueingRunnables: Array<WriteQueueingRunnable>
|
||||
@@ -216,7 +224,9 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
|
||||
in 0..114687 -> sampleBin[addr]
|
||||
in 114688..131071 -> (adi - 114688).let { instruments[it / 64].getByte(it % 64) }
|
||||
in 131072..262143 -> (adi - 131072).let { playdata[it / (8*64)][(it / 8) % 64].getByte(it % 8) }
|
||||
else -> peek(addr % 262144)
|
||||
in 262144..327679 -> tadInputBin[addr - 262144] // TAD input buffer (65536 bytes)
|
||||
in 327680..393215 -> tadDecodedBin[addr - 327680] // TAD decoded output (65536 bytes)
|
||||
else -> peek(addr % 393216)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -227,6 +237,8 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
|
||||
in 0..114687 -> { sampleBin[addr] = byte }
|
||||
in 114688..131071 -> (adi - 114688).let { instruments[it / 64].setByte(it % 64, bi) }
|
||||
in 131072..262143 -> (adi - 131072).let { playdata[it / (8*64)][(it / 8) % 64].setByte(it % 8, bi) }
|
||||
in 262144..327679 -> tadInputBin[addr - 262144] = byte // TAD input buffer
|
||||
in 327680..393215 -> tadDecodedBin[addr - 327680] = byte // TAD decoded output
|
||||
}
|
||||
}
|
||||
|
||||
@@ -239,6 +251,9 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
|
||||
in 30..39 -> playheads[3].read(adi - 30)
|
||||
40 -> -1
|
||||
41 -> mp2Busy.toInt().toByte()
|
||||
42 -> -1 // TAD control (write-only)
|
||||
43 -> tadQuality.toByte()
|
||||
44 -> tadBusy.toInt().toByte()
|
||||
in 64..2367 -> mediaDecodedBin[addr - 64]
|
||||
in 2368..4095 -> mediaFrameBin[addr - 2368]
|
||||
in 4096..4097 -> 0
|
||||
@@ -265,6 +280,14 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
|
||||
if (bi and 16 != 0) { mp2Context = mp2Env.initialise() }
|
||||
if (bi and 1 != 0) decodeMp2()
|
||||
}
|
||||
42 -> {
|
||||
// TAD control: bit 0 = decode
|
||||
if (bi and 1 != 0) decodeTad()
|
||||
}
|
||||
43 -> {
|
||||
// TAD quality (0-5)
|
||||
tadQuality = bi.coerceIn(0, 5)
|
||||
}
|
||||
in 64..2367 -> { mediaDecodedBin[addr - 64] = byte }
|
||||
in 2368..4095 -> { mediaFrameBin[addr - 2368] = byte }
|
||||
in 32768..65535 -> { (adi - 32768).let {
|
||||
@@ -287,6 +310,8 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
|
||||
pcmBin.destroy()
|
||||
mediaFrameBin.destroy()
|
||||
mediaDecodedBin.destroy()
|
||||
tadInputBin.destroy()
|
||||
tadDecodedBin.destroy()
|
||||
}
|
||||
else {
|
||||
System.err.println("AudioAdapter already disposed")
|
||||
@@ -304,6 +329,250 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
|
||||
mp2Env.decodeFrameU8(mp2Context, periMmioBase - 2368, true, periMmioBase - 64)
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// TAD (Terrarum Advanced Audio) Decoder
|
||||
//=============================================================================
|
||||
|
||||
/**
 * Decodes the TAD chunk held in tadInputBin and writes the result to tadDecodedBin.
 *
 * Input layout (little-endian): uint16 sample count per channel, uint32
 * compressed payload size, then the Zstd-compressed payload containing the
 * Mid channel data followed by the Side channel data.
 * Output: interleaved unsigned 8-bit stereo (L, R, L, R, ...).
 *
 * Both header fields are validated before they are used as sizes: the payload
 * must fit inside the 65536-byte input buffer (it is copied with a raw memory
 * copy) and the sample count must fit the 65536-byte output buffer. A chunk
 * that fails validation or decompression is rejected before anything is
 * written to tadDecodedBin. tadBusy is set for the duration of the call and
 * always cleared on exit.
 */
private fun decodeTad() {
    tadBusy = true
    try {
        val headerSize = 6          // uint16 sample count + uint32 payload size
        val inputCapacity = 65536   // bytes allocated for tadInputBin
        val maxSamples = 32768      // tadDecodedBin holds 65536 bytes = 32768 stereo frames

        fun u8(at: Long) = tadInputBin[at].toInt() and 0xFF

        val sampleCount = u8(0L) or (u8(1L) shl 8)
        val payloadSize = u8(2L) or (u8(3L) shl 8) or (u8(4L) shl 16) or (u8(5L) shl 24)

        if (sampleCount > maxSamples) {
            throw IllegalArgumentException("Chunk size $sampleCount exceeds maximum $maxSamples")
        }
        if (payloadSize < 0 || payloadSize > inputCapacity - headerSize) {
            throw IllegalArgumentException("TAD payload size $payloadSize does not fit in the input buffer")
        }

        // Also rejects chunks below the 1024-sample minimum.
        val dwtLevels = calculateDwtLevels(sampleCount)

        // Decompress payload
        val compressed = ByteArray(payloadSize)
        UnsafeHelper.memcpyRaw(null, tadInputBin.ptr + headerSize, compressed, UnsafeHelper.getArrayOffset(compressed), payloadSize.toLong())

        val payload: ByteArray = try {
            ZstdInputStream(ByteArrayInputStream(compressed)).use { zstd ->
                zstd.readBytes()
            }
        } catch (e: Exception) {
            println("ERROR: Zstd decompression failed: ${e.message}")
            return
        }

        // Decode significance maps: Mid first, Side immediately after it
        val quantMid = ShortArray(sampleCount)
        val quantSide = ShortArray(sampleCount)
        val midBytes = decodeSigmap2bit(payload, 0, quantMid, sampleCount)
        decodeSigmap2bit(payload, midBytes, quantSide, sampleCount)

        // Dequantize
        val dwtMid = FloatArray(sampleCount)
        val dwtSide = FloatArray(sampleCount)
        dequantizeDwtCoefficients(quantMid, dwtMid, sampleCount, tadQuality, dwtLevels)
        dequantizeDwtCoefficients(quantSide, dwtSide, sampleCount, tadQuality, dwtLevels)

        // Inverse DWT
        dwtDD4InverseMultilevel(dwtMid, sampleCount, dwtLevels)
        dwtDD4InverseMultilevel(dwtSide, sampleCount, dwtLevels)

        // Clamp to signed 8-bit, undo M/S, and store as unsigned 8-bit
        for (i in 0 until sampleCount) {
            val m = dwtMid[i].coerceIn(-128f, 127f).toInt()
            val s = dwtSide[i].coerceIn(-128f, 127f).toInt()
            val l = (m + s).coerceIn(-128, 127)
            val r = (m - s).coerceIn(-128, 127)

            tadDecodedBin[i * 2L] = (l + 128).toByte()     // Left (PCMu8)
            tadDecodedBin[i * 2L + 1] = (r + 128).toByte() // Right (PCMu8)
        }
    } catch (e: Exception) {
        e.printStackTrace()
    } finally {
        tadBusy = false
    }
}
|
||||
|
||||
/**
 * Decodes one channel stored with the 2-bit significance map.
 *
 * The channel starts at [offset] in [input] with a map of two bits per
 * coefficient (least significant bits first within each byte), followed by
 * the little-endian int16 "other" values in coefficient order.
 * Codes: 0 -> 0, 1 -> +1, 2 -> -1, 3 -> next "other" value.
 *
 * Because every code starts on an even bit position, a code never straddles
 * two bytes, so no spill-over handling is needed.
 *
 * @param input  decompressed chunk payload
 * @param offset index in [input] where this channel begins
 * @param values receives [count] decoded coefficients
 * @param count  number of coefficients to decode
 * @return number of bytes consumed (map bytes plus two per "other" value)
 */
private fun decodeSigmap2bit(input: ByteArray, offset: Int, values: ShortArray, count: Int): Int {
    val mapBytes = (count * 2 + 7) / 8
    var readPtr = offset + mapBytes // next unread "other" value

    for (i in 0 until count) {
        val bitPos = i * 2
        val mapByte = input[offset + bitPos / 8].toInt() and 0xFF

        values[i] = when ((mapByte shr (bitPos % 8)) and 0x03) {
            0 -> 0
            1 -> 1
            2 -> (-1).toShort()
            else -> {
                val v = ((input[readPtr].toInt() and 0xFF) or
                        ((input[readPtr + 1].toInt() and 0xFF) shl 8)).toShort()
                readPtr += 2
                v
            }
        }
    }

    return readPtr - offset
}
|
||||
|
||||
/**
 * Number of DWT levels for a chunk: floor(log2(chunkSize)) - 2.
 *
 * @throws IllegalArgumentException when [chunkSize] is below 1024
 */
private fun calculateDwtLevels(chunkSize: Int): Int {
    if (chunkSize < 1024) {
        throw IllegalArgumentException("Chunk size $chunkSize is below minimum 1024")
    }

    // Index of the highest set bit == floor(log2(chunkSize)) for positive values.
    val floorLog2 = 31 - Integer.numberOfLeadingZeros(chunkSize)
    return floorLog2 - 2 // Maximum decomposition leaves 4-sample approximation
}
|
||||
|
||||
/**
 * Builds the per-level quantization weights for a chunk.
 *
 * The table row is selected by [dwtLevels] (rows 0..16 exist) and the column
 * by level index (capped at 15). Each entry is multiplied by a quality scale
 * of 1.0 + max(0, (3 - quality) * 0.5) and the product is floored at 1.0.
 *
 * @return array of [dwtLevels] weights
 */
private fun getQuantizationWeights(quality: Int, dwtLevels: Int): FloatArray {
    // One row per DWT level count, 16 columns each
    val weightTable = arrayOf(
        /* 0*/floatArrayOf(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f),
        /* 1*/floatArrayOf(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f),
        /* 2*/floatArrayOf(1.0f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f),
        /* 3*/floatArrayOf(0.2f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f),
        /* 4*/floatArrayOf(0.2f, 0.8f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f),
        /* 5*/floatArrayOf(0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f),
        /* 6*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f),
        /* 7*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f),
        /* 8*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f),
        /* 9*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f),
        /*10*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f),
        /*11*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f),
        /*12*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f),
        /*13*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f),
        /*14*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f),
        /*15*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f),
        /*16*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f)
    )

    val penalty = ((3 - quality) * 0.5f).coerceAtLeast(0.0f)
    val scale = 1.0f + penalty

    val result = FloatArray(dwtLevels)
    for (level in 0 until dwtLevels) {
        val column = level.coerceIn(0, 15)
        val scaled = weightTable[dwtLevels][column] * scale
        result[level] = scaled.coerceAtLeast(1.0f)
    }
    return result
}
|
||||
|
||||
/**
 * Multiplies each quantized coefficient by the weight of the subband it falls in.
 *
 * Subband boundaries start at 0 and `count shr dwtLevels`, and every later
 * band is twice as wide as the one before. Band 0 and band 1 both use
 * weight 0; band s (s >= 1) uses weight s - 1.
 *
 * @param quantized input coefficients
 * @param coeffs    receives the dequantized values
 * @param count     number of coefficients to process
 * @param quality   quality level forwarded to getQuantizationWeights
 * @param dwtLevels number of DWT levels of the chunk
 */
private fun dequantizeDwtCoefficients(quantized: ShortArray, coeffs: FloatArray, count: Int, quality: Int, dwtLevels: Int) {
    val weights = getQuantizationWeights(quality, dwtLevels)

    // bandEdges[s + 1] is the exclusive upper edge of band s
    val baseWidth = count shr dwtLevels
    val bandEdges = IntArray(dwtLevels + 2)
    bandEdges[0] = 0
    bandEdges[1] = baseWidth
    for (edge in 2..dwtLevels + 1) {
        bandEdges[edge] = bandEdges[edge - 1] + (baseWidth shl (edge - 2))
    }

    // Indices are visited in increasing order, so the band cursor only ever moves forward.
    var cursor = 0
    for (i in 0 until count) {
        while (cursor <= dwtLevels && i >= bandEdges[cursor + 1]) {
            cursor++
        }
        val band = if (cursor > dwtLevels) dwtLevels else cursor

        val weightIdx = if (band == 0) 0 else band - 1
        val weight = weights[weightIdx.coerceIn(0, dwtLevels - 1)]
        coeffs[i] = quantized[i].toFloat() * weight
    }
}
|
||||
|
||||
/**
 * One level of the inverse DD-4 lifting transform, in place on data[0 until length].
 *
 * On entry the first ceil(length / 2) entries are the low-pass (even) part and
 * the remaining floor(length / 2) entries the high-pass (odd) part. On exit
 * the two parts are interleaved back into sample order. Lengths below 2 are
 * left untouched.
 */
private fun dwtDD4Inverse1d(data: FloatArray, length: Int) {
    if (length < 2) return

    val lowCount = (length + 1) / 2
    val highCount = length / 2

    // Work on a copy: [0, lowCount) low-pass, [lowCount, length) high-pass
    val work = FloatArray(length)
    for (k in 0 until length) {
        work[k] = data[k]
    }

    // Undo update step: s[k] -= 0.25 * (d[k-1] + d[k]), missing neighbours count as 0
    for (k in 0 until lowCount) {
        val dNext = if (k < highCount) work[lowCount + k] else 0.0f
        val dBefore = if (k in 1..highCount) work[lowCount + k - 1] else 0.0f
        work[k] -= 0.25f * (dBefore + dNext)
    }

    // Undo prediction step: d[k] += P(s[k-1], s[k], s[k+1], s[k+2]), clamped/mirrored at the edges
    for (k in 0 until highCount) {
        val left = if (k > 0) work[k - 1] else work[0]
        val centre = work[k]
        val right = if (k + 1 < lowCount) work[k + 1] else work[lowCount - 1]
        val farRight = when {
            k + 2 < lowCount -> work[k + 2]
            lowCount > 1 -> work[lowCount - 2]
            else -> work[lowCount - 1]
        }

        val prediction = (-1.0f/16.0f)*left + (9.0f/16.0f)*centre + (9.0f/16.0f)*right + (-1.0f/16.0f)*farRight
        work[lowCount + k] += prediction
    }

    // Interleave: evens from the low part, odds from the high part
    for (k in 0 until lowCount) {
        data[2 * k] = work[k]
    }
    for (k in 0 until highCount) {
        data[2 * k + 1] = work[lowCount + k]
    }
}
|
||||
|
||||
/**
 * Runs `levels` inverse DD-4 passes over `data`, from the coarsest band outwards.
 *
 * The starting span is obtained by halving `length` (rounding up) `levels`
 * times; each pass then doubles the span (capped at `length`) and applies
 * dwtDD4Inverse1d to that prefix.
 */
private fun dwtDD4InverseMultilevel(data: FloatArray, length: Int, levels: Int) {
    // Size of the low-pass band at the deepest level
    var span = length
    repeat(levels) {
        span = (span + 1) / 2
    }

    // Grow the span first, then reconstruct that prefix
    repeat(levels) {
        span = minOf(span * 2, length)
        dwtDD4Inverse1d(data, span)
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@ OPENCV_LIBS = $(shell pkg-config --libs opencv4)
|
||||
|
||||
# Source files and targets
|
||||
TARGETS = tev tav tav_decoder
|
||||
TAD_TARGETS = encoder_tad decoder_tad
|
||||
TEST_TARGETS = test_mesh_warp test_mesh_roundtrip
|
||||
|
||||
# Build all encoders
|
||||
@@ -23,17 +24,31 @@ tev: encoder_tev.c
|
||||
rm -f encoder_tev
|
||||
$(CC) $(CFLAGS) -o encoder_tev $< $(LIBS)
|
||||
|
||||
tav: encoder_tav.c encoder_tav_opencv.cpp estimate_affine_from_blocks.cpp
|
||||
rm -f encoder_tav encoder_tav.o encoder_tav_opencv.o estimate_affine_from_blocks.o
|
||||
tav: encoder_tav.c encoder_tad.c encoder_tav_opencv.cpp estimate_affine_from_blocks.cpp
|
||||
rm -f encoder_tav encoder_tav.o encoder_tad.o encoder_tav_opencv.o
|
||||
$(CC) $(CFLAGS) -c encoder_tav.c -o encoder_tav.o
|
||||
$(CC) $(CFLAGS) -c encoder_tad.c -o encoder_tad.o
|
||||
$(CXX) $(CXXFLAGS) $(OPENCV_CFLAGS) -c encoder_tav_opencv.cpp -o encoder_tav_opencv.o
|
||||
$(CXX) $(CXXFLAGS) -c estimate_affine_from_blocks.cpp -o estimate_affine_from_blocks.o
|
||||
$(CXX) -o encoder_tav encoder_tav.o encoder_tav_opencv.o estimate_affine_from_blocks.o $(LIBS) -lfftw3f $(OPENCV_LIBS)
|
||||
$(CXX) -o encoder_tav encoder_tav.o encoder_tad.o encoder_tav_opencv.o $(LIBS) $(OPENCV_LIBS)
|
||||
|
||||
tav_decoder: decoder_tav.c
|
||||
rm -f decoder_tav
|
||||
$(CC) $(CFLAGS) -o decoder_tav $< $(LIBS)
|
||||
|
||||
# Build TAD (Terrarum Advanced Audio) tools
|
||||
encoder_tad: encoder_tad_standalone.c encoder_tad.c encoder_tad.h
|
||||
rm -f encoder_tad encoder_tad_standalone.o encoder_tad.o
|
||||
$(CC) $(CFLAGS) -c encoder_tad.c -o encoder_tad.o
|
||||
$(CC) $(CFLAGS) -c encoder_tad_standalone.c -o encoder_tad_standalone.o
|
||||
$(CC) -o encoder_tad encoder_tad_standalone.o encoder_tad.o $(LIBS)
|
||||
|
||||
decoder_tad: decoder_tad.c
|
||||
rm -f decoder_tad
|
||||
$(CC) $(CFLAGS) -o decoder_tad $< $(LIBS)
|
||||
|
||||
# Build all TAD tools
|
||||
tad: $(TAD_TARGETS)
|
||||
|
||||
# Build test programs
|
||||
test_mesh_warp: test_mesh_warp.cpp encoder_tav_opencv.cpp estimate_affine_from_blocks.cpp
|
||||
rm -f test_mesh_warp test_mesh_warp.o
|
||||
@@ -63,31 +78,34 @@ debug: $(TARGETS)
|
||||
|
||||
# Clean build artifacts
|
||||
clean:
|
||||
rm -f $(TARGETS) *.o
|
||||
rm -f $(TARGETS) $(TAD_TARGETS) *.o
|
||||
|
||||
# Install (copy to PATH)
|
||||
install: $(TARGETS)
|
||||
install: $(TARGETS) $(TAD_TARGETS)
|
||||
cp encoder_tev /usr/local/bin/
|
||||
cp encoder_tav /usr/local/bin/
|
||||
cp decoder_tav /usr/local/bin/
|
||||
cp encoder_tad /usr/local/bin/
|
||||
cp decoder_tad /usr/local/bin/
|
||||
|
||||
# Check for required dependencies
|
||||
check-deps:
|
||||
@echo "Checking dependencies..."
|
||||
@echo "Using Zstd compression for better efficiency"
|
||||
@pkg-config --exists libzstd || (echo "Error: libzstd-dev not found. Install with: sudo apt install libzstd-dev" && exit 1)
|
||||
@pkg-config --exists fftw3f || (echo "Error: libfftw3-dev not found. Install with: sudo apt install libfftw3-dev" && exit 1)
|
||||
@pkg-config --exists opencv4 || (echo "Error: OpenCV 4 not found. Install with: sudo apt install libopencv-dev" && exit 1)
|
||||
@echo "All dependencies found."
|
||||
|
||||
# Help
|
||||
help:
|
||||
@echo "TSVM Enhanced Video (TEV) Encoder"
|
||||
@echo "TSVM Enhanced Video (TEV) and Audio (TAD) Encoders"
|
||||
@echo ""
|
||||
@echo "Targets:"
|
||||
@echo " all - Build both encoders (default)"
|
||||
@echo " tev - Build the main TEV encoder"
|
||||
@echo " tav - Build the advanced TAV encoder"
|
||||
@echo " all - Build video encoders (default)"
|
||||
@echo " tev - Build the TEV video encoder"
|
||||
@echo " tav - Build the TAV advanced video encoder"
|
||||
@echo " tad - Build all TAD audio tools (encoder, decoder)"
|
||||
@echo " encoder_tad - Build TAD audio encoder"
|
||||
@echo " decoder_tad - Build TAD audio decoder"
|
||||
@echo " debug - Build with debug symbols"
|
||||
@echo " clean - Remove build artifacts"
|
||||
@echo " install - Install to /usr/local/bin"
|
||||
@@ -95,9 +113,10 @@ help:
|
||||
@echo " help - Show this help"
|
||||
@echo ""
|
||||
@echo "Usage:"
|
||||
@echo " make # Build both encoders"
|
||||
@echo " make # Build video encoders"
|
||||
@echo " make tev # Build TEV encoder"
|
||||
@echo " make tav # Build TAV encoder"
|
||||
@echo " sudo make install # Install both encoders"
|
||||
@echo " make tad # Build all TAD audio tools"
|
||||
@echo " sudo make install # Install all encoders"
|
||||
|
||||
.PHONY: all clean install check-deps help debug
|
||||
.PHONY: all clean install check-deps help debug tad
|
||||
|
||||
576
video_encoder/decoder_tad.c
Normal file
576
video_encoder/decoder_tad.c
Normal file
@@ -0,0 +1,576 @@
|
||||
// Created by CuriousTorvald and Claude on 2025-10-23.
|
||||
// TAD (Terrarum Advanced Audio) Decoder - Reconstructs audio from TAD format
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include <zstd.h>
|
||||
#include <getopt.h>
|
||||
|
||||
#define DECODER_VENDOR_STRING "Decoder-TAD 20251023"
|
||||
|
||||
// TAD format constants (must match encoder)
|
||||
#define TAD_DEFAULT_CHUNK_SIZE 32768
|
||||
#define TAD_MIN_CHUNK_SIZE 1024
|
||||
#define TAD_SAMPLE_RATE 32000
|
||||
#define TAD_CHANNELS 2
|
||||
|
||||
// Significance map methods
|
||||
#define TAD_SIGMAP_1BIT 0
|
||||
#define TAD_SIGMAP_2BIT 1
|
||||
#define TAD_SIGMAP_RLE 2
|
||||
|
||||
// Quality levels
|
||||
#define TAD_QUALITY_MIN 0
|
||||
#define TAD_QUALITY_MAX 5
|
||||
|
||||
// Clamp x to the closed interval [min, max]; a NaN x is returned unchanged.
static inline float FCLAMP(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
|
||||
|
||||
// Calculate DWT levels from chunk size (must be power of 2, >= 1024)
|
||||
static int calculate_dwt_levels(int chunk_size) {
|
||||
if (chunk_size < TAD_MIN_CHUNK_SIZE) {
|
||||
fprintf(stderr, "Error: Chunk size %d is below minimum %d\n", chunk_size, TAD_MIN_CHUNK_SIZE);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Calculate levels: log2(chunk_size) - 1
|
||||
int levels = 0;
|
||||
int size = chunk_size;
|
||||
while (size > 1) {
|
||||
size >>= 1;
|
||||
levels++;
|
||||
}
|
||||
return levels - 2;
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// Inverse DWT Implementation (Haar and DD-4; only the inverse is needed for the decoder)
|
||||
//=============================================================================
|
||||
|
||||
// In-place inverse of one Haar level over data[0..length).
// Input layout: (length + 1) / 2 low-pass values followed by length / 2
// high-pass values. Each (low, high) pair becomes (low + high, low - high);
// for odd lengths the final low-pass value is copied through unchanged.
static void dwt_haar_inverse_1d(float *data, int length) {
    if (length < 2) return;

    const int low_count = (length + 1) / 2;
    const int pair_count = length / 2;
    float *scratch = malloc(length * sizeof(float));

    for (int k = 0; k < pair_count; k++) {
        const float low = data[k];
        const float high = data[low_count + k];
        scratch[2 * k] = low + high;
        scratch[2 * k + 1] = low - high;
    }

    // Odd length: the unpaired low-pass sample is the last output sample
    if (length & 1) {
        scratch[length - 1] = data[low_count - 1];
    }

    memcpy(data, scratch, length * sizeof(float));
    free(scratch);
}
|
||||
|
||||
// In-place inverse of one level of the four-point interpolating
// Deslauriers-Dubuc (DD-4) lifting transform over data[0..length).
// Input layout: (length + 1) / 2 low-pass values, then length / 2 high-pass
// values. Output: the interleaved reconstructed samples.
static void dwt_dd4_inverse_1d(float *data, int length) {
    if (length < 2) return;

    const int n_low = (length + 1) / 2;
    const int n_high = length / 2;

    float *work = malloc(length * sizeof(float));
    memcpy(work, data, length * sizeof(float));

    float *low = work;           // even (low-pass) samples
    float *high = work + n_low;  // odd (high-pass) samples

    // Undo update step: s[k] -= 0.25 * (d[k-1] + d[k]); missing neighbours are 0
    for (int k = 0; k < n_low; k++) {
        const float d_here = (k < n_high) ? high[k] : 0.0f;
        const float d_before = (k > 0) ? high[k - 1] : 0.0f;
        low[k] -= 0.25f * (d_before + d_here);
    }

    // Undo prediction step: d[k] += P(s[k-1], s[k], s[k+1], s[k+2])
    for (int k = 0; k < n_high; k++) {
        const int idx_m1 = (k > 0) ? k - 1 : 0;  // left edge repeats sample 0
        const int idx_p1 = (k + 1 < n_low) ? k + 1 : n_low - 1;
        int idx_p2;
        if (k + 2 < n_low) {
            idx_p2 = k + 2;
        } else if (n_low > 1) {
            idx_p2 = n_low - 2;
        } else {
            idx_p2 = n_low - 1;
        }

        const float s_m1 = low[idx_m1];
        const float s_0 = low[k];
        const float s_1 = low[idx_p1];
        const float s_2 = low[idx_p2];

        const float prediction = (-1.0f/16.0f)*s_m1 + (9.0f/16.0f)*s_0 +
                                 (9.0f/16.0f)*s_1 + (-1.0f/16.0f)*s_2;

        high[k] += prediction;
    }

    // Interleave: even outputs from the low half, odd outputs from the high half
    for (int pos = 0; pos < length; pos++) {
        data[pos] = (pos & 1) ? high[pos >> 1] : low[pos >> 1];
    }

    free(work);
}
|
||||
|
||||
// Multi-level inverse DWT over data[0..length).
// Despite the name, every level is reconstructed with the DD-4 kernel
// (dwt_dd4_inverse_1d), not Haar.
//
// The span starts at the size of the deepest low-pass band (length halved,
// rounding up, `levels` times) and is doubled before each inverse pass,
// capped at `length`. E.g. 8 levels on 32768 samples start at 128 and run
// passes over 256, 512, ..., 32768.
//
// NOTE(review): for lengths that are not a multiple of 2^levels, doubling does
// not retrace the rounded-up halving sequence (e.g. 47 -> 24 on the way down,
// but 24 -> 48 on the way up). Whether that matches the encoder's forward
// transform cannot be confirmed from this file -- verify.
static void dwt_haar_inverse_multilevel(float *data, int length, int levels) {
    int span = length;
    for (int remaining = levels; remaining > 0; remaining--) {
        span = (span + 1) / 2;
    }

    for (int remaining = levels; remaining > 0; remaining--) {
        span *= 2;  // grow first ...
        if (span > length) {
            span = length;
        }
        dwt_dd4_inverse_1d(data, span);  // ... then reconstruct that prefix
    }
}
|
||||
|
||||
//=============================================================================
|
||||
// M/S Stereo Correlation (inverse of decorrelation)
|
||||
//=============================================================================
|
||||
|
||||
// Convert mid/side samples back to left/right: L = M + S, R = M - S.
// Each result is saturated to [-128, 127] and then biased by +128 so the
// outputs are unsigned 8-bit samples.
static void ms_correlate(const int8_t *mid, const int8_t *side, uint8_t *left, uint8_t *right, size_t count) {
    for (size_t n = 0; n < count; n++) {
        const int32_t m = mid[n];
        const int32_t s = side[n];

        int32_t sum = m + s;
        int32_t diff = m - s;

        // Saturate to the signed 8-bit range
        sum = (sum < -128) ? -128 : ((sum > 127) ? 127 : sum);
        diff = (diff < -128) ? -128 : ((diff > 127) ? 127 : diff);

        // Signed -> unsigned bias
        left[n] = (uint8_t)(sum + 128);
        right[n] = (uint8_t)(diff + 128);
    }
}
|
||||
|
||||
//=============================================================================
|
||||
// Dequantization (inverse of quantization)
|
||||
//=============================================================================
|
||||
|
||||
// Fill weights[0..dwt_levels) with the per-band dequantization weights.
//
// The table row is selected by the number of DWT levels (0..16) and each entry
// is scaled by a quality factor: 1.0 for quality >= 3, growing by 0.5 per step
// below 3. Scaled weights are clamped to a minimum of 1.0.
//
// quality    - quality level used by the encoder
// dwt_levels - number of DWT levels; also the number of entries written
// weights    - output buffer with room for at least dwt_levels floats
//
// The table has 17 rows (levels 0..16). It was previously declared as
// [16][16], which silently dropped the last row and made
// base_weights[16] an out-of-bounds read. Row and column indices are now
// clamped to the table so an unexpected level count cannot read outside it.
static void get_quantization_weights(int quality, int dwt_levels, float *weights) {
    enum { WEIGHT_ROWS = 17, WEIGHT_COLS = 16 };
    static const float base_weights[WEIGHT_ROWS][WEIGHT_COLS] = {
        /* 0*/{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
        /* 1*/{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
        /* 2*/{1.0f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 3*/{0.2f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 4*/{0.2f, 0.8f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 5*/{0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 6*/{0.2f, 0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 7*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 8*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 9*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*10*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*11*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*12*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*13*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*14*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f},
        /*15*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f},
        /*16*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f}
    };

    // Pick the table row for this level count, staying inside the table
    int row = dwt_levels;
    if (row < 0) row = 0;
    if (row > WEIGHT_ROWS - 1) row = WEIGHT_ROWS - 1;

    const float quality_scale = 1.0f + FCLAMP((3 - quality) * 0.5f, 0.0f, 1000.0f);

    for (int i = 0; i < dwt_levels; i++) {
        // Bands beyond the table width reuse the last column
        const int col = (i < WEIGHT_COLS) ? i : WEIGHT_COLS - 1;
        weights[i] = FCLAMP(base_weights[row][col] * quality_scale, 1.0f, 1000.0f);
    }
}
|
||||
|
||||
// Rescale quantized DWT coefficients to floats: coeffs[i] = quantized[i] * w,
// where w is the weight of the sideband that index i falls into.
//
// Band layout: band 0 (approximation) holds chunk_size >> dwt_levels entries,
// band 1 has the same width, and every later band doubles. Bands 0 and 1 both
// use weights[0]; band b (b >= 1) uses weights[b - 1]. Indices past the last
// band boundary use the last weight.
//
// quantized  - input coefficients (count entries)
// coeffs     - output buffer (count entries)
// count      - number of coefficients
// quality    - quality level forwarded to get_quantization_weights()
// chunk_size - chunk length used to derive the band boundaries
// dwt_levels - number of DWT levels, expected in 1..16
//
// Compared with the previous version this walks the bands once instead of
// searching the boundary table for every coefficient, and needs no heap
// allocation (the old boundary table came from an unchecked malloc).
static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs, size_t count, int quality, int chunk_size, int dwt_levels) {
    enum { MAX_DWT_LEVELS = 16 };

    // The weight buffer holds at most MAX_DWT_LEVELS entries; for a level count
    // outside that range there is no valid weight, so emit silence instead of
    // indexing out of bounds.
    if (dwt_levels < 1 || dwt_levels > MAX_DWT_LEVELS) {
        for (size_t i = 0; i < count; i++) {
            coeffs[i] = 0.0f;
        }
        return;
    }

    float weights[MAX_DWT_LEVELS];
    get_quantization_weights(quality, dwt_levels, weights);

    const size_t first_band_size = (chunk_size > 0) ? (size_t)(chunk_size >> dwt_levels) : 0;

    size_t pos = 0;
    size_t band_end = first_band_size;  // exclusive end of the current band
    for (int band = 0; band <= dwt_levels && pos < count; band++) {
        // Map (dwt_levels + 1) sidebands onto dwt_levels weights
        const float weight = weights[(band == 0) ? 0 : band - 1];

        for (; pos < band_end && pos < count; pos++) {
            coeffs[pos] = (float)quantized[pos] * weight;
        }

        // The next band (band + 1) is first_band_size << band entries wide
        band_end += first_band_size << band;
    }

    // Anything beyond the last boundary uses the last weight
    for (; pos < count; pos++) {
        coeffs[pos] = (float)quantized[pos] * weights[dwt_levels - 1];
    }
}
|
||||
|
||||
//=============================================================================
|
||||
// Significance Map Decoding
|
||||
//=============================================================================
|
||||
|
||||
// Decode a 1-bit significance map.
//
// Stream layout (native byte order, no alignment guaranteed):
//   ceil(count / 8) map bytes, bit (i % 8) of byte (i / 8) set => value i is
//   non-zero; a uint32 non-zero count; then that many int16 values in order.
//
// input  - encoded stream
// values - output, count entries
// count  - number of coefficients to reconstruct
// Returns the encoded size in bytes as declared by the stream
// (map + count field + nonzero_count values).
//
// Multi-byte fields are read with memcpy because they sit at arbitrary byte
// offsets; dereferencing a casted pointer there is a misaligned access. A map
// bit without a matching stored value (more set bits than nonzero_count)
// decodes as 0 rather than reading past the value list.
static size_t decode_sigmap_1bit(const uint8_t *input, int16_t *values, size_t count) {
    const size_t map_bytes = (count + 7) / 8;
    const uint8_t *map = input;
    const uint8_t *read_ptr = input + map_bytes;

    uint32_t nonzero_count;
    memcpy(&nonzero_count, read_ptr, sizeof nonzero_count);
    read_ptr += sizeof nonzero_count;

    const uint8_t *value_bytes = read_ptr;
    uint32_t value_idx = 0;

    for (size_t i = 0; i < count; i++) {
        const int significant = (map[i / 8] >> (i % 8)) & 1;
        if (significant && value_idx < nonzero_count) {
            int16_t v;
            memcpy(&v, value_bytes + (size_t)value_idx * sizeof v, sizeof v);
            values[i] = v;
            value_idx++;
        } else {
            values[i] = 0;
        }
    }

    return map_bytes + sizeof(uint32_t) + nonzero_count * sizeof(int16_t);
}
|
||||
|
||||
// Decode a 2-bit significance map ("twobitmap").
//
// Stream layout: ceil(count * 2 / 8) map bytes holding one 2-bit code per
// coefficient (four codes per byte, lowest bits first), followed by one int16
// (native byte order) for every code 3, in order.
//   code 0 -> 0, code 1 -> +1, code 2 -> -1, code 3 -> next stored int16
//
// input  - encoded stream
// values - output, count entries
// count  - number of coefficients to reconstruct
// Returns the number of input bytes consumed (map + stored values).
//
// Codes start at even bit offsets, so a code never straddles two bytes; the
// former "bit spillover" branch was unreachable and has been dropped. Stored
// values are read with memcpy because the value list can start at an odd
// address.
static size_t decode_sigmap_2bit(const uint8_t *input, int16_t *values, size_t count) {
    const size_t map_bytes = (count * 2 + 7) / 8;
    const uint8_t *map = input;
    const uint8_t *value_bytes = input + map_bytes;
    size_t other_idx = 0;

    for (size_t i = 0; i < count; i++) {
        const unsigned shift = (unsigned)(i % 4) * 2;
        const unsigned code = (map[i / 4] >> shift) & 0x03u;

        switch (code) {
            case 0: values[i] = 0; break;
            case 1: values[i] = 1; break;
            case 2: values[i] = -1; break;
            default: {
                int16_t v;
                memcpy(&v, value_bytes + other_idx * sizeof v, sizeof v);
                values[i] = v;
                other_idx++;
                break;
            }
        }
    }

    return map_bytes + other_idx * sizeof(int16_t);
}
|
||||
|
||||
// Decode a run-length encoded significance map.
//
// Stream layout: a uint32 run count, then per run a varint zero-run length
// (7 bits per byte, low bits first, high bit = "more bytes follow") and one
// int16 value. All multi-byte fields are in native byte order.
//
// For each run, zero_run zeros are emitted followed by the value (a stored
// value of 0 is skipped). Output is truncated at count entries and padded
// with zeros if the runs produce fewer.
//
// input  - encoded stream
// values - output, count entries
// count  - number of coefficients to reconstruct
// Returns the number of input bytes consumed.
//
// Fields are read with memcpy because they are not aligned, and varint
// continuation bytes beyond 32 bits are consumed but ignored so a malformed
// length cannot cause an out-of-range shift.
static size_t decode_sigmap_rle(const uint8_t *input, int16_t *values, size_t count) {
    const uint8_t *read_ptr = input;

    uint32_t run_count;
    memcpy(&run_count, read_ptr, sizeof run_count);
    read_ptr += sizeof run_count;

    size_t value_idx = 0;

    for (uint32_t run = 0; run < run_count; run++) {
        // Decode zero run length (varint)
        uint32_t zero_run = 0;
        unsigned shift = 0;
        uint8_t byte;

        do {
            byte = *read_ptr++;
            if (shift < 32) {
                zero_run |= (uint32_t)(byte & 0x7F) << shift;
                shift += 7;
            }
        } while (byte & 0x80);

        // Emit the zeros, never writing past the output
        const size_t room = count - value_idx;
        const size_t zeros = (zero_run < room) ? zero_run : room;
        memset(values + value_idx, 0, zeros * sizeof *values);
        value_idx += zeros;

        // Every run carries a value, even when the output is already full
        int16_t val;
        memcpy(&val, read_ptr, sizeof val);
        read_ptr += sizeof val;

        if (value_idx < count && val != 0) {
            values[value_idx++] = val;
        }
    }

    // Pad the tail with zeros
    if (value_idx < count) {
        memset(values + value_idx, 0, (count - value_idx) * sizeof *values);
    }

    return (size_t)(read_ptr - input);
}
|
||||
|
||||
//=============================================================================
|
||||
// Chunk Decoding
|
||||
//=============================================================================
|
||||
|
||||
static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_stereo,
|
||||
int quality, size_t *bytes_consumed, size_t *samples_decoded) {
|
||||
const uint8_t *read_ptr = input;
|
||||
|
||||
// Read chunk header
|
||||
uint16_t sample_count = *((const uint16_t*)read_ptr);
|
||||
read_ptr += sizeof(uint16_t);
|
||||
uint32_t payload_size = *((const uint32_t*)read_ptr);
|
||||
read_ptr += sizeof(uint32_t);
|
||||
|
||||
// Calculate DWT levels from sample count
|
||||
int dwt_levels = calculate_dwt_levels(sample_count);
|
||||
if (dwt_levels < 0) {
|
||||
fprintf(stderr, "Error: Invalid sample count %u\n", sample_count);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Decompress if needed
|
||||
const uint8_t *payload;
|
||||
uint8_t *decompressed = NULL;
|
||||
|
||||
// Estimate decompressed size (generous upper bound)
|
||||
size_t decompressed_size = sample_count * 4 * sizeof(int16_t);
|
||||
decompressed = malloc(decompressed_size);
|
||||
|
||||
size_t actual_size = ZSTD_decompress(decompressed, decompressed_size, read_ptr, payload_size);
|
||||
|
||||
if (ZSTD_isError(actual_size)) {
|
||||
fprintf(stderr, "Error: Zstd decompression failed: %s\n", ZSTD_getErrorName(actual_size));
|
||||
free(decompressed);
|
||||
return -1;
|
||||
}
|
||||
|
||||
payload = decompressed;
|
||||
|
||||
read_ptr += payload_size;
|
||||
*bytes_consumed = read_ptr - input;
|
||||
*samples_decoded = sample_count;
|
||||
|
||||
// Allocate working buffers
|
||||
int16_t *quant_mid = malloc(sample_count * sizeof(int16_t));
|
||||
int16_t *quant_side = malloc(sample_count * sizeof(int16_t));
|
||||
float *dwt_mid = malloc(sample_count * sizeof(float));
|
||||
float *dwt_side = malloc(sample_count * sizeof(float));
|
||||
int8_t *pcm8_mid = malloc(sample_count * sizeof(int8_t));
|
||||
int8_t *pcm8_side = malloc(sample_count * sizeof(int8_t));
|
||||
uint8_t *pcm8_left = malloc(sample_count * sizeof(uint8_t));
|
||||
uint8_t *pcm8_right = malloc(sample_count * sizeof(uint8_t));
|
||||
|
||||
// Decode significance maps
|
||||
const uint8_t *payload_ptr = payload;
|
||||
size_t mid_bytes, side_bytes;
|
||||
|
||||
mid_bytes = decode_sigmap_2bit(payload_ptr, quant_mid, sample_count);
|
||||
side_bytes = decode_sigmap_2bit(payload_ptr + mid_bytes, quant_side, sample_count);
|
||||
|
||||
// Dequantize
|
||||
dequantize_dwt_coefficients(quant_mid, dwt_mid, sample_count, quality, sample_count, dwt_levels);
|
||||
dequantize_dwt_coefficients(quant_side, dwt_side, sample_count, quality, sample_count, dwt_levels);
|
||||
|
||||
// Inverse DWT
|
||||
dwt_haar_inverse_multilevel(dwt_mid, sample_count, dwt_levels);
|
||||
dwt_haar_inverse_multilevel(dwt_side, sample_count, dwt_levels);
|
||||
|
||||
// Convert to signed PCM8
|
||||
for (size_t i = 0; i < sample_count; i++) {
|
||||
float m = dwt_mid[i];
|
||||
float s = dwt_side[i];
|
||||
|
||||
// Clamp and round
|
||||
if (m < -128.0f) m = -128.0f;
|
||||
if (m > 127.0f) m = 127.0f;
|
||||
if (s < -128.0f) s = -128.0f;
|
||||
if (s > 127.0f) s = 127.0f;
|
||||
|
||||
pcm8_mid[i] = (int8_t)roundf(m);
|
||||
pcm8_side[i] = (int8_t)roundf(s);
|
||||
}
|
||||
|
||||
// M/S to L/R correlation
|
||||
ms_correlate(pcm8_mid, pcm8_side, pcm8_left, pcm8_right, sample_count);
|
||||
|
||||
// Interleave stereo output (PCMu8)
|
||||
for (size_t i = 0; i < sample_count; i++) {
|
||||
pcmu8_stereo[i * 2] = pcm8_left[i];
|
||||
pcmu8_stereo[i * 2 + 1] = pcm8_right[i];
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
free(quant_mid); free(quant_side); free(dwt_mid); free(dwt_side);
|
||||
free(pcm8_mid); free(pcm8_side); free(pcm8_left); free(pcm8_right);
|
||||
if (decompressed) free(decompressed);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// Main Decoder
|
||||
//=============================================================================
|
||||
|
||||
static void print_usage(const char *prog_name) {
|
||||
printf("Usage: %s -i <input> -o <output> [options]\n", prog_name);
|
||||
printf("Options:\n");
|
||||
printf(" -i <file> Input TAD file\n");
|
||||
printf(" -o <file> Output PCMu8 file (raw 8-bit unsigned stereo @ 32kHz)\n");
|
||||
printf(" -q <0-5> Quality level used during encoding (default: 2)\n");
|
||||
printf(" -v Verbose output\n");
|
||||
printf(" -h, --help Show this help\n");
|
||||
printf("\nVersion: %s\n", DECODER_VENDOR_STRING);
|
||||
printf("Output format: PCMu8 (unsigned 8-bit) stereo @ 32000 Hz\n");
|
||||
printf("To convert to WAV: ffmpeg -f u8 -ar 32000 -ac 2 -i output.raw output.wav\n");
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
char *input_file = NULL;
|
||||
char *output_file = NULL;
|
||||
int quality = 2; // Must match encoder quality
|
||||
int verbose = 0;
|
||||
|
||||
int opt;
|
||||
while ((opt = getopt(argc, argv, "i:o:q:vh")) != -1) {
|
||||
switch (opt) {
|
||||
case 'i':
|
||||
input_file = optarg;
|
||||
break;
|
||||
case 'o':
|
||||
output_file = optarg;
|
||||
break;
|
||||
case 'q':
|
||||
quality = atoi(optarg);
|
||||
if (quality < TAD_QUALITY_MIN || quality > TAD_QUALITY_MAX) {
|
||||
fprintf(stderr, "Error: Quality must be between %d and %d\n",
|
||||
TAD_QUALITY_MIN, TAD_QUALITY_MAX);
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
case 'v':
|
||||
verbose = 1;
|
||||
break;
|
||||
case 'h':
|
||||
print_usage(argv[0]);
|
||||
return 0;
|
||||
default:
|
||||
print_usage(argv[0]);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!input_file || !output_file) {
|
||||
fprintf(stderr, "Error: Input and output files are required\n");
|
||||
print_usage(argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (verbose) {
|
||||
printf("%s\n", DECODER_VENDOR_STRING);
|
||||
printf("Input: %s\n", input_file);
|
||||
printf("Output: %s\n", output_file);
|
||||
printf("Quality: %d\n", quality);
|
||||
}
|
||||
|
||||
// Open input file
|
||||
FILE *input = fopen(input_file, "rb");
|
||||
if (!input) {
|
||||
fprintf(stderr, "Error: Could not open input file: %s\n", input_file);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Get file size
|
||||
fseek(input, 0, SEEK_END);
|
||||
size_t input_size = ftell(input);
|
||||
fseek(input, 0, SEEK_SET);
|
||||
|
||||
// Read entire file into memory
|
||||
uint8_t *input_data = malloc(input_size);
|
||||
fread(input_data, 1, input_size, input);
|
||||
fclose(input);
|
||||
|
||||
// Open output file
|
||||
FILE *output = fopen(output_file, "wb");
|
||||
if (!output) {
|
||||
fprintf(stderr, "Error: Could not open output file: %s\n", output_file);
|
||||
free(input_data);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Decode chunks
|
||||
size_t offset = 0;
|
||||
size_t chunk_count = 0;
|
||||
size_t total_samples = 0;
|
||||
// Allocate buffer for maximum chunk size (can handle variable sizes up to default)
|
||||
uint8_t *chunk_output = malloc(TAD_DEFAULT_CHUNK_SIZE * TAD_CHANNELS);
|
||||
|
||||
while (offset < input_size) {
|
||||
size_t bytes_consumed, samples_decoded;
|
||||
int result = decode_chunk(input_data + offset, input_size - offset,
|
||||
chunk_output, quality, &bytes_consumed, &samples_decoded);
|
||||
|
||||
if (result != 0) {
|
||||
fprintf(stderr, "Error: Chunk decoding failed at offset %zu\n", offset);
|
||||
free(input_data);
|
||||
free(chunk_output);
|
||||
fclose(output);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Write decoded chunk (only the actual samples)
|
||||
fwrite(chunk_output, TAD_CHANNELS, samples_decoded, output);
|
||||
|
||||
offset += bytes_consumed;
|
||||
total_samples += samples_decoded;
|
||||
chunk_count++;
|
||||
|
||||
if (verbose && (chunk_count % 10 == 0)) {
|
||||
printf("Decoded chunk %zu (offset %zu/%zu, %zu samples)\r", chunk_count, offset, input_size, samples_decoded);
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
if (verbose) {
|
||||
printf("\nDecoding complete!\n");
|
||||
printf("Decoded %zu chunks\n", chunk_count);
|
||||
printf("Total samples: %zu (%.2f seconds)\n",
|
||||
total_samples,
|
||||
total_samples / (double)TAD_SAMPLE_RATE);
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
free(input_data);
|
||||
free(chunk_output);
|
||||
fclose(output);
|
||||
|
||||
printf("Output written to: %s\n", output_file);
|
||||
printf("Format: PCMu8 stereo @ %d Hz\n", TAD_SAMPLE_RATE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
459
video_encoder/encoder_tad.c
Normal file
459
video_encoder/encoder_tad.c
Normal file
@@ -0,0 +1,459 @@
|
||||
// Created by CuriousTorvald and Claude on 2025-10-23.
|
||||
// TAD (Terrarum Advanced Audio) Encoder Library - DWT-based audio compression
|
||||
// This file contains only the encoding functions for use by encoder_tad.c and encoder_tav.c
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include <zstd.h>
|
||||
#include "encoder_tad.h"
|
||||
|
||||
// Forward declarations for internal functions
|
||||
static void dwt_haar_forward_1d(float *data, int length);
|
||||
static void dwt_dd4_forward_1d(float *data, int length);
|
||||
static void dwt_97_forward_1d(float *data, int length);
|
||||
static void dwt_haar_forward_multilevel(float *data, int length, int levels);
|
||||
static void ms_decorrelate(const int8_t *left, const int8_t *right, int8_t *mid, int8_t *side, size_t count);
|
||||
static void convert_pcm16_to_pcm8_dithered(const int16_t *pcm16, int8_t *pcm8, int num_samples, int16_t *dither_error);
|
||||
static void get_quantization_weights(int quality, int dwt_levels, float *weights);
|
||||
static int get_deadzone_threshold(int quality);
|
||||
static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, size_t count, int quality, int apply_deadzone, int chunk_size, int dwt_levels);
|
||||
static size_t encode_sigmap_2bit(const int16_t *values, size_t count, uint8_t *output);
|
||||
|
||||
// Clamp x to the closed interval [min, max]; a NaN x is returned unchanged.
static inline float FCLAMP(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
|
||||
|
||||
// Calculate DWT levels from chunk size (non-power-of-2 supported, >= 1024)
|
||||
static int calculate_dwt_levels(int chunk_size) {
|
||||
if (chunk_size < TAD_MIN_CHUNK_SIZE) {
|
||||
fprintf(stderr, "Error: Chunk size %d is below minimum %d\n", chunk_size, TAD_MIN_CHUNK_SIZE);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// For non-power-of-2, find next power of 2 and calculate levels
|
||||
// Then subtract 2 for maximum decomposition
|
||||
int levels = 0;
|
||||
int size = chunk_size;
|
||||
while (size > 1) {
|
||||
size >>= 1;
|
||||
levels++;
|
||||
}
|
||||
|
||||
// For non-power-of-2, we need to add 1 to levels
|
||||
int pow2 = 1 << levels;
|
||||
if (pow2 < chunk_size) {
|
||||
levels++;
|
||||
}
|
||||
|
||||
return levels - 2; // Maximum decomposition leaves 2-sample approximation
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// Forward DWT Implementations (Haar, DD-4 and 9/7)
|
||||
//=============================================================================
|
||||
|
||||
// In-place forward Haar level over data[0..length).
// Each adjacent pair (a, b) yields a low-pass average (a + b) / 2 and a
// high-pass half-difference (a - b) / 2. Output layout: (length + 1) / 2
// low-pass values followed by length / 2 high-pass values. For odd lengths the
// unpaired last sample is carried into the low-pass half unchanged.
static void dwt_haar_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int low_count = (length + 1) / 2;
    const int pair_count = length / 2;
    float *scratch = malloc(length * sizeof(float));

    for (int k = 0; k < pair_count; k++) {
        const float a = data[2 * k];
        const float b = data[2 * k + 1];
        scratch[k] = (a + b) / 2.0f;              // average (low-pass)
        scratch[low_count + k] = (a - b) / 2.0f;  // difference (high-pass)
    }

    // Odd length: last sample has no partner and goes straight to low-pass
    if (length & 1) {
        scratch[low_count - 1] = data[length - 1];
    }

    memcpy(data, scratch, length * sizeof(float));
    free(scratch);
}
|
||||
|
||||
// In-place forward level of the four-point interpolating Deslauriers-Dubuc
// (DD-4) lifting transform over data[0..length).
// Output layout: (length + 1) / 2 low-pass values followed by length / 2
// high-pass values.
static void dwt_dd4_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int n_low = (length + 1) / 2;
    const int n_high = length / 2;

    float *work = malloc(length * sizeof(float));
    float *low = work;           // even samples
    float *high = work + n_low;  // odd samples

    // De-interleave into even/odd halves
    for (int pos = 0; pos < length; pos++) {
        if (pos & 1) {
            high[pos >> 1] = data[pos];
        } else {
            low[pos >> 1] = data[pos];
        }
    }

    // Prediction step: d[k] -= P(s[k-1], s[k], s[k+1], s[k+2])
    for (int k = 0; k < n_high; k++) {
        const int idx_m1 = (k > 0) ? k - 1 : 0;  // left edge repeats sample 0
        const int idx_p1 = (k + 1 < n_low) ? k + 1 : n_low - 1;
        int idx_p2;
        if (k + 2 < n_low) {
            idx_p2 = k + 2;
        } else if (n_low > 1) {
            idx_p2 = n_low - 2;
        } else {
            idx_p2 = n_low - 1;
        }

        const float s_m1 = low[idx_m1];
        const float s_0 = low[k];
        const float s_1 = low[idx_p1];
        const float s_2 = low[idx_p2];

        const float prediction = (-1.0f/16.0f) * s_m1 + (9.0f/16.0f) * s_0 +
                                 (9.0f/16.0f) * s_1 + (-1.0f/16.0f) * s_2;

        high[k] -= prediction;
    }

    // Update step: s[k] += 0.25 * (d[k-1] + d[k]); missing neighbours are 0
    for (int k = 0; k < n_low; k++) {
        const float d_here = (k < n_high) ? high[k] : 0.0f;
        const float d_before = (k > 0) ? high[k - 1] : 0.0f;
        low[k] += 0.25f * (d_before + d_here);
    }

    memcpy(data, work, length * sizeof(float));
    free(work);
}
|
||||
|
||||
/*
 * Lifting "predict" pass shared by the 9/7 transform: each detail sample gets
 * coeff * (sum of its two neighbouring approximation samples) added to it.
 * Past the right edge the current approximation sample is reused.
 */
static void tad_lift97_predict(const float *approx, float *detail, int n_approx, int n_detail, float coeff) {
    for (int k = 0; k < n_detail; k++) {
        const float here = approx[k];
        const float next = (k + 1 < n_approx) ? approx[k + 1] : here;
        detail[k] += coeff * (here + next);
    }
}

/*
 * Lifting "update" pass shared by the 9/7 transform: each approximation sample
 * gets coeff * (sum of its two neighbouring detail samples) added to it.
 * A missing right-hand detail counts as zero; at the left edge the current
 * detail is used twice.
 */
static void tad_lift97_update(float *approx, const float *detail, int n_approx, int n_detail, float coeff) {
    for (int k = 0; k < n_approx; k++) {
        const float here = (k < n_detail) ? detail[k] : 0.0f;
        const float before = (k > 0) ? detail[k - 1] : here;
        approx[k] += coeff * (before + here);
    }
}

/*
 * One level of the forward 9/7 irreversible wavelet transform via lifting,
 * performed in place.
 *
 * On return the first (length + 1) / 2 entries hold the low-pass band (scaled
 * by K) and the remaining length / 2 entries hold the high-pass band (scaled
 * by 1/K). Inputs shorter than two samples are left untouched.
 *
 * NOTE(review): the scratch allocation is not checked for failure.
 */
static void dwt_97_forward_1d(float *data, int length) {
    if (length < 2) {
        return;
    }

    // JPEG2000 9/7 forward lifting coefficients
    const float alpha = -1.586134342f;
    const float beta = -0.052980118f;
    const float gamma = 0.882911076f;
    const float delta = 0.443506852f;
    const float K = 1.230174105f;

    const int n_approx = (length + 1) / 2;
    const int n_detail = length / 2;

    float *work = malloc(length * sizeof(float));
    float *approx = work;
    float *detail = work + n_approx;

    // Split into even (approximation) and odd (detail) samples
    for (int k = 0; k < n_approx; k++) {
        approx[k] = data[2 * k];
    }
    for (int k = 0; k < n_detail; k++) {
        detail[k] = data[2 * k + 1];
    }

    // Four lifting passes: predict α, update β, predict γ, update δ
    tad_lift97_predict(approx, detail, n_approx, n_detail, alpha);
    tad_lift97_update(approx, detail, n_approx, n_detail, beta);
    tad_lift97_predict(approx, detail, n_approx, n_detail, gamma);
    tad_lift97_update(approx, detail, n_approx, n_detail, delta);

    // Final band scaling
    for (int k = 0; k < n_approx; k++) {
        approx[k] *= K;
    }
    for (int k = 0; k < n_detail; k++) {
        detail[k] /= K;
    }

    memcpy(data, work, length * sizeof(float));
    free(work);
}
|
||||
|
||||
/*
 * Multi-level forward DWT, performed in place.
 *
 * Despite the "haar" in the name, each level is computed with the DD-4
 * transform (dwt_dd4_forward_1d). After every level the transform recurses on
 * the low-pass half only, whose size is (current + 1) / 2.
 * A non-positive `levels` leaves `data` untouched.
 */
static void dwt_haar_forward_multilevel(float *data, int length, int levels) {
    int span = length;
    int remaining = levels;

    while (remaining > 0) {
        dwt_dd4_forward_1d(data, span);
        span = (span + 1) / 2;
        remaining--;
    }
}
|
||||
|
||||
//=============================================================================
|
||||
// M/S Stereo Decorrelation
|
||||
//=============================================================================
|
||||
|
||||
/*
 * Mid/side decorrelation of two signed 8-bit channels.
 *
 * For each of the `count` sample pairs:
 *   mid  = (L + R) / 2
 *   side = (L - R) / 2
 * Both divisions are C integer divisions on 32-bit intermediates, so the
 * results truncate toward zero.
 */
static void ms_decorrelate(const int8_t *left, const int8_t *right, int8_t *mid, int8_t *side, size_t count) {
    size_t pos = 0;

    while (pos < count) {
        const int32_t lv = left[pos];
        const int32_t rv = right[pos];
        const int32_t sum = lv + rv;
        const int32_t diff = lv - rv;

        mid[pos] = (int8_t)(sum / 2);
        side[pos] = (int8_t)(diff / 2);
        pos++;
    }
}
|
||||
|
||||
//=============================================================================
|
||||
// PCM16 to Signed PCM8 Conversion with Dithering
|
||||
//=============================================================================
|
||||
|
||||
/*
 * Convert interleaved stereo PCM16 to signed PCM8 with first-order error
 * diffusion.
 *
 * @param pcm16        Interleaved input, 2 * num_samples values (L, R, L, R, ...)
 * @param pcm8         Interleaved output, 2 * num_samples values
 * @param num_samples  Number of stereo frames
 * @param dither_error Two-element running quantization error, one per channel;
 *                     read on entry and updated on return so that consecutive
 *                     calls can continue the diffusion
 *
 * Each sample has the channel's carried error added, is reduced to
 * floor(value / 256), and is clamped to [-128, 127]. The remainder becomes
 * the new carried error.
 *
 * Two deliberate differences from a naive shift-based implementation:
 *   - floor division is done arithmetically, so no negative value is ever
 *     shifted (left-shifting a negative value is undefined behaviour and
 *     right-shifting one is implementation-defined);
 *   - the carried error is limited to one quantization step (+/-255). Without
 *     the limit the error keeps growing while the output is clipped, overflows
 *     the int16_t carry, and turns a sustained full-scale signal into silence.
 *     For samples that do not clip the remainder is always within 0..255, so
 *     the limit changes nothing there.
 */
static void convert_pcm16_to_pcm8_dithered(const int16_t *pcm16, int8_t *pcm8, int num_samples, int16_t *dither_error) {
    for (int frame = 0; frame < num_samples; frame++) {
        for (int ch = 0; ch < 2; ch++) {  // Stereo: L and R
            const int idx = frame * 2 + ch;
            const int32_t sample = (int32_t)pcm16[idx] + dither_error[ch];

            // floor(sample / 256) for either sign
            int32_t quantized = (sample >= 0) ? sample / 256
                                              : -((255 - sample) / 256);
            if (quantized < -128) quantized = -128;
            if (quantized > 127) quantized = 127;
            pcm8[idx] = (int8_t)quantized;

            // Residual exceeds one step only when the output was clipped;
            // discard the excess instead of carrying it forward.
            int32_t residual = sample - quantized * 256;
            if (residual > 255) residual = 255;
            if (residual < -255) residual = -255;
            dither_error[ch] = (int16_t)residual;
        }
    }
}
|
||||
|
||||
//=============================================================================
|
||||
// Quantization with Frequency-Dependent Weighting
|
||||
//=============================================================================
|
||||
|
||||
/*
 * Fill `weights[0 .. dwt_levels-1]` with the quantization divisor for each
 * band at the given quality.
 *
 * The table row is selected by `dwt_levels`; each entry is multiplied by a
 * quality scale and the product is clamped to [1.0, 1000.0]. The quality scale
 * is 1.0 for quality >= 3 and grows by 0.5 for every step below 3.
 *
 * NOTE(review): `dwt_levels` indexes a 16-row table without a range check, so
 * callers must keep it within 0..15.
 */
static void get_quantization_weights(int quality, int dwt_levels, float *weights) {
    // Row = decomposition depth, column = band index within that depth
    const float base_weights[16][16] = {
        /* 0*/{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
        /* 1*/{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
        /* 2*/{1.0f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 3*/{0.2f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 4*/{0.2f, 0.8f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 5*/{0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 6*/{0.2f, 0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 7*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 8*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 9*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*10*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*11*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*12*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*13*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*14*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f},
        /*15*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f}
    };

    // Coarser quantization below Q3: +0.5 per quality step, never below 1.0
    const float quality_scale = 1.0f + FCLAMP((3 - quality) * 0.5f, 0.0f, 1000.0f);

    int band = 0;
    while (band < dwt_levels) {
        const float scaled = base_weights[dwt_levels][band] * quality_scale;
        weights[band] = FCLAMP(scaled, 1.0f, 1000.0f);
        band++;
    }
}
|
||||
|
||||
/*
 * Dead-zone threshold for a quality level.
 *
 * @param quality Quality level; values outside the table (Q0..Q5) are clamped
 *                to the nearest valid level instead of indexing out of bounds.
 * @return 1 for Q0 and Q1, 0 for Q2 through Q5.
 */
static int get_deadzone_threshold(int quality) {
    static const int thresholds[] = {1, 1, 0, 0, 0, 0};  // Q0 to Q5
    const int last = (int)(sizeof(thresholds) / sizeof(thresholds[0])) - 1;

    if (quality < 0) {
        quality = 0;
    } else if (quality > last) {
        quality = last;
    }

    return thresholds[quality];
}
|
||||
|
||||
/*
 * Quantize DWT coefficients with a per-band divisor.
 *
 * @param coeffs          Input coefficients, `count` values
 * @param quantized       Output, `count` values
 * @param count           Number of coefficients
 * @param quality         Quality level, forwarded to the weight and dead-zone tables
 * @param apply_deadzone  Non-zero to apply the dead zone to the two highest bands
 * @param chunk_size      Samples in the chunk; used to derive band boundaries
 * @param dwt_levels      Number of decomposition levels
 *
 * Band boundaries are derived by shifting: the first band spans
 * chunk_size >> dwt_levels coefficients, the next band has the same size, and
 * every later band is twice the size of the one before it. Band 0 and band 1
 * share weight index 0; band b > 1 uses weight index b - 1.
 *
 * NOTE(review): with the thresholds currently returned (0 or 1) the dead-zone
 * test `-deadzone < q < deadzone` can only match q == 0, so it never changes a
 * value. Confirm whether an inclusive comparison was intended.
 * NOTE(review): the boundary table allocation is not checked for failure.
 */
static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, size_t count, int quality, int apply_deadzone, int chunk_size, int dwt_levels) {
    float band_weights[16];
    get_quantization_weights(quality, dwt_levels, band_weights);

    const int deadzone = apply_deadzone ? get_deadzone_threshold(quality) : 0;

    // bounds[b + 1] is the exclusive end offset of band b
    const int approx_len = chunk_size >> dwt_levels;
    int *bounds = malloc((dwt_levels + 2) * sizeof(int));
    bounds[0] = 0;
    bounds[1] = approx_len;
    for (int b = 2; b <= dwt_levels + 1; b++) {
        bounds[b] = bounds[b - 1] + (approx_len << (b - 2));
    }

    for (size_t pos = 0; pos < count; pos++) {
        // First band whose end lies beyond pos; positions past the last
        // boundary fall into the highest band.
        int band = 0;
        while (band <= dwt_levels && pos >= (size_t)bounds[band + 1]) {
            band++;
        }
        if (band > dwt_levels) {
            band = dwt_levels;
        }

        int weight_idx = (band > 0) ? band - 1 : 0;
        if (weight_idx >= dwt_levels) {
            weight_idx = dwt_levels - 1;
        }

        int16_t q = (int16_t)roundf(coeffs[pos] / band_weights[weight_idx]);

        const int in_top_bands = band >= dwt_levels - 1;
        if (apply_deadzone && in_top_bands && q > -deadzone && q < deadzone) {
            q = 0;
        }

        quantized[pos] = q;
    }

    free(bounds);
}
|
||||
|
||||
//=============================================================================
|
||||
// Significance Map Encoding
|
||||
//=============================================================================
|
||||
|
||||
/*
 * Encode quantized values as a 2-bit significance map followed by the escaped
 * values.
 *
 * @param values Input values, `count` entries
 * @param count  Number of values
 * @param output Destination; must hold (count * 2 + 7) / 8 map bytes plus two
 *               bytes for every value other than 0, +1 and -1
 * @return Total bytes written (map plus escaped values)
 *
 * Map layout: value i occupies bits (i % 4) * 2 and (i % 4) * 2 + 1 of byte
 * i / 4, least-significant pair first, with codes
 *   00 = 0, 01 = +1, 10 = -1, 11 = other.
 * Every "other" value is appended after the map, in input order, as a 16-bit
 * integer in the host's byte order.
 *
 * Escaped values are stored with memcpy because the map may have an odd byte
 * count (and the caller may pass an odd `output` address), so the value area
 * is not guaranteed to be 2-byte aligned.
 */
static size_t encode_sigmap_2bit(const int16_t *values, size_t count, uint8_t *output) {
    const size_t map_bytes = (count * 2 + 7) / 8;
    uint8_t *map = output;
    uint8_t *escape_out = output + map_bytes;

    memset(map, 0, map_bytes);

    for (size_t i = 0; i < count; i++) {
        const int16_t val = values[i];
        unsigned code;

        if (val == 0) {
            code = 0;  // 00
        } else if (val == 1) {
            code = 1;  // 01
        } else if (val == -1) {
            code = 2;  // 10
        } else {
            code = 3;  // 11
            memcpy(escape_out, &val, sizeof(val));
            escape_out += sizeof(val);
        }

        // Four codes per byte; a code never straddles a byte boundary
        map[i / 4] |= (uint8_t)(code << ((i % 4) * 2));
    }

    return (size_t)(escape_out - output);
}
|
||||
|
||||
//=============================================================================
|
||||
// Public API: Chunk Encoding
|
||||
//=============================================================================
|
||||
|
||||
size_t tad_encode_chunk(const int16_t *pcm16_stereo, size_t num_samples, int quality,
|
||||
int use_zstd, uint8_t *output) {
|
||||
// Calculate DWT levels from chunk size
|
||||
int dwt_levels = calculate_dwt_levels(num_samples);
|
||||
if (dwt_levels < 0) {
|
||||
fprintf(stderr, "Error: Invalid chunk size %zu\n", num_samples);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Allocate working buffers
|
||||
int8_t *pcm8_stereo = malloc(num_samples * 2 * sizeof(int8_t));
|
||||
int8_t *pcm8_left = malloc(num_samples * sizeof(int8_t));
|
||||
int8_t *pcm8_right = malloc(num_samples * sizeof(int8_t));
|
||||
int8_t *pcm8_mid = malloc(num_samples * sizeof(int8_t));
|
||||
int8_t *pcm8_side = malloc(num_samples * sizeof(int8_t));
|
||||
|
||||
float *dwt_mid = malloc(num_samples * sizeof(float));
|
||||
float *dwt_side = malloc(num_samples * sizeof(float));
|
||||
|
||||
int16_t *quant_mid = malloc(num_samples * sizeof(int16_t));
|
||||
int16_t *quant_side = malloc(num_samples * sizeof(int16_t));
|
||||
|
||||
// Step 1: Convert PCM16 to signed PCM8 with dithering
|
||||
int16_t dither_error[2] = {0, 0};
|
||||
convert_pcm16_to_pcm8_dithered(pcm16_stereo, pcm8_stereo, num_samples, dither_error);
|
||||
|
||||
// Deinterleave stereo
|
||||
for (size_t i = 0; i < num_samples; i++) {
|
||||
pcm8_left[i] = pcm8_stereo[i * 2];
|
||||
pcm8_right[i] = pcm8_stereo[i * 2 + 1];
|
||||
}
|
||||
|
||||
// Step 2: M/S decorrelation
|
||||
ms_decorrelate(pcm8_left, pcm8_right, pcm8_mid, pcm8_side, num_samples);
|
||||
|
||||
// Step 3: Convert to float and apply DWT
|
||||
for (size_t i = 0; i < num_samples; i++) {
|
||||
dwt_mid[i] = (float)pcm8_mid[i];
|
||||
dwt_side[i] = (float)pcm8_side[i];
|
||||
}
|
||||
|
||||
dwt_haar_forward_multilevel(dwt_mid, num_samples, dwt_levels);
|
||||
dwt_haar_forward_multilevel(dwt_side, num_samples, dwt_levels);
|
||||
|
||||
// Step 4: Quantize with frequency-dependent weights and dead zone
|
||||
quantize_dwt_coefficients(dwt_mid, quant_mid, num_samples, quality, 1, num_samples, dwt_levels);
|
||||
quantize_dwt_coefficients(dwt_side, quant_side, num_samples, quality, 1, num_samples, dwt_levels);
|
||||
|
||||
// Step 5: Encode with 2-bit significance map
|
||||
uint8_t *temp_buffer = malloc(num_samples * 4 * sizeof(int16_t));
|
||||
size_t mid_size = encode_sigmap_2bit(quant_mid, num_samples, temp_buffer);
|
||||
size_t side_size = encode_sigmap_2bit(quant_side, num_samples, temp_buffer + mid_size);
|
||||
|
||||
size_t uncompressed_size = mid_size + side_size;
|
||||
|
||||
// Step 6: Optional Zstd compression
|
||||
uint8_t *write_ptr = output;
|
||||
|
||||
*((uint16_t*)write_ptr) = (uint16_t)num_samples;
|
||||
write_ptr += sizeof(uint16_t);
|
||||
|
||||
uint32_t *payload_size_ptr = (uint32_t*)write_ptr;
|
||||
write_ptr += sizeof(uint32_t);
|
||||
|
||||
size_t payload_size;
|
||||
|
||||
if (use_zstd) {
|
||||
size_t zstd_bound = ZSTD_compressBound(uncompressed_size);
|
||||
uint8_t *zstd_buffer = malloc(zstd_bound);
|
||||
|
||||
payload_size = ZSTD_compress(zstd_buffer, zstd_bound, temp_buffer, uncompressed_size, TAD_ZSTD_LEVEL);
|
||||
|
||||
if (ZSTD_isError(payload_size)) {
|
||||
fprintf(stderr, "Error: Zstd compression failed: %s\n", ZSTD_getErrorName(payload_size));
|
||||
free(zstd_buffer);
|
||||
free(pcm8_stereo); free(pcm8_left); free(pcm8_right);
|
||||
free(pcm8_mid); free(pcm8_side); free(dwt_mid); free(dwt_side);
|
||||
free(quant_mid); free(quant_side); free(temp_buffer);
|
||||
return 0;
|
||||
}
|
||||
|
||||
memcpy(write_ptr, zstd_buffer, payload_size);
|
||||
free(zstd_buffer);
|
||||
} else {
|
||||
payload_size = uncompressed_size;
|
||||
memcpy(write_ptr, temp_buffer, payload_size);
|
||||
}
|
||||
|
||||
*payload_size_ptr = (uint32_t)payload_size;
|
||||
write_ptr += payload_size;
|
||||
|
||||
// Cleanup
|
||||
free(pcm8_stereo); free(pcm8_left); free(pcm8_right);
|
||||
free(pcm8_mid); free(pcm8_side); free(dwt_mid); free(dwt_side);
|
||||
free(quant_mid); free(quant_side); free(temp_buffer);
|
||||
|
||||
return write_ptr - output;
|
||||
}
|
||||
40
video_encoder/encoder_tad.h
Normal file
40
video_encoder/encoder_tad.h
Normal file
@@ -0,0 +1,40 @@
|
||||
#ifndef TAD_ENCODER_H
#define TAD_ENCODER_H

#include <stdint.h>
#include <stddef.h>

// TAD (Terrarum Advanced Audio) Encoder
// DWT-based perceptual audio codec for TSVM

// Constants
#define TAD_MIN_CHUNK_SIZE 1024   // Minimum: 1024 samples (supports non-power-of-2)
#define TAD_SAMPLE_RATE 32000
#define TAD_CHANNELS 2            // Stereo
#define TAD_SIGMAP_2BIT 1         // 2-bit: 00=0, 01=+1, 10=-1, 11=other
#define TAD_QUALITY_MIN 0
#define TAD_QUALITY_MAX 5
#define TAD_QUALITY_DEFAULT 3
#define TAD_ZSTD_LEVEL 7

/**
 * Encode audio chunk with TAD codec
 *
 * @param pcm16_stereo  Input PCM16LE stereo samples (interleaved L,R)
 * @param num_samples   Number of samples per channel (supports non-power-of-2, min 1024).
 *                      The count is stored in a 16-bit field, so values above
 *                      65535 cannot be represented in the output.
 * @param quality       Quality level 0-5 (0=lowest, 5=highest)
 * @param use_zstd      1=enable Zstd compression, 0=disable
 * @param output        Output buffer (must be large enough). Before compression
 *                      the payload is at most 2 * ((2 * num_samples + 7) / 8 +
 *                      2 * num_samples) bytes; add 6 bytes for the header and,
 *                      with Zstd, size for ZSTD_compressBound() of that payload.
 * @return Number of bytes written to output, or 0 on error
 *
 * Output format (as written by tad_encode_chunk; fields are stored in the
 * encoder host's byte order):
 *   uint16 sample_count  (samples per channel)
 *   uint32 payload_size  (bytes in payload)
 *   *      payload       (encoded M/S data, optionally Zstd-compressed)
 *
 * The chunk itself carries no significance-map method or compression flag;
 * the reader has to know whether Zstd was used.
 */
size_t tad_encode_chunk(const int16_t *pcm16_stereo, size_t num_samples, int quality,
                        int use_zstd, uint8_t *output);

#endif // TAD_ENCODER_H
|
||||
@@ -11,14 +11,14 @@
|
||||
#include <unistd.h>
|
||||
#include <sys/wait.h>
|
||||
#include <getopt.h>
|
||||
#include "encoder_tad.h" // TAD audio encoder
|
||||
#include <ctype.h>
|
||||
#include <sys/time.h>
|
||||
#include <time.h>
|
||||
#include <limits.h>
|
||||
#include <float.h>
|
||||
#include <fftw3.h>
|
||||
|
||||
#define ENCODER_VENDOR_STRING "Encoder-TAV 20251023 (3d-dwt)"
|
||||
#define ENCODER_VENDOR_STRING "Encoder-TAV 20251024 (3d-dwt,tad)"
|
||||
|
||||
// TSVM Advanced Video (TAV) format constants
|
||||
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV"
|
||||
@@ -55,6 +55,7 @@
|
||||
#define TAV_PACKET_BFRAME_ADAPTIVE 0x17 // B-frame with adaptive quad-tree block partitioning (bidirectional prediction)
|
||||
#define TAV_PACKET_AUDIO_MP2 0x20 // MP2 audio
|
||||
#define TAV_PACKET_AUDIO_PCM8 0x21 // 8-bit PCM audio (zstd compressed)
|
||||
#define TAV_PACKET_AUDIO_TAD 0x24 // TAD audio (DWT-based perceptual codec)
|
||||
#define TAV_PACKET_SUBTITLE 0x30 // Subtitle packet
|
||||
#define TAV_PACKET_AUDIO_TRACK 0x40 // Separate audio track (full MP2 file)
|
||||
#define TAV_PACKET_EXTENDED_HDR 0xEF // Extended header packet
|
||||
@@ -63,6 +64,15 @@
|
||||
#define TAV_PACKET_SYNC_NTSC 0xFE // NTSC Sync packet
|
||||
#define TAV_PACKET_SYNC 0xFF // Sync packet
|
||||
|
||||
// TAD (Terrarum Advanced Audio) settings
|
||||
#define TAD_MIN_CHUNK_SIZE 1024 // Minimum: 1024 samples (supports non-power-of-2)
|
||||
#define TAD_SAMPLE_RATE 32000
|
||||
#define TAD_CHANNELS 2 // Stereo
|
||||
#define TAD_SIGMAP_2BIT 1 // 2-bit: 00=0, 01=+1, 10=-1, 11=other
|
||||
#define TAD_QUALITY_MIN 0
|
||||
#define TAD_QUALITY_MAX 5
|
||||
#define TAD_ZSTD_LEVEL 7
|
||||
|
||||
// DWT settings
|
||||
#define TILE_SIZE_X 640
|
||||
#define TILE_SIZE_Y 540
|
||||
@@ -1753,6 +1763,7 @@ typedef struct tav_encoder_s {
|
||||
int delta_haar_levels; // Number of Haar DWT levels to apply to delta coefficients (0 = disabled)
|
||||
int separate_audio_track; // 1 = write entire MP2 file as packet 0x40 after header, 0 = interleave audio (default)
|
||||
int pcm8_audio; // 1 = use 8-bit PCM audio (packet 0x21), 0 = use MP2 (default)
|
||||
int tad_audio; // 1 = use TAD audio (packet 0x24), 0 = use MP2/PCM8 (default, quality follows quality_level)
|
||||
|
||||
// Frame buffers - ping-pong implementation
|
||||
uint8_t *frame_rgb[2]; // [0] and [1] alternate between current and previous
|
||||
@@ -2272,6 +2283,7 @@ static void show_usage(const char *program_name) {
|
||||
printf(" Valid values: 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384\n");
|
||||
// printf(" --separate-audio-track Write entire audio track as single packet instead of interleaved\n");
|
||||
printf(" --pcm8-audio Use 8-bit PCM audio instead of MP2 (TSVM native audio format)\n");
|
||||
printf(" --tad-audio Use TAD (DWT-based perceptual) audio codec (packet 0x24, quality follows -q)\n");
|
||||
printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n");
|
||||
printf(" --fontrom-lo FILE Low font ROM file for internationalised subtitles\n");
|
||||
printf(" --fontrom-hi FILE High font ROM file for internationalised subtitles\n");
|
||||
@@ -2361,6 +2373,7 @@ static tav_encoder_t* create_encoder(void) {
|
||||
enc->delta_haar_levels = TEMPORAL_DECOMP_LEVEL;
|
||||
enc->separate_audio_track = 0; // Default: interleave audio packets
|
||||
enc->pcm8_audio = 0; // Default: use MP2 audio
|
||||
enc->tad_audio = 0; // Default: use MP2 audio (TAD quality follows quality_level)
|
||||
|
||||
// GOP / temporal DWT settings
|
||||
enc->enable_temporal_dwt = 1; // Mutually exclusive with use_delta_encoding
|
||||
@@ -8050,11 +8063,15 @@ static int start_audio_conversion(tav_encoder_t *enc) {
|
||||
|
||||
char command[2048];
|
||||
|
||||
if (enc->pcm8_audio) {
|
||||
// Extract PCM16LE for PCM8 mode
|
||||
printf(" Audio format: PCM16LE 32kHz stereo (will be converted to 8-bit)\n");
|
||||
if (enc->pcm8_audio || enc->tad_audio) {
|
||||
// Extract PCM16LE for PCM8/TAD mode
|
||||
if (enc->pcm8_audio) {
|
||||
printf(" Audio format: PCM16LE 32kHz stereo (will be converted to 8-bit PCM)\n");
|
||||
} else {
|
||||
printf(" Audio format: PCM16LE 32kHz stereo (will be encoded with TAD codec)\n");
|
||||
}
|
||||
snprintf(command, sizeof(command),
|
||||
"ffmpeg -v quiet -i \"%s\" -f s16le -acodec pcm_s16le -ar %d -ac 2 -y \"%s\" 2>/dev/null",
|
||||
"ffmpeg -v quiet -i \"%s\" -f s16le -acodec pcm_s16le -ar %d -ac 2 -af \"aresample=resampler=soxr:precision=28:cutoff=0.99:dither_scale=0,highpass=f=16\" -y \"%s\" 2>/dev/null",
|
||||
enc->input_file, TSVM_AUDIO_SAMPLE_RATE, TEMP_PCM_FILE);
|
||||
|
||||
int result = system(command);
|
||||
@@ -8806,6 +8823,95 @@ static int write_separate_audio_track(tav_encoder_t *enc, FILE *output) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Write TAD audio packet (0x24) with specified sample count
|
||||
// Uses linked TAD encoder (encoder_tad.c)
|
||||
static int write_tad_packet_samples(tav_encoder_t *enc, FILE *output, int samples_to_read) {
|
||||
if (!enc->pcm_file || enc->audio_remaining <= 0 || samples_to_read <= 0) {
|
||||
return 0;
|
||||
}
|
||||
size_t bytes_to_read = samples_to_read * 2 * sizeof(int16_t); // Stereo PCM16LE
|
||||
|
||||
// Don't read more than what's available
|
||||
if (bytes_to_read > enc->audio_remaining) {
|
||||
bytes_to_read = enc->audio_remaining;
|
||||
samples_to_read = bytes_to_read / (2 * sizeof(int16_t));
|
||||
}
|
||||
|
||||
if (samples_to_read < TAD_MIN_CHUNK_SIZE) {
|
||||
// Pad to minimum size
|
||||
samples_to_read = TAD_MIN_CHUNK_SIZE;
|
||||
}
|
||||
|
||||
// Allocate PCM16 input buffer
|
||||
int16_t *pcm16_buffer = malloc(samples_to_read * 2 * sizeof(int16_t));
|
||||
|
||||
// Read PCM16LE data
|
||||
size_t bytes_read = fread(pcm16_buffer, 1, bytes_to_read, enc->pcm_file);
|
||||
if (bytes_read == 0) {
|
||||
free(pcm16_buffer);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int samples_read = bytes_read / (2 * sizeof(int16_t));
|
||||
|
||||
// Zero-pad if needed
|
||||
if (samples_read < samples_to_read) {
|
||||
memset(&pcm16_buffer[samples_read * 2], 0,
|
||||
(samples_to_read - samples_read) * 2 * sizeof(int16_t));
|
||||
}
|
||||
|
||||
// Encode with TAD encoder (linked from encoder_tad.o)
|
||||
int tad_quality = enc->quality_level; // Use video quality level for audio
|
||||
if (tad_quality > TAD_QUALITY_MAX) tad_quality = TAD_QUALITY_MAX;
|
||||
if (tad_quality < TAD_QUALITY_MIN) tad_quality = TAD_QUALITY_MIN;
|
||||
|
||||
// Allocate output buffer (generous size for TAD chunk)
|
||||
size_t max_output_size = samples_to_read * 4 * sizeof(int16_t) + 1024;
|
||||
uint8_t *tad_output = malloc(max_output_size);
|
||||
|
||||
size_t tad_encoded_size = tad_encode_chunk(pcm16_buffer, samples_to_read, tad_quality, 1, tad_output);
|
||||
|
||||
if (tad_encoded_size == 0) {
|
||||
fprintf(stderr, "Error: TAD encoding failed\n");
|
||||
free(pcm16_buffer);
|
||||
free(tad_output);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Parse TAD chunk format: [sample_count][payload_size][payload]
|
||||
uint8_t *read_ptr = tad_output;
|
||||
uint16_t sample_count = *((uint16_t*)read_ptr);
|
||||
read_ptr += sizeof(uint16_t);
|
||||
uint32_t tad_payload_size = *((uint32_t*)read_ptr);
|
||||
read_ptr += sizeof(uint32_t);
|
||||
uint8_t *tad_payload = read_ptr;
|
||||
|
||||
// Write TAV packet 0x24: [0x24][payload_size+2][sample_count][compressed_size][compressed_data]
|
||||
uint8_t packet_type = TAV_PACKET_AUDIO_TAD;
|
||||
fwrite(&packet_type, 1, 1, output);
|
||||
|
||||
uint32_t tav_payload_size = (uint32_t)tad_payload_size;
|
||||
uint32_t tav_payload_size_plus_two = (uint32_t)tad_payload_size + 2;
|
||||
fwrite(&tav_payload_size_plus_two, sizeof(uint32_t), 1, output);
|
||||
fwrite(&sample_count, sizeof(uint16_t), 1, output);
|
||||
fwrite(&tav_payload_size, sizeof(uint32_t), 1, output);
|
||||
fwrite(tad_payload, 1, tad_payload_size, output);
|
||||
|
||||
// Update audio remaining
|
||||
enc->audio_remaining -= bytes_read;
|
||||
|
||||
if (enc->verbose) {
|
||||
printf("TAD packet: %d samples, %u bytes compressed (Q%d)\n",
|
||||
sample_count, tad_payload_size, tad_quality);
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
free(pcm16_buffer);
|
||||
free(tad_output);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Write PCM8 audio packet (0x21) with specified sample count
|
||||
static int write_pcm8_packet_samples(tav_encoder_t *enc, FILE *output, int samples_to_read) {
|
||||
if (!enc->pcm_file || enc->audio_remaining <= 0 || samples_to_read <= 0) {
|
||||
@@ -8904,6 +9010,15 @@ static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Handle TAD mode
|
||||
if (enc->tad_audio) {
|
||||
if (!enc->has_audio || !enc->pcm_file) {
|
||||
return 1;
|
||||
}
|
||||
// Write one TAD packet per frame
|
||||
return write_tad_packet_samples(enc, output, enc->samples_per_frame);
|
||||
}
|
||||
|
||||
// Handle PCM8 mode
|
||||
if (enc->pcm8_audio) {
|
||||
if (!enc->has_audio || !enc->pcm_file) {
|
||||
@@ -9020,6 +9135,29 @@ static int process_audio_for_gop(tav_encoder_t *enc, int *frame_numbers, int num
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Handle TAD mode: variable chunk size support
|
||||
if (enc->tad_audio) {
|
||||
if (!enc->has_audio || !enc->pcm_file || num_frames == 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Calculate total samples for this GOP
|
||||
int total_samples = num_frames * enc->samples_per_frame;
|
||||
|
||||
// TAD supports variable chunk sizes (non-power-of-2)
|
||||
// We can write the entire GOP in one packet (up to 32768+ samples)
|
||||
if (enc->verbose) {
|
||||
printf("TAD GOP: %d frames, %d total samples\n", num_frames, total_samples);
|
||||
}
|
||||
|
||||
// Write one TAD packet for the entire GOP
|
||||
if (!write_tad_packet_samples(enc, output, total_samples)) {
|
||||
// No more audio data
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Handle PCM8 mode: emit mega packet(s) evenly divided if exceeding 32768 samples
|
||||
if (enc->pcm8_audio) {
|
||||
if (!enc->has_audio || !enc->pcm_file || num_frames == 0) {
|
||||
@@ -9448,6 +9586,7 @@ int main(int argc, char *argv[]) {
|
||||
{"pcm-audio", no_argument, 0, 1027},
|
||||
{"native-audio", no_argument, 0, 1027},
|
||||
{"native-audio-format", no_argument, 0, 1027},
|
||||
{"tad-audio", no_argument, 0, 1028},
|
||||
{"help", no_argument, 0, '?'},
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
@@ -9668,6 +9807,10 @@ int main(int argc, char *argv[]) {
|
||||
enc->pcm8_audio = 1;
|
||||
printf("8-bit PCM audio mode enabled (packet 0x21)\n");
|
||||
break;
|
||||
case 1028: // --tad-audio
|
||||
enc->tad_audio = 1;
|
||||
printf("TAD audio mode enabled (packet 0x24, quality follows -q)\n");
|
||||
break;
|
||||
case 'a':
|
||||
int bitrate = atoi(optarg);
|
||||
int valid_bitrate = validate_mp2_bitrate(bitrate);
|
||||
|
||||
Reference in New Issue
Block a user