TAD: Terrarum Advanced Audio to use with video compression

This commit is contained in:
minjaesong
2025-10-23 18:56:57 +09:00
parent 6f669f4fd9
commit a9319fd812
10 changed files with 1887 additions and 22 deletions

View File

@@ -13,6 +13,7 @@ OPENCV_LIBS = $(shell pkg-config --libs opencv4)
# Source files and targets
TARGETS = tev tav tav_decoder
TAD_TARGETS = encoder_tad decoder_tad
TEST_TARGETS = test_mesh_warp test_mesh_roundtrip
# Build all encoders
@@ -23,17 +24,31 @@ tev: encoder_tev.c
rm -f encoder_tev
$(CC) $(CFLAGS) -o encoder_tev $< $(LIBS)
tav: encoder_tav.c encoder_tav_opencv.cpp estimate_affine_from_blocks.cpp
rm -f encoder_tav encoder_tav.o encoder_tav_opencv.o estimate_affine_from_blocks.o
tav: encoder_tav.c encoder_tad.c encoder_tav_opencv.cpp estimate_affine_from_blocks.cpp
rm -f encoder_tav encoder_tav.o encoder_tad.o encoder_tav_opencv.o
$(CC) $(CFLAGS) -c encoder_tav.c -o encoder_tav.o
$(CC) $(CFLAGS) -c encoder_tad.c -o encoder_tad.o
$(CXX) $(CXXFLAGS) $(OPENCV_CFLAGS) -c encoder_tav_opencv.cpp -o encoder_tav_opencv.o
$(CXX) $(CXXFLAGS) -c estimate_affine_from_blocks.cpp -o estimate_affine_from_blocks.o
$(CXX) -o encoder_tav encoder_tav.o encoder_tav_opencv.o estimate_affine_from_blocks.o $(LIBS) -lfftw3f $(OPENCV_LIBS)
$(CXX) -o encoder_tav encoder_tav.o encoder_tad.o encoder_tav_opencv.o $(LIBS) $(OPENCV_LIBS)
tav_decoder: decoder_tav.c
rm -f decoder_tav
$(CC) $(CFLAGS) -o decoder_tav $< $(LIBS)
# Build TAD (Terrarum Advanced Audio) tools
encoder_tad: encoder_tad_standalone.c encoder_tad.c encoder_tad.h
rm -f encoder_tad encoder_tad_standalone.o encoder_tad.o
$(CC) $(CFLAGS) -c encoder_tad.c -o encoder_tad.o
$(CC) $(CFLAGS) -c encoder_tad_standalone.c -o encoder_tad_standalone.o
$(CC) -o encoder_tad encoder_tad_standalone.o encoder_tad.o $(LIBS)
decoder_tad: decoder_tad.c
rm -f decoder_tad
$(CC) $(CFLAGS) -o decoder_tad $< $(LIBS)
# Build all TAD tools
tad: $(TAD_TARGETS)
# Build test programs
test_mesh_warp: test_mesh_warp.cpp encoder_tav_opencv.cpp estimate_affine_from_blocks.cpp
rm -f test_mesh_warp test_mesh_warp.o
@@ -63,31 +78,34 @@ debug: $(TARGETS)
# Clean build artifacts
clean:
rm -f $(TARGETS) *.o
rm -f $(TARGETS) $(TAD_TARGETS) *.o
# Install (copy to PATH)
install: $(TARGETS)
install: $(TARGETS) $(TAD_TARGETS)
cp encoder_tev /usr/local/bin/
cp encoder_tav /usr/local/bin/
cp decoder_tav /usr/local/bin/
cp encoder_tad /usr/local/bin/
cp decoder_tad /usr/local/bin/
# Check for required dependencies
check-deps:
@echo "Checking dependencies..."
@echo "Using Zstd compression for better efficiency"
@pkg-config --exists libzstd || (echo "Error: libzstd-dev not found. Install with: sudo apt install libzstd-dev" && exit 1)
@pkg-config --exists fftw3f || (echo "Error: libfftw3-dev not found. Install with: sudo apt install libfftw3-dev" && exit 1)
@pkg-config --exists opencv4 || (echo "Error: OpenCV 4 not found. Install with: sudo apt install libopencv-dev" && exit 1)
@echo "All dependencies found."
# Help
help:
@echo "TSVM Enhanced Video (TEV) Encoder"
@echo "TSVM Enhanced Video (TEV) and Audio (TAD) Encoders"
@echo ""
@echo "Targets:"
@echo " all - Build both encoders (default)"
@echo " tev - Build the main TEV encoder"
@echo " tav - Build the advanced TAV encoder"
@echo " all - Build video encoders (default)"
@echo " tev - Build the TEV video encoder"
@echo " tav - Build the TAV advanced video encoder"
@echo " tad - Build all TAD audio tools (encoder, decoder)"
@echo " encoder_tad - Build TAD audio encoder"
@echo " decoder_tad - Build TAD audio decoder"
@echo " debug - Build with debug symbols"
@echo " clean - Remove build artifacts"
@echo " install - Install to /usr/local/bin"
@@ -95,9 +113,10 @@ help:
@echo " help - Show this help"
@echo ""
@echo "Usage:"
@echo " make # Build both encoders"
@echo " make # Build video encoders"
@echo " make tev # Build TEV encoder"
@echo " make tav # Build TAV encoder"
@echo " sudo make install # Install both encoders"
@echo " make tad # Build all TAD audio tools"
@echo " sudo make install # Install all encoders"
.PHONY: all clean install check-deps help debug
.PHONY: all clean install check-deps help debug tad

576
video_encoder/decoder_tad.c Normal file
View File

@@ -0,0 +1,576 @@
// Created by CuriousTorvald and Claude on 2025-10-23.
// TAD (Terrarum Advanced Audio) Decoder - Reconstructs audio from TAD format
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
#include <zstd.h>
#include <getopt.h>
#define DECODER_VENDOR_STRING "Decoder-TAD 20251023"
// TAD format constants (must match encoder)
#define TAD_DEFAULT_CHUNK_SIZE 32768
#define TAD_MIN_CHUNK_SIZE 1024
#define TAD_SAMPLE_RATE 32000
#define TAD_CHANNELS 2
// Significance map methods
#define TAD_SIGMAP_1BIT 0
#define TAD_SIGMAP_2BIT 1
#define TAD_SIGMAP_RLE 2
// Quality levels
#define TAD_QUALITY_MIN 0
#define TAD_QUALITY_MAX 5
// Clamp x into the closed interval [min, max]; values inside are returned unchanged.
static inline float FCLAMP(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
// Calculate the number of DWT levels for a chunk of `chunk_size` samples.
//
// Returns ceil(log2(chunk_size)) - 2, or -1 (after printing an error) when
// chunk_size is below TAD_MIN_CHUNK_SIZE.
//
// The encoder (encoder_tad.c) rounds the exponent up for chunk sizes that are
// not a power of two; the decoder must do the same, otherwise both sides
// derive different level counts from the same sample count and the quantiser
// weights and inverse transform no longer mirror the encoder. For power-of-two
// sizes the result is unchanged (e.g. 1024 -> 8, 32768 -> 13).
static int calculate_dwt_levels(int chunk_size) {
    if (chunk_size < TAD_MIN_CHUNK_SIZE) {
        fprintf(stderr, "Error: Chunk size %d is below minimum %d\n", chunk_size, TAD_MIN_CHUNK_SIZE);
        return -1;
    }

    // floor(log2(chunk_size))
    int levels = 0;
    int size = chunk_size;
    while (size > 1) {
        size >>= 1;
        levels++;
    }

    // Round up when chunk_size is not an exact power of two (matches encoder)
    if ((1 << levels) < chunk_size) {
        levels++;
    }

    return levels - 2;
}
//=============================================================================
// Haar DWT Implementation (inverse only needed for decoder)
//=============================================================================
// Single-level inverse Haar step, performed in place.
// Input layout: data[0..half) holds the low-pass band and data[half..length)
// the high-pass band, with half = ceil(length / 2). Each (low, high) pair
// expands to (low + high, low - high); for odd lengths the last low-pass
// sample is copied through unchanged.
static void dwt_haar_inverse_1d(float *data, int length) {
    if (length < 2) return;

    const int half = (length + 1) / 2;
    const int pairs = length / 2;
    float *out = malloc(length * sizeof(float));

    for (int k = 0; k < pairs; k++) {
        const float low = data[k];
        const float high = data[half + k];
        out[2 * k] = low + high;
        out[2 * k + 1] = low - high;
    }

    if (pairs < half) {
        // Odd length: the trailing low-pass sample has no high-pass partner
        out[2 * pairs] = data[pairs];
    }

    memcpy(data, out, length * sizeof(float));
    free(out);
}
// Inverse single-level DD-4 (four-point interpolating Deslauriers-Dubuc)
// lifting step, performed in place.
// Input layout: data[0..n_low) is the low-pass band, data[n_low..length) the
// high-pass band. Output is the interleaved signal (even samples from the
// low band, odd samples from the high band).
static void dwt_dd4_inverse_1d(float *data, int length) {
    if (length < 2) return;

    const int n_low = (length + 1) / 2;
    const int n_high = length / 2;

    // n_low + n_high == length, so both bands are captured by one copy
    float *work = malloc(length * sizeof(float));
    memcpy(work, data, length * sizeof(float));
    float *low = work;
    float *high = work + n_low;

    // Undo update step: s[k] -= 0.25 * (d[k-1] + d[k]), missing neighbours count as 0
    for (int k = 0; k < n_low; k++) {
        float d_prev = 0.0f;
        float d_curr = 0.0f;
        if (k < n_high) {
            d_curr = high[k];
        }
        if (k > 0 && k - 1 < n_high) {
            d_prev = high[k - 1];
        }
        low[k] -= 0.25f * (d_prev + d_curr);
    }

    // Undo prediction step: d[k] += P(s[k-1], s[k], s[k+1], s[k+2])
    for (int k = 0; k < n_high; k++) {
        const float s_m1 = low[(k > 0) ? k - 1 : 0];
        const float s_0 = low[k];
        const float s_1 = low[(k + 1 < n_low) ? k + 1 : n_low - 1];

        float s_2;
        if (k + 2 < n_low) {
            s_2 = low[k + 2];
        } else if (n_low > 1) {
            s_2 = low[n_low - 2];
        } else {
            s_2 = low[n_low - 1];
        }

        const float prediction = (-1.0f/16.0f)*s_m1 + (9.0f/16.0f)*s_0 +
                                 (9.0f/16.0f)*s_1 + (-1.0f/16.0f)*s_2;
        high[k] += prediction;
    }

    // Interleave the two bands back into sample order
    for (int k = 0; k < n_high; k++) {
        data[2 * k] = low[k];
        data[2 * k + 1] = high[k];
    }
    if (n_low > n_high) {
        data[2 * n_high] = low[n_high];
    }

    free(work);
}
// Multi-level inverse DWT, in place. Despite the historical name this applies
// the DD-4 inverse step (dwt_dd4_inverse_1d), not Haar.
//
// The forward transform in encoder_tad.c processes sub-signals of length
// L0 = length, L1 = ceil(L0 / 2), L2 = ceil(L1 / 2), ... for `levels` levels.
// The inverse has to revisit exactly those lengths in reverse order.
// Reconstructing them by doubling from the deepest length is only correct
// when every intermediate length was even: for 1025 samples the forward
// lengths are 1025 -> 513 -> 257 -> 129, but doubling 129 gives 258.
// Each level's length is therefore recomputed from `length` with the same
// ceil-halving rule the encoder uses. For lengths divisible by 2^levels
// (e.g. 32768 with 13 levels) the sequence of calls is unchanged.
static void dwt_haar_inverse_multilevel(float *data, int length, int levels) {
    for (int level = levels - 1; level >= 0; level--) {
        // Length of the sub-signal the forward transform handled at this level
        int level_length = length;
        for (int k = 0; k < level; k++) {
            level_length = (level_length + 1) / 2;
        }

        dwt_dd4_inverse_1d(data, level_length);
    }
}
//=============================================================================
// M/S Stereo Correlation (inverse of decorrelation)
//=============================================================================
// Convert `count` signed mid/side samples back to left/right:
//   L = M + S, R = M - S,
// saturate each to [-128, 127] and bias by +128 to produce unsigned 8-bit PCM.
static void ms_correlate(const int8_t *mid, const int8_t *side, uint8_t *left, uint8_t *right, size_t count) {
    for (size_t n = 0; n < count; n++) {
        const int32_t m = mid[n];
        const int32_t s = side[n];

        int32_t sum = m + s;
        int32_t diff = m - s;

        // Saturate to the signed 8-bit range
        sum = (sum < -128) ? -128 : ((sum > 127) ? 127 : sum);
        diff = (diff < -128) ? -128 : ((diff > 127) ? 127 : diff);

        // Signed -> unsigned: shift [-128, 127] to [0, 255]
        left[n] = (uint8_t)(sum + 128);
        right[n] = (uint8_t)(diff + 128);
    }
}
//=============================================================================
// Dequantization (inverse of quantization)
//=============================================================================
// Fill weights[0 .. min(dwt_levels, 16)) with the per-band quantiser step
// sizes for the given quality and decomposition depth.
//
// quality    - quality level; values below 3 scale every weight up by
//              1 + (3 - quality) * 0.5, values >= 3 use a scale of 1.
// dwt_levels - number of DWT levels; selects the table row.
// weights    - output array; the caller must provide room for
//              min(dwt_levels, 16) floats.
//
// Every weight is clamped to [1, 1000], so table entries below 1 only take
// effect after scaling.
//
// The table has 17 rows (depths 0..16). It used to be declared [16][16] with
// 17 initialiser rows, which is an excess-initialiser constraint violation
// and left row 16 unreachable. The row and column indices are clamped so an
// unexpected depth can no longer index outside the table.
static void get_quantization_weights(int quality, int dwt_levels, float *weights) {
    enum { TABLE_ROWS = 17, TABLE_COLS = 16 };
    static const float base_weights[TABLE_ROWS][TABLE_COLS] = {
        /* 0*/{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
        /* 1*/{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
        /* 2*/{1.0f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 3*/{0.2f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 4*/{0.2f, 0.8f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 5*/{0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 6*/{0.2f, 0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 7*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 8*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 9*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*10*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*11*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*12*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*13*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*14*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f},
        /*15*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f},
        /*16*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f}
    };

    // Keep the row index inside the table
    int row = dwt_levels;
    if (row < 0) row = 0;
    if (row > TABLE_ROWS - 1) row = TABLE_ROWS - 1;

    const float quality_scale = 1.0f + FCLAMP((3 - quality) * 0.5f, 0.0f, 1000.0f);

    for (int i = 0; i < dwt_levels && i < TABLE_COLS; i++) {
        weights[i] = FCLAMP(base_weights[row][i] * quality_scale, 1.0f, 1000.0f);
    }
}
// Convert quantised DWT coefficients back to floats by multiplying each one
// with the weight of the sideband it belongs to.
//
// quantized  - `count` quantised coefficients
// coeffs     - output, `count` floats
// quality    - quality level forwarded to get_quantization_weights()
// chunk_size - number of samples in the chunk; defines the band layout
// dwt_levels - number of DWT levels (1..16)
//
// Band layout: sideband s (0 = low-pass) ends at index
// (chunk_size >> dwt_levels) << s. Sidebands 0 and 1 share weights[0],
// sideband s >= 1 uses weights[s - 1]; anything past the last band boundary
// uses the last weight.
//
// The bands are walked in order instead of searching a heap-allocated
// boundary table for every coefficient, which removes an unchecked malloc
// and the per-coefficient search. Unsupported level counts are rejected up
// front because they would index outside the 16-entry weight array.
static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs, size_t count, int quality, int chunk_size, int dwt_levels) {
    if (dwt_levels < 1 || dwt_levels > 16 || chunk_size < 0) {
        fprintf(stderr, "Error: Unsupported DWT layout (chunk size %d, %d levels)\n", chunk_size, dwt_levels);
        for (size_t i = 0; i < count; i++) {
            coeffs[i] = 0.0f;
        }
        return;
    }

    float weights[16];
    get_quantization_weights(quality, dwt_levels, weights);

    const int first_band_size = chunk_size >> dwt_levels;
    size_t pos = 0;

    // band == dwt_levels + 1 is the catch-all for coefficients past the last boundary
    for (int band = 0; pos < count; band++) {
        size_t band_end = count;
        int sideband = dwt_levels;

        if (band <= dwt_levels) {
            const size_t limit = (size_t)first_band_size << band;
            sideband = band;
            if (limit < band_end) {
                band_end = limit;
            }
        }

        // Map (dwt_levels + 1) sidebands to dwt_levels weights
        int weight_idx = (sideband == 0) ? 0 : sideband - 1;
        if (weight_idx >= dwt_levels) {
            weight_idx = dwt_levels - 1;
        }
        const float weight = weights[weight_idx];

        for (; pos < band_end; pos++) {
            coeffs[pos] = (float)quantized[pos] * weight;
        }
    }
}
//=============================================================================
// Significance Map Decoding
//=============================================================================
// Decode a 1-bit significance map.
//
// Stream layout (as read here):
//   [ceil(count / 8) map bytes][uint32 nonzero_count][nonzero_count x int16]
// Bit (i % 8) of map byte (i / 8) is set when values[i] is non-zero; non-zero
// values are stored in order after the count. Multi-byte fields are read in
// the host's native byte order.
//
// Returns the number of input bytes the encoded map occupies.
//
// The count and the values are fetched with memcpy because they follow an
// arbitrary number of map bytes and are therefore not guaranteed to be
// aligned; dereferencing a casted pointer there is undefined behaviour.
// If the map marks more coefficients than nonzero_count announces, the
// surplus decodes as 0 instead of reading past the declared values.
static size_t decode_sigmap_1bit(const uint8_t *input, int16_t *values, size_t count) {
    const size_t map_bytes = (count + 7) / 8;
    const uint8_t *map = input;
    const uint8_t *read_ptr = input + map_bytes;

    uint32_t nonzero_count;
    memcpy(&nonzero_count, read_ptr, sizeof nonzero_count);
    read_ptr += sizeof nonzero_count;

    const uint8_t *value_bytes = read_ptr;
    uint32_t value_idx = 0;

    // Reconstruct values
    for (size_t i = 0; i < count; i++) {
        int16_t value = 0;

        if ((map[i / 8] >> (i % 8)) & 1u) {
            if (value_idx < nonzero_count) {
                memcpy(&value, value_bytes + (size_t)value_idx * sizeof value, sizeof value);
            }
            value_idx++;
        }

        values[i] = value;
    }

    return map_bytes + sizeof(uint32_t) + nonzero_count * sizeof(int16_t);
}
// Decode a 2-bit significance map.
//
// Stream layout (as read here):
//   [ceil(count * 2 / 8) map bytes][one int16 per code-3 entry]
// Each coefficient owns a 2-bit code, four codes per byte, lowest bits first:
//   0 -> 0, 1 -> +1, 2 -> -1, 3 -> explicit int16 taken from the value list.
// The int16 values are read in the host's native byte order.
//
// Returns the number of input bytes consumed (map plus explicit values).
//
// Codes start at even bit offsets, so a code never straddles a byte boundary;
// the former "spillover" branch (bit offset 7) was unreachable and has been
// removed. The explicit values are read with memcpy because the map can have
// an odd byte length, which leaves the value list misaligned for int16_t.
static size_t decode_sigmap_2bit(const uint8_t *input, int16_t *values, size_t count) {
    const size_t map_bytes = (count * 2 + 7) / 8;
    const uint8_t *map = input;
    const uint8_t *value_bytes = input + map_bytes;

    size_t other_idx = 0;

    for (size_t i = 0; i < count; i++) {
        const uint8_t code = (uint8_t)((map[i / 4] >> ((i % 4) * 2)) & 0x03);

        switch (code) {
        case 0:
            values[i] = 0;
            break;
        case 1:
            values[i] = 1;
            break;
        case 2:
            values[i] = -1;
            break;
        default: {
            // code 3: next explicit value
            int16_t value;
            memcpy(&value, value_bytes + other_idx * sizeof value, sizeof value);
            values[i] = value;
            other_idx++;
            break;
        }
        }
    }

    return map_bytes + other_idx * sizeof(int16_t);
}
// Decode a run-length encoded significance map.
//
// Stream layout (as read here):
//   [uint32 run_count] then run_count times
//   [varint zero_run][int16 value]
// The varint stores 7 payload bits per byte, least significant group first,
// with bit 7 set on every byte except the last. Each run emits zero_run zeros
// followed by the value; a stored value of 0 emits nothing. Output positions
// not covered by any run are filled with zeros. Multi-byte fields are read in
// the host's native byte order.
//
// Returns the number of input bytes consumed.
//
// Fields are read with memcpy because nothing aligns them inside the byte
// stream. Varint groups beyond 32 bits are consumed but ignored, since
// shifting a uint32_t by 32 or more is undefined behaviour.
static size_t decode_sigmap_rle(const uint8_t *input, int16_t *values, size_t count) {
    const uint8_t *read_ptr = input;

    uint32_t run_count;
    memcpy(&run_count, read_ptr, sizeof run_count);
    read_ptr += sizeof run_count;

    size_t value_idx = 0;

    for (uint32_t run = 0; run < run_count; run++) {
        // Decode zero run length (varint)
        uint32_t zero_run = 0;
        unsigned shift = 0;
        uint8_t byte;
        do {
            byte = *read_ptr++;
            if (shift < 32) {
                zero_run |= (uint32_t)(byte & 0x7F) << shift;
                shift += 7;
            }
        } while (byte & 0x80);

        // Fill zeros
        for (uint32_t i = 0; i < zero_run && value_idx < count; i++) {
            values[value_idx++] = 0;
        }

        // Read non-zero value
        int16_t val;
        memcpy(&val, read_ptr, sizeof val);
        read_ptr += sizeof val;

        if (value_idx < count && val != 0) {
            values[value_idx++] = val;
        }
    }

    // Fill remaining with zeros
    while (value_idx < count) {
        values[value_idx++] = 0;
    }

    return (size_t)(read_ptr - input);
}
//=============================================================================
// Chunk Decoding
//=============================================================================
// Decode one TAD chunk into interleaved unsigned 8-bit stereo PCM.
//
// input           - start of the chunk: [uint16 sample_count][uint32 payload_size]
//                   followed by payload_size bytes of Zstd-compressed data
//                   (header fields are read in host byte order)
// input_size      - number of bytes available at `input`
// pcmu8_stereo    - output; must have room for sample_count * 2 bytes
// quality         - quality level the stream was encoded with
// bytes_consumed  - set to header size + payload_size on success
// samples_decoded - set to sample_count on success
//
// Returns 0 on success, -1 on failure (a message is printed to stderr).
//
// Pipeline: Zstd decompress -> two 2-bit significance maps (mid, side) ->
// dequantise -> inverse DWT -> clamp/round to signed 8 bit -> M/S to L/R ->
// interleave.
//
// Compared with the earlier version this validates the header and payload
// against input_size, reads the header with memcpy (the chunk start is not
// guaranteed to be aligned), checks every allocation and releases everything
// on a single cleanup path.
static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_stereo,
                        int quality, size_t *bytes_consumed, size_t *samples_decoded) {
    const size_t header_size = sizeof(uint16_t) + sizeof(uint32_t);

    if (input_size < header_size) {
        fprintf(stderr, "Error: Truncated chunk header (%zu bytes available)\n", input_size);
        return -1;
    }

    // Read chunk header
    uint16_t sample_count;
    uint32_t payload_size;
    memcpy(&sample_count, input, sizeof sample_count);
    memcpy(&payload_size, input + sizeof sample_count, sizeof payload_size);

    const uint8_t *compressed = input + header_size;
    if (payload_size > input_size - header_size) {
        fprintf(stderr, "Error: Chunk payload (%lu bytes) exceeds remaining input (%zu bytes)\n",
                (unsigned long)payload_size, input_size - header_size);
        return -1;
    }

    // Calculate DWT levels from sample count
    const int dwt_levels = calculate_dwt_levels(sample_count);
    if (dwt_levels < 0) {
        fprintf(stderr, "Error: Invalid sample count %u\n", (unsigned)sample_count);
        return -1;
    }

    int rc = -1;
    const size_t n = sample_count;

    // Generous upper bound for the decompressed payload. Two 2-bit maps need
    // at most 2 * (n / 4 + 1 + 2 * n) bytes, which is below 8 * n for every
    // accepted n. The buffer is zero-initialised so that a payload shorter
    // than the maps expect decodes as zeros rather than uninitialised memory.
    const size_t decompressed_size = n * 4 * sizeof(int16_t);
    uint8_t *decompressed = calloc(decompressed_size, 1);

    // Working buffers
    int16_t *quant_mid = malloc(n * sizeof(int16_t));
    int16_t *quant_side = malloc(n * sizeof(int16_t));
    float *dwt_mid = malloc(n * sizeof(float));
    float *dwt_side = malloc(n * sizeof(float));
    int8_t *pcm8_mid = malloc(n * sizeof(int8_t));
    int8_t *pcm8_side = malloc(n * sizeof(int8_t));
    uint8_t *pcm8_left = malloc(n * sizeof(uint8_t));
    uint8_t *pcm8_right = malloc(n * sizeof(uint8_t));

    if (!decompressed || !quant_mid || !quant_side || !dwt_mid || !dwt_side ||
        !pcm8_mid || !pcm8_side || !pcm8_left || !pcm8_right) {
        fprintf(stderr, "Error: Out of memory while decoding chunk\n");
        goto cleanup;
    }

    const size_t actual_size = ZSTD_decompress(decompressed, decompressed_size, compressed, payload_size);
    if (ZSTD_isError(actual_size)) {
        fprintf(stderr, "Error: Zstd decompression failed: %s\n", ZSTD_getErrorName(actual_size));
        goto cleanup;
    }

    *bytes_consumed = header_size + payload_size;
    *samples_decoded = n;

    // Decode significance maps: mid first, side directly after it
    const size_t mid_bytes = decode_sigmap_2bit(decompressed, quant_mid, n);
    (void)decode_sigmap_2bit(decompressed + mid_bytes, quant_side, n);

    // Dequantize
    dequantize_dwt_coefficients(quant_mid, dwt_mid, n, quality, sample_count, dwt_levels);
    dequantize_dwt_coefficients(quant_side, dwt_side, n, quality, sample_count, dwt_levels);

    // Inverse DWT
    dwt_haar_inverse_multilevel(dwt_mid, sample_count, dwt_levels);
    dwt_haar_inverse_multilevel(dwt_side, sample_count, dwt_levels);

    // Convert to signed PCM8 (clamp, then round)
    for (size_t i = 0; i < n; i++) {
        float m = dwt_mid[i];
        float s = dwt_side[i];

        if (m < -128.0f) m = -128.0f;
        if (m > 127.0f) m = 127.0f;
        if (s < -128.0f) s = -128.0f;
        if (s > 127.0f) s = 127.0f;

        pcm8_mid[i] = (int8_t)roundf(m);
        pcm8_side[i] = (int8_t)roundf(s);
    }

    // M/S to L/R correlation
    ms_correlate(pcm8_mid, pcm8_side, pcm8_left, pcm8_right, n);

    // Interleave stereo output (PCMu8)
    for (size_t i = 0; i < n; i++) {
        pcmu8_stereo[i * 2] = pcm8_left[i];
        pcmu8_stereo[i * 2 + 1] = pcm8_right[i];
    }

    rc = 0;

cleanup:
    free(quant_mid);
    free(quant_side);
    free(dwt_mid);
    free(dwt_side);
    free(pcm8_mid);
    free(pcm8_side);
    free(pcm8_left);
    free(pcm8_right);
    free(decompressed);

    return rc;
}
//=============================================================================
// Main Decoder
//=============================================================================
// Print the command-line help text, version string and output-format notes to stdout.
static void print_usage(const char *prog_name) {
    static const char *const option_lines[] = {
        "Options:\n",
        " -i <file> Input TAD file\n",
        " -o <file> Output PCMu8 file (raw 8-bit unsigned stereo @ 32kHz)\n",
        " -q <0-5> Quality level used during encoding (default: 2)\n",
        " -v Verbose output\n",
        " -h, --help Show this help\n",
    };
    const size_t option_line_count = sizeof option_lines / sizeof option_lines[0];

    printf("Usage: %s -i <input> -o <output> [options]\n", prog_name);

    for (size_t n = 0; n < option_line_count; n++) {
        fputs(option_lines[n], stdout);
    }

    printf("\nVersion: %s\n", DECODER_VENDOR_STRING);
    fputs("Output format: PCMu8 (unsigned 8-bit) stereo @ 32000 Hz\n", stdout);
    fputs("To convert to WAV: ffmpeg -f u8 -ar 32000 -ac 2 -i output.raw output.wav\n", stdout);
}
// TAD decoder entry point.
//
// Parses -i/-o/-q/-v/-h, loads the whole input file into memory, decodes it
// chunk by chunk with decode_chunk() and writes the interleaved PCMu8 stereo
// samples to the output file. Returns 0 on success and 1 on any error.
//
// Robustness notes:
//  - decode_chunk() reads a 16-bit sample count, so a chunk may announce up
//    to UINT16_MAX samples. The output buffer is sized for that maximum; a
//    buffer sized for TAD_DEFAULT_CHUNK_SIZE would be overrun by a larger
//    chunk.
//  - File size, allocation, read and write results are all checked.
//  - The -q argument is parsed with strtol so that non-numeric input is
//    rejected instead of silently becoming 0.
int main(int argc, char *argv[]) {
    char *input_file = NULL;
    char *output_file = NULL;
    int quality = 2;  // Must match encoder quality
    int verbose = 0;

    int opt;
    while ((opt = getopt(argc, argv, "i:o:q:vh")) != -1) {
        switch (opt) {
        case 'i':
            input_file = optarg;
            break;
        case 'o':
            output_file = optarg;
            break;
        case 'q': {
            char *end = NULL;
            const long parsed = strtol(optarg, &end, 10);
            if (end == optarg || *end != '\0' ||
                parsed < TAD_QUALITY_MIN || parsed > TAD_QUALITY_MAX) {
                fprintf(stderr, "Error: Quality must be between %d and %d\n",
                        TAD_QUALITY_MIN, TAD_QUALITY_MAX);
                return 1;
            }
            quality = (int)parsed;
            break;
        }
        case 'v':
            verbose = 1;
            break;
        case 'h':
            print_usage(argv[0]);
            return 0;
        default:
            print_usage(argv[0]);
            return 1;
        }
    }

    if (!input_file || !output_file) {
        fprintf(stderr, "Error: Input and output files are required\n");
        print_usage(argv[0]);
        return 1;
    }

    if (verbose) {
        printf("%s\n", DECODER_VENDOR_STRING);
        printf("Input: %s\n", input_file);
        printf("Output: %s\n", output_file);
        printf("Quality: %d\n", quality);
    }

    // Resources released on the common cleanup path
    int status = 1;
    FILE *input = NULL;
    FILE *output = NULL;
    uint8_t *input_data = NULL;
    uint8_t *chunk_output = NULL;

    size_t input_size = 0;
    size_t offset = 0;
    size_t chunk_count = 0;
    size_t total_samples = 0;
    long file_length = -1;

    // Open input file
    input = fopen(input_file, "rb");
    if (!input) {
        fprintf(stderr, "Error: Could not open input file: %s\n", input_file);
        goto cleanup;
    }

    // Get file size
    if (fseek(input, 0, SEEK_END) == 0) {
        file_length = ftell(input);
    }
    if (file_length < 0 || fseek(input, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Error: Could not determine size of input file: %s\n", input_file);
        goto cleanup;
    }
    input_size = (size_t)file_length;

    // Read entire file into memory (allocate at least one byte so an empty
    // file does not depend on malloc(0) behaviour)
    input_data = malloc(input_size > 0 ? input_size : 1);
    if (!input_data) {
        fprintf(stderr, "Error: Out of memory reading input file (%zu bytes)\n", input_size);
        goto cleanup;
    }
    if (fread(input_data, 1, input_size, input) != input_size) {
        fprintf(stderr, "Error: Could not read input file: %s\n", input_file);
        goto cleanup;
    }
    fclose(input);
    input = NULL;

    // Open output file
    output = fopen(output_file, "wb");
    if (!output) {
        fprintf(stderr, "Error: Could not open output file: %s\n", output_file);
        goto cleanup;
    }

    // Room for the largest chunk the 16-bit sample count can describe
    chunk_output = malloc((size_t)UINT16_MAX * TAD_CHANNELS);
    if (!chunk_output) {
        fprintf(stderr, "Error: Out of memory allocating output buffer\n");
        goto cleanup;
    }

    // Decode chunks
    while (offset < input_size) {
        size_t bytes_consumed = 0;
        size_t samples_decoded = 0;

        const int result = decode_chunk(input_data + offset, input_size - offset,
                                        chunk_output, quality, &bytes_consumed, &samples_decoded);
        if (result != 0 || bytes_consumed == 0) {
            fprintf(stderr, "Error: Chunk decoding failed at offset %zu\n", offset);
            goto cleanup;
        }

        // Write decoded chunk (only the actual samples)
        if (fwrite(chunk_output, TAD_CHANNELS, samples_decoded, output) != samples_decoded) {
            fprintf(stderr, "Error: Could not write to output file: %s\n", output_file);
            goto cleanup;
        }

        offset += bytes_consumed;
        total_samples += samples_decoded;
        chunk_count++;

        if (verbose && (chunk_count % 10 == 0)) {
            printf("Decoded chunk %zu (offset %zu/%zu, %zu samples)\r", chunk_count, offset, input_size, samples_decoded);
            fflush(stdout);
        }
    }

    if (verbose) {
        printf("\nDecoding complete!\n");
        printf("Decoded %zu chunks\n", chunk_count);
        printf("Total samples: %zu (%.2f seconds)\n",
               total_samples,
               total_samples / (double)TAD_SAMPLE_RATE);
    }

    // fclose flushes buffered data, so a failure here means lost output
    if (fclose(output) != 0) {
        output = NULL;
        fprintf(stderr, "Error: Could not finish writing output file: %s\n", output_file);
        goto cleanup;
    }
    output = NULL;

    printf("Output written to: %s\n", output_file);
    printf("Format: PCMu8 stereo @ %d Hz\n", TAD_SAMPLE_RATE);
    status = 0;

cleanup:
    if (input) fclose(input);
    if (output) fclose(output);
    free(input_data);
    free(chunk_output);

    return status;
}

459
video_encoder/encoder_tad.c Normal file
View File

@@ -0,0 +1,459 @@
// Created by CuriousTorvald and Claude on 2025-10-23.
// TAD (Terrarum Advanced Audio) Encoder Library - DWT-based audio compression
// This file contains only the encoding functions for use by encoder_tad_standalone.c and encoder_tav.c
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
#include <zstd.h>
#include "encoder_tad.h"
// Forward declarations for internal functions

// Single-level forward wavelet steps. dwt_haar_forward_multilevel() below
// calls the DD-4 variant; the Haar and 9/7 variants are not called in the
// portion of this file visible to review.
static void dwt_haar_forward_1d(float *data, int length);
static void dwt_dd4_forward_1d(float *data, int length);
static void dwt_97_forward_1d(float *data, int length);
// Multi-level forward transform (in place)
static void dwt_haar_forward_multilevel(float *data, int length, int levels);
// Stereo decorrelation: left/right -> mid/side
static void ms_decorrelate(const int8_t *left, const int8_t *right, int8_t *mid, int8_t *side, size_t count);
// Interleaved stereo PCM16 -> signed PCM8; dither_error holds one carry value per channel
static void convert_pcm16_to_pcm8_dithered(const int16_t *pcm16, int8_t *pcm8, int num_samples, int16_t *dither_error);
// Quantisation helpers
static void get_quantization_weights(int quality, int dwt_levels, float *weights);
static int get_deadzone_threshold(int quality);
static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, size_t count, int quality, int apply_deadzone, int chunk_size, int dwt_levels);
// 2-bit significance map writer; returns the number of bytes written to output
static size_t encode_sigmap_2bit(const int16_t *values, size_t count, uint8_t *output);
// Clamp x into the closed interval [min, max]; values inside are returned unchanged.
static inline float FCLAMP(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
// Calculate the number of DWT levels for a chunk (chunk sizes that are not a
// power of two are supported; the minimum is TAD_MIN_CHUNK_SIZE).
//
// Returns ceil(log2(chunk_size)) - 2, or -1 after printing an error when the
// chunk is too small. For a power-of-two chunk this leaves
// chunk_size >> levels == 4 coefficients in the final low-pass band.
static int calculate_dwt_levels(int chunk_size) {
    if (chunk_size < TAD_MIN_CHUNK_SIZE) {
        fprintf(stderr, "Error: Chunk size %d is below minimum %d\n", chunk_size, TAD_MIN_CHUNK_SIZE);
        return -1;
    }

    // floor(log2(chunk_size)): count how often the size can be halved
    int floor_log2 = 0;
    for (int rest = chunk_size; rest > 1; rest >>= 1) {
        floor_log2++;
    }

    // 2^floor_log2 never exceeds chunk_size, so ">=" here means "exact power of two"
    const int is_power_of_two = ((1 << floor_log2) >= chunk_size);
    const int ceil_log2 = is_power_of_two ? floor_log2 : floor_log2 + 1;

    return ceil_log2 - 2;
}
//=============================================================================
// Haar DWT Implementation
//=============================================================================
// Single-level forward Haar step, performed in place.
// Output layout: data[0..half) receives the pairwise averages (low-pass) and
// data[half..length) the pairwise half-differences (high-pass), with
// half = ceil(length / 2). For odd lengths the unpaired last sample is copied
// to the end of the low-pass band.
static void dwt_haar_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int half = (length + 1) / 2;
    const int pairs = length / 2;

    float *scratch = malloc(length * sizeof(float));
    float *low = scratch;
    float *high = scratch + half;

    for (int k = 0; k < pairs; k++) {
        const float first = data[2 * k];
        const float second = data[2 * k + 1];
        low[k] = (first + second) / 2.0f;   // average
        high[k] = (first - second) / 2.0f;  // difference
    }

    if (half > pairs) {
        // Odd length: last sample goes to low-pass
        low[pairs] = data[2 * pairs];
    }

    memcpy(data, scratch, length * sizeof(float));
    free(scratch);
}
// Forward single-level DD-4 (four-point interpolating Deslauriers-Dubuc)
// lifting step, performed in place.
// Even samples form the low-pass band, odd samples the high-pass band.
// Output layout: data[0..n_low) = low-pass, data[n_low..length) = high-pass.
static void dwt_dd4_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int n_low = (length + 1) / 2;
    const int n_high = length / 2;

    float *work = malloc(length * sizeof(float));
    float *low = work;
    float *high = work + n_low;

    // De-interleave: evens -> low, odds -> high
    for (int k = 0; k < n_low; k++) {
        low[k] = data[2 * k];
    }
    for (int k = 0; k < n_high; k++) {
        high[k] = data[2 * k + 1];
    }

    // Prediction step: subtract the four-point interpolation of the
    // neighbouring even samples from each odd sample
    for (int k = 0; k < n_high; k++) {
        const float s_m1 = low[(k > 0) ? k - 1 : 0];  // mirror boundary
        const float s_0 = low[k];
        const float s_1 = low[(k + 1 < n_low) ? k + 1 : n_low - 1];

        float s_2;
        if (k + 2 < n_low) {
            s_2 = low[k + 2];
        } else if (n_low > 1) {
            s_2 = low[n_low - 2];
        } else {
            s_2 = low[n_low - 1];
        }

        const float prediction = (-1.0f/16.0f) * s_m1 + (9.0f/16.0f) * s_0 +
                                 (9.0f/16.0f) * s_1 + (-1.0f/16.0f) * s_2;
        high[k] -= prediction;
    }

    // Update step: s[k] += 0.25 * (d[k-1] + d[k]), missing neighbours count as 0
    for (int k = 0; k < n_low; k++) {
        float d_prev = 0.0f;
        float d_curr = 0.0f;
        if (k < n_high) {
            d_curr = high[k];
        }
        if (k > 0 && k - 1 < n_high) {
            d_prev = high[k - 1];
        }
        low[k] += 0.25f * (d_prev + d_curr);
    }

    memcpy(data, work, length * sizeof(float));
    free(work);
}
// 9/7 lifting helper: predict pass.
// Adds coeff * (s[k] + s[k+1]) to every high-pass sample; the right-hand
// neighbour is replaced by s[k] at the end of the low-pass band.
static void tad97_lift_predict(float *temp, int half, int length, float coeff) {
    for (int k = 0; k < length / 2; k++) {
        if (half + k >= length) {
            continue;
        }
        const float s_curr = temp[k];
        const float s_next = (k + 1 < half) ? temp[k + 1] : s_curr;
        temp[half + k] += coeff * (s_curr + s_next);
    }
}

// 9/7 lifting helper: update pass.
// Adds coeff * (d[k-1] + d[k]) to every low-pass sample; a missing d[k]
// counts as 0 and a missing d[k-1] is replaced by d[k].
static void tad97_lift_update(float *temp, int half, int length, float coeff) {
    for (int k = 0; k < half; k++) {
        const float d_curr = (half + k < length) ? temp[half + k] : 0.0f;
        const float d_prev = (k > 0 && half + k - 1 < length) ? temp[half + k - 1] : d_curr;
        temp[k] += coeff * (d_prev + d_curr);
    }
}

// 1D DWT using lifting scheme for 9/7 irreversible filter (single level, in place).
// Output layout: data[0..half) = low-pass (scaled by K),
// data[half..length) = high-pass (scaled by 1/K).
static void dwt_97_forward_1d(float *data, int length) {
    if (length < 2) return;

    // JPEG2000 9/7 forward lifting coefficients
    const float alpha = -1.586134342f;
    const float beta = -0.052980118f;
    const float gamma = 0.882911076f;
    const float delta = 0.443506852f;
    const float K = 1.230174105f;

    const int half = (length + 1) / 2;
    float *temp = malloc(length * sizeof(float));

    // Split into even (low) and odd (high) samples
    for (int k = 0; k < half; k++) {
        temp[k] = data[2 * k];
    }
    for (int k = 0; k < length / 2; k++) {
        temp[half + k] = data[2 * k + 1];
    }

    tad97_lift_predict(temp, half, length, alpha);  // Step 1: Predict α
    tad97_lift_update(temp, half, length, beta);    // Step 2: Update β
    tad97_lift_predict(temp, half, length, gamma);  // Step 3: Predict γ
    tad97_lift_update(temp, half, length, delta);   // Step 4: Update δ

    // Step 5: Scaling
    for (int k = 0; k < half; k++) {
        temp[k] *= K;
    }
    for (int k = 0; k < length / 2; k++) {
        if (half + k < length) {
            temp[half + k] /= K;
        }
    }

    memcpy(data, temp, length * sizeof(float));
    free(temp);
}
// Apply a multi-level forward DWT in place. Despite the name, each level uses
// the DD-4 step (dwt_dd4_forward_1d). Level 0 covers the whole signal; every
// further level covers the low-pass band of the previous one, whose length is
// ceil(previous / 2).
static void dwt_haar_forward_multilevel(float *data, int length, int levels) {
    int span = length;
    int remaining = levels;

    while (remaining > 0) {
        dwt_dd4_forward_1d(data, span);
        span = (span + 1) / 2;
        remaining--;
    }
}
//=============================================================================
// M/S Stereo Decorrelation
//=============================================================================
// Convert `count` left/right samples to mid/side:
//   mid = (L + R) / 2, side = (L - R) / 2
// The arithmetic is done in 32 bits; integer division truncates toward zero.
static void ms_decorrelate(const int8_t *left, const int8_t *right, int8_t *mid, int8_t *side, size_t count) {
    for (size_t n = 0; n < count; n++) {
        const int32_t l = left[n];
        const int32_t r = right[n];

        const int32_t sum = l + r;
        const int32_t diff = l - r;

        mid[n] = (int8_t)(sum / 2);
        side[n] = (int8_t)(diff / 2);
    }
}
//=============================================================================
// PCM16 to Signed PCM8 Conversion with Dithering
//=============================================================================
// Convert interleaved stereo PCM16 to signed PCM8 with error feedback.
//
// pcm16        - num_samples stereo frames (2 * num_samples values)
// pcm8         - output, 2 * num_samples values
// num_samples  - number of stereo frames
// dither_error - two-element carry (one per channel): the quantisation
//                remainder of the previous sample is added to the next one.
//                The caller keeps this array between calls.
//
// Each sample is divided by 256 with floor rounding and saturated to
// [-128, 127].
//
// Fixes over the earlier version:
//  - When the output saturates, the carried error used to include the
//    clipped excess. A sustained full-scale input (32767) therefore grew the
//    carry by 255 per sample until it no longer fitted in int16_t (after
//    about 129 samples) and wrapped, producing a burst of wrong samples.
//    The carry is now limited to one quantisation step (+/-255).
//  - Floor division and multiplication replace `>> 8` on a possibly negative
//    value (implementation-defined) and `<< 8` on a negative value
//    (undefined behaviour). Results for non-clipping input are unchanged.
static void convert_pcm16_to_pcm8_dithered(const int16_t *pcm16, int8_t *pcm8, int num_samples, int16_t *dither_error) {
    for (int i = 0; i < num_samples; i++) {
        for (int ch = 0; ch < 2; ch++) {  // Stereo: L and R
            const int idx = i * 2 + ch;
            const int32_t sample = (int32_t)pcm16[idx] + dither_error[ch];

            // floor(sample / 256)
            int32_t quantized = sample / 256;
            if (sample % 256 < 0) {
                quantized--;
            }

            if (quantized < -128) quantized = -128;
            if (quantized > 127) quantized = 127;
            pcm8[idx] = (int8_t)quantized;

            // Remainder to carry forward, without any clipped excess
            int32_t error = sample - quantized * 256;
            if (error > 255) error = 255;
            if (error < -255) error = -255;
            dither_error[ch] = (int16_t)error;
        }
    }
}
//=============================================================================
// Quantization with Frequency-Dependent Weighting
//=============================================================================
// Fill weights[0 .. min(dwt_levels, 16)) with the per-band quantiser step
// sizes for the given quality and decomposition depth.
//
// quality    - quality level; values below 3 scale every weight up by
//              1 + (3 - quality) * 0.5, values >= 3 use a scale of 1.
// dwt_levels - number of DWT levels; selects the table row (0..15).
// weights    - output array; the caller must provide room for
//              min(dwt_levels, 16) floats.
//
// Every weight is clamped to [1, 1000], so table entries below 1 only take
// effect after scaling.
//
// The row and column indices are clamped to the table so an unexpected depth
// (which calculate_dwt_levels() can produce for very large chunks) no longer
// reads outside the array. The table is static so it is not rebuilt on the
// stack for every call.
static void get_quantization_weights(int quality, int dwt_levels, float *weights) {
    enum { TABLE_COLS = 16 };
    static const float base_weights[16][TABLE_COLS] = {
        /* 0*/{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
        /* 1*/{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
        /* 2*/{1.0f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 3*/{0.2f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 4*/{0.2f, 0.8f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 5*/{0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 6*/{0.2f, 0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 7*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 8*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /* 9*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*10*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*11*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*12*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*13*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f},
        /*14*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f},
        /*15*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f}
    };
    const int last_row = (int)(sizeof base_weights / sizeof base_weights[0]) - 1;

    // Keep the row index inside the table
    int row = dwt_levels;
    if (row < 0) row = 0;
    if (row > last_row) row = last_row;

    const float quality_scale = 1.0f + FCLAMP((3 - quality) * 0.5f, 0.0f, 1000.0f);

    for (int i = 0; i < dwt_levels && i < TABLE_COLS; i++) {
        weights[i] = FCLAMP(base_weights[row][i] * quality_scale, 1.0f, 1000.0f);
    }
}
// Return the dead-zone threshold for a quality level.
// Valid levels are Q0..Q5; values outside that range are clamped to the
// nearest valid level so an unvalidated quality can never index outside the
// table.
static int get_deadzone_threshold(int quality) {
    static const int thresholds[] = {1, 1, 0, 0, 0, 0};  // Q0 to Q5
    const int last = (int)(sizeof thresholds / sizeof thresholds[0]) - 1;

    if (quality < 0) {
        quality = 0;
    } else if (quality > last) {
        quality = last;
    }

    return thresholds[quality];
}
// Quantise DWT coefficients: divide each one by the weight of its sideband
// and round to the nearest integer.
//
// coeffs         - `count` DWT coefficients
// quantized      - output, `count` int16 values
// quality        - quality level forwarded to the weight/dead-zone tables
// apply_deadzone - non-zero to apply the dead zone to the two highest sidebands
// chunk_size     - number of samples in the chunk; defines the band layout
// dwt_levels     - number of DWT levels (1..16)
//
// Band layout: sideband s (0 = low-pass) ends at index
// (chunk_size >> dwt_levels) << s. Sidebands 0 and 1 share weights[0],
// sideband s >= 1 uses weights[s - 1]; anything past the last band boundary
// is treated as the highest sideband.
//
// Changes over the earlier version:
//  - Bands are walked in order, so the heap-allocated boundary table (whose
//    malloc result was never checked) and the per-coefficient search are gone.
//  - The rounded value is saturated to the int16_t range before conversion;
//    converting an out-of-range float to an integer is undefined behaviour.
//  - Unsupported level counts are rejected up front because they would index
//    outside the 16-entry weight array.
static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, size_t count, int quality, int apply_deadzone, int chunk_size, int dwt_levels) {
    if (dwt_levels < 1 || dwt_levels > 16 || chunk_size < 0) {
        fprintf(stderr, "Error: Unsupported DWT layout (chunk size %d, %d levels)\n", chunk_size, dwt_levels);
        for (size_t i = 0; i < count; i++) {
            quantized[i] = 0;
        }
        return;
    }

    float weights[16];
    get_quantization_weights(quality, dwt_levels, weights);

    const int deadzone = apply_deadzone ? get_deadzone_threshold(quality) : 0;
    const int first_band_size = chunk_size >> dwt_levels;
    size_t pos = 0;

    // band == dwt_levels + 1 is the catch-all for coefficients past the last boundary
    for (int band = 0; pos < count; band++) {
        size_t band_end = count;
        int sideband = dwt_levels;

        if (band <= dwt_levels) {
            const size_t limit = (size_t)first_band_size << band;
            sideband = band;
            if (limit < band_end) {
                band_end = limit;
            }
        }

        // Map (dwt_levels + 1) sidebands to dwt_levels weights
        int weight_idx = (sideband == 0) ? 0 : sideband - 1;
        if (weight_idx >= dwt_levels) {
            weight_idx = dwt_levels - 1;
        }
        const float weight = weights[weight_idx];
        const int in_deadzone_band = apply_deadzone && sideband >= dwt_levels - 1;

        for (; pos < band_end; pos++) {
            float rounded = roundf(coeffs[pos] / weight);

            // Saturate to the representable range before converting
            if (rounded > 32767.0f) rounded = 32767.0f;
            if (rounded < -32768.0f) rounded = -32768.0f;
            int16_t quant_val = (int16_t)rounded;

            // NOTE(review): get_deadzone_threshold() only returns 0 or 1, and
            // the open interval (-deadzone, deadzone) then contains at most
            // the value 0, so this branch currently changes nothing. Confirm
            // whether an inclusive comparison was intended.
            if (in_deadzone_band) {
                if (quant_val > -deadzone && quant_val < deadzone) {
                    quant_val = 0;
                }
            }

            quantized[pos] = quant_val;
        }
    }
}
//=============================================================================
// Significance Map Encoding
//=============================================================================
/**
 * Encode int16 values as a 2-bit significance map followed by escaped values.
 *
 * Layout written to output:
 *   - (count * 2 + 7) / 8 map bytes; value i occupies bits (i % 4) * 2 and
 *     (i % 4) * 2 + 1 of byte i / 4 with codes 00 = 0, 01 = +1, 10 = -1,
 *     11 = other
 *   - one int16 (host byte order) for every value coded as "other", in input
 *     order
 *
 * @param values Input values (count entries)
 * @param count  Number of values to encode
 * @param output Destination; must have room for the map plus
 *               count * sizeof(int16_t) bytes in the worst case
 * @return Total number of bytes written to output
 */
static size_t encode_sigmap_2bit(const int16_t *values, size_t count, uint8_t *output) {
    const size_t map_bytes = (count * 2 + 7) / 8;
    uint8_t *map = output;
    uint8_t *value_out = output + map_bytes;

    memset(map, 0, map_bytes);

    for (size_t i = 0; i < count; i++) {
        const int16_t val = values[i];
        unsigned code;

        if (val == 0) {
            code = 0; // 00
        } else if (val == 1) {
            code = 1; // 01
        } else if (val == -1) {
            code = 2; // 10
        } else {
            code = 3; // 11
            // The value area starts right after the map, which may be an odd
            // offset; memcpy avoids a misaligned int16 store.
            memcpy(value_out, &val, sizeof val);
            value_out += sizeof val;
        }

        // Four 2-bit codes per byte, so a code never straddles a byte boundary.
        map[i / 4] |= (uint8_t)(code << ((i % 4) * 2));
    }

    return (size_t)(value_out - output);
}
//=============================================================================
// Public API: Chunk Encoding
//=============================================================================
/**
 * Encode one chunk of interleaved stereo PCM16 into a TAD chunk.
 *
 * Pipeline: PCM16 -> dithered signed PCM8 -> mid/side decorrelation ->
 * multi-level Haar DWT -> weighted quantization -> 2-bit significance map ->
 * optional Zstd compression.
 *
 * Output layout (header fields in host byte order):
 *   uint16  sample_count  (samples per channel)
 *   uint32  payload_size  (bytes in payload)
 *   bytes   payload       (mid data followed by side data, optionally
 *                          Zstd-compressed as a single block)
 *
 * @param pcm16_stereo Interleaved L,R PCM16 input (num_samples * 2 values)
 * @param num_samples  Samples per channel; must be accepted by
 *                     calculate_dwt_levels() and fit the 16-bit header field
 * @param quality      Quality level passed to the quantizer
 * @param use_zstd     Non-zero to Zstd-compress the payload
 * @param output       Destination buffer; no size is passed in, so the caller
 *                     must provide room for the 6-byte header plus the payload
 * @return Number of bytes written to output, or 0 on error
 */
size_t tad_encode_chunk(const int16_t *pcm16_stereo, size_t num_samples, int quality,
                        int use_zstd, uint8_t *output) {
    const size_t header_size = sizeof(uint16_t) + sizeof(uint32_t);
    size_t result = 0;
    size_t mid_size = 0;
    size_t side_size = 0;
    size_t uncompressed_size = 0;
    size_t payload_size = 0;

    int8_t *pcm8_stereo = NULL;
    int8_t *pcm8_left = NULL;
    int8_t *pcm8_right = NULL;
    int8_t *pcm8_mid = NULL;
    int8_t *pcm8_side = NULL;
    float *dwt_mid = NULL;
    float *dwt_side = NULL;
    int16_t *quant_mid = NULL;
    int16_t *quant_side = NULL;
    uint8_t *temp_buffer = NULL;
    uint8_t *zstd_buffer = NULL;

    // Calculate DWT levels from chunk size
    int dwt_levels = calculate_dwt_levels(num_samples);
    if (dwt_levels < 0) {
        fprintf(stderr, "Error: Invalid chunk size %zu\n", num_samples);
        return 0;
    }

    // The chunk header stores the sample count in 16 bits; refuse to emit a
    // silently truncated count.
    if (num_samples > UINT16_MAX) {
        fprintf(stderr, "Error: Chunk size %zu exceeds the 16-bit sample count field\n", num_samples);
        return 0;
    }

    // Allocate working buffers
    pcm8_stereo = malloc(num_samples * 2 * sizeof(int8_t));
    pcm8_left = malloc(num_samples * sizeof(int8_t));
    pcm8_right = malloc(num_samples * sizeof(int8_t));
    pcm8_mid = malloc(num_samples * sizeof(int8_t));
    pcm8_side = malloc(num_samples * sizeof(int8_t));
    dwt_mid = malloc(num_samples * sizeof(float));
    dwt_side = malloc(num_samples * sizeof(float));
    quant_mid = malloc(num_samples * sizeof(int16_t));
    quant_side = malloc(num_samples * sizeof(int16_t));
    temp_buffer = malloc(num_samples * 4 * sizeof(int16_t));
    if (!pcm8_stereo || !pcm8_left || !pcm8_right || !pcm8_mid || !pcm8_side ||
        !dwt_mid || !dwt_side || !quant_mid || !quant_side || !temp_buffer) {
        fprintf(stderr, "Error: Out of memory while encoding TAD chunk\n");
        goto cleanup;
    }

    // Step 1: Convert PCM16 to signed PCM8 with dithering
    int16_t dither_error[2] = {0, 0};
    convert_pcm16_to_pcm8_dithered(pcm16_stereo, pcm8_stereo, num_samples, dither_error);

    // Deinterleave stereo
    for (size_t i = 0; i < num_samples; i++) {
        pcm8_left[i] = pcm8_stereo[i * 2];
        pcm8_right[i] = pcm8_stereo[i * 2 + 1];
    }

    // Step 2: M/S decorrelation
    ms_decorrelate(pcm8_left, pcm8_right, pcm8_mid, pcm8_side, num_samples);

    // Step 3: Convert to float and apply DWT
    for (size_t i = 0; i < num_samples; i++) {
        dwt_mid[i] = (float)pcm8_mid[i];
        dwt_side[i] = (float)pcm8_side[i];
    }
    dwt_haar_forward_multilevel(dwt_mid, num_samples, dwt_levels);
    dwt_haar_forward_multilevel(dwt_side, num_samples, dwt_levels);

    // Step 4: Quantize with frequency-dependent weights and dead zone
    quantize_dwt_coefficients(dwt_mid, quant_mid, num_samples, quality, 1, num_samples, dwt_levels);
    quantize_dwt_coefficients(dwt_side, quant_side, num_samples, quality, 1, num_samples, dwt_levels);

    // Step 5: Encode with 2-bit significance map (mid first, side right after)
    mid_size = encode_sigmap_2bit(quant_mid, num_samples, temp_buffer);
    side_size = encode_sigmap_2bit(quant_side, num_samples, temp_buffer + mid_size);
    uncompressed_size = mid_size + side_size;

    // Step 6: Optional Zstd compression, written after the 6-byte header
    uint8_t *payload = output + header_size;
    if (use_zstd) {
        const size_t zstd_bound = ZSTD_compressBound(uncompressed_size);
        zstd_buffer = malloc(zstd_bound);
        if (!zstd_buffer) {
            fprintf(stderr, "Error: Out of memory while encoding TAD chunk\n");
            goto cleanup;
        }
        payload_size = ZSTD_compress(zstd_buffer, zstd_bound, temp_buffer, uncompressed_size, TAD_ZSTD_LEVEL);
        if (ZSTD_isError(payload_size)) {
            fprintf(stderr, "Error: Zstd compression failed: %s\n", ZSTD_getErrorName(payload_size));
            goto cleanup;
        }
        memcpy(payload, zstd_buffer, payload_size);
    } else {
        payload_size = uncompressed_size;
        memcpy(payload, temp_buffer, payload_size);
    }

    // Header fields are copied bytewise: output carries no alignment guarantee
    // and the uint32 field sits at offset 2.
    const uint16_t sample_count = (uint16_t)num_samples;
    const uint32_t payload_size32 = (uint32_t)payload_size;
    memcpy(output, &sample_count, sizeof sample_count);
    memcpy(output + sizeof sample_count, &payload_size32, sizeof payload_size32);

    result = header_size + payload_size;

cleanup:
    free(zstd_buffer);
    free(temp_buffer);
    free(quant_side);
    free(quant_mid);
    free(dwt_side);
    free(dwt_mid);
    free(pcm8_side);
    free(pcm8_mid);
    free(pcm8_right);
    free(pcm8_left);
    free(pcm8_stereo);
    return result;
}

View File

@@ -0,0 +1,40 @@
#ifndef TAD_ENCODER_H
#define TAD_ENCODER_H
#include <stdint.h>
#include <stddef.h>
// TAD (Terrarum Advanced Audio) Encoder
// DWT-based perceptual audio codec for TSVM
// Constants
#define TAD_MIN_CHUNK_SIZE 1024 // Minimum: 1024 samples (supports non-power-of-2)
#define TAD_SAMPLE_RATE 32000
#define TAD_CHANNELS 2 // Stereo
#define TAD_SIGMAP_2BIT 1 // 2-bit: 00=0, 01=+1, 10=-1, 11=other
#define TAD_QUALITY_MIN 0
#define TAD_QUALITY_MAX 5
#define TAD_QUALITY_DEFAULT 3
#define TAD_ZSTD_LEVEL 7 // Compression level passed to ZSTD_compress
/**
* Encode audio chunk with TAD codec
*
* @param pcm16_stereo Input PCM16LE stereo samples (interleaved L,R)
* @param num_samples Number of samples per channel (supports non-power-of-2, min 1024);
*                    stored in a 16-bit field of the chunk header
* @param quality Quality level 0-5 (0=lowest, 5=highest)
* @param use_zstd 1=enable Zstd compression, 0=disable
* @param output Output buffer; its size is not passed in, so the caller must
*               provide room for the 6-byte header plus the payload
* @return Number of bytes written to output, or 0 on error
*         (invalid chunk size or Zstd compression failure)
*
* Output format (as written by tad_encode_chunk; this function emits no
* sigmap_method or compressed_flag byte):
* uint16 sample_count (samples per channel)
* uint32 payload_size (bytes in payload)
* * payload (encoded M/S data: mid then side, each a 2-bit significance map
*   followed by the int16 values coded as "other"; optionally Zstd-compressed)
*/
size_t tad_encode_chunk(const int16_t *pcm16_stereo, size_t num_samples, int quality,
int use_zstd, uint8_t *output);
#endif // TAD_ENCODER_H

View File

@@ -11,14 +11,14 @@
#include <unistd.h>
#include <sys/wait.h>
#include <getopt.h>
#include "encoder_tad.h" // TAD audio encoder
#include <ctype.h>
#include <sys/time.h>
#include <time.h>
#include <limits.h>
#include <float.h>
#include <fftw3.h>
#define ENCODER_VENDOR_STRING "Encoder-TAV 20251023 (3d-dwt)"
#define ENCODER_VENDOR_STRING "Encoder-TAV 20251024 (3d-dwt,tad)"
// TSVM Advanced Video (TAV) format constants
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV"
@@ -55,6 +55,7 @@
#define TAV_PACKET_BFRAME_ADAPTIVE 0x17 // B-frame with adaptive quad-tree block partitioning (bidirectional prediction)
#define TAV_PACKET_AUDIO_MP2 0x20 // MP2 audio
#define TAV_PACKET_AUDIO_PCM8 0x21 // 8-bit PCM audio (zstd compressed)
#define TAV_PACKET_AUDIO_TAD 0x24 // TAD audio (DWT-based perceptual codec)
#define TAV_PACKET_SUBTITLE 0x30 // Subtitle packet
#define TAV_PACKET_AUDIO_TRACK 0x40 // Separate audio track (full MP2 file)
#define TAV_PACKET_EXTENDED_HDR 0xEF // Extended header packet
@@ -63,6 +64,15 @@
#define TAV_PACKET_SYNC_NTSC 0xFE // NTSC Sync packet
#define TAV_PACKET_SYNC 0xFF // Sync packet
// TAD (Terrarum Advanced Audio) settings
#define TAD_MIN_CHUNK_SIZE 1024 // Minimum: 1024 samples (supports non-power-of-2)
#define TAD_SAMPLE_RATE 32000
#define TAD_CHANNELS 2 // Stereo
#define TAD_SIGMAP_2BIT 1 // 2-bit: 00=0, 01=+1, 10=-1, 11=other
#define TAD_QUALITY_MIN 0
#define TAD_QUALITY_MAX 5
#define TAD_ZSTD_LEVEL 7
// DWT settings
#define TILE_SIZE_X 640
#define TILE_SIZE_Y 540
@@ -1753,6 +1763,7 @@ typedef struct tav_encoder_s {
int delta_haar_levels; // Number of Haar DWT levels to apply to delta coefficients (0 = disabled)
int separate_audio_track; // 1 = write entire MP2 file as packet 0x40 after header, 0 = interleave audio (default)
int pcm8_audio; // 1 = use 8-bit PCM audio (packet 0x21), 0 = use MP2 (default)
int tad_audio; // 1 = use TAD audio (packet 0x24), 0 = use MP2/PCM8 (default, quality follows quality_level)
// Frame buffers - ping-pong implementation
uint8_t *frame_rgb[2]; // [0] and [1] alternate between current and previous
@@ -2272,6 +2283,7 @@ static void show_usage(const char *program_name) {
printf(" Valid values: 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384\n");
// printf(" --separate-audio-track Write entire audio track as single packet instead of interleaved\n");
printf(" --pcm8-audio Use 8-bit PCM audio instead of MP2 (TSVM native audio format)\n");
printf(" --tad-audio Use TAD (DWT-based perceptual) audio codec (packet 0x24, quality follows -q)\n");
printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n");
printf(" --fontrom-lo FILE Low font ROM file for internationalised subtitles\n");
printf(" --fontrom-hi FILE High font ROM file for internationalised subtitles\n");
@@ -2361,6 +2373,7 @@ static tav_encoder_t* create_encoder(void) {
enc->delta_haar_levels = TEMPORAL_DECOMP_LEVEL;
enc->separate_audio_track = 0; // Default: interleave audio packets
enc->pcm8_audio = 0; // Default: use MP2 audio
enc->tad_audio = 0; // Default: use MP2 audio (TAD quality follows quality_level)
// GOP / temporal DWT settings
enc->enable_temporal_dwt = 1; // Mutually exclusive with use_delta_encoding
@@ -8050,11 +8063,15 @@ static int start_audio_conversion(tav_encoder_t *enc) {
char command[2048];
if (enc->pcm8_audio) {
// Extract PCM16LE for PCM8 mode
printf(" Audio format: PCM16LE 32kHz stereo (will be converted to 8-bit)\n");
if (enc->pcm8_audio || enc->tad_audio) {
// Extract PCM16LE for PCM8/TAD mode
if (enc->pcm8_audio) {
printf(" Audio format: PCM16LE 32kHz stereo (will be converted to 8-bit PCM)\n");
} else {
printf(" Audio format: PCM16LE 32kHz stereo (will be encoded with TAD codec)\n");
}
snprintf(command, sizeof(command),
"ffmpeg -v quiet -i \"%s\" -f s16le -acodec pcm_s16le -ar %d -ac 2 -y \"%s\" 2>/dev/null",
"ffmpeg -v quiet -i \"%s\" -f s16le -acodec pcm_s16le -ar %d -ac 2 -af \"aresample=resampler=soxr:precision=28:cutoff=0.99:dither_scale=0,highpass=f=16\" -y \"%s\" 2>/dev/null",
enc->input_file, TSVM_AUDIO_SAMPLE_RATE, TEMP_PCM_FILE);
int result = system(command);
@@ -8806,6 +8823,95 @@ static int write_separate_audio_track(tav_encoder_t *enc, FILE *output) {
return 1;
}
// Write TAD audio packet (0x24) with specified sample count
// Uses linked TAD encoder (encoder_tad.c)
//
// Reads up to samples_to_read stereo PCM16LE samples from enc->pcm_file,
// zero-pads the chunk up to TAD_MIN_CHUNK_SIZE when the read comes up short,
// encodes it with tad_encode_chunk() (Zstd enabled) and writes one packet to
// output. enc->audio_remaining is reduced by the number of bytes actually read.
//
// Returns 1 when a packet was written, 0 when there is nothing left to read,
// when a buffer could not be allocated, or when encoding failed.
static int write_tad_packet_samples(tav_encoder_t *enc, FILE *output, int samples_to_read) {
    int16_t *pcm16_buffer = NULL;
    uint8_t *tad_output = NULL;
    int packet_written = 0;

    if (!enc->pcm_file || enc->audio_remaining <= 0 || samples_to_read <= 0) {
        return 0;
    }

    size_t bytes_to_read = samples_to_read * 2 * sizeof(int16_t); // Stereo PCM16LE

    // Don't read more than what's available
    if (bytes_to_read > enc->audio_remaining) {
        bytes_to_read = enc->audio_remaining;
        samples_to_read = bytes_to_read / (2 * sizeof(int16_t));
    }

    if (samples_to_read < TAD_MIN_CHUNK_SIZE) {
        // Pad to minimum size
        samples_to_read = TAD_MIN_CHUNK_SIZE;
    }

    // Allocate PCM16 input buffer and output buffer (generous size for TAD chunk)
    size_t max_output_size = samples_to_read * 4 * sizeof(int16_t) + 1024;
    pcm16_buffer = malloc(samples_to_read * 2 * sizeof(int16_t));
    tad_output = malloc(max_output_size);
    if (!pcm16_buffer || !tad_output) {
        fprintf(stderr, "Error: Out of memory for TAD packet buffers\n");
        goto cleanup;
    }

    // Read PCM16LE data
    size_t bytes_read = fread(pcm16_buffer, 1, bytes_to_read, enc->pcm_file);
    if (bytes_read == 0) {
        goto cleanup;
    }

    int samples_read = bytes_read / (2 * sizeof(int16_t));

    // Zero-pad if needed
    if (samples_read < samples_to_read) {
        memset(&pcm16_buffer[samples_read * 2], 0,
               (samples_to_read - samples_read) * 2 * sizeof(int16_t));
    }

    // Encode with TAD encoder (linked from encoder_tad.o)
    int tad_quality = enc->quality_level; // Use video quality level for audio
    if (tad_quality > TAD_QUALITY_MAX) tad_quality = TAD_QUALITY_MAX;
    if (tad_quality < TAD_QUALITY_MIN) tad_quality = TAD_QUALITY_MIN;

    size_t tad_encoded_size = tad_encode_chunk(pcm16_buffer, samples_to_read, tad_quality, 1, tad_output);
    if (tad_encoded_size == 0) {
        fprintf(stderr, "Error: TAD encoding failed\n");
        goto cleanup;
    }

    // Parse TAD chunk format: [sample_count][payload_size][payload]
    // Fields are copied out bytewise because the uint32 sits at offset 2 and
    // is therefore not suitably aligned for a direct load.
    uint16_t sample_count;
    uint32_t tad_payload_size;
    memcpy(&sample_count, tad_output, sizeof sample_count);
    memcpy(&tad_payload_size, tad_output + sizeof sample_count, sizeof tad_payload_size);
    const uint8_t *tad_payload = tad_output + sizeof sample_count + sizeof tad_payload_size;

    // Write TAV packet 0x24: [0x24][payload_size+2][sample_count][compressed_size][compressed_data]
    // NOTE(review): six bytes (sample_count + compressed_size) follow the outer
    // size field, yet it is written as payload_size + 2 - confirm against the
    // decoder before changing.
    uint8_t packet_type = TAV_PACKET_AUDIO_TAD;
    uint32_t tav_payload_size_plus_two = (uint32_t)tad_payload_size + 2;
    fwrite(&packet_type, 1, 1, output);
    fwrite(&tav_payload_size_plus_two, sizeof(uint32_t), 1, output);
    fwrite(&sample_count, sizeof(uint16_t), 1, output);
    fwrite(&tad_payload_size, sizeof(uint32_t), 1, output);
    fwrite(tad_payload, 1, tad_payload_size, output);

    // Update audio remaining
    enc->audio_remaining -= bytes_read;

    if (enc->verbose) {
        printf("TAD packet: %d samples, %u bytes compressed (Q%d)\n",
               sample_count, tad_payload_size, tad_quality);
    }

    packet_written = 1;

cleanup:
    free(pcm16_buffer);
    free(tad_output);
    return packet_written;
}
// Write PCM8 audio packet (0x21) with specified sample count
static int write_pcm8_packet_samples(tav_encoder_t *enc, FILE *output, int samples_to_read) {
if (!enc->pcm_file || enc->audio_remaining <= 0 || samples_to_read <= 0) {
@@ -8904,6 +9010,15 @@ static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output) {
return 1;
}
// Handle TAD mode
if (enc->tad_audio) {
if (!enc->has_audio || !enc->pcm_file) {
return 1;
}
// Write one TAD packet per frame
return write_tad_packet_samples(enc, output, enc->samples_per_frame);
}
// Handle PCM8 mode
if (enc->pcm8_audio) {
if (!enc->has_audio || !enc->pcm_file) {
@@ -9020,6 +9135,29 @@ static int process_audio_for_gop(tav_encoder_t *enc, int *frame_numbers, int num
return 1;
}
// Handle TAD mode: variable chunk size support
if (enc->tad_audio) {
if (!enc->has_audio || !enc->pcm_file || num_frames == 0) {
return 1;
}
// Calculate total samples for this GOP
int total_samples = num_frames * enc->samples_per_frame;
// TAD supports variable chunk sizes (non-power-of-2)
// We can write the entire GOP in one packet (up to 32768+ samples)
if (enc->verbose) {
printf("TAD GOP: %d frames, %d total samples\n", num_frames, total_samples);
}
// Write one TAD packet for the entire GOP
if (!write_tad_packet_samples(enc, output, total_samples)) {
// No more audio data
}
return 1;
}
// Handle PCM8 mode: emit mega packet(s) evenly divided if exceeding 32768 samples
if (enc->pcm8_audio) {
if (!enc->has_audio || !enc->pcm_file || num_frames == 0) {
@@ -9448,6 +9586,7 @@ int main(int argc, char *argv[]) {
{"pcm-audio", no_argument, 0, 1027},
{"native-audio", no_argument, 0, 1027},
{"native-audio-format", no_argument, 0, 1027},
{"tad-audio", no_argument, 0, 1028},
{"help", no_argument, 0, '?'},
{0, 0, 0, 0}
};
@@ -9668,6 +9807,10 @@ int main(int argc, char *argv[]) {
enc->pcm8_audio = 1;
printf("8-bit PCM audio mode enabled (packet 0x21)\n");
break;
case 1028: // --tad-audio
enc->tad_audio = 1;
printf("TAD audio mode enabled (packet 0x24, quality follows -q)\n");
break;
case 'a':
int bitrate = atoi(optarg);
int valid_bitrate = validate_mp2_bitrate(bitrate);