TAD: Terrarum Advanced Audio to use with video compression

2026-03-12 06:01:50 +09:00 · 2025-10-23 18:56:57 +09:00
parent 6f669f4fd9
commit a9319fd812
10 changed files with 1887 additions and 22 deletions
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -11,14 +11,14 @@
 #include <unistd.h>
 #include <sys/wait.h>
 #include <getopt.h>
+#include "encoder_tad.h"  // TAD audio encoder
 #include <ctype.h>
 #include <sys/time.h>
 #include <time.h>
 #include <limits.h>
 #include <float.h>
-#include <fftw3.h>

-#define ENCODER_VENDOR_STRING "Encoder-TAV 20251023 (3d-dwt)"
+#define ENCODER_VENDOR_STRING "Encoder-TAV 20251024 (3d-dwt,tad)"

 // TSVM Advanced Video (TAV) format constants
 #define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56"  // "\x1FTSVM TAV"
@@ -55,6 +55,7 @@
 #define TAV_PACKET_BFRAME_ADAPTIVE 0x17  // B-frame with adaptive quad-tree block partitioning (bidirectional prediction)
 #define TAV_PACKET_AUDIO_MP2       0x20  // MP2 audio
 #define TAV_PACKET_AUDIO_PCM8      0x21  // 8-bit PCM audio (zstd compressed)
+#define TAV_PACKET_AUDIO_TAD       0x24  // TAD audio (DWT-based perceptual codec)
 #define TAV_PACKET_SUBTITLE        0x30  // Subtitle packet
 #define TAV_PACKET_AUDIO_TRACK     0x40  // Separate audio track (full MP2 file)
 #define TAV_PACKET_EXTENDED_HDR    0xEF  // Extended header packet
@@ -63,6 +64,15 @@
 #define TAV_PACKET_SYNC_NTSC       0xFE  // NTSC Sync packet
 #define TAV_PACKET_SYNC            0xFF  // Sync packet

+// TAD (Terrarum Advanced Audio) settings
+#define TAD_MIN_CHUNK_SIZE 1024       // Minimum: 1024 samples (supports non-power-of-2)
+#define TAD_SAMPLE_RATE 32000
+#define TAD_CHANNELS 2  // Stereo
+#define TAD_SIGMAP_2BIT 1  // 2-bit: 00=0, 01=+1, 10=-1, 11=other
+#define TAD_QUALITY_MIN 0
+#define TAD_QUALITY_MAX 5
+#define TAD_ZSTD_LEVEL 7
+
 // DWT settings
 #define TILE_SIZE_X 640
 #define TILE_SIZE_Y 540
@@ -1753,6 +1763,7 @@ typedef struct tav_encoder_s {
    int delta_haar_levels; // Number of Haar DWT levels to apply to delta coefficients (0 = disabled)
    int separate_audio_track; // 1 = write entire MP2 file as packet 0x40 after header, 0 = interleave audio (default)
    int pcm8_audio; // 1 = use 8-bit PCM audio (packet 0x21), 0 = use MP2 (default)
+    int tad_audio; // 1 = use TAD audio (packet 0x24), 0 = use MP2/PCM8 (default, quality follows quality_level)

    // Frame buffers - ping-pong implementation
    uint8_t *frame_rgb[2];      // [0] and [1] alternate between current and previous
@@ -2272,6 +2283,7 @@ static void show_usage(const char *program_name) {
    printf("                          Valid values: 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384\n");
 //    printf("  --separate-audio-track  Write entire audio track as single packet instead of interleaved\n");
    printf("  --pcm8-audio            Use 8-bit PCM audio instead of MP2 (TSVM native audio format)\n");
+    printf("  --tad-audio             Use TAD (DWT-based perceptual) audio codec (packet 0x24, quality follows -q)\n");
    printf("  -S, --subtitles FILE    SubRip (.srt) or SAMI (.smi) subtitle file\n");
    printf("  --fontrom-lo FILE       Low font ROM file for internationalised subtitles\n");
    printf("  --fontrom-hi FILE       High font ROM file for internationalised subtitles\n");
@@ -2361,6 +2373,7 @@ static tav_encoder_t* create_encoder(void) {
    enc->delta_haar_levels = TEMPORAL_DECOMP_LEVEL;
    enc->separate_audio_track = 0;  // Default: interleave audio packets
    enc->pcm8_audio = 0;  // Default: use MP2 audio
+    enc->tad_audio = 0;  // Default: use MP2 audio (TAD quality follows quality_level)

    // GOP / temporal DWT settings
    enc->enable_temporal_dwt = 1;  // Mutually exclusive with use_delta_encoding
@@ -8050,11 +8063,15 @@ static int start_audio_conversion(tav_encoder_t *enc) {

    char command[2048];

-    if (enc->pcm8_audio) {
-        // Extract PCM16LE for PCM8 mode
-        printf("  Audio format: PCM16LE 32kHz stereo (will be converted to 8-bit)\n");
+    if (enc->pcm8_audio || enc->tad_audio) {
+        // Extract PCM16LE for PCM8/TAD mode
+        if (enc->pcm8_audio) {
+            printf("  Audio format: PCM16LE 32kHz stereo (will be converted to 8-bit PCM)\n");
+        } else {
+            printf("  Audio format: PCM16LE 32kHz stereo (will be encoded with TAD codec)\n");
+        }
        snprintf(command, sizeof(command),
-            "ffmpeg -v quiet -i \"%s\" -f s16le -acodec pcm_s16le -ar %d -ac 2 -y \"%s\" 2>/dev/null",
+            "ffmpeg -v quiet -i \"%s\" -f s16le -acodec pcm_s16le -ar %d -ac 2 -af \"aresample=resampler=soxr:precision=28:cutoff=0.99:dither_scale=0,highpass=f=16\" -y \"%s\" 2>/dev/null",
            enc->input_file, TSVM_AUDIO_SAMPLE_RATE, TEMP_PCM_FILE);

        int result = system(command);
@@ -8806,6 +8823,114 @@ static int write_separate_audio_track(tav_encoder_t *enc, FILE *output) {
    return 1;
 }

+// Write TAD audio packet (0x24) with specified sample count
+// Uses linked TAD encoder (encoder_tad.c)
+//
+// Reads up to samples_to_read stereo PCM16LE samples from enc->pcm_file, zero-pads
+// the chunk to at least TAD_MIN_CHUNK_SIZE samples, encodes it with
+// tad_encode_chunk() and writes the result to output as one 0x24 packet.
+// enc->audio_remaining is decreased by the number of PCM bytes actually read.
+//
+// Returns 1 if a packet was written; 0 if there is no audio left, nothing could
+// be read, a buffer could not be allocated, or the TAD encoder reported failure.
+static int write_tad_packet_samples(tav_encoder_t *enc, FILE *output, int samples_to_read) {
+    if (!enc->pcm_file || enc->audio_remaining <= 0 || samples_to_read <= 0) {
+        return 0;
+    }
+    size_t bytes_to_read = samples_to_read * 2 * sizeof(int16_t);  // Stereo PCM16LE
+
+    // Don't read more than what's available
+    if (bytes_to_read > enc->audio_remaining) {
+        bytes_to_read = enc->audio_remaining;
+        samples_to_read = bytes_to_read / (2 * sizeof(int16_t));
+    }
+
+    if (samples_to_read < TAD_MIN_CHUNK_SIZE) {
+        // Pad to minimum size
+        samples_to_read = TAD_MIN_CHUNK_SIZE;
+    }
+
+    // Allocate PCM16 input buffer
+    int16_t *pcm16_buffer = malloc(samples_to_read * 2 * sizeof(int16_t));
+    if (!pcm16_buffer) {
+        fprintf(stderr, "Error: Failed to allocate TAD PCM buffer\n");
+        return 0;
+    }
+
+    // Read PCM16LE data
+    size_t bytes_read = fread(pcm16_buffer, 1, bytes_to_read, enc->pcm_file);
+    if (bytes_read == 0) {
+        free(pcm16_buffer);
+        return 0;
+    }
+
+    int samples_read = bytes_read / (2 * sizeof(int16_t));
+
+    // Zero-pad if needed
+    if (samples_read < samples_to_read) {
+        memset(&pcm16_buffer[samples_read * 2], 0,
+               (samples_to_read - samples_read) * 2 * sizeof(int16_t));
+    }
+
+    // Encode with TAD encoder (linked from encoder_tad.o)
+    int tad_quality = enc->quality_level;  // Use video quality level for audio
+    if (tad_quality > TAD_QUALITY_MAX) tad_quality = TAD_QUALITY_MAX;
+    if (tad_quality < TAD_QUALITY_MIN) tad_quality = TAD_QUALITY_MIN;
+
+    // Allocate output buffer (generous size for TAD chunk)
+    size_t max_output_size = samples_to_read * 4 * sizeof(int16_t) + 1024;
+    uint8_t *tad_output = malloc(max_output_size);
+    if (!tad_output) {
+        fprintf(stderr, "Error: Failed to allocate TAD output buffer\n");
+        free(pcm16_buffer);
+        return 0;
+    }
+
+    size_t tad_encoded_size = tad_encode_chunk(pcm16_buffer, samples_to_read, tad_quality, 1, tad_output);
+
+    if (tad_encoded_size == 0) {
+        fprintf(stderr, "Error: TAD encoding failed\n");
+        free(pcm16_buffer);
+        free(tad_output);
+        return 0;
+    }
+
+    // Parse TAD chunk format: [sample_count][payload_size][payload]
+    // Copy the header fields out with memcpy: payload_size sits at byte offset 2,
+    // so reading it through a casted uint32_t pointer would be a misaligned access.
+    uint16_t sample_count;
+    uint32_t tad_payload_size;
+    memcpy(&sample_count, tad_output, sizeof(sample_count));
+    memcpy(&tad_payload_size, tad_output + sizeof(sample_count), sizeof(tad_payload_size));
+    uint8_t *tad_payload = tad_output + sizeof(sample_count) + sizeof(tad_payload_size);
+
+    // Write TAV packet 0x24: [0x24][payload_size+2][sample_count][compressed_size][compressed_data]
+    // NOTE(review): size field is payload+2 while 2+4+payload bytes follow it - confirm against the decoder.
+    uint8_t packet_type = TAV_PACKET_AUDIO_TAD;
+    fwrite(&packet_type, 1, 1, output);
+
+    uint32_t tav_payload_size = (uint32_t)tad_payload_size;
+    uint32_t tav_payload_size_plus_two = (uint32_t)tad_payload_size + 2;
+    fwrite(&tav_payload_size_plus_two, sizeof(uint32_t), 1, output);
+    fwrite(&sample_count, sizeof(uint16_t), 1, output);
+    fwrite(&tav_payload_size, sizeof(uint32_t), 1, output);
+    fwrite(tad_payload, 1, tad_payload_size, output);
+
+    // Update audio remaining
+    enc->audio_remaining -= bytes_read;
+
+    if (enc->verbose) {
+        printf("TAD packet: %d samples, %u bytes compressed (Q%d)\n",
+               sample_count, tad_payload_size, tad_quality);
+    }
+
+    // Cleanup
+    free(pcm16_buffer);
+    free(tad_output);
+
+    return 1;
+}
+
 // Write PCM8 audio packet (0x21) with specified sample count
 static int write_pcm8_packet_samples(tav_encoder_t *enc, FILE *output, int samples_to_read) {
    if (!enc->pcm_file || enc->audio_remaining <= 0 || samples_to_read <= 0) {
@@ -8904,6 +9010,15 @@ static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output) {
        return 1;
    }

+    // Handle TAD mode
+    if (enc->tad_audio) {
+        if (!enc->has_audio || !enc->pcm_file) {
+            return 1;
+        }
+        // Write one TAD packet per frame
+        return write_tad_packet_samples(enc, output, enc->samples_per_frame);
+    }
+
    // Handle PCM8 mode
    if (enc->pcm8_audio) {
        if (!enc->has_audio || !enc->pcm_file) {
@@ -9020,6 +9135,29 @@ static int process_audio_for_gop(tav_encoder_t *enc, int *frame_numbers, int num
        return 1;
    }

+    // Handle TAD mode: variable chunk size support
+    if (enc->tad_audio) {
+        if (!enc->has_audio || !enc->pcm_file || num_frames == 0) {
+            return 1;
+        }
+
+        // Calculate total samples for this GOP
+        int total_samples = num_frames * enc->samples_per_frame;
+
+        // TAD supports variable chunk sizes (non-power-of-2)
+        // We can write the entire GOP in one packet (up to 32768+ samples)
+        if (enc->verbose) {
+            printf("TAD GOP: %d frames, %d total samples\n", num_frames, total_samples);
+        }
+
+        // Write one TAD packet for the entire GOP
+        if (!write_tad_packet_samples(enc, output, total_samples)) {
+            // No more audio data
+        }
+
+        return 1;
+    }
+
    // Handle PCM8 mode: emit mega packet(s) evenly divided if exceeding 32768 samples
    if (enc->pcm8_audio) {
        if (!enc->has_audio || !enc->pcm_file || num_frames == 0) {
@@ -9448,6 +9586,7 @@ int main(int argc, char *argv[]) {
        {"pcm-audio", no_argument, 0, 1027},
        {"native-audio", no_argument, 0, 1027},
        {"native-audio-format", no_argument, 0, 1027},
+        {"tad-audio", no_argument, 0, 1028},
        {"help", no_argument, 0, '?'},
        {0, 0, 0, 0}
    };
@@ -9668,6 +9807,10 @@ int main(int argc, char *argv[]) {
                enc->pcm8_audio = 1;
                printf("8-bit PCM audio mode enabled (packet 0x21)\n");
                break;
+            case 1028: // --tad-audio
+                enc->tad_audio = 1;
+                printf("TAD audio mode enabled (packet 0x24, quality follows -q)\n");
+                break;
            case 'a':
                int bitrate = atoi(optarg);
                int valid_bitrate = validate_mp2_bitrate(bitrate);