diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js index 9569913..645db75 100644 --- a/assets/disk0/tvdos/bin/playtav.js +++ b/assets/disk0/tvdos/bin/playtav.js @@ -1342,8 +1342,7 @@ try { } else if (packetType === TAV_PACKET_AUDIO_TAD) { - // Legacy MP2 Audio packet (for backwards compatibility) - let payloadLen = seqread.readInt() + let payloadLen = seqread.readInt() // compressed size + 6 if (!tadInitialised) { tadInitialised = true diff --git a/terranmon.txt b/terranmon.txt index 86b32d1..cb90d82 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -1068,7 +1068,7 @@ transmission capability, and region-of-interest coding. ## TAD Packet Structure uint8 Packet type (0x24) - uint32 Compressed Size + 2 + uint32 Compressed Size + 6 uint16 Sample Count uint32 Compressed Size * Zstd-compressed TAD @@ -1781,6 +1781,8 @@ Memory Space 114688..131071 RW: Instrument bin (256 instruments, 64 bytes each) 131072..196607 RW: Play data 1 196608..262143 RW: Play data 2 +262144..327679 RW: TAD Input Buffer +327680..393215 RW: TAD Decode Output Sample bin: just raw sample data thrown in there. You need to keep track of starting point for each sample @@ -1823,7 +1825,7 @@ Sound Adapter MMIO ... 
auto-fill to Play head #4 -40 WO: Media Decoder Control +40 WO: MP2 Decoder Control Write 16 to initialise the MP2 context (call this before the decoding of NEW music) Write 1 to decode the frame as MP2 @@ -1832,6 +1834,11 @@ Sound Adapter MMIO 41 RO: Media Decoder Status Non-zero value indicates the decoder is busy +42 WO: TAD Decoder Control + Write 1 to decode TAD data +43 RW: TAD Quality + Must be set to appropriate value before decoding + 64..2367 RW: MP2 Decoded Samples (unsigned 8-bit stereo) 2368..4095 RW: MP2 Frame to be decoded 4096..4097 RO: MP2 Frame guard bytes; always return 0 on read diff --git a/video_encoder/Makefile b/video_encoder/Makefile index fc79e41..c176d8b 100644 --- a/video_encoder/Makefile +++ b/video_encoder/Makefile @@ -12,7 +12,7 @@ OPENCV_CFLAGS = $(shell pkg-config --cflags opencv4) OPENCV_LIBS = $(shell pkg-config --libs opencv4) # Source files and targets -TARGETS = tev tav tav_decoder +TARGETS = tev tav tav_decoder tav_inspector TAD_TARGETS = encoder_tad decoder_tad TEST_TARGETS = test_mesh_warp test_mesh_roundtrip @@ -35,6 +35,10 @@ tav_decoder: decoder_tav.c rm -f decoder_tav $(CC) $(CFLAGS) -o decoder_tav $< $(LIBS) +tav_inspector: tav_inspector.c + rm -f tav_inspector + $(CC) $(CFLAGS) -o tav_inspector $< $(LIBS) + # Build TAD (Terrarum Advanced Audio) tools encoder_tad: encoder_tad_standalone.c encoder_tad.c encoder_tad.h rm -f encoder_tad encoder_tad_standalone.o encoder_tad.o diff --git a/video_encoder/encoder_tad.c b/video_encoder/encoder_tad.c index fb7ef73..ae4fe99 100644 --- a/video_encoder/encoder_tad.c +++ b/video_encoder/encoder_tad.c @@ -146,7 +146,7 @@ static void get_quantization_weights(int quality, int dwt_levels, float *weights /*15*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f} }; - float quality_scale = 4.0f * (1.0f + FCLAMP((3 - quality) * 0.5f, 0.0f, 1000.0f)); + float quality_scale = 4.0f * (1.0f + FCLAMP((4 - quality) * 0.5f, 0.0f, 1000.0f)); for (int i 
= 0; i < dwt_levels; i++) { weights[i] = base_weights[dwt_levels][i] * quality_scale; @@ -154,7 +154,7 @@ static void get_quantization_weights(int quality, int dwt_levels, float *weights } static int get_deadzone_threshold(int quality) { - const int thresholds[] = {1,1,1,1,1,1}; // Q0 to Q5 + const int thresholds[] = {0,0,0,0,0,0}; // Q0 to Q5 return thresholds[quality]; } diff --git a/video_encoder/encoder_tad_standalone.c b/video_encoder/encoder_tad_standalone.c new file mode 100644 index 0000000..170ab0d --- /dev/null +++ b/video_encoder/encoder_tad_standalone.c @@ -0,0 +1,294 @@ +// Created by CuriousTorvald and Claude on 2025-10-24. +// TAD32 (Terrarum Advanced Audio - Float32 PCM version) Encoder - Standalone program +// Alternative version: Float32 PCM is fed to the encoder, PCM8 conversion only at decoder +// Uses the encoder_tad.c library (encoder_tad.h) for encoding functions + +#include +#include +#include +#include +#include +#include +#include "encoder_tad.h" + +#define ENCODER_VENDOR_STRING "Encoder-TAD32 (PCM32f version) 20251024" + +// TAD32 format constants +#define TAD32_DEFAULT_CHUNK_SIZE 32768 // Default: power of 2 for optimal performance (2^15) + +// Temporary file for FFmpeg PCM extraction +char TEMP_PCM_FILE[42]; + +static void generate_random_filename(char *filename) { + srand(time(NULL)); + + const char charset[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + const int charset_size = sizeof(charset) - 1; + + // Start with the prefix + strcpy(filename, "/tmp/"); + + // Generate 32 random characters + for (int i = 0; i < 32; i++) { + filename[5 + i] = charset[rand() % charset_size]; + } + + // Add the extension + strcpy(filename + 37, ".tad"); + filename[41] = '\0'; // Null terminate +} + +//============================================================================= +// Main Encoder +//============================================================================= + +static void print_usage(const char *prog_name) { + printf("Usage: %s -i -o 
[options]\n", prog_name); + printf("Options:\n"); + printf(" -i Input audio file (any format supported by FFmpeg)\n"); + printf(" -o Output TAD32 file\n"); + printf(" -q <0-5> Quality level (default: %d, higher = better quality)\n", TAD32_QUALITY_DEFAULT); + printf(" --no-zstd Disable Zstd compression\n"); + printf(" -v Verbose output\n"); + printf(" -h, --help Show this help\n"); + printf("\nVersion: %s\n", ENCODER_VENDOR_STRING); + printf("Note: This is the PCM16 alternative version for comparison testing.\n"); + printf(" PCM16 is processed throughout encoding; PCM8 conversion happens at decoder.\n"); +} + +int main(int argc, char *argv[]) { + generate_random_filename(TEMP_PCM_FILE); + + char *input_file = NULL; + char *output_file = NULL; + int quality = TAD32_QUALITY_DEFAULT; + int use_zstd = 1; + int verbose = 0; + + // Parse command line arguments + static struct option long_options[] = { + {"no-zstd", no_argument, 0, 'z'}, + {"help", no_argument, 0, 'h'}, + {0, 0, 0, 0} + }; + + int opt; + int option_index = 0; + while ((opt = getopt_long(argc, argv, "i:o:q:vh", long_options, &option_index)) != -1) { + switch (opt) { + case 'i': + input_file = optarg; + break; + case 'o': + output_file = optarg; + break; + case 'q': + quality = atoi(optarg); + if (quality < TAD32_QUALITY_MIN || quality > TAD32_QUALITY_MAX) { + fprintf(stderr, "Error: Quality must be between %d and %d\n", + TAD32_QUALITY_MIN, TAD32_QUALITY_MAX); + return 1; + } + break; + case 'z': + use_zstd = 0; + break; + case 'v': + verbose = 1; + break; + case 'h': + print_usage(argv[0]); + return 0; + default: + print_usage(argv[0]); + return 1; + } + } + + if (!input_file || !output_file) { + fprintf(stderr, "Error: Input and output files are required\n"); + print_usage(argv[0]); + return 1; + } + + if (verbose) { + printf("%s\n", ENCODER_VENDOR_STRING); + printf("Input: %s\n", input_file); + printf("Output: %s\n", output_file); + printf("Quality: %d\n", quality); + printf("Significance map: 2-bit\n"); 
+ printf("Zstd compression: %s\n", use_zstd ? "enabled" : "disabled"); + } + + // Detect original sample rate for high-quality resampling + char sample_rate_str[32] = "48000"; // Default fallback + char detect_cmd[2048]; + snprintf(detect_cmd, sizeof(detect_cmd), + "ffprobe -v error -select_streams a:0 -show_entries stream=sample_rate " + "-of default=noprint_wrappers=1:nokey=1 \"%s\" 2>/dev/null", + input_file); + + FILE *probe = popen(detect_cmd, "r"); + if (probe) { + if (fgets(sample_rate_str, sizeof(sample_rate_str), probe)) { + // Remove newline + sample_rate_str[strcspn(sample_rate_str, "\n")] = 0; + } + pclose(probe); + } + + int original_rate = atoi(sample_rate_str); + if (original_rate <= 0 || original_rate > 192000) { + original_rate = 48000; // Fallback + } + + if (verbose) { + printf("Detected original sample rate: %d Hz\n", original_rate); + printf("Extracting and resampling audio to %d Hz...\n", TAD32_SAMPLE_RATE); + } + + // Extract and resample in two passes for better quality + // Pass 1: Extract at original sample rate + char temp_original_pcm[256]; + snprintf(temp_original_pcm, sizeof(temp_original_pcm), "%s.orig", TEMP_PCM_FILE); + + char ffmpeg_cmd[2048]; + snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd), + "ffmpeg -hide_banner -v error -i \"%s\" -f f32le -acodec pcm_f32le -ac %d -y \"%s\" 2>&1", + input_file, TAD32_CHANNELS, temp_original_pcm); + + int result = system(ffmpeg_cmd); + if (result != 0) { + fprintf(stderr, "Error: FFmpeg extraction failed\n"); + return 1; + } + + // Pass 2: Resample to 32kHz with high-quality SoXR resampler and highpass filter + snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd), + "ffmpeg -hide_banner -v error -f f32le -ar %d -ac %d -i \"%s\" " + "-f f32le -acodec pcm_f32le -ar %d -ac %d " + "-af \"aresample=resampler=soxr:precision=28:cutoff=0.99:dither_scale=0,highpass=f=16\" " + "-y \"%s\" 2>&1", + original_rate, TAD32_CHANNELS, temp_original_pcm, TAD32_SAMPLE_RATE, TAD32_CHANNELS, TEMP_PCM_FILE); + + result = 
system(ffmpeg_cmd); + remove(temp_original_pcm); // Clean up intermediate file + + if (result != 0) { + fprintf(stderr, "Error: FFmpeg resampling failed\n"); + return 1; + } + + // Open PCM file + FILE *pcm_file = fopen(TEMP_PCM_FILE, "rb"); + if (!pcm_file) { + fprintf(stderr, "Error: Could not open temporary PCM file\n"); + return 1; + } + + // Get file size + fseek(pcm_file, 0, SEEK_END); + size_t pcm_size = ftell(pcm_file); + fseek(pcm_file, 0, SEEK_SET); + + size_t total_samples = pcm_size / (TAD32_CHANNELS * sizeof(float)); + + // Pad to even sample count + if (total_samples % 2 == 1) { + total_samples++; + if (verbose) { + printf("Odd sample count detected, padding with one zero sample\n"); + } + } + + size_t num_chunks = (total_samples + TAD32_DEFAULT_CHUNK_SIZE - 1) / TAD32_DEFAULT_CHUNK_SIZE; + + if (verbose) { + printf("Total samples: %zu (%.2f seconds)\n", total_samples, + (double)total_samples / TAD32_SAMPLE_RATE); + printf("Chunks: %zu (chunk size: %d samples)\n", num_chunks, TAD32_DEFAULT_CHUNK_SIZE); + } + + // Open output file + FILE *output = fopen(output_file, "wb"); + if (!output) { + fprintf(stderr, "Error: Could not open output file\n"); + fclose(pcm_file); + return 1; + } + + // Process chunks using linked TAD32 encoder library + size_t total_output_size = 0; + float *chunk_buffer = malloc(TAD32_DEFAULT_CHUNK_SIZE * TAD32_CHANNELS * sizeof(float)); + uint8_t *output_buffer = malloc(TAD32_DEFAULT_CHUNK_SIZE * 4 * sizeof(float)); // Generous buffer + + for (size_t chunk_idx = 0; chunk_idx < num_chunks; chunk_idx++) { + size_t chunk_samples = TAD32_DEFAULT_CHUNK_SIZE; + size_t remaining = total_samples - (chunk_idx * TAD32_DEFAULT_CHUNK_SIZE); + + if (remaining < TAD32_DEFAULT_CHUNK_SIZE) { + chunk_samples = remaining; + } + + // Read chunk + size_t samples_read = fread(chunk_buffer, TAD32_CHANNELS * sizeof(float), + chunk_samples, pcm_file); + (void)samples_read; // Unused, but kept for compatibility + + // Pad with zeros if necessary + if 
(chunk_samples < TAD32_DEFAULT_CHUNK_SIZE) { + memset(&chunk_buffer[chunk_samples * TAD32_CHANNELS], 0, + (TAD32_DEFAULT_CHUNK_SIZE - chunk_samples) * TAD32_CHANNELS * sizeof(float)); + } + + // Encode chunk using linked tad32_encode_chunk() from encoder_tad.c + size_t encoded_size = tad32_encode_chunk(chunk_buffer, TAD32_DEFAULT_CHUNK_SIZE, quality, + use_zstd, output_buffer); + + if (encoded_size == 0) { + fprintf(stderr, "Error: Chunk encoding failed at chunk %zu\n", chunk_idx); + free(chunk_buffer); + free(output_buffer); + fclose(pcm_file); + fclose(output); + return 1; + } + + // Write chunk to output + fwrite(output_buffer, 1, encoded_size, output); + total_output_size += encoded_size; + + if (verbose && (chunk_idx % 10 == 0 || chunk_idx == num_chunks - 1)) { + printf("Processed chunk %zu/%zu (%.1f%%)\r", chunk_idx + 1, num_chunks, + (chunk_idx + 1) * 100.0 / num_chunks); + fflush(stdout); + } + } + + if (verbose) { + printf("\n"); + } + + // Cleanup + free(chunk_buffer); + free(output_buffer); + fclose(pcm_file); + fclose(output); + remove(TEMP_PCM_FILE); + + // Print statistics + size_t pcmu8_size = total_samples * TAD32_CHANNELS; // PCMu8 baseline + float compression_ratio = (float)pcmu8_size / total_output_size; + + printf("Encoding complete!\n"); + printf("PCMu8 size: %zu bytes\n", pcmu8_size); + printf("TAD32 size: %zu bytes\n", total_output_size); + printf("Compression ratio: %.2f:1 (%.1f%% of PCMu8)\n", + compression_ratio, (total_output_size * 100.0) / pcmu8_size); + + if (compression_ratio < 1.8) { + printf("Warning: Compression ratio below 2:1 target. 
Try higher quality or different settings.\n"); + } + + return 0; +} diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index 11ff9ec..e7f39c3 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -65,13 +65,14 @@ #define TAV_PACKET_SYNC 0xFF // Sync packet // TAD (Terrarum Advanced Audio) settings -#define TAD_MIN_CHUNK_SIZE 1024 // Minimum: 1024 samples (supports non-power-of-2) -#define TAD_SAMPLE_RATE 32000 -#define TAD_CHANNELS 2 // Stereo -#define TAD_SIGMAP_2BIT 1 // 2-bit: 00=0, 01=+1, 10=-1, 11=other -#define TAD_QUALITY_MIN 0 -#define TAD_QUALITY_MAX 5 -#define TAD_ZSTD_LEVEL 7 +// TAD32 constants (updated to match Float32 version) +#define TAD32_MIN_CHUNK_SIZE 1024 // Minimum: 1024 samples +#define TAD32_SAMPLE_RATE 32000 +#define TAD32_CHANNELS 2 // Stereo +#define TAD32_SIGMAP_2BIT 1 // 2-bit: 00=0, 01=+1, 10=-1, 11=other +#define TAD32_QUALITY_MIN 0 +#define TAD32_QUALITY_MAX 5 +#define TAD32_ZSTD_LEVEL 7 // DWT settings #define TILE_SIZE_X 640 @@ -1711,7 +1712,7 @@ typedef struct tav_encoder_s { FILE *output_fp; FILE *mp2_file; FILE *ffmpeg_video_pipe; - FILE *pcm_file; // PCM16LE audio file for PCM8 mode + FILE *pcm_file; // Float32LE audio file for PCM8/TAD32 mode // Video parameters int width, height; @@ -1869,9 +1870,9 @@ typedef struct tav_encoder_s { // PCM8 audio processing int samples_per_frame; // Number of stereo samples per video frame - int16_t *pcm16_buffer; // Buffer for reading PCM16LE data + float *pcm32_buffer; // Buffer for reading Float32LE data uint8_t *pcm8_buffer; // Buffer for converted PCM8 data - int16_t dither_error[2]; // Dithering error for stereo channels [L, R] + float dither_error[2][2]; // 2nd-order noise shaping error: [channel][history] // Subtitle processing subtitle_entry_t *subtitles; @@ -8064,14 +8065,14 @@ static int start_audio_conversion(tav_encoder_t *enc) { char command[2048]; if (enc->pcm8_audio || enc->tad_audio) { - // Extract PCM16LE for PCM8/TAD mode + // 
Extract Float32LE for PCM8/TAD32 mode if (enc->pcm8_audio) { - printf(" Audio format: PCM16LE 32kHz stereo (will be converted to 8-bit PCM)\n"); + printf(" Audio format: Float32LE 32kHz stereo (will be converted to 8-bit PCM)\n"); } else { - printf(" Audio format: PCM16LE 32kHz stereo (will be encoded with TAD codec)\n"); + printf(" Audio format: Float32LE 32kHz stereo (will be encoded with TAD32 codec)\n"); } snprintf(command, sizeof(command), - "ffmpeg -v quiet -i \"%s\" -f s16le -acodec pcm_s16le -ar %d -ac 2 -af \"aresample=resampler=soxr:precision=28:cutoff=0.99:dither_scale=0,highpass=f=16\" -y \"%s\" 2>/dev/null", + "ffmpeg -v quiet -i \"%s\" -f f32le -acodec pcm_f32le -ar %d -ac 2 -af \"aresample=resampler=soxr:precision=28:cutoff=0.99:dither_scale=0,highpass=f=16\" -y \"%s\" 2>/dev/null", enc->input_file, TSVM_AUDIO_SAMPLE_RATE, TEMP_PCM_FILE); int result = system(command); @@ -8085,9 +8086,11 @@ static int start_audio_conversion(tav_encoder_t *enc) { // Calculate samples per frame: ceil(sample_rate / fps) enc->samples_per_frame = (TSVM_AUDIO_SAMPLE_RATE + enc->output_fps - 1) / enc->output_fps; - // Initialize dithering error - enc->dither_error[0] = 0; - enc->dither_error[1] = 0; + // Initialize 2nd-order noise shaping error history + enc->dither_error[0][0] = 0.0f; + enc->dither_error[0][1] = 0.0f; + enc->dither_error[1][0] = 0.0f; + enc->dither_error[1][1] = 0.0f; if (enc->verbose) { printf(" PCM8: %d samples per frame\n", enc->samples_per_frame); @@ -8741,32 +8744,62 @@ static long write_extended_header(tav_encoder_t *enc) { return endt_offset + 4 + 1; // 4 bytes for "ENDT", 1 byte for type } -// Convert PCM16LE to unsigned 8-bit PCM with error-diffusion dithering -static void convert_pcm16_to_pcm8_dithered(tav_encoder_t *enc, const int16_t *pcm16, uint8_t *pcm8, int num_samples) { +// Uniform random in [0, 1) for TPDF dithering +static inline float frand01(void) { + return (float)rand() / ((float)RAND_MAX + 1.0f); +} + +// TPDF (Triangular 
Probability Density Function) noise in [-1, +1) +static inline float tpdf1(void) { + return (frand01() - frand01()); +} + +// Convert Float32LE to unsigned 8-bit PCM with 2nd-order noise-shaped dithering +// Matches decoder_tad.c dithering algorithm for optimal quality +static void convert_pcm32_to_pcm8_dithered(tav_encoder_t *enc, const float *pcm32, uint8_t *pcm8, int num_samples) { + const float b1 = 1.5f; // 1st feedback coefficient + const float b2 = -0.75f; // 2nd feedback coefficient + const float scale = 127.5f; + const float bias = 128.0f; + for (int i = 0; i < num_samples; i++) { for (int ch = 0; ch < 2; ch++) { // Stereo: L and R int idx = i * 2 + ch; - // Convert signed 16-bit [-32768, 32767] to unsigned 8-bit [0, 255] - // First scale to [0, 65535], then add dithering error - int32_t sample = (int32_t)pcm16[idx] + 32768; // Now in [0, 65535] + // Input float in range [-1.0, 1.0] + float sample = pcm32[idx]; - // Add accumulated dithering error - sample += enc->dither_error[ch]; + // Clamp to valid range + if (sample < -1.0f) sample = -1.0f; + if (sample > 1.0f) sample = 1.0f; - // Quantize to 8-bit (divide by 256) - int32_t quantized = sample >> 8; + // Apply 2nd-order noise shaping feedback + float feedback = b1 * enc->dither_error[ch][0] + b2 * enc->dither_error[ch][1]; - // Clamp to [0, 255] - if (quantized < 0) quantized = 0; - if (quantized > 255) quantized = 255; + // Add TPDF dither (±0.5 LSB) + float dither = 0.5f * tpdf1(); - // Store 8-bit value - pcm8[idx] = (uint8_t)quantized; + // Shaped signal + float shaped = sample + feedback + dither / scale; - // Calculate quantization error for next sample (error diffusion) - // Error = original - (quantized * 256) - enc->dither_error[ch] = sample - (quantized << 8); + // Clamp shaped signal + if (shaped < -1.0f) shaped = -1.0f; + if (shaped > 1.0f) shaped = 1.0f; + + // Quantize to signed 8-bit range [-128, 127] + int q = (int)lrintf(shaped * scale); + if (q < -128) q = -128; + else if (q > 127) q = 
127; + + // Convert to unsigned 8-bit [0, 255] + pcm8[idx] = (uint8_t)(q + (int)bias); + + // Calculate quantization error for feedback + float qerr = shaped - (float)q / scale; + + // Update error history (shift and store) + enc->dither_error[ch][1] = enc->dither_error[ch][0]; + enc->dither_error[ch][0] = qerr; } } } @@ -8824,56 +8857,57 @@ static int write_separate_audio_track(tav_encoder_t *enc, FILE *output) { } // Write TAD audio packet (0x24) with specified sample count -// Uses linked TAD encoder (encoder_tad.c) +// Uses linked TAD32 encoder (encoder_tad.c) - Float32 version static int write_tad_packet_samples(tav_encoder_t *enc, FILE *output, int samples_to_read) { if (!enc->pcm_file || enc->audio_remaining <= 0 || samples_to_read <= 0) { return 0; } - size_t bytes_to_read = samples_to_read * 2 * sizeof(int16_t); // Stereo PCM16LE + size_t bytes_to_read = samples_to_read * 2 * sizeof(float); // Stereo Float32LE // Don't read more than what's available if (bytes_to_read > enc->audio_remaining) { bytes_to_read = enc->audio_remaining; - samples_to_read = bytes_to_read / (2 * sizeof(int16_t)); + samples_to_read = bytes_to_read / (2 * sizeof(float)); } - if (samples_to_read < TAD_MIN_CHUNK_SIZE) { + if (samples_to_read < TAD32_MIN_CHUNK_SIZE) { // Pad to minimum size - samples_to_read = TAD_MIN_CHUNK_SIZE; + samples_to_read = TAD32_MIN_CHUNK_SIZE; } - // Allocate PCM16 input buffer - int16_t *pcm16_buffer = malloc(samples_to_read * 2 * sizeof(int16_t)); + // Allocate Float32 input buffer + float *pcm32_buffer = malloc(samples_to_read * 2 * sizeof(float)); - // Read PCM16LE data - size_t bytes_read = fread(pcm16_buffer, 1, bytes_to_read, enc->pcm_file); + // Read Float32LE data + size_t bytes_read = fread(pcm32_buffer, 1, bytes_to_read, enc->pcm_file); if (bytes_read == 0) { - free(pcm16_buffer); + free(pcm32_buffer); return 0; } - int samples_read = bytes_read / (2 * sizeof(int16_t)); + int samples_read = bytes_read / (2 * sizeof(float)); // Zero-pad if needed 
if (samples_read < samples_to_read) { - memset(&pcm16_buffer[samples_read * 2], 0, - (samples_to_read - samples_read) * 2 * sizeof(int16_t)); + memset(&pcm32_buffer[samples_read * 2], 0, + (samples_to_read - samples_read) * 2 * sizeof(float)); } - // Encode with TAD encoder (linked from encoder_tad.o) + // Encode with TAD32 encoder (linked from encoder_tad.o) + // Input is already Float32LE in range [-1.0, 1.0] from FFmpeg int tad_quality = enc->quality_level; // Use video quality level for audio - if (tad_quality > TAD_QUALITY_MAX) tad_quality = TAD_QUALITY_MAX; - if (tad_quality < TAD_QUALITY_MIN) tad_quality = TAD_QUALITY_MIN; + if (tad_quality > TAD32_QUALITY_MAX) tad_quality = TAD32_QUALITY_MAX; + if (tad_quality < TAD32_QUALITY_MIN) tad_quality = TAD32_QUALITY_MIN; // Allocate output buffer (generous size for TAD chunk) size_t max_output_size = samples_to_read * 4 * sizeof(int16_t) + 1024; uint8_t *tad_output = malloc(max_output_size); - size_t tad_encoded_size = tad_encode_chunk(pcm16_buffer, samples_to_read, tad_quality, 1, tad_output); + size_t tad_encoded_size = tad32_encode_chunk(pcm32_buffer, samples_to_read, tad_quality, 1, tad_output); if (tad_encoded_size == 0) { - fprintf(stderr, "Error: TAD encoding failed\n"); - free(pcm16_buffer); + fprintf(stderr, "Error: TAD32 encoding failed\n"); + free(pcm32_buffer); free(tad_output); return 0; } @@ -8891,8 +8925,8 @@ static int write_tad_packet_samples(tav_encoder_t *enc, FILE *output, int sample fwrite(&packet_type, 1, 1, output); uint32_t tav_payload_size = (uint32_t)tad_payload_size; - uint32_t tav_payload_size_plus_two = (uint32_t)tad_payload_size + 2; - fwrite(&tav_payload_size_plus_two, sizeof(uint32_t), 1, output); + uint32_t tav_payload_size_plus_6 = (uint32_t)tad_payload_size + 6; + fwrite(&tav_payload_size_plus_6, sizeof(uint32_t), 1, output); fwrite(&sample_count, sizeof(uint16_t), 1, output); fwrite(&tav_payload_size, sizeof(uint32_t), 1, output); fwrite(tad_payload, 1, tad_payload_size, output); 
@@ -8901,12 +8935,12 @@ static int write_tad_packet_samples(tav_encoder_t *enc, FILE *output, int sample enc->audio_remaining -= bytes_read; if (enc->verbose) { - printf("TAD packet: %d samples, %u bytes compressed (Q%d)\n", + printf("TAD32 packet: %d samples, %u bytes compressed (Q%d)\n", sample_count, tad_payload_size, tad_quality); } // Cleanup - free(pcm16_buffer); + free(pcm32_buffer); free(tad_output); return 1; @@ -8917,12 +8951,12 @@ static int write_pcm8_packet_samples(tav_encoder_t *enc, FILE *output, int sampl if (!enc->pcm_file || enc->audio_remaining <= 0 || samples_to_read <= 0) { return 0; } - size_t bytes_to_read = samples_to_read * 2 * sizeof(int16_t); // Stereo PCM16LE + size_t bytes_to_read = samples_to_read * 2 * sizeof(float); // Stereo Float32LE // Don't read more than what's available if (bytes_to_read > enc->audio_remaining) { bytes_to_read = enc->audio_remaining; - samples_to_read = bytes_to_read / (2 * sizeof(int16_t)); + samples_to_read = bytes_to_read / (2 * sizeof(float)); } if (samples_to_read == 0) { @@ -8931,23 +8965,23 @@ static int write_pcm8_packet_samples(tav_encoder_t *enc, FILE *output, int sampl // Allocate buffers if needed (size for max samples: 32768) int max_samples = 32768; // Maximum samples per packet - if (!enc->pcm16_buffer) { - enc->pcm16_buffer = malloc(max_samples * 2 * sizeof(int16_t)); + if (!enc->pcm32_buffer) { + enc->pcm32_buffer = malloc(max_samples * 2 * sizeof(float)); } if (!enc->pcm8_buffer) { enc->pcm8_buffer = malloc(max_samples * 2); } - // Read PCM16LE data - size_t bytes_read = fread(enc->pcm16_buffer, 1, bytes_to_read, enc->pcm_file); + // Read Float32LE data + size_t bytes_read = fread(enc->pcm32_buffer, 1, bytes_to_read, enc->pcm_file); if (bytes_read == 0) { return 0; } - int samples_read = bytes_read / (2 * sizeof(int16_t)); + int samples_read = bytes_read / (2 * sizeof(float)); // Convert to PCM8 with dithering - convert_pcm16_to_pcm8_dithered(enc, enc->pcm16_buffer, enc->pcm8_buffer, 
samples_read); + convert_pcm32_to_pcm8_dithered(enc, enc->pcm32_buffer, enc->pcm8_buffer, samples_read); // Compress with zstd size_t pcm8_size = samples_read * 2; // Stereo @@ -8985,10 +9019,10 @@ static int write_pcm8_packet_samples(tav_encoder_t *enc, FILE *output, int sampl // Debug: Show first few samples if (samples_read > 0) { - printf(" First samples (PCM16→PCM8): "); + printf(" First samples (Float32→PCM8): "); for (int i = 0; i < 4 && i < samples_read; i++) { - printf("[%d,%d]→[%d,%d] ", - enc->pcm16_buffer[i*2], enc->pcm16_buffer[i*2+1], + printf("[%.3f,%.3f]→[%d,%d] ", + enc->pcm32_buffer[i*2], enc->pcm32_buffer[i*2+1], enc->pcm8_buffer[i*2], enc->pcm8_buffer[i*2+1]); } printf("\n"); @@ -10662,7 +10696,7 @@ static void cleanup_encoder(tav_encoder_t *enc) { } // Free PCM8 buffers - free(enc->pcm16_buffer); + free(enc->pcm32_buffer); free(enc->pcm8_buffer); free(enc->input_file); diff --git a/video_encoder/tav_inspector.c b/video_encoder/tav_inspector.c index c2b1c0f..f8be00e 100644 --- a/video_encoder/tav_inspector.c +++ b/video_encoder/tav_inspector.c @@ -25,6 +25,7 @@ #define TAV_PACKET_BFRAME_ADAPTIVE 0x17 // B-frame with adaptive quad-tree block partitioning (bidirectional prediction) #define TAV_PACKET_AUDIO_MP2 0x20 #define TAV_PACKET_AUDIO_PCM8 0x21 +#define TAV_PACKET_AUDIO_TAD 0x24 #define TAV_PACKET_SUBTITLE 0x30 #define TAV_PACKET_SUBTITLE_KAR 0x31 #define TAV_PACKET_AUDIO_TRACK 0x40 @@ -70,6 +71,10 @@ typedef struct { int gop_sync_count; int total_gop_frames; int audio_count; + int audio_mp2_count; + int audio_pcm8_count; + int audio_tad_count; + int audio_track_count; int subtitle_count; int timecode_count; int sync_count; @@ -81,6 +86,10 @@ typedef struct { int unknown_count; uint64_t total_video_bytes; uint64_t total_audio_bytes; + uint64_t audio_mp2_bytes; + uint64_t audio_pcm8_bytes; + uint64_t audio_tad_bytes; + uint64_t audio_track_bytes; } packet_stats_t; // Display options @@ -109,6 +118,7 @@ const char* get_packet_type_name(uint8_t 
type) { case TAV_PACKET_BFRAME_ADAPTIVE: return "B-FRAME (quadtree)"; case TAV_PACKET_AUDIO_MP2: return "AUDIO MP2"; case TAV_PACKET_AUDIO_PCM8: return "AUDIO PCM8 (zstd)"; + case TAV_PACKET_AUDIO_TAD: return "AUDIO TAD (zstd)"; case TAV_PACKET_SUBTITLE: return "SUBTITLE (Simple)"; case TAV_PACKET_SUBTITLE_KAR: return "SUBTITLE (Karaoke)"; case TAV_PACKET_AUDIO_TRACK: return "AUDIO TRACK (Separate MP2)"; @@ -139,7 +149,8 @@ int should_display_packet(uint8_t type, display_options_t *opts) { if (opts->show_video && (type == TAV_PACKET_IFRAME || type == TAV_PACKET_PFRAME || type == TAV_PACKET_GOP_UNIFIED || type == TAV_PACKET_GOP_SYNC || (type >= 0x70 && type <= 0x7F))) return 1; - if (opts->show_audio && type == TAV_PACKET_AUDIO_MP2) return 1; + if (opts->show_audio && (type == TAV_PACKET_AUDIO_MP2 || type == TAV_PACKET_AUDIO_PCM8 || + type == TAV_PACKET_AUDIO_TAD || type == TAV_PACKET_AUDIO_TRACK)) return 1; if (opts->show_subtitles && (type == TAV_PACKET_SUBTITLE || type == TAV_PACKET_SUBTITLE_KAR)) return 1; if (opts->show_timecode && type == TAV_PACKET_TIMECODE) return 1; if (opts->show_metadata && (type >= 0xE0 && type <= 0xE4)) return 1; @@ -439,15 +450,89 @@ int main(int argc, char *argv[]) { return 1; } - // Skip header (32 bytes) - fseek(fp, 32, SEEK_SET); - + // Parse and display header if (!opts.summary_only) { printf("TAV Packet Inspector\n"); printf("File: %s\n", filename); printf("==================================================\n\n"); } + // Read TAV header (32 bytes) + uint8_t header[32]; + if (fread(header, 1, 32, fp) != 32) { + fprintf(stderr, "Error: Failed to read TAV header\n"); + fclose(fp); + return 1; + } + + // Verify magic number + const char *magic = "\x1F\x54\x53\x56\x4D\x54\x41\x56"; // "\x1FTSVM TAV" + if (memcmp(header, magic, 8) != 0) { + fprintf(stderr, "Error: Invalid TAV magic number\n"); + fclose(fp); + return 1; + } + + if (!opts.summary_only) { + // Parse header fields + uint8_t version = header[8]; + uint16_t width = 
*((uint16_t*)&header[9]); + uint16_t height = *((uint16_t*)&header[11]); + uint8_t fps = header[13]; + uint32_t total_frames = *((uint32_t*)&header[14]); + uint8_t wavelet = header[18]; + uint8_t decomp_levels = header[19]; + uint8_t quant_y = header[20]; + uint8_t quant_co = header[21]; + uint8_t quant_cg = header[22]; + uint8_t extra_flags = header[23]; + uint8_t video_flags = header[24]; + uint8_t quality = header[25]; + uint8_t channel_layout = header[26]; + uint8_t entropy_coder = header[27]; + +static const int QLUT[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096}; +static const char* CLAYOUT[] = {"Luma-Chroma", "Luma-Chroma-Alpha", "Luma", "Luma-Alpha", "Chroma", "Chroma-Alpha"}; + + int is_monoblock = (3 <= version && version <= 6); + int is_perceptual = (5 <= version && version <= 8); + +static const char* VERDESC[] = {"null", "YCoCg tiled, uniform", "ICtCp tiled, uniform", "YCoCg monoblock, uniform", "ICtCp monoblock, uniform", "YCoCg monoblock, perceptual", "ICtCp monoblock, perceptual", "YCoCg tiled, perceptual", 
"ICtCp tiled, perceptual"}; + + printf("TAV Header:\n"); + printf(" Version: %d (%s)\n", version, VERDESC[version]); + printf(" Resolution: %dx%d\n", width, height); + printf(" Frame rate: %d fps", fps); + if (video_flags & 0x02) printf(" (NTSC)"); + printf("\n"); + printf(" Total frames: %u\n", total_frames); + printf(" Wavelet: %d", wavelet); + const char *wavelet_names[] = {"LGT 5/3", "CDF 9/7", "CDF 13/7", "Reserved", "Reserved", + "Reserved", "Reserved", "Reserved", "Reserved", + "Reserved", "Reserved", "Reserved", "Reserved", + "Reserved", "Reserved", "Reserved", "DD-4"}; + if (wavelet < 17) printf(" (%s)", wavelet_names[wavelet == 16 ? 16 : (wavelet > 16 ? wavelet : wavelet)]); + if (wavelet == 255) printf(" (Haar)"); + printf("\n"); + printf(" Decomp levels: %d\n", decomp_levels); + printf(" Quantizers: Y=%d, Co=%d, Cg=%d (Index=%d,%d,%d)\n", QLUT[quant_y], QLUT[quant_co], QLUT[quant_cg], quant_y, quant_co, quant_cg); + if (quality > 0) + printf(" Quality: %d\n", quality - 1); + else + printf(" Quality: n/a\n"); + printf(" Channel layout: %s\n", CLAYOUT[channel_layout]); + printf(" Entropy coder: %s\n", entropy_coder == 0 ? "Twobit-map" : "EZBC"); + printf(" Flags:\n"); + printf(" Has audio: %s\n", (extra_flags & 0x01) ? "Yes" : "No"); + printf(" Has subtitles: %s\n", (extra_flags & 0x02) ? "Yes" : "No"); + printf(" Progressive: %s\n", (video_flags & 0x01) ? "No (interlaced)" : "Yes"); + printf(" Lossless: %s\n", (video_flags & 0x04) ? 
"Yes" : "No"); + if (extra_flags & 0x04) printf(" Progressive TX: Enabled\n"); + if (extra_flags & 0x08) printf(" ROI encoding: Enabled\n"); + printf("\nPackets:\n"); + printf("==================================================\n"); + } + packet_stats_t stats = {0}; int packet_num = 0; @@ -622,9 +707,11 @@ int main(int argc, char *argv[]) { case TAV_PACKET_AUDIO_MP2: { stats.audio_count++; + stats.audio_mp2_count++; uint32_t size; if (fread(&size, sizeof(uint32_t), 1, fp) != 1) break; stats.total_audio_bytes += size; + stats.audio_mp2_bytes += size; if (!opts.summary_only && display) { printf(" - size=%u bytes", size); @@ -635,9 +722,11 @@ int main(int argc, char *argv[]) { case TAV_PACKET_AUDIO_PCM8: { stats.audio_count++; + stats.audio_pcm8_count++; uint32_t size; if (fread(&size, sizeof(uint32_t), 1, fp) != 1) break; stats.total_audio_bytes += size; + stats.audio_pcm8_bytes += size; if (!opts.summary_only && display) { printf(" - size=%u bytes (zstd compressed)", size); @@ -646,11 +735,41 @@ int main(int argc, char *argv[]) { break; } + case TAV_PACKET_AUDIO_TAD: { + stats.audio_count++; + stats.audio_tad_count++; + // Read payload_size + 6 (compressed size plus the uint16 sample count and uint32 compressed-size fields that follow) + uint32_t payload_size_plus_6; + if (fread(&payload_size_plus_6, sizeof(uint32_t), 1, fp) != 1) break; + + // Read sample count + uint16_t sample_count; + if (fread(&sample_count, sizeof(uint16_t), 1, fp) != 1) break; + + // Read compressed size + uint32_t compressed_size; + if (fread(&compressed_size, sizeof(uint32_t), 1, fp) != 1) break; + + stats.total_audio_bytes += compressed_size; + stats.audio_tad_bytes += compressed_size; + + if (!opts.summary_only && display) { + printf(" - samples=%u, size=%u bytes (zstd compressed TAD32)", + sample_count, compressed_size); + } + + // Skip compressed data + fseek(fp, compressed_size, SEEK_CUR); + break; + } + case TAV_PACKET_AUDIO_TRACK: { stats.audio_count++; + stats.audio_track_count++; uint32_t size; if (fread(&size, sizeof(uint32_t), 1, fp) != 1) break; stats.total_audio_bytes 
+= size; + stats.audio_track_bytes += size; if (!opts.summary_only && display) { printf(" - size=%u bytes (separate track)", size); @@ -756,7 +875,31 @@ int main(int argc, char *argv[]) { (unsigned long long)stats.total_video_bytes, stats.total_video_bytes / 1024.0 / 1024.0); printf("\nAudio:\n"); - printf(" MP2 packets: %d\n", stats.audio_count); + printf(" Total packets: %d\n", stats.audio_count); + if (stats.audio_mp2_count > 0) { + printf(" MP2: %d packets, %llu bytes (%.2f MB)\n", + stats.audio_mp2_count, + (unsigned long long)stats.audio_mp2_bytes, + stats.audio_mp2_bytes / 1024.0 / 1024.0); + } + if (stats.audio_pcm8_count > 0) { + printf(" PCM8 (zstd): %d packets, %llu bytes (%.2f MB)\n", + stats.audio_pcm8_count, + (unsigned long long)stats.audio_pcm8_bytes, + stats.audio_pcm8_bytes / 1024.0 / 1024.0); + } + if (stats.audio_tad_count > 0) { + printf(" TAD32 (zstd): %d packets, %llu bytes (%.2f MB)\n", + stats.audio_tad_count, + (unsigned long long)stats.audio_tad_bytes, + stats.audio_tad_bytes / 1024.0 / 1024.0); + } + if (stats.audio_track_count > 0) { + printf(" Separate track: %d packets, %llu bytes (%.2f MB)\n", + stats.audio_track_count, + (unsigned long long)stats.audio_track_bytes, + stats.audio_track_bytes / 1024.0 / 1024.0); + } printf(" Total audio bytes: %llu (%.2f MB)\n", (unsigned long long)stats.total_audio_bytes, stats.total_audio_bytes / 1024.0 / 1024.0);