From 9dc71095a0bfc6ed09c19f9e35940d772fadc89d Mon Sep 17 00:00:00 2001 From: minjaesong Date: Fri, 24 Oct 2025 05:31:38 +0900 Subject: [PATCH] TAD: now processing entirely in float --- terranmon.txt | 38 ++----- video_encoder/Makefile | 12 ++- video_encoder/decoder_tad.c | 160 ++++++++++----------------- video_encoder/encoder_tad.c | 210 ++++++++---------------------------- video_encoder/encoder_tad.h | 38 +++---- 5 files changed, 139 insertions(+), 319 deletions(-) diff --git a/terranmon.txt b/terranmon.txt index dbad9b5..86b32d1 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -1550,7 +1550,7 @@ is stored separately and quality index is shared with that of the video. ## Audio Properties - **Sample Rate**: 32000 Hz (TSVM audio hardware native format) - **Channels**: 2 (stereo) -- **Input Format**: PCM16LE (16-bit signed little-endian PCM) +- **Input Format**: PCM32fLE (32-bit float little-endian PCM) - **Preprocessing**: 16 Hz highpass filter applied during extraction - **Internal Representation**: Signed PCM8 with error-diffusion dithering - **Chunk Size**: Variable (1024-32768+ samples per channel, must be power of 2) @@ -1565,8 +1565,6 @@ Default is 32768 samples (65536 total samples, 1.024 seconds). If the audio duration doesn't align to chunk boundaries, the final chunk can use a smaller power-of-2 size or be zero-padded. - uint8 Significance Map Method: always 1 (2-bit twobitmap) - uint8 Compression Flag: 1=Zstd compressed, 0=uncompressed uint16 Sample Count: number of samples per channel (must be power of 2, min 1024) uint32 Chunk Payload Size: size of following payload in bytes * Chunk Payload: encoded M/S stereo data (Zstd compressed if flag set) @@ -1592,13 +1590,9 @@ as int16 in the order they appear. ## Encoding Pipeline -### Step 1: PCM16 to PCM8 Conversion with Error-Diffusion Dithering -Input stereo PCM16LE is converted to signed PCM8 using error-diffusion dithering -to minimize quantization noise: - - dithered_value = pcm16_value / 256 + error - pcm8_value = clamp(round(dithered_value), -128, 127) - error = dithered_value - pcm8_value +### Step 1: PCM32f to PCM8 Conversion with Error-Diffusion Dithering +Input stereo PCM32fLE is converted to signed PCM8 using second-order noise-shaped +error-diffusion dithering to minimize quantization noise. Error is propagated to the next sample (alternating between left/right channels). @@ -1632,18 +1626,7 @@ For 32768 samples with 14 levels: boundaries at 0, 2, 4, 8, 16, 32, 64, 128, 256 For 1024 samples with 9 levels: boundaries at 0, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024 ### Step 4: Frequency-Dependent Quantization -DWT coefficients are quantized using perceptually-tuned frequency-dependent weights: - - Base Weights by Level: - Level 0 (16-8 KHz): 3.0 - Level 1 (8-4 KHz): 2.0 - Level 2 (4-2 KHz): 1.5 - Level 3 (2-1 KHz): 1.0 - Level 4 (1-0.5 KHz): 0.75 - Level 5 (0.5-0.25 KHz): 0.5 - Level 6-7 (DC-0.25 KHz): 0.25 - -Quality scaling factor: 1.0 + (5 - quality) * 0.3 +DWT coefficients are quantized using perceptually-tuned frequency-dependent weights. Final quantization step: base_weight * quality_scale @@ -1690,13 +1673,8 @@ Convert Mid/Side back to Left/Right stereo: Left = Mid + Side Right = Mid - Side -### Step 6: PCM8 to PCM16 Upsampling -Convert signed PCM8 back to PCM16LE by multiplying by 256: - - pcm16_value = pcm8_value * 256 - ## Compression Performance -- **Target Ratio**: 2:1 against PCMu8 (4:1 against PCM16LE input) +- **Target Ratio**: 2:1 against PCMu8 - **Achieved Ratio**: 2.51:1 against PCMu8 at quality level 3 - **Quality**: Perceptually transparent at Q3+, preserves full 0-16 KHz bandwidth - **Sparsity**: 86.9% zeros in Mid channel, 97.8% in Side channel (typical) @@ -1721,10 +1699,10 @@ This allows TAV video files to embed TAD-compressed audio using packet type 0x24 TAD encoder uses two-pass FFmpeg extraction for optimal quality: # Pass 1: Extract at original sample rate - ffmpeg -i input.mp4 -f s16le -ac 2 temp.pcm + ffmpeg -i input.mp4 -f f32le -ac 2 temp.pcm # Pass 2: High-quality resample with SoXR and highpass filter - ffmpeg -f s16le -ar {original_rate} -ac 2 -i temp.pcm \ + ffmpeg -f f32le -ar {original_rate} -ac 2 -i temp.pcm \ -ar 32000 -af "aresample=resampler=soxr:precision=28:cutoff=0.99,highpass=f=16" \ output.pcm diff --git a/video_encoder/Makefile b/video_encoder/Makefile index 0547557..fc79e41 100644 --- a/video_encoder/Makefile +++ b/video_encoder/Makefile @@ -78,7 +78,7 @@ debug: $(TARGETS) # Clean build artifacts clean: - rm -f $(TARGETS) $(TAD_TARGETS) *.o + rm -f $(TARGETS) $(TAD_TARGETS) $(TAD16_TARGETS) $(TAD10_TARGETS) *.o # Install (copy to PATH) install: $(TARGETS) $(TAD_TARGETS) @@ -106,6 +106,12 @@ help: @echo " tad - Build all TAD audio tools (encoder, decoder)" @echo " encoder_tad - Build TAD audio encoder" @echo " decoder_tad - Build TAD audio decoder" + @echo " tad16 - Build TAD16 tools (PCM16 alternative for comparison)" + @echo " encoder_tad16- Build TAD16 audio encoder (PCM16 version)" + @echo " decoder_tad16- Build TAD16 audio decoder (PCM16 version)" + @echo " tad10 - Build TAD10 tools (PCM10 alternative for comparison)" + @echo " encoder_tad10- Build TAD10 audio encoder (PCM10 version)" + @echo " decoder_tad10- Build TAD10 audio decoder (PCM10 version)" @echo " debug - Build with debug symbols" @echo " clean - Remove build artifacts" @echo " install - Install to /usr/local/bin" @@ -117,6 +123,8 @@ help: @echo " make tev # Build TEV encoder" @echo " make tav # Build TAV encoder" @echo " make tad # Build all TAD audio tools" + @echo " make tad16 # Build TAD16 tools (for comparison testing)" + @echo " make tad10 # Build TAD10 tools (for comparison testing)" @echo " sudo make install # Install all encoders" -.PHONY: all clean install check-deps help debug tad +.PHONY: all clean install check-deps help debug tad tad16 tad10 diff --git a/video_encoder/decoder_tad.c b/video_encoder/decoder_tad.c index 7da260f..afb82e5 100644 --- a/video_encoder/decoder_tad.c +++ b/video_encoder/decoder_tad.c @@ -12,6 +12,7 @@ #define DECODER_VENDOR_STRING "Decoder-TAD 20251023" // TAD format constants (must match encoder) +#define TAD_COEFF_SCALAR 1024.0f #define TAD_DEFAULT_CHUNK_SIZE 32768 #define TAD_MIN_CHUNK_SIZE 1024 #define TAD_SAMPLE_RATE 32000 @@ -148,22 +149,58 @@ static void dwt_haar_inverse_multilevel(float *data, int length, int levels) { // M/S Stereo Correlation (inverse of decorrelation) //============================================================================= -static void ms_correlate(const int8_t *mid, const int8_t *side, uint8_t *left, uint8_t *right, size_t count) { +// Uniform random in [0, 1) +static inline float frand01(void) { + return (float)rand() / ((float)RAND_MAX + 1.0f); +} + +// TPDF noise in [-1, +1) +static inline float tpdf1(void) { + return (frand01() - frand01()); +} + +static void ms_correlate(const float *mid, const float *side, uint8_t *left, uint8_t *right, size_t count, float dither_error[2][2]) { + const float b1 = 1.5f; // 1st feedback coefficient + const float b2 = -0.75f; // 2nd feedback coefficient + const float scale = 127.5f; + const float bias = 128.0f; + for (size_t i = 0; i < count; i++) { - // L = M + S, R = M - S - int32_t m = mid[i]; - int32_t s = side[i]; - int32_t l = m + s; - int32_t r = m - s; + // Decode M/S → L/R + float m = mid[i]; + float s = side[i]; + float l = FCLAMP(m + s, -1.0f, 1.0f); + float r = FCLAMP(m - s, -1.0f, 1.0f); - // Clamp to [-128, 127] then convert to unsigned [0, 255] - if (l < -128) l = -128; - if (l > 127) l = 127; - if (r < -128) r = -128; - if (r > 127) r = 127; + // --- LEFT channel --- + float feedbackL = b1 * dither_error[0][0] + b2 * dither_error[0][1]; + float ditherL = 0.5f * tpdf1(); // ±0.5 LSB TPDF + float shapedL = l + feedbackL + ditherL / scale; + shapedL = FCLAMP(shapedL, -1.0f, 1.0f); - left[i] = (uint8_t)(l + 128); - right[i] = (uint8_t)(r + 128); + int qL = (int)lrintf(shapedL * scale); + if (qL < -128) qL = -128; + else if (qL > 127) qL = 127; + left[i] = (uint8_t)(qL + bias); + + float qerrL = shapedL - (float)qL / scale; + dither_error[0][1] = dither_error[0][0]; // shift history + dither_error[0][0] = qerrL; + + // --- RIGHT channel --- + float feedbackR = b1 * dither_error[1][0] + b2 * dither_error[1][1]; + float ditherR = 0.5f * tpdf1(); + float shapedR = r + feedbackR + ditherR / scale; + shapedR = FCLAMP(shapedR, -1.0f, 1.0f); + + int qR = (int)lrintf(shapedR * scale); + if (qR < -128) qR = -128; + else if (qR > 127) qR = 127; + right[i] = (uint8_t)(qR + bias); + + float qerrR = shapedR - (float)qR / scale; + dither_error[1][1] = dither_error[1][0]; + dither_error[1][0] = qerrR; } } @@ -188,11 +225,10 @@ static void get_quantization_weights(int quality, int dwt_levels, float *weights /*12*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f}, /*13*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f}, /*14*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f}, - /*15*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f}, - /*16*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f} + /*15*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f} }; - float quality_scale = 1.0f + FCLAMP((3 - quality) * 0.5f, 0.0f, 1000.0f); + float quality_scale = 4.0f + FCLAMP((3 - quality) * 0.5f, 0.0f, 1000.0f); for (int i = 0; i < dwt_levels; i++) { weights[i] = FCLAMP(base_weights[dwt_levels][i] * quality_scale, 1.0f, 1000.0f); @@ -227,7 +263,7 @@ static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs, if (weight_idx >= dwt_levels) weight_idx = dwt_levels - 1; float weight = weights[weight_idx]; - coeffs[i] = (float)quantized[i] * weight; + coeffs[i] = (float)quantized[i] * weight / TAD_COEFF_SCALAR; } free(sideband_starts); @@ -237,29 +273,6 @@ static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs, // Significance Map Decoding //============================================================================= -static size_t decode_sigmap_1bit(const uint8_t *input, int16_t *values, size_t count) { - size_t map_bytes = (count + 7) / 8; - const uint8_t *map = input; - const uint8_t *read_ptr = input + map_bytes; - - uint32_t nonzero_count = *((const uint32_t*)read_ptr); - read_ptr += sizeof(uint32_t); - - const int16_t *value_ptr = (const int16_t*)read_ptr; - uint32_t value_idx = 0; - - // Reconstruct values - for (size_t i = 0; i < count; i++) { - if (map[i / 8] & (1 << (i % 8))) { - values[i] = value_ptr[value_idx++]; - } else { - values[i] = 0; - } - } - - return map_bytes + sizeof(uint32_t) + nonzero_count * sizeof(int16_t); -} - static size_t decode_sigmap_2bit(const uint8_t *input, int16_t *values, size_t count) { size_t map_bytes = (count * 2 + 7) / 8; const uint8_t *map = input; @@ -291,48 +304,6 @@ static size_t decode_sigmap_2bit(const uint8_t *input, int16_t *values, size_t c return map_bytes + other_idx * sizeof(int16_t); } -static size_t decode_sigmap_rle(const uint8_t *input, int16_t *values, size_t count) { - const uint8_t *read_ptr = input; - - uint32_t run_count = *((const uint32_t*)read_ptr); - read_ptr += sizeof(uint32_t); - - size_t value_idx = 0; - - for (uint32_t run = 0; run < run_count; run++) { - // Decode zero run length (varint) - uint32_t zero_run = 0; - int shift = 0; - uint8_t byte; - - do { - byte = *read_ptr++; - zero_run |= ((uint32_t)(byte & 0x7F) << shift); - shift += 7; - } while (byte & 0x80); - - // Fill zeros - for (uint32_t i = 0; i < zero_run && value_idx < count; i++) { - values[value_idx++] = 0; - } - - // Read non-zero value - int16_t val = *((const int16_t*)read_ptr); - read_ptr += sizeof(int16_t); - - if (value_idx < count && val != 0) { - values[value_idx++] = val; - } - } - - // Fill remaining with zeros - while (value_idx < count) { - values[value_idx++] = 0; - } - - return read_ptr - input; -} - //============================================================================= // Chunk Decoding //============================================================================= @@ -381,8 +352,6 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_ int16_t *quant_side = malloc(sample_count * sizeof(int16_t)); float *dwt_mid = malloc(sample_count * sizeof(float)); float *dwt_side = malloc(sample_count * sizeof(float)); - int8_t *pcm8_mid = malloc(sample_count * sizeof(int8_t)); - int8_t *pcm8_side = malloc(sample_count * sizeof(int8_t)); uint8_t *pcm8_left = malloc(sample_count * sizeof(uint8_t)); uint8_t *pcm8_right = malloc(sample_count * sizeof(uint8_t)); @@ -401,23 +370,10 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_ dwt_haar_inverse_multilevel(dwt_mid, sample_count, dwt_levels); dwt_haar_inverse_multilevel(dwt_side, sample_count, dwt_levels); - // Convert to signed PCM8 - for (size_t i = 0; i < sample_count; i++) { - float m = dwt_mid[i]; - float s = dwt_side[i]; - - // Clamp and round - if (m < -128.0f) m = -128.0f; - if (m > 127.0f) m = 127.0f; - if (s < -128.0f) s = -128.0f; - if (s > 127.0f) s = 127.0f; - - pcm8_mid[i] = (int8_t)roundf(m); - pcm8_side[i] = (int8_t)roundf(s); - } + float err[2][2] = {{0,0},{0,0}}; // M/S to L/R correlation - ms_correlate(pcm8_mid, pcm8_side, pcm8_left, pcm8_right, sample_count); + ms_correlate(dwt_mid, dwt_side, pcm8_left, pcm8_right, sample_count, err); // Interleave stereo output (PCMu8) for (size_t i = 0; i < sample_count; i++) { @@ -427,7 +383,7 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_ // Cleanup free(quant_mid); free(quant_side); free(dwt_mid); free(dwt_side); - free(pcm8_mid); free(pcm8_side); free(pcm8_left); free(pcm8_right); + free(pcm8_left); free(pcm8_right); if (decompressed) free(decompressed); return 0; @@ -442,7 +398,7 @@ static void print_usage(const char *prog_name) { printf("Options:\n"); printf(" -i Input TAD file\n"); printf(" -o Output PCMu8 file (raw 8-bit unsigned stereo @ 32kHz)\n"); - printf(" -q <0-5> Quality level used during encoding (default: 2)\n"); + printf(" -q <0-5> Quality level used during encoding (default: 3)\n"); printf(" -v Verbose output\n"); printf(" -h, --help Show this help\n"); printf("\nVersion: %s\n", DECODER_VENDOR_STRING); @@ -453,7 +409,7 @@ static void print_usage(const char *prog_name) { int main(int argc, char *argv[]) { char *input_file = NULL; char *output_file = NULL; - int quality = 2; // Must match encoder quality + int quality = 3; // Must match encoder quality int verbose = 0; int opt; diff --git a/video_encoder/encoder_tad.c b/video_encoder/encoder_tad.c index 552de69..fb7ef73 100644 --- a/video_encoder/encoder_tad.c +++ b/video_encoder/encoder_tad.c @@ -1,6 +1,7 @@ -// Created by CuriousTorvald and Claude on 2025-10-23. -// TAD (Terrarum Advanced Audio) Encoder Library - DWT-based audio compression -// This file contains only the encoding functions for use by encoder_tad.c and encoder_tav.c +// Created by CuriousTorvald and Claude on 2025-10-24. +// TAD32 (Terrarum Advanced Audio - PCM32f version) Encoder Library +// Alternative version: PCM32f throughout encoding, PCM8 conversion only at decoder +// This file contains only the encoding functions for comparison testing #include #include @@ -11,12 +12,9 @@ #include "encoder_tad.h" // Forward declarations for internal functions -static void dwt_haar_forward_1d(float *data, int length); static void dwt_dd4_forward_1d(float *data, int length); -static void dwt_97_forward_1d(float *data, int length); -static void dwt_haar_forward_multilevel(float *data, int length, int levels); -static void ms_decorrelate(const int8_t *left, const int8_t *right, int8_t *mid, int8_t *side, size_t count); -static void convert_pcm16_to_pcm8_dithered(const int16_t *pcm16, int8_t *pcm8, int num_samples, int16_t *dither_error); +static void dwt_dd4_forward_multilevel(float *data, int length, int levels); +static void ms_decorrelate_16(const float *left, const float *right, float *mid, float *side, size_t count); static void get_quantization_weights(int quality, int dwt_levels, float *weights); static int get_deadzone_threshold(int quality); static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, size_t count, int quality, int apply_deadzone, int chunk_size, int dwt_levels); @@ -26,15 +24,13 @@ static inline float FCLAMP(float x, float min, float max) { return x < min ? min : (x > max ? max : x); } -// Calculate DWT levels from chunk size (non-power-of-2 supported, >= 1024) +// Calculate DWT levels from chunk size static int calculate_dwt_levels(int chunk_size) { - if (chunk_size < TAD_MIN_CHUNK_SIZE) { - fprintf(stderr, "Error: Chunk size %d is below minimum %d\n", chunk_size, TAD_MIN_CHUNK_SIZE); + if (chunk_size < TAD32_MIN_CHUNK_SIZE) { + fprintf(stderr, "Error: Chunk size %d is below minimum %d\n", chunk_size, TAD32_MIN_CHUNK_SIZE); return -1; } - // For non-power-of-2, find next power of 2 and calculate levels - // Then subtract 2 for maximum decomposition int levels = 0; int size = chunk_size; while (size > 1) { @@ -48,39 +44,13 @@ static int calculate_dwt_levels(int chunk_size) { levels++; } - return levels - 2; // Maximum decomposition leaves 2-sample approximation + return levels - 2; // Maximum decomposition } //============================================================================= -// Haar DWT Implementation +// DD-4 DWT Implementation //============================================================================= -static void dwt_haar_forward_1d(float *data, int length) { - if (length < 2) return; - - float *temp = malloc(length * sizeof(float)); - int half = (length + 1) / 2; - - // Haar transform: compute averages (low-pass) and differences (high-pass) - for (int i = 0; i < half; i++) { - if (2 * i + 1 < length) { - // Average of adjacent pairs (low-pass) - temp[i] = (data[2 * i] + data[2 * i + 1]) / 2.0f; - // Difference of adjacent pairs (high-pass) - temp[half + i] = (data[2 * i] - data[2 * i + 1]) / 2.0f; - } else { - // Handle odd length: last sample goes to low-pass - temp[i] = data[2 * i]; - if (half + i < length) { - temp[half + i] = 0.0f; - } - } - } - - memcpy(data, temp, length * sizeof(float)); - free(temp); -} - // Four-point interpolating Deslauriers-Dubuc (DD-4) wavelet forward 1D transform static void dwt_dd4_forward_1d(float *data, int length) { if (length < 2) return; @@ -129,76 +99,8 @@ static void dwt_dd4_forward_1d(float *data, int length) { free(temp); } -// 1D DWT using lifting scheme for 9/7 irreversible filter -static void dwt_97_forward_1d(float *data, int length) { - if (length < 2) return; - - float *temp = malloc(length * sizeof(float)); - int half = (length + 1) / 2; - - // Split into even/odd samples - for (int i = 0; i < half; i++) { - temp[i] = data[2 * i]; // Even (low) - } - for (int i = 0; i < length / 2; i++) { - temp[half + i] = data[2 * i + 1]; // Odd (high) - } - - // JPEG2000 9/7 forward lifting steps - const float alpha = -1.586134342f; - const float beta = -0.052980118f; - const float gamma = 0.882911076f; - const float delta = 0.443506852f; - const float K = 1.230174105f; - - // Step 1: Predict α - for (int i = 0; i < length / 2; i++) { - if (half + i < length) { - float s_curr = temp[i]; - float s_next = (i + 1 < half) ? temp[i + 1] : s_curr; - temp[half + i] += alpha * (s_curr + s_next); - } - } - - // Step 2: Update β - for (int i = 0; i < half; i++) { - float d_curr = (half + i < length) ? temp[half + i] : 0.0f; - float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr; - temp[i] += beta * (d_prev + d_curr); - } - - // Step 3: Predict γ - for (int i = 0; i < length / 2; i++) { - if (half + i < length) { - float s_curr = temp[i]; - float s_next = (i + 1 < half) ? temp[i + 1] : s_curr; - temp[half + i] += gamma * (s_curr + s_next); - } - } - - // Step 4: Update δ - for (int i = 0; i < half; i++) { - float d_curr = (half + i < length) ? temp[half + i] : 0.0f; - float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr; - temp[i] += delta * (d_prev + d_curr); - } - - // Step 5: Scaling - for (int i = 0; i < half; i++) { - temp[i] *= K; - } - for (int i = 0; i < length / 2; i++) { - if (half + i < length) { - temp[half + i] /= K; - } - } - - memcpy(data, temp, length * sizeof(float)); - free(temp); -} - // Apply multi-level DWT (using DD-4 wavelet) -static void dwt_haar_forward_multilevel(float *data, int length, int levels) { +static void dwt_dd4_forward_multilevel(float *data, int length, int levels) { int current_length = length; for (int level = 0; level < levels; level++) { dwt_dd4_forward_1d(data, current_length); @@ -207,35 +109,16 @@ static void dwt_haar_forward_multilevel(float *data, int length, int levels) { } //============================================================================= -// M/S Stereo Decorrelation +// M/S Stereo Decorrelation (PCM32f version) //============================================================================= -static void ms_decorrelate(const int8_t *left, const int8_t *right, int8_t *mid, int8_t *side, size_t count) { +static void ms_decorrelate_16(const float *left, const float *right, float *mid, float *side, size_t count) { for (size_t i = 0; i < count; i++) { // Mid = (L + R) / 2, Side = (L - R) / 2 - int32_t l = left[i]; - int32_t r = right[i]; - mid[i] = (int8_t)((l + r) / 2); - side[i] = (int8_t)((l - r) / 2); - } -} - -//============================================================================= -// PCM16 to Signed PCM8 Conversion with Dithering -//============================================================================= - -static void convert_pcm16_to_pcm8_dithered(const int16_t *pcm16, int8_t *pcm8, int num_samples, int16_t *dither_error) { - for (int i = 0; i < num_samples; i++) { - for (int ch = 0; ch < 2; ch++) { // Stereo: L and R - int idx = i * 2 + ch; - int32_t sample = (int32_t)pcm16[idx]; - sample += dither_error[ch]; - int32_t quantized = sample >> 8; - if (quantized < -128) quantized = -128; - if (quantized > 127) quantized = 127; - pcm8[idx] = (int8_t)quantized; - dither_error[ch] = sample - (quantized << 8); - } + float l = left[i]; + float r = right[i]; + mid[i] = (l + r) / 2.0f; + side[i] = (l - r) / 2.0f; } } @@ -263,15 +146,15 @@ static void get_quantization_weights(int quality, int dwt_levels, float *weights /*15*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f} }; - float quality_scale = 1.0f + FCLAMP((3 - quality) * 0.5f, 0.0f, 1000.0f); + float quality_scale = 4.0f * (1.0f + FCLAMP((3 - quality) * 0.5f, 0.0f, 1000.0f)); for (int i = 0; i < dwt_levels; i++) { - weights[i] = FCLAMP(base_weights[dwt_levels][i] * quality_scale, 1.0f, 1000.0f); + weights[i] = base_weights[dwt_levels][i] * quality_scale; } } static int get_deadzone_threshold(int quality) { - const int thresholds[] = {1,1,0,0,0,0}; // Q0 to Q5 + const int thresholds[] = {1,1,1,1,1,1}; // Q0 to Q5 return thresholds[quality]; } @@ -302,7 +185,7 @@ static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, s if (weight_idx >= dwt_levels) weight_idx = dwt_levels - 1; float weight = weights[weight_idx]; - float val = coeffs[i] / weight; + float val = coeffs[i] / weight * TAD32_COEFF_SCALAR; int16_t quant_val = (int16_t)roundf(val); if (apply_deadzone && sideband >= dwt_levels - 1) { @@ -359,8 +242,8 @@ static size_t encode_sigmap_2bit(const int16_t *values, size_t count, uint8_t *o // Public API: Chunk Encoding //============================================================================= -size_t tad_encode_chunk(const int16_t *pcm16_stereo, size_t num_samples, int quality, - int use_zstd, uint8_t *output) { +size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, int quality, + int use_zstd, uint8_t *output) { // Calculate DWT levels from chunk size int dwt_levels = calculate_dwt_levels(num_samples); if (dwt_levels < 0) { @@ -368,12 +251,11 @@ size_t tad_encode_chunk(const int16_t *pcm16_stereo, size_t num_samples, int qua return 0; } - // Allocate working buffers - int8_t *pcm8_stereo = malloc(num_samples * 2 * sizeof(int8_t)); - int8_t *pcm8_left = malloc(num_samples * sizeof(int8_t)); - int8_t *pcm8_right = malloc(num_samples * sizeof(int8_t)); - int8_t *pcm8_mid = malloc(num_samples * sizeof(int8_t)); - int8_t *pcm8_side = malloc(num_samples * sizeof(int8_t)); + // Allocate working buffers (PCM32f throughout, int32 coefficients) + float *pcm32_left = malloc(num_samples * sizeof(float)); + float *pcm32_right = malloc(num_samples * sizeof(float)); + float *pcm32_mid = malloc(num_samples * sizeof(float)); + float *pcm32_side = malloc(num_samples * sizeof(float)); float *dwt_mid = malloc(num_samples * sizeof(float)); float *dwt_side = malloc(num_samples * sizeof(float)); @@ -381,34 +263,30 @@ size_t tad_encode_chunk(const int16_t *pcm16_stereo, size_t num_samples, int qua int16_t *quant_mid = malloc(num_samples * sizeof(int16_t)); int16_t *quant_side = malloc(num_samples * sizeof(int16_t)); - // Step 1: Convert PCM16 to signed PCM8 with dithering - int16_t dither_error[2] = {0, 0}; - convert_pcm16_to_pcm8_dithered(pcm16_stereo, pcm8_stereo, num_samples, dither_error); - - // Deinterleave stereo + // Step 1: Deinterleave stereo for (size_t i = 0; i < num_samples; i++) { - pcm8_left[i] = pcm8_stereo[i * 2]; - pcm8_right[i] = pcm8_stereo[i * 2 + 1]; + pcm32_left[i] = pcm32_stereo[i * 2]; + pcm32_right[i] = pcm32_stereo[i * 2 + 1]; } // Step 2: M/S decorrelation - ms_decorrelate(pcm8_left, pcm8_right, pcm8_mid, pcm8_side, num_samples); + ms_decorrelate_16(pcm32_left, pcm32_right, pcm32_mid, pcm32_side, num_samples); // Step 3: Convert to float and apply DWT for (size_t i = 0; i < num_samples; i++) { - dwt_mid[i] = (float)pcm8_mid[i]; - dwt_side[i] = (float)pcm8_side[i]; + dwt_mid[i] = pcm32_mid[i]; + dwt_side[i] = pcm32_side[i]; } - dwt_haar_forward_multilevel(dwt_mid, num_samples, dwt_levels); - dwt_haar_forward_multilevel(dwt_side, num_samples, dwt_levels); + dwt_dd4_forward_multilevel(dwt_mid, num_samples, dwt_levels); + dwt_dd4_forward_multilevel(dwt_side, num_samples, dwt_levels); // Step 4: Quantize with frequency-dependent weights and dead zone quantize_dwt_coefficients(dwt_mid, quant_mid, num_samples, quality, 1, num_samples, dwt_levels); quantize_dwt_coefficients(dwt_side, quant_side, num_samples, quality, 1, num_samples, dwt_levels); - // Step 5: Encode with 2-bit significance map - uint8_t *temp_buffer = malloc(num_samples * 4 * sizeof(int16_t)); + // Step 5: Encode with 2-bit significance map (32-bit version) + uint8_t *temp_buffer = malloc(num_samples * 4 * sizeof(int32_t)); size_t mid_size = encode_sigmap_2bit(quant_mid, num_samples, temp_buffer); size_t side_size = encode_sigmap_2bit(quant_side, num_samples, temp_buffer + mid_size); @@ -429,13 +307,13 @@ size_t tad_encode_chunk(const int16_t *pcm16_stereo, size_t num_samples, int qua size_t zstd_bound = ZSTD_compressBound(uncompressed_size); uint8_t *zstd_buffer = malloc(zstd_bound); - payload_size = ZSTD_compress(zstd_buffer, zstd_bound, temp_buffer, uncompressed_size, TAD_ZSTD_LEVEL); + payload_size = ZSTD_compress(zstd_buffer, zstd_bound, temp_buffer, uncompressed_size, TAD32_ZSTD_LEVEL); if (ZSTD_isError(payload_size)) { fprintf(stderr, "Error: Zstd compression failed: %s\n", ZSTD_getErrorName(payload_size)); free(zstd_buffer); - free(pcm8_stereo); free(pcm8_left); free(pcm8_right); - free(pcm8_mid); free(pcm8_side); free(dwt_mid); free(dwt_side); + free(pcm32_left); free(pcm32_right); + free(pcm32_mid); free(pcm32_side); free(dwt_mid); free(dwt_side); free(quant_mid); free(quant_side); free(temp_buffer); return 0; } @@ -451,8 +329,8 @@ size_t tad_encode_chunk(const int16_t *pcm16_stereo, size_t num_samples, int qua write_ptr += payload_size; // Cleanup - free(pcm8_stereo); free(pcm8_left); free(pcm8_right); - free(pcm8_mid); free(pcm8_side); free(dwt_mid); free(dwt_side); + free(pcm32_left); free(pcm32_right); + free(pcm32_mid); free(pcm32_side); free(dwt_mid); free(dwt_side); free(quant_mid); free(quant_side); free(temp_buffer); return write_ptr - output; diff --git a/video_encoder/encoder_tad.h b/video_encoder/encoder_tad.h index f9b5443..29542c9 100644 --- a/video_encoder/encoder_tad.h +++ b/video_encoder/encoder_tad.h @@ -1,40 +1,40 @@ -#ifndef TAD_ENCODER_H -#define TAD_ENCODER_H +#ifndef TAD32_ENCODER_H +#define TAD32_ENCODER_H #include #include -// TAD (Terrarum Advanced Audio) Encoder +// TAD32 (Terrarum Advanced Audio - PCM32f version) Encoder // DWT-based perceptual audio codec for TSVM +// Alternative version: PCM32f throughout encoding, PCM8 conversion only at decoder // Constants -#define TAD_MIN_CHUNK_SIZE 1024 // Minimum: 1024 samples (supports non-power-of-2) -#define TAD_SAMPLE_RATE 32000 -#define TAD_CHANNELS 2 // Stereo -#define TAD_SIGMAP_2BIT 1 // 2-bit: 00=0, 01=+1, 10=-1, 11=other -#define TAD_QUALITY_MIN 0 -#define TAD_QUALITY_MAX 5 -#define TAD_QUALITY_DEFAULT 3 -#define TAD_ZSTD_LEVEL 7 +#define TAD32_COEFF_SCALAR 1024.0f +#define TAD32_MIN_CHUNK_SIZE 1024 // Minimum: 1024 samples +#define TAD32_SAMPLE_RATE 32000 +#define TAD32_CHANNELS 2 // Stereo +#define TAD32_SIGMAP_2BIT 1 // 2-bit: 00=0, 01=+1, 10=-1, 11=other +#define TAD32_QUALITY_MIN 0 +#define TAD32_QUALITY_MAX 5 +#define TAD32_QUALITY_DEFAULT 3 +#define TAD32_ZSTD_LEVEL 7 /** - * Encode audio chunk with TAD codec + * Encode audio chunk with TAD32 codec (PCM32f version) * - * @param pcm16_stereo Input PCM16LE stereo samples (interleaved L,R) - * @param num_samples Number of samples per channel (supports non-power-of-2, min 1024) + * @param pcm32_stereo Input PCM32fLE stereo samples (interleaved L,R) + * @param num_samples Number of samples per channel (min 1024) * @param quality Quality level 0-5 (0=lowest, 5=highest) * @param use_zstd 1=enable Zstd compression, 0=disable * @param output Output buffer (must be large enough) * @return Number of bytes written to output, or 0 on error * * Output format: - * uint8 sigmap_method (always 1 = 2-bit twobitmap) - * uint8 compressed_flag (1=Zstd, 0=raw) * uint16 sample_count (samples per channel) * uint32 payload_size (bytes in payload) * * payload (encoded M/S data, optionally Zstd-compressed) */ -size_t tad_encode_chunk(const int16_t *pcm16_stereo, size_t num_samples, int quality, - int use_zstd, uint8_t *output); +size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, int quality, + int use_zstd, uint8_t *output); -#endif // TAD_ENCODER_H +#endif // TAD32_ENCODER_H