From 9dc71095a0bfc6ed09c19f9e35940d772fadc89d Mon Sep 17 00:00:00 2001
From: minjaesong <alswo9628@gmail.com>
Date: Fri, 24 Oct 2025 05:31:38 +0900
Subject: [PATCH] TAD: now processing entirely in float

---
 terranmon.txt               |  38 ++-----
 video_encoder/Makefile      |  12 ++-
 video_encoder/decoder_tad.c | 160 ++++++++++-----------------
 video_encoder/encoder_tad.c | 210 ++++++++----------------------------
 video_encoder/encoder_tad.h |  38 +++----
 5 files changed, 139 insertions(+), 319 deletions(-)

diff --git a/terranmon.txt b/terranmon.txt
index dbad9b5..86b32d1 100644
--- a/terranmon.txt
+++ b/terranmon.txt
@@ -1550,7 +1550,7 @@ is stored separately and quality index is shared with that of the video.
 ## Audio Properties
 - **Sample Rate**: 32000 Hz (TSVM audio hardware native format)
 - **Channels**: 2 (stereo)
-- **Input Format**: PCM16LE (16-bit signed little-endian PCM)
+- **Input Format**: PCM32fLE (32-bit float little-endian PCM)
 - **Preprocessing**: 16 Hz highpass filter applied during extraction
 - **Internal Representation**: Signed PCM8 with error-diffusion dithering
 - **Chunk Size**: Variable (1024-32768+ samples per channel, must be power of 2)
@@ -1565,8 +1565,6 @@ Default is 32768 samples (65536 total samples, 1.024 seconds).
 If the audio duration doesn't align to chunk boundaries, the final chunk can use
 a smaller power-of-2 size or be zero-padded.
 
-    uint8  Significance Map Method: always 1 (2-bit twobitmap)
-    uint8  Compression Flag: 1=Zstd compressed, 0=uncompressed
     uint16 Sample Count: number of samples per channel (must be power of 2, min 1024)
     uint32 Chunk Payload Size: size of following payload in bytes
     *      Chunk Payload: encoded M/S stereo data (Zstd compressed if flag set)
@@ -1592,13 +1590,9 @@ as int16 in the order they appear.
 
 ## Encoding Pipeline
 
-### Step 1: PCM16 to PCM8 Conversion with Error-Diffusion Dithering
-Input stereo PCM16LE is converted to signed PCM8 using error-diffusion dithering
-to minimize quantization noise:
-
-    dithered_value = pcm16_value / 256 + error
-    pcm8_value = clamp(round(dithered_value), -128, 127)
-    error = dithered_value - pcm8_value
+### Step 1: PCM32f Input (PCM8 Conversion with Error-Diffusion Dithering Happens at Decode)
+Input stereo PCM32fLE stays in float throughout encoding. The decoder converts to 8-bit PCM
+using second-order noise-shaped error-diffusion dithering to minimize quantization noise.
 
 Error is propagated to the next sample (alternating between left/right channels).
 
@@ -1632,18 +1626,7 @@ For 32768 samples with 14 levels: boundaries at 0, 2, 4, 8, 16, 32, 64, 128, 256
 For 1024 samples with 9 levels: boundaries at 0, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024
 
 ### Step 4: Frequency-Dependent Quantization
-DWT coefficients are quantized using perceptually-tuned frequency-dependent weights:
-
-    Base Weights by Level:
-    Level 0 (16-8 KHz):     3.0
-    Level 1 (8-4 KHz):      2.0
-    Level 2 (4-2 KHz):      1.5
-    Level 3 (2-1 KHz):      1.0
-    Level 4 (1-0.5 KHz):    0.75
-    Level 5 (0.5-0.25 KHz): 0.5
-    Level 6-7 (DC-0.25 KHz): 0.25
-
-Quality scaling factor: 1.0 + (5 - quality) * 0.3
+DWT coefficients are quantized using perceptually-tuned frequency-dependent weights.
 
 Final quantization step: base_weight * quality_scale
 
@@ -1690,13 +1673,8 @@ Convert Mid/Side back to Left/Right stereo:
     Left = Mid + Side
     Right = Mid - Side
 
-### Step 6: PCM8 to PCM16 Upsampling
-Convert signed PCM8 back to PCM16LE by multiplying by 256:
-
-    pcm16_value = pcm8_value * 256
-
 ## Compression Performance
-- **Target Ratio**: 2:1 against PCMu8 (4:1 against PCM16LE input)
+- **Target Ratio**: 2:1 against PCMu8
 - **Achieved Ratio**: 2.51:1 against PCMu8 at quality level 3
 - **Quality**: Perceptually transparent at Q3+, preserves full 0-16 KHz bandwidth
 - **Sparsity**: 86.9% zeros in Mid channel, 97.8% in Side channel (typical)
@@ -1721,10 +1699,10 @@ This allows TAV video files to embed TAD-compressed audio using packet type 0x24
 TAD encoder uses two-pass FFmpeg extraction for optimal quality:
 
     # Pass 1: Extract at original sample rate
-    ffmpeg -i input.mp4 -f s16le -ac 2 temp.pcm
+    ffmpeg -i input.mp4 -f f32le -ac 2 temp.pcm
 
     # Pass 2: High-quality resample with SoXR and highpass filter
-    ffmpeg -f s16le -ar {original_rate} -ac 2 -i temp.pcm \
+    ffmpeg -f f32le -ar {original_rate} -ac 2 -i temp.pcm \
            -ar 32000 -af "aresample=resampler=soxr:precision=28:cutoff=0.99,highpass=f=16" \
            output.pcm
 
diff --git a/video_encoder/Makefile b/video_encoder/Makefile
index 0547557..fc79e41 100644
--- a/video_encoder/Makefile
+++ b/video_encoder/Makefile
@@ -78,7 +78,7 @@ debug: $(TARGETS)
 
 # Clean build artifacts
 clean:
-	rm -f $(TARGETS) $(TAD_TARGETS) *.o
+	rm -f $(TARGETS) $(TAD_TARGETS) $(TAD16_TARGETS) $(TAD10_TARGETS) *.o
 
 # Install (copy to PATH)
 install: $(TARGETS) $(TAD_TARGETS)
@@ -106,6 +106,12 @@ help:
 	@echo "  tad          - Build all TAD audio tools (encoder, decoder)"
 	@echo "  encoder_tad  - Build TAD audio encoder"
 	@echo "  decoder_tad  - Build TAD audio decoder"
+	@echo "  tad16        - Build TAD16 tools (PCM16 alternative for comparison)"
+	@echo "  encoder_tad16- Build TAD16 audio encoder (PCM16 version)"
+	@echo "  decoder_tad16- Build TAD16 audio decoder (PCM16 version)"
+	@echo "  tad10        - Build TAD10 tools (PCM10 alternative for comparison)"
+	@echo "  encoder_tad10- Build TAD10 audio encoder (PCM10 version)"
+	@echo "  decoder_tad10- Build TAD10 audio decoder (PCM10 version)"
 	@echo "  debug        - Build with debug symbols"
 	@echo "  clean        - Remove build artifacts"
 	@echo "  install      - Install to /usr/local/bin"
@@ -117,6 +123,8 @@ help:
 	@echo "  make tev           # Build TEV encoder"
 	@echo "  make tav           # Build TAV encoder"
 	@echo "  make tad           # Build all TAD audio tools"
+	@echo "  make tad16         # Build TAD16 tools (for comparison testing)"
+	@echo "  make tad10         # Build TAD10 tools (for comparison testing)"
 	@echo "  sudo make install  # Install all encoders"
 
-.PHONY: all clean install check-deps help debug tad
+.PHONY: all clean install check-deps help debug tad tad16 tad10
diff --git a/video_encoder/decoder_tad.c b/video_encoder/decoder_tad.c
index 7da260f..afb82e5 100644
--- a/video_encoder/decoder_tad.c
+++ b/video_encoder/decoder_tad.c
@@ -12,6 +12,7 @@
 #define DECODER_VENDOR_STRING "Decoder-TAD 20251023"
 
 // TAD format constants (must match encoder)
+#define TAD_COEFF_SCALAR 1024.0f
 #define TAD_DEFAULT_CHUNK_SIZE 32768
 #define TAD_MIN_CHUNK_SIZE 1024
 #define TAD_SAMPLE_RATE 32000
@@ -148,22 +149,58 @@ static void dwt_haar_inverse_multilevel(float *data, int length, int levels) {
 // M/S Stereo Correlation (inverse of decorrelation)
 //=============================================================================
 
-static void ms_correlate(const int8_t *mid, const int8_t *side, uint8_t *left, uint8_t *right, size_t count) {
+// Uniform random in [0, 1)
+static inline float frand01(void) {
+    return (float)rand() / ((float)RAND_MAX + 1.0f);
+}
+
+// TPDF (triangular) noise: difference of two frand01() draws, so nominally in (-1, +1)
+static inline float tpdf1(void) {
+    return (frand01() - frand01());
+}
+// Converts float mid/side to unsigned 8-bit left/right using TPDF dither plus 2nd-order error feedback.
+static void ms_correlate(const float *mid, const float *side, uint8_t *left, uint8_t *right, size_t count, float dither_error[2][2]) {
+    const float b1 = 1.5f;   // 1st feedback coefficient (applied to the most recent error)
+    const float b2 = -0.75f; // 2nd feedback coefficient (applied to the error before that)
+    const float scale = 127.5f; // float sample -> 8-bit code multiplier, applied before rounding
+    const float bias  = 128.0f; // shifts the signed code [-128, 127] into unsigned [0, 255]
+
     for (size_t i = 0; i < count; i++) {
-        // L = M + S, R = M - S
-        int32_t m = mid[i];
-        int32_t s = side[i];
-        int32_t l = m + s;
-        int32_t r = m - s;
+        // Decode M/S → L/R (L = M + S, R = M - S), each limited to [-1, 1]
+        float m = mid[i];
+        float s = side[i];
+        float l = FCLAMP(m + s, -1.0f, 1.0f);
+        float r = FCLAMP(m - s, -1.0f, 1.0f);
 
-        // Clamp to [-128, 127] then convert to unsigned [0, 255]
-        if (l < -128) l = -128;
-        if (l > 127) l = 127;
-        if (r < -128) r = -128;
-        if (r > 127) r = 127;
+        // --- LEFT channel (error history in dither_error[0]) ---
+        float feedbackL = b1 * dither_error[0][0] + b2 * dither_error[0][1]; // weighted sum of the two stored errors
+        float ditherL = 0.5f * tpdf1(); // ±0.5 LSB TPDF
+        float shapedL = l + feedbackL + ditherL / scale; // dither is in LSBs; divide by scale to get sample units
+        shapedL = FCLAMP(shapedL, -1.0f, 1.0f);
 
-        left[i] = (uint8_t)(l + 128);
-        right[i] = (uint8_t)(r + 128);
+        int qL = (int)lrintf(shapedL * scale);
+        if (qL < -128) qL = -128;
+        else if (qL > 127) qL = 127;
+        left[i] = (uint8_t)(qL + bias);
+
+        float qerrL = shapedL - (float)qL / scale; // quantization error, back in float sample units
+        dither_error[0][1] = dither_error[0][0]; // shift history
+        dither_error[0][0] = qerrL;
+
+        // --- RIGHT channel (same steps, error history in dither_error[1]) ---
+        float feedbackR = b1 * dither_error[1][0] + b2 * dither_error[1][1];
+        float ditherR = 0.5f * tpdf1(); // ±0.5 LSB TPDF, drawn independently of the left channel's
+        float shapedR = r + feedbackR + ditherR / scale;
+        shapedR = FCLAMP(shapedR, -1.0f, 1.0f);
+
+        int qR = (int)lrintf(shapedR * scale);
+        if (qR < -128) qR = -128;
+        else if (qR > 127) qR = 127;
+        right[i] = (uint8_t)(qR + bias);
+
+        float qerrR = shapedR - (float)qR / scale; // quantization error, back in float sample units
+        dither_error[1][1] = dither_error[1][0]; // shift history
+        dither_error[1][0] = qerrR;
     }
 }
 
@@ -188,11 +225,10 @@ static void get_quantization_weights(int quality, int dwt_levels, float *weights
         /*12*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
         /*13*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f},
         /*14*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f},
-        /*15*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f},
-        /*16*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f}
+        /*15*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f}
     };
 
-    float quality_scale = 1.0f + FCLAMP((3 - quality) * 0.5f, 0.0f, 1000.0f);
+    float quality_scale = 4.0f * (1.0f + FCLAMP((3 - quality) * 0.5f, 0.0f, 1000.0f)); // same formula as the encoder; dequantization needs identical weights
 
     for (int i = 0; i < dwt_levels; i++) {
-        weights[i] = FCLAMP(base_weights[dwt_levels][i] * quality_scale, 1.0f, 1000.0f);
+        weights[i] = base_weights[dwt_levels][i] * quality_scale; // unclamped: the encoder quantizes with unclamped weights (e.g. 0.2 * 4 = 0.8)
@@ -227,7 +263,7 @@ static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs,
         if (weight_idx >= dwt_levels) weight_idx = dwt_levels - 1;
 
         float weight = weights[weight_idx];
-        coeffs[i] = (float)quantized[i] * weight;
+        coeffs[i] = (float)quantized[i] * weight / TAD_COEFF_SCALAR;
     }
 
     free(sideband_starts);
@@ -237,29 +273,6 @@ static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs,
 // Significance Map Decoding
 //=============================================================================
 
-static size_t decode_sigmap_1bit(const uint8_t *input, int16_t *values, size_t count) {
-    size_t map_bytes = (count + 7) / 8;
-    const uint8_t *map = input;
-    const uint8_t *read_ptr = input + map_bytes;
-
-    uint32_t nonzero_count = *((const uint32_t*)read_ptr);
-    read_ptr += sizeof(uint32_t);
-
-    const int16_t *value_ptr = (const int16_t*)read_ptr;
-    uint32_t value_idx = 0;
-
-    // Reconstruct values
-    for (size_t i = 0; i < count; i++) {
-        if (map[i / 8] & (1 << (i % 8))) {
-            values[i] = value_ptr[value_idx++];
-        } else {
-            values[i] = 0;
-        }
-    }
-
-    return map_bytes + sizeof(uint32_t) + nonzero_count * sizeof(int16_t);
-}
-
 static size_t decode_sigmap_2bit(const uint8_t *input, int16_t *values, size_t count) {
     size_t map_bytes = (count * 2 + 7) / 8;
     const uint8_t *map = input;
@@ -291,48 +304,6 @@ static size_t decode_sigmap_2bit(const uint8_t *input, int16_t *values, size_t c
     return map_bytes + other_idx * sizeof(int16_t);
 }
 
-static size_t decode_sigmap_rle(const uint8_t *input, int16_t *values, size_t count) {
-    const uint8_t *read_ptr = input;
-
-    uint32_t run_count = *((const uint32_t*)read_ptr);
-    read_ptr += sizeof(uint32_t);
-
-    size_t value_idx = 0;
-
-    for (uint32_t run = 0; run < run_count; run++) {
-        // Decode zero run length (varint)
-        uint32_t zero_run = 0;
-        int shift = 0;
-        uint8_t byte;
-
-        do {
-            byte = *read_ptr++;
-            zero_run |= ((uint32_t)(byte & 0x7F) << shift);
-            shift += 7;
-        } while (byte & 0x80);
-
-        // Fill zeros
-        for (uint32_t i = 0; i < zero_run && value_idx < count; i++) {
-            values[value_idx++] = 0;
-        }
-
-        // Read non-zero value
-        int16_t val = *((const int16_t*)read_ptr);
-        read_ptr += sizeof(int16_t);
-
-        if (value_idx < count && val != 0) {
-            values[value_idx++] = val;
-        }
-    }
-
-    // Fill remaining with zeros
-    while (value_idx < count) {
-        values[value_idx++] = 0;
-    }
-
-    return read_ptr - input;
-}
-
 //=============================================================================
 // Chunk Decoding
 //=============================================================================
@@ -381,8 +352,6 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_
     int16_t *quant_side = malloc(sample_count * sizeof(int16_t));
     float *dwt_mid = malloc(sample_count * sizeof(float));
     float *dwt_side = malloc(sample_count * sizeof(float));
-    int8_t *pcm8_mid = malloc(sample_count * sizeof(int8_t));
-    int8_t *pcm8_side = malloc(sample_count * sizeof(int8_t));
     uint8_t *pcm8_left = malloc(sample_count * sizeof(uint8_t));
     uint8_t *pcm8_right = malloc(sample_count * sizeof(uint8_t));
 
@@ -401,23 +370,10 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_
     dwt_haar_inverse_multilevel(dwt_mid, sample_count, dwt_levels);
     dwt_haar_inverse_multilevel(dwt_side, sample_count, dwt_levels);
 
-    // Convert to signed PCM8
-    for (size_t i = 0; i < sample_count; i++) {
-        float m = dwt_mid[i];
-        float s = dwt_side[i];
-
-        // Clamp and round
-        if (m < -128.0f) m = -128.0f;
-        if (m > 127.0f) m = 127.0f;
-        if (s < -128.0f) s = -128.0f;
-        if (s > 127.0f) s = 127.0f;
-
-        pcm8_mid[i] = (int8_t)roundf(m);
-        pcm8_side[i] = (int8_t)roundf(s);
-    }
+    float err[2][2] = {{0,0},{0,0}};
 
     // M/S to L/R correlation
-    ms_correlate(pcm8_mid, pcm8_side, pcm8_left, pcm8_right, sample_count);
+    ms_correlate(dwt_mid, dwt_side, pcm8_left, pcm8_right, sample_count, err);
 
     // Interleave stereo output (PCMu8)
     for (size_t i = 0; i < sample_count; i++) {
@@ -427,7 +383,7 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_
 
     // Cleanup
     free(quant_mid); free(quant_side); free(dwt_mid); free(dwt_side);
-    free(pcm8_mid); free(pcm8_side); free(pcm8_left); free(pcm8_right);
+    free(pcm8_left); free(pcm8_right);
     if (decompressed) free(decompressed);
 
     return 0;
@@ -442,7 +398,7 @@ static void print_usage(const char *prog_name) {
     printf("Options:\n");
     printf("  -i <file>       Input TAD file\n");
     printf("  -o <file>       Output PCMu8 file (raw 8-bit unsigned stereo @ 32kHz)\n");
-    printf("  -q <0-5>        Quality level used during encoding (default: 2)\n");
+    printf("  -q <0-5>        Quality level used during encoding (default: 3)\n");
     printf("  -v              Verbose output\n");
     printf("  -h, --help      Show this help\n");
     printf("\nVersion: %s\n", DECODER_VENDOR_STRING);
@@ -453,7 +409,7 @@ static void print_usage(const char *prog_name) {
 int main(int argc, char *argv[]) {
     char *input_file = NULL;
     char *output_file = NULL;
-    int quality = 2;  // Must match encoder quality
+    int quality = 3;  // Must match encoder quality
     int verbose = 0;
 
     int opt;
diff --git a/video_encoder/encoder_tad.c b/video_encoder/encoder_tad.c
index 552de69..fb7ef73 100644
--- a/video_encoder/encoder_tad.c
+++ b/video_encoder/encoder_tad.c
@@ -1,6 +1,7 @@
-// Created by CuriousTorvald and Claude on 2025-10-23.
-// TAD (Terrarum Advanced Audio) Encoder Library - DWT-based audio compression
-// This file contains only the encoding functions for use by encoder_tad.c and encoder_tav.c
+// Created by CuriousTorvald and Claude on 2025-10-24.
+// TAD32 (Terrarum Advanced Audio - PCM32f version) Encoder Library
+// Alternative version: PCM32f throughout encoding, PCM8 conversion only at decoder
+// This file contains only the encoding functions for comparison testing
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -11,12 +12,9 @@
 #include "encoder_tad.h"
 
 // Forward declarations for internal functions
-static void dwt_haar_forward_1d(float *data, int length);
 static void dwt_dd4_forward_1d(float *data, int length);
-static void dwt_97_forward_1d(float *data, int length);
-static void dwt_haar_forward_multilevel(float *data, int length, int levels);
-static void ms_decorrelate(const int8_t *left, const int8_t *right, int8_t *mid, int8_t *side, size_t count);
-static void convert_pcm16_to_pcm8_dithered(const int16_t *pcm16, int8_t *pcm8, int num_samples, int16_t *dither_error);
+static void dwt_dd4_forward_multilevel(float *data, int length, int levels);
+static void ms_decorrelate_16(const float *left, const float *right, float *mid, float *side, size_t count);
 static void get_quantization_weights(int quality, int dwt_levels, float *weights);
 static int get_deadzone_threshold(int quality);
 static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, size_t count, int quality, int apply_deadzone, int chunk_size, int dwt_levels);
@@ -26,15 +24,13 @@ static inline float FCLAMP(float x, float min, float max) {
     return x < min ? min : (x > max ? max : x);
 }
 
-// Calculate DWT levels from chunk size (non-power-of-2 supported, >= 1024)
+// Calculate DWT levels from chunk size
 static int calculate_dwt_levels(int chunk_size) {
-    if (chunk_size < TAD_MIN_CHUNK_SIZE) {
-        fprintf(stderr, "Error: Chunk size %d is below minimum %d\n", chunk_size, TAD_MIN_CHUNK_SIZE);
+    if (chunk_size < TAD32_MIN_CHUNK_SIZE) {
+        fprintf(stderr, "Error: Chunk size %d is below minimum %d\n", chunk_size, TAD32_MIN_CHUNK_SIZE);
         return -1;
     }
 
-    // For non-power-of-2, find next power of 2 and calculate levels
-    // Then subtract 2 for maximum decomposition
     int levels = 0;
     int size = chunk_size;
     while (size > 1) {
@@ -48,39 +44,13 @@ static int calculate_dwt_levels(int chunk_size) {
         levels++;
     }
 
-    return levels - 2;  // Maximum decomposition leaves 2-sample approximation
+    return levels - 2;  // Maximum decomposition
 }
 
 //=============================================================================
-// Haar DWT Implementation
+// DD-4 DWT Implementation
 //=============================================================================
 
-static void dwt_haar_forward_1d(float *data, int length) {
-    if (length < 2) return;
-
-    float *temp = malloc(length * sizeof(float));
-    int half = (length + 1) / 2;
-
-    // Haar transform: compute averages (low-pass) and differences (high-pass)
-    for (int i = 0; i < half; i++) {
-        if (2 * i + 1 < length) {
-            // Average of adjacent pairs (low-pass)
-            temp[i] = (data[2 * i] + data[2 * i + 1]) / 2.0f;
-            // Difference of adjacent pairs (high-pass)
-            temp[half + i] = (data[2 * i] - data[2 * i + 1]) / 2.0f;
-        } else {
-            // Handle odd length: last sample goes to low-pass
-            temp[i] = data[2 * i];
-            if (half + i < length) {
-                temp[half + i] = 0.0f;
-            }
-        }
-    }
-
-    memcpy(data, temp, length * sizeof(float));
-    free(temp);
-}
-
 // Four-point interpolating Deslauriers-Dubuc (DD-4) wavelet forward 1D transform
 static void dwt_dd4_forward_1d(float *data, int length) {
     if (length < 2) return;
@@ -129,76 +99,8 @@ static void dwt_dd4_forward_1d(float *data, int length) {
     free(temp);
 }
 
-// 1D DWT using lifting scheme for 9/7 irreversible filter
-static void dwt_97_forward_1d(float *data, int length) {
-    if (length < 2) return;
-
-    float *temp = malloc(length * sizeof(float));
-    int half = (length + 1) / 2;
-
-    // Split into even/odd samples
-    for (int i = 0; i < half; i++) {
-        temp[i] = data[2 * i];           // Even (low)
-    }
-    for (int i = 0; i < length / 2; i++) {
-        temp[half + i] = data[2 * i + 1]; // Odd (high)
-    }
-
-    // JPEG2000 9/7 forward lifting steps
-    const float alpha = -1.586134342f;
-    const float beta = -0.052980118f;
-    const float gamma = 0.882911076f;
-    const float delta = 0.443506852f;
-    const float K = 1.230174105f;
-
-    // Step 1: Predict α
-    for (int i = 0; i < length / 2; i++) {
-        if (half + i < length) {
-            float s_curr = temp[i];
-            float s_next = (i + 1 < half) ? temp[i + 1] : s_curr;
-            temp[half + i] += alpha * (s_curr + s_next);
-        }
-    }
-
-    // Step 2: Update β
-    for (int i = 0; i < half; i++) {
-        float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
-        float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr;
-        temp[i] += beta * (d_prev + d_curr);
-    }
-
-    // Step 3: Predict γ
-    for (int i = 0; i < length / 2; i++) {
-        if (half + i < length) {
-            float s_curr = temp[i];
-            float s_next = (i + 1 < half) ? temp[i + 1] : s_curr;
-            temp[half + i] += gamma * (s_curr + s_next);
-        }
-    }
-
-    // Step 4: Update δ
-    for (int i = 0; i < half; i++) {
-        float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
-        float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr;
-        temp[i] += delta * (d_prev + d_curr);
-    }
-
-    // Step 5: Scaling
-    for (int i = 0; i < half; i++) {
-        temp[i] *= K;
-    }
-    for (int i = 0; i < length / 2; i++) {
-        if (half + i < length) {
-            temp[half + i] /= K;
-        }
-    }
-
-    memcpy(data, temp, length * sizeof(float));
-    free(temp);
-}
-
 // Apply multi-level DWT (using DD-4 wavelet)
-static void dwt_haar_forward_multilevel(float *data, int length, int levels) {
+static void dwt_dd4_forward_multilevel(float *data, int length, int levels) {
     int current_length = length;
     for (int level = 0; level < levels; level++) {
         dwt_dd4_forward_1d(data, current_length);
@@ -207,35 +109,16 @@ static void dwt_haar_forward_multilevel(float *data, int length, int levels) {
 }
 
 //=============================================================================
-// M/S Stereo Decorrelation
+// M/S Stereo Decorrelation (PCM32f version)
 //=============================================================================
 
-static void ms_decorrelate(const int8_t *left, const int8_t *right, int8_t *mid, int8_t *side, size_t count) {
+static void ms_decorrelate_16(const float *left, const float *right, float *mid, float *side, size_t count) { // despite the "_16" suffix, all samples here are float
     for (size_t i = 0; i < count; i++) {
         // Mid = (L + R) / 2, Side = (L - R) / 2
-        int32_t l = left[i];
-        int32_t r = right[i];
-        mid[i] = (int8_t)((l + r) / 2);
-        side[i] = (int8_t)((l - r) / 2);
-    }
-}
-
-//=============================================================================
-// PCM16 to Signed PCM8 Conversion with Dithering
-//=============================================================================
-
-static void convert_pcm16_to_pcm8_dithered(const int16_t *pcm16, int8_t *pcm8, int num_samples, int16_t *dither_error) {
-    for (int i = 0; i < num_samples; i++) {
-        for (int ch = 0; ch < 2; ch++) {  // Stereo: L and R
-            int idx = i * 2 + ch;
-            int32_t sample = (int32_t)pcm16[idx];
-            sample += dither_error[ch];
-            int32_t quantized = sample >> 8;
-            if (quantized < -128) quantized = -128;
-            if (quantized > 127) quantized = 127;
-            pcm8[idx] = (int8_t)quantized;
-            dither_error[ch] = sample - (quantized << 8);
-        }
+        float l = left[i];
+        float r = right[i];
+        mid[i] = (l + r) / 2.0f;  // float division: no integer truncation of the channel average
+        side[i] = (l - r) / 2.0f; // half the left-minus-right difference
     }
 }
 
@@ -263,15 +146,15 @@ static void get_quantization_weights(int quality, int dwt_levels, float *weights
         /*15*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f}
     };
 
-    float quality_scale = 1.0f + FCLAMP((3 - quality) * 0.5f, 0.0f, 1000.0f);
+    float quality_scale = 4.0f * (1.0f + FCLAMP((3 - quality) * 0.5f, 0.0f, 1000.0f));
 
     for (int i = 0; i < dwt_levels; i++) {
-        weights[i] = FCLAMP(base_weights[dwt_levels][i] * quality_scale, 1.0f, 1000.0f);
+        weights[i] = base_weights[dwt_levels][i] * quality_scale;
     }
 }
 
 static int get_deadzone_threshold(int quality) {
-    const int thresholds[] = {1,1,0,0,0,0};  // Q0 to Q5
+    const int thresholds[] = {1,1,1,1,1,1};  // Q0 to Q5
     return thresholds[quality];
 }
 
@@ -302,7 +185,7 @@ static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, s
         if (weight_idx >= dwt_levels) weight_idx = dwt_levels - 1;
 
         float weight = weights[weight_idx];
-        float val = coeffs[i] / weight;
+        float val = coeffs[i] / weight * TAD32_COEFF_SCALAR;
         int16_t quant_val = (int16_t)roundf(val);
 
         if (apply_deadzone && sideband >= dwt_levels - 1) {
@@ -359,8 +242,8 @@ static size_t encode_sigmap_2bit(const int16_t *values, size_t count, uint8_t *o
 // Public API: Chunk Encoding
 //=============================================================================
 
-size_t tad_encode_chunk(const int16_t *pcm16_stereo, size_t num_samples, int quality,
-                        int use_zstd, uint8_t *output) {
+size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, int quality,
+                          int use_zstd, uint8_t *output) {
     // Calculate DWT levels from chunk size
     int dwt_levels = calculate_dwt_levels(num_samples);
     if (dwt_levels < 0) {
@@ -368,12 +251,11 @@ size_t tad_encode_chunk(const int16_t *pcm16_stereo, size_t num_samples, int qua
         return 0;
     }
 
-    // Allocate working buffers
-    int8_t *pcm8_stereo = malloc(num_samples * 2 * sizeof(int8_t));
-    int8_t *pcm8_left = malloc(num_samples * sizeof(int8_t));
-    int8_t *pcm8_right = malloc(num_samples * sizeof(int8_t));
-    int8_t *pcm8_mid = malloc(num_samples * sizeof(int8_t));
-    int8_t *pcm8_side = malloc(num_samples * sizeof(int8_t));
+    // Allocate working buffers (PCM32f throughout, int32 coefficients)
+    float *pcm32_left = malloc(num_samples * sizeof(float));
+    float *pcm32_right = malloc(num_samples * sizeof(float));
+    float *pcm32_mid = malloc(num_samples * sizeof(float));
+    float *pcm32_side = malloc(num_samples * sizeof(float));
 
     float *dwt_mid = malloc(num_samples * sizeof(float));
     float *dwt_side = malloc(num_samples * sizeof(float));
@@ -381,34 +263,30 @@ size_t tad_encode_chunk(const int16_t *pcm16_stereo, size_t num_samples, int qua
     int16_t *quant_mid = malloc(num_samples * sizeof(int16_t));
     int16_t *quant_side = malloc(num_samples * sizeof(int16_t));
 
-    // Step 1: Convert PCM16 to signed PCM8 with dithering
-    int16_t dither_error[2] = {0, 0};
-    convert_pcm16_to_pcm8_dithered(pcm16_stereo, pcm8_stereo, num_samples, dither_error);
-
-    // Deinterleave stereo
+    // Step 1: Deinterleave stereo
     for (size_t i = 0; i < num_samples; i++) {
-        pcm8_left[i] = pcm8_stereo[i * 2];
-        pcm8_right[i] = pcm8_stereo[i * 2 + 1];
+        pcm32_left[i] = pcm32_stereo[i * 2];
+        pcm32_right[i] = pcm32_stereo[i * 2 + 1];
     }
 
     // Step 2: M/S decorrelation
-    ms_decorrelate(pcm8_left, pcm8_right, pcm8_mid, pcm8_side, num_samples);
+    ms_decorrelate_16(pcm32_left, pcm32_right, pcm32_mid, pcm32_side, num_samples);
 
     // Step 3: Convert to float and apply DWT
     for (size_t i = 0; i < num_samples; i++) {
-        dwt_mid[i] = (float)pcm8_mid[i];
-        dwt_side[i] = (float)pcm8_side[i];
+        dwt_mid[i] = pcm32_mid[i];
+        dwt_side[i] = pcm32_side[i];
     }
 
-    dwt_haar_forward_multilevel(dwt_mid, num_samples, dwt_levels);
-    dwt_haar_forward_multilevel(dwt_side, num_samples, dwt_levels);
+    dwt_dd4_forward_multilevel(dwt_mid, num_samples, dwt_levels);
+    dwt_dd4_forward_multilevel(dwt_side, num_samples, dwt_levels);
 
     // Step 4: Quantize with frequency-dependent weights and dead zone
     quantize_dwt_coefficients(dwt_mid, quant_mid, num_samples, quality, 1, num_samples, dwt_levels);
     quantize_dwt_coefficients(dwt_side, quant_side, num_samples, quality, 1, num_samples, dwt_levels);
 
-    // Step 5: Encode with 2-bit significance map
-    uint8_t *temp_buffer = malloc(num_samples * 4 * sizeof(int16_t));
+    // Step 5: Encode with 2-bit significance map (32-bit version)
+    uint8_t *temp_buffer = malloc(num_samples * 4 * sizeof(int32_t));
     size_t mid_size = encode_sigmap_2bit(quant_mid, num_samples, temp_buffer);
     size_t side_size = encode_sigmap_2bit(quant_side, num_samples, temp_buffer + mid_size);
 
@@ -429,13 +307,13 @@ size_t tad_encode_chunk(const int16_t *pcm16_stereo, size_t num_samples, int qua
         size_t zstd_bound = ZSTD_compressBound(uncompressed_size);
         uint8_t *zstd_buffer = malloc(zstd_bound);
 
-        payload_size = ZSTD_compress(zstd_buffer, zstd_bound, temp_buffer, uncompressed_size, TAD_ZSTD_LEVEL);
+        payload_size = ZSTD_compress(zstd_buffer, zstd_bound, temp_buffer, uncompressed_size, TAD32_ZSTD_LEVEL);
 
         if (ZSTD_isError(payload_size)) {
             fprintf(stderr, "Error: Zstd compression failed: %s\n", ZSTD_getErrorName(payload_size));
             free(zstd_buffer);
-            free(pcm8_stereo); free(pcm8_left); free(pcm8_right);
-            free(pcm8_mid); free(pcm8_side); free(dwt_mid); free(dwt_side);
+            free(pcm32_left); free(pcm32_right);
+            free(pcm32_mid); free(pcm32_side); free(dwt_mid); free(dwt_side);
             free(quant_mid); free(quant_side); free(temp_buffer);
             return 0;
         }
@@ -451,8 +329,8 @@ size_t tad_encode_chunk(const int16_t *pcm16_stereo, size_t num_samples, int qua
     write_ptr += payload_size;
 
     // Cleanup
-    free(pcm8_stereo); free(pcm8_left); free(pcm8_right);
-    free(pcm8_mid); free(pcm8_side); free(dwt_mid); free(dwt_side);
+    free(pcm32_left); free(pcm32_right);
+    free(pcm32_mid); free(pcm32_side); free(dwt_mid); free(dwt_side);
     free(quant_mid); free(quant_side); free(temp_buffer);
 
     return write_ptr - output;
diff --git a/video_encoder/encoder_tad.h b/video_encoder/encoder_tad.h
index f9b5443..29542c9 100644
--- a/video_encoder/encoder_tad.h
+++ b/video_encoder/encoder_tad.h
@@ -1,40 +1,40 @@
-#ifndef TAD_ENCODER_H
-#define TAD_ENCODER_H
+#ifndef TAD32_ENCODER_H
+#define TAD32_ENCODER_H
 
 #include <stdint.h>
 #include <stddef.h>
 
-// TAD (Terrarum Advanced Audio) Encoder
+// TAD32 (Terrarum Advanced Audio - PCM32f version) Encoder
 // DWT-based perceptual audio codec for TSVM
+// Alternative version: PCM32f throughout encoding, PCM8 conversion only at decoder
 
 // Constants
-#define TAD_MIN_CHUNK_SIZE 1024       // Minimum: 1024 samples (supports non-power-of-2)
-#define TAD_SAMPLE_RATE 32000
-#define TAD_CHANNELS 2  // Stereo
-#define TAD_SIGMAP_2BIT 1  // 2-bit: 00=0, 01=+1, 10=-1, 11=other
-#define TAD_QUALITY_MIN 0
-#define TAD_QUALITY_MAX 5
-#define TAD_QUALITY_DEFAULT 3
-#define TAD_ZSTD_LEVEL 7
+#define TAD32_COEFF_SCALAR 1024.0f      // multiplier applied to weighted DWT coefficients before rounding to int16
+#define TAD32_MIN_CHUNK_SIZE 1024       // Minimum: 1024 samples
+#define TAD32_SAMPLE_RATE 32000         // Hz
+#define TAD32_CHANNELS 2  // Stereo
+#define TAD32_SIGMAP_2BIT 1  // 2-bit: 00=0, 01=+1, 10=-1, 11=other
+#define TAD32_QUALITY_MIN 0             // lowest quality level
+#define TAD32_QUALITY_MAX 5             // highest quality level
+#define TAD32_QUALITY_DEFAULT 3
+#define TAD32_ZSTD_LEVEL 7              // compression level passed to ZSTD_compress
 
 /**
- * Encode audio chunk with TAD codec
+ * Encode audio chunk with TAD32 codec (PCM32f version)
  *
- * @param pcm16_stereo  Input PCM16LE stereo samples (interleaved L,R)
- * @param num_samples   Number of samples per channel (supports non-power-of-2, min 1024)
+ * @param pcm32_stereo  Input PCM32fLE stereo samples (interleaved L,R)
+ * @param num_samples   Number of samples per channel (min 1024)
  * @param quality       Quality level 0-5 (0=lowest, 5=highest)
  * @param use_zstd      1=enable Zstd compression, 0=disable
  * @param output        Output buffer (must be large enough)
  * @return              Number of bytes written to output, or 0 on error
  *
  * Output format:
- *   uint8  sigmap_method (always 1 = 2-bit twobitmap)
- *   uint8  compressed_flag (1=Zstd, 0=raw)
  *   uint16 sample_count (samples per channel)
  *   uint32 payload_size (bytes in payload)
  *   *      payload (encoded M/S data, optionally Zstd-compressed)
  */
-size_t tad_encode_chunk(const int16_t *pcm16_stereo, size_t num_samples, int quality,
-                        int use_zstd, uint8_t *output);
+size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, int quality,
+                          int use_zstd, uint8_t *output);
 
-#endif // TAD_ENCODER_H
+#endif // TAD32_ENCODER_H