tav: librarying

2026-06-10 06:54:04 +09:00 · 2025-12-05 03:39:32 +09:00
parent d3cc05789f
commit 94ae24e9e4
32 changed files with 7073 additions and 14028 deletions
--- a/video_encoder/lib/libtaddec/decoder_tad.c
+++ b/video_encoder/lib/libtaddec/decoder_tad.c
--- a/video_encoder/lib/libtadenc/encoder_tad.c
+++ b/video_encoder/lib/libtadenc/encoder_tad.c
--- a/video_encoder/lib/libtavdec/tav_video_decoder.c
+++ b/video_encoder/lib/libtavdec/tav_video_decoder.c
--- a/video_encoder/lib/libtavenc/README.md
+++ b/video_encoder/lib/libtavenc/README.md
@@ -0,0 +1,354 @@
+# libtavenc - TAV Video Encoder Library
+
+**libtavenc** is a high-performance video encoding library implementing the TSVM Advanced Video (TAV) codec. It provides a clean C API for encoding RGB24 video frames using discrete wavelet transform (DWT) with perceptual quantization and GOP-based temporal compression.
+
+## Features
+
+- **Multiple Wavelet Types**: CDF 5/3, CDF 9/7, CDF 13/7, DD-4, Haar
+- **3D DWT GOP Encoding**: Temporal + spatial wavelet compression
+- **Perceptual Quantization**: HVS-optimized coefficient scaling
+- **EZBC Entropy Coding**: Efficient coefficient compression with Zstd
+- **Multi-threading**: Internal thread pool for optimal performance
+- **Color Spaces**: YCoCg-R (default) and ICtCp (for HDR)
+- **Quality Levels**: 0-5 (0=lowest/smallest, 5=highest/largest)
+
+## Building
+
+```bash
+# Build static library
+make lib/libtavenc.a
+
+# Build with encoder CLI
+make encoder_tav
+
+# Install library and headers
+make install-libs PREFIX=/usr/local
+```
+
+## Quick Start
+
+### Basic Encoding
+
+```c
+#include "tav_encoder_lib.h"
+#include <stdio.h>
+
+int main() {
+    // Initialize encoder parameters
+    tav_encoder_params_t params;
+    tav_encoder_params_init(&params, 1920, 1080);
+
+    // Configure encoding options
+    params.fps_num = 60;
+    params.fps_den = 1;
+    params.wavelet_type = 1;        // CDF 9/7 (default)
+    params.quality_y = 3;            // Quality level 3
+    params.quality_co = 3;
+    params.quality_cg = 3;
+    params.enable_temporal_dwt = 1;  // Enable 3D GOP encoding
+    params.gop_size = 0;             // Auto-calculate (typically 16-24)
+    params.num_threads = 4;          // 4 worker threads
+
+    // Create encoder context
+    tav_encoder_context_t *ctx = tav_encoder_create(&params);
+    if (!ctx) {
+        fprintf(stderr, "Failed to create encoder\n");
+        return -1;
+    }
+
+    // Get actual parameters (with auto-calculated values)
+    tav_encoder_get_params(ctx, &params);
+    printf("GOP size: %d frames\n", params.gop_size);
+
+    // Encode frames
+    uint8_t *rgb_frame = /* ... load RGB24 frame ... */;
+    tav_encoder_packet_t *packet;
+
+    for (int i = 0; i < num_frames; i++) {
+        int result = tav_encoder_encode_frame(ctx, rgb_frame, i, &packet);
+
+        if (result == 1) {
+            // Packet ready (GOP completed)
+            fwrite(packet->data, 1, packet->size, outfile);
+            tav_encoder_free_packet(packet);
+        }
+        else if (result == 0) {
+            // Frame buffered, waiting for GOP to fill
+        }
+        else {
+            // Error
+            fprintf(stderr, "Encoding error: %s\n", tav_encoder_get_error(ctx));
+            break;
+        }
+    }
+
+    // Flush remaining frames
+    while (tav_encoder_flush(ctx, &packet) == 1) {
+        fwrite(packet->data, 1, packet->size, outfile);
+        tav_encoder_free_packet(packet);
+    }
+
+    // Cleanup
+    tav_encoder_free(ctx);
+    return 0;
+}
+```
+
+### Stateless GOP Encoding (Multi-threaded)
+
+The library provides `tav_encoder_encode_gop()` for stateless GOP encoding, perfect for multi-threaded applications:
+
+```c
+#include "tav_encoder_lib.h"
+#include <pthread.h>
+
+typedef struct {
+    tav_encoder_params_t params;
+    uint8_t **rgb_frames;
+    int num_frames;
+    int *frame_numbers;
+    tav_encoder_packet_t *output_packet;
+} gop_encode_job_t;
+
+void *encode_gop_thread(void *arg) {
+    gop_encode_job_t *job = (gop_encode_job_t *)arg;
+
+    // Create thread-local encoder context
+    tav_encoder_context_t *ctx = tav_encoder_create(&job->params);
+    if (!ctx) {
+        return NULL;
+    }
+
+    // Encode entire GOP at once (stateless, thread-safe)
+    tav_encoder_encode_gop(ctx,
+                           (const uint8_t **)job->rgb_frames,
+                           job->num_frames,
+                           job->frame_numbers,
+                           &job->output_packet);
+
+    tav_encoder_free(ctx);
+    return NULL;
+}
+
+int main() {
+    // Setup parameters
+    tav_encoder_params_t params;
+    tav_encoder_params_init(&params, 1920, 1080);
+    params.enable_temporal_dwt = 1;
+    params.gop_size = 24;
+
+    // Create worker threads
+    pthread_t threads[4];
+    gop_encode_job_t jobs[4];
+
+    for (int i = 0; i < 4; i++) {
+        jobs[i].params = params;
+        jobs[i].output_packet = NULL;  // stays NULL if the worker fails before producing a packet
+        jobs[i].rgb_frames = /* ... load GOP frames ... */;
+        jobs[i].num_frames = 24;
+        jobs[i].frame_numbers = /* ... frame indices ... */;
+
+        pthread_create(&threads[i], NULL, encode_gop_thread, &jobs[i]);
+    }
+
+    // Wait for completion
+    for (int i = 0; i < 4; i++) {
+        pthread_join(threads[i], NULL);
+
+        // Write output packet
+        if (jobs[i].output_packet) {
+            fwrite(jobs[i].output_packet->data, 1,
+                   jobs[i].output_packet->size, outfile);
+            tav_encoder_free_packet(jobs[i].output_packet);
+        }
+    }
+
+    return 0;
+}
+```
+
+## API Reference
+
+### Context Management
+
+#### `tav_encoder_create()`
+Creates an encoder context from the specified parameters. Allocates internal buffers and initializes the thread pool if multi-threading is enabled.
+
+**Returns**: Encoder context or NULL on failure
+
+#### `tav_encoder_free()`
+Frees encoder context and all resources. Any unflushed GOP frames are lost.
+
+#### `tav_encoder_get_error()`
+Returns last error message string.
+
+#### `tav_encoder_get_params()`
+Gets encoder parameters with calculated values (e.g., auto-calculated GOP size, decomposition levels).
+
+### Frame Encoding
+
+#### `tav_encoder_encode_frame()`
+Encodes single RGB24 frame. Frames are buffered until GOP is full.
+
+**Parameters**:
+- `rgb_frame`: RGB24 planar format `[R...][G...][B...]`, width×height×3 bytes
+- `frame_pts`: Presentation timestamp (frame number or time)
+- `packet`: Output packet pointer (NULL if GOP not ready)
+
+**Returns**:
+- `1`: Packet ready (GOP completed)
+- `0`: Frame buffered, waiting for more frames
+- `-1`: Error
+
+#### `tav_encoder_flush()`
+Flushes remaining buffered frames and encodes final GOP. Call at end of stream.
+
+**Returns**:
+- `1`: Packet ready
+- `0`: No more packets
+- `-1`: Error
+
+#### `tav_encoder_encode_gop()`
+Stateless GOP encoding. Thread-safe with separate contexts.
+
+**Parameters**:
+- `rgb_frames`: Array of RGB24 frames `[frame][width×height×3]`
+- `num_frames`: Number of frames in GOP (1-24)
+- `frame_numbers`: Frame indices for timecodes (can be NULL)
+- `packet`: Output packet pointer
+
+**Returns**: `1` on success, `-1` on error
+
+### Packet Management
+
+#### `tav_encoder_free_packet()`
+Frees packet returned by encoding functions.
+
+## Encoder Parameters
+
+### Video Dimensions
+- `width`, `height`: Frame dimensions (must be even)
+- `fps_num`, `fps_den`: Framerate (e.g., 60/1 for 60fps)
+
+### Wavelet Configuration
+- `wavelet_type`: Spatial wavelet
+  - `0`: CDF 5/3 (reversible, lossless-capable)
+  - `1`: CDF 9/7 (default, best compression)
+  - `2`: CDF 13/7 (experimental)
+  - `16`: DD-4 (four-point interpolating)
+  - `255`: Haar (demonstration)
+- `temporal_wavelet`: Temporal wavelet for 3D DWT
+  - `0`: Haar (default for sports/high motion)
+  - `1`: CDF 5/3 (smooth motion)
+- `decomp_levels`: Spatial DWT levels (0=auto, typically 6)
+- `temporal_levels`: Temporal DWT levels (0=auto, typically 2 for 8-frame GOPs)
+
+### Color Space
+- `channel_layout`:
+  - `0`: YCoCg-R (default, efficient chroma)
+  - `1`: ICtCp (for HDR/BT.2100 sources)
+- `perceptual_tuning`: 1=enable HVS perceptual quantization (default), 0=uniform
+
+### GOP Configuration
+- `enable_temporal_dwt`: 1=enable 3D DWT GOP encoding (default), 0=intra-only I-frames
+- `gop_size`: Frames per GOP (8, 16, or 24; 0=auto based on framerate)
+- `enable_two_pass`: 1=enable two-pass with scene change detection (default), 0=single-pass
+
+### Quality Control
+- `quality_y`: Luma quality (0-5, default: 3)
+- `quality_co`: Orange chrominance quality (0-5, default: 3)
+- `quality_cg`: Green chrominance quality (0-5, default: 3)
+- `dead_zone_threshold`: Dead-zone quantization (0=disabled, 1-10 typical)
+
+### Entropy Coding
+- `entropy_coder`:
+  - `0`: Twobitmap (default, fast)
+  - `1`: EZBC (better compression for high-quality)
+- `zstd_level`: Zstd compression level (3-22, default: 7)
+
+### Multi-threading
+- `num_threads`: Worker threads
+  - `0`: Single-threaded (default for CLI)
+  - `-1`: Auto-detect CPU cores
+  - `1-16`: Explicit thread count
+
+### Encoder Presets
+- `encoder_preset`: Preset flags
+  - `0x01`: Sports mode (finer temporal quantization)
+  - `0x02`: Anime mode (disable grain)
+
+## TAV Packet Types
+
+Output packets carry a type field indicating their content:
+
+- `0x10`: I-frame (intra-only, single frame)
+- `0x11`: P-frame (delta from previous)
+- `0x12`: GOP unified (3D DWT, multiple frames)
+- `0x24`: TAD audio (DWT-based audio codec)
+- `0xF0`: Loop point start
+- `0xFC`: GOP sync (frame count marker)
+- `0xFD`: Timecode metadata
+
+## Performance Notes
+
+### Threading Model
+- Library manages internal thread pool when `num_threads > 0`
+- GOP encoding is parallelized across worker threads
+- For CLI tools: use `num_threads=0` (single-threaded) to avoid double-threading with external parallelism
+- For library integration: use `num_threads=-1` or explicit count for optimal performance
+
+### Memory Usage
+- Each encoder context allocates:
+  - GOP buffer: `gop_size × width × height × 3` bytes (RGB frames)
+  - DWT coefficients: `~width × height × 12` bytes per channel
+  - Thread pool: `num_threads × (GOP buffer + workspace)`
+- Typical 1920×1080 encoder with GOP=24: ~180 MB per context
+
+### Encoding Speed
+- Single-threaded: 10-15 fps (1920×1080 on modern CPU)
+- Multi-threaded (4 threads): 30-40 fps
+- GOP size affects latency: larger GOP = higher latency, better compression
+
+## Integration with TAD Audio
+
+TAV files typically include TAD-compressed audio. Link with both libraries:
+
+```c
+#include "tav_encoder_lib.h"
+#include "encoder_tad.h"
+
+// Encode video frame
+tav_encoder_encode_frame(video_ctx, rgb_frame, pts, &video_packet);
+
+// Encode audio chunk (32kHz stereo, float samples)
+tad32_encode_chunk(audio_ctx, pcm_samples, num_samples, &audio_data, &audio_size);
+
+// Mux both into TAV file (interleave by frame PTS)
+```
+
+## Error Handling
+
+Encoding functions return `-1` on error and set an error message that can be retrieved with `tav_encoder_get_error()`:
+
+```c
+if (tav_encoder_encode_frame(ctx, frame, pts, &packet) < 0) {
+    fprintf(stderr, "Encoding failed: %s\n", tav_encoder_get_error(ctx));
+    // Handle error
+}
+```
+
+## Limitations
+
+- Maximum resolution: 8192×8192
+- GOP size: 1-48 frames
+- Single-tile encoding only (no spatial tiling)
+- Requires even width and height
+
+## License
+
+Part of the TSVM project.
+
+## See Also
+
+- `include/tav_encoder_lib.h` - Complete API documentation
+- `src/encoder_tav.c` - CLI reference implementation
+- `lib/libtadenc/` - TAD audio encoder library
--- a/video_encoder/lib/libtavenc/tav_encoder_color.c
+++ b/video_encoder/lib/libtavenc/tav_encoder_color.c
@@ -0,0 +1,255 @@
+/**
+ * TAV Encoder - Color Space Conversion Library
+ *
+ * Provides RGB <-> YCoCg-R and RGB <-> ICtCp color space conversions
+ * for the TSVM Advanced Video (TAV) encoder.
+ *
+ * Extracted from encoder_tav.c as part of library refactoring.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <math.h>
+
+// =============================================================================
+// Utility Functions
+// =============================================================================
+
+// Limit an integer to the range [min, max]; the lower bound is tested first.
+static inline int CLAMP(int x, int min, int max) {
+    if (x < min) {
+        return min;
+    }
+    if (x > max) {
+        return max;
+    }
+    return x;
+}
+
+// Limit a float to the range [min, max]; the lower bound is tested first.
+// A NaN input fails both comparisons and is returned unchanged.
+static inline float FCLAMP(float x, float min, float max) {
+    if (x < min) {
+        return min;
+    }
+    if (x > max) {
+        return max;
+    }
+    return x;
+}
+
+// Round to the nearest integer by flooring v + 0.5 (exact halves round up).
+static inline int iround(double v) {
+    const double shifted = v + 0.5;
+    return (int)floor(shifted);
+}
+
+// =============================================================================
+// sRGB Gamma Helpers
+// =============================================================================
+
+// Map a gamma-encoded sRGB value to linear light: a linear segment at or
+// below 0.04045, a 2.4-exponent power curve above it.
+static inline double srgb_linearise(double val) {
+    return (val <= 0.04045)
+        ? val / 12.92
+        : pow((val + 0.055) / 1.055, 2.4);
+}
+
+// Map a linear-light value to gamma-encoded sRGB: a linear segment at or
+// below 0.0031308, a 1/2.4-exponent power curve above it.
+static inline double srgb_unlinearise(double val) {
+    return (val <= 0.0031308)
+        ? 12.92 * val
+        : 1.055 * pow(val, 1.0/2.4) - 0.055;
+}
+
+// =============================================================================
+// HLG (Hybrid Log-Gamma) Transfer Functions
+// =============================================================================
+
+// HLG opto-electronic transfer function: square-root segment for
+// E <= 1/12, logarithmic segment above it.
+static inline double HLG_OETF(double E) {
+    const double a = 0.17883277;
+    const double b = 0.28466892;  // 1 - 4*a
+    const double c = 0.55991073;  // 0.5 - a*ln(4*a)
+
+    if (E > 1.0/12.0) {
+        return a * log(12.0 * E - b) + c;
+    }
+    return sqrt(3.0 * E);
+}
+
+// Inverse of the HLG curve: quadratic segment for Ep <= 0.5,
+// exponential segment above it.
+static inline double HLG_EOTF(double Ep) {
+    const double a = 0.17883277;
+    const double b = 0.28466892;
+    const double c = 0.55991073;
+
+    if (Ep > 0.5) {
+        return (exp((Ep - c) / a) + b) / 12.0;
+    }
+    return Ep * Ep / 3.0;
+}
+
+// =============================================================================
+// Color Space Transformation Matrices
+// =============================================================================
+
+// BT.2100 RGB -> LMS matrix
+// Coefficients are written as integer numerators over 4096; each row sums to 1.
+static const double M_RGB_TO_LMS[3][3] = {
+    {1688.0/4096, 2146.0/4096,  262.0/4096},
+    { 683.0/4096, 2951.0/4096,  462.0/4096},
+    {  99.0/4096,  309.0/4096, 3688.0/4096}
+};
+
+// LMS -> RGB inverse matrix
+// Stored as precomputed decimal constants (not derived from M_RGB_TO_LMS at runtime).
+static const double M_LMS_TO_RGB[3][3] = {
+    { 6.1723815689243215, -5.319534979827695,   0.14699442094633924},
+    {-1.3243428148026244,  2.560286104841917,  -0.2359203727576164},
+    {-0.011819739235953752, -0.26473549971186555, 1.2767952602537955}
+};
+
+// ICtCp matrix (L' M' S' -> I Ct Cp) - BT.2100 constants
+// Row 0 yields I, row 1 yields Ct, row 2 yields Cp.
+// NOTE(review): these numerators look like the HLG variant of the BT.2100
+// ICtCp matrix - confirm against the specification.
+static const double M_LMSPRIME_TO_ICTCP[3][3] = {
+    { 2048.0/4096.0,   2048.0/4096.0,     0.0          },
+    { 3625.0/4096.0,  -7465.0/4096.0,  3840.0/4096.0   },
+    { 9500.0/4096.0,  -9212.0/4096.0,  -288.0/4096.0   }
+};
+
+// ICtCp -> L' M' S' inverse matrix
+// Stored as precomputed decimal constants (not derived from M_LMSPRIME_TO_ICTCP at runtime).
+static const double M_ICTCP_TO_LMSPRIME[3][3] = {
+    { 1.0,   0.015718580108730416,   0.2095810681164055 },
+    { 1.0,  -0.015718580108730416,  -0.20958106811640548},
+    { 1.0,   1.0212710798422344,    -0.6052744909924316 }
+};
+
+// =============================================================================
+// YCoCg-R Color Space Conversion
+// =============================================================================
+
+/**
+ * Convert an RGB24 frame to YCoCg-R planes.
+ *
+ * Each pixel goes through the YCoCg-R lifting steps, evaluated in float
+ * with no rounding:
+ * - Co  = R - B
+ * - tmp = B + Co/2
+ * - Cg  = G - tmp
+ * - Y   = tmp + Cg/2
+ *
+ * Pixel counting and indexing use size_t so that width*height and the
+ * byte offset i*3 cannot overflow int on large frames.
+ *
+ * @param rgb    Input RGB24 data, interleaved (RGBRGBRGB...), width*height*3 bytes
+ * @param y      Output luma channel, width*height floats
+ * @param co     Output orange chrominance, width*height floats
+ * @param cg     Output green chrominance, width*height floats
+ * @param width  Frame width in pixels
+ * @param height Frame height in pixels
+ *
+ * Nothing is written when width or height is not positive.
+ */
+void tav_rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg,
+                      int width, int height)
+{
+    if (width <= 0 || height <= 0) {
+        return;
+    }
+
+    const size_t total_pixels = (size_t)width * (size_t)height;
+
+    for (size_t i = 0; i < total_pixels; i++) {
+        const uint8_t *px = &rgb[i * 3];
+        const float r = px[0];
+        const float g = px[1];
+        const float b = px[2];
+
+        // YCoCg-R lifting steps
+        const float orange = r - b;
+        const float tmp = b + orange * 0.5f;
+        const float green = g - tmp;
+
+        co[i] = orange;
+        cg[i] = green;
+        y[i] = tmp + green * 0.5f;
+    }
+}
+
+// =============================================================================
+// ICtCp Color Space Conversion (HDR-capable)
+// =============================================================================
+
+/**
+ * Convert one sRGB8 pixel to ICtCp using the HLG transfer function.
+ *
+ * Pipeline: sRGB decode -> RGB-to-LMS matrix -> HLG OETF per channel ->
+ * L'M'S'-to-ICtCp matrix -> scale by 255 (chroma offset by 127.5) and
+ * clamp to 0..255 through FCLAMP.
+ *
+ * @param r8     Input red component (0-255)
+ * @param g8     Input green component (0-255)
+ * @param b8     Input blue component (0-255)
+ * @param out_I  Output intensity (0-255)
+ * @param out_Ct Output tritanope, blue-yellow (0-255, centered at 127.5)
+ * @param out_Cp Output protanope, red-green (0-255, centered at 127.5)
+ */
+void tav_srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
+                             double *out_I, double *out_Ct, double *out_Cp)
+{
+    // Decode the 8-bit sRGB samples to linear light in 0..1
+    const double lin[3] = {
+        srgb_linearise((double)r8 / 255.0),
+        srgb_linearise((double)g8 / 255.0),
+        srgb_linearise((double)b8 / 255.0)
+    };
+
+    // Linear RGB -> LMS, then the HLG OETF on each cone response
+    double lms_prime[3];
+    for (int row = 0; row < 3; row++) {
+        const double cone = M_RGB_TO_LMS[row][0]*lin[0] + M_RGB_TO_LMS[row][1]*lin[1] + M_RGB_TO_LMS[row][2]*lin[2];
+        lms_prime[row] = HLG_OETF(cone);
+    }
+
+    // L'M'S' -> ICtCp
+    double ictcp[3];
+    for (int row = 0; row < 3; row++) {
+        ictcp[row] = M_LMSPRIME_TO_ICTCP[row][0]*lms_prime[0] + M_LMSPRIME_TO_ICTCP[row][1]*lms_prime[1] + M_LMSPRIME_TO_ICTCP[row][2]*lms_prime[2];
+    }
+
+    // Scale to the 0-255 range; the chroma channels are re-centered on 127.5
+    *out_I = FCLAMP(ictcp[0] * 255.0, 0.0, 255.0);
+    *out_Ct = FCLAMP(ictcp[1] * 255.0 + 127.5, 0.0, 255.0);
+    *out_Cp = FCLAMP(ictcp[2] * 255.0 + 127.5, 0.0, 255.0);
+}
+
+/**
+ * Convert one ICtCp pixel back to sRGB8 using the inverse HLG curve.
+ *
+ * Pipeline: undo the 0-255 scaling -> ICtCp-to-L'M'S' matrix -> HLG_EOTF
+ * per channel -> LMS-to-RGB matrix -> sRGB encode -> clamp and round.
+ *
+ * @param I8  Input intensity (0-255)
+ * @param Ct8 Input tritanope (0-255, centered at 127.5)
+ * @param Cp8 Input protanope (0-255, centered at 127.5)
+ * @param r8  Output red component (0-255)
+ * @param g8  Output green component (0-255)
+ * @param b8  Output blue component (0-255)
+ */
+void tav_ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
+                             uint8_t *r8, uint8_t *g8, uint8_t *b8)
+{
+    // Undo the 0-255 scaling and the 127.5 chroma offset
+    const double ictcp[3] = {
+        I8 / 255.0,
+        (Ct8 - 127.5) / 255.0,
+        (Cp8 - 127.5) / 255.0
+    };
+
+    // ICtCp -> L'M'S', then the inverse HLG curve on each channel
+    double lms[3];
+    for (int row = 0; row < 3; row++) {
+        const double primed = M_ICTCP_TO_LMSPRIME[row][0]*ictcp[0] + M_ICTCP_TO_LMSPRIME[row][1]*ictcp[1] + M_ICTCP_TO_LMSPRIME[row][2]*ictcp[2];
+        lms[row] = HLG_EOTF(primed);
+    }
+
+    // LMS -> linear RGB, then sRGB gamma encoding
+    double encoded[3];
+    for (int row = 0; row < 3; row++) {
+        const double linear = M_LMS_TO_RGB[row][0]*lms[0] + M_LMS_TO_RGB[row][1]*lms[1] + M_LMS_TO_RGB[row][2]*lms[2];
+        encoded[row] = srgb_unlinearise(linear);
+    }
+
+    // Clamp to 0-255 and round to the nearest integer
+    *r8 = (uint8_t)iround(FCLAMP(encoded[0] * 255.0, 0.0, 255.0));
+    *g8 = (uint8_t)iround(FCLAMP(encoded[1] * 255.0, 0.0, 255.0));
+    *b8 = (uint8_t)iround(FCLAMP(encoded[2] * 255.0, 0.0, 255.0));
+}
--- a/video_encoder/lib/libtavenc/tav_encoder_color.h
+++ b/video_encoder/lib/libtavenc/tav_encoder_color.h
@@ -0,0 +1,67 @@
+/**
+ * TAV Encoder - Color Space Conversion Library
+ *
+ * Public API for RGB <-> YCoCg-R and RGB <-> ICtCp color space conversions.
+ */
+
+#ifndef TAV_ENCODER_COLOR_H
+#define TAV_ENCODER_COLOR_H
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// =============================================================================
+// YCoCg-R Color Space Conversion
+// =============================================================================
+
+/**
+ * Convert RGB24 to YCoCg-R color space for a full frame.
+ *
+ * @param rgb    Input RGB24 data (interleaved: RGBRGBRGB...)
+ * @param y      Output luma channel
+ * @param co     Output orange chrominance
+ * @param cg     Output green chrominance
+ * @param width  Frame width
+ * @param height Frame height
+ */
+void tav_rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg,
+                      int width, int height);
+
+// =============================================================================
+// ICtCp Color Space Conversion (HDR-capable)
+// =============================================================================
+
+/**
+ * Convert sRGB8 to ICtCp color space using HLG transfer function.
+ *
+ * @param r8     Input red component (0-255)
+ * @param g8     Input green component (0-255)
+ * @param b8     Input blue component (0-255)
+ * @param out_I  Output intensity (0-255)
+ * @param out_Ct Output tritanope (0-255, centered at 127.5)
+ * @param out_Cp Output protanope (0-255, centered at 127.5)
+ */
+void tav_srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
+                             double *out_I, double *out_Ct, double *out_Cp);
+
+/**
+ * Convert ICtCp back to sRGB8 using HLG inverse transfer function.
+ *
+ * @param I8  Input intensity (0-255)
+ * @param Ct8 Input tritanope (0-255, centered at 127.5)
+ * @param Cp8 Input protanope (0-255, centered at 127.5)
+ * @param r8  Output red component (0-255)
+ * @param g8  Output green component (0-255)
+ * @param b8  Output blue component (0-255)
+ */
+void tav_ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
+                             uint8_t *r8, uint8_t *g8, uint8_t *b8);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TAV_ENCODER_COLOR_H
--- a/video_encoder/lib/libtavenc/tav_encoder_dwt.c
+++ b/video_encoder/lib/libtavenc/tav_encoder_dwt.c
@@ -0,0 +1,619 @@
+/**
+ * TAV Encoder - Discrete Wavelet Transform (DWT) Library
+ *
+ * Provides multi-resolution wavelet decomposition for video compression.
+ * Supports multiple wavelet types: CDF 5/3, 9/7, 13/7, DD-4, and Haar.
+ *
+ * Extracted from encoder_tav.c as part of library refactoring.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <math.h>
+
+// =============================================================================
+// Wavelet Type Constants
+// =============================================================================
+
+// Numeric identifiers for the supported wavelet filters.
+// The values are not contiguous (0, 1, 2, 16, 255).
+#define WAVELET_5_3_REVERSIBLE 0       // CDF 5/3 - Lossless capable
+#define WAVELET_9_7_IRREVERSIBLE 1     // CDF 9/7 - Higher compression (default)
+#define WAVELET_BIORTHOGONAL_13_7 2    // Biorthogonal 13/7
+#define WAVELET_DD4 16                 // Deslauriers-Dubuc 4-point interpolating
+#define WAVELET_HAAR 255               // Haar - Simplest wavelet
+
+// =============================================================================
+// 1D Forward DWT Transforms
+// =============================================================================
+
+/**
+ * CDF 5/3 wavelet forward 1D transform (lifting: predict, then update).
+ *
+ * Output layout: [low-pass coefficients | high-pass coefficients], with
+ * (length + 1) / 2 low-pass values followed by length / 2 high-pass values.
+ *
+ * Signals shorter than 2 samples are left untouched. If the scratch buffer
+ * cannot be allocated, an error is printed to stderr and the signal is left
+ * untransformed rather than dereferencing a NULL pointer.
+ *
+ * @param data   In/out signal data (modified in-place)
+ * @param length Signal length (handles non-power-of-2)
+ */
+static void dwt_53_forward_1d(float *data, int length) {
+    if (length < 2) return;
+
+    float *temp = calloc((size_t)length, sizeof *temp);
+    if (!temp) {
+        fprintf(stderr, "dwt_53_forward_1d: out of memory (%d samples)\n", length);
+        return;
+    }
+    int half = (length + 1) / 2;
+
+    // Predict step (high-pass): odd sample minus the mean of its even
+    // neighbours; the right neighbour is mirrored at the end of the signal.
+    for (int i = 0; i < half; i++) {
+        int idx = 2 * i + 1;
+        if (idx < length) {
+            float pred = 0.5f * (data[2 * i] + (2 * i + 2 < length ? data[2 * i + 2] : data[2 * i]));
+            temp[half + i] = data[idx] - pred;
+        }
+    }
+
+    // Update step (low-pass).
+    // NOTE: the last low-pass sample never uses its own high-pass term, even
+    // for even lengths where that coefficient exists. Kept as-is; presumably
+    // the inverse transform mirrors this - verify before changing.
+    for (int i = 0; i < half; i++) {
+        float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) +
+                               (i < half - 1 ? temp[half + i] : 0));
+        temp[i] = data[2 * i] + update;
+    }
+
+    memcpy(data, temp, length * sizeof(float));
+    free(temp);
+}
+
+/**
+ * CDF 9/7 irreversible wavelet forward 1D transform (JPEG 2000 style lifting).
+ *
+ * Four lifting steps (predict, update, predict, update) followed by a
+ * scaling step. Output layout: [low-pass | high-pass], with
+ * (length + 1) / 2 low-pass values followed by length / 2 high-pass values.
+ *
+ * Signals shorter than 2 samples are left untouched. If the scratch buffer
+ * cannot be allocated, an error is printed to stderr and the signal is left
+ * untransformed rather than dereferencing a NULL pointer.
+ *
+ * @param data   In/out signal data (modified in-place)
+ * @param length Signal length
+ */
+static void dwt_97_forward_1d(float *data, int length) {
+    if (length < 2) return;
+
+    const int n_low = (length + 1) / 2;   // number of even (low-pass) samples
+    const int n_high = length / 2;        // number of odd (high-pass) samples
+
+    float *temp = malloc((size_t)length * sizeof *temp);
+    if (!temp) {
+        fprintf(stderr, "dwt_97_forward_1d: out of memory (%d samples)\n", length);
+        return;
+    }
+    float *low = temp;
+    float *high = temp + n_low;
+
+    // Split into even/odd samples
+    for (int i = 0; i < n_low; i++) {
+        low[i] = data[2 * i];
+    }
+    for (int i = 0; i < n_high; i++) {
+        high[i] = data[2 * i + 1];
+    }
+
+    // JPEG2000 9/7 lifting coefficients
+    const float alpha = -1.586134342f;
+    const float beta = -0.052980118f;
+    const float gamma = 0.882911076f;
+    const float delta = 0.443506852f;
+    const float K = 1.230174105f;
+
+    // Step 1: Predict (alpha). A missing right neighbour repeats the current sample.
+    for (int i = 0; i < n_high; i++) {
+        float s_curr = low[i];
+        float s_next = (i + 1 < n_low) ? low[i + 1] : s_curr;
+        high[i] += alpha * (s_curr + s_next);
+    }
+
+    // Step 2: Update (beta). For odd lengths the last low sample has no
+    // high-pass partner (treated as 0); the first sample reuses d_curr.
+    for (int i = 0; i < n_low; i++) {
+        float d_curr = (i < n_high) ? high[i] : 0.0f;
+        float d_prev = (i > 0) ? high[i - 1] : d_curr;
+        low[i] += beta * (d_prev + d_curr);
+    }
+
+    // Step 3: Predict (gamma)
+    for (int i = 0; i < n_high; i++) {
+        float s_curr = low[i];
+        float s_next = (i + 1 < n_low) ? low[i + 1] : s_curr;
+        high[i] += gamma * (s_curr + s_next);
+    }
+
+    // Step 4: Update (delta)
+    for (int i = 0; i < n_low; i++) {
+        float d_curr = (i < n_high) ? high[i] : 0.0f;
+        float d_prev = (i > 0) ? high[i - 1] : d_curr;
+        low[i] += delta * (d_prev + d_curr);
+    }
+
+    // Step 5: Scaling
+    for (int i = 0; i < n_low; i++) {
+        low[i] *= K;
+    }
+    for (int i = 0; i < n_high; i++) {
+        high[i] /= K;
+    }
+
+    memcpy(data, temp, length * sizeof(float));
+    free(temp);
+}
+
+/**
+ * CDF 9/7 wavelet forward 1D using fixed-point (16 fractional bits) lifting.
+ *
+ * Same step structure as the floating-point 9/7 transform, but every lifting
+ * term is computed from samples truncated to int and rounded with RN().
+ * Output layout: [low-pass | high-pass].
+ *
+ * Caveats visible in the code:
+ * - samples are converted float -> int before each lifting step, so any
+ *   fractional part of the input is discarded at that point;
+ * - the final scaling uses a plain right shift, which for negative values
+ *   relies on the compiler's (implementation-defined) arithmetic shift and
+ *   rounds differently from RN().
+ *
+ * Signals shorter than 2 samples are left untouched. If the scratch buffer
+ * cannot be allocated, an error is printed to stderr and the signal is left
+ * untransformed rather than dereferencing a NULL pointer.
+ *
+ * @param data   In/out signal data (modified in-place)
+ * @param length Signal length
+ */
+static void dwt_97_iint_forward_1d(float *data, int length) {
+    if (length < 2) return;
+    float *temp = malloc((size_t)length * sizeof *temp);
+    if (!temp) {
+        fprintf(stderr, "dwt_97_iint_forward_1d: out of memory (%d samples)\n", length);
+        return;
+    }
+    int half = (length + 1) / 2;
+
+    // Split into even (low) and odd (high) samples
+    for (int i = 0; i < half; ++i) temp[i] = data[2*i];
+    for (int i = 0; i < length/2; ++i) temp[half + i] = data[2*i + 1];
+
+    // Lifting coefficients scaled by 2^16
+    const int SHIFT = 16;
+    const int64_t ROUND = 1LL << (SHIFT - 1);
+    const int64_t A = -103949;  // α
+    const int64_t B = -3472;    // β
+    const int64_t G = 57862;    // γ
+    const int64_t D = 29066;    // δ
+    const int64_t K_FP  = 80542;  // ≈ 1.230174105 * 2^16
+    const int64_t Ki_FP = 53283;  // ≈ (1/1.230174105) * 2^16
+
+    // Round-to-nearest division by 2^SHIFT, symmetric around zero
+    #define RN(x) (((x)>=0)?(((x)+ROUND)>>SHIFT):(-((-(x)+ROUND)>>SHIFT)))
+
+    // Predict α
+    for (int i = 0; i < length/2; ++i) {
+        int s = temp[i];
+        int sn = (i+1<half)? temp[i+1] : s;
+        temp[half+i] += RN(A * (int64_t)(s + sn));
+    }
+
+    // Update β
+    for (int i = 0; i < half; ++i) {
+        int d = (half+i<length)? temp[half+i]:0;
+        int dp = (i>0 && half+i-1<length)? temp[half+i-1]:d;
+        temp[i] += RN(B * (int64_t)(dp + d));
+    }
+
+    // Predict γ
+    for (int i = 0; i < length/2; ++i) {
+        int s = temp[i];
+        int sn = (i+1<half)? temp[i+1]:s;
+        temp[half+i] += RN(G * (int64_t)(s + sn));
+    }
+
+    // Update δ
+    for (int i = 0; i < half; ++i) {
+        int d = (half+i<length)? temp[half+i]:0;
+        int dp = (i>0 && half+i-1<length)? temp[half+i-1]:d;
+        temp[i] += RN(D * (int64_t)(dp + d));
+    }
+
+    // Scaling
+    for (int i = 0; i < half; ++i) {
+        temp[i] = (((int64_t)temp[i] * K_FP  + ROUND) >> SHIFT);
+    }
+    for (int i = 0; i < length/2; ++i) {
+        if (half + i < length) {
+            temp[half + i] = (((int64_t)temp[half + i] * Ki_FP + ROUND) >> SHIFT);
+        }
+    }
+
+    memcpy(data, temp, length * sizeof(float));
+    free(temp);
+    #undef RN
+}
+
+/**
+ * Deslauriers-Dubuc 4-point interpolating wavelet forward 1D (DD-4).
+ *
+ * Predicts each odd sample from four even neighbours with the kernel
+ * w[-1]=-1/16, w[0]=9/16, w[1]=9/16, w[2]=-1/16, then updates the even
+ * samples with a quarter of the two adjacent residuals.
+ * Output layout: [low-pass | high-pass].
+ *
+ * Signals shorter than 2 samples are left untouched. If the scratch buffer
+ * cannot be allocated, an error is printed to stderr and the signal is left
+ * untransformed rather than dereferencing a NULL pointer.
+ *
+ * @param data   In/out signal data (modified in-place)
+ * @param length Signal length
+ */
+static void dwt_dd4_forward_1d(float *data, int length) {
+    if (length < 2) return;
+
+    float *temp = malloc((size_t)length * sizeof *temp);
+    if (!temp) {
+        fprintf(stderr, "dwt_dd4_forward_1d: out of memory (%d samples)\n", length);
+        return;
+    }
+    int half = (length + 1) / 2;
+
+    // Split into even/odd samples
+    for (int i = 0; i < half; i++) {
+        temp[i] = data[2 * i];
+    }
+    for (int i = 0; i < length / 2; i++) {
+        temp[half + i] = data[2 * i + 1];
+    }
+
+    // DD-4 prediction step with four-point kernel
+    for (int i = 0; i < length / 2; i++) {
+        // Four neighbouring even samples; out-of-range taps fall back to
+        // the nearest edge samples.
+        float s_m1, s_0, s_1, s_2;
+
+        s_m1 = (i > 0) ? temp[i - 1] : temp[0];
+        s_0 = temp[i];
+        s_1 = (i + 1 < half) ? temp[i + 1] : temp[half - 1];
+        s_2 = (i + 2 < half) ? temp[i + 2] : ((half > 1) ? temp[half - 2] : temp[half - 1]);
+
+        float prediction = (-1.0f/16.0f) * s_m1 + (9.0f/16.0f) * s_0 +
+                          (9.0f/16.0f) * s_1 + (-1.0f/16.0f) * s_2;
+
+        temp[half + i] -= prediction;
+    }
+
+    // DD-4 update step; residuals outside the high-pass band count as 0
+    for (int i = 0; i < half; i++) {
+        float d_curr = (i < length / 2) ? temp[half + i] : 0.0f;
+        float d_prev = (i > 0 && i - 1 < length / 2) ? temp[half + i - 1] : 0.0f;
+        temp[i] += 0.25f * (d_prev + d_curr);
+    }
+
+    memcpy(data, temp, length * sizeof(float));
+    free(temp);
+}
+
+/**
+ * Biorthogonal 13/7 wavelet forward 1D.
+ *
+ * Simplified implementation: the lifting steps are the 5/3 predict/update
+ * pair, followed by scaling the low-pass half by K and the high-pass half
+ * by 1/K. Output layout is [low-pass | high-pass].
+ *
+ * Allocation failure aborts the process: the void interface cannot report
+ * it and continuing would dereference NULL.
+ *
+ * @param data   In/out signal data
+ * @param length Signal length (signals shorter than 2 are left untouched)
+ */
+static void dwt_bior137_forward_1d(float *data, int length) {
+    if (length < 2) return;
+
+    const float K = 1.230174105f;
+    const int half = (length + 1) / 2;  // low-pass sample count
+    const int num_odd = length / 2;     // high-pass sample count
+
+    float *temp = malloc((size_t)length * sizeof *temp);
+    if (!temp) abort();
+
+    // Predict step (high-pass): odd sample minus the mean of its even
+    // neighbours; the right neighbour is mirrored at the end of the signal.
+    for (int i = 0; i < num_odd; i++) {
+        float left = data[2 * i];
+        float right = (2 * i + 2 < length) ? data[2 * i + 2] : data[2 * i];
+        float prediction = 0.5f * (left + right);
+        temp[half + i] = data[2 * i + 1] - prediction;
+    }
+
+    // Update step (low-pass). The last low-pass sample only uses the previous
+    // detail; dwt_53_inverse_1d() applies the same boundary rule.
+    for (int i = 0; i < half; i++) {
+        float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) +
+                               (i < half - 1 ? temp[half + i] : 0));
+        temp[i] = data[2 * i] + update;
+    }
+
+    // Scaling
+    for (int i = 0; i < half; i++) {
+        temp[i] *= K;
+    }
+    for (int i = 0; i < num_odd; i++) {
+        temp[half + i] /= K;
+    }
+
+    memcpy(data, temp, (size_t)length * sizeof *temp);
+    free(temp);
+}
+
+/**
+ * Haar wavelet forward 1D transform.
+ *
+ * Each sample pair (a, b) becomes the average (a + b) / 2 in the low-pass
+ * half and the half-difference (a - b) / 2 in the high-pass half. For an odd
+ * length the trailing unpaired sample is copied to the last low-pass slot.
+ * Useful for temporal DWT in GOPs.
+ *
+ * Allocation failure aborts the process: the void interface cannot report
+ * it and continuing would dereference NULL.
+ *
+ * @param data   In/out signal data
+ * @param length Signal length (signals shorter than 2 are left untouched)
+ */
+static void dwt_haar_forward_1d(float *data, int length) {
+    if (length < 2) return;
+
+    const int half = (length + 1) / 2;  // low-pass sample count
+    const int num_pairs = length / 2;   // high-pass sample count
+
+    float *temp = malloc((size_t)length * sizeof *temp);
+    if (!temp) abort();
+
+    for (int i = 0; i < num_pairs; i++) {
+        temp[i] = (data[2 * i] + data[2 * i + 1]) / 2.0f;
+        temp[half + i] = (data[2 * i] - data[2 * i + 1]) / 2.0f;
+    }
+    if (half > num_pairs) {
+        // Odd length: the last sample has no partner and no detail slot.
+        temp[half - 1] = data[length - 1];
+    }
+
+    memcpy(data, temp, (size_t)length * sizeof *temp);
+    free(temp);
+}
+
+// =============================================================================
+// 1D Inverse DWT Transforms
+// =============================================================================
+
+/**
+ * CDF 5/3 reversible wavelet inverse 1D transform.
+ *
+ * Undoes the 5/3 lifting steps in reverse order (update first, then
+ * predict) and re-interleaves the [low-pass | high-pass] halves into
+ * even/odd sample positions. Counterpart of dwt_53_forward_1d().
+ *
+ * Allocation failure aborts the process: the void interface cannot report
+ * it and continuing would dereference NULL.
+ *
+ * @param data   In/out coefficient data
+ * @param length Signal length (signals shorter than 2 are left untouched)
+ */
+static void dwt_53_inverse_1d(float *data, int length) {
+    if (length < 2) return;
+
+    const int half = (length + 1) / 2;  // low-pass coefficient count
+
+    float *temp = malloc((size_t)length * sizeof *temp);
+    if (!temp) abort();
+
+    // Work on a copy so data[] can be overwritten while re-interleaving
+    memcpy(temp, data, (size_t)length * sizeof *temp);
+
+    // Undo update step
+    for (int i = 0; i < half; i++) {
+        float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) +
+                               (i < half - 1 ? temp[half + i] : 0));
+        temp[i] -= update;
+    }
+
+    // Undo predict step and interleave
+    for (int i = 0; i < half; i++) {
+        int idx = 2 * i + 1;
+        data[2 * i] = temp[i];
+        if (idx < length) {
+            // Right even neighbour is mirrored at the end of the signal
+            float pred = 0.5f * (temp[i] + ((i + 1 < half) ? temp[i + 1] : temp[i]));
+            data[idx] = temp[half + i] + pred;
+        }
+    }
+
+    free(temp);
+}
+
+/**
+ * Haar wavelet inverse 1D transform.
+ *
+ * Reverses dwt_haar_forward_1d(): from average s and half-difference d the
+ * pair is rebuilt as (s + d, s - d). For an odd length the last low-pass
+ * value is copied back unchanged.
+ *
+ * Allocation failure aborts the process: the void interface cannot report
+ * it and continuing would dereference NULL.
+ *
+ * @param data   In/out coefficient data
+ * @param length Signal length (signals shorter than 2 are left untouched)
+ */
+static void dwt_haar_inverse_1d(float *data, int length) {
+    if (length < 2) return;
+
+    const int half = (length + 1) / 2;  // low-pass coefficient count
+    const int num_pairs = length / 2;   // high-pass coefficient count
+
+    float *temp = malloc((size_t)length * sizeof *temp);
+    if (!temp) abort();
+
+    // Reconstruct from averages and differences
+    for (int i = 0; i < num_pairs; i++) {
+        temp[2 * i] = data[i] + data[half + i];
+        temp[2 * i + 1] = data[i] - data[half + i];
+    }
+    if (half > num_pairs) {
+        temp[length - 1] = data[half - 1];
+    }
+
+    memcpy(data, temp, (size_t)length * sizeof *temp);
+    free(temp);
+}
+
+// =============================================================================
+// 2D DWT Transform
+// =============================================================================
+
+/**
+ * Run the forward 1D transform selected by filter_type on one line.
+ * Unknown filter types leave the line untouched.
+ */
+static void dwt_forward_1d_dispatch(float *line, int length, int filter_type) {
+    switch (filter_type) {
+        case WAVELET_5_3_REVERSIBLE:
+            dwt_53_forward_1d(line, length);
+            break;
+        case WAVELET_9_7_IRREVERSIBLE:
+            dwt_97_forward_1d(line, length);
+            break;
+        case WAVELET_BIORTHOGONAL_13_7:
+            dwt_bior137_forward_1d(line, length);
+            break;
+        case WAVELET_DD4:
+            dwt_dd4_forward_1d(line, length);
+            break;
+        case WAVELET_HAAR:
+            dwt_haar_forward_1d(line, length);
+            break;
+        default:
+            break;
+    }
+}
+
+/**
+ * Apply 2D forward DWT to a frame (in-place).
+ *
+ * Applies separable 1D transforms: horizontal (rows), then vertical (columns).
+ * Each further level repeats this on the top-left low-pass region, whose
+ * dimensions are halved (rounding up) per level.
+ *
+ * Nothing is done when data is NULL, a dimension is below 1, or levels is
+ * below 1. Failure to allocate the column scratch buffer aborts the process,
+ * because the void interface cannot report it.
+ *
+ * @param data        In/out 2D image data (row-major, width stride)
+ * @param width       Image width
+ * @param height      Image height
+ * @param levels      Number of decomposition levels
+ * @param filter_type Wavelet type (WAVELET_* constant)
+ */
+void tav_dwt_2d_forward(float *data, int width, int height, int levels, int filter_type) {
+    if (!data || width < 1 || height < 1 || levels < 1) return;
+
+    const size_t stride = (size_t)width;
+
+    // Rows are contiguous and are transformed in place; only columns need a
+    // gather/scatter buffer.
+    float *column = malloc((size_t)height * sizeof *column);
+    if (!column) abort();
+
+    int current_width = width;
+    int current_height = height;
+
+    for (int level = 0; level < levels; level++) {
+        // Row transform (horizontal)
+        for (int y = 0; y < current_height; y++) {
+            dwt_forward_1d_dispatch(data + (size_t)y * stride, current_width, filter_type);
+        }
+
+        // Column transform (vertical)
+        for (int x = 0; x < current_width; x++) {
+            for (int y = 0; y < current_height; y++) {
+                column[y] = data[(size_t)y * stride + (size_t)x];
+            }
+
+            dwt_forward_1d_dispatch(column, current_height, filter_type);
+
+            for (int y = 0; y < current_height; y++) {
+                data[(size_t)y * stride + (size_t)x] = column[y];
+            }
+        }
+
+        // Next level operates on the low-pass quadrant
+        current_width = (current_width + 1) / 2;
+        current_height = (current_height + 1) / 2;
+    }
+
+    free(column);
+}
+
+// =============================================================================
+// 3D DWT Transform (Temporal + Spatial)
+// =============================================================================
+
+/**
+ * Apply 3D forward DWT to a GOP (group of pictures).
+ *
+ * First applies temporal DWT across frames at each spatial location,
+ * then applies 2D spatial DWT to each resulting temporal subband.
+ * Each temporal level operates on the low-pass prefix left by the previous
+ * one, whose length is halved (rounding up) per level.
+ *
+ * Returns without doing anything when gop_data is NULL, num_frames < 2,
+ * width < 2 or height < 2. Failure to allocate the temporal scratch line
+ * aborts the process, because the void interface cannot report it.
+ *
+ * @param gop_data        Array of frame pointers [num_frames][width*height]
+ * @param width           Frame width
+ * @param height          Frame height
+ * @param num_frames      Number of frames in GOP
+ * @param spatial_levels  Number of 2D spatial decomposition levels
+ * @param temporal_levels Number of 1D temporal decomposition levels
+ *                        (values below 1 skip the temporal stage)
+ * @param spatial_filter  Wavelet type for spatial transform
+ * @param temporal_filter Wavelet type for temporal transform: 0 selects
+ *                        CDF 5/3; 255 (Haar) and every other value use Haar
+ */
+void tav_dwt_3d_forward(float **gop_data, int width, int height, int num_frames,
+                        int spatial_levels, int temporal_levels,
+                        int spatial_filter, int temporal_filter) {
+    if (!gop_data || num_frames < 2 || width < 2 || height < 2) return;
+
+    // Step 1: Apply temporal DWT across frames
+    if (temporal_levels > 0) {
+        float *temporal_line = malloc((size_t)num_frames * sizeof *temporal_line);
+        if (!temporal_line) abort();
+
+        const size_t num_pixels = (size_t)width * (size_t)height;
+
+        for (size_t pixel_idx = 0; pixel_idx < num_pixels; pixel_idx++) {
+            // Extract temporal signal
+            for (int t = 0; t < num_frames; t++) {
+                temporal_line[t] = gop_data[t][pixel_idx];
+            }
+
+            // Multi-level temporal DWT; stops once fewer than 2 samples remain
+            int level_frames = num_frames;
+            for (int level = 0; level < temporal_levels && level_frames >= 2; level++) {
+                if (temporal_filter == 0) {
+                    // 0 == WAVELET_5_3_REVERSIBLE: CDF 5/3 temporal
+                    dwt_53_forward_1d(temporal_line, level_frames);
+                } else {
+                    // Haar (255) and fallback for unsupported wavelets
+                    dwt_haar_forward_1d(temporal_line, level_frames);
+                }
+                level_frames = (level_frames + 1) / 2;
+            }
+
+            // Write back temporal coefficients
+            for (int t = 0; t < num_frames; t++) {
+                gop_data[t][pixel_idx] = temporal_line[t];
+            }
+        }
+
+        free(temporal_line);
+    }
+
+    // Step 2: Apply 2D spatial DWT to each temporal subband
+    for (int t = 0; t < num_frames; t++) {
+        tav_dwt_2d_forward(gop_data[t], width, height, spatial_levels, spatial_filter);
+    }
+}
+
+// =============================================================================
+// Utility Functions
+// =============================================================================
+
+/**
+ * Calculate recommended number of decomposition levels for given dimensions.
+ *
+ * Counts how many times the shorter side can be halved (integer division)
+ * while it is still at least 32, then caps the count at 6.
+ *
+ * @param width  Image width
+ * @param height Image height
+ * @return       Recommended number of levels (0-6; 0 when the shorter side
+ *               is below 32)
+ */
+int tav_dwt_calculate_levels(int width, int height) {
+    const int max_levels = 6;
+    int shortest = (height > width) ? width : height;
+    int levels;
+
+    for (levels = 0; shortest >= 32; shortest /= 2) {
+        levels++;
+    }
+
+    if (levels > max_levels) {
+        levels = max_levels;
+    }
+    return levels;
+}
--- a/video_encoder/lib/libtavenc/tav_encoder_dwt.h
+++ b/video_encoder/lib/libtavenc/tav_encoder_dwt.h
@@ -0,0 +1,88 @@
+/**
+ * TAV Encoder - Discrete Wavelet Transform Library
+ *
+ * Public API for multi-resolution wavelet decomposition.
+ * Supports multiple wavelet types: CDF 5/3, 9/7, 13/7, DD-4, Haar
+ */
+
+#ifndef TAV_ENCODER_DWT_H
+#define TAV_ENCODER_DWT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// =============================================================================
+// Wavelet Type Constants
+// =============================================================================
+
+#define WAVELET_5_3_REVERSIBLE 0      // CDF 5/3 reversible (lossless capable)
+#define WAVELET_9_7_IRREVERSIBLE 1    // CDF 9/7 JPEG2000 (default, best compression)
+#define WAVELET_BIORTHOGONAL_13_7 2   // CDF 13/7 experimental
+#define WAVELET_DD4 16                // Deslauriers-Dubuc 4-point interpolating
+#define WAVELET_HAAR 255              // Haar (spatial: demonstration only; temporal default in tav_dwt_3d_forward)
+
+// =============================================================================
+// 2D Discrete Wavelet Transform
+// =============================================================================
+
+/**
+ * Apply 2D wavelet transform to spatial data.
+ *
+ * Uses separable 1D transforms: apply horizontal rows, then vertical columns.
+ * Multi-level decomposition creates frequency subbands: LL, LH, HL, HH.
+ *
+ * @param data         Input/output data array (modified in-place)
+ * @param width        Frame width
+ * @param height       Frame height
+ * @param levels       Number of decomposition levels; exactly this many are
+ *                     applied (0 leaves the data untouched). Use
+ *                     tav_dwt_calculate_levels() to obtain a recommendation.
+ * @param filter_type  Wavelet type (WAVELET_* constants)
+ */
+void tav_dwt_2d_forward(float *data, int width, int height,
+                        int levels, int filter_type);
+
+// =============================================================================
+// 3D Discrete Wavelet Transform (GOP Temporal + Spatial)
+// =============================================================================
+
+/**
+ * Apply 3D wavelet transform to group-of-pictures (GOP).
+ *
+ * Process:
+ * 1. Apply temporal 1D DWT across frames at each spatial position
+ * 2. Apply spatial 2D DWT to each temporal subband frame
+ *
+ * The call is a no-op when num_frames, width or height is below 2.
+ *
+ * @param gop_data         Array of frame pointers [num_frames]
+ * @param width            Frame width
+ * @param height           Frame height
+ * @param num_frames       Number of frames in GOP
+ * @param spatial_levels   Spatial decomposition levels, passed unchanged to
+ *                         tav_dwt_2d_forward() (0 applies no spatial levels)
+ * @param temporal_levels  Temporal decomposition levels
+ * @param spatial_filter   Wavelet type for spatial transform
+ * @param temporal_filter  Wavelet type for temporal transform:
+ *                         WAVELET_5_3_REVERSIBLE selects CDF 5/3; WAVELET_HAAR
+ *                         and any other value fall back to Haar
+ */
+void tav_dwt_3d_forward(float **gop_data, int width, int height, int num_frames,
+                        int spatial_levels, int temporal_levels,
+                        int spatial_filter, int temporal_filter);
+
+// =============================================================================
+// Utility Functions
+// =============================================================================
+
+/**
+ * Calculate recommended number of decomposition levels for given dimensions.
+ *
+ * Counts how many times min(width, height) can be halved while it is still
+ * at least 32, capped at 6. Dimensions below 32 therefore yield 0.
+ *
+ * @param width   Frame width
+ * @param height  Frame height
+ * @return        Recommended number of levels (0-6)
+ */
+int tav_dwt_calculate_levels(int width, int height);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TAV_ENCODER_DWT_H
--- a/video_encoder/lib/libtavenc/tav_encoder_ezbc.c
+++ b/video_encoder/lib/libtavenc/tav_encoder_ezbc.c
@@ -0,0 +1,415 @@
+/**
+ * TAV Encoder - EZBC (Embedded Zero Block Coding) Library
+ *
+ * Implements quadtree embedded zero block coding for efficient storage
+ * of sparse wavelet coefficients. Exploits coefficient sparsity through
+ * hierarchical significance testing and progressive bitplane encoding.
+ *
+ * Extracted from encoder_tav.c as part of library refactoring.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdbool.h>
+#include <math.h>
+
+// =============================================================================
+// EZBC Structures
+// =============================================================================
+
+/**
+ * Bitstream writer for bit-level encoding.
+ *
+ * Bits are packed LSB-first within each byte. The buffer is zero-filled
+ * ahead of the write cursor, so writing a 0 bit only advances the cursor.
+ */
+typedef struct {
+    uint8_t *data;    // Heap buffer holding the encoded bits
+    size_t capacity;  // Allocated size of data, in bytes
+    size_t byte_pos;  // Index of the byte currently being filled
+    uint8_t bit_pos;  // 0-7, current bit position in current byte
+} bitstream_t;
+
+/**
+ * Block structure for EZBC quadtree decomposition.
+ *
+ * A rectangle of coefficients; a 1x1 block addresses a single coefficient
+ * at index y * width + x of the channel array.
+ */
+typedef struct {
+    int x, y;           // Top-left position in 2D coefficient array
+    int width, height;  // Block dimensions
+} ezbc_block_t;
+
+/**
+ * Queue for EZBC block processing.
+ *
+ * A growable array: blocks are appended with queue_push() and consumed by
+ * iterating blocks[0..count-1] in insertion order.
+ */
+typedef struct {
+    ezbc_block_t *blocks;  // Heap array of queued blocks
+    size_t count;          // Number of blocks currently stored
+    size_t capacity;       // Number of blocks the array can hold
+} block_queue_t;
+
+/**
+ * Track coefficient state for refinement.
+ *
+ * One entry per coefficient, filled in when the coefficient is first coded
+ * as a significant 1x1 block.
+ */
+typedef struct {
+    bool significant;     // Has been marked significant
+    int first_bitplane;   // Bitplane where it became significant
+} coeff_state_t;
+
+/**
+ * EZBC encoding context for recursive processing.
+ *
+ * Bundles everything process_significant_block_recursive() needs while
+ * coding one bitplane, so the recursion only passes the block itself.
+ */
+typedef struct {
+    bitstream_t *bs;                    // Output bitstream
+    int16_t *coeffs;                    // Channel coefficients, row-major
+    coeff_state_t *states;              // Per-coefficient state, same indexing as coeffs
+    int width;                          // Channel width (row stride of coeffs)
+    int height;                         // Channel height
+    int bitplane;                       // Bitplane currently being coded
+    int threshold;                      // Significance threshold, 1 << bitplane
+    block_queue_t *next_insignificant;  // Receives children that stay insignificant
+    block_queue_t *next_significant;    // Receives newly significant 1x1 blocks
+    int *sign_count;                    // Incremented once per sign bit emitted
+} ezbc_context_t;
+
+// =============================================================================
+// Bitstream Operations
+// =============================================================================
+
+/**
+ * Prepare an empty bitstream backed by a zero-filled heap buffer.
+ *
+ * Requests below 64 bytes are raised to 64. Allocation failure is fatal:
+ * an error is printed to stderr and the process exits with status 1.
+ */
+static void bitstream_init(bitstream_t *bs, size_t initial_capacity) {
+    const size_t min_capacity = 64;
+    size_t capacity = initial_capacity;
+    if (capacity < min_capacity) {
+        capacity = min_capacity;
+    }
+
+    uint8_t *buffer = calloc(1, capacity);
+    if (buffer == NULL) {
+        fprintf(stderr, "ERROR: Failed to allocate bitstream buffer of size %zu\n", capacity);
+        exit(1);
+    }
+
+    bs->data = buffer;
+    bs->capacity = capacity;
+    bs->byte_pos = 0;
+    bs->bit_pos = 0;
+}
+
+/**
+ * Write a single bit to bitstream.
+ *
+ * The buffer doubles when the write cursor reaches its end; the new tail is
+ * zeroed so that 0 bits never need an explicit store. Failure to grow is
+ * fatal (message on stderr, exit status 1), matching bitstream_init().
+ *
+ * @param bs  Bitstream to append to
+ * @param bit Bit value; any non-zero value is written as 1
+ */
+static void bitstream_write_bit(bitstream_t *bs, int bit) {
+    // Grow if needed
+    if (bs->byte_pos >= bs->capacity) {
+        size_t old_capacity = bs->capacity;
+        size_t new_capacity = old_capacity ? old_capacity * 2 : 64;
+        // Keep the old pointer until realloc succeeds; a wrapped size counts
+        // as failure.
+        uint8_t *grown = (new_capacity > old_capacity)
+                             ? realloc(bs->data, new_capacity)
+                             : NULL;
+        if (!grown) {
+            fprintf(stderr, "ERROR: Failed to grow bitstream buffer to %zu bytes\n", new_capacity);
+            exit(1);
+        }
+        // Clear only the newly allocated memory region
+        memset(grown + old_capacity, 0, new_capacity - old_capacity);
+        bs->data = grown;
+        bs->capacity = new_capacity;
+    }
+
+    if (bit) {
+        bs->data[bs->byte_pos] |= (uint8_t)(1u << bs->bit_pos);
+    }
+
+    bs->bit_pos++;
+    if (bs->bit_pos == 8) {
+        bs->bit_pos = 0;
+        bs->byte_pos++;
+    }
+}
+
+/**
+ * Append the low num_bits bits of value, least significant bit first.
+ */
+static void bitstream_write_bits(bitstream_t *bs, uint32_t value, int num_bits) {
+    int remaining = num_bits;
+    while (remaining > 0) {
+        bitstream_write_bit(bs, (int)(value & 1u));
+        value >>= 1;
+        remaining--;
+    }
+}
+
+/**
+ * Number of bytes occupied so far; a partially filled byte counts in full.
+ */
+static size_t bitstream_size(bitstream_t *bs) {
+    size_t used = bs->byte_pos;
+    if (bs->bit_pos > 0) {
+        used += 1;
+    }
+    return used;
+}
+
+/**
+ * Free bitstream buffer.
+ *
+ * The structure is reset afterwards so a stale pointer cannot be reused and
+ * a second call is harmless.
+ */
+static void bitstream_free(bitstream_t *bs) {
+    free(bs->data);
+    bs->data = NULL;
+    bs->capacity = 0;
+    bs->byte_pos = 0;
+    bs->bit_pos = 0;
+}
+
+// =============================================================================
+// Block Queue Operations
+// =============================================================================
+
+/**
+ * Initialize block queue with initial capacity.
+ *
+ * Starts empty with room for 1024 blocks. Allocation failure is fatal
+ * (message on stderr, exit status 1), matching bitstream_init().
+ */
+static void queue_init(block_queue_t *q) {
+    q->capacity = 1024;
+    q->count = 0;
+    q->blocks = malloc(q->capacity * sizeof *q->blocks);
+    if (!q->blocks) {
+        fprintf(stderr, "ERROR: Failed to allocate EZBC block queue of %zu entries\n", q->capacity);
+        exit(1);
+    }
+}
+
+/**
+ * Push block onto queue, growing if needed.
+ *
+ * Capacity doubles when the array is full. Failure to grow is fatal
+ * (message on stderr, exit status 1) rather than a write through NULL.
+ */
+static void queue_push(block_queue_t *q, ezbc_block_t block) {
+    if (q->count >= q->capacity) {
+        size_t new_capacity = q->capacity ? q->capacity * 2 : 1024;
+        // Keep the old array until realloc succeeds; reject sizes that
+        // would overflow the byte count.
+        ezbc_block_t *grown = NULL;
+        if (new_capacity > q->capacity && new_capacity <= SIZE_MAX / sizeof *grown) {
+            grown = realloc(q->blocks, new_capacity * sizeof *grown);
+        }
+        if (!grown) {
+            fprintf(stderr, "ERROR: Failed to grow EZBC block queue to %zu entries\n", new_capacity);
+            exit(1);
+        }
+        q->blocks = grown;
+        q->capacity = new_capacity;
+    }
+    q->blocks[q->count++] = block;
+}
+
+/**
+ * Free block queue.
+ *
+ * The queue is left empty with a NULL array, so a repeated call is harmless.
+ */
+static void queue_free(block_queue_t *q) {
+    free(q->blocks);
+    q->blocks = NULL;
+    q->count = 0;
+    q->capacity = 0;
+}
+
+// =============================================================================
+// EZBC Helper Functions
+// =============================================================================
+
+/**
+ * Report whether every coefficient of the block, clipped to the channel
+ * bounds, has magnitude below threshold.
+ */
+static bool is_zero_block_ezbc(int16_t *coeffs, int width, int height,
+                                const ezbc_block_t *block, int threshold) {
+    int row_end = block->y + block->height;
+    if (row_end > height) {
+        row_end = height;
+    }
+    int col_end = block->x + block->width;
+    if (col_end > width) {
+        col_end = width;
+    }
+
+    for (int row = block->y; row < row_end; row++) {
+        for (int col = block->x; col < col_end; col++) {
+            const int magnitude = abs(coeffs[row * width + col]);
+            if (magnitude >= threshold) {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+/**
+ * Return the largest magnitude found among count coefficients (0 when
+ * count is 0).
+ */
+static int find_max_abs_ezbc(int16_t *coeffs, size_t count) {
+    int largest = 0;
+    for (size_t remaining = count; remaining > 0; remaining--) {
+        const int magnitude = abs(coeffs[remaining - 1]);
+        largest = (magnitude > largest) ? magnitude : largest;
+    }
+    return largest;
+}
+
+/**
+ * Get MSB position (bitplane number).
+ * Returns floor(log2(value)) for value >= 1, and 0 for value <= 1
+ * (including 0 and negative inputs).
+ */
+static int get_msb_bitplane(int value) {
+    int position = 0;
+    for (int rest = value; rest > 1; rest >>= 1) {
+        position++;
+    }
+    return position;
+}
+
+/**
+ * Code a block already known to be significant at the current bitplane.
+ *
+ * A 1x1 block emits its sign bit (1 = negative), is recorded in the state
+ * table and joins the next significant queue. Any larger block is split
+ * into up to four children, visited in the order top-left, top-right,
+ * bottom-left, bottom-right. Each child emits one significance bit, then
+ * either recurses (1) or is parked in the next insignificant queue (0).
+ */
+static void process_significant_block_recursive(ezbc_context_t *ctx, ezbc_block_t block) {
+    const bool is_single = (block.width == 1) && (block.height == 1);
+    if (is_single) {
+        const int idx = block.y * ctx->width + block.x;
+        const int sign_bit = (ctx->coeffs[idx] < 0) ? 1 : 0;
+        bitstream_write_bit(ctx->bs, sign_bit);
+        (*ctx->sign_count)++;
+        ctx->states[idx].significant = true;
+        ctx->states[idx].first_bitplane = ctx->bitplane;
+        queue_push(ctx->next_significant, block);
+        return;
+    }
+
+    // Split point: half the extent, but never less than one coefficient
+    const int left_w = (block.width / 2 != 0) ? block.width / 2 : 1;
+    const int top_h = (block.height / 2 != 0) ? block.height / 2 : 1;
+    const int right_w = block.width - left_w;
+    const int bottom_h = block.height - top_h;
+    const bool has_right = right_w > 0;
+    const bool has_bottom = bottom_h > 0;
+
+    // Collect the children that exist, in emission order
+    ezbc_block_t children[4];
+    int num_children = 0;
+    children[num_children++] = (ezbc_block_t){block.x, block.y, left_w, top_h};
+    if (has_right) {
+        children[num_children++] = (ezbc_block_t){block.x + left_w, block.y, right_w, top_h};
+    }
+    if (has_bottom) {
+        children[num_children++] = (ezbc_block_t){block.x, block.y + top_h, left_w, bottom_h};
+    }
+    if (has_right && has_bottom) {
+        children[num_children++] =
+            (ezbc_block_t){block.x + left_w, block.y + top_h, right_w, bottom_h};
+    }
+
+    for (int c = 0; c < num_children; c++) {
+        const bool insignificant = is_zero_block_ezbc(ctx->coeffs, ctx->width, ctx->height,
+                                                      &children[c], ctx->threshold);
+        if (insignificant) {
+            bitstream_write_bit(ctx->bs, 0);
+            queue_push(ctx->next_insignificant, children[c]);
+        } else {
+            bitstream_write_bit(ctx->bs, 1);
+            process_significant_block_recursive(ctx, children[c]);
+        }
+    }
+}
+
+// =============================================================================
+// Main EZBC Encoding Function
+// =============================================================================
+
+/**
+ * EZBC encoding for a single channel.
+ *
+ * Uses two separate queues for insignificant blocks and significant 1x1 blocks.
+ * Encodes coefficients progressively from MSB to LSB bitplane.
+ *
+ * Algorithm:
+ * 1. Find MSB bitplane from maximum absolute coefficient value
+ * 2. Write header: MSB bitplane (8 bits), width (16 bits), height (16 bits);
+ *    only the low 16 bits of each dimension are stored
+ * 3. For each bitplane from MSB to 0:
+ *    a. Process insignificant blocks: check if they become significant
+ *    b. For newly significant blocks: recursively subdivide until 1x1
+ *    c. Emit sign bits for newly significant 1x1 coefficients
+ *    d. Process already-significant coefficients: emit refinement bits
+ * 4. Return encoded bitstream
+ *
+ * Allocation failure is fatal (message on stderr, exit status 1), matching
+ * bitstream_init().
+ *
+ * @param coeffs  Input quantized coefficients (int16_t array)
+ * @param count   Number of coefficients
+ * @param width   Frame width
+ * @param height  Frame height
+ * @param output  Output buffer pointer (allocated by this function; release
+ *                with free()). If NULL, nothing is encoded and 0 is returned.
+ * @return        Encoded size in bytes
+ */
+size_t tav_encode_channel_ezbc(int16_t *coeffs, size_t count, int width, int height,
+                                uint8_t **output) {
+    if (!output) return 0;
+
+    bitstream_t bs;
+    bitstream_init(&bs, count / 4);  // Initial guess
+
+    // Track coefficient significance. At least one entry is requested so a
+    // NULL result always means failure, even when count is 0.
+    coeff_state_t *states = calloc(count ? count : 1, sizeof *states);
+    if (!states) {
+        fprintf(stderr, "ERROR: Failed to allocate EZBC state for %zu coefficients\n", count);
+        exit(1);
+    }
+
+    // Find maximum value to determine MSB bitplane
+    const int max_abs = find_max_abs_ezbc(coeffs, count);
+    const int msb_bitplane = get_msb_bitplane(max_abs);
+
+    // Write header: MSB bitplane and dimensions
+    bitstream_write_bits(&bs, (uint32_t)msb_bitplane, 8);
+    bitstream_write_bits(&bs, (uint32_t)width, 16);
+    bitstream_write_bits(&bs, (uint32_t)height, 16);
+
+    // Initialise two queues: insignificant blocks and significant 1x1 blocks
+    block_queue_t insignificant_queue, next_insignificant;
+    block_queue_t significant_queue, next_significant;
+
+    queue_init(&insignificant_queue);
+    queue_init(&next_insignificant);
+    queue_init(&significant_queue);
+    queue_init(&next_significant);
+
+    // Start with root block as insignificant
+    ezbc_block_t root = {0, 0, width, height};
+    queue_push(&insignificant_queue, root);
+
+    // Process bitplanes from MSB to LSB
+    for (int bitplane = msb_bitplane; bitplane >= 0; bitplane--) {
+        const int threshold = 1 << bitplane;
+        int sign_bits_this_bitplane = 0;
+
+        // Shared by every block that turns significant in this bitplane
+        ezbc_context_t ctx = {
+            .bs = &bs,
+            .coeffs = coeffs,
+            .states = states,
+            .width = width,
+            .height = height,
+            .bitplane = bitplane,
+            .threshold = threshold,
+            .next_insignificant = &next_insignificant,
+            .next_significant = &next_significant,
+            .sign_count = &sign_bits_this_bitplane
+        };
+
+        // Process insignificant blocks - check if they become significant
+        for (size_t i = 0; i < insignificant_queue.count; i++) {
+            ezbc_block_t block = insignificant_queue.blocks[i];
+
+            if (is_zero_block_ezbc(coeffs, width, height, &block, threshold)) {
+                // Still insignificant: emit 0 and retry at the next bitplane
+                bitstream_write_bit(&bs, 0);
+                queue_push(&next_insignificant, block);
+            } else {
+                // Became significant: emit 1, then subdivide down to 1x1
+                bitstream_write_bit(&bs, 1);
+                process_significant_block_recursive(&ctx, block);
+            }
+        }
+
+        // Process significant 1x1 blocks - emit refinement bits
+        for (size_t i = 0; i < significant_queue.count; i++) {
+            ezbc_block_t block = significant_queue.blocks[i];
+            int idx = block.y * width + block.x;
+            int abs_val = abs(coeffs[idx]);
+
+            // Emit refinement bit at current bitplane
+            bitstream_write_bit(&bs, (abs_val >> bitplane) & 1);
+
+            // Keep in significant queue for next bitplane
+            queue_push(&next_significant, block);
+        }
+
+        // Swap queues for next bitplane. The drained queues are recycled as
+        // the new "next" queues instead of being freed and reallocated.
+        block_queue_t drained = insignificant_queue;
+        insignificant_queue = next_insignificant;
+        next_insignificant = drained;
+        next_insignificant.count = 0;
+
+        drained = significant_queue;
+        significant_queue = next_significant;
+        next_significant = drained;
+        next_significant.count = 0;
+    }
+
+    // Free all queues
+    queue_free(&insignificant_queue);
+    queue_free(&significant_queue);
+    queue_free(&next_insignificant);
+    queue_free(&next_significant);
+    free(states);
+
+    // Ownership of the bitstream buffer passes to the caller
+    *output = bs.data;
+    return bitstream_size(&bs);
+}
--- a/video_encoder/lib/libtavenc/tav_encoder_ezbc.h
+++ b/video_encoder/lib/libtavenc/tav_encoder_ezbc.h
@@ -0,0 +1,61 @@
+/**
+ * TAV Encoder - EZBC (Embedded Zero Block Coding) Library
+ *
+ * Public API for EZBC entropy coding of wavelet coefficients.
+ */
+
+#ifndef TAV_ENCODER_EZBC_H
+#define TAV_ENCODER_EZBC_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// =============================================================================
+// EZBC Encoding
+// =============================================================================
+
+/**
+ * EZBC encoding for a single channel.
+ *
+ * Implements quadtree embedded zero block coding for efficient storage
+ * of sparse wavelet coefficients. Exploits coefficient sparsity through
+ * hierarchical significance testing and progressive bitplane encoding.
+ *
+ * Algorithm:
+ * 1. Find MSB bitplane from maximum absolute coefficient value
+ * 2. Write header: MSB bitplane (8 bits), width (16 bits), height (16 bits)
+ *    - Only the low 16 bits of width and height are stored
+ * 3. For each bitplane from MSB to 0:
+ *    a. Process insignificant blocks: check if they become significant
+ *       - Emit 0 if still insignificant, 1 if became significant
+ *    b. For newly significant blocks: recursively subdivide until 1x1
+ *       - Emit tree structure: 1=child is significant, 0=child insignificant
+ *    c. Emit sign bits for newly significant 1x1 coefficients (1=negative, 0=positive)
+ *    d. Process already-significant coefficients: emit refinement bits
+ *       - Emit bit at current bitplane for progressive reconstruction
+ * 4. Return encoded bitstream
+ *
+ * Benefits:
+ * - Exploits coefficient sparsity (typical: 86.9% zeros in luma, 97.8% in chroma)
+ * - Progressive refinement from MSB to LSB
+ * - Spatial clustering through quadtree decomposition
+ * - No additional entropy coding needed (bitstream is already compressed)
+ *
+ * @param coeffs  Input quantized coefficients (int16_t array)
+ * @param count   Number of coefficients (width × height)
+ * @param width   Frame width (must match coefficient array layout)
+ * @param height  Frame height (must match coefficient array layout)
+ * @param output  Output buffer pointer (heap-allocated by this function;
+ *                caller must release it with free())
+ * @return        Encoded size in bytes (including header)
+ */
+size_t tav_encode_channel_ezbc(int16_t *coeffs, size_t count, int width, int height,
+                                uint8_t **output);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TAV_ENCODER_EZBC_H
--- a/video_encoder/lib/libtavenc/tav_encoder_lib.c
+++ b/video_encoder/lib/libtavenc/tav_encoder_lib.c
--- a/video_encoder/lib/libtavenc/tav_encoder_quantize.c
+++ b/video_encoder/lib/libtavenc/tav_encoder_quantize.c
@@ -0,0 +1,624 @@
+/**
+ * TAV Encoder - Quantization Library
+ *
+ * Provides DWT coefficient quantization with perceptual weighting based on
+ * the Human Visual System (HVS). Implements separable 3D quantization for
+ * temporal GOP encoding.
+ *
+ * Extracted from encoder_tav.c as part of library refactoring.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <math.h>
+
+// Forward declaration of encoder context (defined in main encoder)
+typedef struct tav_encoder_s tav_encoder_t;
+
+// =============================================================================
+// Utility Functions
+// =============================================================================
+
+// Saturate x into [min, max]; the lower bound is tested first.
+static inline int CLAMP(int x, int min, int max) {
+    if (x < min) {
+        return min;
+    }
+    if (x > max) {
+        return max;
+    }
+    return x;
+}
+
+// Float counterpart of CLAMP; a NaN input fails both comparisons and is returned as-is.
+static inline float FCLAMP(float x, float min, float max) {
+    if (x < min) {
+        return min;
+    }
+    if (x > max) {
+        return max;
+    }
+    return x;
+}
+
+// =============================================================================
+// Constants for Perceptual Model
+// =============================================================================
+
+// Dead-zone scaling factors. The quantisers multiply the dead-zone threshold
+// by one of these, for luma only, depending on the coefficient's subband.
+#define DEAD_ZONE_FINEST_SCALE 1.0f      // HH at level 1: full dead-zone
+#define DEAD_ZONE_FINE_SCALE 0.5f        // LH/HL at level 1 and HH at level 2: half dead-zone
+
+// Luma anisotropy: HL weight = LH weight * MULT[q] + BIAS[q] (see perceptual_model3_HL).
+// Indexed by quality level. Each table holds 7 entries (indices 0-6) and the
+// lookups are not bounds-checked, so the quality level must stay in that range.
+// NOTE(review): other comments describe quality as 0-5; confirm whether index 6 is reachable.
+static const float ANISOTROPY_MULT[] = {5.1f, 3.8f, 2.7f, 2.0f, 1.5f, 1.2f, 1.0f};
+static const float ANISOTROPY_BIAS[] = {0.4f, 0.3f, 0.2f, 0.1f, 0.0f, 0.0f, 0.0f};
+
+// Chroma-specific anisotropy (larger multipliers than luma). MULT scales the
+// chroma base curve for HL and HH; BIAS is added for HH only.
+static const float ANISOTROPY_MULT_CHROMA[] = {7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f};
+static const float ANISOTROPY_BIAS_CHROMA[] = {1.0f, 0.8f, 0.6f, 0.4f, 0.2f, 0.0f, 0.0f};
+
+// Detail preservation factors for 2-pixel and 4-pixel structures.
+// Applied to luma HL/HH weights when the normalised level lies in
+// [1.8, 2.2] (TWO_PIXEL) or [2.8, 3.2] (FOUR_PIXEL).
+#define FOUR_PIXEL_DETAILER 0.88f
+#define TWO_PIXEL_DETAILER  0.92f
+
+// =============================================================================
+// Subband Analysis Helper Functions
+// =============================================================================
+
+/**
+ * Decomposition level of the coefficient at (x, y) in a 2D subband layout.
+ *
+ * The region is halved once per level. A position outside the top-left
+ * quadrant belongs to a detail band of the current level; otherwise the
+ * search moves into that quadrant, which holds the coarser levels.
+ *
+ * Returns: 1 (finest) .. decomp_levels (coarsest) for detail bands,
+ *          0 when the position lies in the final LL band.
+ */
+static int get_subband_level_2d(int x, int y, int width, int height, int decomp_levels) {
+    int level = 1;
+
+    while (level <= decomp_levels) {
+        // Shrink to the top-left quadrant of the current region
+        width >>= 1;
+        height >>= 1;
+
+        // Outside that quadrant: a detail band (LH, HL or HH) of this level
+        if (x >= width || y >= height) {
+            return level;
+        }
+
+        level++;
+    }
+
+    // Never left the top-left quadrant: LL band
+    return 0;
+}
+
+/**
+ * Subband type of the coefficient at (x, y) in a 2D subband layout.
+ *
+ * Walks the same quadrant hierarchy as get_subband_level_2d() and reports
+ * which quadrant the position falls in at the level where it leaves the
+ * top-left one.
+ *
+ * Returns: 0=LL, 1=LH (right half, top), 2=HL (left half, bottom), 3=HH (right half, bottom)
+ */
+static int get_subband_type_2d(int x, int y, int width, int height, int decomp_levels) {
+    for (int remaining = decomp_levels; remaining > 0; remaining--) {
+        const int mid_x = width >> 1;
+        const int mid_y = height >> 1;
+        const int in_right = (x >= mid_x);
+        const int in_bottom = (y >= mid_y);
+
+        if (in_right || in_bottom) {
+            // right only -> 1 (LH), bottom only -> 2 (HL), both -> 3 (HH)
+            return in_right | (in_bottom << 1);
+        }
+
+        // Still inside the top-left quadrant: descend into it
+        width = mid_x;
+        height = mid_y;
+    }
+
+    // Stayed in the top-left quadrant at every level: LL band
+    return 0;
+}
+
+/**
+ * Linear-index wrapper around get_subband_level_2d().
+ * Interprets linear_idx as a row-major offset into a plane `width` wide.
+ */
+static int get_subband_level(int linear_idx, int width, int height, int decomp_levels) {
+    const int row = linear_idx / width;
+    const int col = linear_idx - row * width;   // same value as linear_idx % width
+
+    return get_subband_level_2d(col, row, width, height, decomp_levels);
+}
+
+// Linear-index wrapper around get_subband_type_2d(); linear_idx is taken as a
+// row-major offset into a plane `width` wide.
+static int get_subband_type(int linear_idx, int width, int height, int decomp_levels) {
+    const int row = linear_idx / width;
+    const int col = linear_idx - row * width;   // same value as linear_idx % width
+
+    return get_subband_type_2d(col, row, width, height, decomp_levels);
+}
+
+/**
+ * Temporal subband level of a frame slot inside a GOP.
+ *
+ * Slot boundaries are num_frames >> temporal_levels, num_frames >> (temporal_levels - 1),
+ * ..., num_frames >> 1. The first boundary that frame_idx falls below decides
+ * the level; slots at or beyond num_frames >> 1 get the highest level.
+ *
+ * Returns: 0 for the coarsest band (temporal low-pass),
+ *          temporal_levels for the finest band (first decomposition's high-pass)
+ */
+static int get_temporal_subband_level(int frame_idx, int num_frames, int temporal_levels) {
+    // Walk the boundaries from the smallest (coarsest band) to the largest
+    for (int shift = temporal_levels; shift >= 1; shift--) {
+        const int boundary = num_frames >> shift;
+        if (frame_idx < boundary) {
+            return temporal_levels - shift;
+        }
+    }
+
+    // Past every boundary: finest temporal band
+    return temporal_levels;
+}
+
+// =============================================================================
+// Perceptual Model Functions (HVS-based weighting)
+// =============================================================================
+
+// Blend x towards y by factor a (a = 0 gives x, a = 1 gives y)
+static float lerp(float x, float y, float a) {
+    const float from_part = x * (1.f - a);
+    const float to_part = y * a;
+    return from_part + to_part;
+}
+
+/**
+ * Perceptual model for LH subband (horizontal details).
+ * Piecewise curve: a cubic below level 4, a straight line from level 4 upwards.
+ * Curve: https://www.desmos.com/calculator/mjlpwqm8ge
+ *
+ * @param quality  Quality level; accepted for signature symmetry, not used by the curve
+ * @param level    Normalized decomposition level (1.0-6.0)
+ * @return         Perceptual weight multiplier
+ */
+static float perceptual_model3_LH(int quality, float level) {
+    const float H4 = 1.2f;  // curve value at level 4
+    const float K = 2.f;    // fixed value for a fixed curve; the quantiser scales the result anyway
+
+    // Linear segment
+    if (level >= 4) {
+        return H4 - ((K + 1.f) / 15.f) * (level - 4.f);
+    }
+
+    // Cubic segment
+    const float K12 = K * 12.f;
+    const float C3 = -1.f / 45.f * (K12 + 92);
+    return (-level / 180.f) * (K12 + 5*level*level - 60*level + 252) - C3 + H4;
+}
+
+/**
+ * Perceptual model for HL subband (vertical details).
+ * Computed from the LH weight as LH * ANISOTROPY_MULT[quality] + ANISOTROPY_BIAS[quality].
+ *
+ * @param quality  Quality level, used directly as the table index
+ * @param LH       LH subband weight
+ * @return         Perceptual weight multiplier
+ */
+static float perceptual_model3_HL(int quality, float LH) {
+    const float gain = ANISOTROPY_MULT[quality];
+    const float offset = ANISOTROPY_BIAS[quality];
+
+    return fmaf(LH, gain, offset);
+}
+
+/**
+ * Perceptual model for HH subband (diagonal details).
+ * Blends the LH and HL weights with factor (sqrt(level) - 1) * 0.5 + 0.5,
+ * so the result leans further towards HL as the level grows.
+ *
+ * @param LH     LH subband weight
+ * @param HL     HL subband weight
+ * @param level  Normalized decomposition level
+ * @return       Perceptual weight multiplier
+ */
+static float perceptual_model3_HH(float LH, float HL, float level) {
+    const float root_offset = sqrtf(level) - 1.f;
+    const float blend = fmaf(root_offset, 0.5f, 0.5f);
+
+    return lerp(LH, HL, blend);
+}
+
+/**
+ * Perceptual model for LL subband (low-frequency baseband).
+ * Takes the LH curve at `level` and divides it by the ratio
+ * LH(level - 1) / LH(level).
+ *
+ * @param quality  Quality level (forwarded to the LH curve)
+ * @param level    Normalized decomposition level
+ * @return         Perceptual weight multiplier
+ */
+static float perceptual_model3_LL(int quality, float level) {
+    const float at_level = perceptual_model3_LH(quality, level);
+    const float one_below = perceptual_model3_LH(quality, level - 1);
+    const float ratio = one_below / at_level;
+
+    return at_level / ratio;
+}
+
+/**
+ * Chroma-specific perceptual model base curve.
+ * A straight line through (4, 1) whose slope magnitude is
+ * 1 / (0.5 * quality^2 + 1); higher quality flattens the line.
+ *
+ * @param quality  Quality level
+ * @param level    Normalized decomposition level
+ * @return         Perceptual weight multiplier (not clamped here)
+ */
+static float perceptual_model3_chroma_basecurve(int quality, float level) {
+    const float denom = 0.5f * quality * quality + 1.0f;
+    const float slope = 1.0f / denom;
+
+    return 1.0f - slope * (level - 4.0f);
+}
+
+/**
+ * Get perceptual weight for a specific subband and level.
+ * Implements HVS-optimized frequency weighting.
+ *
+ * level0 is rescaled linearly onto the model's 1.0-6.0 level axis; the
+ * rescaling is normalised by (max_levels - 1).
+ *
+ * NOTE: This function requires enc->quality_level field from encoder context.
+ *
+ * @param enc           Encoder context (for quality_level)
+ * @param level0        Decomposition level (1-based: 1=finest, decomp_levels=coarsest)
+ * @param subband_type  Subband type (0=LL, 1=LH, 2=HL, 3=HH)
+ * @param is_chroma     1 for chroma channels, 0 for luma
+ * @param max_levels    Maximum decomposition levels
+ * @return              Perceptual weight multiplier. Chroma: LL is 1.0 and the
+ *                      detail bands are clamped to [1.0, 100.0]. Luma weights are
+ *                      not clamped and can be below 1.0 at coarse levels.
+ */
+static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels);
+
+/**
+ * Get perceptual weight for coefficient at linear index position.
+ * Maps linear coefficient index to DWT subband layout: the LL band first,
+ * then LH, HL, HH for each level from decomp_levels down to 1.
+ *
+ * NOTE: This function requires enc->widths[]/enc->heights[] arrays from encoder context
+ * (entries 1..decomp_levels are read).
+ *
+ * @param enc             Encoder context (for widths/heights arrays and quality_level)
+ * @param linear_idx      Linear coefficient index
+ * @param width           Frame width (currently not used by the implementation)
+ * @param height          Frame height (currently not used by the implementation)
+ * @param decomp_levels   Number of decomposition levels
+ * @param is_chroma       1 for chroma channels, 0 for luma
+ * @return                Perceptual weight multiplier from get_perceptual_weight()
+ *                        (luma values can be below 1.0), or 1.0 when linear_idx
+ *                        lies beyond every subband
+ */
+static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma);
+
+// =============================================================================
+// Quantization Functions
+// =============================================================================
+
+/**
+ * Quantize DWT coefficients with uniform quantization and optional dead-zone.
+ *
+ * This is the basic quantization function without perceptual weighting.
+ * Each coefficient is divided by the quantizer, rounded half away from zero
+ * and saturated to the int16_t range.
+ *
+ * Dead-zone quantization is applied selectively to luma channel only; a scaled
+ * value whose magnitude is at or below the subband's threshold becomes 0:
+ * - HH1 (finest diagonal): full dead-zone
+ * - LH1/HL1/HH2: half dead-zone
+ * - Coarser levels: no dead-zone (preserve structure)
+ * The subband is found by treating index i as the row-major position
+ * (i % width, i / width) in the 2D subband layout.
+ *
+ * @param coeffs               Input DWT coefficients (float)
+ * @param quantised            Output quantized coefficients (int16_t)
+ * @param size                 Number of coefficients
+ * @param quantiser            Base quantizer value (clamped to 1-4096)
+ * @param dead_zone_threshold  Dead-zone threshold (0.0 = disabled)
+ * @param width                Frame width (must be non-zero when the dead-zone is active)
+ * @param height               Frame height
+ * @param decomp_levels        Number of decomposition levels
+ * @param is_chroma            1 for chroma channels, 0 for luma
+ */
+void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
+                          float dead_zone_threshold, int width, int height,
+                          int decomp_levels, int is_chroma);
+
+/**
+ * Quantize DWT coefficients with per-coefficient perceptual weighting.
+ *
+ * Applies HVS-optimized frequency weighting to each coefficient based on its
+ * position in the DWT subband tree. Implements the full perceptual model with
+ * dead-zone quantization for luma. The effective quantizer per coefficient is
+ * the base quantizer (clamped to 1-4096) times the perceptual weight; results
+ * are rounded half away from zero and saturated to the int16_t range.
+ *
+ * NOTE: This function requires encoder context fields:
+ * - enc->widths[]/enc->heights[] for subband layout
+ * - enc->quality_level for perceptual model
+ * - enc->dead_zone_threshold for dead-zone quantization
+ *
+ * NOTE(review): the weight lookup maps the index band-sequentially (LL, then
+ * LH/HL/HH per level) while the dead-zone lookup treats the same index as a
+ * row-major (x, y) position; confirm both agree with the coefficient buffer layout.
+ *
+ * @param enc             Encoder context
+ * @param coeffs          Input DWT coefficients (float)
+ * @param quantised       Output quantized coefficients (int16_t)
+ * @param size            Number of coefficients
+ * @param base_quantiser  Base quantizer value (before perceptual weighting)
+ * @param width           Frame width (used for the dead-zone subband lookup)
+ * @param height          Frame height (used for the dead-zone subband lookup)
+ * @param decomp_levels   Number of decomposition levels
+ * @param is_chroma       1 for chroma channels, 0 for luma
+ * @param frame_count     Current frame number (currently not used by the implementation)
+ */
+void tav_quantise_perceptual(tav_encoder_t *enc,
+                              float *coeffs, int16_t *quantised, int size,
+                              int base_quantiser, int width, int height,
+                              int decomp_levels, int is_chroma, int frame_count);
+
+/**
+ * Quantize 3D DWT coefficients with SEPARABLE temporal-spatial quantization.
+ *
+ * After 3D DWT (temporal + spatial), GOP coefficients have this structure:
+ * - Temporal DWT applied first → temporal subbands at different levels
+ * - Spatial 2D DWT applied to each temporal subband
+ *
+ * Quantization strategy:
+ * 1. Compute temporal base quantizer: tH_base(level) = Qbase * 2^(beta*level^kappa)
+ *    - tLL (level 0): coarsest temporal → smallest quantizer
+ *    - tHH (highest level): finest temporal → largest quantizer
+ *    - beta = 0.6, kappa = 1.14; when bit 0 of enc->encoder_preset is set,
+ *      beta = 0 and every temporal subband keeps Qbase
+ *    - the result is rounded and clamped to 1-255
+ * 2. Apply spatial perceptual weighting to tH_base
+ * 3. Final quantizer: Q_effective = tH_base × spatial_weight
+ *
+ * NOTE: This function requires encoder context fields:
+ * - enc->encoder_preset for sports mode detection
+ * - enc->temporal_decomp_levels for temporal level calculation
+ * - enc->width, enc->height, enc->decomp_levels, enc->frame_count
+ *   (forwarded to tav_quantise_perceptual())
+ * - Plus all fields needed by tav_quantise_perceptual()
+ * enc->verbose is not read: the only debug output is commented out.
+ *
+ * @param enc             Encoder context
+ * @param gop_coeffs      GOP coefficients [frame][pixel] (temporal subbands)
+ * @param quantised       Output quantized coefficients [frame][pixel]
+ * @param num_frames      Number of temporal subband frames
+ * @param spatial_size    Number of spatial coefficients per frame
+ * @param base_quantiser  Base quantizer value (before temporal/spatial scaling)
+ * @param is_chroma       1 for chroma channels, 0 for luma
+ */
+void tav_quantise_3d_dwt(tav_encoder_t *enc,
+                         float **gop_coeffs, int16_t **quantised, int num_frames,
+                         int spatial_size, int base_quantiser, int is_chroma);
+
+/**
+ * Convert floating-point quantizer to integer with dithering (for bitrate mode).
+ *
+ * Error-diffusion style conversion: the float quantizer plus the carried error
+ * is rounded with (int)(v + 0.5f), and half of the rounding error is stored
+ * for the next call. The error is measured before the result is clamped.
+ *
+ * NOTE: This function requires encoder context fields:
+ * - enc->adjusted_quantiser_y_float (current float quantizer)
+ * - enc->dither_accumulator (accumulated error, modified by this function)
+ *
+ * @param enc  Encoder context
+ * @return     Integer quantizer value (0-254)
+ */
+int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc);
+
+// =============================================================================
+// Perceptual Weight Implementation (requires encoder context)
+// =============================================================================
+
+// NOTE: This implementation requires encoder context (enc->quality_level)
+// Struct definition will be in encoder header when integrated
+
+#ifndef TAV_ENCODER_QUANTIZE_INTERNAL
+// Local definition of the context fields this file reads or writes.
+// NOTE(review): callers pass a tav_encoder_t they created elsewhere. If the
+// encoder defines struct tav_encoder_s with a different member layout, the
+// accesses in this file use wrong offsets. Confirm this mirrors the real
+// struct exactly, or replace it with the shared header.
+struct tav_encoder_s {
+    int quality_level;                  // index into the anisotropy tables / perceptual curves
+    int *widths;                        // per-level subband widths, indexed 1..decomp_levels
+    int *heights;                       // per-level subband heights, indexed 1..decomp_levels
+    int decomp_levels;                  // spatial decomposition levels
+    float dead_zone_threshold;          // luma dead-zone threshold; <= 0 disables it
+    int encoder_preset;                 // bit 0 selects the temporal scaling constants
+    int temporal_decomp_levels;         // temporal decomposition levels
+    int verbose;                        // not read in this file (debug output is commented out)
+    int frame_count;                    // forwarded as the frame number argument
+    float adjusted_quantiser_y_float;   // input of the dithered float-to-int conversion
+    float dither_accumulator;           // carried rounding error, updated on each conversion
+    int width;                          // frame width
+    int height;                         // frame height
+};
+#endif
+
+/**
+ * Perceptual weight for one subband at one decomposition level.
+ *
+ * level0 is rescaled linearly from [1, max_levels] onto the model axis
+ * [1.0, 6.0] and fed to the perceptual_model3_* curves.
+ *
+ * Luma:   LL, LH, HL and HH each use their own curve; HL and HH are also
+ *         multiplied by TWO_PIXEL_DETAILER / FOUR_PIXEL_DETAILER when the
+ *         rescaled level lies in [1.8, 2.2] / [2.8, 3.2]. Not clamped.
+ * Chroma: LL is 1.0; the detail bands derive from the chroma base curve and
+ *         are clamped to [1.0, 100.0].
+ *
+ * @param enc           Encoder context (only quality_level is read)
+ * @param level0        Decomposition level (1=finest, max_levels=coarsest)
+ * @param subband_type  0=LL, 1=LH, 2=HL, 3=HH (any other value is treated as HH)
+ * @param is_chroma     Non-zero for chroma channels, 0 for luma
+ * @param max_levels    Number of decomposition levels
+ * @return              Perceptual weight multiplier
+ */
+static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels) {
+    // Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity
+    const int quality = enc->quality_level;
+
+    float level;
+    if (max_levels == 1) {
+        // The general formula divides by (max_levels - 1) and would yield NaN/inf
+        // here. Use the values these subbands get for any larger max_levels:
+        // the finest detail level maps to 1.0, and LL (always requested at the
+        // coarsest level) maps to 6.0.
+        level = (subband_type == 0) ? 6.0f : 1.0f;
+    } else {
+        level = 1.0f + ((level0 - 1.0f) / (max_levels - 1.0f)) * 5.0f;
+    }
+
+    // strategy: more horizontal detail
+    if (!is_chroma) {
+        // LL subband - contains most image energy, preserve carefully
+        if (subband_type == 0) {
+            return perceptual_model3_LL(quality, level);
+        }
+
+        // LH subband - horizontal details (human eyes more sensitive)
+        const float LH = perceptual_model3_LH(quality, level);
+        if (subband_type == 1) {
+            return LH;
+        }
+
+        // Detail-preservation factor shared by HL and HH
+        float detailer = 1.0f;
+        if (2.2f >= level && level >= 1.8f) {
+            detailer = TWO_PIXEL_DETAILER;
+        } else if (3.2f >= level && level >= 2.8f) {
+            detailer = FOUR_PIXEL_DETAILER;
+        }
+
+        // HL subband - vertical details
+        const float HL = perceptual_model3_HL(quality, LH);
+        if (subband_type == 2) {
+            return HL * detailer;
+        }
+
+        // HH subband - diagonal details
+        return perceptual_model3_HH(LH, HL, level) * detailer;
+    }
+
+    // CHROMA CHANNELS: Less critical for human perception, more aggressive quantisation
+    const float base = perceptual_model3_chroma_basecurve(quality, level - 1);
+
+    if (subband_type == 0) { // LL chroma - still important but less than luma
+        return 1.0f;
+    }
+    if (subband_type == 1) { // LH chroma - horizontal chroma details
+        return FCLAMP(base, 1.0f, 100.0f);
+    }
+    if (subband_type == 2) { // HL chroma - vertical chroma details (even less critical)
+        return FCLAMP(base * ANISOTROPY_MULT_CHROMA[quality], 1.0f, 100.0f);
+    }
+    // HH chroma - diagonal chroma details (most aggressive)
+    return FCLAMP(base * ANISOTROPY_MULT_CHROMA[quality] + ANISOTROPY_BIAS_CHROMA[quality], 1.0f, 100.0f);
+}
+
+// Finds which subband a linear coefficient index falls in and returns that
+// subband's perceptual weight. Bands are laid out back to back: LL first, then
+// LH, HL, HH for each level from decomp_levels down to 1. The width and height
+// parameters are not consulted; band sizes come from enc->widths[]/enc->heights[].
+static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma) {
+    // LL subband at maximum decomposition level; band_end is one past its last index
+    int band_end = enc->widths[decomp_levels] * enc->heights[decomp_levels];
+    if (linear_idx < band_end) {
+        return get_perceptual_weight(enc, decomp_levels, 0, is_chroma, decomp_levels);
+    }
+
+    for (int level = decomp_levels; level >= 1; level--) {
+        const int dim_idx = decomp_levels - level + 1;
+        const int band_size = enc->widths[dim_idx] * enc->heights[dim_idx];
+
+        // Three equally sized detail bands per level: 1=LH, 2=HL, 3=HH
+        for (int band_type = 1; band_type <= 3; band_type++) {
+            band_end += band_size;
+            if (linear_idx < band_end) {
+                return get_perceptual_weight(enc, level, band_type, is_chroma, decomp_levels);
+            }
+        }
+    }
+
+    // Index lies beyond every subband
+    return 1.0f;
+}
+
+// =============================================================================
+// Quantization Function Implementations
+// =============================================================================
+
+// Uniform quantiser: divide by the clamped quantiser, optionally zero small
+// luma values in the finest subbands, round half away from zero, saturate to int16_t.
+void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
+                          float dead_zone_threshold, int width, int height,
+                          int decomp_levels, int is_chroma) {
+    const float step = FCLAMP((float)quantiser, 1.0f, 4096.0f);
+    const int use_dead_zone = (dead_zone_threshold > 0.0f) && !is_chroma;
+
+    // Scalar implementation (AVX-512 version would go in separate optimized module)
+    for (int idx = 0; idx < size; idx++) {
+        float scaled = coeffs[idx] / step;
+
+        if (use_dead_zone) {
+            const int level = get_subband_level(idx, width, height, decomp_levels);
+            const int band = get_subband_type(idx, width, height, decomp_levels);
+            float cutoff = 0.0f;
+
+            if (level == 1 && band == 3) {
+                // HH1: full dead-zone
+                cutoff = dead_zone_threshold * DEAD_ZONE_FINEST_SCALE;
+            } else if (level == 1 && (band == 1 || band == 2)) {
+                // LH1, HL1: half dead-zone
+                cutoff = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
+            } else if (level == 2 && band == 3) {
+                // HH2: half dead-zone
+                cutoff = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
+            }
+
+            if (fabsf(scaled) <= cutoff) {
+                scaled = 0.0f;
+            }
+        }
+
+        const float rounding = (scaled >= 0) ? 0.5f : -0.5f;
+        quantised[idx] = (int16_t)CLAMP((int)(scaled + rounding), -32768, 32767);
+    }
+}
+
+// Perceptually weighted quantiser: each coefficient is divided by the clamped
+// base quantiser times its positional weight, optionally dead-zoned (luma only),
+// then rounded half away from zero and saturated to int16_t.
+void tav_quantise_perceptual(tav_encoder_t *enc,
+                              float *coeffs, int16_t *quantised, int size,
+                              int base_quantiser, int width, int height,
+                              int decomp_levels, int is_chroma, int frame_count) {
+    const float base_step = FCLAMP((float)base_quantiser, 1.0f, 4096.0f);
+    const float dead_zone = enc->dead_zone_threshold;
+    const int use_dead_zone = (dead_zone > 0.0f) && !is_chroma;
+
+    for (int idx = 0; idx < size; idx++) {
+        // Weight depends on the coefficient's position in the DWT layout
+        const float weight = get_perceptual_weight_for_position(enc, idx, width, height, decomp_levels, is_chroma);
+        const float step = base_step * weight;
+        float scaled = coeffs[idx] / step;
+
+        if (use_dead_zone) {
+            const int level = get_subband_level(idx, width, height, decomp_levels);
+            const int band = get_subband_type(idx, width, height, decomp_levels);
+            float cutoff = 0.0f;
+
+            if (level == 1 && band == 3) {
+                cutoff = dead_zone * DEAD_ZONE_FINEST_SCALE;
+            } else if (level == 1 && (band == 1 || band == 2)) {
+                cutoff = dead_zone * DEAD_ZONE_FINE_SCALE;
+            } else if (level == 2 && band == 3) {
+                cutoff = dead_zone * DEAD_ZONE_FINE_SCALE;
+            }
+
+            if (fabsf(scaled) <= cutoff) {
+                scaled = 0.0f;
+            }
+        }
+
+        const float rounding = (scaled >= 0) ? 0.5f : -0.5f;
+        quantised[idx] = (int16_t)CLAMP((int)(scaled + rounding), -32768, 32767);
+    }
+}
+
+// Separable GOP quantiser: derives one base quantiser per temporal subband
+// (Qbase * 2^(BETA * level^KAPPA), rounded, clamped to 1-255) and hands each
+// subband frame to tav_quantise_perceptual().
+void tav_quantise_3d_dwt(tav_encoder_t *enc,
+                         float **gop_coeffs, int16_t **quantised, int num_frames,
+                         int spatial_size, int base_quantiser, int is_chroma) {
+    // Sports preset (bit 0): use finer temporal quantisation (less aggressive)
+    const int sports_preset = (enc->encoder_preset & 0x01) != 0;
+    const float BETA = sports_preset ? 0.0f : 0.6f;
+    const float KAPPA = sports_preset ? 1.0f : 1.14f;
+
+    for (int t = 0; t < num_frames; t++) {
+        // Temporal subband this frame slot belongs to
+        const int temporal_level = get_temporal_subband_level(t, num_frames, enc->temporal_decomp_levels);
+
+        // Exponentially scaled base quantiser for that subband
+        const float temporal_scale = powf(2.0f, BETA * powf(temporal_level, KAPPA));
+        const float temporal_quantiser = base_quantiser * temporal_scale;
+        const int temporal_base_quantiser = CLAMP((int)roundf(temporal_quantiser), 1, 255);
+
+        // Spatial quantisation within this temporal subband
+        tav_quantise_perceptual(enc, gop_coeffs[t], quantised[t], spatial_size,
+                                temporal_base_quantiser, enc->width, enc->height,
+                                enc->decomp_levels, is_chroma, enc->frame_count + t);
+    }
+}
+
+// Rounds the float luma quantiser to an integer, carrying half of the rounding
+// error over to the next call through enc->dither_accumulator.
+int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc) {
+    // Float quantiser plus the error carried from the previous call
+    const float target = enc->adjusted_quantiser_y_float + enc->dither_accumulator;
+
+    const int rounded = (int)(target + 0.5f);
+
+    // Carry 50% of this call's rounding error forward (measured before clamping)
+    enc->dither_accumulator = (target - (float)rounded) * 0.5f;
+
+    return CLAMP(rounded, 0, 254);
+}
--- a/video_encoder/lib/libtavenc/tav_encoder_quantize.h
+++ b/video_encoder/lib/libtavenc/tav_encoder_quantize.h
@@ -0,0 +1,137 @@
+/**
+ * TAV Encoder - Quantization Library
+ *
+ * Public API for DWT coefficient quantization with perceptual weighting.
+ */
+
+#ifndef TAV_ENCODER_QUANTIZE_H
+#define TAV_ENCODER_QUANTIZE_H
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Forward declaration of encoder context (defined in main encoder)
+typedef struct tav_encoder_s tav_encoder_t;
+
+// =============================================================================
+// Uniform Quantization
+// =============================================================================
+
+/**
+ * Quantize DWT coefficients with uniform quantization and optional dead-zone.
+ *
+ * This is the basic quantization function without perceptual weighting.
+ * Each coefficient is divided by the quantizer, rounded half away from zero
+ * and saturated to the int16_t range.
+ *
+ * Dead-zone quantization is applied selectively to luma channel only; a scaled
+ * value whose magnitude is at or below the subband's threshold becomes 0:
+ * - HH1 (finest diagonal): full dead-zone
+ * - LH1/HL1/HH2: half dead-zone
+ * - Coarser levels: no dead-zone (preserve structure)
+ *
+ * @param coeffs               Input DWT coefficients (float)
+ * @param quantised            Output quantized coefficients (int16_t)
+ * @param size                 Number of coefficients
+ * @param quantiser            Base quantizer value (clamped to 1-4096)
+ * @param dead_zone_threshold  Dead-zone threshold (0.0 = disabled)
+ * @param width                Frame width (must be non-zero when the dead-zone is active)
+ * @param height               Frame height
+ * @param decomp_levels        Number of decomposition levels
+ * @param is_chroma            1 for chroma channels, 0 for luma
+ */
+void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
+                          float dead_zone_threshold, int width, int height,
+                          int decomp_levels, int is_chroma);
+
+// =============================================================================
+// Perceptual Quantization
+// =============================================================================
+
+/**
+ * Quantize DWT coefficients with per-coefficient perceptual weighting.
+ *
+ * Applies HVS-optimized frequency weighting to each coefficient based on its
+ * position in the DWT subband tree. Implements the full perceptual model with
+ * dead-zone quantization for luma. The effective quantizer per coefficient is
+ * the base quantizer (clamped to 1-4096) times the perceptual weight; results
+ * are rounded half away from zero and saturated to the int16_t range.
+ *
+ * NOTE: This function requires encoder context fields:
+ * - enc->widths[]/enc->heights[] for subband layout
+ * - enc->quality_level for perceptual model
+ * - enc->dead_zone_threshold for dead-zone quantization
+ *
+ * @param enc             Encoder context
+ * @param coeffs          Input DWT coefficients (float)
+ * @param quantised       Output quantized coefficients (int16_t)
+ * @param size            Number of coefficients
+ * @param base_quantiser  Base quantizer value (before perceptual weighting)
+ * @param width           Frame width (used for the dead-zone subband lookup)
+ * @param height          Frame height (used for the dead-zone subband lookup)
+ * @param decomp_levels   Number of decomposition levels
+ * @param is_chroma       1 for chroma channels, 0 for luma
+ * @param frame_count     Current frame number (currently not used by the implementation)
+ */
+void tav_quantise_perceptual(tav_encoder_t *enc,
+                              float *coeffs, int16_t *quantised, int size,
+                              int base_quantiser, int width, int height,
+                              int decomp_levels, int is_chroma, int frame_count);
+
+// =============================================================================
+// 3D GOP Quantization
+// =============================================================================
+
+/**
+ * Quantize 3D DWT coefficients with SEPARABLE temporal-spatial quantization.
+ *
+ * After 3D DWT (temporal + spatial), GOP coefficients have this structure:
+ * - Temporal DWT applied first → temporal subbands at different levels
+ * - Spatial 2D DWT applied to each temporal subband
+ *
+ * Quantization strategy:
+ * 1. Compute temporal base quantizer: tH_base(level) = Qbase * 2^(beta*level^kappa)
+ *    - tLL (level 0): coarsest temporal → smallest quantizer
+ *    - tHH (highest level): finest temporal → largest quantizer
+ *    - beta = 0.6, kappa = 1.14; when bit 0 of enc->encoder_preset is set,
+ *      beta = 0 and every temporal subband keeps Qbase
+ *    - the result is rounded and clamped to 1-255
+ * 2. Apply spatial perceptual weighting to tH_base
+ * 3. Final quantizer: Q_effective = tH_base × spatial_weight
+ *
+ * NOTE: This function requires encoder context fields:
+ * - enc->encoder_preset for sports mode detection
+ * - enc->temporal_decomp_levels for temporal level calculation
+ * - enc->width, enc->height, enc->decomp_levels, enc->frame_count
+ *   (forwarded to tav_quantise_perceptual())
+ * - Plus all fields needed by tav_quantise_perceptual()
+ * enc->verbose is not read: the only debug output is commented out.
+ *
+ * @param enc             Encoder context
+ * @param gop_coeffs      GOP coefficients [frame][pixel] (temporal subbands)
+ * @param quantised       Output quantized coefficients [frame][pixel]
+ * @param num_frames      Number of temporal subband frames
+ * @param spatial_size    Number of spatial coefficients per frame
+ * @param base_quantiser  Base quantizer value (before temporal/spatial scaling)
+ * @param is_chroma       1 for chroma channels, 0 for luma
+ */
+void tav_quantise_3d_dwt(tav_encoder_t *enc,
+                         float **gop_coeffs, int16_t **quantised, int num_frames,
+                         int spatial_size, int base_quantiser, int is_chroma);
+
+// =============================================================================
+// Rate Control
+// =============================================================================
+
+/**
+ * Convert floating-point quantizer to integer with dithering (for bitrate mode).
+ *
+ * Error-diffusion style conversion: the float quantizer plus the carried error
+ * is rounded with (int)(v + 0.5f), and half of the rounding error is stored
+ * for the next call. The error is measured before the result is clamped.
+ *
+ * NOTE: This function requires encoder context fields:
+ * - enc->adjusted_quantiser_y_float (current float quantizer)
+ * - enc->dither_accumulator (accumulated error, modified by this function)
+ *
+ * @param enc  Encoder context
+ * @return     Integer quantizer value (0-254)
+ */
+int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TAV_ENCODER_QUANTIZE_H
--- a/video_encoder/lib/libtavenc/tav_encoder_utils.c
+++ b/video_encoder/lib/libtavenc/tav_encoder_utils.c
@@ -0,0 +1,441 @@
+/**
+ * TAV Encoder - Utilities Library
+ *
+ * Common utility functions and helpers used across the encoder.
+ * Includes math utilities, clamping, filename generation, etc.
+ *
+ * Extracted from encoder_tav.c as part of library refactoring.
+ */
+
+#define _POSIX_C_SOURCE 200112L
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <time.h>
+#include <math.h>
+
+// =============================================================================
+// Math Utilities
+// =============================================================================
+
+/**
+ * Restrict an integer to the closed interval [min, max].
+ *
+ * The lower bound is tested first, so when min > max every x below min
+ * yields min.
+ *
+ * @param x    Value to clamp
+ * @param min  Lower bound
+ * @param max  Upper bound
+ * @return     min if x < min, otherwise max if x > max, otherwise x
+ */
+int tav_clamp_int(int x, int min, int max) {
+    if (x < min) {
+        return min;
+    }
+    if (x > max) {
+        return max;
+    }
+    return x;
+}
+
+/**
+ * Restrict a float to the closed interval [min, max].
+ *
+ * Both comparisons are false for NaN, so a NaN input is returned unchanged.
+ *
+ * @param x    Value to clamp
+ * @param min  Lower bound
+ * @param max  Upper bound
+ * @return     min if x < min, otherwise max if x > max, otherwise x
+ */
+float tav_clamp_float(float x, float min, float max) {
+    if (x < min) {
+        return min;
+    }
+    if (x > max) {
+        return max;
+    }
+    return x;
+}
+
+/**
+ * Restrict a double to the closed interval [min, max].
+ *
+ * Both comparisons are false for NaN, so a NaN input is returned unchanged.
+ *
+ * @param x    Value to clamp
+ * @param min  Lower bound
+ * @param max  Upper bound
+ * @return     min if x < min, otherwise max if x > max, otherwise x
+ */
+double tav_clamp_double(double x, double min, double max) {
+    if (x < min) {
+        return min;
+    }
+    if (x > max) {
+        return max;
+    }
+    return x;
+}
+
+/**
+ * Round a double to the nearest integer.
+ *
+ * Computed as floor(v + 0.5), so exact halves round toward +infinity
+ * (2.5 -> 3, -2.5 -> -2). The result is converted with a plain cast, so
+ * v must lie within the range of int.
+ *
+ * @param v  Value to round
+ * @return   Nearest integer
+ */
+int tav_iround(double v) {
+    const double shifted = v + 0.5;
+    return (int)floor(shifted);
+}
+
+/**
+ * Single-precision linear interpolation.
+ *
+ * Evaluates a * (1 - t) + b * t. The factor t is not clamped, so values
+ * outside [0, 1] extrapolate.
+ *
+ * @param a  Value returned for t = 0
+ * @param b  Value returned for t = 1
+ * @param t  Interpolation factor
+ * @return   Blend of a and b
+ */
+float tav_lerp(float a, float b, float t) {
+    const float weight_a = 1.0f - t;
+    return a * weight_a + b * t;
+}
+
+/**
+ * Double-precision linear interpolation.
+ *
+ * Evaluates a * (1 - t) + b * t. The factor t is not clamped, so values
+ * outside [0, 1] extrapolate.
+ *
+ * @param a  Value returned for t = 0
+ * @param b  Value returned for t = 1
+ * @param t  Interpolation factor
+ * @return   Blend of a and b
+ */
+double tav_lerp_double(double a, double b, double t) {
+    const double weight_a = 1.0 - t;
+    return a * weight_a + b * t;
+}
+
+/**
+ * Return the smaller of two integers.
+ *
+ * @param a  First value
+ * @param b  Second value
+ * @return   a if a < b, otherwise b
+ */
+int tav_min_int(int a, int b) {
+    if (a < b) {
+        return a;
+    }
+    return b;
+}
+
+/**
+ * Return the larger of two integers.
+ *
+ * @param a  First value
+ * @param b  Second value
+ * @return   a if a > b, otherwise b
+ */
+int tav_max_int(int a, int b) {
+    if (a > b) {
+        return a;
+    }
+    return b;
+}
+
+/**
+ * Return the smaller of two floats.
+ *
+ * The comparison a < b is false when either operand is NaN, in which case
+ * b is returned.
+ *
+ * @param a  First value
+ * @param b  Second value
+ * @return   a if a < b, otherwise b
+ */
+float tav_min_float(float a, float b) {
+    if (a < b) {
+        return a;
+    }
+    return b;
+}
+
+/**
+ * Return the larger of two floats.
+ *
+ * The comparison a > b is false when either operand is NaN, in which case
+ * b is returned.
+ *
+ * @param a  First value
+ * @param b  Second value
+ * @return   a if a > b, otherwise b
+ */
+float tav_max_float(float a, float b) {
+    if (a > b) {
+        return a;
+    }
+    return b;
+}
+
+/**
+ * Absolute value of an integer.
+ *
+ * Negative inputs are negated with unary minus, so the most negative int
+ * (whose magnitude is not representable) must not be passed.
+ *
+ * @param x  Input value
+ * @return   -x for negative x, otherwise x
+ */
+int tav_abs_int(int x) {
+    if (x < 0) {
+        return -x;
+    }
+    return x;
+}
+
+/**
+ * Absolute value of a float.
+ *
+ * Only values that compare below 0.0f are negated; inputs for which that
+ * comparison is false (including NaN) are returned as-is.
+ *
+ * @param x  Input value
+ * @return   -x when x < 0.0f, otherwise x
+ */
+float tav_abs_float(float x) {
+    if (x < 0.0f) {
+        return -x;
+    }
+    return x;
+}
+
+/**
+ * Sign of an integer.
+ *
+ * @param x  Input value
+ * @return   1 for positive x, -1 for negative x, 0 for zero
+ */
+int tav_sign(int x) {
+    if (x > 0) {
+        return 1;
+    }
+    if (x < 0) {
+        return -1;
+    }
+    return 0;
+}
+
+/**
+ * Test whether an integer is a power of two.
+ *
+ * @param x  Value to test
+ * @return   1 if x is positive and has exactly one bit set, otherwise 0
+ */
+int tav_is_power_of_2(int x) {
+    if (x <= 0) {
+        return 0;
+    }
+    // A power of two shares no set bit with its predecessor
+    return (x & (x - 1)) == 0;
+}
+
+/**
+ * Round up to the next power of 2.
+ *
+ * A value that already is a power of two is returned unchanged. The bit
+ * smearing below covers 32 bits, i.e. it assumes a 32-bit int.
+ *
+ * @param x  Input value
+ * @return   Smallest power of two >= x; 1 for x <= 0; 0 when x exceeds
+ *           2^30 and no such power of two is representable in int
+ */
+int tav_next_power_of_2(int x) {
+    // Largest power of two that fits in a 32-bit signed int
+    enum { TAV_MAX_POW2_INT = 1 << 30 };
+
+    if (x <= 0) return 1;
+
+    // Above 2^30 the final "+ 1" would overflow signed int (undefined
+    // behaviour); report "no result" instead.
+    if (x > TAV_MAX_POW2_INT) return 0;
+
+    // Copy the highest set bit of (x - 1) into every lower position, then
+    // add one to reach the next power of two.
+    x--;
+    x |= x >> 1;
+    x |= x >> 2;
+    x |= x >> 4;
+    x |= x >> 8;
+    x |= x >> 16;
+    return x + 1;
+}
+
+/**
+ * Integer base-2 logarithm, rounded down.
+ *
+ * @param x  Input value
+ * @return   Largest n with 2^n <= x, or -1 when x <= 0
+ */
+int tav_floor_log2(int x) {
+    if (x <= 0) {
+        return -1;
+    }
+
+    // Count how many halvings it takes to reach 1
+    int exponent = 0;
+    for (int rest = x; rest > 1; rest >>= 1) {
+        exponent++;
+    }
+    return exponent;
+}
+
+/**
+ * Integer base-2 logarithm, rounded up.
+ *
+ * @param x  Input value
+ * @return   Smallest n with 2^n >= x, or -1 when x <= 0
+ */
+int tav_ceil_log2(int x) {
+    if (x <= 0) {
+        return -1;
+    }
+
+    // ceil(log2(x)) equals the number of significant bits in (x - 1):
+    // zero bits for x == 1, k bits for any x in (2^(k-1), 2^k].
+    int bits = 0;
+    for (int rest = x - 1; rest > 0; rest >>= 1) {
+        bits++;
+    }
+    return bits;
+}
+
+// =============================================================================
+// Random Filename Generation
+// =============================================================================
+
+/**
+ * Write "/tmp/" followed by 32 random alphanumeric characters to filename.
+ *
+ * Shared by both public generators so that rand() is seeded exactly once.
+ * With one seed flag per generator, the second generator re-seeded with
+ * the same time(NULL) value and replayed the same character sequence,
+ * producing colliding names when both were used within one second.
+ *
+ * Writes exactly 37 bytes and does NOT NUL-terminate; the caller appends
+ * the extension.
+ *
+ * @param filename  Output buffer (at least 37 bytes)
+ */
+static void tav_write_random_tmp_stem(char *filename) {
+    enum { TAV_TMP_PREFIX_LEN = 5, TAV_TMP_RANDOM_LEN = 32 };
+    static const char charset[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
+    static int seeded = 0;
+    const int charset_size = (int)(sizeof(charset) - 1);
+
+    if (!seeded) {
+        srand((unsigned int)time(NULL));
+        seeded = 1;
+    }
+
+    memcpy(filename, "/tmp/", TAV_TMP_PREFIX_LEN);
+    for (int i = 0; i < TAV_TMP_RANDOM_LEN; i++) {
+        filename[TAV_TMP_PREFIX_LEN + i] = charset[rand() % charset_size];
+    }
+}
+
+/**
+ * Generate a random temporary filename with .mp2 extension.
+ * Format: /tmp/[32 random chars].mp2
+ *
+ * NOTE: the name comes from rand() seeded with the current time, so it is
+ * predictable and the file is not created here. Callers that need a
+ * race-free temporary file should open the path exclusively.
+ *
+ * @param filename  Output buffer (must be at least 42 bytes)
+ */
+void tav_generate_random_filename(char *filename) {
+    tav_write_random_tmp_stem(filename);
+
+    // 37 = strlen("/tmp/") + 32 random characters; the copy includes the NUL
+    memcpy(filename + 37, ".mp2", sizeof(".mp2"));
+}
+
+/**
+ * Generate a random temporary filename with custom extension.
+ * Format: /tmp/[32 random chars].[ext]
+ *
+ * NOTE: the name comes from rand() seeded with the current time, so it is
+ * predictable and the file is not created here.
+ *
+ * @param filename  Output buffer (must hold at least 39 + strlen(ext) bytes:
+ *                  5 prefix + 32 random + '.' + ext + NUL)
+ * @param ext       File extension (without leading dot, e.g., "tmp", "wav")
+ */
+void tav_generate_random_filename_ext(char *filename, const char *ext) {
+    tav_write_random_tmp_stem(filename);
+
+    filename[37] = '.';
+    strcpy(filename + 38, ext);
+}
+
+// =============================================================================
+// Memory Utilities
+// =============================================================================
+
+/**
+ * malloc() wrapper that never returns NULL for a non-zero request.
+ *
+ * On allocation failure an error is printed to stderr and the process
+ * terminates via exit(1). A zero-byte request passes through whatever
+ * malloc(0) returned.
+ *
+ * @param size  Number of bytes to allocate
+ * @return      Pointer to the allocation (release with free())
+ */
+void *tav_malloc(size_t size) {
+    void *block = malloc(size);
+    if (block != NULL || size == 0) {
+        return block;
+    }
+
+    fprintf(stderr, "ERROR: Failed to allocate %zu bytes\n", size);
+    exit(1);
+}
+
+/**
+ * calloc() wrapper that never returns NULL for a non-empty request.
+ *
+ * On allocation failure an error is printed to stderr and the process
+ * terminates via exit(1). When count or size is zero, whatever calloc()
+ * returned is passed through.
+ *
+ * @param count  Number of elements
+ * @param size   Size of each element in bytes
+ * @return       Pointer to zero-initialised memory (release with free())
+ */
+void *tav_calloc(size_t count, size_t size) {
+    void *block = calloc(count, size);
+    if (block != NULL || count == 0 || size == 0) {
+        return block;
+    }
+
+    fprintf(stderr, "ERROR: Failed to allocate %zu elements of %zu bytes\n", count, size);
+    exit(1);
+}
+
+/**
+ * realloc() wrapper that never returns NULL for a non-zero request.
+ *
+ * On failure an error is printed to stderr and the process terminates via
+ * exit(1). A zero-byte request passes through whatever realloc() returned.
+ *
+ * @param ptr   Existing allocation, or NULL
+ * @param size  New size in bytes
+ * @return      Pointer to the resized allocation (release with free())
+ */
+void *tav_realloc(void *ptr, size_t size) {
+    void *resized = realloc(ptr, size);
+    if (resized != NULL || size == 0) {
+        return resized;
+    }
+
+    fprintf(stderr, "ERROR: Failed to reallocate to %zu bytes\n", size);
+    exit(1);
+}
+
+/**
+ * Allocate aligned memory.
+ *
+ * @param alignment  Required alignment in bytes; must be a power of 2
+ * @param size       Number of bytes to allocate
+ * @return           Pointer aligned to at least `alignment`, or NULL if the
+ *                   alignment is invalid or the allocation fails. Release
+ *                   with tav_aligned_free().
+ */
+void *tav_aligned_alloc(size_t alignment, size_t size) {
+    // Validate in size_t: routing the value through an int-based helper
+    // truncates it on 64-bit targets and misjudges large alignments.
+    if (alignment == 0 || (alignment & (alignment - 1)) != 0) {
+        fprintf(stderr, "ERROR: Alignment must be power of 2, got %zu\n", alignment);
+        return NULL;
+    }
+
+#ifdef _WIN32
+    return _aligned_malloc(size, alignment);
+#else
+    // posix_memalign() rejects alignments that are not a multiple of
+    // sizeof(void *). A stricter alignment still satisfies the request,
+    // so raise small powers of two instead of failing.
+    if (alignment < sizeof(void *)) {
+        alignment = sizeof(void *);
+    }
+
+    void *ptr = NULL;
+    if (posix_memalign(&ptr, alignment, size) != 0) {
+        return NULL;
+    }
+    return ptr;
+#endif
+}
+
+/**
+ * Release memory obtained from tav_aligned_alloc().
+ *
+ * Uses _aligned_free() on Windows and free() elsewhere, mirroring the
+ * allocator chosen by tav_aligned_alloc().
+ *
+ * @param ptr  Pointer to release
+ */
+void tav_aligned_free(void *ptr) {
+#if defined(_WIN32)
+    _aligned_free(ptr);
+#else
+    free(ptr);
+#endif
+}
+
+// =============================================================================
+// Array Utilities
+// =============================================================================
+
+/**
+ * Set every element of an integer array to the same value.
+ *
+ * @param array  Destination array
+ * @param count  Number of elements to write
+ * @param value  Value stored in each element
+ */
+void tav_array_fill_int(int *array, size_t count, int value) {
+    while (count-- > 0) {
+        *array++ = value;
+    }
+}
+
+/**
+ * Set every element of a float array to the same value.
+ *
+ * @param array  Destination array
+ * @param count  Number of elements to write
+ * @param value  Value stored in each element
+ */
+void tav_array_fill_float(float *array, size_t count, float value) {
+    while (count-- > 0) {
+        *array++ = value;
+    }
+}
+
+/**
+ * Copy an integer array with memcpy().
+ *
+ * The source and destination ranges must not overlap.
+ *
+ * @param dst    Destination array
+ * @param src    Source array
+ * @param count  Number of elements to copy
+ */
+void tav_array_copy_int(int *dst, const int *src, size_t count) {
+    const size_t byte_count = count * sizeof *src;
+    memcpy(dst, src, byte_count);
+}
+
+/**
+ * Copy a float array with memcpy().
+ *
+ * The source and destination ranges must not overlap.
+ *
+ * @param dst    Destination array
+ * @param src    Source array
+ * @param count  Number of elements to copy
+ */
+void tav_array_copy_float(float *dst, const float *src, size_t count) {
+    const size_t byte_count = count * sizeof *src;
+    memcpy(dst, src, byte_count);
+}
+
+/**
+ * Largest element of an integer array.
+ *
+ * @param array  Input array
+ * @param count  Number of elements
+ * @return       Maximum element, or 0 when count is 0
+ */
+int tav_array_max_int(const int *array, size_t count) {
+    if (count == 0) {
+        return 0;
+    }
+
+    int best = array[0];
+    for (size_t idx = 1; idx < count; idx++) {
+        best = (array[idx] > best) ? array[idx] : best;
+    }
+    return best;
+}
+
+/**
+ * Smallest element of an integer array.
+ *
+ * @param array  Input array
+ * @param count  Number of elements
+ * @return       Minimum element, or 0 when count is 0
+ */
+int tav_array_min_int(const int *array, size_t count) {
+    if (count == 0) {
+        return 0;
+    }
+
+    int best = array[0];
+    for (size_t idx = 1; idx < count; idx++) {
+        best = (array[idx] < best) ? array[idx] : best;
+    }
+    return best;
+}
+
+/**
+ * Largest magnitude found in a float array.
+ *
+ * @param array  Input array
+ * @param count  Number of elements
+ * @return       Maximum of fabsf(array[i]), or 0.0f when count is 0
+ */
+float tav_array_max_abs_float(const float *array, size_t count) {
+    if (count == 0) {
+        return 0.0f;
+    }
+
+    float peak = fabsf(array[0]);
+    for (size_t idx = 1; idx < count; idx++) {
+        const float magnitude = fabsf(array[idx]);
+        if (!(magnitude > peak)) {
+            continue;
+        }
+        peak = magnitude;
+    }
+    return peak;
+}
+
+/**
+ * Sum of an integer array, accumulated in long long.
+ *
+ * @param array  Input array
+ * @param count  Number of elements
+ * @return       Sum of all elements (0 when count is 0)
+ */
+long long tav_array_sum_int(const int *array, size_t count) {
+    long long total = 0;
+    while (count-- > 0) {
+        total += *array++;
+    }
+    return total;
+}
+
+/**
+ * Sum of a float array, accumulated in double.
+ *
+ * Elements are added in index order.
+ *
+ * @param array  Input array
+ * @param count  Number of elements
+ * @return       Sum of all elements (0.0 when count is 0)
+ */
+double tav_array_sum_float(const float *array, size_t count) {
+    double total = 0.0;
+    while (count-- > 0) {
+        total += *array++;
+    }
+    return total;
+}
+
+/**
+ * Arithmetic mean of a float array.
+ *
+ * The elements are summed in double precision in index order, divided by
+ * count, and the quotient is narrowed to float.
+ *
+ * @param array  Input array
+ * @param count  Number of elements
+ * @return       Mean value, or 0.0f when count is 0
+ */
+float tav_array_mean_float(const float *array, size_t count) {
+    if (count == 0) {
+        return 0.0f;
+    }
+
+    double total = 0.0;
+    for (size_t idx = 0; idx < count; idx++) {
+        total += array[idx];
+    }
+    return (float)(total / count);
+}
+
+/**
+ * Exchange the values of two integers.
+ *
+ * @param a  First integer (receives the value of *b)
+ * @param b  Second integer (receives the value of *a)
+ */
+void tav_swap_int(int *a, int *b) {
+    const int first = *a;
+    *a = *b;
+    *b = first;
+}
+
+/**
+ * Exchange the values of two floats.
+ *
+ * @param a  First float (receives the value of *b)
+ * @param b  Second float (receives the value of *a)
+ */
+void tav_swap_float(float *a, float *b) {
+    const float first = *a;
+    *a = *b;
+    *b = first;
+}
+
+/**
+ * Exchange the values of two generic pointers.
+ *
+ * @param a  First pointer slot (receives the value of *b)
+ * @param b  Second pointer slot (receives the value of *a)
+ */
+void tav_swap_ptr(void **a, void **b) {
+    void *const first = *a;
+    *a = *b;
+    *b = first;
+}
--- a/video_encoder/lib/libtavenc/tav_encoder_utils.h
+++ b/video_encoder/lib/libtavenc/tav_encoder_utils.h
@@ -0,0 +1,165 @@
+/**
+ * TAV Encoder - Utilities Library
+ *
+ * Public API for common utility functions and helpers.
+ */
+
+#ifndef TAV_ENCODER_UTILS_H
+#define TAV_ENCODER_UTILS_H
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// =============================================================================
+// Math Utilities
+// =============================================================================
+
+/** Clamp integer value to range [min, max] */
+int tav_clamp_int(int x, int min, int max);
+
+/** Clamp float value to range [min, max] */
+float tav_clamp_float(float x, float min, float max);
+
+/** Clamp double value to range [min, max] */
+double tav_clamp_double(double x, double min, double max);
+
+/** Round double to nearest integer; exact halves round toward +infinity (floor(v + 0.5)) */
+int tav_iround(double v);
+
+/** Linear interpolation between two floats */
+float tav_lerp(float a, float b, float t);
+
+/** Linear interpolation between two doubles */
+double tav_lerp_double(double a, double b, double t);
+
+/** Get minimum of two integers */
+int tav_min_int(int a, int b);
+
+/** Get maximum of two integers */
+int tav_max_int(int a, int b);
+
+/** Get minimum of two floats */
+float tav_min_float(float a, float b);
+
+/** Get maximum of two floats */
+float tav_max_float(float a, float b);
+
+/** Compute absolute value of integer */
+int tav_abs_int(int x);
+
+/** Compute absolute value of float */
+float tav_abs_float(float x);
+
+/** Sign function: returns -1, 0, or 1 */
+int tav_sign(int x);
+
+/** Check if integer is power of 2 */
+int tav_is_power_of_2(int x);
+
+/** Round up to next power of 2 (a power of 2 is returned unchanged; returns 1 for x <= 0) */
+int tav_next_power_of_2(int x);
+
+/** Compute floor of log2(x); returns -1 for x <= 0 */
+int tav_floor_log2(int x);
+
+/** Compute ceil of log2(x); returns -1 for x <= 0 */
+int tav_ceil_log2(int x);
+
+// =============================================================================
+// Random Filename Generation
+// =============================================================================
+
+/**
+ * Generate a random temporary filename with .mp2 extension.
+ * Format: /tmp/[32 random chars].mp2
+ *
+ * @param filename  Output buffer (must be at least 42 bytes)
+ */
+void tav_generate_random_filename(char *filename);
+
+/**
+ * Generate a random temporary filename with custom extension.
+ * Format: /tmp/[32 random chars].[ext]
+ *
+ * @param filename  Output buffer (must hold at least 39 + strlen(ext) bytes, including the NUL terminator)
+ * @param ext       File extension (without leading dot)
+ */
+void tav_generate_random_filename_ext(char *filename, const char *ext);
+
+// =============================================================================
+// Memory Utilities
+// =============================================================================
+
+/** Safe malloc with error checking (exits on failure) */
+void *tav_malloc(size_t size);
+
+/** Safe calloc with error checking (exits on failure) */
+void *tav_calloc(size_t count, size_t size);
+
+/** Safe realloc with error checking (exits on failure) */
+void *tav_realloc(void *ptr, size_t size);
+
+/** Allocate aligned memory; alignment must be a power of 2 (returns NULL on failure, release with tav_aligned_free) */
+void *tav_aligned_alloc(size_t alignment, size_t size);
+
+/** Free aligned memory */
+void tav_aligned_free(void *ptr);
+
+// =============================================================================
+// Array Utilities
+// =============================================================================
+
+/** Fill integer array with constant value */
+void tav_array_fill_int(int *array, size_t count, int value);
+
+/** Fill float array with constant value */
+void tav_array_fill_float(float *array, size_t count, float value);
+
+/** Copy integer array */
+void tav_array_copy_int(int *dst, const int *src, size_t count);
+
+/** Copy float array */
+void tav_array_copy_float(float *dst, const float *src, size_t count);
+
+/** Find maximum value in integer array */
+int tav_array_max_int(const int *array, size_t count);
+
+/** Find minimum value in integer array */
+int tav_array_min_int(const int *array, size_t count);
+
+/** Find maximum absolute value in float array */
+float tav_array_max_abs_float(const float *array, size_t count);
+
+/** Compute sum of integer array */
+long long tav_array_sum_int(const int *array, size_t count);
+
+/** Compute sum of float array */
+double tav_array_sum_float(const float *array, size_t count);
+
+/** Compute mean of float array */
+float tav_array_mean_float(const float *array, size_t count);
+
+/** Swap two integer values */
+void tav_swap_int(int *a, int *b);
+
+/** Swap two float values */
+void tav_swap_float(float *a, float *b);
+
+/** Swap two pointer values */
+void tav_swap_ptr(void **a, void **b);
+
+// =============================================================================
+// Convenience Macros (for backward compatibility)
+// =============================================================================
+
+#define CLAMP(x, min, max)  tav_clamp_int(x, min, max)
+#define FCLAMP(x, min, max) tav_clamp_float(x, min, max)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TAV_ENCODER_UTILS_H