tav: librarying

This commit is contained in:
minjaesong
2025-12-05 03:39:32 +09:00
parent d3cc05789f
commit 94ae24e9e4
32 changed files with 7073 additions and 14028 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,354 @@
# libtavenc - TAV Video Encoder Library
**libtavenc** is a high-performance video encoding library implementing the TSVM Advanced Video (TAV) codec. It provides a clean C API for encoding RGB24 video frames using discrete wavelet transform (DWT) with perceptual quantization and GOP-based temporal compression.
## Features
- **Multiple Wavelet Types**: CDF 5/3, CDF 9/7, CDF 13/7, DD-4, Haar
- **3D DWT GOP Encoding**: Temporal + spatial wavelet compression
- **Perceptual Quantization**: HVS-optimized coefficient scaling
- **EZBC Entropy Coding**: Efficient coefficient compression with Zstd
- **Multi-threading**: Internal thread pool for optimal performance
- **Color Spaces**: YCoCg-R (default) and ICtCp (for HDR)
- **Quality Levels**: 0-5 (0=lowest/smallest, 5=highest/largest)
## Building
```bash
# Build static library
make lib/libtavenc.a
# Build with encoder CLI
make encoder_tav
# Install library and headers
make install-libs PREFIX=/usr/local
```
## Quick Start
### Basic Encoding
```c
#include "tav_encoder_lib.h"
#include <stdio.h>
int main() {
// Initialize encoder parameters
tav_encoder_params_t params;
tav_encoder_params_init(&params, 1920, 1080);
// Configure encoding options
params.fps_num = 60;
params.fps_den = 1;
params.wavelet_type = 1; // CDF 9/7 (default)
params.quality_y = 3; // Quality level 3
params.quality_co = 3;
params.quality_cg = 3;
params.enable_temporal_dwt = 1; // Enable 3D GOP encoding
params.gop_size = 0; // Auto-calculate (typically 16-24)
params.num_threads = 4; // 4 worker threads
// Create encoder context
tav_encoder_context_t *ctx = tav_encoder_create(&params);
if (!ctx) {
fprintf(stderr, "Failed to create encoder\n");
return -1;
}
// Get actual parameters (with auto-calculated values)
tav_encoder_get_params(ctx, &params);
printf("GOP size: %d frames\n", params.gop_size);
// Encode frames
uint8_t *rgb_frame = /* ... load RGB24 frame ... */;
tav_encoder_packet_t *packet;
for (int i = 0; i < num_frames; i++) {
int result = tav_encoder_encode_frame(ctx, rgb_frame, i, &packet);
if (result == 1) {
// Packet ready (GOP completed)
fwrite(packet->data, 1, packet->size, outfile);
tav_encoder_free_packet(packet);
}
else if (result == 0) {
// Frame buffered, waiting for GOP to fill
}
else {
// Error
fprintf(stderr, "Encoding error: %s\n", tav_encoder_get_error(ctx));
break;
}
}
// Flush remaining frames
while (tav_encoder_flush(ctx, &packet) == 1) {
fwrite(packet->data, 1, packet->size, outfile);
tav_encoder_free_packet(packet);
}
// Cleanup
tav_encoder_free(ctx);
return 0;
}
```
### Stateless GOP Encoding (Multi-threaded)
The library provides `tav_encoder_encode_gop()` for stateless GOP encoding, perfect for multi-threaded applications:
```c
#include "tav_encoder_lib.h"
#include <pthread.h>
typedef struct {
tav_encoder_params_t params;
uint8_t **rgb_frames;
int num_frames;
int *frame_numbers;
tav_encoder_packet_t *output_packet;
} gop_encode_job_t;
void *encode_gop_thread(void *arg) {
gop_encode_job_t *job = (gop_encode_job_t *)arg;
// Create thread-local encoder context
tav_encoder_context_t *ctx = tav_encoder_create(&job->params);
if (!ctx) {
return NULL;
}
// Encode entire GOP at once (stateless, thread-safe)
tav_encoder_encode_gop(ctx,
(const uint8_t **)job->rgb_frames,
job->num_frames,
job->frame_numbers,
&job->output_packet);
tav_encoder_free(ctx);
return NULL;
}
int main() {
    // Setup parameters
    tav_encoder_params_t params;
    tav_encoder_params_init(&params, 1920, 1080);
    params.enable_temporal_dwt = 1;
    params.gop_size = 24;
    // Create worker threads
    pthread_t threads[4];
    gop_encode_job_t jobs[4];
    for (int i = 0; i < 4; i++) {
        jobs[i].params = params;
        jobs[i].rgb_frames = /* ... load GOP frames ... */;
        jobs[i].num_frames = 24;
        jobs[i].frame_numbers = /* ... frame indices ... */;
        // Must be NULL up front: the worker returns early without touching
        // this field when it cannot create its encoder context.
        jobs[i].output_packet = NULL;
        pthread_create(&threads[i], NULL, encode_gop_thread, &jobs[i]);
    }
    // Wait for completion
    for (int i = 0; i < 4; i++) {
        pthread_join(threads[i], NULL);
        // Write output packet (absent if the worker failed)
        if (jobs[i].output_packet) {
            fwrite(jobs[i].output_packet->data, 1,
                   jobs[i].output_packet->size, outfile);
            tav_encoder_free_packet(jobs[i].output_packet);
        }
    }
    return 0;
}
```
## API Reference
### Context Management
#### `tav_encoder_create()`
Creates encoder context with specified parameters. Allocates internal buffers and initializes thread pool if multi-threading enabled.
**Returns**: Encoder context or NULL on failure
#### `tav_encoder_free()`
Frees encoder context and all resources. Any unflushed GOP frames are lost.
#### `tav_encoder_get_error()`
Returns last error message string.
#### `tav_encoder_get_params()`
Gets encoder parameters with calculated values (e.g., auto-calculated GOP size, decomposition levels).
### Frame Encoding
#### `tav_encoder_encode_frame()`
Encodes single RGB24 frame. Frames are buffered until GOP is full.
**Parameters**:
- `rgb_frame`: RGB24 planar format `[R...][G...][B...]`, width×height×3 bytes
- `frame_pts`: Presentation timestamp (frame number or time)
- `packet`: Output packet pointer (NULL if GOP not ready)
**Returns**:
- `1`: Packet ready (GOP completed)
- `0`: Frame buffered, waiting for more frames
- `-1`: Error
#### `tav_encoder_flush()`
Flushes remaining buffered frames and encodes final GOP. Call at end of stream.
**Returns**:
- `1`: Packet ready
- `0`: No more packets
- `-1`: Error
#### `tav_encoder_encode_gop()`
Stateless GOP encoding. Thread-safe with separate contexts.
**Parameters**:
- `rgb_frames`: Array of RGB24 frames `[frame][width×height×3]`
- `num_frames`: Number of frames in GOP (1-24)
- `frame_numbers`: Frame indices for timecodes (can be NULL)
- `packet`: Output packet pointer
**Returns**: `1` on success, `-1` on error
### Packet Management
#### `tav_encoder_free_packet()`
Frees packet returned by encoding functions.
## Encoder Parameters
### Video Dimensions
- `width`, `height`: Frame dimensions (must be even)
- `fps_num`, `fps_den`: Framerate (e.g., 60/1 for 60fps)
### Wavelet Configuration
- `wavelet_type`: Spatial wavelet
- `0`: CDF 5/3 (reversible, lossless-capable)
- `1`: CDF 9/7 (default, best compression)
- `2`: CDF 13/7 (experimental)
- `16`: DD-4 (four-point interpolating)
- `255`: Haar (demonstration)
- `temporal_wavelet`: Temporal wavelet for 3D DWT
- `0`: Haar (default for sports/high motion)
- `1`: CDF 5/3 (smooth motion)
- `decomp_levels`: Spatial DWT levels (0=auto, typically 6)
- `temporal_levels`: Temporal DWT levels (0=auto, typically 2 for 8-frame GOPs)
### Color Space
- `channel_layout`:
- `0`: YCoCg-R (default, efficient chroma)
- `1`: ICtCp (for HDR/BT.2100 sources)
- `perceptual_tuning`: 1=enable HVS perceptual quantization (default), 0=uniform
### GOP Configuration
- `enable_temporal_dwt`: 1=enable 3D DWT GOP encoding (default), 0=intra-only I-frames
- `gop_size`: Frames per GOP (8, 16, or 24; 0=auto based on framerate)
- `enable_two_pass`: 1=enable two-pass with scene change detection (default), 0=single-pass
### Quality Control
- `quality_y`: Luma quality (0-5, default: 3)
- `quality_co`: Orange chrominance quality (0-5, default: 3)
- `quality_cg`: Green chrominance quality (0-5, default: 3)
- `dead_zone_threshold`: Dead-zone quantization (0=disabled, 1-10 typical)
### Entropy Coding
- `entropy_coder`:
- `0`: Twobitmap (default, fast)
- `1`: EZBC (better compression for high-quality)
- `zstd_level`: Zstd compression level (3-22, default: 7)
### Multi-threading
- `num_threads`: Worker threads
- `0`: Single-threaded (default for CLI)
- `-1`: Auto-detect CPU cores
- `1-16`: Explicit thread count
### Encoder Presets
- `encoder_preset`: Preset flags
- `0x01`: Sports mode (finer temporal quantization)
- `0x02`: Anime mode (disable grain)
## TAV Packet Types
Output packets have type field indicating content:
- `0x10`: I-frame (intra-only, single frame)
- `0x11`: P-frame (delta from previous)
- `0x12`: GOP unified (3D DWT, multiple frames)
- `0x24`: TAD audio (DWT-based audio codec)
- `0xF0`: Loop point start
- `0xFC`: GOP sync (frame count marker)
- `0xFD`: Timecode metadata
## Performance Notes
### Threading Model
- Library manages an internal thread pool when `num_threads` is non-zero (`-1` auto-detects the CPU core count)
- GOP encoding is parallelized across worker threads
- For CLI tools: use `num_threads=0` (single-threaded) to avoid double-threading with external parallelism
- For library integration: use `num_threads=-1` or explicit count for optimal performance
### Memory Usage
- Each encoder context allocates:
- GOP buffer: `gop_size × width × height × 3` bytes (RGB frames)
- DWT coefficients: `~width × height × 12` bytes per channel
- Thread pool: `num_threads × (GOP buffer + workspace)`
- Typical 1920×1080 encoder with GOP=24: ~180 MB per context
### Encoding Speed
- Single-threaded: 10-15 fps (1920×1080 on modern CPU)
- Multi-threaded (4 threads): 30-40 fps
- GOP size affects latency: larger GOP = higher latency, better compression
## Integration with TAD Audio
TAV files typically include TAD-compressed audio. Link with both libraries:
```c
#include "tav_encoder_lib.h"
#include "encoder_tad.h"
// Encode video frame
tav_encoder_encode_frame(video_ctx, rgb_frame, pts, &video_packet);
// Encode audio chunk (32kHz stereo, float samples)
tad32_encode_chunk(audio_ctx, pcm_samples, num_samples, &audio_data, &audio_size);
// Mux both into TAV file (interleave by frame PTS)
```
## Error Handling
Encoding functions return a negative value on failure and record an error message that can be retrieved with `tav_encoder_get_error()`:
```c
if (tav_encoder_encode_frame(ctx, frame, pts, &packet) < 0) {
fprintf(stderr, "Encoding failed: %s\n", tav_encoder_get_error(ctx));
// Handle error
}
```
## Limitations
- Maximum resolution: 8192×8192
- GOP size: 1-48 frames
- Single-tile encoding only (no spatial tiling)
- Requires even width and height
## License
Part of the TSVM project.
## See Also
- `include/tav_encoder_lib.h` - Complete API documentation
- `src/encoder_tav.c` - CLI reference implementation
- `lib/libtadenc/` - TAD audio encoder library

View File

@@ -0,0 +1,255 @@
/**
* TAV Encoder - Color Space Conversion Library
*
* Provides RGB <-> YCoCg-R and RGB <-> ICtCp color space conversions
* for the TSVM Advanced Video (TAV) encoder.
*
* Extracted from encoder_tav.c as part of library refactoring.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
// =============================================================================
// Utility Functions
// =============================================================================
/* Restrict an integer to [min, max]; the lower bound is tested first. */
static inline int CLAMP(int x, int min, int max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
/*
 * Restrict a float to [min, max]; the lower bound is tested first.
 * Arguments and result are single precision, so double-valued callers
 * are narrowed to float on the way in.
 */
static inline float FCLAMP(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
static inline int iround(double v) {
return (int)floor(v + 0.5);
}
// =============================================================================
// sRGB Gamma Helpers
// =============================================================================
static inline double srgb_linearise(double val) {
if (val <= 0.04045) return val / 12.92;
return pow((val + 0.055) / 1.055, 2.4);
}
static inline double srgb_unlinearise(double val) {
if (val <= 0.0031308) return 12.92 * val;
return 1.055 * pow(val, 1.0/2.4) - 0.055;
}
// =============================================================================
// HLG (Hybrid Log-Gamma) Transfer Functions
// =============================================================================
static inline double HLG_OETF(double E) {
const double a = 0.17883277;
const double b = 0.28466892; // 1 - 4*a
const double c = 0.55991073; // 0.5 - a*ln(4*a)
if (E <= 1.0/12.0) return sqrt(3.0 * E);
return a * log(12.0 * E - b) + c;
}
static inline double HLG_EOTF(double Ep) {
const double a = 0.17883277;
const double b = 0.28466892;
const double c = 0.55991073;
if (Ep <= 0.5) {
double val = Ep * Ep / 3.0;
return val;
}
double val = (exp((Ep - c) / a) + b) / 12.0;
return val;
}
// =============================================================================
// Color Space Transformation Matrices
// =============================================================================
// BT.2100 RGB -> LMS matrix.
// Coefficients are expressed in 1/4096 units; every row sums to 4096/4096 = 1,
// so an achromatic input (R = G = B) yields L = M = S.
static const double M_RGB_TO_LMS[3][3] = {
{1688.0/4096, 2146.0/4096, 262.0/4096},
{ 683.0/4096, 2951.0/4096, 462.0/4096},
{ 99.0/4096, 309.0/4096, 3688.0/4096}
};
// LMS -> RGB inverse matrix (pre-computed decimal inverse of M_RGB_TO_LMS)
static const double M_LMS_TO_RGB[3][3] = {
{ 6.1723815689243215, -5.319534979827695, 0.14699442094633924},
{-1.3243428148026244, 2.560286104841917, -0.2359203727576164},
{-0.011819739235953752, -0.26473549971186555, 1.2767952602537955}
};
// ICtCp matrix (L' M' S' -> I Ct Cp) - BT.2100 constants.
// Row 0 (I) sums to 1; rows 1 and 2 (Ct, Cp) sum to 0, so L' = M' = S'
// produces zero chroma.
static const double M_LMSPRIME_TO_ICTCP[3][3] = {
{ 2048.0/4096.0, 2048.0/4096.0, 0.0 },
{ 3625.0/4096.0, -7465.0/4096.0, 3840.0/4096.0 },
{ 9500.0/4096.0, -9212.0/4096.0, -288.0/4096.0 }
};
// ICtCp -> L' M' S' inverse matrix (pre-computed decimal inverse of
// M_LMSPRIME_TO_ICTCP); the first column is all ones, so I feeds
// L', M' and S' equally.
static const double M_ICTCP_TO_LMSPRIME[3][3] = {
{ 1.0, 0.015718580108730416, 0.2095810681164055 },
{ 1.0, -0.015718580108730416, -0.20958106811640548},
{ 1.0, 1.0212710798422344, -0.6052744909924316 }
};
// =============================================================================
// YCoCg-R Color Space Conversion
// =============================================================================
/**
 * Convert RGB24 to YCoCg-R color space for a full frame.
 *
 * Lifting form of the transform, evaluated in float for every pixel:
 * - Co  = R - B                  (orange chrominance)
 * - tmp = B + Co/2               (= (R + B) / 2)
 * - Cg  = G - tmp                (green chrominance, G - (R+B)/2)
 * - Y   = tmp + Cg/2             (luma, (R + 2G + B) / 4)
 *
 * @param rgb    Input RGB24 data, interleaved (RGBRGBRGB...),
 *               width * height * 3 bytes
 * @param y      Output luma channel, width * height floats
 * @param co     Output orange chrominance, width * height floats
 * @param cg     Output green chrominance, width * height floats
 * @param width  Frame width in pixels
 * @param height Frame height in pixels
 *
 * Nothing is written when width or height is not positive.
 */
void tav_rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg,
                      int width, int height)
{
    if (width <= 0 || height <= 0) {
        return;
    }

    // size_t keeps the pixel count and the byte offset (i * 3) clear of
    // signed int overflow for large frames.
    const size_t total_pixels = (size_t)width * (size_t)height;

    for (size_t i = 0; i < total_pixels; i++) {
        const uint8_t *px = &rgb[i * 3];
        const float r = px[0];
        const float g = px[1];
        const float b = px[2];

        // YCoCg-R lifting steps
        const float orange = r - b;
        const float tmp = b + orange * 0.5f;
        const float green = g - tmp;

        co[i] = orange;
        cg[i] = green;
        y[i] = tmp + green * 0.5f;
    }
}
// =============================================================================
// ICtCp Color Space Conversion (HDR-capable)
// =============================================================================
/**
 * Convert one sRGB8 pixel to ICtCp using the HLG transfer function.
 *
 * Pipeline: sRGB code values -> linear light -> LMS (M_RGB_TO_LMS) ->
 * HLG_OETF() per component -> ICtCp (M_LMSPRIME_TO_ICTCP) -> scaled to
 * the 0-255 range.
 *
 * - I  = intensity (luma)
 * - Ct = tritanope (blue-yellow)
 * - Cp = protanope (red-green)
 *
 * The final clamp goes through the float-typed FCLAMP() helper, so the
 * stored doubles carry single precision.
 *
 * @param r8 Input red component (0-255)
 * @param g8 Input green component (0-255)
 * @param b8 Input blue component (0-255)
 * @param out_I Output intensity, clamped to 0-255
 * @param out_Ct Output tritanope, offset by 127.5 and clamped to 0-255
 * @param out_Cp Output protanope, offset by 127.5 and clamped to 0-255
 */
void tav_srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
                            double *out_I, double *out_Ct, double *out_Cp)
{
    // sRGB code values -> linear light
    const double lin_r = srgb_linearise((double)r8 / 255.0);
    const double lin_g = srgb_linearise((double)g8 / 255.0);
    const double lin_b = srgb_linearise((double)b8 / 255.0);

    // Linear RGB -> LMS, then HLG OETF on each component
    double lms_prime[3];
    for (int row = 0; row < 3; row++) {
        const double *m = M_RGB_TO_LMS[row];
        const double cone = m[0]*lin_r + m[1]*lin_g + m[2]*lin_b;
        lms_prime[row] = HLG_OETF(cone);
    }

    // L'M'S' -> ICtCp
    double ictcp[3];
    for (int row = 0; row < 3; row++) {
        const double *m = M_LMSPRIME_TO_ICTCP[row];
        ictcp[row] = m[0]*lms_prime[0] + m[1]*lms_prime[1] + m[2]*lms_prime[2];
    }

    // Scale to 0-255; the chroma channels are re-centred on 127.5
    *out_I = FCLAMP(ictcp[0] * 255.0, 0.0, 255.0);
    *out_Ct = FCLAMP(ictcp[1] * 255.0 + 127.5, 0.0, 255.0);
    *out_Cp = FCLAMP(ictcp[2] * 255.0 + 127.5, 0.0, 255.0);
}
/**
 * Convert one ICtCp pixel back to sRGB8.
 *
 * Pipeline: undo the 0-255 scaling -> L'M'S' (M_ICTCP_TO_LMSPRIME) ->
 * HLG_EOTF() per component -> linear RGB (M_LMS_TO_RGB) -> sRGB gamma ->
 * clamp to 0-255 and round to the nearest integer.
 *
 * @param I8 Input intensity (0-255)
 * @param Ct8 Input tritanope (0-255, centered at 127.5)
 * @param Cp8 Input protanope (0-255, centered at 127.5)
 * @param r8 Output red component (0-255)
 * @param g8 Output green component (0-255)
 * @param b8 Output blue component (0-255)
 */
void tav_ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
                            uint8_t *r8, uint8_t *g8, uint8_t *b8)
{
    // Undo the 0-255 scaling and the 127.5 chroma offset
    const double ictcp[3] = {
        I8 / 255.0,
        (Ct8 - 127.5) / 255.0,
        (Cp8 - 127.5) / 255.0
    };

    // ICtCp -> L'M'S', then back through the HLG curve to LMS
    double lms[3];
    for (int row = 0; row < 3; row++) {
        const double *m = M_ICTCP_TO_LMSPRIME[row];
        const double primed = m[0]*ictcp[0] + m[1]*ictcp[1] + m[2]*ictcp[2];
        lms[row] = HLG_EOTF(primed);
    }

    // LMS -> linear RGB, then sRGB gamma encoding
    double encoded[3];
    for (int row = 0; row < 3; row++) {
        const double *m = M_LMS_TO_RGB[row];
        const double linear = m[0]*lms[0] + m[1]*lms[1] + m[2]*lms[2];
        encoded[row] = srgb_unlinearise(linear);
    }

    // Clamp to the 8-bit range, then round to nearest
    *r8 = (uint8_t)iround(FCLAMP(encoded[0] * 255.0, 0.0, 255.0));
    *g8 = (uint8_t)iround(FCLAMP(encoded[1] * 255.0, 0.0, 255.0));
    *b8 = (uint8_t)iround(FCLAMP(encoded[2] * 255.0, 0.0, 255.0));
}

View File

@@ -0,0 +1,67 @@
/**
* TAV Encoder - Color Space Conversion Library
*
* Public API for RGB <-> YCoCg-R and RGB <-> ICtCp color space conversions.
*/
#ifndef TAV_ENCODER_COLOR_H
#define TAV_ENCODER_COLOR_H
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// =============================================================================
// YCoCg-R Color Space Conversion
// =============================================================================
/**
 * Convert RGB24 to YCoCg-R color space for a full frame.
 *
 * Each output plane receives one float per pixel, in the same pixel order
 * as the input.
 *
 * @param rgb Input RGB24 data (interleaved: RGBRGBRGB...), width*height*3 bytes
 * @param y Output luma channel (width*height floats)
 * @param co Output orange chrominance, R - B (width*height floats)
 * @param cg Output green chrominance, G - (R+B)/2 (width*height floats)
 * @param width Frame width
 * @param height Frame height
 */
void tav_rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg,
int width, int height);
// =============================================================================
// ICtCp Color Space Conversion (HDR-capable)
// =============================================================================
/**
 * Convert sRGB8 to ICtCp color space using HLG transfer function.
 *
 * Operates on a single pixel. All three outputs are clamped to 0-255.
 *
 * @param r8 Input red component (0-255)
 * @param g8 Input green component (0-255)
 * @param b8 Input blue component (0-255)
 * @param out_I Output intensity (0-255)
 * @param out_Ct Output tritanope (0-255, centered at 127.5)
 * @param out_Cp Output protanope (0-255, centered at 127.5)
 */
void tav_srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
double *out_I, double *out_Ct, double *out_Cp);
/**
 * Convert ICtCp back to sRGB8 using HLG inverse transfer function.
 *
 * Operates on a single pixel. Each output is clamped to 0-255 and rounded
 * to the nearest integer.
 *
 * @param I8 Input intensity (0-255)
 * @param Ct8 Input tritanope (0-255, centered at 127.5)
 * @param Cp8 Input protanope (0-255, centered at 127.5)
 * @param r8 Output red component (0-255)
 * @param g8 Output green component (0-255)
 * @param b8 Output blue component (0-255)
 */
void tav_ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
uint8_t *r8, uint8_t *g8, uint8_t *b8);
#ifdef __cplusplus
}
#endif
#endif // TAV_ENCODER_COLOR_H

View File

@@ -0,0 +1,619 @@
/**
* TAV Encoder - Discrete Wavelet Transform (DWT) Library
*
* Provides multi-resolution wavelet decomposition for video compression.
* Supports multiple wavelet types: CDF 5/3, 9/7, 13/7, DD-4, and Haar.
*
* Extracted from encoder_tav.c as part of library refactoring.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
// =============================================================================
// Wavelet Type Constants
// =============================================================================
#define WAVELET_5_3_REVERSIBLE 0 // CDF 5/3 - Lossless capable
#define WAVELET_9_7_IRREVERSIBLE 1 // CDF 9/7 - Higher compression (default)
#define WAVELET_BIORTHOGONAL_13_7 2 // Biorthogonal 13/7
#define WAVELET_DD4 16 // Deslauriers-Dubuc 4-point interpolating
#define WAVELET_HAAR 255 // Haar - Simplest wavelet
// =============================================================================
// 1D Forward DWT Transforms
// =============================================================================
/**
 * CDF 5/3 reversible wavelet forward 1D transform (lossless capable).
 *
 * Lifting scheme: a predict step turns the odd samples into high-pass
 * coefficients, then an update step turns the even samples into low-pass
 * coefficients.
 * Output layout: [low-pass: (length+1)/2 values | high-pass: length/2 values]
 *
 * If the scratch buffer cannot be allocated, a message is printed to
 * stderr and data is left untouched.
 *
 * @param data In/out signal data (modified in-place)
 * @param length Signal length (odd lengths are handled; < 2 is a no-op)
 */
static void dwt_53_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int half = (length + 1) / 2;
    float *temp = calloc((size_t)length, sizeof *temp);
    if (!temp) {
        fprintf(stderr, "dwt_53_forward_1d: out of memory (%d samples)\n", length);
        return;
    }

    // Predict step (high-pass): odd sample minus the mean of its even
    // neighbours; the right neighbour is mirrored at the end of the signal.
    for (int i = 0; i < half; i++) {
        const int idx = 2 * i + 1;
        if (idx < length) {
            const float right = (2 * i + 2 < length) ? data[2 * i + 2] : data[2 * i];
            const float pred = 0.5f * (data[2 * i] + right);
            temp[half + i] = data[idx] - pred;
        }
    }

    // Update step (low-pass). The contribution at either end of the signal
    // is taken as zero; the inverse transform relies on the same convention.
    for (int i = 0; i < half; i++) {
        const float d_prev = (i > 0) ? temp[half + i - 1] : 0.0f;
        const float d_curr = (i < half - 1) ? temp[half + i] : 0.0f;
        temp[i] = data[2 * i] + 0.25f * (d_prev + d_curr);
    }

    memcpy(data, temp, (size_t)length * sizeof *temp);
    free(temp);
}
/**
 * CDF 9/7 irreversible wavelet forward 1D transform (JPEG 2000 standard).
 *
 * Four lifting steps (predict, update, predict, update) followed by a
 * scaling step.
 * Output layout: [low-pass: (length+1)/2 values | high-pass: length/2 values]
 *
 * If the scratch buffer cannot be allocated, a message is printed to
 * stderr and data is left untouched.
 *
 * @param data In/out signal data (modified in-place)
 * @param length Signal length (odd lengths are handled; < 2 is a no-op)
 */
static void dwt_97_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int half = (length + 1) / 2;  // number of even (low-pass) samples
    const int num_high = length / 2;    // number of odd (high-pass) samples

    float *temp = malloc((size_t)length * sizeof *temp);
    if (!temp) {
        fprintf(stderr, "dwt_97_forward_1d: out of memory (%d samples)\n", length);
        return;
    }
    float *low = temp;          // temp[0 .. half-1]
    float *high = temp + half;  // temp[half .. length-1]

    // Split into even/odd samples
    for (int i = 0; i < half; i++) {
        low[i] = data[2 * i];
    }
    for (int i = 0; i < num_high; i++) {
        high[i] = data[2 * i + 1];
    }

    // JPEG2000 9/7 lifting coefficients
    const float alpha = -1.586134342f;
    const float beta = -0.052980118f;
    const float gamma = 0.882911076f;
    const float delta = 0.443506852f;
    const float K = 1.230174105f;

    // Step 1: Predict (alpha). The right neighbour is repeated at the end.
    for (int i = 0; i < num_high; i++) {
        const float s_curr = low[i];
        const float s_next = (i + 1 < half) ? low[i + 1] : s_curr;
        high[i] += alpha * (s_curr + s_next);
    }

    // Step 2: Update (beta). A missing current detail (odd length, last
    // sample) counts as zero; the left neighbour is repeated at i == 0.
    for (int i = 0; i < half; i++) {
        const float d_curr = (i < num_high) ? high[i] : 0.0f;
        const float d_prev = (i > 0) ? high[i - 1] : d_curr;
        low[i] += beta * (d_prev + d_curr);
    }

    // Step 3: Predict (gamma)
    for (int i = 0; i < num_high; i++) {
        const float s_curr = low[i];
        const float s_next = (i + 1 < half) ? low[i + 1] : s_curr;
        high[i] += gamma * (s_curr + s_next);
    }

    // Step 4: Update (delta)
    for (int i = 0; i < half; i++) {
        const float d_curr = (i < num_high) ? high[i] : 0.0f;
        const float d_prev = (i > 0) ? high[i - 1] : d_curr;
        low[i] += delta * (d_prev + d_curr);
    }

    // Step 5: Scaling
    for (int i = 0; i < half; i++) {
        low[i] *= K;
    }
    for (int i = 0; i < num_high; i++) {
        high[i] /= K;
    }

    memcpy(data, temp, (size_t)length * sizeof *temp);
    free(temp);
}
/**
 * CDF 9/7 integer-reversible wavelet forward 1D (fixed-point lifting).
 *
 * Same step structure as the irreversible 9/7 transform, but every lifting
 * term is computed with 16.16 fixed-point constants and rounded to an
 * integer before it is added. Samples are truncated to int when read.
 * Output layout: [low-pass: (length+1)/2 values | high-pass: length/2 values]
 *
 * If the scratch buffer cannot be allocated, a message is printed to
 * stderr and data is left untouched.
 *
 * @param data In/out signal data (modified in-place)
 * @param length Signal length (odd lengths are handled; < 2 is a no-op)
 */
static void dwt_97_iint_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int half = (length + 1) / 2;
    float *temp = malloc((size_t)length * sizeof *temp);
    if (!temp) {
        fprintf(stderr, "dwt_97_iint_forward_1d: out of memory (%d samples)\n", length);
        return;
    }

    // Split into even (low) and odd (high) samples
    for (int i = 0; i < half; ++i) temp[i] = data[2*i];
    for (int i = 0; i < length/2; ++i) temp[half + i] = data[2*i + 1];

    const int SHIFT = 16;
    const int64_t ROUND = 1LL << (SHIFT - 1);
    const int64_t A = -103949; // alpha * 2^16
    const int64_t B = -3472;   // beta  * 2^16
    const int64_t G = 57862;   // gamma * 2^16
    const int64_t D = 29066;   // delta * 2^16
    const int64_t K_FP = 80542;  // ~ 1.230174105 * 2^16
    const int64_t Ki_FP = 53283; // ~ (1/1.230174105) * 2^16

    // Fixed-point -> integer with rounding that is symmetric around zero.
    // The argument is evaluated more than once; pass side-effect-free
    // expressions only.
#define RN(x) (((x)>=0)?(((x)+ROUND)>>SHIFT):(-((-(x)+ROUND)>>SHIFT)))

    // Predict (alpha)
    for (int i = 0; i < length/2; ++i) {
        int s = temp[i];
        int sn = (i+1<half)? temp[i+1] : s;
        temp[half+i] += RN(A * (int64_t)(s + sn));
    }
    // Update (beta)
    for (int i = 0; i < half; ++i) {
        int d = (half+i<length)? temp[half+i]:0;
        int dp = (i>0 && half+i-1<length)? temp[half+i-1]:d;
        temp[i] += RN(B * (int64_t)(dp + d));
    }
    // Predict (gamma)
    for (int i = 0; i < length/2; ++i) {
        int s = temp[i];
        int sn = (i+1<half)? temp[i+1]:s;
        temp[half+i] += RN(G * (int64_t)(s + sn));
    }
    // Update (delta)
    for (int i = 0; i < half; ++i) {
        int d = (half+i<length)? temp[half+i]:0;
        int dp = (i>0 && half+i-1<length)? temp[half+i-1]:d;
        temp[i] += RN(D * (int64_t)(dp + d));
    }

    // Scaling. NOTE(review): unlike RN(), this shifts a possibly negative
    // value right directly; the result is implementation-defined in ISO C
    // (an arithmetic shift on mainstream compilers). Kept as-is so the
    // produced coefficients do not change.
    for (int i = 0; i < half; ++i) {
        temp[i] = (((int64_t)temp[i] * K_FP + ROUND) >> SHIFT);
    }
    for (int i = 0; i < length/2; ++i) {
        if (half + i < length) {
            temp[half + i] = (((int64_t)temp[half + i] * Ki_FP + ROUND) >> SHIFT);
        }
    }

    memcpy(data, temp, (size_t)length * sizeof *temp);
    free(temp);
#undef RN
}
/**
 * Deslauriers-Dubuc 4-point interpolating wavelet forward 1D (DD-4).
 *
 * Uses four-sample prediction kernel: w[-1]=-1/16, w[0]=9/16, w[1]=9/16, w[2]=-1/16
 * followed by a 1/4-weighted update step.
 * Output layout: [low-pass: (length+1)/2 values | high-pass: length/2 values]
 *
 * If the scratch buffer cannot be allocated, a message is printed to
 * stderr and data is left untouched.
 *
 * @param data In/out signal data (modified in-place)
 * @param length Signal length (odd lengths are handled; < 2 is a no-op)
 */
static void dwt_dd4_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int half = (length + 1) / 2;  // number of even (low-pass) samples
    const int num_high = length / 2;    // number of odd (high-pass) samples

    float *temp = malloc((size_t)length * sizeof *temp);
    if (!temp) {
        fprintf(stderr, "dwt_dd4_forward_1d: out of memory (%d samples)\n", length);
        return;
    }

    // Split into even/odd samples
    for (int i = 0; i < half; i++) {
        temp[i] = data[2 * i];
    }
    for (int i = 0; i < num_high; i++) {
        temp[half + i] = data[2 * i + 1];
    }

    // DD-4 prediction step with four-point kernel
    for (int i = 0; i < num_high; i++) {
        // Four neighbouring even samples; indices that fall outside the
        // signal are replaced by samples from the nearest edge.
        const float s_m1 = (i > 0) ? temp[i - 1] : temp[0];
        const float s_0 = temp[i];
        const float s_1 = (i + 1 < half) ? temp[i + 1] : temp[half - 1];
        const float s_2 = (i + 2 < half) ? temp[i + 2]
                        : ((half > 1) ? temp[half - 2] : temp[half - 1]);
        const float prediction = (-1.0f/16.0f) * s_m1 + (9.0f/16.0f) * s_0 +
                                 (9.0f/16.0f) * s_1 + (-1.0f/16.0f) * s_2;
        temp[half + i] -= prediction;
    }

    // DD-4 update step; details outside the signal count as zero
    for (int i = 0; i < half; i++) {
        const float d_curr = (i < num_high) ? temp[half + i] : 0.0f;
        const float d_prev = (i > 0) ? temp[half + i - 1] : 0.0f;
        temp[i] += 0.25f * (d_prev + d_curr);
    }

    memcpy(data, temp, (size_t)length * sizeof *temp);
    free(temp);
}
/**
 * Biorthogonal 13/7 wavelet forward 1D.
 *
 * Simplified implementation: the same predict/update lifting steps as the
 * 5/3 transform, followed by scaling (low-pass multiplied by K, high-pass
 * divided by K).
 * Output layout: [low-pass: (length+1)/2 values | high-pass: length/2 values]
 *
 * If the scratch buffer cannot be allocated, a message is printed to
 * stderr and data is left untouched.
 *
 * @param data In/out signal data (modified in-place)
 * @param length Signal length (odd lengths are handled; < 2 is a no-op)
 */
static void dwt_bior137_forward_1d(float *data, int length) {
    if (length < 2) return;

    const float K = 1.230174105f;
    const int half = (length + 1) / 2;  // number of even (low-pass) samples
    const int num_high = length / 2;    // number of odd (high-pass) samples

    float *temp = malloc((size_t)length * sizeof *temp);
    if (!temp) {
        fprintf(stderr, "dwt_bior137_forward_1d: out of memory (%d samples)\n", length);
        return;
    }

    // Predict step (high-pass); the right neighbour is mirrored at the end
    for (int i = 0; i < half; i++) {
        const int idx = 2 * i + 1;
        if (idx < length) {
            const float left = data[2 * i];
            const float right = (2 * i + 2 < length) ? data[2 * i + 2] : data[2 * i];
            const float prediction = 0.5f * (left + right);
            temp[half + i] = data[idx] - prediction;
        }
    }

    // Update step (low-pass); the contribution at either end is zero
    for (int i = 0; i < half; i++) {
        const float d_prev = (i > 0) ? temp[half + i - 1] : 0.0f;
        const float d_curr = (i < half - 1) ? temp[half + i] : 0.0f;
        temp[i] = data[2 * i] + 0.25f * (d_prev + d_curr);
    }

    // Scaling
    for (int i = 0; i < half; i++) {
        temp[i] *= K;
    }
    for (int i = 0; i < num_high; i++) {
        temp[half + i] /= K;
    }

    memcpy(data, temp, (size_t)length * sizeof *temp);
    free(temp);
}
/**
 * Haar wavelet forward 1D transform.
 *
 * Each sample pair (a, b) becomes the average (a + b) / 2 in the low-pass
 * half and the difference (a - b) / 2 in the high-pass half. With an odd
 * length, the final unpaired sample is copied to the low-pass half as-is.
 * Output layout: [low-pass: (length+1)/2 values | high-pass: length/2 values]
 *
 * If the scratch buffer cannot be allocated, a message is printed to
 * stderr and data is left untouched.
 *
 * @param data In/out signal data (modified in-place)
 * @param length Signal length (< 2 is a no-op)
 */
static void dwt_haar_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int half = (length + 1) / 2;
    float *temp = malloc((size_t)length * sizeof *temp);
    if (!temp) {
        fprintf(stderr, "dwt_haar_forward_1d: out of memory (%d samples)\n", length);
        return;
    }

    for (int i = 0; i < half; i++) {
        const int even = 2 * i;
        const int odd = even + 1;
        if (odd < length) {
            temp[i] = (data[even] + data[odd]) / 2.0f;
            temp[half + i] = (data[even] - data[odd]) / 2.0f;
        } else {
            // Unpaired trailing sample (odd length only). It has no
            // high-pass slot: half + i equals length here.
            temp[i] = data[even];
        }
    }

    memcpy(data, temp, (size_t)length * sizeof *temp);
    free(temp);
}
// =============================================================================
// 1D Inverse DWT Transforms
// =============================================================================
/**
 * CDF 5/3 reversible wavelet inverse 1D transform.
 *
 * Undoes dwt_53_forward_1d(): first the update step is subtracted from the
 * low-pass half to recover the even samples, then the prediction is added
 * back to the high-pass half to recover the odd samples.
 * Input layout: [low-pass: (length+1)/2 values | high-pass: length/2 values]
 *
 * If the scratch buffer cannot be allocated, a message is printed to
 * stderr and data is left untouched.
 *
 * @param data In/out coefficient data (modified in-place)
 * @param length Signal length (odd lengths are handled; < 2 is a no-op)
 */
static void dwt_53_inverse_1d(float *data, int length) {
    if (length < 2) return;

    const int half = (length + 1) / 2;
    float *temp = malloc((size_t)length * sizeof *temp);
    if (!temp) {
        fprintf(stderr, "dwt_53_inverse_1d: out of memory (%d samples)\n", length);
        return;
    }

    // Work on a copy so data can be rewritten in interleaved order
    memcpy(temp, data, (size_t)length * sizeof *temp);

    // Undo update step: temp[0..half-1] becomes the even samples again
    for (int i = 0; i < half; i++) {
        const float d_prev = (i > 0) ? temp[half + i - 1] : 0.0f;
        const float d_curr = (i < half - 1) ? temp[half + i] : 0.0f;
        temp[i] -= 0.25f * (d_prev + d_curr);
    }

    // Undo predict step and interleave even/odd samples back into data
    for (int i = 0; i < half; i++) {
        const int idx = 2 * i + 1;
        data[2 * i] = temp[i];
        if (idx < length) {
            const float right = (i + 1 < half) ? temp[i + 1] : temp[i];
            const float pred = 0.5f * (temp[i] + right);
            data[idx] = temp[half + i] + pred;
        }
    }

    free(temp);
}
/**
 * Haar wavelet inverse 1D transform.
 *
 * Undoes dwt_haar_forward_1d(): each (average, difference) pair is expanded
 * to (average + difference, average - difference). With an odd length, the
 * final low-pass value is copied through unchanged.
 * Input layout: [low-pass: (length+1)/2 values | high-pass: length/2 values]
 *
 * If the scratch buffer cannot be allocated, a message is printed to
 * stderr and data is left untouched.
 *
 * @param data In/out coefficient data (modified in-place)
 * @param length Signal length (< 2 is a no-op)
 */
static void dwt_haar_inverse_1d(float *data, int length) {
    if (length < 2) return;

    const int half = (length + 1) / 2;
    float *temp = malloc((size_t)length * sizeof *temp);
    if (!temp) {
        fprintf(stderr, "dwt_haar_inverse_1d: out of memory (%d samples)\n", length);
        return;
    }

    // Reconstruct from averages and differences
    for (int i = 0; i < half; i++) {
        const int even = 2 * i;
        const int odd = even + 1;
        if (odd < length) {
            temp[even] = data[i] + data[half + i];
            temp[odd] = data[i] - data[half + i];
        } else {
            temp[even] = data[i];
        }
    }

    memcpy(data, temp, (size_t)length * sizeof *temp);
    free(temp);
}
// =============================================================================
// 2D DWT Transform
// =============================================================================
/**
 * Run the forward 1D DWT selected by filter_type on one line, in place.
 * An unrecognised filter_type leaves the line untouched.
 */
static void tav_dwt_forward_1d_dispatch(float *line, int length, int filter_type) {
    switch (filter_type) {
    case WAVELET_5_3_REVERSIBLE:
        dwt_53_forward_1d(line, length);
        break;
    case WAVELET_9_7_IRREVERSIBLE:
        dwt_97_forward_1d(line, length);
        break;
    case WAVELET_BIORTHOGONAL_13_7:
        dwt_bior137_forward_1d(line, length);
        break;
    case WAVELET_DD4:
        dwt_dd4_forward_1d(line, length);
        break;
    case WAVELET_HAAR:
        dwt_haar_forward_1d(line, length);
        break;
    default:
        break;
    }
}

/**
 * Apply 2D forward DWT to a frame (in-place).
 *
 * Applies separable 1D transforms: horizontal (rows), then vertical (columns).
 * Each further level repeats this on the top-left region, whose size is the
 * previous region's size halved and rounded up ((n + 1) / 2).
 *
 * Nothing is done when data is NULL, a dimension is below 1, or levels is
 * below 1. The process is aborted if the scratch line cannot be allocated,
 * because the function has no way to report the failure.
 *
 * @param data        In/out 2D image data (row-major, width stride)
 * @param width       Image width
 * @param height      Image height
 * @param levels      Number of decomposition levels
 * @param filter_type Wavelet type (WAVELET_* constant)
 */
void tav_dwt_2d_forward(float *data, int width, int height, int levels, int filter_type) {
    if (!data || width < 1 || height < 1 || levels < 1) return;

    // One scratch line is enough: rows and columns are processed one at a time.
    const int longest = (width > height) ? width : height;
    float *line = malloc((size_t)longest * sizeof *line);
    if (!line) {
        abort();
    }

    int current_width = width;
    int current_height = height;

    for (int level = 0; level < levels; level++) {
        // Row transform (horizontal): the active part of a row is contiguous.
        for (int y = 0; y < current_height; y++) {
            float *row = data + (size_t)y * (size_t)width;
            memcpy(line, row, (size_t)current_width * sizeof *line);
            tav_dwt_forward_1d_dispatch(line, current_width, filter_type);
            memcpy(row, line, (size_t)current_width * sizeof *line);
        }

        // Column transform (vertical): gather with the full-frame stride, transform, scatter.
        for (int x = 0; x < current_width; x++) {
            for (int y = 0; y < current_height; y++) {
                line[y] = data[(size_t)y * (size_t)width + (size_t)x];
            }
            tav_dwt_forward_1d_dispatch(line, current_height, filter_type);
            for (int y = 0; y < current_height; y++) {
                data[(size_t)y * (size_t)width + (size_t)x] = line[y];
            }
        }

        // The next level works on the low-pass quadrant only.
        current_width = (current_width + 1) / 2;
        current_height = (current_height + 1) / 2;
    }

    free(line);
}
// =============================================================================
// 3D DWT Transform (Temporal + Spatial)
// =============================================================================
/**
* Apply 3D forward DWT to a GOP (group of pictures).
*
* First applies temporal DWT across frames at each spatial location,
* then applies 2D spatial DWT to each resulting temporal subband.
*
* @param gop_data Array of frame pointers [num_frames][width*height]
* @param width Frame width
* @param height Frame height
* @param num_frames Number of frames in GOP
* @param spatial_levels Number of 2D spatial decomposition levels
* @param temporal_levels Number of 1D temporal decomposition levels
* @param spatial_filter Wavelet type for spatial transform
* @param temporal_filter Wavelet type for temporal transform (0=Haar, 1=5/3)
*/
void tav_dwt_3d_forward(float **gop_data, int width, int height, int num_frames,
int spatial_levels, int temporal_levels,
int spatial_filter, int temporal_filter) {
if (num_frames < 2 || width < 2 || height < 2) return;
float *temporal_line = malloc(num_frames * sizeof(float));
// Pre-calculate temporal lengths for non-power-of-2 GOPs
int *temporal_lengths = malloc((temporal_levels + 1) * sizeof(int));
temporal_lengths[0] = num_frames;
for (int i = 1; i <= temporal_levels; i++) {
temporal_lengths[i] = (temporal_lengths[i - 1] + 1) / 2;
}
// Step 1: Apply temporal DWT across frames
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
int pixel_idx = y * width + x;
// Extract temporal signal
for (int t = 0; t < num_frames; t++) {
temporal_line[t] = gop_data[t][pixel_idx];
}
// Apply temporal DWT with multiple levels
for (int level = 0; level < temporal_levels; level++) {
int level_frames = temporal_lengths[level];
if (level_frames >= 2) {
if (temporal_filter == 255) {
// Haar temporal (default)
dwt_haar_forward_1d(temporal_line, level_frames);
} else if (temporal_filter == 0) {
// CDF 5/3 temporal
dwt_53_forward_1d(temporal_line, level_frames);
} else {
// Fallback to Haar for unsupported wavelets
dwt_haar_forward_1d(temporal_line, level_frames);
}
}
}
// Write back temporal coefficients
for (int t = 0; t < num_frames; t++) {
gop_data[t][pixel_idx] = temporal_line[t];
}
}
}
free(temporal_lengths);
free(temporal_line);
// Step 2: Apply 2D spatial DWT to each temporal subband
for (int t = 0; t < num_frames; t++) {
tav_dwt_2d_forward(gop_data[t], width, height, spatial_levels, spatial_filter);
}
}
// =============================================================================
// Utility Functions
// =============================================================================
/**
 * Suggest a number of decomposition levels for the given frame size.
 *
 * Counts how often the shorter side can be halved (integer division) while
 * it is still at least 32, and caps the count at 6. A shorter side below 32
 * therefore yields 0.
 *
 * @param width  Image width
 * @param height Image height
 * @return Suggested number of levels (0-6)
 */
int tav_dwt_calculate_levels(int width, int height) {
    int shorter = (height <= width) ? height : width;
    int levels;

    for (levels = 0; shorter >= 32; shorter /= 2) {
        levels++;
    }

    if (levels > 6) {
        levels = 6;
    }
    return levels;
}

View File

@@ -0,0 +1,88 @@
/**
* TAV Encoder - Discrete Wavelet Transform Library
*
* Public API for multi-resolution wavelet decomposition.
* Supports multiple wavelet types: CDF 5/3, 9/7, 13/7, DD-4, Haar
*/
#ifndef TAV_ENCODER_DWT_H
#define TAV_ENCODER_DWT_H
#ifdef __cplusplus
extern "C" {
#endif
// =============================================================================
// Wavelet Type Constants
// =============================================================================
// Wavelet identifiers for the filter arguments of the transforms declared below.
// The values are sparse (0, 1, 2, 16, 255), so they cannot serve as dense array indices.
#define WAVELET_5_3_REVERSIBLE 0 // CDF 5/3 reversible (lossless capable)
#define WAVELET_9_7_IRREVERSIBLE 1 // CDF 9/7 JPEG2000 (default, best compression)
#define WAVELET_BIORTHOGONAL_13_7 2 // CDF 13/7 experimental
#define WAVELET_DD4 16 // Deslauriers-Dubuc 4-point interpolating
#define WAVELET_HAAR 255 // Haar (demonstration only)
// =============================================================================
// 2D Discrete Wavelet Transform
// =============================================================================
/**
 * Apply 2D wavelet transform to spatial data.
 *
 * Uses separable 1D transforms: apply horizontal rows, then vertical columns.
 * Multi-level decomposition creates frequency subbands: LL, LH, HL, HH.
 *
 * @param data Input/output data array (modified in-place, row-major with
 *             a stride of width floats)
 * @param width Frame width
 * @param height Frame height
 * @param levels Number of decomposition levels.
 *               NOTE(review): the implementation shipped with this header
 *               performs no decomposition when levels is 0; any
 *               auto-calculation (see tav_dwt_calculate_levels) has to be
 *               done by the caller - confirm.
 * @param filter_type Wavelet type (WAVELET_* constants)
 */
void tav_dwt_2d_forward(float *data, int width, int height,
int levels, int filter_type);
// =============================================================================
// 3D Discrete Wavelet Transform (GOP Temporal + Spatial)
// =============================================================================
/**
 * Apply 3D wavelet transform to group-of-pictures (GOP).
 *
 * Process:
 * 1. Apply temporal 1D DWT across frames at each spatial position
 * 2. Apply spatial 2D DWT to each temporal subband frame
 *
 * All frames are modified in place.
 *
 * @param gop_data Array of frame pointers [num_frames], each pointing to
 *                 width*height floats
 * @param width Frame width
 * @param height Frame height
 * @param num_frames Number of frames in GOP
 * @param spatial_levels Spatial decomposition levels, forwarded to
 *                       tav_dwt_2d_forward().
 *                       NOTE(review): 0 is passed through unchanged and means
 *                       "no decomposition" there, not "auto" - confirm.
 * @param temporal_levels Temporal decomposition levels
 * @param spatial_filter Wavelet type for spatial transform (WAVELET_* constants)
 * @param temporal_filter Wavelet type for temporal transform. In the
 *                        implementation shipped with this header only
 *                        WAVELET_5_3_REVERSIBLE selects CDF 5/3; every other
 *                        value falls back to Haar.
 */
void tav_dwt_3d_forward(float **gop_data, int width, int height, int num_frames,
int spatial_levels, int temporal_levels,
int spatial_filter, int temporal_filter);
// =============================================================================
// Utility Functions
// =============================================================================
/**
 * Calculate a recommended number of decomposition levels for given dimensions.
 *
 * As implemented alongside this header: counts how many times
 * min(width, height) can be halved (integer division) while it is still
 * at least 32, then caps the result at 6. Frames whose shorter side is
 * below 32 get 0.
 * NOTE(review): an earlier description gave the formula
 * floor(log2(min(width, height))) - 1, which does not match that loop -
 * confirm which behaviour is intended.
 *
 * @param width Frame width
 * @param height Frame height
 * @return Recommended number of levels (0-6)
 */
int tav_dwt_calculate_levels(int width, int height);
#ifdef __cplusplus
}
#endif
#endif // TAV_ENCODER_DWT_H

View File

@@ -0,0 +1,415 @@
/**
* TAV Encoder - EZBC (Embedded Zero Block Coding) Library
*
* Implements binary tree embedded zero block coding for efficient storage
* of sparse wavelet coefficients. Exploits coefficient sparsity through
* hierarchical significance testing and progressive bitplane encoding.
*
* Extracted from encoder_tav.c as part of library refactoring.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <math.h>
// =============================================================================
// EZBC Structures
// =============================================================================
/**
 * Bitstream writer for bit-level encoding.
 *
 * Bits are packed into each byte starting at bit 0 (least significant first).
 * The buffer is zero-filled when allocated and when grown, so only 1-bits
 * need to be written explicitly.
 */
typedef struct {
uint8_t *data; // Heap buffer holding the packed bits
size_t capacity; // Allocated size of data, in bytes
size_t byte_pos; // Index of the byte currently being filled
uint8_t bit_pos; // 0-7, current bit position in current byte
} bitstream_t;
/**
 * Block structure for EZBC quadtree decomposition.
 *
 * Describes a rectangle of the coefficient plane. A 1x1 block designates a
 * single coefficient at index y * plane_width + x.
 */
typedef struct {
int x, y; // Top-left position in 2D coefficient array
int width, height; // Block dimensions
} ezbc_block_t;
/**
 * Queue for EZBC block processing.
 *
 * A growable array that is appended to with queue_push() and read by index;
 * entries are never removed individually.
 */
typedef struct {
ezbc_block_t *blocks; // Heap array of queued blocks
size_t count; // Number of valid entries in blocks
size_t capacity; // Number of entries blocks has room for
} block_queue_t;
/**
 * Track coefficient state for refinement.
 *
 * One entry per coefficient; filled in when the coefficient's 1x1 block is
 * first found significant.
 */
typedef struct {
bool significant; // Has been marked significant
int first_bitplane; // Bitplane where it became significant
} coeff_state_t;
/**
 * EZBC encoding context for recursive processing.
 *
 * Bundles everything process_significant_block_recursive() needs while it
 * subdivides one block at one bitplane. All pointers are borrowed from the
 * caller; the context owns nothing.
 */
typedef struct {
bitstream_t *bs; // Output bitstream
int16_t *coeffs; // Quantized coefficient plane (row-major, width stride)
coeff_state_t *states; // Per-coefficient significance state, same indexing as coeffs
int width; // Plane width
int height; // Plane height
int bitplane; // Bitplane currently being coded
int threshold; // Significance threshold for this bitplane (1 << bitplane)
block_queue_t *next_insignificant; // Receives blocks still insignificant, for the next bitplane
block_queue_t *next_significant; // Receives 1x1 blocks that just became significant
int *sign_count; // Counter incremented once per sign bit emitted
} ezbc_context_t;
// =============================================================================
// Bitstream Operations
// =============================================================================
/**
 * Prepare a bitstream for writing.
 *
 * Allocates a zero-filled buffer of at least 64 bytes (smaller requests are
 * rounded up) and resets the write position to the first bit. Terminates the
 * process with exit(1) if the buffer cannot be allocated.
 */
static void bitstream_init(bitstream_t *bs, size_t initial_capacity) {
    // A floor on the size sidesteps zero-length allocations
    const size_t capacity = (initial_capacity < 64) ? 64 : initial_capacity;

    uint8_t *buffer = calloc(1, capacity);
    if (buffer == NULL) {
        fprintf(stderr, "ERROR: Failed to allocate bitstream buffer of size %zu\n", capacity);
        exit(1);
    }

    bs->data = buffer;
    bs->capacity = capacity;
    bs->byte_pos = 0;
    bs->bit_pos = 0;
}
/**
 * Write a single bit to bitstream.
 *
 * Bits fill each byte from bit 0 upwards. When the current byte lies past
 * the end of the buffer, the buffer is doubled and the new half zero-filled.
 * Terminates the process with exit(1) if the buffer cannot be grown, matching
 * bitstream_init().
 */
static void bitstream_write_bit(bitstream_t *bs, int bit) {
    // Grow if needed
    if (bs->byte_pos >= bs->capacity) {
        const size_t old_capacity = bs->capacity;
        const size_t new_capacity = (old_capacity > 0) ? old_capacity * 2 : 64;

        // Keep the old pointer until realloc is known to have succeeded.
        uint8_t *grown = NULL;
        if (old_capacity <= SIZE_MAX / 2) {
            grown = realloc(bs->data, new_capacity);
        }
        if (!grown) {
            fprintf(stderr, "ERROR: Failed to grow bitstream buffer beyond %zu bytes\n", old_capacity);
            exit(1);
        }

        // Clear only the newly allocated memory region
        memset(grown + old_capacity, 0, new_capacity - old_capacity);
        bs->data = grown;
        bs->capacity = new_capacity;
    }

    // The buffer is pre-zeroed, so only 1-bits need storing.
    if (bit) {
        bs->data[bs->byte_pos] |= (uint8_t)(1u << bs->bit_pos);
    }

    bs->bit_pos++;
    if (bs->bit_pos == 8) {
        bs->bit_pos = 0;
        bs->byte_pos++;
    }
}
/**
 * Append the low num_bits bits of value to the bitstream, least significant
 * bit first.
 */
static void bitstream_write_bits(bitstream_t *bs, uint32_t value, int num_bits) {
    int shift = 0;
    while (shift < num_bits) {
        const int bit = (int)((value >> shift) & 1);
        bitstream_write_bit(bs, bit);
        shift++;
    }
}
/**
 * Number of bytes the bitstream occupies so far; a partially filled final
 * byte counts as a whole byte.
 */
static size_t bitstream_size(bitstream_t *bs) {
    size_t bytes = bs->byte_pos;
    if (bs->bit_pos != 0) {
        bytes += 1;
    }
    return bytes;
}
/**
 * Release the buffer owned by a bitstream. The struct's fields are left
 * as they were.
 */
static void bitstream_free(bitstream_t *bs) {
    uint8_t *buffer = bs->data;
    free(buffer);
}
// =============================================================================
// Block Queue Operations
// =============================================================================
/**
 * Initialize block queue with initial capacity.
 *
 * Starts empty with room for 1024 blocks. Terminates the process with
 * exit(1) if the storage cannot be allocated, matching bitstream_init().
 */
static void queue_init(block_queue_t *q) {
    const size_t initial_capacity = 1024;

    ezbc_block_t *blocks = malloc(initial_capacity * sizeof *blocks);
    if (!blocks) {
        fprintf(stderr, "ERROR: Failed to allocate EZBC block queue of %zu entries\n", initial_capacity);
        exit(1);
    }

    q->blocks = blocks;
    q->capacity = initial_capacity;
    q->count = 0;
}
/**
 * Push block onto queue, growing if needed.
 *
 * Capacity doubles whenever the queue is full. Terminates the process with
 * exit(1) if the storage cannot be grown, matching bitstream_init().
 */
static void queue_push(block_queue_t *q, ezbc_block_t block) {
    if (q->count >= q->capacity) {
        const size_t new_capacity = (q->capacity > 0) ? q->capacity * 2 : 1024;

        // Keep the old pointer until realloc is known to have succeeded.
        ezbc_block_t *grown = NULL;
        if (new_capacity > q->capacity && new_capacity <= SIZE_MAX / sizeof *grown) {
            grown = realloc(q->blocks, new_capacity * sizeof *grown);
        }
        if (!grown) {
            fprintf(stderr, "ERROR: Failed to grow EZBC block queue beyond %zu entries\n", q->capacity);
            exit(1);
        }

        q->blocks = grown;
        q->capacity = new_capacity;
    }
    q->blocks[q->count++] = block;
}
/**
 * Release the storage owned by a block queue. The struct's fields are left
 * as they were.
 */
static void queue_free(block_queue_t *q) {
    ezbc_block_t *storage = q->blocks;
    free(storage);
}
// =============================================================================
// EZBC Helper Functions
// =============================================================================
/**
 * Tell whether a block is insignificant at the given threshold, i.e. every
 * coefficient inside it satisfies |coeff| < threshold.
 *
 * The part of the block that sticks out past width/height is ignored.
 */
static bool is_zero_block_ezbc(int16_t *coeffs, int width, int height,
                               const ezbc_block_t *block, int threshold) {
    // Clip the block's far edges to the coefficient plane
    int x_end = block->x + block->width;
    if (x_end > width) {
        x_end = width;
    }
    int y_end = block->y + block->height;
    if (y_end > height) {
        y_end = height;
    }

    for (int row = block->y; row < y_end; row++) {
        for (int col = block->x; col < x_end; col++) {
            const int magnitude = abs(coeffs[row * width + col]);
            if (magnitude >= threshold) {
                return false;
            }
        }
    }
    return true;
}
/**
 * Return the largest absolute value found among count coefficients
 * (0 for an empty array).
 */
static int find_max_abs_ezbc(int16_t *coeffs, size_t count) {
    int peak = 0;
    for (size_t i = count; i-- > 0; ) {
        const int value = coeffs[i];
        const int magnitude = (value < 0) ? -value : value;
        if (peak < magnitude) {
            peak = magnitude;
        }
    }
    return peak;
}
/**
 * Position of the highest set bit of value, i.e. floor(log2(value)) for
 * value >= 1. Zero and negative inputs yield 0.
 */
static int get_msb_bitplane(int value) {
    int bitplane = 0;
    for (int rest = value; rest > 1; rest >>= 1) {
        bitplane++;
    }
    return bitplane;
}
/**
 * Code a block that is known to be significant at the current bitplane.
 *
 * A 1x1 block emits its sign bit (1 = negative), is flagged in the state
 * table and joins the significant queue. Any larger block is split at its
 * midpoint into up to four children, visited in the fixed order top-left,
 * top-right, bottom-left, bottom-right. For each child one bit is emitted:
 * 1 followed immediately by the child's own recursive coding when it is
 * significant, or 0 when it is not, in which case the child is queued for
 * the next bitplane.
 */
static void process_significant_block_recursive(ezbc_context_t *ctx, ezbc_block_t block) {
    // Leaf: a single coefficient
    if (block.width == 1 && block.height == 1) {
        const int idx = block.y * ctx->width + block.x;
        const int negative = (ctx->coeffs[idx] < 0) ? 1 : 0;

        bitstream_write_bit(ctx->bs, negative);
        *ctx->sign_count += 1;

        ctx->states[idx].significant = true;
        ctx->states[idx].first_bitplane = ctx->bitplane;
        queue_push(ctx->next_significant, block);
        return;
    }

    // Split point; a side of length 1 keeps its single row/column in the first half
    int left_w = block.width / 2;
    int top_h = block.height / 2;
    if (left_w == 0) left_w = 1;
    if (top_h == 0) top_h = 1;

    const bool has_right = block.width > left_w;
    const bool has_bottom = block.height > top_h;

    // Collect the children that exist, in bitstream order
    ezbc_block_t children[4];
    int child_count = 0;

    children[child_count++] = (ezbc_block_t){block.x, block.y, left_w, top_h};
    if (has_right) {
        children[child_count++] =
            (ezbc_block_t){block.x + left_w, block.y, block.width - left_w, top_h};
    }
    if (has_bottom) {
        children[child_count++] =
            (ezbc_block_t){block.x, block.y + top_h, left_w, block.height - top_h};
    }
    if (has_right && has_bottom) {
        children[child_count++] =
            (ezbc_block_t){block.x + left_w, block.y + top_h,
                           block.width - left_w, block.height - top_h};
    }

    for (int i = 0; i < child_count; i++) {
        const ezbc_block_t child = children[i];
        if (is_zero_block_ezbc(ctx->coeffs, ctx->width, ctx->height, &child, ctx->threshold)) {
            bitstream_write_bit(ctx->bs, 0);  // Insignificant
            queue_push(ctx->next_insignificant, child);
        } else {
            bitstream_write_bit(ctx->bs, 1);  // Significant
            process_significant_block_recursive(ctx, child);
        }
    }
}
// =============================================================================
// Main EZBC Encoding Function
// =============================================================================
/**
 * EZBC encoding for a single channel.
 *
 * Uses two separate queues for insignificant blocks and significant 1x1 blocks.
 * Encodes coefficients progressively from MSB to LSB bitplane.
 *
 * Algorithm:
 * 1. Find MSB bitplane from maximum absolute coefficient value
 * 2. Write header: MSB bitplane (8 bits), width (16 bits), height (16 bits),
 *    each least significant bit first
 * 3. For each bitplane from MSB to 0:
 *    a. Process insignificant blocks: emit 0 if still insignificant, 1 if not
 *    b. For newly significant blocks: recursively subdivide until 1x1
 *    c. Emit sign bits for newly significant 1x1 coefficients
 *    d. Process coefficients that were already significant before this
 *       bitplane: emit their bit at this bitplane as refinement
 * 4. Return encoded bitstream
 *
 * Only the low 16 bits of width and height reach the header. Allocation
 * failure terminates the process with exit(1), as in bitstream_init().
 *
 * @param coeffs Input quantized coefficients (int16_t array, row-major)
 * @param count  Number of coefficients
 * @param width  Frame width
 * @param height Frame height
 * @param output Receives the encoded buffer (allocated by this function;
 *               release it with free())
 * @return Encoded size in bytes
 */
size_t tav_encode_channel_ezbc(int16_t *coeffs, size_t count, int width, int height,
                               uint8_t **output) {
    bitstream_t bs;
    bitstream_init(&bs, count / 4);  // Initial guess; the stream grows on demand

    // Track coefficient significance
    coeff_state_t *states = calloc(count, sizeof *states);
    if (!states && count > 0) {
        fprintf(stderr, "ERROR: Failed to allocate EZBC state table for %zu coefficients\n", count);
        exit(1);
    }

    // Find maximum value to determine MSB bitplane
    const int max_abs = find_max_abs_ezbc(coeffs, count);
    const int msb_bitplane = get_msb_bitplane(max_abs);

    // Write header: MSB bitplane and dimensions
    bitstream_write_bits(&bs, (uint32_t)msb_bitplane, 8);
    bitstream_write_bits(&bs, (uint32_t)width, 16);
    bitstream_write_bits(&bs, (uint32_t)height, 16);

    // Two queue pairs: the "current" queues are read during a bitplane,
    // the "next" queues collect what the following bitplane will read.
    block_queue_t insignificant_queue, next_insignificant;
    block_queue_t significant_queue, next_significant;
    queue_init(&insignificant_queue);
    queue_init(&next_insignificant);
    queue_init(&significant_queue);
    queue_init(&next_significant);

    // Start with root block as insignificant
    ezbc_block_t root = {0, 0, width, height};
    queue_push(&insignificant_queue, root);

    // Process bitplanes from MSB to LSB
    for (int bitplane = msb_bitplane; bitplane >= 0; bitplane--) {
        const int threshold = 1 << bitplane;
        int sign_bits_this_bitplane = 0;

        ezbc_context_t ctx = {
            .bs = &bs,
            .coeffs = coeffs,
            .states = states,
            .width = width,
            .height = height,
            .bitplane = bitplane,
            .threshold = threshold,
            .next_insignificant = &next_insignificant,
            .next_significant = &next_significant,
            .sign_count = &sign_bits_this_bitplane
        };

        // Process insignificant blocks - check if they become significant
        for (size_t i = 0; i < insignificant_queue.count; i++) {
            const ezbc_block_t block = insignificant_queue.blocks[i];

            if (is_zero_block_ezbc(coeffs, width, height, &block, threshold)) {
                // Still insignificant: emit 0 and retry at the next bitplane
                bitstream_write_bit(&bs, 0);
                queue_push(&next_insignificant, block);
            } else {
                // Became significant: emit 1, then code the block and all children
                bitstream_write_bit(&bs, 1);
                process_significant_block_recursive(&ctx, block);
            }
        }

        // Process significant 1x1 blocks - emit refinement bits
        for (size_t i = 0; i < significant_queue.count; i++) {
            const ezbc_block_t block = significant_queue.blocks[i];
            const int idx = block.y * width + block.x;
            const int abs_val = abs(coeffs[idx]);

            bitstream_write_bit(&bs, (abs_val >> bitplane) & 1);

            // Keep in significant queue for next bitplane
            queue_push(&next_significant, block);
        }

        // Swap queues for next bitplane. The drained queues become the new
        // (emptied) "next" queues, so their storage is reused instead of being
        // freed and reallocated on every bitplane.
        block_queue_t drained = insignificant_queue;
        insignificant_queue = next_insignificant;
        next_insignificant = drained;
        next_insignificant.count = 0;

        drained = significant_queue;
        significant_queue = next_significant;
        next_significant = drained;
        next_significant.count = 0;
    }

    // Free all queues
    queue_free(&insignificant_queue);
    queue_free(&significant_queue);
    queue_free(&next_insignificant);
    queue_free(&next_significant);
    free(states);

    // Ownership of the bitstream buffer passes to the caller
    const size_t final_size = bitstream_size(&bs);
    *output = bs.data;
    return final_size;
}

View File

@@ -0,0 +1,61 @@
/**
* TAV Encoder - EZBC (Embedded Zero Block Coding) Library
*
* Public API for EZBC entropy coding of wavelet coefficients.
*/
#ifndef TAV_ENCODER_EZBC_H
#define TAV_ENCODER_EZBC_H
#include <stdint.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
// =============================================================================
// EZBC Encoding
// =============================================================================
/**
 * EZBC encoding for a single channel.
 *
 * Implements embedded zero block coding for efficient storage of sparse
 * wavelet coefficients. Exploits coefficient sparsity through hierarchical
 * significance testing and progressive bitplane encoding.
 * NOTE(review): this was described as "binary tree" coding, but the
 * implementation in this commit splits each block into up to four children
 * (quadtree-style) - confirm the intended wording.
 *
 * Algorithm:
 * 1. Find MSB bitplane from maximum absolute coefficient value
 * 2. Write header: MSB bitplane (8 bits), width (16 bits), height (16 bits)
 * 3. For each bitplane from MSB to 0:
 *    a. Process insignificant blocks: check if they become significant
 *       - Emit 0 if still insignificant, 1 if became significant
 *    b. For newly significant blocks: recursively subdivide until 1x1
 *       - Emit tree structure: 1=child is significant, 0=child insignificant
 *    c. Emit sign bits for newly significant 1x1 coefficients (1=negative, 0=positive)
 *    d. Process already-significant coefficients: emit refinement bits
 *       - Emit bit at current bitplane for progressive reconstruction
 * 4. Return encoded bitstream
 *
 * Benefits:
 * - Exploits coefficient sparsity (typical: 86.9% zeros in luma, 97.8% in chroma)
 * - Progressive refinement from MSB to LSB
 * - Spatial clustering through quadtree decomposition
 * - No additional entropy coding needed (bitstream is already compressed)
 *
 * Because width and height are stored in 16-bit header fields, only values
 * in 0..65535 survive the round trip. The implementation in this commit
 * terminates the process if an internal allocation fails.
 *
 * @param coeffs Input quantized coefficients (int16_t array)
 * @param count Number of coefficients (width x height)
 * @param width Frame width (must match coefficient array layout)
 * @param height Frame height (must match coefficient array layout)
 * @param output Output buffer pointer (allocated by this function with the C
 *               allocator; caller must release it with free())
 * @return Encoded size in bytes (including header)
 */
size_t tav_encode_channel_ezbc(int16_t *coeffs, size_t count, int width, int height,
uint8_t **output);
#ifdef __cplusplus
}
#endif
#endif // TAV_ENCODER_EZBC_H

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,624 @@
/**
* TAV Encoder - Quantization Library
*
* Provides DWT coefficient quantization with perceptual weighting based on
* the Human Visual System (HVS). Implements separable 3D quantization for
* temporal GOP encoding.
*
* Extracted from encoder_tav.c as part of library refactoring.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
// Forward declaration of encoder context (defined in main encoder)
typedef struct tav_encoder_s tav_encoder_t;
// =============================================================================
// Utility Functions
// =============================================================================
// Limit x to the range [min, max]; the lower bound is tested first.
static inline int CLAMP(int x, int min, int max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
// Limit x to the range [min, max]; the lower bound is tested first.
// A NaN input fails both comparisons and is returned unchanged.
static inline float FCLAMP(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
// =============================================================================
// Constants for Perceptual Model
// =============================================================================
// Dead-zone quantization scaling factors (applied selectively to luma only)
#define DEAD_ZONE_FINEST_SCALE 1.0f // Full dead-zone for finest level
#define DEAD_ZONE_FINE_SCALE 0.5f // Reduced dead-zone for second-finest level
// Anisotropy parameters for horizontal vs vertical detail quantization.
// Indexed by quality level. Each table holds seven entries (indices 0-6);
// NOTE(review): quality levels are described elsewhere as 0-5, so the last
// entry may be a spare or a lossless slot - confirm. The tables are read
// without a bounds check (see perceptual_model3_HL).
static const float ANISOTROPY_MULT[] = {5.1f, 3.8f, 2.7f, 2.0f, 1.5f, 1.2f, 1.0f};
static const float ANISOTROPY_BIAS[] = {0.4f, 0.3f, 0.2f, 0.1f, 0.0f, 0.0f, 0.0f};
// Chroma-specific anisotropy (more aggressive quantization); same indexing
static const float ANISOTROPY_MULT_CHROMA[] = {7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f};
static const float ANISOTROPY_BIAS_CHROMA[] = {1.0f, 0.8f, 0.6f, 0.4f, 0.2f, 0.0f, 0.0f};
// Detail preservation factors for 2-pixel and 4-pixel structures
#define FOUR_PIXEL_DETAILER 0.88f
#define TWO_PIXEL_DETAILER 0.92f
// =============================================================================
// Subband Analysis Helper Functions
// =============================================================================
/**
 * Find the decomposition level of the coefficient at (x, y).
 *
 * Starting from the full frame, the region is halved (>> 1) once per level.
 * The first level at which the position falls outside the top-left quadrant
 * is returned: 1 is the finest detail level, decomp_levels the coarsest.
 * A position that stays inside the top-left quadrant through every level
 * belongs to the final LL band and yields 0.
 *
 * NOTE(review): the halving here rounds down, whereas tav_dwt_2d_forward()
 * sizes its low-pass region as (n + 1) / 2; for odd region sizes the two
 * boundaries differ by one sample - confirm this is intended.
 */
static int get_subband_level_2d(int x, int y, int width, int height, int decomp_levels) {
    int region_w = width;
    int region_h = height;
    int level = 1;

    while (level <= decomp_levels) {
        // Shrink to the LL quadrant of the current region
        region_w >>= 1;
        region_h >>= 1;

        // Outside that quadrant means a detail band (LH, HL or HH) of this level
        if (x >= region_w || y >= region_h) {
            return level;
        }
        level++;
    }

    // Never left the LL quadrant: coarsest low-pass band
    return 0;
}
/**
 * Find the subband type of the coefficient at (x, y).
 *
 * Walks the same quadrant hierarchy as get_subband_level_2d(): the region is
 * halved (>> 1) per level until the position leaves the top-left quadrant.
 *
 * @return 0 = LL (never left the top-left quadrant),
 *         1 = LH (right of the split, above it),
 *         2 = HL (left of the split, below it),
 *         3 = HH (right of and below the split)
 */
static int get_subband_type_2d(int x, int y, int width, int height, int decomp_levels) {
    int region_w = width;
    int region_h = height;

    for (int remaining = decomp_levels; remaining > 0; remaining--) {
        region_w >>= 1;
        region_h >>= 1;

        const int right = (x >= region_w);
        const int below = (y >= region_h);

        if (right || below) {
            // right only -> 1, below only -> 2, both -> 3
            return (right ? 1 : 0) + (below ? 2 : 0);
        }
        // Still inside LL: descend to the next level
    }

    return 0;
}
/**
 * Legacy wrapper: decomposition level for a coefficient given by its
 * row-major linear index (see get_subband_level_2d()).
 */
static int get_subband_level(int linear_idx, int width, int height, int decomp_levels) {
    // quot = row, rem = column
    const div_t pos = div(linear_idx, width);
    return get_subband_level_2d(pos.rem, pos.quot, width, height, decomp_levels);
}
/**
 * Legacy wrapper: subband type for a coefficient given by its row-major
 * linear index (see get_subband_type_2d()).
 */
static int get_subband_type(int linear_idx, int width, int height, int decomp_levels) {
    // quot = row, rem = column
    const div_t pos = div(linear_idx, width);
    return get_subband_type_2d(pos.rem, pos.quot, width, height, decomp_levels);
}
/**
 * Get the temporal subband level of a frame inside a transformed GOP.
 *
 * Band boundaries are num_frames >> temporal_levels, num_frames >>
 * (temporal_levels - 1), ..., num_frames >> 1. The index of the first
 * boundary lying above frame_idx is returned, so 0 denotes the coarsest
 * low-pass group at the front of the GOP and temporal_levels the finest
 * high-pass group at the back.
 *
 * NOTE(review): these boundaries round down, whereas tav_dwt_3d_forward()
 * sizes each low-pass half as (n + 1) / 2; the two disagree for GOP lengths
 * that are not a multiple of 2^temporal_levels - confirm.
 */
static int get_temporal_subband_level(int frame_idx, int num_frames, int temporal_levels) {
    // Largest shift first = smallest boundary = coarsest band
    for (int shift = temporal_levels; shift > 0; shift--) {
        const int boundary = num_frames >> shift;
        if (frame_idx < boundary) {
            return temporal_levels - shift;
        }
    }

    // Past every boundary: high-pass output of the first decomposition
    return temporal_levels;
}
// =============================================================================
// Perceptual Model Functions (HVS-based weighting)
// =============================================================================
// Blend between x (a = 0) and y (a = 1); a is not clamped.
static float lerp(float x, float y, float a) {
    const float keep = 1.f - a;
    return x * keep + y * a;
}
/**
 * Perceptual model for LH subband (horizontal details).
 * Curve: https://www.desmos.com/calculator/mjlpwqm8ge
 *
 * Piecewise curve over the decomposition level: a straight line for
 * level >= 4 and a cubic polynomial below that, both equal to 1.2 at
 * level 4.
 *
 * @param quality Quality level (currently not used by the curve)
 * @param level   Normalized decomposition level (1.0-6.0)
 * @return Perceptual weight multiplier
 */
static float perceptual_model3_LH(int quality, float level) {
    (void)quality;

    const float H4 = 1.2f;  // curve value at level 4
    const float K = 2.f;    // fixed curve parameter; the quantiser scales the result anyway

    // Linear segment
    if (level >= 4) {
        return H4 - ((K + 1.f) / 15.f) * (level - 4.f);
    }

    // Cubic segment; C3 offsets it so both segments agree at level 4
    const float K12 = K * 12.f;
    const float C3 = -1.f / 45.f * (K12 + 92);
    return (-level / 180.f) * (K12 + 5*level*level - 60*level + 252) - C3 + H4;
}
/**
 * Perceptual model for HL subband (vertical details).
 * Derived from the LH weight as LH * ANISOTROPY_MULT[quality] +
 * ANISOTROPY_BIAS[quality], evaluated with a single fused multiply-add.
 *
 * @param quality Quality level; used directly as table index, not range-checked
 * @param LH      LH subband weight
 * @return Perceptual weight multiplier
 */
static float perceptual_model3_HL(int quality, float LH) {
    const float gain = ANISOTROPY_MULT[quality];
    const float offset = ANISOTROPY_BIAS[quality];
    return fmaf(LH, gain, offset);
}
/**
 * Perceptual model for HH subband (diagonal details).
 * Blends the LH and HL weights with lerp(); the blend factor is
 * (sqrt(level) - 1) * 0.5 + 0.5, i.e. 0.5 at level 1 and 1.0 at level 4.
 *
 * @param LH    LH subband weight
 * @param HL    HL subband weight
 * @param level Normalized decomposition level
 * @return Perceptual weight multiplier
 */
static float perceptual_model3_HH(float LH, float HL, float level) {
    const float root_offset = sqrtf(level) - 1.f;
    const float blend = fmaf(root_offset, 0.5f, 0.5f);
    return lerp(LH, HL, blend);
}
/**
 * Perceptual model for LL subband (low-frequency baseband).
 *
 * Takes the LH curve at this level and divides it by the ratio
 * LH(level - 1) / LH(level).
 *
 * @param quality Quality level (forwarded to perceptual_model3_LH)
 * @param level   Normalized decomposition level
 * @return Perceptual weight multiplier
 */
static float perceptual_model3_LL(int quality, float level) {
    const float here = perceptual_model3_LH(quality, level);
    const float previous = perceptual_model3_LH(quality, level - 1);
    const float ratio = previous / here;
    return here / ratio;
}
/**
 * Chroma-specific perceptual model base curve.
 *
 * A straight line through 1.0 at level 4 whose slope is
 * -1 / (0.5 * quality^2 + 1), so the line flattens as quality rises.
 *
 * @param quality Quality level
 * @param level   Normalized decomposition level
 * @return Perceptual weight multiplier
 */
static float perceptual_model3_chroma_basecurve(int quality, float level) {
    const float denominator = 0.5f * quality * quality + 1.0f;
    const float distance = level - 4.0f;
    return 1.0f - (1.0f / denominator) * distance;
}
/**
 * Get perceptual weight for a specific subband and level.
 * Implements HVS-optimized frequency weighting.
 *
 * Forward declaration only; the definition is not part of this section.
 *
 * NOTE: This function requires enc->quality_level field from encoder context.
 * NOTE(review): tav_encoder_t is only forward-declared at the top of this
 * file, so the definition needs the full struct from the main encoder -
 * confirm it is visible where the function is defined.
 *
 * @param enc Encoder context (for quality_level)
 * @param level0 Decomposition level (1-based: 1=finest, decomp_levels=coarsest)
 * @param subband_type Subband type (0=LL, 1=LH, 2=HL, 3=HH)
 * @param is_chroma 1 for chroma channels, 0 for luma
 * @param max_levels Maximum decomposition levels
 * @return Perceptual weight multiplier (documented as >= 1.0; not verifiable
 *         from the declaration alone)
 */
static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels);
/**
 * Get perceptual weight for coefficient at linear index position.
 * Maps linear coefficient index to DWT subband layout.
 *
 * Forward declaration only; the definition is not part of this section.
 *
 * NOTE: This function requires enc->widths[]/enc->heights[] arrays from encoder context.
 *
 * @param enc Encoder context (for widths/heights arrays and quality_level)
 * @param linear_idx Linear coefficient index (row-major, presumably
 *                   y * width + x as in get_subband_level() - confirm)
 * @param width Frame width
 * @param height Frame height
 * @param decomp_levels Number of decomposition levels
 * @param is_chroma 1 for chroma channels, 0 for luma
 * @return Perceptual weight multiplier (documented as >= 1.0; not verifiable
 *         from the declaration alone)
 */
static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma);
// =============================================================================
// Quantization Functions
// =============================================================================
/**
 * Quantize DWT coefficients with uniform quantization and optional dead-zone.
 *
 * This is the basic quantization function without perceptual weighting.
 * Dead-zone quantization is applied selectively to luma channel only:
 * - HH1 (finest diagonal): full dead-zone
 * - LH1/HL1/HH2: half dead-zone
 * - Coarser levels: no dead-zone (preserve structure)
 *
 * Declaration only; the definition is not part of this section, so the
 * behaviour above is the documented contract rather than something
 * checked here.
 *
 * @param coeffs Input DWT coefficients (float)
 * @param quantised Output quantized coefficients (int16_t)
 * @param size Number of coefficients (presumably width * height - confirm)
 * @param quantiser Base quantizer value (1-4096)
 * @param dead_zone_threshold Dead-zone threshold (0.0 = disabled)
 * @param width Frame width
 * @param height Frame height
 * @param decomp_levels Number of decomposition levels
 * @param is_chroma 1 for chroma channels, 0 for luma
 */
void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
float dead_zone_threshold, int width, int height,
int decomp_levels, int is_chroma);
/**
* Quantize DWT coefficients with per-coefficient perceptual weighting.
*
* Applies HVS-optimized frequency weighting to each coefficient based on its
* position in the DWT subband tree. Implements the full perceptual model with
* dead-zone quantization for luma.
*
* NOTE: This function requires encoder context fields:
* - enc->widths[]/enc->heights[] for subband layout
* - enc->quality_level for perceptual model
* - enc->dead_zone_threshold for dead-zone quantization
*
* @param enc Encoder context
* @param coeffs Input DWT coefficients (float)
* @param quantised Output quantized coefficients (int16_t)
* @param size Number of coefficients
* @param base_quantiser Base quantizer value (before perceptual weighting)
* @param width Frame width
* @param height Frame height
* @param decomp_levels Number of decomposition levels
* @param is_chroma 1 for chroma channels, 0 for luma
* @param frame_count Current frame number (for any frame-dependent logic)
*/
void tav_quantise_perceptual(tav_encoder_t *enc,
float *coeffs, int16_t *quantised, int size,
int base_quantiser, int width, int height,
int decomp_levels, int is_chroma, int frame_count);
/**
* Quantize 3D DWT coefficients with SEPARABLE temporal-spatial quantization.
*
* After 3D DWT (temporal + spatial), GOP coefficients have this structure:
* - Temporal DWT applied first → temporal subbands at different levels
* - Spatial 2D DWT applied to each temporal subband
*
* Quantization strategy:
* 1. Compute temporal base quantizer: tH_base(level) = Qbase * 2^(beta*level^kappa)
* - tLL (level 0): coarsest temporal → smallest quantizer
* - tHH (highest level): finest temporal → largest quantizer
* 2. Apply spatial perceptual weighting to tH_base
* 3. Final quantizer: Q_effective = tH_base × spatial_weight
*
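* NOTE: This function requires encoder context fields:
* - enc->encoder_preset for sports mode detection (bit 0)
* - enc->temporal_decomp_levels for temporal level calculation
* - enc->width, enc->height and enc->decomp_levels for the spatial subband layout
* - enc->frame_count, forwarded (plus the subband index) as the frame number
* - enc->verbose for debug output (the debug print is currently commented out)
* - Plus all fields needed by tav_quantise_perceptual()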
*
* @param enc Encoder context
* @param gop_coeffs GOP coefficients [frame][pixel] (temporal subbands)
* @param quantised Output quantized coefficients [frame][pixel]
* @param num_frames Number of temporal subband frames
* @param spatial_size Number of spatial coefficients per frame
* @param base_quantiser Base quantizer value (before temporal/spatial scaling)
* @param is_chroma 1 for chroma channels, 0 for luma
*/
void tav_quantise_3d_dwt(tav_encoder_t *enc,
float **gop_coeffs, int16_t **quantised, int num_frames,
int spatial_size, int base_quantiser, int is_chroma);
/**
* Convert floating-point quantizer to integer with dithering (for bitrate mode).
*
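* Implements Floyd-Steinberg style error diffusion along the time axis: the
* float quantizer is rounded to the nearest integer and half of the rounding
* error is carried over to the next call, avoiding quantization artifacts
* when converting float quantizer values to integers for rate control.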
*
* NOTE: This function requires encoder context fields:
* - enc->adjusted_quantiser_y_float (current float quantizer)
* - enc->dither_accumulator (accumulated error, modified by this function)
*
* @param enc Encoder context
* @return Integer quantizer value (0-254)
*/
int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc);
// =============================================================================
// Perceptual Weight Implementation (requires encoder context)
// =============================================================================
// NOTE: This implementation requires encoder context (enc->quality_level)
// Struct definition will be in encoder header when integrated
#ifndef TAV_ENCODER_QUANTIZE_INTERNAL
// Stand-in definition of the encoder context, listing only the fields this
// quantization module reads or writes. Skipped when
// TAV_ENCODER_QUANTIZE_INTERNAL is defined.
// NOTE(review): presumably the real encoder struct replaces this once the
// module is integrated - confirm the field layout stays in sync.
struct tav_encoder_s {
int quality_level;                // selects the perceptual model curve and indexes the ANISOTROPY_*_CHROMA tables
int *widths;                      // per-decomposition-level subband widths, indexed 1..decomp_levels by this module
int *heights;                     // per-decomposition-level subband heights, same indexing as widths
int decomp_levels;                // spatial decomposition levels forwarded by tav_quantise_3d_dwt()
float dead_zone_threshold;        // luma dead-zone threshold; values <= 0 disable dead-zone quantization
int encoder_preset;               // bit 0 switches tav_quantise_3d_dwt() to the sports temporal constants
int temporal_decomp_levels;       // passed to get_temporal_subband_level()
int verbose;                      // only referenced by a commented-out debug print in this module
int frame_count;                  // base frame number; tav_quantise_3d_dwt() adds the temporal subband index
float adjusted_quantiser_y_float; // float luma quantizer consumed by tav_quantiser_float_to_int_dithered()
float dither_accumulator;         // rounding error carried between dithered conversions (updated on each call)
int width;                        // frame width forwarded to tav_quantise_perceptual()
int height;                       // frame height forwarded to tav_quantise_perceptual()
};
#endif
/**
 * Compute the perceptual quantizer weight for one DWT subband.
 *
 * The decomposition level is first mapped linearly from [1, max_levels] onto
 * the model's [1, 6] axis. Luma weights come from the perceptual_model3_*
 * curves; HL and HH are additionally scaled by a "detailer" factor when the
 * mapped level falls in the 1.8..2.2 or 2.8..3.2 windows. Chroma weights are
 * derived from the chroma base curve and clamped to [1, 100]; chroma LL is
 * always 1.0.
 *
 * @param enc          Encoder context (reads quality_level)
 * @param level0       Decomposition level (1-based: 1=finest, max_levels=coarsest)
 * @param subband_type Subband type (0=LL, 1=LH, 2=HL, anything else=HH)
 * @param is_chroma    Non-zero for chroma channels, 0 for luma
 * @param max_levels   Maximum decomposition levels
 * @return Perceptual weight multiplier
 */
static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels) {
    // With a single decomposition level the mapping span (max_levels - 1) is
    // zero; pin the level to 1.0 instead of evaluating 0/0, which would feed
    // NaN into every model curve below.
    float level = 1.0f;
    if (max_levels > 1) {
        level = 1.0f + ((level0 - 1.0f) / (max_levels - 1.0f)) * 5.0f;
    }

    if (is_chroma) {
        // Chroma is quantised more aggressively than luma.
        const float base = perceptual_model3_chroma_basecurve(enc->quality_level, level - 1);
        switch (subband_type) {
        case 0:  // LL chroma
            return 1.0f;
        case 1:  // LH chroma - horizontal details
            return FCLAMP(base, 1.0f, 100.0f);
        case 2:  // HL chroma - vertical details
            return FCLAMP(base * ANISOTROPY_MULT_CHROMA[enc->quality_level], 1.0f, 100.0f);
        default: // HH chroma - diagonal details (most aggressive)
            return FCLAMP(base * ANISOTROPY_MULT_CHROMA[enc->quality_level] + ANISOTROPY_BIAS_CHROMA[enc->quality_level], 1.0f, 100.0f);
        }
    }

    // Luma strategy: keep more horizontal detail.
    if (subband_type == 0) {
        // LL subband - contains most image energy
        return perceptual_model3_LL(enc->quality_level, level);
    }

    // LH subband - horizontal details
    const float LH = perceptual_model3_LH(enc->quality_level, level);
    if (subband_type == 1) {
        return LH;
    }

    // HL is derived from LH; both HL and HH share the same detailer factor.
    const float HL = perceptual_model3_HL(enc->quality_level, LH);
    float detailer = 1.0f;
    if (2.2f >= level && level >= 1.8f) {
        detailer = TWO_PIXEL_DETAILER;
    } else if (3.2f >= level && level >= 2.8f) {
        detailer = FOUR_PIXEL_DETAILER;
    }

    if (subband_type == 2) {
        // HL subband - vertical details
        return HL * detailer;
    }
    // HH subband - diagonal details
    return perceptual_model3_HH(LH, HL, level) * detailer;
}
/**
 * Look up the perceptual weight for the coefficient at a linear buffer index.
 *
 * The buffer is walked as: one LL band, then for each level from
 * decomp_levels down to 1 the LH, HL and HH bands in that order. The first
 * band whose end lies beyond linear_idx decides the weight.
 *
 * NOTE(review): detail bands for `level` are sized from
 * widths/heights[decomp_levels - level + 1], while LL is sized from
 * widths/heights[decomp_levels]. Confirm this matches how the arrays are
 * populated and the layout used by get_subband_level().
 *
 * @param enc           Encoder context (reads widths[] and heights[])
 * @param linear_idx    Linear coefficient index
 * @param width         Frame width (not read here)
 * @param height        Frame height (not read here)
 * @param decomp_levels Number of decomposition levels
 * @param is_chroma     Non-zero for chroma channels, 0 for luma
 * @return Weight of the containing subband, or 1.0 when the index lies past
 *         the last band
 */
static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma) {
    // LL band at the maximum decomposition level comes first.
    const int ll_count = enc->widths[decomp_levels] * enc->heights[decomp_levels];
    if (linear_idx < ll_count) {
        return get_perceptual_weight(enc, decomp_levels, 0, is_chroma, decomp_levels);
    }

    int band_start = ll_count;
    for (int level = decomp_levels; level >= 1; level--) {
        const int dims_idx = decomp_levels - level + 1;
        const int band_count = enc->widths[dims_idx] * enc->heights[dims_idx];

        // Three equally sized detail bands per level: 1=LH, 2=HL, 3=HH.
        for (int subband_type = 1; subband_type <= 3; subband_type++) {
            const int band_end = band_start + band_count;
            if (linear_idx < band_end) {
                return get_perceptual_weight(enc, level, subband_type, is_chroma, decomp_levels);
            }
            band_start = band_end;
        }
    }

    // Index beyond every band: neutral weight.
    return 1.0f;
}
// =============================================================================
// Quantization Function Implementations
// =============================================================================
/**
 * Uniform quantization of DWT coefficients with an optional luma dead-zone.
 *
 * Each coefficient is divided by the quantizer (clamped to [1, 4096]),
 * optionally zeroed by the dead-zone, rounded half away from zero and
 * clamped to the int16_t range. The dead-zone only applies to luma when
 * dead_zone_threshold > 0: HH at level 1 uses DEAD_ZONE_FINEST_SCALE,
 * LH/HL at level 1 and HH at level 2 use DEAD_ZONE_FINE_SCALE.
 */
void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
                          float dead_zone_threshold, int width, int height,
                          int decomp_levels, int is_chroma) {
    float step = quantiser;
    step = FCLAMP(step, 1.0f, 4096.0f);

    const int use_dead_zone = (dead_zone_threshold > 0.0f) && !is_chroma;

    for (int idx = 0; idx < size; idx++) {
        float scaled = coeffs[idx] / step;

        if (use_dead_zone) {
            const int level = get_subband_level(idx, width, height, decomp_levels);
            const int band = get_subband_type(idx, width, height, decomp_levels);

            // Bands not listed below keep a zero cutoff, so only exact zeros match.
            float cutoff = 0.0f;
            switch (level) {
            case 1:
                if (band == 3) {
                    cutoff = dead_zone_threshold * DEAD_ZONE_FINEST_SCALE;  // HH1
                } else if (band == 1 || band == 2) {
                    cutoff = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;    // LH1, HL1
                }
                break;
            case 2:
                if (band == 3) {
                    cutoff = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;    // HH2
                }
                break;
            default:
                break;
            }

            if (fabsf(scaled) <= cutoff) {
                scaled = 0.0f;
            }
        }

        // Round half away from zero, then saturate to 16 bits.
        const float rounding = (scaled >= 0) ? 0.5f : -0.5f;
        quantised[idx] = (int16_t)CLAMP((int)(scaled + rounding), -32768, 32767);
    }
}
/**
 * Quantize DWT coefficients with a per-coefficient perceptual weight.
 *
 * The base quantizer (clamped to [1, 4096]) is multiplied by the weight of
 * the subband containing each coefficient. Luma coefficients additionally
 * pass through the same dead-zone rules as tav_quantise_uniform(), driven by
 * enc->dead_zone_threshold. Results are rounded half away from zero and
 * clamped to the int16_t range. frame_count is accepted but not read here.
 */
void tav_quantise_perceptual(tav_encoder_t *enc,
                             float *coeffs, int16_t *quantised, int size,
                             int base_quantiser, int width, int height,
                             int decomp_levels, int is_chroma, int frame_count) {
    float base_step = base_quantiser;
    base_step = FCLAMP(base_step, 1.0f, 4096.0f);

    for (int idx = 0; idx < size; idx++) {
        // Weight depends on which subband this index falls in.
        const float weight = get_perceptual_weight_for_position(enc, idx, width, height, decomp_levels, is_chroma);
        const float step = base_step * weight;
        float scaled = coeffs[idx] / step;

        if (enc->dead_zone_threshold > 0.0f && !is_chroma) {
            const int level = get_subband_level(idx, width, height, decomp_levels);
            const int band = get_subband_type(idx, width, height, decomp_levels);

            float cutoff = 0.0f;
            if (level == 1 && band == 3) {
                cutoff = enc->dead_zone_threshold * DEAD_ZONE_FINEST_SCALE;  // HH1
            } else if (level == 1 && (band == 1 || band == 2)) {
                cutoff = enc->dead_zone_threshold * DEAD_ZONE_FINE_SCALE;    // LH1, HL1
            } else if (level == 2 && band == 3) {
                cutoff = enc->dead_zone_threshold * DEAD_ZONE_FINE_SCALE;    // HH2
            }

            if (fabsf(scaled) <= cutoff) {
                scaled = 0.0f;
            }
        }

        const float rounding = (scaled >= 0) ? 0.5f : -0.5f;
        quantised[idx] = (int16_t)CLAMP((int)(scaled + rounding), -32768, 32767);
    }
}
/**
 * Quantize a GOP of 3D-DWT coefficients, one temporal subband at a time.
 *
 * For each temporal subband t the base quantizer is scaled by
 * 2^(beta * level^kappa), where level comes from
 * get_temporal_subband_level(). The result is rounded, clamped to [1, 255]
 * and handed to tav_quantise_perceptual() for the spatial pass. When bit 0
 * of enc->encoder_preset is set (sports preset) beta is 0, so no temporal
 * scaling is applied.
 */
void tav_quantise_3d_dwt(tav_encoder_t *enc,
                         float **gop_coeffs, int16_t **quantised, int num_frames,
                         int spatial_size, int base_quantiser, int is_chroma) {
    const int sports_preset = (enc->encoder_preset & 0x01) != 0;
    const float beta = sports_preset ? 0.0f : 0.6f;
    const float kappa = sports_preset ? 1.0f : 1.14f;

    for (int subband = 0; subband < num_frames; subband++) {
        // Temporal level of this subband decides how coarse it may be.
        const int temporal_level = get_temporal_subband_level(subband, num_frames, enc->temporal_decomp_levels);

        // Exponential scaling of the base quantizer.
        const float scale = powf(2.0f, beta * powf(temporal_level, kappa));
        const float scaled_quantiser = base_quantiser * scale;
        const int subband_quantiser = CLAMP((int)roundf(scaled_quantiser), 1, 255);

        // Spatial pass with the temporally scaled base quantizer.
        tav_quantise_perceptual(enc,
                                gop_coeffs[subband],
                                quantised[subband],
                                spatial_size,
                                subband_quantiser,
                                enc->width,
                                enc->height,
                                enc->decomp_levels,
                                is_chroma,
                                enc->frame_count + subband);
    }
}
/**
 * Convert the float luma quantizer to an integer with temporal dithering.
 *
 * Adds the carried error to enc->adjusted_quantiser_y_float, rounds via
 * (int)(value + 0.5f), stores half of the rounding residual in
 * enc->dither_accumulator for the next call, and returns the rounded value
 * clamped to [0, 254]. The residual is computed before the clamp.
 */
int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc) {
    const float target = enc->adjusted_quantiser_y_float + enc->dither_accumulator;
    const int rounded = (int)(target + 0.5f);

    // Carry 50% of the rounding error into the next conversion.
    enc->dither_accumulator = (target - (float)rounded) * 0.5f;

    return CLAMP(rounded, 0, 254);
}

View File

@@ -0,0 +1,137 @@
/**
* TAV Encoder - Quantization Library
*
* Public API for DWT coefficient quantization with perceptual weighting.
*/
#ifndef TAV_ENCODER_QUANTIZE_H
#define TAV_ENCODER_QUANTIZE_H
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// Forward declaration of encoder context (defined in main encoder)
typedef struct tav_encoder_s tav_encoder_t;
// =============================================================================
// Uniform Quantization
// =============================================================================
/**
* Quantize DWT coefficients with uniform quantization and optional dead-zone.
*
* This is the basic quantization function without perceptual weighting.
* Dead-zone quantization is applied selectively to luma channel only:
* - HH1 (finest diagonal): full dead-zone
* - LH1/HL1/HH2: half dead-zone
* - Coarser levels: no dead-zone (preserve structure)
*
* @param coeffs Input DWT coefficients (float)
* @param quantised Output quantized coefficients (int16_t)
* @param size Number of coefficients
* @param quantiser Base quantizer value (1-4096)
* @param dead_zone_threshold Dead-zone threshold (0.0 = disabled)
* @param width Frame width
* @param height Frame height
* @param decomp_levels Number of decomposition levels
* @param is_chroma 1 for chroma channels, 0 for luma
*/
void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
float dead_zone_threshold, int width, int height,
int decomp_levels, int is_chroma);
// =============================================================================
// Perceptual Quantization
// =============================================================================
/**
* Quantize DWT coefficients with per-coefficient perceptual weighting.
*
* Applies HVS-optimized frequency weighting to each coefficient based on its
* position in the DWT subband tree. Implements the full perceptual model with
* dead-zone quantization for luma.
*
* NOTE: This function requires encoder context fields:
* - enc->widths[]/enc->heights[] for subband layout
* - enc->quality_level for perceptual model
* - enc->dead_zone_threshold for dead-zone quantization
*
* @param enc Encoder context
* @param coeffs Input DWT coefficients (float)
* @param quantised Output quantized coefficients (int16_t)
* @param size Number of coefficients
* @param base_quantiser Base quantizer value (before perceptual weighting)
* @param width Frame width
* @param height Frame height
* @param decomp_levels Number of decomposition levels
* @param is_chroma 1 for chroma channels, 0 for luma
* @param frame_count Current frame number (for any frame-dependent logic)
*/
void tav_quantise_perceptual(tav_encoder_t *enc,
float *coeffs, int16_t *quantised, int size,
int base_quantiser, int width, int height,
int decomp_levels, int is_chroma, int frame_count);
// =============================================================================
// 3D GOP Quantization
// =============================================================================
/**
* Quantize 3D DWT coefficients with SEPARABLE temporal-spatial quantization.
*
* After 3D DWT (temporal + spatial), GOP coefficients have this structure:
* - Temporal DWT applied first → temporal subbands at different levels
* - Spatial 2D DWT applied to each temporal subband
*
* Quantization strategy:
* 1. Compute temporal base quantizer: tH_base(level) = Qbase * 2^(beta*level^kappa)
* - tLL (level 0): coarsest temporal → smallest quantizer
* - tHH (highest level): finest temporal → largest quantizer
* 2. Apply spatial perceptual weighting to tH_base
* 3. Final quantizer: Q_effective = tH_base × spatial_weight
*
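* NOTE: This function requires encoder context fields:
* - enc->encoder_preset for sports mode detection (bit 0)
* - enc->temporal_decomp_levels for temporal level calculation
* - enc->width, enc->height and enc->decomp_levels for the spatial subband layout
* - enc->frame_count, forwarded (plus the subband index) as the frame number
* - enc->verbose for debug output (the debug print is currently commented out)
* - Plus all fields needed by tav_quantise_perceptual()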
*
* @param enc Encoder context
* @param gop_coeffs GOP coefficients [frame][pixel] (temporal subbands)
* @param quantised Output quantized coefficients [frame][pixel]
* @param num_frames Number of temporal subband frames
* @param spatial_size Number of spatial coefficients per frame
* @param base_quantiser Base quantizer value (before temporal/spatial scaling)
* @param is_chroma 1 for chroma channels, 0 for luma
*/
void tav_quantise_3d_dwt(tav_encoder_t *enc,
float **gop_coeffs, int16_t **quantised, int num_frames,
int spatial_size, int base_quantiser, int is_chroma);
// =============================================================================
// Rate Control
// =============================================================================
/**
* Convert floating-point quantizer to integer with dithering (for bitrate mode).
*
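* Implements Floyd-Steinberg style error diffusion along the time axis: the
* float quantizer is rounded to the nearest integer and half of the rounding
* error is carried over to the next call, avoiding quantization artifacts
* when converting float quantizer values to integers for rate control.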
*
* NOTE: This function requires encoder context fields:
* - enc->adjusted_quantiser_y_float (current float quantizer)
* - enc->dither_accumulator (accumulated error, modified by this function)
*
* @param enc Encoder context
* @return Integer quantizer value (0-254)
*/
int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc);
#ifdef __cplusplus
}
#endif
#endif // TAV_ENCODER_QUANTIZE_H

View File

@@ -0,0 +1,441 @@
/**
* TAV Encoder - Utilities Library
*
* Common utility functions and helpers used across the encoder.
* Includes math utilities, clamping, filename generation, etc.
*
* Extracted from encoder_tav.c as part of library refactoring.
*/
#define _POSIX_C_SOURCE 200112L
#include <limits.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
// =============================================================================
// Math Utilities
// =============================================================================
/**
 * Restrict an integer to the closed interval [min, max].
 * The lower bound is tested first.
 */
int tav_clamp_int(int x, int min, int max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
/**
 * Restrict a float to the closed interval [min, max].
 * The lower bound is tested first; a NaN input fails both comparisons and is
 * returned unchanged.
 */
float tav_clamp_float(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
/**
 * Restrict a double to the closed interval [min, max].
 * The lower bound is tested first; a NaN input fails both comparisons and is
 * returned unchanged.
 */
double tav_clamp_double(double x, double min, double max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
/**
* Round double to nearest integer.
*/
int tav_iround(double v) {
return (int)floor(v + 0.5);
}
/**
 * Linear interpolation between two floats.
 *
 * @param a Value returned for t = 0
 * @param b Value returned for t = 1
 * @param t Interpolation factor; not clamped by this function
 * @return a * (1 - t) + b * t
 */
float tav_lerp(float a, float b, float t) {
    const float weight_a = 1.0f - t;
    return a * weight_a + b * t;
}
/**
 * Linear interpolation between two doubles: a * (1 - t) + b * t.
 * The factor t is not clamped.
 */
double tav_lerp_double(double a, double b, double t) {
    const double weight_a = 1.0 - t;
    return a * weight_a + b * t;
}
/**
 * Return the smaller of two integers.
 */
int tav_min_int(int a, int b) {
    if (a < b) {
        return a;
    }
    return b;
}
/**
 * Return the larger of two integers.
 */
int tav_max_int(int a, int b) {
    if (a > b) {
        return a;
    }
    return b;
}
/**
 * Return the smaller of two floats.
 * When the comparison a < b is false (including when either is NaN), b is
 * returned.
 */
float tav_min_float(float a, float b) {
    if (a < b) {
        return a;
    }
    return b;
}
/**
 * Return the larger of two floats.
 * When the comparison a > b is false (including when either is NaN), b is
 * returned.
 */
float tav_max_float(float a, float b) {
    if (a > b) {
        return a;
    }
    return b;
}
/**
 * Compute absolute value of integer.
 *
 * INT_MIN has no positive counterpart in int, and negating it is undefined
 * behaviour; that single input saturates to INT_MAX instead.
 *
 * @param x Input value
 * @return |x|, or INT_MAX when x == INT_MIN
 */
int tav_abs_int(int x) {
    if (x == INT_MIN) {
        return INT_MAX;
    }
    return x < 0 ? -x : x;
}
/**
 * Compute absolute value of float.
 * Only values that compare below zero are negated; everything else is
 * returned as-is.
 */
float tav_abs_float(float x) {
    if (x < 0.0f) {
        return -x;
    }
    return x;
}
/**
 * Sign function: 1 for positive input, -1 for negative input, 0 for zero.
 */
int tav_sign(int x) {
    if (x > 0) {
        return 1;
    }
    if (x < 0) {
        return -1;
    }
    return 0;
}
/**
 * Check if integer is power of 2.
 * Returns 1 for positive values with exactly one bit set, 0 otherwise
 * (including zero and negative values).
 */
int tav_is_power_of_2(int x) {
    if (x <= 0) {
        return 0;
    }
    // Clearing the lowest set bit leaves zero only when one bit was set.
    const int without_lowest_bit = x & (x - 1);
    return without_lowest_bit == 0;
}
/**
 * Round up to next power of 2.
 *
 * Values that already are a power of two are returned unchanged; values
 * <= 1 yield 1. When the result would not fit in an int (x above the largest
 * power of two representable in int), 0 is returned instead of overflowing.
 *
 * @param x Input value
 * @return Smallest power of two >= x, 1 for x <= 1, or 0 if unrepresentable
 */
int tav_next_power_of_2(int x) {
    if (x <= 1) {
        return 1;
    }
    // INT_MAX / 2 + 1 is the largest power of two that fits in an int.
    if (x > INT_MAX / 2 + 1) {
        return 0;
    }

    // Smear the highest set bit of (x - 1) into every lower position, then
    // add one. Unsigned arithmetic keeps every step well defined.
    unsigned int v = (unsigned int)x - 1u;
    for (unsigned int shift = 1; shift < sizeof(v) * CHAR_BIT; shift <<= 1) {
        v |= v >> shift;
    }
    return (int)(v + 1u);
}
/**
 * Compute floor of log2(x) by counting how many times x can be halved
 * before reaching 1.
 * Returns -1 for x <= 0.
 */
int tav_floor_log2(int x) {
    if (x <= 0) {
        return -1;
    }
    int halvings = 0;
    for (int rest = x; rest > 1; rest >>= 1) {
        halvings++;
    }
    return halvings;
}
/**
 * Compute ceil of log2(x).
 * Equals tav_floor_log2(x) when x is an exact power of two, one more
 * otherwise. Returns -1 for x <= 0.
 */
int tav_ceil_log2(int x) {
    if (x <= 0) {
        return -1;
    }
    const int floor_bits = tav_floor_log2(x);
    const int is_exact_power = ((1 << floor_bits) == x);
    return is_exact_power ? floor_bits : floor_bits + 1;
}
// =============================================================================
// Random Filename Generation
// =============================================================================
/**
 * Write "/tmp/" followed by 32 random alphanumeric characters into out.
 * The result is NOT NUL-terminated; the returned pointer addresses the byte
 * right after the random stem (out + 37).
 *
 * The rand() generator is seeded once per process, on first use. Keeping the
 * seed flag here lets both public generators share one random sequence;
 * seeding separately in each would restart the sequence and make their first
 * calls within the same second produce identical stems.
 */
static char *tav_write_random_tmp_stem(char *out) {
    enum { PREFIX_LEN = 5, STEM_LEN = 32 };
    static const char charset[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    static int seeded = 0;
    const int charset_size = sizeof(charset) - 1;

    if (!seeded) {
        srand((unsigned int)time(NULL));
        seeded = 1;
    }

    memcpy(out, "/tmp/", PREFIX_LEN);
    for (int i = 0; i < STEM_LEN; i++) {
        out[PREFIX_LEN + i] = charset[rand() % charset_size];
    }
    return out + PREFIX_LEN + STEM_LEN;
}

/**
 * Generate a random temporary filename with .mp2 extension.
 * Format: /tmp/[32 random chars].mp2
 *
 * NOTE: names come from rand() seeded with the current time and no file is
 * created, so they are predictable and not reserved; do not rely on them
 * where another process could race for the same path.
 *
 * @param filename Output buffer (must be at least 42 bytes); NULL is ignored
 */
void tav_generate_random_filename(char *filename) {
    if (!filename) {
        return;
    }
    char *tail = tav_write_random_tmp_stem(filename);
    memcpy(tail, ".mp2", 5);  // includes the terminating NUL
}

/**
 * Generate a random temporary filename with custom extension.
 * Format: /tmp/[32 random chars].[ext]
 *
 * @param filename Output buffer (must hold at least 38 + strlen(ext) + 1
 *                 bytes); NULL is ignored
 * @param ext      File extension (without leading dot, e.g., "tmp", "wav");
 *                 NULL is treated as an empty extension
 */
void tav_generate_random_filename_ext(char *filename, const char *ext) {
    if (!filename) {
        return;
    }
    char *tail = tav_write_random_tmp_stem(filename);
    *tail++ = '.';
    strcpy(tail, ext ? ext : "");
}
// =============================================================================
// Memory Utilities
// =============================================================================
/**
 * malloc() wrapper that terminates the program on failure.
 * A NULL result for a non-zero request prints an error to stderr and calls
 * exit(1); a NULL result for size 0 is passed through.
 */
void *tav_malloc(size_t size) {
    void *block = malloc(size);
    if (block != NULL || size == 0) {
        return block;
    }
    fprintf(stderr, "ERROR: Failed to allocate %zu bytes\n", size);
    exit(1);
}
/**
 * calloc() wrapper that terminates the program on failure.
 * A NULL result for a non-empty request (count > 0 and size > 0) prints an
 * error to stderr and calls exit(1).
 */
void *tav_calloc(size_t count, size_t size) {
    void *block = calloc(count, size);
    const int empty_request = (count == 0 || size == 0);
    if (block != NULL || empty_request) {
        return block;
    }
    fprintf(stderr, "ERROR: Failed to allocate %zu elements of %zu bytes\n", count, size);
    exit(1);
}
/**
 * realloc() wrapper that terminates the program on failure.
 * A NULL result for a non-zero size prints an error to stderr and calls
 * exit(1); otherwise the pointer returned by realloc() is passed through.
 */
void *tav_realloc(void *ptr, size_t size) {
    void *resized = realloc(ptr, size);
    if (resized != NULL || size == 0) {
        return resized;
    }
    fprintf(stderr, "ERROR: Failed to reallocate to %zu bytes\n", size);
    exit(1);
}
/**
 * Allocate aligned memory.
 *
 * The alignment must be a non-zero power of two. It is validated directly as
 * a size_t, because passing it through the int-based tav_is_power_of_2()
 * would truncate large values. On the POSIX path, alignments smaller than
 * sizeof(void *) are raised to sizeof(void *), since posix_memalign() rejects
 * anything smaller; the stricter alignment still satisfies the request.
 *
 * Release the result with tav_aligned_free().
 *
 * @param alignment Requested alignment in bytes (power of two)
 * @param size      Number of bytes to allocate
 * @return Pointer to the allocation, or NULL on invalid alignment or failure
 */
void *tav_aligned_alloc(size_t alignment, size_t size) {
    if (alignment == 0 || (alignment & (alignment - 1)) != 0) {
        fprintf(stderr, "ERROR: Alignment must be power of 2, got %zu\n", alignment);
        return NULL;
    }

#ifdef _WIN32
    return _aligned_malloc(size, alignment);
#else
    if (alignment < sizeof(void *)) {
        alignment = sizeof(void *);
    }
    void *ptr = NULL;
    if (posix_memalign(&ptr, alignment, size) != 0) {
        return NULL;
    }
    return ptr;
#endif
}
/**
 * Release memory obtained from tav_aligned_alloc().
 * Uses _aligned_free() on Windows and free() everywhere else.
 */
void tav_aligned_free(void *ptr) {
#ifndef _WIN32
    free(ptr);
#else
    _aligned_free(ptr);
#endif
}
// =============================================================================
// Array Utilities
// =============================================================================
/**
 * Set the first count elements of an integer array to value.
 */
void tav_array_fill_int(int *array, size_t count, int value) {
    size_t remaining = count;
    int *slot = array;
    while (remaining > 0) {
        *slot = value;
        slot++;
        remaining--;
    }
}
/**
 * Set the first count elements of a float array to value.
 */
void tav_array_fill_float(float *array, size_t count, float value) {
    size_t remaining = count;
    float *slot = array;
    while (remaining > 0) {
        *slot = value;
        slot++;
        remaining--;
    }
}
/**
 * Copy count integers from src to dst using memcpy(); the two ranges must
 * not overlap.
 */
void tav_array_copy_int(int *dst, const int *src, size_t count) {
    const size_t byte_count = count * sizeof(int);
    memcpy(dst, src, byte_count);
}
/**
 * Copy count floats from src to dst using memcpy(); the two ranges must not
 * overlap.
 */
void tav_array_copy_float(float *dst, const float *src, size_t count) {
    const size_t byte_count = count * sizeof(float);
    memcpy(dst, src, byte_count);
}
/**
 * Find the largest value in an integer array.
 * Returns 0 for an empty array.
 */
int tav_array_max_int(const int *array, size_t count) {
    if (count == 0) {
        return 0;
    }
    const int *best = array;
    const int *const end = array + count;
    for (const int *cur = array + 1; cur != end; ++cur) {
        if (*cur > *best) {
            best = cur;
        }
    }
    return *best;
}
/**
 * Find the smallest value in an integer array.
 * Returns 0 for an empty array.
 */
int tav_array_min_int(const int *array, size_t count) {
    if (count == 0) {
        return 0;
    }
    const int *best = array;
    const int *const end = array + count;
    for (const int *cur = array + 1; cur != end; ++cur) {
        if (*cur < *best) {
            best = cur;
        }
    }
    return *best;
}
/**
 * Find the largest magnitude (fabsf) in a float array.
 * Returns 0.0f for an empty array.
 */
float tav_array_max_abs_float(const float *array, size_t count) {
    if (count == 0) {
        return 0.0f;
    }
    float peak = fabsf(array[0]);
    size_t idx = 1;
    while (idx < count) {
        const float magnitude = fabsf(array[idx]);
        if (magnitude > peak) {
            peak = magnitude;
        }
        idx++;
    }
    return peak;
}
/**
 * Add up an integer array, accumulating in long long.
 * Returns 0 for an empty array.
 */
long long tav_array_sum_int(const int *array, size_t count) {
    long long total = 0;
    const int *const end = array + count;
    for (const int *cur = array; cur != end; ++cur) {
        total += *cur;
    }
    return total;
}
/**
 * Add up a float array in index order, accumulating in double.
 * Returns 0.0 for an empty array.
 */
double tav_array_sum_float(const float *array, size_t count) {
    double total = 0.0;
    const float *const end = array + count;
    for (const float *cur = array; cur != end; ++cur) {
        total += *cur;
    }
    return total;
}
/**
 * Arithmetic mean of a float array.
 * The sum is taken by tav_array_sum_float() in double precision and the
 * quotient is narrowed to float. Returns 0.0f for an empty array.
 */
float tav_array_mean_float(const float *array, size_t count) {
    if (count == 0) {
        return 0.0f;
    }
    const double total = tav_array_sum_float(array, count);
    return (float)(total / count);
}
/**
 * Exchange the integers stored at a and b.
 */
void tav_swap_int(int *a, int *b) {
    const int first = *a;
    const int second = *b;
    *a = second;
    *b = first;
}
/**
 * Exchange the floats stored at a and b.
 */
void tav_swap_float(float *a, float *b) {
    const float first = *a;
    const float second = *b;
    *a = second;
    *b = first;
}
/**
 * Exchange the pointers stored at a and b.
 */
void tav_swap_ptr(void **a, void **b) {
    void *const first = *a;
    void *const second = *b;
    *a = second;
    *b = first;
}

View File

@@ -0,0 +1,165 @@
/**
* TAV Encoder - Utilities Library
*
* Public API for common utility functions and helpers.
*/
#ifndef TAV_ENCODER_UTILS_H
#define TAV_ENCODER_UTILS_H
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
// =============================================================================
// Math Utilities
// =============================================================================
/** Clamp integer value to range [min, max] */
int tav_clamp_int(int x, int min, int max);
/** Clamp float value to range [min, max] */
float tav_clamp_float(float x, float min, float max);
/** Clamp double value to range [min, max] */
double tav_clamp_double(double x, double min, double max);
/** Round double to nearest integer */
int tav_iround(double v);
/** Linear interpolation between two floats */
float tav_lerp(float a, float b, float t);
/** Linear interpolation between two doubles */
double tav_lerp_double(double a, double b, double t);
/** Get minimum of two integers */
int tav_min_int(int a, int b);
/** Get maximum of two integers */
int tav_max_int(int a, int b);
/** Get minimum of two floats */
float tav_min_float(float a, float b);
/** Get maximum of two floats */
float tav_max_float(float a, float b);
/** Compute absolute value of integer */
int tav_abs_int(int x);
/** Compute absolute value of float */
float tav_abs_float(float x);
/** Sign function: returns -1, 0, or 1 */
int tav_sign(int x);
/** Check if integer is power of 2 */
int tav_is_power_of_2(int x);
/** Round up to next power of 2 */
int tav_next_power_of_2(int x);
/** Compute floor of log2(x) */
int tav_floor_log2(int x);
/** Compute ceil of log2(x) */
int tav_ceil_log2(int x);
// =============================================================================
// Random Filename Generation
// =============================================================================
/**
* Generate a random temporary filename with .mp2 extension.
* Format: /tmp/[32 random chars].mp2
*
* @param filename Output buffer (must be at least 42 bytes)
*/
void tav_generate_random_filename(char *filename);
/**
* Generate a random temporary filename with custom extension.
* Format: /tmp/[32 random chars].[ext]
*
* @param filename Output buffer (must hold at least 38 + strlen(ext) + 1 bytes)
* @param ext File extension (without leading dot)
*/
void tav_generate_random_filename_ext(char *filename, const char *ext);
// =============================================================================
// Memory Utilities
// =============================================================================
/** Safe malloc with error checking (exits on failure) */
void *tav_malloc(size_t size);
/** Safe calloc with error checking (exits on failure) */
void *tav_calloc(size_t count, size_t size);
/** Safe realloc with error checking (exits on failure) */
void *tav_realloc(void *ptr, size_t size);
/** Allocate aligned memory (returns NULL on failure) */
void *tav_aligned_alloc(size_t alignment, size_t size);
/** Free aligned memory */
void tav_aligned_free(void *ptr);
// =============================================================================
// Array Utilities
// =============================================================================
/** Fill integer array with constant value */
void tav_array_fill_int(int *array, size_t count, int value);
/** Fill float array with constant value */
void tav_array_fill_float(float *array, size_t count, float value);
/** Copy integer array */
void tav_array_copy_int(int *dst, const int *src, size_t count);
/** Copy float array */
void tav_array_copy_float(float *dst, const float *src, size_t count);
/** Find maximum value in integer array */
int tav_array_max_int(const int *array, size_t count);
/** Find minimum value in integer array */
int tav_array_min_int(const int *array, size_t count);
/** Find maximum absolute value in float array */
float tav_array_max_abs_float(const float *array, size_t count);
/** Compute sum of integer array */
long long tav_array_sum_int(const int *array, size_t count);
/** Compute sum of float array */
double tav_array_sum_float(const float *array, size_t count);
/** Compute mean of float array */
float tav_array_mean_float(const float *array, size_t count);
/** Swap two integer values */
void tav_swap_int(int *a, int *b);
/** Swap two float values */
void tav_swap_float(float *a, float *b);
/** Swap two pointer values */
void tav_swap_ptr(void **a, void **b);
// =============================================================================
// Convenience Macros (for backward compatibility)
// =============================================================================
#define CLAMP(x, min, max) tav_clamp_int(x, min, max)
#define FCLAMP(x, min, max) tav_clamp_float(x, min, max)
#ifdef __cplusplus
}
#endif
#endif // TAV_ENCODER_UTILS_H