mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 11:51:49 +09:00
625 lines
25 KiB
C
625 lines
25 KiB
C
/**
 * TAV Encoder - Quantization Library
 *
 * Provides DWT coefficient quantization with perceptual weighting based on
 * the Human Visual System (HVS). Implements separable 3D quantization for
 * temporal GOP encoding.
 *
 * Extracted from encoder_tav.c as part of library refactoring.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>

// Forward declaration of the encoder context (defined in the main encoder).
// Functions in this file only handle it through a pointer.
typedef struct tav_encoder_s tav_encoder_t;

// =============================================================================
// Utility Functions
// =============================================================================

/**
 * Clamp an integer to the inclusive range [min, max].
 *
 * @param x    Value to clamp
 * @param min  Lower bound (returned when x < min)
 * @param max  Upper bound (returned when x > max)
 * @return x limited to [min, max]
 */
static inline int CLAMP(int x, int min, int max) {
    if (x < min) {
        return min;
    }
    return (x > max) ? max : x;
}

/**
 * Clamp a float to the inclusive range [min, max].
 *
 * A NaN input compares false against both bounds and is returned unchanged.
 *
 * @param x    Value to clamp
 * @param min  Lower bound (returned when x < min)
 * @param max  Upper bound (returned when x > max)
 * @return x limited to [min, max]
 */
static inline float FCLAMP(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    return (x > max) ? max : x;
}

// =============================================================================
// Constants for Perceptual Model
// =============================================================================

// Dead-zone quantization scaling factors (applied selectively to luma only)
#define DEAD_ZONE_FINEST_SCALE 1.0f  // Full dead-zone for finest level
#define DEAD_ZONE_FINE_SCALE 0.5f    // Reduced dead-zone for second-finest level

// Anisotropy parameters for horizontal vs vertical detail quantization.
// Indexed by quality level. Each table holds 7 entries (valid indices 0-6),
// although the function documentation in this file describes quality as 0-5.
static const float ANISOTROPY_MULT[] = {5.1f, 3.8f, 2.7f, 2.0f, 1.5f, 1.2f, 1.0f};
static const float ANISOTROPY_BIAS[] = {0.4f, 0.3f, 0.2f, 0.1f, 0.0f, 0.0f, 0.0f};

// Chroma-specific anisotropy (more aggressive quantization); same indexing.
static const float ANISOTROPY_MULT_CHROMA[] = {7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f};
static const float ANISOTROPY_BIAS_CHROMA[] = {1.0f, 0.8f, 0.6f, 0.4f, 0.2f, 0.0f, 0.0f};

// Detail preservation factors for 2-pixel and 4-pixel structures
#define FOUR_PIXEL_DETAILER 0.88f
#define TWO_PIXEL_DETAILER 0.92f

// =============================================================================
// Subband Analysis Helper Functions
// =============================================================================

/**
 * Get decomposition level for coefficient at 2D spatial position.
 *
 * Walks the nested quadrant layout from the outermost (finest) level inward:
 * at each step the current region is halved, and a position outside the
 * top-left quadrant belongs to a detail band of that level.
 *
 * @param x              Column of the coefficient
 * @param y              Row of the coefficient
 * @param width          Width of the full coefficient plane
 * @param height         Height of the full coefficient plane
 * @param decomp_levels  Number of decomposition levels
 * @return level (1=finest to decomp_levels=coarsest, 0 for LL)
 */
static int get_subband_level_2d(int x, int y, int width, int height, int decomp_levels) {
    int region_w = width;
    int region_h = height;

    for (int level = 1; level <= decomp_levels; level++) {
        const int half_w = region_w >> 1;
        const int half_h = region_h >> 1;

        // Outside the top-left quadrant: a detail band (LH, HL, HH) of this level
        if (x >= half_w || y >= half_h) {
            return level;
        }

        // Inside the top-left quadrant: descend into the next coarser level
        region_w = half_w;
        region_h = half_h;
    }

    // Still in the top-left quadrant after the coarsest level: LL subband
    return 0;
}

/**
 * Get subband type for coefficient at 2D spatial position.
 *
 * Uses the same quadrant walk as get_subband_level_2d(): the first level at
 * which the position leaves the top-left quadrant decides the band. In this
 * file's naming the top-right quadrant is LH, the bottom-left is HL and the
 * bottom-right is HH.
 *
 * @param x              Column of the coefficient
 * @param y              Row of the coefficient
 * @param width          Width of the full coefficient plane
 * @param height         Height of the full coefficient plane
 * @param decomp_levels  Number of decomposition levels
 * @return 0=LL, 1=LH, 2=HL, 3=HH
 */
static int get_subband_type_2d(int x, int y, int width, int height, int decomp_levels) {
    int region_w = width;
    int region_h = height;

    for (int level = 1; level <= decomp_levels; level++) {
        const int half_w = region_w >> 1;
        const int half_h = region_h >> 1;
        const int in_left = x < half_w;
        const int in_top = y < half_h;

        if (in_left && in_top) {
            // Top-left quadrant: descend into the next coarser level
            region_w = half_w;
            region_h = half_h;
            continue;
        }

        if (in_top) {
            return 1;  // LH (top-right)
        }
        if (in_left) {
            return 2;  // HL (bottom-left)
        }
        return 3;      // HH (bottom-right)
    }

    // Still in the top-left quadrant after the coarsest level: LL subband
    return 0;
}

/**
 * Legacy wrapper: get the decomposition level for a linear coefficient index.
 *
 * Treats linear_idx as a row-major index into a plane `width` columns wide,
 * converts it to (x, y) and defers to get_subband_level_2d().
 *
 * @param linear_idx     Row-major coefficient index
 * @param width          Width of the coefficient plane
 * @param height         Height of the coefficient plane
 * @param decomp_levels  Number of decomposition levels
 * @return level (1=finest to decomp_levels=coarsest, 0 for LL); 0 is also
 *         returned when width is not positive
 */
static int get_subband_level(int linear_idx, int width, int height, int decomp_levels) {
    // A non-positive width would make the modulo/division below undefined
    if (width <= 0) {
        return 0;
    }

    const int x = linear_idx % width;
    const int y = linear_idx / width;
    return get_subband_level_2d(x, y, width, height, decomp_levels);
}

/**
 * Legacy wrapper: get the subband type for a linear coefficient index.
 *
 * Treats linear_idx as a row-major index into a plane `width` columns wide,
 * converts it to (x, y) and defers to get_subband_type_2d().
 *
 * @param linear_idx     Row-major coefficient index
 * @param width          Width of the coefficient plane
 * @param height         Height of the coefficient plane
 * @param decomp_levels  Number of decomposition levels
 * @return 0=LL, 1=LH, 2=HL, 3=HH; 0 is also returned when width is not
 *         positive
 */
static int get_subband_type(int linear_idx, int width, int height, int decomp_levels) {
    // A non-positive width would make the modulo/division below undefined
    if (width <= 0) {
        return 0;
    }

    const int x = linear_idx % width;
    const int y = linear_idx / width;
    return get_subband_type_2d(x, y, width, height, decomp_levels);
}

/**
 * Get temporal subband level for frame index in GOP.
 *
 * After temporal DWT with N levels, frames are organized as:
 * - Frames 0...num_frames/(2^N) = tL...L (N low-passes, coarsest)
 * - Remaining frames are temporal high-pass subbands at various levels
 *
 * The boundary for level k is num_frames >> (temporal_levels - k); the first
 * boundary that lies above frame_idx decides the level.
 *
 * @param frame_idx        Index of the frame within the GOP
 * @param num_frames       Number of frames in the GOP
 * @param temporal_levels  Number of temporal decomposition levels
 * @return 0 for coarsest (tLL), temporal_levels for finest (tHH)
 */
static int get_temporal_subband_level(int frame_idx, int num_frames, int temporal_levels) {
    // Check each level boundary from coarsest to finest
    int level = 0;
    while (level < temporal_levels) {
        const int boundary = num_frames >> (temporal_levels - level);
        if (frame_idx < boundary) {
            return level;
        }
        level++;
    }

    // Finest level (first decomposition's high-pass)
    return temporal_levels;
}

// =============================================================================
// Perceptual Model Functions (HVS-based weighting)
// =============================================================================

/**
 * Linear interpolation helper.
 *
 * @param x  Value returned when a == 0
 * @param y  Value returned when a == 1
 * @param a  Blend factor (not clamped)
 * @return x * (1 - a) + y * a
 */
static float lerp(float x, float y, float a) {
    const float weight_x = 1.f - a;
    return x * weight_x + y * a;
}

/**
 * Perceptual model for LH subband (horizontal details).
 * Human eyes are more sensitive to horizontal details than vertical.
 * Curve: https://www.desmos.com/calculator/mjlpwqm8ge
 *
 * The curve is piecewise: linear for level >= 4 and cubic below 4; both
 * pieces evaluate to 1.2 at level 4.
 *
 * @param quality  Quality level (0-5); currently unused, the curve is fixed
 * @param level    Normalized decomposition level (1.0-6.0)
 * @return Perceptual weight multiplier
 */
static float perceptual_model3_LH(int quality, float level) {
    (void)quality;  // kept for a uniform signature with the other model functions

    float H4 = 1.2f;
    float K = 2.f;  // using fixed value for fixed curve; quantiser will scale it up anyway
    float K12 = K * 12.f;
    float x = level;

    if (level >= 4) {
        // Linear segment
        return H4 - ((K + 1.f) / 15.f) * (x - 4.f);
    }

    // Cubic segment; C3 offsets it so that it meets the linear segment at x = 4
    float C3 = -1.f / 45.f * (K12 + 92);
    return (-x / 180.f) * (K12 + 5*x*x - 60*x + 252) - C3 + H4;
}

/**
|
||
* Perceptual model for HL subband (vertical details).
|
||
* Derived from LH with anisotropy compensation.
|
||
*
|
||
* @param quality Quality level (0-5)
|
||
* @param LH LH subband weight
|
||
* @return Perceptual weight multiplier
|
||
*/
|
||
static float perceptual_model3_HL(int quality, float LH) {
|
||
return fmaf(LH, ANISOTROPY_MULT[quality], ANISOTROPY_BIAS[quality]);
|
||
}
|
||
|
||
/**
 * Perceptual model for HH subband (diagonal details).
 * Interpolates between LH and HL based on level, with blend factor
 * (sqrt(level) - 1) * 0.5 + 0.5.
 *
 * @param LH     LH subband weight
 * @param HL     HL subband weight
 * @param level  Normalized decomposition level
 * @return Perceptual weight multiplier
 */
static float perceptual_model3_HH(float LH, float HL, float level) {
    const float blend = fmaf((sqrtf(level) - 1.f), 0.5f, 0.5f);
    return lerp(LH, HL, blend);
}

/**
 * Perceptual model for LL subband (low-frequency baseband).
 * Contains most image energy, preserve carefully.
 *
 * Extrapolates the LH curve one step further: with n = LH(level) and
 * m = LH(level - 1) / n, the result is n / m.
 *
 * @param quality  Quality level (0-5), forwarded to perceptual_model3_LH()
 * @param level    Normalized decomposition level
 * @return Perceptual weight multiplier
 */
static float perceptual_model3_LL(int quality, float level) {
    const float n = perceptual_model3_LH(quality, level);
    const float previous = perceptual_model3_LH(quality, level - 1);
    const float m = previous / n;

    return n / m;
}

/**
 * Chroma-specific perceptual model base curve.
 * Less critical for human perception, more aggressive quantization.
 *
 * A straight line through 1.0 at level 4 whose slope magnitude is
 * 1 / (0.5 * quality^2 + 1), so higher quality flattens the curve.
 *
 * @param quality  Quality level (0-5)
 * @param level    Normalized decomposition level
 * @return Perceptual weight multiplier (not clamped here)
 */
static float perceptual_model3_chroma_basecurve(int quality, float level) {
    const float slope = 1.0f / (0.5f * quality * quality + 1.0f);
    return 1.0f - slope * (level - 4.0f);
}

/**
|
||
* Get perceptual weight for a specific subband and level.
|
||
* Implements HVS-optimized frequency weighting.
|
||
*
|
||
* NOTE: This function requires enc->quality_level field from encoder context.
|
||
*
|
||
* @param enc Encoder context (for quality_level)
|
||
* @param level0 Decomposition level (1-based: 1=finest, decomp_levels=coarsest)
|
||
* @param subband_type Subband type (0=LL, 1=LH, 2=HL, 3=HH)
|
||
* @param is_chroma 1 for chroma channels, 0 for luma
|
||
* @param max_levels Maximum decomposition levels
|
||
* @return Perceptual weight multiplier (≥1.0)
|
||
*/
|
||
static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels);
|
||
|
||
/**
|
||
* Get perceptual weight for coefficient at linear index position.
|
||
* Maps linear coefficient index to DWT subband layout.
|
||
*
|
||
* NOTE: This function requires enc->widths[]/enc->heights[] arrays from encoder context.
|
||
*
|
||
* @param enc Encoder context (for widths/heights arrays and quality_level)
|
||
* @param linear_idx Linear coefficient index
|
||
* @param width Frame width
|
||
* @param height Frame height
|
||
* @param decomp_levels Number of decomposition levels
|
||
* @param is_chroma 1 for chroma channels, 0 for luma
|
||
* @return Perceptual weight multiplier (≥1.0)
|
||
*/
|
||
static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma);
|
||
|
||
// =============================================================================
// Quantization Functions
// =============================================================================

/**
 * Quantize DWT coefficients with uniform quantization and optional dead-zone.
 *
 * This is the basic quantization function without perceptual weighting.
 * Dead-zone quantization is applied selectively to luma channel only:
 * - HH1 (finest diagonal): full dead-zone
 * - LH1/HL1/HH2: half dead-zone
 * - Coarser levels: no dead-zone (preserve structure)
 *
 * @param coeffs               Input DWT coefficients (float)
 * @param quantised            Output quantized coefficients (int16_t)
 * @param size                 Number of coefficients
 * @param quantiser            Base quantizer value (clamped to 1-4096)
 * @param dead_zone_threshold  Dead-zone threshold (0.0 = disabled)
 * @param width                Frame width
 * @param height               Frame height
 * @param decomp_levels        Number of decomposition levels
 * @param is_chroma            1 for chroma channels, 0 for luma
 */
void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
                          float dead_zone_threshold, int width, int height,
                          int decomp_levels, int is_chroma);

/**
|
||
* Quantize DWT coefficients with per-coefficient perceptual weighting.
|
||
*
|
||
* Applies HVS-optimized frequency weighting to each coefficient based on its
|
||
* position in the DWT subband tree. Implements the full perceptual model with
|
||
* dead-zone quantization for luma.
|
||
*
|
||
* NOTE: This function requires encoder context fields:
|
||
* - enc->widths[]/enc->heights[] for subband layout
|
||
* - enc->quality_level for perceptual model
|
||
* - enc->dead_zone_threshold for dead-zone quantization
|
||
*
|
||
* @param enc Encoder context
|
||
* @param coeffs Input DWT coefficients (float)
|
||
* @param quantised Output quantized coefficients (int16_t)
|
||
* @param size Number of coefficients
|
||
* @param base_quantiser Base quantizer value (before perceptual weighting)
|
||
* @param width Frame width
|
||
* @param height Frame height
|
||
* @param decomp_levels Number of decomposition levels
|
||
* @param is_chroma 1 for chroma channels, 0 for luma
|
||
* @param frame_count Current frame number (for any frame-dependent logic)
|
||
*/
|
||
void tav_quantise_perceptual(tav_encoder_t *enc,
|
||
float *coeffs, int16_t *quantised, int size,
|
||
int base_quantiser, int width, int height,
|
||
int decomp_levels, int is_chroma, int frame_count);
|
||
|
||
/**
|
||
* Quantize 3D DWT coefficients with SEPARABLE temporal-spatial quantization.
|
||
*
|
||
* After 3D DWT (temporal + spatial), GOP coefficients have this structure:
|
||
* - Temporal DWT applied first → temporal subbands at different levels
|
||
* - Spatial 2D DWT applied to each temporal subband
|
||
*
|
||
* Quantization strategy:
|
||
* 1. Compute temporal base quantizer: tH_base(level) = Qbase * 2^(beta*level^kappa)
|
||
* - tLL (level 0): coarsest temporal → smallest quantizer
|
||
* - tHH (highest level): finest temporal → largest quantizer
|
||
* 2. Apply spatial perceptual weighting to tH_base
|
||
* 3. Final quantizer: Q_effective = tH_base × spatial_weight
|
||
*
|
||
* NOTE: This function requires encoder context fields:
|
||
* - enc->encoder_preset for sports mode detection
|
||
* - enc->temporal_decomp_levels for temporal level calculation
|
||
* - enc->verbose for debug output
|
||
* - Plus all fields needed by tav_quantise_perceptual()
|
||
*
|
||
* @param enc Encoder context
|
||
* @param gop_coeffs GOP coefficients [frame][pixel] (temporal subbands)
|
||
* @param quantised Output quantized coefficients [frame][pixel]
|
||
* @param num_frames Number of temporal subband frames
|
||
* @param spatial_size Number of spatial coefficients per frame
|
||
* @param base_quantiser Base quantizer value (before temporal/spatial scaling)
|
||
* @param is_chroma 1 for chroma channels, 0 for luma
|
||
*/
|
||
void tav_quantise_3d_dwt(tav_encoder_t *enc,
|
||
float **gop_coeffs, int16_t **quantised, int num_frames,
|
||
int spatial_size, int base_quantiser, int is_chroma);
|
||
|
||
/**
|
||
* Convert floating-point quantizer to integer with dithering (for bitrate mode).
|
||
*
|
||
* Implements Floyd-Steinberg style error diffusion to avoid quantization
|
||
* artifacts when converting float quantizer values to integers for rate control.
|
||
*
|
||
* NOTE: This function requires encoder context fields:
|
||
* - enc->adjusted_quantiser_y_float (current float quantizer)
|
||
* - enc->dither_accumulator (accumulated error, modified by this function)
|
||
*
|
||
* @param enc Encoder context
|
||
* @return Integer quantizer value (0-254)
|
||
*/
|
||
int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc);
|
||
|
||
// =============================================================================
// Perceptual Weight Implementation (requires encoder context)
// =============================================================================

// NOTE: This implementation requires encoder context (enc->quality_level)
// Struct definition will be in encoder header when integrated

#ifndef TAV_ENCODER_QUANTIZE_INTERNAL
// Stand-in definition of the encoder context, compiled only when
// TAV_ENCODER_QUANTIZE_INTERNAL is not defined. It lists the fields the
// quantisation code in this file accesses; the real definition is expected to
// come from the encoder header once integrated.
struct tav_encoder_s {
    int quality_level;                 // index into the anisotropy tables
    int *widths;                       // per-level subband widths, indexed up to decomp_levels
    int *heights;                      // per-level subband heights, indexed up to decomp_levels
    int decomp_levels;                 // spatial decomposition levels
    float dead_zone_threshold;         // luma dead-zone threshold (<= 0 disables it)
    int encoder_preset;                // bit 0 selects the alternative temporal scaling
    int temporal_decomp_levels;        // temporal decomposition levels
    int verbose;
    int frame_count;                   // base frame number passed on per GOP frame
    float adjusted_quantiser_y_float;  // float quantiser consumed by the dithering helper
    float dither_accumulator;          // error carried between dithering calls
    int width;                         // frame width
    int height;                        // frame height
};
#endif

/**
 * Get perceptual weight for a specific subband and level.
 *
 * Psychovisual model based on DWT coefficient statistics and Human Visual
 * System sensitivity. level0 is first normalised from [1, max_levels] onto
 * [1.0, 6.0]; the luma or chroma model is then evaluated for the subband.
 *
 * Robustness:
 *   - enc->quality_level is clamped to the bounds of the anisotropy tables
 *     before it is used as an index.
 *   - With max_levels == 1 the normalisation would divide by zero; the single
 *     level is then treated as normalised level 1.0.
 *
 * @param enc           Encoder context (quality_level is read)
 * @param level0        Decomposition level (1-based: 1=finest, max_levels=coarsest)
 * @param subband_type  Subband type (0=LL, 1=LH, 2=HL, anything else = HH)
 * @param is_chroma     Non-zero for chroma channels, 0 for luma
 * @param max_levels    Maximum decomposition levels
 * @return Perceptual weight multiplier. Chroma weights are clamped to
 *         [1.0, 100.0]; luma weights are not clamped.
 */
static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels) {
    const int last_quality = (int)(sizeof ANISOTROPY_MULT / sizeof ANISOTROPY_MULT[0]) - 1;
    const int quality = CLAMP(enc->quality_level, 0, last_quality);

    float level = 1.0f;
    if (max_levels != 1) {
        level = 1.0f + ((level0 - 1.0f) / (max_levels - 1.0f)) * 5.0f;
    }

    if (is_chroma) {
        // CHROMA CHANNELS: Less critical for human perception, more aggressive quantisation
        const float base = perceptual_model3_chroma_basecurve(quality, level - 1);

        if (subband_type == 0) {
            // LL chroma - still important but less than luma
            return 1.0f;
        }
        if (subband_type == 1) {
            // LH chroma - horizontal chroma details
            return FCLAMP(base, 1.0f, 100.0f);
        }
        if (subband_type == 2) {
            // HL chroma - vertical chroma details (even less critical)
            return FCLAMP(base * ANISOTROPY_MULT_CHROMA[quality], 1.0f, 100.0f);
        }
        // HH chroma - diagonal chroma details (most aggressive)
        return FCLAMP(base * ANISOTROPY_MULT_CHROMA[quality] + ANISOTROPY_BIAS_CHROMA[quality], 1.0f, 100.0f);
    }

    // LUMA: strategy is to keep more horizontal detail

    // LL subband - contains most image energy, preserve carefully
    if (subband_type == 0) {
        return perceptual_model3_LL(quality, level);
    }

    // LH subband - horizontal details (human eyes more sensitive)
    const float LH = perceptual_model3_LH(quality, level);
    if (subband_type == 1) {
        return LH;
    }

    // HL and HH share a reduction factor around normalised levels 2 and 3
    float detailer = 1.0f;
    if (2.2f >= level && level >= 1.8f) {
        detailer = TWO_PIXEL_DETAILER;
    } else if (3.2f >= level && level >= 2.8f) {
        detailer = FOUR_PIXEL_DETAILER;
    }

    // HL subband - vertical details
    const float HL = perceptual_model3_HL(quality, LH);
    if (subband_type == 2) {
        return HL * detailer;
    }

    // HH subband - diagonal details
    return perceptual_model3_HH(LH, HL, level) * detailer;
}

/**
 * Get perceptual weight for coefficient at linear index position.
 *
 * Treats the coefficient buffer as subbands packed back to back: the LL band
 * first, then for each level from decomp_levels down to 1 the LH, HL and HH
 * bands in that order. The band containing linear_idx selects the weight.
 *
 * NOTE(review): the detail band for `level` is sized from
 * enc->widths[decomp_levels - level + 1], while the LL band uses
 * enc->widths[decomp_levels]. Whether that matches how widths[]/heights[] are
 * filled cannot be seen from this file - confirm against the code that
 * populates them.
 *
 * @param enc            Encoder context (widths, heights and quality_level are read)
 * @param linear_idx     Linear coefficient index
 * @param width          Frame width (unused here)
 * @param height         Frame height (unused here)
 * @param decomp_levels  Number of decomposition levels
 * @param is_chroma      1 for chroma channels, 0 for luma
 * @return Perceptual weight multiplier; 1.0 when linear_idx is beyond all bands
 */
static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma) {
    (void)width;
    (void)height;

    // First: LL subband at maximum decomposition level
    int band_end = enc->widths[decomp_levels] * enc->heights[decomp_levels];
    if (linear_idx < band_end) {
        return get_perceptual_weight(enc, decomp_levels, 0, is_chroma, decomp_levels);
    }

    // Then: LH, HL, HH subbands for each level from max down to 1
    for (int level = decomp_levels; level >= 1; level--) {
        const int size_index = decomp_levels - level + 1;
        const int subband_size = enc->widths[size_index] * enc->heights[size_index];

        // subband_type 1 = LH, 2 = HL, 3 = HH, stored consecutively
        for (int subband_type = 1; subband_type <= 3; subband_type++) {
            band_end += subband_size;
            if (linear_idx < band_end) {
                return get_perceptual_weight(enc, level, subband_type, is_chroma, decomp_levels);
            }
        }
    }

    // Fallback for out-of-bounds indices
    return 1.0f;
}

// =============================================================================
// Quantization Function Implementations
// =============================================================================

void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
|
||
float dead_zone_threshold, int width, int height,
|
||
int decomp_levels, int is_chroma) {
|
||
float effective_q = quantiser;
|
||
effective_q = FCLAMP(effective_q, 1.0f, 4096.0f);
|
||
|
||
// Scalar implementation (AVX-512 version would go in separate optimized module)
|
||
for (int i = 0; i < size; i++) {
|
||
float quantised_val = coeffs[i] / effective_q;
|
||
|
||
// Apply dead-zone quantisation ONLY to luma channel and specific subbands
|
||
if (dead_zone_threshold > 0.0f && !is_chroma) {
|
||
int level = get_subband_level(i, width, height, decomp_levels);
|
||
int subband_type = get_subband_type(i, width, height, decomp_levels);
|
||
float level_threshold = 0.0f;
|
||
|
||
if (level == 1) {
|
||
// Finest level
|
||
if (subband_type == 3) {
|
||
// HH1: full dead-zone
|
||
level_threshold = dead_zone_threshold * DEAD_ZONE_FINEST_SCALE;
|
||
} else if (subband_type == 1 || subband_type == 2) {
|
||
// LH1, HL1: half dead-zone
|
||
level_threshold = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
|
||
}
|
||
} else if (level == 2) {
|
||
// Second-finest level
|
||
if (subband_type == 3) {
|
||
// HH2: half dead-zone
|
||
level_threshold = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
|
||
}
|
||
}
|
||
|
||
if (fabsf(quantised_val) <= level_threshold) {
|
||
quantised_val = 0.0f;
|
||
}
|
||
}
|
||
|
||
quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767);
|
||
}
|
||
}
|
||
|
||
/**
 * Quantize DWT coefficients with per-coefficient perceptual weighting.
 *
 * Each coefficient is divided by base_quantiser (clamped to [1, 4096]) times
 * the perceptual weight for its position, optionally zeroed by the luma
 * dead-zone, rounded half away from zero and saturated to the int16_t range.
 * Saturation is done in the float domain before the integer conversion so
 * that out-of-range values never reach the cast.
 *
 * The dead-zone uses enc->dead_zone_threshold with the same per-band scaling
 * as tav_quantise_uniform() (level 1 HH full; level 1 LH/HL and level 2 HH
 * half).
 *
 * NOTE(review): the weight lookup treats index i as an offset into
 * subband-by-subband packing, while the dead-zone classification treats i as
 * a row-major (x, y) position - confirm both agree with the actual layout of
 * `coeffs`.
 *
 * @param enc             Encoder context (widths, heights, quality_level and
 *                        dead_zone_threshold are read)
 * @param coeffs          Input DWT coefficients
 * @param quantised       Output quantised coefficients
 * @param size            Number of coefficients
 * @param base_quantiser  Base quantiser value (clamped to 1-4096)
 * @param width           Frame width
 * @param height          Frame height
 * @param decomp_levels   Number of decomposition levels
 * @param is_chroma       Non-zero for chroma channels, 0 for luma
 * @param frame_count     Current frame number (currently unused)
 */
void tav_quantise_perceptual(tav_encoder_t *enc,
                             float *coeffs, int16_t *quantised, int size,
                             int base_quantiser, int width, int height,
                             int decomp_levels, int is_chroma, int frame_count) {
    (void)frame_count;

    const float effective_base_q = FCLAMP((float)base_quantiser, 1.0f, 4096.0f);
    const float dead_zone = enc->dead_zone_threshold;
    const int use_dead_zone = dead_zone > 0.0f && !is_chroma;

    for (int i = 0; i < size; i++) {
        // Apply perceptual weight based on coefficient's position in DWT layout
        const float weight = get_perceptual_weight_for_position(enc, i, width, height, decomp_levels, is_chroma);
        const float effective_q = effective_base_q * weight;
        float quantised_val = coeffs[i] / effective_q;

        if (use_dead_zone) {
            const int level = get_subband_level(i, width, height, decomp_levels);
            const int subband_type = get_subband_type(i, width, height, decomp_levels);
            float level_threshold = 0.0f;

            if (level == 1) {
                if (subband_type == 3) {
                    level_threshold = dead_zone * DEAD_ZONE_FINEST_SCALE;
                } else if (subband_type == 1 || subband_type == 2) {
                    level_threshold = dead_zone * DEAD_ZONE_FINE_SCALE;
                }
            } else if (level == 2 && subband_type == 3) {
                level_threshold = dead_zone * DEAD_ZONE_FINE_SCALE;
            }

            if (fabsf(quantised_val) <= level_threshold) {
                quantised_val = 0.0f;
            }
        }

        // Round half away from zero, then saturate before converting
        const float rounded = quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f);
        quantised[i] = (int16_t)FCLAMP(rounded, -32768.0f, 32767.0f);
    }
}

/**
 * Quantize 3D DWT coefficients with separable temporal-spatial quantization.
 *
 * For every temporal subband frame t:
 *   1. Determine its temporal level with get_temporal_subband_level().
 *   2. Scale base_quantiser by 2^(BETA * level^KAPPA), round and clamp the
 *      result to [1, 255].
 *   3. Run tav_quantise_perceptual() on that frame with the scaled quantiser.
 *
 * When bit 0 of enc->encoder_preset is set (the sports preset, per the
 * original comment) BETA is 0, so every temporal level uses the unscaled
 * base quantiser.
 *
 * @param enc             Encoder context (encoder_preset,
 *                        temporal_decomp_levels, width, height, decomp_levels
 *                        and frame_count are read here)
 * @param gop_coeffs      GOP coefficients [frame][pixel] (temporal subbands)
 * @param quantised       Output quantised coefficients [frame][pixel]
 * @param num_frames      Number of temporal subband frames
 * @param spatial_size    Number of spatial coefficients per frame
 * @param base_quantiser  Base quantiser value (before temporal/spatial scaling)
 * @param is_chroma       Non-zero for chroma channels, 0 for luma
 */
void tav_quantise_3d_dwt(tav_encoder_t *enc,
                         float **gop_coeffs, int16_t **quantised, int num_frames,
                         int spatial_size, int base_quantiser, int is_chroma) {
    // Sports preset: use finer temporal quantisation (less aggressive)
    const int sports_preset = enc->encoder_preset & 0x01;
    const float BETA = sports_preset ? 0.0f : 0.6f;
    const float KAPPA = sports_preset ? 1.0f : 1.14f;

    // Process each temporal subband independently (separable approach)
    for (int t = 0; t < num_frames; t++) {
        // Step 1: Determine temporal subband level
        const int temporal_level = get_temporal_subband_level(t, num_frames, enc->temporal_decomp_levels);

        // Step 2: Compute temporal base quantiser using exponential scaling
        const float temporal_scale = powf(2.0f, BETA * powf((float)temporal_level, KAPPA));
        const float temporal_quantiser = base_quantiser * temporal_scale;
        const int temporal_base_quantiser = CLAMP((int)roundf(temporal_quantiser), 1, 255);

        // Step 3: Apply spatial quantisation within this temporal subband
        tav_quantise_perceptual(enc,
                                gop_coeffs[t],            // spatial coefficients of this temporal subband
                                quantised[t],             // quantised output for this temporal subband
                                spatial_size,
                                temporal_base_quantiser,  // temporally-scaled base quantiser
                                enc->width,
                                enc->height,
                                enc->decomp_levels,
                                is_chroma,
                                enc->frame_count + t);    // frame number
    }
}

/**
 * Convert floating-point quantizer to integer with dithering (for bitrate mode).
 *
 * Adds the carried-over error to enc->adjusted_quantiser_y_float, rounds by
 * adding 0.5 and truncating, stores half of the new rounding error in
 * enc->dither_accumulator for the next call, and returns the integer clamped
 * to [0, 254]. The stored error is measured against the unclamped integer.
 *
 * @param enc  Encoder context (adjusted_quantiser_y_float is read,
 *             dither_accumulator is read and updated)
 * @return Integer quantizer value (0-254)
 */
int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc) {
    // Add accumulated dithering error
    const float target = enc->adjusted_quantiser_y_float + enc->dither_accumulator;

    // Round to nearest integer
    const int rounded = (int)(target + 0.5f);

    // Floyd-Steinberg style error diffusion: carry 50% of the error to the next frame
    const float quantisation_error = target - (float)rounded;
    enc->dither_accumulator = quantisation_error * 0.5f;

    // Clamp to valid range
    return CLAMP(rounded, 0, 254);
}
