mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-06-11 23:34:04 +09:00
tav: librarying
This commit is contained in:
@@ -61,7 +61,7 @@ typedef struct {
|
|||||||
int quality_y; // Luma quality (0-5, default: 3)
|
int quality_y; // Luma quality (0-5, default: 3)
|
||||||
int quality_co; // Orange chrominance quality (0-5, default: 3)
|
int quality_co; // Orange chrominance quality (0-5, default: 3)
|
||||||
int quality_cg; // Green chrominance quality (0-5, default: 3)
|
int quality_cg; // Green chrominance quality (0-5, default: 3)
|
||||||
int dead_zone_threshold; // Dead-zone quantization threshold (0=disabled, 1-10 typical)
|
float dead_zone_threshold; // Dead-zone quantization threshold (0.0=disabled, 0.6-1.5 typical)
|
||||||
|
|
||||||
// === Entropy Coding ===
|
// === Entropy Coding ===
|
||||||
int entropy_coder; // 0=Twobitmap (default), 1=EZBC (better for high-quality)
|
int entropy_coder; // 0=Twobitmap (default), 1=EZBC (better for high-quality)
|
||||||
|
|||||||
@@ -43,6 +43,7 @@ static const int QLUT[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21
|
|||||||
static const int QUALITY_Y[] = {79, 47, 23, 11, 5, 2}; // Quality levels 0-5
|
static const int QUALITY_Y[] = {79, 47, 23, 11, 5, 2}; // Quality levels 0-5
|
||||||
static const int QUALITY_CO[] = {123, 108, 91, 76, 59, 29};
|
static const int QUALITY_CO[] = {123, 108, 91, 76, 59, 29};
|
||||||
static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39};
|
static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39};
|
||||||
|
static const float DEAD_ZONE_THRESHOLD[] = {1.5f, 1.5f, 1.2f, 1.1f, 0.8f, 0.6f, 0.0f};
|
||||||
|
|
||||||
// Channel layout definitions (from TAV specification)
|
// Channel layout definitions (from TAV specification)
|
||||||
#define CHANNEL_LAYOUT_YCOCG 0
|
#define CHANNEL_LAYOUT_YCOCG 0
|
||||||
@@ -87,10 +88,17 @@ struct tav_encoder_s {
|
|||||||
int quality_level; // For perceptual quantization
|
int quality_level; // For perceptual quantization
|
||||||
int *widths; // Subband widths array (per decomposition level)
|
int *widths; // Subband widths array (per decomposition level)
|
||||||
int *heights; // Subband heights array (per decomposition level)
|
int *heights; // Subband heights array (per decomposition level)
|
||||||
int dead_zone_threshold; // Dead-zone quantization threshold
|
int decomp_levels; // Number of spatial DWT decomposition levels
|
||||||
|
float dead_zone_threshold; // Dead-zone quantization threshold
|
||||||
int encoder_preset; // Preset flags (sports mode, etc.)
|
int encoder_preset; // Preset flags (sports mode, etc.)
|
||||||
int temporal_decomp_levels; // Temporal DWT levels
|
int temporal_decomp_levels; // Temporal DWT levels
|
||||||
int verbose; // Verbose output flag
|
int verbose; // Verbose output flag
|
||||||
|
int frame_count; // Current frame number for encoding
|
||||||
|
float adjusted_quantiser_y_float; // For bitrate control (if needed)
|
||||||
|
float dither_accumulator; // Dither accumulator for bitrate mode
|
||||||
|
int width; // Frame width
|
||||||
|
int height; // Frame height
|
||||||
|
int perceptual_tuning; // 1 = perceptual quantization, 0 = uniform
|
||||||
};
|
};
|
||||||
|
|
||||||
// GOP slot for circular buffering
|
// GOP slot for circular buffering
|
||||||
@@ -282,7 +290,7 @@ void tav_encoder_params_init(tav_encoder_params_t *params, int width, int height
|
|||||||
params->quality_y = QUALITY_Y[3]; // 11 - quantiser index
|
params->quality_y = QUALITY_Y[3]; // 11 - quantiser index
|
||||||
params->quality_co = QUALITY_CO[3]; // 76 - quantiser index
|
params->quality_co = QUALITY_CO[3]; // 76 - quantiser index
|
||||||
params->quality_cg = QUALITY_CG[3]; // 99 - quantiser index
|
params->quality_cg = QUALITY_CG[3]; // 99 - quantiser index
|
||||||
params->dead_zone_threshold = 0; // Disabled by default
|
params->dead_zone_threshold = DEAD_ZONE_THRESHOLD[3]; // 1.1 for Q3
|
||||||
|
|
||||||
// Compression
|
// Compression
|
||||||
params->entropy_coder = 1; // EZBC as default
|
params->entropy_coder = 1; // EZBC as default
|
||||||
@@ -963,6 +971,13 @@ static tav_encoder_t *create_compat_encoder(tav_encoder_context_t *ctx) {
|
|||||||
enc->encoder_preset = ctx->encoder_preset;
|
enc->encoder_preset = ctx->encoder_preset;
|
||||||
enc->temporal_decomp_levels = ctx->temporal_levels;
|
enc->temporal_decomp_levels = ctx->temporal_levels;
|
||||||
enc->verbose = ctx->verbose;
|
enc->verbose = ctx->verbose;
|
||||||
|
enc->perceptual_tuning = ctx->perceptual_tuning;
|
||||||
|
|
||||||
|
// Copy frame dimensions (needed by quantisation functions)
|
||||||
|
enc->width = ctx->width;
|
||||||
|
enc->height = ctx->height;
|
||||||
|
enc->decomp_levels = ctx->decomp_levels;
|
||||||
|
enc->frame_count = 0; // Will be updated during encoding
|
||||||
|
|
||||||
// Calculate subband widths and heights arrays
|
// Calculate subband widths and heights arrays
|
||||||
// These are needed by the perceptual quantization module
|
// These are needed by the perceptual quantization module
|
||||||
@@ -1319,11 +1334,11 @@ static int encode_gop_intra_only(tav_encoder_context_t *ctx, gop_slot_t *slot) {
|
|||||||
|
|
||||||
if (ctx->perceptual_tuning) {
|
if (ctx->perceptual_tuning) {
|
||||||
tav_quantise_perceptual(ctx->compat_enc, work_y, quant_y, num_pixels,
|
tav_quantise_perceptual(ctx->compat_enc, work_y, quant_y, num_pixels,
|
||||||
base_quantiser_y, width, height, ctx->decomp_levels, 0, 0);
|
base_quantiser_y, (float)ctx->dead_zone_threshold, width, height, ctx->decomp_levels, 0, 0);
|
||||||
tav_quantise_perceptual(ctx->compat_enc, work_co, quant_co, num_pixels,
|
tav_quantise_perceptual(ctx->compat_enc, work_co, quant_co, num_pixels,
|
||||||
base_quantiser_co, width, height, ctx->decomp_levels, 1, 0);
|
base_quantiser_co, (float)ctx->dead_zone_threshold, width, height, ctx->decomp_levels, 1, 0);
|
||||||
tav_quantise_perceptual(ctx->compat_enc, work_cg, quant_cg, num_pixels,
|
tav_quantise_perceptual(ctx->compat_enc, work_cg, quant_cg, num_pixels,
|
||||||
base_quantiser_cg, width, height, ctx->decomp_levels, 1, 0);
|
base_quantiser_cg, (float)ctx->dead_zone_threshold, width, height, ctx->decomp_levels, 1, 0);
|
||||||
} else {
|
} else {
|
||||||
tav_quantise_uniform(work_y, quant_y, num_pixels, base_quantiser_y,
|
tav_quantise_uniform(work_y, quant_y, num_pixels, base_quantiser_y,
|
||||||
(float)ctx->dead_zone_threshold, width, height,
|
(float)ctx->dead_zone_threshold, width, height,
|
||||||
@@ -1448,6 +1463,18 @@ static int encode_gop_unified(tav_encoder_context_t *ctx, gop_slot_t *slot) {
|
|||||||
int base_quantiser_co = QLUT[ctx->quantiser_co];
|
int base_quantiser_co = QLUT[ctx->quantiser_co];
|
||||||
int base_quantiser_cg = QLUT[ctx->quantiser_cg];
|
int base_quantiser_cg = QLUT[ctx->quantiser_cg];
|
||||||
|
|
||||||
|
// CRITICAL: Use UNIFORM quantization for 3D DWT GOPs to match old encoder behavior
|
||||||
|
// The old encoder had a bug where decomp_levels=0 caused perceptual weights to fall back to 1.0
|
||||||
|
// This accidentally produced better results than true perceptual quantization
|
||||||
|
// Preserve this behavior for compatibility with decoder expectations
|
||||||
|
int saved_perceptual = ctx->compat_enc->perceptual_tuning;
|
||||||
|
ctx->compat_enc->perceptual_tuning = 0; // Temporarily disable for GOP encoding
|
||||||
|
|
||||||
|
if (ctx->verbose) {
|
||||||
|
fprintf(stderr, "[DEBUG] GOP quantization: decomp_levels=%d, base_q_y=%d, perceptual=%d (forced uniform), preset=0x%02x\n",
|
||||||
|
ctx->compat_enc->decomp_levels, base_quantiser_y, ctx->compat_enc->perceptual_tuning, ctx->compat_enc->encoder_preset);
|
||||||
|
}
|
||||||
|
|
||||||
tav_quantise_3d_dwt(ctx->compat_enc, work_y, quant_y, num_frames, num_pixels,
|
tav_quantise_3d_dwt(ctx->compat_enc, work_y, quant_y, num_frames, num_pixels,
|
||||||
base_quantiser_y, 0);
|
base_quantiser_y, 0);
|
||||||
tav_quantise_3d_dwt(ctx->compat_enc, work_co, quant_co, num_frames, num_pixels,
|
tav_quantise_3d_dwt(ctx->compat_enc, work_co, quant_co, num_frames, num_pixels,
|
||||||
@@ -1455,6 +1482,8 @@ static int encode_gop_unified(tav_encoder_context_t *ctx, gop_slot_t *slot) {
|
|||||||
tav_quantise_3d_dwt(ctx->compat_enc, work_cg, quant_cg, num_frames, num_pixels,
|
tav_quantise_3d_dwt(ctx->compat_enc, work_cg, quant_cg, num_frames, num_pixels,
|
||||||
base_quantiser_cg, 1);
|
base_quantiser_cg, 1);
|
||||||
|
|
||||||
|
ctx->compat_enc->perceptual_tuning = saved_perceptual; // Restore for I-frames
|
||||||
|
|
||||||
// Step 4: Unified GOP preprocessing (EZBC only)
|
// Step 4: Unified GOP preprocessing (EZBC only)
|
||||||
size_t preprocess_capacity = num_pixels * num_frames * 3 * sizeof(int16_t) + 65536;
|
size_t preprocess_capacity = num_pixels * num_frames * 3 * sizeof(int16_t) + 65536;
|
||||||
uint8_t *preprocess_buffer = tav_malloc(preprocess_capacity);
|
uint8_t *preprocess_buffer = tav_malloc(preprocess_capacity);
|
||||||
|
|||||||
@@ -310,6 +310,7 @@ void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quant
|
|||||||
* @param quantised Output quantized coefficients (int16_t)
|
* @param quantised Output quantized coefficients (int16_t)
|
||||||
* @param size Number of coefficients
|
* @param size Number of coefficients
|
||||||
* @param base_quantiser Base quantizer value (before perceptual weighting)
|
* @param base_quantiser Base quantizer value (before perceptual weighting)
|
||||||
|
* @param dead_zone_threshold Dead-zone threshold (0.0 = disabled)
|
||||||
* @param width Frame width
|
* @param width Frame width
|
||||||
* @param height Frame height
|
* @param height Frame height
|
||||||
* @param decomp_levels Number of decomposition levels
|
* @param decomp_levels Number of decomposition levels
|
||||||
@@ -318,7 +319,7 @@ void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quant
|
|||||||
*/
|
*/
|
||||||
void tav_quantise_perceptual(tav_encoder_t *enc,
|
void tav_quantise_perceptual(tav_encoder_t *enc,
|
||||||
float *coeffs, int16_t *quantised, int size,
|
float *coeffs, int16_t *quantised, int size,
|
||||||
int base_quantiser, int width, int height,
|
int base_quantiser, float dead_zone_threshold, int width, int height,
|
||||||
int decomp_levels, int is_chroma, int frame_count);
|
int decomp_levels, int is_chroma, int frame_count);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -391,6 +392,7 @@ struct tav_encoder_s {
|
|||||||
float dither_accumulator;
|
float dither_accumulator;
|
||||||
int width;
|
int width;
|
||||||
int height;
|
int height;
|
||||||
|
int perceptual_tuning;
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -434,6 +436,11 @@ static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_t
|
|||||||
}
|
}
|
||||||
|
|
||||||
static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma) {
|
static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma) {
|
||||||
|
// If perceptual tuning is disabled, use uniform quantization (weight = 1.0)
|
||||||
|
if (!enc->perceptual_tuning) {
|
||||||
|
return 1.0f;
|
||||||
|
}
|
||||||
|
|
||||||
// Map linear coefficient index to DWT subband using same layout as decoder
|
// Map linear coefficient index to DWT subband using same layout as decoder
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
|
|
||||||
@@ -525,7 +532,7 @@ void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quant
|
|||||||
|
|
||||||
void tav_quantise_perceptual(tav_encoder_t *enc,
|
void tav_quantise_perceptual(tav_encoder_t *enc,
|
||||||
float *coeffs, int16_t *quantised, int size,
|
float *coeffs, int16_t *quantised, int size,
|
||||||
int base_quantiser, int width, int height,
|
int base_quantiser, float dead_zone_threshold, int width, int height,
|
||||||
int decomp_levels, int is_chroma, int frame_count) {
|
int decomp_levels, int is_chroma, int frame_count) {
|
||||||
float effective_base_q = base_quantiser;
|
float effective_base_q = base_quantiser;
|
||||||
effective_base_q = FCLAMP(effective_base_q, 1.0f, 4096.0f);
|
effective_base_q = FCLAMP(effective_base_q, 1.0f, 4096.0f);
|
||||||
@@ -537,20 +544,20 @@ void tav_quantise_perceptual(tav_encoder_t *enc,
|
|||||||
float quantised_val = coeffs[i] / effective_q;
|
float quantised_val = coeffs[i] / effective_q;
|
||||||
|
|
||||||
// Apply dead-zone quantisation ONLY to luma channel
|
// Apply dead-zone quantisation ONLY to luma channel
|
||||||
if (enc->dead_zone_threshold > 0.0f && !is_chroma) {
|
if (dead_zone_threshold > 0.0f && !is_chroma) {
|
||||||
int level = get_subband_level(i, width, height, decomp_levels);
|
int level = get_subband_level(i, width, height, decomp_levels);
|
||||||
int subband_type = get_subband_type(i, width, height, decomp_levels);
|
int subband_type = get_subband_type(i, width, height, decomp_levels);
|
||||||
float level_threshold = 0.0f;
|
float level_threshold = 0.0f;
|
||||||
|
|
||||||
if (level == 1) {
|
if (level == 1) {
|
||||||
if (subband_type == 3) {
|
if (subband_type == 3) {
|
||||||
level_threshold = enc->dead_zone_threshold * DEAD_ZONE_FINEST_SCALE;
|
level_threshold = dead_zone_threshold * DEAD_ZONE_FINEST_SCALE;
|
||||||
} else if (subband_type == 1 || subband_type == 2) {
|
} else if (subband_type == 1 || subband_type == 2) {
|
||||||
level_threshold = enc->dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
|
level_threshold = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
|
||||||
}
|
}
|
||||||
} else if (level == 2) {
|
} else if (level == 2) {
|
||||||
if (subband_type == 3) {
|
if (subband_type == 3) {
|
||||||
level_threshold = enc->dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
|
level_threshold = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -583,12 +590,16 @@ void tav_quantise_3d_dwt(tav_encoder_t *enc,
|
|||||||
temporal_base_quantiser = CLAMP(temporal_base_quantiser, 1, 255);
|
temporal_base_quantiser = CLAMP(temporal_base_quantiser, 1, 255);
|
||||||
|
|
||||||
// Step 3: Apply spatial quantisation within this temporal subband
|
// Step 3: Apply spatial quantisation within this temporal subband
|
||||||
|
// Perceptual tuning is controlled by enc->perceptual_tuning (a field of tav_encoder_s).
|
||||||
|
// NOTE: when that flag is 0, get_perceptual_weight_for_position() returns 1.0 (uniform weighting).
|
||||||
|
// The perceptual quantiser is therefore always called here; callers disable weighting by clearing the flag.
|
||||||
tav_quantise_perceptual(
|
tav_quantise_perceptual(
|
||||||
enc,
|
enc,
|
||||||
gop_coeffs[t], // Input: spatial coefficients for this temporal subband
|
gop_coeffs[t], // Input: spatial coefficients for this temporal subband
|
||||||
quantised[t], // Output: quantised spatial coefficients
|
quantised[t], // Output: quantised spatial coefficients
|
||||||
spatial_size, // Number of spatial coefficients
|
spatial_size, // Number of spatial coefficients
|
||||||
temporal_base_quantiser, // Temporally-scaled base quantiser
|
temporal_base_quantiser, // Temporally-scaled base quantiser
|
||||||
|
enc->dead_zone_threshold, // Dead zone threshold
|
||||||
enc->width, // Frame width
|
enc->width, // Frame width
|
||||||
enc->height, // Frame height
|
enc->height, // Frame height
|
||||||
enc->decomp_levels, // Spatial decomposition levels
|
enc->decomp_levels, // Spatial decomposition levels
|
||||||
|
|||||||
@@ -64,6 +64,7 @@ void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quant
|
|||||||
* @param quantised Output quantized coefficients (int16_t)
|
* @param quantised Output quantized coefficients (int16_t)
|
||||||
* @param size Number of coefficients
|
* @param size Number of coefficients
|
||||||
* @param base_quantiser Base quantizer value (before perceptual weighting)
|
* @param base_quantiser Base quantizer value (before perceptual weighting)
|
||||||
|
* @param dead_zone_threshold Dead-zone threshold (0.0 = disabled)
|
||||||
* @param width Frame width
|
* @param width Frame width
|
||||||
* @param height Frame height
|
* @param height Frame height
|
||||||
* @param decomp_levels Number of decomposition levels
|
* @param decomp_levels Number of decomposition levels
|
||||||
@@ -72,7 +73,7 @@ void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quant
|
|||||||
*/
|
*/
|
||||||
void tav_quantise_perceptual(tav_encoder_t *enc,
|
void tav_quantise_perceptual(tav_encoder_t *enc,
|
||||||
float *coeffs, int16_t *quantised, int size,
|
float *coeffs, int16_t *quantised, int size,
|
||||||
int base_quantiser, int width, int height,
|
int base_quantiser, float dead_zone_threshold, int width, int height,
|
||||||
int decomp_levels, int is_chroma, int frame_count);
|
int decomp_levels, int is_chroma, int frame_count);
|
||||||
|
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
|
|||||||
@@ -49,6 +49,7 @@
|
|||||||
static const int QUALITY_Y[] = {79, 47, 23, 11, 5, 2}; // Quality levels 0-5
|
static const int QUALITY_Y[] = {79, 47, 23, 11, 5, 2}; // Quality levels 0-5
|
||||||
static const int QUALITY_CO[] = {123, 108, 91, 76, 59, 29};
|
static const int QUALITY_CO[] = {123, 108, 91, 76, 59, 29};
|
||||||
static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39};
|
static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39};
|
||||||
|
static const float DEAD_ZONE_THRESHOLD[] = {1.5f, 1.5f, 1.2f, 1.1f, 0.8f, 0.6f, 0.0f};
|
||||||
|
|
||||||
static char TEMP_AUDIO_FILE[TEMP_AUDIO_FILE_SIZE];
|
static char TEMP_AUDIO_FILE[TEMP_AUDIO_FILE_SIZE];
|
||||||
static char TEMP_PCM_FILE[TEMP_PCM_FILE_SIZE];
|
static char TEMP_PCM_FILE[TEMP_PCM_FILE_SIZE];
|
||||||
@@ -1255,6 +1256,7 @@ int main(int argc, char *argv[]) {
|
|||||||
cli.enc_params.quality_y = QUALITY_Y[q];
|
cli.enc_params.quality_y = QUALITY_Y[q];
|
||||||
cli.enc_params.quality_co = QUALITY_CO[q];
|
cli.enc_params.quality_co = QUALITY_CO[q];
|
||||||
cli.enc_params.quality_cg = QUALITY_CG[q];
|
cli.enc_params.quality_cg = QUALITY_CG[q];
|
||||||
|
cli.enc_params.dead_zone_threshold = DEAD_ZONE_THRESHOLD[q];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 'Q': {
|
case 'Q': {
|
||||||
|
|||||||
Reference in New Issue
Block a user