From f7d98e74e36615977fc291999fede93fafd7afc5 Mon Sep 17 00:00:00 2001 From: minjaesong Date: Sun, 5 Oct 2025 20:47:17 +0900 Subject: [PATCH] quantisation deadzonning (massive compression gain) --- terranmon.txt | 2 +- video_encoder/encoder_tav.c | 133 ++++++++++++++++++++++++++++++------ 2 files changed, 113 insertions(+), 22 deletions(-) diff --git a/terranmon.txt b/terranmon.txt index c75d67a..2002089 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -916,7 +916,7 @@ transmission capability, and region-of-interest coding. - bit 2 = infinite loop (must be ignored when File Role is 1) - bit 7 = has no actual packets, this file is header-only without an Intro Movie uint8 Video Flags - - bit 0 = unused + - bit 0 = reserved - bit 1 = is NTSC framerate - bit 2 = is lossless mode (shorthand for `-Q1,1,1 -w 0 --intra-only --no-perceptual-tuning --arate 384`) diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index 3748ac5..c753dac 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -186,6 +186,15 @@ static const int QUALITY_CO[] = {123, 108, 91, 76, 59, 29}; // 240, 180, 120, 90 static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39}; // 424, 304, 200, 144, 90, 40 static const int QUALITY_ALPHA[] = {79, 47, 23, 11, 5, 2}; // 96, 48, 24, 12, 6, 3 +// Dead-zone quantization thresholds per quality level +// Higher values = more aggressive (more coefficients set to zero) +static const float DEAD_ZONE_THRESHOLD[] = {2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f}; + +// Dead-zone scaling factors for different subband levels +#define DEAD_ZONE_FINEST_SCALE 1.0f // Full dead-zone for finest level (level 6) +#define DEAD_ZONE_FINE_SCALE 0.5f // Reduced dead-zone for second-finest level (level 5) +// Coarser levels (0-4) use 0.0f (no dead-zone) to preserve structural information + // psychovisual tuning parameters static const float ANISOTROPY_MULT[] = {2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f}; static const float ANISOTROPY_BIAS[] = {0.4f, 0.2f, 0.1f, 0.0f, 0.0f, 0.0f}; @@ -243,6 +252,7 @@ typedef struct tav_encoder_s { int quantiser_y, quantiser_co, quantiser_cg; int wavelet_filter; int decomp_levels; + float dead_zone_threshold; // Dead-zone quantization threshold (0 = disabled) int bitrate_mode; int target_bitrate; @@ -571,6 +581,7 @@ static void show_usage(const char *program_name); static tav_encoder_t* create_encoder(void); static void cleanup_encoder(tav_encoder_t *enc); static int initialise_encoder(tav_encoder_t *enc); +static int get_subband_level(int linear_idx, int width, int height, int decomp_levels); static void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height); static int calculate_max_decomp_levels(int width, int height); @@ -612,6 +623,7 @@ static void show_usage(const char *program_name) { printf(" --intra-only Disable delta encoding (less noisy picture at the cost of larger file)\n"); printf(" --ictcp Use ICtCp colour space instead of YCoCg-R (use when source is in BT.2100)\n"); printf(" --no-perceptual-tuning Disable perceptual quantisation\n"); + printf(" --no-dead-zone Disable dead-zone quantization (for comparison/testing)\n"); printf(" --encode-limit N Encode only first N frames (useful for testing/analysis)\n"); printf(" --dump-frame N Dump quantised coefficients for frame N (creates .bin files)\n"); printf(" --wavelet N Wavelet filter: 0=CDF 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar (default: 1)\n"); @@ -670,6 +682,7 @@ static tav_encoder_t* create_encoder(void) { enc->quantiser_y = QUALITY_Y[DEFAULT_QUALITY]; enc->quantiser_co = QUALITY_CO[DEFAULT_QUALITY]; enc->quantiser_cg = QUALITY_CG[DEFAULT_QUALITY]; + enc->dead_zone_threshold = DEAD_ZONE_THRESHOLD[DEFAULT_QUALITY]; enc->intra_only = 0; enc->monoblock = 1; // Default to monoblock mode enc->perceptual_tuning = 1; // Default to perceptual quantisation (versions 5/6) @@ -1383,12 +1396,33 @@ static size_t preprocess_coefficients_variable_layout(int16_t *coeffs_y, int16_t } // Quantisation for DWT subbands with rate control -static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int size, int quantiser) { +static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int size, int quantiser, float dead_zone_threshold, int width, int height, int decomp_levels, int is_chroma) { float effective_q = quantiser; effective_q = FCLAMP(effective_q, 1.0f, 255.0f); for (int i = 0; i < size; i++) { float quantised_val = coeffs[i] / effective_q; + + // Apply dead-zone quantization ONLY to luma channel and finest subbands + // Chroma channels skip dead-zone (already heavily quantized, avoid color banding) + if (dead_zone_threshold > 0.0f && !is_chroma) { + int level = get_subband_level(i, width, height, decomp_levels); + float level_threshold = 0.0f; + + if (level == decomp_levels) { + // Finest level (level 6): full dead-zone + level_threshold = dead_zone_threshold * DEAD_ZONE_FINEST_SCALE; + } else if (level == decomp_levels - 1) { + // Second-finest level (level 5): reduced dead-zone + level_threshold = dead_zone_threshold * DEAD_ZONE_FINE_SCALE; + } + // Coarser levels (0-4): no dead-zone to preserve structural information + + if (fabsf(quantised_val) <= level_threshold) { + quantised_val = 0.0f; + } + } + quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767); } } @@ -1485,6 +1519,38 @@ static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_t // Determine perceptual weight for coefficient at linear position (matches actual DWT layout) +// Get decomposition level for a coefficient at linear index +// Returns: 0 for LL subband, 1-decomp_levels for detail subbands +static int get_subband_level(int linear_idx, int width, int height, int decomp_levels) { + int offset = 0; + + // First: LL subband at maximum decomposition level + int ll_width = width >> decomp_levels; + int ll_height = height >> decomp_levels; + int ll_size = ll_width * ll_height; + + if (linear_idx < offset + ll_size) { + return 0; // LL subband (coarsest) + } + offset += ll_size; + + // Then: LH, HL, HH subbands for each level from max down to 1 + for (int level = decomp_levels; level >= 1; level--) { + int level_width = width >> (decomp_levels - level + 1); + int level_height = height >> (decomp_levels - level + 1); + int subband_size = level_width * level_height; + + // Check all three subbands (LH, HL, HH) at this level + if (linear_idx < offset + (subband_size * 3)) { + return level; // Return decomposition level (1-6) + } + offset += subband_size * 3; + } + + // Fallback for out-of-bounds indices + return 0; +} + static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma) { // Map linear coefficient index to DWT subband using same layout as decoder int offset = 0; @@ -1543,6 +1609,27 @@ static void quantise_dwt_coefficients_perceptual_per_coeff(tav_encoder_t *enc, float weight = get_perceptual_weight_for_position(enc, i, width, height, decomp_levels, is_chroma); float effective_q = effective_base_q * weight; float quantised_val = coeffs[i] / effective_q; + + // Apply dead-zone quantization ONLY to luma channel and finest subbands + // Chroma channels skip dead-zone (already heavily quantized, avoid color banding) + if (enc->dead_zone_threshold > 0.0f && !is_chroma) { + int level = get_subband_level(i, width, height, decomp_levels); + float level_threshold = 0.0f; + + if (level == decomp_levels) { + // Finest level (level 6): full dead-zone + level_threshold = enc->dead_zone_threshold * DEAD_ZONE_FINEST_SCALE; + } else if (level == decomp_levels - 1) { + // Second-finest level (level 5): reduced dead-zone + level_threshold = enc->dead_zone_threshold * DEAD_ZONE_FINE_SCALE; + } + // Coarser levels (0-4): no dead-zone to preserve structural information + + if (fabsf(quantised_val) <= level_threshold) { + quantised_val = 0.0f; + } + } + quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767); } } @@ -1656,9 +1743,9 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, quantise_dwt_coefficients_perceptual_per_coeff(enc, (float*)tile_cg_data, quantised_cg, tile_size, this_frame_qCg, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count); } else { // Legacy uniform quantisation - quantise_dwt_coefficients((float*)tile_y_data, quantised_y, tile_size, this_frame_qY); - quantise_dwt_coefficients((float*)tile_co_data, quantised_co, tile_size, this_frame_qCo); - quantise_dwt_coefficients((float*)tile_cg_data, quantised_cg, tile_size, this_frame_qCg); + quantise_dwt_coefficients((float*)tile_y_data, quantised_y, tile_size, this_frame_qY, enc->dead_zone_threshold, enc->width, enc->height, enc->decomp_levels, 0); + quantise_dwt_coefficients((float*)tile_co_data, quantised_co, tile_size, this_frame_qCo, enc->dead_zone_threshold, enc->width, enc->height, enc->decomp_levels, 1); + quantise_dwt_coefficients((float*)tile_cg_data, quantised_cg, tile_size, this_frame_qCg, enc->dead_zone_threshold, enc->width, enc->height, enc->decomp_levels, 1); } // Store current coefficients for future delta reference @@ -1689,9 +1776,9 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, } // Quantise the deltas with uniform quantisation (perceptual tuning is for original coefficients, not deltas) - quantise_dwt_coefficients(delta_y, quantised_y, tile_size, this_frame_qY); - quantise_dwt_coefficients(delta_co, quantised_co, tile_size, this_frame_qCo); - quantise_dwt_coefficients(delta_cg, quantised_cg, tile_size, this_frame_qCg); + quantise_dwt_coefficients(delta_y, quantised_y, tile_size, this_frame_qY, enc->dead_zone_threshold, enc->width, enc->height, enc->decomp_levels, 0); + quantise_dwt_coefficients(delta_co, quantised_co, tile_size, this_frame_qCo, enc->dead_zone_threshold, enc->width, enc->height, enc->decomp_levels, 1); + quantise_dwt_coefficients(delta_cg, quantised_cg, tile_size, this_frame_qCg, enc->dead_zone_threshold, enc->width, enc->height, enc->decomp_levels, 1); // Reconstruct coefficients like decoder will (previous + uniform_dequantised_delta) for (int i = 0; i < tile_size; i++) { @@ -2292,7 +2379,7 @@ static int write_tav_header(tav_encoder_t *enc) { fputc(extra_flags, enc->output_fp); uint8_t video_flags = 0; -// if (!enc->progressive) video_flags |= 0x01; // Interlaced +// if (!enc->progressive) video_flags |= 0x01; // Interlaced (deprecated, reserved for future use) if (enc->is_ntsc_framerate) video_flags |= 0x02; // NTSC if (enc->lossless) video_flags |= 0x04; // Lossless fputc(video_flags, enc->output_fp); @@ -3253,6 +3340,7 @@ int main(int argc, char *argv[]) { {"intra-only", no_argument, 0, 1006}, {"ictcp", no_argument, 0, 1005}, {"no-perceptual-tuning", no_argument, 0, 1007}, + {"no-dead-zone", no_argument, 0, 1013}, {"encode-limit", required_argument, 0, 1008}, {"dump-frame", required_argument, 0, 1009}, {"fontrom-lo", required_argument, 0, 1011}, @@ -3262,7 +3350,7 @@ int main(int argc, char *argv[]) { }; int c, option_index = 0; - while ((c = getopt_long(argc, argv, "i:o:s:f:q:Q:w:c:d:b:pS:vt", long_options, &option_index)) != -1) { + while ((c = getopt_long(argc, argv, "i:o:s:f:q:Q:a:w:c:d:b:pS:vt", long_options, &option_index)) != -1) { switch (c) { case 'i': enc->input_file = strdup(optarg); @@ -3282,6 +3370,7 @@ int main(int argc, char *argv[]) { enc->quantiser_y = QUALITY_Y[enc->quality_level]; enc->quantiser_co = QUALITY_CO[enc->quality_level]; enc->quantiser_cg = QUALITY_CG[enc->quality_level]; + enc->dead_zone_threshold = DEAD_ZONE_THRESHOLD[enc->quality_level]; break; case 'Q': // Parse quantiser values Y,Co,Cg @@ -3324,6 +3413,7 @@ int main(int argc, char *argv[]) { enc->quantiser_y = QUALITY_Y[enc->quality_level]; enc->quantiser_co = QUALITY_CO[enc->quality_level]; enc->quantiser_cg = QUALITY_CG[enc->quality_level]; + enc->dead_zone_threshold = DEAD_ZONE_THRESHOLD[enc->quality_level]; break; } case 'c': { @@ -3371,6 +3461,9 @@ int main(int argc, char *argv[]) { case 1007: // --no-perceptual-tuning enc->perceptual_tuning = 0; break; + case 1013: // --no-dead-zone + enc->dead_zone_threshold = 0.0f; + break; case 1008: // --encode-limit enc->encode_limit = atoi(optarg); if (enc->encode_limit < 0) { @@ -3389,20 +3482,18 @@ int main(int argc, char *argv[]) { enc->fontrom_hi_file = strdup(optarg); break; case 'a': - { - int bitrate = atoi(optarg); - int valid_bitrate = validate_mp2_bitrate(bitrate); - if (valid_bitrate == 0) { - fprintf(stderr, "Error: Invalid MP2 bitrate %d. Valid values are: ", bitrate); - for (int i = 0; i < sizeof(MP2_VALID_BITRATES) / sizeof(int); i++) { - fprintf(stderr, "%d%s", MP2_VALID_BITRATES[i], - (i < sizeof(MP2_VALID_BITRATES) / sizeof(int) - 1) ? ", " : "\n"); - } - cleanup_encoder(enc); - return 1; + int bitrate = atoi(optarg); + int valid_bitrate = validate_mp2_bitrate(bitrate); + if (valid_bitrate == 0) { + fprintf(stderr, "Error: Invalid MP2 bitrate %d. Valid values are: ", bitrate); + for (int i = 0; i < sizeof(MP2_VALID_BITRATES) / sizeof(int); i++) { + fprintf(stderr, "%d%s", MP2_VALID_BITRATES[i], + (i < sizeof(MP2_VALID_BITRATES) / sizeof(int) - 1) ? ", " : "\n"); } - enc->audio_bitrate = valid_bitrate; + cleanup_encoder(enc); + return 1; } + enc->audio_bitrate = valid_bitrate; break; case 1004: // --help show_usage(argv[0]);