diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index 6a49712..13b65fa 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -4091,6 +4091,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { var ANISOTROPY_MULT = floatArrayOf(1.8f, 1.6f, 1.4f, 1.2f, 1.0f, 1.0f) var ANISOTROPY_BIAS = floatArrayOf(0.2f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f) + var ANISOTROPY_BIAS_CHROMA = floatArrayOf(0.4f, 0.3f, 0.2f, 0.1f, 0.0f, 0.0f) @@ -4119,8 +4120,14 @@ class GraphicsJSR223Delegate(private val vm: VM) { return n / m } - private val FOUR_PIXEL_DETAILER = 0.88f + fun perceptual_model3_chroma_basecurve(quality: Int, level: Int): Float { + return 1.0f - (1.0f / (0.5f * quality * quality + 1.0f)) * (level - 4f) // just a line that passes (4,1) + } + private val FOUR_PIXEL_DETAILER = 0.88f + private val TWO_PIXEL_DETAILER = 0.92f + + // level is one-based index private fun getPerceptualWeight(qYGlobal: Int, level: Int, subbandType: Int, isChroma: Boolean, maxLevels: Int): Float { // Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity @@ -4144,58 +4151,28 @@ class GraphicsJSR223Delegate(private val vm: VM) { // HL subband - vertical details val HL: Float = perceptual_model3_HL(qualityLevel, LH) - if (subbandType == 2) return HL * (if (level == 3) FOUR_PIXEL_DETAILER else 1f) + if (subbandType == 2) return HL * (if (level == 2) TWO_PIXEL_DETAILER else if (level == 3) FOUR_PIXEL_DETAILER else 1f) // HH subband - diagonal details - else return perceptual_model3_HH(LH, HL) * (if (level == 3) FOUR_PIXEL_DETAILER else 1f) + else return perceptual_model3_HH(LH, HL) * (if (level == 2) TWO_PIXEL_DETAILER else if (level == 3) FOUR_PIXEL_DETAILER else 1f) } else { // CHROMA CHANNELS: Less critical for human perception, more aggressive quantization - when (subbandType) { - 0 -> { // LL chroma - still important but less than luma - return 1f - return when { - level >= 6 -> 0.8f // Chroma LL6: Less critical than luma LL - level >= 5 -> 0.9f - else -> 1.0f - } - } - 1 -> { // LH chroma - horizontal chroma details - return 1.8f - return when { - level >= 6 -> 1.0f - level >= 5 -> 1.2f - level >= 4 -> 1.4f - level >= 3 -> 1.6f - level >= 2 -> 1.8f - else -> 2.0f - } - } - 2 -> { // HL chroma - vertical chroma details (even less critical) - return 1.3f; - return when { - level >= 6 -> 1.2f - level >= 5 -> 1.4f - level >= 4 -> 1.6f - level >= 3 -> 1.8f - level >= 2 -> 2.0f - else -> 2.2f - } - } - 3 -> { // HH chroma - diagonal chroma details (most aggressive) - return 2.5f - return when { - level >= 6 -> 1.4f - level >= 5 -> 1.6f - level >= 4 -> 1.8f - level >= 3 -> 2.1f - level >= 2 -> 2.3f - else -> 2.5f - } - } + val base = perceptual_model3_chroma_basecurve(qualityLevel, level) + + if (subbandType == 0) { // LL chroma - still important but less than luma + return 1.0f + } + else if (subbandType == 1) { // LH chroma - horizontal chroma details + return base.coerceAtLeast(1.0f) + } + else if (subbandType == 2) { // HL chroma - vertical chroma details (even less critical) + return (base * ANISOTROPY_MULT[qualityLevel]).coerceAtLeast(1.0f) + } + else { // HH chroma - diagonal chroma details (most aggressive) + return (base * ANISOTROPY_MULT[qualityLevel] + ANISOTROPY_BIAS_CHROMA[qualityLevel]).coerceAtLeast(1.0f) } } - return 1.0f } @@ -4218,7 +4195,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { subbands: List, baseQuantizer: Float, isChroma: Boolean, decompLevels: Int) { // Initialize output array to zero (critical for detecting missing coefficients) + if (tavDebugFrameTarget >= 0) { Arrays.fill(dequantised, 0.0f) + } // Track coefficient coverage for debugging var totalProcessed = 0 diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index cc6ec3b..14cc2ff 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -151,6 +151,7 @@ static const int QUALITY_CG[] = {240, 180, 120, 60, 30, 5}; // psychovisual tuning parameters static const float ANISOTROPY_MULT[] = {1.8f, 1.6f, 1.4f, 1.2f, 1.0f, 1.0f}; static const float ANISOTROPY_BIAS[] = {0.2f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f}; +static const float ANISOTROPY_BIAS_CHROMA[] = {0.4f, 0.3f, 0.2f, 0.1f, 0.0f, 0.0f}; // DWT coefficient structure for each subband typedef struct { @@ -828,6 +829,10 @@ static float perceptual_model3_LL(int quality, int level) { return n / m; } +static float perceptual_model3_chroma_basecurve(int quality, int level) { + return 1.0f - (1.0f / (0.5f * quality * quality + 1.0f)) * (level - 4.0f); // just a line that passes (4,1) +} + // Get perceptual weight for specific subband - Data-driven model based on coefficient variance analysis static float get_perceptual_weight_model2(int level, int subband_type, int is_chroma, int max_levels) { // Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity @@ -897,10 +902,12 @@ static float get_perceptual_weight_model2(int level, int subband_type, int is_ch } #define FOUR_PIXEL_DETAILER 0.88f +#define TWO_PIXEL_DETAILER 0.92f +// level is one-based index static float get_perceptual_weight(tav_encoder_t *enc, int level, int subband_type, int is_chroma, int max_levels) { // Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity - // strategy: JPEG quantisation table + real-world statistics from the encoded videos + // strategy: more horizontal detail if (!is_chroma) { // LL subband - contains most image energy, preserve carefully if (subband_type == 0) @@ -914,42 +921,26 @@ static float get_perceptual_weight(tav_encoder_t *enc, int level, int subband_ty // HL subband - vertical details float HL = perceptual_model3_HL(enc->quality_level, LH); if (subband_type == 2) - return HL * (level == 3 ? FOUR_PIXEL_DETAILER : 1.0f); + return HL * (level == 2 ? TWO_PIXEL_DETAILER : level == 3 ? FOUR_PIXEL_DETAILER : 1.0f); // HH subband - diagonal details - else return perceptual_model3_HH(LH, HL) * (level == 3 ? FOUR_PIXEL_DETAILER : 1.0f); + else return perceptual_model3_HH(LH, HL) * (level == 2 ? TWO_PIXEL_DETAILER : level == 3 ? FOUR_PIXEL_DETAILER : 1.0f); } else { // CHROMA CHANNELS: Less critical for human perception, more aggressive quantization - // strategy: mimic 4:2:2 chroma subsampling + // strategy: more horizontal detail + //// mimic 4:4:0 (you heard that right!) chroma subsampling (4:4:4 for higher q, 4:2:0 for lower q) + //// because our eyes are apparently sensitive to horizontal chroma diff as well? + + float base = perceptual_model3_chroma_basecurve(enc->quality_level, level); + if (subband_type == 0) { // LL chroma - still important but less than luma return 1.0f; - if (level >= 6) return 0.8f; // Chroma LL6: Less critical than luma LL - if (level >= 5) return 0.9f; - return 1.0f; } else if (subband_type == 1) { // LH chroma - horizontal chroma details - return 1.8f; - if (level >= 6) return 1.0f; - if (level >= 5) return 1.2f; - if (level >= 4) return 1.4f; - if (level >= 3) return 1.6f; - if (level >= 2) return 1.8f; - return 2.0f; + return FCLAMP(base, 1.0f, 100.0f); } else if (subband_type == 2) { // HL chroma - vertical chroma details (even less critical) - return 1.3f; - if (level >= 6) return 1.2f; - if (level >= 5) return 1.4f; - if (level >= 4) return 1.6f; - if (level >= 3) return 1.8f; - if (level >= 2) return 2.0f; - return 2.2f; + return FCLAMP(base * ANISOTROPY_MULT[enc->quality_level], 1.0f, 100.0f); } else { // HH chroma - diagonal chroma details (most aggressive) - return 2.5f; - if (level >= 6) return 1.4f; - if (level >= 5) return 1.6f; - if (level >= 4) return 1.8f; - if (level >= 3) return 2.1f; - if (level >= 2) return 2.3f; - return 2.5f; + return FCLAMP(base * ANISOTROPY_MULT[enc->quality_level] + ANISOTROPY_BIAS_CHROMA[enc->quality_level], 1.0f, 100.0f); } } } @@ -1009,28 +1000,12 @@ static void quantise_dwt_coefficients_perceptual_per_coeff(tav_encoder_t *enc, float effective_base_q = base_quantizer; effective_base_q = FCLAMP(effective_base_q, 1.0f, 255.0f); - // Debug coefficient analysis - if (frame_count == 1 || frame_count == 120) { - int nonzero = 0; - for (int i = 0; i < size; i++) { - // Apply perceptual weight based on coefficient's position in DWT layout - float weight = get_perceptual_weight_for_position(enc, i, width, height, decomp_levels, is_chroma); - float effective_q = effective_base_q * weight; - float quantised_val = coeffs[i] / effective_q; - quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767); - if (quantised[i] != 0) nonzero++; - } - printf("DEBUG: Frame 120 - %s channel: %d/%d nonzero coeffs after perceptual per-coeff quantization\n", - is_chroma ? "Chroma" : "Luma", nonzero, size); - } else { - // Normal quantization loop - for (int i = 0; i < size; i++) { - // Apply perceptual weight based on coefficient's position in DWT layout - float weight = get_perceptual_weight_for_position(enc, i, width, height, decomp_levels, is_chroma); - float effective_q = effective_base_q * weight; - float quantised_val = coeffs[i] / effective_q; - quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767); - } + for (int i = 0; i < size; i++) { + // Apply perceptual weight based on coefficient's position in DWT layout + float weight = get_perceptual_weight_for_position(enc, i, width, height, decomp_levels, is_chroma); + float effective_q = effective_base_q * weight; + float quantised_val = coeffs[i] / effective_q; + quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767); } } @@ -1373,7 +1348,7 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) }*/ // Debug: Check Y data before DWT transform - if (enc->frame_count == 120 && enc->verbose) { + /*if (enc->frame_count == 120 && enc->verbose) { float max_y_before = 0.0f; int nonzero_before = 0; int total_pixels = enc->monoblock ? (enc->width * enc->height) : (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y); @@ -1383,7 +1358,7 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) if (abs_val > 0.1f) nonzero_before++; } printf("DEBUG: Y data before DWT: max=%.2f, nonzero=%d/%d\n", max_y_before, nonzero_before, total_pixels); - } + }*/ // Apply DWT transform to each channel if (enc->monoblock) { @@ -1399,14 +1374,14 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) } // Debug: Check Y data after DWT transform for high-frequency content - if (enc->frame_count == 120 && enc->verbose) { + /*if (enc->frame_count == 120 && enc->verbose) { printf("DEBUG: Y data after DWT (some high-freq samples): "); int sample_indices[] = {47034, 47035, 47036, 47037, 47038}; // HH1 start + some samples for (int i = 0; i < 5; i++) { printf("%.3f ", tile_y_data[sample_indices[i]]); } printf("\n"); - } + }*/ // Serialise tile size_t tile_size = serialise_tile_data(enc, tile_x, tile_y,