mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-06-14 00:14:05 +09:00
more psychovisual model
This commit is contained in:
@@ -4091,6 +4091,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
|
|
||||||
var ANISOTROPY_MULT = floatArrayOf(1.8f, 1.6f, 1.4f, 1.2f, 1.0f, 1.0f)
|
var ANISOTROPY_MULT = floatArrayOf(1.8f, 1.6f, 1.4f, 1.2f, 1.0f, 1.0f)
|
||||||
var ANISOTROPY_BIAS = floatArrayOf(0.2f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f)
|
var ANISOTROPY_BIAS = floatArrayOf(0.2f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f)
|
||||||
|
var ANISOTROPY_BIAS_CHROMA = floatArrayOf(0.4f, 0.3f, 0.2f, 0.1f, 0.0f, 0.0f)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -4119,8 +4120,14 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
return n / m
|
return n / m
|
||||||
}
|
}
|
||||||
|
|
||||||
private val FOUR_PIXEL_DETAILER = 0.88f
|
fun perceptual_model3_chroma_basecurve(quality: Int, level: Int): Float {
|
||||||
|
return 1.0f - (1.0f / (0.5f * quality * quality + 1.0f)) * (level - 4f) // just a line that passes (4,1)
|
||||||
|
}
|
||||||
|
|
||||||
|
private val FOUR_PIXEL_DETAILER = 0.88f
|
||||||
|
private val TWO_PIXEL_DETAILER = 0.92f
|
||||||
|
|
||||||
|
// level is one-based index
|
||||||
private fun getPerceptualWeight(qYGlobal: Int, level: Int, subbandType: Int, isChroma: Boolean, maxLevels: Int): Float {
|
private fun getPerceptualWeight(qYGlobal: Int, level: Int, subbandType: Int, isChroma: Boolean, maxLevels: Int): Float {
|
||||||
// Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity
|
// Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity
|
||||||
|
|
||||||
@@ -4144,58 +4151,28 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
|
|
||||||
// HL subband - vertical details
|
// HL subband - vertical details
|
||||||
val HL: Float = perceptual_model3_HL(qualityLevel, LH)
|
val HL: Float = perceptual_model3_HL(qualityLevel, LH)
|
||||||
if (subbandType == 2) return HL * (if (level == 3) FOUR_PIXEL_DETAILER else 1f)
|
if (subbandType == 2) return HL * (if (level == 2) TWO_PIXEL_DETAILER else if (level == 3) FOUR_PIXEL_DETAILER else 1f)
|
||||||
|
|
||||||
// HH subband - diagonal details
|
// HH subband - diagonal details
|
||||||
else return perceptual_model3_HH(LH, HL) * (if (level == 3) FOUR_PIXEL_DETAILER else 1f)
|
else return perceptual_model3_HH(LH, HL) * (if (level == 2) TWO_PIXEL_DETAILER else if (level == 3) FOUR_PIXEL_DETAILER else 1f)
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// CHROMA CHANNELS: Less critical for human perception, more aggressive quantization
|
// CHROMA CHANNELS: Less critical for human perception, more aggressive quantization
|
||||||
when (subbandType) {
|
val base = perceptual_model3_chroma_basecurve(qualityLevel, level)
|
||||||
0 -> { // LL chroma - still important but less than luma
|
|
||||||
return 1f
|
if (subbandType == 0) { // LL chroma - still important but less than luma
|
||||||
return when {
|
return 1.0f
|
||||||
level >= 6 -> 0.8f // Chroma LL6: Less critical than luma LL
|
}
|
||||||
level >= 5 -> 0.9f
|
else if (subbandType == 1) { // LH chroma - horizontal chroma details
|
||||||
else -> 1.0f
|
return base.coerceAtLeast(1.0f)
|
||||||
}
|
}
|
||||||
}
|
else if (subbandType == 2) { // HL chroma - vertical chroma details (even less critical)
|
||||||
1 -> { // LH chroma - horizontal chroma details
|
return (base * ANISOTROPY_MULT[qualityLevel]).coerceAtLeast(1.0f)
|
||||||
return 1.8f
|
}
|
||||||
return when {
|
else { // HH chroma - diagonal chroma details (most aggressive)
|
||||||
level >= 6 -> 1.0f
|
return (base * ANISOTROPY_MULT[qualityLevel] + ANISOTROPY_BIAS_CHROMA[qualityLevel]).coerceAtLeast(1.0f)
|
||||||
level >= 5 -> 1.2f
|
|
||||||
level >= 4 -> 1.4f
|
|
||||||
level >= 3 -> 1.6f
|
|
||||||
level >= 2 -> 1.8f
|
|
||||||
else -> 2.0f
|
|
||||||
}
|
|
||||||
}
|
|
||||||
2 -> { // HL chroma - vertical chroma details (even less critical)
|
|
||||||
return 1.3f;
|
|
||||||
return when {
|
|
||||||
level >= 6 -> 1.2f
|
|
||||||
level >= 5 -> 1.4f
|
|
||||||
level >= 4 -> 1.6f
|
|
||||||
level >= 3 -> 1.8f
|
|
||||||
level >= 2 -> 2.0f
|
|
||||||
else -> 2.2f
|
|
||||||
}
|
|
||||||
}
|
|
||||||
3 -> { // HH chroma - diagonal chroma details (most aggressive)
|
|
||||||
return 2.5f
|
|
||||||
return when {
|
|
||||||
level >= 6 -> 1.4f
|
|
||||||
level >= 5 -> 1.6f
|
|
||||||
level >= 4 -> 1.8f
|
|
||||||
level >= 3 -> 2.1f
|
|
||||||
level >= 2 -> 2.3f
|
|
||||||
else -> 2.5f
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return 1.0f
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -4218,7 +4195,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
subbands: List<DWTSubbandInfo>, baseQuantizer: Float, isChroma: Boolean, decompLevels: Int) {
|
subbands: List<DWTSubbandInfo>, baseQuantizer: Float, isChroma: Boolean, decompLevels: Int) {
|
||||||
|
|
||||||
// Initialize output array to zero (critical for detecting missing coefficients)
|
// Initialize output array to zero (critical for detecting missing coefficients)
|
||||||
|
if (tavDebugFrameTarget >= 0) {
|
||||||
Arrays.fill(dequantised, 0.0f)
|
Arrays.fill(dequantised, 0.0f)
|
||||||
|
}
|
||||||
|
|
||||||
// Track coefficient coverage for debugging
|
// Track coefficient coverage for debugging
|
||||||
var totalProcessed = 0
|
var totalProcessed = 0
|
||||||
|
|||||||
@@ -151,6 +151,7 @@ static const int QUALITY_CG[] = {240, 180, 120, 60, 30, 5};
|
|||||||
// psychovisual tuning parameters
|
// psychovisual tuning parameters
|
||||||
static const float ANISOTROPY_MULT[] = {1.8f, 1.6f, 1.4f, 1.2f, 1.0f, 1.0f};
|
static const float ANISOTROPY_MULT[] = {1.8f, 1.6f, 1.4f, 1.2f, 1.0f, 1.0f};
|
||||||
static const float ANISOTROPY_BIAS[] = {0.2f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f};
|
static const float ANISOTROPY_BIAS[] = {0.2f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f};
|
||||||
|
static const float ANISOTROPY_BIAS_CHROMA[] = {0.4f, 0.3f, 0.2f, 0.1f, 0.0f, 0.0f};
|
||||||
|
|
||||||
// DWT coefficient structure for each subband
|
// DWT coefficient structure for each subband
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@@ -828,6 +829,10 @@ static float perceptual_model3_LL(int quality, int level) {
|
|||||||
return n / m;
|
return n / m;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static float perceptual_model3_chroma_basecurve(int quality, int level) {
|
||||||
|
return 1.0f - (1.0f / (0.5f * quality * quality + 1.0f)) * (level - 4.0f); // just a line that passes (4,1)
|
||||||
|
}
|
||||||
|
|
||||||
// Get perceptual weight for specific subband - Data-driven model based on coefficient variance analysis
|
// Get perceptual weight for specific subband - Data-driven model based on coefficient variance analysis
|
||||||
static float get_perceptual_weight_model2(int level, int subband_type, int is_chroma, int max_levels) {
|
static float get_perceptual_weight_model2(int level, int subband_type, int is_chroma, int max_levels) {
|
||||||
// Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity
|
// Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity
|
||||||
@@ -897,10 +902,12 @@ static float get_perceptual_weight_model2(int level, int subband_type, int is_ch
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define FOUR_PIXEL_DETAILER 0.88f
|
#define FOUR_PIXEL_DETAILER 0.88f
|
||||||
|
#define TWO_PIXEL_DETAILER 0.92f
|
||||||
|
|
||||||
|
// level is one-based index
|
||||||
static float get_perceptual_weight(tav_encoder_t *enc, int level, int subband_type, int is_chroma, int max_levels) {
|
static float get_perceptual_weight(tav_encoder_t *enc, int level, int subband_type, int is_chroma, int max_levels) {
|
||||||
// Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity
|
// Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity
|
||||||
// strategy: JPEG quantisation table + real-world statistics from the encoded videos
|
// strategy: more horizontal detail
|
||||||
if (!is_chroma) {
|
if (!is_chroma) {
|
||||||
// LL subband - contains most image energy, preserve carefully
|
// LL subband - contains most image energy, preserve carefully
|
||||||
if (subband_type == 0)
|
if (subband_type == 0)
|
||||||
@@ -914,42 +921,26 @@ static float get_perceptual_weight(tav_encoder_t *enc, int level, int subband_ty
|
|||||||
// HL subband - vertical details
|
// HL subband - vertical details
|
||||||
float HL = perceptual_model3_HL(enc->quality_level, LH);
|
float HL = perceptual_model3_HL(enc->quality_level, LH);
|
||||||
if (subband_type == 2)
|
if (subband_type == 2)
|
||||||
return HL * (level == 3 ? FOUR_PIXEL_DETAILER : 1.0f);
|
return HL * (level == 2 ? TWO_PIXEL_DETAILER : level == 3 ? FOUR_PIXEL_DETAILER : 1.0f);
|
||||||
|
|
||||||
// HH subband - diagonal details
|
// HH subband - diagonal details
|
||||||
else return perceptual_model3_HH(LH, HL) * (level == 3 ? FOUR_PIXEL_DETAILER : 1.0f);
|
else return perceptual_model3_HH(LH, HL) * (level == 2 ? TWO_PIXEL_DETAILER : level == 3 ? FOUR_PIXEL_DETAILER : 1.0f);
|
||||||
} else {
|
} else {
|
||||||
// CHROMA CHANNELS: Less critical for human perception, more aggressive quantization
|
// CHROMA CHANNELS: Less critical for human perception, more aggressive quantization
|
||||||
// strategy: mimic 4:2:2 chroma subsampling
|
// strategy: more horizontal detail
|
||||||
|
//// mimic 4:4:0 (you heard that right!) chroma subsampling (4:4:4 for higher q, 4:2:0 for lower q)
|
||||||
|
//// because our eyes are apparently sensitive to horizontal chroma diff as well?
|
||||||
|
|
||||||
|
float base = perceptual_model3_chroma_basecurve(enc->quality_level, level);
|
||||||
|
|
||||||
if (subband_type == 0) { // LL chroma - still important but less than luma
|
if (subband_type == 0) { // LL chroma - still important but less than luma
|
||||||
return 1.0f;
|
return 1.0f;
|
||||||
if (level >= 6) return 0.8f; // Chroma LL6: Less critical than luma LL
|
|
||||||
if (level >= 5) return 0.9f;
|
|
||||||
return 1.0f;
|
|
||||||
} else if (subband_type == 1) { // LH chroma - horizontal chroma details
|
} else if (subband_type == 1) { // LH chroma - horizontal chroma details
|
||||||
return 1.8f;
|
return FCLAMP(base, 1.0f, 100.0f);
|
||||||
if (level >= 6) return 1.0f;
|
|
||||||
if (level >= 5) return 1.2f;
|
|
||||||
if (level >= 4) return 1.4f;
|
|
||||||
if (level >= 3) return 1.6f;
|
|
||||||
if (level >= 2) return 1.8f;
|
|
||||||
return 2.0f;
|
|
||||||
} else if (subband_type == 2) { // HL chroma - vertical chroma details (even less critical)
|
} else if (subband_type == 2) { // HL chroma - vertical chroma details (even less critical)
|
||||||
return 1.3f;
|
return FCLAMP(base * ANISOTROPY_MULT[enc->quality_level], 1.0f, 100.0f);
|
||||||
if (level >= 6) return 1.2f;
|
|
||||||
if (level >= 5) return 1.4f;
|
|
||||||
if (level >= 4) return 1.6f;
|
|
||||||
if (level >= 3) return 1.8f;
|
|
||||||
if (level >= 2) return 2.0f;
|
|
||||||
return 2.2f;
|
|
||||||
} else { // HH chroma - diagonal chroma details (most aggressive)
|
} else { // HH chroma - diagonal chroma details (most aggressive)
|
||||||
return 2.5f;
|
return FCLAMP(base * ANISOTROPY_MULT[enc->quality_level] + ANISOTROPY_BIAS_CHROMA[enc->quality_level], 1.0f, 100.0f);
|
||||||
if (level >= 6) return 1.4f;
|
|
||||||
if (level >= 5) return 1.6f;
|
|
||||||
if (level >= 4) return 1.8f;
|
|
||||||
if (level >= 3) return 2.1f;
|
|
||||||
if (level >= 2) return 2.3f;
|
|
||||||
return 2.5f;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1009,28 +1000,12 @@ static void quantise_dwt_coefficients_perceptual_per_coeff(tav_encoder_t *enc,
|
|||||||
float effective_base_q = base_quantizer;
|
float effective_base_q = base_quantizer;
|
||||||
effective_base_q = FCLAMP(effective_base_q, 1.0f, 255.0f);
|
effective_base_q = FCLAMP(effective_base_q, 1.0f, 255.0f);
|
||||||
|
|
||||||
// Debug coefficient analysis
|
for (int i = 0; i < size; i++) {
|
||||||
if (frame_count == 1 || frame_count == 120) {
|
// Apply perceptual weight based on coefficient's position in DWT layout
|
||||||
int nonzero = 0;
|
float weight = get_perceptual_weight_for_position(enc, i, width, height, decomp_levels, is_chroma);
|
||||||
for (int i = 0; i < size; i++) {
|
float effective_q = effective_base_q * weight;
|
||||||
// Apply perceptual weight based on coefficient's position in DWT layout
|
float quantised_val = coeffs[i] / effective_q;
|
||||||
float weight = get_perceptual_weight_for_position(enc, i, width, height, decomp_levels, is_chroma);
|
quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767);
|
||||||
float effective_q = effective_base_q * weight;
|
|
||||||
float quantised_val = coeffs[i] / effective_q;
|
|
||||||
quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767);
|
|
||||||
if (quantised[i] != 0) nonzero++;
|
|
||||||
}
|
|
||||||
printf("DEBUG: Frame 120 - %s channel: %d/%d nonzero coeffs after perceptual per-coeff quantization\n",
|
|
||||||
is_chroma ? "Chroma" : "Luma", nonzero, size);
|
|
||||||
} else {
|
|
||||||
// Normal quantization loop
|
|
||||||
for (int i = 0; i < size; i++) {
|
|
||||||
// Apply perceptual weight based on coefficient's position in DWT layout
|
|
||||||
float weight = get_perceptual_weight_for_position(enc, i, width, height, decomp_levels, is_chroma);
|
|
||||||
float effective_q = effective_base_q * weight;
|
|
||||||
float quantised_val = coeffs[i] / effective_q;
|
|
||||||
quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1373,7 +1348,7 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
|
|||||||
}*/
|
}*/
|
||||||
|
|
||||||
// Debug: Check Y data before DWT transform
|
// Debug: Check Y data before DWT transform
|
||||||
if (enc->frame_count == 120 && enc->verbose) {
|
/*if (enc->frame_count == 120 && enc->verbose) {
|
||||||
float max_y_before = 0.0f;
|
float max_y_before = 0.0f;
|
||||||
int nonzero_before = 0;
|
int nonzero_before = 0;
|
||||||
int total_pixels = enc->monoblock ? (enc->width * enc->height) : (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y);
|
int total_pixels = enc->monoblock ? (enc->width * enc->height) : (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y);
|
||||||
@@ -1383,7 +1358,7 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
|
|||||||
if (abs_val > 0.1f) nonzero_before++;
|
if (abs_val > 0.1f) nonzero_before++;
|
||||||
}
|
}
|
||||||
printf("DEBUG: Y data before DWT: max=%.2f, nonzero=%d/%d\n", max_y_before, nonzero_before, total_pixels);
|
printf("DEBUG: Y data before DWT: max=%.2f, nonzero=%d/%d\n", max_y_before, nonzero_before, total_pixels);
|
||||||
}
|
}*/
|
||||||
|
|
||||||
// Apply DWT transform to each channel
|
// Apply DWT transform to each channel
|
||||||
if (enc->monoblock) {
|
if (enc->monoblock) {
|
||||||
@@ -1399,14 +1374,14 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Debug: Check Y data after DWT transform for high-frequency content
|
// Debug: Check Y data after DWT transform for high-frequency content
|
||||||
if (enc->frame_count == 120 && enc->verbose) {
|
/*if (enc->frame_count == 120 && enc->verbose) {
|
||||||
printf("DEBUG: Y data after DWT (some high-freq samples): ");
|
printf("DEBUG: Y data after DWT (some high-freq samples): ");
|
||||||
int sample_indices[] = {47034, 47035, 47036, 47037, 47038}; // HH1 start + some samples
|
int sample_indices[] = {47034, 47035, 47036, 47037, 47038}; // HH1 start + some samples
|
||||||
for (int i = 0; i < 5; i++) {
|
for (int i = 0; i < 5; i++) {
|
||||||
printf("%.3f ", tile_y_data[sample_indices[i]]);
|
printf("%.3f ", tile_y_data[sample_indices[i]]);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}*/
|
||||||
|
|
||||||
// Serialise tile
|
// Serialise tile
|
||||||
size_t tile_size = serialise_tile_data(enc, tile_x, tile_y,
|
size_t tile_size = serialise_tile_data(enc, tile_x, tile_y,
|
||||||
|
|||||||
Reference in New Issue
Block a user