mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 19:51:51 +09:00
still working on the psychovisual model
This commit is contained in:
@@ -11,7 +11,65 @@ import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUint
|
||||
import net.torvald.tsvm.peripheral.GraphicsAdapter
|
||||
import net.torvald.tsvm.peripheral.PeriBase
|
||||
import net.torvald.tsvm.peripheral.fmod
|
||||
import java.util.*
|
||||
import kotlin.Any
|
||||
import kotlin.Array
|
||||
import kotlin.Boolean
|
||||
import kotlin.BooleanArray
|
||||
import kotlin.Byte
|
||||
import kotlin.ByteArray
|
||||
import kotlin.Double
|
||||
import kotlin.Exception
|
||||
import kotlin.Float
|
||||
import kotlin.FloatArray
|
||||
import kotlin.IllegalArgumentException
|
||||
import kotlin.IllegalStateException
|
||||
import kotlin.Int
|
||||
import kotlin.IntArray
|
||||
import kotlin.Long
|
||||
import kotlin.LongArray
|
||||
import kotlin.Pair
|
||||
import kotlin.Short
|
||||
import kotlin.ShortArray
|
||||
import kotlin.String
|
||||
import kotlin.Triple
|
||||
import kotlin.arrayOf
|
||||
import kotlin.byteArrayOf
|
||||
import kotlin.collections.ArrayList
|
||||
import kotlin.collections.List
|
||||
import kotlin.collections.MutableMap
|
||||
import kotlin.collections.component1
|
||||
import kotlin.collections.component2
|
||||
import kotlin.collections.component3
|
||||
import kotlin.collections.component4
|
||||
import kotlin.collections.copyOf
|
||||
import kotlin.collections.count
|
||||
import kotlin.collections.fill
|
||||
import kotlin.collections.forEach
|
||||
import kotlin.collections.forEachIndexed
|
||||
import kotlin.collections.indices
|
||||
import kotlin.collections.isNotEmpty
|
||||
import kotlin.collections.listOf
|
||||
import kotlin.collections.map
|
||||
import kotlin.collections.maxOfOrNull
|
||||
import kotlin.collections.mutableListOf
|
||||
import kotlin.collections.mutableMapOf
|
||||
import kotlin.collections.set
|
||||
import kotlin.collections.sliceArray
|
||||
import kotlin.collections.sorted
|
||||
import kotlin.collections.sumOf
|
||||
import kotlin.collections.toFloatArray
|
||||
import kotlin.error
|
||||
import kotlin.floatArrayOf
|
||||
import kotlin.fromBits
|
||||
import kotlin.intArrayOf
|
||||
import kotlin.let
|
||||
import kotlin.longArrayOf
|
||||
import kotlin.math.*
|
||||
import kotlin.repeat
|
||||
import kotlin.text.format
|
||||
import kotlin.text.lowercase
|
||||
import kotlin.text.toString
|
||||
|
||||
class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
|
||||
@@ -3888,7 +3946,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
return subbands
|
||||
}
|
||||
|
||||
private fun getPerceptualWeight(level: Int, subbandType: Int, isChroma: Boolean, maxLevels: Int): Float {
|
||||
private fun getPerceptualWeightModel2(level: Int, subbandType: Int, isChroma: Boolean, maxLevels: Int): Float {
|
||||
// Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity
|
||||
|
||||
if (!isChroma) {
|
||||
@@ -4031,6 +4089,116 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
}*/
|
||||
}
|
||||
|
||||
// Multiplier applied to the LH weight when deriving the HL weight
// (see perceptual_model3_HL). Six entries, indexed by the quality index.
var ANISOTROPY_MULT: FloatArray = floatArrayOf(1.8f, 1.6f, 1.4f, 1.2f, 1.0f, 1.0f)

// Additive bias applied after the multiplication above; indexed the same way.
var ANISOTROPY_BIAS: FloatArray = floatArrayOf(0.2f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f)
|
||||
|
||||
|
||||
|
||||
// Base weight curve of perceptual model 3 for the LH subband.
//
// quality: quality index (callers in this file pass 0..5)
// level:   DWT decomposition level
//
// For level >= 4 the result is a straight line that equals 1.2 at level 4;
// for lower levels a quadratic in `level` is evaluated instead.
// The arithmetic is kept in exactly the original operation order so the
// floating-point result stays bit-identical.
private fun perceptual_model3_LH(quality: Int, level: Int): Float {
    val anchor = 1.2f // value of the linear segment at level 4

    // Linear segment
    if (level >= 4) {
        return anchor - ((quality + 1f) / 15f) * (level - 4f)
    }

    // Quadratic segment
    val slope = (quality + 1f) / -15f
    val offset = anchor - 4f * slope - ((-16f * (quality - 5f)) / (15f))
    return (slope * level) - (((quality - 5f) * (level - 8f) * level) / (15f)) + offset
}
|
||||
|
||||
// Derives the HL weight from an already computed LH weight:
// LH scaled by ANISOTROPY_MULT[quality], then offset by ANISOTROPY_BIAS[quality].
// `quality` must be a valid index into both tables.
private fun perceptual_model3_HL(quality: Int, LH: Float): Float {
    val gain = ANISOTROPY_MULT[quality]
    val bias = ANISOTROPY_BIAS[quality]
    return LH * gain + bias
}
|
||||
|
||||
// HH weight: two thirds of the sum of the LH and HL weights.
private fun perceptual_model3_HH(LH: Float, HL: Float): Float {
    val combined = LH + HL
    return 2f * combined / 3f
}
|
||||
|
||||
// LL weight, derived from the LH curve at `level` and at `level - 1`:
// the LH value at `level` divided by the ratio LH(level - 1) / LH(level).
// NOTE(review): if either LH value is 0 the result is 0 or non-finite;
// confirm that the quality/level combinations in use avoid that.
fun perceptual_model3_LL(quality: Int, level: Int): Float {
    val current = perceptual_model3_LH(quality, level)
    val ratioToPrevious = perceptual_model3_LH(quality, level - 1) / current

    return current / ratioToPrevious
}
|
||||
|
||||
// Extra factor applied to the luma HL and HH weights at decomposition level 3 only.
private val FOUR_PIXEL_DETAILER = 0.88f

// Returns the perceptual quantiser weight for one DWT subband (perceptual model 3).
//
// qYGlobal:    global luma quantiser; mapped onto a 0..5 quality index below
// level:       decomposition level of the subband
// subbandType: 0 = LL, 1 = LH, 2 = HL, anything else is treated as HH for luma
// isChroma:    true for the chroma channels, false for luma
// maxLevels:   currently unused; kept so existing call sites stay valid
//
// Luma weights come from the perceptual_model3_* curves. Chroma weights are
// currently fixed per subband type and do not depend on `level`.
private fun getPerceptualWeight(qYGlobal: Int, level: Int, subbandType: Int, isChroma: Boolean, maxLevels: Int): Float {
    // Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity

    // Larger quantiser values map to lower quality indices.
    val qualityLevel = when {
        qYGlobal >= 60 -> 0
        qYGlobal >= 42 -> 1
        qYGlobal >= 25 -> 2
        qYGlobal >= 12 -> 3
        qYGlobal >= 6 -> 4
        else -> 5
    }

    if (isChroma) {
        // CHROMA CHANNELS: Less critical for human perception, more aggressive quantization
        //
        // Disabled per-level tables, previously left as unreachable code after the
        // fixed returns and kept here for reference
        // (level >= 6, >= 5, >= 4, >= 3, >= 2, else):
        //   LL: 0.8, 0.9, 1.0            (level >= 6, >= 5, else)
        //   LH: 1.0, 1.2, 1.4, 1.6, 1.8, 2.0
        //   HL: 1.2, 1.4, 1.6, 1.8, 2.0, 2.2
        //   HH: 1.4, 1.6, 1.8, 2.1, 2.3, 2.5
        return when (subbandType) {
            0 -> 1.0f    // LL chroma - still important but less than luma
            1 -> 1.8f    // LH chroma - horizontal chroma details
            2 -> 1.3f    // HL chroma - vertical chroma details
            3 -> 2.5f    // HH chroma - diagonal chroma details (most aggressive)
            else -> 1.0f // unknown subband type: neutral weight
        }
    }

    // LUMA CHANNEL

    // LL subband - contains most image energy, preserve carefully
    if (subbandType == 0) return perceptual_model3_LL(qualityLevel, level)

    // LH subband - horizontal details
    val LH: Float = perceptual_model3_LH(qualityLevel, level)
    if (subbandType == 1) return LH

    // HL and HH both receive the level-3 adjustment.
    val HL: Float = perceptual_model3_HL(qualityLevel, LH)
    val level3Factor = if (level == 3) FOUR_PIXEL_DETAILER else 1f

    return if (subbandType == 2) {
        // HL subband - vertical details
        HL * level3Factor
    } else {
        // HH subband - diagonal details
        perceptual_model3_HH(LH, HL) * level3Factor
    }
}
|
||||
|
||||
|
||||
// Helper function to calculate five-number summary for coefficient analysis
|
||||
private fun calculateFiveNumberSummary(values: List<Int>): String {
|
||||
if (values.isEmpty()) return "empty"
|
||||
@@ -4046,20 +4214,18 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
return "min=$min, Q1=$q1, med=%.1f, Q3=$q3, max=$max, n=$n".format(median)
|
||||
}
|
||||
|
||||
private fun dequantiseDWTSubbandsPerceptual(quantised: ShortArray, dequantised: FloatArray,
|
||||
private fun dequantiseDWTSubbandsPerceptual(qYGlobal: Int, quantised: ShortArray, dequantised: FloatArray,
|
||||
subbands: List<DWTSubbandInfo>, baseQuantizer: Float, isChroma: Boolean, decompLevels: Int) {
|
||||
|
||||
// Initialize output array to zero (critical for detecting missing coefficients)
|
||||
for (i in dequantised.indices) {
|
||||
dequantised[i] = 0.0f
|
||||
}
|
||||
Arrays.fill(dequantised, 0.0f)
|
||||
|
||||
// Track coefficient coverage for debugging
|
||||
var totalProcessed = 0
|
||||
var maxIdx = -1
|
||||
|
||||
for (subband in subbands) {
|
||||
val weight = getPerceptualWeight(subband.level, subband.subbandType, isChroma, decompLevels)
|
||||
val weight = getPerceptualWeight(qYGlobal, subband.level, subband.subbandType, isChroma, decompLevels)
|
||||
// CRITICAL FIX: Use the same effective quantizer as encoder for proper reconstruction
|
||||
val effectiveQuantizer = baseQuantizer * weight
|
||||
|
||||
@@ -4129,7 +4295,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
|
||||
try {
|
||||
// Determine if monoblock mode based on TAV version
|
||||
val isMonoblock = (tavVersion == 3 || tavVersion == 4 || tavVersion == 5 || tavVersion == 6)
|
||||
val isMonoblock = (tavVersion >= 3)
|
||||
|
||||
val tilesX: Int
|
||||
val tilesY: Int
|
||||
@@ -4168,13 +4334,13 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
}
|
||||
0x01 -> { // TAV_MODE_INTRA
|
||||
// Decode DWT coefficients directly to RGB buffer
|
||||
readPtr = tavDecodeDWTIntraTileRGB(readPtr, tileX, tileY, currentRGBAddr,
|
||||
readPtr = tavDecodeDWTIntraTileRGB(qYGlobal, readPtr, tileX, tileY, currentRGBAddr,
|
||||
width, height, qY, qCo, qCg,
|
||||
waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock)
|
||||
}
|
||||
0x02 -> { // TAV_MODE_DELTA
|
||||
// Coefficient delta encoding for efficient P-frames
|
||||
readPtr = tavDecodeDeltaTileRGB(readPtr, tileX, tileY, currentRGBAddr,
|
||||
readPtr = tavDecodeDeltaTileRGB(qYGlobal, readPtr, tileX, tileY, currentRGBAddr,
|
||||
width, height, qY, qCo, qCg,
|
||||
waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock)
|
||||
}
|
||||
@@ -4187,7 +4353,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
}
|
||||
}
|
||||
|
||||
private fun tavDecodeDWTIntraTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
|
||||
private fun tavDecodeDWTIntraTileRGB(qYGlobal: Int, readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
|
||||
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int,
|
||||
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false): Long {
|
||||
// Determine coefficient count based on mode
|
||||
@@ -4247,9 +4413,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
val tileHeight = if (isMonoblock) height else PADDED_TILE_SIZE_Y
|
||||
val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels)
|
||||
|
||||
dequantiseDWTSubbandsPerceptual(quantisedY, yTile, subbands, qY.toFloat(), false, decompLevels)
|
||||
dequantiseDWTSubbandsPerceptual(quantisedCo, coTile, subbands, qCo.toFloat(), true, decompLevels)
|
||||
dequantiseDWTSubbandsPerceptual(quantisedCg, cgTile, subbands, qCg.toFloat(), true, decompLevels)
|
||||
dequantiseDWTSubbandsPerceptual(qYGlobal, quantisedY, yTile, subbands, qY.toFloat(), false, decompLevels)
|
||||
dequantiseDWTSubbandsPerceptual(qYGlobal, quantisedCo, coTile, subbands, qCo.toFloat(), true, decompLevels)
|
||||
dequantiseDWTSubbandsPerceptual(qYGlobal, quantisedCg, cgTile, subbands, qCg.toFloat(), true, decompLevels)
|
||||
|
||||
// Debug: Check coefficient values before inverse DWT
|
||||
if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) {
|
||||
@@ -4777,7 +4943,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
}
|
||||
}
|
||||
|
||||
private fun tavDecodeDeltaTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
|
||||
private fun tavDecodeDeltaTileRGB(qYGlobal: Int, readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
|
||||
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int,
|
||||
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false): Long {
|
||||
|
||||
@@ -4849,9 +5015,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
val deltaCoFloat = FloatArray(coeffCount)
|
||||
val deltaCgFloat = FloatArray(coeffCount)
|
||||
|
||||
dequantiseDWTSubbandsPerceptual(deltaY, deltaYFloat, subbands, qY.toFloat(), false, decompLevels)
|
||||
dequantiseDWTSubbandsPerceptual(deltaCo, deltaCoFloat, subbands, adjustedQCo, true, decompLevels)
|
||||
dequantiseDWTSubbandsPerceptual(deltaCg, deltaCgFloat, subbands, adjustedQCg, true, decompLevels)
|
||||
dequantiseDWTSubbandsPerceptual(qYGlobal, deltaY, deltaYFloat, subbands, qY.toFloat(), false, decompLevels)
|
||||
dequantiseDWTSubbandsPerceptual(qYGlobal, deltaCo, deltaCoFloat, subbands, adjustedQCo, true, decompLevels)
|
||||
dequantiseDWTSubbandsPerceptual(qYGlobal, deltaCg, deltaCgFloat, subbands, adjustedQCg, true, decompLevels)
|
||||
|
||||
// Reconstruct: current = previous + perceptually_dequantized_delta
|
||||
for (i in 0 until coeffCount) {
|
||||
|
||||
@@ -148,6 +148,10 @@ static const int QUALITY_CG[] = {240, 180, 120, 60, 30, 5};
|
||||
//static const int QUALITY_CO[] = {60, 30, 15, 7, 5, 2};
|
||||
//static const int QUALITY_CG[] = {120, 60, 30, 15, 10, 4};
|
||||
|
||||
// Psychovisual tuning tables, six entries each, indexed by the quality level.
// perceptual_model3_HL computes HL = LH * ANISOTROPY_MULT[q] + ANISOTROPY_BIAS[q].
static const float ANISOTROPY_MULT[6] = {1.8f, 1.6f, 1.4f, 1.2f, 1.0f, 1.0f};
static const float ANISOTROPY_BIAS[6] = {0.2f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f};
|
||||
|
||||
// DWT coefficient structure for each subband
|
||||
typedef struct {
|
||||
int16_t *coeffs;
|
||||
@@ -797,8 +801,35 @@ static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int siz
|
||||
}
|
||||
}
|
||||
|
||||
// Base weight curve of perceptual model 3 for the LH subband.
// Plot: https://www.desmos.com/calculator/mjlpwqm8ge
// where Q=quality, x=level
//
// For level >= 4 the result is a straight line that equals 1.2 at level 4;
// for lower levels a quadratic in `level` is evaluated instead.
// The expressions keep the original operation order on purpose.
static float perceptual_model3_LH(int quality, int level) {
    const float anchor = 1.2f; // value of the linear segment at level 4

    // Linear segment
    if (level >= 4) {
        return anchor - ((quality + 1.f) / 15.f) * (level - 4.f);
    }

    // Quadratic segment
    const float slope = (quality + 1.f) / -15.f;
    const float offset = anchor - 4.f * slope - ((-16.f*(quality - 5.f))/(15.f));
    return (slope * level) - (((quality - 5.f)*(level - 8.f)*level)/(15.f)) + offset;
}
|
||||
|
||||
static float perceptual_model3_HL(int quality, float LH) {
|
||||
return fmaf(LH, ANISOTROPY_MULT[quality], ANISOTROPY_BIAS[quality]);
|
||||
}
|
||||
|
||||
// HH weight: two thirds of the sum of the LH and HL weights.
static float perceptual_model3_HH(float LH, float HL) {
    const float combined = LH + HL;
    return 2.f * combined / 3.f;
}
|
||||
|
||||
// LL weight, derived from the LH curve at `level` and at `level - 1`:
// the LH value at `level` divided by the ratio LH(level - 1) / LH(level).
// NOTE(review): if either LH value is 0 the result is 0 or non-finite;
// confirm that the quality/level combinations in use avoid that.
static float perceptual_model3_LL(int quality, int level) {
    const float current = perceptual_model3_LH(quality, level);
    const float ratio_to_previous = perceptual_model3_LH(quality, level - 1) / current;

    return current / ratio_to_previous;
}
|
||||
|
||||
// Get perceptual weight for specific subband - Data-driven model based on coefficient variance analysis
|
||||
static float get_perceptual_weight(int level, int subband_type, int is_chroma, int max_levels) {
|
||||
static float get_perceptual_weight_model2(int level, int subband_type, int is_chroma, int max_levels) {
|
||||
// Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity
|
||||
// strategy: JPEG quantisation table + real-world statistics from the encoded videos
|
||||
if (!is_chroma) {
|
||||
@@ -865,8 +896,67 @@ static float get_perceptual_weight(int level, int subband_type, int is_chroma, i
|
||||
}
|
||||
}
|
||||
|
||||
#define FOUR_PIXEL_DETAILER 0.88f
|
||||
|
||||
static float get_perceptual_weight(tav_encoder_t *enc, int level, int subband_type, int is_chroma, int max_levels) {
|
||||
// Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity
|
||||
// strategy: JPEG quantisation table + real-world statistics from the encoded videos
|
||||
if (!is_chroma) {
|
||||
// LL subband - contains most image energy, preserve carefully
|
||||
if (subband_type == 0)
|
||||
return perceptual_model3_LL(enc->quality_level, level);
|
||||
|
||||
// LH subband - horizontal details (human eyes more sensitive)
|
||||
float LH = perceptual_model3_LH(enc->quality_level, level);
|
||||
if (subband_type == 1)
|
||||
return LH;
|
||||
|
||||
// HL subband - vertical details
|
||||
float HL = perceptual_model3_HL(enc->quality_level, LH);
|
||||
if (subband_type == 2)
|
||||
return HL * (level == 3 ? FOUR_PIXEL_DETAILER : 1.0f);
|
||||
|
||||
// HH subband - diagonal details
|
||||
else return perceptual_model3_HH(LH, HL) * (level == 3 ? FOUR_PIXEL_DETAILER : 1.0f);
|
||||
} else {
|
||||
// CHROMA CHANNELS: Less critical for human perception, more aggressive quantization
|
||||
// strategy: mimic 4:2:2 chroma subsampling
|
||||
if (subband_type == 0) { // LL chroma - still important but less than luma
|
||||
return 1.0f;
|
||||
if (level >= 6) return 0.8f; // Chroma LL6: Less critical than luma LL
|
||||
if (level >= 5) return 0.9f;
|
||||
return 1.0f;
|
||||
} else if (subband_type == 1) { // LH chroma - horizontal chroma details
|
||||
return 1.8f;
|
||||
if (level >= 6) return 1.0f;
|
||||
if (level >= 5) return 1.2f;
|
||||
if (level >= 4) return 1.4f;
|
||||
if (level >= 3) return 1.6f;
|
||||
if (level >= 2) return 1.8f;
|
||||
return 2.0f;
|
||||
} else if (subband_type == 2) { // HL chroma - vertical chroma details (even less critical)
|
||||
return 1.3f;
|
||||
if (level >= 6) return 1.2f;
|
||||
if (level >= 5) return 1.4f;
|
||||
if (level >= 4) return 1.6f;
|
||||
if (level >= 3) return 1.8f;
|
||||
if (level >= 2) return 2.0f;
|
||||
return 2.2f;
|
||||
} else { // HH chroma - diagonal chroma details (most aggressive)
|
||||
return 2.5f;
|
||||
if (level >= 6) return 1.4f;
|
||||
if (level >= 5) return 1.6f;
|
||||
if (level >= 4) return 1.8f;
|
||||
if (level >= 3) return 2.1f;
|
||||
if (level >= 2) return 2.3f;
|
||||
return 2.5f;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Determine perceptual weight for coefficient at linear position (matches actual DWT layout)
|
||||
static float get_perceptual_weight_for_position(int linear_idx, int width, int height, int decomp_levels, int is_chroma) {
|
||||
static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma) {
|
||||
// Map linear coefficient index to DWT subband using same layout as decoder
|
||||
int offset = 0;
|
||||
|
||||
@@ -877,7 +967,7 @@ static float get_perceptual_weight_for_position(int linear_idx, int width, int h
|
||||
|
||||
if (linear_idx < offset + ll_size) {
|
||||
// LL subband at maximum level - use get_perceptual_weight for consistency
|
||||
return get_perceptual_weight(decomp_levels, 0, is_chroma, decomp_levels);
|
||||
return get_perceptual_weight(enc, decomp_levels, 0, is_chroma, decomp_levels);
|
||||
}
|
||||
offset += ll_size;
|
||||
|
||||
@@ -889,19 +979,19 @@ static float get_perceptual_weight_for_position(int linear_idx, int width, int h
|
||||
|
||||
// LH subband (horizontal details)
|
||||
if (linear_idx < offset + subband_size) {
|
||||
return get_perceptual_weight(level, 1, is_chroma, decomp_levels);
|
||||
return get_perceptual_weight(enc, level, 1, is_chroma, decomp_levels);
|
||||
}
|
||||
offset += subband_size;
|
||||
|
||||
// HL subband (vertical details)
|
||||
if (linear_idx < offset + subband_size) {
|
||||
return get_perceptual_weight(level, 2, is_chroma, decomp_levels);
|
||||
return get_perceptual_weight(enc, level, 2, is_chroma, decomp_levels);
|
||||
}
|
||||
offset += subband_size;
|
||||
|
||||
// HH subband (diagonal details)
|
||||
if (linear_idx < offset + subband_size) {
|
||||
return get_perceptual_weight(level, 3, is_chroma, decomp_levels);
|
||||
return get_perceptual_weight(enc, level, 3, is_chroma, decomp_levels);
|
||||
}
|
||||
offset += subband_size;
|
||||
}
|
||||
@@ -911,7 +1001,8 @@ static float get_perceptual_weight_for_position(int linear_idx, int width, int h
|
||||
}
|
||||
|
||||
// Apply perceptual quantization per-coefficient (same loop as uniform but with spatial weights)
|
||||
static void quantise_dwt_coefficients_perceptual_per_coeff(float *coeffs, int16_t *quantised, int size,
|
||||
static void quantise_dwt_coefficients_perceptual_per_coeff(tav_encoder_t *enc,
|
||||
float *coeffs, int16_t *quantised, int size,
|
||||
int base_quantizer, int width, int height,
|
||||
int decomp_levels, int is_chroma, int frame_count) {
|
||||
// EXACTLY the same approach as uniform quantization but apply weight per coefficient
|
||||
@@ -923,7 +1014,7 @@ static void quantise_dwt_coefficients_perceptual_per_coeff(float *coeffs, int16_
|
||||
int nonzero = 0;
|
||||
for (int i = 0; i < size; i++) {
|
||||
// Apply perceptual weight based on coefficient's position in DWT layout
|
||||
float weight = get_perceptual_weight_for_position(i, width, height, decomp_levels, is_chroma);
|
||||
float weight = get_perceptual_weight_for_position(enc, i, width, height, decomp_levels, is_chroma);
|
||||
float effective_q = effective_base_q * weight;
|
||||
float quantised_val = coeffs[i] / effective_q;
|
||||
quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767);
|
||||
@@ -935,7 +1026,7 @@ static void quantise_dwt_coefficients_perceptual_per_coeff(float *coeffs, int16_
|
||||
// Normal quantization loop
|
||||
for (int i = 0; i < size; i++) {
|
||||
// Apply perceptual weight based on coefficient's position in DWT layout
|
||||
float weight = get_perceptual_weight_for_position(i, width, height, decomp_levels, is_chroma);
|
||||
float weight = get_perceptual_weight_for_position(enc, i, width, height, decomp_levels, is_chroma);
|
||||
float effective_q = effective_base_q * weight;
|
||||
float quantised_val = coeffs[i] / effective_q;
|
||||
quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767);
|
||||
@@ -1044,9 +1135,9 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
|
||||
// INTRA mode: quantise coefficients directly and store for future reference
|
||||
if (enc->perceptual_tuning) {
|
||||
// Perceptual quantization: EXACTLY like uniform but with per-coefficient weights
|
||||
quantise_dwt_coefficients_perceptual_per_coeff((float*)tile_y_data, quantised_y, tile_size, this_frame_qY, enc->width, enc->height, enc->decomp_levels, 0, enc->frame_count);
|
||||
quantise_dwt_coefficients_perceptual_per_coeff((float*)tile_co_data, quantised_co, tile_size, this_frame_qCo, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count);
|
||||
quantise_dwt_coefficients_perceptual_per_coeff((float*)tile_cg_data, quantised_cg, tile_size, this_frame_qCg, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count);
|
||||
quantise_dwt_coefficients_perceptual_per_coeff(enc, (float*)tile_y_data, quantised_y, tile_size, this_frame_qY, enc->width, enc->height, enc->decomp_levels, 0, enc->frame_count);
|
||||
quantise_dwt_coefficients_perceptual_per_coeff(enc, (float*)tile_co_data, quantised_co, tile_size, this_frame_qCo, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count);
|
||||
quantise_dwt_coefficients_perceptual_per_coeff(enc, (float*)tile_cg_data, quantised_cg, tile_size, this_frame_qCg, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count);
|
||||
} else {
|
||||
// Legacy uniform quantization
|
||||
quantise_dwt_coefficients((float*)tile_y_data, quantised_y, tile_size, this_frame_qY);
|
||||
@@ -1083,9 +1174,9 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
|
||||
|
||||
// Quantise the deltas with per-coefficient perceptual quantization
|
||||
if (enc->perceptual_tuning) {
|
||||
quantise_dwt_coefficients_perceptual_per_coeff(delta_y, quantised_y, tile_size, this_frame_qY, enc->width, enc->height, enc->decomp_levels, 0, 0);
|
||||
quantise_dwt_coefficients_perceptual_per_coeff(delta_co, quantised_co, tile_size, this_frame_qCo, enc->width, enc->height, enc->decomp_levels, 1, 0);
|
||||
quantise_dwt_coefficients_perceptual_per_coeff(delta_cg, quantised_cg, tile_size, this_frame_qCg, enc->width, enc->height, enc->decomp_levels, 1, 0);
|
||||
quantise_dwt_coefficients_perceptual_per_coeff(enc, delta_y, quantised_y, tile_size, this_frame_qY, enc->width, enc->height, enc->decomp_levels, 0, 0);
|
||||
quantise_dwt_coefficients_perceptual_per_coeff(enc, delta_co, quantised_co, tile_size, this_frame_qCo, enc->width, enc->height, enc->decomp_levels, 1, 0);
|
||||
quantise_dwt_coefficients_perceptual_per_coeff(enc, delta_cg, quantised_cg, tile_size, this_frame_qCg, enc->width, enc->height, enc->decomp_levels, 1, 0);
|
||||
} else {
|
||||
// Legacy uniform delta quantization
|
||||
quantise_dwt_coefficients(delta_y, quantised_y, tile_size, this_frame_qY);
|
||||
@@ -1113,12 +1204,12 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
|
||||
if (level_width < 1 || level_height < 1) continue;
|
||||
|
||||
// Get perceptual weights for this level
|
||||
float lh_weight_y = get_perceptual_weight(level, 1, 0, enc->decomp_levels);
|
||||
float hl_weight_y = get_perceptual_weight(level, 2, 0, enc->decomp_levels);
|
||||
float hh_weight_y = get_perceptual_weight(level, 3, 0, enc->decomp_levels);
|
||||
float lh_weight_co = get_perceptual_weight(level, 1, 1, enc->decomp_levels);
|
||||
float hl_weight_co = get_perceptual_weight(level, 2, 1, enc->decomp_levels);
|
||||
float hh_weight_co = get_perceptual_weight(level, 3, 1, enc->decomp_levels);
|
||||
float lh_weight_y = get_perceptual_weight(enc, level, 1, 0, enc->decomp_levels);
|
||||
float hl_weight_y = get_perceptual_weight(enc, level, 2, 0, enc->decomp_levels);
|
||||
float hh_weight_y = get_perceptual_weight(enc, level, 3, 0, enc->decomp_levels);
|
||||
float lh_weight_co = get_perceptual_weight(enc, level, 1, 1, enc->decomp_levels);
|
||||
float hl_weight_co = get_perceptual_weight(enc, level, 2, 1, enc->decomp_levels);
|
||||
float hh_weight_co = get_perceptual_weight(enc, level, 3, 1, enc->decomp_levels);
|
||||
|
||||
// Correct LH subband (top-right quadrant)
|
||||
for (int y = 0; y < level_height; y++) {
|
||||
@@ -1170,8 +1261,8 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
|
||||
// Finally, correct LL subband (top-left corner at finest level)
|
||||
int ll_width = enc->width >> enc->decomp_levels;
|
||||
int ll_height = enc->height >> enc->decomp_levels;
|
||||
float ll_weight_y = get_perceptual_weight(enc->decomp_levels, 0, 0, enc->decomp_levels);
|
||||
float ll_weight_co = get_perceptual_weight(enc->decomp_levels, 0, 1, enc->decomp_levels);
|
||||
float ll_weight_y = get_perceptual_weight(enc, enc->decomp_levels, 0, 0, enc->decomp_levels);
|
||||
float ll_weight_co = get_perceptual_weight(enc, enc->decomp_levels, 0, 1, enc->decomp_levels);
|
||||
for (int y = 0; y < ll_height; y++) {
|
||||
for (int x = 0; x < ll_width; x++) {
|
||||
if (y < enc->height && x < enc->width) {
|
||||
|
||||
Reference in New Issue
Block a user