even more psychovisual model

This commit is contained in:
minjaesong
2025-09-22 01:01:15 +09:00
parent 3584520ff9
commit 28624309d7
3 changed files with 13 additions and 15 deletions

View File

@@ -694,7 +694,7 @@ DCT-based compression, motion compensation, and efficient temporal coding.
...
## Header (24 bytes)
uint8 Magic[8]: "\x1FTSVM TEV"
uint8 Magic[8]: "\x1F TSVM TEV"
uint8 Version: 2 (YCoCg-R) or 3 (ICtCp)
uint16 Width: video width in pixels
uint16 Height: video height in pixels
@@ -815,7 +815,7 @@ transmission capability, and region-of-interest coding.
...
## Header (32 bytes)
uint8 Magic[8]: "\x1FTSVM TAV"
uint8 Magic[8]: "\x1F TSVM TAV"
uint8 Version: 3 (YCoCg-R uniform), 4 (ICtCp uniform), 5 (YCoCg-R perceptual), 6 (ICtCp perceptual)
uint16 Width: video width in pixels
uint16 Height: video height in pixels
@@ -837,7 +837,7 @@ transmission capability, and region-of-interest coding.
## Packet Types
0x10: I-frame (intra-coded frame)
0x11: P-frame (predicted frame with motion compensation)
0x11: P-frame (delta-coded frame)
0x20: MP2 audio packet
0x30: Subtitle in "Simple" format
0xFF: sync packet
@@ -942,7 +942,6 @@ TAV decoder requires new GraphicsJSR223Delegate functions:
- tavDecode(): Main DWT decoding function
- tavDWT2D(): 2D DWT/IDWT transforms
- tavQuantize(): Multi-band quantization
- tavMotionCompensate(): 64x64 tile motion compensation
## Audio Support
Reuses existing MP2 audio infrastructure from TEV/MOV formats for compatibility.
@@ -951,8 +950,7 @@ Reuses existing MP2 audio infrastructure from TEV/MOV formats for compatibility.
Uses same Simple Subtitle Format (SSF) as TEV for text overlay functionality.
## NTSC Framerate handling
Unlike the TEV format, TAV emits extra sync packet for every 1000th frames. Decoder can just play the video
without any special treatment.
Unlike the TEV format, the TAV encoder emits an extra sync packet for every 1000th frame. The decoder can just play the video without any special treatment.
--------------------------------------------------------------------------------

View File

@@ -4143,14 +4143,14 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// LUMA CHANNEL: Based on statistical analysis from real video content
// LL subband - contains most image energy, preserve carefully
if (subbandType == 0) return perceptual_model3_LL(qualityLevel, level)
if (subbandType == 0) return perceptual_model3_LL(qualityLevel, level + 1)
// LH subband - horizontal details (human eyes more sensitive)
val LH: Float = perceptual_model3_LH(qualityLevel, level)
val LH: Float = perceptual_model3_LH(qualityLevel, level + 1)
if (subbandType == 1) return LH
// HL subband - vertical details
val HL: Float = perceptual_model3_HL(qualityLevel, LH)
val HL: Float = perceptual_model3_HL(qualityLevel, LH + 1)
if (subbandType == 2) return HL * (if (level == 2) TWO_PIXEL_DETAILER else if (level == 3) FOUR_PIXEL_DETAILER else 1f)
// HH subband - diagonal details
@@ -4158,7 +4158,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} else {
// CHROMA CHANNELS: Less critical for human perception, more aggressive quantization
val base = perceptual_model3_chroma_basecurve(qualityLevel, level)
val base = perceptual_model3_chroma_basecurve(qualityLevel, level - 1)
if (subbandType == 0) { // LL chroma - still important but less than luma
return 1.0f
@@ -4284,7 +4284,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
tilesX = 1
tilesY = 1
} else {
// Standard mode: multiple 280x224 tiles
// Standard mode: multiple 280x224 tiles (supported for backwards compatibility only)
tilesX = (width + TILE_SIZE_X - 1) / TILE_SIZE_X
tilesY = (height + TILE_SIZE_Y - 1) / TILE_SIZE_Y
}

View File

@@ -911,15 +911,15 @@ static float get_perceptual_weight(tav_encoder_t *enc, int level, int subband_ty
if (!is_chroma) {
// LL subband - contains most image energy, preserve carefully
if (subband_type == 0)
return perceptual_model3_LL(enc->quality_level, level);
return perceptual_model3_LL(enc->quality_level, level + 1);
// LH subband - horizontal details (human eyes more sensitive)
float LH = perceptual_model3_LH(enc->quality_level, level);
float LH = perceptual_model3_LH(enc->quality_level, level + 1);
if (subband_type == 1)
return LH;
// HL subband - vertical details
float HL = perceptual_model3_HL(enc->quality_level, LH);
float HL = perceptual_model3_HL(enc->quality_level, LH + 1);
if (subband_type == 2)
return HL * (level == 2 ? TWO_PIXEL_DETAILER : level == 3 ? FOUR_PIXEL_DETAILER : 1.0f);
@@ -931,7 +931,7 @@ static float get_perceptual_weight(tav_encoder_t *enc, int level, int subband_ty
//// mimic 4:4:0 (you heard that right!) chroma subsampling (4:4:4 for higher q, 4:2:0 for lower q)
//// because our eyes are apparently sensitive to horizontal chroma diff as well?
float base = perceptual_model3_chroma_basecurve(enc->quality_level, level);
float base = perceptual_model3_chroma_basecurve(enc->quality_level, level - 1);
if (subband_type == 0) { // LL chroma - still important but less than luma
return 1.0f;