even more psychovisual model

This commit is contained in:
minjaesong
2025-09-22 01:01:15 +09:00
parent 3584520ff9
commit 28624309d7
3 changed files with 13 additions and 15 deletions

View File

@@ -694,7 +694,7 @@ DCT-based compression, motion compensation, and efficient temporal coding.
...
## Header (24 bytes)
uint8 Magic[8]: "\x1FTSVM TEV"
uint8 Magic[8]: "\x1F TSVM TEV"
uint8 Version: 2 (YCoCg-R) or 3 (ICtCp)
uint16 Width: video width in pixels
uint16 Height: video height in pixels
@@ -815,7 +815,7 @@ transmission capability, and region-of-interest coding.
...
## Header (32 bytes)
uint8 Magic[8]: "\x1FTSVM TAV"
uint8 Magic[8]: "\x1F TSVM TAV"
uint8 Version: 3 (YCoCg-R uniform), 4 (ICtCp uniform), 5 (YCoCg-R perceptual), 6 (ICtCp perceptual)
uint16 Width: video width in pixels
uint16 Height: video height in pixels
@@ -837,7 +837,7 @@ transmission capability, and region-of-interest coding.
## Packet Types
0x10: I-frame (intra-coded frame)
0x11: P-frame (predicted frame with motion compensation)
0x11: P-frame (delta-coded frame)
0x20: MP2 audio packet
0x30: Subtitle in "Simple" format
0xFF: sync packet
@@ -942,7 +942,6 @@ TAV decoder requires new GraphicsJSR223Delegate functions:
- tavDecode(): Main DWT decoding function
- tavDWT2D(): 2D DWT/IDWT transforms
- tavQuantize(): Multi-band quantization
- tavMotionCompensate(): 64x64 tile motion compensation
## Audio Support
Reuses existing MP2 audio infrastructure from TEV/MOV formats for compatibility.
@@ -951,8 +950,7 @@ Reuses existing MP2 audio infrastructure from TEV/MOV formats for compatibility.
Uses same Simple Subtitle Format (SSF) as TEV for text overlay functionality.
## NTSC Framerate handling
Unlike the TEV format, TAV emits extra sync packet for every 1000th frames. Decoder can just play the video
without any special treatment.
Unlike the TEV format, the TAV encoder emits an extra sync packet for every 1000th frame. The decoder can just play the video without any special treatment.
--------------------------------------------------------------------------------

View File

@@ -4143,14 +4143,14 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// LUMA CHANNEL: Based on statistical analysis from real video content
// LL subband - contains most image energy, preserve carefully
if (subbandType == 0) return perceptual_model3_LL(qualityLevel, level)
if (subbandType == 0) return perceptual_model3_LL(qualityLevel, level + 1)
// LH subband - horizontal details (human eyes more sensitive)
val LH: Float = perceptual_model3_LH(qualityLevel, level)
val LH: Float = perceptual_model3_LH(qualityLevel, level + 1)
if (subbandType == 1) return LH
// HL subband - vertical details
val HL: Float = perceptual_model3_HL(qualityLevel, LH)
val HL: Float = perceptual_model3_HL(qualityLevel, LH + 1)
if (subbandType == 2) return HL * (if (level == 2) TWO_PIXEL_DETAILER else if (level == 3) FOUR_PIXEL_DETAILER else 1f)
// HH subband - diagonal details
@@ -4158,7 +4158,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} else {
// CHROMA CHANNELS: Less critical for human perception, more aggressive quantization
val base = perceptual_model3_chroma_basecurve(qualityLevel, level)
val base = perceptual_model3_chroma_basecurve(qualityLevel, level - 1)
if (subbandType == 0) { // LL chroma - still important but less than luma
return 1.0f
@@ -4284,7 +4284,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
tilesX = 1
tilesY = 1
} else {
// Standard mode: multiple 280x224 tiles
// Standard mode: multiple 280x224 tiles (supported for backwards compatibility only)
tilesX = (width + TILE_SIZE_X - 1) / TILE_SIZE_X
tilesY = (height + TILE_SIZE_Y - 1) / TILE_SIZE_Y
}

View File

@@ -911,15 +911,15 @@ static float get_perceptual_weight(tav_encoder_t *enc, int level, int subband_ty
if (!is_chroma) {
// LL subband - contains most image energy, preserve carefully
if (subband_type == 0)
return perceptual_model3_LL(enc->quality_level, level);
return perceptual_model3_LL(enc->quality_level, level + 1);
// LH subband - horizontal details (human eyes more sensitive)
float LH = perceptual_model3_LH(enc->quality_level, level);
float LH = perceptual_model3_LH(enc->quality_level, level + 1);
if (subband_type == 1)
return LH;
// HL subband - vertical details
float HL = perceptual_model3_HL(enc->quality_level, LH);
float HL = perceptual_model3_HL(enc->quality_level, LH + 1);
if (subband_type == 2)
return HL * (level == 2 ? TWO_PIXEL_DETAILER : level == 3 ? FOUR_PIXEL_DETAILER : 1.0f);
@@ -931,7 +931,7 @@ static float get_perceptual_weight(tav_encoder_t *enc, int level, int subband_ty
//// mimic 4:4:0 (you heard that right!) chroma subsampling (4:4:4 for higher q, 4:2:0 for lower q)
//// because our eyes are apparently sensitive to horizontal chroma diff as well?
float base = perceptual_model3_chroma_basecurve(enc->quality_level, level);
float base = perceptual_model3_chroma_basecurve(enc->quality_level, level - 1);
if (subband_type == 0) { // LL chroma - still important but less than luma
return 1.0f;