From 28624309d7f976d34cde52a476179d9db0fea5da Mon Sep 17 00:00:00 2001 From: minjaesong Date: Mon, 22 Sep 2025 01:01:15 +0900 Subject: [PATCH] even more psychovisual model --- terranmon.txt | 10 ++++------ .../src/net/torvald/tsvm/GraphicsJSR223Delegate.kt | 10 +++++----- video_encoder/encoder_tav.c | 8 ++++---- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/terranmon.txt b/terranmon.txt index aed269d..d8f52e9 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -694,7 +694,7 @@ DCT-based compression, motion compensation, and efficient temporal coding. ... ## Header (24 bytes) - uint8 Magic[8]: "\x1FTSVM TEV" + uint8 Magic[8]: "\x1F TSVM TEV" uint8 Version: 2 (YCoCg-R) or 3 (ICtCp) uint16 Width: video width in pixels uint16 Height: video height in pixels @@ -815,7 +815,7 @@ transmission capability, and region-of-interest coding. ... ## Header (32 bytes) - uint8 Magic[8]: "\x1FTSVM TAV" + uint8 Magic[8]: "\x1F TSVM TAV" uint8 Version: 3 (YCoCg-R uniform), 4 (ICtCp uniform), 5 (YCoCg-R perceptual), 6 (ICtCp perceptual) uint16 Width: video width in pixels uint16 Height: video height in pixels @@ -837,7 +837,7 @@ transmission capability, and region-of-interest coding. ## Packet Types 0x10: I-frame (intra-coded frame) - 0x11: P-frame (predicted frame with motion compensation) + 0x11: P-frame (delta-coded frame) 0x20: MP2 audio packet 0x30: Subtitle in "Simple" format 0xFF: sync packet @@ -942,7 +942,6 @@ TAV decoder requires new GraphicsJSR223Delegate functions: - tavDecode(): Main DWT decoding function - tavDWT2D(): 2D DWT/IDWT transforms - tavQuantize(): Multi-band quantization -- tavMotionCompensate(): 64x64 tile motion compensation ## Audio Support Reuses existing MP2 audio infrastructure from TEV/MOV formats for compatibility. @@ -951,8 +950,7 @@ Reuses existing MP2 audio infrastructure from TEV/MOV formats for compatibility. Uses same Simple Subtitle Format (SSF) as TEV for text overlay functionality. 
## NTSC Framerate handling -Unlike the TEV format, TAV emits extra sync packet for every 1000th frames. Decoder can just play the video -without any special treatment. +Unlike the TEV format, the TAV encoder emits an extra sync packet for every 1000th frame. The decoder can just play the video without any special treatment. -------------------------------------------------------------------------------- diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index 13b65fa..166ee5e 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -4143,14 +4143,14 @@ class GraphicsJSR223Delegate(private val vm: VM) { // LUMA CHANNEL: Based on statistical analysis from real video content // LL subband - contains most image energy, preserve carefully - if (subbandType == 0) return perceptual_model3_LL(qualityLevel, level) + if (subbandType == 0) return perceptual_model3_LL(qualityLevel, level + 1) // LH subband - horizontal details (human eyes more sensitive) - val LH: Float = perceptual_model3_LH(qualityLevel, level) + val LH: Float = perceptual_model3_LH(qualityLevel, level + 1) if (subbandType == 1) return LH // HL subband - vertical details - val HL: Float = perceptual_model3_HL(qualityLevel, LH) + val HL: Float = perceptual_model3_HL(qualityLevel, LH + 1) if (subbandType == 2) return HL * (if (level == 2) TWO_PIXEL_DETAILER else if (level == 3) FOUR_PIXEL_DETAILER else 1f) // HH subband - diagonal details @@ -4158,7 +4158,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } else { // CHROMA CHANNELS: Less critical for human perception, more aggressive quantization - val base = perceptual_model3_chroma_basecurve(qualityLevel, level) + val base = perceptual_model3_chroma_basecurve(qualityLevel, level - 1) if (subbandType == 0) { // LL chroma - still important but less than luma return 1.0f @@ -4284,7 +4284,7 @@ class
GraphicsJSR223Delegate(private val vm: VM) { tilesX = 1 tilesY = 1 } else { - // Standard mode: multiple 280x224 tiles + // Standard mode: multiple 280x224 tiles (supported for backwards compatibility only) tilesX = (width + TILE_SIZE_X - 1) / TILE_SIZE_X tilesY = (height + TILE_SIZE_Y - 1) / TILE_SIZE_Y } diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index 14cc2ff..2d6f7ae 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -911,15 +911,15 @@ static float get_perceptual_weight(tav_encoder_t *enc, int level, int subband_ty if (!is_chroma) { // LL subband - contains most image energy, preserve carefully if (subband_type == 0) - return perceptual_model3_LL(enc->quality_level, level); + return perceptual_model3_LL(enc->quality_level, level + 1); // LH subband - horizontal details (human eyes more sensitive) - float LH = perceptual_model3_LH(enc->quality_level, level); + float LH = perceptual_model3_LH(enc->quality_level, level + 1); if (subband_type == 1) return LH; // HL subband - vertical details - float HL = perceptual_model3_HL(enc->quality_level, LH); + float HL = perceptual_model3_HL(enc->quality_level, LH + 1); if (subband_type == 2) return HL * (level == 2 ? TWO_PIXEL_DETAILER : level == 3 ? FOUR_PIXEL_DETAILER : 1.0f); @@ -931,7 +931,7 @@ static float get_perceptual_weight(tav_encoder_t *enc, int level, int subband_ty //// mimic 4:4:0 (you heard that right!) chroma subsampling (4:4:4 for higher q, 4:2:0 for lower q) //// because our eyes are apparently sensitive to horizontal chroma diff as well? - float base = perceptual_model3_chroma_basecurve(enc->quality_level, level); + float base = perceptual_model3_chroma_basecurve(enc->quality_level, level - 1); if (subband_type == 0) { // LL chroma - still important but less than luma return 1.0f;