mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 19:51:51 +09:00
even more psychovisual model
This commit is contained in:
@@ -694,7 +694,7 @@ DCT-based compression, motion compensation, and efficient temporal coding.
|
||||
...
|
||||
|
||||
## Header (24 bytes)
|
||||
uint8 Magic[8]: "\x1FTSVM TEV"
|
||||
uint8 Magic[8]: "\x1F TSVM TEV"
|
||||
uint8 Version: 2 (YCoCg-R) or 3 (ICtCp)
|
||||
uint16 Width: video width in pixels
|
||||
uint16 Height: video height in pixels
|
||||
@@ -815,7 +815,7 @@ transmission capability, and region-of-interest coding.
|
||||
...
|
||||
|
||||
## Header (32 bytes)
|
||||
uint8 Magic[8]: "\x1FTSVM TAV"
|
||||
uint8 Magic[8]: "\x1F TSVM TAV"
|
||||
uint8 Version: 3 (YCoCg-R uniform), 4 (ICtCp uniform), 5 (YCoCg-R perceptual), 6 (ICtCp perceptual)
|
||||
uint16 Width: video width in pixels
|
||||
uint16 Height: video height in pixels
|
||||
@@ -837,7 +837,7 @@ transmission capability, and region-of-interest coding.
|
||||
|
||||
## Packet Types
|
||||
0x10: I-frame (intra-coded frame)
|
||||
0x11: P-frame (predicted frame with motion compensation)
|
||||
0x11: P-frame (delta-coded frame)
|
||||
0x20: MP2 audio packet
|
||||
0x30: Subtitle in "Simple" format
|
||||
0xFF: sync packet
|
||||
@@ -942,7 +942,6 @@ TAV decoder requires new GraphicsJSR223Delegate functions:
|
||||
- tavDecode(): Main DWT decoding function
|
||||
- tavDWT2D(): 2D DWT/IDWT transforms
|
||||
- tavQuantize(): Multi-band quantization
|
||||
- tavMotionCompensate(): 64x64 tile motion compensation
|
||||
|
||||
## Audio Support
|
||||
Reuses existing MP2 audio infrastructure from TEV/MOV formats for compatibility.
|
||||
@@ -951,8 +950,7 @@ Reuses existing MP2 audio infrastructure from TEV/MOV formats for compatibility.
|
||||
Uses same Simple Subtitle Format (SSF) as TEV for text overlay functionality.
|
||||
|
||||
## NTSC Framerate handling
|
||||
Unlike the TEV format, TAV emits extra sync packet for every 1000th frames. Decoder can just play the video
|
||||
without any special treatment.
|
||||
Unlike the TEV format, the TAV encoder emits an extra sync packet at every 1000th frame. The decoder can just play the video without any special treatment.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -4143,14 +4143,14 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
// LUMA CHANNEL: Based on statistical analysis from real video content
|
||||
|
||||
// LL subband - contains most image energy, preserve carefully
|
||||
if (subbandType == 0) return perceptual_model3_LL(qualityLevel, level)
|
||||
if (subbandType == 0) return perceptual_model3_LL(qualityLevel, level + 1)
|
||||
|
||||
// LH subband - horizontal details (human eyes more sensitive)
|
||||
val LH: Float = perceptual_model3_LH(qualityLevel, level)
|
||||
val LH: Float = perceptual_model3_LH(qualityLevel, level + 1)
|
||||
if (subbandType == 1) return LH
|
||||
|
||||
// HL subband - vertical details
|
||||
val HL: Float = perceptual_model3_HL(qualityLevel, LH)
|
||||
val HL: Float = perceptual_model3_HL(qualityLevel, LH + 1)
|
||||
if (subbandType == 2) return HL * (if (level == 2) TWO_PIXEL_DETAILER else if (level == 3) FOUR_PIXEL_DETAILER else 1f)
|
||||
|
||||
// HH subband - diagonal details
|
||||
@@ -4158,7 +4158,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
|
||||
} else {
|
||||
// CHROMA CHANNELS: Less critical for human perception, more aggressive quantization
|
||||
val base = perceptual_model3_chroma_basecurve(qualityLevel, level)
|
||||
val base = perceptual_model3_chroma_basecurve(qualityLevel, level - 1)
|
||||
|
||||
if (subbandType == 0) { // LL chroma - still important but less than luma
|
||||
return 1.0f
|
||||
@@ -4284,7 +4284,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
tilesX = 1
|
||||
tilesY = 1
|
||||
} else {
|
||||
// Standard mode: multiple 280x224 tiles
|
||||
// Standard mode: multiple 280x224 tiles (supported for backwards compatibility only)
|
||||
tilesX = (width + TILE_SIZE_X - 1) / TILE_SIZE_X
|
||||
tilesY = (height + TILE_SIZE_Y - 1) / TILE_SIZE_Y
|
||||
}
|
||||
|
||||
@@ -911,15 +911,15 @@ static float get_perceptual_weight(tav_encoder_t *enc, int level, int subband_ty
|
||||
if (!is_chroma) {
|
||||
// LL subband - contains most image energy, preserve carefully
|
||||
if (subband_type == 0)
|
||||
return perceptual_model3_LL(enc->quality_level, level);
|
||||
return perceptual_model3_LL(enc->quality_level, level + 1);
|
||||
|
||||
// LH subband - horizontal details (human eyes more sensitive)
|
||||
float LH = perceptual_model3_LH(enc->quality_level, level);
|
||||
float LH = perceptual_model3_LH(enc->quality_level, level + 1);
|
||||
if (subband_type == 1)
|
||||
return LH;
|
||||
|
||||
// HL subband - vertical details
|
||||
float HL = perceptual_model3_HL(enc->quality_level, LH);
|
||||
float HL = perceptual_model3_HL(enc->quality_level, LH + 1);
|
||||
if (subband_type == 2)
|
||||
return HL * (level == 2 ? TWO_PIXEL_DETAILER : level == 3 ? FOUR_PIXEL_DETAILER : 1.0f);
|
||||
|
||||
@@ -931,7 +931,7 @@ static float get_perceptual_weight(tav_encoder_t *enc, int level, int subband_ty
|
||||
//// mimic 4:4:0 (you heard that right!) chroma subsampling (4:4:4 for higher q, 4:2:0 for lower q)
|
||||
//// because our eyes are apparently sensitive to horizontal chroma diff as well?
|
||||
|
||||
float base = perceptual_model3_chroma_basecurve(enc->quality_level, level);
|
||||
float base = perceptual_model3_chroma_basecurve(enc->quality_level, level - 1);
|
||||
|
||||
if (subband_type == 0) { // LL chroma - still important but less than luma
|
||||
return 1.0f;
|
||||
|
||||
Reference in New Issue
Block a user