mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-06-10 06:54:04 +09:00
even more psychovisual model
This commit is contained in:
@@ -694,7 +694,7 @@ DCT-based compression, motion compensation, and efficient temporal coding.
|
|||||||
...
|
...
|
||||||
|
|
||||||
## Header (24 bytes)
|
## Header (24 bytes)
|
||||||
uint8 Magic[8]: "\x1FTSVM TEV"
|
uint8 Magic[8]: "\x1F TSVM TEV"
|
||||||
uint8 Version: 2 (YCoCg-R) or 3 (ICtCp)
|
uint8 Version: 2 (YCoCg-R) or 3 (ICtCp)
|
||||||
uint16 Width: video width in pixels
|
uint16 Width: video width in pixels
|
||||||
uint16 Height: video height in pixels
|
uint16 Height: video height in pixels
|
||||||
@@ -815,7 +815,7 @@ transmission capability, and region-of-interest coding.
|
|||||||
...
|
...
|
||||||
|
|
||||||
## Header (32 bytes)
|
## Header (32 bytes)
|
||||||
uint8 Magic[8]: "\x1FTSVM TAV"
|
uint8 Magic[8]: "\x1F TSVM TAV"
|
||||||
uint8 Version: 3 (YCoCg-R uniform), 4 (ICtCp uniform), 5 (YCoCg-R perceptual), 6 (ICtCp perceptual)
|
uint8 Version: 3 (YCoCg-R uniform), 4 (ICtCp uniform), 5 (YCoCg-R perceptual), 6 (ICtCp perceptual)
|
||||||
uint16 Width: video width in pixels
|
uint16 Width: video width in pixels
|
||||||
uint16 Height: video height in pixels
|
uint16 Height: video height in pixels
|
||||||
@@ -837,7 +837,7 @@ transmission capability, and region-of-interest coding.
|
|||||||
|
|
||||||
## Packet Types
|
## Packet Types
|
||||||
0x10: I-frame (intra-coded frame)
|
0x10: I-frame (intra-coded frame)
|
||||||
0x11: P-frame (predicted frame with motion compensation)
|
0x11: P-frame (delta-coded frame)
|
||||||
0x20: MP2 audio packet
|
0x20: MP2 audio packet
|
||||||
0x30: Subtitle in "Simple" format
|
0x30: Subtitle in "Simple" format
|
||||||
0xFF: sync packet
|
0xFF: sync packet
|
||||||
@@ -942,7 +942,6 @@ TAV decoder requires new GraphicsJSR223Delegate functions:
|
|||||||
- tavDecode(): Main DWT decoding function
|
- tavDecode(): Main DWT decoding function
|
||||||
- tavDWT2D(): 2D DWT/IDWT transforms
|
- tavDWT2D(): 2D DWT/IDWT transforms
|
||||||
- tavQuantize(): Multi-band quantization
|
- tavQuantize(): Multi-band quantization
|
||||||
- tavMotionCompensate(): 64x64 tile motion compensation
|
|
||||||
|
|
||||||
## Audio Support
|
## Audio Support
|
||||||
Reuses existing MP2 audio infrastructure from TEV/MOV formats for compatibility.
|
Reuses existing MP2 audio infrastructure from TEV/MOV formats for compatibility.
|
||||||
@@ -951,8 +950,7 @@ Reuses existing MP2 audio infrastructure from TEV/MOV formats for compatibility.
|
|||||||
Uses same Simple Subtitle Format (SSF) as TEV for text overlay functionality.
|
Uses same Simple Subtitle Format (SSF) as TEV for text overlay functionality.
|
||||||
|
|
||||||
## NTSC Framerate handling
|
## NTSC Framerate handling
|
||||||
Unlike the TEV format, TAV emits extra sync packet for every 1000th frames. Decoder can just play the video
|
Unlike the TEV format, the TAV encoder emits an extra sync packet every 1000th frame. The decoder can just play the video without any special treatment.
|
||||||
without any special treatment.
|
|
||||||
|
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|||||||
@@ -4143,14 +4143,14 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
// LUMA CHANNEL: Based on statistical analysis from real video content
|
// LUMA CHANNEL: Based on statistical analysis from real video content
|
||||||
|
|
||||||
// LL subband - contains most image energy, preserve carefully
|
// LL subband - contains most image energy, preserve carefully
|
||||||
if (subbandType == 0) return perceptual_model3_LL(qualityLevel, level)
|
if (subbandType == 0) return perceptual_model3_LL(qualityLevel, level + 1)
|
||||||
|
|
||||||
// LH subband - horizontal details (human eyes more sensitive)
|
// LH subband - horizontal details (human eyes more sensitive)
|
||||||
val LH: Float = perceptual_model3_LH(qualityLevel, level)
|
val LH: Float = perceptual_model3_LH(qualityLevel, level + 1)
|
||||||
if (subbandType == 1) return LH
|
if (subbandType == 1) return LH
|
||||||
|
|
||||||
// HL subband - vertical details
|
// HL subband - vertical details
|
||||||
val HL: Float = perceptual_model3_HL(qualityLevel, LH)
|
val HL: Float = perceptual_model3_HL(qualityLevel, LH + 1)
|
||||||
if (subbandType == 2) return HL * (if (level == 2) TWO_PIXEL_DETAILER else if (level == 3) FOUR_PIXEL_DETAILER else 1f)
|
if (subbandType == 2) return HL * (if (level == 2) TWO_PIXEL_DETAILER else if (level == 3) FOUR_PIXEL_DETAILER else 1f)
|
||||||
|
|
||||||
// HH subband - diagonal details
|
// HH subband - diagonal details
|
||||||
@@ -4158,7 +4158,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
|
|
||||||
} else {
|
} else {
|
||||||
// CHROMA CHANNELS: Less critical for human perception, more aggressive quantization
|
// CHROMA CHANNELS: Less critical for human perception, more aggressive quantization
|
||||||
val base = perceptual_model3_chroma_basecurve(qualityLevel, level)
|
val base = perceptual_model3_chroma_basecurve(qualityLevel, level - 1)
|
||||||
|
|
||||||
if (subbandType == 0) { // LL chroma - still important but less than luma
|
if (subbandType == 0) { // LL chroma - still important but less than luma
|
||||||
return 1.0f
|
return 1.0f
|
||||||
@@ -4284,7 +4284,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
tilesX = 1
|
tilesX = 1
|
||||||
tilesY = 1
|
tilesY = 1
|
||||||
} else {
|
} else {
|
||||||
// Standard mode: multiple 280x224 tiles
|
// Standard mode: multiple 280x224 tiles (supported for backwards compatibility only)
|
||||||
tilesX = (width + TILE_SIZE_X - 1) / TILE_SIZE_X
|
tilesX = (width + TILE_SIZE_X - 1) / TILE_SIZE_X
|
||||||
tilesY = (height + TILE_SIZE_Y - 1) / TILE_SIZE_Y
|
tilesY = (height + TILE_SIZE_Y - 1) / TILE_SIZE_Y
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -911,15 +911,15 @@ static float get_perceptual_weight(tav_encoder_t *enc, int level, int subband_ty
|
|||||||
if (!is_chroma) {
|
if (!is_chroma) {
|
||||||
// LL subband - contains most image energy, preserve carefully
|
// LL subband - contains most image energy, preserve carefully
|
||||||
if (subband_type == 0)
|
if (subband_type == 0)
|
||||||
return perceptual_model3_LL(enc->quality_level, level);
|
return perceptual_model3_LL(enc->quality_level, level + 1);
|
||||||
|
|
||||||
// LH subband - horizontal details (human eyes more sensitive)
|
// LH subband - horizontal details (human eyes more sensitive)
|
||||||
float LH = perceptual_model3_LH(enc->quality_level, level);
|
float LH = perceptual_model3_LH(enc->quality_level, level + 1);
|
||||||
if (subband_type == 1)
|
if (subband_type == 1)
|
||||||
return LH;
|
return LH;
|
||||||
|
|
||||||
// HL subband - vertical details
|
// HL subband - vertical details
|
||||||
float HL = perceptual_model3_HL(enc->quality_level, LH);
|
float HL = perceptual_model3_HL(enc->quality_level, LH + 1);
|
||||||
if (subband_type == 2)
|
if (subband_type == 2)
|
||||||
return HL * (level == 2 ? TWO_PIXEL_DETAILER : level == 3 ? FOUR_PIXEL_DETAILER : 1.0f);
|
return HL * (level == 2 ? TWO_PIXEL_DETAILER : level == 3 ? FOUR_PIXEL_DETAILER : 1.0f);
|
||||||
|
|
||||||
@@ -931,7 +931,7 @@ static float get_perceptual_weight(tav_encoder_t *enc, int level, int subband_ty
|
|||||||
//// mimic 4:4:0 (you heard that right!) chroma subsampling (4:4:4 for higher q, 4:2:0 for lower q)
|
//// mimic 4:4:0 (you heard that right!) chroma subsampling (4:4:4 for higher q, 4:2:0 for lower q)
|
||||||
//// because our eyes are apparently sensitive to horizontal chroma diff as well?
|
//// because our eyes are apparently sensitive to horizontal chroma diff as well?
|
||||||
|
|
||||||
float base = perceptual_model3_chroma_basecurve(enc->quality_level, level);
|
float base = perceptual_model3_chroma_basecurve(enc->quality_level, level - 1);
|
||||||
|
|
||||||
if (subband_type == 0) { // LL chroma - still important but less than luma
|
if (subband_type == 0) { // LL chroma - still important but less than luma
|
||||||
return 1.0f;
|
return 1.0f;
|
||||||
|
|||||||
Reference in New Issue
Block a user