mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 11:51:49 +09:00
predictive delta encoding wip
This commit is contained in:
@@ -474,6 +474,7 @@ let filenavOninput = (window, event) => {
|
||||
|
||||
firstRunLatch = true
|
||||
con.curs_set(0);clearScr()
|
||||
refreshFilePanelCache(windowMode)
|
||||
redraw()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -687,7 +687,7 @@ DCT-based compression, motion compensation, and efficient temporal coding.
|
||||
- Version 3.0: Additional support of ICtCp Colour space
|
||||
|
||||
# File Structure
|
||||
\x1F T S V M T E V
|
||||
\x1F T S V M T E V (if video), \x1F T S V M T E P (if still picture)
|
||||
[HEADER]
|
||||
[PACKET 0]
|
||||
[PACKET 1]
|
||||
@@ -695,7 +695,7 @@ DCT-based compression, motion compensation, and efficient temporal coding.
|
||||
...
|
||||
|
||||
## Header (24 bytes)
|
||||
uint8 Magic[8]: "\x1F TSVM TEV"
|
||||
uint8 Magic[8]: "\x1F TSVM TEV" or "\x1F TSVM TEP"
|
||||
uint8 Version: 2 (YCoCg-R) or 3 (ICtCp)
|
||||
uint16 Width: video width in pixels
|
||||
uint16 Height: video height in pixels
|
||||
@@ -726,11 +726,13 @@ DCT-based compression, motion compensation, and efficient temporal coding.
|
||||
0x30: Subtitle in "Simple" format
|
||||
0x31: Subtitle in "Karaoke" format
|
||||
0xE0: EXIF packet
|
||||
0xE1: ID3 packet
|
||||
0xE2: Vorbis Comment packet
|
||||
0xE1: ID3v1 packet
|
||||
0xE2: ID3v2 packet
|
||||
0xE3: Vorbis Comment packet
|
||||
0xE4: CD-text packet
|
||||
0xFF: sync packet
|
||||
|
||||
## EXIF/ID3/Vorbis Comment packet structure
|
||||
## Standard metadata payload packet structure
|
||||
uint8 0xE0/0xE1/0xE2/.../0xEF (see Packet Types section)
|
||||
uint32 Length of the payload
|
||||
* Standard payload
|
||||
@@ -792,11 +794,25 @@ to larger block sizes and hardware acceleration.
|
||||
Reuses existing MP2 audio infrastructure from TSVM MOV format for seamless
|
||||
compatibility with existing audio processing pipeline.
|
||||
|
||||
## Simple Subtitle Format
|
||||
SSF is a simple subtitle that is intended to use text buffer to display texts.
|
||||
The format is designed to be compatible with SubRip and SAMI (without markups).
|
||||
## NTSC Framerate handling
|
||||
The encoder encodes the frames as-is. The decoder must duplicate every 1000th frame to keep the decoding
|
||||
in-sync.
|
||||
|
||||
### SSF Packet Structure
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Simple Subtitle Format (SSF)
|
||||
|
||||
SSF is a simple subtitle format that is intended to display text using the text buffer.
|
||||
The format is designed to be compatible with SubRip and SAMI (without markups) and interoperable with
|
||||
TEV and TAV formats.
|
||||
|
||||
When SSF is interleaved with MP2 audio, the payload must be inserted in-between MP2 frames.
|
||||
|
||||
## Packet Structure
|
||||
uint8 0x30 (packet type)
|
||||
* SSF Payload (see below)
|
||||
|
||||
## SSF Packet Structure
|
||||
uint24 index (used to specify target subtitle object)
|
||||
uint8 opcode
|
||||
0x00 = <argument terminator>, is NOP when used here
|
||||
@@ -811,9 +827,51 @@ The format is designed to be compatible with SubRip and SAMI (without markups).
|
||||
text argument may be terminated by 0x00 BEFORE the entire arguments being terminated by 0x00,
|
||||
leaving extra 0x00 on the byte stream. A decoder must be able to handle the extra zeros.
|
||||
|
||||
## NTSC Framerate handling
|
||||
The encoder encodes the frames as-is. The decoder must duplicate every 1000th frame to keep the decoding
|
||||
in-sync.
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Karaoke Subtitle Format (KSF)
|
||||
|
||||
KSF is a frame-synced subtitle format that is intended for Karaoke-style subtitles.
|
||||
The format is designed to be interoperable with TEV and TAV formats.
|
||||
For non-karaoke style synced lyrics, use SSF.
|
||||
|
||||
When KSF is interleaved with MP2 audio, the payload must be inserted in-between MP2 frames.
|
||||
|
||||
## Packet Structure
|
||||
uint8 0x31 (packet type)
|
||||
* KSF Payload (see below)
|
||||
|
||||
### KSF Packet Structure
|
||||
KSF is line-based: you define an unrevealed line, then subsequent commands reveal words/syllables
|
||||
on appropriate timings.
|
||||
|
||||
uint24 index (used to specify target subtitle object)
|
||||
uint8 opcode
|
||||
<definition opcodes>
|
||||
0x00 = <argument terminator>, is NOP when used here
|
||||
0x01 = define line (arguments: UTF-8 text. Players will also show it in grey)
|
||||
0x02 = delete line (arguments: none)
|
||||
0x03 = move to different nonant (arguments: 0x00-bottom centre; 0x01-bottom left; 0x02-centre left; 0x03-top left; 0x04-top centre; 0x05-top right; 0x06-centre right; 0x07-bottom right; 0x08-centre)
|
||||
|
||||
<reveal opcodes>
|
||||
0x30 = reveal text normally (arguments: UTF-8 text. The reveal text must contain spaces when required)
|
||||
0x31 = reveal text slowly (arguments: UTF-8 text. The effect is implementation-dependent)
|
||||
|
||||
0x40 = reveal text normally with emphasis (arguments: UTF-8 text. On TEV/TAV player, the text will be white; otherwise, implementation-dependent)
|
||||
0x41 = reveal text slowly with emphasis (arguments: UTF-8 text)
|
||||
|
||||
0x50 = reveal text normally with target colour (arguments: uint8 target colour; UTF-8 text)
|
||||
0x51 = reveal text slowly with target colour (arguments: uint8 target colour; UTF-8 text)
|
||||
|
||||
<hardware control opcodes>
|
||||
0x80 = upload to low font rom (arguments: uint16 payload length, var bytes)
|
||||
0x81 = upload to high font rom (arguments: uint16 payload length, var bytes)
|
||||
note: changing the font rom will change the appearance of every subtitle currently being displayed
|
||||
* arguments separated AND terminated by 0x00
|
||||
text argument may be terminated by 0x00 BEFORE the entire arguments being terminated by 0x00,
|
||||
leaving extra 0x00 on the byte stream. A decoder must be able to handle the extra zeros.
|
||||
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
@@ -826,7 +884,7 @@ to DCT-based codecs like TEV. Features include multi-resolution encoding, progre
|
||||
transmission capability, and region-of-interest coding.
|
||||
|
||||
# File Structure
|
||||
\x1F T S V M T A V
|
||||
\x1F T S V M T A V (if video), \x1F T S V M T A P (if still picture)
|
||||
[HEADER]
|
||||
[PACKET 0]
|
||||
[PACKET 1]
|
||||
@@ -834,7 +892,7 @@ transmission capability, and region-of-interest coding.
|
||||
...
|
||||
|
||||
## Header (32 bytes)
|
||||
uint8 Magic[8]: "\x1F TSVM TAV"
|
||||
uint8 Magic[8]: "\x1F TSVM TAV" or "\x1F TSVM TAP"
|
||||
uint8 Version: 3 (YCoCg-R uniform), 4 (ICtCp uniform), 5 (YCoCg-R perceptual), 6 (ICtCp perceptual)
|
||||
uint16 Width: video width in pixels
|
||||
uint16 Height: video height in pixels
|
||||
@@ -856,6 +914,7 @@ transmission capability, and region-of-interest coding.
|
||||
- bit 0 = has alpha channel
|
||||
- bit 1 = is NTSC framerate
|
||||
- bit 2 = is lossless mode
|
||||
- bit 3 = has region-of-interest coding (for still images only)
|
||||
uint8 File Role
|
||||
- 0 = generic
|
||||
- 1 = this file is header-only, and UCF payload will be followed (used by seekable movie file)
|
||||
@@ -871,11 +930,13 @@ transmission capability, and region-of-interest coding.
|
||||
0x30: Subtitle in "Simple" format
|
||||
0x31: Subtitle in "Karaoke" format
|
||||
0xE0: EXIF packet
|
||||
0xE1: ID3 packet
|
||||
0xE2: Vorbis Comment packet
|
||||
0xE1: ID3v1 packet
|
||||
0xE2: ID3v2 packet
|
||||
0xE3: Vorbis Comment packet
|
||||
0xE4: CD-text packet
|
||||
0xFF: sync packet
|
||||
|
||||
## EXIF/ID3/Vorbis Comment packet structure
|
||||
## Standard metadata payload packet structure
|
||||
uint8 0xE0/0xE1/0xE2/.../0xEF (see Packet Types section)
|
||||
uint32 Length of the payload
|
||||
* Standard payload
|
||||
|
||||
@@ -4923,6 +4923,119 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
}
|
||||
}
|
||||
|
||||
// Perceptual weight for a delta (temporal-difference) coefficient.
//
// qualityLevel - quality index, only consulted for chroma (fed to the chroma base curve)
// level        - decomposition level of the subband; chroma evaluates the curve at level - 1
// subbandType  - 0 = LL, 1 = LH, 2 = HL, any other value = HH
// isChroma     - false selects the fixed luma table, true the chroma model
// maxLevels    - accepted for signature parity; this model does not read it
//
// Luma weights are constants: LL 2.0, LH 0.9, HL 1.2, HH 0.5 (note that LH and HL
// are NOT equal). Chroma LL is the constant 1.3; chroma detail subbands scale the
// base curve by 1.4 / 1.6 / 1.8 and clamp the result to a per-subband range.
private fun getPerceptualWeightDelta(qualityLevel: Int, level: Int, subbandType: Int, isChroma: Boolean, maxLevels: Int): Float {
    if (isChroma) {
        // The curve is evaluated up front, exactly once, regardless of subband.
        val curve = getPerceptualModelChromaBase(qualityLevel, level - 1)

        return when (subbandType) {
            0 -> 1.3f                                        // LL chroma deltas
            1 -> (curve * 1.4f).coerceIn(1.2f, 120.0f)       // LH chroma deltas
            2 -> (curve * 1.6f).coerceIn(1.4f, 140.0f)       // HL chroma deltas
            else -> (curve * 1.8f).coerceIn(1.6f, 160.0f)    // HH chroma deltas
        }
    }

    // Luma: fixed weight per subband.
    return when (subbandType) {
        0 -> 2f      // LL - DC motion changes
        1 -> 0.9f    // LH - horizontal motion edges
        2 -> 1.2f    // HL - vertical motion edges
        else -> 0.5f // HH - diagonal motion details
    }
}
|
||||
|
||||
// Helper functions for perceptual models (simplified versions of encoder models)
|
||||
// Simplified LL (DC) model: 1.0 minus the product of (level / 8) and (qualityLevel / 6).
private fun getPerceptualModelLL(qualityLevel: Int, level: Int): Float {
    val levelTerm = level.toFloat() / 8.0f
    val qualityTerm = qualityLevel.toFloat() / 6.0f
    return 1.0f - levelTerm * qualityTerm
}
|
||||
|
||||
// Simplified LH (horizontal detail) model: 1.2 plus the product of (level / 4) and (qualityLevel / 3).
private fun getPerceptualModelLH(qualityLevel: Int, level: Int): Float {
    val levelTerm = level.toFloat() / 4.0f
    val qualityTerm = qualityLevel.toFloat() / 3.0f
    return 1.2f + levelTerm * qualityTerm
}
|
||||
|
||||
// Simplified HL (vertical detail) model: the LH weight scaled by 1.1.
// qualityLevel is part of the signature but does not affect the result.
private fun getPerceptualModelHL(qualityLevel: Int, lhWeight: Float): Float {
    val scale = 1.1f
    return lhWeight * scale
}
|
||||
|
||||
// Simplified HH (diagonal detail) model: 0.6 times the sum of the LH and HL weights.
private fun getPerceptualModelHH(lhWeight: Float, hlWeight: Float): Float {
    val combined = lhWeight + hlWeight
    return combined * 0.6f
}
|
||||
|
||||
// Simplified chroma base curve: a line through 1.0 at level 4 whose slope
// magnitude is 1 / (0.5 * qualityLevel^2 + 1), decreasing as level rises.
private fun getPerceptualModelChromaBase(qualityLevel: Int, level: Int): Float {
    val denominator = 0.5f * qualityLevel * qualityLevel + 1.0f
    val slope = 1.0f / denominator
    return 1.0f - slope * (level - 4.0f)
}
|
||||
|
||||
// Looks up the delta perceptual weight for the coefficient at linearIdx.
//
// The linear layout is walked as: one LL band of size
// (width shr decompLevels) * (height shr decompLevels), then for each level from
// decompLevels down to 1 an LH, HL and HH band, each of size
// (width shr s) * (height shr s) with s = decompLevels - level + 1.
// The first band whose end lies beyond linearIdx determines the (level, subband)
// passed to getPerceptualWeightDelta. Indices past the last band yield 1.0.
private fun getPerceptualWeightForPositionDelta(qualityLevel: Int, linearIdx: Int, width: Int, height: Int, decompLevels: Int, isChroma: Boolean): Float {
    // Running exclusive end index of the band currently being tested.
    var bandEnd = (width shr decompLevels) * (height shr decompLevels)
    if (linearIdx < bandEnd) {
        return getPerceptualWeightDelta(qualityLevel, decompLevels, 0, isChroma, decompLevels)
    }

    for (level in decompLevels downTo 1) {
        val shift = decompLevels - level + 1
        val bandSize = (width shr shift) * (height shr shift)

        // Subband types in storage order: 1 = LH, 2 = HL, 3 = HH.
        for (subbandType in 1..3) {
            bandEnd += bandSize
            if (linearIdx < bandEnd) {
                return getPerceptualWeightDelta(qualityLevel, level, subbandType, isChroma, decompLevels)
            }
        }
    }

    // Index lies outside every band.
    return 1.0f
}
|
||||
|
||||
private fun tavDecodeDeltaTileRGB(qYGlobal: Int, readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
|
||||
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int,
|
||||
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false): Long {
|
||||
@@ -4972,7 +5085,18 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
val currentCo = FloatArray(coeffCount)
|
||||
val currentCg = FloatArray(coeffCount)
|
||||
|
||||
// Uniform delta reconstruction because coefficient deltas cannot be perceptually coded
|
||||
// Delta-specific perceptual reconstruction using motion-optimized coefficients
|
||||
// Estimate quality level from quantization parameters for perceptual weighting
|
||||
val estimatedQualityY = when {
|
||||
qY <= 6 -> 4 // High quality
|
||||
qY <= 12 -> 3 // Medium-high quality
|
||||
qY <= 25 -> 2 // Medium quality
|
||||
qY <= 42 -> 1 // Medium-low quality
|
||||
else -> 0 // Low quality
|
||||
}
|
||||
|
||||
// TEMPORARILY DISABLED: Delta-specific perceptual reconstruction
|
||||
// Use uniform delta reconstruction (same as original implementation)
|
||||
for (i in 0 until coeffCount) {
|
||||
currentY[i] = prevY[i] + (deltaY[i].toFloat() * qY)
|
||||
currentCo[i] = prevCo[i] + (deltaCo[i].toFloat() * qCo)
|
||||
|
||||
@@ -947,6 +947,58 @@ static float get_perceptual_weight(tav_encoder_t *enc, int level, int subband_ty
|
||||
}
|
||||
}
|
||||
|
||||
// Delta-specific perceptual weight model optimized for temporal coefficient differences
|
||||
static float get_perceptual_weight_delta(tav_encoder_t *enc, int level, int subband_type, int is_chroma, int max_levels) {
|
||||
// Delta coefficients have different perceptual characteristics than full-picture coefficients:
|
||||
// 1. Motion edges are more perceptually critical than static edges
|
||||
// 2. Temporal masking allows more aggressive quantization in high-motion areas
|
||||
// 3. Smaller delta magnitudes make relative quantization errors more visible
|
||||
// 4. Frequency distribution is motion-dependent rather than spatial-dependent
|
||||
|
||||
if (!is_chroma) {
|
||||
// LUMA DELTA CHANNEL: Emphasize motion coherence and edge preservation
|
||||
if (subband_type == 0) { // LL subband - DC motion changes, still important
|
||||
// DC motion changes - preserve somewhat but allow coarser quantization than full-picture
|
||||
return 2.0f; // Slightly coarser than full-picture
|
||||
}
|
||||
|
||||
if (subband_type == 1) { // LH subband - horizontal motion edges
|
||||
// Motion boundaries benefit from temporal masking - allow coarser quantization
|
||||
return 0.9f; // More aggressive quantization for deltas
|
||||
}
|
||||
|
||||
if (subband_type == 2) { // HL subband - vertical motion edges
|
||||
// Vertical motion boundaries - equal treatment with horizontal for deltas
|
||||
return 1.2f; // Same aggressiveness as horizontal
|
||||
}
|
||||
|
||||
// HH subband - diagonal motion details
|
||||
|
||||
// Diagonal motion deltas can be quantized most aggressively
|
||||
return 0.5f;
|
||||
|
||||
} else {
|
||||
// CHROMA DELTA CHANNELS: More aggressive quantization allowed due to temporal masking
|
||||
// Motion chroma changes are less perceptually critical than static chroma
|
||||
|
||||
float base = perceptual_model3_chroma_basecurve(enc->quality_level, level - 1);
|
||||
|
||||
if (subband_type == 0) { // LL chroma deltas
|
||||
// Chroma DC motion changes - allow more aggressive quantization
|
||||
return 1.3f; // More aggressive than full-picture chroma
|
||||
} else if (subband_type == 1) { // LH chroma deltas
|
||||
// Horizontal chroma motion - temporal masking allows more quantization
|
||||
return FCLAMP(base * 1.4f, 1.2f, 120.0f);
|
||||
} else if (subband_type == 2) { // HL chroma deltas
|
||||
// Vertical chroma motion - most aggressive
|
||||
return FCLAMP(base * ANISOTROPY_MULT_CHROMA[enc->quality_level] * 1.6f, 1.4f, 140.0f);
|
||||
} else { // HH chroma deltas
|
||||
// Diagonal chroma motion - extremely aggressive quantization
|
||||
return FCLAMP(base * ANISOTROPY_MULT_CHROMA[enc->quality_level] * 1.8f + ANISOTROPY_BIAS_CHROMA[enc->quality_level], 1.6f, 160.0f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Determine perceptual weight for coefficient at linear position (matches actual DWT layout)
|
||||
static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma) {
|
||||
@@ -993,6 +1045,51 @@ static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_i
|
||||
return 1.0f;
|
||||
}
|
||||
|
||||
// Determine delta-specific perceptual weight for coefficient at linear position
|
||||
static float get_perceptual_weight_for_position_delta(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma) {
|
||||
// Map linear coefficient index to DWT subband using same layout as decoder
|
||||
int offset = 0;
|
||||
|
||||
// First: LL subband at maximum decomposition level
|
||||
int ll_width = width >> decomp_levels;
|
||||
int ll_height = height >> decomp_levels;
|
||||
int ll_size = ll_width * ll_height;
|
||||
|
||||
if (linear_idx < offset + ll_size) {
|
||||
// LL subband at maximum level - use delta-specific perceptual weight
|
||||
return get_perceptual_weight_delta(enc, decomp_levels, 0, is_chroma, decomp_levels);
|
||||
}
|
||||
offset += ll_size;
|
||||
|
||||
// Then: LH, HL, HH subbands for each level from max down to 1
|
||||
for (int level = decomp_levels; level >= 1; level--) {
|
||||
int level_width = width >> (decomp_levels - level + 1);
|
||||
int level_height = height >> (decomp_levels - level + 1);
|
||||
int subband_size = level_width * level_height;
|
||||
|
||||
// LH subband (horizontal details)
|
||||
if (linear_idx < offset + subband_size) {
|
||||
return get_perceptual_weight_delta(enc, level, 1, is_chroma, decomp_levels);
|
||||
}
|
||||
offset += subband_size;
|
||||
|
||||
// HL subband (vertical details)
|
||||
if (linear_idx < offset + subband_size) {
|
||||
return get_perceptual_weight_delta(enc, level, 2, is_chroma, decomp_levels);
|
||||
}
|
||||
offset += subband_size;
|
||||
|
||||
// HH subband (diagonal details)
|
||||
if (linear_idx < offset + subband_size) {
|
||||
return get_perceptual_weight_delta(enc, level, 3, is_chroma, decomp_levels);
|
||||
}
|
||||
offset += subband_size;
|
||||
}
|
||||
|
||||
// Fallback for out-of-bounds indices
|
||||
return 1.0f;
|
||||
}
|
||||
|
||||
// Apply perceptual quantisation per-coefficient (same loop as uniform but with spatial weights)
|
||||
static void quantise_dwt_coefficients_perceptual_per_coeff(tav_encoder_t *enc,
|
||||
float *coeffs, int16_t *quantised, int size,
|
||||
@@ -1011,6 +1108,38 @@ static void quantise_dwt_coefficients_perceptual_per_coeff(tav_encoder_t *enc,
|
||||
}
|
||||
}
|
||||
|
||||
// Apply delta-specific perceptual quantisation for temporal coefficients
|
||||
static void quantise_dwt_coefficients_perceptual_delta(tav_encoder_t *enc,
|
||||
float *delta_coeffs, int16_t *quantised, int size,
|
||||
int base_quantiser, int width, int height,
|
||||
int decomp_levels, int is_chroma) {
|
||||
// Delta-specific perceptual quantization uses motion-optimized weights
|
||||
// Key differences from full-picture quantization:
|
||||
// 1. Finer quantization steps for deltas (smaller magnitudes)
|
||||
// 2. Motion-coherence emphasis over spatial-detail emphasis
|
||||
// 3. Enhanced temporal masking for chroma channels
|
||||
|
||||
float effective_base_q = base_quantiser;
|
||||
effective_base_q = FCLAMP(effective_base_q, 1.0f, 255.0f);
|
||||
|
||||
// Delta-specific base quantization adjustment
|
||||
// Deltas benefit from temporal masking - allow coarser quantization steps
|
||||
float delta_coarse_tune = 1.2f; // 20% coarser quantization for delta coefficients
|
||||
effective_base_q *= delta_coarse_tune;
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
// Apply delta-specific perceptual weight based on coefficient's position in DWT layout
|
||||
float weight = get_perceptual_weight_for_position_delta(enc, i, width, height, decomp_levels, is_chroma);
|
||||
float effective_q = effective_base_q * weight;
|
||||
|
||||
// Ensure minimum quantization step for very small deltas to prevent over-quantization
|
||||
effective_q = fmaxf(effective_q, 0.5f);
|
||||
|
||||
float quantised_val = delta_coeffs[i] / effective_q;
|
||||
quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Convert 2D spatial DWT layout to linear subband layout (for decoder compatibility)
|
||||
@@ -1132,29 +1261,90 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
|
||||
memcpy(prev_cg, tile_cg_data, tile_size * sizeof(float));
|
||||
|
||||
} else if (mode == TAV_MODE_DELTA) {
|
||||
// DELTA mode: compute coefficient deltas and quantise them
|
||||
// DELTA mode with predictive error compensation to mitigate accumulation artifacts
|
||||
int tile_idx = tile_y * enc->tiles_x + tile_x;
|
||||
float *prev_y = enc->previous_coeffs_y + (tile_idx * tile_size);
|
||||
float *prev_co = enc->previous_coeffs_co + (tile_idx * tile_size);
|
||||
float *prev_cg = enc->previous_coeffs_cg + (tile_idx * tile_size);
|
||||
|
||||
// Compute deltas: delta = current - previous
|
||||
|
||||
// Allocate temporary buffers for error compensation
|
||||
float *delta_y = malloc(tile_size * sizeof(float));
|
||||
float *delta_co = malloc(tile_size * sizeof(float));
|
||||
float *delta_cg = malloc(tile_size * sizeof(float));
|
||||
|
||||
float *compensated_delta_y = malloc(tile_size * sizeof(float));
|
||||
float *compensated_delta_co = malloc(tile_size * sizeof(float));
|
||||
float *compensated_delta_cg = malloc(tile_size * sizeof(float));
|
||||
|
||||
// Step 1: Compute naive deltas
|
||||
for (int i = 0; i < tile_size; i++) {
|
||||
delta_y[i] = tile_y_data[i] - prev_y[i];
|
||||
delta_co[i] = tile_co_data[i] - prev_co[i];
|
||||
delta_cg[i] = tile_cg_data[i] - prev_cg[i];
|
||||
}
|
||||
|
||||
// Quantise the deltas with uniform quantisation (perceptual tuning is for original coefficients, not deltas)
|
||||
quantise_dwt_coefficients(delta_y, quantised_y, tile_size, this_frame_qY);
|
||||
quantise_dwt_coefficients(delta_co, quantised_co, tile_size, this_frame_qCo);
|
||||
quantise_dwt_coefficients(delta_cg, quantised_cg, tile_size, this_frame_qCg);
|
||||
|
||||
// Reconstruct coefficients like decoder will (previous + uniform_dequantised_delta)
|
||||
// Step 2: Predictive error compensation using iterative refinement
|
||||
// We simulate the quantization-dequantization process to predict decoder behavior
|
||||
for (int iteration = 0; iteration < 2; iteration++) { // 2 iterations for good convergence
|
||||
// Test quantization of current deltas
|
||||
int16_t *test_quant_y = malloc(tile_size * sizeof(int16_t));
|
||||
int16_t *test_quant_co = malloc(tile_size * sizeof(int16_t));
|
||||
int16_t *test_quant_cg = malloc(tile_size * sizeof(int16_t));
|
||||
|
||||
// TEMPORARILY DISABLED: Use uniform quantization in error compensation prediction
|
||||
quantise_dwt_coefficients(iteration == 0 ? delta_y : compensated_delta_y, test_quant_y, tile_size, this_frame_qY);
|
||||
quantise_dwt_coefficients(iteration == 0 ? delta_co : compensated_delta_co, test_quant_co, tile_size, this_frame_qCo);
|
||||
quantise_dwt_coefficients(iteration == 0 ? delta_cg : compensated_delta_cg, test_quant_cg, tile_size, this_frame_qCg);
|
||||
|
||||
// Predict what decoder will reconstruct
|
||||
float predicted_y, predicted_co, predicted_cg;
|
||||
float prediction_error_y, prediction_error_co, prediction_error_cg;
|
||||
|
||||
for (int i = 0; i < tile_size; i++) {
|
||||
// Simulate decoder reconstruction
|
||||
predicted_y = prev_y[i] + ((float)test_quant_y[i] * this_frame_qY);
|
||||
predicted_co = prev_co[i] + ((float)test_quant_co[i] * this_frame_qCo);
|
||||
predicted_cg = prev_cg[i] + ((float)test_quant_cg[i] * this_frame_qCg);
|
||||
|
||||
// Calculate prediction error (difference between true target and predicted reconstruction)
|
||||
prediction_error_y = tile_y_data[i] - predicted_y;
|
||||
prediction_error_co = tile_co_data[i] - predicted_co;
|
||||
prediction_error_cg = tile_cg_data[i] - predicted_cg;
|
||||
|
||||
// Debug: accumulate error statistics for first tile only
|
||||
static float total_error_y = 0, total_error_co = 0, total_error_cg = 0;
|
||||
static int error_samples = 0;
|
||||
if (tile_x == 0 && tile_y == 0 && i < 16) { // First tile, first 16 coeffs
|
||||
total_error_y += fabs(prediction_error_y);
|
||||
total_error_co += fabs(prediction_error_co);
|
||||
total_error_cg += fabs(prediction_error_cg);
|
||||
error_samples++;
|
||||
if (error_samples % 160 == 0) { // Print every 10 frames
|
||||
printf("[ERROR-COMP] Avg errors: Y=%.3f Co=%.3f Cg=%.3f\n",
|
||||
total_error_y/160, total_error_co/160, total_error_cg/160);
|
||||
total_error_y = total_error_co = total_error_cg = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Compensate delta by adding prediction error
|
||||
// This counteracts the quantization error that will occur
|
||||
compensated_delta_y[i] = delta_y[i] + prediction_error_y;
|
||||
compensated_delta_co[i] = delta_co[i] + prediction_error_co;
|
||||
compensated_delta_cg[i] = delta_cg[i] + prediction_error_cg;
|
||||
}
|
||||
|
||||
free(test_quant_y);
|
||||
free(test_quant_co);
|
||||
free(test_quant_cg);
|
||||
}
|
||||
|
||||
// Step 3: Quantise the error-compensated deltas with delta-specific perceptual weighting
|
||||
// TEMPORARILY DISABLED: Delta-specific perceptual quantization
|
||||
// Use uniform quantization for deltas (same as original implementation)
|
||||
quantise_dwt_coefficients(compensated_delta_y, quantised_y, tile_size, this_frame_qY);
|
||||
quantise_dwt_coefficients(compensated_delta_co, quantised_co, tile_size, this_frame_qCo);
|
||||
quantise_dwt_coefficients(compensated_delta_cg, quantised_cg, tile_size, this_frame_qCg);
|
||||
|
||||
// Step 4: Update reference coefficients exactly as decoder will reconstruct them
|
||||
for (int i = 0; i < tile_size; i++) {
|
||||
float dequant_delta_y = (float)quantised_y[i] * this_frame_qY;
|
||||
float dequant_delta_co = (float)quantised_co[i] * this_frame_qCo;
|
||||
@@ -1168,6 +1358,9 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
|
||||
free(delta_y);
|
||||
free(delta_co);
|
||||
free(delta_cg);
|
||||
free(compensated_delta_y);
|
||||
free(compensated_delta_co);
|
||||
free(compensated_delta_cg);
|
||||
}
|
||||
|
||||
// Debug: check quantised coefficients after quantisation
|
||||
@@ -2777,7 +2970,7 @@ int main(int argc, char *argv[]) {
|
||||
int count_iframe = 0;
|
||||
int count_pframe = 0;
|
||||
|
||||
KEYFRAME_INTERVAL = enc->output_fps >> 2; // refresh often because deltas in DWT are more visible than DCT
|
||||
KEYFRAME_INTERVAL = enc->output_fps * 2; // Longer intervals for testing error compensation (was >> 2)
|
||||
|
||||
while (continue_encoding) {
|
||||
// Check encode limit if specified
|
||||
|
||||
Reference in New Issue
Block a user