mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-06-11 15:24:05 +09:00
Revert "predictive delta encoding wip"
This reverts commit 21fd10d2 but introduces changes from d117b15e
This commit is contained in:
@@ -257,13 +257,11 @@ typedef struct {
|
|||||||
int16_t *reusable_quantised_co;
|
int16_t *reusable_quantised_co;
|
||||||
int16_t *reusable_quantised_cg;
|
int16_t *reusable_quantised_cg;
|
||||||
|
|
||||||
// Multi-frame coefficient storage for better temporal prediction
|
// Coefficient delta storage for P-frames (previous frame's coefficients)
|
||||||
float *previous_coeffs_y[3]; // Previous 3 frames Y coefficients for all tiles
|
float *previous_coeffs_y; // Previous frame Y coefficients for all tiles
|
||||||
float *previous_coeffs_co[3]; // Previous 3 frames Co coefficients for all tiles
|
float *previous_coeffs_co; // Previous frame Co coefficients for all tiles
|
||||||
float *previous_coeffs_cg[3]; // Previous 3 frames Cg coefficients for all tiles
|
float *previous_coeffs_cg; // Previous frame Cg coefficients for all tiles
|
||||||
int previous_coeffs_allocated; // Flag to track allocation
|
int previous_coeffs_allocated; // Flag to track allocation
|
||||||
int reference_frame_count; // Number of available reference frames (0-3)
|
|
||||||
int last_frame_was_intra; // 1 if previous frame was INTRA, 0 if DELTA
|
|
||||||
|
|
||||||
// Statistics
|
// Statistics
|
||||||
size_t total_compressed_size;
|
size_t total_compressed_size;
|
||||||
@@ -484,36 +482,18 @@ static int initialise_encoder(tav_encoder_t *enc) {
|
|||||||
enc->reusable_quantised_co = malloc(coeff_count_per_tile * sizeof(int16_t));
|
enc->reusable_quantised_co = malloc(coeff_count_per_tile * sizeof(int16_t));
|
||||||
enc->reusable_quantised_cg = malloc(coeff_count_per_tile * sizeof(int16_t));
|
enc->reusable_quantised_cg = malloc(coeff_count_per_tile * sizeof(int16_t));
|
||||||
|
|
||||||
// Allocate multi-frame coefficient storage for better temporal prediction
|
// Allocate coefficient delta storage for P-frames (per-tile coefficient storage)
|
||||||
size_t total_coeff_size = num_tiles * coeff_count_per_tile * sizeof(float);
|
size_t total_coeff_size = num_tiles * coeff_count_per_tile * sizeof(float);
|
||||||
for (int ref = 0; ref < 3; ref++) {
|
enc->previous_coeffs_y = malloc(total_coeff_size);
|
||||||
enc->previous_coeffs_y[ref] = malloc(total_coeff_size);
|
enc->previous_coeffs_co = malloc(total_coeff_size);
|
||||||
enc->previous_coeffs_co[ref] = malloc(total_coeff_size);
|
enc->previous_coeffs_cg = malloc(total_coeff_size);
|
||||||
enc->previous_coeffs_cg[ref] = malloc(total_coeff_size);
|
|
||||||
|
|
||||||
// Initialize to zero
|
|
||||||
memset(enc->previous_coeffs_y[ref], 0, total_coeff_size);
|
|
||||||
memset(enc->previous_coeffs_co[ref], 0, total_coeff_size);
|
|
||||||
memset(enc->previous_coeffs_cg[ref], 0, total_coeff_size);
|
|
||||||
}
|
|
||||||
enc->previous_coeffs_allocated = 0; // Will be set to 1 after first I-frame
|
enc->previous_coeffs_allocated = 0; // Will be set to 1 after first I-frame
|
||||||
enc->reference_frame_count = 0;
|
|
||||||
enc->last_frame_was_intra = 1; // First frame is always INTRA
|
|
||||||
|
|
||||||
// Check allocations
|
|
||||||
int allocation_success = 1;
|
|
||||||
for (int ref = 0; ref < 3; ref++) {
|
|
||||||
if (!enc->previous_coeffs_y[ref] || !enc->previous_coeffs_co[ref] || !enc->previous_coeffs_cg[ref]) {
|
|
||||||
allocation_success = 0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!enc->frame_rgb[0] || !enc->frame_rgb[1] ||
|
if (!enc->frame_rgb[0] || !enc->frame_rgb[1] ||
|
||||||
!enc->current_frame_y || !enc->current_frame_co || !enc->current_frame_cg ||
|
!enc->current_frame_y || !enc->current_frame_co || !enc->current_frame_cg ||
|
||||||
!enc->tiles || !enc->zstd_ctx || !enc->compressed_buffer ||
|
!enc->tiles || !enc->zstd_ctx || !enc->compressed_buffer ||
|
||||||
!enc->reusable_quantised_y || !enc->reusable_quantised_co || !enc->reusable_quantised_cg ||
|
!enc->reusable_quantised_y || !enc->reusable_quantised_co || !enc->reusable_quantised_cg ||
|
||||||
!allocation_success) {
|
!enc->previous_coeffs_y || !enc->previous_coeffs_co || !enc->previous_coeffs_cg) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -967,164 +947,6 @@ static float get_perceptual_weight(tav_encoder_t *enc, int level, int subband_ty
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Delta-specific perceptual weight model optimized for temporal coefficient differences
|
|
||||||
static float get_perceptual_weight_delta(tav_encoder_t *enc, int level, int subband_type, int is_chroma, int max_levels) {
|
|
||||||
// Delta coefficients have different perceptual characteristics than full-picture coefficients:
|
|
||||||
// 1. Motion edges are more perceptually critical than static edges
|
|
||||||
// 2. Temporal masking allows more aggressive quantization in high-motion areas
|
|
||||||
// 3. Smaller delta magnitudes make relative quantization errors more visible
|
|
||||||
// 4. Frequency distribution is motion-dependent rather than spatial-dependent
|
|
||||||
|
|
||||||
if (!is_chroma) {
|
|
||||||
// LUMA DELTA CHANNEL: Emphasize motion coherence and edge preservation
|
|
||||||
if (subband_type == 0) { // LL subband - DC motion changes, still important
|
|
||||||
// DC motion changes - preserve somewhat but allow coarser quantization than full-picture
|
|
||||||
return 2.0f; // Slightly coarser than full-picture
|
|
||||||
}
|
|
||||||
|
|
||||||
if (subband_type == 1) { // LH subband - horizontal motion edges
|
|
||||||
// Motion boundaries benefit from temporal masking - allow coarser quantization
|
|
||||||
return 0.9f; // More aggressive quantization for deltas
|
|
||||||
}
|
|
||||||
|
|
||||||
if (subband_type == 2) { // HL subband - vertical motion edges
|
|
||||||
// Vertical motion boundaries - equal treatment with horizontal for deltas
|
|
||||||
return 1.2f; // Same aggressiveness as horizontal
|
|
||||||
}
|
|
||||||
|
|
||||||
// HH subband - diagonal motion details
|
|
||||||
|
|
||||||
// Diagonal motion deltas can be quantized most aggressively
|
|
||||||
return 0.5f;
|
|
||||||
|
|
||||||
} else {
|
|
||||||
// CHROMA DELTA CHANNELS: More aggressive quantization allowed due to temporal masking
|
|
||||||
// Motion chroma changes are less perceptually critical than static chroma
|
|
||||||
|
|
||||||
float base = perceptual_model3_chroma_basecurve(enc->quality_level, level - 1);
|
|
||||||
|
|
||||||
if (subband_type == 0) { // LL chroma deltas
|
|
||||||
// Chroma DC motion changes - allow more aggressive quantization
|
|
||||||
return 1.3f; // More aggressive than full-picture chroma
|
|
||||||
} else if (subband_type == 1) { // LH chroma deltas
|
|
||||||
// Horizontal chroma motion - temporal masking allows more quantization
|
|
||||||
return FCLAMP(base * 1.4f, 1.2f, 120.0f);
|
|
||||||
} else if (subband_type == 2) { // HL chroma deltas
|
|
||||||
// Vertical chroma motion - most aggressive
|
|
||||||
return FCLAMP(base * ANISOTROPY_MULT_CHROMA[enc->quality_level] * 1.6f, 1.4f, 140.0f);
|
|
||||||
} else { // HH chroma deltas
|
|
||||||
// Diagonal chroma motion - extremely aggressive quantization
|
|
||||||
return FCLAMP(base * ANISOTROPY_MULT_CHROMA[enc->quality_level] * 1.8f + ANISOTROPY_BIAS_CHROMA[enc->quality_level], 1.6f, 160.0f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Blend a temporal prediction with a 4-neighbour spatial average, LL subband only.
 *
 * coeffs           - input prediction, width * height floats (read only)
 * predicted_coeffs - output buffer, width * height floats
 * width, height    - dimensions of the coefficient plane
 * decomp_levels    - number of DWT decomposition levels; the LL subband is
 *                    taken to be (width >> decomp_levels) x (height >> decomp_levels)
 *
 * The output starts as a straight copy of the input. Interior LL positions
 * (everything except the outermost row/column of the LL subband) are then
 * replaced by 85% of the input value plus 15% of the mean of its left, right,
 * top and bottom neighbours. Neighbours are always read from the input, never
 * from the output being written. The LL subband is addressed as the first
 * ll_width * ll_height entries of the buffer with a row stride of ll_width.
 *
 * NULL buffers or non-positive dimensions make the call a no-op. A
 * decomp_levels value that is negative or not smaller than the bit width of
 * int would make the shift undefined, so in that case only the copy is done.
 */
static void apply_spatial_prediction_safe(float *coeffs, float *predicted_coeffs,
                                          int width, int height, int decomp_levels) {
    if (!coeffs || !predicted_coeffs || width <= 0 || height <= 0) {
        return;
    }

    // Count in size_t so large planes cannot overflow an int product.
    const size_t total = (size_t)width * (size_t)height;
    for (size_t i = 0; i < total; i++) {
        predicted_coeffs[i] = coeffs[i];
    }

    // Shifting by a negative count or by >= the width of int is undefined behaviour.
    if (decomp_levels < 0 || decomp_levels >= (int)(sizeof(int) * 8)) {
        return;
    }

    const int ll_width = width >> decomp_levels;
    const int ll_height = height >> decomp_levels;

    const float temporal_share = 0.85f;
    const float spatial_share = 0.15f;

    // Interior positions only, so every neighbour index stays inside the LL subband.
    for (int y = 1; y < ll_height - 1; y++) {
        for (int x = 1; x < ll_width - 1; x++) {
            const int idx = y * ll_width + x;

            const float left = coeffs[idx - 1];
            const float right = coeffs[idx + 1];
            const float top = coeffs[idx - ll_width];
            const float bottom = coeffs[idx + ll_width];

            const float spatial_pred = (left + right + top + bottom) * 0.25f;
            predicted_coeffs[idx] = coeffs[idx] * temporal_share + spatial_pred * spatial_share;
        }
    }
    // Detail subbands keep the copied values.
}
|
|
||||||
|
|
||||||
/*
 * Blend a temporal prediction with a directional spatial average in every
 * detail subband (LH, HL, HH); the LL subband is copied through unchanged.
 *
 * coeffs           - input prediction, width * height floats (read only)
 * predicted_coeffs - output buffer, width * height floats
 * width, height    - dimensions of the coefficient plane
 * decomp_levels    - number of DWT decomposition levels
 *
 * The buffer is walked as consecutive subbands: first an LL block of
 * (width >> decomp_levels) * (height >> decomp_levels) entries, then for
 * level = decomp_levels down to 1 three blocks (LH, HL, HH) of
 * (width >> s) * (height >> s) entries each, with s = decomp_levels - level + 1.
 *
 * For every interior position of a detail subband the output is
 * 80% of the input value plus 20% of a neighbour average weighted by subband:
 *   LH: 0.4 * (top + bottom) + 0.1 * (left + right)
 *   HL: 0.4 * (left + right) + 0.1 * (top + bottom)
 *   HH: 0.25 * (left + right + top + bottom)
 *
 * Neighbours are read from the input buffer. Reading them from the output
 * while it is being overwritten would feed already-blended left/top values
 * into the average and make the result depend on scan order.
 *
 * NULL buffers or non-positive dimensions make the call a no-op. A
 * decomp_levels value that is negative or not smaller than the bit width of
 * int would make the shifts undefined, so in that case only the copy is done.
 */
static void apply_spatial_prediction(float *coeffs, float *predicted_coeffs,
                                     int width, int height, int decomp_levels) {
    if (!coeffs || !predicted_coeffs || width <= 0 || height <= 0) {
        return;
    }

    // Count in size_t so large planes cannot overflow an int product.
    const size_t total = (size_t)width * (size_t)height;
    for (size_t i = 0; i < total; i++) {
        predicted_coeffs[i] = coeffs[i];
    }

    // Shifting by a negative count or by >= the width of int is undefined behaviour.
    if (decomp_levels < 0 || decomp_levels >= (int)(sizeof(int) * 8)) {
        return;
    }

    const float spatial_weight = 0.2f;

    // Skip the LL subband; it keeps the copied values.
    size_t offset = (size_t)(width >> decomp_levels) * (size_t)(height >> decomp_levels);

    for (int level = decomp_levels; level >= 1; level--) {
        const int shift = decomp_levels - level + 1;
        const int level_width = width >> shift;
        const int level_height = height >> shift;
        const size_t subband_size = (size_t)level_width * (size_t)level_height;

        // subband: 0 = LH, 1 = HL, 2 = HH
        for (int subband = 0; subband < 3; subband++) {
            const float *in = coeffs + offset;
            float *out = predicted_coeffs + offset;

            // Interior positions only, so every neighbour index stays inside this subband.
            for (int y = 1; y < level_height - 1; y++) {
                for (int x = 1; x < level_width - 1; x++) {
                    const int idx = y * level_width + x;

                    const float left = in[idx - 1];
                    const float right = in[idx + 1];
                    const float top = in[idx - level_width];
                    const float bottom = in[idx + level_width];

                    float spatial_pred;
                    if (subband == 0) {
                        spatial_pred = (top + bottom) * 0.4f + (left + right) * 0.1f;
                    } else if (subband == 1) {
                        spatial_pred = (left + right) * 0.4f + (top + bottom) * 0.1f;
                    } else {
                        spatial_pred = (left + right + top + bottom) * 0.25f;
                    }

                    out[idx] = in[idx] * (1.0f - spatial_weight) + spatial_pred * spatial_weight;
                }
            }
            offset += subband_size;
        }
    }
}
|
|
||||||
|
|
||||||
|
|
||||||
// Determine perceptual weight for coefficient at linear position (matches actual DWT layout)
|
// Determine perceptual weight for coefficient at linear position (matches actual DWT layout)
|
||||||
static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma) {
|
static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma) {
|
||||||
@@ -1171,51 +993,6 @@ static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_i
|
|||||||
return 1.0f;
|
return 1.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Determine delta-specific perceptual weight for coefficient at linear position
|
|
||||||
static float get_perceptual_weight_for_position_delta(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma) {
|
|
||||||
// Map linear coefficient index to DWT subband using same layout as decoder
|
|
||||||
int offset = 0;
|
|
||||||
|
|
||||||
// First: LL subband at maximum decomposition level
|
|
||||||
int ll_width = width >> decomp_levels;
|
|
||||||
int ll_height = height >> decomp_levels;
|
|
||||||
int ll_size = ll_width * ll_height;
|
|
||||||
|
|
||||||
if (linear_idx < offset + ll_size) {
|
|
||||||
// LL subband at maximum level - use delta-specific perceptual weight
|
|
||||||
return get_perceptual_weight_delta(enc, decomp_levels, 0, is_chroma, decomp_levels);
|
|
||||||
}
|
|
||||||
offset += ll_size;
|
|
||||||
|
|
||||||
// Then: LH, HL, HH subbands for each level from max down to 1
|
|
||||||
for (int level = decomp_levels; level >= 1; level--) {
|
|
||||||
int level_width = width >> (decomp_levels - level + 1);
|
|
||||||
int level_height = height >> (decomp_levels - level + 1);
|
|
||||||
int subband_size = level_width * level_height;
|
|
||||||
|
|
||||||
// LH subband (horizontal details)
|
|
||||||
if (linear_idx < offset + subband_size) {
|
|
||||||
return get_perceptual_weight_delta(enc, level, 1, is_chroma, decomp_levels);
|
|
||||||
}
|
|
||||||
offset += subband_size;
|
|
||||||
|
|
||||||
// HL subband (vertical details)
|
|
||||||
if (linear_idx < offset + subband_size) {
|
|
||||||
return get_perceptual_weight_delta(enc, level, 2, is_chroma, decomp_levels);
|
|
||||||
}
|
|
||||||
offset += subband_size;
|
|
||||||
|
|
||||||
// HH subband (diagonal details)
|
|
||||||
if (linear_idx < offset + subband_size) {
|
|
||||||
return get_perceptual_weight_delta(enc, level, 3, is_chroma, decomp_levels);
|
|
||||||
}
|
|
||||||
offset += subband_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback for out-of-bounds indices
|
|
||||||
return 1.0f;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apply perceptual quantisation per-coefficient (same loop as uniform but with spatial weights)
|
// Apply perceptual quantisation per-coefficient (same loop as uniform but with spatial weights)
|
||||||
static void quantise_dwt_coefficients_perceptual_per_coeff(tav_encoder_t *enc,
|
static void quantise_dwt_coefficients_perceptual_per_coeff(tav_encoder_t *enc,
|
||||||
float *coeffs, int16_t *quantised, int size,
|
float *coeffs, int16_t *quantised, int size,
|
||||||
@@ -1234,38 +1011,6 @@ static void quantise_dwt_coefficients_perceptual_per_coeff(tav_encoder_t *enc,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply delta-specific perceptual quantisation for temporal coefficients
|
|
||||||
static void quantise_dwt_coefficients_perceptual_delta(tav_encoder_t *enc,
|
|
||||||
float *delta_coeffs, int16_t *quantised, int size,
|
|
||||||
int base_quantiser, int width, int height,
|
|
||||||
int decomp_levels, int is_chroma) {
|
|
||||||
// Delta-specific perceptual quantization uses motion-optimized weights
|
|
||||||
// Key differences from full-picture quantization:
|
|
||||||
// 1. Finer quantization steps for deltas (smaller magnitudes)
|
|
||||||
// 2. Motion-coherence emphasis over spatial-detail emphasis
|
|
||||||
// 3. Enhanced temporal masking for chroma channels
|
|
||||||
|
|
||||||
float effective_base_q = base_quantiser;
|
|
||||||
effective_base_q = FCLAMP(effective_base_q, 1.0f, 255.0f);
|
|
||||||
|
|
||||||
// Delta-specific base quantization adjustment
|
|
||||||
// Deltas benefit from temporal masking - allow coarser quantization steps
|
|
||||||
float delta_coarse_tune = 1.2f; // 20% coarser quantization for delta coefficients
|
|
||||||
effective_base_q *= delta_coarse_tune;
|
|
||||||
|
|
||||||
for (int i = 0; i < size; i++) {
|
|
||||||
// Apply delta-specific perceptual weight based on coefficient's position in DWT layout
|
|
||||||
float weight = get_perceptual_weight_for_position_delta(enc, i, width, height, decomp_levels, is_chroma);
|
|
||||||
float effective_q = effective_base_q * weight;
|
|
||||||
|
|
||||||
// Ensure minimum quantization step for very small deltas to prevent over-quantization
|
|
||||||
effective_q = fmaxf(effective_q, 0.5f);
|
|
||||||
|
|
||||||
float quantised_val = delta_coeffs[i] / effective_q;
|
|
||||||
quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Convert 2D spatial DWT layout to linear subband layout (for decoder compatibility)
|
// Convert 2D spatial DWT layout to linear subband layout (for decoder compatibility)
|
||||||
@@ -1348,7 +1093,6 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
|
|||||||
(enc->width * enc->height) : // Monoblock mode: full frame
|
(enc->width * enc->height) : // Monoblock mode: full frame
|
||||||
(PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y); // Standard mode: padded tiles
|
(PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y); // Standard mode: padded tiles
|
||||||
// OPTIMISATION: Use pre-allocated buffers instead of malloc/free per tile
|
// OPTIMISATION: Use pre-allocated buffers instead of malloc/free per tile
|
||||||
// this is the "output" buffer for this function
|
|
||||||
int16_t *quantised_y = enc->reusable_quantised_y;
|
int16_t *quantised_y = enc->reusable_quantised_y;
|
||||||
int16_t *quantised_co = enc->reusable_quantised_co;
|
int16_t *quantised_co = enc->reusable_quantised_co;
|
||||||
int16_t *quantised_cg = enc->reusable_quantised_cg;
|
int16_t *quantised_cg = enc->reusable_quantised_cg;
|
||||||
@@ -1378,192 +1122,52 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
|
|||||||
quantise_dwt_coefficients((float*)tile_cg_data, quantised_cg, tile_size, this_frame_qCg);
|
quantise_dwt_coefficients((float*)tile_cg_data, quantised_cg, tile_size, this_frame_qCg);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store current coefficients in multi-frame reference buffer
|
// Store current coefficients for future delta reference
|
||||||
// For INTRA frames, reset the sliding window and store in frame 0
|
|
||||||
int tile_idx = tile_y * enc->tiles_x + tile_x;
|
int tile_idx = tile_y * enc->tiles_x + tile_x;
|
||||||
|
float *prev_y = enc->previous_coeffs_y + (tile_idx * tile_size);
|
||||||
|
float *prev_co = enc->previous_coeffs_co + (tile_idx * tile_size);
|
||||||
|
float *prev_cg = enc->previous_coeffs_cg + (tile_idx * tile_size);
|
||||||
|
memcpy(prev_y, tile_y_data, tile_size * sizeof(float));
|
||||||
|
memcpy(prev_co, tile_co_data, tile_size * sizeof(float));
|
||||||
|
memcpy(prev_cg, tile_cg_data, tile_size * sizeof(float));
|
||||||
|
|
||||||
// Reset reference frame count for INTRA frames (scene change)
|
} else if (mode == TAV_MODE_DELTA) {
|
||||||
enc->reference_frame_count = 1;
|
// DELTA mode: compute coefficient deltas and quantise them
|
||||||
enc->last_frame_was_intra = 1;
|
|
||||||
|
|
||||||
// Store in frame 0
|
|
||||||
float *curr_y = enc->previous_coeffs_y[0] + (tile_idx * tile_size);
|
|
||||||
float *curr_co = enc->previous_coeffs_co[0] + (tile_idx * tile_size);
|
|
||||||
float *curr_cg = enc->previous_coeffs_cg[0] + (tile_idx * tile_size);
|
|
||||||
memcpy(curr_y, tile_y_data, tile_size * sizeof(float));
|
|
||||||
memcpy(curr_co, tile_co_data, tile_size * sizeof(float));
|
|
||||||
memcpy(curr_cg, tile_cg_data, tile_size * sizeof(float));
|
|
||||||
|
|
||||||
}
|
|
||||||
else if (mode == TAV_MODE_DELTA) {
|
|
||||||
// DELTA mode with multi-frame temporal prediction
|
|
||||||
int tile_idx = tile_y * enc->tiles_x + tile_x;
|
int tile_idx = tile_y * enc->tiles_x + tile_x;
|
||||||
// Use the most recent frame (frame 0) as the primary reference for delta calculation
|
float *prev_y = enc->previous_coeffs_y + (tile_idx * tile_size);
|
||||||
float *prev_y = enc->previous_coeffs_y[0] + (tile_idx * tile_size);
|
float *prev_co = enc->previous_coeffs_co + (tile_idx * tile_size);
|
||||||
float *prev_co = enc->previous_coeffs_co[0] + (tile_idx * tile_size);
|
float *prev_cg = enc->previous_coeffs_cg + (tile_idx * tile_size);
|
||||||
float *prev_cg = enc->previous_coeffs_cg[0] + (tile_idx * tile_size);
|
|
||||||
|
|
||||||
// Allocate temporary buffers for error compensation
|
// Compute deltas: delta = current - previous
|
||||||
float *delta_y = malloc(tile_size * sizeof(float));
|
float *delta_y = malloc(tile_size * sizeof(float));
|
||||||
float *delta_co = malloc(tile_size * sizeof(float));
|
float *delta_co = malloc(tile_size * sizeof(float));
|
||||||
float *delta_cg = malloc(tile_size * sizeof(float));
|
float *delta_cg = malloc(tile_size * sizeof(float));
|
||||||
float *compensated_delta_y = malloc(tile_size * sizeof(float));
|
|
||||||
float *compensated_delta_co = malloc(tile_size * sizeof(float));
|
|
||||||
float *compensated_delta_cg = malloc(tile_size * sizeof(float));
|
|
||||||
|
|
||||||
// Step 1: Compute naive deltas
|
|
||||||
for (int i = 0; i < tile_size; i++) {
|
for (int i = 0; i < tile_size; i++) {
|
||||||
delta_y[i] = tile_y_data[i] - prev_y[i];
|
delta_y[i] = tile_y_data[i] - prev_y[i];
|
||||||
delta_co[i] = tile_co_data[i] - prev_co[i];
|
delta_co[i] = tile_co_data[i] - prev_co[i];
|
||||||
delta_cg[i] = tile_cg_data[i] - prev_cg[i];
|
delta_cg[i] = tile_cg_data[i] - prev_cg[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step 2: Multi-frame temporal prediction with INTRA frame detection
|
// Quantise the deltas with uniform quantisation (perceptual tuning is for original coefficients, not deltas)
|
||||||
float *predicted_y = malloc(tile_size * sizeof(float));
|
quantise_dwt_coefficients(delta_y, quantised_y, tile_size, this_frame_qY);
|
||||||
float *predicted_co = malloc(tile_size * sizeof(float));
|
quantise_dwt_coefficients(delta_co, quantised_co, tile_size, this_frame_qCo);
|
||||||
float *predicted_cg = malloc(tile_size * sizeof(float));
|
quantise_dwt_coefficients(delta_cg, quantised_cg, tile_size, this_frame_qCg);
|
||||||
|
|
||||||
if (enc->last_frame_was_intra || enc->reference_frame_count < 2) {
|
|
||||||
// Scene change detected (previous frame was INTRA) or insufficient reference frames
|
|
||||||
// Use simple single-frame prediction
|
|
||||||
if (enc->verbose && tile_x == 0 && tile_y == 0) {
|
|
||||||
printf("Frame %d: Scene change detected (previous frame was INTRA) - using single-frame prediction\n",
|
|
||||||
enc->frame_count);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < tile_size; i++) {
|
|
||||||
predicted_y[i] = prev_y[i];
|
|
||||||
predicted_co[i] = prev_co[i];
|
|
||||||
predicted_cg[i] = prev_cg[i];
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Multi-frame weighted prediction
|
|
||||||
// Weights: [0.6, 0.3, 0.1] for [most recent, 2nd most recent, 3rd most recent]
|
|
||||||
float weights[3] = {0.6f, 0.3f, 0.1f};
|
|
||||||
|
|
||||||
if (enc->verbose && tile_x == 0 && tile_y == 0) {
|
|
||||||
printf("Frame %d: Multi-frame prediction using %d reference frames\n",
|
|
||||||
enc->frame_count, enc->reference_frame_count);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < tile_size; i++) {
|
|
||||||
predicted_y[i] = 0.0f;
|
|
||||||
predicted_co[i] = 0.0f;
|
|
||||||
predicted_cg[i] = 0.0f;
|
|
||||||
|
|
||||||
// Weighted combination of up to 3 reference frames
|
|
||||||
float total_weight = 0.0f;
|
|
||||||
for (int ref = 0; ref < enc->reference_frame_count && ref < 3; ref++) {
|
|
||||||
float *ref_y = enc->previous_coeffs_y[ref] + (tile_idx * tile_size);
|
|
||||||
float *ref_co = enc->previous_coeffs_co[ref] + (tile_idx * tile_size);
|
|
||||||
float *ref_cg = enc->previous_coeffs_cg[ref] + (tile_idx * tile_size);
|
|
||||||
|
|
||||||
predicted_y[i] += ref_y[i] * weights[ref];
|
|
||||||
predicted_co[i] += ref_co[i] * weights[ref];
|
|
||||||
predicted_cg[i] += ref_cg[i] * weights[ref];
|
|
||||||
total_weight += weights[ref];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Normalize by actual weight (in case we have fewer than 3 frames)
|
|
||||||
if (total_weight > 0.0f) {
|
|
||||||
predicted_y[i] /= total_weight;
|
|
||||||
predicted_co[i] /= total_weight;
|
|
||||||
predicted_cg[i] /= total_weight;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apply spatial prediction on top of temporal prediction
|
|
||||||
float *spatially_enhanced_y = malloc(tile_size * sizeof(float));
|
|
||||||
float *spatially_enhanced_co = malloc(tile_size * sizeof(float));
|
|
||||||
float *spatially_enhanced_cg = malloc(tile_size * sizeof(float));
|
|
||||||
|
|
||||||
// Determine tile dimensions for spatial prediction
|
|
||||||
int tile_width, tile_height;
|
|
||||||
if (enc->monoblock) {
|
|
||||||
tile_width = enc->width;
|
|
||||||
tile_height = enc->height;
|
|
||||||
} else {
|
|
||||||
tile_width = PADDED_TILE_SIZE_X;
|
|
||||||
tile_height = PADDED_TILE_SIZE_Y;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apply safe spatial prediction (LL subband only)
|
|
||||||
apply_spatial_prediction_safe(predicted_y, spatially_enhanced_y, tile_width, tile_height, enc->decomp_levels);
|
|
||||||
apply_spatial_prediction_safe(predicted_co, spatially_enhanced_co, tile_width, tile_height, enc->decomp_levels);
|
|
||||||
apply_spatial_prediction_safe(predicted_cg, spatially_enhanced_cg, tile_width, tile_height, enc->decomp_levels);
|
|
||||||
|
|
||||||
// Calculate improved deltas using temporal + spatial prediction
|
|
||||||
for (int i = 0; i < tile_size; i++) {
|
|
||||||
compensated_delta_y[i] = tile_y_data[i] - spatially_enhanced_y[i];
|
|
||||||
compensated_delta_co[i] = tile_co_data[i] - spatially_enhanced_co[i];
|
|
||||||
compensated_delta_cg[i] = tile_cg_data[i] - spatially_enhanced_cg[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Free spatial prediction buffers
|
|
||||||
free(spatially_enhanced_y);
|
|
||||||
free(spatially_enhanced_co);
|
|
||||||
free(spatially_enhanced_cg);
|
|
||||||
|
|
||||||
free(predicted_y);
|
|
||||||
free(predicted_co);
|
|
||||||
free(predicted_cg);
|
|
||||||
|
|
||||||
// Step 3: Quantize multi-frame predicted deltas
|
|
||||||
quantise_dwt_coefficients(compensated_delta_y, quantised_y, tile_size, this_frame_qY);
|
|
||||||
quantise_dwt_coefficients(compensated_delta_co, quantised_co, tile_size, this_frame_qCo);
|
|
||||||
quantise_dwt_coefficients(compensated_delta_cg, quantised_cg, tile_size, this_frame_qCg);
|
|
||||||
|
|
||||||
// Step 4: Update multi-frame reference coefficient sliding window
|
|
||||||
// Shift the sliding window: [0, 1, 2] becomes [new, 0, 1] (2 is discarded)
|
|
||||||
if (enc->reference_frame_count >= 2) {
|
|
||||||
// Shift frame 1 -> frame 2, frame 0 -> frame 1
|
|
||||||
float *temp_y = enc->previous_coeffs_y[2];
|
|
||||||
float *temp_co = enc->previous_coeffs_co[2];
|
|
||||||
float *temp_cg = enc->previous_coeffs_cg[2];
|
|
||||||
|
|
||||||
enc->previous_coeffs_y[2] = enc->previous_coeffs_y[1];
|
|
||||||
enc->previous_coeffs_co[2] = enc->previous_coeffs_co[1];
|
|
||||||
enc->previous_coeffs_cg[2] = enc->previous_coeffs_cg[1];
|
|
||||||
|
|
||||||
enc->previous_coeffs_y[1] = enc->previous_coeffs_y[0];
|
|
||||||
enc->previous_coeffs_co[1] = enc->previous_coeffs_co[0];
|
|
||||||
enc->previous_coeffs_cg[1] = enc->previous_coeffs_cg[0];
|
|
||||||
|
|
||||||
// Reuse the old frame 2 buffer as new frame 0
|
|
||||||
enc->previous_coeffs_y[0] = temp_y;
|
|
||||||
enc->previous_coeffs_co[0] = temp_co;
|
|
||||||
enc->previous_coeffs_cg[0] = temp_cg;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Calculate and store the new reconstructed coefficients in frame 0
|
|
||||||
float *new_y = enc->previous_coeffs_y[0] + (tile_idx * tile_size);
|
|
||||||
float *new_co = enc->previous_coeffs_co[0] + (tile_idx * tile_size);
|
|
||||||
float *new_cg = enc->previous_coeffs_cg[0] + (tile_idx * tile_size);
|
|
||||||
|
|
||||||
|
// Reconstruct coefficients like decoder will (previous + uniform_dequantised_delta)
|
||||||
for (int i = 0; i < tile_size; i++) {
|
for (int i = 0; i < tile_size; i++) {
|
||||||
float dequant_delta_y = (float)quantised_y[i] * this_frame_qY;
|
float dequant_delta_y = (float)quantised_y[i] * this_frame_qY;
|
||||||
float dequant_delta_co = (float)quantised_co[i] * this_frame_qCo;
|
float dequant_delta_co = (float)quantised_co[i] * this_frame_qCo;
|
||||||
float dequant_delta_cg = (float)quantised_cg[i] * this_frame_qCg;
|
float dequant_delta_cg = (float)quantised_cg[i] * this_frame_qCg;
|
||||||
|
|
||||||
// Reconstruct current frame coefficients exactly as decoder will
|
prev_y[i] = prev_y[i] + dequant_delta_y;
|
||||||
new_y[i] = prev_y[i] + dequant_delta_y;
|
prev_co[i] = prev_co[i] + dequant_delta_co;
|
||||||
new_co[i] = prev_co[i] + dequant_delta_co;
|
prev_cg[i] = prev_cg[i] + dequant_delta_cg;
|
||||||
new_cg[i] = prev_cg[i] + dequant_delta_cg;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update reference frame count (up to 3 frames) and frame type
|
|
||||||
if (enc->reference_frame_count < 3) {
|
|
||||||
enc->reference_frame_count++;
|
|
||||||
}
|
|
||||||
enc->last_frame_was_intra = 0;
|
|
||||||
|
|
||||||
free(delta_y);
|
free(delta_y);
|
||||||
free(delta_co);
|
free(delta_co);
|
||||||
free(delta_cg);
|
free(delta_cg);
|
||||||
free(compensated_delta_y);
|
|
||||||
free(compensated_delta_co);
|
|
||||||
free(compensated_delta_cg);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Debug: check quantised coefficients after quantisation
|
// Debug: check quantised coefficients after quantisation
|
||||||
@@ -3174,7 +2778,7 @@ int main(int argc, char *argv[]) {
|
|||||||
int count_iframe = 0;
|
int count_iframe = 0;
|
||||||
int count_pframe = 0;
|
int count_pframe = 0;
|
||||||
|
|
||||||
KEYFRAME_INTERVAL = enc->output_fps;// >> 2; // short interval makes ghosting less noticeable
|
KEYFRAME_INTERVAL = enc->output_fps >> 2; // refresh often because deltas in DWT are more visible than DCT
|
||||||
|
|
||||||
while (continue_encoding) {
|
while (continue_encoding) {
|
||||||
// Check encode limit if specified
|
// Check encode limit if specified
|
||||||
@@ -3403,12 +3007,9 @@ static void cleanup_encoder(tav_encoder_t *enc) {
|
|||||||
free(enc->reusable_quantised_cg);
|
free(enc->reusable_quantised_cg);
|
||||||
|
|
||||||
// Free coefficient delta storage
|
// Free coefficient delta storage
|
||||||
// Free multi-frame coefficient buffers
|
free(enc->previous_coeffs_y);
|
||||||
for (int ref = 0; ref < 3; ref++) {
|
free(enc->previous_coeffs_co);
|
||||||
free(enc->previous_coeffs_y[ref]);
|
free(enc->previous_coeffs_cg);
|
||||||
free(enc->previous_coeffs_co[ref]);
|
|
||||||
free(enc->previous_coeffs_cg[ref]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Free subtitle list
|
// Free subtitle list
|
||||||
if (enc->subtitles) {
|
if (enc->subtitles) {
|
||||||
|
|||||||
Reference in New Issue
Block a user