/*
 * Review notes (patch preamble; everything before the "diff --git" header is
 * outside the hunks).
 *
 * Purpose: replace the single stored coefficient frame per channel in
 * encoder_tav.c with a three-slot window (previous_coeffs_{y,co,cg}[3]) and
 * predict DELTA tiles from that window.
 *
 * NOTE(review): as stored here the diff's line breaks are collapsed onto a few
 * long lines; it has to be re-wrapped (one ' ', '+' or '-' line per source
 * line) before a patch tool can apply it.
 *
 * Hunk summary:
 *  - struct fields: previous_coeffs_y/co/cg become float *[3]; adds
 *    reference_frame_count and last_frame_was_intra.
 *  - initialise_encoder(): malloc + memset(0) for each of the nine buffers,
 *    reference_frame_count = 0, last_frame_was_intra = 1; the NULL checks on
 *    the buffers are folded into allocation_success.
 *  - serialise_tile_data(), branch before "else if (mode == TAV_MODE_DELTA)"
 *    (commented as the INTRA path): copies the tile's coefficients into
 *    slot 0 and sets reference_frame_count = 1, last_frame_was_intra = 1.
 *  - serialise_tile_data(), DELTA branch: removes the two-pass error
 *    compensation and the error-diffusion pass. The values quantised are now
 *    tile - predicted, where predicted is slot 0 when last_frame_was_intra is
 *    set or reference_frame_count < 2, otherwise the 0.6/0.3/0.1 weighted sum
 *    of the first reference_frame_count slots divided by the sum of the
 *    weights used. Afterwards the slot pointers rotate (only when
 *    reference_frame_count >= 2), slot 0 receives prev + dequantised delta,
 *    the count saturates at 3 and last_frame_was_intra is cleared.
 *  - cleanup_encoder(): frees the nine buffers.
 *
 * NOTE(review) - issues to resolve before merging:
 *  1. initialise_encoder(): memset() is called on every malloc() result
 *     before the NULL check, so a failed allocation is written through
 *     before allocation_success can report it. calloc(), or checking before
 *     the memset, avoids that.
 *  2. Stale slots in the window: the INTRA path writes slot 0 only and sets
 *     the count to 1. The first DELTA after it skips the rotation
 *     (count < 2), overwrites slot 0 in place and raises the count to 2. The
 *     second DELTA therefore takes the weighted path with slot 1, and the
 *     third with slot 2, although those slots still hold the zero fill or
 *     coefficients from before the INTRA frame.
 *  3. Prediction/reconstruction mismatch: the quantised delta is
 *     tile - predicted_*, but the stored reference is prev_* + dequantised
 *     delta. On the weighted path predicted_* differs from prev_* in
 *     general, so the stored reference is not the reconstruction of the
 *     current tile. Which base the decoder adds the delta to is not visible
 *     here - confirm against the decoder.
 *  4. Per-tile state changes: the pointer rotation and the updates of
 *     reference_frame_count / last_frame_was_intra sit inside
 *     serialise_tile_data(), which is parameterised by tile_x/tile_y, while
 *     the buffers are sized for all tiles and indexed by
 *     tile_idx * tile_size. If a frame has more than one tile, the window
 *     rotates once per tile and later tiles read prev_* from a slot holding
 *     an older frame. Presumably this belongs at frame level - confirm the
 *     tile count and the call site.
 *  5. delta_y/delta_co/delta_cg are still filled and freed, but nothing in
 *     the added code reads them any more; compensated_delta_* is recomputed
 *     from tile - predicted_*.
 *  6. The verbose message "Scene change detected (previous frame was INTRA)"
 *     is printed for the whole single-frame branch, including the
 *     reference_frame_count < 2 case.
 *  7. The predicted_* mallocs in the DELTA branch are used without a NULL
 *     check.
 */
diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index 04a17a2..dfbde84 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -257,11 +257,13 @@ typedef struct { int16_t *reusable_quantised_co; int16_t *reusable_quantised_cg; - // Coefficient delta storage for P-frames (previous frame's coefficients) - float *previous_coeffs_y; // Previous frame Y coefficients for all tiles - float *previous_coeffs_co; // Previous frame Co coefficients for all tiles - float *previous_coeffs_cg; // Previous frame Cg coefficients for all tiles + // Multi-frame coefficient storage for better temporal prediction + float *previous_coeffs_y[3]; // Previous 3 frames Y coefficients for all tiles + float *previous_coeffs_co[3]; // Previous 3 frames Co coefficients for all tiles + float *previous_coeffs_cg[3]; // Previous 3 frames Cg coefficients for all tiles int previous_coeffs_allocated; // Flag to track allocation + int reference_frame_count; // Number of available reference frames (0-3) + int last_frame_was_intra; // 1 if previous frame was INTRA, 0 if DELTA // Statistics size_t total_compressed_size; @@ -482,18 +484,36 @@ static int initialise_encoder(tav_encoder_t *enc) { enc->reusable_quantised_co = malloc(coeff_count_per_tile * sizeof(int16_t)); enc->reusable_quantised_cg = malloc(coeff_count_per_tile * sizeof(int16_t)); - // Allocate coefficient delta storage for P-frames (per-tile coefficient storage) + // Allocate multi-frame coefficient storage for better temporal prediction size_t total_coeff_size = num_tiles * coeff_count_per_tile * sizeof(float); - enc->previous_coeffs_y = malloc(total_coeff_size); - enc->previous_coeffs_co = malloc(total_coeff_size); - enc->previous_coeffs_cg = malloc(total_coeff_size); + for (int ref = 0; ref < 3; ref++) { + enc->previous_coeffs_y[ref] = malloc(total_coeff_size); + enc->previous_coeffs_co[ref] = malloc(total_coeff_size); + enc->previous_coeffs_cg[ref] = malloc(total_coeff_size); + + // 
Initialize to zero + memset(enc->previous_coeffs_y[ref], 0, total_coeff_size); + memset(enc->previous_coeffs_co[ref], 0, total_coeff_size); + memset(enc->previous_coeffs_cg[ref], 0, total_coeff_size); + } enc->previous_coeffs_allocated = 0; // Will be set to 1 after first I-frame - + enc->reference_frame_count = 0; + enc->last_frame_was_intra = 1; // First frame is always INTRA + + // Check allocations + int allocation_success = 1; + for (int ref = 0; ref < 3; ref++) { + if (!enc->previous_coeffs_y[ref] || !enc->previous_coeffs_co[ref] || !enc->previous_coeffs_cg[ref]) { + allocation_success = 0; + break; + } + } + if (!enc->frame_rgb[0] || !enc->frame_rgb[1] || !enc->current_frame_y || !enc->current_frame_co || !enc->current_frame_cg || !enc->tiles || !enc->zstd_ctx || !enc->compressed_buffer || !enc->reusable_quantised_y || !enc->reusable_quantised_co || !enc->reusable_quantised_cg || - !enc->previous_coeffs_y || !enc->previous_coeffs_co || !enc->previous_coeffs_cg) { + !allocation_success) { return -1; } @@ -1252,22 +1272,30 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, quantise_dwt_coefficients((float*)tile_cg_data, quantised_cg, tile_size, this_frame_qCg); } - // Store current coefficients for future delta reference + // Store current coefficients in multi-frame reference buffer + // For INTRA frames, reset the sliding window and store in frame 0 int tile_idx = tile_y * enc->tiles_x + tile_x; - float *prev_y = enc->previous_coeffs_y + (tile_idx * tile_size); - float *prev_co = enc->previous_coeffs_co + (tile_idx * tile_size); - float *prev_cg = enc->previous_coeffs_cg + (tile_idx * tile_size); - memcpy(prev_y, tile_y_data, tile_size * sizeof(float)); - memcpy(prev_co, tile_co_data, tile_size * sizeof(float)); - memcpy(prev_cg, tile_cg_data, tile_size * sizeof(float)); + + // Reset reference frame count for INTRA frames (scene change) + enc->reference_frame_count = 1; + enc->last_frame_was_intra = 1; + + // Store in frame 0 + 
float *curr_y = enc->previous_coeffs_y[0] + (tile_idx * tile_size); + float *curr_co = enc->previous_coeffs_co[0] + (tile_idx * tile_size); + float *curr_cg = enc->previous_coeffs_cg[0] + (tile_idx * tile_size); + memcpy(curr_y, tile_y_data, tile_size * sizeof(float)); + memcpy(curr_co, tile_co_data, tile_size * sizeof(float)); + memcpy(curr_cg, tile_cg_data, tile_size * sizeof(float)); } else if (mode == TAV_MODE_DELTA) { - // DELTA mode with predictive error compensation to mitigate accumulation artifacts + // DELTA mode with multi-frame temporal prediction int tile_idx = tile_y * enc->tiles_x + tile_x; - float *prev_y = enc->previous_coeffs_y + (tile_idx * tile_size); - float *prev_co = enc->previous_coeffs_co + (tile_idx * tile_size); - float *prev_cg = enc->previous_coeffs_cg + (tile_idx * tile_size); + // Use the most recent frame (frame 0) as the primary reference for delta calculation + float *prev_y = enc->previous_coeffs_y[0] + (tile_idx * tile_size); + float *prev_co = enc->previous_coeffs_co[0] + (tile_idx * tile_size); + float *prev_cg = enc->previous_coeffs_cg[0] + (tile_idx * tile_size); // Allocate temporary buffers for error compensation float *delta_y = malloc(tile_size * sizeof(float)); @@ -1284,163 +1312,127 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, delta_cg[i] = tile_cg_data[i] - prev_cg[i]; } - // Step 2: Simple predictive error compensation (back to working version) - // We simulate the quantization-dequantization process to predict decoder behavior - for (int iteration = 0; iteration < 2; iteration++) { // Back to simple 2-iteration approach - // Test quantization of current deltas - int16_t *test_quant_y = malloc(tile_size * sizeof(int16_t)); - int16_t *test_quant_co = malloc(tile_size * sizeof(int16_t)); - int16_t *test_quant_cg = malloc(tile_size * sizeof(int16_t)); + // Step 2: Multi-frame temporal prediction with INTRA frame detection + float *predicted_y = malloc(tile_size * sizeof(float)); + 
float *predicted_co = malloc(tile_size * sizeof(float)); + float *predicted_cg = malloc(tile_size * sizeof(float)); - // TEMPORARILY DISABLED: Use uniform quantization in error compensation prediction - quantise_dwt_coefficients(iteration == 0 ? delta_y : compensated_delta_y, test_quant_y, tile_size, this_frame_qY); - quantise_dwt_coefficients(iteration == 0 ? delta_co : compensated_delta_co, test_quant_co, tile_size, this_frame_qCo); - quantise_dwt_coefficients(iteration == 0 ? delta_cg : compensated_delta_cg, test_quant_cg, tile_size, this_frame_qCg); - - // Predict what decoder will reconstruct - float predicted_y, predicted_co, predicted_cg; - float prediction_error_y, prediction_error_co, prediction_error_cg; + if (enc->last_frame_was_intra || enc->reference_frame_count < 2) { + // Scene change detected (previous frame was INTRA) or insufficient reference frames + // Use simple single-frame prediction + if (enc->verbose && tile_x == 0 && tile_y == 0) { + printf("Frame %d: Scene change detected (previous frame was INTRA) - using single-frame prediction\n", + enc->frame_count); + } for (int i = 0; i < tile_size; i++) { - // Simulate decoder reconstruction - predicted_y = prev_y[i] + ((float)test_quant_y[i] * this_frame_qY); - predicted_co = prev_co[i] + ((float)test_quant_co[i] * this_frame_qCo); - predicted_cg = prev_cg[i] + ((float)test_quant_cg[i] * this_frame_qCg); + predicted_y[i] = prev_y[i]; + predicted_co[i] = prev_co[i]; + predicted_cg[i] = prev_cg[i]; + } + } else { + // Multi-frame weighted prediction + // Weights: [0.6, 0.3, 0.1] for [most recent, 2nd most recent, 3rd most recent] + float weights[3] = {0.6f, 0.3f, 0.1f}; - // Calculate prediction error (difference between true target and predicted reconstruction) - prediction_error_y = tile_y_data[i] - predicted_y; - prediction_error_co = tile_co_data[i] - predicted_co; - prediction_error_cg = tile_cg_data[i] - predicted_cg; - - // Damped error compensation to prevent oscillation - // Apply different 
damping factors based on frequency (subband position) - float damping_factor = 1.0f; - int subband_size = tile_size / 4; // Each subband is 1/4 of tile - - if (i < subband_size) { - // LL subband (low-low): stable, allow full compensation - damping_factor = 0.8f; - } else if (i < 2 * subband_size) { - // LH subband (low-high): horizontal edges, moderate damping - damping_factor = 0.5f; - } else if (i < 3 * subband_size) { - // HL subband (high-low): vertical edges, moderate damping - damping_factor = 0.5f; - } else { - // HH subband (high-high): diagonal details, heavy damping to prevent oscillation - damping_factor = 0.3f; - } - - // Further reduce compensation on second iteration to prevent overcorrection - if (iteration == 1) { - damping_factor *= 0.5f; // Even more conservative on second iteration - } - - compensated_delta_y[i] = delta_y[i] + (prediction_error_y * damping_factor); - compensated_delta_co[i] = delta_co[i] + (prediction_error_co * damping_factor); - compensated_delta_cg[i] = delta_cg[i] + (prediction_error_cg * damping_factor); - - // Debug: Optional convergence monitoring (commented out for performance) - // if (tile_x == 0 && tile_y == 0 && i < 4) { - // printf("[COMP] Frame %d, Coeff %d, Iter %d: error=%.2f, damping=%.2f\n", - // enc->frame_count, i, iteration, prediction_error_y, damping_factor); - // } + if (enc->verbose && tile_x == 0 && tile_y == 0) { + printf("Frame %d: Multi-frame prediction using %d reference frames\n", + enc->frame_count, enc->reference_frame_count); } - free(test_quant_y); - free(test_quant_co); - free(test_quant_cg); + for (int i = 0; i < tile_size; i++) { + predicted_y[i] = 0.0f; + predicted_co[i] = 0.0f; + predicted_cg[i] = 0.0f; + + // Weighted combination of up to 3 reference frames + float total_weight = 0.0f; + for (int ref = 0; ref < enc->reference_frame_count && ref < 3; ref++) { + float *ref_y = enc->previous_coeffs_y[ref] + (tile_idx * tile_size); + float *ref_co = enc->previous_coeffs_co[ref] + (tile_idx * 
tile_size); + float *ref_cg = enc->previous_coeffs_cg[ref] + (tile_idx * tile_size); + + predicted_y[i] += ref_y[i] * weights[ref]; + predicted_co[i] += ref_co[i] * weights[ref]; + predicted_cg[i] += ref_cg[i] * weights[ref]; + total_weight += weights[ref]; + } + + // Normalize by actual weight (in case we have fewer than 3 frames) + if (total_weight > 0.0f) { + predicted_y[i] /= total_weight; + predicted_co[i] /= total_weight; + predicted_cg[i] /= total_weight; + } + } } - // Step 3: Quantize the error-compensated deltas with error diffusion - // Apply Floyd-Steinberg-like error diffusion to distribute quantization errors - float *error_buffer_y = calloc(tile_size, sizeof(float)); - float *error_buffer_co = calloc(tile_size, sizeof(float)); - float *error_buffer_cg = calloc(tile_size, sizeof(float)); - - // Step 3a: Apply error diffusion to compensated deltas (Floyd-Steinberg style) + // Calculate improved deltas using multi-frame prediction for (int i = 0; i < tile_size; i++) { - // Add accumulated error from previous coefficients - compensated_delta_y[i] += error_buffer_y[i]; - compensated_delta_co[i] += error_buffer_co[i]; - compensated_delta_cg[i] += error_buffer_cg[i]; - - // Test quantize to calculate what the error would be - int16_t test_quant_y = (int16_t)roundf(compensated_delta_y[i] / this_frame_qY); - int16_t test_quant_co = (int16_t)roundf(compensated_delta_co[i] / this_frame_qCo); - int16_t test_quant_cg = (int16_t)roundf(compensated_delta_cg[i] / this_frame_qCg); - - // Calculate quantization errors that would occur - float quant_error_y = compensated_delta_y[i] - (test_quant_y * this_frame_qY); - float quant_error_co = compensated_delta_co[i] - (test_quant_co * this_frame_qCo); - float quant_error_cg = compensated_delta_cg[i] - (test_quant_cg * this_frame_qCg); - - // Distribute error to neighboring coefficients (simplified Floyd-Steinberg for 1D) - // Apply dithering to high-frequency subbands based on decomposition levels - int should_dither = 0; 
-// int ll_size = tile_size / 4; // targeting LH/HL/HH6 subbands -// int ll_size = tile_size / 16; // targeting LH/HL/HH5-6 subbands - int ll_size = tile_size / 64; // targeting LH/HL/HH4-6 subbands - - // Debug: Optional diagnostic output (commented for performance) - // if (i == 0) { - // printf("[DITHER-DEBUG] tile_size=%d, ll_size=%d, will_dither_from_coeff=%d\n", - // tile_size, ll_size, ll_size); - // } - - // Dither all coefficients except the LL (lowest frequency) subband - if (i >= ll_size) { - should_dither = 1; - } - - if (should_dither) { - if (i + 1 < tile_size) { - error_buffer_y[i + 1] += quant_error_y * 0.5f; // 50% to next coefficient - error_buffer_co[i + 1] += quant_error_co * 0.5f; - error_buffer_cg[i + 1] += quant_error_cg * 0.5f; - } - if (i + 2 < tile_size) { - error_buffer_y[i + 2] += quant_error_y * 0.3f; // 30% to coefficient +2 - error_buffer_co[i + 2] += quant_error_co * 0.3f; - error_buffer_cg[i + 2] += quant_error_cg * 0.3f; - } - // Remaining 20% is absorbed (prevents error accumulation) - - // Debug: Optional error diffusion monitoring (commented for performance) - // static int dither_debug_count = 0; - // if (dither_debug_count < 5) { - // printf("[DITHER] Coeff %d: error=%.3f, distributed to [%d]=%.3f [%d]=%.3f\n", - // i, quant_error_y, i+1, quant_error_y * 0.5f, i+2, quant_error_y * 0.3f); - // dither_debug_count++; - // } - } + compensated_delta_y[i] = tile_y_data[i] - predicted_y[i]; + compensated_delta_co[i] = tile_co_data[i] - predicted_co[i]; + compensated_delta_cg[i] = tile_cg_data[i] - predicted_cg[i]; } - // Step 3b: Now quantize the error-diffused compensated deltas + free(predicted_y); + free(predicted_co); + free(predicted_cg); + + // Step 3: Quantize multi-frame predicted deltas quantise_dwt_coefficients(compensated_delta_y, quantised_y, tile_size, this_frame_qY); quantise_dwt_coefficients(compensated_delta_co, quantised_co, tile_size, this_frame_qCo); quantise_dwt_coefficients(compensated_delta_cg, quantised_cg, 
tile_size, this_frame_qCg); - // Step 4: Update reference coefficients exactly as decoder will reconstruct them + // Step 4: Update multi-frame reference coefficient sliding window + // Shift the sliding window: [0, 1, 2] becomes [new, 0, 1] (2 is discarded) + if (enc->reference_frame_count >= 2) { + // Shift frame 1 -> frame 2, frame 0 -> frame 1 + float *temp_y = enc->previous_coeffs_y[2]; + float *temp_co = enc->previous_coeffs_co[2]; + float *temp_cg = enc->previous_coeffs_cg[2]; + + enc->previous_coeffs_y[2] = enc->previous_coeffs_y[1]; + enc->previous_coeffs_co[2] = enc->previous_coeffs_co[1]; + enc->previous_coeffs_cg[2] = enc->previous_coeffs_cg[1]; + + enc->previous_coeffs_y[1] = enc->previous_coeffs_y[0]; + enc->previous_coeffs_co[1] = enc->previous_coeffs_co[0]; + enc->previous_coeffs_cg[1] = enc->previous_coeffs_cg[0]; + + // Reuse the old frame 2 buffer as new frame 0 + enc->previous_coeffs_y[0] = temp_y; + enc->previous_coeffs_co[0] = temp_co; + enc->previous_coeffs_cg[0] = temp_cg; + } + + // Calculate and store the new reconstructed coefficients in frame 0 + float *new_y = enc->previous_coeffs_y[0] + (tile_idx * tile_size); + float *new_co = enc->previous_coeffs_co[0] + (tile_idx * tile_size); + float *new_cg = enc->previous_coeffs_cg[0] + (tile_idx * tile_size); + for (int i = 0; i < tile_size; i++) { float dequant_delta_y = (float)quantised_y[i] * this_frame_qY; float dequant_delta_co = (float)quantised_co[i] * this_frame_qCo; float dequant_delta_cg = (float)quantised_cg[i] * this_frame_qCg; - prev_y[i] = prev_y[i] + dequant_delta_y; - prev_co[i] = prev_co[i] + dequant_delta_co; - prev_cg[i] = prev_cg[i] + dequant_delta_cg; + // Reconstruct current frame coefficients exactly as decoder will + new_y[i] = prev_y[i] + dequant_delta_y; + new_co[i] = prev_co[i] + dequant_delta_co; + new_cg[i] = prev_cg[i] + dequant_delta_cg; } + // Update reference frame count (up to 3 frames) and frame type + if (enc->reference_frame_count < 3) { + 
enc->reference_frame_count++; + } + enc->last_frame_was_intra = 0; + free(delta_y); free(delta_co); free(delta_cg); free(compensated_delta_y); free(compensated_delta_co); free(compensated_delta_cg); - free(error_buffer_y); - free(error_buffer_co); - free(error_buffer_cg); } // Debug: check quantised coefficients after quantisation @@ -3280,9 +3272,12 @@ static void cleanup_encoder(tav_encoder_t *enc) { free(enc->reusable_quantised_cg); // Free coefficient delta storage - free(enc->previous_coeffs_y); - free(enc->previous_coeffs_co); - free(enc->previous_coeffs_cg); + // Free multi-frame coefficient buffers + for (int ref = 0; ref < 3; ref++) { + free(enc->previous_coeffs_y[ref]); + free(enc->previous_coeffs_co[ref]); + free(enc->previous_coeffs_cg[ref]); + } // Free subtitle list if (enc->subtitles) {