tsvm/video_encoder/encoder_tav_opencv.cpp

// Created by CuriousTorvald and Claude on 2025-10-17
// MPEG-style bidirectional block motion compensation for TAV encoder
// Simplified: Single-level diamond search, variable blocks, overlaps, sub-pixel refinement

#include <opencv2/opencv.hpp>
#include <cstdlib>
#include <cstring>
#include <cmath>

extern "C" {

// Dense optical flow estimation using Farneback algorithm
// Computes flow at every pixel, then samples at block centers for motion vectors
// Much more spatially coherent than independent block matching
void estimate_optical_flow_motion(
    const float *current_y,    // Current frame Y channel (width×height)
    const float *reference_y,  // Reference frame Y channel
    int width, int height,
    int block_size,            // Block size (e.g., 16)
    int16_t *mvs_x,           // Output: motion vectors X (in 1/4-pixel units)
    int16_t *mvs_y            // Output: motion vectors Y (in 1/4-pixel units)
) {
    // Convert float Y channels to 8-bit grayscale for OpenCV
    cv::Mat cur_gray(height, width, CV_8UC1);
    cv::Mat ref_gray(height, width, CV_8UC1);

    // Detect if Y is in [0,1] range and scale to [0,255] if needed
    float y_min = current_y[0], y_max = current_y[0];
    for (int i = 1; i < width * height; i++) {
        if (current_y[i] < y_min) y_min = current_y[i];
        if (current_y[i] > y_max) y_max = current_y[i];
    }
    float scale = (y_max <= 1.1f) ? 255.0f : 1.0f;

    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            int idx = y * width + x;
            cur_gray.at<uint8_t>(y, x) = (uint8_t)std::round(std::max(0.0f, std::min(255.0f, current_y[idx] * scale)));
            ref_gray.at<uint8_t>(y, x) = (uint8_t)std::round(std::max(0.0f, std::min(255.0f, reference_y[idx] * scale)));
        }
    }

    // Compute dense optical flow using Farneback algorithm
    // IMPORTANT: We need BACKWARD flow (current → reference) for motion compensation
    // This tells us where to PULL pixels FROM in the reference frame
    cv::Mat flow;
    cv::calcOpticalFlowFarneback(
        cur_gray,      // Current frame (source)
        ref_gray,      // Reference frame (destination)
        flow,          // Output flow (2-channel float: dx, dy per pixel)
        0.5,           // pyr_scale: pyramid scale (0.5 = each layer is half size)
        3,             // levels: number of pyramid levels
        20,            // winsize: averaging window size
        3,             // iterations: number of iterations at each pyramid level
        5,             // poly_n: size of pixel neighborhood (5 or 7)
        1.2,           // poly_sigma: standard deviation of Gaussian for polynomial expansion
        0              // flags: 0 = normal, OPTFLOW_USE_INITIAL_FLOW = use input flow as initial estimate
    );

    // Sample flow at block centers to get motion vectors
    int num_blocks_x = (width + block_size - 1) / block_size;
    int num_blocks_y = (height + block_size - 1) / block_size;

    for (int by = 0; by < num_blocks_y; by++) {
        for (int bx = 0; bx < num_blocks_x; bx++) {
            int block_idx = by * num_blocks_x + bx;

            // Block center position
            int center_x = bx * block_size + block_size / 2;
            int center_y = by * block_size + block_size / 2;

            // Clamp to frame boundaries
            if (center_x >= width) center_x = width - 1;
            if (center_y >= height) center_y = height - 1;

            // Get flow at block center
            cv::Point2f flow_vec = flow.at<cv::Point2f>(center_y, center_x);

            // Convert to 1/4-pixel units and store
            // Flow is in pixels, positive = motion to the right/down
            mvs_x[block_idx] = (int16_t)std::round(flow_vec.x * 4.0f);
            mvs_y[block_idx] = (int16_t)std::round(flow_vec.y * 4.0f);
        }
    }
}

// Block-based motion compensation with bilinear interpolation (sub-pixel precision)
// MVs are in 1/4-pixel units
// This implements the warp() function from MC-EZBC pseudocode
void warp_block_motion(
    const float *src,          // Source frame
    int width, int height,
    const int16_t *mvs_x,      // Motion vectors X (1/4-pixel units)
    const int16_t *mvs_y,      // Motion vectors Y (1/4-pixel units)
    int block_size,            // Block size (e.g., 16)
    float *dst                 // Output warped frame
) {
    int num_blocks_x = (width + block_size - 1) / block_size;
    int num_blocks_y = (height + block_size - 1) / block_size;

    // Process each block
    for (int by = 0; by < num_blocks_y; by++) {
        for (int bx = 0; bx < num_blocks_x; bx++) {
            int block_idx = by * num_blocks_x + bx;

            // Get motion vector for this block (in 1/4-pixel units)
            float mv_x = mvs_x[block_idx] / 4.0f;  // Convert to pixels
            float mv_y = mvs_y[block_idx] / 4.0f;

            // Block boundaries in destination frame
            int block_x_start = bx * block_size;
            int block_y_start = by * block_size;
            int block_x_end = std::min(block_x_start + block_size, width);
            int block_y_end = std::min(block_y_start + block_size, height);

            // Warp each pixel in the block
            for (int y = block_y_start; y < block_y_end; y++) {
                for (int x = block_x_start; x < block_x_end; x++) {
                    // Source position (backward warping)
                    float src_x = x - mv_x;
                    float src_y = y - mv_y;

                    // Clamp to valid range
                    src_x = std::max(0.0f, std::min((float)(width - 1), src_x));
                    src_y = std::max(0.0f, std::min((float)(height - 1), src_y));

                    // Bilinear interpolation
                    int x0 = (int)src_x;
                    int y0 = (int)src_y;
                    int x1 = std::min(x0 + 1, width - 1);
                    int y1 = std::min(y0 + 1, height - 1);

                    float fx = src_x - x0;
                    float fy = src_y - y0;

                    float val00 = src[y0 * width + x0];
                    float val10 = src[y0 * width + x1];
                    float val01 = src[y1 * width + x0];
                    float val11 = src[y1 * width + x1];

                    float val_top = (1.0f - fx) * val00 + fx * val10;
                    float val_bot = (1.0f - fx) * val01 + fx * val11;
                    float val = (1.0f - fy) * val_top + fy * val_bot;

                    dst[y * width + x] = val;
                }
            }
        }
    }
}

// Bidirectional motion compensation for MC-EZBC predict step
// Implements: prediction = 0.5 * (warp(f0, MV_fwd) + warp(f1, MV_bwd))
void warp_bidirectional(
    const float *f0, const float *f1,
    int width, int height,
    const int16_t *mvs_fwd_x, const int16_t *mvs_fwd_y,  // F0 → F1
    const int16_t *mvs_bwd_x, const int16_t *mvs_bwd_y,  // F1 → F0
    int block_size,
    float *prediction          // Output: 0.5 * (warped_f0 + warped_f1)
) {
    int num_pixels = width * height;

    // Allocate temporary buffers
    float *warped_f0 = new float[num_pixels];
    float *warped_f1 = new float[num_pixels];

    // Warp f0 forward using forward MVs
    warp_block_motion(f0, width, height, mvs_fwd_x, mvs_fwd_y, block_size, warped_f0);

    // Warp f1 backward using backward MVs
    warp_block_motion(f1, width, height, mvs_bwd_x, mvs_bwd_y, block_size, warped_f1);

    // Average the two warped frames
    for (int i = 0; i < num_pixels; i++) {
        prediction[i] = 0.5f * (warped_f0[i] + warped_f1[i]);
    }

    delete[] warped_f0;
    delete[] warped_f1;
}

} // extern "C"