TAV: EZBC entropy coding

This commit is contained in:
minjaesong
2025-10-20 16:40:45 +09:00
parent 019f0aaed5
commit 9553b281af
3 changed files with 1596 additions and 1044 deletions

View File

@@ -9,413 +9,6 @@
extern "C" {
// Helper: Compute SAD (Sum of Absolute Differences) for a block
static int compute_sad(
const unsigned char *ref, const unsigned char *cur,
int ref_x, int ref_y, int cur_x, int cur_y,
int width, int height, int block_size
) {
int sad = 0;
for (int by = 0; by < block_size; by++) {
for (int bx = 0; bx < block_size; bx++) {
int ry = ref_y + by;
int rx = ref_x + bx;
int cy = cur_y + by;
int cx = cur_x + bx;
// Boundary check
if (rx < 0 || rx >= width || ry < 0 || ry >= height ||
cx < 0 || cx >= width || cy < 0 || cy >= height) {
sad += 255; // Penalty for out-of-bounds
continue;
}
int ref_val = ref[ry * width + rx];
int cur_val = cur[cy * width + cx];
sad += abs(ref_val - cur_val);
}
}
return sad;
}
// Parabolic interpolation for sub-pixel refinement
// Given SAD values at positions (-1, 0, +1), estimate peak location
static float parabolic_interp(int sad_m1, int sad_0, int sad_p1) {
// Fit parabola: y = a*x^2 + b*x + c
// Peak at x = -b/(2a) = (sad_m1 - sad_p1) / (2*(sad_m1 - 2*sad_0 + sad_p1))
int denom = 2 * (sad_m1 - 2 * sad_0 + sad_p1);
if (denom == 0) return 0.0f;
float offset = (float)(sad_m1 - sad_p1) / denom;
// Clamp to ±0.5 for reasonable sub-pixel values
if (offset < -0.5f) offset = -0.5f;
if (offset > 0.5f) offset = 0.5f;
return offset;
}
// Diamond search pattern for integer-pixel motion estimation
static void diamond_search(
const unsigned char *ref, const unsigned char *cur,
int cx, int cy, int width, int height, int block_size,
int search_range, int *best_dx, int *best_dy
) {
// Large diamond pattern (distance 2)
const int large_diamond[8][2] = {
{0, -2}, {-1, -1}, {1, -1}, {-2, 0},
{2, 0}, {-1, 1}, {1, 1}, {0, 2}
};
// Small diamond pattern (distance 1)
const int small_diamond[4][2] = {
{0, -1}, {-1, 0}, {1, 0}, {0, 1}
};
int dx = 0, dy = 0;
int best_sad = compute_sad(ref, cur, cx + dx, cy + dy, cx, cy, width, height, block_size);
// Large diamond search
bool improved = true;
while (improved) {
improved = false;
for (int i = 0; i < 8; i++) {
int test_dx = dx + large_diamond[i][0];
int test_dy = dy + large_diamond[i][1];
if (abs(test_dx) > search_range || abs(test_dy) > search_range) {
continue;
}
int sad = compute_sad(ref, cur, cx + test_dx, cy + test_dy, cx, cy, width, height, block_size);
if (sad < best_sad) {
best_sad = sad;
dx = test_dx;
dy = test_dy;
improved = true;
break;
}
}
}
// Small diamond refinement
improved = true;
while (improved) {
improved = false;
for (int i = 0; i < 4; i++) {
int test_dx = dx + small_diamond[i][0];
int test_dy = dy + small_diamond[i][1];
if (abs(test_dx) > search_range || abs(test_dy) > search_range) {
continue;
}
int sad = compute_sad(ref, cur, cx + test_dx, cy + test_dy, cx, cy, width, height, block_size);
if (sad < best_sad) {
best_sad = sad;
dx = test_dx;
dy = test_dy;
improved = true;
break;
}
}
}
*best_dx = dx;
*best_dy = dy;
}
// Sub-pixel refinement using parabolic interpolation
static void subpixel_refinement(
const unsigned char *ref, const unsigned char *cur,
int cx, int cy, int width, int height, int block_size,
int int_dx, int int_dy, // Integer-pixel motion
float *subpix_dx, float *subpix_dy // Output: 1/4-pixel precision
) {
// Get SAD at integer position and neighbors
int sad_0_0 = compute_sad(ref, cur, cx + int_dx, cy + int_dy, cx, cy, width, height, block_size);
// Horizontal neighbors
int sad_m1_0 = compute_sad(ref, cur, cx + int_dx - 1, cy + int_dy, cx, cy, width, height, block_size);
int sad_p1_0 = compute_sad(ref, cur, cx + int_dx + 1, cy + int_dy, cx, cy, width, height, block_size);
// Vertical neighbors
int sad_0_m1 = compute_sad(ref, cur, cx + int_dx, cy + int_dy - 1, cx, cy, width, height, block_size);
int sad_0_p1 = compute_sad(ref, cur, cx + int_dx, cy + int_dy + 1, cx, cy, width, height, block_size);
// Parabolic interpolation
float offset_x = parabolic_interp(sad_m1_0, sad_0_0, sad_p1_0);
float offset_y = parabolic_interp(sad_0_m1, sad_0_0, sad_0_p1);
// Quantize to 1/4-pixel precision
*subpix_dx = int_dx + roundf(offset_x * 4.0f) / 4.0f;
*subpix_dy = int_dy + roundf(offset_y * 4.0f) / 4.0f;
}
// MPEG-style bidirectional motion estimation
// Uses variable block sizes (16×16, optionally split to 8×8)
// 4-pixel overlap between blocks to reduce blocking artifacts
// Diamond search + parabolic sub-pixel refinement
void estimate_motion_optical_flow(
const unsigned char *frame1_rgb, const unsigned char *frame2_rgb,
int width, int height,
float **out_flow_x, float **out_flow_y
) {
// Convert RGB to grayscale
unsigned char *gray1 = (unsigned char*)std::malloc(width * height);
unsigned char *gray2 = (unsigned char*)std::malloc(width * height);
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
int idx = y * width + x;
int rgb_idx = idx * 3;
gray1[idx] = (unsigned char)(0.299f * frame1_rgb[rgb_idx] +
0.587f * frame1_rgb[rgb_idx + 1] +
0.114f * frame1_rgb[rgb_idx + 2]);
gray2[idx] = (unsigned char)(0.299f * frame2_rgb[rgb_idx] +
0.587f * frame2_rgb[rgb_idx + 1] +
0.114f * frame2_rgb[rgb_idx + 2]);
}
}
*out_flow_x = (float*)std::malloc(width * height * sizeof(float));
*out_flow_y = (float*)std::malloc(width * height * sizeof(float));
std::memset(*out_flow_x, 0, width * height * sizeof(float));
std::memset(*out_flow_y, 0, width * height * sizeof(float));
// Block parameters
const int block_size = 16;
const int overlap = 4;
const int stride = block_size - overlap; // 12 pixels
const int search_range = 16; // ±16 pixels
// Process overlapping blocks
for (int by = 0; by < height; by += stride) {
for (int bx = 0; bx < width; bx += stride) {
int actual_block_size = block_size;
// Clamp block to frame boundary
if (bx + block_size > width || by + block_size > height) {
continue; // Skip partial blocks at edges
}
// Integer-pixel diamond search
int int_dx = 0, int_dy = 0;
diamond_search(gray1, gray2, bx, by, width, height,
actual_block_size, search_range, &int_dx, &int_dy);
// Sub-pixel refinement
float subpix_dx = 0.0f, subpix_dy = 0.0f;
subpixel_refinement(gray1, gray2, bx, by, width, height,
actual_block_size, int_dx, int_dy,
&subpix_dx, &subpix_dy);
// Fill motion vectors for block with distance-weighted blending in overlap regions
for (int y = by; y < by + actual_block_size && y < height; y++) {
for (int x = bx; x < bx + actual_block_size && x < width; x++) {
int idx = y * width + x;
// Distance from block center for blending weight
float dx_from_center = (x - (bx + actual_block_size / 2));
float dy_from_center = (y - (by + actual_block_size / 2));
float dist = sqrtf(dx_from_center * dx_from_center +
dy_from_center * dy_from_center);
// Weight decreases with distance from center (for smooth blending in overlaps)
float weight = 1.0f / (1.0f + dist / actual_block_size);
// Accumulate weighted motion (will be normalized later)
(*out_flow_x)[idx] += subpix_dx * weight;
(*out_flow_y)[idx] += subpix_dy * weight;
}
}
}
}
std::free(gray1);
std::free(gray2);
}
// Build distortion mesh from dense optical flow field
void build_mesh_from_flow(
const float *flow_x, const float *flow_y,
int width, int height,
int mesh_w, int mesh_h,
short *mesh_dx, short *mesh_dy // Output: 1/8 pixel precision
) {
int cell_w = width / mesh_w;
int cell_h = height / mesh_h;
for (int my = 0; my < mesh_h; my++) {
for (int mx = 0; mx < mesh_w; mx++) {
// Cell center coordinates
int cx = mx * cell_w + cell_w / 2;
int cy = my * cell_h + cell_h / 2;
// Sample flow at cell center (5×5 neighborhood for robustness)
float sum_dx = 0.0f, sum_dy = 0.0f;
int count = 0;
for (int dy = -2; dy <= 2; dy++) {
for (int dx = -2; dx <= 2; dx++) {
int px = cx + dx;
int py = cy + dy;
if (px >= 0 && px < width && py >= 0 && py < height) {
int idx = py * width + px;
sum_dx += flow_x[idx];
sum_dy += flow_y[idx];
count++;
}
}
}
float avg_dx = (count > 0) ? (sum_dx / count) : 0.0f;
float avg_dy = (count > 0) ? (sum_dy / count) : 0.0f;
int mesh_idx = my * mesh_w + mx;
mesh_dx[mesh_idx] = (short)(avg_dx * 4.0f); // 1/4 pixel precision
mesh_dy[mesh_idx] = (short)(avg_dy * 4.0f);
}
}
}
// Laplacian smoothing for mesh spatial coherence
void smooth_mesh_laplacian(
short *mesh_dx, short *mesh_dy,
int mesh_width, int mesh_height,
float smoothness, int iterations
) {
short *temp_dx = (short*)std::malloc(mesh_width * mesh_height * sizeof(short));
short *temp_dy = (short*)std::malloc(mesh_width * mesh_height * sizeof(short));
for (int iter = 0; iter < iterations; iter++) {
std::memcpy(temp_dx, mesh_dx, mesh_width * mesh_height * sizeof(short));
std::memcpy(temp_dy, mesh_dy, mesh_width * mesh_height * sizeof(short));
for (int my = 0; my < mesh_height; my++) {
for (int mx = 0; mx < mesh_width; mx++) {
int idx = my * mesh_width + mx;
float neighbor_dx = 0.0f, neighbor_dy = 0.0f;
int neighbor_count = 0;
int neighbors[4][2] = {{0, -1}, {0, 1}, {-1, 0}, {1, 0}};
for (int n = 0; n < 4; n++) {
int nx = mx + neighbors[n][0];
int ny = my + neighbors[n][1];
if (nx >= 0 && nx < mesh_width && ny >= 0 && ny < mesh_height) {
int nidx = ny * mesh_width + nx;
neighbor_dx += temp_dx[nidx];
neighbor_dy += temp_dy[nidx];
neighbor_count++;
}
}
if (neighbor_count > 0) {
neighbor_dx /= neighbor_count;
neighbor_dy /= neighbor_count;
float data_weight = 1.0f - smoothness;
mesh_dx[idx] = (short)(data_weight * temp_dx[idx] + smoothness * neighbor_dx);
mesh_dy[idx] = (short)(data_weight * temp_dy[idx] + smoothness * neighbor_dy);
}
}
}
}
std::free(temp_dx);
std::free(temp_dy);
}
// Bilinear mesh warp
void warp_frame_with_mesh(
const float *src_frame, int width, int height,
const short *mesh_dx, const short *mesh_dy,
int mesh_width, int mesh_height,
float *dst_frame
) {
int cell_w = width / mesh_width;
int cell_h = height / mesh_height;
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
int cell_x = x / cell_w;
int cell_y = y / cell_h;
if (cell_x >= mesh_width - 1) cell_x = mesh_width - 2;
if (cell_y >= mesh_height - 1) cell_y = mesh_height - 2;
if (cell_x < 0) cell_x = 0;
if (cell_y < 0) cell_y = 0;
int idx_00 = cell_y * mesh_width + cell_x;
int idx_10 = idx_00 + 1;
int idx_01 = (cell_y + 1) * mesh_width + cell_x;
int idx_11 = idx_01 + 1;
float cp_x0 = cell_x * cell_w + cell_w / 2.0f;
float cp_y0 = cell_y * cell_h + cell_h / 2.0f;
float cp_x1 = (cell_x + 1) * cell_w + cell_w / 2.0f;
float cp_y1 = (cell_y + 1) * cell_h + cell_h / 2.0f;
float alpha = (x - cp_x0) / (cp_x1 - cp_x0);
float beta = (y - cp_y0) / (cp_y1 - cp_y0);
if (alpha < 0.0f) alpha = 0.0f;
if (alpha > 1.0f) alpha = 1.0f;
if (beta < 0.0f) beta = 0.0f;
if (beta > 1.0f) beta = 1.0f;
float dx_00 = mesh_dx[idx_00] / 4.0f;
float dy_00 = mesh_dy[idx_00] / 4.0f;
float dx_10 = mesh_dx[idx_10] / 4.0f;
float dy_10 = mesh_dy[idx_10] / 4.0f;
float dx_01 = mesh_dx[idx_01] / 4.0f;
float dy_01 = mesh_dy[idx_01] / 4.0f;
float dx_11 = mesh_dx[idx_11] / 4.0f;
float dy_11 = mesh_dy[idx_11] / 4.0f;
float dx = (1 - alpha) * (1 - beta) * dx_00 +
alpha * (1 - beta) * dx_10 +
(1 - alpha) * beta * dx_01 +
alpha * beta * dx_11;
float dy = (1 - alpha) * (1 - beta) * dy_00 +
alpha * (1 - beta) * dy_10 +
(1 - alpha) * beta * dy_01 +
alpha * beta * dy_11;
float src_x = x + dx;
float src_y = y + dy;
int sx0 = (int)std::floor(src_x);
int sy0 = (int)std::floor(src_y);
int sx1 = sx0 + 1;
int sy1 = sy0 + 1;
if (sx0 < 0) sx0 = 0;
if (sy0 < 0) sy0 = 0;
if (sx1 >= width) sx1 = width - 1;
if (sy1 >= height) sy1 = height - 1;
if (sx0 >= width) sx0 = width - 1;
if (sy0 >= height) sy0 = height - 1;
float fx = src_x - sx0;
float fy = src_y - sy0;
float val_00 = src_frame[sy0 * width + sx0];
float val_10 = src_frame[sy0 * width + sx1];
float val_01 = src_frame[sy1 * width + sx0];
float val_11 = src_frame[sy1 * width + sx1];
float val = (1 - fx) * (1 - fy) * val_00 +
fx * (1 - fy) * val_10 +
(1 - fx) * fy * val_01 +
fx * fy * val_11;
dst_frame[y * width + x] = val;
}
}
}
// Dense optical flow estimation using Farneback algorithm
// Computes flow at every pixel, then samples at block centers for motion vectors
// Much more spatially coherent than independent block matching
@@ -491,4 +84,100 @@ void estimate_optical_flow_motion(
}
}
// Block-based motion compensation with bilinear interpolation (sub-pixel precision)
// MVs are in 1/4-pixel units
// This implements the warp() function from MC-EZBC pseudocode
void warp_block_motion(
const float *src, // Source frame
int width, int height,
const int16_t *mvs_x, // Motion vectors X (1/4-pixel units)
const int16_t *mvs_y, // Motion vectors Y (1/4-pixel units)
int block_size, // Block size (e.g., 16)
float *dst // Output warped frame
) {
int num_blocks_x = (width + block_size - 1) / block_size;
int num_blocks_y = (height + block_size - 1) / block_size;
// Process each block
for (int by = 0; by < num_blocks_y; by++) {
for (int bx = 0; bx < num_blocks_x; bx++) {
int block_idx = by * num_blocks_x + bx;
// Get motion vector for this block (in 1/4-pixel units)
float mv_x = mvs_x[block_idx] / 4.0f; // Convert to pixels
float mv_y = mvs_y[block_idx] / 4.0f;
// Block boundaries in destination frame
int block_x_start = bx * block_size;
int block_y_start = by * block_size;
int block_x_end = std::min(block_x_start + block_size, width);
int block_y_end = std::min(block_y_start + block_size, height);
// Warp each pixel in the block
for (int y = block_y_start; y < block_y_end; y++) {
for (int x = block_x_start; x < block_x_end; x++) {
// Source position (backward warping)
float src_x = x - mv_x;
float src_y = y - mv_y;
// Clamp to valid range
src_x = std::max(0.0f, std::min((float)(width - 1), src_x));
src_y = std::max(0.0f, std::min((float)(height - 1), src_y));
// Bilinear interpolation
int x0 = (int)src_x;
int y0 = (int)src_y;
int x1 = std::min(x0 + 1, width - 1);
int y1 = std::min(y0 + 1, height - 1);
float fx = src_x - x0;
float fy = src_y - y0;
float val00 = src[y0 * width + x0];
float val10 = src[y0 * width + x1];
float val01 = src[y1 * width + x0];
float val11 = src[y1 * width + x1];
float val_top = (1.0f - fx) * val00 + fx * val10;
float val_bot = (1.0f - fx) * val01 + fx * val11;
float val = (1.0f - fy) * val_top + fy * val_bot;
dst[y * width + x] = val;
}
}
}
}
}
// Bidirectional motion compensation for MC-EZBC predict step
// Implements: prediction = 0.5 * (warp(f0, MV_fwd) + warp(f1, MV_bwd))
void warp_bidirectional(
const float *f0, const float *f1,
int width, int height,
const int16_t *mvs_fwd_x, const int16_t *mvs_fwd_y, // F0 → F1
const int16_t *mvs_bwd_x, const int16_t *mvs_bwd_y, // F1 → F0
int block_size,
float *prediction // Output: 0.5 * (warped_f0 + warped_f1)
) {
int num_pixels = width * height;
// Allocate temporary buffers
float *warped_f0 = new float[num_pixels];
float *warped_f1 = new float[num_pixels];
// Warp f0 forward using forward MVs
warp_block_motion(f0, width, height, mvs_fwd_x, mvs_fwd_y, block_size, warped_f0);
// Warp f1 backward using backward MVs
warp_block_motion(f1, width, height, mvs_bwd_x, mvs_bwd_y, block_size, warped_f1);
// Average the two warped frames
for (int i = 0; i < num_pixels; i++) {
prediction[i] = 0.5f * (warped_f0[i] + warped_f1[i]);
}
delete[] warped_f0;
delete[] warped_f1;
}
} // extern "C"