// tsvm/video_encoder/test_mesh_warp.cpp
// Visual unit test for mesh warping with hierarchical block matching and affine estimation
// Picks 5 random frames from test_video.mp4, warps prev frame to current frame using mesh,
// and saves both warped and target frames for visual comparison
// Now includes: hierarchical diamond search, Laplacian smoothing, and selective affine transforms
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <vector>

#include <opencv2/opencv.hpp>
#include <opencv2/video/tracking.hpp>
// Include the mesh functions from encoder
// Prototypes for the encoder's motion/mesh routines. They are declared by hand
// here (no encoder header is included), so they must be kept in sync with the
// encoder's definitions; C linkage is requested via extern "C".
extern "C" {
// Estimates motion between two RGB frames of the given size and returns the
// result through out_flow_x / out_flow_y. The caller in this file releases
// both returned buffers with free().
// NOTE(review): presumably one float per pixel (width*height) — confirm
// against the encoder.
void estimate_motion_optical_flow(
const unsigned char *frame1_rgb, const unsigned char *frame2_rgb,
int width, int height,
float **out_flow_x, float **out_flow_y
);
// Converts the flow field into mesh motion vectors written to mesh_dx/mesh_dy.
// This file passes arrays of mesh_w*mesh_h entries and later interprets the
// values as 1/8-pixel units.
void build_mesh_from_flow(
const float *flow_x, const float *flow_y,
int width, int height,
int mesh_w, int mesh_h,
int16_t *mesh_dx, int16_t *mesh_dy
);
// Laplacian smoothing of the mesh vectors. The arrays are passed as non-const
// pointers, so presumably they are smoothed in place — confirm against the
// encoder.
void smooth_mesh_laplacian(
int16_t *mesh_dx, int16_t *mesh_dy,
int mesh_width, int mesh_height,
float smoothness, int iterations
);
// Estimates a translation (out_tx/out_ty) and 2x2 matrix (out_a11..out_a22)
// for one cell. This file passes cell_x/cell_y as mesh cell indices (not pixel
// coordinates), treats a non-zero return value as "use affine for this cell",
// and interprets the matrix entries as 1/256 fixed point.
// NOTE(review): whether the outputs are written when 0 is returned cannot be
// determined from this file.
int estimate_cell_affine(
const float *flow_x, const float *flow_y,
int width, int height,
int cell_x, int cell_y,
int cell_w, int cell_h,
float threshold,
int16_t *out_tx, int16_t *out_ty,
int16_t *out_a11, int16_t *out_a12,
int16_t *out_a21, int16_t *out_a22
);
}
// Mesh warp with bilinear interpolation and optional affine support.
//
// For every destination pixel the four surrounding mesh control points (placed
// at cell centres) are blended bilinearly into a displacement, which is added
// to the pixel position to find where to sample `src` (inverse warp). Cells
// flagged in `affine_mask` additionally add a 2x2 matrix (1/256 fixed point)
// applied to the pixel's offset from the cell centre.
//
// Parameters:
//   src          8-bit 3-channel input image.
//   dst          Receives a newly allocated image of the same size. It is only
//                assigned once the warp is complete.
//   mesh_dx/dy   mesh_w*mesh_h motion vectors in 1/8-pixel units.
//   affine_mask  Optional (may be null); non-zero marks an affine cell.
//   affine_a11.. Per-cell matrix entries; only read for cells flagged in
//                affine_mask. If any of these arrays is null the affine term
//                is skipped.
//   mesh_w/h     Mesh dimensions in control points.
//
// With a missing or empty mesh, `dst` becomes an unwarped copy of `src`.
static void apply_mesh_warp_rgb(
    const cv::Mat &src,          // Input BGR image
    cv::Mat &dst,                // Output warped BGR image
    const int16_t *mesh_dx,      // Mesh motion vectors (1/8 pixel)
    const int16_t *mesh_dy,
    const uint8_t *affine_mask,  // 1=affine, 0=translation
    const int16_t *affine_a11,
    const int16_t *affine_a12,
    const int16_t *affine_a21,
    const int16_t *affine_a22,
    int mesh_w, int mesh_h
) {
    const int width = src.cols;
    const int height = src.rows;

    // No usable mesh means no motion to apply: pass the image through
    if (!mesh_dx || !mesh_dy || mesh_w < 1 || mesh_h < 1) {
        dst = src.clone();
        return;
    }

    // Integer cell size. Keep it at least 1 so a mesh finer than the image
    // cannot cause a division by zero below.
    const int cell_w = std::max(1, width / mesh_w);
    const int cell_h = std::max(1, height / mesh_h);

    // Last cell that still has a right/bottom neighbour (0 for a 1-wide mesh)
    const int last_cell_x = std::max(0, mesh_w - 2);
    const int last_cell_y = std::max(0, mesh_h - 2);

    const bool have_affine = affine_mask && affine_a11 && affine_a12 &&
                             affine_a21 && affine_a22;

    cv::Mat out(height, width, CV_8UC3);

    for (int y = 0; y < height; y++) {
        // Everything that depends only on the row is computed once per row
        const int cell_y = std::min(y / cell_h, last_cell_y);
        const int next_y = std::min(cell_y + 1, mesh_h - 1);
        const float cp_y0 = cell_y * cell_h + cell_h / 2.0f;  // control point row
        const float rel_y = y - cp_y0;
        // Control points are exactly one cell apart, so the local coordinate
        // is the offset divided by the cell size, clamped to [0, 1]
        const float beta = std::max(0.0f, std::min(1.0f, rel_y / cell_h));

        for (int x = 0; x < width; x++) {
            const int cell_x = std::min(x / cell_w, last_cell_x);
            const int next_x = std::min(cell_x + 1, mesh_w - 1);

            // Four corner control points
            const int idx_00 = cell_y * mesh_w + cell_x;
            const int idx_10 = cell_y * mesh_w + next_x;
            const int idx_01 = next_y * mesh_w + cell_x;
            const int idx_11 = next_y * mesh_w + next_x;

            const float cp_x0 = cell_x * cell_w + cell_w / 2.0f;
            const float rel_x = x - cp_x0;
            const float alpha = std::max(0.0f, std::min(1.0f, rel_x / cell_w));

            // Bilinear interpolation of motion vectors (1/8 pixel -> pixels)
            float dx = (1 - alpha) * (1 - beta) * (mesh_dx[idx_00] / 8.0f) +
                       alpha * (1 - beta) * (mesh_dx[idx_10] / 8.0f) +
                       (1 - alpha) * beta * (mesh_dx[idx_01] / 8.0f) +
                       alpha * beta * (mesh_dx[idx_11] / 8.0f);
            float dy = (1 - alpha) * (1 - beta) * (mesh_dy[idx_00] / 8.0f) +
                       alpha * (1 - beta) * (mesh_dy[idx_10] / 8.0f) +
                       (1 - alpha) * beta * (mesh_dy[idx_01] / 8.0f) +
                       alpha * beta * (mesh_dy[idx_11] / 8.0f);

            // For simplicity the top-left corner's affine parameters are used
            // for the whole cell
            if (have_affine && affine_mask[idx_00]) {
                const float a11 = affine_a11[idx_00] / 256.0f;
                const float a12 = affine_a12[idx_00] / 256.0f;
                const float a21 = affine_a21[idx_00] / 256.0f;
                const float a22 = affine_a22[idx_00] / 256.0f;
                // The matrix acts on the offset from the cell centre and the
                // result is added to the interpolated displacement
                const float affine_dx = a11 * rel_x + a12 * rel_y;
                const float affine_dy = a21 * rel_x + a22 * rel_y;
                dx = affine_dx + dx;
                dy = affine_dy + dy;
            }

            // Source coordinates (inverse warp)
            const float src_x = x + dx;
            const float src_y = y + dy;

            // Take the fractional weights from the unclamped position so they
            // always lie in [0, 1), even when the sample falls outside the
            // frame and the integer taps are clamped to the border.
            const float floor_x = floorf(src_x);
            const float floor_y = floorf(src_y);
            const float fx = src_x - floor_x;
            const float fy = src_y - floor_y;

            const int sx0 = std::max(0, std::min(width - 1, (int)floor_x));
            const int sy0 = std::max(0, std::min(height - 1, (int)floor_y));
            const int sx1 = std::max(0, std::min(width - 1, (int)floor_x + 1));
            const int sy1 = std::max(0, std::min(height - 1, (int)floor_y + 1));

            // Fetch the four taps once instead of once per channel
            const cv::Vec3b &p00 = src.at<cv::Vec3b>(sy0, sx0);
            const cv::Vec3b &p10 = src.at<cv::Vec3b>(sy0, sx1);
            const cv::Vec3b &p01 = src.at<cv::Vec3b>(sy1, sx0);
            const cv::Vec3b &p11 = src.at<cv::Vec3b>(sy1, sx1);

            const float w00 = (1 - fx) * (1 - fy);
            const float w10 = fx * (1 - fy);
            const float w01 = (1 - fx) * fy;
            const float w11 = fx * fy;

            cv::Vec3b &out_px = out.at<cv::Vec3b>(y, x);
            for (int c = 0; c < 3; c++) {
                const float val = w00 * p00[c] + w10 * p10[c] +
                                  w01 * p01[c] + w11 * p11[c];
                out_px[c] = static_cast<unsigned char>(
                    std::max(0.0f, std::min(255.0f, val)));
            }
        }
    }

    // Assign last so `src` stays readable even if the caller passed the same
    // cv::Mat object for both arguments
    dst = out;
}
// Create visualization overlay showing affine cells.
//
// Draws a 1-pixel green rectangle on `img` around every mesh cell whose entry
// in `affine_mask` is non-zero. Cell size is the integer quotient of the image
// size and the mesh size. `affine_mask` holds mesh_w*mesh_h entries in
// row-major order. It may be null, in which case (as with an empty mesh)
// nothing is drawn.
static void create_affine_overlay(
    cv::Mat &img,
    const uint8_t *affine_mask,
    int mesh_w, int mesh_h
) {
    // Nothing to draw without a mask; also avoids dividing by a zero mesh size
    if (!affine_mask || mesh_w <= 0 || mesh_h <= 0) {
        return;
    }

    const int cell_w = img.cols / mesh_w;
    const int cell_h = img.rows / mesh_h;
    const cv::Scalar green(0, 255, 0);

    for (int my = 0; my < mesh_h; my++) {
        for (int mx = 0; mx < mesh_w; mx++) {
            if (!affine_mask[my * mesh_w + mx]) {
                continue;
            }
            const cv::Point top_left(mx * cell_w, my * cell_h);
            const cv::Point bottom_right((mx + 1) * cell_w, (my + 1) * cell_h);
            cv::rectangle(img, top_left, bottom_right, green, 1);
        }
    }
}
int main(int argc, char** argv) {
const char* video_file = (argc > 1) ? argv[1] : "test_video.mp4";
int num_test_frames = (argc > 2) ? atoi(argv[2]) : 5;
printf("Opening video: %s\n", video_file);
cv::VideoCapture cap(video_file);
if (!cap.isOpened()) {
fprintf(stderr, "Error: Cannot open video file %s\n", video_file);
return 1;
}
int total_frames = (int)cap.get(cv::CAP_PROP_FRAME_COUNT);
int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);
printf("Video: %dx%d, %d frames\n", width, height, total_frames);
if (total_frames < 10) {
fprintf(stderr, "Error: Video too short (need at least 10 frames)\n");
return 1;
}
// Calculate mesh dimensions (32×32 pixel cells, matches current encoder)
int mesh_cell_size = 32;
int mesh_w = (width + mesh_cell_size - 1) / mesh_cell_size;
int mesh_h = (height + mesh_cell_size - 1) / mesh_cell_size;
if (mesh_w < 2) mesh_w = 2;
if (mesh_h < 2) mesh_h = 2;
printf("Mesh: %dx%d (approx %dx%d px cells)\n",
mesh_w, mesh_h, width / mesh_w, height / mesh_h);
// Encoder parameters (match current encoder_tav.c settings)
float smoothness = 0.5f; // Mesh smoothness weight
int smooth_iterations = 8; // Smoothing iterations
float affine_threshold = 0.40f; // 40% improvement required for affine
printf("Settings: smoothness=%.2f, iterations=%d, affine_threshold=%.0f%%\n",
smoothness, smooth_iterations, affine_threshold * 100.0f);
// Seed random number generator
srand(time(NULL));
// Pick random frames (avoid first and last 5 frames)
printf("\nTesting %d random frame pairs:\n", num_test_frames);
for (int test = 0; test < num_test_frames; test++) {
// Pick random frame (ensure we have a previous frame)
int frame_num = 5 + rand() % (total_frames - 10);
printf("\n[Test %d/%d] Warping frame %d → frame %d (inverse warp)\n",
test + 1, num_test_frames, frame_num - 1, frame_num);
// Read previous frame (source for warping)
cap.set(cv::CAP_PROP_POS_FRAMES, frame_num - 1);
cv::Mat prev_frame;
cap >> prev_frame;
if (prev_frame.empty()) {
fprintf(stderr, "Error reading frame %d\n", frame_num - 1);
continue;
}
// Read current frame (target to match)
cv::Mat curr_frame;
cap >> curr_frame;
if (curr_frame.empty()) {
fprintf(stderr, "Error reading frame %d\n", frame_num);
continue;
}
// Convert to RGB for block matching
cv::Mat prev_rgb, curr_rgb;
cv::cvtColor(prev_frame, prev_rgb, cv::COLOR_BGR2RGB);
cv::cvtColor(curr_frame, curr_rgb, cv::COLOR_BGR2RGB);
// Compute hierarchical block matching (replaces optical flow)
printf(" Computing hierarchical block matching...\n");
float *flow_x = nullptr, *flow_y = nullptr;
estimate_motion_optical_flow(
prev_rgb.data, curr_rgb.data,
width, height,
&flow_x, &flow_y
);
// Build mesh from flow
printf(" Building mesh from block matches...\n");
int16_t *mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
int16_t *mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
build_mesh_from_flow(flow_x, flow_y, width, height, mesh_w, mesh_h, mesh_dx, mesh_dy);
// Apply Laplacian smoothing
printf(" Applying Laplacian smoothing (%d iterations, %.2f weight)...\n",
smooth_iterations, smoothness);
smooth_mesh_laplacian(mesh_dx, mesh_dy, mesh_w, mesh_h, smoothness, smooth_iterations);
// Estimate selective per-cell affine transforms
printf(" Estimating selective affine transforms (threshold=%.0f%%)...\n",
affine_threshold * 100.0f);
uint8_t *affine_mask = (uint8_t*)calloc(mesh_w * mesh_h, sizeof(uint8_t));
int16_t *affine_a11 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
int16_t *affine_a12 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
int16_t *affine_a21 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
int16_t *affine_a22 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
int cell_w = width / mesh_w;
int cell_h = height / mesh_h;
int affine_count = 0;
for (int cy = 0; cy < mesh_h; cy++) {
for (int cx = 0; cx < mesh_w; cx++) {
int cell_idx = cy * mesh_w + cx;
int16_t tx, ty, a11, a12, a21, a22;
int use_affine = estimate_cell_affine(
flow_x, flow_y,
width, height,
cx, cy, cell_w, cell_h,
affine_threshold,
&tx, &ty, &a11, &a12, &a21, &a22
);
affine_mask[cell_idx] = use_affine ? 1 : 0;
mesh_dx[cell_idx] = tx;
mesh_dy[cell_idx] = ty;
affine_a11[cell_idx] = a11;
affine_a12[cell_idx] = a12;
affine_a21[cell_idx] = a21;
affine_a22[cell_idx] = a22;
if (use_affine) affine_count++;
}
}
printf(" Affine usage: %d/%d cells (%.1f%%)\n",
affine_count, mesh_w * mesh_h,
100.0f * affine_count / (mesh_w * mesh_h));
// Warp previous frame to current frame
printf(" Warping frame with mesh + affine...\n");
cv::Mat warped;
apply_mesh_warp_rgb(prev_frame, warped, mesh_dx, mesh_dy,
affine_mask, affine_a11, affine_a12, affine_a21, affine_a22,
mesh_w, mesh_h);
// Create visualization with affine overlay
cv::Mat warped_viz = warped.clone();
create_affine_overlay(warped_viz, affine_mask, mesh_w, mesh_h);
// Compute MSE between warped and target
double mse = 0.0;
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
for (int c = 0; c < 3; c++) {
double diff = (double)warped.at<cv::Vec3b>(y, x)[c] -
(double)curr_frame.at<cv::Vec3b>(y, x)[c];
mse += diff * diff;
}
}
}
mse /= (width * height * 3);
double psnr = (mse > 0) ? 10.0 * log10(255.0 * 255.0 / mse) : 999.0;
printf(" Warp quality: MSE=%.2f, PSNR=%.2f dB\n", mse, psnr);
// Save images
char filename[256];
sprintf(filename, "test_mesh_frame_%04d_source.png", frame_num - 1);
cv::imwrite(filename, prev_frame);
printf(" Saved source: %s\n", filename);
sprintf(filename, "test_mesh_frame_%04d_warped.png", frame_num);
cv::imwrite(filename, warped);
printf(" Saved warped: %s\n", filename);
sprintf(filename, "test_mesh_frame_%04d_warped_viz.png", frame_num);
cv::imwrite(filename, warped_viz);
printf(" Saved warped+viz (green=affine): %s\n", filename);
sprintf(filename, "test_mesh_frame_%04d_target.png", frame_num);
cv::imwrite(filename, curr_frame);
printf(" Saved target: %s\n", filename);
// Compute difference image
cv::Mat diff_img = cv::Mat::zeros(height, width, CV_8UC3);
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
for (int c = 0; c < 3; c++) {
int diff = abs((int)warped.at<cv::Vec3b>(y, x)[c] -
(int)curr_frame.at<cv::Vec3b>(y, x)[c]);
diff_img.at<cv::Vec3b>(y, x)[c] = std::min(diff * 3, 255); // Amplify for visibility
}
}
}
sprintf(filename, "test_mesh_frame_%04d_diff.png", frame_num);
cv::imwrite(filename, diff_img);
printf(" Saved difference (amplified 3x): %s\n", filename);
// Compute motion statistics
float max_motion = 0.0f, avg_motion = 0.0f;
for (int i = 0; i < mesh_w * mesh_h; i++) {
float dx = mesh_dx[i] / 8.0f;
float dy = mesh_dy[i] / 8.0f;
float motion = sqrtf(dx * dx + dy * dy);
avg_motion += motion;
if (motion > max_motion) max_motion = motion;
}
avg_motion /= (mesh_w * mesh_h);
printf(" Motion: avg=%.2f px, max=%.2f px\n", avg_motion, max_motion);
// Cleanup
free(flow_x);
free(flow_y);
free(mesh_dx);
free(mesh_dy);
free(affine_mask);
free(affine_a11);
free(affine_a12);
free(affine_a21);
free(affine_a22);
}
printf("\nDone! Check output images:\n");
printf(" *_source.png: Original frame before warping\n");
printf(" *_warped.png: Warped frame (should match target)\n");
printf(" *_warped_viz.png: Warped with green overlay showing affine cells\n");
printf(" *_target.png: Target frame to match\n");
printf(" *_diff.png: Difference image (should be mostly black if warp is good)\n");
cap.release();
return 0;
}