TAV: some more mocomp shit

This commit is contained in:
minjaesong
2025-10-18 05:47:17 +09:00
parent 3b9e02b17f
commit 120058be6d
8 changed files with 2526 additions and 161 deletions

View File

@@ -1054,6 +1054,8 @@ transmission capability, and region-of-interest coding.
## GOP Unified Packet Structure (0x12) ## GOP Unified Packet Structure (0x12)
Implemented on 2025-10-15 for temporal 3D DWT with unified preprocessing. Implemented on 2025-10-15 for temporal 3D DWT with unified preprocessing.
Updated on 2025-10-17 to include canvas expansion margins.
This packet contains multiple frames encoded as a single spacetime block for optimal This packet contains multiple frames encoded as a single spacetime block for optimal
temporal compression. temporal compression.
@@ -1077,18 +1079,40 @@ The entire GOP (width×height×N_frames×3_channels) is preprocessed as a single
This layout enables Zstd to find patterns across both spatial and temporal dimensions, This layout enables Zstd to find patterns across both spatial and temporal dimensions,
resulting in superior compression compared to per-frame encoding. resulting in superior compression compared to per-frame encoding.
### Canvas Expansion for Motion Compensation
When frames in a GOP have camera motion, they must be aligned before temporal DWT.
However, alignment creates "gaps" at frame edges. To preserve ALL original pixels:
1. **Calculate motion range**: Determine the total shift range across all GOP frames
- Example: If frames shift by ±3 pixels horizontally, total range = 6 pixels
2. **Expand canvas**: Create a larger canvas = original_size + margin
- Canvas width = header.width + margin_left + margin_right
- Canvas height = header.height + margin_top + margin_bottom
3. **Place aligned frames**: Each frame is positioned on the expanded canvas
- All original pixels from all frames are preserved
- No artificial padding or cropping occurs
4. **Encode expanded canvas**: Apply 3D DWT to the larger canvas dimensions
5. **Store margins**: 4 bytes (L/R/T/B) tell decoder the canvas expansion
6. **Decoder extraction**: Decoder extracts display region for each frame based on
motion vectors and margins
This approach ensures lossless preservation of original video content during GOP encoding.
### Motion Vectors ### Motion Vectors
- Stored in quarter-pixel units (divide by 4.0 for pixel displacement) - Stored in 1/16-pixel units (divide by 16.0 for pixel displacement)
- Used for global motion compensation (camera movement, scene translation) - Used for global motion compensation (camera movement, scene translation)
- Computed using FFT-based phase correlation for accurate frame alignment - Computed using FFT-based phase correlation for accurate frame alignment
- First frame (frame 0) typically has motion vector (0, 0) - Cumulative relative to frame 0 (not frame-to-frame deltas)
- First frame (frame 0) always has motion vector (0, 0)
### Temporal 3D DWT Process ### Temporal 3D DWT Process
1. Apply 1D DWT across temporal axis (GOP frames) 1. Detect inter-frame motion using phase correlation
2. Apply 2D DWT on each spatial slice of temporal subbands 2. Align frames and expand canvas to preserve all original pixels
3. Perceptual quantization with temporal-spatial awareness 3. Apply 1D DWT across temporal axis (GOP frames) on expanded canvas
4. Unified significance map preprocessing across all frames/channels 4. Apply 2D DWT on each spatial slice of temporal subbands
5. Single Zstd compression of entire GOP block 5. Perceptual quantization with temporal-spatial awareness
6. Unified significance map preprocessing across all frames/channels
7. Single Zstd compression of entire GOP block
## GOP Sync Packet Structure (0xFC) ## GOP Sync Packet Structure (0xFC)
Indicates that N frames were decoded from a GOP Unified block. Indicates that N frames were decoded from a GOP Unified block.

View File

@@ -89,8 +89,8 @@ internal class UnsafePtr(pointer: Long, allocSize: Long, private val caller: Any
//// You may break the glass and use this tool when some fucking incomprehensible bugs ("vittujen vitun bugit") //// You may break the glass and use this tool when some fucking incomprehensible bugs ("vittujen vitun bugit")
//// appear (e.g. getting garbage values when it fucking shouldn't) //// appear (e.g. getting garbage values when it fucking shouldn't)
// if (destroyed) { throw DanglingPointerException("The pointer is already destroyed ($this)") } if (destroyed) { throw DanglingPointerException("The pointer is already destroyed ($this)") }
// if (index !in 0 until size) throw AddressOverflowException("Index: $index; alloc size: $size; pointer: ${this}\n${Thread.currentThread().stackTrace.joinToString("\n", limit=10) { " $it" }}") if (index !in 0 until size) throw AddressOverflowException("Index: $index; alloc size: $size; pointer: ${this}\n${Thread.currentThread().stackTrace.joinToString("\n", limit=10) { " $it" }}")
} }
operator fun get(index: Long): Byte { operator fun get(index: Long): Byte {

View File

@@ -2,11 +2,18 @@
# Makefile for TSVM Enhanced Video (TEV) encoder # Makefile for TSVM Enhanced Video (TEV) encoder
CC = gcc CC = gcc
CXX = g++
CFLAGS = -std=c99 -Wall -Wextra -O2 -D_GNU_SOURCE CFLAGS = -std=c99 -Wall -Wextra -O2 -D_GNU_SOURCE
CXXFLAGS = -std=c++11 -Wall -Wextra -O2 -D_GNU_SOURCE
LIBS = -lm -lzstd LIBS = -lm -lzstd
# OpenCV flags (for TAV encoder with mesh warping)
OPENCV_CFLAGS = $(shell pkg-config --cflags opencv4)
OPENCV_LIBS = $(shell pkg-config --libs opencv4)
# Source files and targets # Source files and targets
TARGETS = tev tav tav_decoder TARGETS = tev tav tav_decoder
TEST_TARGETS = test_mesh_warp test_mesh_roundtrip
# Build all encoders # Build all encoders
all: $(TARGETS) all: $(TARGETS)
@@ -16,21 +23,35 @@ tev: encoder_tev.c
rm -f encoder_tev rm -f encoder_tev
$(CC) $(CFLAGS) -o encoder_tev $< $(LIBS) $(CC) $(CFLAGS) -o encoder_tev $< $(LIBS)
tav: encoder_tav.c tav: encoder_tav.c encoder_tav_opencv.cpp estimate_affine_from_blocks.cpp
rm -f encoder_tav rm -f encoder_tav encoder_tav.o encoder_tav_opencv.o estimate_affine_from_blocks.o
$(CC) $(CFLAGS) -o encoder_tav $< $(LIBS) -lfftw3f $(CC) $(CFLAGS) -c encoder_tav.c -o encoder_tav.o
$(CXX) $(CXXFLAGS) $(OPENCV_CFLAGS) -c encoder_tav_opencv.cpp -o encoder_tav_opencv.o
$(CXX) $(CXXFLAGS) -c estimate_affine_from_blocks.cpp -o estimate_affine_from_blocks.o
$(CXX) -o encoder_tav encoder_tav.o encoder_tav_opencv.o estimate_affine_from_blocks.o $(LIBS) -lfftw3f $(OPENCV_LIBS)
tav_decoder: decoder_tav.c tav_decoder: decoder_tav.c
rm -f decoder_tav rm -f decoder_tav
$(CC) $(CFLAGS) -o decoder_tav $< $(LIBS) $(CC) $(CFLAGS) -o decoder_tav $< $(LIBS)
# Build test programs
test_mesh_warp: test_mesh_warp.cpp encoder_tav_opencv.cpp estimate_affine_from_blocks.cpp
rm -f test_mesh_warp test_mesh_warp.o
$(CXX) $(CXXFLAGS) $(OPENCV_CFLAGS) -o test_mesh_warp test_mesh_warp.cpp encoder_tav_opencv.cpp estimate_affine_from_blocks.cpp $(OPENCV_LIBS)
test_mesh_roundtrip: test_mesh_roundtrip.cpp encoder_tav_opencv.cpp
rm -f test_mesh_roundtrip test_mesh_roundtrip.o
$(CXX) $(CXXFLAGS) $(OPENCV_CFLAGS) -o test_mesh_roundtrip test_mesh_roundtrip.cpp encoder_tav_opencv.cpp $(OPENCV_LIBS)
tests: $(TEST_TARGETS)
# Build with debug symbols # Build with debug symbols
debug: CFLAGS += -g -DDEBUG debug: CFLAGS += -g -DDEBUG
debug: $(TARGETS) debug: $(TARGETS)
# Clean build artifacts # Clean build artifacts
clean: clean:
rm -f $(TARGETS) rm -f $(TARGETS) *.o
# Install (copy to PATH) # Install (copy to PATH)
install: $(TARGETS) install: $(TARGETS)
@@ -43,6 +64,8 @@ check-deps:
@echo "Checking dependencies..." @echo "Checking dependencies..."
@echo "Using Zstd compression for better efficiency" @echo "Using Zstd compression for better efficiency"
@pkg-config --exists libzstd || (echo "Error: libzstd-dev not found. Install with: sudo apt install libzstd-dev" && exit 1) @pkg-config --exists libzstd || (echo "Error: libzstd-dev not found. Install with: sudo apt install libzstd-dev" && exit 1)
@pkg-config --exists fftw3f || (echo "Error: libfftw3-dev not found. Install with: sudo apt install libfftw3-dev" && exit 1)
@pkg-config --exists opencv4 || (echo "Error: OpenCV 4 not found. Install with: sudo apt install libopencv-dev" && exit 1)
@echo "All dependencies found." @echo "All dependencies found."
# Help # Help

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,461 @@
// Created by Claude on 2025-10-17
// OpenCV-based optical flow and mesh warping functions for TAV encoder
// This file is compiled separately as C++ and linked with the C encoder
#include <opencv2/opencv.hpp>
#include <opencv2/video/tracking.hpp>
#include <cstdlib>
#include <cstring>
#include <cmath>
// Extern "C" linkage for functions callable from C code
extern "C" {
// Helper: sum of absolute differences between a block_size×block_size block
// in `ref` anchored at (ref_x, ref_y) and one in `cur` anchored at
// (cur_x, cur_y). Any pixel pair where either tap falls outside the
// width×height frame contributes a fixed penalty of 255 instead.
static int compute_sad(
    const unsigned char *ref, const unsigned char *cur,
    int ref_x, int ref_y, int cur_x, int cur_y,
    int width, int height, int block_size
) {
    int total = 0;
    for (int row = 0; row < block_size; row++) {
        const int ry = ref_y + row;
        const int cy = cur_y + row;
        for (int col = 0; col < block_size; col++) {
            const int rx = ref_x + col;
            const int cx = cur_x + col;
            const bool ref_inside = (rx >= 0 && rx < width && ry >= 0 && ry < height);
            const bool cur_inside = (cx >= 0 && cx < width && cy >= 0 && cy < height);
            if (!ref_inside || !cur_inside) {
                total += 255; // out-of-bounds penalty
                continue;
            }
            total += abs((int)ref[ry * width + rx] - (int)cur[cy * width + cx]);
        }
    }
    return total;
}
// Helper: diamond-search motion estimation for one block.
// Greedy two-stage descent: repeatedly jump to the FIRST large-diamond
// candidate that strictly lowers the SAD (restarting the scan after each
// move), then refine the same way with the small diamond. Candidates
// outside ±search_range are skipped. Result goes to *best_dx / *best_dy.
static void diamond_search(
    const unsigned char *ref, const unsigned char *cur,
    int cx, int cy, int width, int height, int block_size,
    int search_range, int *best_dx, int *best_dy
) {
    static const int kLargeDiamond[8][2] = {
        {0, -2}, {-1, -1}, {1, -1}, {-2, 0},
        {2, 0}, {-1, 1}, {1, 1}, {0, 2}
    };
    static const int kSmallDiamond[4][2] = {
        {0, -1}, {-1, 0}, {1, 0}, {0, 1}
    };

    int cur_dx = 0, cur_dy = 0;
    int cur_sad = compute_sad(ref, cur, cx + cur_dx, cy + cur_dy, cx, cy,
                              width, height, block_size);

    // One greedy descent over a candidate pattern; takes the first strictly
    // improving neighbour and rescans until no neighbour improves.
    auto descend = [&](const int (*pattern)[2], int count) {
        bool moved = true;
        while (moved) {
            moved = false;
            for (int i = 0; i < count; i++) {
                const int try_dx = cur_dx + pattern[i][0];
                const int try_dy = cur_dy + pattern[i][1];
                if (abs(try_dx) > search_range || abs(try_dy) > search_range) {
                    continue;
                }
                const int sad = compute_sad(ref, cur, cx + try_dx, cy + try_dy,
                                            cx, cy, width, height, block_size);
                if (sad < cur_sad) {
                    cur_sad = sad;
                    cur_dx = try_dx;
                    cur_dy = try_dy;
                    moved = true;
                    break;
                }
            }
        }
    };

    descend(kLargeDiamond, 8); // coarse: step-2 diamond
    descend(kSmallDiamond, 4); // refine: step-1 diamond
    *best_dx = cur_dx;
    *best_dy = cur_dy;
}
// Hierarchical block-matching motion estimation producing a dense flow field.
// Despite the "3-level" design, only TWO levels are active in this build:
//   Level 0: 32×32 blocks, ±16 px diamond search (coarse)
//   Level 1: 16×16 blocks, ±8 px local search with a 2-px step (refine)
// The finest level is present below but commented out.
// Outputs: *out_flow_x / *out_flow_y are malloc'd width×height float fields
// holding the per-pixel displacement in pixels (piecewise constant per
// block); the caller takes ownership and must free() both.
// NOTE(review): std::malloc results are never checked for NULL.
void estimate_motion_optical_flow(
    const unsigned char *frame1_rgb, const unsigned char *frame2_rgb,
    int width, int height,
    float **out_flow_x, float **out_flow_y
) {
    // Step 1: Convert RGB (3 bytes/pixel, R first) to grayscale
    unsigned char *gray1 = (unsigned char*)std::malloc(width * height);
    unsigned char *gray2 = (unsigned char*)std::malloc(width * height);
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            int idx = y * width + x;
            int rgb_idx = idx * 3;
            // ITU-R BT.601 grayscale conversion (truncating cast)
            gray1[idx] = (unsigned char)(0.299f * frame1_rgb[rgb_idx] +
                                         0.587f * frame1_rgb[rgb_idx + 1] +
                                         0.114f * frame1_rgb[rgb_idx + 2]);
            gray2[idx] = (unsigned char)(0.299f * frame2_rgb[rgb_idx] +
                                         0.587f * frame2_rgb[rgb_idx + 1] +
                                         0.114f * frame2_rgb[rgb_idx + 2]);
        }
    }
    // Step 2: hierarchical block matching, coarse to fine.
    // (Earlier comments claimed 64×64/±32 at level 0; the constants below
    // are the source of truth: 32×32/±16, then 16×16/±8.)
    *out_flow_x = (float*)std::malloc(width * height * sizeof(float));
    *out_flow_y = (float*)std::malloc(width * height * sizeof(float));
    // Initialize with zero motion
    std::memset(*out_flow_x, 0, width * height * sizeof(float));
    std::memset(*out_flow_y, 0, width * height * sizeof(float));
    // Level 0: Coarsest search (32×32 blocks, ±16px diamond search)
    const int block_size_l0 = 32;
    const int search_range_l0 = 16;
    for (int by = 0; by < height; by += block_size_l0) {
        for (int bx = 0; bx < width; bx += block_size_l0) {
            int dx = 0, dy = 0;
            diamond_search(gray1, gray2, bx, by, width, height,
                           block_size_l0, search_range_l0, &dx, &dy);
            // Fill flow for this block (piecewise constant)
            for (int y = by; y < by + block_size_l0 && y < height; y++) {
                for (int x = bx; x < bx + block_size_l0 && x < width; x++) {
                    int idx = y * width + x;
                    (*out_flow_x)[idx] = (float)dx;
                    (*out_flow_y)[idx] = (float)dy;
                }
            }
        }
    }
    // Level 1: refinement (16×16 blocks, exhaustive ±8px search, 2-px step)
    const int block_size_l1 = 16;
    const int search_range_l1 = 8;
    for (int by = 0; by < height; by += block_size_l1) {
        for (int bx = 0; bx < width; bx += block_size_l1) {
            // Initial guess from level 0, read at this block's top-left pixel
            int init_dx = (int)(*out_flow_x)[by * width + bx];
            int init_dy = (int)(*out_flow_y)[by * width + bx];
            // Search around initial guess
            int best_dx = init_dx;
            int best_dy = init_dy;
            int best_sad = compute_sad(gray1, gray2, bx + init_dx, by + init_dy,
                                       bx, by, width, height, block_size_l1);
            // Local exhaustive search around the guess (step 2 halves the work)
            for (int dy = -search_range_l1; dy <= search_range_l1; dy += 2) {
                for (int dx = -search_range_l1; dx <= search_range_l1; dx += 2) {
                    int test_dx = init_dx + dx;
                    int test_dy = init_dy + dy;
                    int sad = compute_sad(gray1, gray2, bx + test_dx, by + test_dy,
                                          bx, by, width, height, block_size_l1);
                    if (sad < best_sad) {
                        best_sad = sad;
                        best_dx = test_dx;
                        best_dy = test_dy;
                    }
                }
            }
            // Fill flow for this block
            for (int y = by; y < by + block_size_l1 && y < height; y++) {
                for (int x = bx; x < bx + block_size_l1 && x < width; x++) {
                    int idx = y * width + x;
                    (*out_flow_x)[idx] = (float)best_dx;
                    (*out_flow_y)[idx] = (float)best_dy;
                }
            }
        }
    }
    // Level 2: finest refinement — intentionally DISABLED (commented out).
    // Re-enabling it would add a 1-px-step exhaustive search on 16×16 blocks.
    /*const int block_size_l2 = 16;
    const int search_range_l2 = 8;
    for (int by = 0; by < height; by += block_size_l2) {
        for (int bx = 0; bx < width; bx += block_size_l2) {
            // Get initial guess from level 1
            int init_dx = (int)(*out_flow_x)[by * width + bx];
            int init_dy = (int)(*out_flow_y)[by * width + bx];
            // Search around initial guess (finer grid)
            int best_dx = init_dx;
            int best_dy = init_dy;
            int best_sad = compute_sad(gray1, gray2, bx + init_dx, by + init_dy,
                                       bx, by, width, height, block_size_l2);
            // Exhaustive local search for final refinement
            for (int dy = -search_range_l2; dy <= search_range_l2; dy++) {
                for (int dx = -search_range_l2; dx <= search_range_l2; dx++) {
                    int test_dx = init_dx + dx;
                    int test_dy = init_dy + dy;
                    int sad = compute_sad(gray1, gray2, bx + test_dx, by + test_dy,
                                          bx, by, width, height, block_size_l2);
                    if (sad < best_sad) {
                        best_sad = sad;
                        best_dx = test_dx;
                        best_dy = test_dy;
                    }
                }
            }
            // Fill flow for this block
            for (int y = by; y < by + block_size_l2 && y < height; y++) {
                for (int x = bx; x < bx + block_size_l2 && x < width; x++) {
                    int idx = y * width + x;
                    (*out_flow_x)[idx] = (float)best_dx;
                    (*out_flow_y)[idx] = (float)best_dy;
                }
            }
        }
    }*/
    std::free(gray1);
    std::free(gray2);
}
// Build a coarse distortion mesh from a dense optical-flow field.
// Each mesh control point takes the average flow over a 5×5 pixel window
// centred on its cell centre (out-of-frame samples are skipped), stored in
// 1/8-pixel fixed point.
void build_mesh_from_flow(
    const float *flow_x, const float *flow_y,
    int width, int height,
    int mesh_w, int mesh_h,
    short *mesh_dx, short *mesh_dy // Output: 1/8 pixel precision
) {
    const int cell_w = width / mesh_w;
    const int cell_h = height / mesh_h;
    for (int row = 0; row < mesh_h; row++) {
        for (int col = 0; col < mesh_w; col++) {
            // Control point position: centre of this mesh cell.
            const int center_x = col * cell_w + cell_w / 2;
            const int center_y = row * cell_h + cell_h / 2;
            float acc_x = 0.0f, acc_y = 0.0f;
            int used = 0;
            for (int oy = -2; oy <= 2; oy++) {
                const int sy = center_y + oy;
                if (sy < 0 || sy >= height) continue;
                for (int ox = -2; ox <= 2; ox++) {
                    const int sx = center_x + ox;
                    if (sx < 0 || sx >= width) continue;
                    const int sample = sy * width + sx;
                    acc_x += flow_x[sample];
                    acc_y += flow_y[sample];
                    used++;
                }
            }
            // Average (zero if the window was entirely outside the frame)
            // and quantize to 1/8-pixel units.
            const int out = row * mesh_w + col;
            if (used > 0) {
                mesh_dx[out] = (short)((acc_x / used) * 8.0f);
                mesh_dy[out] = (short)((acc_y / used) * 8.0f);
            } else {
                mesh_dx[out] = 0;
                mesh_dy[out] = 0;
            }
        }
    }
}
// Laplacian smoothing of the mesh for spatial coherence (reduces
// high-frequency noise and discourages fold-overs). Jacobi-style: each
// iteration reads from a snapshot of the mesh and writes in place, blending
// each control point with the mean of its 4-connected neighbours:
//   new = (1 - smoothness) * old + smoothness * neighbour_mean
void smooth_mesh_laplacian(
    short *mesh_dx, short *mesh_dy,
    int mesh_width, int mesh_height,
    float smoothness, int iterations
) {
    const size_t count = (size_t)mesh_width * (size_t)mesh_height;
    short *snap_dx = (short*)std::malloc(count * sizeof(short));
    short *snap_dy = (short*)std::malloc(count * sizeof(short));
    static const int kOffsets[4][2] = {{0, -1}, {0, 1}, {-1, 0}, {1, 0}};
    for (int pass = 0; pass < iterations; pass++) {
        std::memcpy(snap_dx, mesh_dx, count * sizeof(short));
        std::memcpy(snap_dy, mesh_dy, count * sizeof(short));
        for (int row = 0; row < mesh_height; row++) {
            for (int col = 0; col < mesh_width; col++) {
                float sum_dx = 0.0f, sum_dy = 0.0f;
                int found = 0;
                // Up / down / left / right neighbours inside the mesh
                for (int n = 0; n < 4; n++) {
                    const int nc = col + kOffsets[n][0];
                    const int nr = row + kOffsets[n][1];
                    if (nc < 0 || nc >= mesh_width || nr < 0 || nr >= mesh_height) {
                        continue;
                    }
                    const int nidx = nr * mesh_width + nc;
                    sum_dx += snap_dx[nidx];
                    sum_dy += snap_dy[nidx];
                    found++;
                }
                if (found == 0) continue; // isolated point: leave untouched
                const int idx = row * mesh_width + col;
                const float blend = 1.0f - smoothness;
                mesh_dx[idx] = (short)(blend * snap_dx[idx] + smoothness * (sum_dx / found));
                mesh_dy[idx] = (short)(blend * snap_dy[idx] + smoothness * (sum_dy / found));
            }
        }
    }
    std::free(snap_dx);
    std::free(snap_dy);
}
// Warp one float channel with the distortion mesh using inverse mapping
// (for every destination pixel, sample the source at dst + mesh offset),
// which avoids holes. Mesh offsets are 1/8-pixel fixed point and are
// bilinearly interpolated between the four surrounding control points;
// the source is then sampled with bilinear interpolation, taps clamped to
// the frame border.
void warp_frame_with_mesh(
    const float *src_frame, int width, int height,
    const short *mesh_dx, const short *mesh_dy,
    int mesh_width, int mesh_height,
    float *dst_frame
) {
    const int cell_w = width / mesh_width;
    const int cell_h = height / mesh_height;
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            // Locate the mesh cell so that a right/bottom neighbour control
            // point always exists.
            int cx = x / cell_w;
            int cy = y / cell_h;
            if (cx > mesh_width - 2) cx = mesh_width - 2;
            if (cy > mesh_height - 2) cy = mesh_height - 2;
            if (cx < 0) cx = 0;
            if (cy < 0) cy = 0;
            const int i00 = cy * mesh_width + cx;
            const int i10 = i00 + 1;
            const int i01 = i00 + mesh_width;
            const int i11 = i01 + 1;
            // Control points sit at cell centres.
            const float px0 = cx * cell_w + cell_w / 2.0f;
            const float py0 = cy * cell_h + cell_h / 2.0f;
            const float px1 = (cx + 1) * cell_w + cell_w / 2.0f;
            const float py1 = (cy + 1) * cell_h + cell_h / 2.0f;
            // Normalized position within the cell, clamped to [0, 1].
            float alpha = (x - px0) / (px1 - px0);
            float beta = (y - py0) / (py1 - py0);
            if (alpha < 0.0f) alpha = 0.0f;
            if (alpha > 1.0f) alpha = 1.0f;
            if (beta < 0.0f) beta = 0.0f;
            if (beta > 1.0f) beta = 1.0f;
            // Bilinear blend of the four control-point offsets (→ pixels).
            const float off_x =
                (1 - alpha) * (1 - beta) * (mesh_dx[i00] / 8.0f) +
                alpha * (1 - beta) * (mesh_dx[i10] / 8.0f) +
                (1 - alpha) * beta * (mesh_dx[i01] / 8.0f) +
                alpha * beta * (mesh_dx[i11] / 8.0f);
            const float off_y =
                (1 - alpha) * (1 - beta) * (mesh_dy[i00] / 8.0f) +
                alpha * (1 - beta) * (mesh_dy[i10] / 8.0f) +
                (1 - alpha) * beta * (mesh_dy[i01] / 8.0f) +
                alpha * beta * (mesh_dy[i11] / 8.0f);
            // Inverse warp: where in the source does this pixel come from?
            const float src_x = x + off_x;
            const float src_y = y + off_y;
            int x0 = (int)std::floor(src_x);
            int y0 = (int)std::floor(src_y);
            int x1 = x0 + 1;
            int y1 = y0 + 1;
            // Clamp taps (note: fractions are taken against the CLAMPED base
            // tap, matching the original edge behaviour).
            if (x0 < 0) x0 = 0;
            if (y0 < 0) y0 = 0;
            if (x1 >= width) x1 = width - 1;
            if (y1 >= height) y1 = height - 1;
            if (x0 >= width) x0 = width - 1;
            if (y0 >= height) y0 = height - 1;
            const float fx = src_x - x0;
            const float fy = src_y - y0;
            const float v00 = src_frame[y0 * width + x0];
            const float v10 = src_frame[y0 * width + x1];
            const float v01 = src_frame[y1 * width + x0];
            const float v11 = src_frame[y1 * width + x1];
            dst_frame[y * width + x] =
                (1 - fx) * (1 - fy) * v00 +
                fx * (1 - fy) * v10 +
                (1 - fx) * fy * v01 +
                fx * fy * v11;
        }
    }
}
} // extern "C"

View File

@@ -0,0 +1,169 @@
// Affine estimation for TAV mesh warping
// This file contains logic to estimate per-cell affine transforms from block motion
#include <cmath>
#include <cstdlib>
#include <cstring>
extern "C" {
// Estimate an affine motion model for one mesh cell from the dense flow.
// Fits the sampled flow vectors to the model
//   [vx]   [a11 a12][px]   [tx]
//   [vy] = [a21 a22][py] + [ty]
// via least squares, where (px, py) are pixel offsets from the cell centre.
// Outputs: tx/ty in 1/8-pixel fixed point; a11..a22 in 1/256 fixed point
// (256 == identity).
// Returns 1 if the affine fit reduces the residual by more than `threshold`
// (relative improvement), otherwise 0 with a translation-only model and an
// identity matrix.
int estimate_cell_affine(
    const float *flow_x, const float *flow_y,
    int width, int height,
    int cell_x, int cell_y, // Cell position in mesh coordinates
    int cell_w, int cell_h, // Cell size in pixels
    float threshold, // Residual improvement threshold (e.g. 0.10 = 10%)
    short *out_tx, short *out_ty, // Translation (1/8 pixel)
    short *out_a11, short *out_a12, // Affine matrix (1/256 fixed-point)
    short *out_a21, short *out_a22
) {
    // Compute cell bounding box, clamped to the frame on the far edges.
    int x_start = cell_x * cell_w;
    int y_start = cell_y * cell_h;
    int x_end = (cell_x + 1) * cell_w;
    int y_end = (cell_y + 1) * cell_h;
    if (x_end > width) x_end = width;
    if (y_end > height) y_end = height;
    // Sample motion vectors on a 4×4 grid spanning the cell.
    const int samples_x = 4;
    const int samples_y = 4;
    float sample_motion_x[16];
    float sample_motion_y[16];
    int sample_px[16];
    int sample_py[16];
    int n_samples = 0;
    for (int sy = 0; sy < samples_y; sy++) {
        for (int sx = 0; sx < samples_x; sx++) {
            int px = x_start + (x_end - x_start) * sx / (samples_x - 1);
            int py = y_start + (y_end - y_start) * sy / (samples_y - 1);
            if (px >= width) px = width - 1;
            if (py >= height) py = height - 1;
            int idx = py * width + px;
            sample_motion_x[n_samples] = flow_x[idx];
            sample_motion_y[n_samples] = flow_y[idx];
            sample_px[n_samples] = px - (x_start + x_end) / 2; // Relative to cell center
            sample_py[n_samples] = py - (y_start + y_end) / 2;
            n_samples++;
        }
    }
    // 1. Translation-only model: the mean of the sampled flow.
    float avg_dx = 0, avg_dy = 0;
    for (int i = 0; i < n_samples; i++) {
        avg_dx += sample_motion_x[i];
        avg_dy += sample_motion_y[i];
    }
    avg_dx /= n_samples;
    avg_dy /= n_samples;
    // Sum of squared residuals under the translation model.
    float trans_residual = 0;
    for (int i = 0; i < n_samples; i++) {
        float dx_err = sample_motion_x[i] - avg_dx;
        float dy_err = sample_motion_y[i] - avg_dy;
        trans_residual += dx_err * dx_err + dy_err * dy_err;
    }
    // 2. Affine model via least squares: accumulate the moments for the
    //    normal equations of vx = a11*px + a12*py + tx (and likewise vy).
    double sum_x = 0, sum_y = 0, sum_xx = 0, sum_yy = 0, sum_xy = 0;
    double sum_vx = 0, sum_vy = 0, sum_vx_x = 0, sum_vx_y = 0;
    double sum_vy_x = 0, sum_vy_y = 0;
    for (int i = 0; i < n_samples; i++) {
        double px = sample_px[i];
        double py = sample_py[i];
        double vx = sample_motion_x[i];
        double vy = sample_motion_y[i];
        sum_x += px;
        sum_y += py;
        sum_xx += px * px;
        sum_yy += py * py;
        sum_xy += px * py;
        sum_vx += vx;
        sum_vy += vy;
        sum_vx_x += vx * px;
        sum_vx_y += vx * py;
        sum_vy_x += vy * px;
        sum_vy_y += vy * py;
    }
    // Closed-form (Cramer's-rule style) solution of the shared 3×3 normal
    // matrix. Both the vx and vy systems share this determinant.
    // NOTE(review): the hand-expanded det/a11/a12/a21/a22 expressions below
    // were not re-derived in this review — verify against a reference
    // least-squares solver before trusting the affine path.
    double n = n_samples;
    double det = n * sum_xx * sum_yy + 2 * sum_x * sum_y * sum_xy -
                 sum_xx * sum_y * sum_y - sum_yy * sum_x * sum_x - n * sum_xy * sum_xy;
    if (fabs(det) < 1e-6) {
        // Near-singular normal matrix: fall back to translation.
        // NOTE(review): 1e-6 is compared against an UNNORMALIZED determinant
        // whose magnitude grows with cell size — the guard may be
        // effectively never triggered for large cells; confirm intent.
        *out_tx = (short)(avg_dx * 8.0f);
        *out_ty = (short)(avg_dy * 8.0f);
        *out_a11 = 256; // Identity
        *out_a12 = 0;
        *out_a21 = 0;
        *out_a22 = 256;
        return 0; // Translation only
    }
    // Solve for affine parameters (simplified for readability)
    double a11 = (sum_vx_x * sum_yy * n - sum_vx_y * sum_xy * n - sum_vx * sum_y * sum_y +
                  sum_vx * sum_xy * sum_y + sum_vx_y * sum_x * sum_y - sum_vx_x * sum_y * sum_y) / det;
    double a12 = (sum_vx_y * sum_xx * n - sum_vx_x * sum_xy * n - sum_vx * sum_x * sum_xy +
                  sum_vx * sum_xx * sum_y + sum_vx_x * sum_x * sum_y - sum_vx_y * sum_x * sum_x) / det;
    double tx = (sum_vx - a11 * sum_x - a12 * sum_y) / n;
    double a21 = (sum_vy_x * sum_yy * n - sum_vy_y * sum_xy * n - sum_vy * sum_y * sum_y +
                  sum_vy * sum_xy * sum_y + sum_vy_y * sum_x * sum_y - sum_vy_x * sum_y * sum_y) / det;
    double a22 = (sum_vy_y * sum_xx * n - sum_vy_x * sum_xy * n - sum_vy * sum_x * sum_xy +
                  sum_vy * sum_xx * sum_y + sum_vy_x * sum_x * sum_y - sum_vy_y * sum_x * sum_x) / det;
    double ty = (sum_vy - a21 * sum_x - a22 * sum_y) / n;
    // Sum of squared residuals under the affine model.
    float affine_residual = 0;
    for (int i = 0; i < n_samples; i++) {
        double px = sample_px[i];
        double py = sample_py[i];
        double pred_vx = a11 * px + a12 * py + tx;
        double pred_vy = a21 * px + a22 * py + ty;
        double dx_err = sample_motion_x[i] - pred_vx;
        double dy_err = sample_motion_y[i] - pred_vy;
        affine_residual += dx_err * dx_err + dy_err * dy_err;
    }
    // Decision: keep the affine model only if it buys enough relative
    // residual reduction (the +1e-6 guards a zero translation residual).
    float improvement = (trans_residual - affine_residual) / (trans_residual + 1e-6f);
    if (improvement > threshold) {
        // Use affine (fixed-point encode: 1/8 px translation, 1/256 matrix)
        *out_tx = (short)(tx * 8.0f);
        *out_ty = (short)(ty * 8.0f);
        *out_a11 = (short)(a11 * 256.0);
        *out_a12 = (short)(a12 * 256.0);
        *out_a21 = (short)(a21 * 256.0);
        *out_a22 = (short)(a22 * 256.0);
        return 1; // Affine
    } else {
        // Use translation
        *out_tx = (short)(avg_dx * 8.0f);
        *out_ty = (short)(avg_dy * 8.0f);
        *out_a11 = 256; // Identity
        *out_a12 = 0;
        *out_a21 = 0;
        *out_a22 = 256;
        return 0; // Translation only
    }
}
} // extern "C"

View File

@@ -0,0 +1,328 @@
// Test mesh warp round-trip consistency
// Warps a frame forward, then backward, and checks if we get the original back
// This is critical for MC-lifting invertibility
#include <opencv2/opencv.hpp>
#include <cstdlib>
#include <cstring>
#include <cmath>
#include <cstdio>
#include <ctime>
// Include the mesh functions from encoder
extern "C" {
void estimate_motion_optical_flow(
const unsigned char *frame1_rgb, const unsigned char *frame2_rgb,
int width, int height,
float **out_flow_x, float **out_flow_y
);
void build_mesh_from_flow(
const float *flow_x, const float *flow_y,
int width, int height,
int mesh_w, int mesh_h,
int16_t *mesh_dx, int16_t *mesh_dy
);
void smooth_mesh_laplacian(
int16_t *mesh_dx, int16_t *mesh_dy,
int mesh_width, int mesh_height,
float smoothness, int iterations
);
}
// Mesh warp with bilinear interpolation (translation-only control points).
// Test-side counterpart of the encoder's float-channel mesh warp, operating
// directly on an 8-bit 3-channel cv::Mat via inverse mapping: for each
// destination pixel, interpolate the four surrounding control-point offsets
// (1/8-pixel fixed point), then bilinearly sample the source at dst+offset
// with taps clamped to the frame.
static void apply_mesh_warp_rgb(
    const cv::Mat &src,
    cv::Mat &dst,
    const int16_t *mesh_dx,
    const int16_t *mesh_dy,
    int mesh_w, int mesh_h
) {
    int width = src.cols;
    int height = src.rows;
    int cell_w = width / mesh_w;
    int cell_h = height / mesh_h;
    dst = cv::Mat(height, width, CV_8UC3);
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            // Clamp the cell index so a right/bottom neighbour always exists.
            int cell_x = x / cell_w;
            int cell_y = y / cell_h;
            cell_x = std::min(cell_x, mesh_w - 2);
            cell_y = std::min(cell_y, mesh_h - 2);
            int idx_00 = cell_y * mesh_w + cell_x;
            int idx_10 = idx_00 + 1;
            int idx_01 = (cell_y + 1) * mesh_w + cell_x;
            int idx_11 = idx_01 + 1;
            // Control points sit at cell centres.
            float cp_x0 = cell_x * cell_w + cell_w / 2.0f;
            float cp_y0 = cell_y * cell_h + cell_h / 2.0f;
            float cp_x1 = (cell_x + 1) * cell_w + cell_w / 2.0f;
            float cp_y1 = (cell_y + 1) * cell_h + cell_h / 2.0f;
            // Normalized position within the cell, clamped to [0, 1].
            float alpha = (x - cp_x0) / (cp_x1 - cp_x0);
            float beta = (y - cp_y0) / (cp_y1 - cp_y0);
            alpha = std::max(0.0f, std::min(1.0f, alpha));
            beta = std::max(0.0f, std::min(1.0f, beta));
            // Bilinear blend of the four control-point offsets (→ pixels).
            float dx = (1 - alpha) * (1 - beta) * (mesh_dx[idx_00] / 8.0f) +
                       alpha * (1 - beta) * (mesh_dx[idx_10] / 8.0f) +
                       (1 - alpha) * beta * (mesh_dx[idx_01] / 8.0f) +
                       alpha * beta * (mesh_dx[idx_11] / 8.0f);
            float dy = (1 - alpha) * (1 - beta) * (mesh_dy[idx_00] / 8.0f) +
                       alpha * (1 - beta) * (mesh_dy[idx_10] / 8.0f) +
                       (1 - alpha) * beta * (mesh_dy[idx_01] / 8.0f) +
                       alpha * beta * (mesh_dy[idx_11] / 8.0f);
            // Inverse warp: sample the source at dst + offset.
            float src_x = x + dx;
            float src_y = y + dy;
            int sx0 = (int)floorf(src_x);
            int sy0 = (int)floorf(src_y);
            int sx1 = sx0 + 1;
            int sy1 = sy0 + 1;
            // Clamp taps to the frame. NOTE(review): fx/fy below are taken
            // against the clamped sx0/sy0, so they can fall outside [0, 1]
            // when src_x/src_y is off-frame — confirm the resulting
            // extrapolation at borders is intended.
            sx0 = std::max(0, std::min(width - 1, sx0));
            sy0 = std::max(0, std::min(height - 1, sy0));
            sx1 = std::max(0, std::min(width - 1, sx1));
            sy1 = std::max(0, std::min(height - 1, sy1));
            float fx = src_x - sx0;
            float fy = src_y - sy0;
            // Bilinear sample per channel, rounded down and clamped to 8-bit.
            for (int c = 0; c < 3; c++) {
                float val_00 = src.at<cv::Vec3b>(sy0, sx0)[c];
                float val_10 = src.at<cv::Vec3b>(sy0, sx1)[c];
                float val_01 = src.at<cv::Vec3b>(sy1, sx0)[c];
                float val_11 = src.at<cv::Vec3b>(sy1, sx1)[c];
                float val = (1 - fx) * (1 - fy) * val_00 +
                            fx * (1 - fy) * val_10 +
                            (1 - fx) * fy * val_01 +
                            fx * fy * val_11;
                dst.at<cv::Vec3b>(y, x)[c] = (unsigned char)std::max(0.0f, std::min(255.0f, val));
            }
        }
    }
}
int main(int argc, char** argv) {
const char* video_file = (argc > 1) ? argv[1] : "test_video.mp4";
int num_tests = (argc > 2) ? atoi(argv[2]) : 5;
printf("Opening video: %s\n", video_file);
cv::VideoCapture cap(video_file);
if (!cap.isOpened()) {
fprintf(stderr, "Error: Cannot open video file\n");
return 1;
}
// ---- Round-trip warp invertibility test (tail of main) --------------------
// Measures forward warp quality, full round-trip error, and half-step
// (MC-lifting style) round-trip error over randomly chosen frame pairs.
int total_frames = (int)cap.get(cv::CAP_PROP_FRAME_COUNT);
int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);
printf("Video: %dx%d, %d frames\n", width, height, total_frames);
// Mesh dimensions (32×32 cells); ceiling division so the mesh covers the
// whole frame even when dimensions are not multiples of 32.
int mesh_cell_size = 32;
int mesh_w = (width + mesh_cell_size - 1) / mesh_cell_size;
int mesh_h = (height + mesh_cell_size - 1) / mesh_cell_size;
// The warp interpolates between adjacent control points, so it needs at
// least a 2x2 grid.
if (mesh_w < 2) mesh_w = 2;
if (mesh_h < 2) mesh_h = 2;
printf("Mesh: %dx%d (approx %dx%d px cells)\n\n",
mesh_w, mesh_h, width / mesh_w, height / mesh_h);
// Laplacian mesh-smoothing parameters.
float smoothness = 0.5f;
int smooth_iterations = 8;
srand(time(NULL));  // non-deterministic frame selection each run
// PSNR accumulators for the summary printed after the loop.
double total_forward_psnr = 0.0;
double total_roundtrip_psnr = 0.0;
double total_half_roundtrip_psnr = 0.0;
for (int test = 0; test < num_tests; test++) {
    // Pick a random frame pair away from the clip edges.
    // NOTE(review): rand() % (total_frames - 10) is undefined behavior
    // (modulo by zero) when total_frames <= 10 — no length guard is visible
    // in this fragment; confirm the part of main above this excerpt checks it.
    int frame_num = 5 + rand() % (total_frames - 10);
    printf("[Test %d/%d] Frame pair %d → %d\n", test + 1, num_tests, frame_num - 1, frame_num);
    cap.set(cv::CAP_PROP_POS_FRAMES, frame_num - 1);
    cv::Mat frame0, frame1;
    cap >> frame0;
    cap >> frame1;
    if (frame0.empty() || frame1.empty()) {
        fprintf(stderr, "Error reading frames\n");
        continue;
    }
    // The motion estimator expects RGB; OpenCV decodes to BGR.
    cv::Mat frame0_rgb, frame1_rgb;
    cv::cvtColor(frame0, frame0_rgb, cv::COLOR_BGR2RGB);
    cv::cvtColor(frame1, frame1_rgb, cv::COLOR_BGR2RGB);
    // Compute mesh (F0 → F1); flow buffers are allocated by the callee and
    // freed at the bottom of this loop.
    float *flow_x = nullptr, *flow_y = nullptr;
    estimate_motion_optical_flow(frame0_rgb.data, frame1_rgb.data,
    width, height, &flow_x, &flow_y);
    int16_t *mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
    int16_t *mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
    build_mesh_from_flow(flow_x, flow_y, width, height, mesh_w, mesh_h, mesh_dx, mesh_dy);
    smooth_mesh_laplacian(mesh_dx, mesh_dy, mesh_w, mesh_h, smoothness, smooth_iterations);
    // Create inverted mesh (negated vectors approximate the backward warp).
    int16_t *inv_mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
    int16_t *inv_mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
    for (int i = 0; i < mesh_w * mesh_h; i++) {
        inv_mesh_dx[i] = -mesh_dx[i];
        inv_mesh_dy[i] = -mesh_dy[i];
    }
    // Create half-mesh for symmetric lifting test. Integer halving truncates
    // toward zero, so the ± half meshes below are exact negations of each
    // other even for odd vector values.
    int16_t *half_mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
    int16_t *half_mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
    int16_t *neg_half_mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
    int16_t *neg_half_mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
    for (int i = 0; i < mesh_w * mesh_h; i++) {
        half_mesh_dx[i] = mesh_dx[i] / 2;
        half_mesh_dy[i] = mesh_dy[i] / 2;
        neg_half_mesh_dx[i] = -half_mesh_dx[i];
        neg_half_mesh_dy[i] = -half_mesh_dy[i];
    }
    // TEST 1: Full forward warp quality (F0 → F1)
    // (uses this file's 6-argument warp helper — presumably defined earlier
    // in this file, outside this excerpt; confirm.)
    cv::Mat warped_forward;
    apply_mesh_warp_rgb(frame0, warped_forward, mesh_dx, mesh_dy, mesh_w, mesh_h);
    double forward_mse = 0.0;
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            for (int c = 0; c < 3; c++) {
                double diff = (double)warped_forward.at<cv::Vec3b>(y, x)[c] -
                (double)frame1.at<cv::Vec3b>(y, x)[c];
                forward_mse += diff * diff;
            }
        }
    }
    forward_mse /= (width * height * 3);
    // 999.0 is a sentinel for a mathematically infinite PSNR (zero MSE).
    double forward_psnr = (forward_mse > 0) ? 10.0 * log10(255.0 * 255.0 / forward_mse) : 999.0;
    total_forward_psnr += forward_psnr;
    // TEST 2: Full round-trip (F0 → forward → backward → F0')
    cv::Mat roundtrip;
    apply_mesh_warp_rgb(warped_forward, roundtrip, inv_mesh_dx, inv_mesh_dy, mesh_w, mesh_h);
    double roundtrip_mse = 0.0;
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            for (int c = 0; c < 3; c++) {
                double diff = (double)roundtrip.at<cv::Vec3b>(y, x)[c] -
                (double)frame0.at<cv::Vec3b>(y, x)[c];
                roundtrip_mse += diff * diff;
            }
        }
    }
    roundtrip_mse /= (width * height * 3);
    double roundtrip_psnr = (roundtrip_mse > 0) ? 10.0 * log10(255.0 * 255.0 / roundtrip_mse) : 999.0;
    total_roundtrip_psnr += roundtrip_psnr;
    // TEST 3: Half-step symmetric round-trip (MC-lifting style)
    // F0 → +½mesh, then → -½mesh (should return to F0)
    cv::Mat half_forward, half_roundtrip;
    apply_mesh_warp_rgb(frame0, half_forward, half_mesh_dx, half_mesh_dy, mesh_w, mesh_h);
    apply_mesh_warp_rgb(half_forward, half_roundtrip, neg_half_mesh_dx, neg_half_mesh_dy, mesh_w, mesh_h);
    double half_roundtrip_mse = 0.0;
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            for (int c = 0; c < 3; c++) {
                double diff = (double)half_roundtrip.at<cv::Vec3b>(y, x)[c] -
                (double)frame0.at<cv::Vec3b>(y, x)[c];
                half_roundtrip_mse += diff * diff;
            }
        }
    }
    half_roundtrip_mse /= (width * height * 3);
    double half_roundtrip_psnr = (half_roundtrip_mse > 0) ? 10.0 * log10(255.0 * 255.0 / half_roundtrip_mse) : 999.0;
    total_half_roundtrip_psnr += half_roundtrip_psnr;
    printf(" Forward warp (F0→F1): PSNR = %.2f dB\n", forward_psnr);
    printf(" Full round-trip (F0→F0'): PSNR = %.2f dB\n", roundtrip_psnr);
    printf(" Half round-trip (±½mesh): PSNR = %.2f dB\n", half_roundtrip_psnr);
    // Compute motion stats; mesh vectors are stored in 1/8-pixel units,
    // hence the division by 8.0f.
    float avg_motion = 0.0f, max_motion = 0.0f;
    for (int i = 0; i < mesh_w * mesh_h; i++) {
        float dx = mesh_dx[i] / 8.0f;
        float dy = mesh_dy[i] / 8.0f;
        float motion = sqrtf(dx * dx + dy * dy);
        avg_motion += motion;
        if (motion > max_motion) max_motion = motion;
    }
    avg_motion /= (mesh_w * mesh_h);
    printf(" Motion: avg=%.2f px, max=%.2f px\n\n", avg_motion, max_motion);
    // Save visualization for worst case (and always for the first test).
    if (test == 0 || roundtrip_psnr < 30.0) {
        char filename[256];
        sprintf(filename, "roundtrip_%04d_original.png", frame_num);
        cv::imwrite(filename, frame0);
        sprintf(filename, "roundtrip_%04d_forward.png", frame_num);
        cv::imwrite(filename, warped_forward);
        sprintf(filename, "roundtrip_%04d_roundtrip.png", frame_num);
        cv::imwrite(filename, roundtrip);
        // Difference images, amplified 5x and saturated for visibility.
        cv::Mat diff_roundtrip = cv::Mat::zeros(height, width, CV_8UC3);
        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++) {
                for (int c = 0; c < 3; c++) {
                    int diff = abs((int)roundtrip.at<cv::Vec3b>(y, x)[c] -
                    (int)frame0.at<cv::Vec3b>(y, x)[c]);
                    diff_roundtrip.at<cv::Vec3b>(y, x)[c] = std::min(diff * 5, 255);
                }
            }
        }
        sprintf(filename, "roundtrip_%04d_diff.png", frame_num);
        cv::imwrite(filename, diff_roundtrip);
        printf(" Saved visualization: roundtrip_%04d_*.png\n\n", frame_num);
    }
    // Release all per-iteration buffers.
    free(flow_x);
    free(flow_y);
    free(mesh_dx);
    free(mesh_dy);
    free(inv_mesh_dx);
    free(inv_mesh_dy);
    free(half_mesh_dx);
    free(half_mesh_dy);
    free(neg_half_mesh_dx);
    free(neg_half_mesh_dy);
}
// Summary: averages over all tests.
// NOTE(review): num_tests is declared above this excerpt; if it can be 0,
// these divisions produce NaN — confirm the caller validates it.
printf("===========================================\n");
printf("Average Results (%d tests):\n", num_tests);
printf(" Forward warp quality: %.2f dB\n", total_forward_psnr / num_tests);
printf(" Full round-trip error: %.2f dB\n", total_roundtrip_psnr / num_tests);
printf(" Half round-trip error: %.2f dB\n", total_half_roundtrip_psnr / num_tests);
printf("===========================================\n\n");
// 35 dB is this test's pass/fail threshold for invertibility.
if (total_roundtrip_psnr / num_tests < 35.0) {
    printf("WARNING: Round-trip PSNR < 35 dB indicates poor invertibility!\n");
    printf("This will cause MC-lifting to accumulate errors and hurt compression.\n");
    printf("Bilinear interpolation artifacts are likely the culprit.\n");
} else {
    printf("Round-trip consistency looks acceptable (>35 dB).\n");
}
cap.release();
return 0;
}

View File

@@ -0,0 +1,422 @@
// Visual unit test for mesh warping with hierarchical block matching and affine estimation
// Picks 5 random frames from test_video.mp4, warps prev frame to current frame using mesh,
// and saves both warped and target frames for visual comparison
// Now includes: hierarchical diamond search, Laplacian smoothing, and selective affine transforms
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <opencv2/opencv.hpp>
#include <opencv2/video/tracking.hpp>
// Include the mesh functions from encoder
extern "C" {
    // Dense per-pixel motion estimation between two RGB frames.
    // Allocates *out_flow_x / *out_flow_y; the caller owns them and releases
    // them with free() (see the free(flow_x)/free(flow_y) calls in main).
    void estimate_motion_optical_flow(
        const unsigned char *frame1_rgb, const unsigned char *frame2_rgb,
        int width, int height,
        float **out_flow_x, float **out_flow_y
    );
    // Downsamples the dense flow field onto a mesh_w x mesh_h control grid.
    // Output vectors are in 1/8-pixel units (they are divided by 8.0f
    // everywhere they are consumed in this file).
    void build_mesh_from_flow(
        const float *flow_x, const float *flow_y,
        int width, int height,
        int mesh_w, int mesh_h,
        int16_t *mesh_dx, int16_t *mesh_dy
    );
    // In-place Laplacian smoothing of the mesh motion vectors.
    void smooth_mesh_laplacian(
        int16_t *mesh_dx, int16_t *mesh_dy,
        int mesh_width, int mesh_height,
        float smoothness, int iterations
    );
    // Fits an affine model to the flow inside one mesh cell. The return value
    // is treated as a boolean "use affine" decision by main(); presumably it
    // is nonzero when affine beats translation by at least `threshold` — TODO
    // confirm against the encoder implementation. Matrix outputs are consumed
    // as Q8.8 fixed point (divided by 256.0f at the call sites in this file).
    int estimate_cell_affine(
        const float *flow_x, const float *flow_y,
        int width, int height,
        int cell_x, int cell_y,
        int cell_w, int cell_h,
        float threshold,
        int16_t *out_tx, int16_t *out_ty,
        int16_t *out_a11, int16_t *out_a12,
        int16_t *out_a21, int16_t *out_a22
    );
}
// Mesh warp with bilinear interpolation and optional affine support
// Mesh warp with bilinear interpolation and optional affine support.
//
// Inverse warp: for each destination pixel (x, y), a motion vector is
// bilinearly interpolated from the four surrounding mesh control points
// (stored in 1/8-pixel units), optionally refined by the cell's Q8.8 affine
// matrix, and added to (x, y) to obtain the source sample position, which is
// read with edge-clamped bilinear interpolation.
//
// Parameters:
//   src         - input BGR image (CV_8UC3)
//   dst         - output warped BGR image, allocated here, same size as src
//   mesh_dx/dy  - per-control-point motion vectors, 1/8-pixel units
//   affine_mask - may be NULL; nonzero entry = cell uses affine parameters
//   affine_aNN  - per-cell 2x2 affine matrix entries, Q8.8 fixed point
//   mesh_w/h    - mesh grid dimensions (must each be >= 2)
static void apply_mesh_warp_rgb(
    const cv::Mat &src,          // Input BGR image
    cv::Mat &dst,                // Output warped BGR image
    const int16_t *mesh_dx,      // Mesh motion vectors (1/8 pixel)
    const int16_t *mesh_dy,
    const uint8_t *affine_mask,  // 1=affine, 0=translation
    const int16_t *affine_a11,
    const int16_t *affine_a12,
    const int16_t *affine_a21,
    const int16_t *affine_a22,
    int mesh_w, int mesh_h
) {
    int width = src.cols;
    int height = src.rows;
    // Guard against a zero cell size (mesh denser than the image would
    // otherwise divide by zero below).
    int cell_w = std::max(1, width / mesh_w);
    int cell_h = std::max(1, height / mesh_h);
    dst = cv::Mat(height, width, CV_8UC3);
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            int cell_x = x / cell_w;
            int cell_y = y / cell_h;
            // Clamp so that cell_x+1 / cell_y+1 stay inside the mesh
            cell_x = std::min(cell_x, mesh_w - 2);
            cell_y = std::min(cell_y, mesh_h - 2);
            // Four corner control points
            int idx_00 = cell_y * mesh_w + cell_x;
            int idx_10 = idx_00 + 1;
            int idx_01 = (cell_y + 1) * mesh_w + cell_x;
            int idx_11 = idx_01 + 1;
            // Control point positions (cell centers)
            float cp_x0 = cell_x * cell_w + cell_w / 2.0f;
            float cp_y0 = cell_y * cell_h + cell_h / 2.0f;
            float cp_x1 = (cell_x + 1) * cell_w + cell_w / 2.0f;
            float cp_y1 = (cell_y + 1) * cell_h + cell_h / 2.0f;
            // Local coordinates within the control-point quad, clamped to [0,1]
            float alpha = (x - cp_x0) / (cp_x1 - cp_x0);
            float beta = (y - cp_y0) / (cp_y1 - cp_y0);
            alpha = std::max(0.0f, std::min(1.0f, alpha));
            beta = std::max(0.0f, std::min(1.0f, beta));
            // Bilinear interpolation of motion vectors (1/8-px -> px)
            float dx = (1 - alpha) * (1 - beta) * (mesh_dx[idx_00] / 8.0f) +
                       alpha * (1 - beta) * (mesh_dx[idx_10] / 8.0f) +
                       (1 - alpha) * beta * (mesh_dx[idx_01] / 8.0f) +
                       alpha * beta * (mesh_dx[idx_11] / 8.0f);
            float dy = (1 - alpha) * (1 - beta) * (mesh_dy[idx_00] / 8.0f) +
                       alpha * (1 - beta) * (mesh_dy[idx_10] / 8.0f) +
                       (1 - alpha) * beta * (mesh_dy[idx_01] / 8.0f) +
                       alpha * beta * (mesh_dy[idx_11] / 8.0f);
            // Check if we're using affine in this cell.
            // For simplicity, just use the top-left corner's affine parameters.
            int cell_idx = cell_y * mesh_w + cell_x;
            if (affine_mask && affine_mask[cell_idx]) {
                // Position relative to the cell center
                float rel_x = x - (cell_x * cell_w + cell_w / 2.0f);
                float rel_y = y - (cell_y * cell_h + cell_h / 2.0f);
                float a11 = affine_a11[cell_idx] / 256.0f;  // Q8.8 -> float
                float a12 = affine_a12[cell_idx] / 256.0f;
                float a21 = affine_a21[cell_idx] / 256.0f;
                float a22 = affine_a22[cell_idx] / 256.0f;
                // Affine warp: [x'] = [a11 a12][x] + [dx]
                //              [y']   [a21 a22][y]   [dy]
                dx = a11 * rel_x + a12 * rel_y + dx;
                dy = a21 * rel_x + a22 * rel_y + dy;
            }
            // Source coordinates (inverse warp)
            float src_x = x + dx;
            float src_y = y + dy;
            // Bilinear sample with edge clamping.
            // BUGFIX: the fractional weights must come from the UNCLAMPED
            // floor. Previously fx/fy were computed against the clamped
            // integer coordinate, so any out-of-image source position
            // produced weights far outside [0,1] and extrapolated garbage
            // colors along the borders.
            int sx0 = (int)floorf(src_x);
            int sy0 = (int)floorf(src_y);
            float fx = src_x - sx0;  // always in [0,1)
            float fy = src_y - sy0;
            int sx1 = sx0 + 1;
            int sy1 = sy0 + 1;
            sx0 = std::max(0, std::min(width - 1, sx0));
            sy0 = std::max(0, std::min(height - 1, sy0));
            sx1 = std::max(0, std::min(width - 1, sx1));
            sy1 = std::max(0, std::min(height - 1, sy1));
            // Interpolate each channel
            for (int c = 0; c < 3; c++) {
                float val_00 = src.at<cv::Vec3b>(sy0, sx0)[c];
                float val_10 = src.at<cv::Vec3b>(sy0, sx1)[c];
                float val_01 = src.at<cv::Vec3b>(sy1, sx0)[c];
                float val_11 = src.at<cv::Vec3b>(sy1, sx1)[c];
                float val = (1 - fx) * (1 - fy) * val_00 +
                            fx * (1 - fy) * val_10 +
                            (1 - fx) * fy * val_01 +
                            fx * fy * val_11;
                dst.at<cv::Vec3b>(y, x)[c] = (unsigned char)std::max(0.0f, std::min(255.0f, val));
            }
        }
    }
}
// Create visualization overlay showing affine cells
// Outline every affine-flagged mesh cell in `img` with a thin green
// rectangle (in-place). `affine_mask` has one entry per mesh cell,
// row-major, nonzero meaning the cell uses an affine transform.
static void create_affine_overlay(
    cv::Mat &img,
    const uint8_t *affine_mask,
    int mesh_w, int mesh_h
) {
    const int cell_w = img.cols / mesh_w;
    const int cell_h = img.rows / mesh_h;
    const cv::Scalar green(0, 255, 0);
    for (int row = 0; row < mesh_h; row++) {
        for (int col = 0; col < mesh_w; col++) {
            if (!affine_mask[row * mesh_w + col])
                continue;  // translation-only cell: no overlay
            const cv::Point top_left(col * cell_w, row * cell_h);
            const cv::Point bottom_right((col + 1) * cell_w, (row + 1) * cell_h);
            cv::rectangle(img, top_left, bottom_right, green, 1);
        }
    }
}
// Entry point for the visual mesh-warp test.
//
// Picks random frame pairs from a video, estimates a warp mesh (block
// matching + Laplacian smoothing + selective per-cell affine transforms),
// warps the previous frame onto the current one, and writes PNGs for visual
// comparison.
//
// Usage: prog [video_file] [num_test_frames]
//   video_file       defaults to "test_video.mp4"
//   num_test_frames  defaults to 5
// Returns 0 on success, 1 if the video cannot be opened or is too short.
int main(int argc, char** argv) {
    const char* video_file = (argc > 1) ? argv[1] : "test_video.mp4";
    int num_test_frames = (argc > 2) ? atoi(argv[2]) : 5;
    printf("Opening video: %s\n", video_file);
    cv::VideoCapture cap(video_file);
    if (!cap.isOpened()) {
        fprintf(stderr, "Error: Cannot open video file %s\n", video_file);
        return 1;
    }
    int total_frames = (int)cap.get(cv::CAP_PROP_FRAME_COUNT);
    int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
    int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);
    printf("Video: %dx%d, %d frames\n", width, height, total_frames);
    // BUGFIX: frame selection below computes rand() % (total_frames - 10),
    // which is undefined behavior (modulo by zero) when total_frames == 10.
    // The old guard (< 10) allowed exactly that case; require >= 11 frames
    // so the modulus is always at least 1.
    if (total_frames < 11) {
        fprintf(stderr, "Error: Video too short (need at least 11 frames)\n");
        return 1;
    }
    // Calculate mesh dimensions (32×32 pixel cells, matches current encoder);
    // ceiling division, with a 2x2 minimum required by the warp interpolation.
    int mesh_cell_size = 32;
    int mesh_w = (width + mesh_cell_size - 1) / mesh_cell_size;
    int mesh_h = (height + mesh_cell_size - 1) / mesh_cell_size;
    if (mesh_w < 2) mesh_w = 2;
    if (mesh_h < 2) mesh_h = 2;
    printf("Mesh: %dx%d (approx %dx%d px cells)\n",
    mesh_w, mesh_h, width / mesh_w, height / mesh_h);
    // Encoder parameters (match current encoder_tav.c settings)
    float smoothness = 0.5f;        // Mesh smoothness weight
    int smooth_iterations = 8;      // Smoothing iterations
    float affine_threshold = 0.40f; // 40% improvement required for affine
    printf("Settings: smoothness=%.2f, iterations=%d, affine_threshold=%.0f%%\n",
    smoothness, smooth_iterations, affine_threshold * 100.0f);
    // Seed random number generator (non-deterministic frame picks each run)
    srand(time(NULL));
    // Pick random frames (avoid first and last 5 frames)
    printf("\nTesting %d random frame pairs:\n", num_test_frames);
    for (int test = 0; test < num_test_frames; test++) {
        // Pick random frame (ensure we have a previous frame)
        int frame_num = 5 + rand() % (total_frames - 10);
        printf("\n[Test %d/%d] Warping frame %d → frame %d (inverse warp)\n",
        test + 1, num_test_frames, frame_num - 1, frame_num);
        // Read previous frame (source for warping)
        cap.set(cv::CAP_PROP_POS_FRAMES, frame_num - 1);
        cv::Mat prev_frame;
        cap >> prev_frame;
        if (prev_frame.empty()) {
            fprintf(stderr, "Error reading frame %d\n", frame_num - 1);
            continue;
        }
        // Read current frame (target to match)
        cv::Mat curr_frame;
        cap >> curr_frame;
        if (curr_frame.empty()) {
            fprintf(stderr, "Error reading frame %d\n", frame_num);
            continue;
        }
        // Convert to RGB for block matching (OpenCV decodes to BGR)
        cv::Mat prev_rgb, curr_rgb;
        cv::cvtColor(prev_frame, prev_rgb, cv::COLOR_BGR2RGB);
        cv::cvtColor(curr_frame, curr_rgb, cv::COLOR_BGR2RGB);
        // Compute hierarchical block matching (replaces optical flow);
        // flow buffers are allocated by the callee and freed below.
        printf(" Computing hierarchical block matching...\n");
        float *flow_x = nullptr, *flow_y = nullptr;
        estimate_motion_optical_flow(
            prev_rgb.data, curr_rgb.data,
            width, height,
            &flow_x, &flow_y
        );
        // Build mesh from flow
        printf(" Building mesh from block matches...\n");
        int16_t *mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        int16_t *mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        build_mesh_from_flow(flow_x, flow_y, width, height, mesh_w, mesh_h, mesh_dx, mesh_dy);
        // Apply Laplacian smoothing
        printf(" Applying Laplacian smoothing (%d iterations, %.2f weight)...\n",
        smooth_iterations, smoothness);
        smooth_mesh_laplacian(mesh_dx, mesh_dy, mesh_w, mesh_h, smoothness, smooth_iterations);
        // Estimate selective per-cell affine transforms; cells where affine
        // does not beat translation keep translation-only vectors.
        printf(" Estimating selective affine transforms (threshold=%.0f%%)...\n",
        affine_threshold * 100.0f);
        uint8_t *affine_mask = (uint8_t*)calloc(mesh_w * mesh_h, sizeof(uint8_t));
        int16_t *affine_a11 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        int16_t *affine_a12 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        int16_t *affine_a21 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        int16_t *affine_a22 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        int cell_w = width / mesh_w;
        int cell_h = height / mesh_h;
        int affine_count = 0;
        for (int cy = 0; cy < mesh_h; cy++) {
            for (int cx = 0; cx < mesh_w; cx++) {
                int cell_idx = cy * mesh_w + cx;
                int16_t tx, ty, a11, a12, a21, a22;
                int use_affine = estimate_cell_affine(
                    flow_x, flow_y,
                    width, height,
                    cx, cy, cell_w, cell_h,
                    affine_threshold,
                    &tx, &ty, &a11, &a12, &a21, &a22
                );
                affine_mask[cell_idx] = use_affine ? 1 : 0;
                mesh_dx[cell_idx] = tx;
                mesh_dy[cell_idx] = ty;
                affine_a11[cell_idx] = a11;
                affine_a12[cell_idx] = a12;
                affine_a21[cell_idx] = a21;
                affine_a22[cell_idx] = a22;
                if (use_affine) affine_count++;
            }
        }
        printf(" Affine usage: %d/%d cells (%.1f%%)\n",
        affine_count, mesh_w * mesh_h,
        100.0f * affine_count / (mesh_w * mesh_h));
        // Warp previous frame to current frame
        printf(" Warping frame with mesh + affine...\n");
        cv::Mat warped;
        apply_mesh_warp_rgb(prev_frame, warped, mesh_dx, mesh_dy,
        affine_mask, affine_a11, affine_a12, affine_a21, affine_a22,
        mesh_w, mesh_h);
        // Create visualization with affine overlay
        cv::Mat warped_viz = warped.clone();
        create_affine_overlay(warped_viz, affine_mask, mesh_w, mesh_h);
        // Compute MSE between warped and target
        double mse = 0.0;
        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++) {
                for (int c = 0; c < 3; c++) {
                    double diff = (double)warped.at<cv::Vec3b>(y, x)[c] -
                    (double)curr_frame.at<cv::Vec3b>(y, x)[c];
                    mse += diff * diff;
                }
            }
        }
        mse /= (width * height * 3);
        // 999.0 is a sentinel for infinite PSNR (zero MSE)
        double psnr = (mse > 0) ? 10.0 * log10(255.0 * 255.0 / mse) : 999.0;
        printf(" Warp quality: MSE=%.2f, PSNR=%.2f dB\n", mse, psnr);
        // Save images (snprintf hardens against any buffer overflow)
        char filename[256];
        snprintf(filename, sizeof(filename), "test_mesh_frame_%04d_source.png", frame_num - 1);
        cv::imwrite(filename, prev_frame);
        printf(" Saved source: %s\n", filename);
        snprintf(filename, sizeof(filename), "test_mesh_frame_%04d_warped.png", frame_num);
        cv::imwrite(filename, warped);
        printf(" Saved warped: %s\n", filename);
        snprintf(filename, sizeof(filename), "test_mesh_frame_%04d_warped_viz.png", frame_num);
        cv::imwrite(filename, warped_viz);
        printf(" Saved warped+viz (green=affine): %s\n", filename);
        snprintf(filename, sizeof(filename), "test_mesh_frame_%04d_target.png", frame_num);
        cv::imwrite(filename, curr_frame);
        printf(" Saved target: %s\n", filename);
        // Compute difference image
        cv::Mat diff_img = cv::Mat::zeros(height, width, CV_8UC3);
        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++) {
                for (int c = 0; c < 3; c++) {
                    int diff = abs((int)warped.at<cv::Vec3b>(y, x)[c] -
                    (int)curr_frame.at<cv::Vec3b>(y, x)[c]);
                    diff_img.at<cv::Vec3b>(y, x)[c] = std::min(diff * 3, 255); // Amplify for visibility
                }
            }
        }
        snprintf(filename, sizeof(filename), "test_mesh_frame_%04d_diff.png", frame_num);
        cv::imwrite(filename, diff_img);
        printf(" Saved difference (amplified 3x): %s\n", filename);
        // Compute motion statistics (mesh vectors are 1/8-pixel units)
        float max_motion = 0.0f, avg_motion = 0.0f;
        for (int i = 0; i < mesh_w * mesh_h; i++) {
            float dx = mesh_dx[i] / 8.0f;
            float dy = mesh_dy[i] / 8.0f;
            float motion = sqrtf(dx * dx + dy * dy);
            avg_motion += motion;
            if (motion > max_motion) max_motion = motion;
        }
        avg_motion /= (mesh_w * mesh_h);
        printf(" Motion: avg=%.2f px, max=%.2f px\n", avg_motion, max_motion);
        // Cleanup of all per-iteration buffers
        free(flow_x);
        free(flow_y);
        free(mesh_dx);
        free(mesh_dy);
        free(affine_mask);
        free(affine_a11);
        free(affine_a12);
        free(affine_a21);
        free(affine_a22);
    }
    printf("\nDone! Check output images:\n");
    printf(" *_source.png: Original frame before warping\n");
    printf(" *_warped.png: Warped frame (should match target)\n");
    printf(" *_warped_viz.png: Warped with green overlay showing affine cells\n");
    printf(" *_target.png: Target frame to match\n");
    printf(" *_diff.png: Difference image (should be mostly black if warp is good)\n");
    cap.release();
    return 0;
}