diff --git a/terranmon.txt b/terranmon.txt index e5c747a..72ec164 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -1054,6 +1054,8 @@ transmission capability, and region-of-interest coding. ## GOP Unified Packet Structure (0x12) Implemented on 2025-10-15 for temporal 3D DWT with unified preprocessing. +Updated on 2025-10-17 to include canvas expansion margins. + This packet contains multiple frames encoded as a single spacetime block for optimal temporal compression. @@ -1077,18 +1079,40 @@ The entire GOP (width×height×N_frames×3_channels) is preprocessed as a single This layout enables Zstd to find patterns across both spatial and temporal dimensions, resulting in superior compression compared to per-frame encoding. +### Canvas Expansion for Motion Compensation +When frames in a GOP have camera motion, they must be aligned before temporal DWT. +However, alignment creates "gaps" at frame edges. To preserve ALL original pixels: + +1. **Calculate motion range**: Determine the total shift range across all GOP frames + - Example: If frames shift by ±3 pixels horizontally, total range = 6 pixels +2. **Expand canvas**: Create a larger canvas = original_size + margin + - Canvas width = header.width + margin_left + margin_right + - Canvas height = header.height + margin_top + margin_bottom +3. **Place aligned frames**: Each frame is positioned on the expanded canvas + - All original pixels from all frames are preserved + - No artificial padding or cropping occurs +4. **Encode expanded canvas**: Apply 3D DWT to the larger canvas dimensions +5. **Store margins**: 4 bytes (L/R/T/B) tell decoder the canvas expansion +6. **Decoder extraction**: Decoder extracts display region for each frame based on + motion vectors and margins + +This approach ensures lossless preservation of original video content during GOP encoding. 
+ ### Motion Vectors -- Stored in quarter-pixel units (divide by 4.0 for pixel displacement) +- Stored in 1/16-pixel units (divide by 16.0 for pixel displacement) - Used for global motion compensation (camera movement, scene translation) - Computed using FFT-based phase correlation for accurate frame alignment -- First frame (frame 0) typically has motion vector (0, 0) +- Cumulative relative to frame 0 (not frame-to-frame deltas) +- First frame (frame 0) always has motion vector (0, 0) ### Temporal 3D DWT Process -1. Apply 1D DWT across temporal axis (GOP frames) -2. Apply 2D DWT on each spatial slice of temporal subbands -3. Perceptual quantization with temporal-spatial awareness -4. Unified significance map preprocessing across all frames/channels -5. Single Zstd compression of entire GOP block +1. Detect inter-frame motion using phase correlation +2. Align frames and expand canvas to preserve all original pixels +3. Apply 1D DWT across temporal axis (GOP frames) on expanded canvas +4. Apply 2D DWT on each spatial slice of temporal subbands +5. Perceptual quantization with temporal-spatial awareness +6. Unified significance map preprocessing across all frames/channels +7. Single Zstd compression of entire GOP block ## GOP Sync Packet Structure (0xFC) Indicates that N frames were decoded from a GOP Unified block. diff --git a/tsvm_core/src/net/torvald/tsvm/UnsafePtr.kt b/tsvm_core/src/net/torvald/tsvm/UnsafePtr.kt index 82ac83c..9b6696e 100644 --- a/tsvm_core/src/net/torvald/tsvm/UnsafePtr.kt +++ b/tsvm_core/src/net/torvald/tsvm/UnsafePtr.kt @@ -89,8 +89,8 @@ internal class UnsafePtr(pointer: Long, allocSize: Long, private val caller: Any //// You may break the glass and use this tool when some fucking incomprehensible bugs ("vittujen vitun bugit") //// appear (e.g. 
getting garbage values when it fucking shouldn't) -// if (destroyed) { throw DanglingPointerException("The pointer is already destroyed ($this)") } -// if (index !in 0 until size) throw AddressOverflowException("Index: $index; alloc size: $size; pointer: ${this}\n${Thread.currentThread().stackTrace.joinToString("\n", limit=10) { " $it" }}") + if (destroyed) { throw DanglingPointerException("The pointer is already destroyed ($this)") } + if (index !in 0 until size) throw AddressOverflowException("Index: $index; alloc size: $size; pointer: ${this}\n${Thread.currentThread().stackTrace.joinToString("\n", limit=10) { " $it" }}") } operator fun get(index: Long): Byte { diff --git a/video_encoder/Makefile b/video_encoder/Makefile index 4607e3c..995a8f4 100644 --- a/video_encoder/Makefile +++ b/video_encoder/Makefile @@ -2,11 +2,18 @@ # Makefile for TSVM Enhanced Video (TEV) encoder CC = gcc +CXX = g++ CFLAGS = -std=c99 -Wall -Wextra -O2 -D_GNU_SOURCE +CXXFLAGS = -std=c++11 -Wall -Wextra -O2 -D_GNU_SOURCE LIBS = -lm -lzstd +# OpenCV flags (for TAV encoder with mesh warping) +OPENCV_CFLAGS = $(shell pkg-config --cflags opencv4) +OPENCV_LIBS = $(shell pkg-config --libs opencv4) + # Source files and targets TARGETS = tev tav tav_decoder +TEST_TARGETS = test_mesh_warp test_mesh_roundtrip # Build all encoders all: $(TARGETS) @@ -16,21 +23,35 @@ tev: encoder_tev.c rm -f encoder_tev $(CC) $(CFLAGS) -o encoder_tev $< $(LIBS) -tav: encoder_tav.c - rm -f encoder_tav - $(CC) $(CFLAGS) -o encoder_tav $< $(LIBS) -lfftw3f +tav: encoder_tav.c encoder_tav_opencv.cpp estimate_affine_from_blocks.cpp + rm -f encoder_tav encoder_tav.o encoder_tav_opencv.o estimate_affine_from_blocks.o + $(CC) $(CFLAGS) -c encoder_tav.c -o encoder_tav.o + $(CXX) $(CXXFLAGS) $(OPENCV_CFLAGS) -c encoder_tav_opencv.cpp -o encoder_tav_opencv.o + $(CXX) $(CXXFLAGS) -c estimate_affine_from_blocks.cpp -o estimate_affine_from_blocks.o + $(CXX) -o encoder_tav encoder_tav.o encoder_tav_opencv.o 
estimate_affine_from_blocks.o $(LIBS) -lfftw3f $(OPENCV_LIBS) tav_decoder: decoder_tav.c rm -f decoder_tav $(CC) $(CFLAGS) -o decoder_tav $< $(LIBS) +# Build test programs +test_mesh_warp: test_mesh_warp.cpp encoder_tav_opencv.cpp estimate_affine_from_blocks.cpp + rm -f test_mesh_warp test_mesh_warp.o + $(CXX) $(CXXFLAGS) $(OPENCV_CFLAGS) -o test_mesh_warp test_mesh_warp.cpp encoder_tav_opencv.cpp estimate_affine_from_blocks.cpp $(OPENCV_LIBS) + +test_mesh_roundtrip: test_mesh_roundtrip.cpp encoder_tav_opencv.cpp + rm -f test_mesh_roundtrip test_mesh_roundtrip.o + $(CXX) $(CXXFLAGS) $(OPENCV_CFLAGS) -o test_mesh_roundtrip test_mesh_roundtrip.cpp encoder_tav_opencv.cpp $(OPENCV_LIBS) + +tests: $(TEST_TARGETS) + # Build with debug symbols debug: CFLAGS += -g -DDEBUG debug: $(TARGETS) # Clean build artifacts clean: - rm -f $(TARGETS) + rm -f $(TARGETS) *.o # Install (copy to PATH) install: $(TARGETS) @@ -43,6 +64,8 @@ check-deps: @echo "Checking dependencies..." @echo "Using Zstd compression for better efficiency" @pkg-config --exists libzstd || (echo "Error: libzstd-dev not found. Install with: sudo apt install libzstd-dev" && exit 1) + @pkg-config --exists fftw3f || (echo "Error: libfftw3-dev not found. Install with: sudo apt install libfftw3-dev" && exit 1) + @pkg-config --exists opencv4 || (echo "Error: OpenCV 4 not found. Install with: sudo apt install libopencv-dev" && exit 1) @echo "All dependencies found." 
# Help diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index 1bc94d9..35f80ae 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -17,7 +17,7 @@ #include #include -#define ENCODER_VENDOR_STRING "Encoder-TAV 20251017" +#define ENCODER_VENDOR_STRING "Encoder-TAV 20251018 with mcwarp" // TSVM Advanced Video (TAV) format constants #define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV" @@ -37,16 +37,17 @@ #define TAV_MODE_DELTA 0x02 // Coefficient delta encoding (efficient P-frames) // Video packet types -#define TAV_PACKET_IFRAME 0x10 // Intra frame (keyframe) -#define TAV_PACKET_PFRAME 0x11 // Predicted frame -#define TAV_PACKET_GOP_UNIFIED 0x12 // Unified 3D DWT GOP (all frames in single block) -#define TAV_PACKET_AUDIO_MP2 0x20 // MP2 audio -#define TAV_PACKET_SUBTITLE 0x30 // Subtitle packet -#define TAV_PACKET_EXTENDED_HDR 0xEF // Extended header packet -#define TAV_PACKET_GOP_SYNC 0xFC // GOP sync packet (N frames decoded) -#define TAV_PACKET_TIMECODE 0xFD // Timecode packet -#define TAV_PACKET_SYNC_NTSC 0xFE // NTSC Sync packet -#define TAV_PACKET_SYNC 0xFF // Sync packet +#define TAV_PACKET_IFRAME 0x10 // Intra frame (keyframe) +#define TAV_PACKET_PFRAME 0x11 // Predicted frame +#define TAV_PACKET_GOP_UNIFIED 0x12 // Unified 3D DWT GOP (all frames in single block, translation-based) +#define TAV_PACKET_GOP_UNIFIED_MESH 0x13 // Unified 3D DWT GOP with distortion mesh warping +#define TAV_PACKET_AUDIO_MP2 0x20 // MP2 audio +#define TAV_PACKET_SUBTITLE 0x30 // Subtitle packet +#define TAV_PACKET_EXTENDED_HDR 0xEF // Extended header packet +#define TAV_PACKET_GOP_SYNC 0xFC // GOP sync packet (N frames decoded) +#define TAV_PACKET_TIMECODE 0xFD // Timecode packet +#define TAV_PACKET_SYNC_NTSC 0xFE // NTSC Sync packet +#define TAV_PACKET_SYNC 0xFF // Sync packet // DWT settings #define TILE_SIZE_X 640 @@ -106,7 +107,7 @@ static int needs_alpha_channel(int channel_layout) { #define DEFAULT_ZSTD_LEVEL 9 
#define GOP_SIZE 8 #define TEMPORAL_DECOMP_LEVEL 2 -#define MOTION_THRESHOLD 64.0f // Flush if motion exceeds 24 pixels in any direction +#define MOTION_THRESHOLD 24.0f // Flush if motion exceeds 24 pixels in any direction // Audio/subtitle constants (reused from TEV) #define MP2_DEFAULT_PACKET_SIZE 1152 @@ -311,10 +312,34 @@ typedef struct tav_encoder_s { float **gop_y_frames; // [frame][pixel] - Y channel for each GOP frame float **gop_co_frames; // [frame][pixel] - Co channel for each GOP frame float **gop_cg_frames; // [frame][pixel] - Cg channel for each GOP frame - int16_t *gop_translation_x; // [frame] - Translation X in 1/16-pixel units - int16_t *gop_translation_y; // [frame] - Translation Y in 1/16-pixel units + int16_t *gop_translation_x; // [frame] - Translation X in 1/16-pixel units (for 0x12 packets) + int16_t *gop_translation_y; // [frame] - Translation Y in 1/16-pixel units (for 0x12 packets) int temporal_decomp_levels; // Number of temporal DWT levels (default: 2) + // Mesh warping for temporal 3D DWT (0x13 packets) + int enable_mesh_warp; // Flag to enable mesh warping (default: 0, uses translation if temporal_dwt enabled) + int mesh_width; // Number of control points horizontally (default: 60 for 1920×1080 with 32 px cells) + int mesh_height; // Number of control points vertically (default: 34 for 1920×1080 with 32 px cells) + int16_t **gop_mesh_dx; // [frame][mesh_w * mesh_h] - Mesh displacement X in 1/8-pixel units + int16_t **gop_mesh_dy; // [frame][mesh_w * mesh_h] - Mesh displacement Y in 1/8-pixel units + + // Dense optical flow for reliability masking (used with mesh warping) + float **gop_flow_fwd_x; // [frame][width * height] - Forward flow X (F[i-1] → F[i]) + float **gop_flow_fwd_y; // [frame][width * height] - Forward flow Y + float **gop_flow_bwd_x; // [frame][width * height] - Backward flow X (F[i] → F[i-1]) + float **gop_flow_bwd_y; // [frame][width * height] - Backward flow Y + + // Selective per-cell affine transforms (0x13 packets) + uint8_t **gop_affine_mask; // 
[frame][mesh_w * mesh_h] - 1 bit: 1=affine, 0=translation only + int16_t **gop_affine_a11; // [frame][mesh_w * mesh_h] - Affine matrix 1/256 fixed-point (a11, a12, a21, a22) + int16_t **gop_affine_a12; + int16_t **gop_affine_a21; + int16_t **gop_affine_a22; + + float mesh_smoothness; // Laplacian smoothing weight (0.0-1.0, default: 0.5) + int mesh_smooth_iterations; // Number of smoothing iterations (default: 8) + float affine_threshold; // Residual improvement threshold for using affine (default: 0.10 = 10%) + // Tile processing int tiles_x, tiles_y; dwt_tile_t *tiles; @@ -647,6 +672,8 @@ static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output); // Temporal 3D DWT prototypes static void dwt_3d_forward(float **gop_data, int width, int height, int num_frames, int spatial_levels, int temporal_levels, int spatial_filter); +static void dwt_3d_forward_mc(tav_encoder_t *enc, float **gop_y, float **gop_co, float **gop_cg, + int num_frames, int spatial_levels, int temporal_levels, int spatial_filter); static void dwt_3d_inverse(float **gop_data, int width, int height, int num_frames, int spatial_levels, int temporal_levels, int spatial_filter); static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser, @@ -710,6 +737,7 @@ static void show_usage(const char *program_name) { printf(" --enable-delta Enable delta encoding\n"); printf(" --delta-haar N Apply N-level Haar DWT to delta coefficients (1-6, auto-enables delta)\n"); printf(" --temporal-dwt Enable temporal 3D DWT (GOP-based encoding with temporal transform)\n"); + printf(" --mesh-warp Enable mesh-based motion compensation (requires --temporal-dwt, uses optical flow)\n"); printf(" --ictcp Use ICtCp colour space instead of YCoCg-R (use when source is in BT.2100)\n"); printf(" --no-perceptual-tuning Disable perceptual quantisation\n"); printf(" --no-dead-zone Disable dead-zone quantisation (for comparison/testing)\n"); @@ -797,6 +825,25 @@ static tav_encoder_t* create_encoder(void) { 
enc->gop_translation_x = NULL; enc->gop_translation_y = NULL; + // Mesh warping settings (for 0x13 packets) + enc->enable_mesh_warp = 0; // Default: disabled (use translation-based 0x12) + enc->mesh_width = 0; // Will be calculated based on frame dimensions + enc->mesh_height = 0; + enc->gop_mesh_dx = NULL; + enc->gop_mesh_dy = NULL; + enc->gop_flow_fwd_x = NULL; // Dense flow for reliability masking + enc->gop_flow_fwd_y = NULL; + enc->gop_flow_bwd_x = NULL; + enc->gop_flow_bwd_y = NULL; + enc->gop_affine_mask = NULL; + enc->gop_affine_a11 = NULL; + enc->gop_affine_a12 = NULL; + enc->gop_affine_a21 = NULL; + enc->gop_affine_a22 = NULL; + enc->mesh_smoothness = 0.5f; // 50% smoothness weight + enc->mesh_smooth_iterations = 8; // 8 iterations + enc->affine_threshold = 0.40f; // 40% residual improvement required to use affine (was 0.10, too lenient) + return enc; } @@ -945,6 +992,93 @@ static int initialise_encoder(tav_encoder_t *enc) { memset(enc->gop_translation_x, 0, enc->gop_capacity * sizeof(int16_t)); memset(enc->gop_translation_y, 0, enc->gop_capacity * sizeof(int16_t)); + // Calculate mesh dimensions if mesh warping is enabled + if (enc->enable_mesh_warp) { + // Calculate mesh grid: approximately 16×16 pixel cells + // For 560×448: 18×14 mesh (252 points), for 1920×1080: 60×34 mesh (2040 points) + // Mesh density: 32×32 pixel cells for finer motion modeling + // Matches block matching resolution (16×16 blocks → 2 blocks per cell) + int mesh_cell_size = 32; // Pixel size of each mesh cell (finer than 64) + enc->mesh_width = (enc->width + mesh_cell_size - 1) / mesh_cell_size; // Round up + enc->mesh_height = (enc->height + mesh_cell_size - 1) / mesh_cell_size; + + // Ensure minimum 2×2 mesh + if (enc->mesh_width < 2) enc->mesh_width = 2; + if (enc->mesh_height < 2) enc->mesh_height = 2; + + int mesh_size = enc->mesh_width * enc->mesh_height; + + // Allocate mesh arrays for each GOP frame + enc->gop_mesh_dx = malloc(enc->gop_capacity * sizeof(int16_t*)); + 
enc->gop_mesh_dy = malloc(enc->gop_capacity * sizeof(int16_t*)); + enc->gop_affine_mask = malloc(enc->gop_capacity * sizeof(uint8_t*)); + enc->gop_affine_a11 = malloc(enc->gop_capacity * sizeof(int16_t*)); + enc->gop_affine_a12 = malloc(enc->gop_capacity * sizeof(int16_t*)); + enc->gop_affine_a21 = malloc(enc->gop_capacity * sizeof(int16_t*)); + enc->gop_affine_a22 = malloc(enc->gop_capacity * sizeof(int16_t*)); + + // Allocate dense flow arrays for reliability masking + enc->gop_flow_fwd_x = malloc(enc->gop_capacity * sizeof(float*)); + enc->gop_flow_fwd_y = malloc(enc->gop_capacity * sizeof(float*)); + enc->gop_flow_bwd_x = malloc(enc->gop_capacity * sizeof(float*)); + enc->gop_flow_bwd_y = malloc(enc->gop_capacity * sizeof(float*)); + + if (!enc->gop_mesh_dx || !enc->gop_mesh_dy || !enc->gop_affine_mask || + !enc->gop_affine_a11 || !enc->gop_affine_a12 || + !enc->gop_affine_a21 || !enc->gop_affine_a22 || + !enc->gop_flow_fwd_x || !enc->gop_flow_fwd_y || + !enc->gop_flow_bwd_x || !enc->gop_flow_bwd_y) { + fprintf(stderr, "Failed to allocate GOP mesh arrays\n"); + return -1; + } + + // Allocate individual mesh and affine buffers + int flow_size = enc->width * enc->height; + for (int i = 0; i < enc->gop_capacity; i++) { + enc->gop_mesh_dx[i] = malloc(mesh_size * sizeof(int16_t)); + enc->gop_mesh_dy[i] = malloc(mesh_size * sizeof(int16_t)); + enc->gop_affine_mask[i] = malloc(mesh_size * sizeof(uint8_t)); + enc->gop_affine_a11[i] = malloc(mesh_size * sizeof(int16_t)); + enc->gop_affine_a12[i] = malloc(mesh_size * sizeof(int16_t)); + enc->gop_affine_a21[i] = malloc(mesh_size * sizeof(int16_t)); + enc->gop_affine_a22[i] = malloc(mesh_size * sizeof(int16_t)); + + // Allocate dense flow buffers for reliability masking + enc->gop_flow_fwd_x[i] = malloc(flow_size * sizeof(float)); + enc->gop_flow_fwd_y[i] = malloc(flow_size * sizeof(float)); + enc->gop_flow_bwd_x[i] = malloc(flow_size * sizeof(float)); + enc->gop_flow_bwd_y[i] = malloc(flow_size * sizeof(float)); + + if 
(!enc->gop_mesh_dx[i] || !enc->gop_mesh_dy[i] || !enc->gop_affine_mask[i] || + !enc->gop_affine_a11[i] || !enc->gop_affine_a12[i] || + !enc->gop_affine_a21[i] || !enc->gop_affine_a22[i] || + !enc->gop_flow_fwd_x[i] || !enc->gop_flow_fwd_y[i] || + !enc->gop_flow_bwd_x[i] || !enc->gop_flow_bwd_y[i]) { + fprintf(stderr, "Failed to allocate GOP mesh buffers\n"); + return -1; + } + + // Initialize to zero (affine mask=0 means translation-only, identity affine) + memset(enc->gop_mesh_dx[i], 0, mesh_size * sizeof(int16_t)); + memset(enc->gop_mesh_dy[i], 0, mesh_size * sizeof(int16_t)); + memset(enc->gop_affine_mask[i], 0, mesh_size * sizeof(uint8_t)); + // Initialize affine to identity matrix (256 = 1.0 in 1/256 fixed-point) + for (int j = 0; j < mesh_size; j++) { + enc->gop_affine_a11[i][j] = 256; // 1.0 + enc->gop_affine_a12[i][j] = 0; + enc->gop_affine_a21[i][j] = 0; + enc->gop_affine_a22[i][j] = 256; // 1.0 + } + } + + if (enc->verbose) { + printf("Mesh warping enabled: mesh=%dx%d (approx %dx%d px cells), smoothness=%.2f, iterations=%d\n", + enc->mesh_width, enc->mesh_height, + enc->width / enc->mesh_width, enc->height / enc->mesh_height, + enc->mesh_smoothness, enc->mesh_smooth_iterations); + } + } + if (enc->verbose) { printf("Temporal DWT enabled: GOP size=%d, temporal levels=%d\n", enc->gop_capacity, enc->temporal_decomp_levels); @@ -1454,6 +1588,51 @@ static void phase_correlate_fft(const uint8_t *frame1_rgb, const uint8_t *frame2 fftwf_free(correlation); } +// ============================================================================== +// Optical Flow and Mesh Warping Functions +// ============================================================================== + +// Forward declaration for optical flow function (implemented in encoder_tav_opencv.cpp) +// Forward declarations for OpenCV functions (implemented in encoder_tav_opencv.cpp) +extern void estimate_motion_optical_flow( + const uint8_t *frame1_rgb, const uint8_t *frame2_rgb, + int width, int height, + 
float **out_flow_x, float **out_flow_y +); + +extern void build_mesh_from_flow( + const float *flow_x, const float *flow_y, + int width, int height, int mesh_w, int mesh_h, + int16_t *mesh_dx, int16_t *mesh_dy +); + +extern void smooth_mesh_laplacian( + int16_t *mesh_dx, int16_t *mesh_dy, + int mesh_width, int mesh_height, + float smoothness, int iterations +); + +extern void warp_frame_with_mesh( + const float *src_frame, int width, int height, + const int16_t *mesh_dx, const int16_t *mesh_dy, + int mesh_width, int mesh_height, + float *dst_frame +); + +extern int estimate_cell_affine( + const float *flow_x, const float *flow_y, + int width, int height, + int cell_x, int cell_y, + int cell_w, int cell_h, + float threshold, + int16_t *out_tx, int16_t *out_ty, + int16_t *out_a11, int16_t *out_a12, + int16_t *out_a21, int16_t *out_a22 +); + +// Note: build_mesh_from_flow, smooth_mesh_laplacian, and warp_frame_with_mesh +// are implemented in encoder_tav_opencv.cpp (extern declarations above) + // Apply translation to frame (for frame alignment before temporal DWT) // NO PADDING - only extracts the valid region that will be common across all frames static void apply_translation(float *frame_data, int width, int height, @@ -1594,6 +1773,195 @@ static void quantise_3d_dwt_coefficients(tav_encoder_t *enc, } } +// ============================================================================= +// Mesh Differential Encoding for Compression +// ============================================================================= + +// Encode mesh motion vectors with selective affine using temporal and spatial prediction +// Returns the number of bytes written to output buffer +// Format: +// 1. Mesh dimensions (2 bytes each: width, height) +// 2. Affine significance mask (1 bit per cell per frame, packed into bytes) +// 3. Translation dx/dy for ALL cells (temporal + spatial differential encoding) +// 4. 
Affine parameters a11, a12, a21, a22 for cells where mask=1 (temporal + spatial differential) +static size_t encode_mesh_differential( + int16_t **mesh_dx, int16_t **mesh_dy, + uint8_t **affine_mask, + int16_t **affine_a11, int16_t **affine_a12, + int16_t **affine_a21, int16_t **affine_a22, + int gop_size, int mesh_width, int mesh_height, + uint8_t *output_buffer, size_t buffer_capacity +) { + int mesh_points = mesh_width * mesh_height; + size_t bytes_written = 0; + + // Write mesh dimensions (2 bytes each) + if (bytes_written + 4 > buffer_capacity) return 0; + uint16_t mesh_w_16 = (uint16_t)mesh_width; + uint16_t mesh_h_16 = (uint16_t)mesh_height; + memcpy(output_buffer + bytes_written, &mesh_w_16, sizeof(uint16_t)); + bytes_written += sizeof(uint16_t); + memcpy(output_buffer + bytes_written, &mesh_h_16, sizeof(uint16_t)); + bytes_written += sizeof(uint16_t); + + // Write affine significance mask (packed bits: 1=affine, 0=translation-only) + int total_mask_bits = gop_size * mesh_points; + int mask_bytes = (total_mask_bits + 7) / 8; + if (bytes_written + mask_bytes > buffer_capacity) return 0; + + memset(output_buffer + bytes_written, 0, mask_bytes); // Clear mask buffer + int bit_idx = 0; + for (int t = 0; t < gop_size; t++) { + for (int i = 0; i < mesh_points; i++) { + if (affine_mask[t][i]) { + output_buffer[bytes_written + bit_idx / 8] |= (1 << (bit_idx % 8)); + } + bit_idx++; + } + } + bytes_written += mask_bytes; + + // Encode translation data for ALL cells (always present) + for (int t = 0; t < gop_size; t++) { + for (int i = 0; i < mesh_points; i++) { + int16_t dx = mesh_dx[t][i]; + int16_t dy = mesh_dy[t][i]; + + // Temporal prediction + if (t > 0) { + dx -= mesh_dx[t - 1][i]; + dy -= mesh_dy[t - 1][i]; + } + + // Spatial prediction + if (i > 0 && (i % mesh_width) != 0) { + int16_t left_dx = mesh_dx[t][i - 1]; + int16_t left_dy = mesh_dy[t][i - 1]; + if (t > 0) { + left_dx -= mesh_dx[t - 1][i - 1]; + left_dy -= mesh_dy[t - 1][i - 1]; + } + dx -= left_dx; + 
dy -= left_dy; + } + + if (bytes_written + 4 > buffer_capacity) return 0; + memcpy(output_buffer + bytes_written, &dx, sizeof(int16_t)); + bytes_written += sizeof(int16_t); + memcpy(output_buffer + bytes_written, &dy, sizeof(int16_t)); + bytes_written += sizeof(int16_t); + } + } + + // Encode affine parameters for cells where mask=1 + for (int t = 0; t < gop_size; t++) { + for (int i = 0; i < mesh_points; i++) { + if (!affine_mask[t][i]) continue; // Skip translation-only cells + + int16_t a11 = affine_a11[t][i]; + int16_t a12 = affine_a12[t][i]; + int16_t a21 = affine_a21[t][i]; + int16_t a22 = affine_a22[t][i]; + + // Temporal prediction (delta from previous frame's affine params) + if (t > 0 && affine_mask[t - 1][i]) { + a11 -= affine_a11[t - 1][i]; + a12 -= affine_a12[t - 1][i]; + a21 -= affine_a21[t - 1][i]; + a22 -= affine_a22[t - 1][i]; + } + + // Spatial prediction (delta from left neighbor if it also uses affine) + if (i > 0 && (i % mesh_width) != 0 && affine_mask[t][i - 1]) { + int16_t left_a11 = affine_a11[t][i - 1]; + int16_t left_a12 = affine_a12[t][i - 1]; + int16_t left_a21 = affine_a21[t][i - 1]; + int16_t left_a22 = affine_a22[t][i - 1]; + + if (t > 0 && affine_mask[t - 1][i - 1]) { + left_a11 -= affine_a11[t - 1][i - 1]; + left_a12 -= affine_a12[t - 1][i - 1]; + left_a21 -= affine_a21[t - 1][i - 1]; + left_a22 -= affine_a22[t - 1][i - 1]; + } + + a11 -= left_a11; + a12 -= left_a12; + a21 -= left_a21; + a22 -= left_a22; + } + + if (bytes_written + 8 > buffer_capacity) return 0; + memcpy(output_buffer + bytes_written, &a11, sizeof(int16_t)); + bytes_written += sizeof(int16_t); + memcpy(output_buffer + bytes_written, &a12, sizeof(int16_t)); + bytes_written += sizeof(int16_t); + memcpy(output_buffer + bytes_written, &a21, sizeof(int16_t)); + bytes_written += sizeof(int16_t); + memcpy(output_buffer + bytes_written, &a22, sizeof(int16_t)); + bytes_written += sizeof(int16_t); + } + } + + return bytes_written; +} + +// Decode mesh motion vectors from 
differential encoding +// Returns 0 on success, -1 on error +// This is the inverse of encode_mesh_differential() +static int decode_mesh_differential( + const uint8_t *input_buffer, size_t buffer_size, + int16_t **mesh_dx, int16_t **mesh_dy, + int gop_size, int *out_mesh_width, int *out_mesh_height +) { + size_t bytes_read = 0; + + // Read mesh dimensions + if (bytes_read + 4 > buffer_size) return -1; + uint16_t mesh_w_16, mesh_h_16; + memcpy(&mesh_w_16, input_buffer + bytes_read, sizeof(uint16_t)); + bytes_read += sizeof(uint16_t); + memcpy(&mesh_h_16, input_buffer + bytes_read, sizeof(uint16_t)); + bytes_read += sizeof(uint16_t); + + int mesh_width = (int)mesh_w_16; + int mesh_height = (int)mesh_h_16; + int mesh_points = mesh_width * mesh_height; + + *out_mesh_width = mesh_width; + *out_mesh_height = mesh_height; + + // Decode mesh data for all frames + for (int t = 0; t < gop_size; t++) { + for (int i = 0; i < mesh_points; i++) { + // Read differential values + if (bytes_read + 4 > buffer_size) return -1; + int16_t dx_delta, dy_delta; + memcpy(&dx_delta, input_buffer + bytes_read, sizeof(int16_t)); + bytes_read += sizeof(int16_t); + memcpy(&dy_delta, input_buffer + bytes_read, sizeof(int16_t)); + bytes_read += sizeof(int16_t); + + // Reconstruct: reverse spatial prediction first + if (i > 0 && (i % mesh_width) != 0) { + dx_delta += mesh_dx[t][i - 1]; + dy_delta += mesh_dy[t][i - 1]; + } + + // Then reverse temporal prediction + if (t > 0) { + dx_delta += mesh_dx[t - 1][i]; + dy_delta += mesh_dy[t - 1][i]; + } + + mesh_dx[t][i] = dx_delta; + mesh_dy[t][i] = dy_delta; + } + } + + return 0; +} + // ============================================================================= // GOP Management Functions // ============================================================================= @@ -1616,29 +1984,112 @@ static int gop_add_frame(tav_encoder_t *enc, const uint8_t *frame_rgb, memcpy(enc->gop_co_frames[frame_idx], frame_co, frame_channel_size); 
memcpy(enc->gop_cg_frames[frame_idx], frame_cg, frame_channel_size); - // Compute translation vector if not first frame - /*if (frame_idx > 0) { - phase_correlate_fft(enc->gop_rgb_frames[frame_idx - 1], - enc->gop_rgb_frames[frame_idx], - enc->width, enc->height, - &enc->gop_translation_x[frame_idx], - &enc->gop_translation_y[frame_idx]); + // Compute motion estimation if mesh warping is enabled + if (enc->enable_mesh_warp && frame_idx > 0) { + // Compute forward optical flow (F[i-1] → F[i]) + estimate_motion_optical_flow( + enc->gop_rgb_frames[frame_idx - 1], + enc->gop_rgb_frames[frame_idx], + enc->width, enc->height, + &enc->gop_flow_fwd_x[frame_idx], + &enc->gop_flow_fwd_y[frame_idx] + ); + + // Compute backward optical flow (F[i] → F[i-1]) for reliability masking + estimate_motion_optical_flow( + enc->gop_rgb_frames[frame_idx], + enc->gop_rgb_frames[frame_idx - 1], + enc->width, enc->height, + &enc->gop_flow_bwd_x[frame_idx], + &enc->gop_flow_bwd_y[frame_idx] + ); + + // Build distortion mesh from forward flow field + build_mesh_from_flow( + enc->gop_flow_fwd_x[frame_idx], enc->gop_flow_fwd_y[frame_idx], + enc->width, enc->height, + enc->mesh_width, enc->mesh_height, + enc->gop_mesh_dx[frame_idx], enc->gop_mesh_dy[frame_idx] + ); + + // Apply Laplacian smoothing to regularize mesh + smooth_mesh_laplacian( + enc->gop_mesh_dx[frame_idx], enc->gop_mesh_dy[frame_idx], + enc->mesh_width, enc->mesh_height, + enc->mesh_smoothness, enc->mesh_smooth_iterations + ); + + // Estimate selective per-cell affine transforms + int cell_w = enc->width / enc->mesh_width; + int cell_h = enc->height / enc->mesh_height; + int affine_count = 0; + + for (int cy = 0; cy < enc->mesh_height; cy++) { + for (int cx = 0; cx < enc->mesh_width; cx++) { + int cell_idx = cy * enc->mesh_width + cx; + + int16_t tx, ty, a11, a12, a21, a22; + int use_affine = estimate_cell_affine( + enc->gop_flow_fwd_x[frame_idx], enc->gop_flow_fwd_y[frame_idx], + enc->width, enc->height, + cx, cy, cell_w, cell_h, 
+ enc->affine_threshold, + &tx, &ty, &a11, &a12, &a21, &a22 + ); + + if (use_affine) { + // Use affine transform for this cell + enc->gop_affine_mask[frame_idx][cell_idx] = 1; + enc->gop_mesh_dx[frame_idx][cell_idx] = tx; + enc->gop_mesh_dy[frame_idx][cell_idx] = ty; + enc->gop_affine_a11[frame_idx][cell_idx] = a11; + enc->gop_affine_a12[frame_idx][cell_idx] = a12; + enc->gop_affine_a21[frame_idx][cell_idx] = a21; + enc->gop_affine_a22[frame_idx][cell_idx] = a22; + affine_count++; + } else { + // Use translation-only (keep existing mesh dx/dy from build_mesh_from_flow) + enc->gop_affine_mask[frame_idx][cell_idx] = 0; + // Keep dx/dy from build_mesh_from_flow, set affine to identity + enc->gop_affine_a11[frame_idx][cell_idx] = 256; + enc->gop_affine_a12[frame_idx][cell_idx] = 0; + enc->gop_affine_a21[frame_idx][cell_idx] = 0; + enc->gop_affine_a22[frame_idx][cell_idx] = 256; + } + } + } + + // Flow is now stored in enc->gop_flow_*[frame_idx], don't free if (enc->verbose && (frame_idx < 3 || frame_idx == enc->gop_capacity - 1)) { - printf(" GOP frame %d: translation = (%.3f, %.3f) pixels\n", - frame_idx, - enc->gop_translation_x[frame_idx] / 16.0f, - enc->gop_translation_y[frame_idx] / 16.0f); + // Compute average mesh displacement for verbose output + int mesh_points = enc->mesh_width * enc->mesh_height; + float avg_dx = 0.0f, avg_dy = 0.0f; + for (int i = 0; i < mesh_points; i++) { + avg_dx += fabsf(enc->gop_mesh_dx[frame_idx][i] / 8.0f); + avg_dy += fabsf(enc->gop_mesh_dy[frame_idx][i] / 8.0f); + } + avg_dx /= mesh_points; + avg_dy /= mesh_points; + printf(" GOP frame %d: mesh avg=(%.2f,%.2f)px, %d/%d affine (%.1f%%), mesh=%dx%d\n", + frame_idx, avg_dx, avg_dy, + affine_count, mesh_points, 100.0f * affine_count / mesh_points, + enc->mesh_width, enc->mesh_height); } - } else { - // First frame has no translation - enc->gop_translation_x[0] = 0; - enc->gop_translation_y[0] = 0; - }*/ + } else if (frame_idx == 0) { + // First frame has no motion (reference frame) + 
if (enc->enable_mesh_warp) { + int mesh_points = enc->mesh_width * enc->mesh_height; + memset(enc->gop_mesh_dx[0], 0, mesh_points * sizeof(int16_t)); + memset(enc->gop_mesh_dy[0], 0, mesh_points * sizeof(int16_t)); + } + } - // disabling frame realigning: producing worse results in general - enc->gop_translation_x[frame_idx] = 0.0f; - enc->gop_translation_y[frame_idx] = 0.0f; + // Legacy translation vectors (used when mesh warp is disabled) + if (!enc->enable_mesh_warp) { + enc->gop_translation_x[frame_idx] = 0.0f; + enc->gop_translation_y[frame_idx] = 0.0f; + } enc->gop_frame_count++; return 0; @@ -1770,54 +2221,75 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser, valid_width, valid_height, crop_left, crop_right, crop_top, crop_bottom); } - // Step 0.6: Apply motion compensation to align frames before temporal DWT - // This uses the cumulative translation vectors to align each frame to frame 0 - for (int i = 1; i < actual_gop_size; i++) { // Skip frame 0 (reference frame) - float *aligned_y = malloc(num_pixels * sizeof(float)); - float *aligned_co = malloc(num_pixels * sizeof(float)); - float *aligned_cg = malloc(num_pixels * sizeof(float)); + // Step 0.6: Motion compensation note + // For mesh warping: MC-lifting integrates motion compensation directly into the lifting steps + // For translation: still use pre-alignment (old method for backwards compatibility) + if (!enc->enable_mesh_warp) { + // Translation-based motion compensation: align using global translation vectors + for (int i = 1; i < actual_gop_size; i++) { // Skip frame 0 (reference frame) + float *aligned_y = malloc(num_pixels * sizeof(float)); + float *aligned_co = malloc(num_pixels * sizeof(float)); + float *aligned_cg = malloc(num_pixels * sizeof(float)); + + if (!aligned_y || !aligned_co || !aligned_cg) { + fprintf(stderr, "Error: Failed to allocate motion compensation buffers\n"); + // Cleanup and skip motion compensation for this GOP + free(aligned_y); + 
free(aligned_co); + free(aligned_cg); + break; + } + + // Apply translation to align this frame + apply_translation(gop_y_coeffs[i], enc->width, enc->height, + enc->gop_translation_x[i], enc->gop_translation_y[i], aligned_y); + apply_translation(gop_co_coeffs[i], enc->width, enc->height, + enc->gop_translation_x[i], enc->gop_translation_y[i], aligned_co); + apply_translation(gop_cg_coeffs[i], enc->width, enc->height, + enc->gop_translation_x[i], enc->gop_translation_y[i], aligned_cg); + + // Copy aligned frames back + memcpy(gop_y_coeffs[i], aligned_y, num_pixels * sizeof(float)); + memcpy(gop_co_coeffs[i], aligned_co, num_pixels * sizeof(float)); + memcpy(gop_cg_coeffs[i], aligned_cg, num_pixels * sizeof(float)); - if (!aligned_y || !aligned_co || !aligned_cg) { - fprintf(stderr, "Error: Failed to allocate motion compensation buffers\n"); - // Cleanup and skip motion compensation for this GOP free(aligned_y); free(aligned_co); free(aligned_cg); - break; } - - // Apply translation to align this frame - apply_translation(gop_y_coeffs[i], enc->width, enc->height, - enc->gop_translation_x[i], enc->gop_translation_y[i], aligned_y); - apply_translation(gop_co_coeffs[i], enc->width, enc->height, - enc->gop_translation_x[i], enc->gop_translation_y[i], aligned_co); - apply_translation(gop_cg_coeffs[i], enc->width, enc->height, - enc->gop_translation_x[i], enc->gop_translation_y[i], aligned_cg); - - // Copy aligned frames back - memcpy(gop_y_coeffs[i], aligned_y, num_pixels * sizeof(float)); - memcpy(gop_co_coeffs[i], aligned_co, num_pixels * sizeof(float)); - memcpy(gop_cg_coeffs[i], aligned_cg, num_pixels * sizeof(float)); - - free(aligned_y); - free(aligned_co); - free(aligned_cg); + } else { + // Mesh-based motion compensation uses MC-lifting (integrated into temporal DWT) + if (enc->verbose) { + printf("Using motion-compensated lifting (MC-lifting) (%dx%d mesh)\n", + enc->mesh_width, enc->mesh_height); + } } - // Step 0.7: Expand frames to larger canvas that preserves 
ALL original pixels - // Calculate expanded canvas size (UNION of all aligned frames) - int canvas_width = enc->width + crop_left + crop_right; // Original width + total shift range - int canvas_height = enc->height + crop_top + crop_bottom; // Original height + total shift range - int canvas_pixels = canvas_width * canvas_height; + // Step 0.7: Expand frames to larger canvas (only for translation-based alignment) + // Mesh-based warping doesn't need canvas expansion since the mesh handles local distortions + int canvas_width = enc->width; + int canvas_height = enc->height; + int canvas_pixels = num_pixels; - if (enc->verbose && (crop_left || crop_right || crop_top || crop_bottom)) { - printf("Expanded canvas: %dx%d (original %dx%d + margins L=%d R=%d T=%d B=%d)\n", - canvas_width, canvas_height, enc->width, enc->height, - crop_left, crop_right, crop_top, crop_bottom); - printf("This preserves all original pixels from all frames after alignment\n"); + if (!enc->enable_mesh_warp) { + // Calculate expanded canvas size (UNION of all aligned frames) + canvas_width = enc->width + crop_left + crop_right; // Original width + total shift range + canvas_height = enc->height + crop_top + crop_bottom; // Original height + total shift range + canvas_pixels = canvas_width * canvas_height; + + if (enc->verbose && (crop_left || crop_right || crop_top || crop_bottom)) { + printf("Expanded canvas: %dx%d (original %dx%d + margins L=%d R=%d T=%d B=%d)\n", + canvas_width, canvas_height, enc->width, enc->height, + crop_left, crop_right, crop_top, crop_bottom); + printf("This preserves all original pixels from all frames after alignment\n"); + } + } else { + if (enc->verbose) { + printf("Using original frame dimensions (mesh warping handles distortions)\n"); + } } - // Allocate expanded canvas buffers + // Allocate canvas buffers (expanded for translation, original size for mesh) float **canvas_y_coeffs = malloc(actual_gop_size * sizeof(float*)); float **canvas_co_coeffs = 
malloc(actual_gop_size * sizeof(float*)); float **canvas_cg_coeffs = malloc(actual_gop_size * sizeof(float*)); @@ -1827,58 +2299,65 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser, canvas_co_coeffs[i] = calloc(canvas_pixels, sizeof(float)); canvas_cg_coeffs[i] = calloc(canvas_pixels, sizeof(float)); - // Place the aligned frame onto the canvas at the appropriate offset - // Each frame's aligned position determines where it sits on the canvas - int offset_x = crop_left; // Frames are offset by the left margin - int offset_y = crop_top; // Frames are offset by the top margin + if (enc->enable_mesh_warp) { + // Mesh warping: simply copy frames (no expansion needed) + memcpy(canvas_y_coeffs[i], gop_y_coeffs[i], canvas_pixels * sizeof(float)); + memcpy(canvas_co_coeffs[i], gop_co_coeffs[i], canvas_pixels * sizeof(float)); + memcpy(canvas_cg_coeffs[i], gop_cg_coeffs[i], canvas_pixels * sizeof(float)); + } else { + // Translation-based: expand canvas and add symmetric padding + // Place the aligned frame onto the canvas at the appropriate offset + int offset_x = crop_left; // Frames are offset by the left margin + int offset_y = crop_top; // Frames are offset by the top margin - // Copy the full aligned frame onto the canvas (preserves all original content) - for (int y = 0; y < enc->height; y++) { - for (int x = 0; x < enc->width; x++) { - int src_idx = y * enc->width + x; - int dst_idx = (y + offset_y) * canvas_width + (x + offset_x); - canvas_y_coeffs[i][dst_idx] = gop_y_coeffs[i][src_idx]; - canvas_co_coeffs[i][dst_idx] = gop_co_coeffs[i][src_idx]; - canvas_cg_coeffs[i][dst_idx] = gop_cg_coeffs[i][src_idx]; + // Copy the full aligned frame onto the canvas (preserves all original content) + for (int y = 0; y < enc->height; y++) { + for (int x = 0; x < enc->width; x++) { + int src_idx = y * enc->width + x; + int dst_idx = (y + offset_y) * canvas_width + (x + offset_x); + canvas_y_coeffs[i][dst_idx] = gop_y_coeffs[i][src_idx]; + 
canvas_co_coeffs[i][dst_idx] = gop_co_coeffs[i][src_idx]; + canvas_cg_coeffs[i][dst_idx] = gop_cg_coeffs[i][src_idx]; + } } - } - // Fill margin areas with symmetric padding from frame edges - for (int y = 0; y < canvas_height; y++) { - for (int x = 0; x < canvas_width; x++) { - // Skip pixels in the original frame region (already copied) - if (y >= offset_y && y < offset_y + enc->height && - x >= offset_x && x < offset_x + enc->width) { - continue; + // Fill margin areas with symmetric padding from frame edges + for (int y = 0; y < canvas_height; y++) { + for (int x = 0; x < canvas_width; x++) { + // Skip pixels in the original frame region (already copied) + if (y >= offset_y && y < offset_y + enc->height && + x >= offset_x && x < offset_x + enc->width) { + continue; + } + + // Calculate position relative to original frame + int src_x = x - offset_x; + int src_y = y - offset_y; + + // Apply symmetric padding (mirroring) + if (src_x < 0) { + src_x = -src_x - 1; // Mirror left edge: -1→0, -2→1, -3→2 + } else if (src_x >= enc->width) { + src_x = 2 * enc->width - src_x - 1; // Mirror right edge + } + + if (src_y < 0) { + src_y = -src_y - 1; // Mirror top edge + } else if (src_y >= enc->height) { + src_y = 2 * enc->height - src_y - 1; // Mirror bottom edge + } + + // Clamp to valid range (safety for extreme cases) + src_x = CLAMP(src_x, 0, enc->width - 1); + src_y = CLAMP(src_y, 0, enc->height - 1); + + // Copy mirrored pixel from original frame to canvas margin + int src_idx = src_y * enc->width + src_x; + int dst_idx = y * canvas_width + x; + canvas_y_coeffs[i][dst_idx] = gop_y_coeffs[i][src_idx]; + canvas_co_coeffs[i][dst_idx] = gop_co_coeffs[i][src_idx]; + canvas_cg_coeffs[i][dst_idx] = gop_cg_coeffs[i][src_idx]; } - - // Calculate position relative to original frame - int src_x = x - offset_x; - int src_y = y - offset_y; - - // Apply symmetric padding (mirroring) - if (src_x < 0) { - src_x = -src_x - 1; // Mirror left edge: -1→0, -2→1, -3→2 - } else if (src_x >= 
enc->width) { - src_x = 2 * enc->width - src_x - 1; // Mirror right edge - } - - if (src_y < 0) { - src_y = -src_y - 1; // Mirror top edge - } else if (src_y >= enc->height) { - src_y = 2 * enc->height - src_y - 1; // Mirror bottom edge - } - - // Clamp to valid range (safety for extreme cases) - src_x = CLAMP(src_x, 0, enc->width - 1); - src_y = CLAMP(src_y, 0, enc->height - 1); - - // Copy mirrored pixel from original frame to canvas margin - int src_idx = src_y * enc->width + src_x; - int dst_idx = y * canvas_width + x; - canvas_y_coeffs[i][dst_idx] = gop_y_coeffs[i][src_idx]; - canvas_co_coeffs[i][dst_idx] = gop_co_coeffs[i][src_idx]; - canvas_cg_coeffs[i][dst_idx] = gop_cg_coeffs[i][src_idx]; } } @@ -1888,7 +2367,7 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser, free(gop_cg_coeffs[i]); } - // Replace pointers with expanded canvas + // Replace pointers with canvas free(gop_y_coeffs); free(gop_co_coeffs); free(gop_cg_coeffs); @@ -1915,12 +2394,21 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser, // Multi-frame GOP: Apply 3D DWT (temporal + spatial) to each channel // Note: This modifies gop_*_coeffs in-place // Use cropped dimensions to encode only the valid region - dwt_3d_forward(gop_y_coeffs, valid_width, valid_height, actual_gop_size, - enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter); - dwt_3d_forward(gop_co_coeffs, valid_width, valid_height, actual_gop_size, - enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter); - dwt_3d_forward(gop_cg_coeffs, valid_width, valid_height, actual_gop_size, - enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter); + + if (enc->enable_mesh_warp) { + // Use MC-lifting: motion compensation integrated into lifting steps + dwt_3d_forward_mc(enc, gop_y_coeffs, gop_co_coeffs, gop_cg_coeffs, + actual_gop_size, enc->decomp_levels, + enc->temporal_decomp_levels, enc->wavelet_filter); + } else { + // Use traditional 3D 
DWT with pre-aligned frames (translation-only) + dwt_3d_forward(gop_y_coeffs, valid_width, valid_height, actual_gop_size, + enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter); + dwt_3d_forward(gop_co_coeffs, valid_width, valid_height, actual_gop_size, + enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter); + dwt_3d_forward(gop_cg_coeffs, valid_width, valid_height, actual_gop_size, + enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter); + } } // Step 2: Allocate quantized coefficient buffers @@ -2024,9 +2512,8 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser, } } else { // Multi-frame GOP: use unified 3D DWT encoding - // Write unified GOP packet header - // Packet structure: [packet_type=0x12][gop_size][crop_info][motion_vectors...][compressed_size][compressed_data] - uint8_t packet_type = TAV_PACKET_GOP_UNIFIED; + // Choose packet type based on motion compensation method + uint8_t packet_type = enc->enable_mesh_warp ? 
TAV_PACKET_GOP_UNIFIED_MESH : TAV_PACKET_GOP_UNIFIED; fwrite(&packet_type, 1, 1, output); total_bytes_written += 1; @@ -2035,25 +2522,108 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser, fwrite(&gop_size_byte, 1, 1, output); total_bytes_written += 1; - // Write canvas expansion information (4 bytes) - // This tells the decoder the margins added to preserve all original pixels - // The encoded canvas is larger than the original frame to preserve edge content after alignment - uint8_t canvas_margins[4] = { - (uint8_t)crop_left, // Left margin - (uint8_t)crop_right, // Right margin - (uint8_t)crop_top, // Top margin - (uint8_t)crop_bottom // Bottom margin - }; - fwrite(canvas_margins, 1, 4, output); - total_bytes_written += 4; + if (enc->enable_mesh_warp) { + // Packet 0x13: Mesh-based warping with selective affine + // Encode mesh motion vectors + affine parameters and compress with Zstd + // Max size: mesh dimensions (4) + affine mask + translation (4 bytes/cell) + affine (8 bytes/cell worst case) + size_t max_mesh_size = 4 + (actual_gop_size * enc->mesh_width * enc->mesh_height * 13); + uint8_t *mesh_buffer = malloc(max_mesh_size); - // Write all motion vectors (1/16-pixel precision) for the entire GOP - for (int t = 0; t < actual_gop_size; t++) { - int16_t dx = enc->gop_translation_x[t]; - int16_t dy = enc->gop_translation_y[t]; - fwrite(&dx, sizeof(int16_t), 1, output); - fwrite(&dy, sizeof(int16_t), 1, output); + size_t mesh_size = encode_mesh_differential( + enc->gop_mesh_dx, enc->gop_mesh_dy, + enc->gop_affine_mask, + enc->gop_affine_a11, enc->gop_affine_a12, + enc->gop_affine_a21, enc->gop_affine_a22, + actual_gop_size, enc->mesh_width, enc->mesh_height, + mesh_buffer, max_mesh_size + ); + + if (mesh_size == 0) { + fprintf(stderr, "Error: Failed to encode mesh motion vectors\n"); + free(mesh_buffer); + // Free all allocated buffers + for (int i = 0; i < actual_gop_size; i++) { + free(gop_y_coeffs[i]); + free(gop_co_coeffs[i]); 
+ free(gop_cg_coeffs[i]); + free(quant_y[i]); + free(quant_co[i]); + free(quant_cg[i]); + } + free(gop_y_coeffs); + free(gop_co_coeffs); + free(gop_cg_coeffs); + free(quant_y); + free(quant_co); + free(quant_cg); + return 0; + } + + // Compress mesh data with Zstd + size_t max_compressed_mesh = ZSTD_compressBound(mesh_size); + uint8_t *compressed_mesh = malloc(max_compressed_mesh); + size_t compressed_mesh_size = ZSTD_compress( + compressed_mesh, max_compressed_mesh, + mesh_buffer, mesh_size, + enc->zstd_level + ); + + if (ZSTD_isError(compressed_mesh_size)) { + fprintf(stderr, "Error: Zstd compression failed for mesh data\n"); + free(mesh_buffer); + free(compressed_mesh); + // Free all allocated buffers + for (int i = 0; i < actual_gop_size; i++) { + free(gop_y_coeffs[i]); + free(gop_co_coeffs[i]); + free(gop_cg_coeffs[i]); + free(quant_y[i]); + free(quant_co[i]); + free(quant_cg[i]); + } + free(gop_y_coeffs); + free(gop_co_coeffs); + free(gop_cg_coeffs); + free(quant_y); + free(quant_co); + free(quant_cg); + return 0; + } + + // Write compressed mesh size and data + uint32_t compressed_mesh_size_32 = (uint32_t)compressed_mesh_size; + fwrite(&compressed_mesh_size_32, sizeof(uint32_t), 1, output); + fwrite(compressed_mesh, 1, compressed_mesh_size, output); + total_bytes_written += sizeof(uint32_t) + compressed_mesh_size; + + if (enc->verbose) { + printf("Mesh data: %zu bytes raw, %zu bytes compressed (%.1f%% compression)\n", + mesh_size, compressed_mesh_size, + 100.0 * compressed_mesh_size / mesh_size); + } + + free(mesh_buffer); + free(compressed_mesh); + } else { + // Packet 0x12: Translation-based alignment + // Write canvas expansion information (4 bytes) + uint8_t canvas_margins[4] = { + (uint8_t)crop_left, // Left margin + (uint8_t)crop_right, // Right margin + (uint8_t)crop_top, // Top margin + (uint8_t)crop_bottom // Bottom margin + }; + fwrite(canvas_margins, 1, 4, output); total_bytes_written += 4; + + // Write all motion vectors (1/16-pixel precision) 
for the entire GOP + for (int t = 0; t < actual_gop_size; t++) { + int16_t dx = enc->gop_translation_x[t]; + int16_t dy = enc->gop_translation_y[t]; + fwrite(&dx, sizeof(int16_t), 1, output); + fwrite(&dy, sizeof(int16_t), 1, output); + total_bytes_written += 4; + } } // Preprocess ALL frames with unified significance map @@ -2252,6 +2822,251 @@ static size_t gop_process_and_flush(tav_encoder_t *enc, FILE *output, int base_q // Temporal DWT Functions // ============================================================================= +// Invert mesh for backward warping (MC-lifting update step) +// Forward mesh: warps F0 to F1 +// Backward mesh: warps F1 to F0 (negated motion vectors) +static void invert_mesh( + const short *mesh_dx, const short *mesh_dy, + int mesh_width, int mesh_height, + short *inv_mesh_dx, short *inv_mesh_dy +) { + int num_points = mesh_width * mesh_height; + for (int i = 0; i < num_points; i++) { + inv_mesh_dx[i] = -mesh_dx[i]; + inv_mesh_dy[i] = -mesh_dy[i]; + } +} + +// Build block-based reliability mask for selective motion compensation +// Process 16×16 blocks for efficiency (matches block matching resolution) +// Returns mask where 1 = use MC, 0 = fall back to plain Haar +/*static void build_reliability_mask( + const uint8_t *frame0_rgb, const uint8_t *frame1_rgb, + const float *flow_fwd_x, const float *flow_fwd_y, + const float *flow_bwd_x, const float *flow_bwd_y, + int width, int height, + uint8_t *mask +) { + const int block_size = 16; // Match block matching resolution + int num_pixels = width * height; + + // Relaxed thresholds for better coverage + float motion_threshold = 1.0f; // pixels (relaxed from 2.0) + float fb_threshold = 2.0f; // pixels (relaxed from 1.0) + float texture_threshold = 10.0f; // gradient magnitude + + int reliable_blocks = 0; + int total_blocks = 0; + int reliable_pixels = 0; + + // Process in 16×16 blocks + for (int by = 0; by < height; by += block_size) { + for (int bx = 0; bx < width; bx += block_size) { + 
total_blocks++; + + // Compute block statistics + float sum_motion = 0.0f; + float sum_fb_error = 0.0f; + float sum_texture = 0.0f; + int block_pixels = 0; + + int bh = (by + block_size <= height) ? block_size : (height - by); + int bw = (bx + block_size <= width) ? block_size : (width - bx); + + for (int y = by; y < by + bh; y++) { + for (int x = bx; x < bx + bw; x++) { + int idx = y * width + x; + + // Motion magnitude + float fx = flow_fwd_x[idx]; + float fy = flow_fwd_y[idx]; + sum_motion += sqrtf(fx * fx + fy * fy); + + // Forward-backward consistency + int x_warped = (int)(x + fx + 0.5f); + int y_warped = (int)(y + fy + 0.5f); + if (x_warped >= 0 && x_warped < width && y_warped >= 0 && y_warped < height) { + int idx_w = y_warped * width + x_warped; + float bx_val = flow_bwd_x[idx_w]; + float by_val = flow_bwd_y[idx_w]; + float err_x = fx + bx_val; + float err_y = fy + by_val; + sum_fb_error += sqrtf(err_x * err_x + err_y * err_y); + } else { + sum_fb_error += 999.0f; + } + + // Texture (simple gradient) + if (x > 0 && x < width - 1 && y > 0 && y < height - 1) { + int rgb_idx = idx * 3; + int rgb_idx_r = (y * width + (x + 1)) * 3; + int rgb_idx_d = ((y + 1) * width + x) * 3; + + float gx = (frame0_rgb[rgb_idx_r] - frame0_rgb[rgb_idx]); + float gy = (frame0_rgb[rgb_idx_d] - frame0_rgb[rgb_idx]); + sum_texture += sqrtf(gx * gx + gy * gy); + } + + block_pixels++; + } + } + + // Average block statistics + float avg_motion = sum_motion / block_pixels; + float avg_fb_error = sum_fb_error / block_pixels; + float avg_texture = sum_texture / block_pixels; + + // Decide if block is reliable + int block_reliable = (avg_motion > motion_threshold) && + (avg_fb_error < fb_threshold) && + (avg_texture > texture_threshold); + + if (block_reliable) reliable_blocks++; + + // Apply decision to all pixels in block + for (int y = by; y < by + bh; y++) { + for (int x = bx; x < bx + bw; x++) { + int idx = y * width + x; + mask[idx] = block_reliable ? 
1 : 0; + if (mask[idx]) reliable_pixels++; + } + } + } + } + + // Debug output + printf(" Reliability mask: %d/%d blocks (%d/%d pixels, %.1f%%) - motion>%.1fpx, texture>%.1f, fb_err<%.1fpx\n", + reliable_blocks, total_blocks, reliable_pixels, num_pixels, + 100.0f * reliable_pixels / num_pixels, + motion_threshold, texture_threshold, fb_threshold); +}*/ + +// Motion-compensated lifting: Haar wavelet with SYMMETRIC bi-directional warping +// Implements predict-update lifting scheme with symmetric motion compensation +// +// Symmetric lifting (both frames meet in the middle): +// Predict: H = warp(F1, -½·mesh) - warp(F0, +½·mesh) +// Update: L = 0.5 * (warp(F0, +½·mesh) + warp(F1, -½·mesh)) +// +// With reliability masking: +// For reliable pixels: use MC-lifting (mesh warp) +// For unreliable pixels: fall back to plain Haar (H = F1 - F0, L = 0.5*(F0+F1)) +// +// This produces: +// L (lowband): symmetric motion-aligned temporal average (where reliable) +// H (highband): symmetric motion-compensated residual (where reliable) +// +// Benefits: +// - Symmetric treatment of both frames (no bias) +// - Better invertibility (both frames warp equally) +// - Lower residual energy (frames meet at temporal midpoint) +// - Eliminates errors from occlusions, low-texture regions, and bad matches +static void mc_lifting_forward_pair( + tav_encoder_t *enc, + float **f0_y, float **f0_co, float **f0_cg, // Frame 0 (reference) + float **f1_y, float **f1_co, float **f1_cg, // Frame 1 (current) + const short *mesh_dx, const short *mesh_dy, // Forward mesh (F0→F1) +// const uint8_t *reliability_mask, // Reliability mask (1=use MC, 0=plain Haar) + float **out_l_y, float **out_l_co, float **out_l_cg, // Lowband output + float **out_h_y, float **out_h_co, float **out_h_cg // Highband output +) { + int width = enc->width; + int height = enc->height; + int num_pixels = width * height; + int num_mesh_points = enc->mesh_width * enc->mesh_height; + + // Allocate temporary buffers + float 
*warped_f0_fwd_y = malloc(num_pixels * sizeof(float)); + float *warped_f0_fwd_co = malloc(num_pixels * sizeof(float)); + float *warped_f0_fwd_cg = malloc(num_pixels * sizeof(float)); + + float *warped_f1_back_y = malloc(num_pixels * sizeof(float)); + float *warped_f1_back_co = malloc(num_pixels * sizeof(float)); + float *warped_f1_back_cg = malloc(num_pixels * sizeof(float)); + + short *half_mesh_dx = malloc(num_mesh_points * sizeof(short)); + short *half_mesh_dy = malloc(num_mesh_points * sizeof(short)); + short *neg_half_mesh_dx = malloc(num_mesh_points * sizeof(short)); + short *neg_half_mesh_dy = malloc(num_mesh_points * sizeof(short)); + + if (!warped_f0_fwd_y || !warped_f0_fwd_co || !warped_f0_fwd_cg || + !warped_f1_back_y || !warped_f1_back_co || !warped_f1_back_cg || + !half_mesh_dx || !half_mesh_dy || + !neg_half_mesh_dx || !neg_half_mesh_dy) { + fprintf(stderr, "Error: Failed to allocate MC-lifting buffers\n"); + goto cleanup; + } + + // Create half-step meshes for symmetric warping + // +½·mesh for F0 forward, -½·mesh for F1 backward + for (int i = 0; i < num_mesh_points; i++) { + half_mesh_dx[i] = mesh_dx[i] / 2; + half_mesh_dy[i] = mesh_dy[i] / 2; + neg_half_mesh_dx[i] = -half_mesh_dx[i]; + neg_half_mesh_dy[i] = -half_mesh_dy[i]; + } + + // ===== SYMMETRIC PREDICT STEP: H = warp(F1, -½·mesh) - warp(F0, +½·mesh) ===== + // Warp F0 forward by half-step (toward temporal midpoint) + warp_frame_with_mesh(*f0_y, width, height, half_mesh_dx, half_mesh_dy, + enc->mesh_width, enc->mesh_height, warped_f0_fwd_y); + warp_frame_with_mesh(*f0_co, width, height, half_mesh_dx, half_mesh_dy, + enc->mesh_width, enc->mesh_height, warped_f0_fwd_co); + warp_frame_with_mesh(*f0_cg, width, height, half_mesh_dx, half_mesh_dy, + enc->mesh_width, enc->mesh_height, warped_f0_fwd_cg); + + // Warp F1 backward by half-step (toward temporal midpoint) + warp_frame_with_mesh(*f1_y, width, height, neg_half_mesh_dx, neg_half_mesh_dy, + enc->mesh_width, enc->mesh_height, 
warped_f1_back_y); + warp_frame_with_mesh(*f1_co, width, height, neg_half_mesh_dx, neg_half_mesh_dy, + enc->mesh_width, enc->mesh_height, warped_f1_back_co); + warp_frame_with_mesh(*f1_cg, width, height, neg_half_mesh_dx, neg_half_mesh_dy, + enc->mesh_width, enc->mesh_height, warped_f1_back_cg); + + float ALPHA = 0.25f; + + // Compute temporal highband and lowband with selective MC + // For reliable pixels: use MC-lifting (warped frames) + // For unreliable pixels: fall back to plain Haar (original frames) + for (int i = 0; i < num_pixels; i++) { +// if (reliability_mask[i]) { + // ===== RELIABLE: MC-LIFTING ===== + // Predict: H = warp(F1, -½·mesh) - warp(F0, +½·mesh) + (*out_h_y)[i] = warped_f1_back_y[i] - warped_f0_fwd_y[i]; + (*out_h_co)[i] = warped_f1_back_co[i] - warped_f0_fwd_co[i]; + (*out_h_cg)[i] = warped_f1_back_cg[i] - warped_f0_fwd_cg[i]; + + // Update: L = 0.5 * (warp(F0, +½·mesh) + warp(F1, -½·mesh)) + (*out_l_y)[i] = ALPHA * (warped_f0_fwd_y[i] + warped_f1_back_y[i]); + (*out_l_co)[i] = ALPHA * (warped_f0_fwd_co[i] + warped_f1_back_co[i]); + (*out_l_cg)[i] = ALPHA * (warped_f0_fwd_cg[i] + warped_f1_back_cg[i]); + /*} else { + // ===== UNRELIABLE: PLAIN HAAR (no warping) ===== + // Predict: H = F1 - F0 + (*out_h_y)[i] = (*f1_y)[i] - (*f0_y)[i]; + (*out_h_co)[i] = (*f1_co)[i] - (*f0_co)[i]; + (*out_h_cg)[i] = (*f1_cg)[i] - (*f0_cg)[i]; + + // Update: L = 0.5 * (F0 + F1) + (*out_l_y)[i] = ALPHA * ((*f0_y)[i] + (*f1_y)[i]); + (*out_l_co)[i] = ALPHA * ((*f0_co)[i] + (*f1_co)[i]); + (*out_l_cg)[i] = ALPHA * ((*f0_cg)[i] + (*f1_cg)[i]); + }*/ + } + +cleanup: + free(warped_f0_fwd_y); + free(warped_f0_fwd_co); + free(warped_f0_fwd_cg); + free(warped_f1_back_y); + free(warped_f1_back_co); + free(warped_f1_back_cg); + free(half_mesh_dx); + free(half_mesh_dy); + free(neg_half_mesh_dx); + free(neg_half_mesh_dy); +} + // Apply 1D temporal DWT along time axis for a spatial location (encoder side) // data[i] = frame i's coefficient value at this spatial location // 
Applies LGT 5/3 wavelet for reversibility @@ -2266,9 +3081,127 @@ static void dwt_temporal_1d_inverse_53(float *temporal_data, int num_frames) { dwt_53_inverse_1d(temporal_data, num_frames); } +// Apply 3D DWT with motion-compensated lifting (MC-lifting) +// Integrates motion compensation directly into wavelet lifting steps +// This replaces separate warping + DWT for better invertibility and compression +static void dwt_3d_forward_mc( + tav_encoder_t *enc, + float **gop_y, float **gop_co, float **gop_cg, + int num_frames, int spatial_levels, int temporal_levels, int spatial_filter +) { + if (num_frames < 2) return; + + int width = enc->width; + int height = enc->height; + int num_pixels = width * height; + + // Allocate temporary buffers for L and H bands + float **temp_l_y = malloc(num_frames * sizeof(float*)); + float **temp_l_co = malloc(num_frames * sizeof(float*)); + float **temp_l_cg = malloc(num_frames * sizeof(float*)); + float **temp_h_y = malloc(num_frames * sizeof(float*)); + float **temp_h_co = malloc(num_frames * sizeof(float*)); + float **temp_h_cg = malloc(num_frames * sizeof(float*)); + + for (int i = 0; i < num_frames; i++) { + temp_l_y[i] = malloc(num_pixels * sizeof(float)); + temp_l_co[i] = malloc(num_pixels * sizeof(float)); + temp_l_cg[i] = malloc(num_pixels * sizeof(float)); + temp_h_y[i] = malloc(num_pixels * sizeof(float)); + temp_h_co[i] = malloc(num_pixels * sizeof(float)); + temp_h_cg[i] = malloc(num_pixels * sizeof(float)); + } + + // Step 1: Apply MC-lifting temporal transform + // Process frame pairs at each decomposition level + for (int level = 0; level < temporal_levels; level++) { + int level_frames = num_frames >> level; + if (level_frames < 2) break; + + // Apply MC-lifting to each frame pair + for (int i = 0; i < level_frames; i += 2) { + int f0_idx = i; + int f1_idx = i + 1; + + if (f1_idx >= level_frames) break; + + // Get mesh for this frame pair (mesh[f1] describes motion from f0 to f1) + const short *mesh_dx = 
enc->gop_mesh_dx[f1_idx]; + const short *mesh_dy = enc->gop_mesh_dy[f1_idx]; + + // Build reliability mask for selective motion compensation + /*uint8_t *reliability_mask = malloc(num_pixels * sizeof(uint8_t)); + if (reliability_mask && enc->gop_flow_fwd_x[f1_idx] && enc->gop_flow_bwd_x[f1_idx]) { + build_reliability_mask( + enc->gop_rgb_frames[f0_idx], enc->gop_rgb_frames[f1_idx], + enc->gop_flow_fwd_x[f1_idx], enc->gop_flow_fwd_y[f1_idx], + enc->gop_flow_bwd_x[f1_idx], enc->gop_flow_bwd_y[f1_idx], + width, height, + reliability_mask + ); + } else { + // Fallback: use MC everywhere (all pixels reliable) + if (reliability_mask) { + memset(reliability_mask, 1, num_pixels * sizeof(uint8_t)); + } + }*/ + + // Apply MC-lifting: (L, H) = mc_lift(F0, F1, mesh, mask) + mc_lifting_forward_pair( + enc, + &gop_y[f0_idx], &gop_co[f0_idx], &gop_cg[f0_idx], // F0 + &gop_y[f1_idx], &gop_co[f1_idx], &gop_cg[f1_idx], // F1 + mesh_dx, mesh_dy, +// reliability_mask, // Reliability mask + &temp_l_y[i/2], &temp_l_co[i/2], &temp_l_cg[i/2], // L output + &temp_h_y[level_frames/2 + i/2], &temp_h_co[level_frames/2 + i/2], &temp_h_cg[level_frames/2 + i/2] // H output + ); + +// free(reliability_mask); + } + + // Copy L and H bands back to gop buffers for next level + int half = level_frames / 2; + for (int i = 0; i < half; i++) { + memcpy(gop_y[i], temp_l_y[i], num_pixels * sizeof(float)); + memcpy(gop_co[i], temp_l_co[i], num_pixels * sizeof(float)); + memcpy(gop_cg[i], temp_l_cg[i], num_pixels * sizeof(float)); + } + for (int i = 0; i < half; i++) { + memcpy(gop_y[half + i], temp_h_y[half + i], num_pixels * sizeof(float)); + memcpy(gop_co[half + i], temp_h_co[half + i], num_pixels * sizeof(float)); + memcpy(gop_cg[half + i], temp_h_cg[half + i], num_pixels * sizeof(float)); + } + } + + // Step 2: Apply 2D spatial DWT to each temporal subband + for (int t = 0; t < num_frames; t++) { + dwt_2d_forward_flexible(gop_y[t], width, height, spatial_levels, spatial_filter); + 
dwt_2d_forward_flexible(gop_co[t], width, height, spatial_levels, spatial_filter); + dwt_2d_forward_flexible(gop_cg[t], width, height, spatial_levels, spatial_filter); + } + + // Cleanup + for (int i = 0; i < num_frames; i++) { + free(temp_l_y[i]); + free(temp_l_co[i]); + free(temp_l_cg[i]); + free(temp_h_y[i]); + free(temp_h_co[i]); + free(temp_h_cg[i]); + } + free(temp_l_y); + free(temp_l_co); + free(temp_l_cg); + free(temp_h_y); + free(temp_h_co); + free(temp_h_cg); +} + // Apply 3D DWT: temporal DWT across frames, then spatial DWT on each temporal subband // gop_data[frame][y * width + x] - GOP buffer organized as frame-major // Modifies gop_data in-place +// NOTE: This is the OLD version without MC-lifting (kept for non-mesh mode) static void dwt_3d_forward(float **gop_data, int width, int height, int num_frames, int spatial_levels, int temporal_levels, int spatial_filter) { if (num_frames < 2 || width < 2 || height < 2) return; @@ -2290,8 +3223,8 @@ static void dwt_3d_forward(float **gop_data, int width, int height, int num_fram for (int level = 0; level < temporal_levels; level++) { int level_frames = num_frames >> level; if (level_frames >= 2) { -// dwt_temporal_1d_forward_53(temporal_line, level_frames); - dwt_haar_forward_1d(temporal_line, level_frames); +// dwt_temporal_1d_forward_53(temporal_line, level_frames); // CDF 5/3 worse for motion-compensated frames + dwt_haar_forward_1d(temporal_line, level_frames); // Haar better for imperfect alignment } } @@ -5272,6 +6205,7 @@ int main(int argc, char *argv[]) { {"delta-haar", required_argument, 0, 1018}, {"temporal-dwt", no_argument, 0, 1019}, {"temporal-3d", no_argument, 0, 1019}, + {"mesh-warp", no_argument, 0, 1020}, {"help", no_argument, 0, '?'}, {0, 0, 0, 0} }; @@ -5438,6 +6372,10 @@ int main(int argc, char *argv[]) { enc->enable_temporal_dwt = 1; printf("Temporal 3D DWT encoding enabled (GOP size: %d frames)\n", GOP_SIZE); break; + case 1020: // --mesh-warp + enc->enable_mesh_warp = 1; + 
// Block-matching motion estimation for the TAV encoder.
// Compiled as C++ and linked with the C encoder; entry points use C linkage.

#include <cstdlib>
#include <cstring>

// Extern "C" linkage for functions callable from C code
extern "C" {

// Sum of Absolute Differences between the block_size×block_size block of
// `ref` anchored at (ref_x, ref_y) and the block of `cur` anchored at
// (cur_x, cur_y). Pixels falling outside the frame contribute a fixed 255
// penalty (the maximum per-pixel difference) so candidate vectors that hang
// off the edge are strongly disfavoured instead of reading out of range.
static int compute_sad(
    const unsigned char *ref, const unsigned char *cur,
    int ref_x, int ref_y, int cur_x, int cur_y,
    int width, int height, int block_size
) {
    int sad = 0;
    for (int by = 0; by < block_size; by++) {
        for (int bx = 0; bx < block_size; bx++) {
            int ry = ref_y + by;
            int rx = ref_x + bx;
            int cy = cur_y + by;
            int cx = cur_x + bx;

            // Boundary check
            if (rx < 0 || rx >= width || ry < 0 || ry >= height ||
                cx < 0 || cx >= width || cy < 0 || cy >= height) {
                sad += 255; // Penalty for out-of-bounds
                continue;
            }

            int ref_val = ref[ry * width + rx];
            int cur_val = cur[cy * width + cx];
            sad += abs(ref_val - cur_val);
        }
    }
    return sad;
}

// Diamond-search motion estimation for one block anchored at (cx, cy).
// Walks a large (radius-2) diamond until no candidate improves the SAD,
// then refines with a small (radius-1) diamond. Every accepted step
// strictly decreases the SAD, so both loops terminate.
static void diamond_search(
    const unsigned char *ref, const unsigned char *cur,
    int cx, int cy, int width, int height, int block_size,
    int search_range, int *best_dx, int *best_dy
) {
    // Large diamond pattern (distance 2)
    const int large_diamond[8][2] = {
        {0, -2}, {-1, -1}, {1, -1}, {-2, 0},
        {2, 0}, {-1, 1}, {1, 1}, {0, 2}
    };

    // Small diamond pattern (distance 1)
    const int small_diamond[4][2] = {
        {0, -1}, {-1, 0}, {1, 0}, {0, 1}
    };

    int dx = 0, dy = 0;
    int best_sad = compute_sad(ref, cur, cx + dx, cy + dy, cx, cy, width, height, block_size);

    // Large diamond search: restart the scan after every improvement.
    bool improved = true;
    while (improved) {
        improved = false;
        for (int i = 0; i < 8; i++) {
            int test_dx = dx + large_diamond[i][0];
            int test_dy = dy + large_diamond[i][1];

            // Check search range bounds
            if (abs(test_dx) > search_range || abs(test_dy) > search_range) {
                continue;
            }

            int sad = compute_sad(ref, cur, cx + test_dx, cy + test_dy, cx, cy, width, height, block_size);
            if (sad < best_sad) {
                best_sad = sad;
                dx = test_dx;
                dy = test_dy;
                improved = true;
                break;
            }
        }
    }

    // Small diamond refinement around the large-diamond result.
    improved = true;
    while (improved) {
        improved = false;
        for (int i = 0; i < 4; i++) {
            int test_dx = dx + small_diamond[i][0];
            int test_dy = dy + small_diamond[i][1];

            if (abs(test_dx) > search_range || abs(test_dy) > search_range) {
                continue;
            }

            int sad = compute_sad(ref, cur, cx + test_dx, cy + test_dy, cx, cy, width, height, block_size);
            if (sad < best_sad) {
                best_sad = sad;
                dx = test_dx;
                dy = test_dy;
                improved = true;
                break;
            }
        }
    }

    *best_dx = dx;
    *best_dy = dy;
}

// Two-level hierarchical block-matching motion estimation.
// Level 0 searches 32×32 blocks over ±16 px with a diamond search; level 1
// refines 16×16 blocks over ±8 px on a step-2 grid around the level-0
// result. Writes one dense, block-constant flow field; the caller owns and
// must free() *out_flow_x / *out_flow_y.
//
// FIX: the previous header comment advertised a 3-level pyramid (64×64
// blocks, ±32 px search) that did not match the code — the third level was
// a commented-out dead block. The comments now describe what the code
// actually does and the dead level-2 code has been removed; the computed
// flow field is unchanged.
void estimate_motion_optical_flow(
    const unsigned char *frame1_rgb, const unsigned char *frame2_rgb,
    int width, int height,
    float **out_flow_x, float **out_flow_y
) {
    // Step 1: Convert RGB to grayscale
    unsigned char *gray1 = (unsigned char*)std::malloc(width * height);
    unsigned char *gray2 = (unsigned char*)std::malloc(width * height);

    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            int idx = y * width + x;
            int rgb_idx = idx * 3;

            // ITU-R BT.601 grayscale conversion
            gray1[idx] = (unsigned char)(0.299f * frame1_rgb[rgb_idx] +
                                         0.587f * frame1_rgb[rgb_idx + 1] +
                                         0.114f * frame1_rgb[rgb_idx + 2]);
            gray2[idx] = (unsigned char)(0.299f * frame2_rgb[rgb_idx] +
                                         0.587f * frame2_rgb[rgb_idx + 1] +
                                         0.114f * frame2_rgb[rgb_idx + 2]);
        }
    }

    // Step 2: 2-level hierarchical block matching (coarse to fine)
    // Level 0: 32×32 blocks, ±16 pixel diamond search
    // Level 1: 16×16 blocks, ±8 pixel step-2 grid refinement

    *out_flow_x = (float*)std::malloc(width * height * sizeof(float));
    *out_flow_y = (float*)std::malloc(width * height * sizeof(float));

    // Initialize with zero motion
    std::memset(*out_flow_x, 0, width * height * sizeof(float));
    std::memset(*out_flow_y, 0, width * height * sizeof(float));

    // Level 0: Coarse search (32×32 blocks, ±16px)
    const int block_size_l0 = 32;
    const int search_range_l0 = 16;

    for (int by = 0; by < height; by += block_size_l0) {
        for (int bx = 0; bx < width; bx += block_size_l0) {
            int dx = 0, dy = 0;
            diamond_search(gray1, gray2, bx, by, width, height,
                           block_size_l0, search_range_l0, &dx, &dy);

            // Fill flow for this block
            for (int y = by; y < by + block_size_l0 && y < height; y++) {
                for (int x = bx; x < bx + block_size_l0 && x < width; x++) {
                    int idx = y * width + x;
                    (*out_flow_x)[idx] = (float)dx;
                    (*out_flow_y)[idx] = (float)dy;
                }
            }
        }
    }

    // Level 1: Fine refinement (16×16 blocks, ±8px around the level-0 guess)
    const int block_size_l1 = 16;
    const int search_range_l1 = 8;

    for (int by = 0; by < height; by += block_size_l1) {
        for (int bx = 0; bx < width; bx += block_size_l1) {
            // Get initial guess from level 0 (flow is block-constant, so the
            // top-left pixel of this block carries the level-0 vector).
            int init_dx = (int)(*out_flow_x)[by * width + bx];
            int init_dy = (int)(*out_flow_y)[by * width + bx];

            int best_dx = init_dx;
            int best_dy = init_dy;
            int best_sad = compute_sad(gray1, gray2, bx + init_dx, by + init_dy,
                                       bx, by, width, height, block_size_l1);

            // Local step-2 grid search around the initial guess
            for (int dy = -search_range_l1; dy <= search_range_l1; dy += 2) {
                for (int dx = -search_range_l1; dx <= search_range_l1; dx += 2) {
                    int test_dx = init_dx + dx;
                    int test_dy = init_dy + dy;

                    int sad = compute_sad(gray1, gray2, bx + test_dx, by + test_dy,
                                          bx, by, width, height, block_size_l1);
                    if (sad < best_sad) {
                        best_sad = sad;
                        best_dx = test_dx;
                        best_dy = test_dy;
                    }
                }
            }

            // Fill flow for this block
            for (int y = by; y < by + block_size_l1 && y < height; y++) {
                for (int x = bx; x < bx + block_size_l1 && x < width; x++) {
                    int idx = y * width + x;
                    (*out_flow_x)[idx] = (float)best_dx;
                    (*out_flow_y)[idx] = (float)best_dy;
                }
            }
        }
    }

    std::free(gray1);
    std::free(gray2);
}

} // extern "C"
extern "C" {

// Downsample a dense per-pixel flow field onto a coarse mesh_w×mesh_h grid
// of control points. Each control point takes the average flow over the
// 5×5 pixel window centred on its cell centre (clipped at frame borders),
// quantized to 1/8-pixel units in the output int16 arrays.
void build_mesh_from_flow(
    const float *flow_x, const float *flow_y,
    int width, int height,
    int mesh_w, int mesh_h,
    short *mesh_dx, short *mesh_dy // Output: 1/8 pixel precision
) {
    const int cell_w = width / mesh_w;
    const int cell_h = height / mesh_h;

    for (int gy = 0; gy < mesh_h; gy++) {
        for (int gx = 0; gx < mesh_w; gx++) {
            // Control point sits at the centre of its cell.
            const int center_x = gx * cell_w + cell_w / 2;
            const int center_y = gy * cell_h + cell_h / 2;

            // Accumulate flow over the 5×5 window, skipping off-frame taps.
            float acc_x = 0.0f, acc_y = 0.0f;
            int taps = 0;
            for (int wy = center_y - 2; wy <= center_y + 2; wy++) {
                if (wy < 0 || wy >= height) continue;
                for (int wx = center_x - 2; wx <= center_x + 2; wx++) {
                    if (wx < 0 || wx >= width) continue;
                    const int at = wy * width + wx;
                    acc_x += flow_x[at];
                    acc_y += flow_y[at];
                    taps++;
                }
            }

            // Average, then quantize to 1/8-pixel fixed point.
            const float mean_x = (taps > 0) ? (acc_x / taps) : 0.0f;
            const float mean_y = (taps > 0) ? (acc_y / taps) : 0.0f;
            const int out = gy * mesh_w + gx;
            mesh_dx[out] = (short)(mean_x * 8.0f);
            mesh_dy[out] = (short)(mean_y * 8.0f);
        }
    }
}

// Iterative Laplacian relaxation of the mesh: each control point is pulled
// toward the mean of its 4-connected neighbours by `smoothness` (0..1),
// repeated `iterations` times. Suppresses fold-overs and single-cell noise
// while `1 - smoothness` preserves the original (data-term) displacement.
void smooth_mesh_laplacian(
    short *mesh_dx, short *mesh_dy,
    int mesh_width, int mesh_height,
    float smoothness, int iterations
) {
    short *prev_dx = (short*)std::malloc(mesh_width * mesh_height * sizeof(short));
    short *prev_dy = (short*)std::malloc(mesh_width * mesh_height * sizeof(short));

    // 4-connected neighbour offsets: up, down, left, right.
    static const int off_x[4] = {0, 0, -1, 1};
    static const int off_y[4] = {-1, 1, 0, 0};

    for (int pass = 0; pass < iterations; pass++) {
        // Relax against a snapshot so all updates within a pass read the
        // same (pre-pass) mesh state.
        std::memcpy(prev_dx, mesh_dx, mesh_width * mesh_height * sizeof(short));
        std::memcpy(prev_dy, mesh_dy, mesh_width * mesh_height * sizeof(short));

        for (int gy = 0; gy < mesh_height; gy++) {
            for (int gx = 0; gx < mesh_width; gx++) {
                float acc_x = 0.0f, acc_y = 0.0f;
                int found = 0;

                for (int n = 0; n < 4; n++) {
                    const int qx = gx + off_x[n];
                    const int qy = gy + off_y[n];
                    if (qx < 0 || qx >= mesh_width || qy < 0 || qy >= mesh_height)
                        continue;
                    const int q = qy * mesh_width + qx;
                    acc_x += prev_dx[q];
                    acc_y += prev_dy[q];
                    found++;
                }

                if (found > 0) {
                    const int at = gy * mesh_width + gx;
                    const float keep = 1.0f - smoothness; // data-term weight
                    mesh_dx[at] = (short)(keep * prev_dx[at] + smoothness * (acc_x / found));
                    mesh_dy[at] = (short)(keep * prev_dy[at] + smoothness * (acc_y / found));
                }
            }
        }
    }

    std::free(prev_dx);
    std::free(prev_dy);
}

} // extern "C"
extern "C" {

// Warp one float-valued frame channel by the coarse motion mesh.
// Inverse mapping (destination → source, so no holes appear): for every
// destination pixel a motion vector is bilinearly interpolated from the
// four surrounding mesh control points (stored in 1/8-pixel units), then
// the source is bilinearly sampled at (x + dx, y + dy).
void warp_frame_with_mesh(
    const float *src_frame, int width, int height,
    const short *mesh_dx, const short *mesh_dy,
    int mesh_width, int mesh_height,
    float *dst_frame
) {
    const int cw = width / mesh_width;
    const int ch = height / mesh_height;

    for (int oy = 0; oy < height; oy++) {
        for (int ox = 0; ox < width; ox++) {
            // Containing mesh cell, clamped so a 2×2 corner neighbourhood
            // of control points always exists.
            int mx = ox / cw;
            int my = oy / ch;
            if (mx > mesh_width - 2)  mx = mesh_width - 2;
            if (my > mesh_height - 2) my = mesh_height - 2;
            if (mx < 0) mx = 0;
            if (my < 0) my = 0;

            // Indices of the four corner control points.
            const int c00 = my * mesh_width + mx;
            const int c10 = c00 + 1;
            const int c01 = c00 + mesh_width;
            const int c11 = c01 + 1;

            // Position inside the cell (control points sit at cell centres);
            // the cell span between adjacent centres is exactly cw / ch.
            const float px0 = mx * cw + cw / 2.0f;
            const float py0 = my * ch + ch / 2.0f;
            float u = (ox - px0) / (float)cw;
            float v = (oy - py0) / (float)ch;
            if (u < 0.0f) u = 0.0f;
            if (u > 1.0f) u = 1.0f;
            if (v < 0.0f) v = 0.0f;
            if (v > 1.0f) v = 1.0f;

            // Bilinearly blend the four corner motion vectors (1/8-px → px).
            const float dx = (1 - u) * (1 - v) * (mesh_dx[c00] / 8.0f)
                           + u * (1 - v) * (mesh_dx[c10] / 8.0f)
                           + (1 - u) * v * (mesh_dx[c01] / 8.0f)
                           + u * v * (mesh_dx[c11] / 8.0f);
            const float dy = (1 - u) * (1 - v) * (mesh_dy[c00] / 8.0f)
                           + u * (1 - v) * (mesh_dy[c10] / 8.0f)
                           + (1 - u) * v * (mesh_dy[c01] / 8.0f)
                           + u * v * (mesh_dy[c11] / 8.0f);

            // Source coordinates (inverse warp: dst → src).
            const float sx = ox + dx;
            const float sy = oy + dy;

            int x0 = (int)std::floor(sx);
            int y0 = (int)std::floor(sy);
            int x1 = x0 + 1;
            int y1 = y0 + 1;

            // Clamp all four taps to the frame.
            if (x0 < 0) x0 = 0;
            if (y0 < 0) y0 = 0;
            if (x1 >= width)  x1 = width - 1;
            if (y1 >= height) y1 = height - 1;
            if (x0 >= width)  x0 = width - 1;
            if (y0 >= height) y0 = height - 1;

            // NOTE: fractions are taken after clamping, so at frame edges
            // they may fall outside [0,1] (extrapolating weights) — this
            // matches the established behaviour of the encoder.
            const float fx = sx - x0;
            const float fy = sy - y0;

            const float s00 = src_frame[y0 * width + x0];
            const float s10 = src_frame[y0 * width + x1];
            const float s01 = src_frame[y1 * width + x0];
            const float s11 = src_frame[y1 * width + x1];

            dst_frame[oy * width + ox] =
                (1 - fx) * (1 - fy) * s00 +
                fx * (1 - fy) * s10 +
                (1 - fx) * fy * s01 +
                fx * fy * s11;
        }
    }
}

} // extern "C"
// Estimate a per-cell motion model from the dense flow field.
// Fits both a translation-only model and a full affine model
//   [vx]   [a11 a12][px]   [tx]
//   [vy] = [a21 a22][py] + [ty]
// to 16 flow samples taken on a 4×4 grid inside the cell (px/py relative to
// the cell centre), then keeps the affine model only if it reduces the
// squared residual by more than `threshold` (fractional, e.g. 0.10 = 10%).
//
// Outputs: translation in 1/8-pixel fixed point, affine matrix in 1/256
// fixed point (256 = identity). Returns 1 if affine was chosen, 0 for
// translation-only (in which case the matrix is set to identity).
int estimate_cell_affine(
    const float *flow_x, const float *flow_y,
    int width, int height,
    int cell_x, int cell_y,       // Cell position in mesh coordinates
    int cell_w, int cell_h,       // Cell size in pixels
    float threshold,              // Residual improvement threshold (e.g. 0.10 = 10%)
    short *out_tx, short *out_ty, // Translation (1/8 pixel)
    short *out_a11, short *out_a12, // Affine matrix (1/256 fixed-point)
    short *out_a21, short *out_a22
) {
    // Compute cell bounding box, clipped to the frame.
    int x_start = cell_x * cell_w;
    int y_start = cell_y * cell_h;
    int x_end = (cell_x + 1) * cell_w;
    int y_end = (cell_y + 1) * cell_h;
    if (x_end > width) x_end = width;
    if (y_end > height) y_end = height;

    // Sample motion vectors from a 4×4 grid within the cell.
    const int samples_x = 4;
    const int samples_y = 4;
    float sample_motion_x[16];
    float sample_motion_y[16];
    int sample_px[16];
    int sample_py[16];
    int n_samples = 0;

    for (int sy = 0; sy < samples_y; sy++) {
        for (int sx = 0; sx < samples_x; sx++) {
            int px = x_start + (x_end - x_start) * sx / (samples_x - 1);
            int py = y_start + (y_end - y_start) * sy / (samples_y - 1);

            if (px >= width) px = width - 1;
            if (py >= height) py = height - 1;

            int idx = py * width + px;
            sample_motion_x[n_samples] = flow_x[idx];
            sample_motion_y[n_samples] = flow_y[idx];
            sample_px[n_samples] = px - (x_start + x_end) / 2; // Relative to cell center
            sample_py[n_samples] = py - (y_start + y_end) / 2;
            n_samples++;
        }
    }

    // 1. Compute translation-only model (average motion).
    float avg_dx = 0, avg_dy = 0;
    for (int i = 0; i < n_samples; i++) {
        avg_dx += sample_motion_x[i];
        avg_dy += sample_motion_y[i];
    }
    avg_dx /= n_samples;
    avg_dy /= n_samples;

    // Translation residual (sum of squared errors against the mean).
    float trans_residual = 0;
    for (int i = 0; i < n_samples; i++) {
        float dx_err = sample_motion_x[i] - avg_dx;
        float dy_err = sample_motion_y[i] - avg_dy;
        trans_residual += dx_err * dx_err + dy_err * dy_err;
    }

    // 2. Estimate affine model using least-squares.
    // Solve:  [vx] = [a11 a12][px] + [tx]
    //         [vy]   [a21 a22][py]   [ty]
    // via the normal equations. Both rows share the same 3×3 moment matrix
    //   [sum_xx sum_xy sum_x]
    //   [sum_xy sum_yy sum_y]
    //   [sum_x  sum_y  n   ]
    // so only one determinant is needed.
    double sum_x = 0, sum_y = 0, sum_xx = 0, sum_yy = 0, sum_xy = 0;
    double sum_vx = 0, sum_vy = 0, sum_vx_x = 0, sum_vx_y = 0;
    double sum_vy_x = 0, sum_vy_y = 0;

    for (int i = 0; i < n_samples; i++) {
        double px = sample_px[i];
        double py = sample_py[i];
        double vx = sample_motion_x[i];
        double vy = sample_motion_y[i];

        sum_x += px;
        sum_y += py;
        sum_xx += px * px;
        sum_yy += py * py;
        sum_xy += px * py;
        sum_vx += vx;
        sum_vy += vy;
        sum_vx_x += vx * px;
        sum_vx_y += vx * py;
        sum_vy_x += vy * px;
        sum_vy_y += vy * py;
    }

    // Determinant of the shared 3×3 moment matrix.
    double n = n_samples;
    double det = n * sum_xx * sum_yy + 2 * sum_x * sum_y * sum_xy -
                 sum_xx * sum_y * sum_y - sum_yy * sum_x * sum_x - n * sum_xy * sum_xy;

    if (fabs(det) < 1e-6) {
        // Singular matrix (degenerate sample geometry): fall back to
        // translation with an identity affine matrix.
        *out_tx = (short)(avg_dx * 8.0f);
        *out_ty = (short)(avg_dy * 8.0f);
        *out_a11 = 256; // Identity
        *out_a12 = 0;
        *out_a21 = 0;
        *out_a22 = 256;
        return 0; // Translation only
    }

    // Solve for affine parameters by cofactor expansion (Cramer's rule);
    // tx/ty recovered from the mean once a11..a22 are known.
    double a11 = (sum_vx_x * sum_yy * n - sum_vx_y * sum_xy * n - sum_vx * sum_y * sum_y +
                  sum_vx * sum_xy * sum_y + sum_vx_y * sum_x * sum_y - sum_vx_x * sum_y * sum_y) / det;
    double a12 = (sum_vx_y * sum_xx * n - sum_vx_x * sum_xy * n - sum_vx * sum_x * sum_xy +
                  sum_vx * sum_xx * sum_y + sum_vx_x * sum_x * sum_y - sum_vx_y * sum_x * sum_x) / det;
    double tx = (sum_vx - a11 * sum_x - a12 * sum_y) / n;

    double a21 = (sum_vy_x * sum_yy * n - sum_vy_y * sum_xy * n - sum_vy * sum_y * sum_y +
                  sum_vy * sum_xy * sum_y + sum_vy_y * sum_x * sum_y - sum_vy_x * sum_y * sum_y) / det;
    double a22 = (sum_vy_y * sum_xx * n - sum_vy_x * sum_xy * n - sum_vy * sum_x * sum_xy +
                  sum_vy * sum_xx * sum_y + sum_vy_x * sum_x * sum_y - sum_vy_y * sum_x * sum_x) / det;
    double ty = (sum_vy - a21 * sum_x - a22 * sum_y) / n;

    // Affine residual (sum of squared prediction errors).
    float affine_residual = 0;
    for (int i = 0; i < n_samples; i++) {
        double px = sample_px[i];
        double py = sample_py[i];
        double pred_vx = a11 * px + a12 * py + tx;
        double pred_vy = a21 * px + a22 * py + ty;
        double dx_err = sample_motion_x[i] - pred_vx;
        double dy_err = sample_motion_y[i] - pred_vy;
        affine_residual += dx_err * dx_err + dy_err * dy_err;
    }

    // Decision: use affine only if the fractional residual improvement
    // exceeds the threshold (+1e-6 guards against divide-by-zero).
    float improvement = (trans_residual - affine_residual) / (trans_residual + 1e-6f);

    if (improvement > threshold) {
        // Use affine
        *out_tx = (short)(tx * 8.0f);
        *out_ty = (short)(ty * 8.0f);
        *out_a11 = (short)(a11 * 256.0);
        *out_a12 = (short)(a12 * 256.0);
        *out_a21 = (short)(a21 * 256.0);
        *out_a22 = (short)(a22 * 256.0);
        return 1; // Affine
    } else {
        // Use translation
        *out_tx = (short)(avg_dx * 8.0f);
        *out_ty = (short)(avg_dy * 8.0f);
        *out_a11 = 256; // Identity
        *out_a12 = 0;
        *out_a21 = 0;
        *out_a22 = 256;
        return 0; // Translation only
    }
}
    );

    void smooth_mesh_laplacian(
        int16_t *mesh_dx, int16_t *mesh_dy,
        int mesh_width, int mesh_height,
        float smoothness, int iterations
    );
}

// Mesh warp with bilinear interpolation (translation only).
// Test-harness duplicate of the encoder's warp, operating on 8-bit BGR
// cv::Mat data instead of float channels.
// NOTE(review): the <cv::Vec3b> template arguments on Mat::at were lost in
// a text-mangling pass and have been restored here — confirm against the
// original file.
static void apply_mesh_warp_rgb(
    const cv::Mat &src,
    cv::Mat &dst,
    const int16_t *mesh_dx,
    const int16_t *mesh_dy,
    int mesh_w, int mesh_h
) {
    int width = src.cols;
    int height = src.rows;
    int cell_w = width / mesh_w;
    int cell_h = height / mesh_h;

    dst = cv::Mat(height, width, CV_8UC3);

    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            // Containing mesh cell, clamped so a 2×2 corner neighbourhood exists.
            int cell_x = x / cell_w;
            int cell_y = y / cell_h;

            cell_x = std::min(cell_x, mesh_w - 2);
            cell_y = std::min(cell_y, mesh_h - 2);

            int idx_00 = cell_y * mesh_w + cell_x;
            int idx_10 = idx_00 + 1;
            int idx_01 = (cell_y + 1) * mesh_w + cell_x;
            int idx_11 = idx_01 + 1;

            // Control points sit at cell centres.
            float cp_x0 = cell_x * cell_w + cell_w / 2.0f;
            float cp_y0 = cell_y * cell_h + cell_h / 2.0f;
            float cp_x1 = (cell_x + 1) * cell_w + cell_w / 2.0f;
            float cp_y1 = (cell_y + 1) * cell_h + cell_h / 2.0f;

            float alpha = (x - cp_x0) / (cp_x1 - cp_x0);
            float beta = (y - cp_y0) / (cp_y1 - cp_y0);
            alpha = std::max(0.0f, std::min(1.0f, alpha));
            beta = std::max(0.0f, std::min(1.0f, beta));

            // Bilinear blend of the four corner motion vectors (1/8-px units).
            float dx = (1 - alpha) * (1 - beta) * (mesh_dx[idx_00] / 8.0f) +
                       alpha * (1 - beta) * (mesh_dx[idx_10] / 8.0f) +
                       (1 - alpha) * beta * (mesh_dx[idx_01] / 8.0f) +
                       alpha * beta * (mesh_dx[idx_11] / 8.0f);

            float dy = (1 - alpha) * (1 - beta) * (mesh_dy[idx_00] / 8.0f) +
                       alpha * (1 - beta) * (mesh_dy[idx_10] / 8.0f) +
                       (1 - alpha) * beta * (mesh_dy[idx_01] / 8.0f) +
                       alpha * beta * (mesh_dy[idx_11] / 8.0f);

            // Inverse warp: sample the source at dst position + motion.
            float src_x = x + dx;
            float src_y = y + dy;

            int sx0 = (int)floorf(src_x);
            int sy0 = (int)floorf(src_y);
            int sx1 = sx0 + 1;
            int sy1 = sy0 + 1;

            sx0 = std::max(0, std::min(width - 1, sx0));
            sy0 = std::max(0, std::min(height - 1, sy0));
            sx1 = std::max(0, std::min(width - 1, sx1));
            sy1 = std::max(0, std::min(height - 1, sy1));

            // Fractions taken after clamping (may extrapolate at frame edges).
            float fx = src_x - sx0;
            float fy = src_y - sy0;

            for (int c = 0; c < 3; c++) {
                float val_00 = src.at<cv::Vec3b>(sy0, sx0)[c];
                float val_10 = src.at<cv::Vec3b>(sy0, sx1)[c];
                float val_01 = src.at<cv::Vec3b>(sy1, sx0)[c];
                float val_11 = src.at<cv::Vec3b>(sy1, sx1)[c];

                float val = (1 - fx) * (1 - fy) * val_00 +
                            fx * (1 - fy) * val_10 +
                            (1 - fx) * fy * val_01 +
                            fx * fy * val_11;

                dst.at<cv::Vec3b>(y, x)[c] = (unsigned char)std::max(0.0f, std::min(255.0f, val));
            }
        }
    }
}

// Round-trip consistency test driver.
// For `num_tests` random frame pairs: estimates a mesh F0→F1, then measures
// (1) forward-warp PSNR against F1, (2) full round-trip PSNR (warp forward
// then with the negated mesh, compared to F0), and (3) half-step symmetric
// round-trip PSNR (±½mesh, the MC-lifting pattern). Saves PNG visualizations
// for the first test and any round-trip below 30 dB.
int main(int argc, char** argv) {
    const char* video_file = (argc > 1) ? argv[1] : "test_video.mp4";
    int num_tests = (argc > 2) ? atoi(argv[2]) : 5;

    printf("Opening video: %s\n", video_file);
    cv::VideoCapture cap(video_file);

    if (!cap.isOpened()) {
        fprintf(stderr, "Error: Cannot open video file\n");
        return 1;
    }

    int total_frames = (int)cap.get(cv::CAP_PROP_FRAME_COUNT);
    int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
    int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);

    printf("Video: %dx%d, %d frames\n", width, height, total_frames);

    // Mesh dimensions (approximately 32×32-pixel cells, at least 2×2 points).
    int mesh_cell_size = 32;
    int mesh_w = (width + mesh_cell_size - 1) / mesh_cell_size;
    int mesh_h = (height + mesh_cell_size - 1) / mesh_cell_size;
    if (mesh_w < 2) mesh_w = 2;
    if (mesh_h < 2) mesh_h = 2;

    printf("Mesh: %dx%d (approx %dx%d px cells)\n\n",
           mesh_w, mesh_h, width / mesh_w, height / mesh_h);

    float smoothness = 0.5f;
    int smooth_iterations = 8;

    srand(time(NULL));

    double total_forward_psnr = 0.0;
    double total_roundtrip_psnr = 0.0;
    double total_half_roundtrip_psnr = 0.0;

    for (int test = 0; test < num_tests; test++) {
        // Random pair away from the clip edges.
        // NOTE(review): assumes total_frames > 10 — `% (total_frames - 10)`
        // is UB/zero-division for shorter clips; verify input requirements.
        int frame_num = 5 + rand() % (total_frames - 10);

        printf("[Test %d/%d] Frame pair %d → %d\n", test + 1, num_tests, frame_num - 1, frame_num);

        cap.set(cv::CAP_PROP_POS_FRAMES, frame_num - 1);
        cv::Mat frame0, frame1;
        cap >> frame0;
        cap >> frame1;

        if (frame0.empty() || frame1.empty()) {
            fprintf(stderr, "Error reading frames\n");
            continue;
        }

        // Encoder consumes RGB; OpenCV decodes BGR.
        cv::Mat frame0_rgb, frame1_rgb;
        cv::cvtColor(frame0, frame0_rgb, cv::COLOR_BGR2RGB);
        cv::cvtColor(frame1, frame1_rgb, cv::COLOR_BGR2RGB);

        // Compute mesh (F0 → F1): dense flow → coarse mesh → smoothing.
        float *flow_x = nullptr, *flow_y = nullptr;
        estimate_motion_optical_flow(frame0_rgb.data, frame1_rgb.data,
                                     width, height, &flow_x, &flow_y);

        int16_t *mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        int16_t *mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        build_mesh_from_flow(flow_x, flow_y, width, height, mesh_w, mesh_h, mesh_dx, mesh_dy);
        smooth_mesh_laplacian(mesh_dx, mesh_dy, mesh_w, mesh_h, smoothness, smooth_iterations);

        // Create inverted mesh (negated vectors approximate the inverse warp).
        int16_t *inv_mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        int16_t *inv_mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        for (int i = 0; i < mesh_w * mesh_h; i++) {
            inv_mesh_dx[i] = -mesh_dx[i];
            inv_mesh_dy[i] = -mesh_dy[i];
        }

        // Create half-mesh (±½ motion) for the symmetric lifting test.
        int16_t *half_mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        int16_t *half_mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        int16_t *neg_half_mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        int16_t *neg_half_mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
        for (int i = 0; i < mesh_w * mesh_h; i++) {
            half_mesh_dx[i] = mesh_dx[i] / 2;
            half_mesh_dy[i] = mesh_dy[i] / 2;
            neg_half_mesh_dx[i] = -half_mesh_dx[i];
            neg_half_mesh_dy[i] = -half_mesh_dy[i];
        }

        // TEST 1: Full forward warp quality (F0 → F1).
        cv::Mat warped_forward;
        apply_mesh_warp_rgb(frame0, warped_forward, mesh_dx, mesh_dy, mesh_w, mesh_h);

        double forward_mse = 0.0;
        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++) {
                for (int c = 0; c < 3; c++) {
                    double diff = (double)warped_forward.at<cv::Vec3b>(y, x)[c] -
                                  (double)frame1.at<cv::Vec3b>(y, x)[c];
                    forward_mse += diff * diff;
                }
            }
        }
        forward_mse /= (width * height * 3);
        // 999.0 stands in for "infinite" PSNR on an exact match.
        double forward_psnr = (forward_mse > 0) ? 10.0 * log10(255.0 * 255.0 / forward_mse) : 999.0;
        total_forward_psnr += forward_psnr;

        // TEST 2: Full round-trip (F0 → forward → backward → F0').
        cv::Mat roundtrip;
        apply_mesh_warp_rgb(warped_forward, roundtrip, inv_mesh_dx, inv_mesh_dy, mesh_w, mesh_h);

        double roundtrip_mse = 0.0;
        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++) {
                for (int c = 0; c < 3; c++) {
                    double diff = (double)roundtrip.at<cv::Vec3b>(y, x)[c] -
                                  (double)frame0.at<cv::Vec3b>(y, x)[c];
                    roundtrip_mse += diff * diff;
                }
            }
        }
        roundtrip_mse /= (width * height * 3);
        double roundtrip_psnr = (roundtrip_mse > 0) ? 10.0 * log10(255.0 * 255.0 / roundtrip_mse) : 999.0;
        total_roundtrip_psnr += roundtrip_psnr;

        // TEST 3: Half-step symmetric round-trip (MC-lifting style).
        // F0 → +½mesh, then → -½mesh (should return to F0).
        cv::Mat half_forward, half_roundtrip;
        apply_mesh_warp_rgb(frame0, half_forward, half_mesh_dx, half_mesh_dy, mesh_w, mesh_h);
        apply_mesh_warp_rgb(half_forward, half_roundtrip, neg_half_mesh_dx, neg_half_mesh_dy, mesh_w, mesh_h);

        double half_roundtrip_mse = 0.0;
        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++) {
                for (int c = 0; c < 3; c++) {
                    double diff = (double)half_roundtrip.at<cv::Vec3b>(y, x)[c] -
                                  (double)frame0.at<cv::Vec3b>(y, x)[c];
                    half_roundtrip_mse += diff * diff;
                }
            }
        }
        half_roundtrip_mse /= (width * height * 3);
        double half_roundtrip_psnr = (half_roundtrip_mse > 0) ?
            10.0 * log10(255.0 * 255.0 / half_roundtrip_mse) : 999.0;
        total_half_roundtrip_psnr += half_roundtrip_psnr;

        printf("  Forward warp (F0→F1):     PSNR = %.2f dB\n", forward_psnr);
        printf("  Full round-trip (F0→F0'): PSNR = %.2f dB\n", roundtrip_psnr);
        printf("  Half round-trip (±½mesh): PSNR = %.2f dB\n", half_roundtrip_psnr);

        // Compute motion stats (mesh values are 1/8-pixel fixed point).
        float avg_motion = 0.0f, max_motion = 0.0f;
        for (int i = 0; i < mesh_w * mesh_h; i++) {
            float dx = mesh_dx[i] / 8.0f;
            float dy = mesh_dy[i] / 8.0f;
            float motion = sqrtf(dx * dx + dy * dy);
            avg_motion += motion;
            if (motion > max_motion) max_motion = motion;
        }
        avg_motion /= (mesh_w * mesh_h);
        printf("  Motion: avg=%.2f px, max=%.2f px\n\n", avg_motion, max_motion);

        // Save visualization for the first test and any poor round-trip.
        if (test == 0 || roundtrip_psnr < 30.0) {
            char filename[256];
            sprintf(filename, "roundtrip_%04d_original.png", frame_num);
            cv::imwrite(filename, frame0);
            sprintf(filename, "roundtrip_%04d_forward.png", frame_num);
            cv::imwrite(filename, warped_forward);
            sprintf(filename, "roundtrip_%04d_roundtrip.png", frame_num);
            cv::imwrite(filename, roundtrip);

            // Difference image, amplified 5× for visibility.
            cv::Mat diff_roundtrip = cv::Mat::zeros(height, width, CV_8UC3);
            for (int y = 0; y < height; y++) {
                for (int x = 0; x < width; x++) {
                    for (int c = 0; c < 3; c++) {
                        int diff = abs((int)roundtrip.at<cv::Vec3b>(y, x)[c] -
                                       (int)frame0.at<cv::Vec3b>(y, x)[c]);
                        diff_roundtrip.at<cv::Vec3b>(y, x)[c] = std::min(diff * 5, 255);
                    }
                }
            }
            sprintf(filename, "roundtrip_%04d_diff.png", frame_num);
            cv::imwrite(filename, diff_roundtrip);
            printf("  Saved visualization: roundtrip_%04d_*.png\n\n", frame_num);
        }

        free(flow_x);
        free(flow_y);
        free(mesh_dx);
        free(mesh_dy);
        free(inv_mesh_dx);
        free(inv_mesh_dy);
        free(half_mesh_dx);
        free(half_mesh_dy);
        free(neg_half_mesh_dx);
        free(neg_half_mesh_dy);
    }

    printf("===========================================\n");
    printf("Average Results (%d tests):\n", num_tests);
    printf("  Forward warp quality:  %.2f dB\n", total_forward_psnr / num_tests);
    printf("  Full round-trip error: %.2f dB\n", total_roundtrip_psnr / num_tests);
    printf("  Half round-trip error: %.2f dB\n", total_half_roundtrip_psnr / num_tests);
    printf("===========================================\n\n");

    // 35 dB is the acceptance bar for MC-lifting invertibility.
    if (total_roundtrip_psnr / num_tests < 35.0) {
        printf("WARNING: Round-trip PSNR < 35 dB indicates poor invertibility!\n");
        printf("This will cause MC-lifting to accumulate errors and hurt compression.\n");
        printf("Bilinear interpolation artifacts are likely the culprit.\n");
    } else {
        printf("Round-trip consistency looks acceptable (>35 dB).\n");
    }

    cap.release();
    return 0;
}
        cell_w, int cell_h,
        float threshold,
        int16_t *out_tx, int16_t *out_ty,
        int16_t *out_a11, int16_t *out_a12,
        int16_t *out_a21, int16_t *out_a22
    );
}

// Mesh warp with bilinear interpolation and optional per-cell affine support.
// Translation vectors are bilinearly blended from the four surrounding
// control points; cells flagged in `affine_mask` additionally apply a
// 2×2 affine matrix (1/256 fixed point) around the cell centre.
// NOTE(review): the <cv::Vec3b> template arguments on Mat::at were lost in
// a text-mangling pass and have been restored here — confirm against the
// original file.
static void apply_mesh_warp_rgb(
    const cv::Mat &src,        // Input BGR image
    cv::Mat &dst,              // Output warped BGR image
    const int16_t *mesh_dx,    // Mesh motion vectors (1/8 pixel)
    const int16_t *mesh_dy,
    const uint8_t *affine_mask, // 1=affine, 0=translation
    const int16_t *affine_a11,
    const int16_t *affine_a12,
    const int16_t *affine_a21,
    const int16_t *affine_a22,
    int mesh_w, int mesh_h
) {
    int width = src.cols;
    int height = src.rows;
    int cell_w = width / mesh_w;
    int cell_h = height / mesh_h;

    dst = cv::Mat(height, width, CV_8UC3);

    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            int cell_x = x / cell_w;
            int cell_y = y / cell_h;

            // Clamp to valid mesh range (so a 2×2 corner neighbourhood
            // exists; pixels in the last row/column of cells reuse the
            // second-to-last cell's corners).
            cell_x = std::min(cell_x, mesh_w - 2);
            cell_y = std::min(cell_y, mesh_h - 2);

            // Four corner control points
            int idx_00 = cell_y * mesh_w + cell_x;
            int idx_10 = idx_00 + 1;
            int idx_01 = (cell_y + 1) * mesh_w + cell_x;
            int idx_11 = idx_01 + 1;

            // Control point positions (cell centres)
            float cp_x0 = cell_x * cell_w + cell_w / 2.0f;
            float cp_y0 = cell_y * cell_h + cell_h / 2.0f;
            float cp_x1 = (cell_x + 1) * cell_w + cell_w / 2.0f;
            float cp_y1 = (cell_y + 1) * cell_h + cell_h / 2.0f;

            // Local coordinates within the cell, clamped to [0,1]
            float alpha = (x - cp_x0) / (cp_x1 - cp_x0);
            float beta = (y - cp_y0) / (cp_y1 - cp_y0);
            alpha = std::max(0.0f, std::min(1.0f, alpha));
            beta = std::max(0.0f, std::min(1.0f, beta));

            // Bilinear interpolation of motion vectors (1/8-px → px)
            float dx = (1 - alpha) * (1 - beta) * (mesh_dx[idx_00] / 8.0f) +
                       alpha * (1 - beta) * (mesh_dx[idx_10] / 8.0f) +
                       (1 - alpha) * beta * (mesh_dx[idx_01] / 8.0f) +
                       alpha * beta * (mesh_dx[idx_11] / 8.0f);

            float dy = (1 - alpha) * (1 - beta) * (mesh_dy[idx_00] / 8.0f) +
                       alpha * (1 - beta) * (mesh_dy[idx_10] / 8.0f) +
                       (1 - alpha) * beta * (mesh_dy[idx_01] / 8.0f) +
                       alpha * beta * (mesh_dy[idx_11] / 8.0f);

            // Check if we're using affine in this cell.
            // For simplicity, just use the top-left corner's affine
            // parameters — this can introduce discontinuities at cell
            // borders, unlike the bilinearly-blended translation term.
            int cell_idx = cell_y * mesh_w + cell_x;
            if (affine_mask && affine_mask[cell_idx]) {
                // Apply affine transform around the cell centre
                float rel_x = x - (cell_x * cell_w + cell_w / 2.0f);
                float rel_y = y - (cell_y * cell_h + cell_h / 2.0f);

                // 1/256 fixed point → float
                float a11 = affine_a11[cell_idx] / 256.0f;
                float a12 = affine_a12[cell_idx] / 256.0f;
                float a21 = affine_a21[cell_idx] / 256.0f;
                float a22 = affine_a22[cell_idx] / 256.0f;

                // Affine warp: [x'] = [a11 a12][x] + [dx]
                //              [y']   [a21 a22][y]   [dy]
                dx = a11 * rel_x + a12 * rel_y + dx;
                dy = a21 * rel_x + a22 * rel_y + dy;
            }

            // Source coordinates (inverse warp)
            float src_x = x + dx;
            float src_y = y + dy;

            // Bilinear interpolation of the source pixel
            int sx0 = (int)floorf(src_x);
            int sy0 = (int)floorf(src_y);
            int sx1 = sx0 + 1;
            int sy1 = sy0 + 1;

            sx0 = std::max(0, std::min(width - 1, sx0));
            sy0 = std::max(0, std::min(height - 1, sy0));
            sx1 = std::max(0, std::min(width - 1, sx1));
            sy1 = std::max(0, std::min(height - 1, sy1));

            // Fractions taken after clamping (may extrapolate at frame edges)
            float fx = src_x - sx0;
            float fy = src_y - sy0;

            // Interpolate each channel
            for (int c = 0; c < 3; c++) {
                float val_00 = src.at<cv::Vec3b>(sy0, sx0)[c];
                float val_10 = src.at<cv::Vec3b>(sy0, sx1)[c];
                float val_01 = src.at<cv::Vec3b>(sy1, sx0)[c];
                float val_11 = src.at<cv::Vec3b>(sy1, sx1)[c];

                float val = (1 - fx) * (1 - fy) * val_00 +
                            fx * (1 - fy) * val_10 +
                            (1 - fx) * fy * val_01 +
                            fx * fy * val_11;

                dst.at<cv::Vec3b>(y, x)[c] = (unsigned char)std::max(0.0f, std::min(255.0f, val));
            }
        }
    }
}

// Create visualization overlay showing affine cells
// (continues past the end of this chunk)
static void create_affine_overlay(
    cv::Mat &img,
    const uint8_t *affine_mask,
    int mesh_w, int mesh_h
) {
    int width = img.cols;
    int height = img.rows;
    int cell_w
= width / mesh_w; + int cell_h = height / mesh_h; + + for (int my = 0; my < mesh_h; my++) { + for (int mx = 0; mx < mesh_w; mx++) { + int idx = my * mesh_w + mx; + + if (affine_mask[idx]) { + // Draw green rectangle for affine cells + int x0 = mx * cell_w; + int y0 = my * cell_h; + int x1 = (mx + 1) * cell_w; + int y1 = (my + 1) * cell_h; + + cv::rectangle(img, + cv::Point(x0, y0), + cv::Point(x1, y1), + cv::Scalar(0, 255, 0), 1); + } + } + } +} + +int main(int argc, char** argv) { + const char* video_file = (argc > 1) ? argv[1] : "test_video.mp4"; + int num_test_frames = (argc > 2) ? atoi(argv[2]) : 5; + + printf("Opening video: %s\n", video_file); + cv::VideoCapture cap(video_file); + + if (!cap.isOpened()) { + fprintf(stderr, "Error: Cannot open video file %s\n", video_file); + return 1; + } + + int total_frames = (int)cap.get(cv::CAP_PROP_FRAME_COUNT); + int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH); + int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT); + + printf("Video: %dx%d, %d frames\n", width, height, total_frames); + + if (total_frames < 10) { + fprintf(stderr, "Error: Video too short (need at least 10 frames)\n"); + return 1; + } + + // Calculate mesh dimensions (32×32 pixel cells, matches current encoder) + int mesh_cell_size = 32; + int mesh_w = (width + mesh_cell_size - 1) / mesh_cell_size; + int mesh_h = (height + mesh_cell_size - 1) / mesh_cell_size; + if (mesh_w < 2) mesh_w = 2; + if (mesh_h < 2) mesh_h = 2; + + printf("Mesh: %dx%d (approx %dx%d px cells)\n", + mesh_w, mesh_h, width / mesh_w, height / mesh_h); + + // Encoder parameters (match current encoder_tav.c settings) + float smoothness = 0.5f; // Mesh smoothness weight + int smooth_iterations = 8; // Smoothing iterations + float affine_threshold = 0.40f; // 40% improvement required for affine + + printf("Settings: smoothness=%.2f, iterations=%d, affine_threshold=%.0f%%\n", + smoothness, smooth_iterations, affine_threshold * 100.0f); + + // Seed random number generator + 
srand(time(NULL)); + + // Pick random frames (avoid first and last 5 frames) + printf("\nTesting %d random frame pairs:\n", num_test_frames); + for (int test = 0; test < num_test_frames; test++) { + // Pick random frame (ensure we have a previous frame) + int frame_num = 5 + rand() % (total_frames - 10); + + printf("\n[Test %d/%d] Warping frame %d → frame %d (inverse warp)\n", + test + 1, num_test_frames, frame_num - 1, frame_num); + + // Read previous frame (source for warping) + cap.set(cv::CAP_PROP_POS_FRAMES, frame_num - 1); + + cv::Mat prev_frame; + cap >> prev_frame; + if (prev_frame.empty()) { + fprintf(stderr, "Error reading frame %d\n", frame_num - 1); + continue; + } + + // Read current frame (target to match) + cv::Mat curr_frame; + cap >> curr_frame; + if (curr_frame.empty()) { + fprintf(stderr, "Error reading frame %d\n", frame_num); + continue; + } + + // Convert to RGB for block matching + cv::Mat prev_rgb, curr_rgb; + cv::cvtColor(prev_frame, prev_rgb, cv::COLOR_BGR2RGB); + cv::cvtColor(curr_frame, curr_rgb, cv::COLOR_BGR2RGB); + + // Compute hierarchical block matching (replaces optical flow) + printf(" Computing hierarchical block matching...\n"); + float *flow_x = nullptr, *flow_y = nullptr; + estimate_motion_optical_flow( + prev_rgb.data, curr_rgb.data, + width, height, + &flow_x, &flow_y + ); + + // Build mesh from flow + printf(" Building mesh from block matches...\n"); + int16_t *mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t)); + int16_t *mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t)); + build_mesh_from_flow(flow_x, flow_y, width, height, mesh_w, mesh_h, mesh_dx, mesh_dy); + + // Apply Laplacian smoothing + printf(" Applying Laplacian smoothing (%d iterations, %.2f weight)...\n", + smooth_iterations, smoothness); + smooth_mesh_laplacian(mesh_dx, mesh_dy, mesh_w, mesh_h, smoothness, smooth_iterations); + + // Estimate selective per-cell affine transforms + printf(" Estimating selective affine transforms 
(threshold=%.0f%%)...\n", + affine_threshold * 100.0f); + uint8_t *affine_mask = (uint8_t*)calloc(mesh_w * mesh_h, sizeof(uint8_t)); + int16_t *affine_a11 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t)); + int16_t *affine_a12 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t)); + int16_t *affine_a21 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t)); + int16_t *affine_a22 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t)); + + int cell_w = width / mesh_w; + int cell_h = height / mesh_h; + int affine_count = 0; + + for (int cy = 0; cy < mesh_h; cy++) { + for (int cx = 0; cx < mesh_w; cx++) { + int cell_idx = cy * mesh_w + cx; + + int16_t tx, ty, a11, a12, a21, a22; + int use_affine = estimate_cell_affine( + flow_x, flow_y, + width, height, + cx, cy, cell_w, cell_h, + affine_threshold, + &tx, &ty, &a11, &a12, &a21, &a22 + ); + + affine_mask[cell_idx] = use_affine ? 1 : 0; + mesh_dx[cell_idx] = tx; + mesh_dy[cell_idx] = ty; + affine_a11[cell_idx] = a11; + affine_a12[cell_idx] = a12; + affine_a21[cell_idx] = a21; + affine_a22[cell_idx] = a22; + + if (use_affine) affine_count++; + } + } + + printf(" Affine usage: %d/%d cells (%.1f%%)\n", + affine_count, mesh_w * mesh_h, + 100.0f * affine_count / (mesh_w * mesh_h)); + + // Warp previous frame to current frame + printf(" Warping frame with mesh + affine...\n"); + cv::Mat warped; + apply_mesh_warp_rgb(prev_frame, warped, mesh_dx, mesh_dy, + affine_mask, affine_a11, affine_a12, affine_a21, affine_a22, + mesh_w, mesh_h); + + // Create visualization with affine overlay + cv::Mat warped_viz = warped.clone(); + create_affine_overlay(warped_viz, affine_mask, mesh_w, mesh_h); + + // Compute MSE between warped and target + double mse = 0.0; + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + for (int c = 0; c < 3; c++) { + double diff = (double)warped.at(y, x)[c] - + (double)curr_frame.at(y, x)[c]; + mse += diff * diff; + } + } + } + mse /= (width * height * 3); + double psnr = (mse > 0) ? 
10.0 * log10(255.0 * 255.0 / mse) : 999.0; + printf(" Warp quality: MSE=%.2f, PSNR=%.2f dB\n", mse, psnr); + + // Save images + char filename[256]; + sprintf(filename, "test_mesh_frame_%04d_source.png", frame_num - 1); + cv::imwrite(filename, prev_frame); + printf(" Saved source: %s\n", filename); + + sprintf(filename, "test_mesh_frame_%04d_warped.png", frame_num); + cv::imwrite(filename, warped); + printf(" Saved warped: %s\n", filename); + + sprintf(filename, "test_mesh_frame_%04d_warped_viz.png", frame_num); + cv::imwrite(filename, warped_viz); + printf(" Saved warped+viz (green=affine): %s\n", filename); + + sprintf(filename, "test_mesh_frame_%04d_target.png", frame_num); + cv::imwrite(filename, curr_frame); + printf(" Saved target: %s\n", filename); + + // Compute difference image + cv::Mat diff_img = cv::Mat::zeros(height, width, CV_8UC3); + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + for (int c = 0; c < 3; c++) { + int diff = abs((int)warped.at(y, x)[c] - + (int)curr_frame.at(y, x)[c]); + diff_img.at(y, x)[c] = std::min(diff * 3, 255); // Amplify for visibility + } + } + } + sprintf(filename, "test_mesh_frame_%04d_diff.png", frame_num); + cv::imwrite(filename, diff_img); + printf(" Saved difference (amplified 3x): %s\n", filename); + + // Compute motion statistics + float max_motion = 0.0f, avg_motion = 0.0f; + for (int i = 0; i < mesh_w * mesh_h; i++) { + float dx = mesh_dx[i] / 8.0f; + float dy = mesh_dy[i] / 8.0f; + float motion = sqrtf(dx * dx + dy * dy); + avg_motion += motion; + if (motion > max_motion) max_motion = motion; + } + avg_motion /= (mesh_w * mesh_h); + printf(" Motion: avg=%.2f px, max=%.2f px\n", avg_motion, max_motion); + + // Cleanup + free(flow_x); + free(flow_y); + free(mesh_dx); + free(mesh_dy); + free(affine_mask); + free(affine_a11); + free(affine_a12); + free(affine_a21); + free(affine_a22); + } + + printf("\nDone! 
Check output images:\n"); + printf(" *_source.png: Original frame before warping\n"); + printf(" *_warped.png: Warped frame (should match target)\n"); + printf(" *_warped_viz.png: Warped with green overlay showing affine cells\n"); + printf(" *_target.png: Target frame to match\n"); + printf(" *_diff.png: Difference image (should be mostly black if warp is good)\n"); + + cap.release(); + return 0; +}