TAV: will replace frame alignment with something else, or maybe with nothing

This commit is contained in:
minjaesong
2025-10-17 06:48:21 +09:00
parent 93622fc8ca
commit 3b9e02b17f
3 changed files with 299 additions and 60 deletions

View File

@@ -999,7 +999,18 @@ try {
// Read GOP size (number of frames in this GOP, 1-16) // Read GOP size (number of frames in this GOP, 1-16)
const gopSize = seqread.readOneByte() const gopSize = seqread.readOneByte()
// Read motion vectors (quarter-pixel units, int16) // Read canvas expansion margins (4 bytes)
// Encoder expands canvas to preserve all original pixels from all aligned frames
const marginLeft = seqread.readOneByte()
const marginRight = seqread.readOneByte()
const marginTop = seqread.readOneByte()
const marginBottom = seqread.readOneByte()
// Calculate expanded canvas dimensions
const canvasWidth = header.width + marginLeft + marginRight
const canvasHeight = header.height + marginTop + marginBottom
// Read motion vectors (1/16-pixel units, int16)
// Encoder writes ALL motion vectors including frame 0 // Encoder writes ALL motion vectors including frame 0
let motionX = new Array(gopSize) let motionX = new Array(gopSize)
let motionY = new Array(gopSize) let motionY = new Array(gopSize)
@@ -1042,7 +1053,7 @@ try {
try { try {
let decodeStart = sys.nanoTime() let decodeStart = sys.nanoTime()
// Call GOP decoder // Call GOP decoder with canvas expansion information
const [r1, r2] = graphics.tavDecodeGopUnified( const [r1, r2] = graphics.tavDecodeGopUnified(
compressedPtr, compressedPtr,
compressedSize, compressedSize,
@@ -1050,8 +1061,12 @@ try {
motionX, motionX,
motionY, motionY,
gopRGBBuffers, // Array of output buffer addresses gopRGBBuffers, // Array of output buffer addresses
header.width, header.width, // Original frame width
header.height, header.height, // Original frame height
canvasWidth, // Expanded canvas width (preserves all pixels)
canvasHeight, // Expanded canvas height (preserves all pixels)
marginLeft, // Left margin
marginTop, // Top margin
header.qualityLevel, header.qualityLevel,
QLUT[header.qualityY], QLUT[header.qualityY],
QLUT[header.qualityCo], QLUT[header.qualityCo],

View File

@@ -3181,11 +3181,35 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
} }
/**
* Symmetric padding (mirroring) for edge handling in motion compensation.
* This provides smoother edges than simple clamping/replication.
*
* @param coord The coordinate to mirror if out of bounds
* @param size The dimension size (width or height)
* @return The mirrored coordinate within valid range [0, size-1]
*/
/**
 * Reflects an out-of-range coordinate back into the valid range [0, size-1]
 * using half-sample symmetric (mirror) padding, for edge handling in motion
 * compensation. Gives smoother edges than plain clamp/replicate padding.
 *
 * Only one mirror period is applied (-1 -> 0, -2 -> 1, ...; size -> size-1,
 * size+1 -> size-2, ...); offsets beyond one full period fall through to the
 * final clamp.
 *
 * @param coord coordinate that may lie outside the frame
 * @param size  dimension extent (width or height)
 * @return a coordinate guaranteed to be within [0, size-1]
 */
private fun symmetricPadding(coord: Int, size: Int): Int {
    val reflected = when {
        coord < 0     -> -coord - 1           // mirror across the left/top edge
        coord >= size -> 2 * size - coord - 1 // mirror across the right/bottom edge
        else          -> coord                // already in range
    }
    // Safety net for shifts larger than one mirror period.
    return reflected.coerceIn(0, size - 1)
}
private fun tevHandleMotionBlockTwoPass(startX: Int, startY: Int, mvX: Int, mvY: Int, private fun tevHandleMotionBlockTwoPass(startX: Int, startY: Int, mvX: Int, mvY: Int,
currentRGBAddr: Long, prevRGBAddr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
width: Int, height: Int, thisAddrIncVec: Int, prevAddrIncVec: Int, width: Int, height: Int, thisAddrIncVec: Int, prevAddrIncVec: Int,
debugMotionVectors: Boolean) { debugMotionVectors: Boolean) {
// Copy 16x16 block with motion compensation // Copy 16x16 block with motion compensation using symmetric padding
for (py in 0 until 16) { for (py in 0 until 16) {
val y = startY + py val y = startY + py
if (y >= height) break if (y >= height) break
@@ -3194,8 +3218,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val x = startX + px val x = startX + px
if (x >= width) break if (x >= width) break
val srcX = (x + mvX).coerceIn(0, width - 1) // Use symmetric padding instead of clamping for smoother edges
val srcY = (y + mvY).coerceIn(0, height - 1) val srcX = symmetricPadding(x + mvX, width)
val srcY = symmetricPadding(y + mvY, height)
val srcOffset = (srcY * width + srcX) * 3 val srcOffset = (srcY * width + srcX) * 3
val dstOffset = (y * width + x) * 3 val dstOffset = (y * width + x) * 3
@@ -3226,8 +3251,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val x = startX + px val x = startX + px
if (x >= width) break if (x >= width) break
val srcX = (x + mvX).coerceIn(0, width - 1) // Use symmetric padding for smoother edges (commented-out code updated for consistency)
val srcY = (y + mvY).coerceIn(0, height - 1) val srcX = symmetricPadding(x + mvX, width)
val srcY = symmetricPadding(y + mvY, height)
val srcOffset = (srcY * width + srcX) * 3 val srcOffset = (srcY * width + srcX) * 3
val r = vm.peek(prevRGBAddr + srcOffset * prevAddrIncVec)?.toInt() ?: 0 val r = vm.peek(prevRGBAddr + srcOffset * prevAddrIncVec)?.toInt() ?: 0
@@ -6205,6 +6231,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
/** /**
* Apply inverse translation (motion compensation) to a frame. * Apply inverse translation (motion compensation) to a frame.
* Inverse operation: shifts by +dx, +dy (opposite of forward encoder). * Inverse operation: shifts by +dx, +dy (opposite of forward encoder).
* Uses symmetric boundary extension (mirror padding) to match encoder.
* *
* @param frameData Input frame data to shift * @param frameData Input frame data to shift
* @param width Frame width * @param width Frame width
@@ -6215,14 +6242,28 @@ class GraphicsJSR223Delegate(private val vm: VM) {
private fun applyInverseTranslation(frameData: FloatArray, width: Int, height: Int, dx: Int, dy: Int) { private fun applyInverseTranslation(frameData: FloatArray, width: Int, height: Int, dx: Int, dy: Int) {
val output = FloatArray(width * height) val output = FloatArray(width * height)
// Apply inverse translation with boundary clamping // Apply inverse translation with symmetric boundary extension (mirror padding)
for (y in 0 until height) { for (y in 0 until height) {
for (x in 0 until width) { for (x in 0 until width) {
// Inverse: shift by +dx, +dy (opposite of encoder's -dx, -dy) // Inverse: shift by +dx, +dy (opposite of encoder's -dx, -dy)
var srcX = x + dx var srcX = x + dx
var srcY = y + dy var srcY = y + dy
// Clamp to frame boundaries // Symmetric extension at boundaries (mirror padding)
// This gives smooth edges instead of replicated stripes
if (srcX < 0) {
srcX = -srcX - 1 // Mirror left edge
} else if (srcX >= width) {
srcX = 2 * width - srcX - 1 // Mirror right edge
}
if (srcY < 0) {
srcY = -srcY - 1 // Mirror top edge
} else if (srcY >= height) {
srcY = 2 * height - srcY - 1 // Mirror bottom edge
}
// Clamp after mirroring (in case of very large shifts)
srcX = srcX.coerceIn(0, width - 1) srcX = srcX.coerceIn(0, width - 1)
srcY = srcY.coerceIn(0, height - 1) srcY = srcY.coerceIn(0, height - 1)
@@ -6244,8 +6285,12 @@ class GraphicsJSR223Delegate(private val vm: VM) {
* @param motionVectorsX X motion vectors in 1/16-pixel units * @param motionVectorsX X motion vectors in 1/16-pixel units
* @param motionVectorsY Y motion vectors in 1/16-pixel units * @param motionVectorsY Y motion vectors in 1/16-pixel units
* @param outputRGBAddrs Array of output RGB buffer addresses * @param outputRGBAddrs Array of output RGB buffer addresses
* @param width Frame width * @param width Original frame width (output dimensions)
* @param height Frame height * @param height Original frame height (output dimensions)
* @param canvasWidth Expanded canvas width (for motion compensation)
* @param canvasHeight Expanded canvas height (for motion compensation)
* @param marginLeft Left margin to crop from expanded canvas
* @param marginTop Top margin to crop from expanded canvas
* @param qIndex Quality index * @param qIndex Quality index
* @param qYGlobal Global Y quantizer * @param qYGlobal Global Y quantizer
* @param qCoGlobal Global Co quantizer * @param qCoGlobal Global Co quantizer
@@ -6265,6 +6310,10 @@ class GraphicsJSR223Delegate(private val vm: VM) {
outputRGBAddrs: LongArray, outputRGBAddrs: LongArray,
width: Int, width: Int,
height: Int, height: Int,
canvasWidth: Int,
canvasHeight: Int,
marginLeft: Int,
marginTop: Int,
qIndex: Int, qIndex: Int,
qYGlobal: Int, qYGlobal: Int,
qCoGlobal: Int, qCoGlobal: Int,
@@ -6280,7 +6329,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
dbgOut["qCg"] = qCgGlobal dbgOut["qCg"] = qCgGlobal
dbgOut["frameMode"] = "G" dbgOut["frameMode"] = "G"
val numPixels = width * height // Use expanded canvas dimensions for DWT processing
val canvasPixels = canvasWidth * canvasHeight
val outputPixels = width * height
// Step 1: Decompress unified GOP block // Step 1: Decompress unified GOP block
val compressedData = ByteArray(compressedSize) val compressedData = ByteArray(compressedSize)
@@ -6305,17 +6356,17 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val quantizedCoeffs = tavPostprocessGopUnified( val quantizedCoeffs = tavPostprocessGopUnified(
decompressedData, decompressedData,
gopSize, gopSize,
numPixels, canvasPixels, // Use expanded canvas size
channelLayout channelLayout
) )
// Step 3: Allocate GOP buffers for float coefficients // Step 3: Allocate GOP buffers for float coefficients (expanded canvas size)
val gopY = Array(gopSize) { FloatArray(numPixels) } val gopY = Array(gopSize) { FloatArray(canvasPixels) }
val gopCo = Array(gopSize) { FloatArray(numPixels) } val gopCo = Array(gopSize) { FloatArray(canvasPixels) }
val gopCg = Array(gopSize) { FloatArray(numPixels) } val gopCg = Array(gopSize) { FloatArray(canvasPixels) }
// Step 4: Calculate subband layout (needed for perceptual dequantization) // Step 4: Calculate subband layout for expanded canvas (needed for perceptual dequantization)
val subbands = calculateSubbandLayout(width, height, spatialLevels) val subbands = calculateSubbandLayout(canvasWidth, canvasHeight, spatialLevels)
// Step 5: Dequantize with temporal-spatial scaling // Step 5: Dequantize with temporal-spatial scaling
for (t in 0 until gopSize) { for (t in 0 until gopSize) {
@@ -6347,49 +6398,60 @@ class GraphicsJSR223Delegate(private val vm: VM) {
) )
} }
// Step 6: Apply inverse 3D DWT (spatial first, then temporal) // Step 6: Apply inverse 3D DWT (spatial first, then temporal) on expanded canvas
tavApplyInverse3DDWT(gopY, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter) tavApplyInverse3DDWT(gopY, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
tavApplyInverse3DDWT(gopCo, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter) tavApplyInverse3DDWT(gopCo, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
tavApplyInverse3DDWT(gopCg, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter) tavApplyInverse3DDWT(gopCg, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
// Step 7: Apply inverse motion compensation (shift frames back) // Step 7: Apply inverse motion compensation (shift frames back) on expanded canvas
// Note: Motion vectors are in 1/16-pixel units, cumulative relative to frame 0 // Note: Motion vectors are in 1/16-pixel units, cumulative relative to frame 0
for (t in 1 until gopSize) { // Skip frame 0 (reference) for (t in 1 until gopSize) { // Skip frame 0 (reference)
val dx = motionVectorsX[t] / 16 // Convert to pixel units val dx = motionVectorsX[t] / 16 // Convert to pixel units
val dy = motionVectorsY[t] / 16 val dy = motionVectorsY[t] / 16
if (dx != 0 || dy != 0) { if (dx != 0 || dy != 0) {
applyInverseTranslation(gopY[t], width, height, dx, dy) applyInverseTranslation(gopY[t], canvasWidth, canvasHeight, dx, dy)
applyInverseTranslation(gopCo[t], width, height, dx, dy) applyInverseTranslation(gopCo[t], canvasWidth, canvasHeight, dx, dy)
applyInverseTranslation(gopCg[t], width, height, dx, dy) applyInverseTranslation(gopCg[t], canvasWidth, canvasHeight, dx, dy)
} }
} }
// Step 8: Convert each frame to RGB and write to output buffers // Step 8: Crop expanded canvas to original dimensions and convert to RGB
for (t in 0 until gopSize) { for (t in 0 until gopSize) {
val rgbAddr = outputRGBAddrs[t] val rgbAddr = outputRGBAddrs[t]
for (i in 0 until numPixels) { // Crop from expanded canvas (canvasWidth x canvasHeight) to output (width x height)
val y = gopY[t][i] for (row in 0 until height) {
val co = gopCo[t][i] for (col in 0 until width) {
val cg = gopCg[t][i] // Source pixel in expanded canvas
val canvasX = col + marginLeft
val canvasY = row + marginTop
val canvasIdx = canvasY * canvasWidth + canvasX
// YCoCg-R to RGB conversion // Destination pixel in output buffer
val tmp = y - (cg / 2.0f) val outIdx = row * width + col
val g = cg + tmp
val b = tmp - (co / 2.0f)
val r = b + co
// Clamp to 0-255 range val yVal = gopY[t][canvasIdx]
val rClamped = r.toInt().coerceIn(0, 255) val co = gopCo[t][canvasIdx]
val gClamped = g.toInt().coerceIn(0, 255) val cg = gopCg[t][canvasIdx]
val bClamped = b.toInt().coerceIn(0, 255)
// Write RGB24 format (3 bytes per pixel) // YCoCg-R to RGB conversion
val offset = rgbAddr + i * 3L val tmp = yVal - (cg / 2.0f)
vm.usermem[offset] = rClamped.toByte() val g = cg + tmp
vm.usermem[offset + 1] = gClamped.toByte() val b = tmp - (co / 2.0f)
vm.usermem[offset + 2] = bClamped.toByte() val r = b + co
// Clamp to 0-255 range
val rClamped = r.toInt().coerceIn(0, 255)
val gClamped = g.toInt().coerceIn(0, 255)
val bClamped = b.toInt().coerceIn(0, 255)
// Write RGB24 format (3 bytes per pixel)
val offset = rgbAddr + outIdx * 3L
vm.usermem[offset] = rClamped.toByte()
vm.usermem[offset + 1] = gClamped.toByte()
vm.usermem[offset + 2] = bClamped.toByte()
}
} }
} }

View File

@@ -1455,19 +1455,22 @@ static void phase_correlate_fft(const uint8_t *frame1_rgb, const uint8_t *frame2
} }
// Apply translation to frame (for frame alignment before temporal DWT) // Apply translation to frame (for frame alignment before temporal DWT)
// NO PADDING - only extracts the valid region that will be common across all frames
static void apply_translation(float *frame_data, int width, int height, static void apply_translation(float *frame_data, int width, int height,
int16_t dx_qpel, int16_t dy_qpel, float *output) { int16_t dx_qpel, int16_t dy_qpel, float *output) {
// Convert 1/16-pixel to pixel (for now, just use integer translation) // Convert 1/16-pixel to pixel (for now, just use integer translation)
int dx = dx_qpel / 16; int dx = dx_qpel / 16;
int dy = dy_qpel / 16; int dy = dy_qpel / 16;
// Apply translation with boundary handling // Apply translation WITHOUT padding - just shift the content
// Out-of-bounds regions will be cropped away later
for (int y = 0; y < height; y++) { for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) { for (int x = 0; x < width; x++) {
int src_x = x - dx; int src_x = x - dx;
int src_y = y - dy; int src_y = y - dy;
// Clamp to frame boundaries // Clamp to valid region (this will create edge repetition, but those
// edges will be cropped away, so it doesn't matter what we put there)
src_x = CLAMP(src_x, 0, width - 1); src_x = CLAMP(src_x, 0, width - 1);
src_y = CLAMP(src_y, 0, height - 1); src_y = CLAMP(src_y, 0, height - 1);
@@ -1476,6 +1479,22 @@ static void apply_translation(float *frame_data, int width, int height,
} }
} }
// Extract cropped region from a frame after alignment
/* Copy the interior region of an aligned frame into a tightly packed output
 * buffer, discarding the given margins on each side. The output buffer must
 * hold (width - crop_left - crop_right) * (height - crop_top - crop_bottom)
 * floats; rows are written contiguously with no stride padding. */
static void extract_crop(const float *frame_data, int width, int height,
                         int crop_left, int crop_right, int crop_top, int crop_bottom,
                         float *cropped_output) {
    const int out_w = width - crop_left - crop_right;
    const int out_h = height - crop_top - crop_bottom;
    float *dst = cropped_output;
    for (int row = 0; row < out_h; row++) {
        /* Start of this output row inside the source frame. */
        const float *src = frame_data + (row + crop_top) * width + crop_left;
        for (int col = 0; col < out_w; col++) {
            *dst++ = src[col];
        }
    }
}
// ============================================================================= // =============================================================================
// Temporal Subband Quantization // Temporal Subband Quantization
// ============================================================================= // =============================================================================
@@ -1598,7 +1617,7 @@ static int gop_add_frame(tav_encoder_t *enc, const uint8_t *frame_rgb,
memcpy(enc->gop_cg_frames[frame_idx], frame_cg, frame_channel_size); memcpy(enc->gop_cg_frames[frame_idx], frame_cg, frame_channel_size);
// Compute translation vector if not first frame // Compute translation vector if not first frame
if (frame_idx > 0) { /*if (frame_idx > 0) {
phase_correlate_fft(enc->gop_rgb_frames[frame_idx - 1], phase_correlate_fft(enc->gop_rgb_frames[frame_idx - 1],
enc->gop_rgb_frames[frame_idx], enc->gop_rgb_frames[frame_idx],
enc->width, enc->height, enc->width, enc->height,
@@ -1615,7 +1634,11 @@ static int gop_add_frame(tav_encoder_t *enc, const uint8_t *frame_rgb,
// First frame has no translation // First frame has no translation
enc->gop_translation_x[0] = 0; enc->gop_translation_x[0] = 0;
enc->gop_translation_y[0] = 0; enc->gop_translation_y[0] = 0;
} }*/
// disabling frame realigning: producing worse results in general
enc->gop_translation_x[frame_idx] = 0.0f;
enc->gop_translation_y[frame_idx] = 0.0f;
enc->gop_frame_count++; enc->gop_frame_count++;
return 0; return 0;
@@ -1675,7 +1698,7 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
} }
// Allocate working buffers for each channel // Allocate working buffers for each channel
const int num_pixels = enc->width * enc->height; int num_pixels = enc->width * enc->height; // Will be updated if frames are cropped
float **gop_y_coeffs = malloc(actual_gop_size * sizeof(float*)); float **gop_y_coeffs = malloc(actual_gop_size * sizeof(float*));
float **gop_co_coeffs = malloc(actual_gop_size * sizeof(float*)); float **gop_co_coeffs = malloc(actual_gop_size * sizeof(float*));
float **gop_cg_coeffs = malloc(actual_gop_size * sizeof(float*)); float **gop_cg_coeffs = malloc(actual_gop_size * sizeof(float*));
@@ -1719,6 +1742,34 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
} }
} }
// Step 0.5b: Calculate the valid region after alignment (crop bounds)
// Find the bounding box that's valid across all aligned frames
int min_dx = 0, max_dx = 0, min_dy = 0, max_dy = 0;
for (int i = 0; i < actual_gop_size; i++) {
int dx = enc->gop_translation_x[i] / 16;
int dy = enc->gop_translation_y[i] / 16;
if (dx < min_dx) min_dx = dx;
if (dx > max_dx) max_dx = dx;
if (dy < min_dy) min_dy = dy;
if (dy > max_dy) max_dy = dy;
}
// Crop region: the area valid in all frames
// When we shift right by +N, we lose N pixels on the left, so crop left edge by abs(min_dx)
// When we shift left by -N, we lose N pixels on the right, so crop right edge by max_dx
int crop_left = (min_dx < 0) ? -min_dx : 0;
int crop_right = (max_dx > 0) ? max_dx : 0;
int crop_top = (min_dy < 0) ? -min_dy : 0;
int crop_bottom = (max_dy > 0) ? max_dy : 0;
int valid_width = enc->width - crop_left - crop_right;
int valid_height = enc->height - crop_top - crop_bottom;
if (enc->verbose && (crop_left || crop_right || crop_top || crop_bottom)) {
printf("Valid region after alignment: %dx%d (cropped: L=%d R=%d T=%d B=%d)\n",
valid_width, valid_height, crop_left, crop_right, crop_top, crop_bottom);
}
// Step 0.6: Apply motion compensation to align frames before temporal DWT // Step 0.6: Apply motion compensation to align frames before temporal DWT
// This uses the cumulative translation vectors to align each frame to frame 0 // This uses the cumulative translation vectors to align each frame to frame 0
for (int i = 1; i < actual_gop_size; i++) { // Skip frame 0 (reference frame) for (int i = 1; i < actual_gop_size; i++) { // Skip frame 0 (reference frame)
@@ -1753,23 +1804,122 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
free(aligned_cg); free(aligned_cg);
} }
// Step 0.7: Expand frames to larger canvas that preserves ALL original pixels
// Calculate expanded canvas size (UNION of all aligned frames)
int canvas_width = enc->width + crop_left + crop_right; // Original width + total shift range
int canvas_height = enc->height + crop_top + crop_bottom; // Original height + total shift range
int canvas_pixels = canvas_width * canvas_height;
if (enc->verbose && (crop_left || crop_right || crop_top || crop_bottom)) {
printf("Expanded canvas: %dx%d (original %dx%d + margins L=%d R=%d T=%d B=%d)\n",
canvas_width, canvas_height, enc->width, enc->height,
crop_left, crop_right, crop_top, crop_bottom);
printf("This preserves all original pixels from all frames after alignment\n");
}
// Allocate expanded canvas buffers
float **canvas_y_coeffs = malloc(actual_gop_size * sizeof(float*));
float **canvas_co_coeffs = malloc(actual_gop_size * sizeof(float*));
float **canvas_cg_coeffs = malloc(actual_gop_size * sizeof(float*));
for (int i = 0; i < actual_gop_size; i++) {
canvas_y_coeffs[i] = calloc(canvas_pixels, sizeof(float)); // Zero-initialized
canvas_co_coeffs[i] = calloc(canvas_pixels, sizeof(float));
canvas_cg_coeffs[i] = calloc(canvas_pixels, sizeof(float));
// Place the aligned frame onto the canvas at the appropriate offset
// Each frame's aligned position determines where it sits on the canvas
int offset_x = crop_left; // Frames are offset by the left margin
int offset_y = crop_top; // Frames are offset by the top margin
// Copy the full aligned frame onto the canvas (preserves all original content)
for (int y = 0; y < enc->height; y++) {
for (int x = 0; x < enc->width; x++) {
int src_idx = y * enc->width + x;
int dst_idx = (y + offset_y) * canvas_width + (x + offset_x);
canvas_y_coeffs[i][dst_idx] = gop_y_coeffs[i][src_idx];
canvas_co_coeffs[i][dst_idx] = gop_co_coeffs[i][src_idx];
canvas_cg_coeffs[i][dst_idx] = gop_cg_coeffs[i][src_idx];
}
}
// Fill margin areas with symmetric padding from frame edges
for (int y = 0; y < canvas_height; y++) {
for (int x = 0; x < canvas_width; x++) {
// Skip pixels in the original frame region (already copied)
if (y >= offset_y && y < offset_y + enc->height &&
x >= offset_x && x < offset_x + enc->width) {
continue;
}
// Calculate position relative to original frame
int src_x = x - offset_x;
int src_y = y - offset_y;
// Apply symmetric padding (mirroring)
if (src_x < 0) {
src_x = -src_x - 1; // Mirror left edge: -1→0, -2→1, -3→2
} else if (src_x >= enc->width) {
src_x = 2 * enc->width - src_x - 1; // Mirror right edge
}
if (src_y < 0) {
src_y = -src_y - 1; // Mirror top edge
} else if (src_y >= enc->height) {
src_y = 2 * enc->height - src_y - 1; // Mirror bottom edge
}
// Clamp to valid range (safety for extreme cases)
src_x = CLAMP(src_x, 0, enc->width - 1);
src_y = CLAMP(src_y, 0, enc->height - 1);
// Copy mirrored pixel from original frame to canvas margin
int src_idx = src_y * enc->width + src_x;
int dst_idx = y * canvas_width + x;
canvas_y_coeffs[i][dst_idx] = gop_y_coeffs[i][src_idx];
canvas_co_coeffs[i][dst_idx] = gop_co_coeffs[i][src_idx];
canvas_cg_coeffs[i][dst_idx] = gop_cg_coeffs[i][src_idx];
}
}
// Free the original frame (no longer needed)
free(gop_y_coeffs[i]);
free(gop_co_coeffs[i]);
free(gop_cg_coeffs[i]);
}
// Replace pointers with expanded canvas
free(gop_y_coeffs);
free(gop_co_coeffs);
free(gop_cg_coeffs);
gop_y_coeffs = canvas_y_coeffs;
gop_co_coeffs = canvas_co_coeffs;
gop_cg_coeffs = canvas_cg_coeffs;
// Update dimensions to canvas size
valid_width = canvas_width;
valid_height = canvas_height;
num_pixels = canvas_pixels;
// Step 1: For single-frame GOP, skip temporal DWT and use traditional I-frame path // Step 1: For single-frame GOP, skip temporal DWT and use traditional I-frame path
if (actual_gop_size == 1) { if (actual_gop_size == 1) {
// Apply only 2D spatial DWT (no temporal transform for single frame) // Apply only 2D spatial DWT (no temporal transform for single frame)
dwt_2d_forward_flexible(gop_y_coeffs[0], enc->width, enc->height, // Use cropped dimensions (will be full size if no motion)
dwt_2d_forward_flexible(gop_y_coeffs[0], valid_width, valid_height,
enc->decomp_levels, enc->wavelet_filter); enc->decomp_levels, enc->wavelet_filter);
dwt_2d_forward_flexible(gop_co_coeffs[0], enc->width, enc->height, dwt_2d_forward_flexible(gop_co_coeffs[0], valid_width, valid_height,
enc->decomp_levels, enc->wavelet_filter); enc->decomp_levels, enc->wavelet_filter);
dwt_2d_forward_flexible(gop_cg_coeffs[0], enc->width, enc->height, dwt_2d_forward_flexible(gop_cg_coeffs[0], valid_width, valid_height,
enc->decomp_levels, enc->wavelet_filter); enc->decomp_levels, enc->wavelet_filter);
} else { } else {
// Multi-frame GOP: Apply 3D DWT (temporal + spatial) to each channel // Multi-frame GOP: Apply 3D DWT (temporal + spatial) to each channel
// Note: This modifies gop_*_coeffs in-place // Note: This modifies gop_*_coeffs in-place
dwt_3d_forward(gop_y_coeffs, enc->width, enc->height, actual_gop_size, // Use cropped dimensions to encode only the valid region
dwt_3d_forward(gop_y_coeffs, valid_width, valid_height, actual_gop_size,
enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter); enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter);
dwt_3d_forward(gop_co_coeffs, enc->width, enc->height, actual_gop_size, dwt_3d_forward(gop_co_coeffs, valid_width, valid_height, actual_gop_size,
enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter); enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter);
dwt_3d_forward(gop_cg_coeffs, enc->width, enc->height, actual_gop_size, dwt_3d_forward(gop_cg_coeffs, valid_width, valid_height, actual_gop_size,
enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter); enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter);
} }
@@ -1875,7 +2025,7 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
} else { } else {
// Multi-frame GOP: use unified 3D DWT encoding // Multi-frame GOP: use unified 3D DWT encoding
// Write unified GOP packet header // Write unified GOP packet header
// Packet structure: [packet_type=0x12][gop_size][motion_vectors...][compressed_size][compressed_data] // Packet structure: [packet_type=0x12][gop_size][crop_info][motion_vectors...][compressed_size][compressed_data]
uint8_t packet_type = TAV_PACKET_GOP_UNIFIED; uint8_t packet_type = TAV_PACKET_GOP_UNIFIED;
fwrite(&packet_type, 1, 1, output); fwrite(&packet_type, 1, 1, output);
total_bytes_written += 1; total_bytes_written += 1;
@@ -1885,6 +2035,18 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
fwrite(&gop_size_byte, 1, 1, output); fwrite(&gop_size_byte, 1, 1, output);
total_bytes_written += 1; total_bytes_written += 1;
// Write canvas expansion information (4 bytes)
// This tells the decoder the margins added to preserve all original pixels
// The encoded canvas is larger than the original frame to preserve edge content after alignment
uint8_t canvas_margins[4] = {
(uint8_t)crop_left, // Left margin
(uint8_t)crop_right, // Right margin
(uint8_t)crop_top, // Top margin
(uint8_t)crop_bottom // Bottom margin
};
fwrite(canvas_margins, 1, 4, output);
total_bytes_written += 4;
// Write all motion vectors (1/16-pixel precision) for the entire GOP // Write all motion vectors (1/16-pixel precision) for the entire GOP
for (int t = 0; t < actual_gop_size; t++) { for (int t = 0; t < actual_gop_size; t++) {
int16_t dx = enc->gop_translation_x[t]; int16_t dx = enc->gop_translation_x[t];