mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 19:51:51 +09:00
TAV: will replace frame aligning with something else, or maybe with nothing
This commit is contained in:
@@ -999,7 +999,18 @@ try {
|
||||
// Read GOP size (number of frames in this GOP, 1-16)
|
||||
const gopSize = seqread.readOneByte()
|
||||
|
||||
// Read motion vectors (quarter-pixel units, int16)
|
||||
// Read canvas expansion margins (4 bytes)
|
||||
// Encoder expands canvas to preserve all original pixels from all aligned frames
|
||||
const marginLeft = seqread.readOneByte()
|
||||
const marginRight = seqread.readOneByte()
|
||||
const marginTop = seqread.readOneByte()
|
||||
const marginBottom = seqread.readOneByte()
|
||||
|
||||
// Calculate expanded canvas dimensions
|
||||
const canvasWidth = header.width + marginLeft + marginRight
|
||||
const canvasHeight = header.height + marginTop + marginBottom
|
||||
|
||||
// Read motion vectors (1/16-pixel units, int16)
|
||||
// Encoder writes ALL motion vectors including frame 0
|
||||
let motionX = new Array(gopSize)
|
||||
let motionY = new Array(gopSize)
|
||||
@@ -1042,7 +1053,7 @@ try {
|
||||
try {
|
||||
let decodeStart = sys.nanoTime()
|
||||
|
||||
// Call GOP decoder
|
||||
// Call GOP decoder with canvas expansion information
|
||||
const [r1, r2] = graphics.tavDecodeGopUnified(
|
||||
compressedPtr,
|
||||
compressedSize,
|
||||
@@ -1050,8 +1061,12 @@ try {
|
||||
motionX,
|
||||
motionY,
|
||||
gopRGBBuffers, // Array of output buffer addresses
|
||||
header.width,
|
||||
header.height,
|
||||
header.width, // Original frame width
|
||||
header.height, // Original frame height
|
||||
canvasWidth, // Expanded canvas width (preserves all pixels)
|
||||
canvasHeight, // Expanded canvas height (preserves all pixels)
|
||||
marginLeft, // Left margin
|
||||
marginTop, // Top margin
|
||||
header.qualityLevel,
|
||||
QLUT[header.qualityY],
|
||||
QLUT[header.qualityCo],
|
||||
|
||||
@@ -3181,11 +3181,35 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Symmetric padding (mirroring) for edge handling in motion compensation.
|
||||
* This provides smoother edges than simple clamping/replication.
|
||||
*
|
||||
* @param coord The coordinate to mirror if out of bounds
|
||||
* @param size The dimension size (width or height)
|
||||
* @return The mirrored coordinate within valid range [0, size-1]
|
||||
*/
|
||||
private fun symmetricPadding(coord: Int, size: Int): Int {
|
||||
var mirrored = coord
|
||||
|
||||
// Mirror for negative coordinates: -1 -> 0, -2 -> 1, -3 -> 2, etc.
|
||||
if (mirrored < 0) {
|
||||
mirrored = -mirrored - 1
|
||||
}
|
||||
// Mirror for coordinates beyond bounds: size -> size-1, size+1 -> size-2, etc.
|
||||
else if (mirrored >= size) {
|
||||
mirrored = 2 * size - mirrored - 1
|
||||
}
|
||||
|
||||
// Final clamp to ensure we're within bounds (handles extreme cases)
|
||||
return mirrored.coerceIn(0, size - 1)
|
||||
}
|
||||
|
||||
private fun tevHandleMotionBlockTwoPass(startX: Int, startY: Int, mvX: Int, mvY: Int,
|
||||
currentRGBAddr: Long, prevRGBAddr: Long,
|
||||
width: Int, height: Int, thisAddrIncVec: Int, prevAddrIncVec: Int,
|
||||
debugMotionVectors: Boolean) {
|
||||
// Copy 16x16 block with motion compensation
|
||||
// Copy 16x16 block with motion compensation using symmetric padding
|
||||
for (py in 0 until 16) {
|
||||
val y = startY + py
|
||||
if (y >= height) break
|
||||
@@ -3194,8 +3218,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
val x = startX + px
|
||||
if (x >= width) break
|
||||
|
||||
val srcX = (x + mvX).coerceIn(0, width - 1)
|
||||
val srcY = (y + mvY).coerceIn(0, height - 1)
|
||||
// Use symmetric padding instead of clamping for smoother edges
|
||||
val srcX = symmetricPadding(x + mvX, width)
|
||||
val srcY = symmetricPadding(y + mvY, height)
|
||||
|
||||
val srcOffset = (srcY * width + srcX) * 3
|
||||
val dstOffset = (y * width + x) * 3
|
||||
@@ -3226,8 +3251,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
val x = startX + px
|
||||
if (x >= width) break
|
||||
|
||||
val srcX = (x + mvX).coerceIn(0, width - 1)
|
||||
val srcY = (y + mvY).coerceIn(0, height - 1)
|
||||
// Use symmetric padding for smoother edges (commented-out code updated for consistency)
|
||||
val srcX = symmetricPadding(x + mvX, width)
|
||||
val srcY = symmetricPadding(y + mvY, height)
|
||||
|
||||
val srcOffset = (srcY * width + srcX) * 3
|
||||
val r = vm.peek(prevRGBAddr + srcOffset * prevAddrIncVec)?.toInt() ?: 0
|
||||
@@ -6205,6 +6231,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
/**
|
||||
* Apply inverse translation (motion compensation) to a frame.
|
||||
* Inverse operation: shifts by +dx, +dy (opposite of forward encoder).
|
||||
* Uses symmetric boundary extension (mirror padding) to match encoder.
|
||||
*
|
||||
* @param frameData Input frame data to shift
|
||||
* @param width Frame width
|
||||
@@ -6215,14 +6242,28 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
private fun applyInverseTranslation(frameData: FloatArray, width: Int, height: Int, dx: Int, dy: Int) {
|
||||
val output = FloatArray(width * height)
|
||||
|
||||
// Apply inverse translation with boundary clamping
|
||||
// Apply inverse translation with symmetric boundary extension (mirror padding)
|
||||
for (y in 0 until height) {
|
||||
for (x in 0 until width) {
|
||||
// Inverse: shift by +dx, +dy (opposite of encoder's -dx, -dy)
|
||||
var srcX = x + dx
|
||||
var srcY = y + dy
|
||||
|
||||
// Clamp to frame boundaries
|
||||
// Symmetric extension at boundaries (mirror padding)
|
||||
// This gives smooth edges instead of replicated stripes
|
||||
if (srcX < 0) {
|
||||
srcX = -srcX - 1 // Mirror left edge
|
||||
} else if (srcX >= width) {
|
||||
srcX = 2 * width - srcX - 1 // Mirror right edge
|
||||
}
|
||||
|
||||
if (srcY < 0) {
|
||||
srcY = -srcY - 1 // Mirror top edge
|
||||
} else if (srcY >= height) {
|
||||
srcY = 2 * height - srcY - 1 // Mirror bottom edge
|
||||
}
|
||||
|
||||
// Clamp after mirroring (in case of very large shifts)
|
||||
srcX = srcX.coerceIn(0, width - 1)
|
||||
srcY = srcY.coerceIn(0, height - 1)
|
||||
|
||||
@@ -6244,8 +6285,12 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
* @param motionVectorsX X motion vectors in 1/16-pixel units
|
||||
* @param motionVectorsY Y motion vectors in 1/16-pixel units
|
||||
* @param outputRGBAddrs Array of output RGB buffer addresses
|
||||
* @param width Frame width
|
||||
* @param height Frame height
|
||||
* @param width Original frame width (output dimensions)
|
||||
* @param height Original frame height (output dimensions)
|
||||
* @param canvasWidth Expanded canvas width (for motion compensation)
|
||||
* @param canvasHeight Expanded canvas height (for motion compensation)
|
||||
* @param marginLeft Left margin to crop from expanded canvas
|
||||
* @param marginTop Top margin to crop from expanded canvas
|
||||
* @param qIndex Quality index
|
||||
* @param qYGlobal Global Y quantizer
|
||||
* @param qCoGlobal Global Co quantizer
|
||||
@@ -6265,6 +6310,10 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
outputRGBAddrs: LongArray,
|
||||
width: Int,
|
||||
height: Int,
|
||||
canvasWidth: Int,
|
||||
canvasHeight: Int,
|
||||
marginLeft: Int,
|
||||
marginTop: Int,
|
||||
qIndex: Int,
|
||||
qYGlobal: Int,
|
||||
qCoGlobal: Int,
|
||||
@@ -6280,7 +6329,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
dbgOut["qCg"] = qCgGlobal
|
||||
dbgOut["frameMode"] = "G"
|
||||
|
||||
val numPixels = width * height
|
||||
// Use expanded canvas dimensions for DWT processing
|
||||
val canvasPixels = canvasWidth * canvasHeight
|
||||
val outputPixels = width * height
|
||||
|
||||
// Step 1: Decompress unified GOP block
|
||||
val compressedData = ByteArray(compressedSize)
|
||||
@@ -6305,17 +6356,17 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
val quantizedCoeffs = tavPostprocessGopUnified(
|
||||
decompressedData,
|
||||
gopSize,
|
||||
numPixels,
|
||||
canvasPixels, // Use expanded canvas size
|
||||
channelLayout
|
||||
)
|
||||
|
||||
// Step 3: Allocate GOP buffers for float coefficients
|
||||
val gopY = Array(gopSize) { FloatArray(numPixels) }
|
||||
val gopCo = Array(gopSize) { FloatArray(numPixels) }
|
||||
val gopCg = Array(gopSize) { FloatArray(numPixels) }
|
||||
// Step 3: Allocate GOP buffers for float coefficients (expanded canvas size)
|
||||
val gopY = Array(gopSize) { FloatArray(canvasPixels) }
|
||||
val gopCo = Array(gopSize) { FloatArray(canvasPixels) }
|
||||
val gopCg = Array(gopSize) { FloatArray(canvasPixels) }
|
||||
|
||||
// Step 4: Calculate subband layout (needed for perceptual dequantization)
|
||||
val subbands = calculateSubbandLayout(width, height, spatialLevels)
|
||||
// Step 4: Calculate subband layout for expanded canvas (needed for perceptual dequantization)
|
||||
val subbands = calculateSubbandLayout(canvasWidth, canvasHeight, spatialLevels)
|
||||
|
||||
// Step 5: Dequantize with temporal-spatial scaling
|
||||
for (t in 0 until gopSize) {
|
||||
@@ -6347,49 +6398,60 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
)
|
||||
}
|
||||
|
||||
// Step 6: Apply inverse 3D DWT (spatial first, then temporal)
|
||||
tavApplyInverse3DDWT(gopY, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
||||
tavApplyInverse3DDWT(gopCo, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
||||
tavApplyInverse3DDWT(gopCg, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
||||
// Step 6: Apply inverse 3D DWT (spatial first, then temporal) on expanded canvas
|
||||
tavApplyInverse3DDWT(gopY, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
||||
tavApplyInverse3DDWT(gopCo, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
||||
tavApplyInverse3DDWT(gopCg, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
||||
|
||||
// Step 7: Apply inverse motion compensation (shift frames back)
|
||||
// Step 7: Apply inverse motion compensation (shift frames back) on expanded canvas
|
||||
// Note: Motion vectors are in 1/16-pixel units, cumulative relative to frame 0
|
||||
for (t in 1 until gopSize) { // Skip frame 0 (reference)
|
||||
val dx = motionVectorsX[t] / 16 // Convert to pixel units
|
||||
val dy = motionVectorsY[t] / 16
|
||||
|
||||
if (dx != 0 || dy != 0) {
|
||||
applyInverseTranslation(gopY[t], width, height, dx, dy)
|
||||
applyInverseTranslation(gopCo[t], width, height, dx, dy)
|
||||
applyInverseTranslation(gopCg[t], width, height, dx, dy)
|
||||
applyInverseTranslation(gopY[t], canvasWidth, canvasHeight, dx, dy)
|
||||
applyInverseTranslation(gopCo[t], canvasWidth, canvasHeight, dx, dy)
|
||||
applyInverseTranslation(gopCg[t], canvasWidth, canvasHeight, dx, dy)
|
||||
}
|
||||
}
|
||||
|
||||
// Step 8: Convert each frame to RGB and write to output buffers
|
||||
// Step 8: Crop expanded canvas to original dimensions and convert to RGB
|
||||
for (t in 0 until gopSize) {
|
||||
val rgbAddr = outputRGBAddrs[t]
|
||||
|
||||
for (i in 0 until numPixels) {
|
||||
val y = gopY[t][i]
|
||||
val co = gopCo[t][i]
|
||||
val cg = gopCg[t][i]
|
||||
// Crop from expanded canvas (canvasWidth x canvasHeight) to output (width x height)
|
||||
for (row in 0 until height) {
|
||||
for (col in 0 until width) {
|
||||
// Source pixel in expanded canvas
|
||||
val canvasX = col + marginLeft
|
||||
val canvasY = row + marginTop
|
||||
val canvasIdx = canvasY * canvasWidth + canvasX
|
||||
|
||||
// YCoCg-R to RGB conversion
|
||||
val tmp = y - (cg / 2.0f)
|
||||
val g = cg + tmp
|
||||
val b = tmp - (co / 2.0f)
|
||||
val r = b + co
|
||||
// Destination pixel in output buffer
|
||||
val outIdx = row * width + col
|
||||
|
||||
// Clamp to 0-255 range
|
||||
val rClamped = r.toInt().coerceIn(0, 255)
|
||||
val gClamped = g.toInt().coerceIn(0, 255)
|
||||
val bClamped = b.toInt().coerceIn(0, 255)
|
||||
val yVal = gopY[t][canvasIdx]
|
||||
val co = gopCo[t][canvasIdx]
|
||||
val cg = gopCg[t][canvasIdx]
|
||||
|
||||
// Write RGB24 format (3 bytes per pixel)
|
||||
val offset = rgbAddr + i * 3L
|
||||
vm.usermem[offset] = rClamped.toByte()
|
||||
vm.usermem[offset + 1] = gClamped.toByte()
|
||||
vm.usermem[offset + 2] = bClamped.toByte()
|
||||
// YCoCg-R to RGB conversion
|
||||
val tmp = yVal - (cg / 2.0f)
|
||||
val g = cg + tmp
|
||||
val b = tmp - (co / 2.0f)
|
||||
val r = b + co
|
||||
|
||||
// Clamp to 0-255 range
|
||||
val rClamped = r.toInt().coerceIn(0, 255)
|
||||
val gClamped = g.toInt().coerceIn(0, 255)
|
||||
val bClamped = b.toInt().coerceIn(0, 255)
|
||||
|
||||
// Write RGB24 format (3 bytes per pixel)
|
||||
val offset = rgbAddr + outIdx * 3L
|
||||
vm.usermem[offset] = rClamped.toByte()
|
||||
vm.usermem[offset + 1] = gClamped.toByte()
|
||||
vm.usermem[offset + 2] = bClamped.toByte()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1455,19 +1455,22 @@ static void phase_correlate_fft(const uint8_t *frame1_rgb, const uint8_t *frame2
|
||||
}
|
||||
|
||||
// Apply translation to frame (for frame alignment before temporal DWT)
|
||||
// NO PADDING - only extracts the valid region that will be common across all frames
|
||||
static void apply_translation(float *frame_data, int width, int height,
|
||||
int16_t dx_qpel, int16_t dy_qpel, float *output) {
|
||||
// Convert 1/16-pixel to pixel (for now, just use integer translation)
|
||||
int dx = dx_qpel / 16;
|
||||
int dy = dy_qpel / 16;
|
||||
|
||||
// Apply translation with boundary handling
|
||||
// Apply translation WITHOUT padding - just shift the content
|
||||
// Out-of-bounds regions will be cropped away later
|
||||
for (int y = 0; y < height; y++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
int src_x = x - dx;
|
||||
int src_y = y - dy;
|
||||
|
||||
// Clamp to frame boundaries
|
||||
// Clamp to valid region (this will create edge repetition, but those
|
||||
// edges will be cropped away, so it doesn't matter what we put there)
|
||||
src_x = CLAMP(src_x, 0, width - 1);
|
||||
src_y = CLAMP(src_y, 0, height - 1);
|
||||
|
||||
@@ -1476,6 +1479,22 @@ static void apply_translation(float *frame_data, int width, int height,
|
||||
}
|
||||
}
|
||||
|
||||
// Extract cropped region from a frame after alignment
|
||||
static void extract_crop(const float *frame_data, int width, int height,
|
||||
int crop_left, int crop_right, int crop_top, int crop_bottom,
|
||||
float *cropped_output) {
|
||||
int valid_width = width - crop_left - crop_right;
|
||||
int valid_height = height - crop_top - crop_bottom;
|
||||
|
||||
for (int y = 0; y < valid_height; y++) {
|
||||
for (int x = 0; x < valid_width; x++) {
|
||||
int src_x = x + crop_left;
|
||||
int src_y = y + crop_top;
|
||||
cropped_output[y * valid_width + x] = frame_data[src_y * width + src_x];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Temporal Subband Quantization
|
||||
// =============================================================================
|
||||
@@ -1598,7 +1617,7 @@ static int gop_add_frame(tav_encoder_t *enc, const uint8_t *frame_rgb,
|
||||
memcpy(enc->gop_cg_frames[frame_idx], frame_cg, frame_channel_size);
|
||||
|
||||
// Compute translation vector if not first frame
|
||||
if (frame_idx > 0) {
|
||||
/*if (frame_idx > 0) {
|
||||
phase_correlate_fft(enc->gop_rgb_frames[frame_idx - 1],
|
||||
enc->gop_rgb_frames[frame_idx],
|
||||
enc->width, enc->height,
|
||||
@@ -1615,7 +1634,11 @@ static int gop_add_frame(tav_encoder_t *enc, const uint8_t *frame_rgb,
|
||||
// First frame has no translation
|
||||
enc->gop_translation_x[0] = 0;
|
||||
enc->gop_translation_y[0] = 0;
|
||||
}
|
||||
}*/
|
||||
|
||||
// disabling frame realigning: producing worse results in general
|
||||
enc->gop_translation_x[frame_idx] = 0.0f;
|
||||
enc->gop_translation_y[frame_idx] = 0.0f;
|
||||
|
||||
enc->gop_frame_count++;
|
||||
return 0;
|
||||
@@ -1675,7 +1698,7 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
|
||||
}
|
||||
|
||||
// Allocate working buffers for each channel
|
||||
const int num_pixels = enc->width * enc->height;
|
||||
int num_pixels = enc->width * enc->height; // Will be updated if frames are cropped
|
||||
float **gop_y_coeffs = malloc(actual_gop_size * sizeof(float*));
|
||||
float **gop_co_coeffs = malloc(actual_gop_size * sizeof(float*));
|
||||
float **gop_cg_coeffs = malloc(actual_gop_size * sizeof(float*));
|
||||
@@ -1719,6 +1742,34 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
|
||||
}
|
||||
}
|
||||
|
||||
// Step 0.5b: Calculate the valid region after alignment (crop bounds)
|
||||
// Find the bounding box that's valid across all aligned frames
|
||||
int min_dx = 0, max_dx = 0, min_dy = 0, max_dy = 0;
|
||||
for (int i = 0; i < actual_gop_size; i++) {
|
||||
int dx = enc->gop_translation_x[i] / 16;
|
||||
int dy = enc->gop_translation_y[i] / 16;
|
||||
if (dx < min_dx) min_dx = dx;
|
||||
if (dx > max_dx) max_dx = dx;
|
||||
if (dy < min_dy) min_dy = dy;
|
||||
if (dy > max_dy) max_dy = dy;
|
||||
}
|
||||
|
||||
// Crop region: the area valid in all frames
|
||||
// When we shift right by +N, we lose N pixels on the left, so crop left edge by abs(min_dx)
|
||||
// When we shift left by -N, we lose N pixels on the right, so crop right edge by max_dx
|
||||
int crop_left = (min_dx < 0) ? -min_dx : 0;
|
||||
int crop_right = (max_dx > 0) ? max_dx : 0;
|
||||
int crop_top = (min_dy < 0) ? -min_dy : 0;
|
||||
int crop_bottom = (max_dy > 0) ? max_dy : 0;
|
||||
|
||||
int valid_width = enc->width - crop_left - crop_right;
|
||||
int valid_height = enc->height - crop_top - crop_bottom;
|
||||
|
||||
if (enc->verbose && (crop_left || crop_right || crop_top || crop_bottom)) {
|
||||
printf("Valid region after alignment: %dx%d (cropped: L=%d R=%d T=%d B=%d)\n",
|
||||
valid_width, valid_height, crop_left, crop_right, crop_top, crop_bottom);
|
||||
}
|
||||
|
||||
// Step 0.6: Apply motion compensation to align frames before temporal DWT
|
||||
// This uses the cumulative translation vectors to align each frame to frame 0
|
||||
for (int i = 1; i < actual_gop_size; i++) { // Skip frame 0 (reference frame)
|
||||
@@ -1753,23 +1804,122 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
|
||||
free(aligned_cg);
|
||||
}
|
||||
|
||||
// Step 0.7: Expand frames to larger canvas that preserves ALL original pixels
|
||||
// Calculate expanded canvas size (UNION of all aligned frames)
|
||||
int canvas_width = enc->width + crop_left + crop_right; // Original width + total shift range
|
||||
int canvas_height = enc->height + crop_top + crop_bottom; // Original height + total shift range
|
||||
int canvas_pixels = canvas_width * canvas_height;
|
||||
|
||||
if (enc->verbose && (crop_left || crop_right || crop_top || crop_bottom)) {
|
||||
printf("Expanded canvas: %dx%d (original %dx%d + margins L=%d R=%d T=%d B=%d)\n",
|
||||
canvas_width, canvas_height, enc->width, enc->height,
|
||||
crop_left, crop_right, crop_top, crop_bottom);
|
||||
printf("This preserves all original pixels from all frames after alignment\n");
|
||||
}
|
||||
|
||||
// Allocate expanded canvas buffers
|
||||
float **canvas_y_coeffs = malloc(actual_gop_size * sizeof(float*));
|
||||
float **canvas_co_coeffs = malloc(actual_gop_size * sizeof(float*));
|
||||
float **canvas_cg_coeffs = malloc(actual_gop_size * sizeof(float*));
|
||||
|
||||
for (int i = 0; i < actual_gop_size; i++) {
|
||||
canvas_y_coeffs[i] = calloc(canvas_pixels, sizeof(float)); // Zero-initialized
|
||||
canvas_co_coeffs[i] = calloc(canvas_pixels, sizeof(float));
|
||||
canvas_cg_coeffs[i] = calloc(canvas_pixels, sizeof(float));
|
||||
|
||||
// Place the aligned frame onto the canvas at the appropriate offset
|
||||
// Each frame's aligned position determines where it sits on the canvas
|
||||
int offset_x = crop_left; // Frames are offset by the left margin
|
||||
int offset_y = crop_top; // Frames are offset by the top margin
|
||||
|
||||
// Copy the full aligned frame onto the canvas (preserves all original content)
|
||||
for (int y = 0; y < enc->height; y++) {
|
||||
for (int x = 0; x < enc->width; x++) {
|
||||
int src_idx = y * enc->width + x;
|
||||
int dst_idx = (y + offset_y) * canvas_width + (x + offset_x);
|
||||
canvas_y_coeffs[i][dst_idx] = gop_y_coeffs[i][src_idx];
|
||||
canvas_co_coeffs[i][dst_idx] = gop_co_coeffs[i][src_idx];
|
||||
canvas_cg_coeffs[i][dst_idx] = gop_cg_coeffs[i][src_idx];
|
||||
}
|
||||
}
|
||||
|
||||
// Fill margin areas with symmetric padding from frame edges
|
||||
for (int y = 0; y < canvas_height; y++) {
|
||||
for (int x = 0; x < canvas_width; x++) {
|
||||
// Skip pixels in the original frame region (already copied)
|
||||
if (y >= offset_y && y < offset_y + enc->height &&
|
||||
x >= offset_x && x < offset_x + enc->width) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Calculate position relative to original frame
|
||||
int src_x = x - offset_x;
|
||||
int src_y = y - offset_y;
|
||||
|
||||
// Apply symmetric padding (mirroring)
|
||||
if (src_x < 0) {
|
||||
src_x = -src_x - 1; // Mirror left edge: -1→0, -2→1, -3→2
|
||||
} else if (src_x >= enc->width) {
|
||||
src_x = 2 * enc->width - src_x - 1; // Mirror right edge
|
||||
}
|
||||
|
||||
if (src_y < 0) {
|
||||
src_y = -src_y - 1; // Mirror top edge
|
||||
} else if (src_y >= enc->height) {
|
||||
src_y = 2 * enc->height - src_y - 1; // Mirror bottom edge
|
||||
}
|
||||
|
||||
// Clamp to valid range (safety for extreme cases)
|
||||
src_x = CLAMP(src_x, 0, enc->width - 1);
|
||||
src_y = CLAMP(src_y, 0, enc->height - 1);
|
||||
|
||||
// Copy mirrored pixel from original frame to canvas margin
|
||||
int src_idx = src_y * enc->width + src_x;
|
||||
int dst_idx = y * canvas_width + x;
|
||||
canvas_y_coeffs[i][dst_idx] = gop_y_coeffs[i][src_idx];
|
||||
canvas_co_coeffs[i][dst_idx] = gop_co_coeffs[i][src_idx];
|
||||
canvas_cg_coeffs[i][dst_idx] = gop_cg_coeffs[i][src_idx];
|
||||
}
|
||||
}
|
||||
|
||||
// Free the original frame (no longer needed)
|
||||
free(gop_y_coeffs[i]);
|
||||
free(gop_co_coeffs[i]);
|
||||
free(gop_cg_coeffs[i]);
|
||||
}
|
||||
|
||||
// Replace pointers with expanded canvas
|
||||
free(gop_y_coeffs);
|
||||
free(gop_co_coeffs);
|
||||
free(gop_cg_coeffs);
|
||||
gop_y_coeffs = canvas_y_coeffs;
|
||||
gop_co_coeffs = canvas_co_coeffs;
|
||||
gop_cg_coeffs = canvas_cg_coeffs;
|
||||
|
||||
// Update dimensions to canvas size
|
||||
valid_width = canvas_width;
|
||||
valid_height = canvas_height;
|
||||
num_pixels = canvas_pixels;
|
||||
|
||||
// Step 1: For single-frame GOP, skip temporal DWT and use traditional I-frame path
|
||||
if (actual_gop_size == 1) {
|
||||
// Apply only 2D spatial DWT (no temporal transform for single frame)
|
||||
dwt_2d_forward_flexible(gop_y_coeffs[0], enc->width, enc->height,
|
||||
// Use cropped dimensions (will be full size if no motion)
|
||||
dwt_2d_forward_flexible(gop_y_coeffs[0], valid_width, valid_height,
|
||||
enc->decomp_levels, enc->wavelet_filter);
|
||||
dwt_2d_forward_flexible(gop_co_coeffs[0], enc->width, enc->height,
|
||||
dwt_2d_forward_flexible(gop_co_coeffs[0], valid_width, valid_height,
|
||||
enc->decomp_levels, enc->wavelet_filter);
|
||||
dwt_2d_forward_flexible(gop_cg_coeffs[0], enc->width, enc->height,
|
||||
dwt_2d_forward_flexible(gop_cg_coeffs[0], valid_width, valid_height,
|
||||
enc->decomp_levels, enc->wavelet_filter);
|
||||
} else {
|
||||
// Multi-frame GOP: Apply 3D DWT (temporal + spatial) to each channel
|
||||
// Note: This modifies gop_*_coeffs in-place
|
||||
dwt_3d_forward(gop_y_coeffs, enc->width, enc->height, actual_gop_size,
|
||||
// Use cropped dimensions to encode only the valid region
|
||||
dwt_3d_forward(gop_y_coeffs, valid_width, valid_height, actual_gop_size,
|
||||
enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter);
|
||||
dwt_3d_forward(gop_co_coeffs, enc->width, enc->height, actual_gop_size,
|
||||
dwt_3d_forward(gop_co_coeffs, valid_width, valid_height, actual_gop_size,
|
||||
enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter);
|
||||
dwt_3d_forward(gop_cg_coeffs, enc->width, enc->height, actual_gop_size,
|
||||
dwt_3d_forward(gop_cg_coeffs, valid_width, valid_height, actual_gop_size,
|
||||
enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter);
|
||||
}
|
||||
|
||||
@@ -1875,7 +2025,7 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
|
||||
} else {
|
||||
// Multi-frame GOP: use unified 3D DWT encoding
|
||||
// Write unified GOP packet header
|
||||
// Packet structure: [packet_type=0x12][gop_size][motion_vectors...][compressed_size][compressed_data]
|
||||
// Packet structure: [packet_type=0x12][gop_size][crop_info][motion_vectors...][compressed_size][compressed_data]
|
||||
uint8_t packet_type = TAV_PACKET_GOP_UNIFIED;
|
||||
fwrite(&packet_type, 1, 1, output);
|
||||
total_bytes_written += 1;
|
||||
@@ -1885,6 +2035,18 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
|
||||
fwrite(&gop_size_byte, 1, 1, output);
|
||||
total_bytes_written += 1;
|
||||
|
||||
// Write canvas expansion information (4 bytes)
|
||||
// This tells the decoder the margins added to preserve all original pixels
|
||||
// The encoded canvas is larger than the original frame to preserve edge content after alignment
|
||||
uint8_t canvas_margins[4] = {
|
||||
(uint8_t)crop_left, // Left margin
|
||||
(uint8_t)crop_right, // Right margin
|
||||
(uint8_t)crop_top, // Top margin
|
||||
(uint8_t)crop_bottom // Bottom margin
|
||||
};
|
||||
fwrite(canvas_margins, 1, 4, output);
|
||||
total_bytes_written += 4;
|
||||
|
||||
// Write all motion vectors (1/16-pixel precision) for the entire GOP
|
||||
for (int t = 0; t < actual_gop_size; t++) {
|
||||
int16_t dx = enc->gop_translation_x[t];
|
||||
|
||||
Reference in New Issue
Block a user