mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 19:51:51 +09:00
TAV: will replace frame aligning with something else, or maybe with nothing
This commit is contained in:
@@ -999,7 +999,18 @@ try {
|
|||||||
// Read GOP size (number of frames in this GOP, 1-16)
|
// Read GOP size (number of frames in this GOP, 1-16)
|
||||||
const gopSize = seqread.readOneByte()
|
const gopSize = seqread.readOneByte()
|
||||||
|
|
||||||
// Read motion vectors (quarter-pixel units, int16)
|
// Read canvas expansion margins (4 bytes)
|
||||||
|
// Encoder expands canvas to preserve all original pixels from all aligned frames
|
||||||
|
const marginLeft = seqread.readOneByte()
|
||||||
|
const marginRight = seqread.readOneByte()
|
||||||
|
const marginTop = seqread.readOneByte()
|
||||||
|
const marginBottom = seqread.readOneByte()
|
||||||
|
|
||||||
|
// Calculate expanded canvas dimensions
|
||||||
|
const canvasWidth = header.width + marginLeft + marginRight
|
||||||
|
const canvasHeight = header.height + marginTop + marginBottom
|
||||||
|
|
||||||
|
// Read motion vectors (1/16-pixel units, int16)
|
||||||
// Encoder writes ALL motion vectors including frame 0
|
// Encoder writes ALL motion vectors including frame 0
|
||||||
let motionX = new Array(gopSize)
|
let motionX = new Array(gopSize)
|
||||||
let motionY = new Array(gopSize)
|
let motionY = new Array(gopSize)
|
||||||
@@ -1042,7 +1053,7 @@ try {
|
|||||||
try {
|
try {
|
||||||
let decodeStart = sys.nanoTime()
|
let decodeStart = sys.nanoTime()
|
||||||
|
|
||||||
// Call GOP decoder
|
// Call GOP decoder with canvas expansion information
|
||||||
const [r1, r2] = graphics.tavDecodeGopUnified(
|
const [r1, r2] = graphics.tavDecodeGopUnified(
|
||||||
compressedPtr,
|
compressedPtr,
|
||||||
compressedSize,
|
compressedSize,
|
||||||
@@ -1050,8 +1061,12 @@ try {
|
|||||||
motionX,
|
motionX,
|
||||||
motionY,
|
motionY,
|
||||||
gopRGBBuffers, // Array of output buffer addresses
|
gopRGBBuffers, // Array of output buffer addresses
|
||||||
header.width,
|
header.width, // Original frame width
|
||||||
header.height,
|
header.height, // Original frame height
|
||||||
|
canvasWidth, // Expanded canvas width (preserves all pixels)
|
||||||
|
canvasHeight, // Expanded canvas height (preserves all pixels)
|
||||||
|
marginLeft, // Left margin
|
||||||
|
marginTop, // Top margin
|
||||||
header.qualityLevel,
|
header.qualityLevel,
|
||||||
QLUT[header.qualityY],
|
QLUT[header.qualityY],
|
||||||
QLUT[header.qualityCo],
|
QLUT[header.qualityCo],
|
||||||
|
|||||||
@@ -3181,11 +3181,35 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Symmetric padding (mirroring) for edge handling in motion compensation.
|
||||||
|
* This provides smoother edges than simple clamping/replication.
|
||||||
|
*
|
||||||
|
* @param coord The coordinate to mirror if out of bounds
|
||||||
|
* @param size The dimension size (width or height)
|
||||||
|
* @return The mirrored coordinate within valid range [0, size-1]
|
||||||
|
*/
|
||||||
|
private fun symmetricPadding(coord: Int, size: Int): Int {
|
||||||
|
var mirrored = coord
|
||||||
|
|
||||||
|
// Mirror for negative coordinates: -1 -> 0, -2 -> 1, -3 -> 2, etc.
|
||||||
|
if (mirrored < 0) {
|
||||||
|
mirrored = -mirrored - 1
|
||||||
|
}
|
||||||
|
// Mirror for coordinates beyond bounds: size -> size-1, size+1 -> size-2, etc.
|
||||||
|
else if (mirrored >= size) {
|
||||||
|
mirrored = 2 * size - mirrored - 1
|
||||||
|
}
|
||||||
|
|
||||||
|
// Final clamp to ensure we're within bounds (handles extreme cases)
|
||||||
|
return mirrored.coerceIn(0, size - 1)
|
||||||
|
}
|
||||||
|
|
||||||
private fun tevHandleMotionBlockTwoPass(startX: Int, startY: Int, mvX: Int, mvY: Int,
|
private fun tevHandleMotionBlockTwoPass(startX: Int, startY: Int, mvX: Int, mvY: Int,
|
||||||
currentRGBAddr: Long, prevRGBAddr: Long,
|
currentRGBAddr: Long, prevRGBAddr: Long,
|
||||||
width: Int, height: Int, thisAddrIncVec: Int, prevAddrIncVec: Int,
|
width: Int, height: Int, thisAddrIncVec: Int, prevAddrIncVec: Int,
|
||||||
debugMotionVectors: Boolean) {
|
debugMotionVectors: Boolean) {
|
||||||
// Copy 16x16 block with motion compensation
|
// Copy 16x16 block with motion compensation using symmetric padding
|
||||||
for (py in 0 until 16) {
|
for (py in 0 until 16) {
|
||||||
val y = startY + py
|
val y = startY + py
|
||||||
if (y >= height) break
|
if (y >= height) break
|
||||||
@@ -3194,8 +3218,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val x = startX + px
|
val x = startX + px
|
||||||
if (x >= width) break
|
if (x >= width) break
|
||||||
|
|
||||||
val srcX = (x + mvX).coerceIn(0, width - 1)
|
// Use symmetric padding instead of clamping for smoother edges
|
||||||
val srcY = (y + mvY).coerceIn(0, height - 1)
|
val srcX = symmetricPadding(x + mvX, width)
|
||||||
|
val srcY = symmetricPadding(y + mvY, height)
|
||||||
|
|
||||||
val srcOffset = (srcY * width + srcX) * 3
|
val srcOffset = (srcY * width + srcX) * 3
|
||||||
val dstOffset = (y * width + x) * 3
|
val dstOffset = (y * width + x) * 3
|
||||||
@@ -3226,8 +3251,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val x = startX + px
|
val x = startX + px
|
||||||
if (x >= width) break
|
if (x >= width) break
|
||||||
|
|
||||||
val srcX = (x + mvX).coerceIn(0, width - 1)
|
// Use symmetric padding for smoother edges (commented-out code updated for consistency)
|
||||||
val srcY = (y + mvY).coerceIn(0, height - 1)
|
val srcX = symmetricPadding(x + mvX, width)
|
||||||
|
val srcY = symmetricPadding(y + mvY, height)
|
||||||
|
|
||||||
val srcOffset = (srcY * width + srcX) * 3
|
val srcOffset = (srcY * width + srcX) * 3
|
||||||
val r = vm.peek(prevRGBAddr + srcOffset * prevAddrIncVec)?.toInt() ?: 0
|
val r = vm.peek(prevRGBAddr + srcOffset * prevAddrIncVec)?.toInt() ?: 0
|
||||||
@@ -6205,6 +6231,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
/**
|
/**
|
||||||
* Apply inverse translation (motion compensation) to a frame.
|
* Apply inverse translation (motion compensation) to a frame.
|
||||||
* Inverse operation: shifts by +dx, +dy (opposite of forward encoder).
|
* Inverse operation: shifts by +dx, +dy (opposite of forward encoder).
|
||||||
|
* Uses symmetric boundary extension (mirror padding) to match encoder.
|
||||||
*
|
*
|
||||||
* @param frameData Input frame data to shift
|
* @param frameData Input frame data to shift
|
||||||
* @param width Frame width
|
* @param width Frame width
|
||||||
@@ -6215,14 +6242,28 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
private fun applyInverseTranslation(frameData: FloatArray, width: Int, height: Int, dx: Int, dy: Int) {
|
private fun applyInverseTranslation(frameData: FloatArray, width: Int, height: Int, dx: Int, dy: Int) {
|
||||||
val output = FloatArray(width * height)
|
val output = FloatArray(width * height)
|
||||||
|
|
||||||
// Apply inverse translation with boundary clamping
|
// Apply inverse translation with symmetric boundary extension (mirror padding)
|
||||||
for (y in 0 until height) {
|
for (y in 0 until height) {
|
||||||
for (x in 0 until width) {
|
for (x in 0 until width) {
|
||||||
// Inverse: shift by +dx, +dy (opposite of encoder's -dx, -dy)
|
// Inverse: shift by +dx, +dy (opposite of encoder's -dx, -dy)
|
||||||
var srcX = x + dx
|
var srcX = x + dx
|
||||||
var srcY = y + dy
|
var srcY = y + dy
|
||||||
|
|
||||||
// Clamp to frame boundaries
|
// Symmetric extension at boundaries (mirror padding)
|
||||||
|
// This gives smooth edges instead of replicated stripes
|
||||||
|
if (srcX < 0) {
|
||||||
|
srcX = -srcX - 1 // Mirror left edge
|
||||||
|
} else if (srcX >= width) {
|
||||||
|
srcX = 2 * width - srcX - 1 // Mirror right edge
|
||||||
|
}
|
||||||
|
|
||||||
|
if (srcY < 0) {
|
||||||
|
srcY = -srcY - 1 // Mirror top edge
|
||||||
|
} else if (srcY >= height) {
|
||||||
|
srcY = 2 * height - srcY - 1 // Mirror bottom edge
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clamp after mirroring (in case of very large shifts)
|
||||||
srcX = srcX.coerceIn(0, width - 1)
|
srcX = srcX.coerceIn(0, width - 1)
|
||||||
srcY = srcY.coerceIn(0, height - 1)
|
srcY = srcY.coerceIn(0, height - 1)
|
||||||
|
|
||||||
@@ -6244,8 +6285,12 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
* @param motionVectorsX X motion vectors in 1/16-pixel units
|
* @param motionVectorsX X motion vectors in 1/16-pixel units
|
||||||
* @param motionVectorsY Y motion vectors in 1/16-pixel units
|
* @param motionVectorsY Y motion vectors in 1/16-pixel units
|
||||||
* @param outputRGBAddrs Array of output RGB buffer addresses
|
* @param outputRGBAddrs Array of output RGB buffer addresses
|
||||||
* @param width Frame width
|
* @param width Original frame width (output dimensions)
|
||||||
* @param height Frame height
|
* @param height Original frame height (output dimensions)
|
||||||
|
* @param canvasWidth Expanded canvas width (for motion compensation)
|
||||||
|
* @param canvasHeight Expanded canvas height (for motion compensation)
|
||||||
|
* @param marginLeft Left margin to crop from expanded canvas
|
||||||
|
* @param marginTop Top margin to crop from expanded canvas
|
||||||
* @param qIndex Quality index
|
* @param qIndex Quality index
|
||||||
* @param qYGlobal Global Y quantizer
|
* @param qYGlobal Global Y quantizer
|
||||||
* @param qCoGlobal Global Co quantizer
|
* @param qCoGlobal Global Co quantizer
|
||||||
@@ -6265,6 +6310,10 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
outputRGBAddrs: LongArray,
|
outputRGBAddrs: LongArray,
|
||||||
width: Int,
|
width: Int,
|
||||||
height: Int,
|
height: Int,
|
||||||
|
canvasWidth: Int,
|
||||||
|
canvasHeight: Int,
|
||||||
|
marginLeft: Int,
|
||||||
|
marginTop: Int,
|
||||||
qIndex: Int,
|
qIndex: Int,
|
||||||
qYGlobal: Int,
|
qYGlobal: Int,
|
||||||
qCoGlobal: Int,
|
qCoGlobal: Int,
|
||||||
@@ -6280,7 +6329,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
dbgOut["qCg"] = qCgGlobal
|
dbgOut["qCg"] = qCgGlobal
|
||||||
dbgOut["frameMode"] = "G"
|
dbgOut["frameMode"] = "G"
|
||||||
|
|
||||||
val numPixels = width * height
|
// Use expanded canvas dimensions for DWT processing
|
||||||
|
val canvasPixels = canvasWidth * canvasHeight
|
||||||
|
val outputPixels = width * height
|
||||||
|
|
||||||
// Step 1: Decompress unified GOP block
|
// Step 1: Decompress unified GOP block
|
||||||
val compressedData = ByteArray(compressedSize)
|
val compressedData = ByteArray(compressedSize)
|
||||||
@@ -6305,17 +6356,17 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val quantizedCoeffs = tavPostprocessGopUnified(
|
val quantizedCoeffs = tavPostprocessGopUnified(
|
||||||
decompressedData,
|
decompressedData,
|
||||||
gopSize,
|
gopSize,
|
||||||
numPixels,
|
canvasPixels, // Use expanded canvas size
|
||||||
channelLayout
|
channelLayout
|
||||||
)
|
)
|
||||||
|
|
||||||
// Step 3: Allocate GOP buffers for float coefficients
|
// Step 3: Allocate GOP buffers for float coefficients (expanded canvas size)
|
||||||
val gopY = Array(gopSize) { FloatArray(numPixels) }
|
val gopY = Array(gopSize) { FloatArray(canvasPixels) }
|
||||||
val gopCo = Array(gopSize) { FloatArray(numPixels) }
|
val gopCo = Array(gopSize) { FloatArray(canvasPixels) }
|
||||||
val gopCg = Array(gopSize) { FloatArray(numPixels) }
|
val gopCg = Array(gopSize) { FloatArray(canvasPixels) }
|
||||||
|
|
||||||
// Step 4: Calculate subband layout (needed for perceptual dequantization)
|
// Step 4: Calculate subband layout for expanded canvas (needed for perceptual dequantization)
|
||||||
val subbands = calculateSubbandLayout(width, height, spatialLevels)
|
val subbands = calculateSubbandLayout(canvasWidth, canvasHeight, spatialLevels)
|
||||||
|
|
||||||
// Step 5: Dequantize with temporal-spatial scaling
|
// Step 5: Dequantize with temporal-spatial scaling
|
||||||
for (t in 0 until gopSize) {
|
for (t in 0 until gopSize) {
|
||||||
@@ -6347,49 +6398,60 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step 6: Apply inverse 3D DWT (spatial first, then temporal)
|
// Step 6: Apply inverse 3D DWT (spatial first, then temporal) on expanded canvas
|
||||||
tavApplyInverse3DDWT(gopY, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
tavApplyInverse3DDWT(gopY, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
||||||
tavApplyInverse3DDWT(gopCo, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
tavApplyInverse3DDWT(gopCo, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
||||||
tavApplyInverse3DDWT(gopCg, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
tavApplyInverse3DDWT(gopCg, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
||||||
|
|
||||||
// Step 7: Apply inverse motion compensation (shift frames back)
|
// Step 7: Apply inverse motion compensation (shift frames back) on expanded canvas
|
||||||
// Note: Motion vectors are in 1/16-pixel units, cumulative relative to frame 0
|
// Note: Motion vectors are in 1/16-pixel units, cumulative relative to frame 0
|
||||||
for (t in 1 until gopSize) { // Skip frame 0 (reference)
|
for (t in 1 until gopSize) { // Skip frame 0 (reference)
|
||||||
val dx = motionVectorsX[t] / 16 // Convert to pixel units
|
val dx = motionVectorsX[t] / 16 // Convert to pixel units
|
||||||
val dy = motionVectorsY[t] / 16
|
val dy = motionVectorsY[t] / 16
|
||||||
|
|
||||||
if (dx != 0 || dy != 0) {
|
if (dx != 0 || dy != 0) {
|
||||||
applyInverseTranslation(gopY[t], width, height, dx, dy)
|
applyInverseTranslation(gopY[t], canvasWidth, canvasHeight, dx, dy)
|
||||||
applyInverseTranslation(gopCo[t], width, height, dx, dy)
|
applyInverseTranslation(gopCo[t], canvasWidth, canvasHeight, dx, dy)
|
||||||
applyInverseTranslation(gopCg[t], width, height, dx, dy)
|
applyInverseTranslation(gopCg[t], canvasWidth, canvasHeight, dx, dy)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step 8: Convert each frame to RGB and write to output buffers
|
// Step 8: Crop expanded canvas to original dimensions and convert to RGB
|
||||||
for (t in 0 until gopSize) {
|
for (t in 0 until gopSize) {
|
||||||
val rgbAddr = outputRGBAddrs[t]
|
val rgbAddr = outputRGBAddrs[t]
|
||||||
|
|
||||||
for (i in 0 until numPixels) {
|
// Crop from expanded canvas (canvasWidth x canvasHeight) to output (width x height)
|
||||||
val y = gopY[t][i]
|
for (row in 0 until height) {
|
||||||
val co = gopCo[t][i]
|
for (col in 0 until width) {
|
||||||
val cg = gopCg[t][i]
|
// Source pixel in expanded canvas
|
||||||
|
val canvasX = col + marginLeft
|
||||||
|
val canvasY = row + marginTop
|
||||||
|
val canvasIdx = canvasY * canvasWidth + canvasX
|
||||||
|
|
||||||
// YCoCg-R to RGB conversion
|
// Destination pixel in output buffer
|
||||||
val tmp = y - (cg / 2.0f)
|
val outIdx = row * width + col
|
||||||
val g = cg + tmp
|
|
||||||
val b = tmp - (co / 2.0f)
|
|
||||||
val r = b + co
|
|
||||||
|
|
||||||
// Clamp to 0-255 range
|
val yVal = gopY[t][canvasIdx]
|
||||||
val rClamped = r.toInt().coerceIn(0, 255)
|
val co = gopCo[t][canvasIdx]
|
||||||
val gClamped = g.toInt().coerceIn(0, 255)
|
val cg = gopCg[t][canvasIdx]
|
||||||
val bClamped = b.toInt().coerceIn(0, 255)
|
|
||||||
|
|
||||||
// Write RGB24 format (3 bytes per pixel)
|
// YCoCg-R to RGB conversion
|
||||||
val offset = rgbAddr + i * 3L
|
val tmp = yVal - (cg / 2.0f)
|
||||||
vm.usermem[offset] = rClamped.toByte()
|
val g = cg + tmp
|
||||||
vm.usermem[offset + 1] = gClamped.toByte()
|
val b = tmp - (co / 2.0f)
|
||||||
vm.usermem[offset + 2] = bClamped.toByte()
|
val r = b + co
|
||||||
|
|
||||||
|
// Clamp to 0-255 range
|
||||||
|
val rClamped = r.toInt().coerceIn(0, 255)
|
||||||
|
val gClamped = g.toInt().coerceIn(0, 255)
|
||||||
|
val bClamped = b.toInt().coerceIn(0, 255)
|
||||||
|
|
||||||
|
// Write RGB24 format (3 bytes per pixel)
|
||||||
|
val offset = rgbAddr + outIdx * 3L
|
||||||
|
vm.usermem[offset] = rClamped.toByte()
|
||||||
|
vm.usermem[offset + 1] = gClamped.toByte()
|
||||||
|
vm.usermem[offset + 2] = bClamped.toByte()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1455,19 +1455,22 @@ static void phase_correlate_fft(const uint8_t *frame1_rgb, const uint8_t *frame2
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Apply translation to frame (for frame alignment before temporal DWT)
|
// Apply translation to frame (for frame alignment before temporal DWT)
|
||||||
|
// NO PADDING - only extracts the valid region that will be common across all frames
|
||||||
static void apply_translation(float *frame_data, int width, int height,
|
static void apply_translation(float *frame_data, int width, int height,
|
||||||
int16_t dx_qpel, int16_t dy_qpel, float *output) {
|
int16_t dx_qpel, int16_t dy_qpel, float *output) {
|
||||||
// Convert 1/16-pixel to pixel (for now, just use integer translation)
|
// Convert 1/16-pixel to pixel (for now, just use integer translation)
|
||||||
int dx = dx_qpel / 16;
|
int dx = dx_qpel / 16;
|
||||||
int dy = dy_qpel / 16;
|
int dy = dy_qpel / 16;
|
||||||
|
|
||||||
// Apply translation with boundary handling
|
// Apply translation WITHOUT padding - just shift the content
|
||||||
|
// Out-of-bounds regions will be cropped away later
|
||||||
for (int y = 0; y < height; y++) {
|
for (int y = 0; y < height; y++) {
|
||||||
for (int x = 0; x < width; x++) {
|
for (int x = 0; x < width; x++) {
|
||||||
int src_x = x - dx;
|
int src_x = x - dx;
|
||||||
int src_y = y - dy;
|
int src_y = y - dy;
|
||||||
|
|
||||||
// Clamp to frame boundaries
|
// Clamp to valid region (this will create edge repetition, but those
|
||||||
|
// edges will be cropped away, so it doesn't matter what we put there)
|
||||||
src_x = CLAMP(src_x, 0, width - 1);
|
src_x = CLAMP(src_x, 0, width - 1);
|
||||||
src_y = CLAMP(src_y, 0, height - 1);
|
src_y = CLAMP(src_y, 0, height - 1);
|
||||||
|
|
||||||
@@ -1476,6 +1479,22 @@ static void apply_translation(float *frame_data, int width, int height,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Extract cropped region from a frame after alignment
|
||||||
|
static void extract_crop(const float *frame_data, int width, int height,
|
||||||
|
int crop_left, int crop_right, int crop_top, int crop_bottom,
|
||||||
|
float *cropped_output) {
|
||||||
|
int valid_width = width - crop_left - crop_right;
|
||||||
|
int valid_height = height - crop_top - crop_bottom;
|
||||||
|
|
||||||
|
for (int y = 0; y < valid_height; y++) {
|
||||||
|
for (int x = 0; x < valid_width; x++) {
|
||||||
|
int src_x = x + crop_left;
|
||||||
|
int src_y = y + crop_top;
|
||||||
|
cropped_output[y * valid_width + x] = frame_data[src_y * width + src_x];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
// Temporal Subband Quantization
|
// Temporal Subband Quantization
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
@@ -1598,7 +1617,7 @@ static int gop_add_frame(tav_encoder_t *enc, const uint8_t *frame_rgb,
|
|||||||
memcpy(enc->gop_cg_frames[frame_idx], frame_cg, frame_channel_size);
|
memcpy(enc->gop_cg_frames[frame_idx], frame_cg, frame_channel_size);
|
||||||
|
|
||||||
// Compute translation vector if not first frame
|
// Compute translation vector if not first frame
|
||||||
if (frame_idx > 0) {
|
/*if (frame_idx > 0) {
|
||||||
phase_correlate_fft(enc->gop_rgb_frames[frame_idx - 1],
|
phase_correlate_fft(enc->gop_rgb_frames[frame_idx - 1],
|
||||||
enc->gop_rgb_frames[frame_idx],
|
enc->gop_rgb_frames[frame_idx],
|
||||||
enc->width, enc->height,
|
enc->width, enc->height,
|
||||||
@@ -1615,7 +1634,11 @@ static int gop_add_frame(tav_encoder_t *enc, const uint8_t *frame_rgb,
|
|||||||
// First frame has no translation
|
// First frame has no translation
|
||||||
enc->gop_translation_x[0] = 0;
|
enc->gop_translation_x[0] = 0;
|
||||||
enc->gop_translation_y[0] = 0;
|
enc->gop_translation_y[0] = 0;
|
||||||
}
|
}*/
|
||||||
|
|
||||||
|
// disabling frame realigning: producing worse results in general
|
||||||
|
enc->gop_translation_x[frame_idx] = 0.0f;
|
||||||
|
enc->gop_translation_y[frame_idx] = 0.0f;
|
||||||
|
|
||||||
enc->gop_frame_count++;
|
enc->gop_frame_count++;
|
||||||
return 0;
|
return 0;
|
||||||
@@ -1675,7 +1698,7 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Allocate working buffers for each channel
|
// Allocate working buffers for each channel
|
||||||
const int num_pixels = enc->width * enc->height;
|
int num_pixels = enc->width * enc->height; // Will be updated if frames are cropped
|
||||||
float **gop_y_coeffs = malloc(actual_gop_size * sizeof(float*));
|
float **gop_y_coeffs = malloc(actual_gop_size * sizeof(float*));
|
||||||
float **gop_co_coeffs = malloc(actual_gop_size * sizeof(float*));
|
float **gop_co_coeffs = malloc(actual_gop_size * sizeof(float*));
|
||||||
float **gop_cg_coeffs = malloc(actual_gop_size * sizeof(float*));
|
float **gop_cg_coeffs = malloc(actual_gop_size * sizeof(float*));
|
||||||
@@ -1719,6 +1742,34 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Step 0.5b: Calculate the valid region after alignment (crop bounds)
|
||||||
|
// Find the bounding box that's valid across all aligned frames
|
||||||
|
int min_dx = 0, max_dx = 0, min_dy = 0, max_dy = 0;
|
||||||
|
for (int i = 0; i < actual_gop_size; i++) {
|
||||||
|
int dx = enc->gop_translation_x[i] / 16;
|
||||||
|
int dy = enc->gop_translation_y[i] / 16;
|
||||||
|
if (dx < min_dx) min_dx = dx;
|
||||||
|
if (dx > max_dx) max_dx = dx;
|
||||||
|
if (dy < min_dy) min_dy = dy;
|
||||||
|
if (dy > max_dy) max_dy = dy;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Crop region: the area valid in all frames
|
||||||
|
// When we shift right by +N, we lose N pixels on the left, so crop left edge by abs(min_dx)
|
||||||
|
// When we shift left by -N, we lose N pixels on the right, so crop right edge by max_dx
|
||||||
|
int crop_left = (min_dx < 0) ? -min_dx : 0;
|
||||||
|
int crop_right = (max_dx > 0) ? max_dx : 0;
|
||||||
|
int crop_top = (min_dy < 0) ? -min_dy : 0;
|
||||||
|
int crop_bottom = (max_dy > 0) ? max_dy : 0;
|
||||||
|
|
||||||
|
int valid_width = enc->width - crop_left - crop_right;
|
||||||
|
int valid_height = enc->height - crop_top - crop_bottom;
|
||||||
|
|
||||||
|
if (enc->verbose && (crop_left || crop_right || crop_top || crop_bottom)) {
|
||||||
|
printf("Valid region after alignment: %dx%d (cropped: L=%d R=%d T=%d B=%d)\n",
|
||||||
|
valid_width, valid_height, crop_left, crop_right, crop_top, crop_bottom);
|
||||||
|
}
|
||||||
|
|
||||||
// Step 0.6: Apply motion compensation to align frames before temporal DWT
|
// Step 0.6: Apply motion compensation to align frames before temporal DWT
|
||||||
// This uses the cumulative translation vectors to align each frame to frame 0
|
// This uses the cumulative translation vectors to align each frame to frame 0
|
||||||
for (int i = 1; i < actual_gop_size; i++) { // Skip frame 0 (reference frame)
|
for (int i = 1; i < actual_gop_size; i++) { // Skip frame 0 (reference frame)
|
||||||
@@ -1753,23 +1804,122 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
|
|||||||
free(aligned_cg);
|
free(aligned_cg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Step 0.7: Expand frames to larger canvas that preserves ALL original pixels
|
||||||
|
// Calculate expanded canvas size (UNION of all aligned frames)
|
||||||
|
int canvas_width = enc->width + crop_left + crop_right; // Original width + total shift range
|
||||||
|
int canvas_height = enc->height + crop_top + crop_bottom; // Original height + total shift range
|
||||||
|
int canvas_pixels = canvas_width * canvas_height;
|
||||||
|
|
||||||
|
if (enc->verbose && (crop_left || crop_right || crop_top || crop_bottom)) {
|
||||||
|
printf("Expanded canvas: %dx%d (original %dx%d + margins L=%d R=%d T=%d B=%d)\n",
|
||||||
|
canvas_width, canvas_height, enc->width, enc->height,
|
||||||
|
crop_left, crop_right, crop_top, crop_bottom);
|
||||||
|
printf("This preserves all original pixels from all frames after alignment\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate expanded canvas buffers
|
||||||
|
float **canvas_y_coeffs = malloc(actual_gop_size * sizeof(float*));
|
||||||
|
float **canvas_co_coeffs = malloc(actual_gop_size * sizeof(float*));
|
||||||
|
float **canvas_cg_coeffs = malloc(actual_gop_size * sizeof(float*));
|
||||||
|
|
||||||
|
for (int i = 0; i < actual_gop_size; i++) {
|
||||||
|
canvas_y_coeffs[i] = calloc(canvas_pixels, sizeof(float)); // Zero-initialized
|
||||||
|
canvas_co_coeffs[i] = calloc(canvas_pixels, sizeof(float));
|
||||||
|
canvas_cg_coeffs[i] = calloc(canvas_pixels, sizeof(float));
|
||||||
|
|
||||||
|
// Place the aligned frame onto the canvas at the appropriate offset
|
||||||
|
// Each frame's aligned position determines where it sits on the canvas
|
||||||
|
int offset_x = crop_left; // Frames are offset by the left margin
|
||||||
|
int offset_y = crop_top; // Frames are offset by the top margin
|
||||||
|
|
||||||
|
// Copy the full aligned frame onto the canvas (preserves all original content)
|
||||||
|
for (int y = 0; y < enc->height; y++) {
|
||||||
|
for (int x = 0; x < enc->width; x++) {
|
||||||
|
int src_idx = y * enc->width + x;
|
||||||
|
int dst_idx = (y + offset_y) * canvas_width + (x + offset_x);
|
||||||
|
canvas_y_coeffs[i][dst_idx] = gop_y_coeffs[i][src_idx];
|
||||||
|
canvas_co_coeffs[i][dst_idx] = gop_co_coeffs[i][src_idx];
|
||||||
|
canvas_cg_coeffs[i][dst_idx] = gop_cg_coeffs[i][src_idx];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill margin areas with symmetric padding from frame edges
|
||||||
|
for (int y = 0; y < canvas_height; y++) {
|
||||||
|
for (int x = 0; x < canvas_width; x++) {
|
||||||
|
// Skip pixels in the original frame region (already copied)
|
||||||
|
if (y >= offset_y && y < offset_y + enc->height &&
|
||||||
|
x >= offset_x && x < offset_x + enc->width) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate position relative to original frame
|
||||||
|
int src_x = x - offset_x;
|
||||||
|
int src_y = y - offset_y;
|
||||||
|
|
||||||
|
// Apply symmetric padding (mirroring)
|
||||||
|
if (src_x < 0) {
|
||||||
|
src_x = -src_x - 1; // Mirror left edge: -1→0, -2→1, -3→2
|
||||||
|
} else if (src_x >= enc->width) {
|
||||||
|
src_x = 2 * enc->width - src_x - 1; // Mirror right edge
|
||||||
|
}
|
||||||
|
|
||||||
|
if (src_y < 0) {
|
||||||
|
src_y = -src_y - 1; // Mirror top edge
|
||||||
|
} else if (src_y >= enc->height) {
|
||||||
|
src_y = 2 * enc->height - src_y - 1; // Mirror bottom edge
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clamp to valid range (safety for extreme cases)
|
||||||
|
src_x = CLAMP(src_x, 0, enc->width - 1);
|
||||||
|
src_y = CLAMP(src_y, 0, enc->height - 1);
|
||||||
|
|
||||||
|
// Copy mirrored pixel from original frame to canvas margin
|
||||||
|
int src_idx = src_y * enc->width + src_x;
|
||||||
|
int dst_idx = y * canvas_width + x;
|
||||||
|
canvas_y_coeffs[i][dst_idx] = gop_y_coeffs[i][src_idx];
|
||||||
|
canvas_co_coeffs[i][dst_idx] = gop_co_coeffs[i][src_idx];
|
||||||
|
canvas_cg_coeffs[i][dst_idx] = gop_cg_coeffs[i][src_idx];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Free the original frame (no longer needed)
|
||||||
|
free(gop_y_coeffs[i]);
|
||||||
|
free(gop_co_coeffs[i]);
|
||||||
|
free(gop_cg_coeffs[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Replace pointers with expanded canvas
|
||||||
|
free(gop_y_coeffs);
|
||||||
|
free(gop_co_coeffs);
|
||||||
|
free(gop_cg_coeffs);
|
||||||
|
gop_y_coeffs = canvas_y_coeffs;
|
||||||
|
gop_co_coeffs = canvas_co_coeffs;
|
||||||
|
gop_cg_coeffs = canvas_cg_coeffs;
|
||||||
|
|
||||||
|
// Update dimensions to canvas size
|
||||||
|
valid_width = canvas_width;
|
||||||
|
valid_height = canvas_height;
|
||||||
|
num_pixels = canvas_pixels;
|
||||||
|
|
||||||
// Step 1: For single-frame GOP, skip temporal DWT and use traditional I-frame path
|
// Step 1: For single-frame GOP, skip temporal DWT and use traditional I-frame path
|
||||||
if (actual_gop_size == 1) {
|
if (actual_gop_size == 1) {
|
||||||
// Apply only 2D spatial DWT (no temporal transform for single frame)
|
// Apply only 2D spatial DWT (no temporal transform for single frame)
|
||||||
dwt_2d_forward_flexible(gop_y_coeffs[0], enc->width, enc->height,
|
// Use cropped dimensions (will be full size if no motion)
|
||||||
|
dwt_2d_forward_flexible(gop_y_coeffs[0], valid_width, valid_height,
|
||||||
enc->decomp_levels, enc->wavelet_filter);
|
enc->decomp_levels, enc->wavelet_filter);
|
||||||
dwt_2d_forward_flexible(gop_co_coeffs[0], enc->width, enc->height,
|
dwt_2d_forward_flexible(gop_co_coeffs[0], valid_width, valid_height,
|
||||||
enc->decomp_levels, enc->wavelet_filter);
|
enc->decomp_levels, enc->wavelet_filter);
|
||||||
dwt_2d_forward_flexible(gop_cg_coeffs[0], enc->width, enc->height,
|
dwt_2d_forward_flexible(gop_cg_coeffs[0], valid_width, valid_height,
|
||||||
enc->decomp_levels, enc->wavelet_filter);
|
enc->decomp_levels, enc->wavelet_filter);
|
||||||
} else {
|
} else {
|
||||||
// Multi-frame GOP: Apply 3D DWT (temporal + spatial) to each channel
|
// Multi-frame GOP: Apply 3D DWT (temporal + spatial) to each channel
|
||||||
// Note: This modifies gop_*_coeffs in-place
|
// Note: This modifies gop_*_coeffs in-place
|
||||||
dwt_3d_forward(gop_y_coeffs, enc->width, enc->height, actual_gop_size,
|
// Use cropped dimensions to encode only the valid region
|
||||||
|
dwt_3d_forward(gop_y_coeffs, valid_width, valid_height, actual_gop_size,
|
||||||
enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter);
|
enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter);
|
||||||
dwt_3d_forward(gop_co_coeffs, enc->width, enc->height, actual_gop_size,
|
dwt_3d_forward(gop_co_coeffs, valid_width, valid_height, actual_gop_size,
|
||||||
enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter);
|
enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter);
|
||||||
dwt_3d_forward(gop_cg_coeffs, enc->width, enc->height, actual_gop_size,
|
dwt_3d_forward(gop_cg_coeffs, valid_width, valid_height, actual_gop_size,
|
||||||
enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter);
|
enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1875,7 +2025,7 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
|
|||||||
} else {
|
} else {
|
||||||
// Multi-frame GOP: use unified 3D DWT encoding
|
// Multi-frame GOP: use unified 3D DWT encoding
|
||||||
// Write unified GOP packet header
|
// Write unified GOP packet header
|
||||||
// Packet structure: [packet_type=0x12][gop_size][motion_vectors...][compressed_size][compressed_data]
|
// Packet structure: [packet_type=0x12][gop_size][crop_info][motion_vectors...][compressed_size][compressed_data]
|
||||||
uint8_t packet_type = TAV_PACKET_GOP_UNIFIED;
|
uint8_t packet_type = TAV_PACKET_GOP_UNIFIED;
|
||||||
fwrite(&packet_type, 1, 1, output);
|
fwrite(&packet_type, 1, 1, output);
|
||||||
total_bytes_written += 1;
|
total_bytes_written += 1;
|
||||||
@@ -1885,6 +2035,18 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
|
|||||||
fwrite(&gop_size_byte, 1, 1, output);
|
fwrite(&gop_size_byte, 1, 1, output);
|
||||||
total_bytes_written += 1;
|
total_bytes_written += 1;
|
||||||
|
|
||||||
|
// Write canvas expansion information (4 bytes)
|
||||||
|
// This tells the decoder the margins added to preserve all original pixels
|
||||||
|
// The encoded canvas is larger than the original frame to preserve edge content after alignment
|
||||||
|
uint8_t canvas_margins[4] = {
|
||||||
|
(uint8_t)crop_left, // Left margin
|
||||||
|
(uint8_t)crop_right, // Right margin
|
||||||
|
(uint8_t)crop_top, // Top margin
|
||||||
|
(uint8_t)crop_bottom // Bottom margin
|
||||||
|
};
|
||||||
|
fwrite(canvas_margins, 1, 4, output);
|
||||||
|
total_bytes_written += 4;
|
||||||
|
|
||||||
// Write all motion vectors (1/16-pixel precision) for the entire GOP
|
// Write all motion vectors (1/16-pixel precision) for the entire GOP
|
||||||
for (int t = 0; t < actual_gop_size; t++) {
|
for (int t = 0; t < actual_gop_size; t++) {
|
||||||
int16_t dx = enc->gop_translation_x[t];
|
int16_t dx = enc->gop_translation_x[t];
|
||||||
|
|||||||
Reference in New Issue
Block a user