TAV: letterbox detection encoding complete

This commit is contained in:
minjaesong
2025-11-20 02:13:45 +09:00
parent 44cc54264a
commit 92274a8e19
5 changed files with 1242 additions and 220 deletions

View File

@@ -1007,6 +1007,12 @@ transmission capability, and region-of-interest coding.
0xE2: ID3v2 packet
0xE3: Vorbis Comment packet
0xE4: CD-text packet
<Extensible>
0x01: Vendor-specific video packets
0x02: Vendor-specific audio frame
0x03: Vendor-specific subtitle
0x04: Vendor-specific audio file
0x0E: Vendor-specific metadata
<Special packets>
0x00: No-op (no payload)
0xEF: TAV Extended Header
@@ -1068,6 +1074,16 @@ transmission capability, and region-of-interest coding.
- Bytes VNDR: Name and version of the encoder (for Reference encoder: "Encoder-TAV 20251014 (list,of,features)")
- Bytes FMPG: FFmpeg version (typically "ffmpeg version 8.0 Copyright (c) 2000-2025 the FFmpeg developers"; the first line of text FFmpeg emits)
## Extensible Packet Structure
uint8 Packet Type
uint8 Flags
- 0x01: 64-bit size
uint8 Identifier[4]
<if 64-bit size>
uint64 Length of the payload
<if not>
uint32 Length of the payload
* Payload
## Standard Metadata Payload Packet Structure
uint8 Packet Type (0xE0/0xE1/0xE2/.../0xEE; see Packet Types section)

View File

@@ -4713,24 +4713,120 @@ class GraphicsJSR223Delegate(private val vm: VM) {
return output
}
/**
 * Peek at EZBC dimensions without decoding the full bitstream.
 *
 * Reads the 4-byte little-endian Y-channel size header at [compressedOffset],
 * then parses the EZBC header fields (8-bit MSB bitplane, 16-bit width,
 * 16-bit height, all read LSB-first) to recover the encoded dimensions.
 *
 * @param compressedData EZBC frame data
 * @param compressedOffset Offset to EZBC channel data
 * @param channelLayout Channel layout; bit 0x04 set means the Y channel is absent
 * @return Pair of (width, height), or null when the Y channel is missing,
 *         the payload is too small, or the header cannot be read
 */
private fun ezbc_peek_dimensions(compressedData: ByteArray, compressedOffset: Int, channelLayout: Int): Pair<Int, Int>? {
    // Dimensions are only carried in the Y channel header.
    if ((channelLayout and 4) != 0) return null
    return try {
        // 4-byte little-endian size of the Y channel payload.
        val payloadSize = (compressedData[compressedOffset].toInt() and 0xFF) or
                ((compressedData[compressedOffset + 1].toInt() and 0xFF) shl 8) or
                ((compressedData[compressedOffset + 2].toInt() and 0xFF) shl 16) or
                ((compressedData[compressedOffset + 3].toInt() and 0xFF) shl 24)
        // Too small to contain an EZBC header.
        if (payloadSize < 6) return null
        val payloadStart = compressedOffset + 4
        val payloadEnd = payloadStart + payloadSize
        var cursor = payloadStart
        var bitIndex = 0
        // LSB-first bit reader over the payload; yields 0 once the declared
        // payload is exhausted (same behaviour as the original reader).
        fun readBits(count: Int): Int {
            var value = 0
            repeat(count) { i ->
                if (cursor >= payloadEnd) return 0
                value = value or (((compressedData[cursor].toInt() shr bitIndex) and 1) shl i)
                bitIndex++
                if (bitIndex == 8) {
                    bitIndex = 0
                    cursor++
                }
            }
            return value
        }
        // Header layout: MSB bitplane (8 bits), width (16 bits), height (16 bits).
        readBits(8) // MSB bitplane is irrelevant for peeking
        val w = readBits(16)
        val h = readBits(16)
        if (w > 0 && h > 0) Pair(w, h) else null
    } catch (e: Exception) {
        // Best-effort peek: any out-of-bounds access simply means "unknown".
        null
    }
}
/**
 * Result of EZBC GOP decoding with actual dimensions (for crop encoding support).
 *
 * NOTE: [coeffs] is an Array, so the compiler-generated equals()/hashCode() of
 * this data class compare it by reference, not by content — do not rely on
 * structural equality between instances.
 */
data class EZBCGopResult(
// Per-frame coefficients indexed as [frame][channel]; channel 0=Y, 1=Co, 2=Cg
val coeffs: Array<Array<ShortArray>>,
// Actual decoded GOP width (may be smaller than the full frame when crop-encoded)
val width: Int,
// Actual decoded GOP height (may be smaller than the full frame when crop-encoded)
val height: Int
)
/**
* Reconstruct per-frame coefficients from unified GOP block (EZBC format)
* Format: [frame0_size(4)][frame0_ezbc][frame1_size(4)][frame1_ezbc]...
*
* With crop encoding, GOP dimensions may differ from full frame size.
* This function detects the actual dimensions from EZBC headers.
*
* @param decompressedData Unified EZBC block data (after Zstd decompression)
* @param numFrames Number of frames in GOP
* @param numPixels Pixels per frame (width × height)
* @param numPixels Pixels per frame for full frame (width × height)
* @param channelLayout Channel layout (0=YCoCg, 2=Y-only, etc)
* @return Array of [frame][channel] where channel: 0=Y, 1=Co, 2=Cg
* @param fullWidth Full frame width (for crop detection)
* @param fullHeight Full frame height (for crop detection)
* @return EZBCGopResult with coeffs array and actual GOP dimensions
*/
private fun tavPostprocessGopEZBC(
decompressedData: ByteArray,
numFrames: Int,
numPixels: Int,
channelLayout: Int
): Array<Array<ShortArray>> {
// Allocate output arrays
val output = Array(numFrames) { Array(3) { ShortArray(numPixels) } }
channelLayout: Int,
fullWidth: Int,
fullHeight: Int
): EZBCGopResult {
// Peek at first frame's EZBC dimensions to detect crop encoding
var actualWidth = fullWidth
var actualHeight = fullHeight
var actualPixels = numPixels
if (decompressedData.size >= 8) {
// Read first frame size
val firstFrameSize = ((decompressedData[0].toInt() and 0xFF) or
((decompressedData[1].toInt() and 0xFF) shl 8) or
((decompressedData[2].toInt() and 0xFF) shl 16) or
((decompressedData[3].toInt() and 0xFF) shl 24))
if (4 + firstFrameSize <= decompressedData.size) {
// Peek at EZBC dimensions from first frame's Y channel
val dims = ezbc_peek_dimensions(decompressedData, 4, channelLayout)
if (dims != null) {
actualWidth = dims.first
actualHeight = dims.second
actualPixels = actualWidth * actualHeight
if (actualPixels != numPixels) {
println("[TAV-GOP-EZBC] Detected crop encoding: GOP ${actualWidth}x${actualHeight} (${actualPixels} px) vs full frame ${fullWidth}x${fullHeight} (${numPixels} px)")
}
}
}
}
// Allocate output arrays with actual GOP dimensions
val output = Array(numFrames) { Array(3) { ShortArray(actualPixels) } }
var offset = 0
for (frame in 0 until numFrames) {
@@ -4745,39 +4841,52 @@ class GraphicsJSR223Delegate(private val vm: VM) {
if (offset + frameSize > decompressedData.size) break
// Decode this frame with EZBC
// Decode this frame with EZBC using actual GOP dimensions
postprocessCoefficientsEZBC(
decompressedData, offset, numPixels, channelLayout,
decompressedData, offset, actualPixels, channelLayout,
output[frame][0], output[frame][1], output[frame][2], null
)
offset += frameSize
}
return output
return EZBCGopResult(output, actualWidth, actualHeight)
}
/**
 * Result of GOP decoding with actual dimensions (for crop encoding support).
 *
 * NOTE: [coeffs] is an Array, so the compiler-generated equals()/hashCode() of
 * this data class compare it by reference, not by content — do not rely on
 * structural equality between instances.
 */
data class GopDecodeResult(
// True when the GOP was entropy-coded with EZBC; false for the twobit-map format
val isEZBC: Boolean,
// Per-frame coefficients indexed as [frame][channel]
val coeffs: Array<Array<ShortArray>>,
// Actual GOP width (may differ from the full frame width with crop encoding)
val width: Int,
// Actual GOP height (may differ from the full frame height with crop encoding)
val height: Int
)
/**
* Auto-detecting GOP postprocessor
* Detects EZBC vs twobit-map format and calls appropriate decoder
* Returns actual GOP dimensions (may differ from full frame with crop encoding)
*/
private fun tavPostprocessGopAuto(
decompressedData: ByteArray,
numFrames: Int,
numPixels: Int,
channelLayout: Int,
entropyCoder: Int
): Pair<Boolean, Array<Array<ShortArray>>> {
entropyCoder: Int,
fullWidth: Int,
fullHeight: Int
): GopDecodeResult {
// Read entropy coder from header: 0 = Twobit-map, 1 = EZBC
val isEZBC = (entropyCoder == 1)
val data = if (isEZBC) {
tavPostprocessGopEZBC(decompressedData, numFrames, numPixels, channelLayout)
return if (isEZBC) {
val result = tavPostprocessGopEZBC(decompressedData, numFrames, numPixels, channelLayout, fullWidth, fullHeight)
GopDecodeResult(true, result.coeffs, result.width, result.height)
} else {
tavPostprocessGopUnified(decompressedData, numFrames, numPixels, channelLayout)
val coeffs = tavPostprocessGopUnified(decompressedData, numFrames, numPixels, channelLayout)
GopDecodeResult(false, coeffs, fullWidth, fullHeight)
}
return Pair(isEZBC, data)
}
// TAV Simulated overlapping tiles constants (must match encoder)
@@ -6446,21 +6555,33 @@ class GraphicsJSR223Delegate(private val vm: VM) {
}
// Step 2: Postprocess unified block to per-frame coefficients
val (isEZBCMode, quantizedCoeffs) = tavPostprocessGopAuto(
// With crop encoding, GOP dimensions may differ from full frame
val gopResult = tavPostprocessGopAuto(
decompressedData,
gopSize,
outputPixels,
channelLayout,
entropyCoder
entropyCoder,
width,
height
)
// Step 3: Allocate GOP buffers for float coefficients (expanded canvas size)
val gopY = Array(gopSize) { FloatArray(outputPixels) }
val gopCo = Array(gopSize) { FloatArray(outputPixels) }
val gopCg = Array(gopSize) { FloatArray(outputPixels) }
val isEZBCMode = gopResult.isEZBC
val quantizedCoeffs = gopResult.coeffs
val gopWidth = gopResult.width
val gopHeight = gopResult.height
val gopPixels = gopWidth * gopHeight
// Step 4: Calculate subband layout for expanded canvas
val subbands = calculateSubbandLayout(width, height, spatialLevels)
// Detect crop encoding
val isCropEncoded = (gopWidth != width || gopHeight != height)
// Step 3: Allocate GOP buffers for float coefficients (GOP dimensions)
val gopY = Array(gopSize) { FloatArray(gopPixels) }
val gopCo = Array(gopSize) { FloatArray(gopPixels) }
val gopCg = Array(gopSize) { FloatArray(gopPixels) }
// Step 4: Calculate subband layout for GOP dimensions (may be cropped)
val subbands = calculateSubbandLayout(gopWidth, gopHeight, spatialLevels)
// Step 5: Dequantize with temporal-spatial scaling
for (t in 0 until gopSize) {
@@ -6503,32 +6624,44 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Step 5.5: Remove grain synthesis from Y channel for each GOP frame
// This must happen after dequantization but before inverse DWT
// Use GOP dimensions (may be cropped)
for (t in 0 until gopSize) {
removeGrainSynthesisDecoder(
gopY[t], width, height,
gopY[t], gopWidth, gopHeight,
rngFrameTick.getAndAdd(1) + t,
subbands, qIndex
)
}
// Step 6: Apply inverse 3D DWT
tavApplyInverse3DDWT(gopY, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
tavApplyInverse3DDWT(gopCo, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
tavApplyInverse3DDWT(gopCg, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
// Step 6: Apply inverse 3D DWT using GOP dimensions (may be cropped)
tavApplyInverse3DDWT(gopY, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
tavApplyInverse3DDWT(gopCo, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
tavApplyInverse3DDWT(gopCg, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
// Step 8: Convert to RGB and composite to full frame
// With crop encoding, center the cropped frame and fill letterbox areas with black
val maskTop = if (isCropEncoded && gopHeight < height) (height - gopHeight) / 2 else 0
val maskLeft = if (isCropEncoded && gopWidth < width) (width - gopWidth) / 2 else 0
// Step 8: Crop and convert to RGB, write directly to videoBuffer
for (t in 0 until gopSize) {
val videoBufferOffset = bufferOffset + (t * frameSize) // Each frame sequentially, starting at bufferOffset
for (py in 0 until height) {
for (px in 0 until width) {
// Destination pixel in videoBuffer
val outIdx = py * width + px
val offset = videoBufferOffset + outIdx * 3L
// Fill entire frame with black (for letterbox/pillarbox areas)
if (isCropEncoded) {
for (i in 0 until (width * height * 3L)) {
gpu.videoBuffer[videoBufferOffset + i] = 0
}
}
val yVal = gopY[t][outIdx]
val co = gopCo[t][outIdx]
val cg = gopCg[t][outIdx]
// Write cropped content to centered position
for (py in 0 until gopHeight) {
for (px in 0 until gopWidth) {
// Source pixel from GOP
val srcIdx = py * gopWidth + px
val yVal = gopY[t][srcIdx]
val co = gopCo[t][srcIdx]
val cg = gopCg[t][srcIdx]
// YCoCg-R to RGB conversion
val tmp = yVal - (cg / 2.0f)
@@ -6536,6 +6669,12 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val b = tmp - (co / 2.0f)
val r = b + co
// Destination pixel in full frame (with centering offset)
val dstY = py + maskTop
val dstX = px + maskLeft
val dstIdx = dstY * width + dstX
val offset = videoBufferOffset + dstIdx * 3L
// Clamp and write to videoBuffer
gpu.videoBuffer[offset + 0] = r.roundToInt().coerceIn(0, 255).toByte()
gpu.videoBuffer[offset + 1] = g.roundToInt().coerceIn(0, 255).toByte()

View File

@@ -3,8 +3,9 @@
CC = gcc
CXX = g++
CFLAGS = -std=c99 -Wall -Wextra -Ofast -D_GNU_SOURCE
CXXFLAGS = -std=c++11 -Wall -Wextra -Ofast -D_GNU_SOURCE
CFLAGS = -std=c99 -Wall -Wextra -Ofast -D_GNU_SOURCE #-fsanitize=address
CXXFLAGS = -std=c++11 -Wall -Wextra -Ofast -D_GNU_SOURCE #-fsanitize=address
DBGFLAGS = #-fsanitize=address
# Zstd flags (use pkg-config if available, fallback for cross-platform compatibility)
ZSTD_CFLAGS = $(shell pkg-config --cflags libzstd 2>/dev/null || echo "")
@@ -33,13 +34,13 @@ tav: encoder_tav.c encoder_tad.c encoder_tav_opencv.cpp
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c encoder_tav.c -o encoder_tav.o
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c encoder_tad.c -o encoder_tad.o
$(CXX) $(CXXFLAGS) $(OPENCV_CFLAGS) $(ZSTD_CFLAGS) -c encoder_tav_opencv.cpp -o encoder_tav_opencv.o
$(CXX) -o encoder_tav encoder_tav.o encoder_tad.o encoder_tav_opencv.o $(LIBS) $(OPENCV_LIBS)
$(CXX) $(DBGFLAGS) -o encoder_tav encoder_tav.o encoder_tad.o encoder_tav_opencv.o $(LIBS) $(OPENCV_LIBS)
tav_decoder: decoder_tav.c decoder_tad.c decoder_tad.h
rm -f decoder_tav decoder_tav.o
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -DTAD_DECODER_LIB -c decoder_tad.c -o decoder_tad.o
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c decoder_tav.c -o decoder_tav.o
$(CC) -o decoder_tav decoder_tav.o decoder_tad.o $(LIBS)
$(CC) $(DBGFLAGS) -o decoder_tav decoder_tav.o decoder_tad.o $(LIBS)
tav_inspector: tav_inspector.c
rm -f tav_inspector
@@ -50,7 +51,7 @@ encoder_tad: encoder_tad_standalone.c encoder_tad.c encoder_tad.h
rm -f encoder_tad encoder_tad_standalone.o encoder_tad.o
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c encoder_tad.c -o encoder_tad.o
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c encoder_tad_standalone.c -o encoder_tad_standalone.o
$(CC) -o encoder_tad encoder_tad_standalone.o encoder_tad.o $(LIBS)
$(CC) $(DBGFLAGS) -o encoder_tad encoder_tad_standalone.o encoder_tad.o $(LIBS)
decoder_tad: decoder_tad.c
rm -f decoder_tad

View File

@@ -689,12 +689,19 @@ static void decode_channel_ezbc(const uint8_t *ezbc_data, size_t offset, size_t
// fprintf(stderr, "[EZBC] Decoded header: MSB=%d, width=%d, height=%d (expected pixels=%d)\n",
// msb_bitplane, width, height, expected_count);
if (width * height != expected_count) {
fprintf(stderr, "EZBC dimension mismatch: %dx%d != %d\n", width, height, expected_count);
// With crop encoding, dimensions can vary per frame - trust the EZBC header
// Just ensure we don't overflow the output buffer
const int actual_count = width * height;
if (actual_count > expected_count) {
fprintf(stderr, "EZBC dimension overflow: %dx%d (%d) > %d\n",
width, height, actual_count, expected_count);
memset(output, 0, expected_count * sizeof(int16_t));
return;
}
// If actual count is less, only decode what we need
expected_count = actual_count;
// Initialise output and state tracking
memset(output, 0, expected_count * sizeof(int16_t));
int8_t *significant = calloc(expected_count, sizeof(int8_t));
@@ -785,6 +792,47 @@ static void decode_channel_ezbc(const uint8_t *ezbc_data, size_t offset, size_t
// nonzero_count, 100.0 * nonzero_count / expected_count, min_val, max_val);
}
// Helper: peek at EZBC header to get dimensions without decoding
// Reads the Y-channel EZBC header and writes the encoded frame dimensions to
// *out_width / *out_height. Returns 0 on success, -1 when the Y channel is
// absent (channel_layout bit 0x04 set) or the declared payload is too small.
// NOTE(review): `size` comes from the channel header and is never validated
// against the caller's actual buffer length — this assumes ezbc_read_bits
// clamps reads to reader.size; confirm against the bitreader implementation.
static int ezbc_peek_dimensions(const uint8_t *compressed_data, int channel_layout,
int *out_width, int *out_height) {
// Bit 0x04 of the channel layout means "no Y channel present".
const int has_y = (channel_layout & 0x04) == 0;
if (!has_y) {
return -1; // Need Y channel to get dimensions
}
// Read Y channel size header (4 bytes, little-endian)
const uint32_t size = ((uint32_t)compressed_data[0]) |
((uint32_t)compressed_data[1] << 8) |
((uint32_t)compressed_data[2] << 16) |
((uint32_t)compressed_data[3] << 24);
if (size < 6) {
return -1; // Too small to contain EZBC header
}
// Skip to EZBC data for Y channel (after size header)
const uint8_t *ezbc_data = compressed_data + 4;
// Read EZBC header: skip MSB bitplane (1 byte), then read width and height
// Note: EZBC uses bitstream format, but dimensions are at fixed positions
// We need to parse the bitstream header carefully
// Create a temporary reader to parse the bitstream
// NOTE(review): only these four fields are initialised — assumes
// ezbc_bitreader_t has no other members that need initialisation; confirm.
ezbc_bitreader_t reader;
reader.data = ezbc_data;
reader.size = size;
reader.byte_pos = 0;
reader.bit_pos = 0;
// Read header: MSB bitplane (8 bits), width (16 bits), height (16 bits)
ezbc_read_bits(&reader, 8); // Skip MSB bitplane
*out_width = ezbc_read_bits(&reader, 16);
*out_height = ezbc_read_bits(&reader, 16);
return 0;
}
// EZBC postprocessing for single frames
static void postprocess_coefficients_ezbc(uint8_t *compressed_data, int coeff_count,
int16_t *output_y, int16_t *output_co, int16_t *output_cg,
@@ -1457,15 +1505,61 @@ error_cleanup:
// Postprocess GOP EZBC format to per-frame coefficients (entropyCoder=1)
// Layout: [frame0_size(4)][frame0_ezbc_data][frame1_size(4)][frame1_ezbc_data]...
// Note: EZBC is a complex embedded bitplane codec - this is a simplified placeholder
// Returns the actual dimensions through output parameters (for crop encoding support)
static int16_t ***postprocess_gop_ezbc(const uint8_t *decompressed_data, size_t data_size,
int gop_size, int num_pixels, int channel_layout) {
// Allocate output arrays: [gop_size][3 channels][num_pixels]
int gop_size, int num_pixels, int channel_layout,
int *out_width, int *out_height) {
// First, peek at the first frame's dimensions to determine actual GOP size
// (with crop encoding, GOP dimensions may be smaller than full frame)
int actual_width = 0, actual_height = 0;
int actual_pixels = num_pixels; // Default to full frame if peek fails
if (data_size >= 8) { // Need at least frame size header + some EZBC data
// Skip first frame's size header to get to EZBC data
const uint32_t first_frame_size = ((uint32_t)decompressed_data[0]) |
((uint32_t)decompressed_data[1] << 8) |
((uint32_t)decompressed_data[2] << 16) |
((uint32_t)decompressed_data[3] << 24);
if (4 + first_frame_size <= data_size) {
if (ezbc_peek_dimensions(decompressed_data + 4, channel_layout,
&actual_width, &actual_height) == 0) {
actual_pixels = actual_width * actual_height;
// Only log if dimensions differ significantly (crop encoding active)
// Suppress repetitive messages by using static counter
static int crop_log_count = 0;
if (actual_pixels != num_pixels && crop_log_count < 3) {
fprintf(stderr, "[GOP-EZBC] Detected crop encoding: GOP dimensions %dx%d (%d pixels) vs full frame %d pixels\n",
actual_width, actual_height, actual_pixels, num_pixels);
crop_log_count++;
if (crop_log_count == 3) {
fprintf(stderr, "[GOP-EZBC] (Further crop encoding messages suppressed)\n");
}
}
}
}
}
// If we didn't successfully peek dimensions, calculate from num_pixels
if (actual_width == 0 || actual_height == 0) {
// Assume square-ish dimensions - this is a fallback, should not happen with proper encoding
actual_width = (int)sqrt(num_pixels);
actual_height = num_pixels / actual_width;
actual_pixels = actual_width * actual_height;
}
// Return actual dimensions to caller
if (out_width) *out_width = actual_width;
if (out_height) *out_height = actual_height;
// Allocate output arrays: [gop_size][3 channels][actual_pixels]
// Use actual GOP dimensions (may be cropped) not full frame size
int16_t ***output = malloc(gop_size * sizeof(int16_t **));
for (int t = 0; t < gop_size; t++) {
output[t] = malloc(3 * sizeof(int16_t *));
output[t][0] = calloc(num_pixels, sizeof(int16_t)); // Y
output[t][1] = calloc(num_pixels, sizeof(int16_t)); // Co
output[t][2] = calloc(num_pixels, sizeof(int16_t)); // Cg
output[t][0] = calloc(actual_pixels, sizeof(int16_t)); // Y
output[t][1] = calloc(actual_pixels, sizeof(int16_t)); // Co
output[t][2] = calloc(actual_pixels, sizeof(int16_t)); // Cg
}
int offset = 0;
@@ -1491,8 +1585,9 @@ static int16_t ***postprocess_gop_ezbc(const uint8_t *decompressed_data, size_t
}
// Decode EZBC frame using the single-frame EZBC decoder
// Pass actual_pixels (cropped size) not num_pixels (full frame size)
postprocess_coefficients_ezbc(
(uint8_t *)(decompressed_data + offset), num_pixels,
(uint8_t *)(decompressed_data + offset), actual_pixels,
output[t][0], output[t][1], output[t][2],
channel_layout);
@@ -1622,6 +1717,12 @@ typedef struct {
uint16_t screen_mask_bottom;
uint16_t screen_mask_left;
// Phase 2: Decoding dimensions (may differ from full frame dimensions per GOP)
int decoding_width; // Actual encoded dimensions (cropped active region)
int decoding_height; // Updated when Screen Mask packet is encountered
// Note: Buffers are allocated at max size (header.width × header.height)
// but only decoding_width × decoding_height portion is used
// FFmpeg pipe for video only (audio from file)
FILE *video_pipe;
pid_t ffmpeg_pid;
@@ -1844,6 +1945,10 @@ static tav_decoder_t* tav_decoder_init(const char *input_file, const char *outpu
decoder->is_monoblock = (decoder->header.version >= 3 && decoder->header.version <= 6);
decoder->audio_file_path = strdup(audio_file);
// Phase 2: Initialize decoding dimensions to full frame (will be updated by Screen Mask packets)
decoder->decoding_width = decoder->header.width;
decoder->decoding_height = decoder->header.height;
// Allocate buffers
decoder->current_frame_rgb = calloc(decoder->frame_size * 3, 1);
decoder->reference_frame_rgb = calloc(decoder->frame_size * 3, 1);
@@ -1984,6 +2089,37 @@ static void tav_decoder_free(tav_decoder_t *decoder) {
//=============================================================================
// Fill masked regions (letterbox/pillarbox bars) with black
// Phase 2: Composite cropped frame back to full frame with black borders.
// Returns a newly calloc'd full_width*full_height*3 RGB24 buffer with the
// cropped frame copied into the active region at (left, top); letterbox and
// pillarbox borders remain black. Returns NULL on allocation failure or when
// the cropped region (plus its offsets) does not fit inside the full frame —
// the original code performed no such check and could overflow the heap when
// mask values were inconsistent with the cropped dimensions.
// Caller owns the returned buffer and must free() it.
static uint8_t* composite_to_full_frame(const uint8_t *cropped_rgb,
                                        int cropped_width, int cropped_height,
                                        int full_width, int full_height,
                                        uint16_t top, uint16_t right,
                                        uint16_t bottom, uint16_t left) {
    // Only the top/left offsets position the active region; right/bottom are
    // implied by the frame size and kept for signature symmetry.
    (void)right;
    (void)bottom;
    // Reject geometry that would write outside the full frame buffer.
    if (cropped_width < 0 || cropped_height < 0 ||
        (int)left + cropped_width > full_width ||
        (int)top + cropped_height > full_height) {
        fprintf(stderr, "composite_to_full_frame: region %dx%d at (%u,%u) exceeds %dx%d frame\n",
                cropped_width, cropped_height, left, top, full_width, full_height);
        return NULL;
    }
    // Allocate full frame buffer (calloc fills the borders with black)
    uint8_t *full_frame = calloc((size_t)full_width * full_height * 3, sizeof(uint8_t));
    if (!full_frame) {
        return NULL;
    }
    // Copy cropped frame into the active region one row at a time
    const size_t row_bytes = (size_t)cropped_width * 3;
    for (int y = 0; y < cropped_height; y++) {
        const uint8_t *src_row = cropped_rgb + (size_t)y * row_bytes;
        uint8_t *dest_row = full_frame + (((size_t)(top + y) * full_width) + left) * 3;
        memcpy(dest_row, src_row, row_bytes);
    }
    return full_frame;
}
static void fill_masked_regions(uint8_t *frame_rgb, int width, int height,
uint16_t top, uint16_t right, uint16_t bottom, uint16_t left) {
// Fill top letterbox bar
@@ -2145,7 +2281,9 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
memcpy(decoder->current_frame_rgb, decoder->reference_frame_rgb, decoder->frame_size * 3);
} else {
// Decode coefficients (use function-level variables for proper cleanup)
int coeff_count = decoder->frame_size;
// Phase 2: Use decoding dimensions (actual encoded size)
const int decoding_pixels = decoder->decoding_width * decoder->decoding_height;
int coeff_count = decoding_pixels;
quantised_y = calloc(coeff_count, sizeof(int16_t));
quantised_co = calloc(coeff_count, sizeof(int16_t));
quantised_cg = calloc(coeff_count, sizeof(int16_t));
@@ -2183,45 +2321,52 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
// fprintf(stderr, " Max quantised Y coefficient: %d\n", max_quant_y);
// }
// Phase 2: Allocate temporary DWT buffers for cropped region processing
float *temp_dwt_y = calloc(decoding_pixels, sizeof(float));
float *temp_dwt_co = calloc(decoding_pixels, sizeof(float));
float *temp_dwt_cg = calloc(decoding_pixels, sizeof(float));
if (!temp_dwt_y || !temp_dwt_co || !temp_dwt_cg) {
fprintf(stderr, "Error: Failed to allocate temporary DWT buffers\n");
free(temp_dwt_y);
free(temp_dwt_co);
free(temp_dwt_cg);
decode_success = 0;
goto write_frame;
}
// Dequantise (perceptual for versions 5-8, uniform for 1-4)
// Phase 2: Use decoding dimensions and temporary buffers
const int is_perceptual = (decoder->header.version >= 5 && decoder->header.version <= 8);
const int is_ezbc = (decoder->header.entropy_coder == 1);
if (is_ezbc && is_perceptual) {
// EZBC mode with perceptual quantisation: coefficients are normalised
// Need to dequantise using perceptual weights (same as twobit-map mode)
dequantise_dwt_subbands_perceptual(0, qy, quantised_y, decoder->dwt_buffer_y,
decoder->header.width, decoder->header.height,
dequantise_dwt_subbands_perceptual(0, qy, quantised_y, temp_dwt_y,
decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, qy, 0, decoder->frame_count);
dequantise_dwt_subbands_perceptual(0, qy, quantised_co, decoder->dwt_buffer_co,
decoder->header.width, decoder->header.height,
dequantise_dwt_subbands_perceptual(0, qy, quantised_co, temp_dwt_co,
decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, qco, 1, decoder->frame_count);
dequantise_dwt_subbands_perceptual(0, qy, quantised_cg, decoder->dwt_buffer_cg,
decoder->header.width, decoder->header.height,
dequantise_dwt_subbands_perceptual(0, qy, quantised_cg, temp_dwt_cg,
decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, qcg, 1, decoder->frame_count);
} else if (is_perceptual) {
dequantise_dwt_subbands_perceptual(0, qy, quantised_y, decoder->dwt_buffer_y,
decoder->header.width, decoder->header.height,
dequantise_dwt_subbands_perceptual(0, qy, quantised_y, temp_dwt_y,
decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, qy, 0, decoder->frame_count);
// Debug: Check if values survived the function call
// if (decoder->frame_count == 32) {
// fprintf(stderr, " RIGHT AFTER dequantise_Y returns: first 5 values: %.1f %.1f %.1f %.1f %.1f\n",
// decoder->dwt_buffer_y[0], decoder->dwt_buffer_y[1], decoder->dwt_buffer_y[2],
// decoder->dwt_buffer_y[3], decoder->dwt_buffer_y[4]);
// }
dequantise_dwt_subbands_perceptual(0, qy, quantised_co, decoder->dwt_buffer_co,
decoder->header.width, decoder->header.height,
dequantise_dwt_subbands_perceptual(0, qy, quantised_co, temp_dwt_co,
decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, qco, 1, decoder->frame_count);
dequantise_dwt_subbands_perceptual(0, qy, quantised_cg, decoder->dwt_buffer_cg,
decoder->header.width, decoder->header.height,
dequantise_dwt_subbands_perceptual(0, qy, quantised_cg, temp_dwt_cg,
decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, qcg, 1, decoder->frame_count);
} else {
for (int i = 0; i < coeff_count; i++) {
decoder->dwt_buffer_y[i] = quantised_y[i] * qy;
decoder->dwt_buffer_co[i] = quantised_co[i] * qco;
decoder->dwt_buffer_cg[i] = quantised_cg[i] * qcg;
temp_dwt_y[i] = quantised_y[i] * qy;
temp_dwt_co[i] = quantised_co[i] * qco;
temp_dwt_cg[i] = quantised_cg[i] * qcg;
}
}
@@ -2253,7 +2398,8 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
// }
// Remove grain synthesis from Y channel (must happen after dequantisation, before inverse DWT)
remove_grain_synthesis_decoder(decoder->dwt_buffer_y, decoder->header.width, decoder->header.height,
// Phase 2: Use decoding dimensions and temporary buffer
remove_grain_synthesis_decoder(temp_dwt_y, decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, decoder->frame_count, decoder->header.quantiser_y);
// Debug: Check LL band AFTER grain removal
@@ -2272,12 +2418,13 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
// }
// Apply inverse DWT with correct non-power-of-2 dimension handling
// Phase 2: Use decoding dimensions and temporary buffers
// Note: quantised arrays freed at write_frame label
apply_inverse_dwt_multilevel(decoder->dwt_buffer_y, decoder->header.width, decoder->header.height,
apply_inverse_dwt_multilevel(temp_dwt_y, decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, decoder->header.wavelet_filter);
apply_inverse_dwt_multilevel(decoder->dwt_buffer_co, decoder->header.width, decoder->header.height,
apply_inverse_dwt_multilevel(temp_dwt_co, decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, decoder->header.wavelet_filter);
apply_inverse_dwt_multilevel(decoder->dwt_buffer_cg, decoder->header.width, decoder->header.height,
apply_inverse_dwt_multilevel(temp_dwt_cg, decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, decoder->header.wavelet_filter);
// Debug: Check spatial domain values after IDWT
@@ -2301,47 +2448,67 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
// }
// Handle P-frame delta accumulation (in YCoCg float space)
// TODO Phase 2: P-frame support with crop encoding needs additional work
// - Reference frames are stored at full size but delta may be at cropped size
// - Need to extract/composite reference region appropriately
if (packet_type == TAV_PACKET_PFRAME && mode == TAV_MODE_DELTA) {
for (int i = 0; i < decoder->frame_size; i++) {
decoder->dwt_buffer_y[i] += decoder->reference_ycocg_y[i];
decoder->dwt_buffer_co[i] += decoder->reference_ycocg_co[i];
decoder->dwt_buffer_cg[i] += decoder->reference_ycocg_cg[i];
fprintf(stderr, "Warning: P-frame delta mode not yet fully supported with crop encoding\n");
for (int i = 0; i < decoding_pixels; i++) {
temp_dwt_y[i] += decoder->reference_ycocg_y[i];
temp_dwt_co[i] += decoder->reference_ycocg_co[i];
temp_dwt_cg[i] += decoder->reference_ycocg_cg[i];
}
}
// Convert YCoCg-R/ICtCp to RGB
const int is_ictcp = (decoder->header.version % 2 == 0);
float max_y = -999, max_co = -999, max_cg = -999;
int max_r = 0, max_g = 0, max_b = 0;
// Phase 2: Convert cropped region to RGB, then composite to full frame
uint8_t *cropped_rgb = malloc(decoding_pixels * 3);
if (!cropped_rgb) {
fprintf(stderr, "Error: Failed to allocate cropped RGB buffer\n");
free(temp_dwt_y);
free(temp_dwt_co);
free(temp_dwt_cg);
decode_success = 0;
goto write_frame;
}
for (int i = 0; i < decoder->frame_size; i++) {
// Convert YCoCg-R/ICtCp to RGB for cropped region
const int is_ictcp = (decoder->header.version % 2 == 0);
for (int i = 0; i < decoding_pixels; i++) {
uint8_t r, g, b;
if (is_ictcp) {
ictcp_to_rgb(decoder->dwt_buffer_y[i],
decoder->dwt_buffer_co[i],
decoder->dwt_buffer_cg[i], &r, &g, &b);
ictcp_to_rgb(temp_dwt_y[i], temp_dwt_co[i], temp_dwt_cg[i], &r, &g, &b);
} else {
ycocg_r_to_rgb(decoder->dwt_buffer_y[i],
decoder->dwt_buffer_co[i],
decoder->dwt_buffer_cg[i], &r, &g, &b);
ycocg_r_to_rgb(temp_dwt_y[i], temp_dwt_co[i], temp_dwt_cg[i], &r, &g, &b);
}
// Track max values for debugging
// if (decoder->frame_count == 1000) {
// if (decoder->dwt_buffer_y[i] > max_y) max_y = decoder->dwt_buffer_y[i];
// if (decoder->dwt_buffer_co[i] > max_co) max_co = decoder->dwt_buffer_co[i];
// if (decoder->dwt_buffer_cg[i] > max_cg) max_cg = decoder->dwt_buffer_cg[i];
// if (r > max_r) max_r = r;
// if (g > max_g) max_g = g;
// if (b > max_b) max_b = b;
// }
// RGB byte order for FFmpeg rgb24
decoder->current_frame_rgb[i * 3 + 0] = r;
decoder->current_frame_rgb[i * 3 + 1] = g;
decoder->current_frame_rgb[i * 3 + 2] = b;
cropped_rgb[i * 3 + 0] = r;
cropped_rgb[i * 3 + 1] = g;
cropped_rgb[i * 3 + 2] = b;
}
// Composite cropped frame to full frame with black borders
uint8_t *full_frame_rgb = composite_to_full_frame(cropped_rgb,
decoder->decoding_width, decoder->decoding_height,
decoder->header.width, decoder->header.height,
decoder->screen_mask_top, decoder->screen_mask_right,
decoder->screen_mask_bottom, decoder->screen_mask_left);
free(cropped_rgb);
free(temp_dwt_y);
free(temp_dwt_co);
free(temp_dwt_cg);
if (!full_frame_rgb) {
fprintf(stderr, "Error: Failed to composite frame to full size\n");
decode_success = 0;
goto write_frame;
}
// Copy composited frame to decoder buffer
memcpy(decoder->current_frame_rgb, full_frame_rgb, decoder->frame_size * 3);
free(full_frame_rgb);
// if (decoder->frame_count == 1000) {
// fprintf(stderr, "\n=== Frame 1000 Value Analysis ===\n");
// fprintf(stderr, "Max YCoCg values: Y=%.1f, Co=%.1f, Cg=%.1f\n", max_y, max_co, max_cg);
@@ -2360,10 +2527,12 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
// fprintf(stderr, "\n");
// }
// Update reference YCoCg frame
memcpy(decoder->reference_ycocg_y, decoder->dwt_buffer_y, decoder->frame_size * sizeof(float));
memcpy(decoder->reference_ycocg_co, decoder->dwt_buffer_co, decoder->frame_size * sizeof(float));
memcpy(decoder->reference_ycocg_cg, decoder->dwt_buffer_cg, decoder->frame_size * sizeof(float));
// TODO Phase 2: Reference YCoCg frame update needs rework for crop encoding
// Currently not updated because we use temporary buffers that are already freed
// P-frame support will need to store reference at appropriate dimensions
// memcpy(decoder->reference_ycocg_y, temp_dwt_y, decoding_pixels * sizeof(float));
// memcpy(decoder->reference_ycocg_co, temp_dwt_co, decoding_pixels * sizeof(float));
// memcpy(decoder->reference_ycocg_cg, temp_dwt_cg, decoding_pixels * sizeof(float));
}
// Update reference frame
@@ -2622,9 +2791,20 @@ int main(int argc, char *argv[]) {
entry->bottom = bottom;
entry->left = left;
// Phase 2: Update current active mask and decoding dimensions
decoder->screen_mask_top = top;
decoder->screen_mask_right = right;
decoder->screen_mask_bottom = bottom;
decoder->screen_mask_left = left;
// Calculate new decoding dimensions (active region size)
decoder->decoding_width = decoder->header.width - left - right;
decoder->decoding_height = decoder->header.height - top - bottom;
if (verbose) {
fprintf(stderr, "Packet %d: SCREEN_MASK (0x%02X) - frame=%u top=%u right=%u bottom=%u left=%u\n",
total_packets, packet_type, frame_num, top, right, bottom, left);
fprintf(stderr, "Packet %d: SCREEN_MASK (0x%02X) - frame=%u top=%u right=%u bottom=%u left=%u (decoding: %dx%d)\n",
total_packets, packet_type, frame_num, top, right, bottom, left,
decoder->decoding_width, decoder->decoding_height);
}
continue;
}
@@ -2689,27 +2869,47 @@ int main(int argc, char *argv[]) {
}
// Postprocess coefficients based on entropy_coder value
// Phase 2: Use decoding dimensions (actual encoded size) for postprocessing
int decoding_pixels = decoder->decoding_width * decoder->decoding_height;
// Keep full frame size for buffer allocation
const int num_pixels = decoder->header.width * decoder->header.height;
int16_t ***quantised_gop;
// GOP dimensions (may differ from full frame with crop encoding)
int gop_width = decoder->decoding_width;
int gop_height = decoder->decoding_height;
if (decoder->header.entropy_coder == 2) {
// RAW format: simple concatenated int16 arrays
if (verbose) {
fprintf(stderr, " Using RAW postprocessing (entropy_coder=2)\n");
fprintf(stderr, " Using RAW postprocessing (entropy_coder=2) for %dx%d (%d pixels)\n",
decoder->decoding_width, decoder->decoding_height, decoding_pixels);
}
quantised_gop = postprocess_gop_raw(decompressed_data, decompressed_size,
gop_size, num_pixels, decoder->header.channel_layout);
} else if (decoder->header.entropy_coder == 1) {
// EZBC format: embedded zero-block coding
if (verbose) {
fprintf(stderr, " Using EZBC postprocessing (entropy_coder=1)\n");
fprintf(stderr, " Using EZBC postprocessing (entropy_coder=1) for %dx%d (%d pixels)\n",
decoder->decoding_width, decoder->decoding_height, decoding_pixels);
}
// EZBC will return actual GOP dimensions (may be cropped with crop encoding)
quantised_gop = postprocess_gop_ezbc(decompressed_data, decompressed_size,
gop_size, num_pixels, decoder->header.channel_layout);
gop_size, num_pixels, decoder->header.channel_layout,
&gop_width, &gop_height);
// Update decoding_pixels to match actual GOP dimensions
if (gop_width > 0 && gop_height > 0) {
decoding_pixels = gop_width * gop_height;
if (verbose) {
fprintf(stderr, " Actual GOP dimensions from EZBC: %dx%d (%d pixels)\n",
gop_width, gop_height, decoding_pixels);
}
}
} else {
// Default: Twobitmap format (entropy_coder=0)
if (verbose) {
fprintf(stderr, " Using Twobitmap postprocessing (entropy_coder=0)\n");
fprintf(stderr, " Using Twobitmap postprocessing (entropy_coder=0) for %dx%d (%d pixels)\n",
decoder->decoding_width, decoder->decoding_height, decoding_pixels);
}
quantised_gop = postprocess_gop_unified(decompressed_data, decompressed_size,
gop_size, num_pixels, decoder->header.channel_layout);
@@ -2724,14 +2924,15 @@ int main(int argc, char *argv[]) {
}
// Allocate GOP float buffers
// Phase 2: Allocate at decoding size (cropped region), will composite to full frame later
float **gop_y = malloc(gop_size * sizeof(float *));
float **gop_co = malloc(gop_size * sizeof(float *));
float **gop_cg = malloc(gop_size * sizeof(float *));
for (int t = 0; t < gop_size; t++) {
gop_y[t] = calloc(num_pixels, sizeof(float));
gop_co[t] = calloc(num_pixels, sizeof(float));
gop_cg[t] = calloc(num_pixels, sizeof(float));
gop_y[t] = calloc(decoding_pixels, sizeof(float));
gop_co[t] = calloc(decoding_pixels, sizeof(float));
gop_cg[t] = calloc(decoding_pixels, sizeof(float));
}
// Dequantise with temporal scaling (perceptual quantisation for versions 5-8)
@@ -2751,17 +2952,18 @@ int main(int argc, char *argv[]) {
const float base_q_co = roundf(QLUT[decoder->header.quantiser_co] * temporal_scale);
const float base_q_cg = roundf(QLUT[decoder->header.quantiser_cg] * temporal_scale);
// Phase 2: Use GOP dimensions (may be cropped) for dequantisation
dequantise_dwt_subbands_perceptual(0, QLUT[decoder->header.quantiser_y],
quantised_gop[t][0], gop_y[t],
decoder->header.width, decoder->header.height,
gop_width, gop_height,
decoder->header.decomp_levels, base_q_y, 0, decoder->frame_count + t);
dequantise_dwt_subbands_perceptual(0, QLUT[decoder->header.quantiser_y],
quantised_gop[t][1], gop_co[t],
decoder->header.width, decoder->header.height,
gop_width, gop_height,
decoder->header.decomp_levels, base_q_co, 1, decoder->frame_count + t);
dequantise_dwt_subbands_perceptual(0, QLUT[decoder->header.quantiser_y],
quantised_gop[t][2], gop_cg[t],
decoder->header.width, decoder->header.height,
gop_width, gop_height,
decoder->header.decomp_levels, base_q_cg, 1, decoder->frame_count + t);
if (t == 0 && verbose) {
@@ -2786,21 +2988,23 @@ int main(int argc, char *argv[]) {
const float base_q_cg = roundf(QLUT[decoder->header.quantiser_cg] * temporal_scale);
if (is_perceptual) {
// Phase 2: Use GOP dimensions (may be cropped) for dequantisation
dequantise_dwt_subbands_perceptual(0, QLUT[decoder->header.quantiser_y],
quantised_gop[t][0], gop_y[t],
decoder->header.width, decoder->header.height,
gop_width, gop_height,
decoder->header.decomp_levels, base_q_y, 0, decoder->frame_count + t);
dequantise_dwt_subbands_perceptual(0, QLUT[decoder->header.quantiser_y],
quantised_gop[t][1], gop_co[t],
decoder->header.width, decoder->header.height,
gop_width, gop_height,
decoder->header.decomp_levels, base_q_co, 1, decoder->frame_count + t);
dequantise_dwt_subbands_perceptual(0, QLUT[decoder->header.quantiser_y],
quantised_gop[t][2], gop_cg[t],
decoder->header.width, decoder->header.height,
gop_width, gop_height,
decoder->header.decomp_levels, base_q_cg, 1, decoder->frame_count + t);
} else {
// Uniform quantisation for older versions
for (int i = 0; i < num_pixels; i++) {
// Phase 2: Use decoding_pixels for uniform dequantisation
for (int i = 0; i < decoding_pixels; i++) {
gop_y[t][i] = quantised_gop[t][0][i] * base_q_y;
gop_co[t][i] = quantised_gop[t][1][i] * base_q_co;
gop_cg[t][i] = quantised_gop[t][2][i] * base_q_cg;
@@ -2819,14 +3023,16 @@ int main(int argc, char *argv[]) {
free(quantised_gop);
// Phase 2: Use GOP dimensions (may be cropped) for grain removal
for (int t = 0; t < gop_size; t++) {
remove_grain_synthesis_decoder(gop_y[t], decoder->header.width, decoder->header.height,
remove_grain_synthesis_decoder(gop_y[t], gop_width, gop_height,
decoder->header.decomp_levels, decoder->frame_count + t,
decoder->header.quantiser_y);
}
// Apply inverse 3D DWT (spatial + temporal)
apply_inverse_3d_dwt(gop_y, gop_co, gop_cg, decoder->header.width, decoder->header.height,
// Phase 2: Use GOP dimensions (may be cropped) for inverse DWT
apply_inverse_3d_dwt(gop_y, gop_co, gop_cg, gop_width, gop_height,
gop_size, decoder->header.decomp_levels, temporal_levels,
decoder->header.wavelet_filter);
@@ -2859,35 +3065,78 @@ int main(int argc, char *argv[]) {
// (size_t)decoder->frame_size * 3, decoder->header.width * decoder->header.height * 3);
// }
// Calculate consistent screen mask offsets for crop-encoded GOPs
// When crop encoding is active, all frames in GOP use same dimensions
const int is_crop_encoded = (gop_width != decoder->header.width ||
gop_height != decoder->header.height);
uint16_t gop_mask_top = 0, gop_mask_bottom = 0, gop_mask_left = 0, gop_mask_right = 0;
if (is_crop_encoded) {
// Center the cropped region in the full frame
if (gop_height < decoder->header.height) {
gop_mask_top = (decoder->header.height - gop_height) / 2;
gop_mask_bottom = decoder->header.height - gop_height - gop_mask_top;
}
if (gop_width < decoder->header.width) {
gop_mask_left = (decoder->header.width - gop_width) / 2;
gop_mask_right = decoder->header.width - gop_width - gop_mask_left;
}
if (verbose && decoder->frame_count == 0) {
fprintf(stderr, "[GOP-Crop] Centering %dx%d in %dx%d: top=%u, bottom=%u, left=%u, right=%u\n",
gop_width, gop_height, decoder->header.width, decoder->header.height,
gop_mask_top, gop_mask_bottom, gop_mask_left, gop_mask_right);
}
}
for (int t = 0; t < gop_size; t++) {
// Allocate frame buffer
uint8_t *frame_rgb = malloc(decoder->frame_size * 3);
if (!frame_rgb) {
fprintf(stderr, "Error: Failed to allocate GOP frame buffer\n");
// Update screen mask only if NOT crop-encoded
// Crop-encoded GOPs use consistent offsets calculated above
if (!is_crop_encoded) {
update_screen_mask(decoder, decoder->frame_count + t);
}
// Phase 2: Convert cropped region to RGB, then composite to full frame
uint8_t *cropped_rgb = malloc(decoding_pixels * 3);
if (!cropped_rgb) {
fprintf(stderr, "Error: Failed to allocate cropped GOP frame buffer\n");
result = -1;
break;
}
// Convert to RGB
for (int i = 0; i < decoder->frame_size; i++) {
// Convert cropped region to RGB
for (int i = 0; i < decoding_pixels; i++) {
uint8_t r, g, b;
if (is_ictcp) {
ictcp_to_rgb(gop_y[t][i], gop_co[t][i], gop_cg[t][i], &r, &g, &b);
} else {
ycocg_r_to_rgb(gop_y[t][i], gop_co[t][i], gop_cg[t][i], &r, &g, &b);
}
frame_rgb[i * 3 + 0] = r;
frame_rgb[i * 3 + 1] = g;
frame_rgb[i * 3 + 2] = b;
cropped_rgb[i * 3 + 0] = r;
cropped_rgb[i * 3 + 1] = g;
cropped_rgb[i * 3 + 2] = b;
}
// Update active screen mask for this GOP frame
update_screen_mask(decoder, decoder->frame_count + t);
// Composite cropped frame to full frame with black borders
// Use GOP-consistent offsets for crop-encoded, or per-frame offsets otherwise
const uint16_t mask_top = is_crop_encoded ? gop_mask_top : decoder->screen_mask_top;
const uint16_t mask_bottom = is_crop_encoded ? gop_mask_bottom : decoder->screen_mask_bottom;
const uint16_t mask_left = is_crop_encoded ? gop_mask_left : decoder->screen_mask_left;
const uint16_t mask_right = is_crop_encoded ? gop_mask_right : decoder->screen_mask_right;
// Fill masked regions with black (letterbox/pillarbox bars)
fill_masked_regions(frame_rgb, decoder->header.width, decoder->header.height,
decoder->screen_mask_top, decoder->screen_mask_right,
decoder->screen_mask_bottom, decoder->screen_mask_left);
uint8_t *frame_rgb = composite_to_full_frame(cropped_rgb,
gop_width, gop_height,
decoder->header.width, decoder->header.height,
mask_top, mask_right, mask_bottom, mask_left);
free(cropped_rgb);
if (!frame_rgb) {
fprintf(stderr, "Error: Failed to composite GOP frame to full size\n");
result = -1;
break;
}
// Note: Phase 1 fill_masked_regions() is now replaced by Phase 2 composite function
// which places the decoded cropped frame into a full-frame buffer with black borders
// Write frame to FFmpeg video pipe
const size_t bytes_to_write = decoder->frame_size * 3;

File diff suppressed because it is too large Load Diff